source: icGREP/icgrep-devel/icgrep/pablo/pablo_compiler.cpp @ 4568

Last change on this file since 4568 was 4568, checked in by cameron, 4 years ago

Abstract conditions for if-summary computation

File size: 45.1 KB
Line 
1/*
2 *  Copyright (c) 2014-15 International Characters.
3 *  This software is licensed to the public under the Open Software License 3.0.
4 *  icgrep is a trademark of International Characters.
5 */
6
7#include <pablo/pablo_compiler.h>
8#include <pablo/codegenstate.h>
9#include <pablo/carry_data.h>
10#include <pablo/printer_pablos.h>
11#include <cc/cc_namemap.hpp>
12#include <re/re_name.h>
13#include <stdexcept>
14#include <include/simd-lib/bitblock.hpp>
15#include <sstream>
16#include <llvm/IR/Verifier.h>
17#include <llvm/Pass.h>
18#include <llvm/PassManager.h>
19#include <llvm/ADT/SmallVector.h>
20#include <llvm/Analysis/Passes.h>
21#include <llvm/IR/BasicBlock.h>
22#include <llvm/IR/CallingConv.h>
23#include <llvm/IR/Constants.h>
24#include <llvm/IR/DataLayout.h>
25#include <llvm/IR/DerivedTypes.h>
26#include <llvm/IR/Function.h>
27#include <llvm/IR/GlobalVariable.h>
28#include <llvm/IR/InlineAsm.h>
29#include <llvm/IR/Instructions.h>
30#include <llvm/IR/LLVMContext.h>
31#include <llvm/IR/Module.h>
32#include <llvm/Support/FormattedStream.h>
33#include <llvm/Support/MathExtras.h>
34#include <llvm/Support/Casting.h>
35#include <llvm/Support/Compiler.h>
36#include <llvm/Support/Debug.h>
37#include <llvm/Support/TargetSelect.h>
38#include <llvm/Support/Host.h>
39#include <llvm/Transforms/Scalar.h>
40#include <llvm/ExecutionEngine/ExecutionEngine.h>
41#include <llvm/ExecutionEngine/MCJIT.h>
42#include <llvm/IRReader/IRReader.h>
43#include <llvm/Bitcode/ReaderWriter.h>
44#include <llvm/Support/MemoryBuffer.h>
45#include <llvm/IR/IRBuilder.h>
46#include <llvm/Support/CommandLine.h>
47#include <llvm/ADT/Twine.h>
48#include <iostream>
49
50static cl::OptionCategory eIRDumpOptions("LLVM IR Dump Options", "These options control dumping of LLVM IR.");
51static cl::opt<bool> DumpGeneratedIR("dump-generated-IR", cl::init(false), cl::desc("print LLVM IR generated by RE compilation"), cl::cat(eIRDumpOptions));
52
53static cl::OptionCategory fTracingOptions("Run-time Tracing Options", "These options control execution traces.");
54static cl::opt<bool> TraceNext("trace-next-nodes", cl::init(false), cl::desc("Generate dynamic traces of executed Next nodes (while control variables)."), cl::cat(fTracingOptions));
55static cl::opt<bool> DumpTrace("dump-trace", cl::init(false), cl::desc("Generate dynamic traces of executed assignments."), cl::cat(fTracingOptions));
56
57extern "C" {
58  void wrapped_print_register(char * regName, BitBlock bit_block) {
59      print_register<BitBlock>(regName, bit_block);
60  }
61}
62
63namespace pablo {
64
65PabloCompiler::PabloCompiler(const std::vector<Var*> & basisBits)
66: mBasisBits(basisBits)
67, mMod(new Module("icgrep", getGlobalContext()))
68, mBasicBlock(nullptr)
69, mExecutionEngine(nullptr)
70, mBitBlockType(VectorType::get(IntegerType::get(mMod->getContext(), 64), BLOCK_SIZE / 64))
71, mBasisBitsInputPtr(nullptr)
72, mCarryDataPtr(nullptr)
73, mBlockNo(nullptr)
74, mWhileDepth(0)
75, mIfDepth(0)
76, mZeroInitializer(ConstantAggregateZero::get(mBitBlockType))
77, mOneInitializer(ConstantVector::getAllOnesValue(mBitBlockType))
78, mFunctionType(nullptr)
79, mFunction(nullptr)
80, mBasisBitsAddr(nullptr)
81, mOutputAddrPtr(nullptr)
82, mMaxWhileDepth(0)
83, mPrintRegisterFunction(nullptr)
84{
85    //Create the jit execution engine.up
86    InitializeNativeTarget();
87    InitializeNativeTargetAsmPrinter();
88    InitializeNativeTargetAsmParser();
89    DefineTypes();
90}
91
92PabloCompiler::~PabloCompiler()
93{
94
95}
96   
97void PabloCompiler::InstallExternalFunction(std::string C_fn_name, void * fn_ptr) {
98    mExternalMap.insert(std::make_pair(C_fn_name, fn_ptr));
99}
100
101void PabloCompiler::genPrintRegister(std::string regName, Value * bitblockValue) {
102    IRBuilder <> b(mBasicBlock);
103    Constant * regNameData = ConstantDataArray::getString(mMod->getContext(), regName);
104    GlobalVariable *regStrVar = new GlobalVariable(*mMod, 
105                                                   ArrayType::get(IntegerType::get(mMod->getContext(), 8), regName.length()+1),
106                                                   /*isConstant=*/ true,
107                                                   /*Linkage=*/ GlobalValue::PrivateLinkage,
108                                                   /*Initializer=*/ regNameData);
109    Value * regStrPtr = b.CreateGEP(regStrVar, {b.getInt64(0), b.getInt32(0)});
110    b.CreateCall(mPrintRegisterFunction, {regStrPtr, bitblockValue});
111}
112
113CompiledPabloFunction PabloCompiler::compile(PabloBlock & pb)
114{
115    mWhileDepth = 0;
116    mIfDepth = 0;
117    mMaxWhileDepth = 0;
118    // Get the total number of carry entries; add 1 extra element for the block number.
119    unsigned totalCarryDataSize = pb.carryData.enumerate(pb) + 1;
120    Examine(pb); 
121    mCarryInVector.resize(totalCarryDataSize);
122    mCarryOutVector.resize(totalCarryDataSize);
123    mCarryDataSummaryIdx.resize(totalCarryDataSize);
124    std::string errMessage;
125    EngineBuilder builder(mMod);
126    builder.setErrorStr(&errMessage);
127    builder.setMCPU(sys::getHostCPUName());
128    builder.setUseMCJIT(true);
129    builder.setOptLevel(mMaxWhileDepth ? CodeGenOpt::Level::Less : CodeGenOpt::Level::None);
130    mExecutionEngine = builder.create();
131    if (mExecutionEngine == nullptr) {
132        throw std::runtime_error("Could not create ExecutionEngine: " + errMessage);
133    }
134    DeclareFunctions();
135
136    DeclareCallFunctions();
137
138    Function::arg_iterator args = mFunction->arg_begin();
139    mBasisBitsAddr = args++;
140    mBasisBitsAddr->setName("basis_bits");
141    mCarryDataPtr = args++;
142    mCarryDataPtr->setName("carry_data");
143    mOutputAddrPtr = args++;
144    mOutputAddrPtr->setName("output");
145
146    mWhileDepth = 0;
147    mIfDepth = 0;
148    mMaxWhileDepth = 0;
149    mBasicBlock = BasicBlock::Create(mMod->getContext(), "parabix_entry", mFunction,0);
150    IRBuilder<> b(mBasicBlock);
151
152    //The basis bits structure
153    for (unsigned i = 0; i != mBasisBits.size(); ++i) {
154        Value* indices[] = {b.getInt64(0), b.getInt32(i)};
155        Value * gep = b.CreateGEP(mBasisBitsAddr, indices);
156        LoadInst * basisBit = b.CreateAlignedLoad(gep, BLOCK_SIZE/8, false, mBasisBits[i]->getName()->to_string());
157        mMarkerMap.insert(std::make_pair(mBasisBits[i], basisBit));
158    }
159   
160    // The block number is a 64-bit integer at the end of the carry data area.
161    Value * blockNoPtr = b.CreateBitCast(b.CreateGEP(mCarryDataPtr, b.getInt64(totalCarryDataSize)), Type::getInt64PtrTy(b.getContext()));
162    mBlockNo = b.CreateLoad(blockNoPtr);
163    //Generate the IR instructions for the function.
164    compileBlock(pb);
165    {   IRBuilder<> b(mBasicBlock);  // may be in new basic block, set builder
166        b.CreateStore(b.CreateAdd(mBlockNo, b.getInt64(1)), blockNoPtr);
167    }
168
169    if (DumpTrace || TraceNext) {
170        genPrintRegister("blockNo", genCarryDataLoad(totalCarryDataSize));
171    }
172    if (LLVM_UNLIKELY(mWhileDepth != 0)) {
173        throw std::runtime_error("Non-zero nesting depth error (" + std::to_string(mWhileDepth) + ")");
174    }
175
176    //Terminate the block
177    ReturnInst::Create(mMod->getContext(), mBasicBlock);
178
179    //Display the IR that has been generated by this module.
180    if (LLVM_UNLIKELY(DumpGeneratedIR)) {
181        mMod->dump();
182    }
183    //Create a verifier.  The verifier will print an error message if our module is malformed in any way.
184    verifyModule(*mMod, &dbgs());
185
186    mExecutionEngine->finalizeObject();
187
188    //Return the required size of the carry data area to the process_block function.
189    return CompiledPabloFunction(totalCarryDataSize * sizeof(BitBlock), mFunction, mExecutionEngine);
190}
191
192void PabloCompiler::DefineTypes()
193{
194    StructType * structBasisBits = mMod->getTypeByName("struct.Basis_bits");
195    if (structBasisBits == nullptr) {
196        structBasisBits = StructType::create(mMod->getContext(), "struct.Basis_bits");
197    }
198    std::vector<Type*>StructTy_struct_Basis_bits_fields;
199    for (int i = 0; i != mBasisBits.size(); i++)
200    {
201        StructTy_struct_Basis_bits_fields.push_back(mBitBlockType);
202    }
203    if (structBasisBits->isOpaque()) {
204        structBasisBits->setBody(StructTy_struct_Basis_bits_fields, /*isPacked=*/false);
205    }
206    mBasisBitsInputPtr = PointerType::get(structBasisBits, 0);
207
208    std::vector<Type*>functionTypeArgs;
209    functionTypeArgs.push_back(mBasisBitsInputPtr);
210
211    //The carry data array.
212    //A pointer to the BitBlock vector.
213    functionTypeArgs.push_back(PointerType::get(mBitBlockType, 0));
214
215    //The output structure.
216    StructType * outputStruct = mMod->getTypeByName("struct.Output");
217    if (!outputStruct) {
218        outputStruct = StructType::create(mMod->getContext(), "struct.Output");
219    }
220    if (outputStruct->isOpaque()) {
221        std::vector<Type*>fields;
222        fields.push_back(mBitBlockType);
223        fields.push_back(mBitBlockType);
224        outputStruct->setBody(fields, /*isPacked=*/false);
225    }
226    PointerType* outputStructPtr = PointerType::get(outputStruct, 0);
227
228    //The &output parameter.
229    functionTypeArgs.push_back(outputStructPtr);
230
231    mFunctionType = FunctionType::get(
232     /*Result=*/Type::getVoidTy(mMod->getContext()),
233     /*Params=*/functionTypeArgs,
234     /*isVarArg=*/false);
235}
236
237void PabloCompiler::DeclareFunctions()
238{
239    //This function can be used for testing to print the contents of a register from JIT'd code to the terminal window.
240    mPrintRegisterFunction = mMod->getOrInsertFunction("wrapped_print_register", Type::getVoidTy(getGlobalContext()), Type::getInt8PtrTy(getGlobalContext()), mBitBlockType, NULL);
241    mExecutionEngine->addGlobalMapping(cast<GlobalValue>(mPrintRegisterFunction), (void *)&wrapped_print_register);
242    // to call->  b.CreateCall(mFunc_print_register, unicode_category);
243
244#ifdef USE_UADD_OVERFLOW
245#ifdef USE_TWO_UADD_OVERFLOW
246    // Type Definitions for llvm.uadd.with.overflow.carryin.i128 or .i256
247    std::vector<Type*>StructTy_0_fields;
248    StructTy_0_fields.push_back(IntegerType::get(mMod->getContext(), BLOCK_SIZE));
249    StructTy_0_fields.push_back(IntegerType::get(mMod->getContext(), 1));
250    StructType *StructTy_0 = StructType::get(mMod->getContext(), StructTy_0_fields, /*isPacked=*/false);
251
252    std::vector<Type*>FuncTy_1_args;
253    FuncTy_1_args.push_back(IntegerType::get(mMod->getContext(), BLOCK_SIZE));
254    FuncTy_1_args.push_back(IntegerType::get(mMod->getContext(), BLOCK_SIZE));
255    FunctionType* FuncTy_1 = FunctionType::get(
256                                              /*Result=*/StructTy_0,
257                                              /*Params=*/FuncTy_1_args,
258                                              /*isVarArg=*/false);
259
260    mFunctionUaddOverflow = mMod->getFunction("llvm.uadd.with.overflow.i" +
261                                              std::to_string(BLOCK_SIZE));
262    if (!mFunctionUaddOverflow) {
263        mFunctionUaddOverflow= Function::Create(
264          /*Type=*/ FuncTy_1,
265          /*Linkage=*/ GlobalValue::ExternalLinkage,
266          /*Name=*/ "llvm.uadd.with.overflow.i" + std::to_string(BLOCK_SIZE), mMod); // (external, no body)
267        mFunctionUaddOverflow->setCallingConv(CallingConv::C);
268    }
269    AttributeSet mFunctionUaddOverflowPAL;
270    {
271        SmallVector<AttributeSet, 4> Attrs;
272        AttributeSet PAS;
273        {
274          AttrBuilder B;
275          B.addAttribute(Attribute::NoUnwind);
276          B.addAttribute(Attribute::ReadNone);
277          PAS = AttributeSet::get(mMod->getContext(), ~0U, B);
278        }
279
280        Attrs.push_back(PAS);
281        mFunctionUaddOverflowPAL = AttributeSet::get(mMod->getContext(), Attrs);
282    }
283    mFunctionUaddOverflow->setAttributes(mFunctionUaddOverflowPAL);
284#else
285    // Type Definitions for llvm.uadd.with.overflow.carryin.i128 or .i256
286    std::vector<Type*>StructTy_0_fields;
287    StructTy_0_fields.push_back(IntegerType::get(mMod->getContext(), BLOCK_SIZE));
288    StructTy_0_fields.push_back(IntegerType::get(mMod->getContext(), 1));
289    StructType *StructTy_0 = StructType::get(mMod->getContext(), StructTy_0_fields, /*isPacked=*/false);
290
291    std::vector<Type*>FuncTy_1_args;
292    FuncTy_1_args.push_back(IntegerType::get(mMod->getContext(), BLOCK_SIZE));
293    FuncTy_1_args.push_back(IntegerType::get(mMod->getContext(), BLOCK_SIZE));
294    FuncTy_1_args.push_back(IntegerType::get(mMod->getContext(), 1));
295    FunctionType* FuncTy_1 = FunctionType::get(
296                                              /*Result=*/StructTy_0,
297                                              /*Params=*/FuncTy_1_args,
298                                              /*isVarArg=*/false);
299
300    mFunctionUaddOverflowCarryin = mMod->getFunction("llvm.uadd.with.overflow.carryin.i" +
301                                              std::to_string(BLOCK_SIZE));
302    if (!mFunctionUaddOverflowCarryin) {
303        mFunctionUaddOverflowCarryin = Function::Create(
304          /*Type=*/ FuncTy_1,
305          /*Linkage=*/ GlobalValue::ExternalLinkage,
306          /*Name=*/ "llvm.uadd.with.overflow.carryin.i" + std::to_string(BLOCK_SIZE), mMod); // (external, no body)
307        mFunctionUaddOverflowCarryin->setCallingConv(CallingConv::C);
308    }
309    AttributeSet mFunctionUaddOverflowCarryinPAL;
310    {
311        SmallVector<AttributeSet, 4> Attrs;
312        AttributeSet PAS;
313        {
314          AttrBuilder B;
315          B.addAttribute(Attribute::NoUnwind);
316          B.addAttribute(Attribute::ReadNone);
317          PAS = AttributeSet::get(mMod->getContext(), ~0U, B);
318        }
319
320        Attrs.push_back(PAS);
321        mFunctionUaddOverflowCarryinPAL = AttributeSet::get(mMod->getContext(), Attrs);
322    }
323    mFunctionUaddOverflowCarryin->setAttributes(mFunctionUaddOverflowCarryinPAL);
324#endif
325#endif
326
327    //Starts on process_block
328    SmallVector<AttributeSet, 4> Attrs;
329    AttributeSet PAS;
330    {
331        AttrBuilder B;
332        B.addAttribute(Attribute::ReadOnly);
333        B.addAttribute(Attribute::NoCapture);
334        PAS = AttributeSet::get(mMod->getContext(), 1U, B);
335    }
336    Attrs.push_back(PAS);
337    {
338        AttrBuilder B;
339        B.addAttribute(Attribute::NoCapture);
340        PAS = AttributeSet::get(mMod->getContext(), 2U, B);
341    }
342    Attrs.push_back(PAS);
343    {
344        AttrBuilder B;
345        B.addAttribute(Attribute::NoCapture);
346        PAS = AttributeSet::get(mMod->getContext(), 3U, B);
347    }
348    Attrs.push_back(PAS);
349    {
350        AttrBuilder B;
351        B.addAttribute(Attribute::NoUnwind);
352        B.addAttribute(Attribute::UWTable);
353        PAS = AttributeSet::get(mMod->getContext(), ~0U, B);
354    }
355    AttributeSet AttrSet = AttributeSet::get(mMod->getContext(), Attrs);
356
357    //Create the function that will be generated.
358    mFunction = mMod->getFunction("process_block");
359    if (!mFunction) {
360        mFunction = Function::Create(
361            /*Type=*/mFunctionType,
362            /*Linkage=*/GlobalValue::ExternalLinkage,
363            /*Name=*/"process_block", mMod);
364        mFunction->setCallingConv(CallingConv::C);
365    }
366    mFunction->setAttributes(AttrSet);
367}
368   
369void PabloCompiler::Examine(PabloBlock & blk) {
370    for (Statement * stmt : blk) {
371        if (Call * call = dyn_cast<Call>(stmt)) {
372            mCalleeMap.insert(std::make_pair(call->getCallee(), nullptr));
373        }
374        else if (If * ifStatement = dyn_cast<If>(stmt)) {
375            ++mIfDepth;
376            Examine(ifStatement->getBody());
377            --mIfDepth;
378        }
379        else if (While * whileStatement = dyn_cast<While>(stmt)) {
380            mMaxWhileDepth = std::max(mMaxWhileDepth, ++mWhileDepth);
381            Examine(whileStatement->getBody());
382            --mWhileDepth;
383        }
384    }
385}
386
387void PabloCompiler::DeclareCallFunctions() {
388    for (auto mapping : mCalleeMap) {
389        const String * callee = mapping.first;
390        //std::cerr << callee->str() << " to be declared\n";
391        auto ei = mExternalMap.find(callee->value());
392        if (ei != mExternalMap.end()) {
393            void * fn_ptr = ei->second;
394            //std::cerr << "Ptr found:" <<  std::hex << ((intptr_t) fn_ptr) << std::endl;
395            Value * externalValue = mMod->getOrInsertFunction(callee->value(), mBitBlockType, mBasisBitsInputPtr, NULL);
396            if (LLVM_UNLIKELY(externalValue == nullptr)) {
397                throw std::runtime_error("Could not create static method call for external function \"" + callee->to_string() + "\"");
398            }
399            mExecutionEngine->addGlobalMapping(cast<GlobalValue>(externalValue), fn_ptr);
400            mCalleeMap[callee] = externalValue;
401        }
402        else {
403            throw std::runtime_error("External function \"" + callee->to_string() + "\" not installed");
404        }
405    }
406}
407
408void PabloCompiler::compileBlock(const PabloBlock & blk) {
409    for (const Statement * statement : blk) {
410        compileStatement(statement);
411    }
412}
413
414
415
416
417void PabloCompiler::compileIf(const If * ifStatement) {       
418        //
419        //  The If-ElseZero stmt:
420        //  if <predicate:expr> then <body:stmt>* elsezero <defined:var>* endif
421        //  If the value of the predicate is nonzero, then determine the values of variables
422        //  <var>* by executing the given statements.  Otherwise, the value of the
423        //  variables are all zero.  Requirements: (a) no variable that is defined within
424        //  the body of the if may be accessed outside unless it is explicitly
425        //  listed in the variable list, (b) every variable in the defined list receives
426        //  a value within the body, and (c) the logical consequence of executing
427        //  the statements in the event that the predicate is zero is that the
428        //  values of all defined variables indeed work out to be 0.
429        //
430        //  Simple Implementation with Phi nodes:  a phi node in the if exit block
431        //  is inserted for each variable in the defined variable list.  It receives
432        //  a zero value from the ifentry block and the defined value from the if
433        //  body.
434        //
435        BasicBlock * ifEntryBlock = mBasicBlock;  // The block we are in.
436        BasicBlock * ifBodyBlock = BasicBlock::Create(mMod->getContext(), "if.body", mFunction, 0);
437        BasicBlock * ifEndBlock = BasicBlock::Create(mMod->getContext(), "if.end", mFunction, 0);
438       
439        IRBuilder<> b_entry(ifEntryBlock);
440        mBasicBlock = ifEntryBlock;
441        const PabloBlockCarryData & cd = ifStatement -> getBody().carryData;
442   
443        const unsigned baseCarryDataIdx = cd.getBlockCarryDataIndex();
444        const unsigned carrySummaryIndex = cd.summaryCarryDataIndex();
445       
446        Value* if_test_value = compileExpression(ifStatement->getCondition());
447        if (cd.blockHasCarries()) {
448            // load the summary variable
449            Value* last_if_pending_data = genCarryDataLoad(carrySummaryIndex);
450            if_test_value = b_entry.CreateOr(if_test_value, last_if_pending_data);
451        }
452        b_entry.CreateCondBr(genBitBlockAny(if_test_value), ifEndBlock, ifBodyBlock);
453
454        // Entry processing is complete, now handle the body of the if.
455        mBasicBlock = ifBodyBlock;
456        compileBlock(ifStatement -> getBody());
457
458        // If we compiled an If or a While statement, we won't be in the same basic block as before.
459        // Create the branch from the current basic block to the end block.
460        IRBuilder<> bIfBody(mBasicBlock);
461        // After the recursive compile, now insert the code to compute the summary
462        // carry over variable.
463       
464        if (cd.explicitSummaryRequired()) {
465            // If there was only one carry entry, then it also serves as the summary variable.
466            // Otherwise, we need to combine entries to compute the summary.
467            Value * carry_summary = mZeroInitializer;
468            for (int c = baseCarryDataIdx; c < carrySummaryIndex; c++) {
469                int s = mCarryDataSummaryIdx[c];
470                if (s == -1) {
471                    Value* carryq_value = mCarryOutVector[c];
472                    if (carry_summary == mZeroInitializer) {
473                        carry_summary = carryq_value;
474                    }
475                    else {
476                        carry_summary = bIfBody.CreateOr(carry_summary, carryq_value);
477                    }
478                    mCarryDataSummaryIdx[c] = carrySummaryIndex;
479                }
480            }
481            genCarryDataStore(carry_summary, carrySummaryIndex);
482        }
483        bIfBody.CreateBr(ifEndBlock);
484        //End Block
485        IRBuilder<> bEnd(ifEndBlock);
486        for (const PabloAST * node : ifStatement->getDefined()) {
487            const Assign * assign = cast<Assign>(node);
488            PHINode * phi = bEnd.CreatePHI(mBitBlockType, 2, assign->getName()->value());
489            auto f = mMarkerMap.find(assign);
490            assert (f != mMarkerMap.end());
491            phi->addIncoming(mZeroInitializer, ifEntryBlock);
492            phi->addIncoming(f->second, mBasicBlock);
493            mMarkerMap[assign] = phi;
494        }
495        // Create the phi Node for the summary variable, if needed.
496        if (cd.summaryNeededInParentBlock()) {
497            PHINode * summary_phi = bEnd.CreatePHI(mBitBlockType, 2, "summary");
498            summary_phi->addIncoming(mZeroInitializer, ifEntryBlock);
499            summary_phi->addIncoming(mCarryOutVector[carrySummaryIndex], mBasicBlock);
500            mCarryOutVector[carrySummaryIndex] = summary_phi;
501        }
502       
503        // Set the basic block to the new end block
504        mBasicBlock = ifEndBlock;
505}
506
507void PabloCompiler::compileWhile(const While * whileStatement) {
508        const PabloBlockCarryData & cd = whileStatement -> getBody().carryData;
509        const unsigned baseCarryDataIdx = cd.getBlockCarryDataIndex();
510        const unsigned carryDataSize = cd.getTotalCarryDataSize();
511   
512        if (mWhileDepth == 0) {
513            for (auto i = 0; i < carryDataSize; ++i) {
514                genCarryDataLoad(baseCarryDataIdx + i);
515            }
516        }
517
518        SmallVector<const Next*, 4> nextNodes;
519        for (const PabloAST * node : whileStatement->getBody()) {
520            if (isa<Next>(node)) {
521                nextNodes.push_back(cast<Next>(node));
522            }
523        }
524
525        // Compile the initial iteration statements; the calls to genCarryDataStore will update the
526        // mCarryOutVector with the appropriate values. Although we're not actually entering a new basic
527        // block yet, increment the nesting depth so that any calls to genCarryDataLoad or genCarryDataStore
528        // will refer to the previous value.
529
530        ++mWhileDepth;
531
532        compileBlock(whileStatement->getBody());
533
534        // Reset the carry queue index. Note: this ought to be changed in the future. Currently this assumes
535        // that compiling the while body twice will generate the equivalent IR. This is not necessarily true
536        // but works for now.
537
538        BasicBlock* whileCondBlock = BasicBlock::Create(mMod->getContext(), "while.cond", mFunction, 0);
539        BasicBlock* whileBodyBlock = BasicBlock::Create(mMod->getContext(), "while.body", mFunction, 0);
540        BasicBlock* whileEndBlock = BasicBlock::Create(mMod->getContext(), "while.end", mFunction, 0);
541
542        // Note: compileBlock may update the mBasicBlock pointer if the body contains nested loops. It
543        // may not be same one that we entered the function with.
544        IRBuilder<> bEntry(mBasicBlock);
545        bEntry.CreateBr(whileCondBlock);
546
547        // CONDITION BLOCK
548        IRBuilder<> bCond(whileCondBlock);
549        // generate phi nodes for any carry propogating instruction
550        std::vector<PHINode*> phiNodes(carryDataSize + nextNodes.size());
551        unsigned index = 0;
552        for (index = 0; index < carryDataSize; ++index) {
553            PHINode * phi = bCond.CreatePHI(mBitBlockType, 2);
554            phi->addIncoming(mCarryOutVector[baseCarryDataIdx + index], mBasicBlock);
555            mCarryInVector[baseCarryDataIdx + index] = mZeroInitializer; // (use phi for multi-carry mode.)
556            phiNodes[index] = phi;
557        }
558        // and for any Next nodes in the loop body
559        for (const Next * n : nextNodes) {
560            PHINode * phi = bCond.CreatePHI(mBitBlockType, 2, n->getName()->value());
561            auto f = mMarkerMap.find(n->getInitial());
562            assert (f != mMarkerMap.end());
563            phi->addIncoming(f->second, mBasicBlock);
564            mMarkerMap[n->getInitial()] = phi;
565            phiNodes[index++] = phi;
566        }
567
568        mBasicBlock = whileCondBlock;
569        bCond.CreateCondBr(genBitBlockAny(compileExpression(whileStatement->getCondition())), whileEndBlock, whileBodyBlock);
570
571        // BODY BLOCK
572        //std::cerr << "Compile loop body\n";
573        mBasicBlock = whileBodyBlock;
574        compileBlock(whileStatement->getBody());
575        // update phi nodes for any carry propogating instruction
576        IRBuilder<> bWhileBody(mBasicBlock);
577        for (index = 0; index < carryDataSize; ++index) {
578            PHINode * phi = phiNodes[index];
579            Value * carryOut = bWhileBody.CreateOr(phi, mCarryOutVector[baseCarryDataIdx + index]);
580            phi->addIncoming(carryOut, mBasicBlock);
581            mCarryOutVector[baseCarryDataIdx + index] = phi;
582        }
583       
584        // and for any Next nodes in the loop body
585        for (const Next * n : nextNodes) {
586            auto f = mMarkerMap.find(n->getInitial());
587            assert (f != mMarkerMap.end());
588            PHINode * phi = phiNodes[index++];
589            phi->addIncoming(f->second, mBasicBlock);
590            mMarkerMap[n->getInitial()] = phi;
591        }
592
593        bWhileBody.CreateBr(whileCondBlock);
594
595        // EXIT BLOCK
596        mBasicBlock = whileEndBlock;
597        if (--mWhileDepth == 0) {
598            for (index = 0; index < carryDataSize; ++index) {
599                genCarryDataStore(phiNodes[index], baseCarryDataIdx + index);
600            }
601        }
602 
603}
604
605void PabloCompiler::compileStatement(const Statement * stmt)
606{
607    IRBuilder<> b(mBasicBlock);
608    if (const Assign * assign = dyn_cast<const Assign>(stmt)) {
609        Value * expr = compileExpression(assign->getExpr());
610        if (DumpTrace) {
611            genPrintRegister(assign->getName()->to_string(), expr);
612        }
613        mMarkerMap[assign] = expr;
614        if (LLVM_UNLIKELY(assign->isOutputAssignment())) {
615            SetOutputValue(expr, assign->getOutputIndex());
616        }
617    }
618    else if (const Next * next = dyn_cast<const Next>(stmt)) {
619        Value * expr = compileExpression(next->getExpr());
620        if (TraceNext) {
621            genPrintRegister(next->getInitial()->getName()->to_string(), expr);
622        }
623        mMarkerMap[next->getInitial()] = expr;
624    }
625    else if (const If * ifStatement = dyn_cast<const If>(stmt))
626    {
627        compileIf(ifStatement);
628    }
629    else if (const While * whileStatement = dyn_cast<const While>(stmt))
630    {
631        compileWhile(whileStatement);
632    }
633    else if (const Call* call = dyn_cast<Call>(stmt)) {
634        //Call the callee once and store the result in the marker map.
635        auto mi = mMarkerMap.find(call);
636        if (mi == mMarkerMap.end()) {
637            auto ci = mCalleeMap.find(call->getCallee());
638            if (LLVM_UNLIKELY(ci == mCalleeMap.end())) {
639                throw std::runtime_error("Unexpected error locating static function for \"" + call->getCallee()->to_string() + "\"");
640            }
641            mi = mMarkerMap.insert(std::make_pair(call, b.CreateCall(ci->second, mBasisBitsAddr))).first;
642        }
643        // return mi->second;
644    }
645    else if (const And * pablo_and = dyn_cast<And>(stmt)) {
646        Value * expr = b.CreateAnd(compileExpression(pablo_and->getExpr1()), compileExpression(pablo_and->getExpr2()), "and");
647        if (DumpTrace) {
648            genPrintRegister(stmt->getName()->to_string(), expr);
649        }
650        mMarkerMap[pablo_and] = expr;
651        // return expr;
652    }
653    else if (const Or * pablo_or = dyn_cast<Or>(stmt)) {
654        Value * expr = b.CreateOr(compileExpression(pablo_or->getExpr1()), compileExpression(pablo_or->getExpr2()), "or");
655        if (DumpTrace) {
656            genPrintRegister(stmt->getName()->to_string(), expr);
657        }
658        mMarkerMap[pablo_or] = expr;
659        // return expr;
660    }
661    else if (const Xor * pablo_xor = dyn_cast<Xor>(stmt)) {
662        Value * expr = b.CreateXor(compileExpression(pablo_xor->getExpr1()), compileExpression(pablo_xor->getExpr2()), "xor");
663        mMarkerMap[pablo_xor] = expr;
664        // return expr;
665    }
666    else if (const Sel * sel = dyn_cast<Sel>(stmt)) {
667        Value* ifMask = compileExpression(sel->getCondition());
668        Value* ifTrue = b.CreateAnd(ifMask, compileExpression(sel->getTrueExpr()));
669        Value* ifFalse = b.CreateAnd(genNot(ifMask), compileExpression(sel->getFalseExpr()));
670        Value * expr = b.CreateOr(ifTrue, ifFalse);
671        if (DumpTrace) {
672            genPrintRegister(stmt->getName()->to_string(), expr);
673        }
674        mMarkerMap[sel] = expr;
675        // return expr;
676    }
677    else if (const Not * pablo_not = dyn_cast<Not>(stmt)) {
678        Value * expr = genNot(compileExpression(pablo_not->getExpr()));
679        if (DumpTrace) {
680            genPrintRegister(stmt->getName()->to_string(), expr);
681        }
682        mMarkerMap[pablo_not] = expr;
683        // return expr;
684    }
685    else if (const Advance * adv = dyn_cast<Advance>(stmt)) {
686        Value* strm_value = compileExpression(adv->getExpr());
687        int shift = adv->getAdvanceAmount();
688        unsigned advance_index = adv->getLocalAdvanceIndex();
689        Value * expr = genAdvanceWithCarry(strm_value, shift, advance_index, stmt->getParent());
690        if (DumpTrace) {
691            genPrintRegister(stmt->getName()->to_string(), expr);
692        }
693        mMarkerMap[adv] = expr;
694        // return expr;
695    }
696    else if (const MatchStar * mstar = dyn_cast<MatchStar>(stmt))
697    {
698        Value * marker = compileExpression(mstar->getMarker());
699        Value * cc = compileExpression(mstar->getCharClass());
700        Value * marker_and_cc = b.CreateAnd(marker, cc);
701        unsigned carry_index = mstar->getLocalCarryIndex();
702        Value * expr = b.CreateOr(b.CreateXor(genAddWithCarry(marker_and_cc, cc, carry_index, stmt->getParent()), cc), marker, "matchstar");
703        if (DumpTrace) {
704            genPrintRegister(stmt->getName()->to_string(), expr);
705        }
706        mMarkerMap[mstar] = expr;
707        // return expr;
708    }
709    else if (const ScanThru * sthru = dyn_cast<ScanThru>(stmt))
710    {
711        Value * marker_expr = compileExpression(sthru->getScanFrom());
712        Value * cc_expr = compileExpression(sthru->getScanThru());
713        unsigned carry_index = sthru->getLocalCarryIndex();
714        Value * expr = b.CreateAnd(genAddWithCarry(marker_expr, cc_expr, carry_index, stmt->getParent()), genNot(cc_expr), "scanthru");
715        if (DumpTrace) {
716            genPrintRegister(stmt->getName()->to_string(), expr);
717        }
718        mMarkerMap[sthru] = expr;
719        // return expr;
720    }
721    else {
722        llvm::raw_os_ostream cerr(std::cerr);
723        PabloPrinter::print(stmt, cerr);
724        throw std::runtime_error("Unrecognized Pablo Statement! can't compile.");
725    }
726}
727
728Value * PabloCompiler::compileExpression(const PabloAST * expr) {
729    if (isa<Ones>(expr)) {
730        return mOneInitializer;
731    }
732    else if (isa<Zeroes>(expr)) {
733        return mZeroInitializer;
734    }
735    else if (const Next * next = dyn_cast<Next>(expr)) {
736        expr = next->getInitial();
737    }
738    auto f = mMarkerMap.find(expr);
739    if (f == mMarkerMap.end()) {
740        std::string o;
741        llvm::raw_string_ostream str(o);
742        str << "\"";
743        PabloPrinter::print(expr, str);
744        str << "\" was used before definition!";
745        throw std::runtime_error(str.str());
746    }
747    return f->second;
748}
749
750
751#ifdef USE_UADD_OVERFLOW
752#ifdef USE_TWO_UADD_OVERFLOW
753PabloCompiler::SumWithOverflowPack PabloCompiler::callUaddOverflow(Value* int128_e1, Value* int128_e2) {
754    std::vector<Value*> struct_res_params;
755    struct_res_params.push_back(int128_e1);
756    struct_res_params.push_back(int128_e2);
757    CallInst* struct_res = CallInst::Create(mFunctionUaddOverflow, struct_res_params, "uadd_overflow_res", mBasicBlock);
758    struct_res->setCallingConv(CallingConv::C);
759    struct_res->setTailCall(false);
760    AttributeSet struct_res_PAL;
761    struct_res->setAttributes(struct_res_PAL);
762
763    SumWithOverflowPack ret;
764
765    std::vector<unsigned> int128_sum_indices;
766    int128_sum_indices.push_back(0);
767    ret.sum = ExtractValueInst::Create(struct_res, int128_sum_indices, "sum", mBasicBlock);
768
769    std::vector<unsigned> int1_obit_indices;
770    int1_obit_indices.push_back(1);
771    ret.obit = ExtractValueInst::Create(struct_res, int1_obit_indices, "obit", mBasicBlock);
772
773    return ret;
774}
775#else
776PabloCompiler::SumWithOverflowPack PabloCompiler::callUaddOverflow(Value* int128_e1, Value* int128_e2, Value* int1_cin) {
777    std::vector<Value*> struct_res_params;
778    struct_res_params.push_back(int128_e1);
779    struct_res_params.push_back(int128_e2);
780    struct_res_params.push_back(int1_cin);
781    CallInst* struct_res = CallInst::Create(mFunctionUaddOverflowCarryin, struct_res_params, "uadd_overflow_res", mBasicBlock);
782    struct_res->setCallingConv(CallingConv::C);
783    struct_res->setTailCall(false);
784    AttributeSet struct_res_PAL;
785    struct_res->setAttributes(struct_res_PAL);
786
787    SumWithOverflowPack ret;
788
789    std::vector<unsigned> int128_sum_indices;
790    int128_sum_indices.push_back(0);
791    ret.sum = ExtractValueInst::Create(struct_res, int128_sum_indices, "sum", mBasicBlock);
792
793    std::vector<unsigned> int1_obit_indices;
794    int1_obit_indices.push_back(1);
795    ret.obit = ExtractValueInst::Create(struct_res, int1_obit_indices, "obit", mBasicBlock);
796
797    return ret;
798}
799#endif
800#endif
801
802
803Value* PabloCompiler::genAddWithCarry(Value* e1, Value* e2, unsigned localIndex, const PabloBlock * blk) {
804    IRBuilder<> b(mBasicBlock);
805    const PabloBlockCarryData & cd = blk->carryData;
806    const unsigned carryIdx = cd.carryOpCarryDataOffset(localIndex);
807    Value* carryq_value = genCarryDataLoad(carryIdx);
808#ifdef USE_TWO_UADD_OVERFLOW
809    //This is the ideal implementation, which uses two uadd.with.overflow
810    //The back end should be able to recognize this pattern and combine it into uadd.with.overflow.carryin
811    CastInst* int128_e1 = new BitCastInst(e1, b.getIntNTy(BLOCK_SIZE), "e1_128", mBasicBlock);
812    CastInst* int128_e2 = new BitCastInst(e2, b.getIntNTy(BLOCK_SIZE), "e2_128", mBasicBlock);
813    CastInst* int128_carryq_value = new BitCastInst(carryq_value, b.getIntNTy(BLOCK_SIZE), "carryq_128", mBasicBlock);
814
815    SumWithOverflowPack sumpack0, sumpack1;
816
817    sumpack0 = callUaddOverflow(int128_e1, int128_e2);
818    sumpack1 = callUaddOverflow(sumpack0.sum, int128_carryq_value);
819
820    Value* obit = b.CreateOr(sumpack0.obit, sumpack1.obit, "carry_bit");
821    Value* sum = b.CreateBitCast(sumpack1.sum, mBitBlockType, "ret_sum");
822
823    /*obit is the i1 carryout, zero extend and insert it into a v2i64 or v4i64 vector.*/
824    ConstantAggregateZero* const_packed_5 = ConstantAggregateZero::get(mBitBlockType);
825    ConstantInt* const_int32_6 = ConstantInt::get(mMod->getContext(), APInt(32, StringRef("0"), 10));
826    CastInst* int64_o0 = new ZExtInst(obit, IntegerType::get(mMod->getContext(), 64), "o0", mBasicBlock);
827    InsertElementInst* carry_out = InsertElementInst::Create(const_packed_5, int64_o0, const_int32_6, "carry_out", mBasicBlock);
828
829#elif defined USE_UADD_OVERFLOW
830    //use llvm.uadd.with.overflow.i128 or i256
831    CastInst* int128_e1 = new BitCastInst(e1, b.getIntNTy(BLOCK_SIZE), "e1_128", mBasicBlock);
832    CastInst* int128_e2 = new BitCastInst(e2, b.getIntNTy(BLOCK_SIZE), "e2_128", mBasicBlock);
833
834    //get i1 carryin from iBLOCK_SIZE
835    ConstantInt* const_int32_6 = ConstantInt::get(mMod->getContext(), APInt(32, StringRef("0"), 10));
836    ExtractElementInst * int64_carryq_value = ExtractElementInst::Create(carryq_value, const_int32_6, "carryq_64", mBasicBlock);
837    CastInst* int1_carryq_value = new TruncInst(int64_carryq_value, IntegerType::get(mMod->getContext(), 1), "carryq_1", mBasicBlock);
838
839    SumWithOverflowPack sumpack0;
840    sumpack0 = callUaddOverflow(int128_e1, int128_e2, int1_carryq_value);
841    Value* obit = sumpack0.obit;
842    Value* sum = b.CreateBitCast(sumpack0.sum, mBitBlockType, "sum");
843
844    /*obit is the i1 carryout, zero extend and insert it into a v2i64 or v4i64 vector.*/
845    ConstantAggregateZero* const_packed_5 = ConstantAggregateZero::get(mBitBlockType);
846    CastInst* int64_o0 = new ZExtInst(obit, IntegerType::get(mMod->getContext(), 64), "o0", mBasicBlock);
847    InsertElementInst* carry_out = InsertElementInst::Create(const_packed_5, int64_o0, const_int32_6, "carry_out", mBasicBlock);
848#elif (BLOCK_SIZE == 128)
849    //calculate carry through logical ops
850    Value* carrygen = b.CreateAnd(e1, e2, "carrygen");
851    Value* carryprop = b.CreateOr(e1, e2, "carryprop");
852    Value* digitsum = b.CreateAdd(e1, e2, "digitsum");
853    Value* partial = b.CreateAdd(digitsum, carryq_value, "partial");
854    Value* digitcarry = b.CreateOr(carrygen, b.CreateAnd(carryprop, genNot(partial)));
855    Value* mid_carry_in = genShiftLeft64(b.CreateLShr(digitcarry, 63), "mid_carry_in");
856
857    Value* sum = b.CreateAdd(partial, mid_carry_in, "sum");
858    Value* carry_out = genShiftHighbitToLow(BLOCK_SIZE, b.CreateOr(carrygen, b.CreateAnd(carryprop, genNot(sum))));
859#else
860    //BLOCK_SIZE == 256, there is no other implementation
861    static_assert(false, "Add with carry for 256-bit bitblock requires USE_UADD_OVERFLOW");
862#endif //USE_TWO_UADD_OVERFLOW
863
864    genCarryDataStore(carry_out, carryIdx);
865    return sum;
866}
867//#define CARRY_DEBUG
868Value* PabloCompiler::genCarryDataLoad(const unsigned index) {
869    assert (index < mCarryInVector.size());
870    if (mWhileDepth == 0) {
871        IRBuilder<> b(mBasicBlock);
872        mCarryInVector[index] = b.CreateAlignedLoad(b.CreateGEP(mCarryDataPtr, b.getInt64(index)), BLOCK_SIZE/8, false);
873    }
874#ifdef CARRY_DEBUG
875    std::cerr << "genCarryDataLoad " << index << std::endl;
876    genPrintRegister("carry_in_" + std::to_string(index), mCarryInVector[index]);
877#endif
878    return mCarryInVector[index];
879}
880
881void PabloCompiler::genCarryDataStore(Value* carryOut, const unsigned index ) {
882    assert (carryOut);
883    assert (index < mCarryOutVector.size());
884    if (mWhileDepth == 0) {
885        IRBuilder<> b(mBasicBlock);
886        b.CreateAlignedStore(carryOut, b.CreateGEP(mCarryDataPtr, b.getInt64(index)), BLOCK_SIZE/8, false);
887    }
888    mCarryDataSummaryIdx[index] = -1;
889    mCarryOutVector[index] = carryOut;
890#ifdef CARRY_DEBUG
891    std::cerr << "genCarryDataStore " << index << std::endl;
892    genPrintRegister("carry_out_" + std::to_string(index), mCarryOutVector[index]);
893#endif
894    //std::cerr << "mCarryOutVector[" << index << "]]\n";
895}
896
897inline Value* PabloCompiler::genBitBlockAny(Value* test) {
898    IRBuilder<> b(mBasicBlock);
899    Value* cast_marker_value_1 = b.CreateBitCast(test, b.getIntNTy(BLOCK_SIZE));
900    return b.CreateICmpEQ(cast_marker_value_1, ConstantInt::get(b.getIntNTy(BLOCK_SIZE), 0));
901}
902
903Value * PabloCompiler::genShiftHighbitToLow(unsigned FieldWidth, Value * op) {
904    unsigned FieldCount = BLOCK_SIZE/FieldWidth;
905    IRBuilder<> b(mBasicBlock);
906    VectorType * vType = VectorType::get(IntegerType::get(mMod->getContext(), FieldWidth), FieldCount);
907    Value * v = b.CreateBitCast(op, vType);
908    return b.CreateBitCast(b.CreateLShr(v, FieldWidth - 1), mBitBlockType);
909}
910
911Value* PabloCompiler::genShiftLeft64(Value* e, const Twine &namehint) {
912    IRBuilder<> b(mBasicBlock);
913    Value* i128_val = b.CreateBitCast(e, b.getIntNTy(BLOCK_SIZE));
914    return b.CreateBitCast(b.CreateShl(i128_val, 64, namehint), mBitBlockType);
915}
916
917inline Value* PabloCompiler::genNot(Value* expr) {
918    IRBuilder<> b(mBasicBlock);
919    return b.CreateXor(expr, mOneInitializer, "not");
920}
921
922Value* PabloCompiler::genAdvanceWithCarry(Value* strm_value, int shift_amount, unsigned localIndex, const PabloBlock * blk) {
923    if (shift_amount >= LongAdvanceBase) {
924        return genLongAdvanceWithCarry(strm_value, shift_amount, localIndex, blk);
925    }
926    else if (shift_amount == 1) {
927        return genUnitAdvanceWithCarry(strm_value, localIndex, blk);
928    }
929    IRBuilder<> b(mBasicBlock);
930    const PabloBlockCarryData & cd = blk->carryData;
931    const auto advanceIndex = cd.shortAdvanceCarryDataOffset(localIndex);
932    Value* result_value;
933   
934    if (shift_amount == 0) {
935        result_value = genCarryDataLoad(advanceIndex);
936    }
937    else {
938        Value* advanceq_longint = b.CreateBitCast(genCarryDataLoad(advanceIndex), b.getIntNTy(BLOCK_SIZE));
939        Value* strm_longint = b.CreateBitCast(strm_value, b.getIntNTy(BLOCK_SIZE));
940        Value* adv_longint = b.CreateOr(b.CreateShl(strm_longint, shift_amount), b.CreateLShr(advanceq_longint, BLOCK_SIZE - shift_amount), "advance");
941        result_value = b.CreateBitCast(adv_longint, mBitBlockType);
942    }
943    genCarryDataStore(strm_value, advanceIndex);
944    return result_value;
945}
946                   
947Value* PabloCompiler::genUnitAdvanceWithCarry(Value* strm_value, unsigned localIndex, const PabloBlock * blk) {
948    IRBuilder<> b(mBasicBlock);
949    const PabloBlockCarryData & cd = blk->carryData;
950    const auto advanceIndex = cd.unitAdvanceCarryDataOffset(localIndex);
951    Value* result_value;
952   
953#if (BLOCK_SIZE == 128) && !defined(USE_LONG_INTEGER_SHIFT)
954    Value* advanceq_value = genShiftHighbitToLow(BLOCK_SIZE, genCarryDataLoad(advanceIndex));
955    Value* srli_1_value = b.CreateLShr(strm_value, 63);
956    Value* packed_shuffle;
957    Constant* const_packed_1_elems [] = {b.getInt32(0), b.getInt32(2)};
958    Constant* const_packed_1 = ConstantVector::get(const_packed_1_elems);
959    packed_shuffle = b.CreateShuffleVector(advanceq_value, srli_1_value, const_packed_1);
960   
961    Constant* const_packed_2_elems[] = {b.getInt64(1), b.getInt64(1)};
962    Constant* const_packed_2 = ConstantVector::get(const_packed_2_elems);
963   
964    Value* shl_value = b.CreateShl(strm_value, const_packed_2);
965    result_value = b.CreateOr(shl_value, packed_shuffle, "advance");
966#else
967    Value* advanceq_longint = b.CreateBitCast(genCarryDataLoad(advanceIndex), b.getIntNTy(BLOCK_SIZE));
968    Value* strm_longint = b.CreateBitCast(strm_value, b.getIntNTy(BLOCK_SIZE));
969    Value* adv_longint = b.CreateOr(b.CreateShl(strm_longint, 1), b.CreateLShr(advanceq_longint, BLOCK_SIZE - 1), "advance");
970    result_value = b.CreateBitCast(adv_longint, mBitBlockType);
971   
972#endif
973    genCarryDataStore(strm_value, advanceIndex);
974    return result_value;
975}
976                   
977                    //
978// Generate code for long advances >= LongAdvanceBase
979//
980Value* PabloCompiler::genLongAdvanceWithCarry(Value* strm_value, int shift_amount, unsigned localIndex, const PabloBlock * blk) {
981    IRBuilder<> b(mBasicBlock);
982    const PabloBlockCarryData & cd = blk->carryData;
983    const unsigned block_shift = shift_amount % BLOCK_SIZE;
984    const unsigned advanceEntries = cd.longAdvanceEntries(shift_amount);
985    const unsigned bufsize = cd.longAdvanceBufferSize(shift_amount);
986    std::cerr << "shift_amount = " << shift_amount << " bufsize = " << bufsize << std::endl;
987    Value * indexMask = b.getInt64(bufsize - 1);  // A mask to implement circular buffer indexing
988    Value * advBaseIndex = b.getInt64(cd.longAdvanceCarryDataOffset(localIndex));
989    Value * storeIndex = b.CreateAdd(b.CreateAnd(mBlockNo, indexMask), advBaseIndex);
990    Value * loadIndex = b.CreateAdd(b.CreateAnd(b.CreateSub(mBlockNo, b.getInt64(advanceEntries)), indexMask), advBaseIndex);
991    Value * storePtr = b.CreateGEP(mCarryDataPtr, storeIndex);
992    Value * loadPtr = b.CreateGEP(mCarryDataPtr, loadIndex);
993    Value* result_value;
994
995    if (block_shift == 0) {
996        result_value = b.CreateAlignedLoad(loadPtr, BLOCK_SIZE/8);
997    }
998    else if (advanceEntries == 1) {
999        Value* advanceq_longint = b.CreateBitCast(b.CreateAlignedLoad(loadPtr, BLOCK_SIZE/8), b.getIntNTy(BLOCK_SIZE));
1000        Value* strm_longint = b.CreateBitCast(strm_value, b.getIntNTy(BLOCK_SIZE));
1001        Value* adv_longint = b.CreateOr(b.CreateShl(strm_longint, block_shift), b.CreateLShr(advanceq_longint, BLOCK_SIZE - block_shift), "advance");
1002        result_value = b.CreateBitCast(adv_longint, mBitBlockType);
1003    }
1004    else {
1005        // The advance is based on the two oldest bit blocks in the advance buffer.
1006        // The buffer is maintained as a circular buffer of size bufsize.
1007        // Indexes within the buffer are computed by bitwise and with the indexMask.
1008        Value * loadIndex2 = b.CreateAdd(b.CreateAnd(b.CreateSub(mBlockNo, b.getInt64(advanceEntries-1)), indexMask), advBaseIndex);
1009        Value * loadPtr2 = b.CreateGEP(mCarryDataPtr, loadIndex2);
1010        Value* advanceq_longint = b.CreateBitCast(b.CreateAlignedLoad(loadPtr, BLOCK_SIZE/8), b.getIntNTy(BLOCK_SIZE));
1011        //genPrintRegister("advanceq_longint", b.CreateBitCast(advanceq_longint, mBitBlockType));
1012        Value* strm_longint = b.CreateBitCast(b.CreateAlignedLoad(loadPtr2, BLOCK_SIZE/8), b.getIntNTy(BLOCK_SIZE));
1013        //genPrintRegister("strm_longint", b.CreateBitCast(strm_longint, mBitBlockType));
1014        Value* adv_longint = b.CreateOr(b.CreateShl(strm_longint, block_shift), b.CreateLShr(advanceq_longint, BLOCK_SIZE - block_shift), "longadvance");
1015        result_value = b.CreateBitCast(adv_longint, mBitBlockType);
1016    }
1017    b.CreateAlignedStore(strm_value, storePtr, BLOCK_SIZE/8);
1018    return result_value;
1019}
1020   
1021void PabloCompiler::SetOutputValue(Value * marker, const unsigned index) {
1022    IRBuilder<> b(mBasicBlock);
1023    if (marker->getType()->isPointerTy()) {
1024        marker = b.CreateAlignedLoad(marker, BLOCK_SIZE/8, false);
1025    }
1026    Value* indices[] = {b.getInt64(0), b.getInt32(index)};
1027    Value* gep = b.CreateGEP(mOutputAddrPtr, indices);
1028    b.CreateAlignedStore(marker, gep, BLOCK_SIZE/8, false);
1029}
1030
1031CompiledPabloFunction::CompiledPabloFunction(size_t carryDataSize, Function * function, ExecutionEngine * executionEngine)
1032: CarryDataSize(carryDataSize)
1033, FunctionPointer(executionEngine->getPointerToFunction(function))
1034, mFunction(function)
1035, mExecutionEngine(executionEngine)
1036{
1037
1038}
1039
1040// Clean up the memory for the compiled function once we're finished using it.
1041CompiledPabloFunction::~CompiledPabloFunction() {
1042    if (mExecutionEngine) {
1043        assert (mFunction);
1044        // mExecutionEngine->freeMachineCodeForFunction(mFunction); // This function only prints a "not supported" message. Reevaluate with LLVM 3.6.
1045        delete mExecutionEngine;
1046    }
1047}
1048
1049}
Note: See TracBrowser for help on using the repository browser.