source: icGREP/icgrep-devel/icgrep/pablo/pablo_compiler.cpp @ 4545

Last change on this file since 4545 was 4545, checked in by cameron, 4 years ago

Separate CarryIn and CarryOut data; introduce mIfDepth and mWhileDepth

File size: 44.3 KB
Line 
1/*
2 *  Copyright (c) 2014-15 International Characters.
3 *  This software is licensed to the public under the Open Software License 3.0.
4 *  icgrep is a trademark of International Characters.
5 */
6
7#include <pablo/pablo_compiler.h>
8#include <pablo/codegenstate.h>
9#include <pablo/printer_pablos.h>
10#include <cc/cc_namemap.hpp>
11#include <re/re_name.h>
12#include <stdexcept>
13#include <include/simd-lib/bitblock.hpp>
14#include <sstream>
15#include <llvm/IR/Verifier.h>
16#include <llvm/Pass.h>
17#include <llvm/PassManager.h>
18#include <llvm/ADT/SmallVector.h>
19#include <llvm/Analysis/Passes.h>
20#include <llvm/IR/BasicBlock.h>
21#include <llvm/IR/CallingConv.h>
22#include <llvm/IR/Constants.h>
23#include <llvm/IR/DataLayout.h>
24#include <llvm/IR/DerivedTypes.h>
25#include <llvm/IR/Function.h>
26#include <llvm/IR/GlobalVariable.h>
27#include <llvm/IR/InlineAsm.h>
28#include <llvm/IR/Instructions.h>
29#include <llvm/IR/LLVMContext.h>
30#include <llvm/IR/Module.h>
31#include <llvm/Support/FormattedStream.h>
32#include <llvm/Support/MathExtras.h>
33#include <llvm/Support/Casting.h>
34#include <llvm/Support/Compiler.h>
35#include <llvm/Support/Debug.h>
36#include <llvm/Support/TargetSelect.h>
37#include <llvm/Support/Host.h>
38#include <llvm/Transforms/Scalar.h>
39#include <llvm/ExecutionEngine/ExecutionEngine.h>
40#include <llvm/ExecutionEngine/MCJIT.h>
41#include <llvm/IRReader/IRReader.h>
42#include <llvm/Bitcode/ReaderWriter.h>
43#include <llvm/Support/MemoryBuffer.h>
44#include <llvm/IR/IRBuilder.h>
45#include <llvm/Support/CommandLine.h>
46#include <llvm/ADT/Twine.h>
47#include <iostream>
48
49static cl::OptionCategory eIRDumpOptions("LLVM IR Dump Options", "These options control dumping of LLVM IR.");
50static cl::opt<bool> DumpGeneratedIR("dump-generated-IR", cl::init(false), cl::desc("print LLVM IR generated by RE compilation"), cl::cat(eIRDumpOptions));
51
52static cl::OptionCategory fTracingOptions("Run-time Tracing Options", "These options control execution traces.");
53static cl::opt<bool> TraceNext("trace-next-nodes", cl::init(false), cl::desc("Generate dynamic traces of executed Next nodes (while control variables)."), cl::cat(fTracingOptions));
54static cl::opt<bool> DumpTrace("dump-trace", cl::init(false), cl::desc("Generate dynamic traces of executed assignments."), cl::cat(fTracingOptions));
55
56extern "C" {
57  void wrapped_print_register(char * regName, BitBlock bit_block) {
58      print_register<BitBlock>(regName, bit_block);
59  }
60}
61
62namespace pablo {
63
64PabloCompiler::PabloCompiler(const std::vector<Var*> & basisBits)
65: mBasisBits(basisBits)
66, mMod(new Module("icgrep", getGlobalContext()))
67, mBasicBlock(nullptr)
68, mExecutionEngine(nullptr)
69, mBitBlockType(VectorType::get(IntegerType::get(mMod->getContext(), 64), BLOCK_SIZE / 64))
70, mBasisBitsInputPtr(nullptr)
71, mCarryDataPtr(nullptr)
72, mWhileDepth(0)
73, mIfDepth(0)
74, mZeroInitializer(ConstantAggregateZero::get(mBitBlockType))
75, mOneInitializer(ConstantVector::getAllOnesValue(mBitBlockType))
76, mFunctionType(nullptr)
77, mFunction(nullptr)
78, mBasisBitsAddr(nullptr)
79, mOutputAddrPtr(nullptr)
80, mMaxWhileDepth(0)
81, mPrintRegisterFunction(nullptr)
82{
83    //Create the jit execution engine.up
84    InitializeNativeTarget();
85    InitializeNativeTargetAsmPrinter();
86    InitializeNativeTargetAsmParser();
87    DefineTypes();
88}
89
90PabloCompiler::~PabloCompiler()
91{
92
93}
94   
95void PabloCompiler::InstallExternalFunction(std::string C_fn_name, void * fn_ptr) {
96    mExternalMap.insert(std::make_pair(C_fn_name, fn_ptr));
97}
98
99void PabloCompiler::genPrintRegister(std::string regName, Value * bitblockValue) {
100    IRBuilder <> b(mBasicBlock);
101    Constant * regNameData = ConstantDataArray::getString(mMod->getContext(), regName);
102    GlobalVariable *regStrVar = new GlobalVariable(*mMod, 
103                                                   ArrayType::get(IntegerType::get(mMod->getContext(), 8), regName.length()+1),
104                                                   /*isConstant=*/ true,
105                                                   /*Linkage=*/ GlobalValue::PrivateLinkage,
106                                                   /*Initializer=*/ regNameData);
107    Value * regStrPtr = b.CreateGEP(regStrVar, {b.getInt64(0), b.getInt32(0)});
108    b.CreateCall(mPrintRegisterFunction, {regStrPtr, bitblockValue});
109}
110
111CompiledPabloFunction PabloCompiler::compile(PabloBlock & pb)
112{
113    mWhileDepth = 0;
114    mIfDepth = 0;
115    mMaxWhileDepth = 0;
116    unsigned totalCarryDataSize = Examine(pb, 0); 
117    mCarryInVector.resize(totalCarryDataSize);
118    mCarryOutVector.resize(totalCarryDataSize);
119    mCarryDataSummaryIdx.resize(totalCarryDataSize);
120    std::string errMessage;
121    EngineBuilder builder(mMod);
122    builder.setErrorStr(&errMessage);
123    builder.setMCPU(sys::getHostCPUName());
124    builder.setUseMCJIT(true);
125    builder.setOptLevel(mMaxWhileDepth ? CodeGenOpt::Level::Less : CodeGenOpt::Level::None);
126    mExecutionEngine = builder.create();
127    if (mExecutionEngine == nullptr) {
128        throw std::runtime_error("Could not create ExecutionEngine: " + errMessage);
129    }
130    DeclareFunctions();
131
132    DeclareCallFunctions();
133
134    Function::arg_iterator args = mFunction->arg_begin();
135    mBasisBitsAddr = args++;
136    mBasisBitsAddr->setName("basis_bits");
137    mCarryDataPtr = args++;
138    mCarryDataPtr->setName("carry_data");
139    mOutputAddrPtr = args++;
140    mOutputAddrPtr->setName("output");
141
142    mWhileDepth = 0;
143    mIfDepth = 0;
144    mMaxWhileDepth = 0;
145    mBasicBlock = BasicBlock::Create(mMod->getContext(), "parabix_entry", mFunction,0);
146
147    //The basis bits structure
148    for (unsigned i = 0; i != mBasisBits.size(); ++i) {
149        IRBuilder<> b(mBasicBlock);
150        Value* indices[] = {b.getInt64(0), b.getInt32(i)};
151        Value * gep = b.CreateGEP(mBasisBitsAddr, indices);
152        LoadInst * basisBit = b.CreateAlignedLoad(gep, BLOCK_SIZE/8, false, mBasisBits[i]->getName()->to_string());
153        mMarkerMap.insert(std::make_pair(mBasisBits[i], basisBit));
154    }
155
156    //Generate the IR instructions for the function.
157    compileBlock(pb);
158
159    if (LLVM_UNLIKELY(mWhileDepth != 0)) {
160        throw std::runtime_error("Non-zero nesting depth error (" + std::to_string(mWhileDepth) + ")");
161    }
162
163    //Terminate the block
164    ReturnInst::Create(mMod->getContext(), mBasicBlock);
165
166    //Display the IR that has been generated by this module.
167    if (LLVM_UNLIKELY(DumpGeneratedIR)) {
168        mMod->dump();
169    }
170    //Create a verifier.  The verifier will print an error message if our module is malformed in any way.
171    verifyModule(*mMod, &dbgs());
172
173    mExecutionEngine->finalizeObject();
174
175    //Return the required size of the carry data area to the process_block function.
176    // Reserve 1 element in the carry data area for current block number (future). TODO
177    return CompiledPabloFunction((totalCarryDataSize + 1) * sizeof(BitBlock), mFunction, mExecutionEngine);
178}
179
180void PabloCompiler::DefineTypes()
181{
182    StructType * structBasisBits = mMod->getTypeByName("struct.Basis_bits");
183    if (structBasisBits == nullptr) {
184        structBasisBits = StructType::create(mMod->getContext(), "struct.Basis_bits");
185    }
186    std::vector<Type*>StructTy_struct_Basis_bits_fields;
187    for (int i = 0; i != mBasisBits.size(); i++)
188    {
189        StructTy_struct_Basis_bits_fields.push_back(mBitBlockType);
190    }
191    if (structBasisBits->isOpaque()) {
192        structBasisBits->setBody(StructTy_struct_Basis_bits_fields, /*isPacked=*/false);
193    }
194    mBasisBitsInputPtr = PointerType::get(structBasisBits, 0);
195
196    std::vector<Type*>functionTypeArgs;
197    functionTypeArgs.push_back(mBasisBitsInputPtr);
198
199    //The carry data array.
200    //A pointer to the BitBlock vector.
201    functionTypeArgs.push_back(PointerType::get(mBitBlockType, 0));
202
203    //The output structure.
204    StructType * outputStruct = mMod->getTypeByName("struct.Output");
205    if (!outputStruct) {
206        outputStruct = StructType::create(mMod->getContext(), "struct.Output");
207    }
208    if (outputStruct->isOpaque()) {
209        std::vector<Type*>fields;
210        fields.push_back(mBitBlockType);
211        fields.push_back(mBitBlockType);
212        outputStruct->setBody(fields, /*isPacked=*/false);
213    }
214    PointerType* outputStructPtr = PointerType::get(outputStruct, 0);
215
216    //The &output parameter.
217    functionTypeArgs.push_back(outputStructPtr);
218
219    mFunctionType = FunctionType::get(
220     /*Result=*/Type::getVoidTy(mMod->getContext()),
221     /*Params=*/functionTypeArgs,
222     /*isVarArg=*/false);
223}
224
225void PabloCompiler::DeclareFunctions()
226{
227    //This function can be used for testing to print the contents of a register from JIT'd code to the terminal window.
228    mPrintRegisterFunction = mMod->getOrInsertFunction("wrapped_print_register", Type::getVoidTy(getGlobalContext()), Type::getInt8PtrTy(getGlobalContext()), mBitBlockType, NULL);
229    mExecutionEngine->addGlobalMapping(cast<GlobalValue>(mPrintRegisterFunction), (void *)&wrapped_print_register);
230    // to call->  b.CreateCall(mFunc_print_register, unicode_category);
231
232#ifdef USE_UADD_OVERFLOW
233#ifdef USE_TWO_UADD_OVERFLOW
234    // Type Definitions for llvm.uadd.with.overflow.carryin.i128 or .i256
235    std::vector<Type*>StructTy_0_fields;
236    StructTy_0_fields.push_back(IntegerType::get(mMod->getContext(), BLOCK_SIZE));
237    StructTy_0_fields.push_back(IntegerType::get(mMod->getContext(), 1));
238    StructType *StructTy_0 = StructType::get(mMod->getContext(), StructTy_0_fields, /*isPacked=*/false);
239
240    std::vector<Type*>FuncTy_1_args;
241    FuncTy_1_args.push_back(IntegerType::get(mMod->getContext(), BLOCK_SIZE));
242    FuncTy_1_args.push_back(IntegerType::get(mMod->getContext(), BLOCK_SIZE));
243    FunctionType* FuncTy_1 = FunctionType::get(
244                                              /*Result=*/StructTy_0,
245                                              /*Params=*/FuncTy_1_args,
246                                              /*isVarArg=*/false);
247
248    mFunctionUaddOverflow = mMod->getFunction("llvm.uadd.with.overflow.i" +
249                                              std::to_string(BLOCK_SIZE));
250    if (!mFunctionUaddOverflow) {
251        mFunctionUaddOverflow= Function::Create(
252          /*Type=*/ FuncTy_1,
253          /*Linkage=*/ GlobalValue::ExternalLinkage,
254          /*Name=*/ "llvm.uadd.with.overflow.i" + std::to_string(BLOCK_SIZE), mMod); // (external, no body)
255        mFunctionUaddOverflow->setCallingConv(CallingConv::C);
256    }
257    AttributeSet mFunctionUaddOverflowPAL;
258    {
259        SmallVector<AttributeSet, 4> Attrs;
260        AttributeSet PAS;
261        {
262          AttrBuilder B;
263          B.addAttribute(Attribute::NoUnwind);
264          B.addAttribute(Attribute::ReadNone);
265          PAS = AttributeSet::get(mMod->getContext(), ~0U, B);
266        }
267
268        Attrs.push_back(PAS);
269        mFunctionUaddOverflowPAL = AttributeSet::get(mMod->getContext(), Attrs);
270    }
271    mFunctionUaddOverflow->setAttributes(mFunctionUaddOverflowPAL);
272#else
273    // Type Definitions for llvm.uadd.with.overflow.carryin.i128 or .i256
274    std::vector<Type*>StructTy_0_fields;
275    StructTy_0_fields.push_back(IntegerType::get(mMod->getContext(), BLOCK_SIZE));
276    StructTy_0_fields.push_back(IntegerType::get(mMod->getContext(), 1));
277    StructType *StructTy_0 = StructType::get(mMod->getContext(), StructTy_0_fields, /*isPacked=*/false);
278
279    std::vector<Type*>FuncTy_1_args;
280    FuncTy_1_args.push_back(IntegerType::get(mMod->getContext(), BLOCK_SIZE));
281    FuncTy_1_args.push_back(IntegerType::get(mMod->getContext(), BLOCK_SIZE));
282    FuncTy_1_args.push_back(IntegerType::get(mMod->getContext(), 1));
283    FunctionType* FuncTy_1 = FunctionType::get(
284                                              /*Result=*/StructTy_0,
285                                              /*Params=*/FuncTy_1_args,
286                                              /*isVarArg=*/false);
287
288    mFunctionUaddOverflowCarryin = mMod->getFunction("llvm.uadd.with.overflow.carryin.i" +
289                                              std::to_string(BLOCK_SIZE));
290    if (!mFunctionUaddOverflowCarryin) {
291        mFunctionUaddOverflowCarryin = Function::Create(
292          /*Type=*/ FuncTy_1,
293          /*Linkage=*/ GlobalValue::ExternalLinkage,
294          /*Name=*/ "llvm.uadd.with.overflow.carryin.i" + std::to_string(BLOCK_SIZE), mMod); // (external, no body)
295        mFunctionUaddOverflowCarryin->setCallingConv(CallingConv::C);
296    }
297    AttributeSet mFunctionUaddOverflowCarryinPAL;
298    {
299        SmallVector<AttributeSet, 4> Attrs;
300        AttributeSet PAS;
301        {
302          AttrBuilder B;
303          B.addAttribute(Attribute::NoUnwind);
304          B.addAttribute(Attribute::ReadNone);
305          PAS = AttributeSet::get(mMod->getContext(), ~0U, B);
306        }
307
308        Attrs.push_back(PAS);
309        mFunctionUaddOverflowCarryinPAL = AttributeSet::get(mMod->getContext(), Attrs);
310    }
311    mFunctionUaddOverflowCarryin->setAttributes(mFunctionUaddOverflowCarryinPAL);
312#endif
313#endif
314
315    //Starts on process_block
316    SmallVector<AttributeSet, 4> Attrs;
317    AttributeSet PAS;
318    {
319        AttrBuilder B;
320        B.addAttribute(Attribute::ReadOnly);
321        B.addAttribute(Attribute::NoCapture);
322        PAS = AttributeSet::get(mMod->getContext(), 1U, B);
323    }
324    Attrs.push_back(PAS);
325    {
326        AttrBuilder B;
327        B.addAttribute(Attribute::NoCapture);
328        PAS = AttributeSet::get(mMod->getContext(), 2U, B);
329    }
330    Attrs.push_back(PAS);
331    {
332        AttrBuilder B;
333        B.addAttribute(Attribute::NoCapture);
334        PAS = AttributeSet::get(mMod->getContext(), 3U, B);
335    }
336    Attrs.push_back(PAS);
337    {
338        AttrBuilder B;
339        B.addAttribute(Attribute::NoUnwind);
340        B.addAttribute(Attribute::UWTable);
341        PAS = AttributeSet::get(mMod->getContext(), ~0U, B);
342    }
343    AttributeSet AttrSet = AttributeSet::get(mMod->getContext(), Attrs);
344
345    //Create the function that will be generated.
346    mFunction = mMod->getFunction("process_block");
347    if (!mFunction) {
348        mFunction = Function::Create(
349            /*Type=*/mFunctionType,
350            /*Linkage=*/GlobalValue::ExternalLinkage,
351            /*Name=*/"process_block", mMod);
352        mFunction->setCallingConv(CallingConv::C);
353    }
354    mFunction->setAttributes(AttrSet);
355}
356   
357// CarryDataNumbering
358//
359// For each PabloBlock, a contiguous CarryData area holds carry,
360// and advance values that are generated in one block for use in the
361// next.  For a given block, the carry data area contains the
362// carries, the advances and the nested data for contained blocks,
363// if any.
364// Notes:
365//   (a) an additional data entry is created for each if-statement
366//       having more than one carry or advance opreation within it.  This
367//       additional entry is a summary entry which must be nonzero to
368//       indicate that there are carry or advance bits associated with
369//       any operation within the if-structure (at any nesting level).
370//   (b) advancing by a large amount may require multiple advance entries.
371//       the number of advance entries for an operation Adv(x, n) is
372//       (n + BLOCK_SIZE - 1) / BLOCK_SIZE
373//
374// Examine precomputes some CarryNumbering and AdvanceNumbering, as
375// well as mMaxWhileDepth of while loops.
376//
377unsigned PabloCompiler::Examine(PabloBlock & blk, unsigned carryDataIndexIn) {
378    // Count local carries and advances at this level.
379    unsigned carryDataIndex = carryDataIndexIn;
380    unsigned localCarries = 0;
381    unsigned localAdvances = 0;
382    unsigned nestedCarryDataSize = 0;
383    for (Statement * stmt : blk) {
384        if (Advance * adv = dyn_cast<Advance>(stmt)) {
385            adv->setLocalAdvanceIndex(localAdvances);
386            localAdvances += (adv->getAdvanceAmount() + BLOCK_SIZE - 1) / BLOCK_SIZE;
387        }
388        else if (MatchStar * m = dyn_cast<MatchStar>(stmt)) {
389            m->setLocalCarryIndex(localCarries);
390            ++localCarries;
391        }
392        else if (ScanThru * s = dyn_cast<ScanThru>(stmt)) {
393            s->setLocalCarryIndex(localCarries);
394            ++localCarries;
395        }
396    }
397    carryDataIndex += localCarries + localAdvances;
398    for (Statement * stmt : blk) {
399        if (Call * call = dyn_cast<Call>(stmt)) {
400            mCalleeMap.insert(std::make_pair(call->getCallee(), nullptr));
401        }
402        else if (If * ifStatement = dyn_cast<If>(stmt)) {
403            ++mIfDepth;
404            const auto ifCarryDataSize = Examine(ifStatement->getBody(), carryDataIndex);
405            --mIfDepth;
406            nestedCarryDataSize += ifCarryDataSize;
407            carryDataIndex += ifCarryDataSize;
408        }
409        else if (While * whileStatement = dyn_cast<While>(stmt)) {
410            mMaxWhileDepth = std::max(mMaxWhileDepth, ++mWhileDepth);
411            const auto whileCarryDataSize = Examine(whileStatement->getBody(), carryDataIndex);
412            --mWhileDepth;
413            nestedCarryDataSize += whileCarryDataSize;
414            carryDataIndex += whileCarryDataSize;
415        }
416    }
417    blk.setCarryIndexBase(carryDataIndexIn);
418    blk.setLocalCarryCount(localCarries);
419    blk.setLocalAdvanceCount(localAdvances);
420    unsigned totalCarryDataSize = localCarries + localAdvances + nestedCarryDataSize;
421    if ((mIfDepth > 0) && (totalCarryDataSize > 1)) {
422        // Need extra space for the summary variable, always the last
423        // entry within an if block.
424        totalCarryDataSize += 1;
425    }
426    blk.setTotalCarryDataSize(totalCarryDataSize);
427    return totalCarryDataSize;
428}
429
430void PabloCompiler::DeclareCallFunctions() {
431    for (auto mapping : mCalleeMap) {
432        const String * callee = mapping.first;
433        //std::cerr << callee->str() << " to be declared\n";
434        auto ei = mExternalMap.find(callee->value());
435        if (ei != mExternalMap.end()) {
436            void * fn_ptr = ei->second;
437            //std::cerr << "Ptr found:" <<  std::hex << ((intptr_t) fn_ptr) << std::endl;
438            Value * externalValue = mMod->getOrInsertFunction(callee->value(), mBitBlockType, mBasisBitsInputPtr, NULL);
439            if (LLVM_UNLIKELY(externalValue == nullptr)) {
440                throw std::runtime_error("Could not create static method call for external function \"" + callee->to_string() + "\"");
441            }
442            mExecutionEngine->addGlobalMapping(cast<GlobalValue>(externalValue), fn_ptr);
443            mCalleeMap[callee] = externalValue;
444        }
445        else {
446            throw std::runtime_error("External function \"" + callee->to_string() + "\" not installed");
447        }
448    }
449}
450
451void PabloCompiler::compileBlock(const PabloBlock & blk) {
452    for (const Statement * statement : blk) {
453        compileStatement(statement);
454    }
455}
456
457
458
459
460void PabloCompiler::compileIf(const If * ifStatement) {       
461        //
462        //  The If-ElseZero stmt:
463        //  if <predicate:expr> then <body:stmt>* elsezero <defined:var>* endif
464        //  If the value of the predicate is nonzero, then determine the values of variables
465        //  <var>* by executing the given statements.  Otherwise, the value of the
466        //  variables are all zero.  Requirements: (a) no variable that is defined within
467        //  the body of the if may be accessed outside unless it is explicitly
468        //  listed in the variable list, (b) every variable in the defined list receives
469        //  a value within the body, and (c) the logical consequence of executing
470        //  the statements in the event that the predicate is zero is that the
471        //  values of all defined variables indeed work out to be 0.
472        //
473        //  Simple Implementation with Phi nodes:  a phi node in the if exit block
474        //  is inserted for each variable in the defined variable list.  It receives
475        //  a zero value from the ifentry block and the defined value from the if
476        //  body.
477        //
478        BasicBlock * ifEntryBlock = mBasicBlock;  // The block we are in.
479        BasicBlock * ifBodyBlock = BasicBlock::Create(mMod->getContext(), "if.body", mFunction, 0);
480        BasicBlock * ifEndBlock = BasicBlock::Create(mMod->getContext(), "if.end", mFunction, 0);
481       
482        IRBuilder<> b_entry(ifEntryBlock);
483        mBasicBlock = ifEntryBlock;
484   
485        const unsigned baseCarryDataIdx = ifStatement->getBody().getCarryIndexBase();
486        const unsigned carryDataSize = ifStatement->getBody().getTotalCarryDataSize();
487        const unsigned carrySummaryIndex = baseCarryDataIdx + carryDataSize - 1;
488       
489        Value* if_test_value = compileExpression(ifStatement->getCondition());
490        if (carryDataSize > 0) {
491            // load the summary variable
492            Value* last_if_pending_data = genCarryDataLoad(carrySummaryIndex);
493            if_test_value = b_entry.CreateOr(if_test_value, last_if_pending_data);
494        }
495        b_entry.CreateCondBr(genBitBlockAny(if_test_value), ifEndBlock, ifBodyBlock);
496
497        // Entry processing is complete, now handle the body of the if.
498        mBasicBlock = ifBodyBlock;
499        compileBlock(ifStatement -> getBody());
500
501        // If we compiled an If or a While statement, we won't be in the same basic block as before.
502        // Create the branch from the current basic block to the end block.
503        IRBuilder<> bIfBody(mBasicBlock);
504        // After the recursive compile, now insert the code to compute the summary
505        // carry over variable.
506       
507        if (carryDataSize > 1) {
508            // If there was only one carry entry, then it also serves as the summary variable.
509            // Otherwise, we need to combine entries to compute the summary.
510            Value * carry_summary = mZeroInitializer;
511            for (int c = baseCarryDataIdx; c < carrySummaryIndex; c++) {
512                int s = mCarryDataSummaryIdx[c];
513                if (s == -1) {
514                    Value* carryq_value = mCarryOutVector[c];
515                    if (carry_summary == mZeroInitializer) {
516                        carry_summary = carryq_value;
517                    }
518                    else {
519                        carry_summary = bIfBody.CreateOr(carry_summary, carryq_value);
520                    }
521                    mCarryDataSummaryIdx[c] = carrySummaryIndex;
522                }
523            }
524            genCarryDataStore(carry_summary, carrySummaryIndex);
525        }
526        bIfBody.CreateBr(ifEndBlock);
527        //End Block
528        IRBuilder<> bEnd(ifEndBlock);
529        for (const PabloAST * node : ifStatement->getDefined()) {
530            const Assign * assign = cast<Assign>(node);
531            PHINode * phi = bEnd.CreatePHI(mBitBlockType, 2, assign->getName()->value());
532            auto f = mMarkerMap.find(assign);
533            assert (f != mMarkerMap.end());
534            phi->addIncoming(mZeroInitializer, ifEntryBlock);
535            phi->addIncoming(f->second, mBasicBlock);
536            mMarkerMap[assign] = phi;
537        }
538        // Create the phi Node for the summary variable.
539        if (carryDataSize > 0) {
540            PHINode * summary_phi = bEnd.CreatePHI(mBitBlockType, 2, "summary");
541            summary_phi->addIncoming(mZeroInitializer, ifEntryBlock);
542            summary_phi->addIncoming(mCarryOutVector[carrySummaryIndex], mBasicBlock);
543            mCarryOutVector[carrySummaryIndex] = summary_phi;
544        }
545       
546        // Set the basic block to the new end block
547        mBasicBlock = ifEndBlock;
548}
549
550void PabloCompiler::compileWhile(const While * whileStatement) {
551        const unsigned baseCarryDataIdx = whileStatement->getBody().getCarryIndexBase();
552        const unsigned carryDataSize = whileStatement->getBody().getTotalCarryDataSize();
553   
554        if (mWhileDepth == 0) {
555            for (auto i = 0; i < carryDataSize; ++i) {
556                genCarryDataLoad(baseCarryDataIdx + i);
557            }
558        }
559
560        SmallVector<const Next*, 4> nextNodes;
561        for (const PabloAST * node : whileStatement->getBody()) {
562            if (isa<Next>(node)) {
563                nextNodes.push_back(cast<Next>(node));
564            }
565        }
566
567        // Compile the initial iteration statements; the calls to genCarryDataStore will update the
568        // mCarryOutVector with the appropriate values. Although we're not actually entering a new basic
569        // block yet, increment the nesting depth so that any calls to genCarryDataLoad or genCarryDataStore
570        // will refer to the previous value.
571
572        ++mWhileDepth;
573
574        compileBlock(whileStatement->getBody());
575
576        // Reset the carry queue index. Note: this ought to be changed in the future. Currently this assumes
577        // that compiling the while body twice will generate the equivalent IR. This is not necessarily true
578        // but works for now.
579
580        BasicBlock* whileCondBlock = BasicBlock::Create(mMod->getContext(), "while.cond", mFunction, 0);
581        BasicBlock* whileBodyBlock = BasicBlock::Create(mMod->getContext(), "while.body", mFunction, 0);
582        BasicBlock* whileEndBlock = BasicBlock::Create(mMod->getContext(), "while.end", mFunction, 0);
583
584        // Note: compileBlock may update the mBasicBlock pointer if the body contains nested loops. It
585        // may not be same one that we entered the function with.
586        IRBuilder<> bEntry(mBasicBlock);
587        bEntry.CreateBr(whileCondBlock);
588
589        // CONDITION BLOCK
590        IRBuilder<> bCond(whileCondBlock);
591        // generate phi nodes for any carry propogating instruction
592        std::vector<PHINode*> phiNodes(carryDataSize + nextNodes.size());
593        unsigned index = 0;
594        for (index = 0; index < carryDataSize; ++index) {
595            PHINode * phi = bCond.CreatePHI(mBitBlockType, 2);
596            phi->addIncoming(mCarryOutVector[baseCarryDataIdx + index], mBasicBlock);
597            mCarryInVector[baseCarryDataIdx + index] = mZeroInitializer; // (use phi for multi-carry mode.)
598            phiNodes[index] = phi;
599        }
600        // and for any Next nodes in the loop body
601        for (const Next * n : nextNodes) {
602            PHINode * phi = bCond.CreatePHI(mBitBlockType, 2, n->getName()->value());
603            auto f = mMarkerMap.find(n->getInitial());
604            assert (f != mMarkerMap.end());
605            phi->addIncoming(f->second, mBasicBlock);
606            mMarkerMap[n->getInitial()] = phi;
607            phiNodes[index++] = phi;
608        }
609
610        mBasicBlock = whileCondBlock;
611        bCond.CreateCondBr(genBitBlockAny(compileExpression(whileStatement->getCondition())), whileEndBlock, whileBodyBlock);
612
613        // BODY BLOCK
614        std::cerr << "Compile loop body\n";
615        mBasicBlock = whileBodyBlock;
616        compileBlock(whileStatement->getBody());
617        // update phi nodes for any carry propogating instruction
618        IRBuilder<> bWhileBody(mBasicBlock);
619        for (index = 0; index < carryDataSize; ++index) {
620            PHINode * phi = phiNodes[index];
621            Value * carryOut = bWhileBody.CreateOr(phi, mCarryOutVector[baseCarryDataIdx + index]);
622            phi->addIncoming(carryOut, mBasicBlock);
623            mCarryOutVector[baseCarryDataIdx + index] = phi;
624        }
625       
626        // and for any Next nodes in the loop body
627        for (const Next * n : nextNodes) {
628            auto f = mMarkerMap.find(n->getInitial());
629            assert (f != mMarkerMap.end());
630            PHINode * phi = phiNodes[index++];
631            phi->addIncoming(f->second, mBasicBlock);
632            mMarkerMap[n->getInitial()] = phi;
633        }
634
635        bWhileBody.CreateBr(whileCondBlock);
636
637        // EXIT BLOCK
638        mBasicBlock = whileEndBlock;
639        if (--mWhileDepth == 0) {
640            for (index = 0; index < carryDataSize; ++index) {
641                genCarryDataStore(phiNodes[index], baseCarryDataIdx + index);
642            }
643        }
644 
645}
646
647void PabloCompiler::compileStatement(const Statement * stmt)
648{
649    IRBuilder<> b(mBasicBlock);
650    if (const Assign * assign = dyn_cast<const Assign>(stmt)) {
651        Value * expr = compileExpression(assign->getExpr());
652        if (DumpTrace) {
653            genPrintRegister(assign->getName()->to_string(), expr);
654        }
655        mMarkerMap[assign] = expr;
656        if (LLVM_UNLIKELY(assign->isOutputAssignment())) {
657            SetOutputValue(expr, assign->getOutputIndex());
658        }
659    }
660    else if (const Next * next = dyn_cast<const Next>(stmt)) {
661        Value * expr = compileExpression(next->getExpr());
662        if (TraceNext) {
663            genPrintRegister(next->getInitial()->getName()->to_string(), expr);
664        }
665        mMarkerMap[next->getInitial()] = expr;
666    }
667    else if (const If * ifStatement = dyn_cast<const If>(stmt))
668    {
669        compileIf(ifStatement);
670    }
671    else if (const While * whileStatement = dyn_cast<const While>(stmt))
672    {
673        compileWhile(whileStatement);
674    }
675    else if (const Call* call = dyn_cast<Call>(stmt)) {
676        //Call the callee once and store the result in the marker map.
677        auto mi = mMarkerMap.find(call);
678        if (mi == mMarkerMap.end()) {
679            auto ci = mCalleeMap.find(call->getCallee());
680            if (LLVM_UNLIKELY(ci == mCalleeMap.end())) {
681                throw std::runtime_error("Unexpected error locating static function for \"" + call->getCallee()->to_string() + "\"");
682            }
683            mi = mMarkerMap.insert(std::make_pair(call, b.CreateCall(ci->second, mBasisBitsAddr))).first;
684        }
685        // return mi->second;
686    }
687    else if (const And * pablo_and = dyn_cast<And>(stmt)) {
688        Value * expr = b.CreateAnd(compileExpression(pablo_and->getExpr1()), compileExpression(pablo_and->getExpr2()), "and");
689        mMarkerMap[pablo_and] = expr;
690        // return expr;
691    }
692    else if (const Or * pablo_or = dyn_cast<Or>(stmt)) {
693        Value * expr = b.CreateOr(compileExpression(pablo_or->getExpr1()), compileExpression(pablo_or->getExpr2()), "or");
694        mMarkerMap[pablo_or] = expr;
695        // return expr;
696    }
697    else if (const Xor * pablo_xor = dyn_cast<Xor>(stmt)) {
698        Value * expr = b.CreateXor(compileExpression(pablo_xor->getExpr1()), compileExpression(pablo_xor->getExpr2()), "xor");
699        mMarkerMap[pablo_xor] = expr;
700        // return expr;
701    }
702    else if (const Sel * sel = dyn_cast<Sel>(stmt)) {
703        Value* ifMask = compileExpression(sel->getCondition());
704        Value* ifTrue = b.CreateAnd(ifMask, compileExpression(sel->getTrueExpr()));
705        Value* ifFalse = b.CreateAnd(genNot(ifMask), compileExpression(sel->getFalseExpr()));
706        Value * expr = b.CreateOr(ifTrue, ifFalse);
707        mMarkerMap[sel] = expr;
708        // return expr;
709    }
710    else if (const Not * pablo_not = dyn_cast<Not>(stmt)) {
711        Value * expr = genNot(compileExpression(pablo_not->getExpr()));
712        mMarkerMap[pablo_not] = expr;
713        // return expr;
714    }
715    else if (const Advance * adv = dyn_cast<Advance>(stmt)) {
716        Value* strm_value = compileExpression(adv->getExpr());
717        int shift = adv->getAdvanceAmount();
718        unsigned advance_index = adv->getLocalAdvanceIndex();
719        Value * expr = genAdvanceWithCarry(strm_value, shift, advance_index, stmt->getParent());
720        mMarkerMap[adv] = expr;
721        // return expr;
722    }
723    else if (const MatchStar * mstar = dyn_cast<MatchStar>(stmt))
724    {
725        Value * marker = compileExpression(mstar->getMarker());
726        Value * cc = compileExpression(mstar->getCharClass());
727        Value * marker_and_cc = b.CreateAnd(marker, cc);
728        unsigned carry_index = mstar->getLocalCarryIndex();
729        Value * expr = b.CreateOr(b.CreateXor(genAddWithCarry(marker_and_cc, cc, carry_index, stmt->getParent()), cc), marker, "matchstar");
730        mMarkerMap[mstar] = expr;
731        // return expr;
732    }
733    else if (const ScanThru * sthru = dyn_cast<ScanThru>(stmt))
734    {
735        Value * marker_expr = compileExpression(sthru->getScanFrom());
736        Value * cc_expr = compileExpression(sthru->getScanThru());
737        unsigned carry_index = sthru->getLocalCarryIndex();
738        Value * expr = b.CreateAnd(genAddWithCarry(marker_expr, cc_expr, carry_index, stmt->getParent()), genNot(cc_expr), "scanthru");
739        mMarkerMap[sthru] = expr;
740        // return expr;
741    }
742    else {
743        PabloPrinter::print(stmt, std::cerr);
744        throw std::runtime_error("Unrecognized Pablo Statement! can't compile.");
745    }
746}
747
748Value * PabloCompiler::compileExpression(const PabloAST * expr) {
749    if (isa<Ones>(expr)) {
750        return mOneInitializer;
751    }
752    else if (isa<Zeroes>(expr)) {
753        return mZeroInitializer;
754    }
755    else if (const Next * next = dyn_cast<Next>(expr)) {
756        expr = next->getInitial();
757    }
758    auto f = mMarkerMap.find(expr);
759    if (f == mMarkerMap.end()) {
760        std::stringstream str;
761        str << "\"";
762        PabloPrinter::print(expr, str);
763        str << "\" was used before definition!";
764        throw std::runtime_error(str.str());
765    }
766    return f->second;
767}
768
769
770#ifdef USE_UADD_OVERFLOW
771#ifdef USE_TWO_UADD_OVERFLOW
772PabloCompiler::SumWithOverflowPack PabloCompiler::callUaddOverflow(Value* int128_e1, Value* int128_e2) {
773    std::vector<Value*> struct_res_params;
774    struct_res_params.push_back(int128_e1);
775    struct_res_params.push_back(int128_e2);
776    CallInst* struct_res = CallInst::Create(mFunctionUaddOverflow, struct_res_params, "uadd_overflow_res", mBasicBlock);
777    struct_res->setCallingConv(CallingConv::C);
778    struct_res->setTailCall(false);
779    AttributeSet struct_res_PAL;
780    struct_res->setAttributes(struct_res_PAL);
781
782    SumWithOverflowPack ret;
783
784    std::vector<unsigned> int128_sum_indices;
785    int128_sum_indices.push_back(0);
786    ret.sum = ExtractValueInst::Create(struct_res, int128_sum_indices, "sum", mBasicBlock);
787
788    std::vector<unsigned> int1_obit_indices;
789    int1_obit_indices.push_back(1);
790    ret.obit = ExtractValueInst::Create(struct_res, int1_obit_indices, "obit", mBasicBlock);
791
792    return ret;
793}
794#else
795PabloCompiler::SumWithOverflowPack PabloCompiler::callUaddOverflow(Value* int128_e1, Value* int128_e2, Value* int1_cin) {
796    std::vector<Value*> struct_res_params;
797    struct_res_params.push_back(int128_e1);
798    struct_res_params.push_back(int128_e2);
799    struct_res_params.push_back(int1_cin);
800    CallInst* struct_res = CallInst::Create(mFunctionUaddOverflowCarryin, struct_res_params, "uadd_overflow_res", mBasicBlock);
801    struct_res->setCallingConv(CallingConv::C);
802    struct_res->setTailCall(false);
803    AttributeSet struct_res_PAL;
804    struct_res->setAttributes(struct_res_PAL);
805
806    SumWithOverflowPack ret;
807
808    std::vector<unsigned> int128_sum_indices;
809    int128_sum_indices.push_back(0);
810    ret.sum = ExtractValueInst::Create(struct_res, int128_sum_indices, "sum", mBasicBlock);
811
812    std::vector<unsigned> int1_obit_indices;
813    int1_obit_indices.push_back(1);
814    ret.obit = ExtractValueInst::Create(struct_res, int1_obit_indices, "obit", mBasicBlock);
815
816    return ret;
817}
818#endif
819#endif
820
821
822Value* PabloCompiler::genAddWithCarry(Value* e1, Value* e2, unsigned localIndex, const PabloBlock * blk) {
823    IRBuilder<> b(mBasicBlock);
824
825    //CarryQ - carry in.
826    const int carryIdx = blk->getCarryIndexBase() + localIndex;
827    Value* carryq_value = genCarryDataLoad(carryIdx);
828#ifdef USE_TWO_UADD_OVERFLOW
829    //This is the ideal implementation, which uses two uadd.with.overflow
830    //The back end should be able to recognize this pattern and combine it into uadd.with.overflow.carryin
831    CastInst* int128_e1 = new BitCastInst(e1, b.getIntNTy(BLOCK_SIZE), "e1_128", mBasicBlock);
832    CastInst* int128_e2 = new BitCastInst(e2, b.getIntNTy(BLOCK_SIZE), "e2_128", mBasicBlock);
833    CastInst* int128_carryq_value = new BitCastInst(carryq_value, b.getIntNTy(BLOCK_SIZE), "carryq_128", mBasicBlock);
834
835    SumWithOverflowPack sumpack0, sumpack1;
836
837    sumpack0 = callUaddOverflow(int128_e1, int128_e2);
838    sumpack1 = callUaddOverflow(sumpack0.sum, int128_carryq_value);
839
840    Value* obit = b.CreateOr(sumpack0.obit, sumpack1.obit, "carry_bit");
841    Value* sum = b.CreateBitCast(sumpack1.sum, mBitBlockType, "ret_sum");
842
843    /*obit is the i1 carryout, zero extend and insert it into a v2i64 or v4i64 vector.*/
844    ConstantAggregateZero* const_packed_5 = ConstantAggregateZero::get(mBitBlockType);
845    ConstantInt* const_int32_6 = ConstantInt::get(mMod->getContext(), APInt(32, StringRef("0"), 10));
846    CastInst* int64_o0 = new ZExtInst(obit, IntegerType::get(mMod->getContext(), 64), "o0", mBasicBlock);
847    InsertElementInst* carry_out = InsertElementInst::Create(const_packed_5, int64_o0, const_int32_6, "carry_out", mBasicBlock);
848
849#elif defined USE_UADD_OVERFLOW
850    //use llvm.uadd.with.overflow.i128 or i256
851    CastInst* int128_e1 = new BitCastInst(e1, b.getIntNTy(BLOCK_SIZE), "e1_128", mBasicBlock);
852    CastInst* int128_e2 = new BitCastInst(e2, b.getIntNTy(BLOCK_SIZE), "e2_128", mBasicBlock);
853
854    //get i1 carryin from iBLOCK_SIZE
855    ConstantInt* const_int32_6 = ConstantInt::get(mMod->getContext(), APInt(32, StringRef("0"), 10));
856    ExtractElementInst * int64_carryq_value = ExtractElementInst::Create(carryq_value, const_int32_6, "carryq_64", mBasicBlock);
857    CastInst* int1_carryq_value = new TruncInst(int64_carryq_value, IntegerType::get(mMod->getContext(), 1), "carryq_1", mBasicBlock);
858
859    SumWithOverflowPack sumpack0;
860    sumpack0 = callUaddOverflow(int128_e1, int128_e2, int1_carryq_value);
861    Value* obit = sumpack0.obit;
862    Value* sum = b.CreateBitCast(sumpack0.sum, mBitBlockType, "sum");
863
864    /*obit is the i1 carryout, zero extend and insert it into a v2i64 or v4i64 vector.*/
865    ConstantAggregateZero* const_packed_5 = ConstantAggregateZero::get(mBitBlockType);
866    CastInst* int64_o0 = new ZExtInst(obit, IntegerType::get(mMod->getContext(), 64), "o0", mBasicBlock);
867    InsertElementInst* carry_out = InsertElementInst::Create(const_packed_5, int64_o0, const_int32_6, "carry_out", mBasicBlock);
868#elif (BLOCK_SIZE == 128)
869    //calculate carry through logical ops
870    Value* carrygen = b.CreateAnd(e1, e2, "carrygen");
871    Value* carryprop = b.CreateOr(e1, e2, "carryprop");
872    Value* digitsum = b.CreateAdd(e1, e2, "digitsum");
873    Value* partial = b.CreateAdd(digitsum, carryq_value, "partial");
874    Value* digitcarry = b.CreateOr(carrygen, b.CreateAnd(carryprop, genNot(partial)));
875    Value* mid_carry_in = genShiftLeft64(b.CreateLShr(digitcarry, 63), "mid_carry_in");
876
877    Value* sum = b.CreateAdd(partial, mid_carry_in, "sum");
878    Value* carry_out = genShiftHighbitToLow(BLOCK_SIZE, b.CreateOr(carrygen, b.CreateAnd(carryprop, genNot(sum))));
879#else
880    //BLOCK_SIZE == 256, there is no other implementation
881    static_assert(false, "Add with carry for 256-bit bitblock requires USE_UADD_OVERFLOW");
882#endif //USE_TWO_UADD_OVERFLOW
883
884    genCarryDataStore(carry_out, carryIdx);
885    return sum;
886}
887//#define CARRY_DEBUG
888Value* PabloCompiler::genCarryDataLoad(const unsigned index) {
889    assert (index < mCarryInVector.size());
890    if (mWhileDepth == 0) {
891        IRBuilder<> b(mBasicBlock);
892        mCarryInVector[index] = b.CreateAlignedLoad(b.CreateGEP(mCarryDataPtr, b.getInt64(index)), BLOCK_SIZE/8, false);
893    }
894#ifdef CARRY_DEBUG
895    genPrintRegister("carry_in_" + std::to_string(index), mCarryInVector[index]);
896#endif
897    return mCarryInVector[index];
898}
899
900void PabloCompiler::genCarryDataStore(Value* carryOut, const unsigned index ) {
901    assert (carryOut);
902    assert (index < mCarryOutVector.size());
903    if (mWhileDepth == 0) {
904        IRBuilder<> b(mBasicBlock);
905        b.CreateAlignedStore(carryOut, b.CreateGEP(mCarryDataPtr, b.getInt64(index)), BLOCK_SIZE/8, false);
906    }
907    mCarryDataSummaryIdx[index] = -1;
908#ifdef CARRY_DEBUG
909    genPrintRegister("carry_out_" + std::to_string(index), mCarryOutVector[index]);
910#endif
911    mCarryOutVector[index] = carryOut;
912    std::cerr << "mCarryOutVector[" << index << "]]\n";
913}
914
915inline Value* PabloCompiler::genBitBlockAny(Value* test) {
916    IRBuilder<> b(mBasicBlock);
917    Value* cast_marker_value_1 = b.CreateBitCast(test, b.getIntNTy(BLOCK_SIZE));
918    return b.CreateICmpEQ(cast_marker_value_1, ConstantInt::get(b.getIntNTy(BLOCK_SIZE), 0));
919}
920
921Value * PabloCompiler::genShiftHighbitToLow(unsigned FieldWidth, Value * op) {
922    unsigned FieldCount = BLOCK_SIZE/FieldWidth;
923    IRBuilder<> b(mBasicBlock);
924    VectorType * vType = VectorType::get(IntegerType::get(mMod->getContext(), FieldWidth), FieldCount);
925    Value * v = b.CreateBitCast(op, vType);
926    return b.CreateBitCast(b.CreateLShr(v, FieldWidth - 1), mBitBlockType);
927}
928
929Value* PabloCompiler::genShiftLeft64(Value* e, const Twine &namehint) {
930    IRBuilder<> b(mBasicBlock);
931    Value* i128_val = b.CreateBitCast(e, b.getIntNTy(BLOCK_SIZE));
932    return b.CreateBitCast(b.CreateShl(i128_val, 64, namehint), mBitBlockType);
933}
934
935inline Value* PabloCompiler::genNot(Value* expr) {
936    IRBuilder<> b(mBasicBlock);
937    return b.CreateXor(expr, mOneInitializer, "not");
938}
939Value* PabloCompiler::genAdvanceWithCarry(Value* strm_value, int shift_amount, unsigned localIndex, const PabloBlock * blk) {
940    IRBuilder<> b(mBasicBlock);
941    int advEntries = (shift_amount - 1) / BLOCK_SIZE + 1;
942    int block_shift = shift_amount % BLOCK_SIZE;
943    const auto advanceIndex = blk->getCarryIndexBase() + blk->getLocalCarryCount() + localIndex;
944    const auto storeIdx = advanceIndex;
945    const auto loadIdx = advanceIndex + advEntries - 1;
946    Value* result_value;
947   
948    if (advEntries == 1) {
949        if (block_shift == 0) { 
950            result_value = genCarryDataLoad(loadIdx);
951            //b.CreateCall(mFunc_print_register, result_value);
952        }
953#if (BLOCK_SIZE == 128) && !defined(USE_LONG_INTEGER_SHIFT)
954        if (block_shift == 1) {
955            Value* advanceq_value = genShiftHighbitToLow(BLOCK_SIZE, genCarryDataLoad(loadIdx));
956            Value* srli_1_value = b.CreateLShr(strm_value, 63);
957            Value* packed_shuffle;
958            Constant* const_packed_1_elems [] = {b.getInt32(0), b.getInt32(2)};
959            Constant* const_packed_1 = ConstantVector::get(const_packed_1_elems);
960            packed_shuffle = b.CreateShuffleVector(advanceq_value, srli_1_value, const_packed_1);
961
962            Constant* const_packed_2_elems[] = {b.getInt64(1), b.getInt64(1)};
963            Constant* const_packed_2 = ConstantVector::get(const_packed_2_elems);
964
965            Value* shl_value = b.CreateShl(strm_value, const_packed_2);
966            result_value = b.CreateOr(shl_value, packed_shuffle, "advance");
967        }
968        else { //if (block_shift < BLOCK_SIZE) {
969            // This is the preferred logic, but is too slow for the general case.
970            // We need to speed up our custom LLVM for this code.
971            Value* advanceq_longint = b.CreateBitCast(genCarryDataLoad(loadIdx), b.getIntNTy(BLOCK_SIZE));
972            Value* strm_longint = b.CreateBitCast(strm_value, b.getIntNTy(BLOCK_SIZE));
973            Value* adv_longint = b.CreateOr(b.CreateShl(strm_longint, block_shift), b.CreateLShr(advanceq_longint, BLOCK_SIZE - block_shift), "advance");
974            result_value = b.CreateBitCast(adv_longint, mBitBlockType);
975        }
976#else
977        Value* advanceq_longint = b.CreateBitCast(genCarryDataLoad(loadIdx), b.getIntNTy(BLOCK_SIZE));
978        Value* strm_longint = b.CreateBitCast(strm_value, b.getIntNTy(BLOCK_SIZE));
979        Value* adv_longint = b.CreateOr(b.CreateShl(strm_longint, block_shift), b.CreateLShr(advanceq_longint, BLOCK_SIZE - block_shift), "advance");
980        result_value = b.CreateBitCast(adv_longint, mBitBlockType);
981
982#endif
983    }
984    else {
985        if (block_shift == 0) {
986            result_value = genCarryDataLoad(loadIdx);
987        }
988        else { 
989            // The advance is based on the two oldest bit blocks in the advance queue.
990            Value* advanceq_longint = b.CreateBitCast(genCarryDataLoad(loadIdx), b.getIntNTy(BLOCK_SIZE));
991            Value* strm_longint = b.CreateBitCast(genCarryDataLoad(loadIdx-1), b.getIntNTy(BLOCK_SIZE));
992            Value* adv_longint = b.CreateOr(b.CreateShl(strm_longint, block_shift), b.CreateLShr(advanceq_longint, BLOCK_SIZE - block_shift), "longadvance");
993            result_value = b.CreateBitCast(adv_longint, mBitBlockType);
994            //b.CreateCall(mFunc_print_register, genCarryDataLoad(loadIdx));
995            //b.CreateCall(mFunc_print_register, genCarryDataLoad(loadIdx-1));
996            //b.CreateCall(mFunc_print_register, result_value);
997        }
998        // copy entries from previous blocks forward
999        for (int i = loadIdx; i > storeIdx; i--) {
1000            genCarryDataStore(genCarryDataLoad(i-1), i);
1001        }
1002    }
1003    genCarryDataStore(strm_value, storeIdx);
1004    return result_value;
1005}
1006
1007void PabloCompiler::SetOutputValue(Value * marker, const unsigned index) {
1008    IRBuilder<> b(mBasicBlock);
1009    if (marker->getType()->isPointerTy()) {
1010        marker = b.CreateAlignedLoad(marker, BLOCK_SIZE/8, false);
1011    }
1012    Value* indices[] = {b.getInt64(0), b.getInt32(index)};
1013    Value* gep = b.CreateGEP(mOutputAddrPtr, indices);
1014    b.CreateAlignedStore(marker, gep, BLOCK_SIZE/8, false);
1015}
1016
1017CompiledPabloFunction::CompiledPabloFunction(size_t carryDataSize, Function * function, ExecutionEngine * executionEngine)
1018: CarryDataSize(carryDataSize)
1019, FunctionPointer(executionEngine->getPointerToFunction(function))
1020, mFunction(function)
1021, mExecutionEngine(executionEngine)
1022{
1023
1024}
1025
1026// Clean up the memory for the compiled function once we're finished using it.
1027CompiledPabloFunction::~CompiledPabloFunction() {
1028    if (mExecutionEngine) {
1029        assert (mFunction);
1030        // mExecutionEngine->freeMachineCodeForFunction(mFunction); // This function only prints a "not supported" message. Reevaluate with LLVM 3.6.
1031        delete mExecutionEngine;
1032    }
1033}
1034
1035}
Note: See TracBrowser for help on using the repository browser.