source: icGREP/icgrep-devel/icgrep/pablo/pablo_compiler.cpp @ 4599

Last change on this file since 4599 was 4595, checked in by cameron, 4 years ago

Restructure compilation of while statements.

File size: 47.0 KB
Line 
1/*
2 *  Copyright (c) 2014-15 International Characters.
3 *  This software is licensed to the public under the Open Software License 3.0.
4 *  icgrep is a trademark of International Characters.
5 */
6
7#include <pablo/pablo_compiler.h>
8#include <pablo/codegenstate.h>
9#include <pablo/carry_data.h>
10#include <pablo/printer_pablos.h>
11#include <cc/cc_namemap.hpp>
12#include <re/re_name.h>
13#include <stdexcept>
14#include <include/simd-lib/bitblock.hpp>
15#include <sstream>
16#include <llvm/IR/Verifier.h>
17#include <llvm/Pass.h>
18#include <llvm/PassManager.h>
19#include <llvm/ADT/SmallVector.h>
20#include <llvm/Analysis/Passes.h>
21#include <llvm/IR/BasicBlock.h>
22#include <llvm/IR/CallingConv.h>
23#include <llvm/IR/Constants.h>
24#include <llvm/IR/DataLayout.h>
25#include <llvm/IR/DerivedTypes.h>
26#include <llvm/IR/Function.h>
27#include <llvm/IR/GlobalVariable.h>
28#include <llvm/IR/InlineAsm.h>
29#include <llvm/IR/Instructions.h>
30#include <llvm/IR/LLVMContext.h>
31#include <llvm/IR/Module.h>
32#include <llvm/Support/FormattedStream.h>
33#include <llvm/Support/MathExtras.h>
34#include <llvm/Support/Casting.h>
35#include <llvm/Support/Compiler.h>
36#include <llvm/Support/Debug.h>
37#include <llvm/Support/TargetSelect.h>
38#include <llvm/Support/Host.h>
39#include <llvm/Transforms/Scalar.h>
40#include <llvm/ExecutionEngine/ExecutionEngine.h>
41#include <llvm/ExecutionEngine/MCJIT.h>
42#include <llvm/IRReader/IRReader.h>
43#include <llvm/Bitcode/ReaderWriter.h>
44#include <llvm/Support/MemoryBuffer.h>
45#include <llvm/IR/IRBuilder.h>
46#include <llvm/Support/CommandLine.h>
47#include <llvm/ADT/Twine.h>
48#include <iostream>
49
50static cl::OptionCategory eIRDumpOptions("LLVM IR Dump Options", "These options control dumping of LLVM IR.");
51static cl::opt<bool> DumpGeneratedIR("dump-generated-IR", cl::init(false), cl::desc("print LLVM IR generated by RE compilation"), cl::cat(eIRDumpOptions));
52
53static cl::OptionCategory fTracingOptions("Run-time Tracing Options", "These options control execution traces.");
54static cl::opt<bool> TraceNext("trace-next-nodes", cl::init(false), cl::desc("Generate dynamic traces of executed Next nodes (while control variables)."), cl::cat(fTracingOptions));
55static cl::opt<bool> DumpTrace("dump-trace", cl::init(false), cl::desc("Generate dynamic traces of executed assignments."), cl::cat(fTracingOptions));
56
57extern "C" {
58  void wrapped_print_register(char * regName, BitBlock bit_block) {
59      print_register<BitBlock>(regName, bit_block);
60  }
61}
62
63namespace pablo {
64
65PabloCompiler::PabloCompiler(const std::vector<Var*> & basisBits)
66: mBasisBits(basisBits)
67#ifdef USE_LLVM_3_5
68, mMod(new Module("icgrep", getGlobalContext()))
69#else
70, mModOwner(make_unique<Module>("icgrep", getGlobalContext()))
71, mMod(mModOwner.get())
72#endif
73, mBasicBlock(nullptr)
74, mExecutionEngine(nullptr)
75, mBitBlockType(VectorType::get(IntegerType::get(mMod->getContext(), 64), BLOCK_SIZE / 64))
76, mBasisBitsInputPtr(nullptr)
77, mCarryDataPtr(nullptr)
78, mBlockNo(nullptr)
79, mWhileDepth(0)
80, mIfDepth(0)
81, mZeroInitializer(ConstantAggregateZero::get(mBitBlockType))
82, mOneInitializer(ConstantVector::getAllOnesValue(mBitBlockType))
83, mFunctionType(nullptr)
84, mFunction(nullptr)
85, mBasisBitsAddr(nullptr)
86, mOutputAddrPtr(nullptr)
87, mMaxWhileDepth(0)
88, mPrintRegisterFunction(nullptr)
89{
90    //Create the jit execution engine.up
91    InitializeNativeTarget();
92    InitializeNativeTargetAsmPrinter();
93    InitializeNativeTargetAsmParser();
94    DefineTypes();
95}
96
97PabloCompiler::~PabloCompiler()
98{
99
100}
101   
102void PabloCompiler::InstallExternalFunction(std::string C_fn_name, void * fn_ptr) {
103    mExternalMap.insert(std::make_pair(C_fn_name, fn_ptr));
104}
105
106void PabloCompiler::genPrintRegister(std::string regName, Value * bitblockValue) {
107    IRBuilder <> b(mBasicBlock);
108    Constant * regNameData = ConstantDataArray::getString(mMod->getContext(), regName);
109    GlobalVariable *regStrVar = new GlobalVariable(*mMod, 
110                                                   ArrayType::get(IntegerType::get(mMod->getContext(), 8), regName.length()+1),
111                                                   /*isConstant=*/ true,
112                                                   /*Linkage=*/ GlobalValue::PrivateLinkage,
113                                                   /*Initializer=*/ regNameData);
114    Value * regStrPtr = b.CreateGEP(regStrVar, {b.getInt64(0), b.getInt32(0)});
115    b.CreateCall(mPrintRegisterFunction, {regStrPtr, bitblockValue});
116}
117
118CompiledPabloFunction PabloCompiler::compile(PabloBlock & pb)
119{
120    mWhileDepth = 0;
121    mIfDepth = 0;
122    mMaxWhileDepth = 0;
123    // Get the total number of carry entries; add 1 extra element for the block number.
124    unsigned totalCarryDataSize = pb.carryData.enumerate(pb) + 1;
125    Examine(pb); 
126    mCarryInVector.resize(totalCarryDataSize);
127    mCarryOutVector.resize(totalCarryDataSize);
128    mCarryDataSummaryIdx.resize(totalCarryDataSize);
129    std::string errMessage;
130#ifdef USE_LLVM_3_5
131    EngineBuilder builder(mMod);
132#else
133    EngineBuilder builder(std::move(mModOwner));
134#endif
135    builder.setErrorStr(&errMessage);
136    builder.setMCPU(sys::getHostCPUName());
137#ifdef USE_LLVM_3_5
138    builder.setUseMCJIT(true);
139#endif
140    builder.setOptLevel(mMaxWhileDepth ? CodeGenOpt::Level::Less : CodeGenOpt::Level::None);
141    mExecutionEngine = builder.create();
142    if (mExecutionEngine == nullptr) {
143        throw std::runtime_error("Could not create ExecutionEngine: " + errMessage);
144    }
145    DeclareFunctions();
146
147    DeclareCallFunctions();
148
149    Function::arg_iterator args = mFunction->arg_begin();
150    mBasisBitsAddr = args++;
151    mBasisBitsAddr->setName("basis_bits");
152    mCarryDataPtr = args++;
153    mCarryDataPtr->setName("carry_data");
154    mOutputAddrPtr = args++;
155    mOutputAddrPtr->setName("output");
156
157    mWhileDepth = 0;
158    mIfDepth = 0;
159    mMaxWhileDepth = 0;
160    mBasicBlock = BasicBlock::Create(mMod->getContext(), "parabix_entry", mFunction,0);
161    IRBuilder<> b(mBasicBlock);
162
163    //The basis bits structure
164    for (unsigned i = 0; i != mBasisBits.size(); ++i) {
165        Value* indices[] = {b.getInt64(0), b.getInt32(i)};
166        Value * gep = b.CreateGEP(mBasisBitsAddr, indices);
167        LoadInst * basisBit = b.CreateAlignedLoad(gep, BLOCK_SIZE/8, false, mBasisBits[i]->getName()->to_string());
168        mMarkerMap.insert(std::make_pair(mBasisBits[i], basisBit));
169    }
170   
171    // The block number is a 64-bit integer at the end of the carry data area.
172    Value * blockNoPtr = b.CreateBitCast(b.CreateGEP(mCarryDataPtr, b.getInt64(totalCarryDataSize - 1)), Type::getInt64PtrTy(b.getContext()));
173    mBlockNo = b.CreateLoad(blockNoPtr);
174    //Generate the IR instructions for the function.
175    compileBlock(pb);
176    {   IRBuilder<> b(mBasicBlock);  // may be in new basic block, set builder
177        b.CreateStore(b.CreateAdd(mBlockNo, b.getInt64(1)), blockNoPtr);
178    }
179
180    if (DumpTrace || TraceNext) {
181        genPrintRegister("blockNo", genCarryDataLoad(totalCarryDataSize - 1));
182    }
183    if (LLVM_UNLIKELY(mWhileDepth != 0)) {
184        throw std::runtime_error("Non-zero nesting depth error (" + std::to_string(mWhileDepth) + ")");
185    }
186
187    //Terminate the block
188    ReturnInst::Create(mMod->getContext(), mBasicBlock);
189
190    //Display the IR that has been generated by this module.
191    if (LLVM_UNLIKELY(DumpGeneratedIR)) {
192        mMod->dump();
193    }
194    //Create a verifier.  The verifier will print an error message if our module is malformed in any way.
195    verifyModule(*mMod, &dbgs());
196
197    mExecutionEngine->finalizeObject();
198
199    //Return the required size of the carry data area to the process_block function.
200    return CompiledPabloFunction(totalCarryDataSize * sizeof(BitBlock), mFunction, mExecutionEngine);
201}
202
203void PabloCompiler::DefineTypes()
204{
205    StructType * structBasisBits = mMod->getTypeByName("struct.Basis_bits");
206    if (structBasisBits == nullptr) {
207        structBasisBits = StructType::create(mMod->getContext(), "struct.Basis_bits");
208    }
209    std::vector<Type*>StructTy_struct_Basis_bits_fields;
210    for (int i = 0; i != mBasisBits.size(); i++)
211    {
212        StructTy_struct_Basis_bits_fields.push_back(mBitBlockType);
213    }
214    if (structBasisBits->isOpaque()) {
215        structBasisBits->setBody(StructTy_struct_Basis_bits_fields, /*isPacked=*/false);
216    }
217    mBasisBitsInputPtr = PointerType::get(structBasisBits, 0);
218
219    std::vector<Type*>functionTypeArgs;
220    functionTypeArgs.push_back(mBasisBitsInputPtr);
221
222    //The carry data array.
223    //A pointer to the BitBlock vector.
224    functionTypeArgs.push_back(PointerType::get(mBitBlockType, 0));
225
226    //The output structure.
227    StructType * outputStruct = mMod->getTypeByName("struct.Output");
228    if (!outputStruct) {
229        outputStruct = StructType::create(mMod->getContext(), "struct.Output");
230    }
231    if (outputStruct->isOpaque()) {
232        std::vector<Type*>fields;
233        fields.push_back(mBitBlockType);
234        fields.push_back(mBitBlockType);
235        outputStruct->setBody(fields, /*isPacked=*/false);
236    }
237    PointerType* outputStructPtr = PointerType::get(outputStruct, 0);
238
239    //The &output parameter.
240    functionTypeArgs.push_back(outputStructPtr);
241
242    mFunctionType = FunctionType::get(
243     /*Result=*/Type::getVoidTy(mMod->getContext()),
244     /*Params=*/functionTypeArgs,
245     /*isVarArg=*/false);
246}
247
248void PabloCompiler::DeclareFunctions()
249{
250    //This function can be used for testing to print the contents of a register from JIT'd code to the terminal window.
251    mPrintRegisterFunction = mMod->getOrInsertFunction("wrapped_print_register", Type::getVoidTy(getGlobalContext()), Type::getInt8PtrTy(getGlobalContext()), mBitBlockType, NULL);
252    mExecutionEngine->addGlobalMapping(cast<GlobalValue>(mPrintRegisterFunction), (void *)&wrapped_print_register);
253    // to call->  b.CreateCall(mFunc_print_register, unicode_category);
254
255#ifdef USE_UADD_OVERFLOW
256#ifdef USE_TWO_UADD_OVERFLOW
257    // Type Definitions for llvm.uadd.with.overflow.carryin.i128 or .i256
258    std::vector<Type*>StructTy_0_fields;
259    StructTy_0_fields.push_back(IntegerType::get(mMod->getContext(), BLOCK_SIZE));
260    StructTy_0_fields.push_back(IntegerType::get(mMod->getContext(), 1));
261    StructType *StructTy_0 = StructType::get(mMod->getContext(), StructTy_0_fields, /*isPacked=*/false);
262
263    std::vector<Type*>FuncTy_1_args;
264    FuncTy_1_args.push_back(IntegerType::get(mMod->getContext(), BLOCK_SIZE));
265    FuncTy_1_args.push_back(IntegerType::get(mMod->getContext(), BLOCK_SIZE));
266    FunctionType* FuncTy_1 = FunctionType::get(
267                                              /*Result=*/StructTy_0,
268                                              /*Params=*/FuncTy_1_args,
269                                              /*isVarArg=*/false);
270
271    mFunctionUaddOverflow = mMod->getFunction("llvm.uadd.with.overflow.i" +
272                                              std::to_string(BLOCK_SIZE));
273    if (!mFunctionUaddOverflow) {
274        mFunctionUaddOverflow= Function::Create(
275          /*Type=*/ FuncTy_1,
276          /*Linkage=*/ GlobalValue::ExternalLinkage,
277          /*Name=*/ "llvm.uadd.with.overflow.i" + std::to_string(BLOCK_SIZE), mMod); // (external, no body)
278        mFunctionUaddOverflow->setCallingConv(CallingConv::C);
279    }
280    AttributeSet mFunctionUaddOverflowPAL;
281    {
282        SmallVector<AttributeSet, 4> Attrs;
283        AttributeSet PAS;
284        {
285          AttrBuilder B;
286          B.addAttribute(Attribute::NoUnwind);
287          B.addAttribute(Attribute::ReadNone);
288          PAS = AttributeSet::get(mMod->getContext(), ~0U, B);
289        }
290
291        Attrs.push_back(PAS);
292        mFunctionUaddOverflowPAL = AttributeSet::get(mMod->getContext(), Attrs);
293    }
294    mFunctionUaddOverflow->setAttributes(mFunctionUaddOverflowPAL);
295#else
296    // Type Definitions for llvm.uadd.with.overflow.carryin.i128 or .i256
297    std::vector<Type*>StructTy_0_fields;
298    StructTy_0_fields.push_back(IntegerType::get(mMod->getContext(), BLOCK_SIZE));
299    StructTy_0_fields.push_back(IntegerType::get(mMod->getContext(), 1));
300    StructType *StructTy_0 = StructType::get(mMod->getContext(), StructTy_0_fields, /*isPacked=*/false);
301
302    std::vector<Type*>FuncTy_1_args;
303    FuncTy_1_args.push_back(IntegerType::get(mMod->getContext(), BLOCK_SIZE));
304    FuncTy_1_args.push_back(IntegerType::get(mMod->getContext(), BLOCK_SIZE));
305    FuncTy_1_args.push_back(IntegerType::get(mMod->getContext(), 1));
306    FunctionType* FuncTy_1 = FunctionType::get(
307                                              /*Result=*/StructTy_0,
308                                              /*Params=*/FuncTy_1_args,
309                                              /*isVarArg=*/false);
310
311    mFunctionUaddOverflowCarryin = mMod->getFunction("llvm.uadd.with.overflow.carryin.i" +
312                                              std::to_string(BLOCK_SIZE));
313    if (!mFunctionUaddOverflowCarryin) {
314        mFunctionUaddOverflowCarryin = Function::Create(
315          /*Type=*/ FuncTy_1,
316          /*Linkage=*/ GlobalValue::ExternalLinkage,
317          /*Name=*/ "llvm.uadd.with.overflow.carryin.i" + std::to_string(BLOCK_SIZE), mMod); // (external, no body)
318        mFunctionUaddOverflowCarryin->setCallingConv(CallingConv::C);
319    }
320    AttributeSet mFunctionUaddOverflowCarryinPAL;
321    {
322        SmallVector<AttributeSet, 4> Attrs;
323        AttributeSet PAS;
324        {
325          AttrBuilder B;
326          B.addAttribute(Attribute::NoUnwind);
327          B.addAttribute(Attribute::ReadNone);
328          PAS = AttributeSet::get(mMod->getContext(), ~0U, B);
329        }
330
331        Attrs.push_back(PAS);
332        mFunctionUaddOverflowCarryinPAL = AttributeSet::get(mMod->getContext(), Attrs);
333    }
334    mFunctionUaddOverflowCarryin->setAttributes(mFunctionUaddOverflowCarryinPAL);
335#endif
336#endif
337
338    //Starts on process_block
339    SmallVector<AttributeSet, 4> Attrs;
340    AttributeSet PAS;
341    {
342        AttrBuilder B;
343        B.addAttribute(Attribute::ReadOnly);
344        B.addAttribute(Attribute::NoCapture);
345        PAS = AttributeSet::get(mMod->getContext(), 1U, B);
346    }
347    Attrs.push_back(PAS);
348    {
349        AttrBuilder B;
350        B.addAttribute(Attribute::NoCapture);
351        PAS = AttributeSet::get(mMod->getContext(), 2U, B);
352    }
353    Attrs.push_back(PAS);
354    {
355        AttrBuilder B;
356        B.addAttribute(Attribute::NoCapture);
357        PAS = AttributeSet::get(mMod->getContext(), 3U, B);
358    }
359    Attrs.push_back(PAS);
360    {
361        AttrBuilder B;
362        B.addAttribute(Attribute::NoUnwind);
363        B.addAttribute(Attribute::UWTable);
364        PAS = AttributeSet::get(mMod->getContext(), ~0U, B);
365    }
366    AttributeSet AttrSet = AttributeSet::get(mMod->getContext(), Attrs);
367
368    //Create the function that will be generated.
369    mFunction = mMod->getFunction("process_block");
370    if (!mFunction) {
371        mFunction = Function::Create(
372            /*Type=*/mFunctionType,
373            /*Linkage=*/GlobalValue::ExternalLinkage,
374            /*Name=*/"process_block", mMod);
375        mFunction->setCallingConv(CallingConv::C);
376    }
377    mFunction->setAttributes(AttrSet);
378}
379   
380void PabloCompiler::Examine(PabloBlock & blk) {
381    for (Statement * stmt : blk) {
382        if (Call * call = dyn_cast<Call>(stmt)) {
383            mCalleeMap.insert(std::make_pair(call->getCallee(), nullptr));
384        }
385        else if (If * ifStatement = dyn_cast<If>(stmt)) {
386            ++mIfDepth;
387            Examine(ifStatement->getBody());
388            --mIfDepth;
389        }
390        else if (While * whileStatement = dyn_cast<While>(stmt)) {
391            mMaxWhileDepth = std::max(mMaxWhileDepth, ++mWhileDepth);
392            Examine(whileStatement->getBody());
393            --mWhileDepth;
394        }
395    }
396}
397
398void PabloCompiler::DeclareCallFunctions() {
399    for (auto mapping : mCalleeMap) {
400        const String * callee = mapping.first;
401        //std::cerr << callee->str() << " to be declared\n";
402        auto ei = mExternalMap.find(callee->value());
403        if (ei != mExternalMap.end()) {
404            void * fn_ptr = ei->second;
405            //std::cerr << "Ptr found:" <<  std::hex << ((intptr_t) fn_ptr) << std::endl;
406            Value * externalValue = mMod->getOrInsertFunction(callee->value(), mBitBlockType, mBasisBitsInputPtr, NULL);
407            if (LLVM_UNLIKELY(externalValue == nullptr)) {
408                throw std::runtime_error("Could not create static method call for external function \"" + callee->to_string() + "\"");
409            }
410            mExecutionEngine->addGlobalMapping(cast<GlobalValue>(externalValue), fn_ptr);
411            mCalleeMap[callee] = externalValue;
412        }
413        else {
414            throw std::runtime_error("External function \"" + callee->to_string() + "\" not installed");
415        }
416    }
417}
418
419void PabloCompiler::compileBlock(const PabloBlock & blk) {
420    for (const Statement * statement : blk) {
421        compileStatement(statement);
422    }
423}
424
425
426
427
428void PabloCompiler::compileIf(const If * ifStatement) {       
429        //
430        //  The If-ElseZero stmt:
431        //  if <predicate:expr> then <body:stmt>* elsezero <defined:var>* endif
432        //  If the value of the predicate is nonzero, then determine the values of variables
433        //  <var>* by executing the given statements.  Otherwise, the value of the
434        //  variables are all zero.  Requirements: (a) no variable that is defined within
435        //  the body of the if may be accessed outside unless it is explicitly
436        //  listed in the variable list, (b) every variable in the defined list receives
437        //  a value within the body, and (c) the logical consequence of executing
438        //  the statements in the event that the predicate is zero is that the
439        //  values of all defined variables indeed work out to be 0.
440        //
441        //  Simple Implementation with Phi nodes:  a phi node in the if exit block
442        //  is inserted for each variable in the defined variable list.  It receives
443        //  a zero value from the ifentry block and the defined value from the if
444        //  body.
445        //
446        BasicBlock * ifEntryBlock = mBasicBlock;  // The block we are in.
447        BasicBlock * ifBodyBlock = BasicBlock::Create(mMod->getContext(), "if.body", mFunction, 0);
448        BasicBlock * ifEndBlock = BasicBlock::Create(mMod->getContext(), "if.end", mFunction, 0);
449       
450        IRBuilder<> b_entry(ifEntryBlock);
451        mBasicBlock = ifEntryBlock;
452        const PabloBlockCarryData & cd = ifStatement -> getBody().carryData;
453   
454        const unsigned baseCarryDataIdx = cd.getBlockCarryDataIndex();
455        const unsigned carrySummaryIndex = cd.summaryCarryDataIndex();
456       
457        Value* if_test_value = compileExpression(ifStatement->getCondition());
458        if (cd.blockHasCarries()) {
459            // load the summary variable
460            Value* last_if_pending_data = genCarryDataLoad(carrySummaryIndex);
461            if_test_value = b_entry.CreateOr(if_test_value, last_if_pending_data);
462        }
463        b_entry.CreateCondBr(genBitBlockAny(if_test_value), ifEndBlock, ifBodyBlock);
464
465        // Entry processing is complete, now handle the body of the if.
466        mBasicBlock = ifBodyBlock;
467        compileBlock(ifStatement -> getBody());
468
469        // If we compiled an If or a While statement, we won't be in the same basic block as before.
470        // Create the branch from the current basic block to the end block.
471        IRBuilder<> bIfBody(mBasicBlock);
472        // After the recursive compile, now insert the code to compute the summary
473        // carry over variable.
474       
475        if (cd.explicitSummaryRequired()) {
476            // If there was only one carry entry, then it also serves as the summary variable.
477            // Otherwise, we need to combine entries to compute the summary.
478            Value * carry_summary = mZeroInitializer;
479            for (int c = baseCarryDataIdx; c < carrySummaryIndex; c++) {
480                int s = mCarryDataSummaryIdx[c];
481                if (s == -1) {
482                    Value* carryq_value = mCarryOutVector[c];
483                    if (carry_summary == mZeroInitializer) {
484                        carry_summary = carryq_value;
485                    }
486                    else {
487                        carry_summary = bIfBody.CreateOr(carry_summary, carryq_value);
488                    }
489                    mCarryDataSummaryIdx[c] = carrySummaryIndex;
490                }
491            }
492            genCarryDataStore(carry_summary, carrySummaryIndex);
493        }
494        bIfBody.CreateBr(ifEndBlock);
495        //End Block
496        IRBuilder<> bEnd(ifEndBlock);
497        for (const PabloAST * node : ifStatement->getDefined()) {
498            const Assign * assign = cast<Assign>(node);
499            PHINode * phi = bEnd.CreatePHI(mBitBlockType, 2, assign->getName()->value());
500            auto f = mMarkerMap.find(assign);
501            assert (f != mMarkerMap.end());
502            phi->addIncoming(mZeroInitializer, ifEntryBlock);
503            phi->addIncoming(f->second, mBasicBlock);
504            mMarkerMap[assign] = phi;
505        }
506        // Create the phi Node for the summary variable, if needed.
507        if (cd.summaryNeededInParentBlock()) {
508            PHINode * summary_phi = bEnd.CreatePHI(mBitBlockType, 2, "summary");
509            summary_phi->addIncoming(mZeroInitializer, ifEntryBlock);
510            summary_phi->addIncoming(mCarryOutVector[carrySummaryIndex], mBasicBlock);
511            mCarryOutVector[carrySummaryIndex] = summary_phi;
512        }
513       
514        // Set the basic block to the new end block
515        mBasicBlock = ifEndBlock;
516}
517
518// If the following preload is turned off, we have incorrect results with the
519// ./icgrep -c '[A-Z]((([a-zA-Z]*a[a-zA-Z]*[ ])*[a-zA-Z]*e[a-zA-Z]*[ ])*[a-zA-Z]*s[a-zA-Z]*[ ])*[.?!]' ../performance/data/howto
520   
521#define PRELOAD_WHILE_CARRIES_AT_TOP_LEVEL 1
522//#define SET_WHILE_CARRY_IN_TO_ZERO_AFTER_FIRST_ITERATION
523
524void PabloCompiler::compileWhile(const While * whileStatement) {
525        BasicBlock* whileEntryBlock = mBasicBlock;
526        BasicBlock* whileBodyBlock = BasicBlock::Create(mMod->getContext(), "while.body", mFunction, 0);
527        BasicBlock* whileEndBlock = BasicBlock::Create(mMod->getContext(), "while.end", mFunction, 0);
528   
529   
530        const PabloBlockCarryData & cd = whileStatement -> getBody().carryData;
531        const unsigned baseCarryDataIdx = cd.getBlockCarryDataIndex();
532
533#ifdef PRELOAD_WHILE_CARRIES_AT_TOP_LEVEL
534        const unsigned carryDataSize = cd.getTotalCarryDataSize();
535        if (mWhileDepth == 0)
536#else
537        const unsigned carryDataSize = cd.getLocalCarryDataSize();
538#endif
539        {
540            for (auto i = baseCarryDataIdx; i < baseCarryDataIdx + carryDataSize; ++i) {
541                IRBuilder<> b(mBasicBlock);
542                mCarryInVector[i] = b.CreateAlignedLoad(b.CreateGEP(mCarryDataPtr, b.getInt64(i)), BLOCK_SIZE/8, false);
543            }
544        }
545        SmallVector<const Next*, 4> nextNodes;
546        SmallVector<PHINode *, 4> nextPhis;
547        for (const PabloAST * node : whileStatement->getBody()) {
548            if (isa<Next>(node)) {
549                nextNodes.push_back(cast<Next>(node));
550            }
551        }
552   
553        // On entry to the while structure, proceed to execute the first iteration
554        // of the loop body unconditionally.   The while condition is tested at the end of
555        // the loop.
556
557        IRBuilder<> bEntry(mBasicBlock);
558        // Jump to the while body block immediately.
559        bEntry.CreateBr(whileBodyBlock);
560        mBasicBlock = whileBodyBlock;
561        IRBuilder<> bBody(whileBodyBlock);
562   
563        //
564        // There are 3 sets of Phi nodes for the while loop.
565        // (1) Carry-ins: (a) incoming carry data first iterations, (b) zero thereafter
566        // (2) Carry-out accumulators: (a) zero first iteration, (b) |= carry-out of each iteration
567        // (3) Next nodes: (a) values set up before loop, (b) modified values calculated in loop.
568
569#ifdef SET_WHILE_CARRY_IN_TO_ZERO_AFTER_FIRST_ITERATION
570        std::vector<PHINode *> carryInPhis(carryDataSize);
571#endif
572        std::vector<PHINode *> carryOutAccumPhis(carryDataSize);
573   
574        // Set initial values of phi nodes for loop body using values at while entry.
575        for (unsigned index = 0; index < carryDataSize; ++index) {
576#ifdef SET_WHILE_CARRY_IN_TO_ZERO_AFTER_FIRST_ITERATION
577            PHINode * phi_in = bBody.CreatePHI(mBitBlockType, 2);
578            phi_in->addIncoming(mCarryInVector[baseCarryDataIdx + index], whileEntryBlock);
579            carryInPhis[index] = phi_in;
580            mCarryInVector[baseCarryDataIdx + index] = phi_in;
581#endif
582            PHINode * phi_out = bBody.CreatePHI(mBitBlockType, 2);
583            phi_out->addIncoming(mZeroInitializer, whileEntryBlock);
584            carryOutAccumPhis[index] = phi_out;
585            mCarryOutVector[baseCarryDataIdx + index] = mZeroInitializer;
586        }
587   
588        // for any Next nodes in the loop body, initialize to (a) pre-loop value.
589        for (const Next * n : nextNodes) {
590            PHINode * phi = bBody.CreatePHI(mBitBlockType, 2, n->getName()->value());
591            auto f = mMarkerMap.find(n->getInitial());
592            assert (f != mMarkerMap.end());
593            phi->addIncoming(f->second, whileEntryBlock);
594            mMarkerMap[n->getInitial()] = phi;
595            nextPhis.push_back(phi);
596        }
597
598        //
599        // Now compile the loop body proper.  Carry-out accumulated values
600        // and iterated values of Next nodes will be computed.
601        ++mWhileDepth;
602        compileBlock(whileStatement->getBody());
603   
604        //  The while body might involve separate blocks depending on compile;
605        //  identify the final block generated.
606        BasicBlock * whileBodyFinalBlock = mBasicBlock;
607        IRBuilder<> bBodyFinal(mBasicBlock);
608   
609        // Add the phiNode branches for carry in, carry out nodes.
610        for (unsigned index = 0; index < carryDataSize; ++index) {
611#ifdef SET_WHILE_CARRY_IN_TO_ZERO_AFTER_FIRST_ITERATION
612            carryInPhis[index]->addIncoming(mZeroInitializer, whileBodyFinalBlock);
613#endif
614            PHINode * phi = carryOutAccumPhis[index];
615            Value * carryOut = bBodyFinal.CreateOr(phi, mCarryOutVector[baseCarryDataIdx + index]);
616            phi->addIncoming(carryOut, whileBodyFinalBlock);
617            mCarryOutVector[baseCarryDataIdx + index] = carryOut;
618        }
619
620        // Terminate the while loop body with a conditional branch back.
621        bBodyFinal.CreateCondBr(genBitBlockAny(compileExpression(whileStatement->getCondition())), whileEndBlock, whileBodyBlock);
622
623        // and for any Next nodes in the loop body
624        for (unsigned i = 0; i < nextNodes.size(); i++) {
625            const Next * n = nextNodes[i];
626            auto f = mMarkerMap.find(n->getInitial());
627            assert (f != mMarkerMap.end());
628            PHINode * phi = nextPhis[i];
629            if (LLVM_UNLIKELY(f->second == phi)) {
630                throw std::runtime_error("Unexpected Phi node for Next node.");
631            }
632            phi->addIncoming(f->second, whileBodyFinalBlock);
633            //mMarkerMap[n->getInitial()] = f->second;
634        }
635
636        // EXIT BLOCK
637        mBasicBlock = whileEndBlock;
638        IRBuilder<> bEnd(whileEndBlock);
639        --mWhileDepth;
640
641#ifdef PRELOAD_WHILE_CARRIES_AT_TOP_LEVEL
642        if (mWhileDepth == 0)
643#endif
644        {
645            for (unsigned index = baseCarryDataIdx; index < baseCarryDataIdx + carryDataSize; ++index) {
646                bEnd.CreateAlignedStore(mCarryOutVector[index], bEnd.CreateGEP(mCarryDataPtr, bEnd.getInt64(index)), BLOCK_SIZE/8, false);
647            }
648        }
649}
650
651void PabloCompiler::compileStatement(const Statement * stmt)
652{
653    IRBuilder<> b(mBasicBlock);
654    if (const Assign * assign = dyn_cast<const Assign>(stmt)) {
655        Value * expr = compileExpression(assign->getExpr());
656        if (DumpTrace) {
657            genPrintRegister(assign->getName()->to_string(), expr);
658        }
659        mMarkerMap[assign] = expr;
660        if (LLVM_UNLIKELY(assign->isOutputAssignment())) {
661            SetOutputValue(expr, assign->getOutputIndex());
662        }
663    }
664    else if (const Next * next = dyn_cast<const Next>(stmt)) {
665        Value * expr = compileExpression(next->getExpr());
666        if (TraceNext) {
667            genPrintRegister(next->getInitial()->getName()->to_string(), expr);
668        }
669        mMarkerMap[next->getInitial()] = expr;
670    }
671    else if (const If * ifStatement = dyn_cast<const If>(stmt))
672    {
673        compileIf(ifStatement);
674    }
675    else if (const While * whileStatement = dyn_cast<const While>(stmt))
676    {
677        compileWhile(whileStatement);
678    }
679    else if (const Call* call = dyn_cast<Call>(stmt)) {
680        //Call the callee once and store the result in the marker map.
681        auto mi = mMarkerMap.find(call);
682        if (mi == mMarkerMap.end()) {
683            auto ci = mCalleeMap.find(call->getCallee());
684            if (LLVM_UNLIKELY(ci == mCalleeMap.end())) {
685                throw std::runtime_error("Unexpected error locating static function for \"" + call->getCallee()->to_string() + "\"");
686            }
687            mi = mMarkerMap.insert(std::make_pair(call, b.CreateCall(ci->second, mBasisBitsAddr))).first;
688        }
689        // return mi->second;
690    }
691    else if (const And * pablo_and = dyn_cast<And>(stmt)) {
692        Value * expr = b.CreateAnd(compileExpression(pablo_and->getExpr1()), compileExpression(pablo_and->getExpr2()), "and");
693        if (DumpTrace) {
694            genPrintRegister(stmt->getName()->to_string(), expr);
695        }
696        mMarkerMap[pablo_and] = expr;
697        // return expr;
698    }
699    else if (const Or * pablo_or = dyn_cast<Or>(stmt)) {
700        Value * expr = b.CreateOr(compileExpression(pablo_or->getExpr1()), compileExpression(pablo_or->getExpr2()), "or");
701        if (DumpTrace) {
702            genPrintRegister(stmt->getName()->to_string(), expr);
703        }
704        mMarkerMap[pablo_or] = expr;
705        // return expr;
706    }
707    else if (const Xor * pablo_xor = dyn_cast<Xor>(stmt)) {
708        Value * expr = b.CreateXor(compileExpression(pablo_xor->getExpr1()), compileExpression(pablo_xor->getExpr2()), "xor");
709        mMarkerMap[pablo_xor] = expr;
710        // return expr;
711    }
712    else if (const Sel * sel = dyn_cast<Sel>(stmt)) {
713        Value* ifMask = compileExpression(sel->getCondition());
714        Value* ifTrue = b.CreateAnd(ifMask, compileExpression(sel->getTrueExpr()));
715        Value* ifFalse = b.CreateAnd(genNot(ifMask), compileExpression(sel->getFalseExpr()));
716        Value * expr = b.CreateOr(ifTrue, ifFalse);
717        if (DumpTrace) {
718            genPrintRegister(stmt->getName()->to_string(), expr);
719        }
720        mMarkerMap[sel] = expr;
721        // return expr;
722    }
723    else if (const Not * pablo_not = dyn_cast<Not>(stmt)) {
724        Value * expr = genNot(compileExpression(pablo_not->getExpr()));
725        if (DumpTrace) {
726            genPrintRegister(stmt->getName()->to_string(), expr);
727        }
728        mMarkerMap[pablo_not] = expr;
729        // return expr;
730    }
731    else if (const Advance * adv = dyn_cast<Advance>(stmt)) {
732        Value* strm_value = compileExpression(adv->getExpr());
733        int shift = adv->getAdvanceAmount();
734        unsigned advance_index = adv->getLocalAdvanceIndex();
735        Value * expr = genAdvanceWithCarry(strm_value, shift, advance_index, stmt->getParent());
736        if (DumpTrace) {
737            genPrintRegister(stmt->getName()->to_string(), expr);
738        }
739        mMarkerMap[adv] = expr;
740        // return expr;
741    }
742    else if (const MatchStar * mstar = dyn_cast<MatchStar>(stmt))
743    {
744        Value * marker = compileExpression(mstar->getMarker());
745        Value * cc = compileExpression(mstar->getCharClass());
746        Value * marker_and_cc = b.CreateAnd(marker, cc);
747        unsigned carry_index = mstar->getLocalCarryIndex();
748        Value * expr = b.CreateOr(b.CreateXor(genAddWithCarry(marker_and_cc, cc, carry_index, stmt->getParent()), cc), marker, "matchstar");
749        if (DumpTrace) {
750            genPrintRegister(stmt->getName()->to_string(), expr);
751        }
752        mMarkerMap[mstar] = expr;
753        // return expr;
754    }
755    else if (const ScanThru * sthru = dyn_cast<ScanThru>(stmt))
756    {
757        Value * marker_expr = compileExpression(sthru->getScanFrom());
758        Value * cc_expr = compileExpression(sthru->getScanThru());
759        unsigned carry_index = sthru->getLocalCarryIndex();
760        Value * expr = b.CreateAnd(genAddWithCarry(marker_expr, cc_expr, carry_index, stmt->getParent()), genNot(cc_expr), "scanthru");
761        if (DumpTrace) {
762            genPrintRegister(stmt->getName()->to_string(), expr);
763        }
764        mMarkerMap[sthru] = expr;
765        // return expr;
766    }
767    else {
768        llvm::raw_os_ostream cerr(std::cerr);
769        PabloPrinter::print(stmt, cerr);
770        throw std::runtime_error("Unrecognized Pablo Statement! can't compile.");
771    }
772}
773
774Value * PabloCompiler::compileExpression(const PabloAST * expr) {
775    if (isa<Ones>(expr)) {
776        return mOneInitializer;
777    }
778    else if (isa<Zeroes>(expr)) {
779        return mZeroInitializer;
780    }
781    else if (const Next * next = dyn_cast<Next>(expr)) {
782        expr = next->getInitial();
783    }
784    auto f = mMarkerMap.find(expr);
785    if (f == mMarkerMap.end()) {
786        std::string o;
787        llvm::raw_string_ostream str(o);
788        str << "\"";
789        PabloPrinter::print(expr, str);
790        str << "\" was used before definition!";
791        throw std::runtime_error(str.str());
792    }
793    return f->second;
794}
795
796
797#ifdef USE_UADD_OVERFLOW
798#ifdef USE_TWO_UADD_OVERFLOW
799PabloCompiler::SumWithOverflowPack PabloCompiler::callUaddOverflow(Value* int128_e1, Value* int128_e2) {
800    std::vector<Value*> struct_res_params;
801    struct_res_params.push_back(int128_e1);
802    struct_res_params.push_back(int128_e2);
803    CallInst* struct_res = CallInst::Create(mFunctionUaddOverflow, struct_res_params, "uadd_overflow_res", mBasicBlock);
804    struct_res->setCallingConv(CallingConv::C);
805    struct_res->setTailCall(false);
806    AttributeSet struct_res_PAL;
807    struct_res->setAttributes(struct_res_PAL);
808
809    SumWithOverflowPack ret;
810
811    std::vector<unsigned> int128_sum_indices;
812    int128_sum_indices.push_back(0);
813    ret.sum = ExtractValueInst::Create(struct_res, int128_sum_indices, "sum", mBasicBlock);
814
815    std::vector<unsigned> int1_obit_indices;
816    int1_obit_indices.push_back(1);
817    ret.obit = ExtractValueInst::Create(struct_res, int1_obit_indices, "obit", mBasicBlock);
818
819    return ret;
820}
821#else
822PabloCompiler::SumWithOverflowPack PabloCompiler::callUaddOverflow(Value* int128_e1, Value* int128_e2, Value* int1_cin) {
823    std::vector<Value*> struct_res_params;
824    struct_res_params.push_back(int128_e1);
825    struct_res_params.push_back(int128_e2);
826    struct_res_params.push_back(int1_cin);
827    CallInst* struct_res = CallInst::Create(mFunctionUaddOverflowCarryin, struct_res_params, "uadd_overflow_res", mBasicBlock);
828    struct_res->setCallingConv(CallingConv::C);
829    struct_res->setTailCall(false);
830    AttributeSet struct_res_PAL;
831    struct_res->setAttributes(struct_res_PAL);
832
833    SumWithOverflowPack ret;
834
835    std::vector<unsigned> int128_sum_indices;
836    int128_sum_indices.push_back(0);
837    ret.sum = ExtractValueInst::Create(struct_res, int128_sum_indices, "sum", mBasicBlock);
838
839    std::vector<unsigned> int1_obit_indices;
840    int1_obit_indices.push_back(1);
841    ret.obit = ExtractValueInst::Create(struct_res, int1_obit_indices, "obit", mBasicBlock);
842
843    return ret;
844}
845#endif
846#endif
847
848
849Value* PabloCompiler::genAddWithCarry(Value* e1, Value* e2, unsigned localIndex, const PabloBlock * blk) {
850    IRBuilder<> b(mBasicBlock);
851    const PabloBlockCarryData & cd = blk->carryData;
852    const unsigned carryIdx = cd.carryOpCarryDataOffset(localIndex);
853    Value* carryq_value = genCarryDataLoad(carryIdx);
854#ifdef USE_TWO_UADD_OVERFLOW
855    //This is the ideal implementation, which uses two uadd.with.overflow
856    //The back end should be able to recognize this pattern and combine it into uadd.with.overflow.carryin
857    CastInst* int128_e1 = new BitCastInst(e1, b.getIntNTy(BLOCK_SIZE), "e1_128", mBasicBlock);
858    CastInst* int128_e2 = new BitCastInst(e2, b.getIntNTy(BLOCK_SIZE), "e2_128", mBasicBlock);
859    CastInst* int128_carryq_value = new BitCastInst(carryq_value, b.getIntNTy(BLOCK_SIZE), "carryq_128", mBasicBlock);
860
861    SumWithOverflowPack sumpack0, sumpack1;
862
863    sumpack0 = callUaddOverflow(int128_e1, int128_e2);
864    sumpack1 = callUaddOverflow(sumpack0.sum, int128_carryq_value);
865
866    Value* obit = b.CreateOr(sumpack0.obit, sumpack1.obit, "carry_bit");
867    Value* sum = b.CreateBitCast(sumpack1.sum, mBitBlockType, "ret_sum");
868
869    /*obit is the i1 carryout, zero extend and insert it into a v2i64 or v4i64 vector.*/
870    ConstantAggregateZero* const_packed_5 = ConstantAggregateZero::get(mBitBlockType);
871    ConstantInt* const_int32_6 = ConstantInt::get(mMod->getContext(), APInt(32, StringRef("0"), 10));
872    CastInst* int64_o0 = new ZExtInst(obit, IntegerType::get(mMod->getContext(), 64), "o0", mBasicBlock);
873    InsertElementInst* carry_out = InsertElementInst::Create(const_packed_5, int64_o0, const_int32_6, "carry_out", mBasicBlock);
874
875#elif defined USE_UADD_OVERFLOW
876    //use llvm.uadd.with.overflow.i128 or i256
877    CastInst* int128_e1 = new BitCastInst(e1, b.getIntNTy(BLOCK_SIZE), "e1_128", mBasicBlock);
878    CastInst* int128_e2 = new BitCastInst(e2, b.getIntNTy(BLOCK_SIZE), "e2_128", mBasicBlock);
879
880    //get i1 carryin from iBLOCK_SIZE
881    ConstantInt* const_int32_6 = ConstantInt::get(mMod->getContext(), APInt(32, StringRef("0"), 10));
882    ExtractElementInst * int64_carryq_value = ExtractElementInst::Create(carryq_value, const_int32_6, "carryq_64", mBasicBlock);
883    CastInst* int1_carryq_value = new TruncInst(int64_carryq_value, IntegerType::get(mMod->getContext(), 1), "carryq_1", mBasicBlock);
884
885    SumWithOverflowPack sumpack0;
886    sumpack0 = callUaddOverflow(int128_e1, int128_e2, int1_carryq_value);
887    Value* obit = sumpack0.obit;
888    Value* sum = b.CreateBitCast(sumpack0.sum, mBitBlockType, "sum");
889
890    /*obit is the i1 carryout, zero extend and insert it into a v2i64 or v4i64 vector.*/
891    ConstantAggregateZero* const_packed_5 = ConstantAggregateZero::get(mBitBlockType);
892    CastInst* int64_o0 = new ZExtInst(obit, IntegerType::get(mMod->getContext(), 64), "o0", mBasicBlock);
893    InsertElementInst* carry_out = InsertElementInst::Create(const_packed_5, int64_o0, const_int32_6, "carry_out", mBasicBlock);
894#elif (BLOCK_SIZE == 128)
895    //calculate carry through logical ops
896    Value* carrygen = b.CreateAnd(e1, e2, "carrygen");
897    Value* carryprop = b.CreateOr(e1, e2, "carryprop");
898    Value* digitsum = b.CreateAdd(e1, e2, "digitsum");
899    Value* partial = b.CreateAdd(digitsum, carryq_value, "partial");
900    Value* digitcarry = b.CreateOr(carrygen, b.CreateAnd(carryprop, genNot(partial)));
901    Value* mid_carry_in = genShiftLeft64(b.CreateLShr(digitcarry, 63), "mid_carry_in");
902
903    Value* sum = b.CreateAdd(partial, mid_carry_in, "sum");
904    Value* carry_out = genShiftHighbitToLow(BLOCK_SIZE, b.CreateOr(carrygen, b.CreateAnd(carryprop, genNot(sum))));
905#else
906    //BLOCK_SIZE == 256, there is no other implementation
907    static_assert(false, "Add with carry for 256-bit bitblock requires USE_UADD_OVERFLOW");
908#endif //USE_TWO_UADD_OVERFLOW
909
910    genCarryDataStore(carry_out, carryIdx);
911    return sum;
912}
913//#define CARRY_DEBUG
914Value* PabloCompiler::genCarryDataLoad(const unsigned index) {
915    assert (index < mCarryInVector.size());
916    if (mWhileDepth == 0) {
917        IRBuilder<> b(mBasicBlock);
918        mCarryInVector[index] = b.CreateAlignedLoad(b.CreateGEP(mCarryDataPtr, b.getInt64(index)), BLOCK_SIZE/8, false);
919    }
920#ifdef CARRY_DEBUG
921    std::cerr << "genCarryDataLoad " << index << std::endl;
922    genPrintRegister("carry_in_" + std::to_string(index), mCarryInVector[index]);
923#endif
924    return mCarryInVector[index];
925}
926
927void PabloCompiler::genCarryDataStore(Value* carryOut, const unsigned index ) {
928    assert (carryOut);
929    assert (index < mCarryOutVector.size());
930    if (mWhileDepth == 0) {
931        IRBuilder<> b(mBasicBlock);
932        b.CreateAlignedStore(carryOut, b.CreateGEP(mCarryDataPtr, b.getInt64(index)), BLOCK_SIZE/8, false);
933    }
934    mCarryDataSummaryIdx[index] = -1;
935    mCarryOutVector[index] = carryOut;
936#ifdef CARRY_DEBUG
937    std::cerr << "genCarryDataStore " << index << std::endl;
938    genPrintRegister("carry_out_" + std::to_string(index), mCarryOutVector[index]);
939#endif
940    //std::cerr << "mCarryOutVector[" << index << "]]\n";
941}
942
943inline Value* PabloCompiler::genBitBlockAny(Value* test) {
944    IRBuilder<> b(mBasicBlock);
945    Value* cast_marker_value_1 = b.CreateBitCast(test, b.getIntNTy(BLOCK_SIZE));
946    return b.CreateICmpEQ(cast_marker_value_1, ConstantInt::get(b.getIntNTy(BLOCK_SIZE), 0));
947}
948
949Value * PabloCompiler::genShiftHighbitToLow(unsigned FieldWidth, Value * op) {
950    unsigned FieldCount = BLOCK_SIZE/FieldWidth;
951    IRBuilder<> b(mBasicBlock);
952    VectorType * vType = VectorType::get(IntegerType::get(mMod->getContext(), FieldWidth), FieldCount);
953    Value * v = b.CreateBitCast(op, vType);
954    return b.CreateBitCast(b.CreateLShr(v, FieldWidth - 1), mBitBlockType);
955}
956
957Value* PabloCompiler::genShiftLeft64(Value* e, const Twine &namehint) {
958    IRBuilder<> b(mBasicBlock);
959    Value* i128_val = b.CreateBitCast(e, b.getIntNTy(BLOCK_SIZE));
960    return b.CreateBitCast(b.CreateShl(i128_val, 64, namehint), mBitBlockType);
961}
962
963inline Value* PabloCompiler::genNot(Value* expr) {
964    IRBuilder<> b(mBasicBlock);
965    return b.CreateXor(expr, mOneInitializer, "not");
966}
967
968Value* PabloCompiler::genAdvanceWithCarry(Value* strm_value, int shift_amount, unsigned localIndex, const PabloBlock * blk) {
969    if (shift_amount >= LongAdvanceBase) {
970        return genLongAdvanceWithCarry(strm_value, shift_amount, localIndex, blk);
971    }
972    else if (shift_amount == 1) {
973        return genUnitAdvanceWithCarry(strm_value, localIndex, blk);
974    }
975    IRBuilder<> b(mBasicBlock);
976    const PabloBlockCarryData & cd = blk->carryData;
977    const auto advanceIndex = cd.shortAdvanceCarryDataOffset(localIndex);
978    Value* result_value;
979   
980    if (shift_amount == 0) {
981        result_value = genCarryDataLoad(advanceIndex);
982    }
983    else {
984        Value* advanceq_longint = b.CreateBitCast(genCarryDataLoad(advanceIndex), b.getIntNTy(BLOCK_SIZE));
985        Value* strm_longint = b.CreateBitCast(strm_value, b.getIntNTy(BLOCK_SIZE));
986        Value* adv_longint = b.CreateOr(b.CreateShl(strm_longint, shift_amount), b.CreateLShr(advanceq_longint, BLOCK_SIZE - shift_amount), "advance");
987        result_value = b.CreateBitCast(adv_longint, mBitBlockType);
988    }
989    genCarryDataStore(strm_value, advanceIndex);
990    return result_value;
991}
992                   
993Value* PabloCompiler::genUnitAdvanceWithCarry(Value* strm_value, unsigned localIndex, const PabloBlock * blk) {
994    IRBuilder<> b(mBasicBlock);
995    const PabloBlockCarryData & cd = blk->carryData;
996    const auto advanceIndex = cd.unitAdvanceCarryDataOffset(localIndex);
997    Value* result_value;
998   
999#if (BLOCK_SIZE == 128) && !defined(USE_LONG_INTEGER_SHIFT)
1000    Value* advanceq_value = genShiftHighbitToLow(BLOCK_SIZE, genCarryDataLoad(advanceIndex));
1001    Value* srli_1_value = b.CreateLShr(strm_value, 63);
1002    Value* packed_shuffle;
1003    Constant* const_packed_1_elems [] = {b.getInt32(0), b.getInt32(2)};
1004    Constant* const_packed_1 = ConstantVector::get(const_packed_1_elems);
1005    packed_shuffle = b.CreateShuffleVector(advanceq_value, srli_1_value, const_packed_1);
1006   
1007    Constant* const_packed_2_elems[] = {b.getInt64(1), b.getInt64(1)};
1008    Constant* const_packed_2 = ConstantVector::get(const_packed_2_elems);
1009   
1010    Value* shl_value = b.CreateShl(strm_value, const_packed_2);
1011    result_value = b.CreateOr(shl_value, packed_shuffle, "advance");
1012#else
1013    Value* advanceq_longint = b.CreateBitCast(genCarryDataLoad(advanceIndex), b.getIntNTy(BLOCK_SIZE));
1014    Value* strm_longint = b.CreateBitCast(strm_value, b.getIntNTy(BLOCK_SIZE));
1015    Value* adv_longint = b.CreateOr(b.CreateShl(strm_longint, 1), b.CreateLShr(advanceq_longint, BLOCK_SIZE - 1), "advance");
1016    result_value = b.CreateBitCast(adv_longint, mBitBlockType);
1017   
1018#endif
1019    genCarryDataStore(strm_value, advanceIndex);
1020    return result_value;
1021}
1022                   
1023//
1024// Generate code for long advances >= LongAdvanceBase
1025//
1026Value* PabloCompiler::genLongAdvanceWithCarry(Value* strm_value, int shift_amount, unsigned localIndex, const PabloBlock * blk) {
1027    IRBuilder<> b(mBasicBlock);
1028    const PabloBlockCarryData & cd = blk->carryData;
1029    const unsigned block_shift = shift_amount % BLOCK_SIZE;
1030    const unsigned advanceEntries = cd.longAdvanceEntries(shift_amount);
1031    const unsigned bufsize = cd.longAdvanceBufferSize(shift_amount);
1032    //std::cerr << "shift_amount = " << shift_amount << " bufsize = " << bufsize << std::endl;
1033    Value * indexMask = b.getInt64(bufsize - 1);  // A mask to implement circular buffer indexing
1034    Value * advBaseIndex = b.getInt64(cd.longAdvanceCarryDataOffset(localIndex));
1035    Value * storeIndex = b.CreateAdd(b.CreateAnd(mBlockNo, indexMask), advBaseIndex);
1036    Value * loadIndex = b.CreateAdd(b.CreateAnd(b.CreateSub(mBlockNo, b.getInt64(advanceEntries)), indexMask), advBaseIndex);
1037    Value * storePtr = b.CreateGEP(mCarryDataPtr, storeIndex);
1038    Value * loadPtr = b.CreateGEP(mCarryDataPtr, loadIndex);
1039    Value* result_value;
1040
1041    if (block_shift == 0) {
1042        result_value = b.CreateAlignedLoad(loadPtr, BLOCK_SIZE/8);
1043    }
1044    else if (advanceEntries == 1) {
1045        Value* advanceq_longint = b.CreateBitCast(b.CreateAlignedLoad(loadPtr, BLOCK_SIZE/8), b.getIntNTy(BLOCK_SIZE));
1046        Value* strm_longint = b.CreateBitCast(strm_value, b.getIntNTy(BLOCK_SIZE));
1047        Value* adv_longint = b.CreateOr(b.CreateShl(strm_longint, block_shift), b.CreateLShr(advanceq_longint, BLOCK_SIZE - block_shift), "advance");
1048        result_value = b.CreateBitCast(adv_longint, mBitBlockType);
1049    }
1050    else {
1051        // The advance is based on the two oldest bit blocks in the advance buffer.
1052        // The buffer is maintained as a circular buffer of size bufsize.
1053        // Indexes within the buffer are computed by bitwise and with the indexMask.
1054        Value * loadIndex2 = b.CreateAdd(b.CreateAnd(b.CreateSub(mBlockNo, b.getInt64(advanceEntries-1)), indexMask), advBaseIndex);
1055        Value * loadPtr2 = b.CreateGEP(mCarryDataPtr, loadIndex2);
1056        Value* advanceq_longint = b.CreateBitCast(b.CreateAlignedLoad(loadPtr, BLOCK_SIZE/8), b.getIntNTy(BLOCK_SIZE));
1057        //genPrintRegister("advanceq_longint", b.CreateBitCast(advanceq_longint, mBitBlockType));
1058        Value* strm_longint = b.CreateBitCast(b.CreateAlignedLoad(loadPtr2, BLOCK_SIZE/8), b.getIntNTy(BLOCK_SIZE));
1059        //genPrintRegister("strm_longint", b.CreateBitCast(strm_longint, mBitBlockType));
1060        Value* adv_longint = b.CreateOr(b.CreateShl(strm_longint, block_shift), b.CreateLShr(advanceq_longint, BLOCK_SIZE - block_shift), "longadvance");
1061        result_value = b.CreateBitCast(adv_longint, mBitBlockType);
1062    }
1063    b.CreateAlignedStore(strm_value, storePtr, BLOCK_SIZE/8);
1064    return result_value;
1065}
1066   
1067void PabloCompiler::SetOutputValue(Value * marker, const unsigned index) {
1068    IRBuilder<> b(mBasicBlock);
1069    if (marker->getType()->isPointerTy()) {
1070        marker = b.CreateAlignedLoad(marker, BLOCK_SIZE/8, false);
1071    }
1072    Value* indices[] = {b.getInt64(0), b.getInt32(index)};
1073    Value* gep = b.CreateGEP(mOutputAddrPtr, indices);
1074    b.CreateAlignedStore(marker, gep, BLOCK_SIZE/8, false);
1075}
1076
1077CompiledPabloFunction::CompiledPabloFunction(size_t carryDataSize, Function * function, ExecutionEngine * executionEngine)
1078: CarryDataSize(carryDataSize)
1079, FunctionPointer(executionEngine->getPointerToFunction(function))
1080, mFunction(function)
1081, mExecutionEngine(executionEngine)
1082{
1083
1084}
1085
1086// Clean up the memory for the compiled function once we're finished using it.
1087CompiledPabloFunction::~CompiledPabloFunction() {
1088    if (mExecutionEngine) {
1089        assert (mFunction);
1090        // mExecutionEngine->freeMachineCodeForFunction(mFunction); // This function only prints a "not supported" message. Reevaluate with LLVM 3.6.
1091        delete mExecutionEngine;
1092    }
1093}
1094
1095}
Note: See TracBrowser for help on using the repository browser.