source: icGREP/icgrep-devel/icgrep/pablo/pablo_compiler.cpp @ 4628

Last change on this file since 4628 was 4628, checked in by cameron, 4 years ago

Use a single IRBuilder, calling SetInsertPoint as appropriate

File size: 46.9 KB
RevLine 
[3850]1/*
[4533]2 *  Copyright (c) 2014-15 International Characters.
[3850]3 *  This software is licensed to the public under the Open Software License 3.0.
4 *  icgrep is a trademark of International Characters.
5 */
6
[4237]7#include <pablo/pablo_compiler.h>
8#include <pablo/codegenstate.h>
[4566]9#include <pablo/carry_data.h>
[4237]10#include <pablo/printer_pablos.h>
[4249]11#include <cc/cc_namemap.hpp>
12#include <re/re_name.h>
[4237]13#include <stdexcept>
[4240]14#include <include/simd-lib/bitblock.hpp>
[4416]15#include <sstream>
[4274]16#include <llvm/IR/Verifier.h>
17#include <llvm/Pass.h>
18#include <llvm/PassManager.h>
19#include <llvm/ADT/SmallVector.h>
20#include <llvm/Analysis/Passes.h>
21#include <llvm/IR/BasicBlock.h>
22#include <llvm/IR/CallingConv.h>
23#include <llvm/IR/Constants.h>
24#include <llvm/IR/DataLayout.h>
25#include <llvm/IR/DerivedTypes.h>
26#include <llvm/IR/Function.h>
27#include <llvm/IR/GlobalVariable.h>
28#include <llvm/IR/InlineAsm.h>
29#include <llvm/IR/Instructions.h>
30#include <llvm/IR/LLVMContext.h>
31#include <llvm/IR/Module.h>
32#include <llvm/Support/FormattedStream.h>
33#include <llvm/Support/MathExtras.h>
34#include <llvm/Support/Casting.h>
[4280]35#include <llvm/Support/Compiler.h>
[4274]36#include <llvm/Support/Debug.h>
37#include <llvm/Support/TargetSelect.h>
38#include <llvm/Support/Host.h>
39#include <llvm/Transforms/Scalar.h>
40#include <llvm/ExecutionEngine/ExecutionEngine.h>
41#include <llvm/ExecutionEngine/MCJIT.h>
42#include <llvm/IRReader/IRReader.h>
43#include <llvm/Bitcode/ReaderWriter.h>
44#include <llvm/Support/MemoryBuffer.h>
45#include <llvm/IR/IRBuilder.h>
[4510]46#include <llvm/Support/CommandLine.h>
47#include <llvm/ADT/Twine.h>
[4438]48#include <iostream>
[4237]49
[4544]50static cl::OptionCategory eIRDumpOptions("LLVM IR Dump Options", "These options control dumping of LLVM IR.");
[4378]51static cl::opt<bool> DumpGeneratedIR("dump-generated-IR", cl::init(false), cl::desc("print LLVM IR generated by RE compilation"), cl::cat(eIRDumpOptions));
52
[4544]53static cl::OptionCategory fTracingOptions("Run-time Tracing Options", "These options control execution traces.");
54static cl::opt<bool> TraceNext("trace-next-nodes", cl::init(false), cl::desc("Generate dynamic traces of executed Next nodes (while control variables)."), cl::cat(fTracingOptions));
55static cl::opt<bool> DumpTrace("dump-trace", cl::init(false), cl::desc("Generate dynamic traces of executed assignments."), cl::cat(fTracingOptions));
56
[4237]57extern "C" {
[4542]58  void wrapped_print_register(char * regName, BitBlock bit_block) {
59      print_register<BitBlock>(regName, bit_block);
[4237]60  }
61}
62
63namespace pablo {
64
[4270]65PabloCompiler::PabloCompiler(const std::vector<Var*> & basisBits)
66: mBasisBits(basisBits)
[4572]67#ifdef USE_LLVM_3_5
[4237]68, mMod(new Module("icgrep", getGlobalContext()))
[4572]69#else
70, mModOwner(make_unique<Module>("icgrep", getGlobalContext()))
71, mMod(mModOwner.get())
72#endif
[4628]73, mBuilder(&LLVM_Builder)
[4237]74, mExecutionEngine(nullptr)
[4270]75, mBitBlockType(VectorType::get(IntegerType::get(mMod->getContext(), 64), BLOCK_SIZE / 64))
[4237]76, mBasisBitsInputPtr(nullptr)
[4538]77, mCarryDataPtr(nullptr)
[4548]78, mBlockNo(nullptr)
[4545]79, mWhileDepth(0)
80, mIfDepth(0)
[4270]81, mZeroInitializer(ConstantAggregateZero::get(mBitBlockType))
82, mOneInitializer(ConstantVector::getAllOnesValue(mBitBlockType))
[4253]83, mFunctionType(nullptr)
[4274]84, mFunction(nullptr)
[4237]85, mBasisBitsAddr(nullptr)
[4253]86, mOutputAddrPtr(nullptr)
[4545]87, mMaxWhileDepth(0)
[4510]88, mPrintRegisterFunction(nullptr)
[4237]89{
90    //Create the jit execution engine.up
91    InitializeNativeTarget();
92    InitializeNativeTargetAsmPrinter();
93    InitializeNativeTargetAsmParser();
94    DefineTypes();
95}
96
97PabloCompiler::~PabloCompiler()
98{
[4516]99
[4237]100}
[4382]101   
102void PabloCompiler::InstallExternalFunction(std::string C_fn_name, void * fn_ptr) {
103    mExternalMap.insert(std::make_pair(C_fn_name, fn_ptr));
104}
[4237]105
[4542]106void PabloCompiler::genPrintRegister(std::string regName, Value * bitblockValue) {
107    Constant * regNameData = ConstantDataArray::getString(mMod->getContext(), regName);
108    GlobalVariable *regStrVar = new GlobalVariable(*mMod, 
109                                                   ArrayType::get(IntegerType::get(mMod->getContext(), 8), regName.length()+1),
110                                                   /*isConstant=*/ true,
111                                                   /*Linkage=*/ GlobalValue::PrivateLinkage,
112                                                   /*Initializer=*/ regNameData);
[4628]113    Value * regStrPtr = mBuilder->CreateGEP(regStrVar, {mBuilder->getInt64(0), mBuilder->getInt32(0)});
114    mBuilder->CreateCall(mPrintRegisterFunction, {regStrPtr, bitblockValue});
[4542]115}
[4382]116
[4516]117CompiledPabloFunction PabloCompiler::compile(PabloBlock & pb)
[4237]118{
[4545]119    mWhileDepth = 0;
120    mIfDepth = 0;
121    mMaxWhileDepth = 0;
[4566]122    // Get the total number of carry entries; add 1 extra element for the block number.
123    unsigned totalCarryDataSize = pb.carryData.enumerate(pb) + 1;
124    Examine(pb); 
[4545]125    mCarryInVector.resize(totalCarryDataSize);
126    mCarryOutVector.resize(totalCarryDataSize);
[4541]127    mCarryDataSummaryIdx.resize(totalCarryDataSize);
[4276]128    std::string errMessage;
[4572]129#ifdef USE_LLVM_3_5
[4276]130    EngineBuilder builder(mMod);
[4572]131#else
132    EngineBuilder builder(std::move(mModOwner));
133#endif
[4276]134    builder.setErrorStr(&errMessage);
135    builder.setMCPU(sys::getHostCPUName());
[4572]136#ifdef USE_LLVM_3_5
[4276]137    builder.setUseMCJIT(true);
[4572]138#endif
[4545]139    builder.setOptLevel(mMaxWhileDepth ? CodeGenOpt::Level::Less : CodeGenOpt::Level::None);
[4276]140    mExecutionEngine = builder.create();
141    if (mExecutionEngine == nullptr) {
142        throw std::runtime_error("Could not create ExecutionEngine: " + errMessage);
143    }
[4421]144    DeclareFunctions();
[4276]145
[4417]146    DeclareCallFunctions();
[4276]147
[4274]148    Function::arg_iterator args = mFunction->arg_begin();
[4250]149    mBasisBitsAddr = args++;
150    mBasisBitsAddr->setName("basis_bits");
[4538]151    mCarryDataPtr = args++;
152    mCarryDataPtr->setName("carry_data");
[4253]153    mOutputAddrPtr = args++;
154    mOutputAddrPtr->setName("output");
[4237]155
[4545]156    mWhileDepth = 0;
157    mIfDepth = 0;
158    mMaxWhileDepth = 0;
[4628]159    BasicBlock * b = BasicBlock::Create(mMod->getContext(), "parabix_entry", mFunction,0);
160    mBuilder->SetInsertPoint(b);
[4237]161
162    //The basis bits structure
[4270]163    for (unsigned i = 0; i != mBasisBits.size(); ++i) {
[4628]164        Value* indices[] = {mBuilder->getInt64(0), mBuilder->getInt32(i)};
165        Value * gep = mBuilder->CreateGEP(mBasisBitsAddr, indices);
166        LoadInst * basisBit = mBuilder->CreateAlignedLoad(gep, BLOCK_SIZE/8, false, mBasisBits[i]->getName()->to_string());
[4410]167        mMarkerMap.insert(std::make_pair(mBasisBits[i], basisBit));
[4237]168    }
[4548]169   
170    // The block number is a 64-bit integer at the end of the carry data area.
[4628]171    Value * blockNoPtr = mBuilder->CreateBitCast(mBuilder->CreateGEP(mCarryDataPtr, mBuilder->getInt64(totalCarryDataSize - 1)), Type::getInt64PtrTy(mBuilder->getContext()));
172    mBlockNo = mBuilder->CreateLoad(blockNoPtr);
[4237]173    //Generate the IR instructions for the function.
[4539]174    compileBlock(pb);
[4628]175   
176    mBuilder->CreateStore(mBuilder->CreateAdd(mBlockNo, mBuilder->getInt64(1)), blockNoPtr);
[4257]177
[4548]178    if (DumpTrace || TraceNext) {
[4589]179        genPrintRegister("blockNo", genCarryDataLoad(totalCarryDataSize - 1));
[4548]180    }
[4545]181    if (LLVM_UNLIKELY(mWhileDepth != 0)) {
182        throw std::runtime_error("Non-zero nesting depth error (" + std::to_string(mWhileDepth) + ")");
[4433]183    }
184
[4237]185    //Terminate the block
[4628]186    ReturnInst::Create(mMod->getContext(), mBuilder->GetInsertBlock());
[4237]187
[4348]188    //Display the IR that has been generated by this module.
[4433]189    if (LLVM_UNLIKELY(DumpGeneratedIR)) {
190        mMod->dump();
[4348]191    }
[4237]192    //Create a verifier.  The verifier will print an error message if our module is malformed in any way.
193    verifyModule(*mMod, &dbgs());
194
195    mExecutionEngine->finalizeObject();
196
[4538]197    //Return the required size of the carry data area to the process_block function.
[4566]198    return CompiledPabloFunction(totalCarryDataSize * sizeof(BitBlock), mFunction, mExecutionEngine);
[4237]199}
200
201void PabloCompiler::DefineTypes()
202{
[4254]203    StructType * structBasisBits = mMod->getTypeByName("struct.Basis_bits");
204    if (structBasisBits == nullptr) {
205        structBasisBits = StructType::create(mMod->getContext(), "struct.Basis_bits");
[4237]206    }
207    std::vector<Type*>StructTy_struct_Basis_bits_fields;
[4270]208    for (int i = 0; i != mBasisBits.size(); i++)
[4237]209    {
[4270]210        StructTy_struct_Basis_bits_fields.push_back(mBitBlockType);
[4237]211    }
[4254]212    if (structBasisBits->isOpaque()) {
213        structBasisBits->setBody(StructTy_struct_Basis_bits_fields, /*isPacked=*/false);
[4237]214    }
[4254]215    mBasisBitsInputPtr = PointerType::get(structBasisBits, 0);
[4237]216
[4254]217    std::vector<Type*>functionTypeArgs;
218    functionTypeArgs.push_back(mBasisBitsInputPtr);
[4237]219
[4538]220    //The carry data array.
[4237]221    //A pointer to the BitBlock vector.
[4270]222    functionTypeArgs.push_back(PointerType::get(mBitBlockType, 0));
[4237]223
224    //The output structure.
[4253]225    StructType * outputStruct = mMod->getTypeByName("struct.Output");
226    if (!outputStruct) {
227        outputStruct = StructType::create(mMod->getContext(), "struct.Output");
[4237]228    }
[4253]229    if (outputStruct->isOpaque()) {
[4254]230        std::vector<Type*>fields;
[4270]231        fields.push_back(mBitBlockType);
232        fields.push_back(mBitBlockType);
[4254]233        outputStruct->setBody(fields, /*isPacked=*/false);
[4237]234    }
[4253]235    PointerType* outputStructPtr = PointerType::get(outputStruct, 0);
[4237]236
237    //The &output parameter.
[4254]238    functionTypeArgs.push_back(outputStructPtr);
[4237]239
[4253]240    mFunctionType = FunctionType::get(
[4237]241     /*Result=*/Type::getVoidTy(mMod->getContext()),
[4254]242     /*Params=*/functionTypeArgs,
[4237]243     /*isVarArg=*/false);
244}
245
246void PabloCompiler::DeclareFunctions()
247{
248    //This function can be used for testing to print the contents of a register from JIT'd code to the terminal window.
[4542]249    mPrintRegisterFunction = mMod->getOrInsertFunction("wrapped_print_register", Type::getVoidTy(getGlobalContext()), Type::getInt8PtrTy(getGlobalContext()), mBitBlockType, NULL);
[4510]250    mExecutionEngine->addGlobalMapping(cast<GlobalValue>(mPrintRegisterFunction), (void *)&wrapped_print_register);
[4628]251    // to call->  mBuilder->CreateCall(mFunc_print_register, unicode_category);
[4237]252
253#ifdef USE_UADD_OVERFLOW
[4301]254#ifdef USE_TWO_UADD_OVERFLOW
[4240]255    // Type Definitions for llvm.uadd.with.overflow.carryin.i128 or .i256
[4237]256    std::vector<Type*>StructTy_0_fields;
257    StructTy_0_fields.push_back(IntegerType::get(mMod->getContext(), BLOCK_SIZE));
258    StructTy_0_fields.push_back(IntegerType::get(mMod->getContext(), 1));
259    StructType *StructTy_0 = StructType::get(mMod->getContext(), StructTy_0_fields, /*isPacked=*/false);
260
261    std::vector<Type*>FuncTy_1_args;
262    FuncTy_1_args.push_back(IntegerType::get(mMod->getContext(), BLOCK_SIZE));
263    FuncTy_1_args.push_back(IntegerType::get(mMod->getContext(), BLOCK_SIZE));
264    FunctionType* FuncTy_1 = FunctionType::get(
265                                              /*Result=*/StructTy_0,
266                                              /*Params=*/FuncTy_1_args,
267                                              /*isVarArg=*/false);
268
[4301]269    mFunctionUaddOverflow = mMod->getFunction("llvm.uadd.with.overflow.i" +
[4275]270                                              std::to_string(BLOCK_SIZE));
271    if (!mFunctionUaddOverflow) {
[4301]272        mFunctionUaddOverflow= Function::Create(
[4240]273          /*Type=*/ FuncTy_1,
274          /*Linkage=*/ GlobalValue::ExternalLinkage,
[4301]275          /*Name=*/ "llvm.uadd.with.overflow.i" + std::to_string(BLOCK_SIZE), mMod); // (external, no body)
[4275]276        mFunctionUaddOverflow->setCallingConv(CallingConv::C);
[4237]277    }
[4275]278    AttributeSet mFunctionUaddOverflowPAL;
[4237]279    {
280        SmallVector<AttributeSet, 4> Attrs;
281        AttributeSet PAS;
282        {
283          AttrBuilder B;
284          B.addAttribute(Attribute::NoUnwind);
285          B.addAttribute(Attribute::ReadNone);
286          PAS = AttributeSet::get(mMod->getContext(), ~0U, B);
287        }
288
289        Attrs.push_back(PAS);
[4275]290        mFunctionUaddOverflowPAL = AttributeSet::get(mMod->getContext(), Attrs);
[4237]291    }
[4275]292    mFunctionUaddOverflow->setAttributes(mFunctionUaddOverflowPAL);
[4301]293#else
294    // Type Definitions for llvm.uadd.with.overflow.carryin.i128 or .i256
295    std::vector<Type*>StructTy_0_fields;
296    StructTy_0_fields.push_back(IntegerType::get(mMod->getContext(), BLOCK_SIZE));
297    StructTy_0_fields.push_back(IntegerType::get(mMod->getContext(), 1));
298    StructType *StructTy_0 = StructType::get(mMod->getContext(), StructTy_0_fields, /*isPacked=*/false);
299
300    std::vector<Type*>FuncTy_1_args;
301    FuncTy_1_args.push_back(IntegerType::get(mMod->getContext(), BLOCK_SIZE));
302    FuncTy_1_args.push_back(IntegerType::get(mMod->getContext(), BLOCK_SIZE));
303    FuncTy_1_args.push_back(IntegerType::get(mMod->getContext(), 1));
304    FunctionType* FuncTy_1 = FunctionType::get(
305                                              /*Result=*/StructTy_0,
306                                              /*Params=*/FuncTy_1_args,
307                                              /*isVarArg=*/false);
308
309    mFunctionUaddOverflowCarryin = mMod->getFunction("llvm.uadd.with.overflow.carryin.i" +
310                                              std::to_string(BLOCK_SIZE));
311    if (!mFunctionUaddOverflowCarryin) {
312        mFunctionUaddOverflowCarryin = Function::Create(
313          /*Type=*/ FuncTy_1,
314          /*Linkage=*/ GlobalValue::ExternalLinkage,
315          /*Name=*/ "llvm.uadd.with.overflow.carryin.i" + std::to_string(BLOCK_SIZE), mMod); // (external, no body)
316        mFunctionUaddOverflowCarryin->setCallingConv(CallingConv::C);
317    }
318    AttributeSet mFunctionUaddOverflowCarryinPAL;
319    {
320        SmallVector<AttributeSet, 4> Attrs;
321        AttributeSet PAS;
322        {
323          AttrBuilder B;
324          B.addAttribute(Attribute::NoUnwind);
325          B.addAttribute(Attribute::ReadNone);
326          PAS = AttributeSet::get(mMod->getContext(), ~0U, B);
327        }
328
329        Attrs.push_back(PAS);
330        mFunctionUaddOverflowCarryinPAL = AttributeSet::get(mMod->getContext(), Attrs);
331    }
332    mFunctionUaddOverflowCarryin->setAttributes(mFunctionUaddOverflowCarryinPAL);
[4237]333#endif
[4301]334#endif
[4237]335
336    //Starts on process_block
[4538]337    SmallVector<AttributeSet, 4> Attrs;
[4237]338    AttributeSet PAS;
339    {
340        AttrBuilder B;
341        B.addAttribute(Attribute::ReadOnly);
342        B.addAttribute(Attribute::NoCapture);
343        PAS = AttributeSet::get(mMod->getContext(), 1U, B);
344    }
345    Attrs.push_back(PAS);
346    {
347        AttrBuilder B;
348        B.addAttribute(Attribute::NoCapture);
349        PAS = AttributeSet::get(mMod->getContext(), 2U, B);
350    }
351    Attrs.push_back(PAS);
352    {
353        AttrBuilder B;
354        B.addAttribute(Attribute::NoCapture);
355        PAS = AttributeSet::get(mMod->getContext(), 3U, B);
356    }
357    Attrs.push_back(PAS);
358    {
359        AttrBuilder B;
360        B.addAttribute(Attribute::NoUnwind);
361        B.addAttribute(Attribute::UWTable);
362        PAS = AttributeSet::get(mMod->getContext(), ~0U, B);
363    }
364    AttributeSet AttrSet = AttributeSet::get(mMod->getContext(), Attrs);
365
366    //Create the function that will be generated.
[4274]367    mFunction = mMod->getFunction("process_block");
368    if (!mFunction) {
369        mFunction = Function::Create(
[4253]370            /*Type=*/mFunctionType,
[4237]371            /*Linkage=*/GlobalValue::ExternalLinkage,
372            /*Name=*/"process_block", mMod);
[4274]373        mFunction->setCallingConv(CallingConv::C);
[4237]374    }
[4274]375    mFunction->setAttributes(AttrSet);
[4237]376}
[4541]377   
[4566]378void PabloCompiler::Examine(PabloBlock & blk) {
[4539]379    for (Statement * stmt : blk) {
[4433]380        if (Call * call = dyn_cast<Call>(stmt)) {
381            mCalleeMap.insert(std::make_pair(call->getCallee(), nullptr));
382        }
[4257]383        else if (If * ifStatement = dyn_cast<If>(stmt)) {
[4545]384            ++mIfDepth;
[4566]385            Examine(ifStatement->getBody());
[4545]386            --mIfDepth;
[4237]387        }
[4257]388        else if (While * whileStatement = dyn_cast<While>(stmt)) {
[4545]389            mMaxWhileDepth = std::max(mMaxWhileDepth, ++mWhileDepth);
[4566]390            Examine(whileStatement->getBody());
[4545]391            --mWhileDepth;
[4237]392        }
393    }
394}
395
[4276]396void PabloCompiler::DeclareCallFunctions() {
397    for (auto mapping : mCalleeMap) {
398        const String * callee = mapping.first;
[4382]399        //std::cerr << callee->str() << " to be declared\n";
[4510]400        auto ei = mExternalMap.find(callee->value());
[4382]401        if (ei != mExternalMap.end()) {
402            void * fn_ptr = ei->second;
403            //std::cerr << "Ptr found:" <<  std::hex << ((intptr_t) fn_ptr) << std::endl;
[4510]404            Value * externalValue = mMod->getOrInsertFunction(callee->value(), mBitBlockType, mBasisBitsInputPtr, NULL);
[4382]405            if (LLVM_UNLIKELY(externalValue == nullptr)) {
[4510]406                throw std::runtime_error("Could not create static method call for external function \"" + callee->to_string() + "\"");
[4382]407            }
408            mExecutionEngine->addGlobalMapping(cast<GlobalValue>(externalValue), fn_ptr);
409            mCalleeMap[callee] = externalValue;
[4276]410        }
[4382]411        else {
[4510]412            throw std::runtime_error("External function \"" + callee->to_string() + "\" not installed");
[4382]413        }
[4276]414    }
415}
416
[4539]417void PabloCompiler::compileBlock(const PabloBlock & blk) {
418    for (const Statement * statement : blk) {
[4343]419        compileStatement(statement);
[4237]420    }
421}
422
[4541]423
424
425
426void PabloCompiler::compileIf(const If * ifStatement) {       
[4410]427        //
428        //  The If-ElseZero stmt:
429        //  if <predicate:expr> then <body:stmt>* elsezero <defined:var>* endif
430        //  If the value of the predicate is nonzero, then determine the values of variables
431        //  <var>* by executing the given statements.  Otherwise, the value of the
432        //  variables are all zero.  Requirements: (a) no variable that is defined within
433        //  the body of the if may be accessed outside unless it is explicitly
434        //  listed in the variable list, (b) every variable in the defined list receives
435        //  a value within the body, and (c) the logical consequence of executing
436        //  the statements in the event that the predicate is zero is that the
437        //  values of all defined variables indeed work out to be 0.
438        //
439        //  Simple Implementation with Phi nodes:  a phi node in the if exit block
440        //  is inserted for each variable in the defined variable list.  It receives
441        //  a zero value from the ifentry block and the defined value from the if
442        //  body.
443        //
[4628]444        BasicBlock * ifEntryBlock = mBuilder->GetInsertBlock();
[4274]445        BasicBlock * ifBodyBlock = BasicBlock::Create(mMod->getContext(), "if.body", mFunction, 0);
446        BasicBlock * ifEndBlock = BasicBlock::Create(mMod->getContext(), "if.end", mFunction, 0);
[4344]447       
[4566]448        const PabloBlockCarryData & cd = ifStatement -> getBody().carryData;
[4541]449   
[4566]450        const unsigned baseCarryDataIdx = cd.getBlockCarryDataIndex();
451        const unsigned carrySummaryIndex = cd.summaryCarryDataIndex();
[4541]452       
[4344]453        Value* if_test_value = compileExpression(ifStatement->getCondition());
[4568]454        if (cd.blockHasCarries()) {
[4541]455            // load the summary variable
456            Value* last_if_pending_data = genCarryDataLoad(carrySummaryIndex);
[4628]457            if_test_value = mBuilder->CreateOr(if_test_value, last_if_pending_data);
[4344]458        }
[4628]459        mBuilder->CreateCondBr(genBitBlockAny(if_test_value), ifEndBlock, ifBodyBlock);
[4237]460
[4344]461        // Entry processing is complete, now handle the body of the if.
[4628]462        mBuilder->SetInsertPoint(ifBodyBlock);
[4541]463        compileBlock(ifStatement -> getBody());
[4628]464   
[4568]465        if (cd.explicitSummaryRequired()) {
[4541]466            // If there was only one carry entry, then it also serves as the summary variable.
467            // Otherwise, we need to combine entries to compute the summary.
[4344]468            Value * carry_summary = mZeroInitializer;
[4541]469            for (int c = baseCarryDataIdx; c < carrySummaryIndex; c++) {
470                int s = mCarryDataSummaryIdx[c];
[4401]471                if (s == -1) {
[4545]472                    Value* carryq_value = mCarryOutVector[c];
[4410]473                    if (carry_summary == mZeroInitializer) {
474                        carry_summary = carryq_value;
475                    }
476                    else {
[4628]477                        carry_summary = mBuilder->CreateOr(carry_summary, carryq_value);
[4410]478                    }
[4541]479                    mCarryDataSummaryIdx[c] = carrySummaryIndex;
[4401]480                }
[4237]481            }
[4541]482            genCarryDataStore(carry_summary, carrySummaryIndex);
[4288]483        }
[4628]484        BasicBlock * ifBodyFinalBlock = mBuilder->GetInsertBlock();
485        mBuilder->CreateBr(ifEndBlock);
[4344]486        //End Block
[4628]487        mBuilder->SetInsertPoint(ifEndBlock);
[4510]488        for (const PabloAST * node : ifStatement->getDefined()) {
489            const Assign * assign = cast<Assign>(node);
[4628]490            PHINode * phi = mBuilder->CreatePHI(mBitBlockType, 2, assign->getName()->value());
[4510]491            auto f = mMarkerMap.find(assign);
[4595]492            assert (f != mMarkerMap.end());
[4344]493            phi->addIncoming(mZeroInitializer, ifEntryBlock);
[4628]494            phi->addIncoming(f->second, ifBodyFinalBlock);
[4510]495            mMarkerMap[assign] = phi;
[4237]496        }
[4568]497        // Create the phi Node for the summary variable, if needed.
498        if (cd.summaryNeededInParentBlock()) {
[4628]499            PHINode * summary_phi = mBuilder->CreatePHI(mBitBlockType, 2, "summary");
[4410]500            summary_phi->addIncoming(mZeroInitializer, ifEntryBlock);
[4628]501            summary_phi->addIncoming(mCarryOutVector[carrySummaryIndex], ifBodyFinalBlock);
[4545]502            mCarryOutVector[carrySummaryIndex] = summary_phi;
[4403]503        }
504       
[4535]505}
506
[4595]507// If the following preload is turned off, we have incorrect results with the
508// ./icgrep -c '[A-Z]((([a-zA-Z]*a[a-zA-Z]*[ ])*[a-zA-Z]*e[a-zA-Z]*[ ])*[a-zA-Z]*s[a-zA-Z]*[ ])*[.?!]' ../performance/data/howto
509   
510#define PRELOAD_WHILE_CARRIES_AT_TOP_LEVEL 1
511//#define SET_WHILE_CARRY_IN_TO_ZERO_AFTER_FIRST_ITERATION
512
[4535]513void PabloCompiler::compileWhile(const While * whileStatement) {
[4628]514        //BasicBlock* whileEntryBlock = mBasicBlock;
515        BasicBlock * whileEntryBlock = mBuilder->GetInsertBlock();
516        BasicBlock * whileBodyBlock = BasicBlock::Create(mMod->getContext(), "while.body", mFunction, 0);
517        BasicBlock * whileEndBlock = BasicBlock::Create(mMod->getContext(), "while.end", mFunction, 0);
[4595]518   
519   
[4566]520        const PabloBlockCarryData & cd = whileStatement -> getBody().carryData;
521        const unsigned baseCarryDataIdx = cd.getBlockCarryDataIndex();
[4595]522
523#ifdef PRELOAD_WHILE_CARRIES_AT_TOP_LEVEL
[4566]524        const unsigned carryDataSize = cd.getTotalCarryDataSize();
[4595]525        if (mWhileDepth == 0)
526#else
527        const unsigned carryDataSize = cd.getLocalCarryDataSize();
528#endif
529        {
530            for (auto i = baseCarryDataIdx; i < baseCarryDataIdx + carryDataSize; ++i) {
[4628]531                mCarryInVector[i] = mBuilder->CreateAlignedLoad(mBuilder->CreateGEP(mCarryDataPtr, mBuilder->getInt64(i)), BLOCK_SIZE/8, false);
[4257]532            }
[4289]533        }
[4276]534        SmallVector<const Next*, 4> nextNodes;
[4576]535        SmallVector<PHINode *, 4> nextPhis;
[4276]536        for (const PabloAST * node : whileStatement->getBody()) {
[4264]537            if (isa<Next>(node)) {
538                nextNodes.push_back(cast<Next>(node));
539            }
540        }
[4595]541   
542        // On entry to the while structure, proceed to execute the first iteration
543        // of the loop body unconditionally.   The while condition is tested at the end of
544        // the loop.
[4264]545
[4628]546        mBuilder->CreateBr(whileBodyBlock);
547        mBuilder->SetInsertPoint(whileBodyBlock);
[4595]548   
549        //
550        // There are 3 sets of Phi nodes for the while loop.
551        // (1) Carry-ins: (a) incoming carry data first iterations, (b) zero thereafter
552        // (2) Carry-out accumulators: (a) zero first iteration, (b) |= carry-out of each iteration
553        // (3) Next nodes: (a) values set up before loop, (b) modified values calculated in loop.
[4576]554
[4595]555#ifdef SET_WHILE_CARRY_IN_TO_ZERO_AFTER_FIRST_ITERATION
556        std::vector<PHINode *> carryInPhis(carryDataSize);
557#endif
558        std::vector<PHINode *> carryOutAccumPhis(carryDataSize);
559   
560        // Set initial values of phi nodes for loop body using values at while entry.
561        for (unsigned index = 0; index < carryDataSize; ++index) {
562#ifdef SET_WHILE_CARRY_IN_TO_ZERO_AFTER_FIRST_ITERATION
[4628]563            PHINode * phi_in = mBuilder->CreatePHI(mBitBlockType, 2);
[4595]564            phi_in->addIncoming(mCarryInVector[baseCarryDataIdx + index], whileEntryBlock);
565            carryInPhis[index] = phi_in;
566            mCarryInVector[baseCarryDataIdx + index] = phi_in;
567#endif
[4628]568            PHINode * phi_out = mBuilder->CreatePHI(mBitBlockType, 2);
[4595]569            phi_out->addIncoming(mZeroInitializer, whileEntryBlock);
570            carryOutAccumPhis[index] = phi_out;
571            mCarryOutVector[baseCarryDataIdx + index] = mZeroInitializer;
[4257]572        }
[4595]573   
574        // for any Next nodes in the loop body, initialize to (a) pre-loop value.
[4276]575        for (const Next * n : nextNodes) {
[4628]576            PHINode * phi = mBuilder->CreatePHI(mBitBlockType, 2, n->getName()->value());
[4410]577            auto f = mMarkerMap.find(n->getInitial());
[4264]578            assert (f != mMarkerMap.end());
[4595]579            phi->addIncoming(f->second, whileEntryBlock);
[4410]580            mMarkerMap[n->getInitial()] = phi;
[4576]581            nextPhis.push_back(phi);
[4264]582        }
[4237]583
[4595]584        //
585        // Now compile the loop body proper.  Carry-out accumulated values
586        // and iterated values of Next nodes will be computed.
587        ++mWhileDepth;
[4539]588        compileBlock(whileStatement->getBody());
[4595]589   
[4628]590        BasicBlock * whileBodyFinalBlock = mBuilder->GetInsertBlock();
[4595]591        // Add the phiNode branches for carry in, carry out nodes.
592        for (unsigned index = 0; index < carryDataSize; ++index) {
593#ifdef SET_WHILE_CARRY_IN_TO_ZERO_AFTER_FIRST_ITERATION
594            carryInPhis[index]->addIncoming(mZeroInitializer, whileBodyFinalBlock);
595#endif
596            PHINode * phi = carryOutAccumPhis[index];
[4628]597            Value * carryOut = mBuilder->CreateOr(phi, mCarryOutVector[baseCarryDataIdx + index]);
[4595]598            phi->addIncoming(carryOut, whileBodyFinalBlock);
599            mCarryOutVector[baseCarryDataIdx + index] = carryOut;
[4237]600        }
[4595]601
602        // Terminate the while loop body with a conditional branch back.
[4628]603        mBuilder->CreateCondBr(genBitBlockAny(compileExpression(whileStatement->getCondition())), whileEndBlock, whileBodyBlock);
[4595]604
[4264]605        // and for any Next nodes in the loop body
[4595]606        for (unsigned i = 0; i < nextNodes.size(); i++) {
[4576]607            const Next * n = nextNodes[i];
[4410]608            auto f = mMarkerMap.find(n->getInitial());
[4264]609            assert (f != mMarkerMap.end());
[4576]610            PHINode * phi = nextPhis[i];
[4595]611            if (LLVM_UNLIKELY(f->second == phi)) {
612                throw std::runtime_error("Unexpected Phi node for Next node.");
613            }
614            phi->addIncoming(f->second, whileBodyFinalBlock);
615            //mMarkerMap[n->getInitial()] = f->second;
[4264]616        }
[4237]617
[4257]618        // EXIT BLOCK
[4628]619        mBuilder->SetInsertPoint(whileEndBlock);
[4595]620        --mWhileDepth;
621
622#ifdef PRELOAD_WHILE_CARRIES_AT_TOP_LEVEL
623        if (mWhileDepth == 0)
624#endif
625        {
626            for (unsigned index = baseCarryDataIdx; index < baseCarryDataIdx + carryDataSize; ++index) {
[4628]627                mBuilder->CreateAlignedStore(mCarryOutVector[index], mBuilder->CreateGEP(mCarryDataPtr, mBuilder->getInt64(index)), BLOCK_SIZE/8, false);
[4289]628            }
[4237]629        }
[4535]630}
631
632void PabloCompiler::compileStatement(const Statement * stmt)
633{
634    if (const Assign * assign = dyn_cast<const Assign>(stmt)) {
635        Value * expr = compileExpression(assign->getExpr());
[4544]636        if (DumpTrace) {
637            genPrintRegister(assign->getName()->to_string(), expr);
638        }
[4535]639        mMarkerMap[assign] = expr;
640        if (LLVM_UNLIKELY(assign->isOutputAssignment())) {
641            SetOutputValue(expr, assign->getOutputIndex());
642        }
[4237]643    }
[4535]644    else if (const Next * next = dyn_cast<const Next>(stmt)) {
645        Value * expr = compileExpression(next->getExpr());
[4544]646        if (TraceNext) {
647            genPrintRegister(next->getInitial()->getName()->to_string(), expr);
648        }
[4535]649        mMarkerMap[next->getInitial()] = expr;
650    }
651    else if (const If * ifStatement = dyn_cast<const If>(stmt))
652    {
653        compileIf(ifStatement);
654    }
655    else if (const While * whileStatement = dyn_cast<const While>(stmt))
656    {
657        compileWhile(whileStatement);
658    }
[4410]659    else if (const Call* call = dyn_cast<Call>(stmt)) {
[4237]660        //Call the callee once and store the result in the marker map.
[4410]661        auto mi = mMarkerMap.find(call);
[4237]662        if (mi == mMarkerMap.end()) {
663            auto ci = mCalleeMap.find(call->getCallee());
[4280]664            if (LLVM_UNLIKELY(ci == mCalleeMap.end())) {
[4510]665                throw std::runtime_error("Unexpected error locating static function for \"" + call->getCallee()->to_string() + "\"");
[4237]666            }
[4628]667            mi = mMarkerMap.insert(std::make_pair(call, mBuilder->CreateCall(ci->second, mBasisBitsAddr))).first;
[4237]668        }
[4410]669        // return mi->second;
[4237]670    }
[4410]671    else if (const And * pablo_and = dyn_cast<And>(stmt)) {
[4628]672        Value * expr = mBuilder->CreateAnd(compileExpression(pablo_and->getExpr1()), compileExpression(pablo_and->getExpr2()), "and");
[4546]673        if (DumpTrace) {
674            genPrintRegister(stmt->getName()->to_string(), expr);
675        }
[4410]676        mMarkerMap[pablo_and] = expr;
677        // return expr;
[4237]678    }
[4410]679    else if (const Or * pablo_or = dyn_cast<Or>(stmt)) {
[4628]680        Value * expr = mBuilder->CreateOr(compileExpression(pablo_or->getExpr1()), compileExpression(pablo_or->getExpr2()), "or");
[4546]681        if (DumpTrace) {
682            genPrintRegister(stmt->getName()->to_string(), expr);
683        }
[4410]684        mMarkerMap[pablo_or] = expr;
685        // return expr;
[4237]686    }
[4410]687    else if (const Xor * pablo_xor = dyn_cast<Xor>(stmt)) {
[4628]688        Value * expr = mBuilder->CreateXor(compileExpression(pablo_xor->getExpr1()), compileExpression(pablo_xor->getExpr2()), "xor");
[4410]689        mMarkerMap[pablo_xor] = expr;
690        // return expr;
[4237]691    }
[4410]692    else if (const Sel * sel = dyn_cast<Sel>(stmt)) {
[4264]693        Value* ifMask = compileExpression(sel->getCondition());
[4628]694        Value* ifTrue = mBuilder->CreateAnd(ifMask, compileExpression(sel->getTrueExpr()));
695        Value* ifFalse = mBuilder->CreateAnd(genNot(ifMask), compileExpression(sel->getFalseExpr()));
696        Value * expr = mBuilder->CreateOr(ifTrue, ifFalse);
[4546]697        if (DumpTrace) {
698            genPrintRegister(stmt->getName()->to_string(), expr);
699        }
[4410]700        mMarkerMap[sel] = expr;
701        // return expr;
[4237]702    }
[4410]703    else if (const Not * pablo_not = dyn_cast<Not>(stmt)) {
704        Value * expr = genNot(compileExpression(pablo_not->getExpr()));
[4546]705        if (DumpTrace) {
706            genPrintRegister(stmt->getName()->to_string(), expr);
707        }
[4410]708        mMarkerMap[pablo_not] = expr;
709        // return expr;
[4237]710    }
[4410]711    else if (const Advance * adv = dyn_cast<Advance>(stmt)) {
[4237]712        Value* strm_value = compileExpression(adv->getExpr());
[4270]713        int shift = adv->getAdvanceAmount();
[4541]714        unsigned advance_index = adv->getLocalAdvanceIndex();
715        Value * expr = genAdvanceWithCarry(strm_value, shift, advance_index, stmt->getParent());
[4546]716        if (DumpTrace) {
717            genPrintRegister(stmt->getName()->to_string(), expr);
718        }
[4410]719        mMarkerMap[adv] = expr;
720        // return expr;
[4237]721    }
[4410]722    else if (const MatchStar * mstar = dyn_cast<MatchStar>(stmt))
[4237]723    {
[4410]724        Value * marker = compileExpression(mstar->getMarker());
725        Value * cc = compileExpression(mstar->getCharClass());
[4628]726        Value * marker_and_cc = mBuilder->CreateAnd(marker, cc);
[4541]727        unsigned carry_index = mstar->getLocalCarryIndex();
[4628]728        Value * expr = mBuilder->CreateOr(mBuilder->CreateXor(genAddWithCarry(marker_and_cc, cc, carry_index, stmt->getParent()), cc), marker, "matchstar");
[4546]729        if (DumpTrace) {
730            genPrintRegister(stmt->getName()->to_string(), expr);
731        }
[4410]732        mMarkerMap[mstar] = expr;
733        // return expr;
[4237]734    }
[4410]735    else if (const ScanThru * sthru = dyn_cast<ScanThru>(stmt))
[4237]736    {
[4410]737        Value * marker_expr = compileExpression(sthru->getScanFrom());
738        Value * cc_expr = compileExpression(sthru->getScanThru());
[4541]739        unsigned carry_index = sthru->getLocalCarryIndex();
[4628]740        Value * expr = mBuilder->CreateAnd(genAddWithCarry(marker_expr, cc_expr, carry_index, stmt->getParent()), genNot(cc_expr), "scanthru");
[4546]741        if (DumpTrace) {
742            genPrintRegister(stmt->getName()->to_string(), expr);
743        }
[4410]744        mMarkerMap[sthru] = expr;
745        // return expr;
[4237]746    }
[4359]747    else {
[4567]748        llvm::raw_os_ostream cerr(std::cerr);
749        PabloPrinter::print(stmt, cerr);
[4410]750        throw std::runtime_error("Unrecognized Pablo Statement! can't compile.");
751    }
752}
753
754Value * PabloCompiler::compileExpression(const PabloAST * expr) {
755    if (isa<Ones>(expr)) {
756        return mOneInitializer;
757    }
758    else if (isa<Zeroes>(expr)) {
759        return mZeroInitializer;
760    }
761    else if (const Next * next = dyn_cast<Next>(expr)) {
762        expr = next->getInitial();
763    }
764    auto f = mMarkerMap.find(expr);
765    if (f == mMarkerMap.end()) {
[4567]766        std::string o;
767        llvm::raw_string_ostream str(o);
[4416]768        str << "\"";
769        PabloPrinter::print(expr, str);
770        str << "\" was used before definition!";
771        throw std::runtime_error(str.str());
[4359]772    }
[4410]773    return f->second;
[4237]774}
775
[4537]776
#ifdef USE_UADD_OVERFLOW
#ifdef USE_TWO_UADD_OVERFLOW
/**
 * Emit a call to mFunctionUaddOverflow and unpack its two results.
 *
 * All instructions are inserted through mBuilder, so they land at the same
 * insertion point as the rest of the generated code rather than being appended
 * to mBasicBlock.
 *
 * @param int128_e1 first addend (block-wide integer).
 * @param int128_e2 second addend (block-wide integer).
 * @return the sum (result element 0) and the overflow bit (result element 1).
 */
PabloCompiler::SumWithOverflowPack PabloCompiler::callUaddOverflow(Value* int128_e1, Value* int128_e2) {
    Value* struct_res_params[] = {int128_e1, int128_e2};
    CallInst* struct_res = mBuilder->CreateCall(mFunctionUaddOverflow, struct_res_params, "uadd_overflow_res");
    struct_res->setCallingConv(CallingConv::C);
    struct_res->setTailCall(false);
    AttributeSet struct_res_PAL;
    struct_res->setAttributes(struct_res_PAL);

    SumWithOverflowPack ret;

    // Insert() keeps the ExtractValueInst type of the original code while
    // still honouring the builder's insertion point.
    const unsigned int128_sum_indices[] = {0};
    ret.sum = mBuilder->Insert(ExtractValueInst::Create(struct_res, int128_sum_indices), "sum");

    const unsigned int1_obit_indices[] = {1};
    ret.obit = mBuilder->Insert(ExtractValueInst::Create(struct_res, int1_obit_indices), "obit");

    return ret;
}
#else
/**
 * Emit a call to mFunctionUaddOverflowCarryin and unpack its two results.
 *
 * All instructions are inserted through mBuilder, so they land at the same
 * insertion point as the rest of the generated code rather than being appended
 * to mBasicBlock.
 *
 * @param int128_e1 first addend (block-wide integer).
 * @param int128_e2 second addend (block-wide integer).
 * @param int1_cin  carry-in bit.
 * @return the sum (result element 0) and the overflow bit (result element 1).
 */
PabloCompiler::SumWithOverflowPack PabloCompiler::callUaddOverflow(Value* int128_e1, Value* int128_e2, Value* int1_cin) {
    Value* struct_res_params[] = {int128_e1, int128_e2, int1_cin};
    CallInst* struct_res = mBuilder->CreateCall(mFunctionUaddOverflowCarryin, struct_res_params, "uadd_overflow_res");
    struct_res->setCallingConv(CallingConv::C);
    struct_res->setTailCall(false);
    AttributeSet struct_res_PAL;
    struct_res->setAttributes(struct_res_PAL);

    SumWithOverflowPack ret;

    // Insert() keeps the ExtractValueInst type of the original code while
    // still honouring the builder's insertion point.
    const unsigned int128_sum_indices[] = {0};
    ret.sum = mBuilder->Insert(ExtractValueInst::Create(struct_res, int128_sum_indices), "sum");

    const unsigned int1_obit_indices[] = {1};
    ret.obit = mBuilder->Insert(ExtractValueInst::Create(struct_res, int1_obit_indices), "obit");

    return ret;
}
#endif
#endif
[4237]827
[4537]828
[4541]829Value* PabloCompiler::genAddWithCarry(Value* e1, Value* e2, unsigned localIndex, const PabloBlock * blk) {
[4566]830    const PabloBlockCarryData & cd = blk->carryData;
831    const unsigned carryIdx = cd.carryOpCarryDataOffset(localIndex);
[4541]832    Value* carryq_value = genCarryDataLoad(carryIdx);
[4301]833#ifdef USE_TWO_UADD_OVERFLOW
834    //This is the ideal implementation, which uses two uadd.with.overflow
835    //The back end should be able to recognize this pattern and combine it into uadd.with.overflow.carryin
[4628]836    CastInst* int128_e1 = new BitCastInst(e1, mBuilder->getIntNTy(BLOCK_SIZE), "e1_128", mBasicBlock);
837    CastInst* int128_e2 = new BitCastInst(e2, mBuilder->getIntNTy(BLOCK_SIZE), "e2_128", mBasicBlock);
838    CastInst* int128_carryq_value = new BitCastInst(carryq_value, mBuilder->getIntNTy(BLOCK_SIZE), "carryq_128", mBasicBlock);
[4237]839
[4301]840    SumWithOverflowPack sumpack0, sumpack1;
841
842    sumpack0 = callUaddOverflow(int128_e1, int128_e2);
843    sumpack1 = callUaddOverflow(sumpack0.sum, int128_carryq_value);
844
[4628]845    Value* obit = mBuilder->CreateOr(sumpack0.obit, sumpack1.obit, "carry_bit");
846    Value* sum = mBuilder->CreateBitCast(sumpack1.sum, mBitBlockType, "ret_sum");
[4301]847
848    /*obit is the i1 carryout, zero extend and insert it into a v2i64 or v4i64 vector.*/
849    ConstantAggregateZero* const_packed_5 = ConstantAggregateZero::get(mBitBlockType);
850    ConstantInt* const_int32_6 = ConstantInt::get(mMod->getContext(), APInt(32, StringRef("0"), 10));
851    CastInst* int64_o0 = new ZExtInst(obit, IntegerType::get(mMod->getContext(), 64), "o0", mBasicBlock);
852    InsertElementInst* carry_out = InsertElementInst::Create(const_packed_5, int64_o0, const_int32_6, "carry_out", mBasicBlock);
853
854#elif defined USE_UADD_OVERFLOW
[4237]855    //use llvm.uadd.with.overflow.i128 or i256
[4628]856    CastInst* int128_e1 = new BitCastInst(e1, mBuilder->getIntNTy(BLOCK_SIZE), "e1_128", mBasicBlock);
857    CastInst* int128_e2 = new BitCastInst(e2, mBuilder->getIntNTy(BLOCK_SIZE), "e2_128", mBasicBlock);
[4301]858
859    //get i1 carryin from iBLOCK_SIZE
860    ConstantInt* const_int32_6 = ConstantInt::get(mMod->getContext(), APInt(32, StringRef("0"), 10));
[4240]861    ExtractElementInst * int64_carryq_value = ExtractElementInst::Create(carryq_value, const_int32_6, "carryq_64", mBasicBlock);
862    CastInst* int1_carryq_value = new TruncInst(int64_carryq_value, IntegerType::get(mMod->getContext(), 1), "carryq_1", mBasicBlock);
[4301]863
[4240]864    SumWithOverflowPack sumpack0;
865    sumpack0 = callUaddOverflow(int128_e1, int128_e2, int1_carryq_value);
866    Value* obit = sumpack0.obit;
[4628]867    Value* sum = mBuilder->CreateBitCast(sumpack0.sum, mBitBlockType, "sum");
[4301]868
[4237]869    /*obit is the i1 carryout, zero extend and insert it into a v2i64 or v4i64 vector.*/
[4275]870    ConstantAggregateZero* const_packed_5 = ConstantAggregateZero::get(mBitBlockType);
[4237]871    CastInst* int64_o0 = new ZExtInst(obit, IntegerType::get(mMod->getContext(), 64), "o0", mBasicBlock);
872    InsertElementInst* carry_out = InsertElementInst::Create(const_packed_5, int64_o0, const_int32_6, "carry_out", mBasicBlock);
[4301]873#elif (BLOCK_SIZE == 128)
[4237]874    //calculate carry through logical ops
[4628]875    Value* carrygen = mBuilder->CreateAnd(e1, e2, "carrygen");
876    Value* carryprop = mBuilder->CreateOr(e1, e2, "carryprop");
877    Value* digitsum = mBuilder->CreateAdd(e1, e2, "digitsum");
878    Value* partial = mBuilder->CreateAdd(digitsum, carryq_value, "partial");
879    Value* digitcarry = mBuilder->CreateOr(carrygen, mBuilder->CreateAnd(carryprop, genNot(partial)));
880    Value* mid_carry_in = genShiftLeft64(mBuilder->CreateLShr(digitcarry, 63), "mid_carry_in");
[4237]881
[4628]882    Value* sum = mBuilder->CreateAdd(partial, mid_carry_in, "sum");
883    Value* carry_out = genShiftHighbitToLow(BLOCK_SIZE, mBuilder->CreateOr(carrygen, mBuilder->CreateAnd(carryprop, genNot(sum))));
[4290]884#else
885    //BLOCK_SIZE == 256, there is no other implementation
886    static_assert(false, "Add with carry for 256-bit bitblock requires USE_UADD_OVERFLOW");
[4301]887#endif //USE_TWO_UADD_OVERFLOW
[4290]888
[4541]889    genCarryDataStore(carry_out, carryIdx);
[4237]890    return sum;
891}
[4543]892//#define CARRY_DEBUG
[4541]893Value* PabloCompiler::genCarryDataLoad(const unsigned index) {
[4545]894    assert (index < mCarryInVector.size());
895    if (mWhileDepth == 0) {
[4628]896        mCarryInVector[index] = mBuilder->CreateAlignedLoad(mBuilder->CreateGEP(mCarryDataPtr, mBuilder->getInt64(index)), BLOCK_SIZE/8, false);
[4257]897    }
[4542]898#ifdef CARRY_DEBUG
[4566]899    std::cerr << "genCarryDataLoad " << index << std::endl;
[4545]900    genPrintRegister("carry_in_" + std::to_string(index), mCarryInVector[index]);
[4542]901#endif
[4545]902    return mCarryInVector[index];
[4237]903}
904
[4541]905void PabloCompiler::genCarryDataStore(Value* carryOut, const unsigned index ) {
[4257]906    assert (carryOut);
[4545]907    assert (index < mCarryOutVector.size());
908    if (mWhileDepth == 0) {
[4628]909        mBuilder->CreateAlignedStore(carryOut, mBuilder->CreateGEP(mCarryDataPtr, mBuilder->getInt64(index)), BLOCK_SIZE/8, false);
[4257]910    }
[4541]911    mCarryDataSummaryIdx[index] = -1;
[4566]912    mCarryOutVector[index] = carryOut;
[4542]913#ifdef CARRY_DEBUG
[4566]914    std::cerr << "genCarryDataStore " << index << std::endl;
[4545]915    genPrintRegister("carry_out_" + std::to_string(index), mCarryOutVector[index]);
[4542]916#endif
[4546]917    //std::cerr << "mCarryOutVector[" << index << "]]\n";
[4237]918}
919
[4258]920inline Value* PabloCompiler::genBitBlockAny(Value* test) {
[4628]921    Value* cast_marker_value_1 = mBuilder->CreateBitCast(test, mBuilder->getIntNTy(BLOCK_SIZE));
922    return mBuilder->CreateICmpEQ(cast_marker_value_1, ConstantInt::get(mBuilder->getIntNTy(BLOCK_SIZE), 0));
[4237]923}
924
[4542]925Value * PabloCompiler::genShiftHighbitToLow(unsigned FieldWidth, Value * op) {
926    unsigned FieldCount = BLOCK_SIZE/FieldWidth;
927    VectorType * vType = VectorType::get(IntegerType::get(mMod->getContext(), FieldWidth), FieldCount);
[4628]928    Value * v = mBuilder->CreateBitCast(op, vType);
929    return mBuilder->CreateBitCast(mBuilder->CreateLShr(v, FieldWidth - 1), mBitBlockType);
[4237]930}
931
932Value* PabloCompiler::genShiftLeft64(Value* e, const Twine &namehint) {
[4628]933    Value* i128_val = mBuilder->CreateBitCast(e, mBuilder->getIntNTy(BLOCK_SIZE));
934    return mBuilder->CreateBitCast(mBuilder->CreateShl(i128_val, 64, namehint), mBitBlockType);
[4237]935}
936
[4253]937inline Value* PabloCompiler::genNot(Value* expr) {
[4628]938    return mBuilder->CreateXor(expr, mOneInitializer, "not");
[4237]939}
[4547]940
[4541]941Value* PabloCompiler::genAdvanceWithCarry(Value* strm_value, int shift_amount, unsigned localIndex, const PabloBlock * blk) {
[4547]942    if (shift_amount >= LongAdvanceBase) {
943        return genLongAdvanceWithCarry(strm_value, shift_amount, localIndex, blk);
944    }
[4566]945    else if (shift_amount == 1) {
946        return genUnitAdvanceWithCarry(strm_value, localIndex, blk);
947    }
948    const PabloBlockCarryData & cd = blk->carryData;
949    const auto advanceIndex = cd.shortAdvanceCarryDataOffset(localIndex);
[4547]950    Value* result_value;
951   
952    if (shift_amount == 0) {
953        result_value = genCarryDataLoad(advanceIndex);
954    }
[4566]955    else {
[4628]956        Value* advanceq_longint = mBuilder->CreateBitCast(genCarryDataLoad(advanceIndex), mBuilder->getIntNTy(BLOCK_SIZE));
957        Value* strm_longint = mBuilder->CreateBitCast(strm_value, mBuilder->getIntNTy(BLOCK_SIZE));
958        Value* adv_longint = mBuilder->CreateOr(mBuilder->CreateShl(strm_longint, shift_amount), mBuilder->CreateLShr(advanceq_longint, BLOCK_SIZE - shift_amount), "advance");
959        result_value = mBuilder->CreateBitCast(adv_longint, mBitBlockType);
[4547]960    }
[4566]961    genCarryDataStore(strm_value, advanceIndex);
962    return result_value;
963}
964                   
965Value* PabloCompiler::genUnitAdvanceWithCarry(Value* strm_value, unsigned localIndex, const PabloBlock * blk) {
966    const PabloBlockCarryData & cd = blk->carryData;
967    const auto advanceIndex = cd.unitAdvanceCarryDataOffset(localIndex);
968    Value* result_value;
969   
970#if (BLOCK_SIZE == 128) && !defined(USE_LONG_INTEGER_SHIFT)
971    Value* advanceq_value = genShiftHighbitToLow(BLOCK_SIZE, genCarryDataLoad(advanceIndex));
[4628]972    Value* srli_1_value = mBuilder->CreateLShr(strm_value, 63);
[4566]973    Value* packed_shuffle;
[4628]974    Constant* const_packed_1_elems [] = {mBuilder->getInt32(0), mBuilder->getInt32(2)};
[4566]975    Constant* const_packed_1 = ConstantVector::get(const_packed_1_elems);
[4628]976    packed_shuffle = mBuilder->CreateShuffleVector(advanceq_value, srli_1_value, const_packed_1);
[4566]977   
[4628]978    Constant* const_packed_2_elems[] = {mBuilder->getInt64(1), mBuilder->getInt64(1)};
[4566]979    Constant* const_packed_2 = ConstantVector::get(const_packed_2_elems);
980   
[4628]981    Value* shl_value = mBuilder->CreateShl(strm_value, const_packed_2);
982    result_value = mBuilder->CreateOr(shl_value, packed_shuffle, "advance");
[4547]983#else
[4628]984    Value* advanceq_longint = mBuilder->CreateBitCast(genCarryDataLoad(advanceIndex), mBuilder->getIntNTy(BLOCK_SIZE));
985    Value* strm_longint = mBuilder->CreateBitCast(strm_value, mBuilder->getIntNTy(BLOCK_SIZE));
986    Value* adv_longint = mBuilder->CreateOr(mBuilder->CreateShl(strm_longint, 1), mBuilder->CreateLShr(advanceq_longint, BLOCK_SIZE - 1), "advance");
987    result_value = mBuilder->CreateBitCast(adv_longint, mBitBlockType);
[4547]988   
989#endif
990    genCarryDataStore(strm_value, advanceIndex);
991    return result_value;
992}
[4566]993                   
[4595]994//
[4547]995// Generate code for long advances >= LongAdvanceBase
996//
997Value* PabloCompiler::genLongAdvanceWithCarry(Value* strm_value, int shift_amount, unsigned localIndex, const PabloBlock * blk) {
[4566]998    const PabloBlockCarryData & cd = blk->carryData;
[4549]999    const unsigned block_shift = shift_amount % BLOCK_SIZE;
[4566]1000    const unsigned advanceEntries = cd.longAdvanceEntries(shift_amount);
1001    const unsigned bufsize = cd.longAdvanceBufferSize(shift_amount);
[4595]1002    //std::cerr << "shift_amount = " << shift_amount << " bufsize = " << bufsize << std::endl;
[4628]1003    Value * indexMask = mBuilder->getInt64(bufsize - 1);  // A mask to implement circular buffer indexing
1004    Value * advBaseIndex = mBuilder->getInt64(cd.longAdvanceCarryDataOffset(localIndex));
1005    Value * storeIndex = mBuilder->CreateAdd(mBuilder->CreateAnd(mBlockNo, indexMask), advBaseIndex);
1006    Value * loadIndex = mBuilder->CreateAdd(mBuilder->CreateAnd(mBuilder->CreateSub(mBlockNo, mBuilder->getInt64(advanceEntries)), indexMask), advBaseIndex);
1007    Value * storePtr = mBuilder->CreateGEP(mCarryDataPtr, storeIndex);
1008    Value * loadPtr = mBuilder->CreateGEP(mCarryDataPtr, loadIndex);
[4421]1009    Value* result_value;
[4237]1010
[4547]1011    if (block_shift == 0) {
[4628]1012        result_value = mBuilder->CreateAlignedLoad(loadPtr, BLOCK_SIZE/8);
[4547]1013    }
[4549]1014    else if (advanceEntries == 1) {
[4628]1015        Value* advanceq_longint = mBuilder->CreateBitCast(mBuilder->CreateAlignedLoad(loadPtr, BLOCK_SIZE/8), mBuilder->getIntNTy(BLOCK_SIZE));
1016        Value* strm_longint = mBuilder->CreateBitCast(strm_value, mBuilder->getIntNTy(BLOCK_SIZE));
1017        Value* adv_longint = mBuilder->CreateOr(mBuilder->CreateShl(strm_longint, block_shift), mBuilder->CreateLShr(advanceq_longint, BLOCK_SIZE - block_shift), "advance");
1018        result_value = mBuilder->CreateBitCast(adv_longint, mBitBlockType);
[4267]1019    }
[4421]1020    else {
[4549]1021        // The advance is based on the two oldest bit blocks in the advance buffer.
[4566]1022        // The buffer is maintained as a circular buffer of size bufsize.
[4549]1023        // Indexes within the buffer are computed by bitwise and with the indexMask.
[4628]1024        Value * loadIndex2 = mBuilder->CreateAdd(mBuilder->CreateAnd(mBuilder->CreateSub(mBlockNo, mBuilder->getInt64(advanceEntries-1)), indexMask), advBaseIndex);
1025        Value * loadPtr2 = mBuilder->CreateGEP(mCarryDataPtr, loadIndex2);
1026        Value* advanceq_longint = mBuilder->CreateBitCast(mBuilder->CreateAlignedLoad(loadPtr, BLOCK_SIZE/8), mBuilder->getIntNTy(BLOCK_SIZE));
1027        //genPrintRegister("advanceq_longint", mBuilder->CreateBitCast(advanceq_longint, mBitBlockType));
1028        Value* strm_longint = mBuilder->CreateBitCast(mBuilder->CreateAlignedLoad(loadPtr2, BLOCK_SIZE/8), mBuilder->getIntNTy(BLOCK_SIZE));
1029        //genPrintRegister("strm_longint", mBuilder->CreateBitCast(strm_longint, mBitBlockType));
1030        Value* adv_longint = mBuilder->CreateOr(mBuilder->CreateShl(strm_longint, block_shift), mBuilder->CreateLShr(advanceq_longint, BLOCK_SIZE - block_shift), "longadvance");
1031        result_value = mBuilder->CreateBitCast(adv_longint, mBitBlockType);
[4267]1032    }
[4628]1033    mBuilder->CreateAlignedStore(strm_value, storePtr, BLOCK_SIZE/8);
[4421]1034    return result_value;
[4237]1035}
[4547]1036   
[4268]1037void PabloCompiler::SetOutputValue(Value * marker, const unsigned index) {
1038    if (marker->getType()->isPointerTy()) {
[4628]1039        marker = mBuilder->CreateAlignedLoad(marker, BLOCK_SIZE/8, false);
[4268]1040    }
[4628]1041    Value* indices[] = {mBuilder->getInt64(0), mBuilder->getInt32(index)};
1042    Value* gep = mBuilder->CreateGEP(mOutputAddrPtr, indices);
1043    mBuilder->CreateAlignedStore(marker, gep, BLOCK_SIZE/8, false);
[4237]1044}
[4268]1045
[4538]1046CompiledPabloFunction::CompiledPabloFunction(size_t carryDataSize, Function * function, ExecutionEngine * executionEngine)
1047: CarryDataSize(carryDataSize)
[4516]1048, FunctionPointer(executionEngine->getPointerToFunction(function))
1049, mFunction(function)
1050, mExecutionEngine(executionEngine)
1051{
1052
[4268]1053}
[4516]1054
1055// Clean up the memory for the compiled function once we're finished using it.
1056CompiledPabloFunction::~CompiledPabloFunction() {
1057    if (mExecutionEngine) {
1058        assert (mFunction);
1059        // mExecutionEngine->freeMachineCodeForFunction(mFunction); // This function only prints a "not supported" message. Reevaluate with LLVM 3.6.
1060        delete mExecutionEngine;
1061    }
1062}
1063
1064}
Note: See TracBrowser for help on using the repository browser.