source: icGREP/icgrep-devel/icgrep/pablo/pablo_compiler.cpp @ 4353

Last change on this file since 4353 was 4353, checked in by cameron, 5 years ago

Group options; hide LLVM options

File size: 46.0 KB
Line 
1/*
2 *  Copyright (c) 2014 International Characters.
3 *  This software is licensed to the public under the Open Software License 3.0.
4 *  icgrep is a trademark of International Characters.
5 */
6
7/*
8 *  Copyright (c) 2014 International Characters.
9 *  This software is licensed to the public under the Open Software License 3.0.
10 *  icgrep is a trademark of International Characters.
11 */
12
13#include <pablo/pablo_compiler.h>
14#include <pablo/codegenstate.h>
15#include <pablo/printer_pablos.h>
16#include <cc/cc_namemap.hpp>
17#include <re/re_name.h>
18#include <stdexcept>
19#include <include/simd-lib/bitblock.hpp>
20
21#ifdef USE_LLVM_3_4
22#include <llvm/Analysis/Verifier.h>
23#include <llvm/Assembly/PrintModulePass.h>
24#include <llvm/Linker.h>
25#endif
26#ifdef USE_LLVM_3_5
27#include <llvm/IR/Verifier.h>
28#endif
29
30#include <llvm/Pass.h>
31#include <llvm/PassManager.h>
32#include <llvm/ADT/SmallVector.h>
33#include <llvm/Analysis/Passes.h>
34#include <llvm/IR/BasicBlock.h>
35#include <llvm/IR/CallingConv.h>
36#include <llvm/IR/Constants.h>
37#include <llvm/IR/DataLayout.h>
38#include <llvm/IR/DerivedTypes.h>
39#include <llvm/IR/Function.h>
40#include <llvm/IR/GlobalVariable.h>
41#include <llvm/IR/InlineAsm.h>
42#include <llvm/IR/Instructions.h>
43#include <llvm/IR/LLVMContext.h>
44#include <llvm/IR/Module.h>
45#include <llvm/Support/FormattedStream.h>
46#include <llvm/Support/MathExtras.h>
47#include <llvm/Support/Casting.h>
48#include <llvm/Support/Compiler.h>
49#include <llvm/Support/Debug.h>
50#include <llvm/Support/TargetSelect.h>
51#include <llvm/Support/Host.h>
52#include <llvm/Transforms/Scalar.h>
53#include <llvm/ExecutionEngine/ExecutionEngine.h>
54#include <llvm/ExecutionEngine/MCJIT.h>
55#include <llvm/IRReader/IRReader.h>
56#include <llvm/Bitcode/ReaderWriter.h>
57#include <llvm/Support/MemoryBuffer.h>
58#include <llvm/IR/IRBuilder.h>
59
60//#define DUMP_GENERATED_IR
61#include "llvm/Support/CommandLine.h"
62
63cl::OptionCategory eIRDumpOptions("LLVM IR Dump Options",
64                                    "These options control dumping of LLVM IR.");
65
66static cl::opt<bool> DumpGeneratedIR("dump-generated-IR", cl::init(false), cl::desc("print LLVM IR generated by RE compilation"), cl::cat(eIRDumpOptions));
67
68extern "C" {
69  void wrapped_print_register(BitBlock bit_block) {
70      print_register<BitBlock>("", bit_block);
71  }
72}
73
74#define CREATE_GENERAL_CODE_CATEGORY(SUFFIX) \
75SUFFIX * f##SUFFIX = nullptr; \
76extern "C" { \
77    BitBlock __get_category_##SUFFIX(Basis_bits &basis_bits) { \
78        if (f##SUFFIX == nullptr) f##SUFFIX = new SUFFIX(); \
79        Struct_##SUFFIX output; \
80        f##SUFFIX->do_block(basis_bits, output); \
81        return output.cc; \
82    } \
83}
84
85CREATE_GENERAL_CODE_CATEGORY(Cc)
86CREATE_GENERAL_CODE_CATEGORY(Cf)
87CREATE_GENERAL_CODE_CATEGORY(Cn)
88CREATE_GENERAL_CODE_CATEGORY(Co)
89CREATE_GENERAL_CODE_CATEGORY(Cs)
90CREATE_GENERAL_CODE_CATEGORY(Ll)
91CREATE_GENERAL_CODE_CATEGORY(Lm)
92CREATE_GENERAL_CODE_CATEGORY(Lo)
93CREATE_GENERAL_CODE_CATEGORY(Lt)
94CREATE_GENERAL_CODE_CATEGORY(Lu)
95CREATE_GENERAL_CODE_CATEGORY(Mc)
96CREATE_GENERAL_CODE_CATEGORY(Me)
97CREATE_GENERAL_CODE_CATEGORY(Mn)
98CREATE_GENERAL_CODE_CATEGORY(Nd)
99CREATE_GENERAL_CODE_CATEGORY(Nl)
100CREATE_GENERAL_CODE_CATEGORY(No)
101CREATE_GENERAL_CODE_CATEGORY(Pc)
102CREATE_GENERAL_CODE_CATEGORY(Pd)
103CREATE_GENERAL_CODE_CATEGORY(Pe)
104CREATE_GENERAL_CODE_CATEGORY(Pf)
105CREATE_GENERAL_CODE_CATEGORY(Pi)
106CREATE_GENERAL_CODE_CATEGORY(Po)
107CREATE_GENERAL_CODE_CATEGORY(Ps)
108CREATE_GENERAL_CODE_CATEGORY(Sc)
109CREATE_GENERAL_CODE_CATEGORY(Sk)
110CREATE_GENERAL_CODE_CATEGORY(Sm)
111CREATE_GENERAL_CODE_CATEGORY(So)
112CREATE_GENERAL_CODE_CATEGORY(Zl)
113CREATE_GENERAL_CODE_CATEGORY(Zp)
114CREATE_GENERAL_CODE_CATEGORY(Zs)
115
116#undef CREATE_GENERAL_CODE_CATEGORY
117
118namespace pablo {
119
120PabloCompiler::PabloCompiler(const std::vector<Var*> & basisBits)
121: mBasisBits(basisBits)
122, mMod(new Module("icgrep", getGlobalContext()))
123, mBasicBlock(nullptr)
124, mExecutionEngine(nullptr)
125, mBitBlockType(VectorType::get(IntegerType::get(mMod->getContext(), 64), BLOCK_SIZE / 64))
126, mBasisBitsInputPtr(nullptr)
127, mCarryQueueIdx(0)
128, mCarryQueuePtr(nullptr)
129, mNestingDepth(0)
130, mCarryQueueSize(0)
131, mAdvanceQueueIdx(0)
132, mAdvanceQueuePtr(nullptr)
133, mAdvanceQueueSize(0)
134, mZeroInitializer(ConstantAggregateZero::get(mBitBlockType))
135, mOneInitializer(ConstantVector::getAllOnesValue(mBitBlockType))
136, mFunctionType(nullptr)
137, mFunction(nullptr)
138, mBasisBitsAddr(nullptr)
139, mOutputAddrPtr(nullptr)
140, mMaxNestingDepth(0)
141{
142    //Create the jit execution engine.up
143    InitializeNativeTarget();
144    InitializeNativeTargetAsmPrinter();
145    InitializeNativeTargetAsmParser();
146    DefineTypes();
147    DeclareFunctions();
148}
149
150PabloCompiler::~PabloCompiler()
151{
152    delete mMod;
153    delete fPs;
154    delete fNl;
155    delete fNo;
156    delete fLo;
157    delete fLl;
158    delete fLm;
159    delete fNd;
160    delete fPc;
161    delete fLt;
162    delete fLu;
163    delete fPf;
164    delete fPd;
165    delete fPe;
166    delete fPi;
167    delete fPo;
168    delete fMe;
169    delete fMc;
170    delete fMn;
171    delete fSk;
172    delete fSo;
173    delete fSm;
174    delete fSc;
175    delete fZl;
176    delete fCo;
177    delete fCn;
178    delete fCc;
179    delete fCf;
180    delete fCs;
181    delete fZp;
182    delete fZs;
183}
184
185LLVM_Gen_RetVal PabloCompiler::compile(PabloBlock & pb)
186{
187    mNestingDepth = 0;
188    mMaxNestingDepth = 0;
189    mCarryQueueSize = 0;
190    mAdvanceQueueSize = 0;
191    Examine(pb.statements());
192    mCarryQueueVector.resize(mCarryQueueSize);
193    mAdvanceQueueVector.resize(mAdvanceQueueSize);
194    std::string errMessage;
195    EngineBuilder builder(mMod);
196    builder.setErrorStr(&errMessage);
197    builder.setMCPU(sys::getHostCPUName());
198    builder.setUseMCJIT(true);
199    builder.setOptLevel(mMaxNestingDepth ? CodeGenOpt::Level::Less : CodeGenOpt::Level::None);
200    mExecutionEngine = builder.create();
201    if (mExecutionEngine == nullptr) {
202        throw std::runtime_error("Could not create ExecutionEngine: " + errMessage);
203    }
204
205    if (!mCalleeMap.empty()) {
206        DeclareCallFunctions();
207    }
208
209    Function::arg_iterator args = mFunction->arg_begin();
210    mBasisBitsAddr = args++;
211    mBasisBitsAddr->setName("basis_bits");
212    mCarryQueuePtr = args++;
213    mCarryQueuePtr->setName("carry_q");
214    mAdvanceQueuePtr = args++;
215    mAdvanceQueuePtr->setName("advance_q");
216    mOutputAddrPtr = args++;
217    mOutputAddrPtr->setName("output");
218
219    //Create the carry and advance queues.
220    mCarryQueueIdx = 0;
221    mAdvanceQueueIdx = 0;
222    mNestingDepth = 0;
223    mMaxNestingDepth = 0;
224    mBasicBlock = BasicBlock::Create(mMod->getContext(), "parabix_entry", mFunction,0);
225
226    //The basis bits structure
227    for (unsigned i = 0; i != mBasisBits.size(); ++i) {
228        IRBuilder<> b(mBasicBlock);
229        Value* indices[] = {b.getInt64(0), b.getInt32(i)};
230        const String * const name = mBasisBits[i]->getName();
231        Value * gep = b.CreateGEP(mBasisBitsAddr, indices);
232        LoadInst * basisBit = b.CreateAlignedLoad(gep, BLOCK_SIZE/8, false, name->str());
233        mMarkerMap.insert(std::make_pair(name, basisBit));
234    }
235
236    //Generate the IR instructions for the function.
237    compileStatements(pb.statements());
238
239    assert (mCarryQueueIdx == mCarryQueueSize);
240    assert (mAdvanceQueueIdx == mAdvanceQueueSize);
241    assert (mNestingDepth == 0);
242    //Terminate the block
243    ReturnInst::Create(mMod->getContext(), mBasicBlock);
244
245    //Display the IR that has been generated by this module.
246    if (DumpGeneratedIR) {
247      mMod->dump();
248    }
249
250
251
252    //Create a verifier.  The verifier will print an error message if our module is malformed in any way.
253    #ifdef USE_LLVM_3_5
254    verifyModule(*mMod, &dbgs());
255    #endif
256    #ifdef USE_LLVM_3_4
257    verifyModule(*mMod, PrintMessageAction);
258    #endif
259
260    //Use the pass manager to run optimizations on the function.
261    FunctionPassManager fpm(mMod);
262 #ifdef USE_LLVM_3_5
263    mMod->setDataLayout(mExecutionEngine->getDataLayout());
264    // Set up the optimizer pipeline.  Start with registering info about how the target lays out data structures.
265    fpm.add(new DataLayoutPass(mMod));
266#endif
267#ifdef USE_LLVM_3_4
268    fpm.add(new DataLayout(*mExecutionEngine->getDataLayout()));
269#endif
270    fpm.doInitialization();
271    fpm.run(*mFunction);
272
273    mExecutionEngine->finalizeObject();
274
275    LLVM_Gen_RetVal retVal;
276    //Return the required size of the carry queue and a pointer to the process_block function.
277    retVal.carry_q_size = mCarryQueueVector.size();
278    retVal.advance_q_size = mAdvanceQueueVector.size();
279    retVal.process_block_fptr = mExecutionEngine->getPointerToFunction(mFunction);
280
281    return retVal;
282}
283
284void PabloCompiler::DefineTypes()
285{
286    StructType * structBasisBits = mMod->getTypeByName("struct.Basis_bits");
287    if (structBasisBits == nullptr) {
288        structBasisBits = StructType::create(mMod->getContext(), "struct.Basis_bits");
289    }
290    std::vector<Type*>StructTy_struct_Basis_bits_fields;
291    for (int i = 0; i != mBasisBits.size(); i++)
292    {
293        StructTy_struct_Basis_bits_fields.push_back(mBitBlockType);
294    }
295    if (structBasisBits->isOpaque()) {
296        structBasisBits->setBody(StructTy_struct_Basis_bits_fields, /*isPacked=*/false);
297    }
298    mBasisBitsInputPtr = PointerType::get(structBasisBits, 0);
299
300    std::vector<Type*>functionTypeArgs;
301    functionTypeArgs.push_back(mBasisBitsInputPtr);
302
303    //The carry q array.
304    //A pointer to the BitBlock vector.
305    functionTypeArgs.push_back(PointerType::get(mBitBlockType, 0));
306    // Advance q array
307    functionTypeArgs.push_back(PointerType::get(mBitBlockType, 0));
308
309    //The output structure.
310    StructType * outputStruct = mMod->getTypeByName("struct.Output");
311    if (!outputStruct) {
312        outputStruct = StructType::create(mMod->getContext(), "struct.Output");
313    }
314    if (outputStruct->isOpaque()) {
315        std::vector<Type*>fields;
316        fields.push_back(mBitBlockType);
317        fields.push_back(mBitBlockType);
318        outputStruct->setBody(fields, /*isPacked=*/false);
319    }
320    PointerType* outputStructPtr = PointerType::get(outputStruct, 0);
321
322    //The &output parameter.
323    functionTypeArgs.push_back(outputStructPtr);
324
325    mFunctionType = FunctionType::get(
326     /*Result=*/Type::getVoidTy(mMod->getContext()),
327     /*Params=*/functionTypeArgs,
328     /*isVarArg=*/false);
329}
330
331void PabloCompiler::DeclareFunctions()
332{
333    //This function can be used for testing to print the contents of a register from JIT'd code to the terminal window.
334    //mFunc_print_register = mMod->getOrInsertFunction("wrapped_print_register", Type::getVoidTy(getGlobalContext()), mXi64Vect, NULL);
335    //mExecutionEngine->addGlobalMapping(cast<GlobalValue>(mFunc_print_register), (void *)&wrapped_print_register);
336    // to call->  b.CreateCall(mFunc_print_register, unicode_category);
337
338#ifdef USE_UADD_OVERFLOW
339#ifdef USE_TWO_UADD_OVERFLOW
340    // Type Definitions for llvm.uadd.with.overflow.carryin.i128 or .i256
341    std::vector<Type*>StructTy_0_fields;
342    StructTy_0_fields.push_back(IntegerType::get(mMod->getContext(), BLOCK_SIZE));
343    StructTy_0_fields.push_back(IntegerType::get(mMod->getContext(), 1));
344    StructType *StructTy_0 = StructType::get(mMod->getContext(), StructTy_0_fields, /*isPacked=*/false);
345
346    std::vector<Type*>FuncTy_1_args;
347    FuncTy_1_args.push_back(IntegerType::get(mMod->getContext(), BLOCK_SIZE));
348    FuncTy_1_args.push_back(IntegerType::get(mMod->getContext(), BLOCK_SIZE));
349    FunctionType* FuncTy_1 = FunctionType::get(
350                                              /*Result=*/StructTy_0,
351                                              /*Params=*/FuncTy_1_args,
352                                              /*isVarArg=*/false);
353
354    mFunctionUaddOverflow = mMod->getFunction("llvm.uadd.with.overflow.i" +
355                                              std::to_string(BLOCK_SIZE));
356    if (!mFunctionUaddOverflow) {
357        mFunctionUaddOverflow= Function::Create(
358          /*Type=*/ FuncTy_1,
359          /*Linkage=*/ GlobalValue::ExternalLinkage,
360          /*Name=*/ "llvm.uadd.with.overflow.i" + std::to_string(BLOCK_SIZE), mMod); // (external, no body)
361        mFunctionUaddOverflow->setCallingConv(CallingConv::C);
362    }
363    AttributeSet mFunctionUaddOverflowPAL;
364    {
365        SmallVector<AttributeSet, 4> Attrs;
366        AttributeSet PAS;
367        {
368          AttrBuilder B;
369          B.addAttribute(Attribute::NoUnwind);
370          B.addAttribute(Attribute::ReadNone);
371          PAS = AttributeSet::get(mMod->getContext(), ~0U, B);
372        }
373
374        Attrs.push_back(PAS);
375        mFunctionUaddOverflowPAL = AttributeSet::get(mMod->getContext(), Attrs);
376    }
377    mFunctionUaddOverflow->setAttributes(mFunctionUaddOverflowPAL);
378#else
379    // Type Definitions for llvm.uadd.with.overflow.carryin.i128 or .i256
380    std::vector<Type*>StructTy_0_fields;
381    StructTy_0_fields.push_back(IntegerType::get(mMod->getContext(), BLOCK_SIZE));
382    StructTy_0_fields.push_back(IntegerType::get(mMod->getContext(), 1));
383    StructType *StructTy_0 = StructType::get(mMod->getContext(), StructTy_0_fields, /*isPacked=*/false);
384
385    std::vector<Type*>FuncTy_1_args;
386    FuncTy_1_args.push_back(IntegerType::get(mMod->getContext(), BLOCK_SIZE));
387    FuncTy_1_args.push_back(IntegerType::get(mMod->getContext(), BLOCK_SIZE));
388    FuncTy_1_args.push_back(IntegerType::get(mMod->getContext(), 1));
389    FunctionType* FuncTy_1 = FunctionType::get(
390                                              /*Result=*/StructTy_0,
391                                              /*Params=*/FuncTy_1_args,
392                                              /*isVarArg=*/false);
393
394    mFunctionUaddOverflowCarryin = mMod->getFunction("llvm.uadd.with.overflow.carryin.i" +
395                                              std::to_string(BLOCK_SIZE));
396    if (!mFunctionUaddOverflowCarryin) {
397        mFunctionUaddOverflowCarryin = Function::Create(
398          /*Type=*/ FuncTy_1,
399          /*Linkage=*/ GlobalValue::ExternalLinkage,
400          /*Name=*/ "llvm.uadd.with.overflow.carryin.i" + std::to_string(BLOCK_SIZE), mMod); // (external, no body)
401        mFunctionUaddOverflowCarryin->setCallingConv(CallingConv::C);
402    }
403    AttributeSet mFunctionUaddOverflowCarryinPAL;
404    {
405        SmallVector<AttributeSet, 4> Attrs;
406        AttributeSet PAS;
407        {
408          AttrBuilder B;
409          B.addAttribute(Attribute::NoUnwind);
410          B.addAttribute(Attribute::ReadNone);
411          PAS = AttributeSet::get(mMod->getContext(), ~0U, B);
412        }
413
414        Attrs.push_back(PAS);
415        mFunctionUaddOverflowCarryinPAL = AttributeSet::get(mMod->getContext(), Attrs);
416    }
417    mFunctionUaddOverflowCarryin->setAttributes(mFunctionUaddOverflowCarryinPAL);
418#endif
419#endif
420
421    //Starts on process_block
422    SmallVector<AttributeSet, 5> Attrs;
423    AttributeSet PAS;
424    {
425        AttrBuilder B;
426        B.addAttribute(Attribute::ReadOnly);
427        B.addAttribute(Attribute::NoCapture);
428        PAS = AttributeSet::get(mMod->getContext(), 1U, B);
429    }
430    Attrs.push_back(PAS);
431    {
432        AttrBuilder B;
433        B.addAttribute(Attribute::NoCapture);
434        PAS = AttributeSet::get(mMod->getContext(), 2U, B);
435    }
436    Attrs.push_back(PAS);
437    {
438        AttrBuilder B;
439        B.addAttribute(Attribute::NoCapture);
440        PAS = AttributeSet::get(mMod->getContext(), 3U, B);
441    }
442    Attrs.push_back(PAS);
443    {
444        AttrBuilder B;
445        B.addAttribute(Attribute::NoCapture);
446        PAS = AttributeSet::get(mMod->getContext(), 4U, B);
447    }
448    Attrs.push_back(PAS);
449    {
450        AttrBuilder B;
451        B.addAttribute(Attribute::NoUnwind);
452        B.addAttribute(Attribute::UWTable);
453        PAS = AttributeSet::get(mMod->getContext(), ~0U, B);
454    }
455    AttributeSet AttrSet = AttributeSet::get(mMod->getContext(), Attrs);
456
457    //Create the function that will be generated.
458    mFunction = mMod->getFunction("process_block");
459    if (!mFunction) {
460        mFunction = Function::Create(
461            /*Type=*/mFunctionType,
462            /*Linkage=*/GlobalValue::ExternalLinkage,
463            /*Name=*/"process_block", mMod);
464        mFunction->setCallingConv(CallingConv::C);
465    }
466    mFunction->setAttributes(AttrSet);
467}
468
469void PabloCompiler::Examine(StatementList & stmts) {
470    for (Statement * stmt : stmts) {
471        if (Assign * assign = dyn_cast<Assign>(stmt)) {
472            Examine(assign->getExpr());
473        }
474        if (Next * next = dyn_cast<Next>(stmt)) {
475            Examine(next->getExpr());
476        }
477        else if (If * ifStatement = dyn_cast<If>(stmt)) {
478            const auto preIfCarryCount = mCarryQueueSize;
479            const auto preIfAdvanceCount = mAdvanceQueueSize;
480            Examine(ifStatement->getCondition());
481            Examine(ifStatement->getBody());
482            int ifCarryCount = mCarryQueueSize - preIfCarryCount;
483            int ifAdvanceCount = mAdvanceQueueSize - preIfAdvanceCount;
484            if ((ifCarryCount + ifAdvanceCount) > 1) {
485              ++mAdvanceQueueSize;
486              ++ifAdvanceCount;
487            }
488            ifStatement->setInclusiveCarryCount(ifCarryCount);
489            ifStatement->setInclusiveAdvanceCount(ifAdvanceCount);
490        }
491        else if (While * whileStatement = dyn_cast<While>(stmt)) {
492            const auto preWhileCarryCount = mCarryQueueSize;
493            const auto preWhileAdvanceCount = mAdvanceQueueSize;
494            Examine(whileStatement->getCondition());
495            mMaxNestingDepth = std::max(mMaxNestingDepth, ++mNestingDepth);
496            Examine(whileStatement->getBody());
497            --mNestingDepth;
498            whileStatement->setInclusiveCarryCount(mCarryQueueSize - preWhileCarryCount);
499            whileStatement->setInclusiveAdvanceCount(mAdvanceQueueSize - preWhileAdvanceCount);
500        }
501    }
502}
503
504void PabloCompiler::Examine(PabloAST *expr)
505{
506    if (Call * call = dyn_cast<Call>(expr)) {
507        mCalleeMap.insert(std::make_pair(call->getCallee(), nullptr));
508    }
509    else if (And * pablo_and = dyn_cast<And>(expr)) {
510        Examine(pablo_and->getExpr1());
511        Examine(pablo_and->getExpr2());
512    }
513    else if (Or * pablo_or = dyn_cast<Or>(expr)) {
514        Examine(pablo_or->getExpr1());
515        Examine(pablo_or->getExpr2());
516    }
517    else if (Sel * pablo_sel = dyn_cast<Sel>(expr)) {
518        Examine(pablo_sel->getCondition());
519        Examine(pablo_sel->getTrueExpr());
520        Examine(pablo_sel->getFalseExpr());
521    }
522    else if (Not * pablo_not = dyn_cast<Not>(expr)) {
523        Examine(pablo_not->getExpr());
524    }
525    else if (Advance * adv = dyn_cast<Advance>(expr)) {
526        ++mAdvanceQueueSize;
527        Examine(adv->getExpr());
528    }
529    else if (MatchStar * mstar = dyn_cast<MatchStar>(expr)) {
530        ++mCarryQueueSize;
531        Examine(mstar->getMarker());
532        Examine(mstar->getCharClass());
533    }
534    else if (ScanThru * sthru = dyn_cast<ScanThru>(expr)) {
535        ++mCarryQueueSize;
536        Examine(sthru->getScanFrom());
537        Examine(sthru->getScanThru());
538    }
539}
540
541void PabloCompiler::DeclareCallFunctions() {
542    for (auto mapping : mCalleeMap) {
543        const String * callee = mapping.first;
544        void * callee_ptr = nullptr;
545        #define CHECK_GENERAL_CODE_CATEGORY(SUFFIX) \
546            if (callee->str() == #SUFFIX) { \
547                callee_ptr = (void*)&__get_category_##SUFFIX; \
548            } else
549        CHECK_GENERAL_CODE_CATEGORY(Cc)
550        CHECK_GENERAL_CODE_CATEGORY(Cf)
551        CHECK_GENERAL_CODE_CATEGORY(Cn)
552        CHECK_GENERAL_CODE_CATEGORY(Co)
553        CHECK_GENERAL_CODE_CATEGORY(Cs)
554        CHECK_GENERAL_CODE_CATEGORY(Ll)
555        CHECK_GENERAL_CODE_CATEGORY(Lm)
556        CHECK_GENERAL_CODE_CATEGORY(Lo)
557        CHECK_GENERAL_CODE_CATEGORY(Lt)
558        CHECK_GENERAL_CODE_CATEGORY(Lu)
559        CHECK_GENERAL_CODE_CATEGORY(Mc)
560        CHECK_GENERAL_CODE_CATEGORY(Me)
561        CHECK_GENERAL_CODE_CATEGORY(Mn)
562        CHECK_GENERAL_CODE_CATEGORY(Nd)
563        CHECK_GENERAL_CODE_CATEGORY(Nl)
564        CHECK_GENERAL_CODE_CATEGORY(No)
565        CHECK_GENERAL_CODE_CATEGORY(Pc)
566        CHECK_GENERAL_CODE_CATEGORY(Pd)
567        CHECK_GENERAL_CODE_CATEGORY(Pe)
568        CHECK_GENERAL_CODE_CATEGORY(Pf)
569        CHECK_GENERAL_CODE_CATEGORY(Pi)
570        CHECK_GENERAL_CODE_CATEGORY(Po)
571        CHECK_GENERAL_CODE_CATEGORY(Ps)
572        CHECK_GENERAL_CODE_CATEGORY(Sc)
573        CHECK_GENERAL_CODE_CATEGORY(Sk)
574        CHECK_GENERAL_CODE_CATEGORY(Sm)
575        CHECK_GENERAL_CODE_CATEGORY(So)
576        CHECK_GENERAL_CODE_CATEGORY(Zl)
577        CHECK_GENERAL_CODE_CATEGORY(Zp)
578        CHECK_GENERAL_CODE_CATEGORY(Zs)
579        // OTHERWISE ...
580        throw std::runtime_error("Unknown unicode category \"" + callee->str() + "\"");
581        #undef CHECK_GENERAL_CODE_CATEGORY
582        Value * unicodeCategory = mMod->getOrInsertFunction("__get_category_" + callee->str(), mBitBlockType, mBasisBitsInputPtr, NULL);
583        if (LLVM_UNLIKELY(unicodeCategory == nullptr)) {
584            throw std::runtime_error("Could not create static method call for unicode category \"" + callee->str() + "\"");
585        }
586        mExecutionEngine->addGlobalMapping(cast<GlobalValue>(unicodeCategory), callee_ptr);
587        mCalleeMap[callee] = unicodeCategory;
588    }
589}
590
591void PabloCompiler::compileStatements(const StatementList & stmts) {
592    for (const PabloAST * statement : stmts) {
593        compileStatement(statement);
594    }
595}
596
597void PabloCompiler::compileStatement(const PabloAST * stmt)
598{
599    if (const Assign * assign = dyn_cast<const Assign>(stmt))
600    {
601        Value* expr = compileExpression(assign->getExpr());
602        mMarkerMap[assign->getName()] = expr;
603        if (LLVM_UNLIKELY(assign->isOutputAssignment())) {
604            SetOutputValue(expr, assign->getOutputIndex());
605        }
606    }
607    if (const Next * next = dyn_cast<const Next>(stmt))
608    {
609        Value* expr = compileExpression(next->getExpr());
610        mMarkerMap[next->getName()] = expr;
611    }
612    else if (const If * ifStatement = dyn_cast<const If>(stmt))
613    //
614    //  The If-ElseZero stmt:
615    //  if <predicate:expr> then <body:stmt>* elsezero <defined:var>* endif
616    //  If the value of the predicate is nonzero, then determine the values of variables
617    //  <var>* by executing the given statements.  Otherwise, the value of the
618    //  variables are all zero.  Requirements: (a) no variable that is defined within
619    //  the body of the if may be accessed outside unless it is explicitly 
620    //  listed in the variable list, (b) every variable in the defined list receives
621    //  a value within the body, and (c) the logical consequence of executing
622    //  the statements in the event that the predicate is zero is that the
623    //  values of all defined variables indeed work out to be 0.
624    //
625    //  Simple Implementation with Phi nodes:  a phi node in the if exit block
626    //  is inserted for each variable in the defined variable list.  It receives
627    //  a zero value from the ifentry block and the defined value from the if
628    //  body.
629    //
630    {
631        BasicBlock * ifEntryBlock = mBasicBlock;  // The block we are in.
632        BasicBlock * ifBodyBlock = BasicBlock::Create(mMod->getContext(), "if.body", mFunction, 0);
633        BasicBlock * ifEndBlock = BasicBlock::Create(mMod->getContext(), "if.end", mFunction, 0);
634       
635        const auto baseCarryQueueIdx = mCarryQueueIdx;
636        const auto baseAdvanceQueueIdx = mAdvanceQueueIdx;
637       
638        int ifCarryCount = ifStatement->getInclusiveCarryCount();
639        int ifAdvanceCount = ifStatement->getInclusiveAdvanceCount();
640        //  Carry/Advance queue strategy.   
641        //  If there are any carries or advances at any nesting level within the
642        //  if statement, then the statement must be executed.   A "summary"
643        //  carryover variable is determined for this purpose, consisting of the
644        //  or of all of the carry and advance variables within the if.
645        //  This variable is determined as follows.
646        //  (a)  If the CarryCount and AdvanceCount are both 0, there is no summary variable.
647        //  (b)  If the CarryCount is 1 and the AdvanceCount is 0, then the summary
648        //       carryover variable is just the single carry queue entry.
649        //  (c)  If the CarryCount is 0 and the AdvanceCount is 1, then the summary
650        //       carryover variable is just the advance carry queue entry.
651        //  (d)  Otherwise, an additional advance queue entry is created for the
652        //       summary variable.
653        //  Note that the test for cases (c) and (d) may be combined: the summary carryover
654        //  variable is just last advance queue entry.
655        //
656       
657        IRBuilder<> b_entry(ifEntryBlock);
658        mBasicBlock = ifEntryBlock;
659        Value* if_test_value = compileExpression(ifStatement->getCondition());
660       
661        if ((ifCarryCount == 1) && (ifAdvanceCount == 0)) {
662            Value* last_if_pending_carries = genCarryInLoad(baseCarryQueueIdx);
663            if_test_value = b_entry.CreateOr(if_test_value, last_if_pending_carries);
664        }
665        else if ((ifCarryCount > 0) || (ifAdvanceCount > 0)) {
666            Value* last_if_pending_advances = genAdvanceInLoad(baseAdvanceQueueIdx + ifAdvanceCount - 1);
667            if_test_value = b_entry.CreateOr(if_test_value, last_if_pending_advances);
668        }
669        b_entry.CreateCondBr(genBitBlockAny(if_test_value), ifEndBlock, ifBodyBlock);
670
671        // Entry processing is complete, now handle the body of the if.
672       
673        IRBuilder<> bIfBody(ifBodyBlock);
674        mBasicBlock = ifBodyBlock;
675       
676        compileStatements(ifStatement->getBody());
677       
678        // After the recursive compile, now insert the code to compute the summary
679        // carry over variable.
680       
681        if ((ifCarryCount + ifAdvanceCount) > 1) {
682            // A summary variable is needed.
683
684            Value * carry_summary = mZeroInitializer;
685            for (int c = baseCarryQueueIdx; c < baseCarryQueueIdx + ifCarryCount; c++)
686            {
687                Value* carryq_value = genCarryInLoad(c);
688                carry_summary = bIfBody.CreateOr(carry_summary, carryq_value);
689            }
690            // Note that the limit in the following uses -1, because
691            // last entry of the advance queue is for the summary variable.
692            for (int c = baseAdvanceQueueIdx; c < baseAdvanceQueueIdx + ifAdvanceCount - 1; c++)
693            {
694                Value* advance_q_value = genAdvanceInLoad(c);
695                carry_summary = bIfBody.CreateOr(advance_q_value, carry_summary);
696            }
697            genAdvanceOutStore(carry_summary, mAdvanceQueueIdx++); //baseAdvanceQueueIdx + ifAdvanceCount - 1);
698        }
699        bIfBody.CreateBr(ifEndBlock);
700
701        //End Block
702        IRBuilder<> bEnd(ifEndBlock);
703        mBasicBlock = ifEndBlock;
704       
705        for (const Assign * a : ifStatement->getDefined()) {
706            PHINode * phi = bEnd.CreatePHI(mBitBlockType, 2, a->getName()->str());
707            auto f = mMarkerMap.find(a->getName());
708            assert (f != mMarkerMap.end());
709            phi->addIncoming(mZeroInitializer, ifEntryBlock);
710            phi->addIncoming(f->second, ifBodyBlock);
711            mMarkerMap[a->getName()] = phi;
712        }
713    }
714    else if (const While * whileStatement = dyn_cast<const While>(stmt))
715    {
716        const auto baseCarryQueueIdx = mCarryQueueIdx;
717        const auto baseAdvanceQueueIdx = mAdvanceQueueIdx;
718        if (mNestingDepth == 0) {
719            for (auto i = 0; i != whileStatement->getInclusiveCarryCount(); ++i) {
720                genCarryInLoad(baseCarryQueueIdx + i);
721            }
722            for (auto i = 0; i != whileStatement->getInclusiveAdvanceCount(); ++i) {
723                genAdvanceInLoad(baseAdvanceQueueIdx + i);
724            }
725        }
726
727        SmallVector<const Next*, 4> nextNodes;
728        for (const PabloAST * node : whileStatement->getBody()) {
729            if (isa<Next>(node)) {
730                nextNodes.push_back(cast<Next>(node));
731            }
732        }
733
734        // Compile the initial iteration statements; the calls to genCarryOutStore will update the
735        // mCarryQueueVector with the appropriate values. Although we're not actually entering a new basic
736        // block yet, increment the nesting depth so that any calls to genCarryInLoad or genCarryOutStore
737        // will refer to the previous value.
738
739        ++mNestingDepth;
740
741        compileStatements(whileStatement->getBody());
742
743        // Reset the carry queue index. Note: this ought to be changed in the future. Currently this assumes
744        // that compiling the while body twice will generate the equivalent IR. This is not necessarily true
745        // but works for now.
746        mCarryQueueIdx = baseCarryQueueIdx;
747        mAdvanceQueueIdx = baseAdvanceQueueIdx;
748
749        BasicBlock* whileCondBlock = BasicBlock::Create(mMod->getContext(), "while.cond", mFunction, 0);
750        BasicBlock* whileBodyBlock = BasicBlock::Create(mMod->getContext(), "while.body", mFunction, 0);
751        BasicBlock* whileEndBlock = BasicBlock::Create(mMod->getContext(), "while.end", mFunction, 0);
752
753        // Note: compileStatements may update the mBasicBlock pointer if the body contains nested loops. It
754        // may not be same one that we entered the function with.
755        IRBuilder<> bEntry(mBasicBlock);
756        bEntry.CreateBr(whileCondBlock);
757
758        // CONDITION BLOCK
759        IRBuilder<> bCond(whileCondBlock);
760        // generate phi nodes for any carry propogating instruction
761        int whileCarryCount = whileStatement->getInclusiveCarryCount();
762        int whileAdvanceCount = whileStatement->getInclusiveAdvanceCount();
763        std::vector<PHINode*> phiNodes(whileCarryCount + whileAdvanceCount + nextNodes.size());
764        unsigned index = 0;
765        for (index = 0; index != whileCarryCount; ++index) {
766            PHINode * phi = bCond.CreatePHI(mBitBlockType, 2);
767            phi->addIncoming(mCarryQueueVector[baseCarryQueueIdx + index], mBasicBlock);
768            mCarryQueueVector[baseCarryQueueIdx + index] = mZeroInitializer; // (use phi for multi-carry mode.)
769            phiNodes[index] = phi;
770        }
771        for (int i = 0; i != whileAdvanceCount; ++i) {
772            PHINode * phi = bCond.CreatePHI(mBitBlockType, 2);
773            phi->addIncoming(mAdvanceQueueVector[baseAdvanceQueueIdx + i], mBasicBlock);
774            mAdvanceQueueVector[baseAdvanceQueueIdx + i] = mZeroInitializer; // (use phi for multi-carry mode.)
775            phiNodes[index++] = phi;
776        }
777        // and for any Next nodes in the loop body
778        for (const Next * n : nextNodes) {
779            PHINode * phi = bCond.CreatePHI(mBitBlockType, 2, n->getName()->str());
780            auto f = mMarkerMap.find(n->getName());
781            assert (f != mMarkerMap.end());
782            phi->addIncoming(f->second, mBasicBlock);
783            mMarkerMap[n->getName()] = phi;
784            phiNodes[index++] = phi;
785        }
786
787        mBasicBlock = whileCondBlock;
788        bCond.CreateCondBr(genBitBlockAny(compileExpression(whileStatement->getCondition())), whileEndBlock, whileBodyBlock);
789
790        // BODY BLOCK
791        mBasicBlock = whileBodyBlock;
792        compileStatements(whileStatement->getBody());
793        // update phi nodes for any carry propogating instruction
794        IRBuilder<> bWhileBody(mBasicBlock);
795        for (index = 0; index != whileStatement->getInclusiveCarryCount(); ++index) {
796            Value * carryOut = bWhileBody.CreateOr(phiNodes[index], mCarryQueueVector[baseCarryQueueIdx + index]);
797            PHINode * phi = phiNodes[index];
798            phi->addIncoming(carryOut, mBasicBlock);
799            mCarryQueueVector[baseCarryQueueIdx + index] = phi;
800        }
801        for (int i = 0; i != whileAdvanceCount; ++i) {
802            Value * advOut = bWhileBody.CreateOr(phiNodes[index], mAdvanceQueueVector[baseAdvanceQueueIdx + i]);
803            PHINode * phi = phiNodes[index++];
804            phi->addIncoming(advOut, mBasicBlock);
805            mAdvanceQueueVector[baseAdvanceQueueIdx + i] = phi;
806        }
807        // and for any Next nodes in the loop body
808        for (const Next * n : nextNodes) {
809            auto f = mMarkerMap.find(n->getName());
810            assert (f != mMarkerMap.end());
811            PHINode * phi = phiNodes[index++];
812            phi->addIncoming(f->second, mBasicBlock);
813            mMarkerMap[n->getName()] = phi;
814        }
815
816        bWhileBody.CreateBr(whileCondBlock);
817
818        // EXIT BLOCK
819        mBasicBlock = whileEndBlock;
820        if (--mNestingDepth == 0) {
821            for (index = 0; index != whileCarryCount; ++index) {
822                genCarryOutStore(phiNodes[index], baseCarryQueueIdx + index);
823            }
824            for (index = 0; index != whileAdvanceCount; ++index) {
825                genAdvanceOutStore(phiNodes[whileCarryCount + index], baseAdvanceQueueIdx + index);
826            }
827        }
828    }
829}
830
831Value * PabloCompiler::compileExpression(const PabloAST * expr)
832{
833    Value * retVal = nullptr;
834    IRBuilder<> b(mBasicBlock);
835    if (isa<Ones>(expr)) {
836        retVal = mOneInitializer;
837    }
838    else if (isa<Zeroes>(expr)) {
839        retVal = mZeroInitializer;
840    }
841    else if (const Call* call = dyn_cast<Call>(expr)) {
842        //Call the callee once and store the result in the marker map.
843        auto mi = mMarkerMap.find(call->getCallee());
844        if (mi == mMarkerMap.end()) {
845            auto ci = mCalleeMap.find(call->getCallee());
846            if (LLVM_UNLIKELY(ci == mCalleeMap.end())) {
847                throw std::runtime_error("Unexpected error locating static function for \"" + call->getCallee()->str() + "\"");
848            }
849            mi = mMarkerMap.insert(std::make_pair(call->getCallee(), b.CreateCall(ci->second, mBasisBitsAddr))).first;
850        }
851        retVal = mi->second;
852    }
853    else if (const Var * var = dyn_cast<Var>(expr))
854    {
855        auto f = mMarkerMap.find(var->getName());
856        if (LLVM_UNLIKELY(f == mMarkerMap.end())) {
857            throw std::runtime_error((var->getName()->str()) + " used before creation.");
858        }
859        retVal = f->second;
860    }
861    else if (const And * pablo_and = dyn_cast<And>(expr))
862    {
863        retVal = b.CreateAnd(compileExpression(pablo_and->getExpr1()), compileExpression(pablo_and->getExpr2()), "and");
864    }
865    else if (const Or * pablo_or = dyn_cast<Or>(expr))
866    {
867        retVal = b.CreateOr(compileExpression(pablo_or->getExpr1()), compileExpression(pablo_or->getExpr2()), "or");
868    }
869    else if (const Sel * sel = dyn_cast<Sel>(expr))
870    {
871        Value* ifMask = compileExpression(sel->getCondition());
872        Value* ifTrue = b.CreateAnd(ifMask, compileExpression(sel->getTrueExpr()));
873        Value* ifFalse = b.CreateAnd(genNot(ifMask), compileExpression(sel->getFalseExpr()));
874        retVal = b.CreateOr(ifTrue, ifFalse);
875    }
876    else if (const Not * pablo_not = dyn_cast<Not>(expr))
877    {
878        retVal = genNot(compileExpression(pablo_not->getExpr()));
879    }
880    else if (const Advance * adv = dyn_cast<Advance>(expr))
881    {
882        Value* strm_value = compileExpression(adv->getExpr());
883        int shift = adv->getAdvanceAmount();
884        retVal = genAdvanceWithCarry(strm_value, shift);
885    }
886    else if (const MatchStar * mstar = dyn_cast<MatchStar>(expr))
887    {
888        Value* marker = compileExpression(mstar->getMarker());
889        Value* cc = compileExpression(mstar->getCharClass());
890        Value* marker_and_cc = b.CreateAnd(marker, cc);
891        retVal = b.CreateOr(b.CreateXor(genAddWithCarry(marker_and_cc, cc), cc), marker, "matchstar");
892    }
893    else if (const ScanThru * sthru = dyn_cast<ScanThru>(expr))
894    {
895        Value* marker_expr = compileExpression(sthru->getScanFrom());
896        Value* cc_expr = compileExpression(sthru->getScanThru());
897        retVal = b.CreateAnd(genAddWithCarry(marker_expr, cc_expr), genNot(cc_expr), "scanthru");
898    }
899    return retVal;
900}
901
902#ifdef USE_UADD_OVERFLOW
903#ifdef USE_TWO_UADD_OVERFLOW
904PabloCompiler::SumWithOverflowPack PabloCompiler::callUaddOverflow(Value* int128_e1, Value* int128_e2) {
905    std::vector<Value*> struct_res_params;
906    struct_res_params.push_back(int128_e1);
907    struct_res_params.push_back(int128_e2);
908    CallInst* struct_res = CallInst::Create(mFunctionUaddOverflow, struct_res_params, "uadd_overflow_res", mBasicBlock);
909    struct_res->setCallingConv(CallingConv::C);
910    struct_res->setTailCall(false);
911    AttributeSet struct_res_PAL;
912    struct_res->setAttributes(struct_res_PAL);
913
914    SumWithOverflowPack ret;
915
916    std::vector<unsigned> int128_sum_indices;
917    int128_sum_indices.push_back(0);
918    ret.sum = ExtractValueInst::Create(struct_res, int128_sum_indices, "sum", mBasicBlock);
919
920    std::vector<unsigned> int1_obit_indices;
921    int1_obit_indices.push_back(1);
922    ret.obit = ExtractValueInst::Create(struct_res, int1_obit_indices, "obit", mBasicBlock);
923
924    return ret;
925}
926#else
927PabloCompiler::SumWithOverflowPack PabloCompiler::callUaddOverflow(Value* int128_e1, Value* int128_e2, Value* int1_cin) {
928    std::vector<Value*> struct_res_params;
929    struct_res_params.push_back(int128_e1);
930    struct_res_params.push_back(int128_e2);
931    struct_res_params.push_back(int1_cin);
932    CallInst* struct_res = CallInst::Create(mFunctionUaddOverflowCarryin, struct_res_params, "uadd_overflow_res", mBasicBlock);
933    struct_res->setCallingConv(CallingConv::C);
934    struct_res->setTailCall(false);
935    AttributeSet struct_res_PAL;
936    struct_res->setAttributes(struct_res_PAL);
937
938    SumWithOverflowPack ret;
939
940    std::vector<unsigned> int128_sum_indices;
941    int128_sum_indices.push_back(0);
942    ret.sum = ExtractValueInst::Create(struct_res, int128_sum_indices, "sum", mBasicBlock);
943
944    std::vector<unsigned> int1_obit_indices;
945    int1_obit_indices.push_back(1);
946    ret.obit = ExtractValueInst::Create(struct_res, int1_obit_indices, "obit", mBasicBlock);
947
948    return ret;
949}
950#endif
951#endif
952
953Value* PabloCompiler::genAddWithCarry(Value* e1, Value* e2) {
954    IRBuilder<> b(mBasicBlock);
955
956    //CarryQ - carry in.
957    const int carryIdx = mCarryQueueIdx++;
958    Value* carryq_value = genCarryInLoad(carryIdx);
959#ifdef USE_TWO_UADD_OVERFLOW
960    //This is the ideal implementation, which uses two uadd.with.overflow
961    //The back end should be able to recognize this pattern and combine it into uadd.with.overflow.carryin
962    CastInst* int128_e1 = new BitCastInst(e1, IntegerType::get(mMod->getContext(), BLOCK_SIZE), "e1_128", mBasicBlock);
963    CastInst* int128_e2 = new BitCastInst(e2, IntegerType::get(mMod->getContext(), BLOCK_SIZE), "e2_128", mBasicBlock);
964    CastInst* int128_carryq_value = new BitCastInst(carryq_value, IntegerType::get(mMod->getContext(), BLOCK_SIZE), "carryq_128", mBasicBlock);
965
966    SumWithOverflowPack sumpack0, sumpack1;
967
968    sumpack0 = callUaddOverflow(int128_e1, int128_e2);
969    sumpack1 = callUaddOverflow(sumpack0.sum, int128_carryq_value);
970
971    Value* obit = b.CreateOr(sumpack0.obit, sumpack1.obit, "carry_bit");
972    Value* sum = b.CreateBitCast(sumpack1.sum, mBitBlockType, "ret_sum");
973
974    /*obit is the i1 carryout, zero extend and insert it into a v2i64 or v4i64 vector.*/
975    ConstantAggregateZero* const_packed_5 = ConstantAggregateZero::get(mBitBlockType);
976    ConstantInt* const_int32_6 = ConstantInt::get(mMod->getContext(), APInt(32, StringRef("0"), 10));
977    CastInst* int64_o0 = new ZExtInst(obit, IntegerType::get(mMod->getContext(), 64), "o0", mBasicBlock);
978    InsertElementInst* carry_out = InsertElementInst::Create(const_packed_5, int64_o0, const_int32_6, "carry_out", mBasicBlock);
979
980#elif defined USE_UADD_OVERFLOW
981    //use llvm.uadd.with.overflow.i128 or i256
982    CastInst* int128_e1 = new BitCastInst(e1, IntegerType::get(mMod->getContext(), BLOCK_SIZE), "e1_128", mBasicBlock);
983    CastInst* int128_e2 = new BitCastInst(e2, IntegerType::get(mMod->getContext(), BLOCK_SIZE), "e2_128", mBasicBlock);
984
985    //get i1 carryin from iBLOCK_SIZE
986    ConstantInt* const_int32_6 = ConstantInt::get(mMod->getContext(), APInt(32, StringRef("0"), 10));
987    ExtractElementInst * int64_carryq_value = ExtractElementInst::Create(carryq_value, const_int32_6, "carryq_64", mBasicBlock);
988    CastInst* int1_carryq_value = new TruncInst(int64_carryq_value, IntegerType::get(mMod->getContext(), 1), "carryq_1", mBasicBlock);
989
990    SumWithOverflowPack sumpack0;
991    sumpack0 = callUaddOverflow(int128_e1, int128_e2, int1_carryq_value);
992    Value* obit = sumpack0.obit;
993    Value* sum = b.CreateBitCast(sumpack0.sum, mBitBlockType, "sum");
994
995    /*obit is the i1 carryout, zero extend and insert it into a v2i64 or v4i64 vector.*/
996    ConstantAggregateZero* const_packed_5 = ConstantAggregateZero::get(mBitBlockType);
997    CastInst* int64_o0 = new ZExtInst(obit, IntegerType::get(mMod->getContext(), 64), "o0", mBasicBlock);
998    InsertElementInst* carry_out = InsertElementInst::Create(const_packed_5, int64_o0, const_int32_6, "carry_out", mBasicBlock);
999#elif (BLOCK_SIZE == 128)
1000    //calculate carry through logical ops
1001    Value* carrygen = b.CreateAnd(e1, e2, "carrygen");
1002    Value* carryprop = b.CreateOr(e1, e2, "carryprop");
1003    Value* digitsum = b.CreateAdd(e1, e2, "digitsum");
1004    Value* partial = b.CreateAdd(digitsum, carryq_value, "partial");
1005    Value* digitcarry = b.CreateOr(carrygen, b.CreateAnd(carryprop, genNot(partial)));
1006    Value* mid_carry_in = genShiftLeft64(b.CreateLShr(digitcarry, 63), "mid_carry_in");
1007
1008    Value* sum = b.CreateAdd(partial, mid_carry_in, "sum");
1009    Value* carry_out = genShiftHighbitToLow(b.CreateOr(carrygen, b.CreateAnd(carryprop, genNot(sum))), "carry_out");
1010#else
1011    //BLOCK_SIZE == 256, there is no other implementation
1012    static_assert(false, "Add with carry for 256-bit bitblock requires USE_UADD_OVERFLOW");
1013#endif //USE_TWO_UADD_OVERFLOW
1014
1015    genCarryOutStore(carry_out, carryIdx);
1016    return sum;
1017}
1018
1019Value* PabloCompiler::genCarryInLoad(const unsigned index) {
1020    assert (index < mCarryQueueVector.size());
1021    if (mNestingDepth == 0) {
1022        IRBuilder<> b(mBasicBlock);
1023        mCarryQueueVector[index] = b.CreateAlignedLoad(b.CreateGEP(mCarryQueuePtr, b.getInt64(index)), BLOCK_SIZE/8, false);
1024    }
1025    return mCarryQueueVector[index];
1026}
1027
1028void PabloCompiler::genCarryOutStore(Value* carryOut, const unsigned index ) {
1029    assert (carryOut);
1030    assert (index < mCarryQueueVector.size());
1031    if (mNestingDepth == 0) {
1032        IRBuilder<> b(mBasicBlock);
1033        b.CreateAlignedStore(carryOut, b.CreateGEP(mCarryQueuePtr, b.getInt64(index)), BLOCK_SIZE/8, false);
1034    }
1035    mCarryQueueVector[index] = carryOut;
1036}
1037
1038Value* PabloCompiler::genAdvanceInLoad(const unsigned index) {
1039    assert (index < mAdvanceQueueVector.size());
1040    if (mNestingDepth == 0) {
1041        IRBuilder<> b(mBasicBlock);
1042        mAdvanceQueueVector[index] = b.CreateAlignedLoad(b.CreateGEP(mAdvanceQueuePtr, b.getInt64(index)), BLOCK_SIZE/8, false);
1043    }
1044    return mAdvanceQueueVector[index];
1045}
1046
1047void PabloCompiler::genAdvanceOutStore(Value* advanceOut, const unsigned index ) {
1048    assert (advanceOut);
1049    assert (index < mAdvanceQueueVector.size());
1050    if (mNestingDepth == 0) {
1051        IRBuilder<> b(mBasicBlock);
1052        b.CreateAlignedStore(advanceOut, b.CreateGEP(mAdvanceQueuePtr, b.getInt64(index)), BLOCK_SIZE/8, false);
1053    }
1054    mAdvanceQueueVector[index] = advanceOut;
1055}
1056
1057inline Value* PabloCompiler::genBitBlockAny(Value* test) {
1058    IRBuilder<> b(mBasicBlock);
1059    Value* cast_marker_value_1 = b.CreateBitCast(test, IntegerType::get(mMod->getContext(), BLOCK_SIZE));
1060    return b.CreateICmpEQ(cast_marker_value_1, ConstantInt::get(IntegerType::get(mMod->getContext(), BLOCK_SIZE), 0));
1061}
1062
1063Value* PabloCompiler::genShiftHighbitToLow(Value* e, const Twine &namehint) {
1064    IRBuilder<> b(mBasicBlock);
1065    Value* i128_val = b.CreateBitCast(e, IntegerType::get(mMod->getContext(), BLOCK_SIZE));
1066    return b.CreateBitCast(b.CreateLShr(i128_val, BLOCK_SIZE - 1, namehint), mBitBlockType);
1067}
1068
1069Value* PabloCompiler::genShiftLeft64(Value* e, const Twine &namehint) {
1070    IRBuilder<> b(mBasicBlock);
1071    Value* i128_val = b.CreateBitCast(e, IntegerType::get(mMod->getContext(), BLOCK_SIZE));
1072    return b.CreateBitCast(b.CreateShl(i128_val, 64, namehint), mBitBlockType);
1073}
1074
1075inline Value* PabloCompiler::genNot(Value* expr) {
1076    IRBuilder<> b(mBasicBlock);
1077    return b.CreateXor(expr, mOneInitializer, "not");
1078}
1079
1080Value* PabloCompiler::genAdvanceWithCarry(Value* strm_value, int shift_amount) {
1081
1082    IRBuilder<> b(mBasicBlock);
1083
1084    const auto advanceIdx = mAdvanceQueueIdx++;
1085#ifdef USE_LONG_INTEGER_SHIFT
1086    Value* advanceq_longint = b.CreateBitCast(genAdvanceInLoad(advanceIdx), IntegerType::get(mMod->getContext(), BLOCK_SIZE));
1087    Value* strm_longint = b.CreateBitCast(strm_value, IntegerType::get(mMod->getContext(), BLOCK_SIZE));
1088    Value* adv_longint = b.CreateOr(b.CreateShl(strm_longint, shift_amount), b.CreateLShr(advanceq_longint, BLOCK_SIZE - shift_amount), "advance");
1089    Value* result_value = b.CreateBitCast(adv_longint, mBitBlockType);
1090    genAdvanceOutStore(strm_value, advanceIdx);
1091
1092    return result_value;
1093#elif (BLOCK_SIZE == 128)
1094    if (shift_amount == 1) {
1095        Value* advanceq_value = genShiftHighbitToLow(genAdvanceInLoad(advanceIdx));
1096        Value* srli_1_value = b.CreateLShr(strm_value, 63);
1097        Value* packed_shuffle;
1098        Constant* const_packed_1_elems [] = {b.getInt32(0), b.getInt32(2)};
1099        Constant* const_packed_1 = ConstantVector::get(const_packed_1_elems);
1100        packed_shuffle = b.CreateShuffleVector(advanceq_value, srli_1_value, const_packed_1);
1101
1102        Constant* const_packed_2_elems[] = {b.getInt64(1), b.getInt64(1)};
1103        Constant* const_packed_2 = ConstantVector::get(const_packed_2_elems);
1104
1105        Value* shl_value = b.CreateShl(strm_value, const_packed_2);
1106        Value* result_value = b.CreateOr(shl_value, packed_shuffle, "advance");
1107
1108        //CarryQ - carry out:
1109        genAdvanceOutStore(strm_value, advanceIdx);
1110
1111        return result_value;
1112    }
1113    else if (shift_amount < 64) {
1114        // This is the preferred logic, but is too slow for the general case.
1115        // We need to speed up our custom LLVM for this code.
1116        Value* advanceq_longint = b.CreateBitCast(genAdvanceInLoad(advanceIdx), IntegerType::get(mMod->getContext(), BLOCK_SIZE));
1117        Value* strm_longint = b.CreateBitCast(strm_value, IntegerType::get(mMod->getContext(), BLOCK_SIZE));
1118        Value* adv_longint = b.CreateOr(b.CreateShl(strm_longint, shift_amount), b.CreateLShr(advanceq_longint, BLOCK_SIZE - shift_amount), "advance");
1119        Value* result_value = b.CreateBitCast(adv_longint, mBitBlockType);
1120        genAdvanceOutStore(strm_value, advanceIdx);
1121
1122        return result_value;
1123    }
1124    else {//if (shift_amount >= 64) {
1125        throw std::runtime_error("Shift amount >= 64 in Advance is currently unsupported.");
1126    }
1127#else
1128    //BLOCK_SIZE == 256
1129    static_assert(false, "Advance with carry on 256-bit bitblock requires long integer shifts (USE_LONG_INTEGER_SHIFT).");
1130#endif //USE_LONG_INTEGER_SHIFT
1131}
1132
1133void PabloCompiler::SetOutputValue(Value * marker, const unsigned index) {
1134    IRBuilder<> b(mBasicBlock);
1135    if (marker->getType()->isPointerTy()) {
1136        marker = b.CreateAlignedLoad(marker, BLOCK_SIZE/8, false);
1137    }
1138    Value* indices[] = {b.getInt64(0), b.getInt32(index)};
1139    Value* gep = b.CreateGEP(mOutputAddrPtr, indices);
1140    b.CreateAlignedStore(marker, gep, BLOCK_SIZE/8, false);
1141}
1142
1143}
Note: See TracBrowser for help on using the repository browser.