source: icGREP/icgrep-devel/icgrep/pablo/pablo_compiler.cpp @ 4359

Last change on this file since 4359 was 4359, checked in by cameron, 5 years ago

Sel -> Xor optimization; implement Pablo Xor in printer, compiler

File size: 46.0 KB
Line 
1/*
2 *  Copyright (c) 2014 International Characters.
3 *  This software is licensed to the public under the Open Software License 3.0.
4 *  icgrep is a trademark of International Characters.
5 */
6
7/*
8 *  Copyright (c) 2014 International Characters.
9 *  This software is licensed to the public under the Open Software License 3.0.
10 *  icgrep is a trademark of International Characters.
11 */
12
13#include <pablo/pablo_compiler.h>
14#include <pablo/codegenstate.h>
15#include <pablo/printer_pablos.h>
16#include <cc/cc_namemap.hpp>
17#include <re/re_name.h>
18#include <stdexcept>
19#include <include/simd-lib/bitblock.hpp>
20
21#ifdef USE_LLVM_3_4
22#include <llvm/Analysis/Verifier.h>
23#include <llvm/Assembly/PrintModulePass.h>
24#include <llvm/Linker.h>
25#endif
26#ifdef USE_LLVM_3_5
27#include <llvm/IR/Verifier.h>
28#endif
29
30#include <llvm/Pass.h>
31#include <llvm/PassManager.h>
32#include <llvm/ADT/SmallVector.h>
33#include <llvm/Analysis/Passes.h>
34#include <llvm/IR/BasicBlock.h>
35#include <llvm/IR/CallingConv.h>
36#include <llvm/IR/Constants.h>
37#include <llvm/IR/DataLayout.h>
38#include <llvm/IR/DerivedTypes.h>
39#include <llvm/IR/Function.h>
40#include <llvm/IR/GlobalVariable.h>
41#include <llvm/IR/InlineAsm.h>
42#include <llvm/IR/Instructions.h>
43#include <llvm/IR/LLVMContext.h>
44#include <llvm/IR/Module.h>
45#include <llvm/Support/FormattedStream.h>
46#include <llvm/Support/MathExtras.h>
47#include <llvm/Support/Casting.h>
48#include <llvm/Support/Compiler.h>
49#include <llvm/Support/Debug.h>
50#include <llvm/Support/TargetSelect.h>
51#include <llvm/Support/Host.h>
52#include <llvm/Transforms/Scalar.h>
53#include <llvm/ExecutionEngine/ExecutionEngine.h>
54#include <llvm/ExecutionEngine/MCJIT.h>
55#include <llvm/IRReader/IRReader.h>
56#include <llvm/Bitcode/ReaderWriter.h>
57#include <llvm/Support/MemoryBuffer.h>
58#include <llvm/IR/IRBuilder.h>
59
60//#define DUMP_GENERATED_IR
61#include "llvm/Support/CommandLine.h"
62static cl::opt<bool> DumpGeneratedIR("dump-generated-IR", cl::init(false), cl::desc("print LLVM IR generated by RE compilation"));
63
64extern "C" {
65  void wrapped_print_register(BitBlock bit_block) {
66      print_register<BitBlock>("", bit_block);
67  }
68}
69
70#define CREATE_GENERAL_CODE_CATEGORY(SUFFIX) \
71SUFFIX * f##SUFFIX = nullptr; \
72extern "C" { \
73    BitBlock __get_category_##SUFFIX(Basis_bits &basis_bits) { \
74        if (f##SUFFIX == nullptr) f##SUFFIX = new SUFFIX(); \
75        Struct_##SUFFIX output; \
76        f##SUFFIX->do_block(basis_bits, output); \
77        return output.cc; \
78    } \
79}
80
81CREATE_GENERAL_CODE_CATEGORY(Cc)
82CREATE_GENERAL_CODE_CATEGORY(Cf)
83CREATE_GENERAL_CODE_CATEGORY(Cn)
84CREATE_GENERAL_CODE_CATEGORY(Co)
85CREATE_GENERAL_CODE_CATEGORY(Cs)
86CREATE_GENERAL_CODE_CATEGORY(Ll)
87CREATE_GENERAL_CODE_CATEGORY(Lm)
88CREATE_GENERAL_CODE_CATEGORY(Lo)
89CREATE_GENERAL_CODE_CATEGORY(Lt)
90CREATE_GENERAL_CODE_CATEGORY(Lu)
91CREATE_GENERAL_CODE_CATEGORY(Mc)
92CREATE_GENERAL_CODE_CATEGORY(Me)
93CREATE_GENERAL_CODE_CATEGORY(Mn)
94CREATE_GENERAL_CODE_CATEGORY(Nd)
95CREATE_GENERAL_CODE_CATEGORY(Nl)
96CREATE_GENERAL_CODE_CATEGORY(No)
97CREATE_GENERAL_CODE_CATEGORY(Pc)
98CREATE_GENERAL_CODE_CATEGORY(Pd)
99CREATE_GENERAL_CODE_CATEGORY(Pe)
100CREATE_GENERAL_CODE_CATEGORY(Pf)
101CREATE_GENERAL_CODE_CATEGORY(Pi)
102CREATE_GENERAL_CODE_CATEGORY(Po)
103CREATE_GENERAL_CODE_CATEGORY(Ps)
104CREATE_GENERAL_CODE_CATEGORY(Sc)
105CREATE_GENERAL_CODE_CATEGORY(Sk)
106CREATE_GENERAL_CODE_CATEGORY(Sm)
107CREATE_GENERAL_CODE_CATEGORY(So)
108CREATE_GENERAL_CODE_CATEGORY(Zl)
109CREATE_GENERAL_CODE_CATEGORY(Zp)
110CREATE_GENERAL_CODE_CATEGORY(Zs)
111
112#undef CREATE_GENERAL_CODE_CATEGORY
113
114namespace pablo {
115
116PabloCompiler::PabloCompiler(const std::vector<Var*> & basisBits)
117: mBasisBits(basisBits)
118, mMod(new Module("icgrep", getGlobalContext()))
119, mBasicBlock(nullptr)
120, mExecutionEngine(nullptr)
121, mBitBlockType(VectorType::get(IntegerType::get(mMod->getContext(), 64), BLOCK_SIZE / 64))
122, mBasisBitsInputPtr(nullptr)
123, mCarryQueueIdx(0)
124, mCarryQueuePtr(nullptr)
125, mNestingDepth(0)
126, mCarryQueueSize(0)
127, mAdvanceQueueIdx(0)
128, mAdvanceQueuePtr(nullptr)
129, mAdvanceQueueSize(0)
130, mZeroInitializer(ConstantAggregateZero::get(mBitBlockType))
131, mOneInitializer(ConstantVector::getAllOnesValue(mBitBlockType))
132, mFunctionType(nullptr)
133, mFunction(nullptr)
134, mBasisBitsAddr(nullptr)
135, mOutputAddrPtr(nullptr)
136, mMaxNestingDepth(0)
137{
138    //Create the jit execution engine.up
139    InitializeNativeTarget();
140    InitializeNativeTargetAsmPrinter();
141    InitializeNativeTargetAsmParser();
142    DefineTypes();
143    DeclareFunctions();
144}
145
146PabloCompiler::~PabloCompiler()
147{
148    delete mMod;
149    delete fPs;
150    delete fNl;
151    delete fNo;
152    delete fLo;
153    delete fLl;
154    delete fLm;
155    delete fNd;
156    delete fPc;
157    delete fLt;
158    delete fLu;
159    delete fPf;
160    delete fPd;
161    delete fPe;
162    delete fPi;
163    delete fPo;
164    delete fMe;
165    delete fMc;
166    delete fMn;
167    delete fSk;
168    delete fSo;
169    delete fSm;
170    delete fSc;
171    delete fZl;
172    delete fCo;
173    delete fCn;
174    delete fCc;
175    delete fCf;
176    delete fCs;
177    delete fZp;
178    delete fZs;
179}
180
181LLVM_Gen_RetVal PabloCompiler::compile(PabloBlock & pb)
182{
183    mNestingDepth = 0;
184    mMaxNestingDepth = 0;
185    mCarryQueueSize = 0;
186    mAdvanceQueueSize = 0;
187    Examine(pb.statements());
188    mCarryQueueVector.resize(mCarryQueueSize);
189    mAdvanceQueueVector.resize(mAdvanceQueueSize);
190    std::string errMessage;
191    EngineBuilder builder(mMod);
192    builder.setErrorStr(&errMessage);
193    builder.setMCPU(sys::getHostCPUName());
194    builder.setUseMCJIT(true);
195    builder.setOptLevel(mMaxNestingDepth ? CodeGenOpt::Level::Less : CodeGenOpt::Level::None);
196    mExecutionEngine = builder.create();
197    if (mExecutionEngine == nullptr) {
198        throw std::runtime_error("Could not create ExecutionEngine: " + errMessage);
199    }
200
201    if (!mCalleeMap.empty()) {
202        DeclareCallFunctions();
203    }
204
205    Function::arg_iterator args = mFunction->arg_begin();
206    mBasisBitsAddr = args++;
207    mBasisBitsAddr->setName("basis_bits");
208    mCarryQueuePtr = args++;
209    mCarryQueuePtr->setName("carry_q");
210    mAdvanceQueuePtr = args++;
211    mAdvanceQueuePtr->setName("advance_q");
212    mOutputAddrPtr = args++;
213    mOutputAddrPtr->setName("output");
214
215    //Create the carry and advance queues.
216    mCarryQueueIdx = 0;
217    mAdvanceQueueIdx = 0;
218    mNestingDepth = 0;
219    mMaxNestingDepth = 0;
220    mBasicBlock = BasicBlock::Create(mMod->getContext(), "parabix_entry", mFunction,0);
221
222    //The basis bits structure
223    for (unsigned i = 0; i != mBasisBits.size(); ++i) {
224        IRBuilder<> b(mBasicBlock);
225        Value* indices[] = {b.getInt64(0), b.getInt32(i)};
226        const String * const name = mBasisBits[i]->getName();
227        Value * gep = b.CreateGEP(mBasisBitsAddr, indices);
228        LoadInst * basisBit = b.CreateAlignedLoad(gep, BLOCK_SIZE/8, false, name->str());
229        mMarkerMap.insert(std::make_pair(name, basisBit));
230    }
231
232    //Generate the IR instructions for the function.
233    compileStatements(pb.statements());
234
235    assert (mCarryQueueIdx == mCarryQueueSize);
236    assert (mAdvanceQueueIdx == mAdvanceQueueSize);
237    assert (mNestingDepth == 0);
238    //Terminate the block
239    ReturnInst::Create(mMod->getContext(), mBasicBlock);
240
241    //Display the IR that has been generated by this module.
242    if (DumpGeneratedIR) {
243      mMod->dump();
244    }
245
246
247
248    //Create a verifier.  The verifier will print an error message if our module is malformed in any way.
249    #ifdef USE_LLVM_3_5
250    verifyModule(*mMod, &dbgs());
251    #endif
252    #ifdef USE_LLVM_3_4
253    verifyModule(*mMod, PrintMessageAction);
254    #endif
255
256    //Use the pass manager to run optimizations on the function.
257    FunctionPassManager fpm(mMod);
258 #ifdef USE_LLVM_3_5
259    mMod->setDataLayout(mExecutionEngine->getDataLayout());
260    // Set up the optimizer pipeline.  Start with registering info about how the target lays out data structures.
261    fpm.add(new DataLayoutPass(mMod));
262#endif
263#ifdef USE_LLVM_3_4
264    fpm.add(new DataLayout(*mExecutionEngine->getDataLayout()));
265#endif
266    fpm.doInitialization();
267    fpm.run(*mFunction);
268
269    mExecutionEngine->finalizeObject();
270
271    LLVM_Gen_RetVal retVal;
272    //Return the required size of the carry queue and a pointer to the process_block function.
273    retVal.carry_q_size = mCarryQueueVector.size();
274    retVal.advance_q_size = mAdvanceQueueVector.size();
275    retVal.process_block_fptr = mExecutionEngine->getPointerToFunction(mFunction);
276
277    return retVal;
278}
279
280void PabloCompiler::DefineTypes()
281{
282    StructType * structBasisBits = mMod->getTypeByName("struct.Basis_bits");
283    if (structBasisBits == nullptr) {
284        structBasisBits = StructType::create(mMod->getContext(), "struct.Basis_bits");
285    }
286    std::vector<Type*>StructTy_struct_Basis_bits_fields;
287    for (int i = 0; i != mBasisBits.size(); i++)
288    {
289        StructTy_struct_Basis_bits_fields.push_back(mBitBlockType);
290    }
291    if (structBasisBits->isOpaque()) {
292        structBasisBits->setBody(StructTy_struct_Basis_bits_fields, /*isPacked=*/false);
293    }
294    mBasisBitsInputPtr = PointerType::get(structBasisBits, 0);
295
296    std::vector<Type*>functionTypeArgs;
297    functionTypeArgs.push_back(mBasisBitsInputPtr);
298
299    //The carry q array.
300    //A pointer to the BitBlock vector.
301    functionTypeArgs.push_back(PointerType::get(mBitBlockType, 0));
302    // Advance q array
303    functionTypeArgs.push_back(PointerType::get(mBitBlockType, 0));
304
305    //The output structure.
306    StructType * outputStruct = mMod->getTypeByName("struct.Output");
307    if (!outputStruct) {
308        outputStruct = StructType::create(mMod->getContext(), "struct.Output");
309    }
310    if (outputStruct->isOpaque()) {
311        std::vector<Type*>fields;
312        fields.push_back(mBitBlockType);
313        fields.push_back(mBitBlockType);
314        outputStruct->setBody(fields, /*isPacked=*/false);
315    }
316    PointerType* outputStructPtr = PointerType::get(outputStruct, 0);
317
318    //The &output parameter.
319    functionTypeArgs.push_back(outputStructPtr);
320
321    mFunctionType = FunctionType::get(
322     /*Result=*/Type::getVoidTy(mMod->getContext()),
323     /*Params=*/functionTypeArgs,
324     /*isVarArg=*/false);
325}
326
327void PabloCompiler::DeclareFunctions()
328{
329    //This function can be used for testing to print the contents of a register from JIT'd code to the terminal window.
330    //mFunc_print_register = mMod->getOrInsertFunction("wrapped_print_register", Type::getVoidTy(getGlobalContext()), mXi64Vect, NULL);
331    //mExecutionEngine->addGlobalMapping(cast<GlobalValue>(mFunc_print_register), (void *)&wrapped_print_register);
332    // to call->  b.CreateCall(mFunc_print_register, unicode_category);
333
334#ifdef USE_UADD_OVERFLOW
335#ifdef USE_TWO_UADD_OVERFLOW
336    // Type Definitions for llvm.uadd.with.overflow.carryin.i128 or .i256
337    std::vector<Type*>StructTy_0_fields;
338    StructTy_0_fields.push_back(IntegerType::get(mMod->getContext(), BLOCK_SIZE));
339    StructTy_0_fields.push_back(IntegerType::get(mMod->getContext(), 1));
340    StructType *StructTy_0 = StructType::get(mMod->getContext(), StructTy_0_fields, /*isPacked=*/false);
341
342    std::vector<Type*>FuncTy_1_args;
343    FuncTy_1_args.push_back(IntegerType::get(mMod->getContext(), BLOCK_SIZE));
344    FuncTy_1_args.push_back(IntegerType::get(mMod->getContext(), BLOCK_SIZE));
345    FunctionType* FuncTy_1 = FunctionType::get(
346                                              /*Result=*/StructTy_0,
347                                              /*Params=*/FuncTy_1_args,
348                                              /*isVarArg=*/false);
349
350    mFunctionUaddOverflow = mMod->getFunction("llvm.uadd.with.overflow.i" +
351                                              std::to_string(BLOCK_SIZE));
352    if (!mFunctionUaddOverflow) {
353        mFunctionUaddOverflow= Function::Create(
354          /*Type=*/ FuncTy_1,
355          /*Linkage=*/ GlobalValue::ExternalLinkage,
356          /*Name=*/ "llvm.uadd.with.overflow.i" + std::to_string(BLOCK_SIZE), mMod); // (external, no body)
357        mFunctionUaddOverflow->setCallingConv(CallingConv::C);
358    }
359    AttributeSet mFunctionUaddOverflowPAL;
360    {
361        SmallVector<AttributeSet, 4> Attrs;
362        AttributeSet PAS;
363        {
364          AttrBuilder B;
365          B.addAttribute(Attribute::NoUnwind);
366          B.addAttribute(Attribute::ReadNone);
367          PAS = AttributeSet::get(mMod->getContext(), ~0U, B);
368        }
369
370        Attrs.push_back(PAS);
371        mFunctionUaddOverflowPAL = AttributeSet::get(mMod->getContext(), Attrs);
372    }
373    mFunctionUaddOverflow->setAttributes(mFunctionUaddOverflowPAL);
374#else
375    // Type Definitions for llvm.uadd.with.overflow.carryin.i128 or .i256
376    std::vector<Type*>StructTy_0_fields;
377    StructTy_0_fields.push_back(IntegerType::get(mMod->getContext(), BLOCK_SIZE));
378    StructTy_0_fields.push_back(IntegerType::get(mMod->getContext(), 1));
379    StructType *StructTy_0 = StructType::get(mMod->getContext(), StructTy_0_fields, /*isPacked=*/false);
380
381    std::vector<Type*>FuncTy_1_args;
382    FuncTy_1_args.push_back(IntegerType::get(mMod->getContext(), BLOCK_SIZE));
383    FuncTy_1_args.push_back(IntegerType::get(mMod->getContext(), BLOCK_SIZE));
384    FuncTy_1_args.push_back(IntegerType::get(mMod->getContext(), 1));
385    FunctionType* FuncTy_1 = FunctionType::get(
386                                              /*Result=*/StructTy_0,
387                                              /*Params=*/FuncTy_1_args,
388                                              /*isVarArg=*/false);
389
390    mFunctionUaddOverflowCarryin = mMod->getFunction("llvm.uadd.with.overflow.carryin.i" +
391                                              std::to_string(BLOCK_SIZE));
392    if (!mFunctionUaddOverflowCarryin) {
393        mFunctionUaddOverflowCarryin = Function::Create(
394          /*Type=*/ FuncTy_1,
395          /*Linkage=*/ GlobalValue::ExternalLinkage,
396          /*Name=*/ "llvm.uadd.with.overflow.carryin.i" + std::to_string(BLOCK_SIZE), mMod); // (external, no body)
397        mFunctionUaddOverflowCarryin->setCallingConv(CallingConv::C);
398    }
399    AttributeSet mFunctionUaddOverflowCarryinPAL;
400    {
401        SmallVector<AttributeSet, 4> Attrs;
402        AttributeSet PAS;
403        {
404          AttrBuilder B;
405          B.addAttribute(Attribute::NoUnwind);
406          B.addAttribute(Attribute::ReadNone);
407          PAS = AttributeSet::get(mMod->getContext(), ~0U, B);
408        }
409
410        Attrs.push_back(PAS);
411        mFunctionUaddOverflowCarryinPAL = AttributeSet::get(mMod->getContext(), Attrs);
412    }
413    mFunctionUaddOverflowCarryin->setAttributes(mFunctionUaddOverflowCarryinPAL);
414#endif
415#endif
416
417    //Starts on process_block
418    SmallVector<AttributeSet, 5> Attrs;
419    AttributeSet PAS;
420    {
421        AttrBuilder B;
422        B.addAttribute(Attribute::ReadOnly);
423        B.addAttribute(Attribute::NoCapture);
424        PAS = AttributeSet::get(mMod->getContext(), 1U, B);
425    }
426    Attrs.push_back(PAS);
427    {
428        AttrBuilder B;
429        B.addAttribute(Attribute::NoCapture);
430        PAS = AttributeSet::get(mMod->getContext(), 2U, B);
431    }
432    Attrs.push_back(PAS);
433    {
434        AttrBuilder B;
435        B.addAttribute(Attribute::NoCapture);
436        PAS = AttributeSet::get(mMod->getContext(), 3U, B);
437    }
438    Attrs.push_back(PAS);
439    {
440        AttrBuilder B;
441        B.addAttribute(Attribute::NoCapture);
442        PAS = AttributeSet::get(mMod->getContext(), 4U, B);
443    }
444    Attrs.push_back(PAS);
445    {
446        AttrBuilder B;
447        B.addAttribute(Attribute::NoUnwind);
448        B.addAttribute(Attribute::UWTable);
449        PAS = AttributeSet::get(mMod->getContext(), ~0U, B);
450    }
451    AttributeSet AttrSet = AttributeSet::get(mMod->getContext(), Attrs);
452
453    //Create the function that will be generated.
454    mFunction = mMod->getFunction("process_block");
455    if (!mFunction) {
456        mFunction = Function::Create(
457            /*Type=*/mFunctionType,
458            /*Linkage=*/GlobalValue::ExternalLinkage,
459            /*Name=*/"process_block", mMod);
460        mFunction->setCallingConv(CallingConv::C);
461    }
462    mFunction->setAttributes(AttrSet);
463}
464
465void PabloCompiler::Examine(StatementList & stmts) {
466    for (Statement * stmt : stmts) {
467        if (Assign * assign = dyn_cast<Assign>(stmt)) {
468            Examine(assign->getExpr());
469        }
470        if (Next * next = dyn_cast<Next>(stmt)) {
471            Examine(next->getExpr());
472        }
473        else if (If * ifStatement = dyn_cast<If>(stmt)) {
474            const auto preIfCarryCount = mCarryQueueSize;
475            const auto preIfAdvanceCount = mAdvanceQueueSize;
476            Examine(ifStatement->getCondition());
477            Examine(ifStatement->getBody());
478            int ifCarryCount = mCarryQueueSize - preIfCarryCount;
479            int ifAdvanceCount = mAdvanceQueueSize - preIfAdvanceCount;
480            if ((ifCarryCount + ifAdvanceCount) > 1) {
481              ++mAdvanceQueueSize;
482              ++ifAdvanceCount;
483            }
484            ifStatement->setInclusiveCarryCount(ifCarryCount);
485            ifStatement->setInclusiveAdvanceCount(ifAdvanceCount);
486        }
487        else if (While * whileStatement = dyn_cast<While>(stmt)) {
488            const auto preWhileCarryCount = mCarryQueueSize;
489            const auto preWhileAdvanceCount = mAdvanceQueueSize;
490            Examine(whileStatement->getCondition());
491            mMaxNestingDepth = std::max(mMaxNestingDepth, ++mNestingDepth);
492            Examine(whileStatement->getBody());
493            --mNestingDepth;
494            whileStatement->setInclusiveCarryCount(mCarryQueueSize - preWhileCarryCount);
495            whileStatement->setInclusiveAdvanceCount(mAdvanceQueueSize - preWhileAdvanceCount);
496        }
497    }
498}
499
500void PabloCompiler::Examine(PabloAST *expr)
501{
502    if (Call * call = dyn_cast<Call>(expr)) {
503        mCalleeMap.insert(std::make_pair(call->getCallee(), nullptr));
504    }
505    else if (And * pablo_and = dyn_cast<And>(expr)) {
506        Examine(pablo_and->getExpr1());
507        Examine(pablo_and->getExpr2());
508    }
509    else if (Or * pablo_or = dyn_cast<Or>(expr)) {
510        Examine(pablo_or->getExpr1());
511        Examine(pablo_or->getExpr2());
512    }
513    else if (Sel * pablo_sel = dyn_cast<Sel>(expr)) {
514        Examine(pablo_sel->getCondition());
515        Examine(pablo_sel->getTrueExpr());
516        Examine(pablo_sel->getFalseExpr());
517    }
518    else if (Not * pablo_not = dyn_cast<Not>(expr)) {
519        Examine(pablo_not->getExpr());
520    }
521    else if (Advance * adv = dyn_cast<Advance>(expr)) {
522        ++mAdvanceQueueSize;
523        Examine(adv->getExpr());
524    }
525    else if (MatchStar * mstar = dyn_cast<MatchStar>(expr)) {
526        ++mCarryQueueSize;
527        Examine(mstar->getMarker());
528        Examine(mstar->getCharClass());
529    }
530    else if (ScanThru * sthru = dyn_cast<ScanThru>(expr)) {
531        ++mCarryQueueSize;
532        Examine(sthru->getScanFrom());
533        Examine(sthru->getScanThru());
534    }
535}
536
537void PabloCompiler::DeclareCallFunctions() {
538    for (auto mapping : mCalleeMap) {
539        const String * callee = mapping.first;
540        void * callee_ptr = nullptr;
541        #define CHECK_GENERAL_CODE_CATEGORY(SUFFIX) \
542            if (callee->str() == #SUFFIX) { \
543                callee_ptr = (void*)&__get_category_##SUFFIX; \
544            } else
545        CHECK_GENERAL_CODE_CATEGORY(Cc)
546        CHECK_GENERAL_CODE_CATEGORY(Cf)
547        CHECK_GENERAL_CODE_CATEGORY(Cn)
548        CHECK_GENERAL_CODE_CATEGORY(Co)
549        CHECK_GENERAL_CODE_CATEGORY(Cs)
550        CHECK_GENERAL_CODE_CATEGORY(Ll)
551        CHECK_GENERAL_CODE_CATEGORY(Lm)
552        CHECK_GENERAL_CODE_CATEGORY(Lo)
553        CHECK_GENERAL_CODE_CATEGORY(Lt)
554        CHECK_GENERAL_CODE_CATEGORY(Lu)
555        CHECK_GENERAL_CODE_CATEGORY(Mc)
556        CHECK_GENERAL_CODE_CATEGORY(Me)
557        CHECK_GENERAL_CODE_CATEGORY(Mn)
558        CHECK_GENERAL_CODE_CATEGORY(Nd)
559        CHECK_GENERAL_CODE_CATEGORY(Nl)
560        CHECK_GENERAL_CODE_CATEGORY(No)
561        CHECK_GENERAL_CODE_CATEGORY(Pc)
562        CHECK_GENERAL_CODE_CATEGORY(Pd)
563        CHECK_GENERAL_CODE_CATEGORY(Pe)
564        CHECK_GENERAL_CODE_CATEGORY(Pf)
565        CHECK_GENERAL_CODE_CATEGORY(Pi)
566        CHECK_GENERAL_CODE_CATEGORY(Po)
567        CHECK_GENERAL_CODE_CATEGORY(Ps)
568        CHECK_GENERAL_CODE_CATEGORY(Sc)
569        CHECK_GENERAL_CODE_CATEGORY(Sk)
570        CHECK_GENERAL_CODE_CATEGORY(Sm)
571        CHECK_GENERAL_CODE_CATEGORY(So)
572        CHECK_GENERAL_CODE_CATEGORY(Zl)
573        CHECK_GENERAL_CODE_CATEGORY(Zp)
574        CHECK_GENERAL_CODE_CATEGORY(Zs)
575        // OTHERWISE ...
576        throw std::runtime_error("Unknown unicode category \"" + callee->str() + "\"");
577        #undef CHECK_GENERAL_CODE_CATEGORY
578        Value * unicodeCategory = mMod->getOrInsertFunction("__get_category_" + callee->str(), mBitBlockType, mBasisBitsInputPtr, NULL);
579        if (LLVM_UNLIKELY(unicodeCategory == nullptr)) {
580            throw std::runtime_error("Could not create static method call for unicode category \"" + callee->str() + "\"");
581        }
582        mExecutionEngine->addGlobalMapping(cast<GlobalValue>(unicodeCategory), callee_ptr);
583        mCalleeMap[callee] = unicodeCategory;
584    }
585}
586
587void PabloCompiler::compileStatements(const StatementList & stmts) {
588    for (const PabloAST * statement : stmts) {
589        compileStatement(statement);
590    }
591}
592
593void PabloCompiler::compileStatement(const PabloAST * stmt)
594{
595    if (const Assign * assign = dyn_cast<const Assign>(stmt))
596    {
597        Value* expr = compileExpression(assign->getExpr());
598        mMarkerMap[assign->getName()] = expr;
599        if (LLVM_UNLIKELY(assign->isOutputAssignment())) {
600            SetOutputValue(expr, assign->getOutputIndex());
601        }
602    }
603    if (const Next * next = dyn_cast<const Next>(stmt))
604    {
605        Value* expr = compileExpression(next->getExpr());
606        mMarkerMap[next->getName()] = expr;
607    }
608    else if (const If * ifStatement = dyn_cast<const If>(stmt))
609    //
610    //  The If-ElseZero stmt:
611    //  if <predicate:expr> then <body:stmt>* elsezero <defined:var>* endif
612    //  If the value of the predicate is nonzero, then determine the values of variables
613    //  <var>* by executing the given statements.  Otherwise, the value of the
614    //  variables are all zero.  Requirements: (a) no variable that is defined within
615    //  the body of the if may be accessed outside unless it is explicitly 
616    //  listed in the variable list, (b) every variable in the defined list receives
617    //  a value within the body, and (c) the logical consequence of executing
618    //  the statements in the event that the predicate is zero is that the
619    //  values of all defined variables indeed work out to be 0.
620    //
621    //  Simple Implementation with Phi nodes:  a phi node in the if exit block
622    //  is inserted for each variable in the defined variable list.  It receives
623    //  a zero value from the ifentry block and the defined value from the if
624    //  body.
625    //
626    {
627        BasicBlock * ifEntryBlock = mBasicBlock;  // The block we are in.
628        BasicBlock * ifBodyBlock = BasicBlock::Create(mMod->getContext(), "if.body", mFunction, 0);
629        BasicBlock * ifEndBlock = BasicBlock::Create(mMod->getContext(), "if.end", mFunction, 0);
630       
631        const auto baseCarryQueueIdx = mCarryQueueIdx;
632        const auto baseAdvanceQueueIdx = mAdvanceQueueIdx;
633       
634        int ifCarryCount = ifStatement->getInclusiveCarryCount();
635        int ifAdvanceCount = ifStatement->getInclusiveAdvanceCount();
636        //  Carry/Advance queue strategy.   
637        //  If there are any carries or advances at any nesting level within the
638        //  if statement, then the statement must be executed.   A "summary"
639        //  carryover variable is determined for this purpose, consisting of the
640        //  or of all of the carry and advance variables within the if.
641        //  This variable is determined as follows.
642        //  (a)  If the CarryCount and AdvanceCount are both 0, there is no summary variable.
643        //  (b)  If the CarryCount is 1 and the AdvanceCount is 0, then the summary
644        //       carryover variable is just the single carry queue entry.
645        //  (c)  If the CarryCount is 0 and the AdvanceCount is 1, then the summary
646        //       carryover variable is just the advance carry queue entry.
647        //  (d)  Otherwise, an additional advance queue entry is created for the
648        //       summary variable.
649        //  Note that the test for cases (c) and (d) may be combined: the summary carryover
650        //  variable is just last advance queue entry.
651        //
652       
653        IRBuilder<> b_entry(ifEntryBlock);
654        mBasicBlock = ifEntryBlock;
655        Value* if_test_value = compileExpression(ifStatement->getCondition());
656       
657        if ((ifCarryCount == 1) && (ifAdvanceCount == 0)) {
658            Value* last_if_pending_carries = genCarryInLoad(baseCarryQueueIdx);
659            if_test_value = b_entry.CreateOr(if_test_value, last_if_pending_carries);
660        }
661        else if ((ifCarryCount > 0) || (ifAdvanceCount > 0)) {
662            Value* last_if_pending_advances = genAdvanceInLoad(baseAdvanceQueueIdx + ifAdvanceCount - 1);
663            if_test_value = b_entry.CreateOr(if_test_value, last_if_pending_advances);
664        }
665        b_entry.CreateCondBr(genBitBlockAny(if_test_value), ifEndBlock, ifBodyBlock);
666
667        // Entry processing is complete, now handle the body of the if.
668       
669        IRBuilder<> bIfBody(ifBodyBlock);
670        mBasicBlock = ifBodyBlock;
671       
672        compileStatements(ifStatement->getBody());
673       
674        // After the recursive compile, now insert the code to compute the summary
675        // carry over variable.
676       
677        if ((ifCarryCount + ifAdvanceCount) > 1) {
678            // A summary variable is needed.
679
680            Value * carry_summary = mZeroInitializer;
681            for (int c = baseCarryQueueIdx; c < baseCarryQueueIdx + ifCarryCount; c++)
682            {
683                Value* carryq_value = genCarryInLoad(c);
684                carry_summary = bIfBody.CreateOr(carry_summary, carryq_value);
685            }
686            // Note that the limit in the following uses -1, because
687            // last entry of the advance queue is for the summary variable.
688            for (int c = baseAdvanceQueueIdx; c < baseAdvanceQueueIdx + ifAdvanceCount - 1; c++)
689            {
690                Value* advance_q_value = genAdvanceInLoad(c);
691                carry_summary = bIfBody.CreateOr(advance_q_value, carry_summary);
692            }
693            genAdvanceOutStore(carry_summary, mAdvanceQueueIdx++); //baseAdvanceQueueIdx + ifAdvanceCount - 1);
694        }
695        bIfBody.CreateBr(ifEndBlock);
696
697        //End Block
698        IRBuilder<> bEnd(ifEndBlock);
699        mBasicBlock = ifEndBlock;
700       
701        for (const Assign * a : ifStatement->getDefined()) {
702            PHINode * phi = bEnd.CreatePHI(mBitBlockType, 2, a->getName()->str());
703            auto f = mMarkerMap.find(a->getName());
704            assert (f != mMarkerMap.end());
705            phi->addIncoming(mZeroInitializer, ifEntryBlock);
706            phi->addIncoming(f->second, ifBodyBlock);
707            mMarkerMap[a->getName()] = phi;
708        }
709    }
710    else if (const While * whileStatement = dyn_cast<const While>(stmt))
711    {
712        const auto baseCarryQueueIdx = mCarryQueueIdx;
713        const auto baseAdvanceQueueIdx = mAdvanceQueueIdx;
714        if (mNestingDepth == 0) {
715            for (auto i = 0; i != whileStatement->getInclusiveCarryCount(); ++i) {
716                genCarryInLoad(baseCarryQueueIdx + i);
717            }
718            for (auto i = 0; i != whileStatement->getInclusiveAdvanceCount(); ++i) {
719                genAdvanceInLoad(baseAdvanceQueueIdx + i);
720            }
721        }
722
723        SmallVector<const Next*, 4> nextNodes;
724        for (const PabloAST * node : whileStatement->getBody()) {
725            if (isa<Next>(node)) {
726                nextNodes.push_back(cast<Next>(node));
727            }
728        }
729
730        // Compile the initial iteration statements; the calls to genCarryOutStore will update the
731        // mCarryQueueVector with the appropriate values. Although we're not actually entering a new basic
732        // block yet, increment the nesting depth so that any calls to genCarryInLoad or genCarryOutStore
733        // will refer to the previous value.
734
735        ++mNestingDepth;
736
737        compileStatements(whileStatement->getBody());
738
739        // Reset the carry queue index. Note: this ought to be changed in the future. Currently this assumes
740        // that compiling the while body twice will generate the equivalent IR. This is not necessarily true
741        // but works for now.
742        mCarryQueueIdx = baseCarryQueueIdx;
743        mAdvanceQueueIdx = baseAdvanceQueueIdx;
744
745        BasicBlock* whileCondBlock = BasicBlock::Create(mMod->getContext(), "while.cond", mFunction, 0);
746        BasicBlock* whileBodyBlock = BasicBlock::Create(mMod->getContext(), "while.body", mFunction, 0);
747        BasicBlock* whileEndBlock = BasicBlock::Create(mMod->getContext(), "while.end", mFunction, 0);
748
749        // Note: compileStatements may update the mBasicBlock pointer if the body contains nested loops. It
750        // may not be same one that we entered the function with.
751        IRBuilder<> bEntry(mBasicBlock);
752        bEntry.CreateBr(whileCondBlock);
753
754        // CONDITION BLOCK
755        IRBuilder<> bCond(whileCondBlock);
756        // generate phi nodes for any carry propogating instruction
757        int whileCarryCount = whileStatement->getInclusiveCarryCount();
758        int whileAdvanceCount = whileStatement->getInclusiveAdvanceCount();
759        std::vector<PHINode*> phiNodes(whileCarryCount + whileAdvanceCount + nextNodes.size());
760        unsigned index = 0;
761        for (index = 0; index != whileCarryCount; ++index) {
762            PHINode * phi = bCond.CreatePHI(mBitBlockType, 2);
763            phi->addIncoming(mCarryQueueVector[baseCarryQueueIdx + index], mBasicBlock);
764            mCarryQueueVector[baseCarryQueueIdx + index] = mZeroInitializer; // (use phi for multi-carry mode.)
765            phiNodes[index] = phi;
766        }
767        for (int i = 0; i != whileAdvanceCount; ++i) {
768            PHINode * phi = bCond.CreatePHI(mBitBlockType, 2);
769            phi->addIncoming(mAdvanceQueueVector[baseAdvanceQueueIdx + i], mBasicBlock);
770            mAdvanceQueueVector[baseAdvanceQueueIdx + i] = mZeroInitializer; // (use phi for multi-carry mode.)
771            phiNodes[index++] = phi;
772        }
773        // and for any Next nodes in the loop body
774        for (const Next * n : nextNodes) {
775            PHINode * phi = bCond.CreatePHI(mBitBlockType, 2, n->getName()->str());
776            auto f = mMarkerMap.find(n->getName());
777            assert (f != mMarkerMap.end());
778            phi->addIncoming(f->second, mBasicBlock);
779            mMarkerMap[n->getName()] = phi;
780            phiNodes[index++] = phi;
781        }
782
783        mBasicBlock = whileCondBlock;
784        bCond.CreateCondBr(genBitBlockAny(compileExpression(whileStatement->getCondition())), whileEndBlock, whileBodyBlock);
785
786        // BODY BLOCK
787        mBasicBlock = whileBodyBlock;
788        compileStatements(whileStatement->getBody());
789        // update phi nodes for any carry propogating instruction
790        IRBuilder<> bWhileBody(mBasicBlock);
791        for (index = 0; index != whileStatement->getInclusiveCarryCount(); ++index) {
792            Value * carryOut = bWhileBody.CreateOr(phiNodes[index], mCarryQueueVector[baseCarryQueueIdx + index]);
793            PHINode * phi = phiNodes[index];
794            phi->addIncoming(carryOut, mBasicBlock);
795            mCarryQueueVector[baseCarryQueueIdx + index] = phi;
796        }
797        for (int i = 0; i != whileAdvanceCount; ++i) {
798            Value * advOut = bWhileBody.CreateOr(phiNodes[index], mAdvanceQueueVector[baseAdvanceQueueIdx + i]);
799            PHINode * phi = phiNodes[index++];
800            phi->addIncoming(advOut, mBasicBlock);
801            mAdvanceQueueVector[baseAdvanceQueueIdx + i] = phi;
802        }
803        // and for any Next nodes in the loop body
804        for (const Next * n : nextNodes) {
805            auto f = mMarkerMap.find(n->getName());
806            assert (f != mMarkerMap.end());
807            PHINode * phi = phiNodes[index++];
808            phi->addIncoming(f->second, mBasicBlock);
809            mMarkerMap[n->getName()] = phi;
810        }
811
812        bWhileBody.CreateBr(whileCondBlock);
813
814        // EXIT BLOCK
815        mBasicBlock = whileEndBlock;
816        if (--mNestingDepth == 0) {
817            for (index = 0; index != whileCarryCount; ++index) {
818                genCarryOutStore(phiNodes[index], baseCarryQueueIdx + index);
819            }
820            for (index = 0; index != whileAdvanceCount; ++index) {
821                genAdvanceOutStore(phiNodes[whileCarryCount + index], baseAdvanceQueueIdx + index);
822            }
823        }
824    }
825}
826
827Value * PabloCompiler::compileExpression(const PabloAST * expr)
828{
829    IRBuilder<> b(mBasicBlock);
830    if (isa<Ones>(expr)) {
831        return mOneInitializer;
832    }
833    else if (isa<Zeroes>(expr)) {
834        return mZeroInitializer;
835    }
836    else if (const Call* call = dyn_cast<Call>(expr)) {
837        //Call the callee once and store the result in the marker map.
838        auto mi = mMarkerMap.find(call->getCallee());
839        if (mi == mMarkerMap.end()) {
840            auto ci = mCalleeMap.find(call->getCallee());
841            if (LLVM_UNLIKELY(ci == mCalleeMap.end())) {
842                throw std::runtime_error("Unexpected error locating static function for \"" + call->getCallee()->str() + "\"");
843            }
844            mi = mMarkerMap.insert(std::make_pair(call->getCallee(), b.CreateCall(ci->second, mBasisBitsAddr))).first;
845        }
846        return mi->second;
847    }
848    else if (const Var * var = dyn_cast<Var>(expr))
849    {
850        auto f = mMarkerMap.find(var->getName());
851        if (LLVM_UNLIKELY(f == mMarkerMap.end())) {
852            throw std::runtime_error((var->getName()->str()) + " used before creation.");
853        }
854        return f->second;
855    }
856    else if (const And * pablo_and = dyn_cast<And>(expr))
857    {
858        return b.CreateAnd(compileExpression(pablo_and->getExpr1()), compileExpression(pablo_and->getExpr2()), "and");
859    }
860    else if (const Or * pablo_or = dyn_cast<Or>(expr))
861    {
862        return b.CreateOr(compileExpression(pablo_or->getExpr1()), compileExpression(pablo_or->getExpr2()), "or");
863    }
864    else if (const Xor * pablo_xor = dyn_cast<Xor>(expr))
865    {
866        return b.CreateXor(compileExpression(pablo_xor->getExpr1()), compileExpression(pablo_xor->getExpr2()), "xor");
867    }
868    else if (const Sel * sel = dyn_cast<Sel>(expr))
869    {
870        Value* ifMask = compileExpression(sel->getCondition());
871        Value* ifTrue = b.CreateAnd(ifMask, compileExpression(sel->getTrueExpr()));
872        Value* ifFalse = b.CreateAnd(genNot(ifMask), compileExpression(sel->getFalseExpr()));
873        return b.CreateOr(ifTrue, ifFalse);
874    }
875    else if (const Not * pablo_not = dyn_cast<Not>(expr))
876    {
877        return genNot(compileExpression(pablo_not->getExpr()));
878    }
879    else if (const Advance * adv = dyn_cast<Advance>(expr))
880    {
881        Value* strm_value = compileExpression(adv->getExpr());
882        int shift = adv->getAdvanceAmount();
883        return genAdvanceWithCarry(strm_value, shift);
884    }
885    else if (const MatchStar * mstar = dyn_cast<MatchStar>(expr))
886    {
887        Value* marker = compileExpression(mstar->getMarker());
888        Value* cc = compileExpression(mstar->getCharClass());
889        Value* marker_and_cc = b.CreateAnd(marker, cc);
890        return b.CreateOr(b.CreateXor(genAddWithCarry(marker_and_cc, cc), cc), marker, "matchstar");
891    }
892    else if (const ScanThru * sthru = dyn_cast<ScanThru>(expr))
893    {
894        Value* marker_expr = compileExpression(sthru->getScanFrom());
895        Value* cc_expr = compileExpression(sthru->getScanThru());
896        return b.CreateAnd(genAddWithCarry(marker_expr, cc_expr), genNot(cc_expr), "scanthru");
897    }
898    else {
899        throw std::runtime_error("Unrecognized Pablo expression type; can't compile.");
900    }
901
902}
903
904#ifdef USE_UADD_OVERFLOW
905#ifdef USE_TWO_UADD_OVERFLOW
906PabloCompiler::SumWithOverflowPack PabloCompiler::callUaddOverflow(Value* int128_e1, Value* int128_e2) {
907    std::vector<Value*> struct_res_params;
908    struct_res_params.push_back(int128_e1);
909    struct_res_params.push_back(int128_e2);
910    CallInst* struct_res = CallInst::Create(mFunctionUaddOverflow, struct_res_params, "uadd_overflow_res", mBasicBlock);
911    struct_res->setCallingConv(CallingConv::C);
912    struct_res->setTailCall(false);
913    AttributeSet struct_res_PAL;
914    struct_res->setAttributes(struct_res_PAL);
915
916    SumWithOverflowPack ret;
917
918    std::vector<unsigned> int128_sum_indices;
919    int128_sum_indices.push_back(0);
920    ret.sum = ExtractValueInst::Create(struct_res, int128_sum_indices, "sum", mBasicBlock);
921
922    std::vector<unsigned> int1_obit_indices;
923    int1_obit_indices.push_back(1);
924    ret.obit = ExtractValueInst::Create(struct_res, int1_obit_indices, "obit", mBasicBlock);
925
926    return ret;
927}
928#else
929PabloCompiler::SumWithOverflowPack PabloCompiler::callUaddOverflow(Value* int128_e1, Value* int128_e2, Value* int1_cin) {
930    std::vector<Value*> struct_res_params;
931    struct_res_params.push_back(int128_e1);
932    struct_res_params.push_back(int128_e2);
933    struct_res_params.push_back(int1_cin);
934    CallInst* struct_res = CallInst::Create(mFunctionUaddOverflowCarryin, struct_res_params, "uadd_overflow_res", mBasicBlock);
935    struct_res->setCallingConv(CallingConv::C);
936    struct_res->setTailCall(false);
937    AttributeSet struct_res_PAL;
938    struct_res->setAttributes(struct_res_PAL);
939
940    SumWithOverflowPack ret;
941
942    std::vector<unsigned> int128_sum_indices;
943    int128_sum_indices.push_back(0);
944    ret.sum = ExtractValueInst::Create(struct_res, int128_sum_indices, "sum", mBasicBlock);
945
946    std::vector<unsigned> int1_obit_indices;
947    int1_obit_indices.push_back(1);
948    ret.obit = ExtractValueInst::Create(struct_res, int1_obit_indices, "obit", mBasicBlock);
949
950    return ret;
951}
952#endif
953#endif
954
955Value* PabloCompiler::genAddWithCarry(Value* e1, Value* e2) {
956    IRBuilder<> b(mBasicBlock);
957
958    //CarryQ - carry in.
959    const int carryIdx = mCarryQueueIdx++;
960    Value* carryq_value = genCarryInLoad(carryIdx);
961#ifdef USE_TWO_UADD_OVERFLOW
962    //This is the ideal implementation, which uses two uadd.with.overflow
963    //The back end should be able to recognize this pattern and combine it into uadd.with.overflow.carryin
964    CastInst* int128_e1 = new BitCastInst(e1, IntegerType::get(mMod->getContext(), BLOCK_SIZE), "e1_128", mBasicBlock);
965    CastInst* int128_e2 = new BitCastInst(e2, IntegerType::get(mMod->getContext(), BLOCK_SIZE), "e2_128", mBasicBlock);
966    CastInst* int128_carryq_value = new BitCastInst(carryq_value, IntegerType::get(mMod->getContext(), BLOCK_SIZE), "carryq_128", mBasicBlock);
967
968    SumWithOverflowPack sumpack0, sumpack1;
969
970    sumpack0 = callUaddOverflow(int128_e1, int128_e2);
971    sumpack1 = callUaddOverflow(sumpack0.sum, int128_carryq_value);
972
973    Value* obit = b.CreateOr(sumpack0.obit, sumpack1.obit, "carry_bit");
974    Value* sum = b.CreateBitCast(sumpack1.sum, mBitBlockType, "ret_sum");
975
976    /*obit is the i1 carryout, zero extend and insert it into a v2i64 or v4i64 vector.*/
977    ConstantAggregateZero* const_packed_5 = ConstantAggregateZero::get(mBitBlockType);
978    ConstantInt* const_int32_6 = ConstantInt::get(mMod->getContext(), APInt(32, StringRef("0"), 10));
979    CastInst* int64_o0 = new ZExtInst(obit, IntegerType::get(mMod->getContext(), 64), "o0", mBasicBlock);
980    InsertElementInst* carry_out = InsertElementInst::Create(const_packed_5, int64_o0, const_int32_6, "carry_out", mBasicBlock);
981
982#elif defined USE_UADD_OVERFLOW
983    //use llvm.uadd.with.overflow.i128 or i256
984    CastInst* int128_e1 = new BitCastInst(e1, IntegerType::get(mMod->getContext(), BLOCK_SIZE), "e1_128", mBasicBlock);
985    CastInst* int128_e2 = new BitCastInst(e2, IntegerType::get(mMod->getContext(), BLOCK_SIZE), "e2_128", mBasicBlock);
986
987    //get i1 carryin from iBLOCK_SIZE
988    ConstantInt* const_int32_6 = ConstantInt::get(mMod->getContext(), APInt(32, StringRef("0"), 10));
989    ExtractElementInst * int64_carryq_value = ExtractElementInst::Create(carryq_value, const_int32_6, "carryq_64", mBasicBlock);
990    CastInst* int1_carryq_value = new TruncInst(int64_carryq_value, IntegerType::get(mMod->getContext(), 1), "carryq_1", mBasicBlock);
991
992    SumWithOverflowPack sumpack0;
993    sumpack0 = callUaddOverflow(int128_e1, int128_e2, int1_carryq_value);
994    Value* obit = sumpack0.obit;
995    Value* sum = b.CreateBitCast(sumpack0.sum, mBitBlockType, "sum");
996
997    /*obit is the i1 carryout, zero extend and insert it into a v2i64 or v4i64 vector.*/
998    ConstantAggregateZero* const_packed_5 = ConstantAggregateZero::get(mBitBlockType);
999    CastInst* int64_o0 = new ZExtInst(obit, IntegerType::get(mMod->getContext(), 64), "o0", mBasicBlock);
1000    InsertElementInst* carry_out = InsertElementInst::Create(const_packed_5, int64_o0, const_int32_6, "carry_out", mBasicBlock);
1001#elif (BLOCK_SIZE == 128)
1002    //calculate carry through logical ops
1003    Value* carrygen = b.CreateAnd(e1, e2, "carrygen");
1004    Value* carryprop = b.CreateOr(e1, e2, "carryprop");
1005    Value* digitsum = b.CreateAdd(e1, e2, "digitsum");
1006    Value* partial = b.CreateAdd(digitsum, carryq_value, "partial");
1007    Value* digitcarry = b.CreateOr(carrygen, b.CreateAnd(carryprop, genNot(partial)));
1008    Value* mid_carry_in = genShiftLeft64(b.CreateLShr(digitcarry, 63), "mid_carry_in");
1009
1010    Value* sum = b.CreateAdd(partial, mid_carry_in, "sum");
1011    Value* carry_out = genShiftHighbitToLow(b.CreateOr(carrygen, b.CreateAnd(carryprop, genNot(sum))), "carry_out");
1012#else
1013    //BLOCK_SIZE == 256, there is no other implementation
1014    static_assert(false, "Add with carry for 256-bit bitblock requires USE_UADD_OVERFLOW");
1015#endif //USE_TWO_UADD_OVERFLOW
1016
1017    genCarryOutStore(carry_out, carryIdx);
1018    return sum;
1019}
1020
1021Value* PabloCompiler::genCarryInLoad(const unsigned index) {
1022    assert (index < mCarryQueueVector.size());
1023    if (mNestingDepth == 0) {
1024        IRBuilder<> b(mBasicBlock);
1025        mCarryQueueVector[index] = b.CreateAlignedLoad(b.CreateGEP(mCarryQueuePtr, b.getInt64(index)), BLOCK_SIZE/8, false);
1026    }
1027    return mCarryQueueVector[index];
1028}
1029
1030void PabloCompiler::genCarryOutStore(Value* carryOut, const unsigned index ) {
1031    assert (carryOut);
1032    assert (index < mCarryQueueVector.size());
1033    if (mNestingDepth == 0) {
1034        IRBuilder<> b(mBasicBlock);
1035        b.CreateAlignedStore(carryOut, b.CreateGEP(mCarryQueuePtr, b.getInt64(index)), BLOCK_SIZE/8, false);
1036    }
1037    mCarryQueueVector[index] = carryOut;
1038}
1039
1040Value* PabloCompiler::genAdvanceInLoad(const unsigned index) {
1041    assert (index < mAdvanceQueueVector.size());
1042    if (mNestingDepth == 0) {
1043        IRBuilder<> b(mBasicBlock);
1044        mAdvanceQueueVector[index] = b.CreateAlignedLoad(b.CreateGEP(mAdvanceQueuePtr, b.getInt64(index)), BLOCK_SIZE/8, false);
1045    }
1046    return mAdvanceQueueVector[index];
1047}
1048
1049void PabloCompiler::genAdvanceOutStore(Value* advanceOut, const unsigned index ) {
1050    assert (advanceOut);
1051    assert (index < mAdvanceQueueVector.size());
1052    if (mNestingDepth == 0) {
1053        IRBuilder<> b(mBasicBlock);
1054        b.CreateAlignedStore(advanceOut, b.CreateGEP(mAdvanceQueuePtr, b.getInt64(index)), BLOCK_SIZE/8, false);
1055    }
1056    mAdvanceQueueVector[index] = advanceOut;
1057}
1058
1059inline Value* PabloCompiler::genBitBlockAny(Value* test) {
1060    IRBuilder<> b(mBasicBlock);
1061    Value* cast_marker_value_1 = b.CreateBitCast(test, IntegerType::get(mMod->getContext(), BLOCK_SIZE));
1062    return b.CreateICmpEQ(cast_marker_value_1, ConstantInt::get(IntegerType::get(mMod->getContext(), BLOCK_SIZE), 0));
1063}
1064
1065Value* PabloCompiler::genShiftHighbitToLow(Value* e, const Twine &namehint) {
1066    IRBuilder<> b(mBasicBlock);
1067    Value* i128_val = b.CreateBitCast(e, IntegerType::get(mMod->getContext(), BLOCK_SIZE));
1068    return b.CreateBitCast(b.CreateLShr(i128_val, BLOCK_SIZE - 1, namehint), mBitBlockType);
1069}
1070
1071Value* PabloCompiler::genShiftLeft64(Value* e, const Twine &namehint) {
1072    IRBuilder<> b(mBasicBlock);
1073    Value* i128_val = b.CreateBitCast(e, IntegerType::get(mMod->getContext(), BLOCK_SIZE));
1074    return b.CreateBitCast(b.CreateShl(i128_val, 64, namehint), mBitBlockType);
1075}
1076
1077inline Value* PabloCompiler::genNot(Value* expr) {
1078    IRBuilder<> b(mBasicBlock);
1079    return b.CreateXor(expr, mOneInitializer, "not");
1080}
1081
1082Value* PabloCompiler::genAdvanceWithCarry(Value* strm_value, int shift_amount) {
1083
1084    IRBuilder<> b(mBasicBlock);
1085
1086    const auto advanceIdx = mAdvanceQueueIdx++;
1087#ifdef USE_LONG_INTEGER_SHIFT
1088    Value* advanceq_longint = b.CreateBitCast(genAdvanceInLoad(advanceIdx), IntegerType::get(mMod->getContext(), BLOCK_SIZE));
1089    Value* strm_longint = b.CreateBitCast(strm_value, IntegerType::get(mMod->getContext(), BLOCK_SIZE));
1090    Value* adv_longint = b.CreateOr(b.CreateShl(strm_longint, shift_amount), b.CreateLShr(advanceq_longint, BLOCK_SIZE - shift_amount), "advance");
1091    Value* result_value = b.CreateBitCast(adv_longint, mBitBlockType);
1092    genAdvanceOutStore(strm_value, advanceIdx);
1093
1094    return result_value;
1095#elif (BLOCK_SIZE == 128)
1096    if (shift_amount == 1) {
1097        Value* advanceq_value = genShiftHighbitToLow(genAdvanceInLoad(advanceIdx));
1098        Value* srli_1_value = b.CreateLShr(strm_value, 63);
1099        Value* packed_shuffle;
1100        Constant* const_packed_1_elems [] = {b.getInt32(0), b.getInt32(2)};
1101        Constant* const_packed_1 = ConstantVector::get(const_packed_1_elems);
1102        packed_shuffle = b.CreateShuffleVector(advanceq_value, srli_1_value, const_packed_1);
1103
1104        Constant* const_packed_2_elems[] = {b.getInt64(1), b.getInt64(1)};
1105        Constant* const_packed_2 = ConstantVector::get(const_packed_2_elems);
1106
1107        Value* shl_value = b.CreateShl(strm_value, const_packed_2);
1108        Value* result_value = b.CreateOr(shl_value, packed_shuffle, "advance");
1109
1110        //CarryQ - carry out:
1111        genAdvanceOutStore(strm_value, advanceIdx);
1112
1113        return result_value;
1114    }
1115    else if (shift_amount < 64) {
1116        // This is the preferred logic, but is too slow for the general case.
1117        // We need to speed up our custom LLVM for this code.
1118        Value* advanceq_longint = b.CreateBitCast(genAdvanceInLoad(advanceIdx), IntegerType::get(mMod->getContext(), BLOCK_SIZE));
1119        Value* strm_longint = b.CreateBitCast(strm_value, IntegerType::get(mMod->getContext(), BLOCK_SIZE));
1120        Value* adv_longint = b.CreateOr(b.CreateShl(strm_longint, shift_amount), b.CreateLShr(advanceq_longint, BLOCK_SIZE - shift_amount), "advance");
1121        Value* result_value = b.CreateBitCast(adv_longint, mBitBlockType);
1122        genAdvanceOutStore(strm_value, advanceIdx);
1123
1124        return result_value;
1125    }
1126    else {//if (shift_amount >= 64) {
1127        throw std::runtime_error("Shift amount >= 64 in Advance is currently unsupported.");
1128    }
1129#else
1130    //BLOCK_SIZE == 256
1131    static_assert(false, "Advance with carry on 256-bit bitblock requires long integer shifts (USE_LONG_INTEGER_SHIFT).");
1132#endif //USE_LONG_INTEGER_SHIFT
1133}
1134
1135void PabloCompiler::SetOutputValue(Value * marker, const unsigned index) {
1136    IRBuilder<> b(mBasicBlock);
1137    if (marker->getType()->isPointerTy()) {
1138        marker = b.CreateAlignedLoad(marker, BLOCK_SIZE/8, false);
1139    }
1140    Value* indices[] = {b.getInt64(0), b.getInt32(index)};
1141    Value* gep = b.CreateGEP(mOutputAddrPtr, indices);
1142    b.CreateAlignedStore(marker, gep, BLOCK_SIZE/8, false);
1143}
1144
1145}
Note: See TracBrowser for help on using the repository browser.