Ignore:
Timestamp:
Feb 15, 2017, 4:08:37 PM (2 years ago)
Author:
nmedfort
Message:

memcpy/memset support for 32-bit systems; more error messages/handling; bug fix for ParabixCharacterClassKernelBuilder. Continued work on parenthesis matching + expandable buffers.

Location:
icGREP/icgrep-devel/icgrep
Files:
13 edited

Legend:

Unmodified
Added
Removed
  • icGREP/icgrep-devel/icgrep/IR_Gen/CBuilder.cpp

    r5309 r5320  
    112112Value * CBuilder::CreateMalloc(Type * type, Value * size) {
    113113    DataLayout DL(getModule());
    114     Type * const intTy = getIntPtrTy(DL);
    115     Constant * const width = ConstantExpr::getSizeOf(type);
     114    IntegerType * const intTy = getIntPtrTy(DL);
    116115    if (size->getType() != intTy) {
    117116        if (isa<Constant>(size)) {
    118117            size = ConstantExpr::getIntegerCast(cast<Constant>(size), intTy, false);
    119118        } else {
    120             size = CreateTruncOrBitCast(size, intTy);
    121         }
     119            size = CreateZExtOrTrunc(size, intTy);
     120        }
     121    }   
     122    Constant * width = ConstantExpr::getSizeOf(type);
     123    if (LLVM_UNLIKELY(width->getType() != intTy)) {
     124        width = ConstantExpr::getIntegerCast(width, intTy, false);
    122125    }
    123126    if (!width->isOneValue()) {
     
    136139        malloc->setDoesNotAlias(0);
    137140    }
     141    assert (size->getType() == intTy);
    138142    CallInst * ci = CreateCall(malloc, size);
    139143    ci->setTailCall();
     
    146150    DataLayout DL(getModule());
    147151    IntegerType * const intTy = getIntPtrTy(DL);
     152    if (size->getType() != intTy) {
     153        if (isa<Constant>(size)) {
     154            size = ConstantExpr::getIntegerCast(cast<Constant>(size), intTy, false);
     155        } else {
     156            size = CreateZExtOrTrunc(size, intTy);
     157        }
     158    }
    148159    const auto byteWidth = (intTy->getBitWidth() / 8);
    149160    Constant * const offset = ConstantInt::get(intTy, alignment + byteWidth - 1);
    150     Constant * const width = ConstantExpr::getSizeOf(type);
     161    Constant * width = ConstantExpr::getSizeOf(type);
     162    if (LLVM_UNLIKELY(width->getType() != intTy)) {
     163        width = ConstantExpr::getIntegerCast(width, intTy, false);
     164    }
    151165    if (!width->isOneValue()) {
    152166        if (isa<Constant>(size)) {
     
    154168        } else {
    155169            size = CreateMul(size, width);
    156         }
    157     }
    158     if (size->getType() != intTy) {
    159         if (isa<Constant>(size)) {
    160             size = ConstantExpr::getIntegerCast(cast<Constant>(size), intTy, false);
    161         } else {
    162             size = CreateTruncOrBitCast(size, intTy);
    163170        }
    164171    }
     
    168175        size = CreateAdd(size, offset);
    169176    }
     177    assert (size->getType() == intTy);
    170178    Value * unaligned = CreatePtrToInt(CreateMalloc(getInt8Ty(), size), intTy);
    171179    Value * aligned = CreateAnd(CreateAdd(unaligned, offset), ConstantExpr::getNot(ConstantInt::get(intTy, alignment - 1)));
     
    220228Value * CBuilder::CreateRealloc(Value * ptr, Value * size) {
    221229    DataLayout DL(getModule());
    222     Type * const intTy = getIntPtrTy(DL);
     230    IntegerType * const intTy = getIntPtrTy(DL);
    223231    PointerType * type = cast<PointerType>(ptr->getType());
    224     Constant * const width = ConstantExpr::getSizeOf(type->getPointerElementType());
     232    Constant * width = ConstantExpr::getSizeOf(type->getPointerElementType());
     233    if (LLVM_UNLIKELY(width->getType() != intTy)) {
     234        width = ConstantExpr::getIntegerCast(width, intTy, false);
     235    }
    225236    if (size->getType() != intTy) {
    226237        if (isa<Constant>(size)) {
    227238            size = ConstantExpr::getIntegerCast(cast<Constant>(size), intTy, false);
    228239        } else {
    229             size = CreateTruncOrBitCast(size, intTy);
     240            size = CreateZExtOrTrunc(size, intTy);
    230241        }
    231242    }
     
    245256        realloc->setDoesNotAlias(1);
    246257    }
     258    assert (size->getType() == intTy);
    247259    CallInst * ci = CreateCall(realloc, {ptr, size});
    248260    ci->setTailCall();
     
    252264
    253265void CBuilder::CreateMemZero(Value * ptr, Value * size, const unsigned alignment) {
    254     assert (ptr->getType()->isPointerTy() && size->getType()->isIntegerTy());
    255     Type * const type = ptr->getType();
    256     Constant * const width = ConstantExpr::getSizeOf(type->getPointerElementType());
     266    DataLayout DL(getModule());
     267    IntegerType * const intTy = getIntPtrTy(DL);
     268    Constant * width = ConstantExpr::getSizeOf(ptr->getType()->getPointerElementType());
     269    if (LLVM_UNLIKELY(width->getType() != intTy)) {
     270        width = ConstantExpr::getIntegerCast(width, intTy, false);
     271    }
     272    if (size->getType() != intTy) {
     273        if (isa<Constant>(size)) {
     274            size = ConstantExpr::getIntegerCast(cast<Constant>(size), intTy, false);
     275        } else {
     276            size = CreateZExtOrTrunc(size, intTy);
     277        }
     278    }
    257279    if (isa<Constant>(size)) {
    258280        size = ConstantExpr::getMul(cast<Constant>(size), width);
     
    260282        size = CreateMul(size, width);
    261283    }
     284    assert (size->getType() == intTy);
    262285    CreateMemSet(CreatePointerCast(ptr, getInt8PtrTy()), getInt8(0), size, alignment);
    263286}
     
    268291
    269292LoadInst * CBuilder::CreateAtomicLoadAcquire(Value * ptr) {
    270     unsigned alignment = cast<PointerType>(ptr->getType())->getElementType()->getPrimitiveSizeInBits() / 8;
     293    const auto alignment = ptr->getType()->getPointerElementType()->getPrimitiveSizeInBits() / 8;
    271294    LoadInst * inst = CreateAlignedLoad(ptr, alignment);
    272295    inst->setOrdering(AtomicOrdering::Acquire);
     
    275298}
    276299StoreInst * CBuilder::CreateAtomicStoreRelease(Value * val, Value * ptr) {
    277     unsigned alignment = cast<PointerType>(ptr->getType())->getElementType()->getPrimitiveSizeInBits() / 8;
     300    const auto alignment = ptr->getType()->getPointerElementType()->getPrimitiveSizeInBits() / 8;
    278301    StoreInst * inst = CreateAlignedStore(val, ptr, alignment);
    279302    inst->setOrdering(AtomicOrdering::Release);
  • icGREP/icgrep-devel/icgrep/IR_Gen/idisa_target.cpp

    r5267 r5320  
    1010#include <IR_Gen/idisa_i64_builder.h>
    1111#include <IR_Gen/idisa_nvptx_builder.h>
    12 #include <llvm/ADT/Triple.h>
     12#include <llvm/IR/Module.h>
    1313
    1414namespace IDISA {
     
    1616IDISA_Builder * GetIDISA_Builder(llvm::Module * mod) {
    1717    const bool hasAVX2 = AVX2_available();
    18     const bool isArch32Bit = Triple(llvm::sys::getProcessTriple()).isArch32Bit();
     18    DataLayout DL(mod);
     19    Type * const intTy = DL.getIntPtrType(mod->getContext());
     20    const auto registerWidth = intTy->getIntegerBitWidth();
    1921    if (LLVM_LIKELY(codegen::BlockSize == 0)) {  // No BlockSize override: use processor SIMD width
    2022        codegen::BlockSize = hasAVX2 ? 256 : 128;
     
    2224    if (codegen::BlockSize >= 256) {
    2325        if (hasAVX2) {
    24             return new IDISA_AVX2_Builder(mod, isArch32Bit ? 32 : 64, codegen::BlockSize);
     26            return new IDISA_AVX2_Builder(mod, registerWidth, codegen::BlockSize);
    2527        }
    2628    } else if (codegen::BlockSize == 64) {
    27         return new IDISA_I64_Builder(mod, isArch32Bit ? 32 : 64);
     29        return new IDISA_I64_Builder(mod, registerWidth);
    2830    }
    29     return new IDISA_SSE2_Builder(mod, isArch32Bit ? 32 : 64, codegen::BlockSize);
     31    return new IDISA_SSE2_Builder(mod, registerWidth, codegen::BlockSize);
    3032}
    3133
  • icGREP/icgrep-devel/icgrep/array-test.cpp

    r5316 r5320  
    1818#include <llvm/Support/raw_ostream.h>              // for errs
    1919#include <pablo/pablo_kernel.h>                    // for PabloKernel
     20#include <pablo/pe_zeroes.h>
    2021#include <toolchain.h>                             // for JIT_to_ExecutionEn...
    2122#include <pablo/builder.hpp>                       // for PabloBuilder
     
    3132namespace llvm { class Type; }
    3233namespace pablo { class Integer; }
    33 namespace pablo { class PabloAST; }
    3434namespace pablo { class Var; }
    3535
     
    6464    PabloAST * rparen = pb.createAnd(temp3, temp8, "rparens");
    6565    PabloAST * parens = pb.createOr(lparen, rparen);
    66 
    6766    PabloAST * pscan = pb.createScanTo(pb.createAdvance(lparen, 1), parens, "pscan");
    68 
    6967    PabloAST * closed = pb.createAnd(pscan, rparen, "closed");
    7068
    7169    pb.createAssign(pb.createExtract(matches, 0), closed);
    7270
    73     Var * all_closed = pb.createVar("all_closed", closed);
    74     Var * pending_lparen = pb.createVar("pending_lparen", pb.createAnd(pscan, lparen));
    75     Var * unmatched_rparen = pb.createVar("unmatched_rparen", pb.createAnd(rparen, pb.createNot(closed)));
    76     Var * in_play = pb.createVar("in_play", pb.createOr(pending_lparen, unmatched_rparen));
    77 
    78 
    79     Integer * one = pb.getInteger(1);
    80 
    81     Var * index = pb.createVar("i", one);
     71    Var * const all_closed = pb.createVar("all_closed", closed);
     72    Var * const pending_lparen = pb.createVar("pending_lparen", pb.createAnd(pscan, lparen));
     73    PabloAST * unmatched_rparen = pb.createAnd(rparen, pb.createNot(closed));
     74    Var * const in_play = pb.createVar("in_play", pb.createOr(pending_lparen, unmatched_rparen));
     75    Var * const errors = pb.createVar("errors", pb.createZeroes());
     76
     77    Integer * const one = pb.getInteger(1);
     78    Var * const index = pb.createVar("i", one);
    8279
    8380    PabloBuilder body = PabloBuilder::Create(pb);
     
    8885        closed = body.createAnd(pscan, rparen);
    8986        body.createAssign(body.createExtract(matches, index), closed);
     87        body.createAssign(all_closed, body.createOr(all_closed, closed));
     88        body.createAssign(errors, body.createOr(errors, body.createAtEOF(pscan)));
     89
    9090        body.createAssign(pending_lparen, body.createAnd(pscan, lparen));
    91         body.createAssign(all_closed, body.createOr(all_closed, closed));
    92         body.createAssign(unmatched_rparen, body.createAnd(rparen, body.createNot(all_closed)));
     91
     92        unmatched_rparen = body.createAnd(rparen, body.createNot(all_closed));
    9393        body.createAssign(in_play, body.createOr(pending_lparen, unmatched_rparen));
    9494        body.createAssign(index, body.createAdd(index, one));
    9595
    9696
     97    pb.createAssign(errors, pb.createOr(errors, pb.createAnd(rparen, pb.createNot(all_closed))));
     98    pb.createAssign(kernel->getOutputStreamVar("errors"), errors);
     99
    97100    pb.print(errs());
    98101
    99102}
     103
     104//42    def Match_Parens(lex, matches):
     105//43            parens = lex.LParen | lex.RParen
     106//44            i = 0
     107//45            pscan = pablo.AdvanceThenScanTo(lex.LParen, parens)
     108//46            matches.closed[0] = pscan & lex.RParen
     109//47            all_closed = matches.closed[0]
     110//48            matches.error = pablo.atEOF(pscan)
     111//49            # Not matched, still pending.
     112//50            pending_LParen = pscan & lex.LParen
     113//51            RParen_unmatched = lex.RParen &~ matches.closed[0]
     114//52            inPlay = pending_LParen | RParen_unmatched
     115//53            while pending_LParen:
     116//54                    i += 1
     117//55                    pscan = pablo.AdvanceThenScanTo(pending_LParen, inPlay)
     118//56                    matches.closed[i] = pscan & lex.RParen
     119//57                    all_closed |= matches.closed[i]
     120//58                    matches.error |= pablo.atEOF(pscan)
     121//59                    pending_LParen = pscan & lex.LParen
     122//60                    RParen_unmatched = lex.RParen &~ all_closed
     123//61                    inPlay = pending_LParen | RParen_unmatched
     124//62            #
     125//63            # Any closing paren that was not actually used to close
     126//64            # an opener is in error.
     127//65            matches.error |= lex.RParen &~ all_closed
    100128
    101129Function * pipeline(IDISA::IDISA_Builder * iBuilder, const unsigned count) {
     
    117145    SingleBlockBuffer BasisBits(iBuilder, iBuilder->getStreamSetTy(8, 1));
    118146    ExpandableBuffer matches(iBuilder, iBuilder->getStreamSetTy(count, 1), 2);
     147    SingleBlockBuffer errors(iBuilder, iBuilder->getStreamTy());
    119148
    120149    MMapSourceKernel mmapK(iBuilder);
     
    122151    mmapK.setInitialArguments({fileSize});
    123152   
    124     S2PKernel  s2pk(iBuilder);
     153    S2PKernel s2pk(iBuilder);
    125154    s2pk.generateKernel({&ByteStream}, {&BasisBits});
    126155
    127156    PabloKernel bm(iBuilder, "MatchParens",
    128157        {Binding{iBuilder->getStreamSetTy(8), "input"}},
    129         {Binding{iBuilder->getStreamSetTy(count), "matches"}});
     158        {Binding{iBuilder->getStreamSetTy(count), "matches"}, Binding{iBuilder->getStreamTy(), "errors"}});
    130159
    131160    generate(&bm);
    132161
    133     bm.generateKernel({&BasisBits}, {&matches});
     162    bm.generateKernel({&BasisBits}, {&matches, &errors});
    134163
    135164    iBuilder->SetInsertPoint(BasicBlock::Create(mod->getContext(), "entry", main, 0));
     
    138167    BasisBits.allocateBuffer();
    139168    matches.allocateBuffer();
     169    errors.allocateBuffer();
    140170
    141171    generatePipelineLoop(iBuilder, {&mmapK, &s2pk, &bm});
  • icGREP/icgrep-devel/icgrep/grep_engine.cpp

    r5314 r5320  
    223223    mmapK.setInitialArguments({fileSize});
    224224
     225
     226
    225227    kernel::ScanMatchKernel scanMatchK(iBuilder, grepType);
    226228    scanMatchK.generateKernel({&MatchResults}, {});
     
    330332    mmapK.setInitialArguments({fileSize});
    331333   
     334
     335    // ParabixCharacterClassKernelBuilder(IDISA::IDISA_Builder * iBuilder, std::string ccSetName, const std::vector<re::CC *> & charClasses, unsigned basisBitsCount);
     336
     337    std::vector<re::CC *> Y;
     338    Y.push_back(re::makeCC(1, 9));
     339
    332340    CircularBuffer BasisBits(iBuilder, iBuilder->getStreamSetTy(8), segmentSize * bufferSegments);
    333341
  • icGREP/icgrep-devel/icgrep/icgrep.cpp

    r5267 r5320  
    9292// and signal error code 2 (grep convention).
    9393//
    94 static void icgrep_error_handler(void *UserData, const std::string &Message,
    95                              bool GenCrashDiag) {
    96 
     94static void icgrep_error_handler(void *UserData, const std::string &Message, bool GenCrashDiag) {
     95    #ifndef NDEBUG
     96    throw std::runtime_error(Message);
     97    #else
    9798    // Modified from LLVM's internal report_fatal_error logic.
    9899    SmallVector<char, 64> Buffer;
     
    102103    ssize_t written = ::write(2, MessageStr.data(), MessageStr.size());
    103104    (void)written; // If something went wrong, we deliberately just give up.
    104 
    105105    // Run the interrupt handlers to make sure any special cleanups get done, in
    106106    // particular that we remove files registered with RemoveFileOnSignal.
    107107    llvm::sys::RunInterruptHandlers();
    108108    exit(2);
     109    #endif
    109110}
    110111
  • icGREP/icgrep-devel/icgrep/kernels/cc_kernel.cpp

    r5317 r5320  
    6868    auto & builder = ccc.getBuilder();
    6969    for (CC * cc : charClasses) {
    70         Var * const r = addOutput(cc->canonicalName(re::ByteClass), getStreamSetTy());
     70        Var * const r = addOutput(cc->canonicalName(re::ByteClass), getStreamTy());
    7171        builder.createAssign(r, ccc.compileCC("cc", cc, builder));
    7272    }
  • icGREP/icgrep-devel/icgrep/kernels/interface.cpp

    r5297 r5320  
    4545        (++args)->setName(binding.name);
    4646    }
    47     assert ((++args) == init->arg_end());
    4847
    4948    // Create the doSegment function prototype.
     
    6362        (++args)->setName(ss.name + "_availableItems");
    6463    }
    65     assert ((++args) == doSegment->arg_end());
    6664
    6765    // Add any additional kernel declarations
  • icGREP/icgrep-devel/icgrep/kernels/kernel.cpp

    r5317 r5320  
    1212#include <llvm/IR/Module.h>
    1313#include <llvm/Support/raw_ostream.h>
     14#include <llvm/IR/LegacyPassManager.h>
     15#include <llvm/Transforms/Scalar.h>
    1416
    1517static const auto DO_BLOCK_SUFFIX = "_DoBlock";
     
    3234using namespace kernel;
    3335using namespace parabix;
     36using namespace llvm::legacy;
    3437
    3538unsigned KernelBuilder::addScalar(Type * const type, const std::string & name) {
    3639    if (LLVM_UNLIKELY(mKernelStateType != nullptr)) {
    37         report_fatal_error("Cannot add kernel field " + name + " after kernel state finalized");
     40        report_fatal_error("Cannot add field " + name + " to " + getName() + " after kernel state finalized");
    3841    }
    3942    if (LLVM_UNLIKELY(mKernelMap.count(name))) {
    40         report_fatal_error("Kernel already contains field " + name);
     43        report_fatal_error(getName() + " already contains scalar field " + name);
    4144    }
    4245    const auto index = mKernelFields.size();
     
    7275        raw_string_ostream out(tmp);
    7376        out << "kernel contains " << mStreamSetInputBuffers.size() << " input buffers for "
    74         << mStreamSetInputs.size() << " input stream sets.";
     77            << mStreamSetInputs.size() << " input stream sets.";
    7578        report_fatal_error(out.str());
    7679    }
     
    7982        raw_string_ostream out(tmp);
    8083        out << "kernel contains " << mStreamSetOutputBuffers.size() << " output buffers for "
    81         << mStreamSetOutputs.size() << " output stream sets.";
     84            << mStreamSetOutputs.size() << " output stream sets.";
    8285        report_fatal_error(out.str());
    8386    }
     
    123126
    124127void KernelBuilder::generateKernel(const std::vector<StreamSetBuffer *> & inputs, const std::vector<StreamSetBuffer *> & outputs) {
     128
     129    mStreamSetInputBuffers.assign(inputs.begin(), inputs.end());
     130    for (unsigned i = 0; i < mStreamSetInputBuffers.size(); ++i) {
     131        if (LLVM_UNLIKELY(mStreamSetInputBuffers[i] == nullptr)) {
     132            report_fatal_error(getName() + ": input stream set " + std::to_string(i)
     133                               + " cannot be null when calling generateKernel()");
     134        }
     135    }
     136    if (LLVM_UNLIKELY(mStreamSetInputs.size() != mStreamSetInputBuffers.size())) {
     137        report_fatal_error(getName() + ": expected " + std::to_string(mStreamSetInputs.size()) +
     138                           " input stream sets but generateKernel() was given "
     139                           + std::to_string(mStreamSetInputBuffers.size()));
     140    }
     141
     142    mStreamSetOutputBuffers.assign(outputs.begin(), outputs.end());
     143    for (unsigned i = 0; i < mStreamSetOutputBuffers.size(); ++i) {
     144        if (LLVM_UNLIKELY(mStreamSetOutputBuffers[i] == nullptr)) {
     145            report_fatal_error(getName() + ": output stream set " + std::to_string(i)
     146                               + " cannot be null when calling generateKernel()");
     147        }
     148    }
     149    if (LLVM_UNLIKELY(mStreamSetOutputs.size() != mStreamSetOutputBuffers.size())) {
     150        report_fatal_error(getName() + ": expected " + std::to_string(mStreamSetOutputs.size())
     151                           + " output stream sets but generateKernel() was given "
     152                           + std::to_string(mStreamSetOutputBuffers.size()));
     153    }
     154
     155
    125156    auto savePoint = iBuilder->saveIP();
    126     Module * const m = iBuilder->getModule();
    127     mStreamSetInputBuffers.assign(inputs.begin(), inputs.end());
    128     mStreamSetOutputBuffers.assign(outputs.begin(), outputs.end());
    129157    prepareKernel(); // possibly overridden by the KernelBuilder subtype
    130     addKernelDeclarations(m);
     158    addKernelDeclarations(iBuilder->getModule());
    131159    callGenerateInitMethod();
    132160    generateInternalMethods();
     
    154182        producerPos.push_back(&*(args++));
    155183    }
    156     assert (args == mCurrentFunction->arg_end());
    157184    generateDoSegmentMethod(doFinal, producerPos); // must be overridden by the KernelBuilder subtype
    158185    iBuilder->CreateRetVoid();
     
    177204    const auto f = mKernelMap.find(name);
    178205    if (LLVM_UNLIKELY(f == mKernelMap.end())) {
    179         report_fatal_error("Kernel does not contain scalar: " + name);
     206        report_fatal_error(getName() + " does not contain scalar: " + name);
    180207    }
    181208    return iBuilder->getInt32(f->second);
     
    314341    const auto f = mStreamSetNameMap.find(name);
    315342    if (LLVM_UNLIKELY(f == mStreamSetNameMap.end())) {
    316         throw std::runtime_error("Kernel " + getName() + " does not contain stream set: " + name);
     343        throw std::runtime_error(getName() + " does not contain stream set: " + name);
    317344    }
    318345    return f->second;
     
    329356        }
    330357    }
    331     report_fatal_error(f->getName() + " does not have parameter " + name);
     358    report_fatal_error(getName() + " does not have parameter " + name);
    332359}
    333360
     
    346373void KernelBuilder::createInstance() {
    347374    if (LLVM_UNLIKELY(mKernelStateType == nullptr)) {
    348         report_fatal_error("Cannot create kernel instance before calling prepareKernel()");
     375        report_fatal_error("Cannot instantiate " + getName() + " before calling prepareKernel()");
    349376    }
    350377    mKernelInstance = iBuilder->CreateCacheAlignedAlloca(mKernelStateType);
    351     std::vector<Value *> init_args = {mKernelInstance};
    352     for (auto a : mInitialArguments) {
    353         init_args.push_back(a);
    354     }
    355     for (auto b : mStreamSetInputBuffers) {
    356         init_args.push_back(b->getStreamSetBasePtr());
    357     }
    358     for (auto b : mStreamSetOutputBuffers) {
    359         init_args.push_back(b->getStreamSetBasePtr());
    360     }
    361     Function * initMethod = getInitFunction();
    362     iBuilder->CreateCall(initMethod, init_args);
     378
     379    std::vector<Value *> args;
     380    args.reserve(mInitialArguments.size() + mStreamSetInputBuffers.size() + mStreamSetOutputBuffers.size() + 1);
     381    args.push_back(mKernelInstance);
     382    for (unsigned i = 0; i < mInitialArguments.size(); ++i) {
     383        Value * arg = mInitialArguments[i];
     384        if (LLVM_UNLIKELY(arg == nullptr)) {
     385            report_fatal_error(getName() + ": initial argument " + std::to_string(i)
     386                               + " cannot be null when calling createInstance()");
     387        }
     388        args.push_back(arg);
     389    }
     390    for (unsigned i = 0; i < mStreamSetInputBuffers.size(); ++i) {
     391        assert (mStreamSetInputBuffers[i]);
     392        Value * arg = mStreamSetInputBuffers[i]->getStreamSetBasePtr();
     393        if (LLVM_UNLIKELY(arg == nullptr)) {
     394            report_fatal_error(getName() + ": input stream set " + std::to_string(i)
     395                               + " was not allocated prior to calling createInstance()");
     396        }
     397        args.push_back(arg);
     398    }
     399    assert (mStreamSetInputs.size() == mStreamSetInputBuffers.size());
     400    for (unsigned i = 0; i < mStreamSetOutputBuffers.size(); ++i) {
     401        assert (mStreamSetOutputBuffers[i]);
     402        Value * arg = mStreamSetOutputBuffers[i]->getStreamSetBasePtr();
     403        if (LLVM_UNLIKELY(arg == nullptr)) {
     404            report_fatal_error(getName() + ": output stream set " + std::to_string(i)
     405                               + " was not allocated prior to calling createInstance()");
     406        }
     407        args.push_back(arg);
     408    }
     409    assert (mStreamSetOutputs.size() == mStreamSetOutputBuffers.size());
     410    iBuilder->CreateCall(getInitFunction(), args);
    363411}
    364412
     
    474522    generateDoBlockMethod(); // must be implemented by the KernelBuilder subtype
    475523    iBuilder->CreateRetVoid();
     524
     525    // Use the pass manager to optimize the function.
     526    FunctionPassManager fpm(iBuilder->getModule());
     527    fpm.add(createReassociatePass());             //Reassociate expressions.
     528    fpm.add(createGVNPass());                     //Eliminate common subexpressions.
     529    fpm.add(createInstructionCombiningPass());    //Simple peephole optimizations and bit-twiddling.
     530    fpm.doInitialization();
     531    fpm.run(*mCurrentFunction);
    476532}
    477533
     
    522578    auto args = doBlock->arg_begin();
    523579    args->setName("self");
    524     assert ((++args) == doBlock->arg_end());
    525580
    526581    FunctionType * const finalBlockType = FunctionType::get(iBuilder->getVoidTy(), {selfType, iBuilder->getSizeTy()}, false);
     
    532587    args->setName("self");
    533588    (++args)->setName("remainingBytes");
    534     assert ((++args) == finalBlock->arg_end());
    535589}
    536590
  • icGREP/icgrep-devel/icgrep/kernels/pipeline.cpp

    r5305 r5320  
    253253
    254254void generatePipelineLoop(IDISA::IDISA_Builder * iBuilder, const std::vector<KernelBuilder *> & kernels) {
    255     for (auto k : kernels) k->createInstance();
    256    
     255    for (auto k : kernels) {
     256        k->createInstance();
     257    }
    257258    BasicBlock * entryBlock = iBuilder->GetInsertBlock();
    258259    Function * main = entryBlock->getParent();
  • icGREP/icgrep-devel/icgrep/kernels/streamset.cpp

    r5317 r5320  
    2020using namespace llvm;
    2121using namespace IDISA;
     22
     23ArrayType * resolveStreamSetType(IDISA_Builder * const b, Type * type);
     24
     25StructType * resolveExpandableStreamSetType(IDISA_Builder * const b, Type * type);
    2226
    2327void StreamSetBuffer::allocateBuffer() {
     
    173177// Expandable Buffer
    174178
    175 void ExpandableBuffer::ensureStreamCapacity(llvm::Value * self, llvm::Value * streamIndex) const {
    176 
     179// Expandable Buffer
     180
     181void ExpandableBuffer::allocateBuffer() {
     182    mStreamSetBufferPtr = iBuilder->CreateCacheAlignedAlloca(getType());
     183    Value * const capacityPtr = iBuilder->CreateGEP(mStreamSetBufferPtr, {iBuilder->getInt32(0), iBuilder->getInt32(0)});
     184    iBuilder->CreateStore(iBuilder->getSize(mInitialCapacity), capacityPtr);
     185    Type * const bufferType = getType()->getStructElementType(1)->getPointerElementType();
     186    ConstantInt * const size = iBuilder->getSize(mBufferBlocks * mInitialCapacity);
     187    Value * const ptr = iBuilder->CreateAlignedMalloc(bufferType, size, iBuilder->getCacheAlignment());
     188    const auto alignment = bufferType->getPrimitiveSizeInBits() / 8;
     189    iBuilder->CreateMemZero(ptr, size, alignment);
     190    Value * const streamSetPtr = iBuilder->CreateGEP(mStreamSetBufferPtr, {iBuilder->getInt32(0), iBuilder->getInt32(1)});
     191    iBuilder->CreateStore(ptr, streamSetPtr);
     192}
     193
     194std::pair<Value *, Value *> ExpandableBuffer::getExpandedStreamOffset(llvm::Value * self, llvm::Value * streamIndex, Value * blockIndex) const {
     195
     196    // MDNode *Weights = MDBuilder(Ctx).createBranchWeights(42, 13);
     197
     198    // ENTRY
     199    Value * const capacityPtr = iBuilder->CreateGEP(self, {iBuilder->getInt32(0), iBuilder->getInt32(0)});
     200    Value * const capacity = iBuilder->CreateLoad(capacityPtr);
     201    Value * const streamSetPtr = iBuilder->CreateGEP(self, {iBuilder->getInt32(0), iBuilder->getInt32(1)});
     202    Value * const streamSet = iBuilder->CreateLoad(streamSetPtr);
     203
     204    // Are we guaranteed that we can access this stream?
     205    if (LLVM_UNLIKELY(isa<ConstantInt>(streamIndex))) {
     206        if (LLVM_LIKELY(cast<ConstantInt>(streamIndex)->getLimitedValue() < mInitialCapacity)) {
     207            return {streamSet, capacity};
     208        }
     209    }
     210
     211    BasicBlock * const entry = iBuilder->GetInsertBlock();
     212    BasicBlock * const expand = BasicBlock::Create(iBuilder->getContext(), "expand", entry->getParent());
     213    BasicBlock * const resume = BasicBlock::Create(iBuilder->getContext(), "resume", entry->getParent());
     214
     215    assert (streamIndex->getType() == capacity->getType());
     216    Value * cond = iBuilder->CreateICmpULT(streamIndex, capacity);
     217    iBuilder->CreateCondBr(cond, resume, expand);
     218    // EXPAND
     219    iBuilder->SetInsertPoint(expand);
     220    /// TODO: this should call a function rather than be inlined into the block. REVISIT once tested.
     221    Value * newCapacity = iBuilder->CreateMul(streamIndex, iBuilder->getSize(2));
     222    iBuilder->CreateStore(newCapacity, capacityPtr);
     223    Type * bufferType = getType()->getStructElementType(1)->getPointerElementType();
     224    Value * size = iBuilder->CreateMul(newCapacity, iBuilder->getSize(mBufferBlocks));
     225    Value * newStreamSet = iBuilder->CreateAlignedMalloc(bufferType, size, iBuilder->getCacheAlignment());
     226    iBuilder->CreateStore(newStreamSet, streamSetPtr);
     227    Value * const diffCapacity = iBuilder->CreateSub(newCapacity, capacity);
     228    const auto alignment = bufferType->getPrimitiveSizeInBits() / 8;
     229    for (unsigned i = 0; i < mBufferBlocks; ++i) {
     230        ConstantInt * const offset = iBuilder->getSize(i);
     231        Value * srcOffset = iBuilder->CreateMul(capacity, offset);
     232        Value * srcPtr = iBuilder->CreateGEP(streamSet, srcOffset);
     233        Value * destOffset = iBuilder->CreateMul(newCapacity, offset);
     234        Value * destPtr = iBuilder->CreateGEP(newStreamSet, destOffset);
     235        iBuilder->CreateMemCpy(destPtr, srcPtr, capacity, alignment);
     236        Value * destZeroOffset = iBuilder->CreateAdd(destOffset, capacity);
     237        Value * destZeroPtr = iBuilder->CreateGEP(newStreamSet, destZeroOffset);
     238        iBuilder->CreateMemZero(destZeroPtr, diffCapacity, alignment);
     239    }
     240
     241    iBuilder->CreateAlignedFree(streamSet);
     242    iBuilder->CreateBr(resume);
     243    // RESUME
     244    iBuilder->SetInsertPoint(resume);
     245
     246    PHINode * phiStreamSet = iBuilder->CreatePHI(streamSet->getType(), 2);
     247    phiStreamSet->addIncoming(streamSet, entry);
     248    phiStreamSet->addIncoming(newStreamSet, expand);
     249
     250    PHINode * phiCapacity = iBuilder->CreatePHI(capacity->getType(), 2);
     251    phiCapacity->addIncoming(capacity, entry);
     252    phiCapacity->addIncoming(newCapacity, expand);
     253
     254    Value * offset = iBuilder->CreateAdd(iBuilder->CreateMul(blockIndex, phiCapacity), streamIndex);
     255
     256    return {phiStreamSet, offset};
    177257}
    178258
    179259llvm::Value * ExpandableBuffer::getStreamBlockPtr(llvm::Value * self, Value * streamIndex, Value * blockIndex) const {
    180     ensureStreamCapacity(self, streamIndex);
    181 
    182 
    183 
    184     return nullptr;
    185 }
    186 
    187 llvm::Value * ExpandableBuffer::getStreamPackPtr(llvm::Value * self, llvm::Value * streamIndex, Value *blockIndex, Value *packIndex) const {
    188     ensureStreamCapacity(self, streamIndex);
    189 
    190 
    191     return nullptr;
     260    Value * ptr, * offset;
     261    std::tie(ptr, offset) = getExpandedStreamOffset(self, streamIndex, blockIndex);
     262    return iBuilder->CreateGEP(ptr, offset);
     263}
     264
     265llvm::Value * ExpandableBuffer::getStreamPackPtr(llvm::Value * self, llvm::Value * streamIndex, Value * blockIndex, Value * packIndex) const {
     266    Value * ptr, * offset;
     267    std::tie(ptr, offset) = getExpandedStreamOffset(self, streamIndex, blockIndex);
     268    return iBuilder->CreateGEP(ptr, {offset, packIndex});
    192269}
    193270
    194271Value * ExpandableBuffer::getStreamSetBlockPtr(Value *, Value *) const {
    195     report_fatal_error("Expandable buffers: getStreamSetBlockPtr not supported.");
     272    report_fatal_error("Expandable buffers: getStreamSetPtr is not supported.");
    196273}
    197274
    198275Value * ExpandableBuffer::getLinearlyAccessibleItems(llvm::Value *) const {
    199     report_fatal_error("Expandable buffers: getLinearlyAccessibleItems not supported.");
     276    report_fatal_error("Expandable buffers: getLinearlyAccessibleItems is not supported.");
    200277}
    201278
    202279// Constructors
    203 
    204280SingleBlockBuffer::SingleBlockBuffer(IDISA::IDISA_Builder * b, llvm::Type * type)
    205 : StreamSetBuffer(BufferKind::BlockBuffer, b, type, 1, 0) {
     281: StreamSetBuffer(BufferKind::BlockBuffer, b, type, resolveStreamSetType(b, type), 1, 0) {
    206282
    207283}
    208284
    209285ExternalFileBuffer::ExternalFileBuffer(IDISA::IDISA_Builder * b, llvm::Type * type, unsigned AddressSpace)
    210 : StreamSetBuffer(BufferKind::ExternalFileBuffer, b, type, 0, AddressSpace) {
     286: StreamSetBuffer(BufferKind::ExternalFileBuffer, b, type, resolveStreamSetType(b, type), 0, AddressSpace) {
    211287
    212288}
    213289
    214290CircularBuffer::CircularBuffer(IDISA::IDISA_Builder * b, llvm::Type * type, size_t bufferBlocks, unsigned AddressSpace)
    215 : StreamSetBuffer(BufferKind::CircularBuffer, b, type, bufferBlocks, AddressSpace) {
     291: StreamSetBuffer(BufferKind::CircularBuffer, b, type, resolveStreamSetType(b, type), bufferBlocks, AddressSpace) {
    216292
    217293}
    218294
    219295CircularCopybackBuffer::CircularCopybackBuffer(IDISA::IDISA_Builder * b, llvm::Type * type, size_t bufferBlocks, size_t overflowBlocks, unsigned AddressSpace)
    220 : StreamSetBuffer(BufferKind::CircularCopybackBuffer, b, type, bufferBlocks, AddressSpace), mOverflowBlocks(overflowBlocks) {
     296: StreamSetBuffer(BufferKind::CircularCopybackBuffer, b, type, resolveStreamSetType(b, type), bufferBlocks, AddressSpace), mOverflowBlocks(overflowBlocks) {
    221297
    222298}
    223299
    224300ExpandableBuffer::ExpandableBuffer(IDISA::IDISA_Builder * b, llvm::Type * type, size_t bufferBlocks, unsigned AddressSpace)
    225 : StreamSetBuffer(BufferKind::ExpandableBuffer, b, type, bufferBlocks, AddressSpace) {
    226 
    227 }
    228 
    229 inline Type * resolveStreamSetType(IDISA_Builder * const b, Type * const type) {
    230     if (type->isArrayTy()) {
    231         Type * ty = type->getArrayElementType();
    232         if (LLVM_LIKELY(ty->isVectorTy() && ty->getVectorNumElements() == 0)) {
    233             ty = ty->getVectorElementType();
    234             if (LLVM_LIKELY(ty->isIntegerTy())) {
    235                 const auto fieldWidth = cast<IntegerType>(ty)->getBitWidth();
    236                 ty = b->getBitBlockType();
    237                 if (fieldWidth != 1) {
    238                     ty = llvm::ArrayType::get(ty, fieldWidth);
    239                 }
    240                 return ArrayType::get(ty, type->getArrayNumElements());
     301: StreamSetBuffer(BufferKind::ExpandableBuffer, b, type, resolveExpandableStreamSetType(b, type), bufferBlocks, AddressSpace)
     302, mInitialCapacity(type->getArrayNumElements()) {
     303
     304}
     305
     306inline StreamSetBuffer::StreamSetBuffer(BufferKind k, IDISA::IDISA_Builder * b, Type * baseType, Type * resolvedType, unsigned blocks, unsigned AddressSpace)
     307: mBufferKind(k)
     308, iBuilder(b)
     309, mType(resolvedType)
     310, mBufferBlocks(blocks)
     311, mAddressSpace(AddressSpace)
     312, mStreamSetBufferPtr(nullptr)
     313, mBaseType(baseType) {
     314
     315}
     316
     317// Helper routines
     318ArrayType * resolveStreamSetType(IDISA_Builder * const b, Type * type) {
     319    unsigned numElements = 1;
     320    if (LLVM_LIKELY(type->isArrayTy())) {
     321        numElements = type->getArrayNumElements();
     322        type = type->getArrayElementType();
     323    }
     324    if (LLVM_LIKELY(type->isVectorTy() && type->getVectorNumElements() == 0)) {
     325        type = type->getVectorElementType();
     326        if (LLVM_LIKELY(type->isIntegerTy())) {
     327            const auto fieldWidth = cast<IntegerType>(type)->getBitWidth();
     328            type = b->getBitBlockType();
     329            if (fieldWidth != 1) {
     330                type = ArrayType::get(type, fieldWidth);
    241331            }
     332            return ArrayType::get(type, numElements);
    242333        }
    243334    }
     
    249340}
    250341
    251 StreamSetBuffer::StreamSetBuffer(BufferKind k, IDISA::IDISA_Builder * b, Type * type, unsigned blocks, unsigned AddressSpace)
    252 : mBufferKind(k)
    253 , iBuilder(b)
    254 , mType(resolveStreamSetType(b, type))
    255 , mBufferBlocks(blocks)
    256 , mAddressSpace(AddressSpace)
    257 , mStreamSetBufferPtr(nullptr)
    258 , mBaseType(type) {
    259 
    260 }
     342StructType * resolveExpandableStreamSetType(IDISA_Builder * const b, Type * type) {
     343    if (LLVM_LIKELY(type->isArrayTy())) {
     344        type = type->getArrayElementType();
     345    }
     346    if (LLVM_LIKELY(type->isVectorTy() && type->getVectorNumElements() == 0)) {
     347        type = type->getVectorElementType();
     348        if (LLVM_LIKELY(type->isIntegerTy())) {
     349            const auto fieldWidth = cast<IntegerType>(type)->getBitWidth();
     350            type = b->getBitBlockType();
     351            if (fieldWidth != 1) {
     352                type = ArrayType::get(type, fieldWidth);
     353            }
     354            return StructType::get(b->getSizeTy(), type->getPointerTo(), nullptr);
     355        }
     356    }
     357    std::string tmp;
     358    raw_string_ostream out(tmp);
     359    type->print(out);
     360    out << " is an unvalid stream set buffer type.";
     361    report_fatal_error(out.str());
     362}
  • icGREP/icgrep-devel/icgrep/kernels/streamset.h

    r5317 r5320  
    5858protected:
    5959
    60     StreamSetBuffer(BufferKind k, IDISA::IDISA_Builder * b, llvm::Type * type, unsigned blocks, unsigned AddressSpace);
     60    StreamSetBuffer(BufferKind k, IDISA::IDISA_Builder * b, llvm::Type * baseType, llvm::Type * resolvedType, unsigned blocks, unsigned AddressSpace);
    6161
    6262    // Get the buffer pointer for a given block of the stream.
     
    148148// within their set whenever the index exceeds its capacity
    149149//
     150// ExpandableBuffers do not allow access to the base stream set but will automatically increase the number of streams
     151// within their set whenever the index exceeds its capacity
     152//
    150153class ExpandableBuffer : public StreamSetBuffer {
    151154public:
     
    159162
    160163    llvm::Value * getLinearlyAccessibleItems(llvm::Value * fromPosition) const override;
    161    
     164
     165    void allocateBuffer() override;
     166
    162167protected:
    163168
    164     void ensureStreamCapacity(llvm::Value * self, llvm::Value * streamIndex) const;
     169    llvm::Value * getStreamSetBlockPtr(llvm::Value * self, llvm::Value * blockIndex) const override;
    165170
    166     llvm::Value * getStreamSetBlockPtr(llvm::Value * self, llvm::Value * blockIndex) const override;
     171private:
     172
     173    std::pair<llvm::Value *, llvm::Value *> getExpandedStreamOffset(llvm::Value * self, llvm::Value * streamIndex, llvm::Value * blockIndex) const;
     174
     175private:
     176
     177    const uint64_t  mInitialCapacity;
     178
    167179};
    168180
  • icGREP/icgrep-devel/icgrep/pablo/builder.hpp

    r5307 r5320  
    6666    }
    6767
    68     inline Var * createVar(const llvm::StringRef & name, llvm::Type * const type = nullptr) {
     68    inline Var * createVar(const llvm::StringRef name, llvm::Type * const type = nullptr) {
    6969        return createVar(makeName(name), type);
    7070    }
    7171
    72     inline Var * createVar(const llvm::StringRef & name, PabloAST * value) {
     72    inline Var * createVar(const llvm::StringRef name, PabloAST * value) {
    7373        Var * var = createVar(name, value->getType());
    7474        createAssign(var, value);
  • icGREP/icgrep-devel/icgrep/pablo/pablo_compiler.cpp

    r5317 r5320  
    5757    mMarker.emplace(entryBlock->createOnes(), iBuilder->allOnes());
    5858
    59     for (unsigned i = 0; i < mKernel->getNumOfInputs(); ++i) {
    60         Var * var = mKernel->getInput(i);
    61         if (LLVM_UNLIKELY(var->isScalar())) {
    62             mMarker.emplace(var, mKernel->getScalarFieldPtr(var->getName()));
    63         }
    64     }
    65 
    66     for (unsigned i = 0; i < mKernel->getNumOfOutputs(); ++i) {
    67         Var * var = mKernel->getOutput(i);
    68         if (LLVM_UNLIKELY(var->isScalar())) {
    69             mMarker.emplace(var, mKernel->getScalarFieldPtr(var->getName()));
    70         }
    71     }
    72 
    7359    compileBlock(entryBlock);
    7460
     
    129115
    130116    for (const Var * var : ifStatement->getEscaped()) {
    131         auto f = mMarker.find(var);
    132         if (LLVM_UNLIKELY(f == mMarker.end())) {
    133             std::string tmp;
    134             raw_string_ostream out(tmp);
    135             var->print(out);
    136             out << " is uninitialized prior to entering ";
    137             ifStatement->print(out);
    138             report_fatal_error(out.str());
    139         }
    140         incoming.emplace_back(var, f->second);
     117        if (LLVM_UNLIKELY(var->isKernelParameter())) {
     118            Value * marker = nullptr;
     119            if (var->isScalar()) {
     120                marker = mKernel->getScalarFieldPtr(var->getName());
     121            } else if (var->isReadOnly()) {
     122                marker = mKernel->getInputStreamBlockPtr(var->getName(), iBuilder->getInt32(0));
     123            } else if (var->isReadNone()) {
     124                marker = mKernel->getOutputStreamBlockPtr(var->getName(), iBuilder->getInt32(0));
     125            }
     126            mMarker[var] = marker;
     127        } else {
     128            auto f = mMarker.find(var);
     129            if (LLVM_UNLIKELY(f == mMarker.end())) {
     130                std::string tmp;
     131                raw_string_ostream out(tmp);
     132                var->print(out);
     133                out << " is uninitialized prior to entering ";
     134                ifStatement->print(out);
     135                report_fatal_error(out.str());
     136            }
     137            incoming.emplace_back(var, f->second);
     138        }
    141139    }
    142140
     
    223221#endif
    224222    // On entry to the while structure, proceed to execute the first iteration
    225     // of the loop body unconditionally.   The while condition is tested at the end of
     223    // of the loop body unconditionally. The while condition is tested at the end of
    226224    // the loop.
     225
     226    for (const Var * var : escaped) {
     227        if (LLVM_UNLIKELY(var->isKernelParameter())) {
     228            Value * marker = nullptr;
     229            if (var->isScalar()) {
     230                marker = mKernel->getScalarFieldPtr(var->getName());
     231            } else if (var->isReadOnly()) {
     232                marker = mKernel->getInputStreamBlockPtr(var->getName(), iBuilder->getInt32(0));
     233            } else if (var->isReadNone()) {
     234                marker = mKernel->getOutputStreamBlockPtr(var->getName(), iBuilder->getInt32(0));
     235            }
     236            mMarker[var] = marker;
     237        }
     238    }
    227239
    228240    mCarryManager->enterLoopScope(whileBody);
     
    346358        Value * value = nullptr;
    347359        if (LLVM_UNLIKELY(isa<Assign>(stmt))) {
    348 
    349360            value = compileExpression(cast<Assign>(stmt)->getValue());
    350 
    351361            expr = cast<Assign>(stmt)->getVariable();
    352 
    353             bool storeRequired = false;
    354 
     362            Value * ptr = nullptr;
    355363            if (LLVM_LIKELY(isa<Var>(expr))) {
    356364                const Var * var = cast<Var>(expr);
     
    358366                    std::string tmp;
    359367                    raw_string_ostream out(tmp);
    360                     out << "cannot assign value to ";
     368                    out << mKernel->getName();
     369                    out << " cannot assign value to ";
    361370                    var->print(out);
    362371                    out << ": ";
     
    365374                    report_fatal_error(out.str());
    366375                }
    367                 storeRequired = var->isKernelParameter();
    368             }
    369 
    370             if (storeRequired || isa<Extract>(expr)) {
     376                if (var->isKernelParameter()) {
     377                    if (var->isScalar()) {
     378                        ptr = mKernel->getScalarFieldPtr(var->getName());
     379                    } else {
     380                        ptr = mKernel->getOutputStreamBlockPtr(var->getName(), iBuilder->getInt32(0));
     381                    }
     382                }
     383            } else if (isa<Extract>(expr)) {
    371384                const auto f = mMarker.find(expr);
    372385                if (LLVM_UNLIKELY(f == mMarker.end())) {
    373386                    std::string tmp;
    374387                    raw_string_ostream out(tmp);
    375                     out << "cannot assign value to ";
     388                    out << mKernel->getName();
     389                    out << " cannot assign value to ";
    376390                    expr->print(out);
    377391                    out << ": ";
     
    381395                    report_fatal_error(out.str());
    382396                }
    383                 Value * const ptr = f->second;
     397                ptr = f->second;
     398                assert (ptr);
     399            }
     400            if (ptr) {
    384401                iBuilder->CreateAlignedStore(value, ptr, getAlignment(value));
    385402                value = ptr;
    386403            }
    387 
    388404        } else if (const Extract * extract = dyn_cast<Extract>(stmt)) {
    389405            Value * index = compileExpression(extract->getIndex());
     
    397413                    std::string tmp;
    398414                    raw_string_ostream out(tmp);
    399                     out << "stream ";
     415                    out << mKernel->getName();
     416                    out << " stream ";
    400417                    expr->print(out);
    401418                    out << " cannot be read or written to";
     
    458475            }
    459476            Value * ptr = mKernel->getScalarFieldPtr(f->second);
    460             Value * count = iBuilder->CreateAlignedLoad(ptr, getPointerElementAlignment(ptr));
     477            const auto alignment = getPointerElementAlignment(ptr);
     478            Value * count = iBuilder->CreateAlignedLoad(ptr, alignment, c->getName() + "_accumulator");
    461479            Value * const partial = iBuilder->simd_popcount(counterSize, to_count);
    462480            if (LLVM_UNLIKELY(counterSize <= 1)) {
     
    471489            }
    472490            value = iBuilder->CreateAdd(value, count);
    473             iBuilder->CreateStore(value, ptr);
    474 
     491            iBuilder->CreateAlignedStore(value, ptr, alignment);
    475492        } else if (const Lookahead * l = dyn_cast<Lookahead>(stmt)) {
    476493            Var * var = nullptr;
     
    520537            std::string tmp;
    521538            raw_string_ostream out(tmp);
    522             out << "Internal error: ";
     539            out << "PabloCompiler: statement ";
    523540            stmt->print(out);
    524             out << " is not a recognized statement in the Pablo compiler.";
    525             throw std::runtime_error(out.str());
     541            out << " was not recognized by the compiler";
     542            report_fatal_error(out.str());
    526543        }
    527544
Note: See TracChangeset for help on using the changeset viewer.