Changeset 5307


Ignore:
Timestamp:
Feb 7, 2017, 3:23:42 PM (2 years ago)
Author:
nmedfort
Message:

Continued work on eliminating BlockNo.

Location:
icGREP/icgrep-devel/icgrep
Files:
20 edited

Legend:

Unmodified
Added
Removed
  • icGREP/icgrep-devel/icgrep/IR_Gen/idisa_builder.cpp

    r5298 r5307  
    363363}
    364364
    365 
    366365Value * IDISA_Builder::simd_and(Value * a, Value * b) {
    367366    return a->getType() == b->getType() ? CreateAnd(a, b) : CreateAnd(bitCast(a), bitCast(b));
  • icGREP/icgrep-devel/icgrep/IR_Gen/idisa_builder.h

    r5298 r5307  
    119119    }
    120120
     121    llvm::VectorType * getStreamTy(const unsigned FieldWidth = 1) {
     122        return llvm::VectorType::get(llvm::IntegerType::getIntNTy(getContext(), FieldWidth), 0);
     123    }
     124
    121125    inline llvm::ArrayType * getStreamSetTy(const unsigned NumElements = 1, const unsigned FieldWidth = 1) {
    122126        return llvm::ArrayType::get(getStreamTy(FieldWidth), NumElements);
    123     }
    124    
    125     llvm::VectorType * getStreamTy(const unsigned FieldWidth = 1) {
    126         return llvm::VectorType::get(llvm::IntegerType::getIntNTy(getContext(), FieldWidth), 0);
    127127    }
    128128
  • icGREP/icgrep-devel/icgrep/base64.cpp

    r5302 r5307  
    8484    StdOutKernel stdoutK(iBuilder, 8);
    8585    stdoutK.generateKernel({&Base64out}, {});
    86 
    8786   
    8887    iBuilder->SetInsertPoint(BasicBlock::Create(mMod->getContext(), "entry", main,0));
    8988
    9089    ByteStream.setStreamSetBuffer(inputStream, fileSize);
    91 //    Radix64out.setEmptyBuffer(iBuilder->CreatePointerCast(outputStream, outputType));
    9290    Expanded3_4Out.allocateBuffer();
    9391    Radix64out.allocateBuffer();
    9492    Base64out.allocateBuffer();
    9593
    96 
    97     if (segmentPipelineParallel){
     94    if (segmentPipelineParallel) {
    9895        generateSegmentParallelPipeline(iBuilder, {&mmapK, &expandK, &radix64K, &base64K, &stdoutK});
    99     }
    100     else{
     96    } else {
    10197        generatePipelineLoop(iBuilder, {&mmapK, &expandK, &radix64K, &base64K, &stdoutK});
    10298    }
  • icGREP/icgrep-devel/icgrep/kernels/interface.h

    r5297 r5307  
    1010#include <vector>  // for vector
    1111namespace IDISA { class IDISA_Builder; }
     12namespace llvm { class ConstantInt; }
    1213namespace llvm { class Function; }
    1314namespace llvm { class Module; }
     
    1718namespace llvm { class Value; }
    1819
     20
    1921struct Binding {
    20     llvm::Type * type;
    21     std::string name;
    22     Binding(llvm::Type * type, const std::string & name) : type(type), name(name) {}
    23     Binding(llvm::Type * type, std::string && name) : type(type), name(name) {}
     22    Binding(llvm::Type * type, const std::string & name, const unsigned step = 0)
     23    : type(type), name(name), step(step) {
     24
     25    }
     26
     27    llvm::Type *        type;
     28    std::string         name;
     29    const unsigned      step;
    2430};
    2531
  • icGREP/icgrep-devel/icgrep/kernels/kernel.cpp

    r5303 r5307  
    3737unsigned KernelBuilder::addScalar(Type * const type, const std::string & name) {
    3838    if (LLVM_UNLIKELY(mKernelStateType != nullptr)) {
    39         llvm::report_fatal_error("Cannot add kernel field " + name + " after kernel state finalized");
     39        report_fatal_error("Cannot add kernel field " + name + " after kernel state finalized");
    4040    }
    4141    if (LLVM_UNLIKELY(mKernelMap.count(name))) {
    42         llvm::report_fatal_error("Kernel already contains field " + name);
     42        report_fatal_error("Kernel already contains field " + name);
    4343    }
    4444    const auto index = mKernelFields.size();
     
    5050unsigned KernelBuilder::addUnnamedScalar(Type * const type) {
    5151    if (LLVM_UNLIKELY(mKernelStateType != nullptr)) {
    52         llvm::report_fatal_error("Cannot add unnamed kernel field after kernel state finalized");
     52        report_fatal_error("Cannot add unnamed kernel field after kernel state finalized");
    5353    }
    5454    const auto index = mKernelFields.size();
     
    6868void KernelBuilder::prepareKernel() {
    6969    if (LLVM_UNLIKELY(mKernelStateType != nullptr)) {
    70         llvm::report_fatal_error("Cannot prepare kernel after kernel state finalized");
     70        report_fatal_error("Cannot prepare kernel after kernel state finalized");
    7171    }
    7272    if (mStreamSetInputs.size() != mStreamSetInputBuffers.size()) {
     
    7575        out << "kernel contains " << mStreamSetInputBuffers.size() << " input buffers for "
    7676        << mStreamSetInputs.size() << " input stream sets.";
    77         throw std::runtime_error(out.str());
     77        report_fatal_error(out.str());
    7878    }
    7979    if (mStreamSetOutputs.size() != mStreamSetOutputBuffers.size()) {
     
    8282        out << "kernel contains " << mStreamSetOutputBuffers.size() << " output buffers for "
    8383        << mStreamSetOutputs.size() << " output stream sets.";
    84         throw std::runtime_error(out.str());
    85     }
    86     unsigned blockSize = iBuilder->getBitBlockWidth();
     84        report_fatal_error(out.str());
     85    }
     86    const auto blockSize = iBuilder->getBitBlockWidth();
    8787    for (unsigned i = 0; i < mStreamSetInputs.size(); i++) {
    8888        if ((mStreamSetInputBuffers[i]->getBufferBlocks() > 0) && (mStreamSetInputBuffers[i]->getBufferBlocks() < codegen::SegmentSize + (blockSize + mLookAheadPositions - 1)/blockSize)) {
    89             llvm::report_fatal_error("Kernel preparation: Buffer size too small " + mStreamSetInputs[i].name);
     89            report_fatal_error("Kernel preparation: Buffer size too small " + mStreamSetInputs[i].name);
    9090        }
    91         mScalarInputs.push_back(Binding{mStreamSetInputBuffers[i]->getPointerType(), mStreamSetInputs[i].name + BUFFER_PTR_SUFFIX});
     91        mScalarInputs.emplace_back(mStreamSetInputBuffers[i]->getPointerType(), mStreamSetInputs[i].name + BUFFER_PTR_SUFFIX);
    9292        addScalar(iBuilder->getSizeTy(), mStreamSetInputs[i].name + PROCESSED_ITEM_COUNT_SUFFIX);
    9393    }
    9494    for (unsigned i = 0; i < mStreamSetOutputs.size(); i++) {
    95         mScalarInputs.push_back(Binding{mStreamSetOutputBuffers[i]->getPointerType(), mStreamSetOutputs[i].name + BUFFER_PTR_SUFFIX});
     95        mScalarInputs.emplace_back(mStreamSetOutputBuffers[i]->getPointerType(), mStreamSetOutputs[i].name + BUFFER_PTR_SUFFIX);
    9696        addScalar(iBuilder->getSizeTy(), mStreamSetOutputs[i].name + PRODUCED_ITEM_COUNT_SUFFIX);
    9797    }
    98     for (auto binding : mScalarInputs) {
     98    for (const auto binding : mScalarInputs) {
    9999        addScalar(binding.type, binding.name);
    100100    }
    101     for (auto binding : mScalarOutputs) {
     101    for (const auto binding : mScalarOutputs) {
    102102        addScalar(binding.type, binding.name);
    103103    }
    104     if (mStreamSetNameMap.empty()) prepareKernelSignature();
     104    if (mStreamSetNameMap.empty()) {
     105        prepareKernelSignature();
     106    }
    105107    for (auto binding : mInternalScalars) {
    106108        addScalar(binding.type, binding.name);
     
    178180    const auto f = mKernelMap.find(name);
    179181    if (LLVM_UNLIKELY(f == mKernelMap.end())) {
    180         llvm::report_fatal_error("Kernel does not contain scalar: " + name);
     182        report_fatal_error("Kernel does not contain scalar: " + name);
    181183    }
    182184    return iBuilder->getInt32(f->second);
     
    187189}
    188190
    189 Value * KernelBuilder::getScalarFieldPtr(llvm::Value * instance, Value * index) const {
     191Value * KernelBuilder::getScalarFieldPtr(Value * instance, Value * index) const {
    190192    return iBuilder->CreateGEP(instance, {iBuilder->getInt32(0), index});
    191193}
    192194
    193 Value * KernelBuilder::getScalarFieldPtr(llvm::Value * instance, const std::string & fieldName) const {
     195Value * KernelBuilder::getScalarFieldPtr(Value * instance, const std::string & fieldName) const {
    194196    return getScalarFieldPtr(instance, getScalarIndex(fieldName));
    195197}
     
    235237}
    236238
    237 LoadInst * KernelBuilder::acquireLogicalSegmentNo(llvm::Value * instance) const {
     239LoadInst * KernelBuilder::acquireLogicalSegmentNo(Value * instance) const {
    238240    return iBuilder->CreateAtomicLoadAcquire(getScalarFieldPtr(instance, LOGICAL_SEGMENT_NO_SCALAR));
    239241}
     
    251253}
    252254
    253 llvm::Value * KernelBuilder::getInputStream(const std::string & name, llvm::Value * index) const {
    254     Value * ic = getProcessedItemCount(name);
    255     const StreamSetBuffer * buf = getStreamSetBuffer(name);
    256     ic = iBuilder->CreateUDiv(ic, iBuilder->getSize(iBuilder->getBitBlockWidth()));
    257     return buf->getStream(getStreamSetBufferPtr(name), ic, index);
    258 }
    259 
    260 llvm::Value * KernelBuilder::getInputStream(const std::string & name, llvm::Value * index1, llvm::Value * index2) const {
    261     Value * ic = getProcessedItemCount(name);
    262     const StreamSetBuffer * buf = getStreamSetBuffer(name);
    263     ic = iBuilder->CreateUDiv(ic, iBuilder->getSize(iBuilder->getBitBlockWidth()));
    264     return buf->getStream(getStreamSetBufferPtr(name), ic, index1, index2);
    265 }
    266 
    267 llvm::Value * KernelBuilder::getOutputStream(const std::string & name, llvm::Value * index) const {
    268     Value * ic = getProducedItemCount(name);
    269     const StreamSetBuffer * buf = getStreamSetBuffer(name);
    270     ic = iBuilder->CreateUDiv(ic, iBuilder->getSize(iBuilder->getBitBlockWidth()));
    271     return buf->getStream(getStreamSetBufferPtr(name), ic, index);
    272 }
    273 
    274 llvm::Value * KernelBuilder::getOutputStream(const std::string & name, llvm::Value * index1, llvm::Value * index2) const {
    275     Value * ic = getProducedItemCount(name);
    276     const StreamSetBuffer * buf = getStreamSetBuffer(name);
    277     ic = iBuilder->CreateUDiv(ic, iBuilder->getSize(iBuilder->getBitBlockWidth()));
    278     return buf->getStream(getStreamSetBufferPtr(name), ic, index1, index2);
    279 }
    280 
    281 Value * KernelBuilder::getStreamView(llvm::Type * type, const std::string & name, Value * blockNo, Value * index) const {
    282     return getStreamSetBuffer(name)->getStreamView(type, getStreamSetBufferPtr(name), blockNo, index);
     255inline static uint64_t log2(const uint64_t x) {
     256    return (64 - __builtin_clzll(x)) - 1;
     257}
     258
     259inline static uint32_t log2(const uint32_t x) {
     260    return (32 - __builtin_clz(x)) - 1;
     261}
     262
     263inline Value * KernelBuilder::computeBlockIndex(const std::vector<Binding> & bindings, const std::string & name, Value * itemCount) const {
     264    for (const Binding & b : bindings) {
     265        if (b.name == name) {
     266            const auto divisor = (b.step == 0) ? iBuilder->getBitBlockWidth() : b.step;
     267            if (LLVM_LIKELY((divisor & (divisor - 1)) == 0)) {
     268                return iBuilder->CreateLShr(itemCount, log2(divisor));
     269            } else {
     270                return iBuilder->CreateUDiv(itemCount, iBuilder->getSize(divisor));
     271            }
     272        }
     273    }
     274    report_fatal_error("Error: no binding in " + getName() + " for " + name);
     275}
     276
     277Value * KernelBuilder::getInputStream(const std::string & name, Value * streamIndex) const {
     278    Value * const blockIndex = computeBlockIndex(mStreamSetInputs, name, getProcessedItemCount(name));
     279    const StreamSetBuffer * const buf = getStreamSetBuffer(name);
     280    return buf->getStream(getStreamSetBufferPtr(name), streamIndex, blockIndex);
     281}
     282
     283Value * KernelBuilder::getInputStream(const std::string & name, Value * streamIndex, Value * packIndex) const {
     284    Value * const blockIndex = computeBlockIndex(mStreamSetInputs, name, getProcessedItemCount(name));
     285    const StreamSetBuffer * const buf = getStreamSetBuffer(name);
     286    return buf->getStream(getStreamSetBufferPtr(name), streamIndex, blockIndex, packIndex);
     287}
     288
     289Value * KernelBuilder::getOutputStream(const std::string & name, Value * streamIndex) const {
     290    Value * const blockIndex = computeBlockIndex(mStreamSetOutputs, name, getProducedItemCount(name));
     291    const StreamSetBuffer * const buf = getStreamSetBuffer(name);
     292    return buf->getStream(getStreamSetBufferPtr(name), streamIndex, blockIndex);
     293}
     294
     295Value * KernelBuilder::getOutputStream(const std::string & name, Value * streamIndex, Value * packIndex) const {
     296    Value * const blockIndex = computeBlockIndex(mStreamSetOutputs, name, getProducedItemCount(name));
     297    const StreamSetBuffer * const buf = getStreamSetBuffer(name);
     298    return buf->getStream(getStreamSetBufferPtr(name), streamIndex, blockIndex, packIndex);
     299}
     300
     301Value * KernelBuilder::getRawItemPointer(const std::string & name, Value * streamIndex, Value * absolutePosition) const {
     302    return getStreamSetBuffer(name)->getRawItemPointer(getStreamSetBufferPtr(name), streamIndex, absolutePosition);
    283303}
    284304
     
    286306    const auto f = mStreamSetNameMap.find(name);
    287307    if (LLVM_UNLIKELY(f == mStreamSetNameMap.end())) {
    288         llvm::report_fatal_error("Kernel " + getName() + " does not contain stream set: " + name);
     308        report_fatal_error("Kernel " + getName() + " does not contain stream set: " + name);
    289309    }
    290310    return f->second;
     
    304324}
    305325
    306 inline Value * KernelBuilder::getStreamSetBufferPtr(llvm::Value * index) const {
    307     return getScalarField(getSelf(), index);
    308 }
    309 
    310326Argument * KernelBuilder::getParameter(Function * const f, const std::string & name) const {
    311327    for (auto & arg : f->getArgumentList()) {
     
    314330        }
    315331    }
    316     llvm::report_fatal_error(f->getName() + " does not have parameter " + name);
    317 }
    318 
    319 Value * KernelBuilder::createDoSegmentCall(const std::vector<llvm::Value *> & args) const {
     332    report_fatal_error(f->getName() + " does not have parameter " + name);
     333}
     334
     335Value * KernelBuilder::createDoSegmentCall(const std::vector<Value *> & args) const {
    320336    return iBuilder->CreateCall(getDoSegmentFunction(), args);
    321337}
     
    335351void KernelBuilder::createInstance() {
    336352    if (LLVM_UNLIKELY(mKernelStateType == nullptr)) {
    337         llvm::report_fatal_error("Cannot create kernel instance before calling prepareKernel()");
     353        report_fatal_error("Cannot create kernel instance before calling prepareKernel()");
    338354    }
    339355    mKernelInstance = iBuilder->CreateCacheAlignedAlloca(mKernelStateType);
     
    360376    CreateDoBlockMethodCall();
    361377}
     378
     379//Value * BlockOrientedKernel::loadBlock(const std::string & inputName, Value * const streamIndex) const {
     380
     381//}
     382
     383//Value * BlockOrientedKernel::loadPack(const std::string & inputName, Value * const streamIndex, Value * const packIndex) const {
     384
     385//}
     386
    362387
    363388//  The default doSegment method dispatches to the doBlock routine for
     
    476501    Function * const f = iBuilder->getModule()->getFunction(name);
    477502    if (LLVM_UNLIKELY(f == nullptr)) {
    478         llvm::report_fatal_error("Cannot find " + name);
     503        report_fatal_error("Cannot find " + name);
    479504    }
    480505    return f;
     
    489514    Function * const f = iBuilder->getModule()->getFunction(name);
    490515    if (LLVM_UNLIKELY(f == nullptr)) {
    491         llvm::report_fatal_error("Cannot find " + name);
     516        report_fatal_error("Cannot find " + name);
    492517    }
    493518    return f;
  • icGREP/icgrep-devel/icgrep/kernels/kernel.h

    r5299 r5307  
    175175    }
    176176
    177     llvm::Value * getInputStream(const std::string & name, llvm::Value * index) const;
    178 
    179     llvm::Value * getInputStream(const std::string & name, llvm::Value * index1, llvm::Value * index2) const;
    180 
    181     llvm::Value * getOutputStream(const std::string & name, llvm::Value * index) const;
    182 
    183     llvm::Value * getOutputStream(const std::string & name, llvm::Value * index1, llvm::Value * index2) const;
    184 
    185     llvm::Value * getStreamView(llvm::Type * type, const std::string & name, llvm::Value * blockNo, llvm::Value * index) const;
    186 
     177    llvm::Value * getInputStream(const std::string & name, llvm::Value * streamIndex) const;
     178
     179    llvm::Value * getInputStream(const std::string & name, llvm::Value * streamIndex, llvm::Value * packIndex) const;
     180
     181    llvm::Value * getOutputStream(const std::string & name, llvm::Value * streamIndex) const;
     182
     183    llvm::Value * getOutputStream(const std::string & name, llvm::Value * streamIndex, llvm::Value * packIndex) const;
     184
     185    llvm::Value * getRawItemPointer(const std::string & name, llvm::Value * streamIndex, llvm::Value * absolutePosition) const;
    187186
    188187    llvm::Value * getScalarFieldPtr(const std::string & name) const {
     
    234233    llvm::Value * getStreamSetBufferPtr(const std::string & name) const;
    235234
    236     llvm::Value * getStreamSetBufferPtr(llvm::Value * index) const;
     235//    llvm::Value * getStreamSetBufferPtr(llvm::Value * index) const;
    237236
    238237    llvm::Value * getScalarFieldPtr(llvm::Value * instance, const std::string & name) const;
     
    247246
    248247    void callGenerateDoSegmentMethod();
     248
     249private:
     250
     251    llvm::Value * computeBlockIndex(const std::vector<Binding> & binding, const std::string & name, llvm::Value * itemCount) const;
    249252
    250253protected:
     
    316319    virtual ~BlockOrientedKernel() { }
    317320
     321    llvm::Value * loadBlock(const std::string & inputName, llvm::Value * const streamIndex) const;
     322
     323    llvm::Value * loadPack(const std::string & inputName, llvm::Value * const streamIndex, llvm::Value * const packIndex) const;
     324
    318325    llvm::Function * getDoBlockFunction() const;
    319326
  • icGREP/icgrep-devel/icgrep/kernels/mmap_kernel.cpp

    r5292 r5307  
    4141
    4242MMapSourceKernel::MMapSourceKernel(IDISA::IDISA_Builder * iBuilder, unsigned blocksPerSegment, unsigned codeUnitWidth)
    43 : SegmentOrientedKernel(iBuilder, "mmap_source", {}, {Binding{iBuilder->getStreamSetTy(1, codeUnitWidth), "sourceBuffer"}}, {Binding{iBuilder->getSizeTy(), "fileSize"}}, {}, {})
     43: SegmentOrientedKernel(iBuilder, "mmap_source",
     44    {},
     45    {Binding{iBuilder->getStreamSetTy(1, codeUnitWidth), "sourceBuffer", iBuilder->getBitBlockWidth()}},
     46    {Binding{iBuilder->getSizeTy(), "fileSize"}}, {}, {})
    4447, mSegmentBlocks(blocksPerSegment)
    4548, mCodeUnitWidth(codeUnitWidth) {
  • icGREP/icgrep-devel/icgrep/kernels/p2s_kernel.cpp

    r5303 r5307  
    6262
    6363}
    64    
     64
    6565
    6666void P2SKernelWithCompressedOutput::generateDoBlockMethod() {
    67     PointerType * i8PtrTy = iBuilder->getInt8PtrTy();
    6867    IntegerType * i32 = iBuilder->getInt32Ty();
    6968    PointerType * bitBlockPtrTy = PointerType::get(iBuilder->getBitBlockType(), 0);
     
    8180    Value * unit_counts = iBuilder->fwCast(units_per_register, iBuilder->CreateBlockAlignedLoad(delCountBlock_ptr));
    8281
    83     Value * unitsGenerated = getProducedItemCount("byteStream"); // units generated to buffer
    84     Value * output_ptr = getStreamView(i8PtrTy, "byteStream", getBlockNo(), iBuilder->getInt32(0));
     82    Value * output_ptr = getOutputStream("byteStream", iBuilder->getInt32(0));
     83    output_ptr = iBuilder->CreatePointerCast(output_ptr, iBuilder->getInt8PtrTy());
    8584    Value * offset = iBuilder->getInt32(0);
    8685    for (unsigned j = 0; j < 8; ++j) {
     
    8887        offset = iBuilder->CreateZExt(iBuilder->CreateExtractElement(unit_counts, iBuilder->getInt32(j)), i32);
    8988    }
     89
     90    Value * unitsGenerated = getProducedItemCount("byteStream"); // units generated to buffer
    9091    unitsGenerated = iBuilder->CreateAdd(unitsGenerated, iBuilder->CreateZExt(offset, iBuilder->getSizeTy()));
    9192    setProducedItemCount("byteStream", unitsGenerated);
     
    139140void P2S16KernelWithCompressedOutput::generateDoBlockMethod() {
    140141    IntegerType * i32Ty = iBuilder->getInt32Ty();
     142    PointerType * int16PtrTy = iBuilder->getInt16Ty()->getPointerTo();
    141143    PointerType * bitBlockPtrTy = iBuilder->getBitBlockType()->getPointerTo();
     144    ConstantInt * stride = iBuilder->getSize(iBuilder->getStride());
    142145
    143146    Value * hi_input[8];
     
    159162    Value * delCountBlock_ptr = getInputStream("deletionCounts", iBuilder->getInt32(0));
    160163    Value * unit_counts = iBuilder->fwCast(iBuilder->getBitBlockWidth() / 16, iBuilder->CreateBlockAlignedLoad(delCountBlock_ptr));
    161     PointerType * int16PtrTy = PointerType::get(iBuilder->getInt16Ty(), 0);
    162 
    163     ConstantInt * stride = iBuilder->getSize(iBuilder->getStride());
     164
     165
     166    Value * u16_output_ptr = getOutputStream("i16Stream", iBuilder->getInt32(0));
     167    u16_output_ptr = iBuilder->CreatePointerCast(u16_output_ptr, int16PtrTy);
    164168    Value * i16UnitsGenerated = getProducedItemCount("i16Stream"); // units generated to buffer
    165     Value * i16BlockNo = iBuilder->CreateUDiv(i16UnitsGenerated, stride);
    166     Value * u16_output_ptr = getStreamView(int16PtrTy, "i16Stream", i16BlockNo, iBuilder->CreateURem(i16UnitsGenerated, stride));
     169    u16_output_ptr = iBuilder->CreateGEP(u16_output_ptr, iBuilder->CreateURem(i16UnitsGenerated, stride));
    167170
    168171    Value * offset = ConstantInt::get(i32Ty, 0);
     
    176179        iBuilder->CreateAlignedStore(merge1, iBuilder->CreateBitCast(iBuilder->CreateGEP(u16_output_ptr, offset), bitBlockPtrTy), 1);
    177180        offset = iBuilder->CreateZExt(iBuilder->CreateExtractElement(unit_counts, iBuilder->getInt32(2 * j + 1)), i32Ty);
    178     }
     181    }   
    179182    Value * i16UnitsFinal = iBuilder->CreateAdd(i16UnitsGenerated, iBuilder->CreateZExt(offset, iBuilder->getSizeTy()));
    180183    setProducedItemCount("i16Stream", i16UnitsFinal);
    181     auto const &b  = getStreamSetBuffer("i16Stream");
     184    const auto b  = getStreamSetBuffer("i16Stream");
    182185
    183186    if (auto cb = dyn_cast<CircularCopybackBuffer>(b)) {
     
    199202}
    200203   
    201 P2S16KernelWithCompressedOutput::P2S16KernelWithCompressedOutput(IDISA::IDISA_Builder * iBuilder)
    202 : BlockOrientedKernel(iBuilder, "p2s_16_compress",
    203               {Binding{iBuilder->getStreamSetTy(16, 1), "basisBits"}, Binding{iBuilder->getStreamSetTy(1, 1), "deletionCounts"}},
    204               {Binding{iBuilder->getStreamSetTy(1, 16), "i16Stream"}},
     204P2S16KernelWithCompressedOutput::P2S16KernelWithCompressedOutput(IDISA::IDISA_Builder * b)
     205: BlockOrientedKernel(b, "p2s_16_compress",
     206              {Binding{b->getStreamSetTy(16, 1), "basisBits"}, Binding{b->getStreamSetTy(1, 1), "deletionCounts"}},
     207              {Binding{b->getStreamSetTy(1, 16), "i16Stream", b->getStride()}},
    205208              {},
    206209              {},
    207               {Binding{iBuilder->getSizeTy(), "unitsGenerated"}, Binding{iBuilder->getSizeTy(), "unitsWritten"}}) {
     210              {Binding{b->getSizeTy(), "unitsGenerated"}, Binding{b->getSizeTy(), "unitsWritten"}}) {
    208211    setDoBlockUpdatesProducedItemCountsAttribute(true);
    209212}
  • icGREP/icgrep-devel/icgrep/kernels/radix64.cpp

    r5297 r5307  
    2020//
    2121// Using aligned SIMD loads, an inner loop processes three registers full of input
    22 // data (i.e., three BytePacks) to produce four registers full of output.   This is 
     22// data (i.e., three BytePacks) to produce four registers full of output.   This is
    2323// a 3 step process.
    2424// Step 1:  Load input_pack0, apply the shuffle operation to produce output_pack0.
     
    3838// with the input data completely processed for each tripleBlock.
    3939//
    40 // The pipeline must guarantee that the doSegment method is called with the 
     40// The pipeline must guarantee that the doSegment method is called with the
    4141// continuous buffer for the full segment (number of blocks).
    4242
     
    376376}
    377377
     378//// Special processing for the base 64 format.   The output must always contain a multiple
     379//// of 4 bytes.   When the number of radix 64 values is not a multiple of 4
     380//// number of radix 64 values
     381//void base64Kernel::generateFinalBlockMethod(Value * remainingBytes) {
     382
     383//    BasicBlock * entry = iBuilder->GetInsertBlock();
     384//    BasicBlock * base64_loop = CreateBasicBlock("base64_loop");
     385//    BasicBlock * loopExit = CreateBasicBlock("loopExit");
     386//    BasicBlock * doPadding = CreateBasicBlock("doPadding");
     387//    BasicBlock * doPadding2 = CreateBasicBlock("doPadding2");
     388//    BasicBlock * fbExit = CreateBasicBlock("fbExit");
     389
     390//    Value * remainMod4 = iBuilder->CreateAnd(remainingBytes, iBuilder->getSize(3));
     391//    Value * padBytes = iBuilder->CreateSub(iBuilder->getSize(4), remainMod4);
     392//    padBytes = iBuilder->CreateAnd(padBytes, iBuilder->getSize(3));
     393
     394//    Constant * packSize = iBuilder->getSize(iBuilder->getStride() / 8);
     395
     396//    // Enter the loop only if there is at least one byte remaining to process.
     397//    iBuilder->CreateCondBr(iBuilder->CreateICmpEQ(remainingBytes, iBuilder->getSize(0)), fbExit, base64_loop);
     398
     399//    iBuilder->SetInsertPoint(base64_loop);
     400//    PHINode * idx = iBuilder->CreatePHI(iBuilder->getInt32Ty(), 2);
     401//    PHINode * loopRemain = iBuilder->CreatePHI(iBuilder->getSizeTy(), 2);
     402//    idx->addIncoming(ConstantInt::getNullValue(iBuilder->getInt32Ty()), entry);
     403//    loopRemain->addIncoming(remainingBytes, entry);
     404//    Value * radix64streamPtr = getInputStream("radix64stream", iBuilder->getInt32(0), idx);
     405//    Value * bytepack = iBuilder->CreateBlockAlignedLoad(radix64streamPtr);
     406//    Value * base64pack = processPackData(bytepack);
     407//    Value * base64streamPtr = getOutputStream("base64stream", iBuilder->getInt32(0), idx);
     408
     409//    iBuilder->CreateBlockAlignedStore(iBuilder->bitCast(base64pack), base64streamPtr);
     410//    idx->addIncoming(iBuilder->CreateAdd(idx, ConstantInt::get(iBuilder->getInt32Ty(), 1)), base64_loop);
     411//    Value* remainAfterLoop = iBuilder->CreateSub(loopRemain, packSize);
     412//    loopRemain->addIncoming(remainAfterLoop, base64_loop);
     413
     414//    Value* continueLoop = iBuilder->CreateICmpSGT(remainAfterLoop, iBuilder->getSize(0));
     415//    iBuilder->CreateCondBr(continueLoop, base64_loop, loopExit);
     416
     417//    iBuilder->SetInsertPoint(loopExit);
     418//    iBuilder->CreateCondBr(iBuilder->CreateICmpEQ(padBytes, iBuilder->getSize(0)), fbExit, doPadding);
     419
     420//    iBuilder->SetInsertPoint(doPadding);
     421
     422//    base64streamPtr = getOutputStream("base64stream", iBuilder->getInt32(0), idx);
     423//    Value * i8streamPtr = iBuilder->CreatePointerCast(base64streamPtr, iBuilder->getInt8PtrTy());
     424//    iBuilder->CreateStore(ConstantInt::get(iBuilder->getInt8Ty(), '='), iBuilder->CreateGEP(i8streamPtr, remainingBytes));
     425//    iBuilder->CreateCondBr(iBuilder->CreateICmpEQ(remainMod4, iBuilder->getSize(3)), fbExit, doPadding2);
     426//    iBuilder->SetInsertPoint(doPadding2);
     427//    Value * finalPadPos = iBuilder->CreateAdd(remainingBytes, iBuilder->getSize(1));
     428//    iBuilder->CreateStore(ConstantInt::get(iBuilder->getInt8Ty(), '='), iBuilder->CreateGEP(i8streamPtr, finalPadPos));
     429//    iBuilder->CreateBr(fbExit);
     430//    iBuilder->SetInsertPoint(fbExit);
     431//    Value * produced = iBuilder->CreateAdd(getProducedItemCount("base64stream"), iBuilder->CreateAdd(remainingBytes, padBytes));
     432//    setProducedItemCount("base64stream", produced);
     433//}
     434
    378435// Special processing for the base 64 format.   The output must always contain a multiple
    379436// of 4 bytes.   When the number of radix 64 values is not a multiple of 4
     
    418475
    419476    iBuilder->SetInsertPoint(doPadding);
    420     Value * i8output_ptr = getStreamView(iBuilder->getInt8PtrTy(), "base64stream", getBlockNo(), iBuilder->getInt32(0));
     477    Value * i8output_ptr = getOutputStream("base64stream", iBuilder->getInt32(0));
     478    i8output_ptr = iBuilder->CreatePointerCast(i8output_ptr, iBuilder->getInt8PtrTy());
    421479    iBuilder->CreateStore(ConstantInt::get(iBuilder->getInt8Ty(), '='), iBuilder->CreateGEP(i8output_ptr, remainingBytes));
    422480    iBuilder->CreateCondBr(iBuilder->CreateICmpEQ(remainMod4, iBuilder->getSize(3)), fbExit, doPadding2);
  • icGREP/icgrep-devel/icgrep/kernels/stdin_kernel.cpp

    r5292 r5307  
    1414void StdInKernel::generateDoSegmentMethod(Value *doFinal, const std::vector<Value *> &producerPos) {
    1515
    16     Type * i8PtrTy = iBuilder->getInt8PtrTy();
    17    
    1816    BasicBlock * setTermination = CreateBasicBlock("setTermination");
    1917    BasicBlock * stdInExit = CreateBasicBlock("stdInExit");
    20     ConstantInt * blockItems = iBuilder->getSize(iBuilder->getBitBlockWidth());
     18//    ConstantInt * blockItems = iBuilder->getSize(iBuilder->getBitBlockWidth());
    2119    ConstantInt * itemBytes = iBuilder->getSize(mCodeUnitWidth/8);
    2220    ConstantInt * segmentBytes = iBuilder->getSize(mSegmentBlocks * iBuilder->getBitBlockWidth() * mCodeUnitWidth/8);
    2321    ConstantInt * stdin_fileno = iBuilder->getInt32(STDIN_FILENO);
    2422    Value * produced = getProducedItemCount("codeUnitBuffer");
    25     Value * blockNo = iBuilder->CreateUDiv(produced, blockItems);
    26     Value * byteOffset = iBuilder->CreateMul(iBuilder->CreateURem(produced, blockItems), itemBytes);
    27     Value * bytePtr = getStreamView(i8PtrTy, "codeUnitBuffer", blockNo, byteOffset);
     23//    Value * byteOffset = iBuilder->CreateMul(iBuilder->CreateURem(produced, blockItems), itemBytes);
     24//    Value * bytePtr = getRawItemPointer("codeUnitBuffer", iBuilder->getInt32(0), produced);
     25    Value * bytePtr = getOutputStream("codeUnitBuffer", iBuilder->getInt32(0));
     26    bytePtr = iBuilder->CreatePointerCast(bytePtr, iBuilder->getInt8PtrTy());
     27
     28
    2829   
    2930    Value * nRead = iBuilder->CreateReadCall(stdin_fileno, bytePtr, segmentBytes);
     
    6465void FileSource::generateDoSegmentMethod(Value * doFinal, const std::vector<Value *> & producerPos) {
    6566
    66     PointerType * i8PtrTy = iBuilder->getInt8PtrTy();
    6767    BasicBlock * closeFile = CreateBasicBlock("closeFile");
    6868    BasicBlock * fileSourceExit = CreateBasicBlock("fileSourceExit");
    69     Constant * blockItems = iBuilder->getSize(iBuilder->getBitBlockWidth());
    7069    Constant * itemBytes = iBuilder->getSize(mCodeUnitWidth/8);
    7170   
    7271    Value * produced = getProducedItemCount("codeUnitBuffer");
    73     Value * blockNo = iBuilder->CreateUDiv(produced, blockItems);
    74     Value * byteOffset = iBuilder->CreateMul(iBuilder->CreateURem(produced, blockItems), itemBytes);
    75     Value * bytePtr = getStreamView(i8PtrTy, "codeUnitBuffer", blockNo, byteOffset);
     72    Value * bytePtr = getOutputStream("codeUnitBuffer", iBuilder->getInt32(0));
     73    bytePtr = iBuilder->CreatePointerCast(bytePtr, iBuilder->getInt8PtrTy());
     74
    7675    Value * IOstreamPtr = getScalarField("IOstreamPtr");
    7776    Value * itemsToDo = iBuilder->getSize(mSegmentBlocks * iBuilder->getBitBlockWidth());
  • icGREP/icgrep-devel/icgrep/kernels/stdout_kernel.cpp

    r5303 r5307  
    77#include <IR_Gen/idisa_builder.h>
    88#include <kernels/streamset.h>
    9 #include <stdio.h>
    10 // #include <llvm/IR/Type.h>
    119namespace llvm { class Type; }
    1210
     
    1513
    1614namespace kernel {
    17            
     15
    1816// Rather than using doBlock logic to write one block at a time, this custom
    1917// doSegment method attempts to write the entire segment with a single write call.
     
    2321    PointerType * i8PtrTy = iBuilder->getInt8PtrTy();
    2422
    25     Constant * blockItems = iBuilder->getSize(iBuilder->getBitBlockWidth());
    26     Constant * itemBytes = iBuilder->getSize(mCodeUnitWidth/8);
    27    
     23    Constant * blockItems = iBuilder->getSize(iBuilder->getBitBlockWidth() - 1);
     24    Constant * itemBytes = iBuilder->getSize(mCodeUnitWidth / 8);
    2825    Value * processed = getProcessedItemCount("codeUnitBuffer");
    2926    Value * itemsToDo = iBuilder->CreateSub(producerPos[0], processed);
    3027    // There may be two memory areas if we are at the physical end of a circular buffer.
    31     auto const &b  = getStreamSetBuffer("codeUnitBuffer");
     28    const auto b  = getStreamSetBuffer("codeUnitBuffer");
    3229    Value * wraparound = nullptr;
    3330    if (isa<CircularBuffer>(b) || isa<CircularCopybackBuffer>(b)) {
     
    3734    }
    3835   
    39     //Value * blockNo = getBlockNo();
    40     Value * blockNo = iBuilder->CreateUDiv(processed, blockItems);
    41     Value * byteOffset = iBuilder->CreateMul(iBuilder->CreateURem(processed, blockItems), itemBytes);
    42     Value * bytePtr = getStreamView(i8PtrTy, "codeUnitBuffer", blockNo, byteOffset);
     36    Value * byteOffset = iBuilder->CreateMul(iBuilder->CreateAnd(processed, blockItems), itemBytes);
     37    Value * bytePtr = iBuilder->CreatePointerCast(getInputStream("codeUnitBuffer", iBuilder->getInt32(0)), i8PtrTy);
     38    bytePtr = iBuilder->CreateGEP(bytePtr, byteOffset);
     39
    4340    iBuilder->CreateWriteCall(iBuilder->getInt32(1), bytePtr, iBuilder->CreateMul(itemsToDo, itemBytes));
     41
    4442    processed = iBuilder->CreateAdd(processed, itemsToDo);
    4543    setProcessedItemCount("codeUnitBuffer", processed);
    46     //setBlockNo(iBuilder->CreateUDiv(processed, blockItems));
    4744   
    4845    // Now we may process the second area (if required).
     
    5451       
    5552        // Calculate from the updated value of processed;
    56         blockNo = iBuilder->CreateUDiv(processed, blockItems);
    57         byteOffset = iBuilder->CreateMul(iBuilder->CreateURem(processed, blockItems), itemBytes);
    58         bytePtr = getStreamView(i8PtrTy, "codeUnitBuffer", blockNo, byteOffset);
     53        byteOffset = iBuilder->CreateMul(iBuilder->CreateAnd(processed, blockItems), itemBytes);
     54        Value * bytePtr = iBuilder->CreatePointerCast(getInputStream("codeUnitBuffer", iBuilder->getInt32(0)), i8PtrTy);
     55        bytePtr = iBuilder->CreateGEP(bytePtr, byteOffset);
     56
    5957        itemsToDo = iBuilder->CreateSub(producerPos[0], processed);
    6058        iBuilder->CreateWriteCall(iBuilder->getInt32(1), bytePtr, iBuilder->CreateMul(itemsToDo, itemBytes));
     
    8684
    8785void FileSink::generateDoSegmentMethod(Value *doFinal, const std::vector<Value *> &producerPos) {
     86
    8887    PointerType * i8PtrTy = iBuilder->getInt8PtrTy();
    89    
     88
    9089    BasicBlock * closeFile = CreateBasicBlock("closeFile");
    9190    BasicBlock * fileOutExit = CreateBasicBlock("fileOutExit");
    9291    Constant * blockItems = iBuilder->getSize(iBuilder->getBitBlockWidth());
    9392    Constant * itemBytes = iBuilder->getSize(mCodeUnitWidth/8);
    94    
     93
    9594    Value * IOstreamPtr = getScalarField("IOstreamPtr");
    9695    Value * processed = getProcessedItemCount("codeUnitBuffer");
     
    105104    }
    106105   
    107     Value * blockNo = iBuilder->CreateUDiv(processed, blockItems);
    108106    Value * byteOffset = iBuilder->CreateMul(iBuilder->CreateURem(processed, blockItems), itemBytes);
    109     Value * bytePtr = getStreamView(i8PtrTy, "codeUnitBuffer", blockNo, byteOffset);
     107    Value * bytePtr = iBuilder->CreatePointerCast(getInputStream("codeUnitBuffer", iBuilder->getInt32(0)), i8PtrTy);
     108    bytePtr = iBuilder->CreateGEP(bytePtr, byteOffset);
     109
    110110    iBuilder->CreateFWriteCall(bytePtr, itemsToDo, itemBytes, IOstreamPtr);
    111    
     111
    112112   
    113113    processed = iBuilder->CreateAdd(processed, itemsToDo);
    114114    setProcessedItemCount("codeUnitBuffer", processed);
    115     //setBlockNo(iBuilder->CreateUDiv(processed, blockItems));
    116115   
    117116    // Now we may process the second area (if required).
     
    123122       
    124123        // Calculate from the updated value of processed;
    125         blockNo = iBuilder->CreateUDiv(processed, blockItems);
    126124        byteOffset = iBuilder->CreateMul(iBuilder->CreateURem(processed, blockItems), itemBytes);
    127         bytePtr = getStreamView(i8PtrTy, "codeUnitBuffer", blockNo, byteOffset);
     125        Value * bytePtr = iBuilder->CreatePointerCast(getInputStream("codeUnitBuffer", iBuilder->getInt32(0)), i8PtrTy);
     126        bytePtr = iBuilder->CreateGEP(bytePtr, byteOffset);
    128127        itemsToDo = iBuilder->CreateSub(producerPos[0], processed);
    129128        iBuilder->CreateFWriteCall(bytePtr, itemsToDo, itemBytes, IOstreamPtr);
     
    134133    }
    135134    iBuilder->CreateCondBr(doFinal, closeFile, fileOutExit);
    136    
     135
    137136    iBuilder->SetInsertPoint(closeFile);
    138137    iBuilder->CreateFCloseCall(IOstreamPtr);
    139138    iBuilder->CreateBr(fileOutExit);
    140    
     139
    141140    iBuilder->SetInsertPoint(fileOutExit);
    142141}
     
    152151
    153152
     153
  • icGREP/icgrep-devel/icgrep/kernels/streamset.cpp

    r5303 r5307  
    66#include "streamset.h"
    77#include <IR_Gen/idisa_builder.h>  // for IDISA_Builder
    8 #include <assert.h>                // for assert
    9 #include <llvm/IR/Type.h>          // for Type
    10 #include <stdexcept>               // for runtime_error
    118#include <llvm/IR/BasicBlock.h>    // for BasicBlock
    129#include <llvm/IR/Constants.h>     // for ConstantInt
     
    1512#include <llvm/IR/Module.h>        // for Module
    1613#include <llvm/IR/Value.h>         // for Value
     14#include <llvm/Support/raw_ostream.h>
     15
    1716namespace llvm { class Constant; }
    1817namespace llvm { class Function; }
     
    2221using namespace IDISA;
    2322
    24 Type * resolveVectorTy(IDISA_Builder * const b, Type * type) {
    25     if (LLVM_LIKELY(type->isVectorTy() && type->getVectorNumElements() == 0)) {
    26         type = type->getVectorElementType();
    27         if (LLVM_LIKELY(type->isIntegerTy())) {
    28             const auto fieldWidth = cast<IntegerType>(type)->getBitWidth();
    29             type = b->getBitBlockType();
    30             if (fieldWidth != 1) {
    31                 type = llvm::ArrayType::get(type, fieldWidth);
    32             }
    33         }
    34     }
    35     return type;
    36 }
    37 
    38 Type * StreamSetBuffer::resolveStreamSetBufferType(Type * const type) const {
    39     if (type->isArrayTy()) {
    40         return ArrayType::get(resolveVectorTy(iBuilder, type->getArrayElementType()), type->getArrayNumElements());
    41     } else if (type->isVectorTy()) {
    42         return resolveVectorTy(iBuilder, type);
    43     }
    44     return type;
    45 }
    46 
    4723void StreamSetBuffer::allocateBuffer() {
    4824    mStreamSetBufferPtr = iBuilder->CreateCacheAlignedAlloca(getType(), iBuilder->getSize(mBufferBlocks));
    4925}
    5026
    51 Value * StreamSetBuffer::getStream(Value * self, Value * blockNo, Value * index) const {
    52     return iBuilder->CreateGEP(getStreamSetPtr(self, blockNo), {iBuilder->getInt32(0), index});
    53 }
    54 
    55 Value * StreamSetBuffer::getStream(Value * self, Value * blockNo, Value * index1, Value * index2) const {
    56     return iBuilder->CreateGEP(getStreamSetPtr(self, blockNo), {iBuilder->getInt32(0), index1, index2});
    57 }
    58 
    59 Value * StreamSetBuffer::getStreamView(llvm::Type * type, llvm::Value * self, Value * blockNo, llvm::Value * index) const {
    60     return iBuilder->CreateGEP(iBuilder->CreatePointerCast(getStreamSetPtr(self, blockNo), type), index, "view");
     27Value * StreamSetBuffer::getStream(Value * self, Value * streamIndex, Value * blockIndex) const {
     28    return iBuilder->CreateGEP(getStreamSetPtr(self, blockIndex), {iBuilder->getInt32(0), streamIndex});
     29}
     30
     31Value * StreamSetBuffer::getStream(Value * self, Value * streamIndex, Value * blockIndex, Value * packIndex) const {
     32    return iBuilder->CreateGEP(getStreamSetPtr(self, blockIndex), {iBuilder->getInt32(0), streamIndex, packIndex});
     33}
     34
     35/**
     36 * @brief getRawItemPointer
     37 *
     38 * Get a raw pointer to the iN field at position absolutePosition of stream number streamIndex of the stream set.
     39 * In the case of a stream whose fields are less than one byte (8 bits) in size, the pointer is to the containing byte.
     40 * The type of the pointer is i8* for fields of 8 bits or less, otherwise iN* for N-bit fields.
     41 */
     42Value * StreamSetBuffer::getRawItemPointer(Value * self, Value * streamIndex, Value * absolutePosition) const {
     43    Value * ptr = self;
     44    if (isa<ConstantInt>(streamIndex) && cast<ConstantInt>(streamIndex)->isZero()) {
     45        ptr = iBuilder->CreateGEP(ptr, {iBuilder->getInt32(0), streamIndex});
     46    }
     47    IntegerType * const ty = cast<IntegerType>(mBaseType->getArrayElementType()->getVectorElementType());
     48    ptr = iBuilder->CreatePointerCast(ptr, ty->getPointerTo());
     49    if (LLVM_UNLIKELY(ty->getBitWidth() < 8)) {
     50        const auto bw = ty->getBitWidth();
     51        if (LLVM_LIKELY((bw & (bw - 1)) == 0)) { // is power of 2
     52            absolutePosition = iBuilder->CreateUDiv(absolutePosition, ConstantInt::get(absolutePosition->getType(), 8 / bw));
     53        } else {
     54            absolutePosition = iBuilder->CreateMul(absolutePosition, ConstantInt::get(absolutePosition->getType(), bw));
     55            absolutePosition = iBuilder->CreateUDiv(absolutePosition, ConstantInt::get(absolutePosition->getType(), 8));
     56        }
     57    }
     58    return iBuilder->CreateGEP(ptr, absolutePosition);
    6159}
    6260
    6361Value * StreamSetBuffer::getLinearlyAccessibleItems(llvm::Value * fromPosition) const {
    64     if (isa<ArrayType>(mStreamSetType) && dyn_cast<ArrayType>(mStreamSetType)->getNumElements() > 1) {
     62    if (isa<ArrayType>(mType) && dyn_cast<ArrayType>(mType)->getNumElements() > 1) {
    6563        Constant * stride = iBuilder->getSize(iBuilder->getStride());
    6664        return iBuilder->CreateSub(stride, iBuilder->CreateURem(fromPosition, stride));
     
    9088
    9189void ExternalFileBuffer::allocateBuffer() {
    92     throw std::runtime_error("External buffers cannot be allocated.");
     90    report_fatal_error("External buffers cannot be allocated.");
    9391}
    9492
     
    105103Value * CircularBuffer::getStreamSetPtr(Value * self, Value * blockNo) const {
    106104    assert (blockNo->getType()->isIntegerTy());
    107 
    108105    Value * offset = nullptr;
    109106    if (mBufferBlocks == 1) {
     
    188185}
    189186
    190 llvm::Value * ExpandableBuffer::getStreamView(llvm::Type * type, llvm::Value * self, llvm::Value * blockNo, llvm::Value * index) const {
    191     return nullptr;
    192 }
    193 
    194187// Constructors
    195188
     
    214207}
    215208
    216 
    217209ExpandableBuffer::ExpandableBuffer(IDISA::IDISA_Builder * b, llvm::Type * type, size_t bufferBlocks, unsigned AddressSpace)
    218210: StreamSetBuffer(BufferKind::ExpandableBuffer, b, type, bufferBlocks, AddressSpace) {
     
    224216}
    225217
     218inline Type * resolveStreamSetType(IDISA_Builder * const b, Type * const type) {
     219    if (type->isArrayTy()) {
     220        Type * ty = type->getArrayElementType();
     221        if (LLVM_LIKELY(ty->isVectorTy() && ty->getVectorNumElements() == 0)) {
     222            ty = ty->getVectorElementType();
     223            if (LLVM_LIKELY(ty->isIntegerTy())) {
     224                const auto fieldWidth = cast<IntegerType>(ty)->getBitWidth();
     225                ty = b->getBitBlockType();
     226                if (fieldWidth != 1) {
     227                    ty = llvm::ArrayType::get(ty, fieldWidth);
     228                }
     229                return ArrayType::get(ty, type->getArrayNumElements());
     230            }
     231        }
     232    }
     233    std::string tmp;
     234    raw_string_ostream out(tmp);
     235    type->print(out);
     236    out << " is an unvalid stream set buffer type.";
     237    report_fatal_error(out.str());
     238}
    226239
    227240StreamSetBuffer::StreamSetBuffer(BufferKind k, IDISA::IDISA_Builder * b, Type * type, unsigned blocks, unsigned AddressSpace)
    228241: mBufferKind(k)
    229242, iBuilder(b)
    230 , mStreamSetType(resolveStreamSetBufferType(type))
     243, mType(resolveStreamSetType(b, type))
    231244, mBufferBlocks(blocks)
    232245, mAddressSpace(AddressSpace)
    233246, mStreamSetBufferPtr(nullptr)
    234 , mBaseStreamSetType(type) {
    235 
    236 }
     247, mBaseType(type) {
     248
     249}
  • icGREP/icgrep-devel/icgrep/kernels/streamset.h

    r5303 r5307  
    99#include <llvm/IR/Type.h>  // for Type
    1010namespace IDISA { class IDISA_Builder; }
    11 namespace llvm { class PointerType; }
    1211namespace llvm { class Value; }
    1312namespace kernel { class KernelBuilder; }
     
    2726
    2827    llvm::Type * getType() const {
    29         return mStreamSetType;
     28        return mType;
    3029    }
    3130
    3231    llvm::Type * getBaseType() const {
    33         return mBaseStreamSetType;
     32        return mBaseType;
    3433    }
    3534
     
    4847    virtual void allocateBuffer();
    4948
    50     virtual llvm::Value * getStream(llvm::Value * self, llvm::Value * blockNo, llvm::Value * index) const;
     49    virtual llvm::Value * getStream(llvm::Value * self, llvm::Value * streamIndex, llvm::Value * blockIndex) const;
    5150
    52     virtual llvm::Value * getStream(llvm::Value * self, llvm::Value * blockNo, llvm::Value * index1, llvm::Value * index2) const;
     51    virtual llvm::Value * getStream(llvm::Value * self, llvm::Value * streamIndex, llvm::Value * blockIndex, llvm::Value * packIndex) const;
    5352   
    54     virtual llvm::Value * getStreamView(llvm::Type * type, llvm::Value * self, llvm::Value * blockNo, llvm::Value * index) const;
     53    llvm::Value * getRawItemPointer(llvm::Value * self, llvm::Value * streamIndex, llvm::Value * absolutePosition) const;
    5554
    5655    // The number of items that can be linearly accessed from a given logical stream position.
     
    6463    virtual llvm::Value * getStreamSetPtr(llvm::Value * self, llvm::Value * blockNo) const = 0;
    6564
    66     llvm::Type * resolveStreamSetBufferType(llvm::Type * type) const;
    67 
    6865protected:
    6966    const BufferKind                mBufferKind;
    7067    IDISA::IDISA_Builder * const    iBuilder;
    71     llvm::Type * const              mStreamSetType;
     68    llvm::Type * const              mType;
    7269    const size_t                    mBufferBlocks;
    7370    const unsigned                  mAddressSpace;
    7471    llvm::Value *                   mStreamSetBufferPtr;
    75     llvm::Type * const              mBaseStreamSetType;
     72    llvm::Type * const              mBaseType;
    7673};   
    7774
     
    161158    llvm::Value * getStream(llvm::Value * self, llvm::Value * blockNo, llvm::Value * index1, llvm::Value * index2) const override;
    162159
    163     llvm::Value * getStreamView(llvm::Type * type, llvm::Value * self, llvm::Value * blockNo, llvm::Value * index) const override;
    164 
    165160    llvm::Value * getLinearlyAccessibleItems(llvm::Value * fromPosition) const override;
    166161   
  • icGREP/icgrep-devel/icgrep/pablo/builder.hpp

    r5285 r5307  
    194194    }
    195195
    196     llvm::Type * getStreamTy(const unsigned FieldWidth = 1) {
    197         return mPb->getStreamTy(FieldWidth);
    198     }
     196//    llvm::Type * getStreamTy(const unsigned FieldWidth = 1) {
     197//        return mPb->getStreamTy(FieldWidth);
     198//    }
    199199   
    200200    llvm::Type * getStreamSetTy(const unsigned NumElements = 1, const unsigned FieldWidth = 1) {
  • icGREP/icgrep-devel/icgrep/pablo/carry_manager.cpp

    r5292 r5307  
    1616#include <pablo/pe_matchstar.h>
    1717#include <pablo/pe_var.h>
    18 
     18#include <llvm/Support/raw_ostream.h>
    1919using namespace llvm;
    2020
     
    5858
    5959    mKernel->addScalar(analyse(kernel->getEntryBlock()), "carries");
     60
     61    if (mHasLoop) {
     62        mKernel->addScalar(iBuilder->getInt32Ty(), "loopSelector");
     63    }
    6064}
    6165
     
    7680    assert (mCarryFrame.empty());
    7781    assert (mCarrySummary.empty());
     82
     83    if (mHasLoop) {
     84        mLoopSelector = mKernel->getScalarField("loopSelector");
     85    }
     86}
     87
     88/** ------------------------------------------------------------------------------------------------------------- *
     89 * @brief finalizeCodeGen
     90 ** ------------------------------------------------------------------------------------------------------------- */
     91void CarryManager::finalizeCodeGen() {
     92    if (mHasLoop) {
     93        mKernel->setScalarField("loopSelector", iBuilder->CreateXor(mLoopSelector, iBuilder->getInt32(1)));
     94    }
    7895}
    7996
     
    83100void CarryManager::enterLoopScope(const PabloBlock * const scope) {
    84101    assert (scope);
    85     if (mLoopDepth++ == 0) {
    86         Value * const blockNo = mKernel->getBlockNo();
    87         mLoopSelector = iBuilder->CreateAnd(blockNo, ConstantInt::get(blockNo->getType(), 1));
    88     }
     102    ++mLoopDepth;
    89103    enterScope(scope);
    90104}
     
    178192void CarryManager::leaveLoopScope(BasicBlock * const entryBlock, BasicBlock * const exitBlock) {
    179193    assert (mLoopDepth > 0);
    180     if (--mLoopDepth == 0) {
    181         mLoopSelector = nullptr;
    182     }
     194    --mLoopDepth;
    183195    leaveScope();
    184196}
     
    418430    mCarryPackPtr = carryInPtr;
    419431    if (mLoopDepth > 0) {
    420         carryInPtr = iBuilder->CreateGEP(carryInPtr, {iBuilder->getInt32(0), mLoopSelector});
     432        carryInPtr = iBuilder->CreateGEP(carryInPtr, {iBuilder->getInt32(0), mLoopSelector});       
    421433    }
    422434    assert (carryInPtr->getType()->getPointerElementType() == mCarryPackType);
     
    558570            state.push_back(analyse(cast<If>(stmt)->getBody(), ifDepth + 1, loopDepth));
    559571        } else if (LLVM_UNLIKELY(isa<While>(stmt))) {
     572            mHasLoop = true;
    560573            state.push_back(analyse(cast<While>(stmt)->getBody(), ifDepth, loopDepth + 1));
    561574        }
     
    604617: iBuilder(idb)
    605618, mKernel(nullptr)
     619, mSelf(nullptr)
    606620, mBitBlockType(idb->getBitBlockType())
    607621, mBitBlockWidth(idb->getBitBlockWidth())
     622, mCurrentFrame(nullptr)
    608623, mCurrentFrameIndex(0)
    609624, mCurrentScope(nullptr)
     
    612627, mCarryPackPtr(nullptr)
    613628, mIfDepth(0)
    614 , mLoopDepth(0) {
    615 
    616 }
    617 
    618 
    619 }
    620 
     629, mHasLoop(false)
     630, mLoopDepth(0)
     631, mLoopSelector(nullptr) {
     632
     633}
     634
     635}
     636
  • icGREP/icgrep-devel/icgrep/pablo/carry_manager.h

    r5292 r5307  
    4747
    4848    void initializeCodeGen();
     49
     50    void finalizeCodeGen();
    4951
    5052    /* Entering and leaving loops. */
     
    117119    unsigned                                        mIfDepth;
    118120
     121    bool                                            mHasLoop;
    119122    unsigned                                        mLoopDepth;
    120123    llvm::Value *                                   mLoopSelector;
  • icGREP/icgrep-devel/icgrep/pablo/codegenstate.h

    r5285 r5307  
    242242    While * createWhile(PabloAST * condition, PabloBlock * body);
    243243
    244     llvm::Type * getStreamTy(const unsigned FieldWidth = 1) {
    245         return mParent->getStreamTy(FieldWidth);
    246     }
     244//    llvm::Type * getStreamTy(const unsigned FieldWidth = 1) {
     245//        return mParent->getStreamTy(FieldWidth);
     246//    }
    247247   
    248248    llvm::Type * getStreamSetTy(const unsigned NumElements = 1, const unsigned FieldWidth = 1) {
  • icGREP/icgrep-devel/icgrep/pablo/pablo_compiler.cpp

    r5297 r5307  
    5656    mMarker.emplace(entryBlock->createOnes(), iBuilder->allOnes());
    5757
    58     Value * const blockNo =  mKernel->getBlockNo();
    59 
    6058    for (unsigned i = 0; i < mKernel->getNumOfInputs(); ++i) {
    6159        Var * var = mKernel->getInput(i);
     
    6563            input = mKernel->getScalarFieldPtr(name);
    6664        } else {
    67             input = mKernel->getStreamSetPtr(name, blockNo);
     65            input = mKernel->getInputStream(name, iBuilder->getInt32(0));
    6866        }
    6967        mMarker.emplace(var, input);
     
    7775            output = mKernel->getScalarFieldPtr(name);
    7876        } else {
    79             output = mKernel->getStreamSetPtr(name, blockNo);
     77            output = mKernel->getOutputStream(name, iBuilder->getInt32(0));
    8078        }
    8179        mMarker.emplace(var, output);
     
    8381
    8482    compileBlock(entryBlock);
     83
     84    mCarryManager->finalizeCodeGen();
    8585
    8686}
     
    388388            Value * array = compileExpression(extract->getArray(), false);
    389389            Value * index = compileExpression(extract->getIndex());
    390             value = iBuilder->CreateGEP(array, {ConstantInt::getNullValue(index->getType()), index}, stmt->getName());
     390            value = iBuilder->CreateGEP(array, index, stmt->getName());
    391391        } else if (isa<And>(stmt)) {
    392392            value = compileExpression(stmt->getOperand(0));
  • icGREP/icgrep-devel/icgrep/pablo/pablo_kernel.cpp

    r5300 r5307  
    4949    return f->second;
    5050}
    51 
    52 
    5351
    5452Var * PabloKernel::addInput(const std::string & name, Type * const type) {
  • icGREP/icgrep-devel/icgrep/wc.cpp

    r5300 r5307  
    153153    std::unique_ptr<Module> s2pM = s2pk.createKernelModule({&ByteStream}, {&BasisBits});
    154154   
    155     PabloKernel wck(iBuilder, "wc", {Binding{iBuilder->getStreamSetTy(8, 1), "u8bit"}}, {},
    156                       {Binding{iBuilder->getSizeTy(), "lineCount"}, Binding{iBuilder->getSizeTy(), "wordCount"}, Binding{iBuilder->getSizeTy(), "charCount"}});
     155    PabloKernel wck(iBuilder, "wc",
     156        {Binding{iBuilder->getStreamSetTy(8, 1), "u8bit"}},
     157        {},
     158        {Binding{iBuilder->getSizeTy(), "lineCount"}, Binding{iBuilder->getSizeTy(), "wordCount"}, Binding{iBuilder->getSizeTy(), "charCount"}});
    157159
    158160    wc_gen(&wck);
Note: See TracChangeset for help on using the changeset viewer.