Changeset 6047


Ignore:
Timestamp:
May 17, 2018, 3:41:00 PM (3 months ago)
Author:
nmedfort
Message:

Major refactoring of buffer types. Static buffers replace Circular and CircularCopyback. External buffers unify Source/External.

Location:
icGREP/icgrep-devel/icgrep
Files:
30 edited

Legend:

Unmodified
Added
Removed
  • icGREP/icgrep-devel/icgrep/CMakeLists.txt

    r6043 r6047  
    237237SET(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS} ${CMAKE_CXX_FLAGS_DEBUG} -O1 -g -fno-omit-frame-pointer -fno-optimize-sibling-calls")
    238238
    239 #UNSET(HAS_ADDRESS_SANITIZER)
    240 #CHECK_INCLUDE_FILE_CXX("sanitizer/asan_interface.h" HAS_ADDRESS_SANITIZER)
    241 #IF (HAS_ADDRESS_SANITIZER)
    242 #SET(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -DHAS_ADDRESS_SANITIZER -fsanitize=address")
    243 #ENDIF()
     239UNSET(HAS_ADDRESS_SANITIZER)
     240CHECK_INCLUDE_FILE_CXX("sanitizer/asan_interface.h" HAS_ADDRESS_SANITIZER)
     241IF (HAS_ADDRESS_SANITIZER)
     242SET(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -DHAS_ADDRESS_SANITIZER -fsanitize=address")
     243ENDIF()
    244244
    245245
  • icGREP/icgrep-devel/icgrep/base64.cpp

    r5985 r6047  
    6666    const auto expandedSize = boost::lcm(boost::lcm(bufferSize, 3U), 4U);
    6767
    68     StreamSetBuffer * ByteStream = pxDriver.addBuffer<SourceBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 8));
     68    StreamSetBuffer * ByteStream = pxDriver.addBuffer<ExternalBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 8));
    6969    Kernel * mmapK = pxDriver.addKernelInstance<MMapSourceKernel>(iBuilder);
    7070    mmapK->setInitialArguments({fileDescriptor});
    7171    pxDriver.makeKernelCall(mmapK, {}, {ByteStream});
    7272   
    73     StreamSetBuffer * Expanded3_4Out = pxDriver.addBuffer<CircularBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 8), expandedSize);
     73    StreamSetBuffer * Expanded3_4Out = pxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 8), expandedSize);
    7474    Kernel * expandK = pxDriver.addKernelInstance<expand3_4Kernel>(iBuilder);
    7575    pxDriver.makeKernelCall(expandK, {ByteStream}, {Expanded3_4Out});
    7676   
    77     StreamSetBuffer * Radix64out = pxDriver.addBuffer<CircularBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 8), bufferSize);
     77    StreamSetBuffer * Radix64out = pxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 8), bufferSize);
    7878    Kernel * radix64K = pxDriver.addKernelInstance<radix64Kernel>(iBuilder);
    7979    pxDriver.makeKernelCall(radix64K, {Expanded3_4Out}, {Radix64out});
     
    8484        pxDriver.makeKernelCall(base64K, {Radix64out}, {Base64out});
    8585    } else {
    86         StreamSetBuffer * Base64out = pxDriver.addBuffer<CircularBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 8), bufferSize);
     86        StreamSetBuffer * Base64out = pxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 8), bufferSize);
    8787        Kernel * base64K = pxDriver.addKernelInstance<base64Kernel>(iBuilder);
    8888        pxDriver.makeKernelCall(base64K, {Radix64out}, {Base64out});       
  • icGREP/icgrep-devel/icgrep/character_deletion.cpp

    r6037 r6047  
    5757    auto & iBuilder = pxDriver.getBuilder();
    5858
    59     StreamSetBuffer * ByteStream = pxDriver.addBuffer<SourceBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 8));
    60     StreamSetBuffer * BasisBits = pxDriver.addBuffer<CircularBuffer>(iBuilder, iBuilder->getStreamSetTy(8, 1), inputBufferBlocks);
     59    StreamSetBuffer * ByteStream = pxDriver.addBuffer<ExternalBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 8));
     60    StreamSetBuffer * BasisBits = pxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(8, 1), inputBufferBlocks);
    6161
    6262    kernel::Kernel * sourceK = pxDriver.addKernelInstance<MemorySourceKernel>(iBuilder);
     
    7373    auto & iBuilder = pxDriver.getBuilder();
    7474
    75     StreamSetBuffer * const CharacterMarkerBuffer = pxDriver.addBuffer<CircularBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 1), inputBufferBlocks);
     75    StreamSetBuffer * const CharacterMarkerBuffer = pxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 1), inputBufferBlocks);
    7676    Kernel * ccK = pxDriver.addKernelInstance<ParabixCharacterClassKernelBuilder>(iBuilder, "deletionMarker", std::vector<re::CC *>{re::makeCC(characterToBeDeleted)}, 8);
    7777
    7878    pxDriver.makeKernelCall(ccK, {BasisBits}, {CharacterMarkerBuffer});
    7979
    80     StreamSetBuffer * u16Swizzle0 = pxDriver.addBuffer<CircularBuffer>(iBuilder, iBuilder->getStreamSetTy(4), inputBufferBlocks, 1);
    81     StreamSetBuffer * u16Swizzle1 = pxDriver.addBuffer<CircularBuffer>(iBuilder, iBuilder->getStreamSetTy(4), inputBufferBlocks, 1);
     80    StreamSetBuffer * u16Swizzle0 = pxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(4), inputBufferBlocks);
     81    StreamSetBuffer * u16Swizzle1 = pxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(4), inputBufferBlocks);
    8282    Kernel * delK = pxDriver.addKernelInstance<SwizzledDeleteByPEXTkernel>(iBuilder, 8);
    8383    pxDriver.makeKernelCall(delK, {CharacterMarkerBuffer, BasisBits}, {u16Swizzle0, u16Swizzle1});
    8484
    8585    // Produce unswizzled bit streams
    86     StreamSetBuffer * deletedBits = pxDriver.addBuffer<CircularBuffer>(iBuilder, iBuilder->getStreamSetTy(8), inputBufferBlocks);
     86    StreamSetBuffer * deletedBits = pxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(8), inputBufferBlocks);
    8787    Kernel * unSwizzleK = pxDriver.addKernelInstance<SwizzleGenerator>(iBuilder, 8, 1, 2);
    8888
     
    9595    auto & iBuilder = pxDriver.getBuilder();
    9696
    97     StreamSetBuffer * const CharacterMarkerBuffer = pxDriver.addBuffer<CircularBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 1), inputBufferBlocks);
     97    StreamSetBuffer * const CharacterMarkerBuffer = pxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 1), inputBufferBlocks);
    9898    Kernel * ccK = pxDriver.addKernelInstance<ParabixCharacterClassKernelBuilder>(iBuilder, "deletionMarker", std::vector<re::CC *>{re::makeCC(characterToBeDeleted)}, 8);
    9999    pxDriver.makeKernelCall(ccK, {BasisBits}, {CharacterMarkerBuffer});
    100100
    101     StreamSetBuffer * deletedBits = pxDriver.addBuffer<CircularBuffer>(iBuilder, iBuilder->getStreamSetTy(8), inputBufferBlocks);
    102     StreamSetBuffer * deletionCounts = pxDriver.addBuffer<CircularBuffer>(iBuilder, iBuilder->getStreamSetTy(8), inputBufferBlocks);
     101    StreamSetBuffer * deletedBits = pxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(8), inputBufferBlocks);
     102    StreamSetBuffer * deletionCounts = pxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(8), inputBufferBlocks);
    103103
    104104    Kernel * delK = pxDriver.addKernelInstance<DeleteByPEXTkernel>(iBuilder, 64, 8);
    105105    pxDriver.makeKernelCall(delK, {BasisBits, CharacterMarkerBuffer}, {deletedBits, deletionCounts});
    106106
    107     StreamSetBuffer * compressedBits = pxDriver.addBuffer<CircularBuffer>(iBuilder, iBuilder->getStreamSetTy(8), inputBufferBlocks);
     107    StreamSetBuffer * compressedBits = pxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(8), inputBufferBlocks);
    108108    Kernel * streamCompressionK = pxDriver.addKernelInstance<StreamCompressKernel>(iBuilder, 64, 8);
    109109    pxDriver.makeKernelCall(streamCompressionK, {deletedBits, deletionCounts}, {compressedBits});
     
    115115    auto & iBuilder = pxDriver.getBuilder();
    116116
    117     StreamSetBuffer * const CharacterMarkerBuffer = pxDriver.addBuffer<CircularBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 1), inputBufferBlocks);
     117    StreamSetBuffer * const CharacterMarkerBuffer = pxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 1), inputBufferBlocks);
    118118    Kernel * ccK = pxDriver.addKernelInstance<ParabixCharacterClassKernelBuilder>(iBuilder, "deletionMarker", std::vector<re::CC *>{re::subtractCC(re::makeByte(0, 255), re::makeCC(characterToBeDeleted))}, 8);
    119119    pxDriver.makeKernelCall(ccK, {BasisBits}, {CharacterMarkerBuffer});
    120120
    121     StreamSetBuffer * compressedBits = pxDriver.addBuffer<CircularBuffer>(iBuilder, iBuilder->getStreamSetTy(8), inputBufferBlocks);
     121    StreamSetBuffer * compressedBits = pxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(8), inputBufferBlocks);
    122122    StreamFilterCompiler filterCompiler(pxDriver, iBuilder->getStreamSetTy(8), inputBufferBlocks);
    123123    filterCompiler.makeCall(CharacterMarkerBuffer, BasisBits, compressedBits);
     
    180180//    StreamSetBuffer * deletedBits = generateDeletion(pxDriver, BasisBits, inputBufferBlocks);
    181181
    182     StreamSetBuffer * const deletedByteStream = pxDriver.addBuffer<CircularBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 8), inputBufferBlocks);
     182    StreamSetBuffer * const deletedByteStream = pxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 8), inputBufferBlocks);
    183183    Kernel * p2sK = pxDriver.addKernelInstance<P2SKernel>(iBuilder);
    184184    pxDriver.makeKernelCall(p2sK, {deletedBits}, {deletedByteStream});
  • icGREP/icgrep-devel/icgrep/character_deposit.cpp

    r6033 r6047  
    6565    auto & iBuilder = pxDriver.getBuilder();
    6666
    67     StreamSetBuffer * ByteStream = pxDriver.addBuffer<SourceBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 8));
    68     StreamSetBuffer * BasisBits = pxDriver.addBuffer<CircularBuffer>(iBuilder, iBuilder->getStreamSetTy(8, 1), bufferBlocks);
     67    StreamSetBuffer * ByteStream = pxDriver.addBuffer<ExternalBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 8));
     68    StreamSetBuffer * BasisBits = pxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(8, 1), bufferBlocks);
    6969
    7070    kernel::Kernel * sourceK = pxDriver.addKernelInstance<MemorySourceKernel>(iBuilder);
     
    8080    auto & iBuilder = pxDriver.getBuilder();
    8181
    82     StreamSetBuffer * const CharacterMarkerBuffer = pxDriver.addBuffer<CircularBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 1), bufferBlocks);
     82    StreamSetBuffer * const CharacterMarkerBuffer = pxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 1), bufferBlocks);
    8383    Kernel * ccK = pxDriver.addKernelInstance<ParabixCharacterClassKernelBuilder>(iBuilder, "extenders", std::vector<re::CC *>{re::makeCC(characterToBeDeposit)}, 8);
    8484    pxDriver.makeKernelCall(ccK, {BasisBits}, {CharacterMarkerBuffer});
    8585
    8686
    87     StreamSetBuffer * u16Swizzle0 = pxDriver.addBuffer<CircularCopybackBuffer>(iBuilder, iBuilder->getStreamSetTy(4), bufferBlocks);
    88     StreamSetBuffer * u16Swizzle1 = pxDriver.addBuffer<CircularCopybackBuffer>(iBuilder, iBuilder->getStreamSetTy(4), bufferBlocks);
     87    StreamSetBuffer * u16Swizzle0 = pxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(4), bufferBlocks, 1);
     88    StreamSetBuffer * u16Swizzle1 = pxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(4), bufferBlocks, 1);
    8989    Kernel * delK = pxDriver.addKernelInstance<SwizzledDeleteByPEXTkernel>(iBuilder, 8);
    9090    pxDriver.makeKernelCall(delK, {CharacterMarkerBuffer, BasisBits}, {u16Swizzle0, u16Swizzle1});
     
    100100
    101101    // Produce unswizzled bit streams
    102     StreamSetBuffer * resultbits = pxDriver.addBuffer<CircularBuffer>(iBuilder, iBuilder->getStreamSetTy(8), bufferBlocks);
     102    StreamSetBuffer * resultbits = pxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(8), bufferBlocks);
    103103    Kernel * unSwizzleK = pxDriver.addKernelInstance<SwizzleGenerator>(iBuilder, 8, 1, 2);
    104104
     
    110110    auto & iBuilder = pxDriver.getBuilder();
    111111
    112     StreamSetBuffer * const deletionMarker = pxDriver.addBuffer<CircularBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 1), bufferBlocks);
     112    StreamSetBuffer * const deletionMarker = pxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 1), bufferBlocks);
    113113    Kernel * ccK1 = pxDriver.addKernelInstance<ParabixCharacterClassKernelBuilder>(iBuilder, "deletionMarker", std::vector<re::CC *>{re::subtractCC(re::makeByte(0, 255), re::makeCC(characterToBeDeposit))}, 8);
    114114    pxDriver.makeKernelCall(ccK1, {BasisBits}, {deletionMarker});
    115115
    116     StreamSetBuffer * const depositMarker = pxDriver.addBuffer<CircularBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 1), bufferBlocks);
     116    StreamSetBuffer * const depositMarker = pxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 1), bufferBlocks);
    117117    Kernel * ccK2 = pxDriver.addKernelInstance<ParabixCharacterClassKernelBuilder>(iBuilder, "extenders", std::vector<re::CC *>{re::makeCC(characterToBeDeposit)}, 8);
    118118    pxDriver.makeKernelCall(ccK2, {BasisBits}, {depositMarker});
    119119
    120120    // Deletion
    121     StreamSetBuffer * deletedBits = pxDriver.addBuffer<CircularBuffer>(iBuilder, iBuilder->getStreamSetTy(8), bufferBlocks);
    122     StreamSetBuffer * deletionCounts = pxDriver.addBuffer<CircularBuffer>(iBuilder, iBuilder->getStreamSetTy(8), bufferBlocks);
     121    StreamSetBuffer * deletedBits = pxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(8), bufferBlocks);
     122    StreamSetBuffer * deletionCounts = pxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(8), bufferBlocks);
    123123
    124124    Kernel * delK = pxDriver.addKernelInstance<PEXTFieldCompressKernel>(iBuilder, 64, 8);
    125125    pxDriver.makeKernelCall(delK, {BasisBits, deletionMarker}, {deletedBits, deletionCounts});
    126126
    127     StreamSetBuffer * compressedBits = pxDriver.addBuffer<CircularBuffer>(iBuilder, iBuilder->getStreamSetTy(8), bufferBlocks);
     127    StreamSetBuffer * compressedBits = pxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(8), bufferBlocks);
    128128    Kernel * streamCompressionK = pxDriver.addKernelInstance<StreamCompressKernel>(iBuilder, 64, 8);
    129129    pxDriver.makeKernelCall(streamCompressionK, {deletedBits, deletionCounts}, {compressedBits});
     
    189189    }
    190190
    191     StreamSetBuffer * const ResultBytes = pxDriver.addBuffer<CircularBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 8), bufferBlocks);
     191    StreamSetBuffer * const ResultBytes = pxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 8), bufferBlocks);
    192192    Kernel * p2sK = pxDriver.addKernelInstance<P2SKernel>(iBuilder);
    193193    pxDriver.makeKernelCall(p2sK, {resultbits}, {ResultBytes});
  • icGREP/icgrep-devel/icgrep/editd/editd.cpp

    r6008 r6047  
    257257    idb->SetInsertPoint(BasicBlock::Create(m->getContext(), "entry", main,0));
    258258
    259     auto ChStream = pxDriver.addBuffer<SourceBuffer>(idb, idb->getStreamSetTy(4));
     259    auto ChStream = pxDriver.addBuffer<ExternalBuffer>(idb, idb->getStreamSetTy(4));
    260260    auto mmapK = pxDriver.addKernelInstance<MemorySourceKernel>(idb, 4, 1);
    261261    mmapK->setInitialArguments({inputStream, fileSize});
    262262    pxDriver.makeKernelCall(mmapK, {}, {ChStream});
    263263
    264     auto MatchResults = pxDriver.addBuffer<CircularBuffer>(idb, idb->getStreamSetTy(editDistance + 1), segmentSize * bufferSegments);
     264    auto MatchResults = pxDriver.addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(editDistance + 1), segmentSize * bufferSegments);
    265265    auto editdk = pxDriver.addKernelInstance<PatternKernel>(idb, patterns);
    266266    pxDriver.makeKernelCall(editdk, {ChStream}, {MatchResults});
     
    327327    iBuilder->SetInsertPoint(BasicBlock::Create(m->getContext(), "entry", main));
    328328
    329     auto ByteStream = pxDriver.addBuffer<SourceBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 8));
     329    auto ByteStream = pxDriver.addBuffer<ExternalBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 8));
    330330
    331331    auto mmapK = pxDriver.addKernelInstance<MMapSourceKernel>(iBuilder);
     
    333333    pxDriver.makeKernelCall(mmapK, {}, {ByteStream});
    334334
    335     auto BasisBits = pxDriver.addBuffer<CircularBuffer>(iBuilder, iBuilder->getStreamSetTy(8), segmentSize * bufferSegments);
     335    auto BasisBits = pxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(8), segmentSize * bufferSegments);
    336336    auto s2pk = pxDriver.addKernelInstance<S2PKernel>(iBuilder);
    337337    pxDriver.makeKernelCall(s2pk, {ByteStream}, {BasisBits});
     
    369369    idb->SetInsertPoint(BasicBlock::Create(m->getContext(), "entry", main,0));
    370370
    371     auto ByteStream = pxDriver.addBuffer<SourceBuffer>(idb, idb->getStreamSetTy(1, 8));
     371    auto ByteStream = pxDriver.addBuffer<ExternalBuffer>(idb, idb->getStreamSetTy(1, 8));
    372372
    373373    auto mmapK = pxDriver.addKernelInstance<MMapSourceKernel>(idb);
     
    375375    pxDriver.makeKernelCall(mmapK, {}, {ByteStream});
    376376
    377     auto ChStream = pxDriver.addBuffer<CircularBuffer>(idb, idb->getStreamSetTy(4), segmentSize * bufferSegments);
     377    auto ChStream = pxDriver.addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(4), segmentSize * bufferSegments);
    378378    auto ccck = pxDriver.addKernelInstance<kernel::DirectCharacterClassKernelBuilder>(idb, "ccc",
    379379        std::vector<re::CC *>{re::makeCC(re::makeCC(0x41), re::makeCC(0x61)),
     
    388388   
    389389    for(unsigned i = 0; i < n; ++i){
    390         auto MatchResults = pxDriver.addBuffer<CircularBuffer>(idb, idb->getStreamSetTy(editDistance + 1), segmentSize * bufferSegments);
     390        auto MatchResults = pxDriver.addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(editDistance + 1), segmentSize * bufferSegments);
    391391        auto editdk = pxDriver.addKernelInstance<PatternKernel>(idb, pattGroups[i]);
    392392        pxDriver.makeKernelCall(editdk, {ChStream}, {MatchResults});
     
    395395    StreamSetBuffer * MergedResults = MatchResultsBufs[0];
    396396    if (n > 1) {
    397         MergedResults = pxDriver.addBuffer<CircularBuffer>(idb, idb->getStreamSetTy(editDistance + 1), segmentSize * bufferSegments);
     397        MergedResults = pxDriver.addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(editDistance + 1), segmentSize * bufferSegments);
    398398        kernel::Kernel * streamsMergeK = pxDriver.addKernelInstance<kernel::StreamsMerge>(idb, editDistance + 1, n);
    399399        pxDriver.makeKernelCall(streamsMergeK, MatchResultsBufs, {MergedResults});
     
    436436    idb->SetInsertPoint(BasicBlock::Create(m->getContext(), "entry", main,0));
    437437
    438     auto ChStream = pxDriver.addBuffer<SourceBuffer>(idb, idb->getStreamSetTy(4));
     438    auto ChStream = pxDriver.addBuffer<ExternalBuffer>(idb, idb->getStreamSetTy(4));
    439439    auto mmapK = pxDriver.addKernelInstance<MemorySourceKernel>(idb, 4, 1);
    440440    mmapK->setInitialArguments({inputStream, fileSize});
    441441    pxDriver.makeKernelCall(mmapK, {}, {ChStream});
    442442
    443     auto MatchResults = pxDriver.addBuffer<CircularBuffer>(idb, idb->getStreamSetTy(editDistance + 1), segmentSize * bufferSegments);
     443    auto MatchResults = pxDriver.addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(editDistance + 1), segmentSize * bufferSegments);
    444444    auto editdk = pxDriver.addKernelInstance<kernel::editdCPUKernel>(idb, editDistance, patternLen, groupSize);
    445445
     
    574574    Value * inputSize = iBuilder->CreateLoad(inputSizePtr);
    575575
    576     auto CCStream = pxDriver.addBuffer<SourceBuffer>(iBuilder, iBuilder->getStreamSetTy(4), 1);
     576    auto CCStream = pxDriver.addBuffer<ExternalBuffer>(iBuilder, iBuilder->getStreamSetTy(4), 1);
    577577    auto sourceK = pxDriver.addKernelInstance<kernel::MemorySourceKernel>(iBuilder, inputTy, segmentSize);
    578578    sourceK->setInitialArguments({inputThreadPtr, inputSize});
     
    680680    fileSize->setName("fileSize");
    681681
    682     StreamSetBuffer * MatchResults = pxDriver.addBuffer<SourceBuffer>(iBuilder, iBuilder->getStreamSetTy(editDistance + 1));
     682    StreamSetBuffer * MatchResults = pxDriver.addBuffer<ExternalBuffer>(iBuilder, iBuilder->getStreamSetTy(editDistance + 1));
    683683    kernel::Kernel * sourceK = pxDriver.addKernelInstance<kernel::MemorySourceKernel>(iBuilder, editDistance + 1, 8);
    684684    sourceK->setInitialArguments({inputStream, fileSize});
  • icGREP/icgrep-devel/icgrep/grep/grep_engine.cpp

    r6008 r6047  
    262262        ByteStream = SourceStream;
    263263    } else if (mBinaryFilesMode == argv::WithoutMatch) {
    264         ByteStream = mGrepDriver->addBuffer<CircularBuffer>(idb, idb->getStreamSetTy(1, 8), baseBufferSize);
     264        ByteStream = mGrepDriver->addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(1, 8), baseBufferSize);
    265265        kernel::Kernel * binaryCheckK = mGrepDriver->addKernelInstance<kernel::AbortOnNull>(idb);
    266266        binaryCheckK->setInitialArguments({callback_object_addr});
     
    270270        llvm::report_fatal_error("Binary mode not supported.");
    271271    }
    272     StreamSetBuffer * LineBreakStream = mGrepDriver->addBuffer<CircularBuffer>(idb, idb->getStreamSetTy(1, 1), baseBufferSize);
     272    StreamSetBuffer * LineBreakStream = mGrepDriver->addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(1, 1), baseBufferSize);
    273273    std::vector<StreamSetBuffer *> MatchResultsBufs(nREs);
    274274   
     
    290290                mREs[0] = re::replaceCC(mREs[0], cc, ccName);
    291291                std::string ccNameStr = ccName->getFullName();
    292                 StreamSetBuffer * ccStream = mGrepDriver->addBuffer<CircularBuffer>(idb, idb->getStreamSetTy(1, 1), baseBufferSize);
     292                StreamSetBuffer * ccStream = mGrepDriver->addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(1, 1), baseBufferSize);
    293293                kernel::Kernel * ccK = mGrepDriver->addKernelInstance<kernel::DirectCharacterClassKernelBuilder>(idb, ccNameStr, std::vector<re::CC *>{cc});
    294294                mGrepDriver->makeKernelCall(ccK, {ByteStream}, {ccStream});
     
    297297            }
    298298        }
    299         StreamSetBuffer * MatchResults = mGrepDriver->addBuffer<CircularBuffer>(idb, idb->getStreamSetTy(1, 1), baseBufferSize);
     299        StreamSetBuffer * MatchResults = mGrepDriver->addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(1, 1), baseBufferSize);
    300300        kernel::Kernel * icgrepK = mGrepDriver->addKernelInstance<kernel::ByteGrepKernel>(idb, mREs[0], externalStreamNames);
    301301        mGrepDriver->makeKernelCall(icgrepK, icgrepInputSets, {MatchResults});
     
    312312                mREs[0] = re::replaceCC(mREs[0], cc, ccName);
    313313                std::string ccNameStr = ccName->getFullName();
    314                 StreamSetBuffer * ccStream = mGrepDriver->addBuffer<CircularBuffer>(idb, idb->getStreamSetTy(1, 1), baseBufferSize);
     314                StreamSetBuffer * ccStream = mGrepDriver->addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(1, 1), baseBufferSize);
    315315                kernel::Kernel * ccK = mGrepDriver->addKernelInstance<kernel::DirectCharacterClassKernelBuilder>(idb, ccNameStr, std::vector<re::CC *>{cc});
    316316                mGrepDriver->makeKernelCall(ccK, {ByteStream}, {ccStream});
     
    319319            }
    320320        }
    321         StreamSetBuffer * MatchResults = mGrepDriver->addBuffer<CircularBuffer>(idb, idb->getStreamSetTy(1, 1), baseBufferSize);
     321        StreamSetBuffer * MatchResults = mGrepDriver->addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(1, 1), baseBufferSize);
    322322        kernel::Kernel * icgrepK = mGrepDriver->addKernelInstance<kernel::ByteBitGrepKernel>(idb, prefixRE, suffixRE, externalStreamNames);
    323323        mGrepDriver->makeKernelCall(icgrepK, icgrepInputSets, {MatchResults});
     
    327327    } else {
    328328       
    329         StreamSetBuffer * BasisBits = mGrepDriver->addBuffer<CircularBuffer>(idb, idb->getStreamSetTy(encodingBits, 1), baseBufferSize);
     329        StreamSetBuffer * BasisBits = mGrepDriver->addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(encodingBits, 1), baseBufferSize);
    330330        kernel::Kernel * s2pk = nullptr;
    331331        if (PabloTransposition) {
     
    337337        mGrepDriver->makeKernelCall(s2pk, {ByteStream}, {BasisBits});
    338338
    339         StreamSetBuffer * RequiredStreams = mGrepDriver->addBuffer<CircularBuffer>(idb, idb->getStreamSetTy(1, 1), baseBufferSize);
    340         StreamSetBuffer * UnicodeLB = mGrepDriver->addBuffer<CircularBuffer>(idb, idb->getStreamSetTy(1, 1), baseBufferSize);
    341 
    342         StreamSetBuffer * LineFeedStream = mGrepDriver->addBuffer<CircularBuffer>(idb, idb->getStreamSetTy(1, 1), baseBufferSize);
     339        StreamSetBuffer * RequiredStreams = mGrepDriver->addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(1, 1), baseBufferSize);
     340        StreamSetBuffer * UnicodeLB = mGrepDriver->addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(1, 1), baseBufferSize);
     341
     342        StreamSetBuffer * LineFeedStream = mGrepDriver->addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(1, 1), baseBufferSize);
    343343        kernel::Kernel * linefeedK = mGrepDriver->addKernelInstance<kernel::LineFeedKernelBuilder>(idb, Binding{idb->getStreamSetTy(8), "basis", FixedRate(), Principal()});
    344344        mGrepDriver->makeKernelCall(linefeedK, {BasisBits}, {LineFeedStream});
     
    360360            for (auto p : mUnicodeProperties) {
    361361                auto name = p->getFullName();
    362                 StreamSetBuffer * s = mGrepDriver->addBuffer<CircularBuffer>(idb, idb->getStreamSetTy(1, 1), baseBufferSize);
     362                StreamSetBuffer * s = mGrepDriver->addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(1, 1), baseBufferSize);
    363363                propertyStream.emplace(std::make_pair(name, s));
    364364                kernel::Kernel * propertyK = mGrepDriver->addKernelInstance<kernel::UnicodePropertyKernelBuilder>(idb, p);
     
    368368        StreamSetBuffer * GCB_stream = nullptr;
    369369        if (anyGCB) {
    370             GCB_stream = mGrepDriver->addBuffer<CircularBuffer>(idb, idb->getStreamSetTy(1, 1), baseBufferSize);
     370            GCB_stream = mGrepDriver->addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(1, 1), baseBufferSize);
    371371            kernel::Kernel * gcbK = mGrepDriver->addKernelInstance<kernel::GraphemeClusterBreakKernel>(idb);
    372372            mGrepDriver->makeKernelCall(gcbK, {BasisBits, RequiredStreams}, {GCB_stream});
     
    399399            if (CC_Multiplexing) {
    400400                const auto UnicodeSets = re::collectCCs(mREs[i], &cc::Unicode, std::set<re::Name *>({re::makeZeroWidth("\\b{g}")}));
    401                 StreamSetBuffer * const MatchResults = mGrepDriver->addBuffer<CircularBuffer>(idb, idb->getStreamSetTy(1, 1), baseBufferSize);
     401                StreamSetBuffer * const MatchResults = mGrepDriver->addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(1, 1), baseBufferSize);
    402402                if (UnicodeSets.size() <= 1) {
    403403                    kernel::Kernel * icgrepK = mGrepDriver->addKernelInstance<kernel::ICGrepKernel>(idb, mREs[i], externalStreamNames);
     
    409409                    std::vector<re::CC *> mpx_basis = mpx->getMultiplexedCCs();
    410410                    auto numOfCharacterClasses = mpx_basis.size();
    411                     StreamSetBuffer * CharClasses = mGrepDriver->addBuffer<CircularBuffer>(idb, idb->getStreamSetTy(numOfCharacterClasses), baseBufferSize);
     411                    StreamSetBuffer * CharClasses = mGrepDriver->addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(numOfCharacterClasses), baseBufferSize);
    412412                    kernel::Kernel * ccK = mGrepDriver->addKernelInstance<kernel::CharClassesKernel>(idb, std::move(mpx_basis));
    413413                    mGrepDriver->makeKernelCall(ccK, {BasisBits}, {CharClasses});
     
    420420                }
    421421            } else {
    422                 StreamSetBuffer * MatchResults = mGrepDriver->addBuffer<CircularBuffer>(idb, idb->getStreamSetTy(1, 1), baseBufferSize);
     422                StreamSetBuffer * MatchResults = mGrepDriver->addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(1, 1), baseBufferSize);
    423423                kernel::Kernel * icgrepK = mGrepDriver->addKernelInstance<kernel::ICGrepKernel>(idb, mREs[i], externalStreamNames);
    424424                mGrepDriver->makeKernelCall(icgrepK, icgrepInputSets, {MatchResults});
     
    430430    StreamSetBuffer * MergedResults = MatchResultsBufs[0];
    431431    if (mREs.size() > 1) {
    432         MergedResults = mGrepDriver->addBuffer<CircularBuffer>(idb, idb->getStreamSetTy(1, 1), baseBufferSize);
     432        MergedResults = mGrepDriver->addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(1, 1), baseBufferSize);
    433433        kernel::Kernel * streamsMergeK = mGrepDriver->addKernelInstance<kernel::StreamsMerge>(idb, 1, mREs.size());
    434434        mGrepDriver->makeKernelCall(streamsMergeK, MatchResultsBufs, {MergedResults});
     
    438438        StreamSetBuffer * OriginalMatches = Matches;
    439439        kernel::Kernel * matchedLinesK = mGrepDriver->addKernelInstance<kernel::MatchedLinesKernel>(idb);
    440         Matches = mGrepDriver->addBuffer<CircularBuffer>(idb, idb->getStreamSetTy(1, 1), baseBufferSize);
     440        Matches = mGrepDriver->addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(1, 1), baseBufferSize);
    441441        mGrepDriver->makeKernelCall(matchedLinesK, {OriginalMatches, LineBreakStream}, {Matches});
    442442    }
     
    444444        kernel::Kernel * invertK = mGrepDriver->addKernelInstance<kernel::InvertMatchesKernel>(idb);
    445445        StreamSetBuffer * OriginalMatches = Matches;
    446         Matches = mGrepDriver->addBuffer<CircularBuffer>(idb, idb->getStreamSetTy(1, 1), baseBufferSize);
     446        Matches = mGrepDriver->addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(1, 1), baseBufferSize);
    447447        mGrepDriver->makeKernelCall(invertK, {OriginalMatches, LineBreakStream}, {Matches});
    448448    }
     
    451451        untilK->setInitialArguments({idb->getSize(mMaxCount)});
    452452        StreamSetBuffer * const AllMatches = Matches;
    453         Matches = mGrepDriver->addBuffer<CircularBuffer>(idb, idb->getStreamSetTy(1, 1), baseBufferSize);
     453        Matches = mGrepDriver->addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(1, 1), baseBufferSize);
    454454        mGrepDriver->makeKernelCall(untilK, {AllMatches}, {Matches});
    455455    }
     
    480480    call_back_object->setName("call_back_object");
    481481
    482     StreamSetBuffer * ByteStream = mGrepDriver->addBuffer<SourceBuffer>(idb, idb->getStreamSetTy(1, encodingBits));
    483     kernel::Kernel * sourceK = mGrepDriver->addKernelInstance<kernel::FDSourceKernel>(idb);
     482    StreamSetBuffer * const ByteStream = mGrepDriver->addBuffer<ExternalBuffer>(idb, idb->getStreamSetTy(1, encodingBits));
     483    kernel::Kernel * const sourceK = mGrepDriver->addKernelInstance<kernel::FDSourceKernel>(idb);
    484484    sourceK->setInitialArguments({useMMap, fileDescriptor});
    485485    mGrepDriver->makeKernelCall(sourceK, {}, {ByteStream});
     
    557557    match_accumulator->setName("match_accumulator");
    558558
    559     StreamSetBuffer * ByteStream = mGrepDriver->addBuffer<SourceBuffer>(idb, idb->getStreamSetTy(1, encodingBits));
     559    StreamSetBuffer * ByteStream = mGrepDriver->addBuffer<ExternalBuffer>(idb, idb->getStreamSetTy(1, encodingBits));
    560560    kernel::Kernel * sourceK = mGrepDriver->addKernelInstance<kernel::FDSourceKernel>(idb);
    561561    sourceK->setInitialArguments({useMMap, fileDescriptor});
     
    816816   
    817817    idb->SetInsertPoint(BasicBlock::Create(M->getContext(), "entry", mainFunc, 0));
    818     StreamSetBuffer * ByteStream = mGrepDriver->addBuffer<SourceBuffer>(idb, idb->getStreamSetTy(1, 8));
     818    StreamSetBuffer * ByteStream = mGrepDriver->addBuffer<ExternalBuffer>(idb, idb->getStreamSetTy(1, 8));
    819819    kernel::Kernel * sourceK = mGrepDriver->addKernelInstance<kernel::MemorySourceKernel>(idb);
    820820    sourceK->setInitialArguments({buffer, length});
    821821    mGrepDriver->makeKernelCall(sourceK, {}, {ByteStream});
    822     StreamSetBuffer * RecordBreakStream = mGrepDriver->addBuffer<CircularBuffer>(idb, idb->getStreamSetTy(1, 1), segmentSize);
     822    StreamSetBuffer * RecordBreakStream = mGrepDriver->addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(1, 1), segmentSize);
    823823    std::string RBname = (mGrepRecordBreak == GrepRecordBreakKind::Null) ? "Null" : "LF";
    824824
     
    830830        mGrepDriver->makeKernelCall(breakK, {ByteStream}, {RecordBreakStream});
    831831    } else {
    832         BasisBits = mGrepDriver->addBuffer<CircularBuffer>(idb, idb->getStreamSetTy(8, 1), segmentSize);
     832        BasisBits = mGrepDriver->addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(8, 1), segmentSize);
    833833        kernel::Kernel * s2pk = mGrepDriver->addKernelInstance<kernel::S2PKernel>(idb);
    834834        mGrepDriver->makeKernelCall(s2pk, {ByteStream}, {BasisBits});
     
    843843        MatchingRecords = RecordBreakStream;
    844844    } else {
    845         StreamSetBuffer * MatchResults = mGrepDriver->addBuffer<CircularBuffer>(idb, idb->getStreamSetTy(1, 1), segmentSize);
     845        StreamSetBuffer * MatchResults = mGrepDriver->addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(1, 1), segmentSize);
    846846        kernel::Kernel * includeK = mGrepDriver->addKernelInstance<kernel::ICGrepKernel>(idb, matchingRE, externalStreamNames);
    847847        mGrepDriver->makeKernelCall(includeK, {BasisBits}, {MatchResults});
    848         MatchingRecords = mGrepDriver->addBuffer<CircularBuffer>(idb, idb->getStreamSetTy(1, 1), segmentSize);
     848        MatchingRecords = mGrepDriver->addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(1, 1), segmentSize);
    849849        kernel::Kernel * matchedLinesK = mGrepDriver->addKernelInstance<kernel::MatchedLinesKernel>(idb);
    850850        mGrepDriver->makeKernelCall(matchedLinesK, {MatchResults, RecordBreakStream}, {MatchingRecords});
    851851    }
    852852    if (!excludeNothing) {
    853         StreamSetBuffer * ExcludedResults = mGrepDriver->addBuffer<CircularBuffer>(idb, idb->getStreamSetTy(1, 1), segmentSize);
     853        StreamSetBuffer * ExcludedResults = mGrepDriver->addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(1, 1), segmentSize);
    854854        kernel::Kernel * excludeK = mGrepDriver->addKernelInstance<kernel::ICGrepKernel>(idb, excludedRE, externalStreamNames);
    855855        mGrepDriver->makeKernelCall(excludeK, {BasisBits}, {ExcludedResults});
    856         StreamSetBuffer * ExcludedRecords = mGrepDriver->addBuffer<CircularBuffer>(idb, idb->getStreamSetTy(1, 1), segmentSize);
     856        StreamSetBuffer * ExcludedRecords = mGrepDriver->addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(1, 1), segmentSize);
    857857        kernel::Kernel * matchedLinesK = mGrepDriver->addKernelInstance<kernel::MatchedLinesKernel>(idb);
    858858        mGrepDriver->makeKernelCall(matchedLinesK, {ExcludedResults, RecordBreakStream}, {ExcludedRecords});
     
    860860        kernel::Kernel * invertK = mGrepDriver->addKernelInstance<kernel::InvertMatchesKernel>(idb);
    861861        if (!matchAllLines) {
    862             StreamSetBuffer * nonExcluded = mGrepDriver->addBuffer<CircularBuffer>(idb, idb->getStreamSetTy(1, 1), segmentSize);
     862            StreamSetBuffer * nonExcluded = mGrepDriver->addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(1, 1), segmentSize);
    863863            mGrepDriver->makeKernelCall(invertK, {ExcludedRecords, RecordBreakStream}, {nonExcluded});
    864864            StreamSetBuffer * included = MatchingRecords;
    865             MatchingRecords = mGrepDriver->addBuffer<CircularBuffer>(idb, idb->getStreamSetTy(1, 1), segmentSize);
     865            MatchingRecords = mGrepDriver->addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(1, 1), segmentSize);
    866866            kernel::Kernel * streamsIntersectK = mGrepDriver->addKernelInstance<kernel::StreamsIntersect>(idb, 1, 2);
    867867            mGrepDriver->makeKernelCall(streamsIntersectK, {included, nonExcluded}, {MatchingRecords});
    868868        }
    869869        else {
    870             MatchingRecords = mGrepDriver->addBuffer<CircularBuffer>(idb, idb->getStreamSetTy(1, 1), segmentSize);
     870            MatchingRecords = mGrepDriver->addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(1, 1), segmentSize);
    871871            mGrepDriver->makeKernelCall(invertK, {ExcludedRecords, RecordBreakStream}, {MatchingRecords});
    872872        }
  • icGREP/icgrep-devel/icgrep/kernels/attributes.h

    r5985 r6047  
    249249        // kernel writer maintain the current stride index and that the kernel logic has
    250250        // a natural breakpoint in the algorithm in which to record the number.
     251
     252        Expandable, /// NOT DONE
     253
     254        // Indicates that the number of stream sets in this buffer can increase.
    251255
    252256        /** KERNEL ATTRIBUTES **/
  • icGREP/icgrep-devel/icgrep/kernels/deletion.cpp

    r6038 r6047  
    800800        compressK = mDriver.addKernelInstance<FieldCompressKernel>(iBuilder, mIntraFieldCompressionWidth, N);
    801801    }
    802     parabix::StreamSetBuffer * compressedFields = mDriver.addBuffer<parabix::CircularBuffer>(iBuilder, iBuilder->getStreamSetTy(N), mBufferBlocks);
    803     parabix::StreamSetBuffer * unitCounts = mDriver.addBuffer<parabix::CircularBuffer>(iBuilder, iBuilder->getStreamSetTy(), mBufferBlocks);
     802    parabix::StreamSetBuffer * compressedFields = mDriver.addBuffer<parabix::StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(N), mBufferBlocks);
     803    parabix::StreamSetBuffer * unitCounts = mDriver.addBuffer<parabix::StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(), mBufferBlocks);
    804804   
    805805    mDriver.makeKernelCall(compressK, {inputs, mask}, {compressedFields, unitCounts});
  • icGREP/icgrep-devel/icgrep/kernels/kernel.cpp

    r5985 r6047  
    616616
    617617/** ------------------------------------------------------------------------------------------------------------- *
    618  * @brief verifyBufferSize
    619  ** ------------------------------------------------------------------------------------------------------------- */
    620 bool Kernel::verifyBufferSize(const Binding & binding, const StreamSetBuffer * const buffer) const {
    621     if (LLVM_UNLIKELY(isa<SourceBuffer>(buffer) || isa<ExternalBuffer>(buffer))) {
    622         return true;
    623     }
    624     const ProcessingRate & rate = binding.getRate();
    625     if (requiresCopyBack(binding)) {
    626         const auto minOverflow = ceiling(rate.getUpperBound());
    627         if (LLVM_UNLIKELY(buffer->overflowSize() < minOverflow)) {
    628             report_fatal_error(getName() + ": " + binding.getName() + " requires " +
    629                                std::to_string(minOverflow) + " overflow blocks");
    630         }
    631     } else if (rate.isFixed() || binding.hasAttribute(Attribute::KindId::BlockSize)) {
    632         const auto r = rate.getUpperBound();
    633         if (LLVM_LIKELY(r.denominator() == 1)) {
    634             if (LLVM_UNLIKELY((buffer->getBufferBlocks() % r.numerator())) != 0) {
    635                 report_fatal_error(getName() + ": " + binding.getName() + " requires a multiple of " +
    636                                    std::to_string(r.numerator()) + " buffer blocks");
    637                 return false;
    638             }
    639         } else { // if (b % (n/d) != 0)
    640             const auto b = buffer->getBufferBlocks();
    641             const auto x = (b * r.denominator()) / r.numerator();
    642             if (LLVM_UNLIKELY((b * r.denominator()) != (r.numerator() * x))) {
    643                 report_fatal_error(getName() + ": " + binding.getName() + " requires a multiple of " +
    644                                    std::to_string(r.numerator()) + "/" + std::to_string(r.denominator()) + " buffer blocks");
    645                 return false;
    646             }
    647         }
    648     }
    649     return true;
    650 }
    651 
    652 
    653 /** ------------------------------------------------------------------------------------------------------------- *
    654618 * @brief requiresCopyBack
    655619 ** ------------------------------------------------------------------------------------------------------------- */
  • icGREP/icgrep-devel/icgrep/kernels/kernel.h

    r5998 r6047  
    269269        return mAvailableItemCount[i];
    270270    }
    271 
    272     bool verifyBufferSize(const Binding & binding, const parabix::StreamSetBuffer * const buffer) const;
    273271
    274272    void verifyStreamSetDefinitions() const;
  • icGREP/icgrep-devel/icgrep/kernels/kernel_builder.cpp

    r6008 r6047  
    421421}
    422422
    423 Value * KernelBuilder::getBufferedSize(const std::string & name) {
    424     return mKernel->getAnyStreamSetBuffer(name)->getBufferedSize(this, getStreamHandle(name));
    425 }
    426 
    427 void KernelBuilder::setBufferedSize(const std::string & name, Value * size) {
    428     mKernel->getAnyStreamSetBuffer(name)->setBufferedSize(this, getStreamHandle(name), size);
    429 }
    430 
    431423Value * KernelBuilder::getCapacity(const std::string & name) {
    432424    return mKernel->getAnyStreamSetBuffer(name)->getCapacity(this, getStreamHandle(name));
     
    435427void KernelBuilder::setCapacity(const std::string & name, Value * c) {
    436428    mKernel->getAnyStreamSetBuffer(name)->setCapacity(this, getStreamHandle(name), c);
    437 }
    438 
    439 void KernelBuilder::protectOutputStream(const std::string & name, const bool readOnly) {
    440     const StreamSetBuffer * const buf = mKernel->getOutputStreamSetBuffer(name);
    441     Value * const handle = getStreamHandle(name);
    442     Value * const base = buf->getBaseAddress(this, handle);
    443     Value * sz = ConstantExpr::getSizeOf(buf->getType());
    444     sz = CreateMul(sz, getInt64(buf->getBufferBlocks()));
    445     sz = CreateMul(sz, CreateZExt(buf->getStreamSetCount(this, handle), getInt64Ty()));
    446     CreateMProtect(base, sz, readOnly ? CBuilder::READ : (CBuilder::READ | CBuilder::WRITE));
    447429}
    448430   
     
    475457}
    476458
    477 void KernelBuilder::doubleCapacity(const std::string & name) {
    478     const StreamSetBuffer * const buf = mKernel->getAnyStreamSetBuffer(name);
    479     return buf->doubleCapacity(this, getStreamHandle(name));
    480 }
    481 
    482459BasicBlock * KernelBuilder::CreateConsumerWait() {
    483460    const auto consumers = mKernel->getStreamOutputs();
  • icGREP/icgrep-devel/icgrep/kernels/kernel_builder.h

    r6008 r6047  
    154154
    155155    void setBaseAddress(const std::string & name, llvm::Value * addr);
    156 
    157     llvm::Value * getBufferedSize(const std::string & name);
    158    
    159     void setBufferedSize(const std::string & name, llvm::Value * size);
    160156   
    161157    llvm::Value * getCapacity(const std::string & name);
     
    188184        mKernel = kernel;
    189185    }
    190 
    191     void protectOutputStream(const std::string & name, const bool readOnly);
    192 
    193     void doubleCapacity(const std::string & name);
    194186
    195187    // overloading wrongly substitutes this for CBuilder function. renamed for now until I can investigate why.
  • icGREP/icgrep-devel/icgrep/kernels/lz4/lz4_bitstream_match_copy_kernel.cpp

    r6039 r6047  
    3838        Constant* SIZE_1 = iBuilder->getSize(1);
    3939        Value* SIZE_64 = iBuilder->getSize(64); // maybe need to handle 32 bit machine
    40         Value* SIZE_INPUT_64_COUNT = iBuilder->getSize(this->getInputStreamSetBuffer(inputName)->getBufferBlocks() * iBuilder->getBitBlockWidth() / 64);
     40        Value* SIZE_INPUT_64_COUNT = iBuilder->CreateUDiv(iBuilder->getCapacity(inputName), iBuilder->getSize(64));
    4141
    4242        Value* initCurrentPos = startPos;
  • icGREP/icgrep-devel/icgrep/kernels/lz4/lz4_deposit_uncompressed.cpp

    r6020 r6047  
    4747        Value* inputBufferBasePtr = iBuilder->getRawInputPointer("byteStream", iBuilder->getSize(0));
    4848        Value* outputBufferBasePtr = iBuilder->getRawOutputPointer("outputStream", iBuilder->getSize(0));
    49 
    50 
    51         size_t outputBufferSize = this->getOutputStreamSetBuffer("outputStream")->getBufferBlocks() * iBuilder->getStride();
    52         Value* outputBufferSizeValue = iBuilder->getSize(outputBufferSize);
    53         Value* outputBufferSizeMask = iBuilder->getSize(outputBufferSize - 1);
     49        Value* outputBufferSizeValue = iBuilder->getCapacity("outputStream");
     50        Value* outputBufferSizeMask = iBuilder->CreateSub(outputBufferSizeValue, iBuilder->getSize(1));
    5451
    5552        Value* maskedOutputOffset = iBuilder->CreateAnd(uncompressedOutputPos, outputBufferSizeMask);
     
    8582    }
    8683
    87     size_t LZ4DepositUncompressedKernel::getInputBufferSize(const unique_ptr<KernelBuilder> &iBuilder, string bufferName) {
    88         return this->getInputStreamSetBuffer(bufferName)->getBufferBlocks() * iBuilder->getStride();
    89     }
    90 
    91     Value* LZ4DepositUncompressedKernel::generateLoadCircularInput(const unique_ptr<KernelBuilder> &iBuilder, string inputBufferName, Value* offset, Type* pointerType) {
    92         size_t inputSize = this->getInputBufferSize(iBuilder, inputBufferName);
    93         Value* offsetMask = iBuilder->getSize(inputSize - 1);
     84    Value* LZ4DepositUncompressedKernel::generateLoadCircularInput(const unique_ptr<KernelBuilder> &iBuilder, const string & bufferName, Value* offset, Type* pointerType) {
     85        Value* offsetMask =  iBuilder->CreateSub(iBuilder->getCapacity(bufferName), iBuilder->getSize(1));
    9486        Value* maskedOffset = iBuilder->CreateAnd(offsetMask, offset);
    95 
    96         Value* inputBufferPtr = iBuilder->getRawInputPointer(inputBufferName, iBuilder->getSize(0));
    97 
     87        Value* inputBufferPtr = iBuilder->getRawInputPointer(bufferName, iBuilder->getSize(0));
    9888        inputBufferPtr = iBuilder->CreatePointerCast(inputBufferPtr, pointerType);
    9989        return iBuilder->CreateLoad(iBuilder->CreateGEP(inputBufferPtr, maskedOffset));
  • icGREP/icgrep-devel/icgrep/kernels/lz4/lz4_deposit_uncompressed.h

    r6020 r6047  
    2121        virtual void generateDoSegmentMethod(const std::unique_ptr<KernelBuilder> &iBuilder) override;
    2222    private:
    23         inline llvm::Value* loadCurrentUncompressedData(const std::unique_ptr<kernel::KernelBuilder> & iBuilder, const std::string& name);
    24         inline llvm::Value* generateLoadCircularInput(const std::unique_ptr<KernelBuilder> &iBuilder, std::string inputBufferName, llvm::Value* offset, llvm::Type* pointerType);
    25         inline size_t getInputBufferSize(const std::unique_ptr<KernelBuilder> &iBuilder, std::string bufferName);
     23        inline llvm::Value* loadCurrentUncompressedData(const std::unique_ptr<kernel::KernelBuilder> & iBuilder, const std::string & name);
     24        inline llvm::Value* generateLoadCircularInput(const std::unique_ptr<KernelBuilder> &iBuilder, const std::string & bufferName, llvm::Value* offset, llvm::Type* pointerType);
    2625        inline void increaseCurrentUncompressedDataIndex(const std::unique_ptr<KernelBuilder> &iBuilder);
    2726        inline void generateDepositUncompressed(const std::unique_ptr<KernelBuilder> &iBuilder);
  • icGREP/icgrep-devel/icgrep/kernels/lz4/lz4_index_builder.cpp

    r6042 r6047  
    337337
    338338        Value * currentBlockGlobalPos = iBuilder->CreateUDiv(phiCurrentPos, SIZE_64);
    339         Value * currentBlockLocalPos = iBuilder->CreateURem(currentBlockGlobalPos, iBuilder->getSize(this->getAnyStreamSetBuffer(inputName)->getBufferBlocks() * iBuilder->getBitBlockWidth() / 64));
     339        Value * currentBlockCapacity = iBuilder->CreateUDiv(iBuilder->getCapacity(inputName), iBuilder->getSize(64));
     340        Value * currentBlockLocalPos = iBuilder->CreateURem(currentBlockGlobalPos, currentBlockCapacity);
    340341        Value * currentPosBitBlockOffset = iBuilder->CreateURem(phiCurrentPos, SIZE_64);
    341342
     
    367368
    368369    Value * LZ4IndexBuilderKernel::generateLoadInt64NumberInput(const unique_ptr<KernelBuilder> &iBuilder, string inputBufferName, Value * globalOffset) {
    369 //        Constant* SIZE_STRIDE_SIZE = iBuilder->getSize(getStride());
    370         Constant* SIZE_STRIDE_SIZE = iBuilder->getSize(this->getInputStreamSetBuffer(inputBufferName)->getBufferBlocks() * iBuilder->getBitBlockWidth());
     370        Value * capacity = iBuilder->getCapacity(inputBufferName);
    371371        Value * processed = iBuilder->getProcessedItemCount(inputBufferName);
    372         processed = iBuilder->CreateAnd(processed, ConstantExpr::getNeg(SIZE_STRIDE_SIZE));
     372        processed = iBuilder->CreateAnd(processed, iBuilder->CreateNeg(capacity));
    373373        Value * offset = iBuilder->CreateSub(globalOffset, processed);
    374374        Value * valuePtr = iBuilder->getRawInputPointer(inputBufferName, offset);
     
    455455    void LZ4IndexBuilderKernel::storeM0(const std::unique_ptr<KernelBuilder> &b, llvm::Value* blockIndex, llvm::Value* value) {
    456456        int fw = 64;
    457         Value* m0BufferBlocks = b->getSize(this->getOutputStreamSetBuffer("M0Marker")->getBufferBlocks() * b->getBitBlockWidth() / fw);
     457        Value* m0BufferBlocks = b->CreateUDiv(b->getCapacity("M0Marker"), b->getSize(fw));
    458458        Value* indexRem = b->CreateURem(blockIndex, m0BufferBlocks);
    459459        Value* outputBasePtr = b->CreatePointerCast(b->getRawOutputPointer("M0Marker", b->getSize(0)), b->getIntNTy(fw)->getPointerTo());
     
    540540
    541541    void
    542     LZ4IndexBuilderKernel::storeDeletionMarker(const std::unique_ptr<KernelBuilder> &b, llvm::Value *blockIndex,
    543                                                llvm::Value *value) {
     542    LZ4IndexBuilderKernel::storeDeletionMarker(const std::unique_ptr<KernelBuilder> &b, llvm::Value *blockIndex, llvm::Value *value) {
    544543        int fw = 64;
    545         Value* m0BufferBlocks = b->getSize(this->getOutputStreamSetBuffer("deletionMarker")->getBufferBlocks() * b->getBitBlockWidth() / fw);
     544        Value* m0BufferBlocks = b->CreateUDiv(b->getCapacity("deletionMarker"), b->getSize(fw));
    546545        Value* indexRem = b->CreateURem(blockIndex, m0BufferBlocks);
    547 
    548546        Value* outputBasePtr = b->CreatePointerCast(b->getRawOutputPointer("deletionMarker", b->getSize(0)), b->getIntNTy(fw)->getPointerTo());
    549547        b->CreateStore(value, b->CreateGEP(outputBasePtr, indexRem));
     
    607605    }
    608606
    609     void LZ4IndexBuilderKernel::storeMatchOffsetMarker(const std::unique_ptr<KernelBuilder> &b,
    610                                                        llvm::Value *blockIndex, llvm::Value *value) {
     607    void LZ4IndexBuilderKernel::storeMatchOffsetMarker(const std::unique_ptr<KernelBuilder> &b, llvm::Value *blockIndex, llvm::Value *value) {
    611608        int fw = 64;
    612         Value* m0BufferBlocks = b->getSize(this->getOutputStreamSetBuffer("MatchOffsetMarker")->getBufferBlocks() * b->getBitBlockWidth() / fw);
     609        Value* m0BufferBlocks = b->CreateUDiv(b->getCapacity("MatchOffsetMarker"), b->getSize(fw));
    613610        Value* indexRem = b->CreateURem(blockIndex, m0BufferBlocks);
    614 
    615611        Value* outputBasePtr = b->CreatePointerCast(b->getRawOutputPointer("MatchOffsetMarker", b->getSize(0)), b->getIntNTy(fw)->getPointerTo());
    616612        b->CreateStore(value, b->CreateGEP(outputBasePtr, indexRem));
  • icGREP/icgrep-devel/icgrep/kernels/lz4/lz4_swizzled_match_copy_kernel.cpp

    r6026 r6047  
    1919    Constant* SIZE_1 = iBuilder->getSize(1);
    2020    Value* SIZE_64 = iBuilder->getSize(64); // maybe need to handle 32 bit machine
    21     Value* SIZE_INPUT_64_COUNT = iBuilder->getSize(this->getInputStreamSetBuffer(inputName)->getBufferBlocks() * iBuilder->getBitBlockWidth() / 64);
    22 
     21    Value* SIZE_INPUT_64_COUNT = iBuilder->CreateUDiv(iBuilder->getCapacity(inputName), iBuilder->getSize(64));
    2322    Value* initCurrentPos = startPos;
    24 
    2523    Value* offsetMarkerRawPtr = iBuilder->CreatePointerCast(iBuilder->getRawInputPointer(inputName, SIZE_0), iBuilder->getInt64Ty()->getPointerTo());
    2624
  • icGREP/icgrep-devel/icgrep/kernels/multiblock_kernel.cpp

    r5998 r6047  
    206206        Value * const consumed = b->getConsumedItemCount(name);
    207207        Value * const unconsumed = b->CreateSub(produced, consumed);
    208         Value * const capacity = b->getBufferedSize(name);
     208        Value * const capacity = b->getCapacity(name);
    209209        writable = b->CreateSub(capacity, unconsumed);
    210210    } else {
     
    325325            BasicBlock * const copyBack = b->CreateBasicBlock(name + "CopyBack");
    326326            BasicBlock * const done = b->CreateBasicBlock(name + "CopyBackDone");
    327             Value * const bufferSize = b->getBufferedSize(name);
    328             Value * const priorOffset = b->CreateURem(mInitialProducedItemCount[i], bufferSize);
     327            Value * const capacity = b->getCapacity(name);
     328            Value * const priorOffset = b->CreateURem(mInitialProducedItemCount[i], capacity);
    329329            Value * const produced = b->getProducedItemCount(name);
    330             Value * const currentOffset = b->CreateURem(produced, bufferSize);
     330            Value * const currentOffset = b->CreateURem(produced, capacity);
    331331            b->CreateUnlikelyCondBr(b->CreateICmpULT(currentOffset, priorOffset), copyBack, done);
    332332
     
    337337                const ProcessingRate & rate = output.getRate();
    338338                if (rate.isPopCount() || rate.isNegatedPopCount()) {
    339                     Value * const limit = b->CreateSub(bufferSize, priorOffset);
     339                    Value * const limit = b->CreateSub(capacity, priorOffset);
    340340                    BasicBlock * const popCountLoop = b->CreateBasicBlock();
    341341                    BasicBlock * const popCountDone = b->CreateBasicBlock();
     
    412412        Value * const consumed = b->getConsumedItemCount(name);
    413413        Value * const unconsumed = b->CreateSub(produced, consumed);
    414         Value * const capacity = b->getBufferedSize(name);
     414        Value * const capacity = b->getCapacity(name);
    415415        if (LLVM_UNLIKELY(codegen::DebugOptionIsSet(codegen::EnableAsserts))) {
    416416            b->CreateAssert(b->CreateICmpULE(consumed, produced),
  • icGREP/icgrep-devel/icgrep/kernels/pdep_kernel.cpp

    r6046 r6047  
    3636    ConstantInt * const BLOCK_WIDTH = b->getSize(b->getBitBlockWidth());
    3737    ConstantInt * const PDEP_WIDTH = b->getSize(pdepWidth);
    38 
    39     Function * pdep = nullptr;
    40     if (pdepWidth == 64) {
    41         pdep = Intrinsic::getDeclaration(b->getModule(), Intrinsic::x86_bmi_pdep_64);
    42     } else if (pdepWidth == 32) {
    43         pdep = Intrinsic::getDeclaration(b->getModule(), Intrinsic::x86_bmi_pdep_32);
    44     } else {
    45         report_fatal_error(getName() + ": PDEP width must be 32 or 64");
    46     }
    4738
    4839    Constant * const ZERO = b->getSize(0);
     
    367358        llvm::report_fatal_error("StreamDepositCompiler only compresses bit streams (for now)");
    368359    }
    369     parabix::StreamSetBuffer * expandedStreams = mDriver.addBuffer<parabix::CircularBuffer>(iBuilder, iBuilder->getStreamSetTy(N), mBufferBlocks);
     360    parabix::StreamSetBuffer * expandedStreams = mDriver.addBuffer<parabix::StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(N), mBufferBlocks);
    370361    Kernel * streamK = mDriver.addKernelInstance<StreamExpandKernel>(iBuilder, mFieldWidth, N);
    371362    mDriver.makeKernelCall(streamK, {depositMask, inputs}, {expandedStreams});
  • icGREP/icgrep-devel/icgrep/kernels/source_kernel.cpp

    r6025 r6047  
    3535    BasicBlock * const exit = b->CreateBasicBlock("Exit");
    3636    IntegerType * const sizeTy = b->getSizeTy();
    37 
    3837    ConstantInt * const PAGE_SIZE = b->getSize(getpagesize());
    39     ConstantInt * const ZERO = b->getSize(0);
    40 
    41 
    4238    Value * const fd = b->getScalarField("fileDescriptor");
    4339    assert (fileSizeMethod);
     
    5450        fileSize = b->CreateUDiv(fileSize, b->getSize(codeUnitWidth / 8));
    5551    }
    56     b->setBufferedSize("sourceBuffer", fileSize);
    5752    b->setScalarField("fileSize", fileSize);
    58     b->setProducedItemCount("sourceBuffer", ZERO);
    59     b->setCapacity("sourceBuffer", fileSize);
    6053    b->CreateBr(exit);
    6154
     
    6457    b->setScalarField("buffer", emptyFilePtr);
    6558    b->setBaseAddress("sourceBuffer", emptyFilePtr);
    66     b->setScalarField("fileSize", ZERO);
    67     b->setBufferedSize("sourceBuffer", PAGE_SIZE);
    68     b->setCapacity("sourceBuffer", PAGE_SIZE);
     59    b->setScalarField("fileSize", PAGE_SIZE);
    6960    b->setTerminationSignal();
    7061    b->CreateBr(exit);
     
    160151    b->setScalarField("buffer", buffer);
    161152    b->setCapacity("sourceBuffer", bufferItems);
     153    b->setScalarField("fileSize", b->getSize(0));
    162154}
    163155
     
    179171    // Do we have enough unread data to support one segment?
    180172    Value * const produced = b->getProducedItemCount("sourceBuffer");
    181     Value * const buffered = b->getBufferedSize("sourceBuffer");
     173    Value * const buffered = b->getScalarField("fileSize");
    182174    Value * const itemsPending = b->CreateAdd(produced, itemsPerSegment);
    183175
     
    245237    Value * const itemsRead = b->CreateUDiv(bytesRead, codeUnitBytes);
    246238    Value * const itemsBuffered = b->CreateAdd(buffered, itemsRead);
    247     b->setBufferedSize("sourceBuffer", itemsBuffered);
     239    b->setScalarField("fileSize", itemsBuffered);
    248240    b->CreateUnlikelyCondBr(b->CreateICmpULT(itemsBuffered, itemsPending), setTermination, readExit);
    249241
     
    255247    b->CreateBr(readExit);
    256248
    257     readExit->moveAfter(setTermination);
    258249    b->SetInsertPoint(readExit);
    259250    PHINode * const itemsProduced = b->CreatePHI(itemsPending->getType(), 3);
     
    339330    b->setBaseAddress("sourceBuffer", fileSource);
    340331    Value * const fileSize = b->getScalarField("fileSize");
    341     b->setBufferedSize("sourceBuffer", fileSize);
    342332    b->setCapacity("sourceBuffer", fileSize);
    343333    if (mStreamSetCount > 1) {
     
    403393MMapSourceKernel::MMapSourceKernel(const std::unique_ptr<kernel::KernelBuilder> & b, const unsigned codeUnitWidth)
    404394: SegmentOrientedKernel("mmap_source@" + std::to_string(codeUnitWidth)
    405 , {}
     395// input streams
     396, {}
     397// output streams
    406398, {Binding{b->getStreamSetTy(1, codeUnitWidth), "sourceBuffer"}}
     399// input scalars
    407400, {Binding{b->getInt32Ty(), "fileDescriptor"}}
    408 , {Binding{b->getSizeTy(), "fileSize"}}
    409 , {Binding{b->getIntNTy(codeUnitWidth)->getPointerTo(), "buffer"}})
     401// output scalars
     402, {}
     403// internal scalars
     404, {Binding{b->getIntNTy(codeUnitWidth)->getPointerTo(), "buffer"}
     405,  Binding{b->getSizeTy(), "fileSize"}})
    410406, mCodeUnitWidth(codeUnitWidth)
    411407, mFileSizeFunction(nullptr) {
     
    416412ReadSourceKernel::ReadSourceKernel(const std::unique_ptr<kernel::KernelBuilder> & b, const unsigned codeUnitWidth)
    417413: SegmentOrientedKernel("read_source" + std::to_string(codegen::SegmentSize) + "@" + std::to_string(codeUnitWidth)
    418 , {}
     414// input streams
     415, {}
     416// output streams
    419417, {Binding{b->getStreamSetTy(1, codeUnitWidth), "sourceBuffer"}}
     418// input scalars
    420419, {Binding{b->getInt32Ty(), "fileDescriptor"}}
    421 , {}
    422 , {Binding{b->getIntNTy(codeUnitWidth)->getPointerTo(), "buffer"}})
     420// output scalars
     421, {}
     422// internal scalars
     423, {Binding{b->getIntNTy(codeUnitWidth)->getPointerTo(), "buffer"}
     424,  Binding{b->getSizeTy(), "fileSize"}})
    423425, mCodeUnitWidth(codeUnitWidth) {
    424426    addAttribute(MustExplicitlyTerminate());
     
    429431FDSourceKernel::FDSourceKernel(const std::unique_ptr<kernel::KernelBuilder> & b, const unsigned codeUnitWidth)
    430432: SegmentOrientedKernel("FD_source@" + std::to_string(codeUnitWidth)
     433// input streams
    431434, {}
    432435// output stream
     
    435438, {Binding{b->getInt8Ty(), "useMMap"}, Binding{b->getInt32Ty(), "fileDescriptor"}}
    436439, {}
    437 // internal scalar
    438 , {Binding{b->getIntNTy(codeUnitWidth)->getPointerTo(), "buffer"}, Binding{b->getSizeTy(), "fileSize"}})
     440// internal scalars
     441, {Binding{b->getIntNTy(codeUnitWidth)->getPointerTo(), "buffer"},
     442   Binding{b->getSizeTy(), "fileSize"}})
    439443, mCodeUnitWidth(codeUnitWidth)
    440444, mFileSizeFunction(nullptr) {
     
    445449MemorySourceKernel::MemorySourceKernel(const std::unique_ptr<kernel::KernelBuilder> & b, const unsigned streamSetCount, const unsigned codeUnitWidth)
    446450: SegmentOrientedKernel("memory_source@" + std::to_string(streamSetCount) + ":" + std::to_string(codeUnitWidth),
     451// input streams
    447452{},
    448453// output stream
  • icGREP/icgrep-devel/icgrep/kernels/streamset.cpp

    r5998 r6047  
    2424}
    2525
    26 Type * StreamSetBuffer::getStreamSetBlockType() const { return mType;}
    27 
    28 ArrayType * resolveStreamSetType(const std::unique_ptr<kernel::KernelBuilder> & b, Type * type);
    29 
    30 StructType * resolveExpandableStreamSetType(const std::unique_ptr<kernel::KernelBuilder> & b, Type * type);
    31 
    32 void StreamSetBuffer::allocateBuffer(const std::unique_ptr<kernel::KernelBuilder> & b) {
    33     assert (mBufferBlocks > 0);
    34     assert ("allocate buffer was called twice" && !mStreamSetBufferPtr);
    35     Type * const ty = getType();
    36     const auto blocks = (mBufferBlocks + mOverflowBlocks);
    37     if (mAddressSpace == 0) {
    38         Constant * size = ConstantExpr::getSizeOf(ty);
    39         size = ConstantExpr::getMul(size, ConstantInt::get(size->getType(), blocks));
    40         mStreamSetBufferPtr = b->CreatePointerCast(b->CreateCacheAlignedMalloc(size), ty->getPointerTo());
    41     } else {
    42         mStreamSetBufferPtr = b->CreateCacheAlignedAlloca(ty, b->getSize(blocks));
    43     }
    44 }
    45 
    46 void StreamSetBuffer::releaseBuffer(const std::unique_ptr<kernel::KernelBuilder> & b) const {
    47     if (mAddressSpace == 0) {
    48         b->CreateFree(mStreamSetBufferPtr);
    49     }
    50 }
    51 
    52 inline bool StreamSetBuffer::isCapacityGuaranteed(const Value * const index, const size_t capacity) const {
    53     return isa<ConstantInt>(index) ? cast<ConstantInt>(index)->getLimitedValue() < capacity : false;
    54 }
    55 
    56 Value * StreamSetBuffer::modBufferSize(IDISA::IDISA_Builder * const b, Value * const offset) const {
    57     assert (offset->getType()->isIntegerTy());
    58     if (mBufferBlocks == 0 || isCapacityGuaranteed(offset, mBufferBlocks)) {
    59         return offset;
    60     } else if (mBufferBlocks == 1) {
    61         return ConstantInt::getNullValue(offset->getType());
    62     } else if (is_power_2(mBufferBlocks)) {
    63         return b->CreateAnd(offset, ConstantInt::get(offset->getType(), mBufferBlocks - 1));
    64     } else {
    65         return b->CreateURem(offset, ConstantInt::get(offset->getType(), mBufferBlocks));
    66     }
    67 }
    68 
    6926Value * StreamSetBuffer::getStreamBlockPtr(IDISA::IDISA_Builder * const b, Value * const handle, Value * streamIndex, Value * blockIndex, const bool /* readOnly */) const {
    7027    if (codegen::DebugOptionIsSet(codegen::EnableAsserts)) {
     
    7431        b->CreateAssert(cond, "out-of-bounds stream access");
    7532    }
    76     return b->CreateGEP(getBaseAddress(b, handle), {modBufferSize(b, blockIndex), streamIndex});
     33    return b->CreateGEP(getBaseAddress(b, handle), {blockIndex, streamIndex});
    7734}
    7835
     
    8441        b->CreateAssert(cond, "out-of-bounds stream access");
    8542    }
    86     return b->CreateGEP(getBaseAddress(b, handle), {modBufferSize(b, blockIndex), streamIndex, packIndex});
    87 }
    88 
    89 void StreamSetBuffer::setBaseAddress(IDISA::IDISA_Builder * const /* b */, Value * /* addr */, Value * /* handle */) const {
    90     report_fatal_error("setBaseAddress is not supported by this buffer type");
    91 }
    92 
    93 Value * StreamSetBuffer::getOverflowAddress(IDISA::IDISA_Builder * const b, Value * const handle) const {
    94     return b->CreateGEP(getBaseAddress(b, handle), b->getSize(mBufferBlocks));
    95 }
    96 
    97 Value * StreamSetBuffer::getBufferedSize(IDISA::IDISA_Builder * const b, Value * /* handle */) const {
    98     return b->getSize(mBufferBlocks * b->getBitBlockWidth());
    99 }
    100 
    101 void StreamSetBuffer::setBufferedSize(IDISA::IDISA_Builder * const /* b */, Value * /* handle */, Value * /* size */) const {
    102     report_fatal_error("setBufferedSize is not supported by this buffer type");
    103 }
    104 
    105 Value * StreamSetBuffer::getCapacity(IDISA::IDISA_Builder * const b, Value * const /* handle */) const {
    106     return b->getSize(mBufferBlocks * b->getBitBlockWidth());
    107 }
    108 
    109 void StreamSetBuffer::setCapacity(IDISA::IDISA_Builder * const /* b */, Value * /* handle */, Value * /* c */) const {
    110     report_fatal_error("setCapacity is not supported by this buffer type");
     43    return b->CreateGEP(getBaseAddress(b, handle), {blockIndex, streamIndex, packIndex});
    11144}
    11245
     
    11952}
    12053
    121 void StreamSetBuffer::doubleCapacity(IDISA::IDISA_Builder * const /* b */, Value */* handle */) const {
    122     report_fatal_error("doubleCapacity is not supported by this buffer type");
     54// External File Buffer
     55void ExternalBuffer::allocateBuffer(const std::unique_ptr<kernel::KernelBuilder> & b) {
     56    PointerType * const ptrTy = getPointerType();
     57    IntegerType * const sizeTy = b->getSizeTy();
     58    StructType * const structTy = StructType::get(b->getContext(), {ptrTy, sizeTy});
     59    Value * const handle = b->CreateCacheAlignedAlloca(structTy);
     60    mStreamSetHandle = handle;
     61    // If mExternalAddress is null, it must be set by a source kernel.
     62    Value * ptr = nullptr;
     63    Constant * size = nullptr;
     64    if (mExternalAddress) {
     65        ptr = b->CreatePointerBitCastOrAddrSpaceCast(mExternalAddress, ptrTy);
     66        size = ConstantInt::getAllOnesValue(sizeTy);
     67    } else {
     68        ptr = ConstantPointerNull::get(ptrTy);
     69        size = ConstantInt::getNullValue(sizeTy);
     70    }
     71    b->CreateStore(ptr, b->CreateGEP(handle, {b->getInt32(0), b->getInt32(BaseAddress)}));
     72    b->CreateStore(size, b->CreateGEP(handle, {b->getInt32(0), b->getInt32(Capacity)}));
     73}
     74
     75void ExternalBuffer::setBaseAddress(IDISA::IDISA_Builder * const b, llvm::Value * handle, llvm::Value * addr) const {
     76    if (codegen::DebugOptionIsSet(codegen::EnableAsserts)) {
     77        b->CreateAssert(handle, "handle cannot be null");
     78    }
     79    Value * const p = b->CreateGEP(handle, {b->getInt32(0), b->getInt32(BaseAddress)});
     80    Value * const ptr = b->CreatePointerBitCastOrAddrSpaceCast(addr, getPointerType());
     81    b->CreateStore(ptr, p);
     82}
     83
     84Value * ExternalBuffer::getBaseAddress(IDISA::IDISA_Builder * const b, Value * const handle) const {
     85    if (codegen::DebugOptionIsSet(codegen::EnableAsserts)) {
     86        b->CreateAssert(handle, "handle cannot be null");
     87    }
     88    Value * const p = b->CreateGEP(handle, {b->getInt32(0), b->getInt32(BaseAddress)});
     89    return b->CreateLoad(p);
     90}
     91
     92Value * ExternalBuffer::getOverflowAddress(IDISA::IDISA_Builder * const /* b */, Value * const /* handle */) const {
     93    report_fatal_error("getOverflowAddress is not supported by this buffer type");
     94}
     95
     96void ExternalBuffer::setCapacity(IDISA::IDISA_Builder * const b, Value * const handle, Value * const capacity) const {
     97    if (codegen::DebugOptionIsSet(codegen::EnableAsserts)) {
     98        b->CreateAssert(handle, "handle cannot be null");
     99    }
     100    Value * const p = b->CreateGEP(handle, {b->getInt32(0), b->getInt32(Capacity)});
     101    b->CreateStore(capacity, p);
     102}
     103
     104Value * ExternalBuffer::getCapacity(IDISA::IDISA_Builder * const b, Value * const handle) const {
     105    if (codegen::DebugOptionIsSet(codegen::EnableAsserts)) {
     106        b->CreateAssert(handle, "handle cannot be null");
     107    }
     108    Value * const p = b->CreateGEP(handle, {b->getInt32(0), b->getInt32(Capacity)});
     109    return b->CreateLoad(p);
     110}
     111
     112void ExternalBuffer::releaseBuffer(const std::unique_ptr<kernel::KernelBuilder> &) const {
     113    // this buffer is not responsible for freeing the data associated with it
     114}
     115
     116Value * ExternalBuffer::getLinearlyAccessibleItems(IDISA::IDISA_Builder * const, Value *, Value *, Value * availItems, const bool reverse) const {
     117    // All available items can be accessed.
     118    return reverse ? ConstantInt::getAllOnesValue(availItems->getType()) : availItems;
     119}
     120
     121Value * ExternalBuffer::getLinearlyWritableItems(IDISA::IDISA_Builder * const, Value *, Value * fromPosition, Value *consumed, const bool reverse) const {
     122    // Trust that the buffer is large enough to write any amount
     123    return reverse ? fromPosition : ConstantInt::getAllOnesValue(fromPosition->getType());
    123124}
    124125
     
    130131 * The type of the pointer is i8* for fields of 8 bits or less, otherwise iN* for N-bit fields.
    131132 */
    132 Value * StreamSetBuffer::getRawItemPointer(IDISA::IDISA_Builder * const b, Value * const handle, Value * absolutePosition) const {
     133Value * ExternalBuffer::getRawItemPointer(IDISA::IDISA_Builder * const b, Value * const handle, Value * absolutePosition) const {
    133134    Value * ptr = getBaseAddress(b, handle);
    134135    Value * relativePosition = absolutePosition;
     
    149150}
    150151
    151 Value * StreamSetBuffer::getLinearlyAccessibleItems(IDISA::IDISA_Builder * const b, Value * const /* handle */, Value * fromPosition, Value * availItems, bool reverse) const {
    152     Type * const ty = fromPosition->getType();
    153     const auto blockWidth = b->getBitBlockWidth();
    154     Constant * const bufferSize = ConstantInt::get(ty, mBufferBlocks * blockWidth);
    155     Value * const itemsFromBase = b->CreateURem(fromPosition, bufferSize);
    156     if (reverse) {
    157         Value * const bufAvail = b->CreateSelect(b->CreateIsNull(itemsFromBase), bufferSize, itemsFromBase);
    158         return b->CreateUMin(availItems, bufAvail);
    159     } else {
    160         Constant * capacity = bufferSize;
    161         if (mOverflowBlocks) {
    162             capacity = ConstantInt::get(ty, (mBufferBlocks + mOverflowBlocks) * blockWidth - 1);
    163         }
    164         Value * const linearSpace = b->CreateSub(capacity, itemsFromBase);
    165         return b->CreateUMin(availItems, linearSpace);
    166     }
    167 }
    168 
    169 Value * StreamSetBuffer::getLinearlyWritableItems(IDISA::IDISA_Builder * const b, Value * const /* handle */, Value * fromPosition, Value * consumed, bool reverse) const {
    170     Type * const ty = fromPosition->getType();
    171     const auto blockWidth = b->getBitBlockWidth();
    172     Constant * const bufferSize = ConstantInt::get(ty, mBufferBlocks * blockWidth);
    173     fromPosition = b->CreateURem(fromPosition, bufferSize);
    174     if (reverse) {
    175         return b->CreateSelect(b->CreateIsNull(fromPosition), bufferSize, fromPosition);
    176     }
    177     consumed = b->CreateURem(consumed, bufferSize);
    178     Constant * capacity = bufferSize;
    179     if (mOverflowBlocks) {
    180         capacity = ConstantInt::get(ty, (mBufferBlocks + mOverflowBlocks) * blockWidth - 1);
    181     }
    182     Value * const limit = b->CreateSelect(b->CreateICmpULE(consumed, fromPosition), capacity, consumed);
    183     return b->CreateSub(limit, fromPosition);
    184 }
    185 
    186 Value * StreamSetBuffer::getBaseAddress(IDISA::IDISA_Builder * const b, Value * const handle) const {
     152// Circular Buffer
     153void StaticBuffer::allocateBuffer(const std::unique_ptr<kernel::KernelBuilder> & b) {
     154    assert (mCapacity > 0);
     155    assert ("allocate buffer was called twice" && !mStreamSetHandle);
     156    Type * const ty = getType();
     157    const auto blocks = (mCapacity + mOverflow);
     158    if (mAddressSpace == 0) {
     159        Constant * size = ConstantExpr::getSizeOf(ty);
     160        size = ConstantExpr::getMul(size, ConstantInt::get(size->getType(), blocks));
     161        mStreamSetHandle = b->CreatePointerCast(b->CreateCacheAlignedMalloc(size), ty->getPointerTo());
     162    } else {
     163        mStreamSetHandle = b->CreateCacheAlignedAlloca(ty, b->getSize(blocks));
     164    }
     165}
     166
     167void StaticBuffer::releaseBuffer(const std::unique_ptr<kernel::KernelBuilder> & b) const {
     168    if (mAddressSpace == 0) {
     169        b->CreateFree(mStreamSetHandle);
     170    }
     171}
     172
     173inline bool isCapacityGuaranteed(const Value * const index, const size_t capacity) {
     174    if (capacity == 0) {
     175        return true;
     176    } else if (isa<ConstantInt>(index)) {
     177        return cast<ConstantInt>(index)->getLimitedValue() < capacity;
     178    }
     179    return false;
     180}
     181
     182Value * StaticBuffer::modByCapacity(IDISA::IDISA_Builder * const b, Value * const offset) const {
     183    assert (offset->getType()->isIntegerTy());
     184    if (isCapacityGuaranteed(offset, mCapacity)) {
     185        return offset;
     186    } else if (mCapacity == 1) {
     187        return ConstantInt::getNullValue(offset->getType());
     188    } else if (is_power_2(mCapacity)) {
     189        return b->CreateAnd(offset, ConstantInt::get(offset->getType(), mCapacity - 1));
     190    } else {
     191        return b->CreateURem(offset, ConstantInt::get(offset->getType(), mCapacity));
     192    }
     193}
     194
     195Value * StaticBuffer::getCapacity(IDISA::IDISA_Builder * const b, Value * const /* handle */) const {
     196    return b->getSize(mCapacity * b->getBitBlockWidth());
     197}
     198
     199void StaticBuffer::setCapacity(IDISA::IDISA_Builder * const /* b */, Value * /* handle */, Value * /* c */) const {
     200    report_fatal_error("setCapacity is not supported by this buffer type");
     201}
     202
     203Value * StaticBuffer::getBaseAddress(IDISA::IDISA_Builder * const b, Value * const handle) const {
    187204    if (codegen::DebugOptionIsSet(codegen::EnableAsserts)) {
    188205        b->CreateAssert(handle, "handle cannot be null");
     
    191208}
    192209
    193 void StreamSetBuffer::createBlockCopy(IDISA::IDISA_Builder * const b, Value * targetBlockPtr, Value * sourceBlockPtr, Value * blocksToCopy) const {
    194     Type * i8ptr = b->getInt8PtrTy();
    195     unsigned alignment = b->getBitBlockWidth() / 8;
    196     size_t numStreams = 1;
    197     if (isa<ArrayType>(mBaseType)) {
    198         numStreams = mBaseType->getArrayNumElements();
    199     }
    200     const auto fieldWidth = mBaseType->getArrayElementType()->getScalarSizeInBits();
    201     Value * blockCopyBytes = b->CreateMul(blocksToCopy, b->getSize(b->getBitBlockWidth() * numStreams * fieldWidth/8));
    202     b->CreateMemMove(b->CreateBitCast(targetBlockPtr, i8ptr), b->CreateBitCast(sourceBlockPtr, i8ptr), blockCopyBytes, alignment);
    203 }
    204 
    205 // Source File Buffer
    206 Type * SourceBuffer::getStreamSetBlockType() const {
    207     return cast<PointerType>(mType->getStructElementType(BaseAddress))->getElementType();
    208 }
    209 
    210 Value * SourceBuffer::getBufferedSize(IDISA::IDISA_Builder * const b, Value * const handle) const {
    211     Value * ptr = b->CreateGEP(handle, {b->getInt32(0), b->getInt32(BufferedSize)});
    212     return b->CreateLoad(ptr);
    213 }
    214 
    215 void SourceBuffer::setBufferedSize(IDISA::IDISA_Builder * const b, Value * const handle, Value * size) const {
    216     Value * ptr = b->CreateGEP(handle, {b->getInt32(0), b->getInt32(BufferedSize)});
    217     b->CreateStore(size, ptr);
    218 }
    219 
    220 Value * SourceBuffer::getCapacity(IDISA::IDISA_Builder * const b, Value * const handle) const {
    221     Value * ptr = b->CreateGEP(handle, {b->getInt32(0), b->getInt32(Capacity)});
    222     return b->CreateLoad(ptr);
    223 }
    224 
    225 void SourceBuffer::setCapacity(IDISA::IDISA_Builder * const b, Value * const handle, Value * c) const {
    226     Value * ptr = b->CreateGEP(handle, {b->getInt32(0), b->getInt32(Capacity)});
    227     b->CreateStore(c, ptr);
    228 }
    229 
    230 void SourceBuffer::setBaseAddress(IDISA::IDISA_Builder * const b, Value * const handle, Value * addr) const {
    231     if (codegen::DebugOptionIsSet(codegen::EnableAsserts)) {
    232         b->CreateAssert(handle, "handle cannot be null");
    233     }
    234     Value * const ptr = b->CreateGEP(handle, {b->getInt32(0), b->getInt32(BaseAddress)});
    235     Type * const ptrTy = ptr->getType()->getPointerElementType();
    236     if (LLVM_LIKELY(isa<PointerType>(addr->getType()))) {
    237         const auto ptrSpace = cast<PointerType>(ptr->getType())->getAddressSpace();
    238         const auto addrSpace = cast<PointerType>(ptrTy)->getAddressSpace();
    239         if (LLVM_UNLIKELY(addrSpace != ptrSpace)) {
    240             report_fatal_error("SourceBuffer: base address was declared with address space "
    241                                      + std::to_string(ptrSpace)
    242                                      + " but given a pointer in address space "
    243                                      + std::to_string(addrSpace));
    244         }
    245     } else {
    246         report_fatal_error("SourceBuffer: base address is not a pointer type");
    247     }
    248     if (codegen::DebugOptionIsSet(codegen::EnableAsserts)) {
    249         b->CreateAssert(ptr, "SourceBuffer: base address cannot be zero");
    250         DataLayout DL(b->getModule());
    251         IntegerType * const intPtrTy = b->getIntPtrTy(DL, cast<PointerType>(ptrTy)->getAddressSpace());
    252         Value * const notAligned = b->CreateURem(b->CreatePtrToInt(ptr, intPtrTy), ConstantInt::get(intPtrTy, b->getBitBlockWidth() / 8));
    253         b->CreateAssertZero(notAligned, "SourceBuffer: base address is not aligned with the bit block width");
    254     }
    255     b->CreateStore(b->CreatePointerCast(addr, ptrTy), ptr);
    256 }
    257 
    258 Value * SourceBuffer::getBaseAddress(IDISA::IDISA_Builder * const b, Value * const handle) const {
    259     if (codegen::DebugOptionIsSet(codegen::EnableAsserts)) {
    260         b->CreateAssert(handle, "handle cannot be null");
    261     }
    262     Value * const ptr = b->CreateGEP(handle, {b->getInt32(0), b->getInt32(BaseAddress)});
    263     return b->CreateLoad(ptr);
    264 }
    265 
    266 Value * SourceBuffer::getLinearlyAccessibleItems(IDISA::IDISA_Builder * const b, Value * const handle, Value * fromPosition, Value * availItems, bool reverse) const {
    267     if (reverse) report_fatal_error("SourceBuffer cannot be accessed in reverse");
    268     Value * maxAvail = b->CreateNUWSub(getBufferedSize(b, handle), fromPosition);
    269     return b->CreateSelect(b->CreateICmpULT(availItems, maxAvail), availItems, maxAvail);
    270 }
    271 
    272 Value * SourceBuffer::getLinearlyWritableItems(IDISA::IDISA_Builder * const b, Value * const handle, Value * fromPosition, Value *consumed, bool reverse) const {
    273     report_fatal_error("SourceBuffers cannot be written");
    274 }
    275 
    276 void SourceBuffer::allocateBuffer(const std::unique_ptr<kernel::KernelBuilder> & b) {
    277     if (LLVM_LIKELY(mStreamSetBufferPtr == nullptr)) {
    278         Type * const ty = getType();
    279         mStreamSetBufferPtr = b->CreateCacheAlignedAlloca(ty, b->getSize(mBufferBlocks));
    280         b->CreateAlignedStore(Constant::getNullValue(ty), mStreamSetBufferPtr, b->getCacheAlignment());
    281     } else {
    282         report_fatal_error("StreamSetBuffer::allocateBuffer() was called twice on the same stream set");
    283     }
    284 }
    285 
    286 void SourceBuffer::releaseBuffer(const std::unique_ptr<kernel::KernelBuilder> & b) const {
    287 
    288 }
    289 
    290 // External File Buffer
    291 void ExternalBuffer::allocateBuffer(const std::unique_ptr<kernel::KernelBuilder> &) {
    292     report_fatal_error("External buffers cannot be allocated.");
    293 }
    294 
    295 void ExternalBuffer::releaseBuffer(const std::unique_ptr<kernel::KernelBuilder> &) const {
    296 
    297 }
    298 
    299 Value * ExternalBuffer::getLinearlyAccessibleItems(IDISA::IDISA_Builder * const, Value *, Value *, Value * availItems, const bool reverse) const {
    300     // All available items can be accessed.
    301     return reverse ? ConstantInt::getAllOnesValue(availItems->getType()) : availItems;
    302 }
    303 
    304 Value * ExternalBuffer::getLinearlyWritableItems(IDISA::IDISA_Builder * const, Value *, Value * fromPosition, Value *consumed, const bool reverse) const {
    305     // Trust that the buffer is large enough to write any amount
    306     return reverse ? fromPosition : ConstantInt::getAllOnesValue(fromPosition->getType());
    307 }
    308 
    309 Value * ExternalBuffer::getBufferedSize(IDISA::IDISA_Builder * const b, Value * const /* handle */) const {
    310     return ConstantInt::getAllOnesValue(b->getSizeTy());
    311 }
    312 
    313 Value * ExternalBuffer::getCapacity(IDISA::IDISA_Builder * const b, Value * const /* handle */) const {
    314     return ConstantInt::getAllOnesValue(b->getSizeTy());
    315 }
    316 
    317 // Circular Buffer
    318 Value * CircularBuffer::getRawItemPointer(IDISA::IDISA_Builder * const b, Value * const handle, Value * absolutePosition) const {
     210void StaticBuffer::setBaseAddress(IDISA::IDISA_Builder * const /* b */, Value * /* addr */, Value * /* handle */) const {
     211    report_fatal_error("setBaseAddress is not supported by this buffer type");
     212}
     213
     214Value * StaticBuffer::getOverflowAddress(IDISA::IDISA_Builder * const b, Value * const handle) const {
     215    return b->CreateGEP(getBaseAddress(b, handle), b->getSize(mCapacity));
     216}
     217
     218Value * StaticBuffer::getStreamBlockPtr(IDISA::IDISA_Builder * const b, Value * const handle, Value * streamIndex, Value * blockIndex, const bool /* readOnly */) const {
     219    if (codegen::DebugOptionIsSet(codegen::EnableAsserts)) {
     220        Value * const count = getStreamSetCount(b, handle);
     221        Value * const index = b->CreateZExtOrTrunc(streamIndex, count->getType());
     222        Value * const cond = b->CreateICmpULT(index, count);
     223        b->CreateAssert(cond, "out-of-bounds stream access");
     224    }
     225    return b->CreateGEP(getBaseAddress(b, handle), {modByCapacity(b, blockIndex), streamIndex});
     226}
     227
     228Value * StaticBuffer::getStreamPackPtr(IDISA::IDISA_Builder * const b, Value * const handle, Value * streamIndex, Value * blockIndex, Value * packIndex, const bool /* readOnly */) const {
     229    if (codegen::DebugOptionIsSet(codegen::EnableAsserts)) {
     230        Value * const count = getStreamSetCount(b, handle);
     231        Value * const index = b->CreateZExtOrTrunc(streamIndex, count->getType());
     232        Value * const cond = b->CreateICmpULT(index, count);
     233        b->CreateAssert(cond, "out-of-bounds stream access");
     234    }
     235    return b->CreateGEP(getBaseAddress(b, handle), {modByCapacity(b, blockIndex), streamIndex, packIndex});
     236}
     237
     238Value * StaticBuffer::getRawItemPointer(IDISA::IDISA_Builder * const b, Value * const handle, Value * absolutePosition) const {
    319239    Value * ptr = getBaseAddress(b, handle);
    320     Value * relativePosition = b->CreateURem(absolutePosition, ConstantInt::get(absolutePosition->getType(), mBufferBlocks * b->getBitBlockWidth()));
     240    Value * relativePosition = b->CreateURem(absolutePosition, ConstantInt::get(absolutePosition->getType(), mCapacity * b->getBitBlockWidth()));
    321241    Type * const elemTy = mBaseType->getArrayElementType()->getVectorElementType();
    322242    const auto bw = elemTy->getPrimitiveSizeInBits();
     
    332252}
    333253
    334 // Expandable Buffer
    335 
    336 void ExpandableBuffer::allocateBuffer(const std::unique_ptr<kernel::KernelBuilder> & b) {
    337     mStreamSetBufferPtr = b->CreateCacheAlignedAlloca(getType());
    338     Value * const capacityPtr = b->CreateGEP(mStreamSetBufferPtr, {b->getInt32(0), b->getInt32(0)});
    339     b->CreateStore(b->getSize(mInitialCapacity), capacityPtr);
    340     Type * const bufferType = getType()->getStructElementType(1)->getPointerElementType();
    341     Constant * const bufferWidth = ConstantExpr::getIntegerCast(ConstantExpr::getSizeOf(bufferType), b->getSizeTy(), false);
    342     Constant * const size = ConstantExpr::getMul(b->getSize(mBufferBlocks * mInitialCapacity), bufferWidth);
    343     const auto alignment = std::max(b->getCacheAlignment(), b->getBitBlockWidth() / 8);
    344     Value * const ptr = b->CreateAlignedMalloc(size, alignment);
    345     b->CreateMemZero(ptr, size, bufferType->getPrimitiveSizeInBits() / 8);
    346     Value * const streamSetPtr = b->CreateGEP(mStreamSetBufferPtr, {b->getInt32(0), b->getInt32(1)});
    347     b->CreateStore(b->CreatePointerCast(ptr, bufferType->getPointerTo()), streamSetPtr);
    348 }
    349 
    350 std::pair<Value *, Value *> ExpandableBuffer::getInternalStreamBuffer(IDISA::IDISA_Builder * const b, Value * const handle, Value * streamIndex, Value * blockIndex, const bool readOnly) const {
    351 
    352     // ENTRY
    353     Value * const capacityPtr = b->CreateGEP(handle, {b->getInt32(0), b->getInt32(0)});
    354     Value * const capacity = b->CreateLoad(capacityPtr);
    355     Value * const streamSetPtr = b->CreateGEP(handle, {b->getInt32(0), b->getInt32(1)});
    356     Value * const streamSet = b->CreateLoad(streamSetPtr);
    357     blockIndex = modBufferSize(b, blockIndex);
    358 
    359     assert (streamIndex->getType() == capacity->getType());
    360     Value * const cond = b->CreateICmpULT(streamIndex, capacity);
    361 
    362     // Are we guaranteed that we can access this stream?
    363     if (readOnly || isCapacityGuaranteed(streamIndex, mInitialCapacity)) {
    364         if (codegen::DebugOptionIsSet(codegen::EnableAsserts)) {
    365             b->CreateAssert(cond, "out-of-bounds stream access");
     254Value * StaticBuffer::getLinearlyAccessibleItems(IDISA::IDISA_Builder * const b, Value * const handle, Value * fromPosition, Value * availItems, bool reverse) const {
     255    Type * const ty = fromPosition->getType();
     256    const auto blockWidth = b->getBitBlockWidth();
     257    Value * capacity = getCapacity(b, handle);
     258    Value * const itemsFromBase = b->CreateURem(fromPosition, capacity);
     259    if (reverse) {
     260        Value * const bufAvail = b->CreateSelect(b->CreateIsNull(itemsFromBase), capacity, itemsFromBase);
     261        return b->CreateUMin(availItems, bufAvail);
     262    } else {
     263        if (mOverflow) {
     264            capacity = ConstantInt::get(ty, (mCapacity + mOverflow) * blockWidth - 1);
    366265        }
    367         Value * offset = b->CreateAdd(b->CreateMul(blockIndex, capacity), streamIndex);
    368         return {streamSet, offset};
    369     }
    370 
    371     BasicBlock * const entry = b->GetInsertBlock();
    372     BasicBlock * const expand = BasicBlock::Create(b->getContext(), "expand", entry->getParent());
    373     BasicBlock * const resume = BasicBlock::Create(b->getContext(), "resume", entry->getParent());
    374 
    375     b->CreateLikelyCondBr(cond, resume, expand);
    376 
    377     // EXPAND
    378     b->SetInsertPoint(expand);
    379 
    380     Type * elementType = getType()->getStructElementType(1)->getPointerElementType();
    381     Constant * const vectorWidth = ConstantExpr::getIntegerCast(ConstantExpr::getSizeOf(elementType), capacity->getType(), false);
    382 
    383     Value * newCapacity = b->CreateAdd(streamIndex, b->getSize(1));
    384     newCapacity = b->CreateCeilLog2(newCapacity);
    385     newCapacity = b->CreateShl(b->getSize(1), newCapacity, "newCapacity");
    386 
    387     std::string tmp;
    388     raw_string_ostream out(tmp);
    389     out << "__expand";
    390     elementType->print(out);
    391     std::string name = out.str();
    392 
    393     Module * const m = b->getModule();
    394     Function * expandFunction = m->getFunction(name);
    395 
    396     if (expandFunction == nullptr) {
    397 
    398         const auto ip = b->saveIP();
    399 
    400         FunctionType * fty = FunctionType::get(elementType->getPointerTo(), {elementType->getPointerTo(), b->getSizeTy(), b->getSizeTy()}, false);
    401         expandFunction = Function::Create(fty, GlobalValue::PrivateLinkage, name, m);
    402 
    403         auto args = expandFunction->arg_begin();
    404         Value * streamSet = &*args++;
    405         Value * capacity = &*args++;
    406         Value * newCapacity = &*args;
    407 
    408         BasicBlock * entry = BasicBlock::Create(b->getContext(), "entry", expandFunction);
    409         b->SetInsertPoint(entry);
    410 
    411         Value * size = b->CreateMul(newCapacity, b->getSize(mBufferBlocks));
    412         const auto memAlign = std::max(b->getCacheAlignment(), b->getBitBlockWidth() / 8);
    413 
    414         Value * newStreamSet = b->CreatePointerCast(b->CreateAlignedMalloc(b->CreateMul(size, vectorWidth), memAlign), elementType->getPointerTo());
    415         Value * const diffCapacity = b->CreateMul(b->CreateSub(newCapacity, capacity), vectorWidth);
    416 
    417         const auto alignment = elementType->getPrimitiveSizeInBits() / 8;
    418         for (unsigned i = 0; i < mBufferBlocks; ++i) {
    419             ConstantInt * const offset = b->getSize(i);
    420             Value * srcOffset = b->CreateMul(capacity, offset);
    421             Value * srcPtr = b->CreateGEP(streamSet, srcOffset);
    422             Value * destOffset = b->CreateMul(newCapacity, offset);
    423             Value * destPtr = b->CreateGEP(newStreamSet, destOffset);
    424             b->CreateMemCpy(destPtr, srcPtr, b->CreateMul(capacity, vectorWidth), alignment);
    425             Value * destZeroOffset = b->CreateAdd(destOffset, capacity);
    426             Value * destZeroPtr = b->CreateGEP(newStreamSet, destZeroOffset);
    427             b->CreateMemZero(destZeroPtr, diffCapacity, alignment);
    428         }
    429 
    430         b->CreateFree(streamSet);
    431 
    432         b->CreateRet(newStreamSet);
    433 
    434         b->restoreIP(ip);
    435     }
    436 
    437     Value * newStreamSet = b->CreateCall(expandFunction, {streamSet, capacity, newCapacity});
    438     b->CreateStore(newStreamSet, streamSetPtr);
    439     b->CreateStore(newCapacity, capacityPtr);
    440 
    441     b->CreateBr(resume);
    442 
    443     // RESUME
    444     b->SetInsertPoint(resume);
    445 
    446     PHINode * phiStreamSet = b->CreatePHI(streamSet->getType(), 2);
    447     phiStreamSet->addIncoming(streamSet, entry);
    448     phiStreamSet->addIncoming(newStreamSet, expand);
    449 
    450     PHINode * phiCapacity = b->CreatePHI(capacity->getType(), 2);
    451     phiCapacity->addIncoming(capacity, entry);
    452     phiCapacity->addIncoming(newCapacity, expand);
    453 
    454     Value * offset = b->CreateAdd(b->CreateMul(blockIndex, phiCapacity), streamIndex);
    455 
    456     return {phiStreamSet, offset};
    457 }
    458 
    459 Value * ExpandableBuffer::getStreamBlockPtr(IDISA::IDISA_Builder * const b, Value * const handle, Value * streamIndex, Value * blockIndex, const bool readOnly) const {
    460     report_fatal_error("temporarily not supported");
    461 //    Value * ptr, * offset;
    462 //    std::tie(ptr, offset) = getInternalStreamBuffer(b, handle, streamIndex, blockIndex, readOnly);
    463 //    return b->CreateGEP(ptr, offset);
    464 }
    465 
    466 Value * ExpandableBuffer::getStreamPackPtr(IDISA::IDISA_Builder * const b, Value * const handle, Value * streamIndex, Value * blockIndex, Value * packIndex, const bool readOnly) const {
    467     report_fatal_error("temporarily not supported");
    468 //    Value * ptr, * offset;
    469 //    std::tie(ptr, offset) = getInternalStreamBuffer(b, handle, streamIndex, blockIndex, readOnly);
    470 //    return b->CreateGEP(ptr, {offset, packIndex});
    471 }
    472 
    473 Value * ExpandableBuffer::getStreamSetCount(IDISA::IDISA_Builder * const b, Value * const handle) const {
    474     return b->CreateLoad(b->CreateGEP(handle, {b->getInt32(0), b->getInt32(0)}));
    475 }
    476 
    // Emits a load of field 1 of the struct that `handle` points to and returns it as the base address.
    // When codegen::EnableAsserts is set, runtime assertions are emitted for the handle (before the load)
    // and for the loaded base address (after it).
    477 Value * ExpandableBuffer::getBaseAddress(IDISA::IDISA_Builder * const b, Value * const handle) const {
    478     if (codegen::DebugOptionIsSet(codegen::EnableAsserts)) {
    479         b->CreateAssert(handle, "handle cannot be null");
    480     }
    481     Value * const baseAddr = b->CreateLoad(b->CreateGEP(handle, {b->getInt32(0), b->getInt32(1)}));
    482     if (codegen::DebugOptionIsSet(codegen::EnableAsserts)) {
            // Assert on the loaded address itself; asserting on `handle` again would only repeat the first check.
    483         b->CreateAssert(baseAddr, "base address cannot be 0");
    484     }
    485     return baseAddr;
    486 }
    487 
    // Emits a free of the base address obtained through getBaseAddress for mStreamSetBufferPtr.
    488 void ExpandableBuffer::releaseBuffer(const std::unique_ptr<kernel::KernelBuilder> & b) const {
    489     b->CreateFree(getBaseAddress(b.get(), mStreamSetBufferPtr));
    490 }
    491 
    // Not supported for expandable buffers: every call aborts with a fatal error; all arguments are ignored.
    492 Value * ExpandableBuffer::getLinearlyAccessibleItems(IDISA::IDISA_Builder * const, Value *, Value *, Value *, bool) const {
    493     report_fatal_error("Expandable buffers: getLinearlyAccessibleItems is not supported.");
    494 }
    495 
     266        Value * const linearSpace = b->CreateSub(capacity, itemsFromBase);
     267        return b->CreateUMin(availItems, linearSpace);
     268    }
     269}
     270
     // Emits IR for the writable item count starting at fromPosition.
     // fromPosition is first reduced modulo the capacity returned by getCapacity.
     //   reverse: yields the reduced fromPosition, or the full capacity when that remainder is zero.
     //   forward: consumed is reduced the same way; yields (limit - fromPosition), where limit is the
     //            capacity when consumed <= fromPosition and the reduced consumed position otherwise.
     271Value * StaticBuffer::getLinearlyWritableItems(IDISA::IDISA_Builder * const b, Value * const handle, Value * fromPosition, Value * consumed, bool reverse) const {
     272    Type * const ty = fromPosition->getType();
     273    const auto blockWidth = b->getBitBlockWidth();
     274    Value * capacity = getCapacity(b, handle);
     275    fromPosition = b->CreateURem(fromPosition, capacity);
     276    if (reverse) {
     277        return b->CreateSelect(b->CreateIsNull(fromPosition), capacity, fromPosition);
     278    }
     279    consumed = b->CreateURem(consumed, capacity);
     280    if (mOverflow) {
            // With overflow configured, the forward limit is the constant (mCapacity + mOverflow) * blockWidth - 1
            // rather than the value from getCapacity.
     281        capacity = ConstantInt::get(ty, (mCapacity + mOverflow) * blockWidth - 1);
     282    }
     283    Value * const limit = b->CreateSelect(b->CreateICmpULE(consumed, fromPosition), capacity, consumed);
     284    return b->CreateSub(limit, fromPosition);
     285}
     286
     287
     288// Dynamic Buffer
     289
     // Builds the handle struct type for a DynamicBuffer: two pointers to baseType (in addrSpace)
     // followed by two size-typed integers. Code in this file indexes the fields with the
     // BaseAddress, PriorBaseAddress and Capacity enumerators (declared outside this view).
     290inline StructType * getDynamicBufferStructType(const std::unique_ptr<kernel::KernelBuilder> & b, Type * baseType, const unsigned addrSpace) {
     291    IntegerType * sizeTy = b->getSizeTy();
     292    PointerType * typePtr = baseType->getPointerTo(addrSpace);
     293    return StructType::get(b->getContext(), {typePtr, typePtr, sizeTy, sizeTy});
     294}
     295
     // Emits IR that creates and initialises the dynamic buffer:
     //   1. allocates the handle struct with CreateCacheAlignedAlloca;
     //   2. mallocs a region sized by getAllocationSize for mInitialCapacity blocks, rounded up to the cache alignment;
     //   3. stores the new base address, a null prior base address, and mInitialCapacity (a block count) in the handle;
     //   4. records the handle in mStreamSetHandle.
     // NOTE(review): getAllocationSize calls getStreamSetCount(b, handle) before any field of `handle` has been
     // stored — confirm that getStreamSetCount does not read from the handle for this buffer type.
     296void DynamicBuffer::allocateBuffer(const std::unique_ptr<kernel::KernelBuilder> & b) {
     297    Type * const structTy = getDynamicBufferStructType(b, mType, mAddressSpace);
     298    Value * const handle = b->CreateCacheAlignedAlloca(structTy);
            // Initial capacity expressed in items (blocks * block width).
     299    Constant * const capacity = b->getSize(mInitialCapacity * b->getBitBlockWidth());
     300    // note: when adding extensible stream sets, make sure to set the initial count here.
     301    Value * const bufferSize = b->CreateRoundUp(getAllocationSize(b.get(), handle, capacity), b->getSize(b->getCacheAlignment()));;
     302    Value * const baseAddressField = b->CreateGEP(handle, {b->getInt32(0), b->getInt32(BaseAddress)});
     303    Type * const baseAddressPtrTy = baseAddressField->getType()->getPointerElementType();
            // The malloc result is cast to the field's pointer type before it is stored.
     304    Value * const baseAddress = b->CreatePointerCast(b->CreateCacheAlignedMalloc(bufferSize), baseAddressPtrTy);
     305    if (codegen::DebugOptionIsSet(codegen::TraceDynamicBuffers)) {
     306        b->CallPrintInt("allocated: ", baseAddress);
     307        b->CallPrintInt("allocated capacity: ", bufferSize);
     308    }
     309    b->CreateStore(baseAddress, baseAddressField);
     310    Value * const priorAddressField = b->CreateGEP(handle, {b->getInt32(0), b->getInt32(PriorBaseAddress)});
     311    b->CreateStore(ConstantPointerNull::getNullValue(baseAddressPtrTy), priorAddressField);
     312    Value * const capacityField = b->CreateGEP(handle, {b->getInt32(0), b->getInt32(Capacity)});
            // The Capacity field holds blocks; getCapacity multiplies it by the block width when read back.
     313    b->CreateStore(b->getSize(mInitialCapacity), capacityField);
     314    mStreamSetHandle = handle;
     315}
     316
     // Emits frees for the prior base address and then the current base address stored in mStreamSetHandle.
     // Neither pointer is tested before being freed.
     // NOTE(review): allocateBuffer initialises the prior address to null, so this presumably relies on
     // CreateFree accepting a null pointer — TODO confirm.
     317void DynamicBuffer::releaseBuffer(const std::unique_ptr<kernel::KernelBuilder> & b) const {
     318    Value * const handle = mStreamSetHandle;
     319    /* Free the dynamically allocated buffer, but not the stack-allocated buffer struct. */
     320    Value * priorAddressField = b->CreateGEP(handle, {b->getInt32(0), b->getInt32(PriorBaseAddress)});
     321    b->CreateFree(b->CreateLoad(priorAddressField));
     322    Value * baseAddressField = b->CreateGEP(handle, {b->getInt32(0), b->getInt32(BaseAddress)});
     323    b->CreateFree(b->CreateLoad(baseAddressField));
     324}
     325
     // Not supported for dynamic buffers: every call aborts with a fatal error; all arguments are ignored.
     // NOTE(review): the commented-out parameter names here read (addr, handle), while the declaration in
     // streamset.h names them (handle, addr) — confirm which order is intended.
     326void DynamicBuffer::setBaseAddress(IDISA::IDISA_Builder * const /* b */, Value * /* addr */, Value * /* handle */) const {
     327    report_fatal_error("setBaseAddress is not supported by this buffer type");
     328}
    496329
    497330Value * DynamicBuffer::getBaseAddress(IDISA::IDISA_Builder * const b, Value * const handle) const {
     
    508341
    509342Value * DynamicBuffer::getBlockAddress(IDISA::IDISA_Builder * const b, Value * const handle, Value * blockIndex) const {
    510     Value * const workingBlocks = b->CreateLoad(b->CreateGEP(handle, {b->getInt32(0), b->getInt32(WorkingBlocks)}));
     343    Value * const workingBlocks = b->CreateLoad(b->CreateGEP(handle, {b->getInt32(0), b->getInt32(Capacity)}));
    511344    assert (blockIndex->getType() == workingBlocks->getType());
    512345    return b->CreateGEP(getBaseAddress(b, handle), b->CreateURem(blockIndex, workingBlocks));
     
    514347
    515348Value * DynamicBuffer::getOverflowAddress(IDISA::IDISA_Builder * const b, Value * const handle) const {
    516     Value * const workingBlocks = b->CreateLoad(b->CreateGEP(handle, {b->getInt32(0), b->getInt32(WorkingBlocks)}));
     349    Value * const workingBlocks = b->CreateLoad(b->CreateGEP(handle, {b->getInt32(0), b->getInt32(Capacity)}));
    517350    return b->CreateGEP(getBaseAddress(b, handle), workingBlocks);
    518351}
     
    536369
    537370Value * DynamicBuffer::getLinearlyAccessibleItems(IDISA::IDISA_Builder * const b, Value * const handle, Value * fromPosition, Value * availItems, bool reverse) const {
    538     Value * const bufferSize = getBufferedSize(b, handle);
     371    Value * const bufferSize = getCapacity(b, handle);
    539372    Value * const itemsFromBase = b->CreateURem(fromPosition, bufferSize);
    540373    if (reverse) {
     
    543376    } else {
    544377        Value * capacity = bufferSize;
    545         if (mOverflowBlocks) {
    546             Constant * const overflow = b->getSize(mOverflowBlocks * b->getBitBlockWidth() - 1);
     378        if (mOverflow) {
     379            Constant * const overflow = b->getSize(mOverflow * b->getBitBlockWidth() - 1);
    547380            capacity = b->CreateAdd(bufferSize, overflow);
    548381        }
     
    553386
    554387Value * DynamicBuffer::getLinearlyWritableItems(IDISA::IDISA_Builder * const b, Value * const handle, Value * fromPosition, Value * consumed, bool reverse) const {
    555     Value * const bufferSize = getBufferedSize(b, handle);
     388    Value * const bufferSize = getCapacity(b, handle);
    556389    fromPosition = b->CreateURem(fromPosition, bufferSize);
    557390    if (reverse) {
     
    560393    consumed = b->CreateURem(consumed, bufferSize);
    561394    Value * capacity = bufferSize;
    562     if (mOverflowBlocks) {
    563         Constant * const overflow = b->getSize(mOverflowBlocks * b->getBitBlockWidth() - 1);
     395    if (mOverflow) {
     396        Constant * const overflow = b->getSize(mOverflow * b->getBitBlockWidth() - 1);
    564397        capacity = b->CreateAdd(bufferSize, overflow);
    565398    }
     
    568401}
    569402
    570 Value * DynamicBuffer::getBufferedSize(IDISA::IDISA_Builder * const b, Value * const handle) const {
    571     Value * ptr = b->CreateGEP(handle, {b->getInt32(0), b->getInt32(WorkingBlocks)});
     403Value * DynamicBuffer::getCapacity(IDISA::IDISA_Builder * const b, Value * const handle) const {
     404    Value * ptr = b->CreateGEP(handle, {b->getInt32(0), b->getInt32(Capacity)});
    572405    return b->CreateMul(b->CreateLoad(ptr), b->getSize(b->getBitBlockWidth()));
    573406}
    574407
    575 
    // Builds the handle struct type for a DynamicBuffer: two pointers to baseType (in addrSpace)
    // followed by five size-typed integers. The allocateBuffer code that follows indexes the fields with
    // BaseAddress, PriorBaseAddress, AllocatedCapacity, WorkingBlocks, Length, ProducedPosition and ConsumedPosition.
    576 inline StructType * getDynamicBufferStructType(const std::unique_ptr<kernel::KernelBuilder> & b, Type * baseType, const unsigned addrSpace) {
    577     IntegerType * sizeTy = b->getSizeTy();
    578     PointerType * typePtr = baseType->getPointerTo(addrSpace);
    579     return StructType::get(b->getContext(), {typePtr, typePtr, sizeTy, sizeTy, sizeTy, sizeTy, sizeTy});
    580 }
    581 
    582 void DynamicBuffer::allocateBuffer(const std::unique_ptr<kernel::KernelBuilder> & b) {
    583     Type * const structTy = getDynamicBufferStructType(b, mType, mAddressSpace);
    584     Value * const handle = b->CreateCacheAlignedAlloca(structTy);
    585     size_t numStreams = 1;
    586     if (isa<ArrayType>(mBaseType)) {
    587         numStreams = mBaseType->getArrayNumElements();
    588     }
     408void DynamicBuffer::setCapacity(IDISA::IDISA_Builder * const b, Value * handle, Value * required) const {
     409
     410    BasicBlock * const entry = b->GetInsertBlock(); assert (entry);
     411    BasicBlock * const insertBefore = entry->getNextNode();
     412    BasicBlock * const expand = b->CreateBasicBlock("expandDynamicBuffer", insertBefore);
     413    BasicBlock * const resume = b->CreateBasicBlock("", insertBefore);
     414
     415    ConstantInt * const BLOCK_WIDTH = b->getSize(b->getBitBlockWidth());
     416
     417    Value * const capacityField = b->CreateGEP(handle, {b->getInt32(0), b->getInt32(Capacity)});
     418    Value * const capacity = b->CreateMul(capacityField, BLOCK_WIDTH);
     419    Value * const needsExpansion = b->CreateICmpULT(capacity, required);
     420    b->CreateUnlikelyCondBr(needsExpansion, expand, resume);
     421
     422    b->SetInsertPoint(expand);
     423    Value * const newCapacity = b->CreateRoundUp(required, capacity);
     424    Value * const bufferSize = getAllocationSize(b, handle, newCapacity);
     425    Value * const newBaseAddress = b->CreateCacheAlignedMalloc(bufferSize);
     426    Value * const baseAddressField = b->CreateGEP(handle, {b->getInt32(0), b->getInt32(BaseAddress)});
     427    Value * const currentBaseAddress = b->CreateLoad(baseAddressField);
     428    Value * const priorBaseAddressField = b->CreateGEP(handle, {b->getInt32(0), b->getInt32(PriorBaseAddress)});
     429    Value * const priorBaseAddress = b->CreateLoad(priorBaseAddressField);
     430    b->CreateMemCpy(newBaseAddress, currentBaseAddress, capacity, b->getCacheAlignment());
     431    b->CreateStore(baseAddressField, newBaseAddress);
     432    b->CreateStore(capacityField, b->CreateUDiv(newCapacity, BLOCK_WIDTH));
     433    b->CreateStore(priorBaseAddressField, currentBaseAddress);
     434    b->CreateFree(priorBaseAddress);
     435
     436    b->CreateBr(resume);
     437    b->SetInsertPoint(resume);
     438}
     439
     440Value * DynamicBuffer::getAllocationSize(IDISA::IDISA_Builder * const b, Value * handle, Value * const requiredItemCapacity) const {
     441    Value * itemCapacity = requiredItemCapacity;
     442    if (mOverflow) {
     443        Constant * const overflowSize =  b->getSize(mOverflow * b->getBitBlockWidth());
     444        itemCapacity = b->CreateAdd(requiredItemCapacity, overflowSize);
     445    }
     446    Value * const numOfStreams = getStreamSetCount(b, handle);
     447    Value * bufferSize = b->CreateMul(itemCapacity, numOfStreams);
    589448    const auto fieldWidth = mBaseType->getArrayElementType()->getScalarSizeInBits();
    590     Value * bufSize = b->getSize((mBufferBlocks + mOverflowBlocks) * b->getBitBlockWidth() * numStreams * fieldWidth/8);
    591     bufSize = b->CreateRoundUp(bufSize, b->getSize(b->getCacheAlignment()));
    592     Value * bufBasePtrField = b->CreateGEP(handle, {b->getInt32(0), b->getInt32(BaseAddress)});
    593     Type * bufPtrType = bufBasePtrField->getType()->getPointerElementType();
    594     Value * bufPtr = b->CreatePointerCast(b->CreateCacheAlignedMalloc(bufSize), bufPtrType);
    595     if (codegen::DebugOptionIsSet(codegen::TraceDynamicBuffers)) {
    596         b->CallPrintInt("allocated: ", bufPtr);
    597         b->CallPrintInt("allocated capacity: ", bufSize);
    598     }
    599     b->CreateStore(bufPtr, bufBasePtrField);
    600     b->CreateStore(ConstantPointerNull::getNullValue(bufPtrType), b->CreateGEP(handle, {b->getInt32(0), b->getInt32(PriorBaseAddress)}));
    601     b->CreateStore(bufSize, b->CreateGEP(handle, {b->getInt32(0), b->getInt32(AllocatedCapacity)}));
    602     b->CreateStore(b->getSize(mBufferBlocks), b->CreateGEP(handle, {b->getInt32(0), b->getInt32(WorkingBlocks)}));
    603     b->CreateStore(b->getSize(-1), b->CreateGEP(handle, {b->getInt32(0), b->getInt32(Length)}));
    604     b->CreateStore(b->getSize(0), b->CreateGEP(handle, {b->getInt32(0), b->getInt32(ProducedPosition)}));
    605     b->CreateStore(b->getSize(0), b->CreateGEP(handle, {b->getInt32(0), b->getInt32(ConsumedPosition)}));
    606     mStreamSetBufferPtr = handle;
    607 }
    608 
    // Emits IR that frees the prior buffer only when its pointer is non-null, then frees the current buffer.
    // The handle struct itself (mStreamSetBufferPtr) is not freed.
    609 void DynamicBuffer::releaseBuffer(const std::unique_ptr<kernel::KernelBuilder> & b) const {
    610     Value * const handle = mStreamSetBufferPtr;
    611     /* Free the dynamically allocated buffer, but not the stack-allocated buffer struct. */
    612     Value * bufBasePtrField = b->CreateGEP(handle, {b->getInt32(0), b->getInt32(BaseAddress)});
    613     Type * bufPtrType = bufBasePtrField->getType()->getPointerElementType();
    614     Value * priorBasePtrField = b->CreateGEP(handle, {b->getInt32(0), b->getInt32(PriorBaseAddress)});
    615     BasicBlock * freePrior = b->CreateBasicBlock("freePrior");
    616     BasicBlock * freeCurrent = b->CreateBasicBlock("freeCurrent");
    617     Value * priorBuf = b->CreateLoad(priorBasePtrField);
    618     Value * priorBufIsNonNull = b->CreateICmpNE(priorBuf, ConstantPointerNull::get(cast<PointerType>(bufPtrType)));
            // Branch around the first free when there is no prior buffer.
    619     b->CreateCondBr(priorBufIsNonNull, freePrior, freeCurrent);
    620     b->SetInsertPoint(freePrior);
    621     if (codegen::DebugOptionIsSet(codegen::TraceDynamicBuffers)) {
    622         b->CallPrintInt("releasing: ", priorBuf);
    623     }
    624     b->CreateFree(priorBuf);
    625     b->CreateBr(freeCurrent);
    626     b->SetInsertPoint(freeCurrent);
    627     b->CreateFree(b->CreateLoad(bufBasePtrField));
    628 }
    629 
    630 //
    631 //  Simple capacity doubling.  Use the circular buffer property: duplicating buffer data
    632 //  ensures that we have correct data.   TODO: consider optimizing based on actual
    633 //  consumer and producer positions.
    634 //
    // handle: pointer to the dynamic buffer struct; its BaseAddress, PriorBaseAddress, WorkingBlocks and
    // AllocatedCapacity fields are read and updated by the emitted IR.
    635 void DynamicBuffer::doubleCapacity(IDISA::IDISA_Builder * const b, Value * const handle) const {
    636     size_t numStreams = 1;
    637     if (isa<ArrayType>(mBaseType)) {
    638         numStreams = mBaseType->getArrayNumElements();
    639     }
    640     const auto fieldWidth = mBaseType->getArrayElementType()->getScalarSizeInBits();
            // Bytes occupied by one block across all streams.
    641     Constant * blockBytes = b->getSize(b->getBitBlockWidth() * numStreams * fieldWidth/8);
    642     Value * bufBasePtrField = b->CreateGEP(handle, {b->getInt32(0), b->getInt32(BaseAddress)});
    643     Type * bufPtrType = bufBasePtrField->getType()->getPointerElementType();
    644     Value * priorBasePtrField = b->CreateGEP(handle, {b->getInt32(0), b->getInt32(PriorBaseAddress)});
    645     Value * workingBlocksField = b->CreateGEP(handle, {b->getInt32(0), b->getInt32(WorkingBlocks)});
    646     Value * capacityField = b->CreateGEP(handle, {b->getInt32(0), b->getInt32(AllocatedCapacity)});
    647 
    648     Value * oldBufPtr = b->CreateLoad(bufBasePtrField);
    649     Value * currentWorkingBlocks = b->CreateLoad(workingBlocksField);
    650     Value * workingBytes = b->CreateMul(currentWorkingBlocks, blockBytes);
    651     Value * const curAllocated = b->CreateLoad(capacityField);
            // Required bytes: twice the working size, plus the overflow area when configured, rounded up to the cache alignment.
    652     Value * neededCapacity = b->CreateAdd(workingBytes, workingBytes);
    653     if (mOverflowBlocks > 0) {
    654         Constant * overflowBytes = b->getSize(mOverflowBlocks * b->getBitBlockWidth() * numStreams * fieldWidth/8);
    655         neededCapacity = b->CreateAdd(neededCapacity, overflowBytes);
    656     }
    657     neededCapacity = b->CreateRoundUp(neededCapacity, b->getSize(b->getCacheAlignment()));
    658     BasicBlock * doubleEntry = b->GetInsertBlock();
    659     BasicBlock * doRealloc = b->CreateBasicBlock("doRealloc");
    660     BasicBlock * doCopy2 = b->CreateBasicBlock("doCopy2");
            // Reallocate only when the recorded allocation is smaller than what is needed.
    661     b->CreateCondBr(b->CreateICmpULT(curAllocated, neededCapacity), doRealloc, doCopy2);
    662     b->SetInsertPoint(doRealloc);
    663     // If there is a non-null priorBasePtr, free it.
    664     Value * priorBuf = b->CreateLoad(priorBasePtrField);
    665     Value * priorBufIsNonNull = b->CreateICmpNE(priorBuf, ConstantPointerNull::get(cast<PointerType>(bufPtrType)));
    666     BasicBlock * deallocatePrior = b->CreateBasicBlock("deallocatePrior");
    667     BasicBlock * allocateNew = b->CreateBasicBlock("allocateNew");
    668     b->CreateCondBr(priorBufIsNonNull, deallocatePrior, allocateNew);
    669     b->SetInsertPoint(deallocatePrior);
    670     if (codegen::DebugOptionIsSet(codegen::TraceDynamicBuffers)) {
    671         b->CallPrintInt("deallocating: ", priorBuf);
    672     }
    673     b->CreateFree(priorBuf);
    674     b->CreateBr(allocateNew);
    675 
    676     b->SetInsertPoint(allocateNew);
            // The current buffer is kept as the prior buffer rather than freed here.
    677     b->CreateStore(oldBufPtr, priorBasePtrField);
    678     Value * newBufPtr = b->CreatePointerCast(b->CreateCacheAlignedMalloc(neededCapacity), bufPtrType);
    679     if (codegen::DebugOptionIsSet(codegen::TraceDynamicBuffers)) {
    680         b->CallPrintInt("re-allocated: ", newBufPtr);
    681         b->CallPrintInt("allocated capacity: ", neededCapacity);
    682     }
    683     b->CreateStore(newBufPtr, bufBasePtrField);
    684     createBlockCopy(b, newBufPtr, oldBufPtr, currentWorkingBlocks);
    685     b->CreateStore(neededCapacity, capacityField);
    686     b->CreateBr(doCopy2);
    687 
    688     b->SetInsertPoint(doCopy2);
            // bufPtr is the old buffer when no reallocation happened, otherwise the new one.
    689     PHINode * bufPtr = b->CreatePHI(oldBufPtr->getType(), 2);
    690     bufPtr->addIncoming(oldBufPtr, doubleEntry);
    691     bufPtr->addIncoming(newBufPtr, allocateNew);
            // Duplicate the first currentWorkingBlocks blocks into the region that follows them, then store the doubled block count.
    692     createBlockCopy(b, b->CreateGEP(bufPtr, currentWorkingBlocks), bufPtr, currentWorkingBlocks);
    693     currentWorkingBlocks = b->CreateAdd(currentWorkingBlocks, currentWorkingBlocks);
    694     if (codegen::DebugOptionIsSet(codegen::TraceDynamicBuffers)) {
    695         b->CallPrintInt("currentWorkingBlocks: ", currentWorkingBlocks);
    696     }
    697     b->CreateStore(currentWorkingBlocks, workingBlocksField);
    698 }
    699 
    // Builds the handle struct type for a SourceBuffer: a pointer (in MemoryAddressSpace) to the resolved
    // stream set type, followed by two size-typed integers.
    700 inline StructType * getSourceBufferType(const std::unique_ptr<kernel::KernelBuilder> & b, Type * const type, const unsigned MemoryAddressSpace) {
    701     return StructType::get(b->getContext(), {resolveStreamSetType(b, type)->getPointerTo(MemoryAddressSpace), b->getSizeTy(), b->getSizeTy()});
    702 }
    703 
    // Constructs a SourceBuffer with zero buffer blocks and zero overflow blocks. The unique ID is "B",
    // with "@<MemoryAddressSpace>:<StructAddressSpace>" appended when either address space is non-zero.
    704 SourceBuffer::SourceBuffer(const std::unique_ptr<kernel::KernelBuilder> & b, Type * type, unsigned MemoryAddressSpace, unsigned StructAddressSpace)
    705 : StreamSetBuffer(BufferKind::SourceBuffer, type, getSourceBufferType(b, type, MemoryAddressSpace), 0, 0, StructAddressSpace) {
    706     mUniqueID = "B";
    707     if (MemoryAddressSpace != 0 || StructAddressSpace != 0) {
    708         mUniqueID += "@" + std::to_string(MemoryAddressSpace) + ":" + std::to_string(StructAddressSpace);
    709     }
    710 }
    711 
    712 ExternalBuffer::ExternalBuffer(const std::unique_ptr<kernel::KernelBuilder> & b, Type * type, Value * addr, unsigned AddressSpace)
    713 : StreamSetBuffer(BufferKind::ExternalBuffer, type, resolveStreamSetType(b, type), 0, 0, AddressSpace) {
     449    if (LLVM_LIKELY(fieldWidth < 8)) {
     450        bufferSize = b->CreateUDiv(bufferSize, b->getSize(8 / fieldWidth));
     451    } else if (LLVM_UNLIKELY(fieldWidth > 8)) {
     452        bufferSize = b->CreateMul(bufferSize, b->getSize(fieldWidth / 8));
     453    }
     454    return bufferSize;
     455}
     456
     457ExternalBuffer::ExternalBuffer(const std::unique_ptr<kernel::KernelBuilder> & b, Type * const type,
     458                               Value * const externalAddress, const unsigned AddressSpace)
     459: StreamSetBuffer(BufferKind::ExternalBuffer, b, type, AddressSpace)
     460, mExternalAddress(externalAddress) {
    714461    mUniqueID = "E";
    715462    if (AddressSpace > 0) mUniqueID += "@" + std::to_string(AddressSpace);
    716     mStreamSetBufferPtr = b->CreatePointerBitCastOrAddrSpaceCast(addr, getPointerType());
    717 }
    718 
    719 CircularBuffer::CircularBuffer(const std::unique_ptr<kernel::KernelBuilder> & b, Type * type, size_t bufferBlocks, unsigned AddressSpace)
    720 : StreamSetBuffer(BufferKind::CircularBuffer, type, resolveStreamSetType(b, type), bufferBlocks, 0, AddressSpace) {
    721     mUniqueID = "C" + std::to_string(bufferBlocks);
     463}
     464
     // Constructs a StaticBuffer: forwards kind, builder, type and address space to StreamSetBuffer and
     // records `capacity` in mCapacity and `overflowBlocks` in mOverflow.
     // The unique ID is "S<capacity>", with "@<AddressSpace>" appended when the address space is non-zero.
     // NOTE(review): overflowBlocks is not encoded in mUniqueID, whereas DynamicBuffer's constructor appends
     // its overflow count — confirm this is intended.
     465StaticBuffer::StaticBuffer(const std::unique_ptr<kernel::KernelBuilder> & b, Type * const type,
     466                           const size_t capacity, const size_t overflowBlocks, const unsigned AddressSpace)
     467: StreamSetBuffer(BufferKind::StaticBuffer, b, type, AddressSpace)
     468, mCapacity(capacity)
     469, mOverflow(overflowBlocks) {
     470    mUniqueID = "S" + std::to_string(capacity);
    722471    if (AddressSpace > 0) mUniqueID += "@" + std::to_string(AddressSpace);
    723472}
    724473
    // Overload that takes the buffer kind explicitly and forwards everything to StreamSetBuffer;
    // the body is empty, so mUniqueID is not set here (CircularCopybackBuffer's constructor sets it itself).
    725 CircularBuffer::CircularBuffer(const BufferKind k, const std::unique_ptr<kernel::KernelBuilder> & b, Type * type, size_t bufferBlocks, size_t overflowBlocks, unsigned AddressSpace)
    726 : StreamSetBuffer(k, type, resolveStreamSetType(b, type), bufferBlocks, overflowBlocks, AddressSpace) {
    727 
    728 }
    729 
    // Constructs a CircularCopybackBuffer via the kind-taking CircularBuffer constructor.
    // Aborts with a fatal error when overflowBlocks < 1 or bufferBlocks < 2 * overflowBlocks.
    // The unique ID is "CC<bufferBlocks>", followed by "_<mOverflowBlocks>" when that is not 1 and
    // "@<AddressSpace>" when the address space is non-zero.
    730 CircularCopybackBuffer::CircularCopybackBuffer(const std::unique_ptr<kernel::KernelBuilder> & b, Type * type, size_t bufferBlocks, size_t overflowBlocks, unsigned AddressSpace)
    731 : CircularBuffer(BufferKind::CircularCopybackBuffer, b, type, bufferBlocks, overflowBlocks, AddressSpace) {
    732     if (overflowBlocks < 1) {
    733         report_fatal_error("CircularCopybackBuffer: overflowBlocks < 1");
    734     }
    735     if (bufferBlocks < 2 * overflowBlocks) {
    736         report_fatal_error("CircularCopybackBuffer: bufferBlocks < 2 * overflowBlocks");
    737     }
    738     mUniqueID = "CC" + std::to_string(bufferBlocks);
    739     if (mOverflowBlocks != 1) mUniqueID += "_" + std::to_string(mOverflowBlocks);
    740     if (AddressSpace > 0) mUniqueID += "@" + std::to_string(AddressSpace);
    741 }
    742 
    // Constructs an ExpandableBuffer with the struct type from resolveExpandableStreamSetType and no overflow blocks.
    // mInitialCapacity is taken from type->getArrayNumElements().
    // The unique ID is "XP<bufferBlocks>", with "@<AddressSpace>" appended when the address space is non-zero.
    743 ExpandableBuffer::ExpandableBuffer(const std::unique_ptr<kernel::KernelBuilder> & b, Type * type, size_t bufferBlocks, unsigned AddressSpace)
    744 : StreamSetBuffer(BufferKind::ExpandableBuffer, type, resolveExpandableStreamSetType(b, type), bufferBlocks, 0, AddressSpace)
    745 , mInitialCapacity(type->getArrayNumElements()) {
    746     mUniqueID = "XP" + std::to_string(bufferBlocks);
    747     if (AddressSpace > 0) mUniqueID += "@" + std::to_string(AddressSpace);
    748 }
    749 
    // Constructs a DynamicBuffer, passing initialCapacity and overflow to StreamSetBuffer as the buffer-block
    // and overflow-block counts. Aborts when initialCapacity * block width < 2 * overflow.
    // The unique ID is "DB", followed by "s<swizzle>" when swizzle != 1, "o<overflow>" when overflow != 0,
    // and "@<addrSpace>" when addrSpace != 0.
    750 DynamicBuffer::DynamicBuffer(const std::unique_ptr<kernel::KernelBuilder> & b, Type * type, size_t initialCapacity, size_t overflow, unsigned swizzle, unsigned addrSpace)
    751 : StreamSetBuffer(BufferKind::DynamicBuffer, type, resolveStreamSetType(b, type), initialCapacity, overflow, addrSpace) {
    752     if (initialCapacity * b->getBitBlockWidth() < 2 * overflow) {
    753         report_fatal_error("DynamicBuffer: initialCapacity * b->getBitBlockWidth() < 2 * overflow");
    754     }
    755     mUniqueID = "DB";
    756     if (swizzle != 1) {
    757         mUniqueID += "s" + std::to_string(swizzle);
    758     }
    759         if (overflow != 0) {
    760         mUniqueID += "o" + std::to_string(overflow);
    761     }
    762     if (addrSpace != 0) {
    763         mUniqueID += "@" + std::to_string(addrSpace);
    764     }
    765 }
    766 
    767 
    // Base constructor: records the kind, resolved type, block counts, address space and base type;
    // the handle pointer and the producer start out null.
    768 inline StreamSetBuffer::StreamSetBuffer(BufferKind k, Type * baseType, Type * resolvedType, unsigned BufferBlocks, unsigned OverflowBlocks, unsigned AddressSpace)
    769 : mBufferKind(k)
    770 , mType(resolvedType)
    771 , mBufferBlocks(BufferBlocks)
    772 , mOverflowBlocks(OverflowBlocks)
    773 , mAddressSpace(AddressSpace)
    774 , mStreamSetBufferPtr(nullptr)
    775 , mBaseType(baseType)
    776 , mProducer(nullptr) {
            // Exactly one of these must hold: the kind is Source/External, or BufferBlocks is non-zero.
    777     assert((k == BufferKind::SourceBuffer || k == BufferKind::ExternalBuffer) ^ (BufferBlocks > 0));
    778     assert ("A zero length buffer cannot have overflow blocks!" && ((BufferBlocks > 0) || (OverflowBlocks == 0)));
    779 }
    780 
    // Out-of-line, empty definition of the destructor that streamset.h declares as pure virtual.
    781 StreamSetBuffer::~StreamSetBuffer() { }
     // Constructs a DynamicBuffer: forwards kind, builder, type and address space to StreamSetBuffer and
     // records `initialCapacity` in mInitialCapacity and `overflowBlocks` in mOverflow.
     // The unique ID is "D", followed by the overflow block count when non-zero and
     // "@<AddressSpace>" when the address space is non-zero; the initial capacity is not part of the ID.
     474DynamicBuffer::DynamicBuffer(const std::unique_ptr<kernel::KernelBuilder> & b, Type * const type,
     475                             const size_t initialCapacity, const size_t overflowBlocks, const unsigned AddressSpace)
     476: StreamSetBuffer(BufferKind::DynamicBuffer, b, type, AddressSpace)
     477, mInitialCapacity(initialCapacity)
     478, mOverflow(overflowBlocks) {
     479    mUniqueID = "D";
     480    if (overflowBlocks != 0) mUniqueID += std::to_string(overflowBlocks);
     481    if (AddressSpace != 0) mUniqueID += "@" + std::to_string(AddressSpace);
     482}
    782483
    783484// Helper routines
     
    806507}
    807508
    // Resolves the handle struct type for an expandable stream set.
    // An array type is first unwrapped to its element type. The result must be a vector type with zero
    // elements whose element type is an integer; its bit width is the field width.
    // Returns { size, pointer to T }, where T is the bit-block type when the field width is 1 and an array
    // of fieldWidth bit blocks otherwise. Any other input aborts with a fatal error that prints the type.
    808 StructType * resolveExpandableStreamSetType(const std::unique_ptr<kernel::KernelBuilder> & b, Type * type) {
    809     if (LLVM_LIKELY(type->isArrayTy())) {
    810         type = type->getArrayElementType();
    811     }
    812     if (LLVM_LIKELY(type->isVectorTy() && type->getVectorNumElements() == 0)) {
    813         type = type->getVectorElementType();
    814         if (LLVM_LIKELY(type->isIntegerTy())) {
    815             const auto fieldWidth = cast<IntegerType>(type)->getBitWidth();
    816             type = b->getBitBlockType();
    817             if (fieldWidth != 1) {
    818                 type = ArrayType::get(type, fieldWidth);
    819             }
    820             return StructType::get(b->getContext(), {b->getSizeTy(), type->getPointerTo()});
    821         }
    822     }
            // Fall-through: the type did not match the expected shape.
    823     std::string tmp;
    824     raw_string_ostream out(tmp);
    825     type->print(out);
    826     out << " is an unvalid stream set buffer type.";
    827     report_fatal_error(out.str());
    828 }
     509
     // Base constructor: records the kind, address space and base type, and derives mType by calling
     // resolveStreamSetType on the base type. The stream-set handle and the producer start out null.
     510inline StreamSetBuffer::StreamSetBuffer(BufferKind k, const std::unique_ptr<kernel::KernelBuilder> & b, Type * baseType, unsigned AddressSpace)
     511: mBufferKind(k)
     512, mType(resolveStreamSetType(b, baseType))
     513, mAddressSpace(AddressSpace)
     514, mStreamSetHandle(nullptr)
     515, mBaseType(baseType)
     516, mProducer(nullptr) {
     517
     518}
     519
     // Out-of-line, empty definition of the destructor that streamset.h declares as pure virtual.
     520StreamSetBuffer::~StreamSetBuffer() { }
  • icGREP/icgrep-devel/icgrep/kernels/streamset.h

    r5998 r6047  
    1111namespace IDISA { class IDISA_Builder; }
    1212namespace llvm { class Value; }
     13namespace llvm { class Constant; }
    1314namespace kernel { class Kernel; }
    1415namespace kernel { class KernelBuilder; }
     
    2223
    2324    enum class BufferKind : unsigned {
    24         SourceBuffer
    25         , ExternalBuffer
    26         , CircularBuffer
    27         , CircularCopybackBuffer
    28         , ExpandableBuffer
     25        ExternalBuffer
     26        , StaticBuffer
    2927        , DynamicBuffer
    3028    };
     
    4644    }
    4745
    48     // Return the number of streams for a static stream set or 0 for an expandable stream set.
    4946    unsigned getNumOfStreams () const {
    50         if (mBufferKind == BufferKind::ExpandableBuffer) return 0;
    5147        size_t numStreams = 1;
    5248        if (mBaseType->isArrayTy()) {
     
    7167    }
    7268
    // Returns the stored mBufferBlocks value.
    73     size_t getBufferBlocks() const {
    74         return mBufferBlocks;
    75     }
    76 
    // Returns the stored mStreamSetBufferPtr value (null until a subclass assigns it).
    77     llvm::Value * getStreamSetHandle() const {
    78         return mStreamSetBufferPtr;
    79     }
    80 
    81     bool supportsCopyBack() const {
    82         return mOverflowBlocks != 0;
     // Default implementation: reports that copy-back is not supported. Declared virtual so that
     // derived buffer types can override it.
     69    virtual bool supportsCopyBack() const {
     70        return false;
    8371    }
    8472
     
    8775    }
    8876
    // Returns the stored mOverflowBlocks value.
    89     size_t overflowSize() const {
    90         return mOverflowBlocks;
    91     }
    92 
    9377    virtual ~StreamSetBuffer() = 0;
    9478
     
    10185    }
    10286
    103     virtual void allocateBuffer(const std::unique_ptr<kernel::KernelBuilder> & kb);
    104 
    105     virtual void releaseBuffer(const std::unique_ptr<kernel::KernelBuilder> & kb) const;
     87    virtual void allocateBuffer(const std::unique_ptr<kernel::KernelBuilder> & kb) = 0;
     88
     89    virtual void releaseBuffer(const std::unique_ptr<kernel::KernelBuilder> & kb) const = 0;
    10690
    10791    llvm::PointerType * getStreamSetPointerType() const {
    108         return getStreamSetBlockType()->getPointerTo(mAddressSpace);
    109     }
    110 
    111 protected:
    112 
    113     virtual llvm::Type * getStreamSetBlockType() const;
     92        return mType->getPointerTo(mAddressSpace);
     93    }
     94
     95protected:
    11496
    11597    virtual llvm::Value * getStreamBlockPtr(IDISA::IDISA_Builder * const b, llvm::Value * handle, llvm::Value * streamIndex, llvm::Value * blockIndex, const bool readOnly) const;
     
    119101    virtual llvm::Value * getStreamSetCount(IDISA::IDISA_Builder * const b, llvm::Value * handle) const;
    120102
    121     virtual llvm::Value * getRawItemPointer(IDISA::IDISA_Builder * const b, llvm::Value * handle, llvm::Value * absolutePosition) const;
    122 
    123     virtual void setBaseAddress(IDISA::IDISA_Builder * const b, llvm::Value * handle, llvm::Value * addr) const;
    124 
    125     virtual void setBufferedSize(IDISA::IDISA_Builder * const b, llvm::Value * handle, llvm::Value * size) const;
    126    
    127     virtual llvm::Value * getBufferedSize(IDISA::IDISA_Builder * const b, llvm::Value * handle) const;
    128    
    129     virtual void setCapacity(IDISA::IDISA_Builder * const b, llvm::Value * size, llvm::Value *) const;
    130    
    131     virtual llvm::Value * getCapacity(IDISA::IDISA_Builder * const b, llvm::Value * handle) const;
    132    
     103    virtual llvm::Value * getBaseAddress(IDISA::IDISA_Builder * const b, llvm::Value * handle) const = 0;
     104
     105    virtual void setBaseAddress(IDISA::IDISA_Builder * const b, llvm::Value * handle, llvm::Value * addr) const = 0;
     106
     107    virtual llvm::Value * getOverflowAddress(IDISA::IDISA_Builder * const b, llvm::Value * handle) const = 0;
     108
     109    virtual void setCapacity(IDISA::IDISA_Builder * const b, llvm::Value * size, llvm::Value *) const = 0;
     110   
     111    virtual llvm::Value * getCapacity(IDISA::IDISA_Builder * const b, llvm::Value * handle) const = 0;
     112
     113    virtual llvm::Value * getRawItemPointer(IDISA::IDISA_Builder * const b, llvm::Value * handle, llvm::Value * absolutePosition) const = 0;
     114
    133115    // The number of items that can be linearly accessed from a given logical stream position.
    134     virtual llvm::Value * getLinearlyAccessibleItems(IDISA::IDISA_Builder * const b, llvm::Value * handle, llvm::Value * fromPos, llvm::Value * avail, bool reverse = false) const;
    135 
    136     void createBlockCopy(IDISA::IDISA_Builder * const b, llvm::Value * targetBlockPtr, llvm::Value * sourceBlockPtr, llvm::Value * blocksToCopy) const;
    137 
    138     virtual llvm::Value * getLinearlyWritableItems(IDISA::IDISA_Builder * const b, llvm::Value * handle, llvm::Value * fromPosition, llvm::Value * consumed, bool reverse = false) const;
    139    
    140     virtual void doubleCapacity(IDISA::IDISA_Builder * const b, llvm::Value * handle) const;
    141 
    142     StreamSetBuffer(BufferKind k, llvm::Type * baseType, llvm::Type * resolvedType, unsigned BufferBlocks, unsigned OverflowBlocks, unsigned AddressSpace);
    143 
    144     bool isCapacityGuaranteed(const llvm::Value * const index, const size_t capacity) const;
    145 
    146     llvm::Value * modBufferSize(IDISA::IDISA_Builder * const b, llvm::Value * const offset) const;
    147 
    148     virtual llvm::Value * getBaseAddress(IDISA::IDISA_Builder * const b, llvm::Value * handle) const;
    149 
    150     virtual llvm::Value * getOverflowAddress(IDISA::IDISA_Builder * const b, llvm::Value * handle) const;
    151 
     116    virtual llvm::Value * getLinearlyAccessibleItems(IDISA::IDISA_Builder * const b, llvm::Value * handle, llvm::Value * fromPos, llvm::Value * avail, bool reverse = false) const = 0;
     117
     118    virtual llvm::Value * getLinearlyWritableItems(IDISA::IDISA_Builder * const b, llvm::Value * handle, llvm::Value * fromPosition, llvm::Value * consumed, bool reverse = false) const = 0;
     119   
    152120    void setProducer(kernel::Kernel * const producer) {
    153121        assert (producer);
     
    160128    }
    161129
     130    llvm::Value * getStreamSetHandle() const {
     131        return mStreamSetHandle;
     132    }
     133
     134    StreamSetBuffer(const BufferKind k, const std::unique_ptr<kernel::KernelBuilder> & b, llvm::Type * baseType, unsigned AddressSpace);
     135
    162136protected:
    163137    const BufferKind                 mBufferKind;
    164138    llvm::Type * const               mType;
    165     const size_t                     mBufferBlocks;
    166     size_t                           mOverflowBlocks;    /* Number of data blocks of additional space at the end of the buffer for writing only. */
    167139    const unsigned                   mAddressSpace;
    168     llvm::Value *                    mStreamSetBufferPtr;
     140    llvm::Value *                    mStreamSetHandle;
    169141    llvm::Type * const               mBaseType;
    170142    std::string                      mUniqueID;
     
    173145};   
    174146
    175 class SourceBuffer final : public StreamSetBuffer {
     147class ExternalBuffer final : public StreamSetBuffer {
    176148    friend class kernel::KernelBuilder;
    177149public:
    178150    static inline bool classof(const StreamSetBuffer * b) {
    179         return b->getBufferKind() == BufferKind::SourceBuffer;
    180     }
    181 
    182     SourceBuffer(const std::unique_ptr<kernel::KernelBuilder> & b, llvm::Type * type, unsigned MemoryAddressSpace = 0, unsigned StructAddressSpace = 0);
     151        return b->getBufferKind() == BufferKind::ExternalBuffer;
     152    }
     153
     154    enum Field {BaseAddress, Capacity};
     155
     156    ExternalBuffer(const std::unique_ptr<kernel::KernelBuilder> & b, llvm::Type * const type,
     157                   llvm::Value * const externalAddress = nullptr, const unsigned AddressSpace = 0);
     158
     159    void allocateBuffer(const std::unique_ptr<kernel::KernelBuilder> & b) override;
     160
     161    void releaseBuffer(const std::unique_ptr<kernel::KernelBuilder> & kb) const override;
    183162
    184163    bool isUnbounded() const override {
     
    188167protected:
    189168
     169    llvm::Value * getBaseAddress(IDISA::IDISA_Builder * const b, llvm::Value * handle) const override;
     170
    190171    void setBaseAddress(IDISA::IDISA_Builder * const b, llvm::Value * handle, llvm::Value * addr) const override;
    191172
    192     void setBufferedSize(IDISA::IDISA_Builder * const b, llvm::Value * handle, llvm::Value * size) const override;
    193 
    194     void setCapacity(IDISA::IDISA_Builder * const b, llvm::Value * handle, llvm::Value * c) const override;
    195 
    196     llvm::Value * getBufferedSize(IDISA::IDISA_Builder * const b, llvm::Value * handle) const override;
    197    
     173    llvm::Value * getOverflowAddress(IDISA::IDISA_Builder * const b, llvm::Value * handle) const override;
     174
     175    void setCapacity(IDISA::IDISA_Builder * const b, llvm::Value * size, llvm::Value * capacity) const override;
     176
    198177    llvm::Value * getCapacity(IDISA::IDISA_Builder * const b, llvm::Value * handle) const override;
    199    
     178
     179    llvm::Value * getRawItemPointer(IDISA::IDISA_Builder * const b, llvm::Value * handle, llvm::Value * absolutePosition) const override;
     180
    200181    llvm::Value * getLinearlyAccessibleItems(IDISA::IDISA_Builder * const b, llvm::Value * handle, llvm::Value * fromPosition, llvm::Value * avail, bool reverse = false) const override;
    201 
     182   
    202183    llvm::Value * getLinearlyWritableItems(IDISA::IDISA_Builder * const b, llvm::Value * handle, llvm::Value * fromPosition, llvm::Value * consumed, bool reverse = false) const override;
    203184
    204     llvm::Type * getStreamSetBlockType() const override;
    205 
    206     void allocateBuffer(const std::unique_ptr<kernel::KernelBuilder> & kb) override;
     185private:
     186
     187    llvm::Value * mExternalAddress;
     188
     189};
     190
     191class StaticBuffer final : public StreamSetBuffer {
     192    friend class kernel::KernelBuilder;
     193public:
     194    static inline bool classof(const StreamSetBuffer * b) {
     195        return b->getBufferKind() == BufferKind::StaticBuffer;
     196    }
     197   
     198    StaticBuffer(const std::unique_ptr<kernel::KernelBuilder> & b, llvm::Type * const type,
     199                 const size_t capacity, const size_t overflowBlocks = 0, const unsigned AddressSpace = 0);
     200
     201    void allocateBuffer(const std::unique_ptr<kernel::KernelBuilder> & b) override;
    207202
    208203    void releaseBuffer(const std::unique_ptr<kernel::KernelBuilder> & kb) const override;
    209204
    210     enum Field {BaseAddress, BufferedSize, Capacity};
     205    bool supportsCopyBack() const override {
     206        return mOverflow > 0;
     207    }
     208
     209protected:
    211210
    212211    llvm::Value * getBaseAddress(IDISA::IDISA_Builder * const b, llvm::Value * handle) const override;
    213212
     213    void setBaseAddress(IDISA::IDISA_Builder * const b, llvm::Value * handle, llvm::Value * addr) const override;
     214
     215    llvm::Value * getOverflowAddress(IDISA::IDISA_Builder * const b, llvm::Value * handle) const override;
     216
     217    llvm::Value * getCapacity(IDISA::IDISA_Builder * const b, llvm::Value * handle) const override;
     218
     219    void setCapacity(IDISA::IDISA_Builder * const b, llvm::Value * size, llvm::Value *) const override;
     220
     221    llvm::Value * getStreamBlockPtr(IDISA::IDISA_Builder * const b, llvm::Value * handle, llvm::Value * streamIndex, llvm::Value * blockIndex, const bool readOnly) const override;
     222
     223    llvm::Value * getStreamPackPtr(IDISA::IDISA_Builder * const b, llvm::Value * handle, llvm::Value * streamIndex, llvm::Value * blockIndex, llvm::Value * packIndex, const bool readOnly) const override;
     224
     225    llvm::Value * getRawItemPointer(IDISA::IDISA_Builder * const b, llvm::Value * handle, llvm::Value * absolutePosition) const override;
     226
     227    llvm::Value * getLinearlyAccessibleItems(IDISA::IDISA_Builder * const b, llvm::Value * handle, llvm::Value * fromPosition, llvm::Value * avail, bool reverse = false) const override;
     228
     229    llvm::Value * getLinearlyWritableItems(IDISA::IDISA_Builder * const b, llvm::Value * handle, llvm::Value * fromPosition, llvm::Value * consumed, bool reverse = false) const override;
     230
     231private:
     232
     233    llvm::Value * modByCapacity(IDISA::IDISA_Builder * const b, llvm::Value * const offset) const;
     234
     235private:
     236
     237    const size_t    mCapacity;
     238    const size_t    mOverflow;
     239
    214240};
    215 
    216 class ExternalBuffer final : public StreamSetBuffer {
    217     friend class kernel::KernelBuilder;
    218 public:
    219     static inline bool classof(const StreamSetBuffer * b) {
    220         return b->getBufferKind() == BufferKind::ExternalBuffer;
    221     }
    222 
    223     ExternalBuffer(const std::unique_ptr<kernel::KernelBuilder> & b, llvm::Type * type, llvm::Value * addr, unsigned AddressSpace = 0);
    224 
    225     bool isUnbounded() const override {
    226         return true;
    227     }
    228 
    229     void allocateBuffer(const std::unique_ptr<kernel::KernelBuilder> & b) override;
    230 
    231     void releaseBuffer(const std::unique_ptr<kernel::KernelBuilder> & kb) const override;
    232 
    233 protected:
    234 
    235     llvm::Value * getLinearlyAccessibleItems(IDISA::IDISA_Builder * const b, llvm::Value * handle, llvm::Value * fromPosition, llvm::Value * avail, bool reverse = false) const override;
    236    
    237     llvm::Value * getLinearlyWritableItems(IDISA::IDISA_Builder * const b, llvm::Value * handle, llvm::Value * fromPosition, llvm::Value * consumed, bool reverse = false) const override;
    238 
    239     llvm::Value * getBufferedSize(IDISA::IDISA_Builder * const b, llvm::Value * handle) const override;
    240 
    241     llvm::Value * getCapacity(IDISA::IDISA_Builder * const b, llvm::Value * handle) const override;
    242 };
    243 
    244 class CircularBuffer : public StreamSetBuffer {
    245     friend class kernel::KernelBuilder;
    246 public:
    247     static inline bool classof(const StreamSetBuffer * b) {
    248         return b->getBufferKind() == BufferKind::CircularBuffer;
    249     }
    250    
    251     CircularBuffer(const std::unique_ptr<kernel::KernelBuilder> & b, llvm::Type * type, size_t bufferBlocks, unsigned AddressSpace = 0);
    252 
    253 protected:
    254 
    255     llvm::Value * getRawItemPointer(IDISA::IDISA_Builder * const b, llvm::Value * handle, llvm::Value * absolutePosition) const final;
    256 
    257     CircularBuffer(const BufferKind kind, const std::unique_ptr<kernel::KernelBuilder> & b, llvm::Type * type, size_t bufferBlocks, size_t overflowBlocks, unsigned AddressSpace);
    258 
    259 };
    260    
    261 
    262 //
    263 //  A CircularCopybackBuffer operates as a circular buffer with an overflow area
    264 //  for temporary use by the kernel that writes to it.   If the kernel uses the overflow
    265 //  area, it must perform the doCopyBack action before releasing the buffer for use by
    266 //  subsequent kernels.
    267 //  Kernels that read from a CircularCopybackBuffer must not access the overflow area.
    268 //
    269 class CircularCopybackBuffer final : public CircularBuffer {
    270     friend class kernel::KernelBuilder;
    271 public:
    272     static inline bool classof(const StreamSetBuffer * b) {return b->getBufferKind() == BufferKind::CircularCopybackBuffer;}
    273    
    274     CircularCopybackBuffer(const std::unique_ptr<kernel::KernelBuilder> & b, llvm::Type * type, size_t bufferBlocks, size_t overflowBlocks = 1, unsigned AddressSpace = 0);
    275        
    276 };
    277 
    278 // ExpandableBuffers do not allow access to the base stream set but will automatically increase the number of streams
    279 // within their set whenever the index exceeds its capacity
    280 //
    281 class ExpandableBuffer final : public StreamSetBuffer {
    282     friend class kernel::KernelBuilder;
    283 public:
    284     static inline bool classof(const StreamSetBuffer * b) {return b->getBufferKind() == BufferKind::ExpandableBuffer;}
    285 
    286     ExpandableBuffer(const std::unique_ptr<kernel::KernelBuilder> & b, llvm::Type * type, size_t bufferBlocks, unsigned AddressSpace = 0);
    287 
    288     void allocateBuffer(const std::unique_ptr<kernel::KernelBuilder> & b) override;
    289 
    290     void releaseBuffer(const std::unique_ptr<kernel::KernelBuilder> & kb) const override;
    291 
    292 protected:
    293 
    294     llvm::Value * getStreamBlockPtr(IDISA::IDISA_Builder * const b, llvm::Value * handle, llvm::Value * streamIndex, llvm::Value * blockIndex, const bool readOnly) const override;
    295 
    296     llvm::Value * getStreamPackPtr(IDISA::IDISA_Builder * const b, llvm::Value * handle, llvm::Value * streamIndex, llvm::Value * blockIndex, llvm::Value * packIndex, const bool readOnly) const override;
    297 
    298     llvm::Value * getLinearlyAccessibleItems(IDISA::IDISA_Builder * const b, llvm::Value * handle, llvm::Value * fromPosition, llvm::Value * avail, bool reverse = false) const override;
    299    
    300     llvm::Value * getStreamSetCount(IDISA::IDISA_Builder * const b, llvm::Value * handle) const override;
    301 
    302     llvm::Value * getBaseAddress(IDISA::IDISA_Builder * const b, llvm::Value * handle) const override;
    303 
    304 private:
    305 
    306     std::pair<llvm::Value *, llvm::Value *> getInternalStreamBuffer(IDISA::IDISA_Builder * const b, llvm::Value * handle, llvm::Value * streamIndex, llvm::Value * blockIndex, const bool readOnly) const;
    307 
    308 private:
    309 
    310     const uint64_t  mInitialCapacity;
    311 
    312 };
    313    
     241       
    314242// Dynamically allocated circular buffers: TODO: add copyback, swizzle support, dynamic allocation, producer, consumer, length
    315243class DynamicBuffer final : public StreamSetBuffer {
     
    317245    friend class kernel::KernelBuilder;
    318246
    319     /* Dynamic data fields stored in the buffer struct */
    320     enum Field {BaseAddress, PriorBaseAddress, AllocatedCapacity, WorkingBlocks, Length, ProducedPosition, ConsumedPosition, FieldCount};
    321 
    322     /* BaseAddress - the physical base address of the memory area for stream set data.
    323      PriorBaseAddress - the physical base address of the previous memory area for stream set data
    324      (the immediately prior memory area is preserved in case any users in other threads
    325      are accessing it).
    326      WorkingBlocks - the physical size of the buffer for use in reading and writing data.
    327      AllocatedCapacity - physical size available for expansion in place
    328      Length - actual final length of stream set or -1 for unknown
    329      ProducedPosition - the total number of items ever generated and stored in the buffer
    330      ConsumedPosition - the number of buffer items that are known to have been fully processed by all users
    331      */
     247    enum Field {BaseAddress, PriorBaseAddress, Capacity};
    332248
    333249public:
    334250    static inline bool classof(const StreamSetBuffer * b) {return b->getBufferKind() == BufferKind::DynamicBuffer;}
    335251   
    336     DynamicBuffer(const std::unique_ptr<kernel::KernelBuilder> & b, llvm::Type * type, size_t initialCapacity, size_t overflowBlocks = 0, unsigned swizzleFactor = 1, unsigned addrSpace = 0);
     252    DynamicBuffer(const std::unique_ptr<kernel::KernelBuilder> & b, llvm::Type * type, size_t initialCapacity, size_t overflowBlocks = 0, unsigned AddressSpace = 0);
    337253
    338254    void allocateBuffer(const std::unique_ptr<kernel::KernelBuilder> & b) override;
     
    340256    void releaseBuffer(const std::unique_ptr<kernel::KernelBuilder> & b) const override;
    341257   
    342 protected:
     258    bool supportsCopyBack() const override {
     259        return mOverflow > 0;
     260    }
     261
     262protected:
     263
     264    llvm::Value * getBaseAddress(IDISA::IDISA_Builder * const b, llvm::Value * handle) const override;
     265
     266    void setBaseAddress(IDISA::IDISA_Builder * const b, llvm::Value * handle, llvm::Value * addr) const override;
     267
     268    llvm::Value * getOverflowAddress(IDISA::IDISA_Builder * const b, llvm::Value * handle) const override;
     269   
     270    llvm::Value * getCapacity(IDISA::IDISA_Builder * const b, llvm::Value * handle) const override;
     271   
     272    void setCapacity(IDISA::IDISA_Builder * const b, llvm::Value * size, llvm::Value *) const override;
     273
     274    llvm::Value * getBlockAddress(IDISA::IDISA_Builder * const b, llvm::Value * handle, llvm::Value * blockIndex) const;
     275
     276    llvm::Value * getRawItemPointer(IDISA::IDISA_Builder * const b, llvm::Value * handle, llvm::Value * absolutePosition) const override;
    343277
    344278    llvm::Value * getLinearlyAccessibleItems(IDISA::IDISA_Builder * const b, llvm::Value * handle, llvm::Value * fromPosition, llvm::Value * avail, bool reverse = false) const override;
    345    
     279
    346280    llvm::Value * getLinearlyWritableItems(IDISA::IDISA_Builder * const b, llvm::Value * handle, llvm::Value * fromPosition, llvm::Value * consumed, bool reverse = false) const override;
    347281
    348     llvm::Value * getRawItemPointer(IDISA::IDISA_Builder * const b, llvm::Value * handle, llvm::Value * absolutePosition) const override;
    349    
    350     llvm::Value * getBufferedSize(IDISA::IDISA_Builder * const b, llvm::Value * handle) const override;
    351    
    352     void doubleCapacity(IDISA::IDISA_Builder * const b, llvm::Value * handle)  const final;
    353 
    354     llvm::Value * getBaseAddress(IDISA::IDISA_Builder * const b, llvm::Value * handle) const override;
    355    
    356     llvm::Value * getBlockAddress(IDISA::IDISA_Builder * const b, llvm::Value * handle, llvm::Value * blockIndex) const;
    357 
    358     llvm::Value * getOverflowAddress(IDISA::IDISA_Builder * const b, llvm::Value * handle) const override;
     282private:
     283
     284    llvm::Value * getAllocationSize(IDISA::IDISA_Builder * const b, llvm::Value * const handle, llvm::Value * const requiredItemCapacity) const;
     285
     286private:
     287
     288    const size_t    mInitialCapacity;
     289    const size_t    mOverflow;
     290
    359291};
    360292
  • icGREP/icgrep-devel/icgrep/lz4/LZ4Generator.cpp

    r6044 r6047  
    4444    this->generateMainFunc(iBuilder);
    4545
    46     StreamSetBuffer * const DecompressedByteStream = mPxDriver.addBuffer<CircularBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 8), this->getDecompressedBufferBlocks());
     46    StreamSetBuffer * const DecompressedByteStream = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 8), this->getDecompressedBufferBlocks());
    4747
    4848    // GeneratePipeline
     
    7373    this->generateMainFunc(iBuilder);
    7474
    75     StreamSetBuffer * const DecompressedByteStream = mPxDriver.addBuffer<CircularBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 8), this->getDecompressedBufferBlocks());
     75    StreamSetBuffer * const DecompressedByteStream = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 8), this->getDecompressedBufferBlocks());
    7676
    7777    // GeneratePipeline
     
    8282
    8383    // Produce unswizzled bit streams
    84     StreamSetBuffer * extractedbits = mPxDriver.addBuffer<CircularBuffer>(iBuilder, iBuilder->getStreamSetTy(8), this->getInputBufferBlocks());
     84    StreamSetBuffer * extractedbits = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(8), this->getInputBufferBlocks());
    8585    Kernel * unSwizzleK = mPxDriver.addKernelInstance<SwizzleGenerator>(iBuilder, 8, 1, 2);
    8686
     
    110110    this->generateMainFunc(iBuilder);
    111111
    112     StreamSetBuffer * const DecompressedByteStream = mPxDriver.addBuffer<CircularBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 8), this->getDecompressedBufferBlocks());
     112    StreamSetBuffer * const DecompressedByteStream = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 8), this->getDecompressedBufferBlocks());
    113113
    114114    // GeneratePipeline
     
    117117    StreamSetBuffer * const extractedBits = this->generateBitStreamExtractData(iBuilder);
    118118
    119     StreamSetBuffer * depositedBits = mPxDriver.addBuffer<CircularBuffer>(iBuilder, iBuilder->getStreamSetTy(8), this->getDecompressedBufferBlocks());
     119    StreamSetBuffer * depositedBits = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(8), this->getDecompressedBufferBlocks());
    120120    Kernel * bitStreamPDEPk = mPxDriver.addKernelInstance<BitStreamPDEPKernel>(iBuilder, 8);
    121121    mPxDriver.makeKernelCall(bitStreamPDEPk, {mDepositMarker, extractedBits}, {depositedBits});
     
    143143    this->generateMainFunc(iBuilder);
    144144
    145     StreamSetBuffer * const DecompressedByteStream = mPxDriver.addBuffer<CircularBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 8), this->getDecompressedBufferBlocks());
     145    StreamSetBuffer * const DecompressedByteStream = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 8), this->getDecompressedBufferBlocks());
    146146
    147147    // GeneratePipeline
     
    151151    auto swizzle = this->generateSwizzleExtractData(iBuilder);
    152152
    153     StreamSetBuffer * depositedSwizzle0 = mPxDriver.addBuffer<CircularCopybackBuffer>(iBuilder, iBuilder->getStreamSetTy(4), this->getInputBufferBlocks(), 1);
    154     StreamSetBuffer * depositedSwizzle1 = mPxDriver.addBuffer<CircularCopybackBuffer>(iBuilder, iBuilder->getStreamSetTy(4), this->getInputBufferBlocks(), 1);
     153    StreamSetBuffer * depositedSwizzle0 = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(4), this->getInputBufferBlocks(), 1);
     154    StreamSetBuffer * depositedSwizzle1 = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(4), this->getInputBufferBlocks(), 1);
    155155
    156156    Kernel * multiplePdepK = mPxDriver.addKernelInstance<SwizzledMultiplePDEPkernel>(iBuilder, 4, 2);
     
    158158
    159159    // Produce unswizzled bit streams
    160     StreamSetBuffer * depositedBits = mPxDriver.addBuffer<CircularBuffer>(iBuilder, iBuilder->getStreamSetTy(8), this->getInputBufferBlocks());
     160    StreamSetBuffer * depositedBits = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(8), this->getInputBufferBlocks());
    161161    Kernel * unSwizzleK = mPxDriver.addKernelInstance<SwizzleGenerator>(iBuilder, 8, 1, 2);
    162162    mPxDriver.makeKernelCall(unSwizzleK, {depositedSwizzle0, depositedSwizzle1}, {depositedBits});
     
    183183    this->generateMainFunc(iBuilder);
    184184
    185     StreamSetBuffer * const DecompressedByteStream = mPxDriver.addBuffer<CircularBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 8), this->getDecompressedBufferBlocks());
     185    StreamSetBuffer * const DecompressedByteStream = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 8), this->getDecompressedBufferBlocks());
    186186
    187187    // GeneratePipeline
     
    190190    StreamSetBuffer * const extractedBits = this->generateBitStreamExtractData(iBuilder);
    191191
    192     StreamSetBuffer * depositedBits = mPxDriver.addBuffer<CircularBuffer>(iBuilder, iBuilder->getStreamSetTy(8), this->getDecompressedBufferBlocks());
     192    StreamSetBuffer * depositedBits = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(8), this->getDecompressedBufferBlocks());
    193193    Kernel * bitStreamPDEPk = mPxDriver.addKernelInstance<BitStreamPDEPKernel>(iBuilder, 8);
    194194    mPxDriver.makeKernelCall(bitStreamPDEPk, {mDepositMarker, extractedBits}, {depositedBits});
    195195
    196     StreamSetBuffer * matchCopiedBits = mPxDriver.addBuffer<CircularBuffer>(iBuilder, iBuilder->getStreamSetTy(8), this->getInputBufferBlocks());
     196    StreamSetBuffer * matchCopiedBits = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(8), this->getInputBufferBlocks());
    197197    Kernel * bitStreamMatchCopyK = mPxDriver.addKernelInstance<LZ4BitStreamMatchCopyKernel>(iBuilder, 8);
    198198    mPxDriver.makeKernelCall(bitStreamMatchCopyK, {mMatchOffsetMarker, mM0Marker, mCompressedByteStream, depositedBits}, {matchCopiedBits});
     
    220220    this->generateMainFunc(iBuilder);
    221221
    222     StreamSetBuffer * const DecompressedByteStream = mPxDriver.addBuffer<CircularBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 8), this->getDecompressedBufferBlocks());
     222    StreamSetBuffer * const DecompressedByteStream = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 8), this->getDecompressedBufferBlocks());
    223223
    224224    // GeneratePipeline
     
    228228    auto swizzle = this->generateSwizzleExtractData(iBuilder);
    229229
    230     StreamSetBuffer * depositedSwizzle0 = mPxDriver.addBuffer<CircularCopybackBuffer>(iBuilder, iBuilder->getStreamSetTy(4), this->getDecompressedBufferBlocks(), 1);
    231     StreamSetBuffer * depositedSwizzle1 = mPxDriver.addBuffer<CircularCopybackBuffer>(iBuilder, iBuilder->getStreamSetTy(4), this->getDecompressedBufferBlocks(), 1);
     230    StreamSetBuffer * depositedSwizzle0 = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(4), this->getDecompressedBufferBlocks(), 1);
     231    StreamSetBuffer * depositedSwizzle1 = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(4), this->getDecompressedBufferBlocks(), 1);
    232232
    233233    Kernel * multiplePdepK = mPxDriver.addKernelInstance<SwizzledMultiplePDEPkernel>(iBuilder, 4, 2);
    234234    mPxDriver.makeKernelCall(multiplePdepK, {mDepositMarker, swizzle.first, swizzle.second}, {depositedSwizzle0, depositedSwizzle1});
    235235
    236     StreamSetBuffer * matchCopiedSwizzle0 = mPxDriver.addBuffer<CircularCopybackBuffer>(iBuilder, iBuilder->getStreamSetTy(4), this->getDecompressedBufferBlocks(), 1);
    237     StreamSetBuffer * matchCopiedSwizzle1 = mPxDriver.addBuffer<CircularCopybackBuffer>(iBuilder, iBuilder->getStreamSetTy(4), this->getDecompressedBufferBlocks(), 1);
     236    StreamSetBuffer * matchCopiedSwizzle0 = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(4), this->getDecompressedBufferBlocks(), 1);
     237    StreamSetBuffer * matchCopiedSwizzle1 = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(4), this->getDecompressedBufferBlocks(), 1);
    238238
    239239    Kernel * swizzledMatchCopyK = mPxDriver.addKernelInstance<LZ4SwizzledMatchCopyKernel>(iBuilder, 4, 2, 4);
     
    242242
    243243    // Produce unswizzled bit streams
    244     StreamSetBuffer * matchCopiedBits = mPxDriver.addBuffer<CircularBuffer>(iBuilder, iBuilder->getStreamSetTy(8), this->getInputBufferBlocks());
     244    StreamSetBuffer * matchCopiedBits = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(8), this->getInputBufferBlocks());
    245245    Kernel * unSwizzleK = mPxDriver.addKernelInstance<SwizzleGenerator>(iBuilder, 8, 1, 2);
    246246    mPxDriver.makeKernelCall(unSwizzleK, {matchCopiedSwizzle0, matchCopiedSwizzle1}, {matchCopiedBits});
     
    292292
    293293void LZ4Generator::generateLoadByteStreamAndBitStream(const std::unique_ptr<kernel::KernelBuilder> & iBuilder) {
    294     mCompressedByteStream = mPxDriver.addBuffer<SourceBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 8));
    295     mCompressedBasisBits = mPxDriver.addBuffer<CircularBuffer>(iBuilder, iBuilder->getStreamSetTy(8, 1), this->getInputBufferBlocks());
     294    mCompressedByteStream = mPxDriver.addBuffer<ExternalBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 8));
     295    mCompressedBasisBits = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(8, 1), this->getInputBufferBlocks());
    296296
    297297    kernel::Kernel * sourceK = mPxDriver.addKernelInstance<MemorySourceKernel>(iBuilder);
     
    305305void LZ4Generator::generateExtractAndDepositMarkers(const std::unique_ptr<kernel::KernelBuilder> & iBuilder) {
    306306    //// Decode Block Information
    307     StreamSetBuffer * const BlockData_IsCompressed = mPxDriver.addBuffer<CircularCopybackBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 8), this->getInputBufferBlocks(), 1);
    308     StreamSetBuffer * const BlockData_BlockStart = mPxDriver.addBuffer<CircularCopybackBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 64), this->getInputBufferBlocks(), 1);
    309     StreamSetBuffer * const BlockData_BlockEnd = mPxDriver.addBuffer<CircularCopybackBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 64), this->getInputBufferBlocks(), 1);
     307    StreamSetBuffer * const BlockData_IsCompressed = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 8), this->getInputBufferBlocks(), 1);
     308    StreamSetBuffer * const BlockData_BlockStart = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 64), this->getInputBufferBlocks(), 1);
     309    StreamSetBuffer * const BlockData_BlockEnd = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 64), this->getInputBufferBlocks(), 1);
    310310
    311311    //// Generate Helper Markers Extenders, FX, XF
    312     StreamSetBuffer * const Extenders = mPxDriver.addBuffer<CircularCopybackBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 1), this->getInputBufferBlocks(), 1);
    313     mMatchOffsetMarker = mPxDriver.addBuffer<CircularBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 1), this->getInputBufferBlocks());
     312    StreamSetBuffer * const Extenders = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 1), this->getInputBufferBlocks(), 1);
     313    mMatchOffsetMarker = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 1), this->getInputBufferBlocks());
    314314        // FX and XF streams will be added to IndexBuilderKernel in the future
    315 //    StreamSetBuffer * const CC_0xFX = mPxDriver.addBuffer<CircularBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 1), this->getInputBufferBlocks());
    316 //    StreamSetBuffer * const CC_0xXF = mPxDriver.addBuffer<CircularBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 1), this->getInputBufferBlocks());
     315//    StreamSetBuffer * const CC_0xFX = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 1), this->getInputBufferBlocks());
     316//    StreamSetBuffer * const CC_0xXF = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 1), this->getInputBufferBlocks());
    317317
    318318    Kernel * extenderK = mPxDriver.addKernelInstance<ParabixCharacterClassKernelBuilder>(iBuilder, "extenders", std::vector<re::CC *>{re::makeCC(0xFF)}, 8);
     
    341341
    342342    //TODO handle uncompressed part
    343     StreamSetBuffer * const UncompressedStartPos = mPxDriver.addBuffer<CircularCopybackBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 64), this->getInputBufferBlocks(), 1);
    344     StreamSetBuffer * const UncompressedLength = mPxDriver.addBuffer<CircularCopybackBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 64), this->getInputBufferBlocks(), 1);
    345     StreamSetBuffer * const UncompressedOutputPos = mPxDriver.addBuffer<CircularCopybackBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 64), this->getInputBufferBlocks(), 1);
    346 
    347     mDeletionMarker = mPxDriver.addBuffer<CircularBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 1), this->getInputBufferBlocks());
    348     mM0Marker = mPxDriver.addBuffer<CircularBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 1), this->getDecompressedBufferBlocks());
    349     mDepositMarker = mPxDriver.addBuffer<CircularBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 1), this->getDecompressedBufferBlocks());
     343    StreamSetBuffer * const UncompressedStartPos = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 64), this->getInputBufferBlocks(), 1);
     344    StreamSetBuffer * const UncompressedLength = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 64), this->getInputBufferBlocks(), 1);
     345    StreamSetBuffer * const UncompressedOutputPos = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 64), this->getInputBufferBlocks(), 1);
     346
     347    mDeletionMarker = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 1), this->getInputBufferBlocks());
     348    mM0Marker = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 1), this->getDecompressedBufferBlocks());
     349    mDepositMarker = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 1), this->getDecompressedBufferBlocks());
    350350
    351351    Kernel* Lz4IndexBuilderK = mPxDriver.addKernelInstance<LZ4IndexBuilderKernel>(iBuilder);
     
    380380
    381381std::pair<StreamSetBuffer*, StreamSetBuffer*> LZ4Generator::generateSwizzleExtractData(const std::unique_ptr<kernel::KernelBuilder> & iBuilder) {
    382     StreamSetBuffer * u16Swizzle0 = mPxDriver.addBuffer<CircularCopybackBuffer>(iBuilder, iBuilder->getStreamSetTy(4), this->getInputBufferBlocks(), 1);
    383     StreamSetBuffer * u16Swizzle1 = mPxDriver.addBuffer<CircularCopybackBuffer>(iBuilder, iBuilder->getStreamSetTy(4), this->getInputBufferBlocks(), 1);
     382    StreamSetBuffer * u16Swizzle0 = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(4), this->getInputBufferBlocks(), 1);
     383    StreamSetBuffer * u16Swizzle1 = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(4), this->getInputBufferBlocks(), 1);
    384384
    385385    Kernel * delK = mPxDriver.addKernelInstance<SwizzledDeleteByPEXTkernel>(iBuilder, 8, 64);
     
    390390void LZ4Generator::generateCompressionMarker(const std::unique_ptr<kernel::KernelBuilder> & iBuilder) {
    391391    if (!mCompressionMarker) {
    392         mCompressionMarker = mPxDriver.addBuffer<CircularBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 1), this->getInputBufferBlocks());
     392        mCompressionMarker = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 1), this->getInputBufferBlocks());
    393393        Kernel * bitstreamNotK = mPxDriver.addKernelInstance<LZ4BitStreamNotKernel>(iBuilder);
    394394        mPxDriver.makeKernelCall(bitstreamNotK, {mDeletionMarker}, {mCompressionMarker});
     
    400400
    401401    // Deletion
    402     StreamSetBuffer * deletedBits = mPxDriver.addBuffer<CircularBuffer>(iBuilder, iBuilder->getStreamSetTy(8), this->getInputBufferBlocks());
    403     StreamSetBuffer * deletionCounts = mPxDriver.addBuffer<CircularBuffer>(iBuilder, iBuilder->getStreamSetTy(), this->getInputBufferBlocks());
     402    StreamSetBuffer * deletedBits = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(8), this->getInputBufferBlocks());
     403    StreamSetBuffer * deletionCounts = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(), this->getInputBufferBlocks());
    404404
    405405    Kernel * delK = mPxDriver.addKernelInstance<PEXTFieldCompressKernel>(iBuilder, 64, 8);
    406406    mPxDriver.makeKernelCall(delK, {mCompressedBasisBits, mCompressionMarker}, {deletedBits, deletionCounts});
    407407
    408     StreamSetBuffer * compressedBits = mPxDriver.addBuffer<CircularBuffer>(iBuilder, iBuilder->getStreamSetTy(8), this->getInputBufferBlocks());
     408    StreamSetBuffer * compressedBits = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(8), this->getInputBufferBlocks());
    409409    Kernel * streamCompressionK = mPxDriver.addKernelInstance<StreamCompressKernel>(iBuilder, 64, 8);
    410410    mPxDriver.makeKernelCall(streamCompressionK, {deletedBits, deletionCounts}, {compressedBits});
  • icGREP/icgrep-devel/icgrep/lz4/LZ4GrepGenerator.cpp

    r6044 r6047  
    100100    const unsigned baseBufferSize = this->getInputBufferBlocks();
    101101    auto & idb = mPxDriver.getBuilder();
    102     StreamSetBuffer * LineFeedStream = mPxDriver.addBuffer<CircularBuffer>(idb, idb->getStreamSetTy(1, 1), baseBufferSize);
     102    StreamSetBuffer * LineFeedStream = mPxDriver.addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(1, 1), baseBufferSize);
    103103    kernel::Kernel * linefeedK = mPxDriver.addKernelInstance<kernel::LineFeedKernelBuilder>(idb, Binding{idb->getStreamSetTy(8), "basis", FixedRate(), Principal()});
    104104    mPxDriver.makeKernelCall(linefeedK, {decompressedBasisBits}, {LineFeedStream});
     
    113113    this->generateCompressionMarker(idb);
    114114
    115     StreamSetBuffer * deletedBits = mPxDriver.addBuffer<CircularBuffer>(idb, idb->getStreamSetTy(numberOfStream), this->getInputBufferBlocks());
    116     StreamSetBuffer * deletionCounts = mPxDriver.addBuffer<CircularBuffer>(idb, idb->getStreamSetTy(), this->getInputBufferBlocks());
     115    StreamSetBuffer * deletedBits = mPxDriver.addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(numberOfStream), this->getInputBufferBlocks());
     116    StreamSetBuffer * deletionCounts = mPxDriver.addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(), this->getInputBufferBlocks());
    117117
    118118    Kernel * delK = mPxDriver.addKernelInstance<PEXTFieldCompressKernel>(idb, 64, numberOfStream);
    119119    mPxDriver.makeKernelCall(delK, {compressedBitStream, mCompressionMarker}, {deletedBits, deletionCounts});
    120120
    121     StreamSetBuffer * compressedLineStream = mPxDriver.addBuffer<CircularBuffer>(idb, idb->getStreamSetTy(numberOfStream), this->getInputBufferBlocks());
     121    StreamSetBuffer * compressedLineStream = mPxDriver.addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(numberOfStream), this->getInputBufferBlocks());
    122122    Kernel * streamCompressionK = mPxDriver.addKernelInstance<StreamCompressKernel>(idb, 64, numberOfStream);
    123123    mPxDriver.makeKernelCall(streamCompressionK, {deletedBits, deletionCounts}, {compressedLineStream});
    124124
    125125    // Deposit
    126     StreamSetBuffer * depositedBits = mPxDriver.addBuffer<CircularBuffer>(idb, idb->getStreamSetTy(numberOfStream), this->getDecompressedBufferBlocks());
     126    StreamSetBuffer * depositedBits = mPxDriver.addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(numberOfStream), this->getDecompressedBufferBlocks());
    127127    Kernel * bitStreamPDEPk = mPxDriver.addKernelInstance<BitStreamPDEPKernel>(idb, numberOfStream, prefix + "BitStreamPDEPKernel");
    128128    mPxDriver.makeKernelCall(bitStreamPDEPk, {mDepositMarker, compressedLineStream}, {depositedBits});
    129129
    130130    // Match Copy
    131     StreamSetBuffer * matchCopiedBits = mPxDriver.addBuffer<CircularBuffer>(idb, idb->getStreamSetTy(numberOfStream), this->getDecompressedBufferBlocks());
     131    StreamSetBuffer * matchCopiedBits = mPxDriver.addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(numberOfStream), this->getDecompressedBufferBlocks());
    132132    Kernel * bitStreamMatchCopyK = mPxDriver.addKernelInstance<LZ4BitStreamMatchCopyKernel>(idb, numberOfStream, prefix + "BitStreamMatchCopyKernel");
    133133    mPxDriver.makeKernelCall(bitStreamMatchCopyK, {mMatchOffsetMarker, mM0Marker, mCompressedByteStream, depositedBits}, {matchCopiedBits});
     
    141141    auto & idb = mGrepDriver->getBuilder();
    142142
    143     StreamSetBuffer * CompressedLineFeedStream = mPxDriver.addBuffer<CircularBuffer>(idb, idb->getStreamSetTy(1, 1), baseBufferSize);
     143    StreamSetBuffer * CompressedLineFeedStream = mPxDriver.addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(1, 1), baseBufferSize);
    144144    kernel::Kernel * linefeedK = mPxDriver.addKernelInstance<kernel::LineFeedKernelBuilder>(idb, Binding{idb->getStreamSetTy(8), "basis", FixedRate(), Principal()});
    145145    mPxDriver.makeKernelCall(linefeedK, {mCompressedBasisBits}, {CompressedLineFeedStream});
     
    179179
    180180    const auto UnicodeSets = re::collectCCs(mREs[0], &cc::Unicode, std::set<re::Name *>({re::makeZeroWidth("\\b{g}")}));
    181     StreamSetBuffer * const MatchResults = mGrepDriver->addBuffer<CircularBuffer>(idb, idb->getStreamSetTy(1, 1), baseBufferSize);
     181    StreamSetBuffer * const MatchResults = mGrepDriver->addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(1, 1), baseBufferSize);
    182182
    183183    mpx = make_unique<cc::MultiplexedAlphabet>("mpx", UnicodeSets);
     
    185185    std::vector<re::CC *> mpx_basis = mpx->getMultiplexedCCs();
    186186    auto numOfCharacterClasses = mpx_basis.size();
    187     StreamSetBuffer * CharClasses = mGrepDriver->addBuffer<CircularBuffer>(idb, idb->getStreamSetTy(numOfCharacterClasses), baseBufferSize);
     187    StreamSetBuffer * CharClasses = mGrepDriver->addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(numOfCharacterClasses), baseBufferSize);
    188188
    189189    kernel::Kernel * ccK = mGrepDriver->addKernelInstance<kernel::CharClassesKernel>(idb, std::move(mpx_basis));
    190190    mGrepDriver->makeKernelCall(ccK, {mCompressedBasisBits}, {CharClasses}); //TODO get it from compression space
    191191
    192     StreamSetBuffer * CompressedLineFeedStream = mPxDriver.addBuffer<CircularBuffer>(idb, idb->getStreamSetTy(1, 1), baseBufferSize);
     192    StreamSetBuffer * CompressedLineFeedStream = mPxDriver.addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(1, 1), baseBufferSize);
    193193    kernel::Kernel * linefeedK = mPxDriver.addKernelInstance<kernel::LineFeedKernelBuilder>(idb, Binding{idb->getStreamSetTy(8), "basis", FixedRate(), Principal()});
    194194    mPxDriver.makeKernelCall(linefeedK, {mCompressedBasisBits}, {CompressedLineFeedStream});
    195195
    196     StreamSetBuffer * combinedStream = mGrepDriver->addBuffer<CircularBuffer>(idb, idb->getStreamSetTy(numOfCharacterClasses + 1), baseBufferSize);
     196    StreamSetBuffer * combinedStream = mGrepDriver->addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(numOfCharacterClasses + 1), baseBufferSize);
    197197    kernel::Kernel* streamCombineKernel = mPxDriver.addKernelInstance<StreamsCombineKernel>(idb, std::vector<unsigned>({1, (unsigned)numOfCharacterClasses}));
    198198    mPxDriver.makeKernelCall(streamCombineKernel, {CompressedLineFeedStream, CharClasses}, {combinedStream});
    199199
    200200    StreamSetBuffer * decompressedCombinedStream = this->convertCompressedBitsStream(combinedStream, 1 + numOfCharacterClasses, "combined");
    201     StreamSetBuffer * LineBreakStream = mGrepDriver->addBuffer<CircularBuffer>(idb, idb->getStreamSetTy(1), baseBufferSize);
    202     StreamSetBuffer * decompressedCharClasses = mGrepDriver->addBuffer<CircularBuffer>(idb, idb->getStreamSetTy(numOfCharacterClasses), baseBufferSize);
     201    StreamSetBuffer * LineBreakStream = mGrepDriver->addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(1), baseBufferSize);
     202    StreamSetBuffer * decompressedCharClasses = mGrepDriver->addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(numOfCharacterClasses), baseBufferSize);
    203203    kernel::Kernel* streamSplitKernel = mPxDriver.addKernelInstance<StreamsSplitKernel>(idb, std::vector<unsigned>({1, (unsigned)numOfCharacterClasses}));
    204204    mPxDriver.makeKernelCall(streamSplitKernel, {decompressedCombinedStream}, {LineBreakStream, decompressedCharClasses});
     
    209209     */
    210210
    211     StreamSetBuffer * fakeMatchCopiedBits = mPxDriver.addBuffer<CircularBuffer>(idb, idb->getStreamSetTy(8), this->getInputBufferBlocks());
     211    StreamSetBuffer * fakeMatchCopiedBits = mPxDriver.addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(8), this->getInputBufferBlocks());
    212212    Kernel* fakeStreamGeneratorK = mPxDriver.addKernelInstance<LZ4FakeStreamGeneratingKernel>(idb, numOfCharacterClasses, 8);
    213213    mPxDriver.makeKernelCall(fakeStreamGeneratorK, {decompressedCharClasses}, {fakeMatchCopiedBits});
     
    219219    StreamSetBuffer * MergedResults = MatchResultsBufs[0];
    220220    if (mREs.size() > 1) {
    221         MergedResults = mGrepDriver->addBuffer<CircularBuffer>(idb, idb->getStreamSetTy(1, 1), baseBufferSize);
     221        MergedResults = mGrepDriver->addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(1, 1), baseBufferSize);
    222222        kernel::Kernel * streamsMergeK = mGrepDriver->addKernelInstance<kernel::StreamsMerge>(idb, 1, mREs.size());
    223223        mGrepDriver->makeKernelCall(streamsMergeK, MatchResultsBufs, {MergedResults});
     
    227227        StreamSetBuffer * OriginalMatches = Matches;
    228228        kernel::Kernel * matchedLinesK = mGrepDriver->addKernelInstance<kernel::MatchedLinesKernel>(idb);
    229         Matches = mGrepDriver->addBuffer<CircularBuffer>(idb, idb->getStreamSetTy(1, 1), baseBufferSize);
     229        Matches = mGrepDriver->addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(1, 1), baseBufferSize);
    230230        mGrepDriver->makeKernelCall(matchedLinesK, {OriginalMatches, LineBreakStream}, {Matches});
    231231    }
     
    235235        untilK->setInitialArguments({idb->getSize(MaxCountFlag)});
    236236        StreamSetBuffer * const AllMatches = Matches;
    237         Matches = mGrepDriver->addBuffer<CircularBuffer>(idb, idb->getStreamSetTy(1, 1), baseBufferSize);
     237        Matches = mGrepDriver->addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(1, 1), baseBufferSize);
    238238        mGrepDriver->makeKernelCall(untilK, {AllMatches}, {Matches});
    239239    }
     
    256256    const auto nREs = mREs.size();
    257257
    258     StreamSetBuffer * LineBreakStream = mGrepDriver->addBuffer<CircularBuffer>(idb, idb->getStreamSetTy(1, 1), baseBufferSize);
     258    StreamSetBuffer * LineBreakStream = mGrepDriver->addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(1, 1), baseBufferSize);
    259259    std::vector<StreamSetBuffer *> MatchResultsBufs(nREs);
    260260
    261     StreamSetBuffer * RequiredStreams = mGrepDriver->addBuffer<CircularBuffer>(idb, idb->getStreamSetTy(1, 1), baseBufferSize);
    262     StreamSetBuffer * UnicodeLB = mGrepDriver->addBuffer<CircularBuffer>(idb, idb->getStreamSetTy(1, 1), baseBufferSize);
     261    StreamSetBuffer * RequiredStreams = mGrepDriver->addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(1, 1), baseBufferSize);
     262    StreamSetBuffer * UnicodeLB = mGrepDriver->addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(1, 1), baseBufferSize);
    263263
    264264    StreamSetBuffer * LineFeedStream = this->linefeedStreamFromDecompressedBits(decompressedBasisBits);
     
    289289        std::set<re::Name *> UnicodeProperties;
    290290
    291         StreamSetBuffer * MatchResults = mGrepDriver->addBuffer<CircularBuffer>(idb, idb->getStreamSetTy(1, 1), baseBufferSize);
     291        StreamSetBuffer * MatchResults = mGrepDriver->addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(1, 1), baseBufferSize);
    292292        kernel::Kernel * icgrepK = mGrepDriver->addKernelInstance<kernel::ICGrepKernel>(idb, mREs[i], externalStreamNames);
    293293        mGrepDriver->makeKernelCall(icgrepK, icgrepInputSets, {MatchResults});
     
    297297    StreamSetBuffer * MergedResults = MatchResultsBufs[0];
    298298    if (mREs.size() > 1) {
    299         MergedResults = mGrepDriver->addBuffer<CircularBuffer>(idb, idb->getStreamSetTy(1, 1), baseBufferSize);
     299        MergedResults = mGrepDriver->addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(1, 1), baseBufferSize);
    300300        kernel::Kernel * streamsMergeK = mGrepDriver->addKernelInstance<kernel::StreamsMerge>(idb, 1, mREs.size());
    301301        mGrepDriver->makeKernelCall(streamsMergeK, MatchResultsBufs, {MergedResults});
     
    305305        StreamSetBuffer * OriginalMatches = Matches;
    306306        kernel::Kernel * matchedLinesK = mGrepDriver->addKernelInstance<kernel::MatchedLinesKernel>(idb);
    307         Matches = mGrepDriver->addBuffer<CircularBuffer>(idb, idb->getStreamSetTy(1, 1), baseBufferSize);
     307        Matches = mGrepDriver->addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(1, 1), baseBufferSize);
    308308        mGrepDriver->makeKernelCall(matchedLinesK, {OriginalMatches, LineBreakStream}, {Matches});
    309309    }
     
    313313        untilK->setInitialArguments({idb->getSize(MaxCountFlag)});
    314314        StreamSetBuffer * const AllMatches = Matches;
    315         Matches = mGrepDriver->addBuffer<CircularBuffer>(idb, idb->getStreamSetTy(1, 1), baseBufferSize);
     315        Matches = mGrepDriver->addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(1, 1), baseBufferSize);
    316316        mGrepDriver->makeKernelCall(untilK, {AllMatches}, {Matches});
    317317    }
     
    334334    this->generateScanMatchMainFunc(iBuilder);
    335335
    336     StreamSetBuffer * const DecompressedByteStream = mPxDriver.addBuffer<CircularBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 8), this->getDecompressedBufferBlocks());
     336    StreamSetBuffer * const DecompressedByteStream = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 8), this->getDecompressedBufferBlocks());
    337337
    338338    // GeneratePipeline
     
    342342    auto swizzle = this->generateSwizzleExtractData(iBuilder);
    343343
    344     StreamSetBuffer * depositedSwizzle0 = mPxDriver.addBuffer<CircularCopybackBuffer>(iBuilder, iBuilder->getStreamSetTy(4), this->getDecompressedBufferBlocks(), 1);
    345     StreamSetBuffer * depositedSwizzle1 = mPxDriver.addBuffer<CircularCopybackBuffer>(iBuilder, iBuilder->getStreamSetTy(4), this->getDecompressedBufferBlocks(), 1);
     344    StreamSetBuffer * depositedSwizzle0 = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(4), this->getDecompressedBufferBlocks(), 1);
     345    StreamSetBuffer * depositedSwizzle1 = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(4), this->getDecompressedBufferBlocks(), 1);
    346346
    347347    Kernel * multiplePdepK = mPxDriver.addKernelInstance<SwizzledMultiplePDEPkernel>(iBuilder, 4, 2);
    348348    mPxDriver.makeKernelCall(multiplePdepK, {mDepositMarker, swizzle.first, swizzle.second}, {depositedSwizzle0, depositedSwizzle1});
    349349
    350     StreamSetBuffer * matchCopiedSwizzle0 = mPxDriver.addBuffer<CircularCopybackBuffer>(iBuilder, iBuilder->getStreamSetTy(4), this->getInputBufferBlocks(), 1);
    351     StreamSetBuffer * matchCopiedSwizzle1 = mPxDriver.addBuffer<CircularCopybackBuffer>(iBuilder, iBuilder->getStreamSetTy(4), this->getInputBufferBlocks(), 1);
     350    StreamSetBuffer * matchCopiedSwizzle0 = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(4), this->getInputBufferBlocks(), 1);
     351    StreamSetBuffer * matchCopiedSwizzle1 = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(4), this->getInputBufferBlocks(), 1);
    352352
    353353    Kernel * swizzledMatchCopyK = mPxDriver.addKernelInstance<LZ4SwizzledMatchCopyKernel>(iBuilder, 4, 2, 4);
     
    355355
    356356    // Produce unswizzled bit streams
    357     StreamSetBuffer * extractedbits = mPxDriver.addBuffer<CircularBuffer>(iBuilder, iBuilder->getStreamSetTy(8), this->getInputBufferBlocks());
     357    StreamSetBuffer * extractedbits = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(8), this->getInputBufferBlocks());
    358358    Kernel * unSwizzleK = mPxDriver.addKernelInstance<SwizzleGenerator>(iBuilder, 8, 1, 2);
    359359    mPxDriver.makeKernelCall(unSwizzleK, {matchCopiedSwizzle0, matchCopiedSwizzle1}, {extractedbits});
     
    396396    } else {
    397397        StreamSetBuffer * const extractedBits = this->generateBitStreamExtractData(iBuilder);
    398         StreamSetBuffer * depositedBits = mPxDriver.addBuffer<CircularBuffer>(iBuilder, iBuilder->getStreamSetTy(8), this->getDecompressedBufferBlocks());
     398        StreamSetBuffer * depositedBits = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(8), this->getDecompressedBufferBlocks());
    399399        Kernel * bitStreamPDEPk = enableGather ? mPxDriver.addKernelInstance<BitStreamGatherPDEPKernel>(iBuilder, 8) : mPxDriver.addKernelInstance<BitStreamPDEPKernel>(iBuilder, 8);
    400400        mPxDriver.makeKernelCall(bitStreamPDEPk, {mDepositMarker, extractedBits}, {depositedBits});
    401401
    402         StreamSetBuffer * matchCopiedBits = mPxDriver.addBuffer<CircularBuffer>(iBuilder, iBuilder->getStreamSetTy(8), this->getInputBufferBlocks());
     402        StreamSetBuffer * matchCopiedBits = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(8), this->getInputBufferBlocks());
    403403        Kernel * bitStreamMatchCopyK = mPxDriver.addKernelInstance<LZ4BitStreamMatchCopyKernel>(iBuilder, 8);
    404404        mPxDriver.makeKernelCall(bitStreamMatchCopyK, {mMatchOffsetMarker, mM0Marker, mCompressedByteStream, depositedBits}, {matchCopiedBits});
     
    438438    auto swizzle = this->generateSwizzleExtractData(iBuilder);
    439439
    440     StreamSetBuffer * depositedSwizzle0 = mPxDriver.addBuffer<CircularCopybackBuffer>(iBuilder, iBuilder->getStreamSetTy(4), this->getInputBufferBlocks(), 1);
    441     StreamSetBuffer * depositedSwizzle1 = mPxDriver.addBuffer<CircularCopybackBuffer>(iBuilder, iBuilder->getStreamSetTy(4), this->getInputBufferBlocks(), 1);
     440    StreamSetBuffer * depositedSwizzle0 = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(4), this->getInputBufferBlocks(), 1);
     441    StreamSetBuffer * depositedSwizzle1 = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(4), this->getInputBufferBlocks(), 1);
    442442
    443443    Kernel * multiplePdepK = mPxDriver.addKernelInstance<SwizzledMultiplePDEPkernel>(iBuilder, 4, 2);
     
    454454*/
    455455
    456     StreamSetBuffer * matchCopiedSwizzle0 = mPxDriver.addBuffer<CircularCopybackBuffer>(iBuilder, iBuilder->getStreamSetTy(4), this->getInputBufferBlocks(), 1);
    457     StreamSetBuffer * matchCopiedSwizzle1 = mPxDriver.addBuffer<CircularCopybackBuffer>(iBuilder, iBuilder->getStreamSetTy(4), this->getInputBufferBlocks(), 1);
     456    StreamSetBuffer * matchCopiedSwizzle0 = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(4), this->getInputBufferBlocks(), 1);
     457    StreamSetBuffer * matchCopiedSwizzle1 = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(4), this->getInputBufferBlocks(), 1);
    458458
    459459    Kernel * swizzledMatchCopyK = mPxDriver.addKernelInstance<LZ4SwizzledMatchCopyKernel>(iBuilder, 4, 2, 4);
     
    461461
    462462    // Produce unswizzled bit streams
    463     StreamSetBuffer * matchCopiedbits = mPxDriver.addBuffer<CircularBuffer>(iBuilder, iBuilder->getStreamSetTy(8), this->getInputBufferBlocks());
     463    StreamSetBuffer * matchCopiedbits = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(8), this->getInputBufferBlocks());
    464464    Kernel * unSwizzleK = mPxDriver.addKernelInstance<SwizzleGenerator>(iBuilder, 8, 1, 2);
    465465    mPxDriver.makeKernelCall(unSwizzleK, {matchCopiedSwizzle0, matchCopiedSwizzle1}, {matchCopiedbits});
  • icGREP/icgrep-devel/icgrep/lz4d.cpp

    r6036 r6047  
    7171    iBuilder->SetInsertPoint(BasicBlock::Create(M->getContext(), "entry", main, 0));
    7272
    73     StreamSetBuffer * const ByteStream = pxDriver.addBuffer<SourceBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 8));
    74     StreamSetBuffer * const BasisBits = pxDriver.addBuffer<CircularBuffer>(iBuilder, iBuilder->getStreamSetTy(8, 1), segmentSize * bufferSegments);
    75     StreamSetBuffer * const Extenders = pxDriver.addBuffer<CircularBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 1), segmentSize * bufferSegments);
    76     StreamSetBuffer * const LiteralIndexes = pxDriver.addBuffer<CircularBuffer>(iBuilder, iBuilder->getStreamSetTy(2, 32), segmentSize * bufferSegments);
    77     StreamSetBuffer * const MatchIndexes = pxDriver.addBuffer<CircularBuffer>(iBuilder, iBuilder->getStreamSetTy(2, 32), segmentSize * bufferSegments);
    78     StreamSetBuffer * const DecompressedByteStream = pxDriver.addBuffer<CircularBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 8), decompressBufBlocks);
     73    StreamSetBuffer * const ByteStream = pxDriver.addBuffer<ExternalBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 8));
     74    StreamSetBuffer * const BasisBits = pxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(8, 1), segmentSize * bufferSegments);
     75    StreamSetBuffer * const Extenders = pxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 1), segmentSize * bufferSegments);
     76    StreamSetBuffer * const LiteralIndexes = pxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(2, 32), segmentSize * bufferSegments);
     77    StreamSetBuffer * const MatchIndexes = pxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(2, 32), segmentSize * bufferSegments);
     78    StreamSetBuffer * const DecompressedByteStream = pxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 8), decompressBufBlocks);
    7979
    8080   
  • icGREP/icgrep-devel/icgrep/preprocess.cpp

    r5856 r6047  
    5656    unsigned bufferSegments = codegen::BufferSegments;
    5757
    58     StreamSetBuffer * ByteStream = pxDriver.addBuffer<SourceBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 8));
     58    StreamSetBuffer * ByteStream = pxDriver.addBuffer<ExternalBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 8));
    5959    kernel::Kernel * sourceK = pxDriver.addKernelInstance<kernel::MemorySourceKernel>(iBuilder, iBuilder->getInt8PtrTy());
    6060    sourceK->setInitialArguments({inputStream, fileSize});
    6161    pxDriver.makeKernelCall(sourceK, {}, {ByteStream});
    6262
    63     StreamSetBuffer * MatchResults = pxDriver.addBuffer<CircularBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 1), segmentSize * bufferSegments);
     63    StreamSetBuffer * MatchResults = pxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 1), segmentSize * bufferSegments);
    6464    kernel::Kernel * linefeedK = pxDriver.addKernelInstance<kernel::DirectCharacterClassKernelBuilder>(iBuilder, "linefeed", std::vector<re::CC *>{re::makeCC(0x0A)}, 1);
    6565    pxDriver.makeKernelCall(linefeedK, {ByteStream}, {MatchResults});
  • icGREP/icgrep-devel/icgrep/toolchain/driver.h

    r5998 r6047  
    2626        BufferType * const b = new BufferType(std::forward<Args>(args) ...);
    2727        mOwnedBuffers.emplace_back(b);
    28         if (!std::is_same<BufferType, parabix::ExternalBuffer>::value) {
    29             mOwnedBuffers.back()->allocateBuffer(iBuilder);
    30         }
     28        mOwnedBuffers.back()->allocateBuffer(iBuilder);
    3129        return b;
    3230    }
  • icGREP/icgrep-devel/icgrep/toolchain/pipeline.cpp

    r6021 r6047  
    568568        for (unsigned i = 0; i < inputs.size(); ++i) {
    569569            const auto buffer = consumer->getStreamSetInputBuffer(i);
    570             if (isa<SourceBuffer>(buffer)) continue;
     570            if (isa<ExternalBuffer>(buffer)) continue;
    571571            const Kernel * const producer = buffer->getProducer();
    572572            assert (consumer != producer);
     
    872872        Value * const unconsumed = b->CreateSub(produced, consumed);
    873873        requiredSpace = b->CreateAdd(requiredSpace, unconsumed);
    874         Value * const capacity = b->getBufferedSize(name);
     874        Value * const capacity = b->getCapacity(name);
    875875        Value * const check = b->CreateICmpULE(requiredSpace, capacity);
    876876        terminated->addIncoming(b->getFalse(), b->GetInsertBlock());
     
    953953        const auto & name = output.getName();
    954954        if (initiallyProducedItemCount[i]) {
    955             Value * const bufferSize = b->getBufferedSize(name);
     955            Value * const bufferSize = b->getCapacity(name);
    956956            Value * const prior = initiallyProducedItemCount[i];
    957957            Value * const offset = b->CreateURem(prior, bufferSize);
     
    10211021    for (unsigned i = 0; i < outputs.size(); i++) {
    10221022        if (isa<DynamicBuffer>(outputs[i])) {
    1023 
    1024             const auto baseSize = ceiling(kernel->getUpperBound(kernel->getStreamOutput(i).getRate()) * kernel->getStride() * codegen::SegmentSize);
     1023            const auto & output = kernel->getStreamOutput(i);
     1024            const auto baseSize = ceiling(kernel->getUpperBound(output.getRate()) * kernel->getStride() * codegen::SegmentSize);
    10251025            if (LLVM_LIKELY(baseSize > 0)) {
    1026 
    1027                 const auto & name = kernel->getStreamOutput(i).getName();
    1028 
    1029                 BasicBlock * const doExpand = b->CreateBasicBlock(name + "Expand");
    1030                 BasicBlock * const nextBlock = b->GetInsertBlock()->getNextNode();
    1031                 doExpand->moveAfter(b->GetInsertBlock());
    1032                 BasicBlock * const bufferReady = b->CreateBasicBlock(name + "Ready");
    1033                 bufferReady->moveAfter(doExpand);
    1034                 if (nextBlock) nextBlock->moveAfter(bufferReady);
    1035 
     1026                const auto & name = output.getName();
    10361027                Value * const produced = b->getProducedItemCount(name);
    10371028                Value * const consumed = b->getConsumedItemCount(name);
    1038                 Value * const required = b->CreateAdd(b->CreateSub(produced, consumed), b->getSize(2 * baseSize));
    1039 
    1040                 b->CreateCondBr(b->CreateICmpUGT(required, b->getBufferedSize(name)), doExpand, bufferReady);
    1041                 b->SetInsertPoint(doExpand);
    1042 
    1043                 b->doubleCapacity(name);
    1044                 // Ensure that capacity is sufficient by successive doubling, if necessary.
    1045                 b->CreateCondBr(b->CreateICmpUGT(required, b->getBufferedSize(name)), doExpand, bufferReady);
    1046 
    1047                 b->SetInsertPoint(bufferReady);
    1048 
     1029                Value * const unconsumed = b->CreateSub(produced, consumed);
     1030                Value * const required = b->CreateAdd(unconsumed, b->getSize(2 * baseSize));
     1031                b->setCapacity(name, required);
    10491032            }
    10501033        }
  • icGREP/icgrep-devel/icgrep/u8u16.cpp

    r6006 r6047  
    282282
    283283    // File data from mmap
    284     StreamSetBuffer * ByteStream = pxDriver.addBuffer<SourceBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 8));
     284    StreamSetBuffer * ByteStream = pxDriver.addBuffer<ExternalBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 8));
    285285
    286286    Kernel * mmapK = pxDriver.addKernelInstance<MMapSourceKernel>(iBuilder);
     
    289289
    290290    // Transposed bits from s2p
    291     StreamSetBuffer * BasisBits = pxDriver.addBuffer<CircularBuffer>(iBuilder, iBuilder->getStreamSetTy(8, 1), bufferSize);
     291    StreamSetBuffer * BasisBits = pxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(8, 1), bufferSize);
    292292
    293293    Kernel * s2pk = pxDriver.addKernelInstance<S2PKernel>(iBuilder);
     
    296296
    297297    // Calculate UTF-16 data bits through bitwise logic on u8-indexed streams.
    298     StreamSetBuffer * u8bits = pxDriver.addBuffer<CircularBuffer>(iBuilder, iBuilder->getStreamSetTy(16), bufferSize);
    299     StreamSetBuffer * DelMask = pxDriver.addBuffer<CircularBuffer>(iBuilder, iBuilder->getStreamSetTy(), bufferSize);
     298    StreamSetBuffer * u8bits = pxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(16), bufferSize);
     299    StreamSetBuffer * DelMask = pxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(), bufferSize);
    300300
    301301    Kernel * u8u16k = pxDriver.addKernelInstance<U8U16Kernel>(iBuilder);
    302302    pxDriver.makeKernelCall(u8u16k, {BasisBits}, {u8bits, DelMask});
    303303
    304     StreamSetBuffer * u16bits = pxDriver.addBuffer<CircularBuffer>(iBuilder, iBuilder->getStreamSetTy(16), bufferSize);
     304    StreamSetBuffer * u16bits = pxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(16), bufferSize);
    305305
    306306    const auto avx2 = enableAVXdel && AVX2_available() && codegen::BlockSize==256;
     
    311311        u16bytes = pxDriver.addBuffer<ExternalBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 16), outputStream);
    312312    } else if (avx2) {
    313         u16bytes = pxDriver.addBuffer<CircularBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 16), bufferSize);
     313        u16bytes = pxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 16), bufferSize);
    314314    } else {
    315         u16bytes = pxDriver.addBuffer<CircularCopybackBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 16), bufferSize, 1);
     315        u16bytes = pxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 16), bufferSize, 1);
    316316    }
    317317
    318318    if (avx2) {
    319319        // Allocate space for fully compressed swizzled UTF-16 bit streams
    320         StreamSetBuffer * u16Swizzle0 = pxDriver.addBuffer<CircularCopybackBuffer>(iBuilder, iBuilder->getStreamSetTy(4), bufferSize, 1);
    321         StreamSetBuffer * u16Swizzle1 = pxDriver.addBuffer<CircularCopybackBuffer>(iBuilder, iBuilder->getStreamSetTy(4), bufferSize, 1);
    322         StreamSetBuffer * u16Swizzle2 = pxDriver.addBuffer<CircularCopybackBuffer>(iBuilder, iBuilder->getStreamSetTy(4), bufferSize, 1);
    323         StreamSetBuffer * u16Swizzle3 = pxDriver.addBuffer<CircularCopybackBuffer>(iBuilder, iBuilder->getStreamSetTy(4), bufferSize, 1);
     320        StreamSetBuffer * u16Swizzle0 = pxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(4), bufferSize, 1);
     321        StreamSetBuffer * u16Swizzle1 = pxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(4), bufferSize, 1);
     322        StreamSetBuffer * u16Swizzle2 = pxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(4), bufferSize, 1);
     323        StreamSetBuffer * u16Swizzle3 = pxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(4), bufferSize, 1);
    324324        // Apply a deletion algorithm to discard all but the final position of the UTF-8
    325325        // sequences (bit streams) for each UTF-16 code unit. Also compresses and swizzles the result.
     
    332332        pxDriver.makeKernelCall(p2sk, {u16bits}, {u16bytes});
    333333    } else {
    334         StreamSetBuffer * DeletionCounts = pxDriver.addBuffer<CircularBuffer>(iBuilder, iBuilder->getStreamSetTy(), bufferSize);
     334        StreamSetBuffer * DeletionCounts = pxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(), bufferSize);
    335335        Kernel * delK = pxDriver.addKernelInstance<FieldCompressKernel>(iBuilder, iBuilder->getBitBlockWidth()/16, 16);
    336336        pxDriver.makeKernelCall(delK, {u8bits, DelMask}, {u16bits, DeletionCounts});
  • icGREP/icgrep-devel/icgrep/wc.cpp

    r5964 r6047  
    171171    iBuilder->SetInsertPoint(BasicBlock::Create(m->getContext(), "entry", main,0));
    172172
    173     StreamSetBuffer * const ByteStream = pxDriver.addBuffer<SourceBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 8));
     173    StreamSetBuffer * const ByteStream = pxDriver.addBuffer<ExternalBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 8));
    174174
    175175
     
    180180    Kernel * wck  = nullptr;
    181181    if (CountWords || CountChars) {
    182         StreamSetBuffer * const BasisBits = pxDriver.addBuffer<CircularBuffer>(iBuilder, iBuilder->getStreamSetTy(8, 1), segmentSize * bufferSegments);
     182        StreamSetBuffer * const BasisBits = pxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(8, 1), segmentSize * bufferSegments);
    183183        Kernel * s2pk = pxDriver.addKernelInstance<S2PKernel>(iBuilder);
    184184        pxDriver.makeKernelCall(s2pk, {ByteStream}, {BasisBits});
Note: See TracChangeset for help on using the changeset viewer.