Changeset 5782


Ignore:
Timestamp:
Dec 15, 2017, 12:44:01 PM (11 months ago)
Author:
nmedfort
Message:

Initial check-in of LookAhead support; modified LineBreakKernel to compute CR+LF using LookAhead(1) + misc. fixes.

Location:
icGREP/icgrep-devel/icgrep
Files:
37 edited

Legend:

Unmodified
Added
Removed
  • icGREP/icgrep-devel/icgrep/IR_Gen/CBuilder.cpp

    r5771 r5782  
    9999    Value * const p = b->CreatePtrToInt(Ptr, intPtrTy);
    100100    Value * const s = b->CreatePtrToInt(Base, intPtrTy);
    101     Value * const e = b->CreateAdd(s, b->CreateSub(sz, b->CreateZExtOrTrunc(Size, intPtrTy)));
    102     return b->CreateAnd(b->CreateICmpUGE(p, s), b->CreateICmpULE(p, e));
     101    Value * const w = b->CreateAdd(p, b->CreateZExtOrTrunc(Size, intPtrTy));
     102    Value * const e = b->CreateAdd(s, sz);
     103    return b->CreateAnd(b->CreateICmpUGE(p, s), b->CreateICmpULE(w, e));
    103104}
    104105
  • icGREP/icgrep-devel/icgrep/UCD/CaseFolding.cpp

    r5781 r5782  
    8282}
    8383
    84 UnicodeSet caseInsensitize(UnicodeSet & cc) {
     84UnicodeSet caseInsensitize(const UnicodeSet & cc) {
    8585    UnicodeSet cci;
    8686    for (const interval_t i : cc) {
  • icGREP/icgrep-devel/icgrep/UCD/CaseFolding.h

    r5781 r5782  
    2121};
    2222
    23 UCD::UnicodeSet caseInsensitize(UCD::UnicodeSet & cc);
    24 
     23
     24UCD::UnicodeSet caseInsensitize(const UCD::UnicodeSet & cc);
    2525
    2626const int foldTableSize = 246;
  • icGREP/icgrep-devel/icgrep/UCD/UTF.h

    r5760 r5782  
    4646        }
    4747    }
    48     else return static_cast<unsigned>(0x80 | ((cp >> (6 * (length - n))) & 0x3F));
     48    return static_cast<unsigned>(0x80 | ((cp >> (6 * (length - n))) & 0x3F));
    4949}
    5050
  • icGREP/icgrep-devel/icgrep/grep_engine.cpp

    r5779 r5782  
    101101    const unsigned encodingBits = 8;
    102102
    103     StreamSetBuffer * BasisBits = mGrepDriver->addBuffer<CircularBuffer>(idb, idb->getStreamSetTy(encodingBits, 1), segmentSize * bufferSegments);
     103    StreamSetBuffer * BasisBits = mGrepDriver->addBuffer<CircularBuffer>(idb, idb->getStreamSetTy(encodingBits, 1), segmentSize * bufferSegments + 1);
    104104    kernel::Kernel * s2pk = mGrepDriver->addKernelInstance<kernel::S2PKernel>(idb);
    105105    mGrepDriver->makeKernelCall(s2pk, {ByteStream}, {BasisBits});
    106106
     107    StreamSetBuffer * LineFeedStream = mGrepDriver->addBuffer<CircularBuffer>(idb, idb->getStreamSetTy(1, 1), segmentSize * bufferSegments + 1);
     108    kernel::Kernel * linefeedK = mGrepDriver->addKernelInstance<kernel::LineFeedKernelBuilder>(idb, encodingBits);
     109    mGrepDriver->makeKernelCall(linefeedK, {BasisBits}, {LineFeedStream});
     110
    107111    StreamSetBuffer * LineBreakStream = mGrepDriver->addBuffer<CircularBuffer>(idb, idb->getStreamSetTy(1, 1), segmentSize * bufferSegments);
     112    StreamSetBuffer * CRLFStream = mGrepDriver->addBuffer<CircularBuffer>(idb, idb->getStreamSetTy(1, 1), segmentSize * bufferSegments);
    108113    kernel::Kernel * linebreakK = mGrepDriver->addKernelInstance<kernel::LineBreakKernelBuilder>(idb, encodingBits);
    109     mGrepDriver->makeKernelCall(linebreakK, {BasisBits}, {LineBreakStream});
     114    mGrepDriver->makeKernelCall(linebreakK, {BasisBits, LineFeedStream}, {LineBreakStream, CRLFStream});
    110115
    111116    kernel::Kernel * requiredStreamsK = mGrepDriver->addKernelInstance<kernel::RequiredStreams_UTF8>(idb);
    112     StreamSetBuffer * RequiredStreams = mGrepDriver->addBuffer<CircularBuffer>(idb, idb->getStreamSetTy(4, 1), segmentSize * bufferSegments);
     117    StreamSetBuffer * RequiredStreams = mGrepDriver->addBuffer<CircularBuffer>(idb, idb->getStreamSetTy(3, 1), segmentSize * bufferSegments);
    113118    mGrepDriver->makeKernelCall(requiredStreamsK, {BasisBits}, {RequiredStreams});
    114119
     
    137142        StreamSetBuffer * MatchResults = mGrepDriver->addBuffer<CircularBuffer>(idb, idb->getStreamSetTy(1, 1), segmentSize * bufferSegments);
    138143        kernel::Kernel * icgrepK = mGrepDriver->addKernelInstance<kernel::ICGrepKernel>(idb, REs[i], numOfCharacterClasses);
    139         mGrepDriver->makeKernelCall(icgrepK, {CharClasses, LineBreakStream, RequiredStreams}, {MatchResults});
     144        mGrepDriver->makeKernelCall(icgrepK, {CharClasses, LineBreakStream, CRLFStream, RequiredStreams}, {MatchResults});
    140145        MatchResultsBufs[i] = MatchResults;
    141146    }
  • icGREP/icgrep-devel/icgrep/icgrep-devel.files

    r5755 r5782  
     1applications/cfg-validator/grammars-v4/antlr4/cpp/ANTLRv4Lexer.cpp
     2applications/cfg-validator/grammars-v4/antlr4/cpp/ANTLRv4Lexer.h
     3applications/cfg-validator/grammars-v4/antlr4/cpp/ANTLRv4Parser.cpp
     4applications/cfg-validator/grammars-v4/antlr4/cpp/ANTLRv4Parser.h
     5applications/cfg-validator/grammars-v4/antlr4/cpp/ANTLRv4ParserBaseListener.cpp
     6applications/cfg-validator/grammars-v4/antlr4/cpp/ANTLRv4ParserBaseListener.h
     7applications/cfg-validator/grammars-v4/antlr4/cpp/ANTLRv4ParserListener.cpp
     8applications/cfg-validator/grammars-v4/antlr4/cpp/ANTLRv4ParserListener.h
     9applications/cfg-validator/grammars-v4/antlr4/cpp/LexBasic.cpp
     10applications/cfg-validator/grammars-v4/antlr4/cpp/LexBasic.h
     11applications/cfg-validator/grammars-v4/c/examples/add.c
     12applications/cfg-validator/grammars-v4/c/examples/BinaryDigit.c
     13applications/cfg-validator/grammars-v4/c/examples/bt.c
     14applications/cfg-validator/grammars-v4/c/examples/dialog.c
     15applications/cfg-validator/grammars-v4/c/examples/helloworld.c
     16applications/cfg-validator/grammars-v4/c/examples/integrate.c
     17applications/cfg-validator/grammars-v4/c/examples/ll.c
     18applications/cfg-validator/grammars-v4/c/examples/pr403.c
     19applications/cfg-validator/grammars-v4/cpp/examples/helloworld.cpp
     20applications/cfg-validator/grammars-v4/cpp/examples/macro.cpp
     21applications/cfg-validator/grammars-v4/cpp/examples/template_args_test.cpp
     22applications/cfg-validator/grammars-v4/objc/examples/BoxPhoto.h
     23applications/cfg-validator/grammars-v4/objc/examples/FSBaseViewController.h
     24applications/cfg-validator/grammars-v4/objc/examples/NetworkRequest.h
     25applications/cfg-validator/grammars-v4/tinyc/examples/example1.c
     26applications/cfg-validator/grammars-v4/tinyc/examples/example2.c
     27applications/cfg-validator/grammars-v4/tinyc/examples/example3.c
     28applications/cfg-validator/grammars-v4/tinyc/examples/example4.c
     29applications/cfg-validator/grammars-v4/tinyc/examples/example5.c
     30applications/cfg-validator/ANTLRv4Lexer.cpp
     31applications/cfg-validator/ANTLRv4Lexer.h
     32applications/cfg-validator/ANTLRv4Parser.cpp
     33applications/cfg-validator/ANTLRv4Parser.h
     34applications/cfg-validator/ANTLRv4ParserBaseListener.cpp
     35applications/cfg-validator/ANTLRv4ParserBaseListener.h
     36applications/cfg-validator/ANTLRv4ParserListener.cpp
     37applications/cfg-validator/ANTLRv4ParserListener.h
    138cc/alphabet.cpp
    239cc/alphabet.h
     
    4683kernels/alignedprint.cpp
    4784kernels/alignedprint.h
     85kernels/attributes.cpp
    4886kernels/attributes.h
    4987kernels/cc_kernel.cpp
     
    77115kernels/pdep_kernel.cpp
    78116kernels/pdep_kernel.h
     117kernels/processing_rate.cpp
    79118kernels/processing_rate.h
    80119kernels/radix64.cpp
     
    164203pablo/symbol_generator.cpp
    165204pablo/symbol_generator.h
     205re/casing.cpp
     206re/casing.h
     207re/parse_fixed_strings.cpp
     208re/parse_fixed_strings.h
    166209re/printer_re.cpp
    167210re/printer_re.h
     
    170213re/re_analysis.h
    171214re/re_any.h
     215re/re_assertion.cpp
    172216re/re_assertion.h
    173217re/re_cc.cpp
     
    180224re/re_diff.h
    181225re/re_end.h
     226re/re_group.h
    182227re/re_intersect.cpp
    183228re/re_intersect.h
     
    208253re/re_parser_prosite.cpp
    209254re/re_parser_prosite.h
     255re/re_range.cpp
     256re/re_range.h
    210257re/re_re.cpp
    211258re/re_re.h
     
    224271re/re_utility.cpp
    225272re/re_utility.h
     273re/to_utf8.cpp
     274re/to_utf8.h
    226275toolchain/cpudriver.cpp
    227276toolchain/cpudriver.h
     
    282331UCD/unicode_set.h
    283332UCD/UnicodeData.h
     333UCD/UTF.h
    284334UCD/VerticalOrientation.h
    285335UCD/WordBreakProperty.h
     
    308358wc.cpp
    309359CMakeLists.txt
    310 applications/cfg-validator/ANTLRv4Lexer.cpp
    311 applications/cfg-validator/ANTLRv4Lexer.h
    312 applications/cfg-validator/ANTLRv4Lexer.tokens
    313 applications/cfg-validator/ANTLRv4Parser.cpp
    314 applications/cfg-validator/ANTLRv4Parser.h
    315 applications/cfg-validator/ANTLRv4Parser.tokens
    316 applications/cfg-validator/ANTLRv4ParserBaseListener.cpp
    317 applications/cfg-validator/ANTLRv4ParserBaseListener.h
    318 applications/cfg-validator/ANTLRv4ParserListener.cpp
    319 applications/cfg-validator/ANTLRv4ParserListener.h
    320 applications/cfg-validator/LexBasic.cpp
    321 applications/cfg-validator/LexBasic.h
    322 applications/cfg-validator/LexBasic.tokens
    323 applications/cfg-validator/ANTLRv4Lexer.cpp
    324 applications/cfg-validator/ANTLRv4Lexer.h
    325 applications/cfg-validator/ANTLRv4Parser.cpp
    326 applications/cfg-validator/ANTLRv4Parser.h
    327 applications/cfg-validator/ANTLRv4ParserBaseListener.cpp
    328 applications/cfg-validator/ANTLRv4ParserBaseListener.h
    329 applications/cfg-validator/ANTLRv4ParserListener.cpp
    330 applications/cfg-validator/ANTLRv4ParserListener.h
    331 kernels/attributes.cpp
    332 kernels/processing_rate.cpp
     360applications/cfg-validator/antlrv4validator.cpp
  • icGREP/icgrep-devel/icgrep/icgrep-devel.includes

    r5755 r5782  
    55applications/cfg-validator
    66kernels
     7IR_Gen
     8combine/pugixml/src
     9combine/icgrep-test
     10pablo/passes
     11applications/cfg-validator/grammars-v4/tinyc/examples
     12toolchain
     13pablo/analysis
     14UCD
     15pablo
     16applications/cfg-validator/grammars-v4/c/examples
     17util
     18applications/cfg-validator/grammars-v4/cpp/examples
     19cc
     20re
     21pablo/optimizers
     22applications/cfg-validator/grammars-v4/antlr4/cpp
     23applications/cfg-validator/grammars-v4/objc/examples
     24combine
     25editd
  • icGREP/icgrep-devel/icgrep/kernels/attributes.cpp

    r5755 r5782  
    11#include "attributes.h"
     2
     3#include <llvm/Support/raw_ostream.h>
    24
    35namespace kernel {
    46
    5 void AttributeSet::addAttribute(Attribute attribute) {
    6     for (Attribute & attr : *this) {
     7Attribute & AttributeSet::addAttribute(Attribute attribute) {
     8    for (auto i = begin(), i_end = end(); i != i_end; ++i) {
     9        Attribute & attr = const_cast<Attribute &>(*i);
    710        if (attr.getKind() == attribute.getKind()) {
    8             attr.mK = attribute.mK;
    9             return;
     11            attr.mAmount = attribute.mAmount;
     12            return attr;
    1013        }
    1114    }
    1215    emplace_back(attribute);
     16    return back();
    1317}
    1418
    15 bool AttributeSet::hasAttribute(const AttributeId id) const {
    16     for (const Attribute & attr : *this) {
    17         if (attr.getKind() == id) {
    18             return true;
     19Attribute * AttributeSet::__findAttribute(const AttributeId id) const {
     20    for (auto i = begin(), i_end = end(); i != i_end; ++i) {
     21        if (i->getKind() == id) {
     22            return const_cast<Attribute *>(&*i);
    1923        }
    2024    }
    21     return false;
     25    return nullptr;
    2226}
    2327
  • icGREP/icgrep-devel/icgrep/kernels/attributes.h

    r5755 r5782  
    88struct Attribute {
    99
    10     friend struct AttributeSet;
    11 
    12     friend struct Binding;
    13 
    1410    enum class KindId {
    1511
     
    1915
    2016        // A LookAhead(n) attribute on an input stream set S declares that the kernel
    21         // looks ahead n positions in the input stream.   That is,
    22         // processing of item S[i, j] may be defined in terms of S[i, j+n].
     17        // looks ahead n positions in the input stream.  That is, processing of item
     18        // S[i, j] may be defined in terms of S[i, j+n].
    2319
    2420        // Guarantee required: the pipeline compiler must ensure that, when
     
    6864        // buffer size calculations.
    6965
    70         Greedy,
    71 
    72         // Normally, the available item count of fixed rate streams is equal to the
    73         // number of strides processed by the MultiBlock times its stride size for all
    74         // strides except for the final stride. Some kernels consume
     66        IndependentRegionBegin, IndependentRegionEnd, /// NOT DONE
     67
     68        // Some kernels can divide their processing into concrete non-overlapping regions
     69        // between a beginning and ending position. This is a hard guarantee that regardless
     70        // of the computations between the start of the stream and the beginning of the first
     71        // independent region or between the *beginning* of any two independent regions, A,
     72        // B, the calculations that occur prior to the beginning of B do not affect the
     73        // calculations after it --- even if A is started at an arbitrary position with a
     74        // zeroed-out kernel state.
     75
     76        // If a kernel K is processed simultaneously by two threads, K_0 and K_1, and K_1 is
     77        // waiting for K_0 to finish and update its kernel state for K_1 to resume at, K_1 can
     78        // compute what its state will be and begin processing before K_0 is finished. This
     79        // requires the pipeline to intervene and call an optimized "output-less" instance
     80        // of the kernel prior to calling B.
     81
     82        ConditionalRegionBegin, ConditionalRegionEnd, /// NOT DONE
     83
     84        // Some kernels have clearly demarcated regions in which a MultiBlock kernel will
     85        // produce useful outputs for only the inputs within those regions. This attribute
     86        // instructs the kernel to "zero-fill" the output of any non-selected regions,
     87        // skipping strides entirely whenever possible.
     88
     89        // If the same regions are also independent, we can avoid the overhead of "masking
     90        // out" the input streams. Otherwise a MultiBlock will use temporary buffers for all
     91        // uses of the streams and zero out any non-regions from the data.
    7592
    7693        /** OUTPUT STREAM ATTRIBUTES **/
     
    101118        // swizzling code necessary).
    102119
     120        ReverseRegionBegin, ReverseRegionEnd, /// NOT DONE
     121
     122        // Conceptually, reversing a stream S is simple: {S_1,...,S_n} -> {S_n,...,S_1}.
     123        // However, this means all of the input data must be computed and stored prior to
     124        // executing this kernel. In practice, this is unnecessary as in the context of
     125        // text parsing, we're almost always searching for the true starting position of
     126        // something ambiguous after we've found its end position in some prior kernel.
     127
     128
     129
     130
     131
     132//        Here is a revised definition of SegmentedReverse:
     133
     134//        Given a stream of data bits S that is considered to be divided into
     135//        segments, and a marker stream M having a one bit at the final position
     136//        of each segment, then R = SegmentedReverse(S, M) when
     137
     138//        R_{i} = S_{l + (h - i)}
     139//              where l = the maximum j such that j <= i and either j = 0 or M_{j-1} = 1
     140//          and where h = the minimum j such that j >= i and either j = length(S) - 1 or M_j = 1
     141//          (l and h are the low and high positions of the segment containing i)
     142
     143//        This is an invertible operation, so we can apply R to a kernel's input
     144//        and then to its output to get a SegmentedReverse version of a kernel
     145
     146//        A kernel which computes segmented reverse is feasible, but seems complex
     147//        to implement, and probably too slow.  I have played around with several
     148//        ways of tackling it, no good method yet.
     149
     150//        If there are multiple segments within a block, we could instead use
     151//        the following:
     152
     153//        BlockSegmentedReverse
     154
     155//        B_{i} = S_{L + (H - i)}
     156//             where l = the maximum j such that j <= i and either j = 0 or M_{j-1} = 1
     157//                   h = the minimum j such that j >= i and either j = length(S) - 1 or M_j = 1
     158//                   L = l if l div BlockSize < h div BlockSize, otherwise (i div BlockSize) * BlockSize
     159//                   H = h if l div BlockSize < h div BlockSize, otherwise L + BlockSize - 1
     160
     161//        An alternative way of looking at this is to eliminate all but the first
     162//        and last marker positions within a block.
     163
     164//        The main point is that, if we apply B to inputs, perform the kernel
     165//        and then apply B to outputs, we get the same result as if we applied R
     166//        (assuming that the kernel computations do not cross boundaries in M).
     167
     168//        This will be more efficient to compute, but still involves overhead
     169//        for shifting and combining streams.
     170
     171//        I think it may be better to focus on the ReverseKernel adapter, that
     172//        handles the reverse operations for both input and output.   This actually
     173//        gives more flexibility, because, in a multiblock scenario, we can process
     174//        the longest sequence of blocks such that both the beginning and end blocks
     175//        have a one bit.   If there are any interior blocks with one bits, then
     176//        they will be handled automatically without special shifting and masking.
     177
     178//        By the way, in my designs, I am wanting to have a callable Multiblock
     179//        function, so that the Multiblock function for a Reversed Kernel just
     180//        does a little work before calling the Multiblock function of the base kernel.
     181//        That seems to have disappeared in the current system.
     182
     183
    103184        /** KERNEL ATTRIBUTES **/
    104185
     
    124205        // kernels and end the program once the final kernel has returned its result.
    125206
    126         IndependentRegions,
    127 
    128         // Some kernels can divide their processing into concrete non-overlapping regions
    129         // between a start and end position in which the data produced by a kernel. If a
    130         // kernel K is processed simultaneously by two threads, K_0 and K_1, and K_1 is
    131         // waiting K_0 to finish and update it's kernel state for K_1 to resume at, K_1 can
    132         // compute what its state will be and begin processing before K_0 is finished. This
    133         // requires a the pipeline to intervene and call an optimized "output-less" instance
    134         // of the kernel prior to calling B.
    135 
    136207    };
    137208
     
    152223    }
    153224
    154     unsigned getAmount() const {
    155         return mK;
     225    unsigned amount() const {
     226        return mAmount;
     227    }
     228
     229    void setAmount(const unsigned amount) {
     230        mAmount = amount;
    156231    }
    157232
    158233    bool operator == (const Attribute & other) const {
    159         return mKind == other.mKind && mK == other.mK;
     234        return mKind == other.mKind && mAmount == other.mAmount;
    160235    }
    161236
     
    170245    }
    171246
     247    friend struct AttributeSet;
     248    friend struct Binding;
    172249    friend Attribute Add1();
    173250    friend Attribute Principal();
    174251    friend Attribute RoundUpTo(const unsigned);
     252    friend Attribute LookAhead(const unsigned);
    175253    friend Attribute LookBehind(const unsigned);
    176254    friend Attribute Deferred();
    177 
    178     Attribute(const KindId kind, const unsigned k) : mKind(kind), mK(k) { }
     255    friend Attribute ConditionalRegionBegin();
     256    friend Attribute ConditionalRegionEnd();
     257
     258    Attribute(const KindId kind, const unsigned k) : mKind(kind), mAmount(k) { }
    179259
    180260private:
    181261
    182262    const KindId    mKind;
    183     unsigned        mK;
     263    unsigned        mAmount;
    184264};
    185265
     
    192272    }
    193273
    194     const Attribute & getAttribute(const unsigned i) const {
    195         return getAttributes()[i];
    196     }
    197 
    198     void addAttribute(Attribute attribute);
     274    Attribute & findOrAddAttribute(const AttributeId id) {
     275        if (Attribute * const attr = __findAttribute(id)) {
     276            return *attr;
     277        } else {
     278            return addAttribute(Attribute(id, 0));
     279        }
     280    }
     281
     282    Attribute & findAttribute(const AttributeId id) const {
     283        return *__findAttribute(id);
     284    }
     285
     286    Attribute & addAttribute(Attribute attribute);
    199287
    200288    bool hasAttributes() const {
     
    202290    }
    203291
    204     bool hasAttribute(const AttributeId id) const;
     292    bool hasAttribute(const AttributeId id) const {
     293        return __findAttribute(id) != nullptr;
     294    }
    205295
    206296    AttributeSet() = default;
    207297
     298    AttributeSet(Attribute && attr) { emplace_back(std::move(attr)); }
     299
    208300    AttributeSet(std::initializer_list<Attribute> attrs) : std::vector<Attribute>(attrs) { }
     301
     302private:
     303
     304    Attribute * __findAttribute(const AttributeId id) const;
     305
    209306};
    210307
     
    222319}
    223320
     321inline Attribute LookAhead(const unsigned k) {
     322    return Attribute(Attribute::KindId::LookAhead, k);
     323}
     324
    224325inline Attribute LookBehind(const unsigned k) {
    225326    return Attribute(Attribute::KindId::LookBehind, k);
     
    230331}
    231332
     333inline Attribute ConditionalRegionBegin() {
     334    return Attribute(Attribute::KindId::ConditionalRegionBegin, 0);
     335}
     336
     337inline Attribute ConditionalRegionEnd() {
     338    return Attribute(Attribute::KindId::ConditionalRegionEnd, 0);
     339}
     340
    232341}
    233342
  • icGREP/icgrep-devel/icgrep/kernels/grep_kernel.cpp

    r5769 r5782  
    4141    cc::CC_Compiler ccc(this, getInput(0));
    4242    auto & pb = ccc.getBuilder();
    43     Zeroes * const zero = pb.createZeroes();
    44     PabloAST * LF = ccc.compileCC("LF", makeCC(0x0A), pb);
    45     PabloAST * CR = ccc.compileCC(makeCC(0x0D));
    46 
    47     Var * crlf = pb.createVar("crlf", zero);
    48     PabloBuilder crb = PabloBuilder::Create(pb);
    49     PabloAST * cr1 = crb.createAdvance(CR, 1, "cr1");
    50     crb.createAssign(crlf, crb.createAnd(cr1, LF));
    51     pb.createIf(CR, crb);
    52    
    53     Var * u8invalid = pb.createVar("u8invalid", zero);
    54     Var * valid_pfx = pb.createVar("valid_pfx", zero);
    55     Var * nonFinal = pb.createVar("nonfinal", zero);
    56     PabloAST * u8pfx = ccc.compileCC(makeCC(0xC0, 0xFF));
    57    
     43    Zeroes * const ZEROES = pb.createZeroes();
     44    PabloAST * const u8pfx = ccc.compileCC(makeCC(0xC0, 0xFF));
     45
     46
     47    Var * const nonFinal = pb.createVar("nonFinal", u8pfx);
     48    Var * const u8invalid = pb.createVar("u8invalid", ZEROES);
     49    Var * const valid_pfx = pb.createVar("valid_pfx", u8pfx);
     50
    5851    PabloBuilder it = PabloBuilder::Create(pb);
    5952
    6053    pb.createIf(u8pfx, it);
    61     PabloAST * u8pfx2 = ccc.compileCC(makeCC(0xC2, 0xDF), it);
    62     PabloAST * u8pfx3 = ccc.compileCC(makeCC(0xE0, 0xEF), it);
    63     PabloAST * u8pfx4 = ccc.compileCC(makeCC(0xF0, 0xF4), it);
    64     PabloAST * u8suffix = ccc.compileCC("u8suffix", makeCC(0x80, 0xBF), it);
     54    PabloAST * const u8pfx2 = ccc.compileCC(makeCC(0xC2, 0xDF), it);
     55    PabloAST * const u8pfx3 = ccc.compileCC(makeCC(0xE0, 0xEF), it);
     56    PabloAST * const u8pfx4 = ccc.compileCC(makeCC(0xF0, 0xF4), it);
     57    PabloAST * const u8suffix = ccc.compileCC("u8suffix", makeCC(0x80, 0xBF), it);
    6558   
    6659    //
    6760    // Two-byte sequences
    68     Var * u8scope22 = it.createVar("u8scope22", zero);
     61    Var * const anyscope = it.createVar("anyscope", ZEROES);
    6962    PabloBuilder it2 = PabloBuilder::Create(it);
    70     it2.createAssign(u8scope22, it2.createAdvance(u8pfx2, 1));
    7163    it.createIf(u8pfx2, it2);
    72     //
    73     // Three-byte sequences
    74    
    75     Var * u8scope32 = it.createVar("u8scope32", zero);
    76     Var * u8scope3X = it.createVar("u8scope3X", zero);
    77     Var * EX_invalid = it.createVar("EX_invalid", zero);
     64    it2.createAssign(anyscope, it2.createAdvance(u8pfx2, 1));
     65
     66    //
     67    // Three-byte sequences   
     68    Var * const EF_invalid = it.createVar("EF_invalid", ZEROES);
    7869    PabloBuilder it3 = PabloBuilder::Create(it);
    7970    it.createIf(u8pfx3, it3);
    80     it3.createAssign(u8scope32, it3.createAdvance(u8pfx3, 1));
    81     PabloAST * u8scope33 = it3.createAdvance(u8pfx3, 2);
    82     it3.createAssign(u8scope3X, it3.createOr(u8scope32, u8scope33));
    83     PabloAST * E0_invalid = it3.createAnd(it3.createAdvance(ccc.compileCC(makeCC(0xE0), it3), 1), ccc.compileCC(makeCC(0x80, 0x9F), it3));
    84     PabloAST * ED_invalid = it3.createAnd(it3.createAdvance(ccc.compileCC(makeCC(0xED), it3), 1), ccc.compileCC(makeCC(0xA0, 0xBF), it3));
    85     it3.createAssign(EX_invalid, it3.createOr(E0_invalid, ED_invalid));
    86    
     71    PabloAST * const u8scope32 = it3.createAdvance(u8pfx3, 1);
     72    it3.createAssign(nonFinal, it3.createOr(nonFinal, u8scope32));
     73    PabloAST * const u8scope33 = it3.createAdvance(u8pfx3, 2);
     74    PabloAST * const u8scope3X = it3.createOr(u8scope32, u8scope33);
     75    it3.createAssign(anyscope, it3.createOr(anyscope, u8scope3X));
     76    PabloAST * const E0_invalid = it3.createAnd(it3.createAdvance(ccc.compileCC(makeCC(0xE0), it3), 1), ccc.compileCC(makeCC(0x80, 0x9F), it3));
     77    PabloAST * const ED_invalid = it3.createAnd(it3.createAdvance(ccc.compileCC(makeCC(0xED), it3), 1), ccc.compileCC(makeCC(0xA0, 0xBF), it3));
     78    PabloAST * const EX_invalid = it3.createOr(E0_invalid, ED_invalid);
     79    it3.createAssign(EF_invalid, EX_invalid);
     80
     81
    8782    //
    8883    // Four-byte sequences
    89     Var * u8scope4nonfinal = it.createVar("u8scope4nonfinal", zero);
    90     Var * u8scope4X = it.createVar("u8scope4X", zero);
    91     Var * FX_invalid = it.createVar("FX_invalid", zero);
    9284    PabloBuilder it4 = PabloBuilder::Create(it);
    9385    it.createIf(u8pfx4, it4);
    94     PabloAST * u8scope42 = it4.createAdvance(u8pfx4, 1, "u8scope42");
    95     PabloAST * u8scope43 = it4.createAdvance(u8scope42, 1, "u8scope43");
    96     PabloAST * u8scope44 = it4.createAdvance(u8scope43, 1, "u8scope44");
    97     it4.createAssign(u8scope4nonfinal, it4.createOr(u8scope42, u8scope43));
    98     it4.createAssign(u8scope4X, it4.createOr(u8scope4nonfinal, u8scope44));
    99     PabloAST * F0_invalid = it4.createAnd(it4.createAdvance(ccc.compileCC(makeCC(0xF0), it4), 1), ccc.compileCC(makeCC(0x80, 0x8F), it4));
    100     PabloAST * F4_invalid = it4.createAnd(it4.createAdvance(ccc.compileCC(makeCC(0xF4), it4), 1), ccc.compileCC(makeCC(0x90, 0xBF), it4));
    101     it4.createAssign(FX_invalid, it4.createOr(F0_invalid, F4_invalid));
     86    PabloAST * const u8scope42 = it4.createAdvance(u8pfx4, 1, "u8scope42");
     87    PabloAST * const u8scope43 = it4.createAdvance(u8scope42, 1, "u8scope43");
     88    PabloAST * const u8scope44 = it4.createAdvance(u8scope43, 1, "u8scope44");
     89    PabloAST * const u8scope4nonfinal = it4.createOr(u8scope42, u8scope43);
     90    it4.createAssign(nonFinal, it4.createOr(nonFinal, u8scope4nonfinal));
     91    PabloAST * const u8scope4X = it4.createOr(u8scope4nonfinal, u8scope44);
     92    it4.createAssign(anyscope, it4.createOr(anyscope, u8scope4X));
     93    PabloAST * const F0_invalid = it4.createAnd(it4.createAdvance(ccc.compileCC(makeCC(0xF0), it4), 1), ccc.compileCC(makeCC(0x80, 0x8F), it4));
     94    PabloAST * const F4_invalid = it4.createAnd(it4.createAdvance(ccc.compileCC(makeCC(0xF4), it4), 1), ccc.compileCC(makeCC(0x90, 0xBF), it4));
     95    PabloAST * const FX_invalid = it4.createOr(F0_invalid, F4_invalid);
     96    it4.createAssign(EF_invalid, it4.createOr(EF_invalid, FX_invalid));
    10297   
    10398    //
    10499    // Invalid cases
    105     PabloAST * anyscope = it.createOr(u8scope22, it.createOr(u8scope3X, u8scope4X));
    106     PabloAST * legalpfx = it.createOr(it.createOr(u8pfx2, u8pfx3), u8pfx4);
     100    PabloAST * const legalpfx = it.createOr(it.createOr(u8pfx2, u8pfx3), u8pfx4);
    107101    //  Any scope that does not have a suffix byte, and any suffix byte that is not in
    108102    //  a scope is a mismatch, i.e., invalid UTF-8.
    109     PabloAST * mismatch = it.createXor(anyscope, u8suffix);
    110     //
    111     PabloAST * EF_invalid = it.createOr(EX_invalid, FX_invalid);
    112     PabloAST * pfx_invalid = it.createXor(u8pfx, legalpfx);
     103    PabloAST * const mismatch = it.createXor(anyscope, u8suffix);
     104    //
     105    PabloAST * const pfx_invalid = it.createXor(valid_pfx, legalpfx);
    113106    it.createAssign(u8invalid, it.createOr(pfx_invalid, it.createOr(mismatch, EF_invalid)));
    114     PabloAST * u8valid = it.createNot(u8invalid, "u8valid");
    115     //
    116     //
    117    
    118     it.createAssign(valid_pfx, it.createAnd(u8pfx, u8valid));
    119     it.createAssign(nonFinal, it.createAnd(it.createOr(it.createOr(u8pfx, u8scope32), u8scope4nonfinal), u8valid));
     107    PabloAST * const u8valid = it.createNot(u8invalid, "u8valid");
     108    //
     109    //
     110   
     111    it.createAssign(valid_pfx, it.createAnd(valid_pfx, u8valid));
     112    it.createAssign(nonFinal, it.createAnd(nonFinal, u8valid));
    120113   
    121114    PabloAST * u8single = pb.createAnd(ccc.compileCC(makeCC(0x00, 0x7F)), pb.createNot(u8invalid));
    122    
     115    PabloAST * const initial = pb.createOr(u8single, valid_pfx, "initial");
     116    PabloAST * const final = pb.createNot(pb.createOr(nonFinal, u8invalid), "final");
     117
    123118    Var * const required = getOutputStreamVar("required");
    124     pb.createAssign(pb.createExtract(required, pb.getInteger(0)), pb.createOr(u8single, valid_pfx, "initial"));
     119    pb.createAssign(pb.createExtract(required, pb.getInteger(0)), initial);
    125120    pb.createAssign(pb.createExtract(required, pb.getInteger(1)), nonFinal);
    126     pb.createAssign(pb.createExtract(required, pb.getInteger(2)), pb.createNot(pb.createOr(nonFinal, u8invalid), "final"));
    127     pb.createAssign(pb.createExtract(required, pb.getInteger(3)), crlf);
     121    pb.createAssign(pb.createExtract(required, pb.getInteger(2)), final);
     122
    128123}
    129124
    130125RequiredStreams_UTF8::RequiredStreams_UTF8(const std::unique_ptr<kernel::KernelBuilder> & kb)
    131 : PabloKernel(kb, "RequiredStreams_UTF8",               
    132               {Binding{kb->getStreamSetTy(8), "basis"}},
    133               {Binding{kb->getStreamSetTy(4), "required", FixedRate(), Add1()}},
    134               {},
    135               {}) {
     126: PabloKernel(kb, "RequiredStreams_UTF8",
     127// input
     128{Binding{kb->getStreamSetTy(8), "basis"}},
     129// output
     130{Binding{kb->getStreamSetTy(3), "required", FixedRate(), Add1()}}) {
     131
    136132}
    137133
     
    141137    auto & pb = ccc.getBuilder();
    142138   
    143     PabloAST * LF = ccc.compileCC("LF", makeCC(0x000A), pb);
    144     PabloAST * CR = ccc.compileCC("CR", makeCC(0x000D), pb);
    145     PabloAST * cr1 = pb.createAdvance(CR, 1, "cr1");
    146    
    147139    PabloAST * u16hi_hi_surrogate = ccc.compileCC(makeCC(0xD800, 0xDBFF));    //u16hi_hi_surrogate = [\xD8-\xDB]
    148140    PabloAST * u16hi_lo_surrogate = ccc.compileCC(makeCC(0xDC00, 0xDFFF));    //u16hi_lo_surrogate = [\xDC-\xDF]
     
    150142    PabloAST * invalidTemp = pb.createAdvance(u16hi_hi_surrogate, 1, "InvalidTemp");
    151143    PabloAST * u16invalid = pb.createXor(invalidTemp, u16hi_lo_surrogate, "u16invalid");
     144
    152145    PabloAST * u16valid = pb.createNot(u16invalid, "u16valid");
    153    
     146    PabloAST * nonFinal = pb.createAnd(u16hi_hi_surrogate, u16valid, "nonfinal");
     147
    154148    PabloAST * u16single_temp = pb.createOr(ccc.compileCC(makeCC(0x0000, 0xD7FF)), ccc.compileCC(makeCC(0xE000, 0xFFFF)));
    155149    PabloAST * u16single = pb.createAnd(u16single_temp, pb.createNot(u16invalid));
    156150
     151    PabloAST * const nonFinalCodeUnits = pb.createExtract(getInput(1), pb.getInteger(0));
     152    PabloAST * const initial = pb.createOr(u16single, u16hi_hi_surrogate, "initial");
     153    PabloAST * const final = pb.createNot(pb.createOr(pb.createOr(u16hi_hi_surrogate, u16invalid), nonFinalCodeUnits), "final");
     154
    157155    Var * const required = getOutputStreamVar("required");
    158     pb.createAssign(pb.createExtract(required, pb.getInteger(0)), pb.createOr(u16single, u16hi_hi_surrogate, "initial"));
    159     pb.createAssign(pb.createExtract(required, pb.getInteger(1)), pb.createAnd(u16hi_hi_surrogate, u16valid, "nonfinal"));
    160     pb.createAssign(pb.createExtract(required, pb.getInteger(2)), pb.createNot(pb.createOr(u16hi_hi_surrogate, u16invalid), "final"));
    161     pb.createAssign(pb.createExtract(required, pb.getInteger(3)), pb.createAnd(cr1, LF, "crlf"));
     156    pb.createAssign(pb.createExtract(required, pb.getInteger(0)), initial);
     157    pb.createAssign(pb.createExtract(required, pb.getInteger(1)), nonFinal);
     158    pb.createAssign(pb.createExtract(required, pb.getInteger(2)), final);
     159
    162160}
    163161
    164162RequiredStreams_UTF16::RequiredStreams_UTF16(const std::unique_ptr<kernel::KernelBuilder> & kb)
    165163: PabloKernel(kb, "RequiredStreams_UTF16",               
    166               {Binding{kb->getStreamSetTy(16), "basis"}},
    167               {Binding{kb->getStreamSetTy(4), "required", FixedRate(), Add1()}},
    168               {},
    169               {}) {
    170 }
    171 
     164// inputs
     165{Binding{kb->getStreamSetTy(8), "basis"}},
     166// output
     167{Binding{kb->getStreamSetTy(3), "required", FixedRate(), Add1()}}) {
     168
     169}
    172170
    173171ICGrepSignature::ICGrepSignature(re::RE * const re_ast)
     
    179177ICGrepKernel::ICGrepKernel(const std::unique_ptr<kernel::KernelBuilder> & iBuilder, RE * const re, unsigned numOfCharacterClasses)
    180178: ICGrepSignature(re)
    181 , PabloKernel(iBuilder,
    182               "ic" + sha1sum(mSignature),
    183               {Binding{iBuilder->getStreamSetTy(numOfCharacterClasses), "basis"},
    184                Binding{iBuilder->getStreamSetTy(1, 1), "linebreak"},
    185                Binding{iBuilder->getStreamSetTy(4, 1), "required"}},
    186               {Binding{iBuilder->getStreamSetTy(1, 1), "matches", FixedRate(), Add1()}}) {
     179, PabloKernel(iBuilder, "ic" + sha1sum(mSignature),
     180// inputs
     181{Binding{iBuilder->getStreamSetTy(numOfCharacterClasses), "basis"},
     182Binding{iBuilder->getStreamSetTy(1, 1), "linebreak"},
     183Binding{iBuilder->getStreamSetTy(1, 1), "cr+lf"},
     184Binding{iBuilder->getStreamSetTy(3, 1), "required"}},
     185// output
     186{Binding{iBuilder->getStreamSetTy(1, 1), "matches", FixedRate(), Add1()}}) {
    187187
    188188}
     
    211211MatchedLinesKernel::MatchedLinesKernel (const std::unique_ptr<kernel::KernelBuilder> & iBuilder)
    212212: PabloKernel(iBuilder, "MatchedLines",
    213               {Binding{iBuilder->getStreamSetTy(1), "matchResults"}, Binding{iBuilder->getStreamSetTy(1), "lineBreaks"}},
    214               {Binding{iBuilder->getStreamSetTy(1), "matchedLines"}},
    215               {},
    216               {}) {
     213// inputs
     214{Binding{iBuilder->getStreamSetTy(1), "matchResults"}
     215,Binding{iBuilder->getStreamSetTy(1), "lineBreaks"}},
     216// output
     217{Binding{iBuilder->getStreamSetTy(1), "matchedLines"}}) {
     218
    217219}
    218220
  • icGREP/icgrep-devel/icgrep/kernels/interface.h

    r5757 r5782  
    3333
    3434    Binding(llvm::Type * type, const std::string & name, ProcessingRate r, Attribute && attribute)
    35     : AttributeSet({std::move(attribute)})
     35    : AttributeSet(std::move(attribute))
    3636    , mType(type), mName(name), mRate(std::move(r)) { }
    3737
     
    5858
    5959    bool isPrincipal() const {
    60         return hasAttribute(Attribute::KindId::Principal);
     60        return hasAttribute(AttributeId::Principal);
     61    }
     62
     63    bool hasLookahead() const {
     64        return hasAttribute(AttributeId::LookAhead);
     65    }
     66
     67    unsigned const getLookahead() const {
     68        return findAttribute(AttributeId::LookAhead).amount();
    6169    }
    6270
    6371    bool nonDeferred() const {
    64         return !hasAttribute(Attribute::KindId::Deferred);
     72        return !hasAttribute(AttributeId::Deferred);
    6573    }
    6674
     
    159167    }
    160168
    161     unsigned getLookAhead(const unsigned i) const {
    162         return 0;
    163     }
    164 
    165     void setLookAhead(const unsigned i, const unsigned lookAheadPositions) {
    166 
    167     }
    168 
    169169protected:
    170170
     
    178178
    179179    KernelInterface(const std::string && kernelName,
    180                     std::vector<Binding> && stream_inputs,
    181                     std::vector<Binding> && stream_outputs,
    182                     std::vector<Binding> && scalar_inputs,
    183                     std::vector<Binding> && scalar_outputs,
    184                     std::vector<Binding> && internal_scalars)
     180                    Bindings && stream_inputs,
     181                    Bindings && stream_outputs,
     182                    Bindings && scalar_inputs,
     183                    Bindings && scalar_outputs,
     184                    Bindings && internal_scalars)
    185185    : mKernelInstance(nullptr)
    186186    , mModule(nullptr)
     
    198198protected:
    199199
    200     llvm::Value *                           mKernelInstance;
    201     llvm::Module *                          mModule;
    202     llvm::StructType *                      mKernelStateType;
    203     bool                                    mHasPrincipalItemCount;
    204     const std::string                       mKernelName;
    205     std::vector<llvm::Value *>              mInitialArguments;
    206     std::vector<Binding>                    mStreamSetInputs;
    207     std::vector<Binding>                    mStreamSetOutputs;
    208     std::vector<Binding>                    mScalarInputs;
    209     std::vector<Binding>                    mScalarOutputs;
    210     std::vector<Binding>                    mInternalScalars;
     200    llvm::Value *                   mKernelInstance;
     201    llvm::Module *                  mModule;
     202    llvm::StructType *              mKernelStateType;
     203    bool                            mHasPrincipalItemCount;
     204    const std::string               mKernelName;
     205    std::vector<llvm::Value *>      mInitialArguments;
     206    Bindings                        mStreamSetInputs;
     207    Bindings                        mStreamSetOutputs;
     208    Bindings                        mScalarInputs;
     209    Bindings                        mScalarOutputs;
     210    Bindings                        mInternalScalars;
    211211};
    212212
  • icGREP/icgrep-devel/icgrep/kernels/kernel.cpp

    r5771 r5782  
    624624
    625625/** ------------------------------------------------------------------------------------------------------------- *
    626  * @brief roundUp
    627  ** ------------------------------------------------------------------------------------------------------------- */
    628 unsigned roundUp(const ProcessingRate::RateValue & r) {
    629     if (LLVM_LIKELY(r.denominator() == 1)) {
    630         return r.numerator();
    631     } else {
    632         return (r.numerator() + r.denominator() - 1) / r.denominator();
    633     }
    634 }
    635 
    636 /** ------------------------------------------------------------------------------------------------------------- *
    637626 * @brief getItemAlignment
    638627 ** ------------------------------------------------------------------------------------------------------------- */
    639628inline unsigned MultiBlockKernel::getItemAlignment(const Binding & binding) const {
    640629    const auto & rate = binding.getRate();
    641     if (rate.isFixed()) {
    642         const auto & r = rate.getRate();
    643         const auto n = (r.numerator() * mStride);
     630    if (rate.isFixed() && binding.nonDeferred()) {
     631        const auto r = rate.getRate();
     632        auto n = (r.numerator() * mStride);
    644633        if (LLVM_LIKELY(r.denominator() == 1)) {
    645634            return n;
     
    675664    }
    676665
     666    using AttributeId = kernel::Attribute::KindId;
     667    using RateValue = ProcessingRate::RateValue;
     668
    677669    const auto inputSetCount = mStreamSetInputs.size();
    678670    const auto outputSetCount = mStreamSetOutputs.size();
     
    682674    AllocaInst * temporaryInputBuffer[inputSetCount];
    683675    for (unsigned i = 0; i < inputSetCount; ++i) {
    684         const auto & input = mStreamSetInputs[i];
     676        const Binding & input = mStreamSetInputs[i];
    685677        const ProcessingRate & rate = input.getRate();
    686678        if (isTransitivelyUnknownRate(rate)) {
    687679            report_fatal_error("MultiBlock kernels do not support unknown rate input streams or streams relative to an unknown rate input.");
    688         } else if (rate.isFixed() && input.nonDeferred() && !requiresBufferedFinalStride(input)) {
     680        } else if (rate.isFixed() && !requiresBufferedFinalStride(input)) {
    689681            temporaryInputBuffer[i] = nullptr;
    690682        } else {
    691683            Type * const ty = mStreamSetInputBuffers[i]->getStreamSetBlockType();
    692             const auto ub = getUpperBound(rate);
    693             Constant * arraySize = b->getInt64(roundUp(ub));
     684            auto ub = getUpperBound(rate);
     685            if (LLVM_UNLIKELY(input.hasLookahead())) {
     686                ub += RateValue(input.getLookahead(), mStride);
     687            }
     688            Constant * const arraySize = b->getInt64(ceiling(ub));
    694689            AllocaInst * const ptr = b->CreateAlignedAlloca(ty, blockAlignment, arraySize);
    695690            assert (ptr->isStaticAlloca());
     
    700695    AllocaInst * temporaryOutputBuffer[outputSetCount];
    701696    for (unsigned i = 0; i < outputSetCount; i++) {
    702         const auto & output = mStreamSetOutputs[i];
     697        const Binding & output = mStreamSetOutputs[i];
    703698        const ProcessingRate & rate = output.getRate();
    704         if (LLVM_UNLIKELY(isTransitivelyUnknownRate(rate) || (rate.isFixed() && output.nonDeferred() && !requiresBufferedFinalStride(output)))) {
     699        if (LLVM_UNLIKELY(isTransitivelyUnknownRate(rate) || (rate.isFixed() && !requiresBufferedFinalStride(output)))) {
    705700            temporaryOutputBuffer[i] = nullptr;
    706701        } else {           
     
    710705            }
    711706            Type * const ty = mStreamSetOutputBuffers[i]->getStreamSetBlockType();
    712             Constant * arraySize = b->getInt64(roundUp(ub));
     707            Constant * const arraySize = b->getInt64(ceiling(ub));
    713708            AllocaInst * const ptr = b->CreateAlignedAlloca(ty, blockAlignment, arraySize);
    714709            assert (ptr->isStaticAlloca());
     
    742737    // linearly available strides.
    743738    Value * numOfStrides = nullptr;
    744     mInitialAvailableItemCount.resize(inputSetCount);
     739    mInitialAvailableItemCount.assign(mAvailableItemCount.begin(), mAvailableItemCount.end());
    745740    mInitialProcessedItemCount.resize(inputSetCount);
    746741    mStreamSetInputBaseAddress.resize(inputSetCount);
    747742    Value * inputStrideSize[inputSetCount];
    748743    for (unsigned i = 0; i < inputSetCount; i++) {
    749         const auto & input = mStreamSetInputs[i];
     744        const Binding & input = mStreamSetInputs[i];
    750745        const auto & name = input.getName();
    751746        const ProcessingRate & rate = input.getRate();
    752         Value * const ic = b->getProcessedItemCount(name);
    753         mInitialProcessedItemCount[i] = ic;
     747        Value * processed = b->getProcessedItemCount(name);
     748        //b->CallPrintInt(getName() + "_" + name + "_processed", processed);
     749
     750        mInitialProcessedItemCount[i] = processed;
     751        Value * baseBuffer  = b->getBlockAddress(name, b->CreateLShr(processed, LOG_2_BLOCK_WIDTH));
     752
    754753        if (LLVM_UNLIKELY(codegen::DebugOptionIsSet(codegen::EnableAsserts))) {
    755             b->CreateAssert(b->CreateICmpUGE(mAvailableItemCount[i], ic),
    756                             "processed item count cannot exceed the available item count");
    757         }
    758         assert (ic->getType() == mAvailableItemCount[i]->getType());
    759         Value * const unprocessed = b->CreateSub(mAvailableItemCount[i], ic);
    760         Value * baseBuffer  = b->getBlockAddress(name, b->CreateLShr(ic, LOG_2_BLOCK_WIDTH));
    761         mInitialAvailableItemCount[i] = mAvailableItemCount[i];
    762         mAvailableItemCount[i] = b->getLinearlyAccessibleItems(name, ic, unprocessed);
    763 
    764         // Are our linearly accessible items sufficient for a stride?
     754            b->CreateAssert(b->CreateICmpULT(processed, mAvailableItemCount[i]), "processed item count must be less than the available item count");
     755        }
     756
     757        Value * const unprocessed = b->CreateSub(mAvailableItemCount[i], processed);
     758        //b->CallPrintInt(getName() + "_" + name + "_unprocessed", unprocessed);
     759
     760        Value * avail = b->getLinearlyAccessibleItems(name, processed, unprocessed);
     761        //b->CallPrintInt(getName() + "_" + name + "_avail", avail);
     762
     763
     764        // Ensure that everything between S⌈P/S⌉, and S⌈n*(P + L)/S⌉ is linearly available, where S is
     765        // the stride size, P is the current processed position, L is the lookahead amount and n ∈ ℤ+.
     766
     767        Value * remaining = avail;
     768        if (LLVM_UNLIKELY(input.hasLookahead())) {
     769            Constant * const lookahead = b->getSize(input.getLookahead());
     770            remaining = b->CreateSelect(b->CreateICmpULT(lookahead, remaining), b->CreateSub(remaining, lookahead), ZERO);
     771            //b->CallPrintInt(getName() + "_" + name + "_remaining", remaining);
     772        }
     773
    765774        inputStrideSize[i] = getStrideSize(b, rate);
    766         Value * accessibleStrides = b->CreateUDiv(mAvailableItemCount[i], inputStrideSize[i]);
     775
     776        Value * accessibleStrides = b->CreateUDiv(remaining, inputStrideSize[i]);
     777
     778        //b->CallPrintInt(getName() + "_" + name + "_accessibleStrides", accessibleStrides);
     779
    767780        AllocaInst * const tempBuffer = temporaryInputBuffer[i];
    768781        if (tempBuffer) {
     
    779792
    780793            b->SetInsertPoint(copyFromBack);
    781             Value * const temporaryAvailable = b->CreateUMin(unprocessed, inputStrideSize[i]);
     794            Value * const temporarySize = b->CreateMul(tempBuffer->getArraySize(), b->getSize(mStride));
     795            Value * const temporaryAvailable = b->CreateUMin(unprocessed, temporarySize);
    782796            if (LLVM_UNLIKELY(codegen::DebugOptionIsSet(codegen::EnableAsserts))) {
    783                 b->CreateAssert(b->CreateICmpULE(mAvailableItemCount[i], temporaryAvailable),
    784                                 "linearly available cannot be greater than temporarily available");
    785             }
    786             Value * const offset = b->CreateAnd(ic, BLOCK_WIDTH_MASK);
     797                b->CreateAssert(b->CreateICmpULE(avail, temporaryAvailable),
     798                                "linearly available item count cannot exceed the temporarily available item count");
     799            }
     800            Value * const offset = b->CreateAnd(processed, BLOCK_WIDTH_MASK);
    787801            Value * const bufferSize = b->CreateMul(ConstantExpr::getSizeOf(tempBuffer->getAllocatedType()), tempBuffer->getArraySize());
    788802            b->CreateMemZero(tempBuffer, bufferSize, blockAlignment);
    789803            const auto copyAlignment = getItemAlignment(mStreamSetInputs[i]);
    790             b->CreateStreamCpy(name, tempBuffer, ZERO, baseBuffer, offset, mAvailableItemCount[i], copyAlignment);
     804            b->CreateStreamCpy(name, tempBuffer, ZERO, baseBuffer, offset, avail, copyAlignment);
    791805            Value * const temporaryStrides = b->CreateSelect(b->CreateICmpULT(unprocessed, inputStrideSize[i]), ZERO, ONE);
    792806            BasicBlock * const copyToBackEnd = b->GetInsertBlock();
    793             b->CreateCondBr(b->CreateICmpNE(mAvailableItemCount[i], temporaryAvailable), copyFromFront, resume);
     807            b->CreateCondBr(b->CreateICmpNE(temporaryAvailable, unprocessed), copyFromFront, resume);
    794808
    795809            b->SetInsertPoint(copyFromFront);
    796             Value * const remaining = b->CreateSub(temporaryAvailable, mAvailableItemCount[i]);
     810            Value * const remaining = b->CreateSub(temporaryAvailable, avail);
    797811            Value * const baseAddress = b->getBaseAddress(name);
    798             b->CreateStreamCpy(name, tempBuffer, mAvailableItemCount[i], baseAddress, ZERO, remaining, copyAlignment);
     812            b->CreateStreamCpy(name, tempBuffer, avail, baseAddress, ZERO, remaining, copyAlignment);
    799813            BasicBlock * const copyToFrontEnd = b->GetInsertBlock();
    800814            b->CreateBr(resume);
     
    808822
    809823            PHINode * const phiAvailItemCount = b->CreatePHI(b->getSizeTy(), 3);
    810             phiAvailItemCount->addIncoming(mAvailableItemCount[i], entry);
     824            phiAvailItemCount->addIncoming(avail, entry);
    811825            phiAvailItemCount->addIncoming(temporaryAvailable, copyToBackEnd);
    812826            phiAvailItemCount->addIncoming(temporaryAvailable, copyToFrontEnd);
    813             mAvailableItemCount[i] = phiAvailItemCount;
     827            avail = phiAvailItemCount;
    814828
    815829            PHINode * const phiStrides = b->CreatePHI(b->getSizeTy(), 2);
     
    819833            accessibleStrides = phiStrides;
    820834        }
    821 
     835        mAvailableItemCount[i] = avail;
    822836        mStreamSetInputBaseAddress[i] = baseBuffer;
    823837        numOfStrides = b->CreateUMin(numOfStrides, accessibleStrides);
     
    833847        const auto & name = output.getName();
    834848        const ProcessingRate & rate = output.getRate();
    835         Value * const ic = b->getProducedItemCount(name);
    836         Value * baseBuffer = b->getBlockAddress(name, b->CreateLShr(ic, LOG_2_BLOCK_WIDTH));
     849        Value * const produced = b->getProducedItemCount(name);
     850
     851        //b->CallPrintInt(getName() + "_" + name + "_produced", produced);
     852
     853        Value * baseBuffer = b->getBlockAddress(name, b->CreateLShr(produced, LOG_2_BLOCK_WIDTH));
    837854        assert (baseBuffer->getType()->isPointerTy());
    838         linearlyWritable[i] = b->getLinearlyWritableItems(name, ic);       
     855        linearlyWritable[i] = b->getLinearlyWritableItems(name, produced);
     856
     857        //b->CallPrintInt(getName() + "_" + name + "_linearlyWritable", linearlyWritable[i]);
     858
    839859        outputStrideSize[i] = getStrideSize(b, rate);
    840860        // Is the number of linearly writable items sufficient for a stride?
     
    842862            AllocaInst * const tempBuffer = temporaryOutputBuffer[i];
    843863            Value * writableStrides = b->CreateUDiv(linearlyWritable[i], outputStrideSize[i]);
     864            //b->CallPrintInt(getName() + "_" + name + "_writableStrides", writableStrides);
     865
     866
    844867            // Do we require a temporary buffer to write to?
    845868            if (tempBuffer) {
    846869                assert (tempBuffer->getType() == baseBuffer->getType());
    847870                BasicBlock * const entry = b->GetInsertBlock();
    848                 BasicBlock * const useTemporary = b->CreateBasicBlock(name + "UseTemporary");
     871                BasicBlock * const clearBuffer = b->CreateBasicBlock(name + "ClearTemporaryBuffer");
    849872                BasicBlock * const resume = b->CreateBasicBlock(name + "Resume");
    850873                Value * const requiresCopy = b->CreateICmpEQ(writableStrides, ZERO);
    851 
    852                 b->CreateUnlikelyCondBr(requiresCopy, useTemporary, resume);
    853 
    854                 // Clear the buffer after use since we may end up reusing it within the same stride
    855                 b->SetInsertPoint(useTemporary);
     874                b->CreateUnlikelyCondBr(requiresCopy, clearBuffer, resume);
     875                // Clear the output buffer prior to using it
     876                b->SetInsertPoint(clearBuffer);
    856877                Value * const bufferSize = b->CreateMul(ConstantExpr::getSizeOf(tempBuffer->getAllocatedType()), tempBuffer->getArraySize());
    857878                b->CreateMemZero(tempBuffer, bufferSize, blockAlignment);
    858879                b->CreateBr(resume);
    859 
     880                // Select the appropriate buffer / stride #
    860881                b->SetInsertPoint(resume);
    861882                PHINode * const phiBuffer = b->CreatePHI(baseBuffer->getType(), 3);
    862883                phiBuffer->addIncoming(baseBuffer, entry);
    863                 phiBuffer->addIncoming(tempBuffer, useTemporary);
     884                phiBuffer->addIncoming(tempBuffer, clearBuffer);
    864885                baseBuffer = phiBuffer;
    865886                PHINode * const phiStrides = b->CreatePHI(b->getSizeTy(), 2);
    866887                phiStrides->addIncoming(writableStrides, entry);
    867                 phiStrides->addIncoming(ONE, useTemporary);
     888                phiStrides->addIncoming(ONE, clearBuffer);
    868889                writableStrides = phiStrides;
    869 
    870890            }
    871891            numOfStrides = b->CreateUMin(numOfStrides, writableStrides);
    872892        }
    873         mInitialProducedItemCount[i] = ic;
     893        mInitialProducedItemCount[i] = produced;
    874894        mStreamSetOutputBaseAddress[i] = baseBuffer;
    875895    }
     
    885905        }
    886906        for (unsigned i = 0; i < inputSetCount; ++i) {
    887             const ProcessingRate & rate = mStreamSetInputs[i].getRate();
    888             if (rate.isFixed() && mStreamSetInputs[i].nonDeferred()) {
     907            const auto & input = mStreamSetInputs[i];
     908            const ProcessingRate & rate = input.getRate();
     909            if (rate.isFixed() && input.nonDeferred()) {
    889910                mAvailableItemCount[i] = b->CreateSelect(mIsFinal, mAvailableItemCount[i], b->CreateMul(numOfStrides, inputStrideSize[i]));
    890911            }
     
    896917
    897918    for (unsigned i = 0; i < inputSetCount; ++i) {
    898         const ProcessingRate & rate = mStreamSetInputs[i].getRate();
    899         if (rate.isFixed() && mStreamSetInputs[i].nonDeferred()) {
     919        const auto & input = mStreamSetInputs[i];
     920        const ProcessingRate & rate = input.getRate();
     921        if (rate.isFixed() && input.nonDeferred()) {
    900922            Value * const ic = b->CreateAdd(mInitialProcessedItemCount[i], mAvailableItemCount[i]);
    901             b->setProcessedItemCount(mStreamSetInputs[i].getName(), ic);
     923            b->setProcessedItemCount(input.getName(), ic);
    902924        }
    903925    }
    904926
    905927    for (unsigned i = 0; i < outputSetCount; ++i) {
    906         const ProcessingRate & rate = mStreamSetOutputs[i].getRate();
     928        const auto & output = mStreamSetOutputs[i];
     929        const ProcessingRate & rate = output.getRate();
    907930        if (rate.isFixed()) {
    908             assert (mStreamSetOutputs[i].nonDeferred());
     931            assert (output.nonDeferred());
    909932            Value * const produced = b->CreateMul(numOfStrides, outputStrideSize[i]);
    910933            Value * const ic = b->CreateAdd(mInitialProducedItemCount[i], produced);
    911             b->setProducedItemCount(mStreamSetOutputs[i].getName(), ic);
     934            b->setProducedItemCount(output.getName(), ic);
    912935        }
    913936    }
     
    9901013    Value * hasMoreStrides = b->getTrue();
    9911014    for (unsigned i = 0; i < inputSetCount; ++i) {
    992         const auto & name = mStreamSetInputs[i].getName();
     1015        const Binding & input = mStreamSetInputs[i];
     1016        const auto & name = input.getName();
    9931017        Value * const avail = mInitialAvailableItemCount[i];
    9941018        Value * const processed = b->getProcessedItemCount(name);
    9951019        if (LLVM_UNLIKELY(codegen::DebugOptionIsSet(codegen::EnableAsserts))) {
    996             b->CreateAssert(b->CreateICmpULE(processed, avail), name + ": processed data cannot exceed available data");
    997         }
    998         Value * const remaining = b->CreateSub(avail, processed);
     1020            b->CreateAssert(b->CreateICmpULE(processed, avail), getName() + "." + name + ": processed data exceeds available data");
     1021        }
     1022        Value * remaining = b->CreateSub(avail, processed);
     1023        if (LLVM_UNLIKELY(input.hasAttribute(AttributeId::LookAhead))) {
     1024            Constant * const lookahead = b->getSize(input.findAttribute(AttributeId::LookAhead).amount());
     1025            remaining = b->CreateSelect(b->CreateICmpULT(lookahead, remaining), b->CreateSub(remaining, lookahead), ZERO);
     1026        }
    9991027        Value * const remainingStrides = b->CreateUDiv(remaining, inputStrideSize[i]);
    10001028        Value * const hasRemainingStrides = b->CreateICmpNE(remainingStrides, ZERO);
     
    10131041            Value * const consumed = b->getConsumedItemCount(name);
    10141042            if (LLVM_UNLIKELY(codegen::DebugOptionIsSet(codegen::EnableAsserts))) {
    1015                 b->CreateAssert(b->CreateICmpULE(consumed, produced), name + ": consumed data cannot exceed produced data");
     1043                b->CreateAssert(b->CreateICmpULE(consumed, produced), getName() + "." + name + ": consumed data exceeds produced data");
    10161044            }
    10171045            Value * const unconsumed = b->CreateSub(produced, consumed);
    10181046            Value * const capacity = b->getCapacity(name);
    10191047            if (LLVM_UNLIKELY(codegen::DebugOptionIsSet(codegen::EnableAsserts))) {
    1020                 b->CreateAssert(b->CreateICmpULE(unconsumed, capacity), name + ": unconsumed data cannot exceed capacity");
     1048                b->CreateAssert(b->CreateICmpULE(unconsumed, capacity), getName() + "." + name + ": unconsumed data exceeds capacity");
    10211049            }
    10221050            Value * const remaining = b->CreateSub(capacity, unconsumed);
     
    11831211        for (const Attribute & attr : output.getAttributes()) {
    11841212            if (attr.isAdd()) {
    1185                 produced = b->CreateAdd(produced, b->getSize(attr.getAmount()));
     1213                produced = b->CreateAdd(produced, b->getSize(attr.amount()));
    11861214            } else if (attr.isRoundUpTo()) {
    1187                 produced = b->CreateRoundUp(produced, b->getSize(attr.getAmount()));
     1215                produced = b->CreateRoundUp(produced, b->getSize(attr.amount()));
    11881216            }
    11891217        }
  • icGREP/icgrep-devel/icgrep/kernels/kernel.h

    r5757 r5782  
    152152    }
    153153
     154    const Binding & getStreamInput(const parabix::StreamSetBuffer * const buffer) const {
     155        for (unsigned i = 0; i < mStreamSetInputBuffers.size(); ++i) {
     156            if (mStreamSetInputBuffers[i] == buffer) {
     157                return getStreamInput(i);
     158            }
     159        }
     160        throw std::runtime_error("no output binding found given buffer");
     161    }
     162
    154163    const parabix::StreamSetBuffer * getStreamSetOutputBuffer(const unsigned i) const {
    155164        assert (i < mStreamSetOutputBuffers.size());
     
    166175    const Binding & getStreamOutput(const unsigned i) const {
    167176        return KernelInterface::getStreamOutput(i);
     177    }
     178
     179    const Binding & getStreamOutput(const parabix::StreamSetBuffer * const buffer) const {
     180        for (unsigned i = 0; i < mStreamSetOutputBuffers.size(); ++i) {
     181            if (mStreamSetOutputBuffers[i] == buffer) {
     182                return getStreamOutput(i);
     183            }
     184        }
     185        throw std::runtime_error("no output binding found given buffer");
    168186    }
    169187
  • icGREP/icgrep-devel/icgrep/kernels/kernel_builder.cpp

    r5757 r5782  
    8282            itemCount = CreateExactUDiv(itemCount, ConstantInt::get(itemCount->getType(), r.denominator()));
    8383        }
     84    } else if (LLVM_UNLIKELY(rate.isPopCount())) {
     85        Port port; unsigned index;
     86        std::tie(port, index) = mKernel->getStreamPort(rate.getReference());
     87
     88
     89
     90
    8491    } else {
    8592        itemCount = getScalarField(name + suffix);
     
    133140Value * KernelBuilder::getLinearlyWritableItems(const std::string & name, Value * fromPosition, bool reverse) {
    134141    const StreamSetBuffer * const buf = mKernel->getOutputStreamSetBuffer(name);
    135     return buf->getLinearlyWritableItems(this, getStreamHandle(name), fromPosition, reverse);
     142    return buf->getLinearlyWritableItems(this, getStreamHandle(name), fromPosition, getConsumedItemCount(name), reverse);
    136143}
    137144
     
    195202    // (w.r.t the stream copy) would be n*m. By taking this into account we can optimize and simplify the copy code.
    196203    const auto fieldWidth = getFieldWidth(itemWidth * itemAlignment, blockWidth);
    197     assert ("overflow error" && is_power_2(fieldWidth) && (itemWidth <= fieldWidth) && (fieldWidth <= blockWidth));
     204
     205//    CallPrintInt(mKernel->getName() + "_" + name + "_target", target);
     206//    CallPrintInt(mKernel->getName() + "_" + name + "_targetOffset", targetOffset);
     207//    CallPrintInt(mKernel->getName() + "_" + name + "_source", source);
     208//    CallPrintInt(mKernel->getName() + "_" + name + "_sourceOffset", sourceOffset);
     209//    CallPrintInt(mKernel->getName() + "_" + name + "_itemsToCopy", itemsToCopy);
    198210
    199211    if (LLVM_LIKELY(itemWidth < fieldWidth)) {
     
    223235
    224236       So if we're copying the entire stream set block or our stream set has one element, we can use memcpy.
     237
     238       One complication here is when the BlockSize of a stream is not equal to the BitBlockWidth.
     239
    225240
    226241    */
     
    421436
    422437Value * KernelBuilder::loadInputStreamPack(const std::string & name, Value * streamIndex, Value * packIndex) {
    423 
    424 
    425 
    426438    return CreateBlockAlignedLoad(getInputStreamPackPtr(name, streamIndex, packIndex));
    427439}
     
    432444}
    433445
    434 Value * KernelBuilder::getAdjustedInputStreamBlockPtr(Value * blockAdjustment, const std::string & name, Value * streamIndex) {
     446Value * KernelBuilder::getInputStreamBlockPtr(const std::string & name, Value * const streamIndex, Value * const blockOffset) {
    435447    Value * const addr = mKernel->getStreamSetInputAddress(name);
    436448    if (addr) {
    437         return CreateGEP(addr, {blockAdjustment, streamIndex});
     449        return CreateGEP(addr, {blockOffset, streamIndex});
    438450    } else {
    439451        const StreamSetBuffer * const buf = mKernel->getInputStreamSetBuffer(name);
    440452        Value * blockIndex = CreateLShr(getProcessedItemCount(name), std::log2(getBitBlockWidth()));
    441         blockIndex = CreateAdd(blockIndex, blockAdjustment);
     453        blockIndex = CreateAdd(blockIndex, blockOffset);
    442454        return buf->getStreamBlockPtr(this, getStreamHandle(name), getBaseAddress(name), streamIndex, blockIndex, true);
    443455    }
  • icGREP/icgrep-devel/icgrep/kernels/kernel_builder.h

    r5755 r5782  
    9191    llvm::Value * getOutputStreamSetCount(const std::string & name);
    9292
    93     llvm::Value * getAdjustedInputStreamBlockPtr(llvm::Value * blockAdjustment, const std::string & name, llvm::Value * streamIndex);
     93    llvm::Value * getInputStreamBlockPtr(const std::string & name, llvm::Value * streamIndex, llvm::Value * blockOffset);
    9494
    9595    llvm::Value * getRawInputPointer(const std::string & name, llvm::Value * absolutePosition);
  • icGREP/icgrep-devel/icgrep/kernels/linebreak_kernel.cpp

    r5755 r5782  
    1414#include <kernels/kernel_builder.h>
    1515
     16#include <llvm/Support/raw_ostream.h>
     17
    1618using namespace cc;
    1719using namespace kernel;
     
    2022using namespace llvm;
    2123
    22 LineBreakKernelBuilder::LineBreakKernelBuilder(const std::unique_ptr<kernel::KernelBuilder> & b, unsigned basisBitsCount)
    23 : PabloKernel(b, "lb",
    24     {Binding{b->getStreamSetTy(basisBitsCount), "basis", FixedRate(), Principal()}},
    25     {Binding{b->getStreamSetTy(1), "linebreak", FixedRate(), Add1()}}) {
     24
     25LineFeedKernelBuilder::LineFeedKernelBuilder(const std::unique_ptr<kernel::KernelBuilder> & b, const unsigned basisBitsCount)
     26: PabloKernel(b, "lf" + std::to_string(basisBitsCount),
     27// input
     28{Binding{b->getStreamSetTy(basisBitsCount), "basis", FixedRate(), Principal()}},
     29// output
     30{Binding{b->getStreamSetTy(1), "lf"}}) {
     31
     32}
     33
     34void LineFeedKernelBuilder::generatePabloMethod() {
     35    CC_Compiler ccc(this, getInput(0));
     36    auto & pb = ccc.getBuilder();
     37    PabloAST * LF = ccc.compileCC("LF", makeCC(0x0A), pb);
     38    pb.createAssign(pb.createExtract(getOutput(0), pb.getInteger(0)), LF);
     39}
     40
     41LineBreakKernelBuilder::LineBreakKernelBuilder(const std::unique_ptr<kernel::KernelBuilder> & b, const unsigned basisBitsCount)
     42: PabloKernel(b, "lb" + std::to_string(basisBitsCount),
     43// inputs
     44{Binding{b->getStreamSetTy(basisBitsCount), "basis", FixedRate(), Principal()}
     45,Binding{b->getStreamSetTy(1), "lf", FixedRate(), LookAhead(1)}},
     46// outputs
     47{Binding{b->getStreamSetTy(1), "linebreak", FixedRate(), Add1()}
     48,Binding{b->getStreamSetTy(1), "cr+lf"}}) {
    2649
    2750}
    2851
    2952void LineBreakKernelBuilder::generatePabloMethod() {
    30 
    3153    CC_Compiler ccc(this, getInput(0));
    3254    auto & pb = ccc.getBuilder();
    3355
    34     PabloAST * LineBreak = nullptr;
    35     PabloAST * LF = ccc.compileCC("LF", makeCC(0x0A), pb);
    36     PabloAST * CR = ccc.compileCC(makeCC(0x0D));
    37     PabloAST * LF_VT_FF_CR = ccc.compileCC(makeCC(0x0A, 0x0D));
     56    Integer * const ZERO = pb.getInteger(0);
    3857
    39     Zeroes * const zero = pb.createZeroes();
    40     Var * crlf = pb.createVar("crlf", zero);
     58    PabloAST * const LF = pb.createExtract(getInput(1), ZERO, "LF");
     59    PabloAST * const CR = ccc.compileCC(makeCC(0x0D));
     60    PabloAST * const LF_VT_FF_CR = ccc.compileCC(makeCC(0x0A, 0x0D));
     61    Var * const LineBreak = pb.createVar("LineBreak", LF_VT_FF_CR);
     62
     63    // Remove the CR of any CR+LF
     64    Var * const CRLF = pb.createVar("CRLF", pb.createZeroes());
    4165    PabloBuilder crb = PabloBuilder::Create(pb);
    42 #ifndef USE_LOOKAHEAD_CRLF
    43     PabloAST * cr1 = crb.createAdvance(CR, 1, "cr1");
    44     crb.createAssign(crlf, crb.createAnd(cr1, LF));
    45 #else
    46     PabloAST * lookaheadLF = crb.createLookahead(LF, 1, "lookaheadLF");
    47     crb.createAssign(crlf, crb.createAnd(CR, lookaheadLF));
    48 #endif
    4966    pb.createIf(CR, crb);
     67    PabloAST * const lookaheadLF = crb.createLookahead(LF, 1, "lookaheadLF");
     68    PabloAST * const crlf = crb.createAnd(CR, lookaheadLF);
     69    crb.createAssign(CRLF, crlf);
     70    PabloAST * removedCRLF = crb.createAnd(LineBreak, crb.createNot(CRLF));
     71    crb.createAssign(LineBreak, removedCRLF);
     72    // Record the CR marker of any CR+LF
     73    pb.createAssign(pb.createExtract(getOutput(1), ZERO), CRLF);
    5074
    51     Var * NEL_LS_PS = pb.createVar("NEL_LS_PS", zero);
    52 
     75    // Check for Unicode Line Breaks
    5376    PabloAST * u8pfx = ccc.compileCC(makeCC(0xC0, 0xFF));
    5477    PabloBuilder it = PabloBuilder::Create(pb);
     
    5780    PabloAST * u8pfx3 = ccc.compileCC(makeCC(0xE0, 0xEF), it);
    5881
    59     //
    6082    // Two-byte sequences
    61     Var * NEL = it.createVar("NEL", zero);
    6283    PabloBuilder it2 = PabloBuilder::Create(it);
    63     it2.createAssign(NEL, it2.createAnd(it2.createAdvance(ccc.compileCC(makeCC(0xC2), it2), 1), ccc.compileCC(makeCC(0x85), it2)));
    6484    it.createIf(u8pfx2, it2);
     85    PabloAST * NEL = it2.createAnd(it2.createAdvance(ccc.compileCC(makeCC(0xC2), it2), 1), ccc.compileCC(makeCC(0x85), it2), "NEL");
     86    it2.createAssign(LineBreak, it2.createOr(LineBreak, NEL));
    6587
    66     //
    6788    // Three-byte sequences
    68     Var * LS_PS = it.createVar("LS_PS", zero);
    6989    PabloBuilder it3 = PabloBuilder::Create(it);
    7090    it.createIf(u8pfx3, it3);
    7191    PabloAST * E2_80 = it3.createAnd(it3.createAdvance(ccc.compileCC(makeCC(0xE2), it3), 1), ccc.compileCC(makeCC(0x80), it3));
    72     it3.createAssign(LS_PS, it3.createAnd(it3.createAdvance(E2_80, 1), ccc.compileCC(makeCC(0xA8,0xA9), it3)));
    73     it.createAssign(NEL_LS_PS, it.createOr(NEL, LS_PS));
     92    PabloAST * LS_PS = it3.createAnd(it3.createAdvance(E2_80, 1), ccc.compileCC(makeCC(0xA8,0xA9), it3), "LS_PS");
     93    it3.createAssign(LineBreak, it3.createOr(LineBreak, LS_PS));
    7494
    75     PabloAST * LB_chars = pb.createOr(LF_VT_FF_CR, NEL_LS_PS);
    76     PabloAST * lb = nullptr;
    77     if (AlgorithmOptionIsSet(DisableUnicodeLineBreak)) {
    78         lb = LF;
    79     } else {
    80         lb = pb.createAnd(LB_chars, pb.createNot(crlf));  // count the CR, but not CRLF
    81     }
    82 
    83     PabloAST * unterminatedLineAtEOF = pb.createAtEOF(pb.createAdvance(pb.createNot(LB_chars), 1));
    84     LineBreak = pb.createOr(lb, unterminatedLineAtEOF);
    85     PabloAST * const r = pb.createExtract(getOutput(0), pb.getInteger(0));
    86     pb.createAssign(r, LineBreak);
    87 #ifdef USE_LOOKAHEAD_CRLF
    88     setLookAhead(1);
    89 #endif
     95    PabloAST * unterminatedLineAtEOF = pb.createAtEOF(pb.createAdvance(pb.createNot(LineBreak), 1));
     96    pb.createAssign(pb.createExtract(getOutput(0), ZERO), pb.createOr(LineBreak, unterminatedLineAtEOF, "EOL"));
    9097}
  • icGREP/icgrep-devel/icgrep/kernels/linebreak_kernel.h

    r5464 r5782  
    1212namespace kernel {
    1313
     14class LineFeedKernelBuilder final : public pablo::PabloKernel {
     15public:
     16    LineFeedKernelBuilder(const std::unique_ptr<KernelBuilder> & b, unsigned basisBitsCount);
     17    bool isCachable() const override { return true; }
     18    bool hasSignature() const override { return false; }
     19protected:
     20    void generatePabloMethod() override;
     21};
     22
     23
    1424class LineBreakKernelBuilder final : public pablo::PabloKernel {
    1525public:
  • icGREP/icgrep-devel/icgrep/kernels/processing_rate.cpp

    r5755 r5782  
    1717}
    1818
     19/** ------------------------------------------------------------------------------------------------------------- *
     20 * @brief gcd
     21 ** ------------------------------------------------------------------------------------------------------------- */
    1922ProcessingRate::RateValue gcd(const ProcessingRate::RateValue & x, const ProcessingRate::RateValue & y) {
    2023    const auto n = boost::gcd(x.numerator(), y.numerator());
     
    2629}
    2730
     31/** ------------------------------------------------------------------------------------------------------------- *
      32 * @brief ceiling
     33 ** ------------------------------------------------------------------------------------------------------------- */
     34unsigned ceiling(const ProcessingRate::RateValue & r) {
     35    if (LLVM_LIKELY(r.denominator() == 1)) {
     36        return r.numerator();
     37    } else {
     38        return (r.numerator() + r.denominator() - 1) / r.denominator();
     39    }
    2840}
     41
     42
     43}
  • icGREP/icgrep-devel/icgrep/kernels/processing_rate.h

    r5756 r5782  
    88namespace kernel {
    99
    10 // Processing rate attributes are required for all stream set bindings for a kernel.
    11 // These attributes describe the number of items that are processed or produced as
    12 // a ratio in comparison to a reference stream set, normally the principal input stream set
    13 // by default (or the principal output stream set if there is no input).
     10// Processing rate attributes are required for all stream set bindings. They describe
     11// the relationship between processed items (inputs) and produced items (outputs).
    1412//
    15 // The default ratio is FixedRatio(1) which means that there is one item processed or
    16 // produced for every item of the reference stream.
    17 // FixedRatio(m, n) means that for every group of n items of the refrence stream,
    18 // there are m items in the output stream (rounding up).
     13// For example, the 3-to-4 kernel converts every 3 input items into 4 output items.
     14// Thus it has a FixedRate(3) for its input stream and FixedRate(4) for its output
      15// stream. Processing each group of 3 items individually would be time consuming. Instead
      16// the kernel processes a stride's worth of "iterations" and automatically scales the
     17// FixedRates accordingly.
    1918//
    20 // Kernels which produce a variable number of items use MaxRatio(n), for a maximum
    21 // of n items produced or consumed per principal input or output item.  MaxRatio(m, n)
    22 // means there are at most m items for every n items of the reference stream.
    23 //
    24 // RoundUpToMultiple(n) means that number of items produced is the same as the
    25 // number of reference items, rounded up to an exact multiple of n.
    26 //
     19// NOTE: fixed and bounded rates should be the smallest number of input items for the
     20// smallest number of output items that can be logically produced by a kernel.
     21
     22
     23
    2724
    2825struct ProcessingRate  {
     
    145142ProcessingRate::RateValue gcd(const ProcessingRate::RateValue & x, const ProcessingRate::RateValue & y);
    146143
     144unsigned ceiling(const ProcessingRate::RateValue & r);
     145
    147146}
    148147
  • icGREP/icgrep-devel/icgrep/kernels/scanmatchgen.cpp

    r5758 r5782  
    210210ScanMatchKernel::ScanMatchKernel(const std::unique_ptr<kernel::KernelBuilder> & b)
    211211: MultiBlockKernel("scanMatch",
    212     {Binding{b->getStreamSetTy(1, 1), "matchResult", FixedRate(), Principal()}, Binding{b->getStreamSetTy(1, 1), "lineBreak"}, Binding{b->getStreamSetTy(1, 8), "InputStream", FixedRate(), Deferred() }},
    213     {},
    214     {Binding{b->getIntAddrTy(), "accumulator_address"}},
    215     {},
    216     {Binding{b->getSizeTy(), "BlockNo"}, Binding{b->getSizeTy(), "LineNum"}}) {}
    217 }
     212// inputs
     213{Binding{b->getStreamSetTy(1, 1), "matchResult", FixedRate(), Principal()}
     214,Binding{b->getStreamSetTy(1, 1), "lineBreak"}
     215,Binding{b->getStreamSetTy(1, 8), "InputStream", FixedRate(), Deferred()}},
     216// outputs
     217{},
     218// input scalars
     219{Binding{b->getIntAddrTy(), "accumulator_address"}},
     220// output scalars
     221{},
     222// kernel state
     223{Binding{b->getSizeTy(), "BlockNo"}
     224,Binding{b->getSizeTy(), "LineNum"}}) {
     225
     226}
     227
     228}
  • icGREP/icgrep-devel/icgrep/kernels/streamset.cpp

    r5757 r5782  
    144144}
    145145
    146 Value * StreamSetBuffer::getLinearlyAccessibleItems(IDISA::IDISA_Builder * const b, Value * const handle, Value * fromPosition, Value * availItems, bool reverse) const {
     146Value * StreamSetBuffer::getLinearlyAccessibleItems(IDISA::IDISA_Builder * const b, Value * const /* handle */, Value * fromPosition, Value * availItems, bool reverse) const {
    147147    Constant * bufSize = ConstantInt::get(fromPosition->getType(), mBufferBlocks * b->getStride());
    148148    Value * itemsFromBase = b->CreateURem(fromPosition, bufSize);
     
    156156}
    157157
    158 Value * StreamSetBuffer::getLinearlyWritableItems(IDISA::IDISA_Builder * const b, Value * const handle, Value * fromPosition, bool reverse) const {
    159     Constant * bufSize = ConstantInt::get(fromPosition->getType(), mBufferBlocks * b->getStride());
    160     Value * bufRem = b->CreateURem(fromPosition, bufSize);
     158Value * StreamSetBuffer::getLinearlyWritableItems(IDISA::IDISA_Builder * const b, Value * const /* handle */, Value * fromPosition, Value * consumed, bool reverse) const {
     159    Constant * const bufferSize = ConstantInt::get(fromPosition->getType(), mBufferBlocks * b->getStride());
     160    fromPosition = b->CreateURem(fromPosition, bufferSize);
    161161    if (reverse) {
    162         return b->CreateSelect(b->CreateICmpEQ(bufRem, b->getSize(0)), bufSize, bufRem);
    163     }
    164     return b->CreateSub(bufSize, bufRem, "linearSpace");
     162        return b->CreateSelect(b->CreateICmpEQ(fromPosition, b->getSize(0)), bufferSize, fromPosition);
     163    }
     164    consumed = b->CreateURem(consumed, bufferSize);
     165    Value * const limit = b->CreateSelect(b->CreateICmpULE(consumed, fromPosition), bufferSize, consumed);
     166    return b->CreateNUWSub(limit, fromPosition);
    165167}
    166168
     
    187189    Value * blockCopyBytes = b->CreateMul(blocksToCopy, b->getSize(b->getBitBlockWidth() * numStreams * fieldWidth/8));
    188190    b->CreateMemMove(b->CreateBitCast(targetBlockPtr, i8ptr), b->CreateBitCast(sourceBlockPtr, i8ptr), blockCopyBytes, alignment);
    189 }
    190 
    191 inline bool isConstantZero(Value * const v) {
    192     return isa<Constant>(v) && cast<Constant>(v)->isNullValue();
    193191}
    194192
     
    293291}
    294292
    295 Value * SourceBuffer::getLinearlyWritableItems(IDISA::IDISA_Builder * const b, Value * const handle, Value * fromPosition, bool reverse) const {
     293Value * SourceBuffer::getLinearlyWritableItems(IDISA::IDISA_Builder * const b, Value * const handle, Value * fromPosition, Value *consumed, bool reverse) const {
    296294    report_fatal_error("SourceBuffers cannot be written");
    297295}
     
    325323}
    326324
    327 Value * ExternalBuffer::getLinearlyWritableItems(IDISA::IDISA_Builder * const, Value *, Value * fromPosition, const bool reverse) const {
     325Value * ExternalBuffer::getLinearlyWritableItems(IDISA::IDISA_Builder * const, Value *, Value * fromPosition, Value *consumed, const bool reverse) const {
    328326    // Trust that the buffer is large enough to write any amount
    329327    return reverse ? fromPosition : ConstantInt::getAllOnesValue(fromPosition->getType());
     
    374372}
    375373
    376 Value * CircularCopybackBuffer::getLinearlyWritableItems(IDISA::IDISA_Builder * const b, Value * const handle, Value * fromPosition, bool reverse) const {
    377     Value * writableProper = StreamSetBuffer::getLinearlyWritableItems(b, handle, fromPosition, reverse);
     374Value * CircularCopybackBuffer::getLinearlyWritableItems(IDISA::IDISA_Builder * const b, Value * const handle, Value * fromPosition, Value * consumed, bool reverse) const {
     375    Value * writableProper = StreamSetBuffer::getLinearlyWritableItems(b, handle, fromPosition, consumed, reverse);
    378376    if (reverse) return writableProper;
    379377    return b->CreateAdd(writableProper, b->getSize(mOverflowBlocks * b->getBitBlockWidth()));
     
    452450}
    453451
    454 Value * SwizzledCopybackBuffer::getLinearlyWritableItems(IDISA::IDISA_Builder * const b, Value * const handle, Value * fromPosition, bool reverse) const {
    455     Value * writableProper = StreamSetBuffer::getLinearlyWritableItems(b, handle, fromPosition, reverse);
     452Value * SwizzledCopybackBuffer::getLinearlyWritableItems(IDISA::IDISA_Builder * const b, Value * const handle, Value * fromPosition, Value *consumed, bool reverse) const {
     453    Value * writableProper = StreamSetBuffer::getLinearlyWritableItems(b, handle, fromPosition, consumed, reverse);
    456454    if (reverse) return writableProper;
    457455    return b->CreateAdd(writableProper, b->getSize(mOverflowBlocks * b->getBitBlockWidth()));
     
    692690}
    693691
    694 Value * DynamicBuffer::getLinearlyWritableItems(IDISA::IDISA_Builder * const b, Value * const handle, Value * fromPosition, bool reverse) const {
     692Value * DynamicBuffer::getLinearlyWritableItems(IDISA::IDISA_Builder * const b, Value * const handle, Value * fromPosition, Value *consumed, bool reverse) const {
    695693    Value * bufBlocks = b->CreateLoad(b->CreateGEP(handle, {b->getInt32(0), b->getInt32(int(Field::WorkingBlocks))}));
    696694    Constant * blockSize = ConstantInt::get(bufBlocks->getType(), b->getBitBlockWidth());
  • icGREP/icgrep-devel/icgrep/kernels/streamset.h

    r5757 r5782  
    9797    virtual void createBlockAlignedCopy(IDISA::IDISA_Builder * const b, llvm::Value * targetBlockPtr, llvm::Value * sourceBlockPtr, llvm::Value * itemsToCopy, const unsigned alignment = 1) const;
    9898
    99     virtual llvm::Value * getLinearlyWritableItems(IDISA::IDISA_Builder * const b, llvm::Value * handle, llvm::Value * fromPosition, bool reverse = false) const;
     99    virtual llvm::Value * getLinearlyWritableItems(IDISA::IDISA_Builder * const b, llvm::Value * handle, llvm::Value * fromPosition, llvm::Value * consumed, bool reverse = false) const;
    100100   
    101101    bool supportsCopyBack() const {
     
    118118        return mConsumers;
    119119    }
     120
     121
    120122
    121123protected:
     
    174176    llvm::Value * getLinearlyAccessibleItems(IDISA::IDISA_Builder * const b, llvm::Value * handle, llvm::Value * fromPosition, llvm::Value * avail, bool reverse = false) const override;
    175177
    176     llvm::Value * getLinearlyWritableItems(IDISA::IDISA_Builder * const b, llvm::Value * handle, llvm::Value * fromPosition, bool reverse = false) const override;
     178    llvm::Value * getLinearlyWritableItems(IDISA::IDISA_Builder * const b, llvm::Value * handle, llvm::Value * fromPosition, llvm::Value * consumed, bool reverse = false) const override;
    177179
    178180    llvm::Type * getStreamSetBlockType() const override;
     
    200202    llvm::Value * getLinearlyAccessibleItems(IDISA::IDISA_Builder * const b, llvm::Value * handle, llvm::Value * fromPosition, llvm::Value * avail, bool reverse = false) const override;
    201203   
    202     llvm::Value * getLinearlyWritableItems(IDISA::IDISA_Builder * const b, llvm::Value * handle, llvm::Value * fromPosition, bool reverse = false) const override;
     204    llvm::Value * getLinearlyWritableItems(IDISA::IDISA_Builder * const b, llvm::Value * handle, llvm::Value * fromPosition, llvm::Value * consumed, bool reverse = false) const override;
    203205
    204206    void allocateBuffer(const std::unique_ptr<kernel::KernelBuilder> & b) override;
     
    242244    CircularCopybackBuffer(const std::unique_ptr<kernel::KernelBuilder> & b, llvm::Type * type, size_t bufferBlocks, size_t overflowBlocks, unsigned AddressSpace = 0);
    243245   
    244     llvm::Value * getLinearlyWritableItems(IDISA::IDISA_Builder * const b, llvm::Value * handle, llvm::Value * fromPosition, bool reverse = false) const override;
     246    llvm::Value * getLinearlyWritableItems(IDISA::IDISA_Builder * const b, llvm::Value * handle, llvm::Value * fromPosition, llvm::Value * consumed, bool reverse = false) const override;
    245247   
    246248    void allocateBuffer(const std::unique_ptr<kernel::KernelBuilder> & b) override;
     
    258260    void createBlockAlignedCopy(IDISA::IDISA_Builder * const b, llvm::Value * targetBlockPtr, llvm::Value * sourceBlockPtr, llvm::Value * itemsToCopy, const unsigned alignment = 1) const override;
    259261
    260     llvm::Value * getLinearlyWritableItems(IDISA::IDISA_Builder * const b, llvm::Value * handle, llvm::Value * fromPosition, bool reverse = false) const override;
     262    llvm::Value * getLinearlyWritableItems(IDISA::IDISA_Builder * const b, llvm::Value * handle, llvm::Value * fromPosition, llvm::Value * consumed, bool reverse = false) const override;
    261263   
    262264    void allocateBuffer(const std::unique_ptr<kernel::KernelBuilder> & b) override;
     
    317319    llvm::Value * getLinearlyAccessibleItems(IDISA::IDISA_Builder * const b, llvm::Value * handle, llvm::Value * fromPosition, llvm::Value * avail, bool reverse = false) const override;
    318320   
    319     llvm::Value * getLinearlyWritableItems(IDISA::IDISA_Builder * const b, llvm::Value * handle, llvm::Value * fromPosition, bool reverse = false) const override;
     321    llvm::Value * getLinearlyWritableItems(IDISA::IDISA_Builder * const b, llvm::Value * handle, llvm::Value * fromPosition, llvm::Value * consumed, bool reverse = false) const override;
    320322   
    321323    void allocateBuffer(const std::unique_ptr<kernel::KernelBuilder> & b) override;
  • icGREP/icgrep-devel/icgrep/pablo/analysis/pabloverifier.cpp

    r5706 r5782  
    77#include <pablo/pablo_kernel.h>
    88#include <pablo/printer_pablos.h>
    9 #include <boost/container/flat_set.hpp>
    109#include <llvm/Support/ErrorHandling.h>
    1110#include <llvm/Support/raw_ostream.h>
     11#include <llvm/ADT/SmallSet.h>
     12#include <llvm/ADT/DenseMap.h>
     13#include <llvm/ADT/SmallBitVector.h>
     14#include <llvm/IR/Type.h>
    1215
    1316using namespace llvm;
     
    1720using TypeId = PabloAST::ClassTypeId;
    1821
    19 template <typename Type>
    20 using SmallSet = boost::container::flat_set<Type>;
    21 
    22 using ScopeSet = SmallSet<const PabloBlock *>;
     22using ScopeSet = SmallSet<const PabloBlock *, 32>;
    2323
    2424/** ------------------------------------------------------------------------------------------------------------- *
     
    2727void testUsers(const PabloAST * expr, const ScopeSet & validScopes) {
    2828    size_t uses = 0;
    29     SmallSet<const PabloAST *> verified;
     29    SmallSet<const PabloAST *, 16> verified;
    3030    for (const PabloAST * use : expr->users()) {
    3131        if (LLVM_UNLIKELY(verified.count(use) != 0)) {
     
    154154    }
    155155    verifyUseDefInformation(kernel->getEntryBlock(), validScopes);
    156 }
    157 
    158 /** ------------------------------------------------------------------------------------------------------------- *
    159  * @brief unreachable
    160  ** ------------------------------------------------------------------------------------------------------------- */
    161 bool unreachable(const Statement * stmt, const PabloBlock * const block) {
    162     PabloBlock * parent = stmt->getParent();
    163     while (parent)  {
    164         if (parent == block) {
    165             return false;
    166         }
    167         parent = parent->getPredecessor();
    168     }
    169     return true;
    170156}
    171157
     
    302288
    303289/** ------------------------------------------------------------------------------------------------------------- *
    304  * @brief isTopologicallyOrdered
    305  ** ------------------------------------------------------------------------------------------------------------- */
    306 struct OrderingVerifier {
    307     OrderingVerifier() : mParent(nullptr), mSet() {}
    308     OrderingVerifier(const OrderingVerifier & parent) : mParent(&parent) {}
    309     bool count(const PabloAST * expr) const {
     290 * @brief verifyAllPathsDominate
     291 ** ------------------------------------------------------------------------------------------------------------- */
     292void verifyAllPathsDominate(const PabloBlock * block) {
     293    for (const Statement * stmt : *block) {
     294        for (unsigned i = 0; i != stmt->getNumOperands(); ++i) {
     295            const PabloAST * const op = stmt->getOperand(i);
     296            if (LLVM_UNLIKELY(!dominates(op, stmt))) {
     297                std::string tmp;
     298                raw_string_ostream out(tmp);
     299                PabloPrinter::print(cast<Statement>(op), out);
     300                out << " does not dominate ";
     301                PabloPrinter::print(stmt, out);
     302                throw std::runtime_error(out.str());
     303            }
     304        }
     305        if (LLVM_UNLIKELY(isa<Branch>(stmt))) {
     306            verifyAllPathsDominate(cast<Branch>(stmt)->getBody());
     307        }
     308    }
     309}
     310
     311void verifyAllPathsDominate(const PabloKernel * kernel) {
     312    verifyAllPathsDominate(kernel->getEntryBlock());
     313}
     314
     315/** ------------------------------------------------------------------------------------------------------------- *
     316 * @brief verifyVariableAssignments
     317 ** ------------------------------------------------------------------------------------------------------------- */
     318struct AssignmentSet {
     319    AssignmentSet() : mParent(nullptr), mSet() {}
     320    AssignmentSet(const AssignmentSet & parent) : mParent(&parent) {}
     321    bool contains(const Var * expr) const {
    310322        if (mSet.count(expr)) {
    311323            return true;
    312324        } else if (mParent) {
    313             return mParent->count(expr);
     325            return mParent->contains(expr);
    314326        }
    315327        return false;
    316328    }
    317     void insert(const PabloAST * expr) {
     329
     330    void insert_full(const Var * expr) {
     331        const auto n = getNumOfElements(expr);
     332        auto f = mAssignment.find(expr);
     333        if (LLVM_LIKELY(f == mAssignment.end())) {
     334            mAssignment.insert(std::move(std::make_pair(expr, SmallBitVector(n, true))));
     335        } else {
     336            f->second.resize(n, true);
     337        }
     338    }
     339
     340    void insert(const Var * expr, const unsigned i) {
    318341        mSet.insert(expr);
    319342    }
     343protected:
     344
     345    static unsigned getNumOfElements(const Var * expr) {
     346        const Type * const ty = expr->getType();
     347        if (ty->isArrayTy()) {
     348            return ty->getArrayNumElements();
     349        }
     350        return 1;
     351    }
     352
    320353private:
    321     const OrderingVerifier * const mParent;
    322     SmallSet<const PabloAST *> mSet;
     354    const AssignmentSet * const mParent;
     355    DenseMap<const Var *, SmallBitVector> mAssignment;
     356
     357    SmallSet<const Var *, 16> mSet;
    323358};
    324359
    325 void isTopologicallyOrdered(const PabloBlock * block, const OrderingVerifier & parent) {
    326     OrderingVerifier ov(parent);
    327     for (const Statement * stmt : *block) {
    328         if (LLVM_UNLIKELY(isa<While>(stmt))) {
    329             isTopologicallyOrdered(cast<While>(stmt)->getBody(), ov);
    330             for (const Var * var : cast<While>(stmt)->getEscaped()) {
    331                 ov.insert(var);
    332             }
    333         } else if (LLVM_UNLIKELY(isa<Assign>(stmt))) {
    334             ov.insert(cast<Assign>(stmt)->getVariable());
    335         }
    336         for (unsigned i = 0; i != stmt->getNumOperands(); ++i) {
    337             const PabloAST * const op = stmt->getOperand(i);
    338             if (LLVM_UNLIKELY((isa<Statement>(op) || isa<Var>(op)) && ov.count(op) == 0)) {
    339                 std::string tmp;
    340                 raw_string_ostream out(tmp);
    341                 if (isa<Var>(op)) {
    342                     PabloPrinter::print(op, out);
    343                     out << " is used by ";
    344                     PabloPrinter::print(stmt, out);
    345                     out << " before being assigned a value.";
    346                 } else {
    347                     PabloPrinter::print(op, out);
    348                     if (LLVM_UNLIKELY(isa<Statement>(op) && unreachable(stmt, cast<Statement>(op)->getParent()))) {
    349                         out << " was defined in a scope that is unreachable by ";
    350                     } else {
    351                         out << " was used before definition by ";
    352                     }
    353                     PabloPrinter::print(stmt, out);
    354                 }
    355                 throw std::runtime_error(out.str());
    356             }
    357         }
    358         ov.insert(stmt);
    359         if (LLVM_UNLIKELY(isa<If>(stmt))) {
    360             isTopologicallyOrdered(cast<If>(stmt)->getBody(), ov);
    361             for (const Var * def : cast<If>(stmt)->getEscaped()) {
    362                 ov.insert(def);
    363             }
    364         }
    365     }
    366 }
    367 
    368 void isTopologicallyOrdered(const PabloKernel * kernel) {
    369     OrderingVerifier ov;
    370     for (unsigned i = 0; i != kernel->getNumOfInputs(); ++i) {
    371         ov.insert(kernel->getInput(i));
    372     }
    373     for (unsigned i = 0; i != kernel->getNumOfOutputs(); ++i) {
    374         ov.insert(kernel->getOutput(i));
    375     }
    376     isTopologicallyOrdered(kernel->getEntryBlock(), ov);
    377 }
     360//void verifyVariableUsages(const PabloBlock * block, const AssignmentSet & parent) {
     361//    AssignmentSet A(parent);
     362//    for (const Statement * stmt : *block) {
     363//        if (isa<Assign>(stmt)) {
     364//            PabloAST * var = cast<Assign>(stmt)->getVariable();
     365//            if (isa<Extract>(var)) {
     366//                var = cast<Extract>(var)->getArray();
     367//            }
     368//            A.insert(cast<Var>(var));
     369//        } else if (isa<Extract>(stmt)) {
     370//            Var * const var = cast<Var>(cast<Extract>(var)->getArray());
     371//            if (A.contains(var)) {
     372//                continue;
     373//            }
     374//        } else {
     375//            for (unsigned i = 0; i != stmt->getNumOperands(); ++i) {
     376//                const PabloAST * const op = stmt->getOperand(i);
     377//                if (isa<Var>(op)) {
     378
     379//                }
     380//            }
     381//        }
     382
     383
     384
     385//        for (unsigned i = 0; i != stmt->getNumOperands(); ++i) {
     386//            const PabloAST * const op = stmt->getOperand(i);
     387//            if (LLVM_UNLIKELY(!dominates(op, stmt))) {
     388//                std::string tmp;
     389//                raw_string_ostream out(tmp);
     390//                PabloPrinter::print(cast<Statement>(op), out);
     391//                out << " does not dominate ";
     392//                PabloPrinter::print(stmt, out);
     393//                throw std::runtime_error(out.str());
     394//            }
     395//        }
     396//        if (LLVM_UNLIKELY(isa<Branch>(stmt))) {
     397//            verifyAllPathsDominate(cast<Branch>(stmt)->getBody());
     398//        }
     399//    }
     400//}
     401
     402//void verifyVariableUsages(const PabloKernel * kernel) {
     403//    AssignmentSet A;
     404//    for (unsigned i = 0; i != kernel->getNumOfInputs(); ++i) {
     405//        A.insert(kernel->getInput(i));
     406//    }
     407//    for (unsigned i = 0; i != kernel->getNumOfOutputs(); ++i) {
     408//        A.insert(kernel->getOutput(i));
     409//    }
     410//    verifyVariableUsages(kernel->getEntryBlock(), A);
     411//}
     412
     413
    378414
    379415void PabloVerifier::verify(const PabloKernel * kernel, const std::string & location) {
     
    381417        verifyProgramStructure(kernel);
    382418        verifyUseDefInformation(kernel);
    383         isTopologicallyOrdered(kernel);
     419        verifyAllPathsDominate(kernel);
    384420    } catch(std::runtime_error & err) {
    385421        PabloPrinter::print(kernel, errs());
  • icGREP/icgrep-devel/icgrep/pablo/builder.cpp

    r5714 r5782  
    172172
    173173PabloAST * PabloBuilder::createLookahead(PabloAST * expr, PabloAST * shiftAmount) {
    174     if (isa<Zeroes>(expr) || cast<Integer>(shiftAmount)->value() == 0) {
     174    if (LLVM_UNLIKELY(isa<Zeroes>(expr) || cast<Integer>(shiftAmount)->value() == 0)) {
    175175        return expr;
    176176    }
     
    180180
    181181PabloAST * PabloBuilder::createLookahead(PabloAST * expr, PabloAST * shiftAmount, const llvm::StringRef & prefix) {
    182     if (isa<Zeroes>(expr) || cast<Integer>(shiftAmount)->value() == 0) {
     182    if (LLVM_UNLIKELY(isa<Zeroes>(expr) || cast<Integer>(shiftAmount)->value() == 0)) {
    183183        return expr;
    184184    }
  • icGREP/icgrep-devel/icgrep/pablo/optimizers/pablo_simplifier.cpp

    r5706 r5782  
    483483                }
    484484            }
    485         } else if (LLVM_UNLIKELY(isa<ScanThru>(stmt))) {
    486             ScanThru * scanThru = cast<ScanThru>(stmt);
    487             if (LLVM_UNLIKELY(isa<Advance>(scanThru->getScanFrom()))) {
    488                 // Replace a ScanThru(Advance(x,n),y) with an ScanThru(Advance(x, n - 1), Advance(x, n - 1) | y), where Advance(x, 0) = x
    489                 Advance * adv = cast<Advance>(scanThru->getScanFrom());
    490                 if (LLVM_UNLIKELY(adv->getNumUses() == 1)) {
    491                     PabloAST * stream = adv->getExpression();
     485        } else if (LLVM_UNLIKELY(isa<ScanThru>(stmt))) {           
     486            ScanThru * const outer = cast<ScanThru>(stmt);
     487            if (LLVM_UNLIKELY(isa<Advance>(outer->getScanFrom()))) {
     488                // Replace ScanThru(Advance(x,n),y) with ScanThru(Advance(x, n - 1), Advance(x, n - 1) | y), where Advance(x, 0) = x               
     489                Advance * const inner = cast<Advance>(outer->getScanFrom());
     490                if (LLVM_UNLIKELY(inner->getNumUses() == 1)) {
     491                    PabloAST * stream = inner->getExpression();
    492492                    block->setInsertPoint(stmt);
    493                     if (LLVM_UNLIKELY(adv->getAmount() != 1)) {
    494                         stream = block->createAdvance(stream, block->getInteger(adv->getAmount() - 1));
    495                     }
    496                     stmt = scanThru->replaceWith(block->createAdvanceThenScanThru(stream, scanThru->getScanThru()));
    497                     adv->eraseFromParent(false);
     493                    if (LLVM_UNLIKELY(inner->getAmount() != 1)) {
     494                        stream = block->createAdvance(stream, block->getInteger(inner->getAmount() - 1));
     495                    }
     496                    stmt = outer->replaceWith(block->createAdvanceThenScanThru(stream, outer->getScanThru()));
     497                    inner->eraseFromParent(false);
    498498                    continue;
    499499                }
    500             } else if (LLVM_UNLIKELY(isa<And>(scanThru->getScanFrom()))) {
     500            } else if (LLVM_UNLIKELY(isa<ScanThru>(outer->getScanFrom()))) {
     501                // Replace ScanThru(ScanThru(x, y), z) with ScanThru(x, y | z)
     502                ScanThru * const inner = cast<ScanThru>(outer->getScanFrom());
     503                block->setInsertPoint(stmt);
     504                ScanThru * const scanThru = block->createScanThru(inner->getScanFrom(), block->createOr(inner->getScanThru(), outer->getScanThru()));
     505                stmt->replaceWith(scanThru);
     506                stmt = scanThru;
     507                continue;
     508            } else if (LLVM_UNLIKELY(isa<And>(outer->getScanFrom()))) {
    501509                // Suppose B is an arbitrary bitstream and A = Advance(B, 1). ScanThru(B ∧ ¬A, B) will leave a marker on the position
    502510                // following the end of any run of 1-bits in B. But this is equivalent to computing A ∧ ¬B since A will have exactly
  • icGREP/icgrep-devel/icgrep/pablo/pablo_compiler.cpp

    r5706 r5782  
    8282            const Lookahead * const la = cast<Lookahead>(stmt);
    8383            PabloAST * input = la->getExpression();
    84             if (LLVM_UNLIKELY(isa<Extract>(input))) {
     84            if (isa<Extract>(input)) {
    8585                input = cast<Extract>(input)->getArray();
    8686            }
     
    8989                for (unsigned i = 0; i < mKernel->getNumOfInputs(); ++i) {
    9090                    if (input == mKernel->getInput(i)) {
    91                         if (LLVM_LIKELY(mKernel->getLookAhead(i) < la->getAmount())) {
    92                             mKernel->setLookAhead(i, la->getAmount());
     91                        const auto & binding = mKernel->getStreamInput(i);
     92                        if (LLVM_UNLIKELY(!binding.hasLookahead() || binding.getLookahead() < la->getAmount())) {
     93                            std::string tmp;
     94                            raw_string_ostream out(tmp);
     95                            input->print(out);
     96                            out << " must have a lookahead attribute of at least " << la->getAmount();
     97                            report_fatal_error(out.str());
    9398                        }
    9499                        notFound = false;
     
    566571            PabloAST * stream = l->getExpression();
    567572            Value * index = nullptr;
    568             if (LLVM_UNLIKELY(isa<Extract>(stream))) {
     573            if (LLVM_UNLIKELY(isa<Extract>(stream))) {               
     574                index = compileExpression(iBuilder, cast<Extract>(stream)->getIndex(), true);
    569575                stream = cast<Extract>(stream)->getArray();
    570                 index = compileExpression(iBuilder, cast<Extract>(stream)->getIndex());
    571576            } else {
    572577                index = iBuilder->getInt32(0);
     
    574579            const auto bit_shift = (l->getAmount() % iBuilder->getBitBlockWidth());
    575580            const auto block_shift = (l->getAmount() / iBuilder->getBitBlockWidth());
    576             Value * ptr = iBuilder->getAdjustedInputStreamBlockPtr(iBuilder->getSize(block_shift), cast<Var>(stream)->getName(), index);
     581            Value * ptr = iBuilder->getInputStreamBlockPtr(cast<Var>(stream)->getName(), index, iBuilder->getSize(block_shift));
    577582            Value * lookAhead = iBuilder->CreateBlockAlignedLoad(ptr);
    578583            if (bit_shift == 0) {  // Simple case with no intra-block shifting.
    579584                value = lookAhead;
    580585            } else { // Need to form shift result from two adjacent blocks.
    581                 Value * ptr = iBuilder->getAdjustedInputStreamBlockPtr(iBuilder->getSize(block_shift + 1), cast<Var>(stream)->getName(), index);
     586                Value * ptr = iBuilder->getInputStreamBlockPtr(cast<Var>(stream)->getName(), index, iBuilder->getSize(block_shift + 1));
    582587                Value * lookAhead1 = iBuilder->CreateBlockAlignedLoad(ptr);
    583588                if (LLVM_UNLIKELY((bit_shift % 8) == 0)) { // Use a single whole-byte shift, if possible.
  • icGREP/icgrep-devel/icgrep/pablo/pablo_kernel.cpp

    r5706 r5782  
    183183        }
    184184
    185 //        Value * base = iBuilder->CreateLoad(iBuilder->CreateGEP(profile, {iBuilder->getInt32(0), iBuilder->getInt32(0)}));
    186 //        base = iBuilder->CreateUIToFP(base, iBuilder->getDoubleTy());
    187 
    188 //        unsigned branchCount = 0;
    189 //        std::function<void (const PabloBlock *)> writeProfile = [&](const PabloBlock * const scope) {
    190 //            for (const Statement * stmt : *scope) {
    191 //                if (isa<Branch>(stmt)) {
    192 
    193 //                    ++branchCount;
    194 
    195 //                    std::string tmp;
    196 //                    raw_string_ostream str(tmp);
    197 //                    str << "%3.3f\t";
    198 //                    str << mPabloCompiler->getBranchEntry(branchCount)->getName();
    199 //                    str << "\n";
    200 
    201 //                    Value * branches = iBuilder->CreateLoad(iBuilder->CreateGEP(profile, {iBuilder->getInt32(0), iBuilder->getInt32(branchCount)}));
    202 //                    branches = iBuilder->CreateUIToFP(branches, iBuilder->getDoubleTy());
    203 //                    Value * prob = iBuilder->CreateFDiv(branches, base);
    204 //                    iBuilder->CreateCall(dprintf, {fd, iBuilder->GetString(str.str()), prob});
    205 
    206 //                    writeProfile(cast<Branch>(stmt)->getBody());
    207 
    208 //                }
    209 //            }
    210 //        };
    211 
    212 //        writeProfile(getEntryBlock());
    213185        iBuilder->CreateCloseCall(fd);
    214186    }
  • icGREP/icgrep-devel/icgrep/pablo/pe_lookahead.h

    r5646 r5782  
    2727        return getOperand(0);
    2828    }
    29     inline int64_t getAmount() const {
     29    inline unsigned getAmount() const {
    3030        return llvm::cast<Integer>(getOperand(1))->value();
    3131    }
     
    3333    Lookahead(PabloAST * expr, PabloAST * shiftAmount, const String * name, Allocator & allocator)
    3434    : Statement(ClassTypeId::Lookahead, expr->getType(), {expr, shiftAmount}, name, allocator) {
    35         assert(llvm::isa<Integer>(shiftAmount));
     35        assert(llvm::isa<Integer>(shiftAmount) && llvm::cast<Integer>(shiftAmount)->value() >= 0);
    3636    }
    3737};
  • icGREP/icgrep-devel/icgrep/re/casing.cpp

    r5781 r5782  
    2222
    2323namespace re {
    24 RE * resolveCaseInsensitiveMode(RE * re, bool inCaseInsensitiveMode) {
     24RE * resolveCaseInsensitiveMode(RE * re, const bool inCaseInsensitiveMode) {
    2525    if (isa<CC>(re)) {
    2626        if (inCaseInsensitiveMode) {
    27             UCD::UnicodeSet cased = caseInsensitize(*cast<CC>(re));
    28             return makeCC(std::move(cased));
     27            return makeCC(std::move(caseInsensitize(*cast<CC>(re))));
    2928        }
    30         else return re;
    31     }
    32     else if (Name * name = dyn_cast<Name>(re)) {
     29        return re;
     30    } else if (Name * name = dyn_cast<Name>(re)) {
    3331        if (!inCaseInsensitiveMode || (name->getDefinition() == nullptr)) return re;
    3432        RE * r = resolveCaseInsensitiveMode(name->getDefinition(), true);
    35         Name * n = makeName(name->getNamespace(), name->getName() + "/i", name->getType());
     33        Name * n = nullptr;
     34        if (name->hasNamespace()) {
     35            n = makeName(name->getNamespace(), name->getName() + "/i", name->getType());
     36        } else {
     37            n = makeName(name->getName() + "/i", name->getType());
     38        }
    3639        n->setDefinition(r);
    3740        return n;
    38     }
    39     else if (Seq * seq = dyn_cast<Seq>(re)) {
     41    } else if (Seq * seq = dyn_cast<Seq>(re)) {
    4042        std::vector<RE*> list;
    4143        for (auto i = seq->begin(); i != seq->end(); ++i) {
  • icGREP/icgrep-devel/icgrep/re/casing.h

    r5766 r5782  
    66class RE;
    77
    8 RE * resolveCaseInsensitiveMode(RE * re, bool inCaseInsensitiveMode);
     8RE * resolveCaseInsensitiveMode(RE * re, const bool inCaseInsensitiveMode);
    99
    1010}
  • icGREP/icgrep-devel/icgrep/re/re_alt.h

    r5775 r5782  
    5252RE * makeAlt(iterator begin, iterator end) {
    5353    Alt * newAlt = makeAlt();
    54     CC * unionCC = makeCC();
     54    CC * unionCC = nullptr;
    5555    for (auto i = begin; i != end; ++i) {
    56         if (const CC * cc = llvm::dyn_cast<CC>(*i)) {
    57             unionCC = makeCC(unionCC, cc);
     56        if (CC * cc = llvm::dyn_cast<CC>(*i)) {
     57            unionCC = unionCC ? makeCC(unionCC, cc) : cc;
    5858        } else if (const Alt * alt = llvm::dyn_cast<Alt>(*i)) {
    5959            // We have an Alt to embed within the alt.  We extract the individual
     
    6262            for (RE * a : *alt) {
    6363                if (CC * cc = llvm::dyn_cast<CC>(a)) {
    64                     unionCC = makeCC(unionCC, cc);
     64                    unionCC = unionCC ? makeCC(unionCC, cc) : cc;
     65                } else {
     66                    newAlt->push_back(a);
    6567                }
    66                 else newAlt->push_back(a);
    6768            }
    6869        }
     
    7172        }
    7273    }
    73     if (!unionCC->empty()) newAlt->push_back(unionCC);
     74    if (unionCC) {
     75        newAlt->push_back(unionCC);
     76    }
    7477    return newAlt;
    7578}
  • icGREP/icgrep-devel/icgrep/re/re_compiler.cpp

    r5780 r5782  
    3737namespace re { class Alt; }
    3838namespace re { class RE; }
    39 
    40 
    41 #define UNICODE_LINE_BREAK (!AlgorithmOptionIsSet(DisableUnicodeLineBreak))
    4239
    4340using namespace pablo;
     
    9996
    10097inline MarkerType RE_Compiler::compileAny(const MarkerType m, PabloBuilder & pb) {
    101     PabloAST * nextFinalByte = markerVar(AdvanceMarker(m, MarkerPosition::FinalPostPositionUnit, pb));
     98    PabloAST * const nextFinalByte = markerVar(AdvanceMarker(m, MarkerPosition::FinalPostPositionUnit, pb));
    10299    return makeMarker(MarkerPosition::FinalMatchUnit, nextFinalByte);
    103100}
    104101
    105102MarkerType RE_Compiler::compileCC(CC * cc, MarkerType marker, PabloBuilder & pb) {
    106     MarkerType nextPos;
    107     if (markerPos(marker) == MarkerPosition::FinalPostPositionUnit) {
    108         nextPos = marker;
    109     } else {
    110         nextPos = AdvanceMarker(marker, MarkerPosition::FinalPostPositionUnit, pb);
    111     }
    112     return makeMarker(MarkerPosition::FinalMatchUnit, pb.createAnd(markerVar(nextPos), mCCCompiler.compileCC(cc, pb)));
     103    PabloAST * const nextPos = markerVar(AdvanceMarker(marker, MarkerPosition::FinalPostPositionUnit, pb));
     104    return makeMarker(MarkerPosition::FinalMatchUnit, pb.createAnd(nextPos, mCCCompiler.compileCC(cc, pb)));
    113105}
    114106
     
    123115    } else if (isUnicodeUnitLength(name)) {
    124116        MarkerType nameMarker = compileName(name, pb);
    125         MarkerType nextPos;
    126         if (markerPos(marker) == MarkerPosition::FinalPostPositionUnit) {
    127             nextPos = marker;
    128         } else {
    129             nextPos = AdvanceMarker(marker, MarkerPosition::FinalPostPositionUnit, pb);
    130         }
     117        MarkerType nextPos = AdvanceMarker(marker, MarkerPosition::FinalPostPositionUnit, pb);
    131118        nameMarker.stream = pb.createAnd(markerVar(nextPos), markerVar(nameMarker), name->getName());
    132119        return nameMarker;
     
    560547}
    561548
    562 inline MarkerType RE_Compiler::compileStart(const MarkerType marker, pablo::PabloBuilder & pb) {
     549inline MarkerType RE_Compiler::compileStart(MarkerType marker, pablo::PabloBuilder & pb) {
     550    PabloAST * sol = pb.createNot(pb.createAdvance(pb.createNot(mLineBreak), 1));
     551    if (!AlgorithmOptionIsSet(DisableUnicodeLineBreak)) {
     552        sol = pb.createScanThru(pb.createAnd(mInitial, sol), mNonFinal);
     553    }
    563554    MarkerType m = AdvanceMarker(marker, MarkerPosition::FinalPostPositionUnit, pb);
    564     if (UNICODE_LINE_BREAK) {
    565         PabloAST * line_end = mPB.createOr(mLineBreak, mCRLF);
    566         PabloAST * sol_init = pb.createNot(pb.createOr(pb.createAdvance(pb.createNot(line_end), 1), mCRLF));
    567         PabloAST * sol = pb.createScanThru(pb.createAnd(mInitial, sol_init), mNonFinal);
    568         return makeMarker(MarkerPosition::FinalPostPositionUnit, pb.createAnd(markerVar(m), sol, "sol"));
    569     } else {
    570         PabloAST * sol = pb.createNot(pb.createAdvance(pb.createNot(mLineBreak), 1));
    571         return makeMarker(MarkerPosition::FinalPostPositionUnit, pb.createAnd(markerVar(m), sol, "sol"));
    572     }
    573 }
    574 
    575 inline MarkerType RE_Compiler::compileEnd(const MarkerType marker, pablo::PabloBuilder & pb) {
    576     PabloAST * nextPos = markerVar(AdvanceMarker(marker, MarkerPosition::FinalPostPositionUnit, pb));
    577     return makeMarker(MarkerPosition::FinalPostPositionUnit, pb.createAnd(nextPos, mLineBreak, "eol"));
     555    return makeMarker(MarkerPosition::FinalPostPositionUnit, pb.createAnd(markerVar(m), sol, "sol"));
     556}
     557
     558inline MarkerType RE_Compiler::compileEnd(MarkerType marker, pablo::PabloBuilder & pb) {
     559    PabloAST * const nextPos = markerVar(AdvanceMarker(marker, MarkerPosition::FinalPostPositionUnit, pb));
     560    return makeMarker(MarkerPosition::FinalPostPositionUnit, pb.createAnd(pb.createScanThru(nextPos, mCRLF), mLineBreak, "eol"));
    578561}
    579562
     
    605588    llvm::report_fatal_error(errmsg);
    606589}
    607    
    608    
    609590
    610591RE_Compiler::RE_Compiler(PabloKernel * kernel, cc::CC_Compiler & ccCompiler)
     
    623604    Var * const linebreak = mKernel->getInputStreamVar("linebreak");
    624605    mLineBreak = mPB.createExtract(linebreak, mPB.getInteger(0));
     606    Var * const crlf = mKernel->getInputStreamVar("cr+lf");
     607    mCRLF = mPB.createExtract(crlf, mPB.getInteger(0));
    625608    Var * const required = mKernel->getInputStreamVar("required");
    626609    mInitial = mPB.createExtract(required, mPB.getInteger(0));
    627610    mNonFinal = mPB.createExtract(required, mPB.getInteger(1));
    628611    mFinal = mPB.createExtract(required, mPB.getInteger(2));
    629     mCRLF = mPB.createExtract(required, mPB.getInteger(3));
     612
    630613}
    631614
  • icGREP/icgrep-devel/icgrep/re/re_compiler.h

    r5780 r5782  
    111111    MarkerType compileName(Name * name, pablo::PabloBuilder & pb);
    112112    MarkerType compileAny(const MarkerType m, pablo::PabloBuilder & pb);
    113     MarkerType compileStart(const MarkerType marker, pablo::PabloBuilder & pb);
    114     MarkerType compileEnd(const MarkerType marker, pablo::PabloBuilder & pb);
     113    MarkerType compileStart(MarkerType marker, pablo::PabloBuilder & pb);
     114    MarkerType compileEnd(MarkerType marker, pablo::PabloBuilder & pb);
    115115
    116116    MarkerType AdvanceMarker(MarkerType marker, const MarkerPosition newpos, pablo::PabloBuilder & pb);
  • icGREP/icgrep-devel/icgrep/re/to_utf8.cpp

    r5760 r5782  
    1919#include <re/re_assertion.h>
    2020#include <llvm/Support/Casting.h>
     21#include <llvm/Support/ErrorHandling.h>
    2122
    2223using namespace llvm;
     
    9495        return makeIntersect(toUTF8(e->getLH()), toUTF8(e->getRH()));
    9596    }
    96 }
     97    llvm_unreachable("unexpected RE type given to toUTF8");
     98    return nullptr;
    9799}
    98100
     101}
     102
  • icGREP/icgrep-devel/icgrep/toolchain/grep_pipeline.cpp

    r5769 r5782  
    6262    pxDriver.makeKernelCall(sourceK, {}, {ByteStream});
    6363   
    64     StreamSetBuffer * BasisBits = pxDriver.addBuffer<CircularBuffer>(idb, idb->getStreamSetTy(8, 1), segmentSize);
    65    
     64    StreamSetBuffer * BasisBits = pxDriver.addBuffer<CircularBuffer>(idb, idb->getStreamSetTy(8, 1), segmentSize + 1);
    6665    kernel::Kernel * s2pk = pxDriver.addKernelInstance<kernel::S2PKernel>(idb);
    6766    pxDriver.makeKernelCall(s2pk, {ByteStream}, {BasisBits});
    6867   
     68    kernel::Kernel * linefeedK = pxDriver.addKernelInstance<kernel::LineFeedKernelBuilder>(idb, 8);
     69    StreamSetBuffer * LineFeedStream = pxDriver.addBuffer<CircularBuffer>(idb, idb->getStreamSetTy(1, 1), segmentSize + 1);
     70    pxDriver.makeKernelCall(linefeedK, {BasisBits}, {LineFeedStream});
     71
    6972    kernel::Kernel * linebreakK = pxDriver.addKernelInstance<kernel::LineBreakKernelBuilder>(idb, 8);
    7073    StreamSetBuffer * LineBreakStream = pxDriver.addBuffer<CircularBuffer>(idb, idb->getStreamSetTy(1, 1), segmentSize);
    71     pxDriver.makeKernelCall(linebreakK, {BasisBits}, {LineBreakStream});
     74    StreamSetBuffer * CRLFStream = pxDriver.addBuffer<CircularBuffer>(idb, idb->getStreamSetTy(1, 1), segmentSize);
     75    pxDriver.makeKernelCall(linebreakK, {BasisBits, LineFeedStream}, {LineBreakStream, CRLFStream});
    7276   
    7377    kernel::Kernel * requiredStreamsK = pxDriver.addKernelInstance<kernel::RequiredStreams_UTF8>(idb);
    74     StreamSetBuffer * RequiredStreams = pxDriver.addBuffer<CircularBuffer>(idb, idb->getStreamSetTy(4, 1), segmentSize);
     78    StreamSetBuffer * RequiredStreams = pxDriver.addBuffer<CircularBuffer>(idb, idb->getStreamSetTy(3, 1), segmentSize);
    7579    pxDriver.makeKernelCall(requiredStreamsK, {BasisBits}, {RequiredStreams});
    7680   
    7781    StreamSetBuffer * MatchResults = pxDriver.addBuffer<CircularBuffer>(idb, idb->getStreamSetTy(1, 1), segmentSize);
    7882    kernel::Kernel * icgrepK = pxDriver.addKernelInstance<kernel::ICGrepKernel>(idb, pattern);
    79     pxDriver.makeKernelCall(icgrepK, {BasisBits, LineBreakStream, RequiredStreams}, {MatchResults});
     83    pxDriver.makeKernelCall(icgrepK, {BasisBits, LineBreakStream, CRLFStream, RequiredStreams}, {MatchResults});
    8084   
    8185    StreamSetBuffer * MatchedLines = pxDriver.addBuffer<CircularBuffer>(idb, idb->getStreamSetTy(1, 1), segmentSize);
  • icGREP/icgrep-devel/icgrep/toolchain/pipeline.cpp

    r5761 r5782  
    3333
    3434void applyOutputBufferExpansions(const std::unique_ptr<KernelBuilder> & b, const Kernel * kernel);
     35
     36void handleInsufficientData(const std::unique_ptr<KernelBuilder> & b, Value * const produced, Value * const final, BasicBlock * const entry, const Kernel * const consumer,  const Binding & input, const StreamSetBuffer * const buffer);
    3537
    3638/** ------------------------------------------------------------------------------------------------------------- *
     
    4345 * fashion such that processing of segment S_i by the full pipeline is carried out by thread i mod T.
    4446 ** ------------------------------------------------------------------------------------------------------------- */
    45 void generateSegmentParallelPipeline(const std::unique_ptr<KernelBuilder> & iBuilder, const std::vector<Kernel *> & kernels) {
     47void generateSegmentParallelPipeline(const std::unique_ptr<KernelBuilder> & b, const std::vector<Kernel *> & kernels) {
    4648
    4749    const unsigned n = kernels.size();
    48     Module * const m = iBuilder->getModule();
    49     IntegerType * const sizeTy = iBuilder->getSizeTy();
    50     PointerType * const voidPtrTy = iBuilder->getVoidPtrTy();
     50    Module * const m = b->getModule();
     51    IntegerType * const sizeTy = b->getSizeTy();
     52    PointerType * const voidPtrTy = b->getVoidPtrTy();
    5153    Constant * nullVoidPtrVal = ConstantPointerNull::getNullValue(voidPtrTy);
    5254    std::vector<Type *> structTypes;
     
    6163    StructType * const threadStructType = StructType::get(m->getContext(), {sharedStructType->getPointerTo(), sizeTy});
    6264
    63     const auto ip = iBuilder->saveIP();
    64 
    65     Function * const threadFunc = makeThreadFunction(iBuilder, "segment");
     65    const auto ip = b->saveIP();
     66
     67    Function * const threadFunc = makeThreadFunction(b, "segment");
    6668    auto args = threadFunc->arg_begin();
    6769
     
    7173
    7274     // Create the basic blocks for the thread function.
    73     BasicBlock * entryBlock = BasicBlock::Create(iBuilder->getContext(), "entry", threadFunc);
    74     iBuilder->SetInsertPoint(entryBlock);
    75 
    76     Value * const threadStruct = iBuilder->CreateBitCast(&*(args), threadStructType->getPointerTo());
    77 
    78     Value * const sharedStatePtr = iBuilder->CreateLoad(iBuilder->CreateGEP(threadStruct, {iBuilder->getInt32(0), iBuilder->getInt32(0)}));
     75    BasicBlock * entryBlock = BasicBlock::Create(b->getContext(), "entry", threadFunc);
     76    b->SetInsertPoint(entryBlock);
     77
     78    Value * const threadStruct = b->CreateBitCast(&*(args), threadStructType->getPointerTo());
     79
     80    Value * const sharedStatePtr = b->CreateLoad(b->CreateGEP(threadStruct, {b->getInt32(0), b->getInt32(0)}));
    7981    for (unsigned k = 0; k < n; ++k) {
    80         Value * ptr = iBuilder->CreateLoad(iBuilder->CreateGEP(sharedStatePtr, {iBuilder->getInt32(0), iBuilder->getInt32(k)}));
     82        Value * ptr = b->CreateLoad(b->CreateGEP(sharedStatePtr, {b->getInt32(0), b->getInt32(k)}));
    8183        kernels[k]->setInstance(ptr);
    8284    }
    83     Value * const segOffset = iBuilder->CreateLoad(iBuilder->CreateGEP(threadStruct, {iBuilder->getInt32(0), iBuilder->getInt32(1)}));
    84 
    85     BasicBlock * segmentLoop = BasicBlock::Create(iBuilder->getContext(), "segmentLoop", threadFunc);
    86     iBuilder->CreateBr(segmentLoop);
    87 
    88     iBuilder->SetInsertPoint(segmentLoop);
    89     PHINode * const segNo = iBuilder->CreatePHI(iBuilder->getSizeTy(), 2, "segNo");
     85    Value * const segOffset = b->CreateLoad(b->CreateGEP(threadStruct, {b->getInt32(0), b->getInt32(1)}));
     86
     87    BasicBlock * segmentLoop = BasicBlock::Create(b->getContext(), "segmentLoop", threadFunc);
     88    b->CreateBr(segmentLoop);
     89
     90    b->SetInsertPoint(segmentLoop);
     91    PHINode * const segNo = b->CreatePHI(b->getSizeTy(), 2, "segNo");
    9092    segNo->addIncoming(segOffset, entryBlock);
    9193
    92     Value * terminated = iBuilder->getFalse();
    93     Value * const nextSegNo = iBuilder->CreateAdd(segNo, iBuilder->getSize(1));
    94 
    95     BasicBlock * segmentLoopBody = nullptr;
    96     BasicBlock * const exitThreadBlock = BasicBlock::Create(iBuilder->getContext(), "exitThread", threadFunc);
    97 
    98     StreamSetBufferMap<Value *> producedPos;
    99     StreamSetBufferMap<Value *> consumedPos;
     94    Value * terminated = b->getFalse();
     95    Value * const nextSegNo = b->CreateAdd(segNo, b->getSize(1));
     96
     97    BasicBlock * const exitThreadBlock = BasicBlock::Create(b->getContext(), "exitThread", threadFunc);
     98
     99    StreamSetBufferMap<Value *> producedItemCount;
     100    StreamSetBufferMap<Value *> consumedItemCount;
    100101
    101102    Value * cycleCountStart = nullptr;
    102103    Value * cycleCountEnd = nullptr;
    103104    if (DebugOptionIsSet(codegen::EnableCycleCounter)) {
    104         cycleCountStart = iBuilder->CreateReadCycleCounter();
     105        cycleCountStart = b->CreateReadCycleCounter();
    105106    }
    106107
     
    109110        const auto & kernel = kernels[k];
    110111
    111         BasicBlock * const segmentWait = BasicBlock::Create(iBuilder->getContext(), kernel->getName() + "Wait", threadFunc);
    112 
    113         BasicBlock * segmentYield = segmentWait;
    114         iBuilder->CreateBr(segmentWait);
    115 
    116         segmentLoopBody = BasicBlock::Create(iBuilder->getContext(), kernel->getName() + "Do", threadFunc);
    117 
    118         iBuilder->SetInsertPoint(segmentWait);
     112        BasicBlock * const kernelWait = BasicBlock::Create(b->getContext(), kernel->getName() + "Wait", threadFunc);
     113
     114        b->CreateBr(kernelWait);
     115
     116        BasicBlock * const kernelBody = BasicBlock::Create(b->getContext(), kernel->getName() + "Do", threadFunc);
     117
     118        b->SetInsertPoint(kernelWait);
    119119        const unsigned waitIdx = codegen::DebugOptionIsSet(codegen::SerializeThreads) ? (n - 1) : k;
    120120
    121         iBuilder->setKernel(kernels[waitIdx]);
    122         Value * const processedSegmentCount = iBuilder->acquireLogicalSegmentNo();
    123         iBuilder->setKernel(kernel);
     121        b->setKernel(kernels[waitIdx]);
     122        Value * const processedSegmentCount = b->acquireLogicalSegmentNo();
     123        b->setKernel(kernel);
    124124
    125125        assert (processedSegmentCount->getType() == segNo->getType());
    126         Value * const ready = iBuilder->CreateICmpEQ(segNo, processedSegmentCount);
     126        Value * const ready = b->CreateICmpEQ(segNo, processedSegmentCount);
    127127
    128128        if (kernel->hasNoTerminateAttribute()) {
    129             iBuilder->CreateCondBr(ready, segmentLoopBody, segmentYield);
     129            b->CreateCondBr(ready, kernelBody, kernelWait);
    130130        } else { // If the kernel was terminated in a previous segment then the pipeline is done.
    131             BasicBlock * completionTest = BasicBlock::Create(iBuilder->getContext(), kernel->getName() + "Completed", threadFunc, 0);
    132             BasicBlock * exitBlock = BasicBlock::Create(iBuilder->getContext(), kernel->getName() + "Exit", threadFunc, 0);
    133             iBuilder->CreateCondBr(ready, completionTest, segmentYield);
    134 
    135             iBuilder->SetInsertPoint(completionTest);
    136             Value * terminationSignal = iBuilder->getTerminationSignal();
    137             iBuilder->CreateCondBr(terminationSignal, exitBlock, segmentLoopBody);
    138             iBuilder->SetInsertPoint(exitBlock);
    139             // Ensure that the next thread will also exit.
    140             iBuilder->releaseLogicalSegmentNo(nextSegNo);
    141             iBuilder->CreateBr(exitThreadBlock);
    142         }
     131            BasicBlock * kernelTerminated = BasicBlock::Create(b->getContext(), kernel->getName() + "Terminated", threadFunc, 0);
     132            BasicBlock * exitBlock = BasicBlock::Create(b->getContext(), kernel->getName() + "Exit", threadFunc, 0);
     133            b->CreateCondBr(ready, kernelTerminated, kernelWait);
     134
     135            b->SetInsertPoint(kernelTerminated);
     136            Value * terminationSignal = b->getTerminationSignal();
     137            b->CreateCondBr(terminationSignal, exitBlock, kernelBody);
     138            b->SetInsertPoint(exitBlock);
     139            b->releaseLogicalSegmentNo(nextSegNo); // Ensure that the next thread will also exit.
     140            b->CreateBr(exitThreadBlock);
     141        }
     142
     143        BasicBlock * const kernelEnd = BasicBlock::Create(b->getContext(), kernel->getName() + "End", threadFunc);
    143144
    144145        // Execute the kernel segment
    145         iBuilder->SetInsertPoint(segmentLoopBody);
     146        b->SetInsertPoint(kernelBody);
    146147        const auto & inputs = kernel->getStreamInputs();
    147148        std::vector<Value *> args = {kernel->getInstance(), terminated};
    148149        for (unsigned i = 0; i < inputs.size(); ++i) {
    149             const auto f = producedPos.find(kernel->getStreamSetInputBuffer(i));
    150             assert (f != producedPos.end());
    151             args.push_back(f->second);
    152         }
    153 
    154         iBuilder->setKernel(kernel);
    155         iBuilder->createDoSegmentCall(args);
     150            const StreamSetBuffer * const buffer = kernel->getStreamSetInputBuffer(i);
     151            const auto f = producedItemCount.find(buffer);
     152            assert (f != producedItemCount.end());
     153            Value * const produced = f->second;
     154            args.push_back(produced);
     155            handleInsufficientData(b, produced, terminated, kernelEnd, kernel, inputs[i], buffer);
     156        }
     157
     158        b->setKernel(kernel);
     159        b->createDoSegmentCall(args);
     160        b->CreateBr(kernelEnd);
     161
     162        b->SetInsertPoint(kernelEnd);
     163
    156164        if (!kernel->hasNoTerminateAttribute()) {
    157             terminated = iBuilder->CreateOr(terminated, iBuilder->getTerminationSignal());
     165            terminated = b->CreateOr(terminated, b->getTerminationSignal());
    158166        }
    159167
    160168        const auto & outputs = kernel->getStreamOutputs();
    161169        for (unsigned i = 0; i < outputs.size(); ++i) {           
    162             Value * const produced = iBuilder->getProducedItemCount(outputs[i].getName()); // terminated
     170            Value * const produced = b->getProducedItemCount(outputs[i].getName());
    163171            const StreamSetBuffer * const buf = kernel->getStreamSetOutputBuffer(i);
    164             assert (producedPos.count(buf) == 0);
    165             producedPos.emplace(buf, produced);
     172            assert (producedItemCount.count(buf) == 0);
     173            producedItemCount.emplace(buf, produced);
    166174        }
    167175        for (unsigned i = 0; i < inputs.size(); ++i) {
    168             Value * const processedItemCount = iBuilder->getProcessedItemCount(inputs[i].getName());
     176            Value * const processedItemCount = b->getProcessedItemCount(inputs[i].getName());
    169177            const StreamSetBuffer * const buf = kernel->getStreamSetInputBuffer(i);           
    170             auto f = consumedPos.find(buf);
    171             if (f == consumedPos.end()) {
    172                 consumedPos.emplace(buf, processedItemCount);
     178            auto f = consumedItemCount.find(buf);
     179            if (f == consumedItemCount.end()) {
     180                consumedItemCount.emplace(buf, processedItemCount);
    173181            } else {
    174                 Value * lesser = iBuilder->CreateICmpULT(processedItemCount, f->second);
    175                 f->second = iBuilder->CreateSelect(lesser, processedItemCount, f->second);
    176             }
    177         }
     182                assert (f->second);
     183                f->second = b->CreateUMin(processedItemCount, f->second);
     184            }
     185        }
     186
    178187        if (DebugOptionIsSet(codegen::EnableCycleCounter)) {
    179             cycleCountEnd = iBuilder->CreateReadCycleCounter();
    180             Value * counterPtr = iBuilder->getCycleCountPtr();
    181             iBuilder->CreateStore(iBuilder->CreateAdd(iBuilder->CreateLoad(counterPtr), iBuilder->CreateSub(cycleCountEnd, cycleCountStart)), counterPtr);
     188            cycleCountEnd = b->CreateReadCycleCounter();
     189            Value * counterPtr = b->getCycleCountPtr();
     190            b->CreateStore(b->CreateAdd(b->CreateLoad(counterPtr), b->CreateSub(cycleCountEnd, cycleCountStart)), counterPtr);
    182191            cycleCountStart = cycleCountEnd;
    183         }
    184        
    185         iBuilder->releaseLogicalSegmentNo(nextSegNo);
    186     }
    187 
    188     assert (segmentLoopBody);
    189     exitThreadBlock->moveAfter(segmentLoopBody);
    190 
    191     for (const auto consumed : consumedPos) {
     192        }       
     193        b->releaseLogicalSegmentNo(nextSegNo);
     194    }
     195
     196    exitThreadBlock->moveAfter(b->GetInsertBlock());
     197    for (const auto consumed : consumedItemCount) {
    192198        const StreamSetBuffer * const buf = consumed.first;
    193199        Kernel * const k = buf->getProducer();
     
    199205                    continue;
    200206                }
    201                 iBuilder->setKernel(k);
    202                 iBuilder->setConsumedItemCount(binding.getName(), consumed.second);
     207                b->setKernel(k);
     208                b->setConsumedItemCount(binding.getName(), consumed.second);
    203209                break;
    204210            }
     
    206212    }
    207213
    208     segNo->addIncoming(iBuilder->CreateAdd(segNo, iBuilder->getSize(codegen::ThreadNum)), segmentLoopBody);
    209     iBuilder->CreateCondBr(terminated, exitThreadBlock, segmentLoop);
    210 
    211     iBuilder->SetInsertPoint(exitThreadBlock);
     214    segNo->addIncoming(b->CreateAdd(segNo, b->getSize(codegen::ThreadNum)), b->GetInsertBlock());
     215    b->CreateUnlikelyCondBr(terminated, exitThreadBlock, segmentLoop);
     216
     217    b->SetInsertPoint(exitThreadBlock);
    212218
    213219    // only call pthread_exit() within spawned threads; otherwise it'll be equivalent to calling exit() within the process
    214     BasicBlock * const exitThread = BasicBlock::Create(iBuilder->getContext(), "ExitThread", threadFunc);
    215     BasicBlock * const exitFunction = BasicBlock::Create(iBuilder->getContext(), "ExitProcessFunction", threadFunc);
    216 
    217     Value * const exitCond = iBuilder->CreateICmpEQ(segOffset, ConstantInt::getNullValue(segOffset->getType()));
    218     iBuilder->CreateCondBr(exitCond, exitFunction, exitThread);
    219     iBuilder->SetInsertPoint(exitThread);
    220     iBuilder->CreatePThreadExitCall(nullVoidPtrVal);
    221     iBuilder->CreateBr(exitFunction);
    222     iBuilder->SetInsertPoint(exitFunction);
    223     iBuilder->CreateRetVoid();
     220    BasicBlock * const exitThread = BasicBlock::Create(b->getContext(), "ExitThread", threadFunc);
     221    BasicBlock * const exitFunction = BasicBlock::Create(b->getContext(), "ExitProcessFunction", threadFunc);
     222
     223    Value * const exitCond = b->CreateICmpEQ(segOffset, ConstantInt::getNullValue(segOffset->getType()));
     224    b->CreateCondBr(exitCond, exitFunction, exitThread);
     225    b->SetInsertPoint(exitThread);
     226    b->CreatePThreadExitCall(nullVoidPtrVal);
     227    b->CreateBr(exitFunction);
     228    b->SetInsertPoint(exitFunction);
     229    b->CreateRetVoid();
    224230
    225231    // -------------------------------------------------------------------------------------------------------------------------
    226     iBuilder->restoreIP(ip);
     232    b->restoreIP(ip);
    227233
    228234    for (unsigned i = 0; i < n; ++i) {
     
    236242    assert (codegen::ThreadNum > 1);
    237243    Type * const pthreadsTy = ArrayType::get(sizeTy, threads);
    238     AllocaInst * const pthreads = iBuilder->CreateAlloca(pthreadsTy);
     244    AllocaInst * const pthreads = b->CreateAlloca(pthreadsTy);
    239245    Value * threadIdPtr[threads];
    240246
    241247    for (unsigned i = 0; i < threads; ++i) {
    242         threadIdPtr[i] = iBuilder->CreateGEP(pthreads, {iBuilder->getInt32(0), iBuilder->getInt32(i)});
    243     }
    244 
    245     for (unsigned i = 0; i < n; ++i) {
    246         iBuilder->setKernel(kernels[i]);
    247         iBuilder->releaseLogicalSegmentNo(iBuilder->getSize(0));
    248     }
    249 
    250     AllocaInst * const sharedStruct = iBuilder->CreateCacheAlignedAlloca(sharedStructType);
    251     for (unsigned i = 0; i < n; ++i) {
    252         Value * ptr = iBuilder->CreateGEP(sharedStruct, {iBuilder->getInt32(0), iBuilder->getInt32(i)});
    253         iBuilder->CreateStore(kernels[i]->getInstance(), ptr);
     248        threadIdPtr[i] = b->CreateGEP(pthreads, {b->getInt32(0), b->getInt32(i)});
     249    }
     250
     251    for (unsigned i = 0; i < n; ++i) {
     252        b->setKernel(kernels[i]);
     253        b->releaseLogicalSegmentNo(b->getSize(0));
     254    }
     255
     256    AllocaInst * const sharedStruct = b->CreateCacheAlignedAlloca(sharedStructType);
     257    for (unsigned i = 0; i < n; ++i) {
     258        Value * ptr = b->CreateGEP(sharedStruct, {b->getInt32(0), b->getInt32(i)});
     259        b->CreateStore(kernels[i]->getInstance(), ptr);
    254260    }
    255261
    256262    // use the process thread to handle the initial segment function after spawning (n - 1) threads to handle the subsequent offsets
    257263    for (unsigned i = 0; i < threads; ++i) {
    258         AllocaInst * const threadState = iBuilder->CreateAlloca(threadStructType);
    259         iBuilder->CreateStore(sharedStruct, iBuilder->CreateGEP(threadState, {iBuilder->getInt32(0), iBuilder->getInt32(0)}));
    260         iBuilder->CreateStore(iBuilder->getSize(i + 1), iBuilder->CreateGEP(threadState, {iBuilder->getInt32(0), iBuilder->getInt32(1)}));
    261         iBuilder->CreatePThreadCreateCall(threadIdPtr[i], nullVoidPtrVal, threadFunc, threadState);
    262     }
    263 
    264     AllocaInst * const threadState = iBuilder->CreateAlloca(threadStructType);
    265     iBuilder->CreateStore(sharedStruct, iBuilder->CreateGEP(threadState, {iBuilder->getInt32(0), iBuilder->getInt32(0)}));
    266     iBuilder->CreateStore(iBuilder->getSize(0), iBuilder->CreateGEP(threadState, {iBuilder->getInt32(0), iBuilder->getInt32(1)}));
    267     iBuilder->CreateCall(threadFunc, iBuilder->CreatePointerCast(threadState, voidPtrTy));
    268 
    269     AllocaInst * const status = iBuilder->CreateAlloca(voidPtrTy);
     264        AllocaInst * const threadState = b->CreateAlloca(threadStructType);
     265        b->CreateStore(sharedStruct, b->CreateGEP(threadState, {b->getInt32(0), b->getInt32(0)}));
     266        b->CreateStore(b->getSize(i + 1), b->CreateGEP(threadState, {b->getInt32(0), b->getInt32(1)}));
     267        b->CreatePThreadCreateCall(threadIdPtr[i], nullVoidPtrVal, threadFunc, threadState);
     268    }
     269
     270    AllocaInst * const threadState = b->CreateAlloca(threadStructType);
     271    b->CreateStore(sharedStruct, b->CreateGEP(threadState, {b->getInt32(0), b->getInt32(0)}));
     272    b->CreateStore(b->getSize(0), b->CreateGEP(threadState, {b->getInt32(0), b->getInt32(1)}));
     273    b->CreateCall(threadFunc, b->CreatePointerCast(threadState, voidPtrTy));
     274
     275    AllocaInst * const status = b->CreateAlloca(voidPtrTy);
    270276    for (unsigned i = 0; i < threads; ++i) {
    271         Value * threadId = iBuilder->CreateLoad(threadIdPtr[i]);
    272         iBuilder->CreatePThreadJoinCall(threadId, status);
     277        Value * threadId = b->CreateLoad(threadIdPtr[i]);
     278        b->CreatePThreadJoinCall(threadId, status);
    273279    }
    274280   
     
    276282        for (unsigned k = 0; k < kernels.size(); k++) {
    277283            auto & kernel = kernels[k];
    278             iBuilder->setKernel(kernel);
     284            b->setKernel(kernel);
    279285            const auto & inputs = kernel->getStreamInputs();
    280286            const auto & outputs = kernel->getStreamOutputs();
    281287            Value * items = nullptr;
    282288            if (inputs.empty()) {
    283                 items = iBuilder->getProducedItemCount(outputs[0].getName());
     289                items = b->getProducedItemCount(outputs[0].getName());
    284290            } else {
    285                 items = iBuilder->getProcessedItemCount(inputs[0].getName());
    286             }
    287             Value * fItems = iBuilder->CreateUIToFP(items, iBuilder->getDoubleTy());
    288             Value * cycles = iBuilder->CreateLoad(iBuilder->getCycleCountPtr());
    289             Value * fCycles = iBuilder->CreateUIToFP(cycles, iBuilder->getDoubleTy());
     291                items = b->getProcessedItemCount(inputs[0].getName());
     292            }
     293            Value * fItems = b->CreateUIToFP(items, b->getDoubleTy());
     294            Value * cycles = b->CreateLoad(b->getCycleCountPtr());
     295            Value * fCycles = b->CreateUIToFP(cycles, b->getDoubleTy());
    290296            const auto formatString = kernel->getName() + ": %7.2e items processed; %7.2e CPU cycles,  %6.2f cycles per item.\n";
    291             Value * stringPtr = iBuilder->CreatePointerCast(iBuilder->GetString(formatString), iBuilder->getInt8PtrTy());
    292             iBuilder->CreateCall(iBuilder->GetDprintf(), {iBuilder->getInt32(2), stringPtr, fItems, fCycles, iBuilder->CreateFDiv(fCycles, fItems)});
     297            Value * stringPtr = b->CreatePointerCast(b->GetString(formatString), b->getInt8PtrTy());
     298            b->CreateCall(b->GetDprintf(), {b->getInt32(2), stringPtr, fItems, fCycles, b->CreateFDiv(fCycles, fItems)});
    293299        }
    294300    }
     
    502508 * @brief generatePipelineLoop
    503509 ** ------------------------------------------------------------------------------------------------------------- */
    504 void generatePipelineLoop(const std::unique_ptr<KernelBuilder> & iBuilder, const std::vector<Kernel *> & kernels) {
    505 
    506     BasicBlock * entryBlock = iBuilder->GetInsertBlock();
     510void generatePipelineLoop(const std::unique_ptr<KernelBuilder> & b, const std::vector<Kernel *> & kernels) {
     511
     512    BasicBlock * entryBlock = b->GetInsertBlock();
    507513    Function * main = entryBlock->getParent();
    508514
    509515    // Create the basic blocks for the loop.
    510     BasicBlock * pipelineLoop = BasicBlock::Create(iBuilder->getContext(), "pipelineLoop", main);
    511     BasicBlock * pipelineExit = BasicBlock::Create(iBuilder->getContext(), "pipelineExit", main);
    512 
    513     StreamSetBufferMap<Value *> producedPos;
    514     StreamSetBufferMap<Value *> consumedPos;
    515 
    516     iBuilder->CreateBr(pipelineLoop);
    517     iBuilder->SetInsertPoint(pipelineLoop);
     516    BasicBlock * pipelineLoop = BasicBlock::Create(b->getContext(), "pipelineLoop", main);
     517    BasicBlock * pipelineExit = BasicBlock::Create(b->getContext(), "pipelineExit", main);
     518
     519    StreamSetBufferMap<Value *> producedItemCount;
     520    StreamSetBufferMap<Value *> consumedItemCount;
     521
     522    b->CreateBr(pipelineLoop);
     523    b->SetInsertPoint(pipelineLoop);
    518524   
    519525    Value * cycleCountStart = nullptr;
    520526    Value * cycleCountEnd = nullptr;
    521527    if (DebugOptionIsSet(codegen::EnableCycleCounter)) {
    522         cycleCountStart = iBuilder->CreateReadCycleCounter();
    523     }
    524     Value * terminated = iBuilder->getFalse();
     528        cycleCountStart = b->CreateReadCycleCounter();
     529    }
     530    Value * terminated = b->getFalse();
    525531
    526532    for (Kernel * const kernel : kernels) {
    527533
    528         iBuilder->setKernel(kernel);
     534        b->setKernel(kernel);
    529535        const auto & inputs = kernel->getStreamInputs();
    530536        const auto & outputs = kernel->getStreamOutputs();
     
    533539
    534540        for (unsigned i = 0; i < inputs.size(); ++i) {
    535             const auto f = producedPos.find(kernel->getStreamSetInputBuffer(i));
    536             if (LLVM_UNLIKELY(f == producedPos.end())) {
     541            const StreamSetBuffer * const buffer = kernel->getStreamSetInputBuffer(i);
     542            const auto f = producedItemCount.find(buffer);
     543            if (LLVM_UNLIKELY(f == producedItemCount.end())) {
    537544                report_fatal_error(kernel->getName() + " uses stream set " + inputs[i].getName() + " prior to its definition");
    538545            }
    539             args.push_back(f->second);
    540         }
    541 
    542         applyOutputBufferExpansions(iBuilder, kernel);
    543 
    544         iBuilder->createDoSegmentCall(args);
     546            Value * const produced = f->second;
     547            args.push_back(produced);
     548            handleInsufficientData(b, produced, terminated, pipelineLoop, kernel, inputs[i], buffer);
     549        }
     550
     551        applyOutputBufferExpansions(b, kernel);
     552
     553        b->createDoSegmentCall(args);
    545554
    546555        if (!kernel->hasNoTerminateAttribute()) {
    547             Value * terminatedSignal = iBuilder->getTerminationSignal();
    548             terminated = iBuilder->CreateOr(terminated, terminatedSignal);
     556            Value * terminatedSignal = b->getTerminationSignal();
     557            terminated = b->CreateOr(terminated, terminatedSignal);
    549558        }
    550559        for (unsigned i = 0; i < outputs.size(); ++i) {
    551             Value * const produced = iBuilder->getProducedItemCount(outputs[i].getName()); // , terminated
     560            Value * const produced = b->getProducedItemCount(outputs[i].getName());
    552561            const StreamSetBuffer * const buf = kernel->getStreamSetOutputBuffer(i);
    553             assert (producedPos.count(buf) == 0);
    554             producedPos.emplace(buf, produced);
     562            assert (producedItemCount.count(buf) == 0);
     563            producedItemCount.emplace(buf, produced);
    555564        }
    556565
    557566        for (unsigned i = 0; i < inputs.size(); ++i) {
    558             Value * const processed = iBuilder->getProcessedItemCount(inputs[i].getName());
     567            Value * const processed = b->getProcessedItemCount(inputs[i].getName());
    559568            const StreamSetBuffer * const buf = kernel->getStreamSetInputBuffer(i);
    560             auto f = consumedPos.find(buf);
    561             if (f == consumedPos.end()) {
    562                 consumedPos.emplace(buf, processed);
     569            auto f = consumedItemCount.find(buf);
     570            if (f == consumedItemCount.end()) {
     571                consumedItemCount.emplace(buf, processed);
    563572            } else {
    564                 Value * lesser = iBuilder->CreateICmpULT(processed, f->second);
    565                 f->second = iBuilder->CreateSelect(lesser, processed, f->second);
    566             }
    567         }
     573                f->second = b->CreateUMin(processed, f->second);
     574            }
     575        }
     576
    568577        if (DebugOptionIsSet(codegen::EnableCycleCounter)) {
    569             cycleCountEnd = iBuilder->CreateReadCycleCounter();
    570             Value * counterPtr = iBuilder->getCycleCountPtr();
    571             iBuilder->CreateStore(iBuilder->CreateAdd(iBuilder->CreateLoad(counterPtr), iBuilder->CreateSub(cycleCountEnd, cycleCountStart)), counterPtr);
     578            cycleCountEnd = b->CreateReadCycleCounter();
     579            Value * counterPtr = b->getCycleCountPtr();
     580            b->CreateStore(b->CreateAdd(b->CreateLoad(counterPtr), b->CreateSub(cycleCountEnd, cycleCountStart)), counterPtr);
    572581            cycleCountStart = cycleCountEnd;
    573582        }
    574 
    575         Value * const segNo = iBuilder->acquireLogicalSegmentNo();
    576         Value * nextSegNo = iBuilder->CreateAdd(segNo, iBuilder->getSize(1));
    577         iBuilder->releaseLogicalSegmentNo(nextSegNo);
    578     }
    579 
    580     for (const auto consumed : consumedPos) {
    581         const StreamSetBuffer * const buf = consumed.first;
    582         Kernel * const k = buf->getProducer();
    583         const auto & outputs = k->getStreamSetOutputBuffers();
    584         for (unsigned i = 0; i < outputs.size(); ++i) {
    585             if (outputs[i] == buf) {
    586                 const auto & binding = k->getStreamOutput(i);
    587                 if (LLVM_UNLIKELY(binding.getRate().isDerived())) {
    588                     continue;
    589                 }
    590                 iBuilder->setKernel(k);
    591                 iBuilder->setConsumedItemCount(binding.getName(), consumed.second);
    592                 break;
    593             }
    594         }
    595     }
    596 
    597     iBuilder->CreateCondBr(terminated, pipelineExit, pipelineLoop);
    598 
    599     iBuilder->SetInsertPoint(pipelineExit);
     583//        Value * const segNo = b->acquireLogicalSegmentNo();
     584//        Value * nextSegNo = b->CreateAdd(segNo, b->getSize(1));
     585//        b->releaseLogicalSegmentNo(nextSegNo);
     586    }
     587
     588    for (const auto consumed : consumedItemCount) {
     589        const StreamSetBuffer * const buffer = consumed.first;
     590        Kernel * const kernel = buffer->getProducer();
     591        const auto & binding = kernel->getStreamOutput(buffer);
     592        if (LLVM_UNLIKELY(binding.getRate().isDerived())) {
     593            continue;
     594        }
     595        b->setKernel(kernel);
     596        b->setConsumedItemCount(binding.getName(), consumed.second);
     597    }
     598
     599    b->CreateCondBr(terminated, pipelineExit, pipelineLoop);
     600
     601    b->SetInsertPoint(pipelineExit);
    600602
    601603    if (DebugOptionIsSet(codegen::EnableCycleCounter)) {
    602604        for (unsigned k = 0; k < kernels.size(); k++) {
    603605            auto & kernel = kernels[k];
    604             iBuilder->setKernel(kernel);
     606            b->setKernel(kernel);
    605607            const auto & inputs = kernel->getStreamInputs();
    606608            const auto & outputs = kernel->getStreamOutputs();
    607609            Value * items = nullptr;
    608610            if (inputs.empty()) {
    609                 items = iBuilder->getProducedItemCount(outputs[0].getName());
     611                items = b->getProducedItemCount(outputs[0].getName());
    610612            } else {
    611                 items = iBuilder->getProcessedItemCount(inputs[0].getName());
    612             }
    613             Value * fItems = iBuilder->CreateUIToFP(items, iBuilder->getDoubleTy());
    614             Value * cycles = iBuilder->CreateLoad(iBuilder->getCycleCountPtr());
    615             Value * fCycles = iBuilder->CreateUIToFP(cycles, iBuilder->getDoubleTy());
     613                items = b->getProcessedItemCount(inputs[0].getName());
     614            }
     615            Value * fItems = b->CreateUIToFP(items, b->getDoubleTy());
     616            Value * cycles = b->CreateLoad(b->getCycleCountPtr());
     617            Value * fCycles = b->CreateUIToFP(cycles, b->getDoubleTy());
    616618            const auto formatString = kernel->getName() + ": %7.2e items processed; %7.2e CPU cycles,  %6.2f cycles per item.\n";
    617             Value * stringPtr = iBuilder->CreatePointerCast(iBuilder->GetString(formatString), iBuilder->getInt8PtrTy());
    618             iBuilder->CreateCall(iBuilder->GetDprintf(), {iBuilder->getInt32(2), stringPtr, fItems, fCycles, iBuilder->CreateFDiv(fCycles, fItems)});
     619            Value * stringPtr = b->CreatePointerCast(b->GetString(formatString), b->getInt8PtrTy());
     620            b->CreateCall(b->GetDprintf(), {b->getInt32(2), stringPtr, fItems, fCycles, b->CreateFDiv(fCycles, fItems)});
    619621        }
    620622    }
    621623}
    622624
     625/** ------------------------------------------------------------------------------------------------------------- *
     626 * @brief applyOutputBufferExpansions
     627 ** ------------------------------------------------------------------------------------------------------------- */
    623628void applyOutputBufferExpansions(const std::unique_ptr<KernelBuilder> & b, const std::string & name, DynamicBuffer * const db, const uint64_t baseSize) {
    624 
    625     BasicBlock * const doExpand = b->CreateBasicBlock(name + "Expand");
     629    BasicBlock * const doExpand = BasicBlock::Create(b->getContext(), name + "Expand", b->GetInsertBlock()->getParent());
    626630    BasicBlock * const nextBlock = b->GetInsertBlock()->getNextNode();
    627631    doExpand->moveAfter(b->GetInsertBlock());
     
    659663    }
    660664}
     665
     666/** ------------------------------------------------------------------------------------------------------------- *
     667 * @brief handleInsufficientData
     668 ** ------------------------------------------------------------------------------------------------------------- */
     669inline void handleInsufficientData(const std::unique_ptr<KernelBuilder> & b, Value * const produced, Value * const final, BasicBlock * const insufficient,
     670                                   const Kernel * const consumer,  const Binding & input, const StreamSetBuffer * const buffer) {
     671    const Kernel * const producer = buffer->getProducer();
     672    const Binding & output = producer->getStreamOutput(buffer);
     673    auto producedRate = producer->getLowerBound(output.getRate()) * producer->getStride();
     674    const auto consumedRate = consumer->getUpperBound(input.getRate()) * consumer->getStride();
     675    if (LLVM_UNLIKELY(input.hasLookahead())) {
     676        producedRate -= input.getLookahead();
     677//        const auto amount = input.getLookahead();
     678//        const auto strides = ((amount + consumer->getStride() - 1) / consumer->getStride());
     679//        consumedRate += strides * consumer->getStride();
     680    }
     681    if (LLVM_UNLIKELY(producedRate < consumedRate)) {
     682        const auto name = input.getName();
     683        BasicBlock * const sufficient = BasicBlock::Create(b->getContext(), name + "IsSufficient", b->GetInsertBlock()->getParent());
     684        Value * const processed = b->getProcessedItemCount(name);
     685        Value * const unread = b->CreateSub(produced, processed);
     686        Constant * const amount = ConstantInt::get(unread->getType(), ceiling(consumedRate));
     687        Value * const cond = b->CreateOr(b->CreateICmpUGE(unread, amount), final);
     688        b->CreateLikelyCondBr(cond, sufficient, insufficient);
     689        b->SetInsertPoint(sufficient);
     690    }
     691}
     692
  • icGREP/icgrep-devel/icgrep/util/slab_allocator.h

    r5748 r5782  
    2424
    2525    template<typename Type = T>
    26     inline Type * allocate(size_type n, const_pointer = nullptr) noexcept {
     26    inline Type * allocate(const size_type n, const_pointer = nullptr) noexcept {
    2727        static_assert(sizeof(Type) > 0, "Cannot allocate a zero-length type.");
    28         assert ("Cannot allocate 0 items." && n > 0);
     28        assert ("A memory leak will occur whenever the SlabAllocator allocates 0 items" && n > 0);
    2929        auto ptr = static_cast<Type *>(mAllocator.Allocate(n * sizeof(Type), sizeof(void*)));
    30         assert ("Allocating returned a null pointer. Function was likely called before Allocator creation!" && ptr);
     30        assert ("allocator returned a null pointer. Function was likely called before Allocator creation!" && ptr);
    3131        return ptr;
    3232    }
     
    4242
    4343    template<typename Type = T>
    44     inline bool operator==(SlabAllocator<Type> const & other) {
     44    inline bool operator==(SlabAllocator<Type> const & other) const noexcept {
    4545        return this == &other;
    4646    }
    4747
    4848    template<typename Type = T>
    49     inline bool operator!=(SlabAllocator<Type> const & other) {
     49    inline bool operator!=(SlabAllocator<Type> const & other) const noexcept {
    5050        return this != &other;
    5151    }
    5252
    53     inline size_type getTotalMemory() const {
     53    inline size_type getTotalMemory() const noexcept {
    5454        return mAllocator.getTotalMemory();
    5555    }
     
    6161    inline SlabAllocator() noexcept {}
    6262    inline SlabAllocator(const SlabAllocator &) noexcept = delete;
    63     template <class U> inline SlabAllocator (const SlabAllocator<U> &) noexcept { assert (false); }
     63    template <class U> inline SlabAllocator (const SlabAllocator<U> &) noexcept { }
    6464private:
    6565    LLVMAllocator mAllocator;
     
    9898    }
    9999
    100     inline size_type max_size() const {
     100    inline size_type max_size() const noexcept {
    101101        return std::numeric_limits<size_type>::max();
    102102    }
    103103
    104104    template<typename Type = T>
    105     inline bool operator==(ProxyAllocator<Type> const & other) {
     105    inline bool operator==(ProxyAllocator<Type> const & other) const noexcept {
    106106        return mAllocator == other.mAllocator;
    107107    }
    108108
    109109    template<typename Type = T>
    110     inline bool operator!=(ProxyAllocator<Type> const & other) {
     110    inline bool operator!=(ProxyAllocator<Type> const & other) const noexcept {
    111111        return mAllocator != other.mAllocator;
    112112    }
    113113
    114     inline size_type getTotalMemory() const {
     114    inline size_type getTotalMemory() const noexcept {
    115115        return mAllocator->getTotalMemory();
    116116    }
Note: See TracChangeset for help on using the changeset viewer.