Ignore:
Timestamp:
Dec 15, 2017, 12:44:01 PM (21 months ago)
Author:
nmedfort
Message:

Initial check-in of LookAhead support; modified LineBreakKernel to compute CR+LF using LookAhead(1) + misc. fixes.

Location:
icGREP/icgrep-devel/icgrep/kernels
Files:
15 edited

Legend:

Unmodified
Added
Removed
  • icGREP/icgrep-devel/icgrep/kernels/attributes.cpp

    r5755 r5782  
    11#include "attributes.h"
     2
     3#include <llvm/Support/raw_ostream.h>
    24
    35namespace kernel {
    46
    5 void AttributeSet::addAttribute(Attribute attribute) {
    6     for (Attribute & attr : *this) {
     7Attribute & AttributeSet::addAttribute(Attribute attribute) {
     8    for (auto i = begin(), i_end = end(); i != i_end; ++i) {
     9        Attribute & attr = const_cast<Attribute &>(*i);
    710        if (attr.getKind() == attribute.getKind()) {
    8             attr.mK = attribute.mK;
    9             return;
     11            attr.mAmount = attribute.mAmount;
     12            return attr;
    1013        }
    1114    }
    1215    emplace_back(attribute);
     16    return back();
    1317}
    1418
    15 bool AttributeSet::hasAttribute(const AttributeId id) const {
    16     for (const Attribute & attr : *this) {
    17         if (attr.getKind() == id) {
    18             return true;
     19Attribute * AttributeSet::__findAttribute(const AttributeId id) const {
     20    for (auto i = begin(), i_end = end(); i != i_end; ++i) {
     21        if (i->getKind() == id) {
     22            return const_cast<Attribute *>(&*i);
    1923        }
    2024    }
    21     return false;
     25    return nullptr;
    2226}
    2327
  • icGREP/icgrep-devel/icgrep/kernels/attributes.h

    r5755 r5782  
    88struct Attribute {
    99
    10     friend struct AttributeSet;
    11 
    12     friend struct Binding;
    13 
    1410    enum class KindId {
    1511
     
    1915
    2016        // A LookAhead(n) attribute on an input stream set S declares that the kernel
    21         // looks ahead n positions in the input stream.   That is,
    22         // processing of item S[i, j] may be defined in terms of S[i, j+n].
     17        // looks ahead n positions in the input stream.  That is, processing of item
     18        // S[i, j] may be defined in terms of S[i, j+n].
    2319
    2420        // Guarantee required: the pipeline compiler must ensure that, when
     
    6864        // buffer size calculations.
    6965
    70         Greedy,
    71 
    72         // Normally, the available item count of fixed rate streams is equal to the
    73         // number of strides processed by the MultiBlock times its stride size for all
    74         // strides except for the final stride. Some kernels consume
     66        IndependentRegionBegin, IndependentRegionEnd, /// NOT DONE
     67
     68        // Some kernels can divide their processing into concrete non-overlapping regions
     69        // between a beginning and ending position. This is a hard guarantee that regardless
     70        // of the computations between the start of the stream and the beginning of the first
     71        // independent region or between the *beginning* of any two independent regions, A,
     72        // B, the calculations that occur prior to the beginning of B do not affect the
     73        // calculations after it --- even if A is started at an arbitrary position with a
     74        // zeroed-out kernel state.
     75
     76        // If a kernel K is processed simultaneously by two threads, K_0 and K_1, and K_1 is
     77        // waiting for K_0 to finish and update its kernel state for K_1 to resume at, K_1 can
     78        // compute what its state will be and begin processing before K_0 is finished. This
     79        // requires the pipeline to intervene and call an optimized "output-less" instance
     80        // of the kernel prior to calling B.
     81
     82        ConditionalRegionBegin, ConditionalRegionEnd, /// NOT DONE
     83
     84        // Some kernels have clearly demarcated regions in which a MultiBlock kernel will
     85        // produce useful outputs for only the inputs within those regions. This attribute
     86        // instructs the kernel to "zero-fill" the output of any non-selected regions,
     87        // skipping strides entirely whenever possible.
     88
     89        // If the same regions are also independent, we can avoid the overhead of "masking
     90        // out" the input streams. Otherwise a MultiBlock will use temporary buffers for all
     91        // uses of the streams and zero out any non-regions from the data.
    7592
    7693        /** OUTPUT STREAM ATTRIBUTES **/
     
    101118        // swizzling code necessary).
    102119
     120        ReverseRegionBegin, ReverseRegionEnd, /// NOT DONE
     121
     122        // Conceptually, reversing a stream S is simple: {S_1,...,S_n} -> {S_n,...,S_1}.
     123        // However, this means all of the input data must be computed and stored prior to
     124        // executing this kernel. In practice, this is unnecessary as in the context of
     125        // text parsing, we're almost always searching for the true starting position of
     126        // something ambiguous after we've found its end position in some prior kernel.
     127
     128
     129
     130
     131
     132//        Here is a revised definition of SegmentedReverse:
     133
     134//        Given a stream of data bits S that is considered to be divided into
     135//        segments, and a marker stream M having a one bit at the final position
     136//        of each segment, then R = SegmentedReverse(S, M) when
     137
     138//        R_{i} = S_{l + (h - i)}
     139//              where l = the maximum j such that j <= i and either j = 0 or M_{j-1} = 1
     140//          and where h = the minimum j such that j >= i and either j = length(S) - 1 or M_j = 1
     141//          (l and h are the low and high positions of the segment containing i)
     142
     143//        This is an invertible operation, so we can apply R to a kernel's input
     144//        and then to its output to get a SegmentedReverse version of a kernel
     145
     146//        A kernel which computes segmented reverse is feasible, but seems complex
     147//        to implement, and probably too slow.  I have played around with several
     148//        ways of tackling it, no good method yet.
     149
     150//        If there are multiple segments within a block, we could instead use
     151//        the following:
     152
     153//        BlockSegmentedReverse
     154
     155//        B_{i} = S_{L + (H - i)}
     156//             where l = the maximum j such that j <= i and either j = 0 or M_{j-1} = 1
     157//                   h = the minimum j such that j >= i and either j = length(S) - 1 or M_j = 1
     158//                   L = l if l div BlockSize < h div BlockSize, otherwise (i div BlockSize) * BlockSize
     159//                   H = h if l div BlockSize < h div BlockSize, otherwise L + BlockSize - 1
     160
     161//        An alternative way of looking at this is to eliminate all but the first
     162//        and last marker positions within a block.
     163
     164//        The main point is that, if we apply B to inputs, perform the kernel
     165//        and then apply B to outputs, we get the same result as if we applied R
     166//        (assuming that the kernel computations do not cross boundaries in M).
     167
     168//        This will be more efficient to compute, but still involves overhead
     169//        for shifting and combining streams.
     170
     171//        I think it may be better to focus on the ReverseKernel adapter, that
     172//        handles the reverse operations for both input and output.   This actually
     173//        gives more flexibility, because, in a multiblock scenario, we can process
     174//        the longest sequence of blocks such that both the beginning and end blocks
     175//        have a one bit.   If there are any interior blocks with one bits, then
     176//        they will be handled automatically without special shifting and masking.
     177
     178//        By the way, in my designs, I am wanting to have a callable Multiblock
     179//        function, so that the Multiblock function for a Reversed Kernel just
     180//        does a little work before calling the Multiblock function of the base kernel.
     181//        That seems to have disappeared in the current system.
     182
     183
    103184        /** KERNEL ATTRIBUTES **/
    104185
     
    124205        // kernels and end the program once the final kernel has returned its result.
    125206
    126         IndependentRegions,
    127 
    128         // Some kernels can divide their processing into concrete non-overlapping regions
    129         // between a start and end position in which the data produced by a kernel. If a
    130         // kernel K is processed simultaneously by two threads, K_0 and K_1, and K_1 is
    131         // waiting K_0 to finish and update it's kernel state for K_1 to resume at, K_1 can
    132         // compute what its state will be and begin processing before K_0 is finished. This
    133         // requires a the pipeline to intervene and call an optimized "output-less" instance
    134         // of the kernel prior to calling B.
    135 
    136207    };
    137208
     
    152223    }
    153224
    154     unsigned getAmount() const {
    155         return mK;
     225    unsigned amount() const {
     226        return mAmount;
     227    }
     228
     229    void setAmount(const unsigned amount) {
     230        mAmount = amount;
    156231    }
    157232
    158233    bool operator == (const Attribute & other) const {
    159         return mKind == other.mKind && mK == other.mK;
     234        return mKind == other.mKind && mAmount == other.mAmount;
    160235    }
    161236
     
    170245    }
    171246
     247    friend struct AttributeSet;
     248    friend struct Binding;
    172249    friend Attribute Add1();
    173250    friend Attribute Principal();
    174251    friend Attribute RoundUpTo(const unsigned);
     252    friend Attribute LookAhead(const unsigned);
    175253    friend Attribute LookBehind(const unsigned);
    176254    friend Attribute Deferred();
    177 
    178     Attribute(const KindId kind, const unsigned k) : mKind(kind), mK(k) { }
     255    friend Attribute ConditionalRegionBegin();
     256    friend Attribute ConditionalRegionEnd();
     257
     258    Attribute(const KindId kind, const unsigned k) : mKind(kind), mAmount(k) { }
    179259
    180260private:
    181261
    182262    const KindId    mKind;
    183     unsigned        mK;
     263    unsigned        mAmount;
    184264};
    185265
     
    192272    }
    193273
    194     const Attribute & getAttribute(const unsigned i) const {
    195         return getAttributes()[i];
    196     }
    197 
    198     void addAttribute(Attribute attribute);
     274    Attribute & findOrAddAttribute(const AttributeId id) {
     275        if (Attribute * const attr = __findAttribute(id)) {
     276            return *attr;
     277        } else {
     278            return addAttribute(Attribute(id, 0));
     279        }
     280    }
     281
     282    Attribute & findAttribute(const AttributeId id) const {
     283        return *__findAttribute(id);
     284    }
     285
     286    Attribute & addAttribute(Attribute attribute);
    199287
    200288    bool hasAttributes() const {
     
    202290    }
    203291
    204     bool hasAttribute(const AttributeId id) const;
     292    bool hasAttribute(const AttributeId id) const {
     293        return __findAttribute(id) != nullptr;
     294    }
    205295
    206296    AttributeSet() = default;
    207297
     298    AttributeSet(Attribute && attr) { emplace_back(std::move(attr)); }
     299
    208300    AttributeSet(std::initializer_list<Attribute> attrs) : std::vector<Attribute>(attrs) { }
     301
     302private:
     303
     304    Attribute * __findAttribute(const AttributeId id) const;
     305
    209306};
    210307
     
    222319}
    223320
     321inline Attribute LookAhead(const unsigned k) {
     322    return Attribute(Attribute::KindId::LookAhead, k);
     323}
     324
    224325inline Attribute LookBehind(const unsigned k) {
    225326    return Attribute(Attribute::KindId::LookBehind, k);
     
    230331}
    231332
     333inline Attribute ConditionalRegionBegin() {
     334    return Attribute(Attribute::KindId::ConditionalRegionBegin, 0);
     335}
     336
     337inline Attribute ConditionalRegionEnd() {
     338    return Attribute(Attribute::KindId::ConditionalRegionEnd, 0);
     339}
     340
    232341}
    233342
  • icGREP/icgrep-devel/icgrep/kernels/grep_kernel.cpp

    r5769 r5782  
    4141    cc::CC_Compiler ccc(this, getInput(0));
    4242    auto & pb = ccc.getBuilder();
    43     Zeroes * const zero = pb.createZeroes();
    44     PabloAST * LF = ccc.compileCC("LF", makeCC(0x0A), pb);
    45     PabloAST * CR = ccc.compileCC(makeCC(0x0D));
    46 
    47     Var * crlf = pb.createVar("crlf", zero);
    48     PabloBuilder crb = PabloBuilder::Create(pb);
    49     PabloAST * cr1 = crb.createAdvance(CR, 1, "cr1");
    50     crb.createAssign(crlf, crb.createAnd(cr1, LF));
    51     pb.createIf(CR, crb);
    52    
    53     Var * u8invalid = pb.createVar("u8invalid", zero);
    54     Var * valid_pfx = pb.createVar("valid_pfx", zero);
    55     Var * nonFinal = pb.createVar("nonfinal", zero);
    56     PabloAST * u8pfx = ccc.compileCC(makeCC(0xC0, 0xFF));
    57    
     43    Zeroes * const ZEROES = pb.createZeroes();
     44    PabloAST * const u8pfx = ccc.compileCC(makeCC(0xC0, 0xFF));
     45
     46
     47    Var * const nonFinal = pb.createVar("nonFinal", u8pfx);
     48    Var * const u8invalid = pb.createVar("u8invalid", ZEROES);
     49    Var * const valid_pfx = pb.createVar("valid_pfx", u8pfx);
     50
    5851    PabloBuilder it = PabloBuilder::Create(pb);
    5952
    6053    pb.createIf(u8pfx, it);
    61     PabloAST * u8pfx2 = ccc.compileCC(makeCC(0xC2, 0xDF), it);
    62     PabloAST * u8pfx3 = ccc.compileCC(makeCC(0xE0, 0xEF), it);
    63     PabloAST * u8pfx4 = ccc.compileCC(makeCC(0xF0, 0xF4), it);
    64     PabloAST * u8suffix = ccc.compileCC("u8suffix", makeCC(0x80, 0xBF), it);
     54    PabloAST * const u8pfx2 = ccc.compileCC(makeCC(0xC2, 0xDF), it);
     55    PabloAST * const u8pfx3 = ccc.compileCC(makeCC(0xE0, 0xEF), it);
     56    PabloAST * const u8pfx4 = ccc.compileCC(makeCC(0xF0, 0xF4), it);
     57    PabloAST * const u8suffix = ccc.compileCC("u8suffix", makeCC(0x80, 0xBF), it);
    6558   
    6659    //
    6760    // Two-byte sequences
    68     Var * u8scope22 = it.createVar("u8scope22", zero);
     61    Var * const anyscope = it.createVar("anyscope", ZEROES);
    6962    PabloBuilder it2 = PabloBuilder::Create(it);
    70     it2.createAssign(u8scope22, it2.createAdvance(u8pfx2, 1));
    7163    it.createIf(u8pfx2, it2);
    72     //
    73     // Three-byte sequences
    74    
    75     Var * u8scope32 = it.createVar("u8scope32", zero);
    76     Var * u8scope3X = it.createVar("u8scope3X", zero);
    77     Var * EX_invalid = it.createVar("EX_invalid", zero);
     64    it2.createAssign(anyscope, it2.createAdvance(u8pfx2, 1));
     65
     66    //
     67    // Three-byte sequences   
     68    Var * const EF_invalid = it.createVar("EF_invalid", ZEROES);
    7869    PabloBuilder it3 = PabloBuilder::Create(it);
    7970    it.createIf(u8pfx3, it3);
    80     it3.createAssign(u8scope32, it3.createAdvance(u8pfx3, 1));
    81     PabloAST * u8scope33 = it3.createAdvance(u8pfx3, 2);
    82     it3.createAssign(u8scope3X, it3.createOr(u8scope32, u8scope33));
    83     PabloAST * E0_invalid = it3.createAnd(it3.createAdvance(ccc.compileCC(makeCC(0xE0), it3), 1), ccc.compileCC(makeCC(0x80, 0x9F), it3));
    84     PabloAST * ED_invalid = it3.createAnd(it3.createAdvance(ccc.compileCC(makeCC(0xED), it3), 1), ccc.compileCC(makeCC(0xA0, 0xBF), it3));
    85     it3.createAssign(EX_invalid, it3.createOr(E0_invalid, ED_invalid));
    86    
     71    PabloAST * const u8scope32 = it3.createAdvance(u8pfx3, 1);
     72    it3.createAssign(nonFinal, it3.createOr(nonFinal, u8scope32));
     73    PabloAST * const u8scope33 = it3.createAdvance(u8pfx3, 2);
     74    PabloAST * const u8scope3X = it3.createOr(u8scope32, u8scope33);
     75    it3.createAssign(anyscope, it3.createOr(anyscope, u8scope3X));
     76    PabloAST * const E0_invalid = it3.createAnd(it3.createAdvance(ccc.compileCC(makeCC(0xE0), it3), 1), ccc.compileCC(makeCC(0x80, 0x9F), it3));
     77    PabloAST * const ED_invalid = it3.createAnd(it3.createAdvance(ccc.compileCC(makeCC(0xED), it3), 1), ccc.compileCC(makeCC(0xA0, 0xBF), it3));
     78    PabloAST * const EX_invalid = it3.createOr(E0_invalid, ED_invalid);
     79    it3.createAssign(EF_invalid, EX_invalid);
     80
     81
    8782    //
    8883    // Four-byte sequences
    89     Var * u8scope4nonfinal = it.createVar("u8scope4nonfinal", zero);
    90     Var * u8scope4X = it.createVar("u8scope4X", zero);
    91     Var * FX_invalid = it.createVar("FX_invalid", zero);
    9284    PabloBuilder it4 = PabloBuilder::Create(it);
    9385    it.createIf(u8pfx4, it4);
    94     PabloAST * u8scope42 = it4.createAdvance(u8pfx4, 1, "u8scope42");
    95     PabloAST * u8scope43 = it4.createAdvance(u8scope42, 1, "u8scope43");
    96     PabloAST * u8scope44 = it4.createAdvance(u8scope43, 1, "u8scope44");
    97     it4.createAssign(u8scope4nonfinal, it4.createOr(u8scope42, u8scope43));
    98     it4.createAssign(u8scope4X, it4.createOr(u8scope4nonfinal, u8scope44));
    99     PabloAST * F0_invalid = it4.createAnd(it4.createAdvance(ccc.compileCC(makeCC(0xF0), it4), 1), ccc.compileCC(makeCC(0x80, 0x8F), it4));
    100     PabloAST * F4_invalid = it4.createAnd(it4.createAdvance(ccc.compileCC(makeCC(0xF4), it4), 1), ccc.compileCC(makeCC(0x90, 0xBF), it4));
    101     it4.createAssign(FX_invalid, it4.createOr(F0_invalid, F4_invalid));
     86    PabloAST * const u8scope42 = it4.createAdvance(u8pfx4, 1, "u8scope42");
     87    PabloAST * const u8scope43 = it4.createAdvance(u8scope42, 1, "u8scope43");
     88    PabloAST * const u8scope44 = it4.createAdvance(u8scope43, 1, "u8scope44");
     89    PabloAST * const u8scope4nonfinal = it4.createOr(u8scope42, u8scope43);
     90    it4.createAssign(nonFinal, it4.createOr(nonFinal, u8scope4nonfinal));
     91    PabloAST * const u8scope4X = it4.createOr(u8scope4nonfinal, u8scope44);
     92    it4.createAssign(anyscope, it4.createOr(anyscope, u8scope4X));
     93    PabloAST * const F0_invalid = it4.createAnd(it4.createAdvance(ccc.compileCC(makeCC(0xF0), it4), 1), ccc.compileCC(makeCC(0x80, 0x8F), it4));
     94    PabloAST * const F4_invalid = it4.createAnd(it4.createAdvance(ccc.compileCC(makeCC(0xF4), it4), 1), ccc.compileCC(makeCC(0x90, 0xBF), it4));
     95    PabloAST * const FX_invalid = it4.createOr(F0_invalid, F4_invalid);
     96    it4.createAssign(EF_invalid, it4.createOr(EF_invalid, FX_invalid));
    10297   
    10398    //
    10499    // Invalid cases
    105     PabloAST * anyscope = it.createOr(u8scope22, it.createOr(u8scope3X, u8scope4X));
    106     PabloAST * legalpfx = it.createOr(it.createOr(u8pfx2, u8pfx3), u8pfx4);
     100    PabloAST * const legalpfx = it.createOr(it.createOr(u8pfx2, u8pfx3), u8pfx4);
    107101    //  Any scope that does not have a suffix byte, and any suffix byte that is not in
    108102    //  a scope is a mismatch, i.e., invalid UTF-8.
    109     PabloAST * mismatch = it.createXor(anyscope, u8suffix);
    110     //
    111     PabloAST * EF_invalid = it.createOr(EX_invalid, FX_invalid);
    112     PabloAST * pfx_invalid = it.createXor(u8pfx, legalpfx);
     103    PabloAST * const mismatch = it.createXor(anyscope, u8suffix);
     104    //
     105    PabloAST * const pfx_invalid = it.createXor(valid_pfx, legalpfx);
    113106    it.createAssign(u8invalid, it.createOr(pfx_invalid, it.createOr(mismatch, EF_invalid)));
    114     PabloAST * u8valid = it.createNot(u8invalid, "u8valid");
    115     //
    116     //
    117    
    118     it.createAssign(valid_pfx, it.createAnd(u8pfx, u8valid));
    119     it.createAssign(nonFinal, it.createAnd(it.createOr(it.createOr(u8pfx, u8scope32), u8scope4nonfinal), u8valid));
     107    PabloAST * const u8valid = it.createNot(u8invalid, "u8valid");
     108    //
     109    //
     110   
     111    it.createAssign(valid_pfx, it.createAnd(valid_pfx, u8valid));
     112    it.createAssign(nonFinal, it.createAnd(nonFinal, u8valid));
    120113   
    121114    PabloAST * u8single = pb.createAnd(ccc.compileCC(makeCC(0x00, 0x7F)), pb.createNot(u8invalid));
    122    
     115    PabloAST * const initial = pb.createOr(u8single, valid_pfx, "initial");
     116    PabloAST * const final = pb.createNot(pb.createOr(nonFinal, u8invalid), "final");
     117
    123118    Var * const required = getOutputStreamVar("required");
    124     pb.createAssign(pb.createExtract(required, pb.getInteger(0)), pb.createOr(u8single, valid_pfx, "initial"));
     119    pb.createAssign(pb.createExtract(required, pb.getInteger(0)), initial);
    125120    pb.createAssign(pb.createExtract(required, pb.getInteger(1)), nonFinal);
    126     pb.createAssign(pb.createExtract(required, pb.getInteger(2)), pb.createNot(pb.createOr(nonFinal, u8invalid), "final"));
    127     pb.createAssign(pb.createExtract(required, pb.getInteger(3)), crlf);
     121    pb.createAssign(pb.createExtract(required, pb.getInteger(2)), final);
     122
    128123}
    129124
    130125RequiredStreams_UTF8::RequiredStreams_UTF8(const std::unique_ptr<kernel::KernelBuilder> & kb)
    131 : PabloKernel(kb, "RequiredStreams_UTF8",               
    132               {Binding{kb->getStreamSetTy(8), "basis"}},
    133               {Binding{kb->getStreamSetTy(4), "required", FixedRate(), Add1()}},
    134               {},
    135               {}) {
     126: PabloKernel(kb, "RequiredStreams_UTF8",
     127// input
     128{Binding{kb->getStreamSetTy(8), "basis"}},
     129// output
     130{Binding{kb->getStreamSetTy(3), "required", FixedRate(), Add1()}}) {
     131
    136132}
    137133
     
    141137    auto & pb = ccc.getBuilder();
    142138   
    143     PabloAST * LF = ccc.compileCC("LF", makeCC(0x000A), pb);
    144     PabloAST * CR = ccc.compileCC("CR", makeCC(0x000D), pb);
    145     PabloAST * cr1 = pb.createAdvance(CR, 1, "cr1");
    146    
    147139    PabloAST * u16hi_hi_surrogate = ccc.compileCC(makeCC(0xD800, 0xDBFF));    //u16hi_hi_surrogate = [\xD8-\xDB]
    148140    PabloAST * u16hi_lo_surrogate = ccc.compileCC(makeCC(0xDC00, 0xDFFF));    //u16hi_lo_surrogate = [\xDC-\xDF]
     
    150142    PabloAST * invalidTemp = pb.createAdvance(u16hi_hi_surrogate, 1, "InvalidTemp");
    151143    PabloAST * u16invalid = pb.createXor(invalidTemp, u16hi_lo_surrogate, "u16invalid");
     144
    152145    PabloAST * u16valid = pb.createNot(u16invalid, "u16valid");
    153    
     146    PabloAST * nonFinal = pb.createAnd(u16hi_hi_surrogate, u16valid, "nonfinal");
     147
    154148    PabloAST * u16single_temp = pb.createOr(ccc.compileCC(makeCC(0x0000, 0xD7FF)), ccc.compileCC(makeCC(0xE000, 0xFFFF)));
    155149    PabloAST * u16single = pb.createAnd(u16single_temp, pb.createNot(u16invalid));
    156150
     151    PabloAST * const nonFinalCodeUnits = pb.createExtract(getInput(1), pb.getInteger(0));
     152    PabloAST * const initial = pb.createOr(u16single, u16hi_hi_surrogate, "initial");
     153    PabloAST * const final = pb.createNot(pb.createOr(pb.createOr(u16hi_hi_surrogate, u16invalid), nonFinalCodeUnits), "final");
     154
    157155    Var * const required = getOutputStreamVar("required");
    158     pb.createAssign(pb.createExtract(required, pb.getInteger(0)), pb.createOr(u16single, u16hi_hi_surrogate, "initial"));
    159     pb.createAssign(pb.createExtract(required, pb.getInteger(1)), pb.createAnd(u16hi_hi_surrogate, u16valid, "nonfinal"));
    160     pb.createAssign(pb.createExtract(required, pb.getInteger(2)), pb.createNot(pb.createOr(u16hi_hi_surrogate, u16invalid), "final"));
    161     pb.createAssign(pb.createExtract(required, pb.getInteger(3)), pb.createAnd(cr1, LF, "crlf"));
     156    pb.createAssign(pb.createExtract(required, pb.getInteger(0)), initial);
     157    pb.createAssign(pb.createExtract(required, pb.getInteger(1)), nonFinal);
     158    pb.createAssign(pb.createExtract(required, pb.getInteger(2)), final);
     159
    162160}
    163161
    164162RequiredStreams_UTF16::RequiredStreams_UTF16(const std::unique_ptr<kernel::KernelBuilder> & kb)
    165163: PabloKernel(kb, "RequiredStreams_UTF16",               
    166               {Binding{kb->getStreamSetTy(16), "basis"}},
    167               {Binding{kb->getStreamSetTy(4), "required", FixedRate(), Add1()}},
    168               {},
    169               {}) {
    170 }
    171 
     164// inputs
     165{Binding{kb->getStreamSetTy(8), "basis"}},
     166// output
     167{Binding{kb->getStreamSetTy(3), "required", FixedRate(), Add1()}}) {
     168
     169}
    172170
    173171ICGrepSignature::ICGrepSignature(re::RE * const re_ast)
     
    179177ICGrepKernel::ICGrepKernel(const std::unique_ptr<kernel::KernelBuilder> & iBuilder, RE * const re, unsigned numOfCharacterClasses)
    180178: ICGrepSignature(re)
    181 , PabloKernel(iBuilder,
    182               "ic" + sha1sum(mSignature),
    183               {Binding{iBuilder->getStreamSetTy(numOfCharacterClasses), "basis"},
    184                Binding{iBuilder->getStreamSetTy(1, 1), "linebreak"},
    185                Binding{iBuilder->getStreamSetTy(4, 1), "required"}},
    186               {Binding{iBuilder->getStreamSetTy(1, 1), "matches", FixedRate(), Add1()}}) {
     179, PabloKernel(iBuilder, "ic" + sha1sum(mSignature),
     180// inputs
     181{Binding{iBuilder->getStreamSetTy(numOfCharacterClasses), "basis"},
     182Binding{iBuilder->getStreamSetTy(1, 1), "linebreak"},
     183Binding{iBuilder->getStreamSetTy(1, 1), "cr+lf"},
     184Binding{iBuilder->getStreamSetTy(3, 1), "required"}},
     185// output
     186{Binding{iBuilder->getStreamSetTy(1, 1), "matches", FixedRate(), Add1()}}) {
    187187
    188188}
     
    211211MatchedLinesKernel::MatchedLinesKernel (const std::unique_ptr<kernel::KernelBuilder> & iBuilder)
    212212: PabloKernel(iBuilder, "MatchedLines",
    213               {Binding{iBuilder->getStreamSetTy(1), "matchResults"}, Binding{iBuilder->getStreamSetTy(1), "lineBreaks"}},
    214               {Binding{iBuilder->getStreamSetTy(1), "matchedLines"}},
    215               {},
    216               {}) {
     213// inputs
     214{Binding{iBuilder->getStreamSetTy(1), "matchResults"}
     215,Binding{iBuilder->getStreamSetTy(1), "lineBreaks"}},
     216// output
     217{Binding{iBuilder->getStreamSetTy(1), "matchedLines"}}) {
     218
    217219}
    218220
  • icGREP/icgrep-devel/icgrep/kernels/interface.h

    r5757 r5782  
    3333
    3434    Binding(llvm::Type * type, const std::string & name, ProcessingRate r, Attribute && attribute)
    35     : AttributeSet({std::move(attribute)})
     35    : AttributeSet(std::move(attribute))
    3636    , mType(type), mName(name), mRate(std::move(r)) { }
    3737
     
    5858
    5959    bool isPrincipal() const {
    60         return hasAttribute(Attribute::KindId::Principal);
     60        return hasAttribute(AttributeId::Principal);
     61    }
     62
     63    bool hasLookahead() const {
     64        return hasAttribute(AttributeId::LookAhead);
     65    }
     66
     67    unsigned const getLookahead() const {
     68        return findAttribute(AttributeId::LookAhead).amount();
    6169    }
    6270
    6371    bool nonDeferred() const {
    64         return !hasAttribute(Attribute::KindId::Deferred);
     72        return !hasAttribute(AttributeId::Deferred);
    6573    }
    6674
     
    159167    }
    160168
    161     unsigned getLookAhead(const unsigned i) const {
    162         return 0;
    163     }
    164 
    165     void setLookAhead(const unsigned i, const unsigned lookAheadPositions) {
    166 
    167     }
    168 
    169169protected:
    170170
     
    178178
    179179    KernelInterface(const std::string && kernelName,
    180                     std::vector<Binding> && stream_inputs,
    181                     std::vector<Binding> && stream_outputs,
    182                     std::vector<Binding> && scalar_inputs,
    183                     std::vector<Binding> && scalar_outputs,
    184                     std::vector<Binding> && internal_scalars)
     180                    Bindings && stream_inputs,
     181                    Bindings && stream_outputs,
     182                    Bindings && scalar_inputs,
     183                    Bindings && scalar_outputs,
     184                    Bindings && internal_scalars)
    185185    : mKernelInstance(nullptr)
    186186    , mModule(nullptr)
     
    198198protected:
    199199
    200     llvm::Value *                           mKernelInstance;
    201     llvm::Module *                          mModule;
    202     llvm::StructType *                      mKernelStateType;
    203     bool                                    mHasPrincipalItemCount;
    204     const std::string                       mKernelName;
    205     std::vector<llvm::Value *>              mInitialArguments;
    206     std::vector<Binding>                    mStreamSetInputs;
    207     std::vector<Binding>                    mStreamSetOutputs;
    208     std::vector<Binding>                    mScalarInputs;
    209     std::vector<Binding>                    mScalarOutputs;
    210     std::vector<Binding>                    mInternalScalars;
     200    llvm::Value *                   mKernelInstance;
     201    llvm::Module *                  mModule;
     202    llvm::StructType *              mKernelStateType;
     203    bool                            mHasPrincipalItemCount;
     204    const std::string               mKernelName;
     205    std::vector<llvm::Value *>      mInitialArguments;
     206    Bindings                        mStreamSetInputs;
     207    Bindings                        mStreamSetOutputs;
     208    Bindings                        mScalarInputs;
     209    Bindings                        mScalarOutputs;
     210    Bindings                        mInternalScalars;
    211211};
    212212
  • icGREP/icgrep-devel/icgrep/kernels/kernel.cpp

    r5771 r5782  
    624624
    625625/** ------------------------------------------------------------------------------------------------------------- *
    626  * @brief roundUp
    627  ** ------------------------------------------------------------------------------------------------------------- */
    628 unsigned roundUp(const ProcessingRate::RateValue & r) {
    629     if (LLVM_LIKELY(r.denominator() == 1)) {
    630         return r.numerator();
    631     } else {
    632         return (r.numerator() + r.denominator() - 1) / r.denominator();
    633     }
    634 }
    635 
    636 /** ------------------------------------------------------------------------------------------------------------- *
    637626 * @brief getItemAlignment
    638627 ** ------------------------------------------------------------------------------------------------------------- */
    639628inline unsigned MultiBlockKernel::getItemAlignment(const Binding & binding) const {
    640629    const auto & rate = binding.getRate();
    641     if (rate.isFixed()) {
    642         const auto & r = rate.getRate();
    643         const auto n = (r.numerator() * mStride);
     630    if (rate.isFixed() && binding.nonDeferred()) {
     631        const auto r = rate.getRate();
     632        auto n = (r.numerator() * mStride);
    644633        if (LLVM_LIKELY(r.denominator() == 1)) {
    645634            return n;
     
    675664    }
    676665
     666    using AttributeId = kernel::Attribute::KindId;
     667    using RateValue = ProcessingRate::RateValue;
     668
    677669    const auto inputSetCount = mStreamSetInputs.size();
    678670    const auto outputSetCount = mStreamSetOutputs.size();
     
    682674    AllocaInst * temporaryInputBuffer[inputSetCount];
    683675    for (unsigned i = 0; i < inputSetCount; ++i) {
    684         const auto & input = mStreamSetInputs[i];
     676        const Binding & input = mStreamSetInputs[i];
    685677        const ProcessingRate & rate = input.getRate();
    686678        if (isTransitivelyUnknownRate(rate)) {
    687679            report_fatal_error("MultiBlock kernels do not support unknown rate input streams or streams relative to an unknown rate input.");
    688         } else if (rate.isFixed() && input.nonDeferred() && !requiresBufferedFinalStride(input)) {
     680        } else if (rate.isFixed() && !requiresBufferedFinalStride(input)) {
    689681            temporaryInputBuffer[i] = nullptr;
    690682        } else {
    691683            Type * const ty = mStreamSetInputBuffers[i]->getStreamSetBlockType();
    692             const auto ub = getUpperBound(rate);
    693             Constant * arraySize = b->getInt64(roundUp(ub));
     684            auto ub = getUpperBound(rate);
     685            if (LLVM_UNLIKELY(input.hasLookahead())) {
     686                ub += RateValue(input.getLookahead(), mStride);
     687            }
     688            Constant * const arraySize = b->getInt64(ceiling(ub));
    694689            AllocaInst * const ptr = b->CreateAlignedAlloca(ty, blockAlignment, arraySize);
    695690            assert (ptr->isStaticAlloca());
     
    700695    AllocaInst * temporaryOutputBuffer[outputSetCount];
    701696    for (unsigned i = 0; i < outputSetCount; i++) {
    702         const auto & output = mStreamSetOutputs[i];
     697        const Binding & output = mStreamSetOutputs[i];
    703698        const ProcessingRate & rate = output.getRate();
    704         if (LLVM_UNLIKELY(isTransitivelyUnknownRate(rate) || (rate.isFixed() && output.nonDeferred() && !requiresBufferedFinalStride(output)))) {
     699        if (LLVM_UNLIKELY(isTransitivelyUnknownRate(rate) || (rate.isFixed() && !requiresBufferedFinalStride(output)))) {
    705700            temporaryOutputBuffer[i] = nullptr;
    706701        } else {           
     
    710705            }
    711706            Type * const ty = mStreamSetOutputBuffers[i]->getStreamSetBlockType();
    712             Constant * arraySize = b->getInt64(roundUp(ub));
     707            Constant * const arraySize = b->getInt64(ceiling(ub));
    713708            AllocaInst * const ptr = b->CreateAlignedAlloca(ty, blockAlignment, arraySize);
    714709            assert (ptr->isStaticAlloca());
     
    742737    // linearly available strides.
    743738    Value * numOfStrides = nullptr;
    744     mInitialAvailableItemCount.resize(inputSetCount);
     739    mInitialAvailableItemCount.assign(mAvailableItemCount.begin(), mAvailableItemCount.end());
    745740    mInitialProcessedItemCount.resize(inputSetCount);
    746741    mStreamSetInputBaseAddress.resize(inputSetCount);
    747742    Value * inputStrideSize[inputSetCount];
    748743    for (unsigned i = 0; i < inputSetCount; i++) {
    749         const auto & input = mStreamSetInputs[i];
     744        const Binding & input = mStreamSetInputs[i];
    750745        const auto & name = input.getName();
    751746        const ProcessingRate & rate = input.getRate();
    752         Value * const ic = b->getProcessedItemCount(name);
    753         mInitialProcessedItemCount[i] = ic;
     747        Value * processed = b->getProcessedItemCount(name);
     748        //b->CallPrintInt(getName() + "_" + name + "_processed", processed);
     749
     750        mInitialProcessedItemCount[i] = processed;
     751        Value * baseBuffer  = b->getBlockAddress(name, b->CreateLShr(processed, LOG_2_BLOCK_WIDTH));
     752
    754753        if (LLVM_UNLIKELY(codegen::DebugOptionIsSet(codegen::EnableAsserts))) {
    755             b->CreateAssert(b->CreateICmpUGE(mAvailableItemCount[i], ic),
    756                             "processed item count cannot exceed the available item count");
    757         }
    758         assert (ic->getType() == mAvailableItemCount[i]->getType());
    759         Value * const unprocessed = b->CreateSub(mAvailableItemCount[i], ic);
    760         Value * baseBuffer  = b->getBlockAddress(name, b->CreateLShr(ic, LOG_2_BLOCK_WIDTH));
    761         mInitialAvailableItemCount[i] = mAvailableItemCount[i];
    762         mAvailableItemCount[i] = b->getLinearlyAccessibleItems(name, ic, unprocessed);
    763 
    764         // Are our linearly accessible items sufficient for a stride?
     754            b->CreateAssert(b->CreateICmpULT(processed, mAvailableItemCount[i]), "processed item count must be less than the available item count");
     755        }
     756
     757        Value * const unprocessed = b->CreateSub(mAvailableItemCount[i], processed);
     758        //b->CallPrintInt(getName() + "_" + name + "_unprocessed", unprocessed);
     759
     760        Value * avail = b->getLinearlyAccessibleItems(name, processed, unprocessed);
     761        //b->CallPrintInt(getName() + "_" + name + "_avail", avail);
     762
     763
     764        // Ensure that everything between S⌈P/S⌉, and S⌈n*(P + L)/S⌉ is linearly available, where S is
     765        // the stride size, P is the current processed position, L is the lookahead amount and n ∈ ℤ+.
     766
     767        Value * remaining = avail;
     768        if (LLVM_UNLIKELY(input.hasLookahead())) {
     769            Constant * const lookahead = b->getSize(input.getLookahead());
     770            remaining = b->CreateSelect(b->CreateICmpULT(lookahead, remaining), b->CreateSub(remaining, lookahead), ZERO);
     771            //b->CallPrintInt(getName() + "_" + name + "_remaining", remaining);
     772        }
     773
    765774        inputStrideSize[i] = getStrideSize(b, rate);
    766         Value * accessibleStrides = b->CreateUDiv(mAvailableItemCount[i], inputStrideSize[i]);
     775
     776        Value * accessibleStrides = b->CreateUDiv(remaining, inputStrideSize[i]);
     777
     778        //b->CallPrintInt(getName() + "_" + name + "_accessibleStrides", accessibleStrides);
     779
    767780        AllocaInst * const tempBuffer = temporaryInputBuffer[i];
    768781        if (tempBuffer) {
     
    779792
    780793            b->SetInsertPoint(copyFromBack);
    781             Value * const temporaryAvailable = b->CreateUMin(unprocessed, inputStrideSize[i]);
     794            Value * const temporarySize = b->CreateMul(tempBuffer->getArraySize(), b->getSize(mStride));
     795            Value * const temporaryAvailable = b->CreateUMin(unprocessed, temporarySize);
    782796            if (LLVM_UNLIKELY(codegen::DebugOptionIsSet(codegen::EnableAsserts))) {
    783                 b->CreateAssert(b->CreateICmpULE(mAvailableItemCount[i], temporaryAvailable),
    784                                 "linearly available cannot be greater than temporarily available");
    785             }
    786             Value * const offset = b->CreateAnd(ic, BLOCK_WIDTH_MASK);
     797                b->CreateAssert(b->CreateICmpULE(avail, temporaryAvailable),
     798                                "linearly available item count cannot exceed the temporarily available item count");
     799            }
     800            Value * const offset = b->CreateAnd(processed, BLOCK_WIDTH_MASK);
    787801            Value * const bufferSize = b->CreateMul(ConstantExpr::getSizeOf(tempBuffer->getAllocatedType()), tempBuffer->getArraySize());
    788802            b->CreateMemZero(tempBuffer, bufferSize, blockAlignment);
    789803            const auto copyAlignment = getItemAlignment(mStreamSetInputs[i]);
    790             b->CreateStreamCpy(name, tempBuffer, ZERO, baseBuffer, offset, mAvailableItemCount[i], copyAlignment);
     804            b->CreateStreamCpy(name, tempBuffer, ZERO, baseBuffer, offset, avail, copyAlignment);
    791805            Value * const temporaryStrides = b->CreateSelect(b->CreateICmpULT(unprocessed, inputStrideSize[i]), ZERO, ONE);
    792806            BasicBlock * const copyToBackEnd = b->GetInsertBlock();
    793             b->CreateCondBr(b->CreateICmpNE(mAvailableItemCount[i], temporaryAvailable), copyFromFront, resume);
     807            b->CreateCondBr(b->CreateICmpNE(temporaryAvailable, unprocessed), copyFromFront, resume);
    794808
    795809            b->SetInsertPoint(copyFromFront);
    796             Value * const remaining = b->CreateSub(temporaryAvailable, mAvailableItemCount[i]);
     810            Value * const remaining = b->CreateSub(temporaryAvailable, avail);
    797811            Value * const baseAddress = b->getBaseAddress(name);
    798             b->CreateStreamCpy(name, tempBuffer, mAvailableItemCount[i], baseAddress, ZERO, remaining, copyAlignment);
     812            b->CreateStreamCpy(name, tempBuffer, avail, baseAddress, ZERO, remaining, copyAlignment);
    799813            BasicBlock * const copyToFrontEnd = b->GetInsertBlock();
    800814            b->CreateBr(resume);
     
    808822
    809823            PHINode * const phiAvailItemCount = b->CreatePHI(b->getSizeTy(), 3);
    810             phiAvailItemCount->addIncoming(mAvailableItemCount[i], entry);
     824            phiAvailItemCount->addIncoming(avail, entry);
    811825            phiAvailItemCount->addIncoming(temporaryAvailable, copyToBackEnd);
    812826            phiAvailItemCount->addIncoming(temporaryAvailable, copyToFrontEnd);
    813             mAvailableItemCount[i] = phiAvailItemCount;
     827            avail = phiAvailItemCount;
    814828
    815829            PHINode * const phiStrides = b->CreatePHI(b->getSizeTy(), 2);
     
    819833            accessibleStrides = phiStrides;
    820834        }
    821 
     835        mAvailableItemCount[i] = avail;
    822836        mStreamSetInputBaseAddress[i] = baseBuffer;
    823837        numOfStrides = b->CreateUMin(numOfStrides, accessibleStrides);
     
    833847        const auto & name = output.getName();
    834848        const ProcessingRate & rate = output.getRate();
    835         Value * const ic = b->getProducedItemCount(name);
    836         Value * baseBuffer = b->getBlockAddress(name, b->CreateLShr(ic, LOG_2_BLOCK_WIDTH));
     849        Value * const produced = b->getProducedItemCount(name);
     850
     851        //b->CallPrintInt(getName() + "_" + name + "_produced", produced);
     852
     853        Value * baseBuffer = b->getBlockAddress(name, b->CreateLShr(produced, LOG_2_BLOCK_WIDTH));
    837854        assert (baseBuffer->getType()->isPointerTy());
    838         linearlyWritable[i] = b->getLinearlyWritableItems(name, ic);       
     855        linearlyWritable[i] = b->getLinearlyWritableItems(name, produced);
     856
     857        //b->CallPrintInt(getName() + "_" + name + "_linearlyWritable", linearlyWritable[i]);
     858
    839859        outputStrideSize[i] = getStrideSize(b, rate);
    840860        // Is the number of linearly writable items sufficient for a stride?
     
    842862            AllocaInst * const tempBuffer = temporaryOutputBuffer[i];
    843863            Value * writableStrides = b->CreateUDiv(linearlyWritable[i], outputStrideSize[i]);
     864            //b->CallPrintInt(getName() + "_" + name + "_writableStrides", writableStrides);
     865
     866
    844867            // Do we require a temporary buffer to write to?
    845868            if (tempBuffer) {
    846869                assert (tempBuffer->getType() == baseBuffer->getType());
    847870                BasicBlock * const entry = b->GetInsertBlock();
    848                 BasicBlock * const useTemporary = b->CreateBasicBlock(name + "UseTemporary");
     871                BasicBlock * const clearBuffer = b->CreateBasicBlock(name + "ClearTemporaryBuffer");
    849872                BasicBlock * const resume = b->CreateBasicBlock(name + "Resume");
    850873                Value * const requiresCopy = b->CreateICmpEQ(writableStrides, ZERO);
    851 
    852                 b->CreateUnlikelyCondBr(requiresCopy, useTemporary, resume);
    853 
    854                 // Clear the buffer after use since we may end up reusing it within the same stride
    855                 b->SetInsertPoint(useTemporary);
     874                b->CreateUnlikelyCondBr(requiresCopy, clearBuffer, resume);
     875                // Clear the output buffer prior to using it
     876                b->SetInsertPoint(clearBuffer);
    856877                Value * const bufferSize = b->CreateMul(ConstantExpr::getSizeOf(tempBuffer->getAllocatedType()), tempBuffer->getArraySize());
    857878                b->CreateMemZero(tempBuffer, bufferSize, blockAlignment);
    858879                b->CreateBr(resume);
    859 
     880                // Select the appropriate buffer / stride #
    860881                b->SetInsertPoint(resume);
    861882                PHINode * const phiBuffer = b->CreatePHI(baseBuffer->getType(), 3);
    862883                phiBuffer->addIncoming(baseBuffer, entry);
    863                 phiBuffer->addIncoming(tempBuffer, useTemporary);
     884                phiBuffer->addIncoming(tempBuffer, clearBuffer);
    864885                baseBuffer = phiBuffer;
    865886                PHINode * const phiStrides = b->CreatePHI(b->getSizeTy(), 2);
    866887                phiStrides->addIncoming(writableStrides, entry);
    867                 phiStrides->addIncoming(ONE, useTemporary);
     888                phiStrides->addIncoming(ONE, clearBuffer);
    868889                writableStrides = phiStrides;
    869 
    870890            }
    871891            numOfStrides = b->CreateUMin(numOfStrides, writableStrides);
    872892        }
    873         mInitialProducedItemCount[i] = ic;
     893        mInitialProducedItemCount[i] = produced;
    874894        mStreamSetOutputBaseAddress[i] = baseBuffer;
    875895    }
     
    885905        }
    886906        for (unsigned i = 0; i < inputSetCount; ++i) {
    887             const ProcessingRate & rate = mStreamSetInputs[i].getRate();
    888             if (rate.isFixed() && mStreamSetInputs[i].nonDeferred()) {
     907            const auto & input = mStreamSetInputs[i];
     908            const ProcessingRate & rate = input.getRate();
     909            if (rate.isFixed() && input.nonDeferred()) {
    889910                mAvailableItemCount[i] = b->CreateSelect(mIsFinal, mAvailableItemCount[i], b->CreateMul(numOfStrides, inputStrideSize[i]));
    890911            }
     
    896917
    897918    for (unsigned i = 0; i < inputSetCount; ++i) {
    898         const ProcessingRate & rate = mStreamSetInputs[i].getRate();
    899         if (rate.isFixed() && mStreamSetInputs[i].nonDeferred()) {
     919        const auto & input = mStreamSetInputs[i];
     920        const ProcessingRate & rate = input.getRate();
     921        if (rate.isFixed() && input.nonDeferred()) {
    900922            Value * const ic = b->CreateAdd(mInitialProcessedItemCount[i], mAvailableItemCount[i]);
    901             b->setProcessedItemCount(mStreamSetInputs[i].getName(), ic);
     923            b->setProcessedItemCount(input.getName(), ic);
    902924        }
    903925    }
    904926
    905927    for (unsigned i = 0; i < outputSetCount; ++i) {
    906         const ProcessingRate & rate = mStreamSetOutputs[i].getRate();
     928        const auto & output = mStreamSetOutputs[i];
     929        const ProcessingRate & rate = output.getRate();
    907930        if (rate.isFixed()) {
    908             assert (mStreamSetOutputs[i].nonDeferred());
     931            assert (output.nonDeferred());
    909932            Value * const produced = b->CreateMul(numOfStrides, outputStrideSize[i]);
    910933            Value * const ic = b->CreateAdd(mInitialProducedItemCount[i], produced);
    911             b->setProducedItemCount(mStreamSetOutputs[i].getName(), ic);
     934            b->setProducedItemCount(output.getName(), ic);
    912935        }
    913936    }
     
    9901013    Value * hasMoreStrides = b->getTrue();
    9911014    for (unsigned i = 0; i < inputSetCount; ++i) {
    992         const auto & name = mStreamSetInputs[i].getName();
     1015        const Binding & input = mStreamSetInputs[i];
     1016        const auto & name = input.getName();
    9931017        Value * const avail = mInitialAvailableItemCount[i];
    9941018        Value * const processed = b->getProcessedItemCount(name);
    9951019        if (LLVM_UNLIKELY(codegen::DebugOptionIsSet(codegen::EnableAsserts))) {
    996             b->CreateAssert(b->CreateICmpULE(processed, avail), name + ": processed data cannot exceed available data");
    997         }
    998         Value * const remaining = b->CreateSub(avail, processed);
     1020            b->CreateAssert(b->CreateICmpULE(processed, avail), getName() + "." + name + ": processed data exceeds available data");
     1021        }
     1022        Value * remaining = b->CreateSub(avail, processed);
     1023        if (LLVM_UNLIKELY(input.hasAttribute(AttributeId::LookAhead))) {
     1024            Constant * const lookahead = b->getSize(input.findAttribute(AttributeId::LookAhead).amount());
     1025            remaining = b->CreateSelect(b->CreateICmpULT(lookahead, remaining), b->CreateSub(remaining, lookahead), ZERO);
     1026        }
    9991027        Value * const remainingStrides = b->CreateUDiv(remaining, inputStrideSize[i]);
    10001028        Value * const hasRemainingStrides = b->CreateICmpNE(remainingStrides, ZERO);
     
    10131041            Value * const consumed = b->getConsumedItemCount(name);
    10141042            if (LLVM_UNLIKELY(codegen::DebugOptionIsSet(codegen::EnableAsserts))) {
    1015                 b->CreateAssert(b->CreateICmpULE(consumed, produced), name + ": consumed data cannot exceed produced data");
     1043                b->CreateAssert(b->CreateICmpULE(consumed, produced), getName() + "." + name + ": consumed data exceeds produced data");
    10161044            }
    10171045            Value * const unconsumed = b->CreateSub(produced, consumed);
    10181046            Value * const capacity = b->getCapacity(name);
    10191047            if (LLVM_UNLIKELY(codegen::DebugOptionIsSet(codegen::EnableAsserts))) {
    1020                 b->CreateAssert(b->CreateICmpULE(unconsumed, capacity), name + ": unconsumed data cannot exceed capacity");
     1048                b->CreateAssert(b->CreateICmpULE(unconsumed, capacity), getName() + "." + name + ": unconsumed data exceeds capacity");
    10211049            }
    10221050            Value * const remaining = b->CreateSub(capacity, unconsumed);
     
    11831211        for (const Attribute & attr : output.getAttributes()) {
    11841212            if (attr.isAdd()) {
    1185                 produced = b->CreateAdd(produced, b->getSize(attr.getAmount()));
     1213                produced = b->CreateAdd(produced, b->getSize(attr.amount()));
    11861214            } else if (attr.isRoundUpTo()) {
    1187                 produced = b->CreateRoundUp(produced, b->getSize(attr.getAmount()));
     1215                produced = b->CreateRoundUp(produced, b->getSize(attr.amount()));
    11881216            }
    11891217        }
  • icGREP/icgrep-devel/icgrep/kernels/kernel.h

    r5757 r5782  
    152152    }
    153153
     154    const Binding & getStreamInput(const parabix::StreamSetBuffer * const buffer) const {
     155        for (unsigned i = 0; i < mStreamSetInputBuffers.size(); ++i) {
     156            if (mStreamSetInputBuffers[i] == buffer) {
     157                return getStreamInput(i);
     158            }
     159        }
     160        throw std::runtime_error("no output binding found given buffer");
     161    }
     162
    154163    const parabix::StreamSetBuffer * getStreamSetOutputBuffer(const unsigned i) const {
    155164        assert (i < mStreamSetOutputBuffers.size());
     
    166175    const Binding & getStreamOutput(const unsigned i) const {
    167176        return KernelInterface::getStreamOutput(i);
     177    }
     178
     179    const Binding & getStreamOutput(const parabix::StreamSetBuffer * const buffer) const {
     180        for (unsigned i = 0; i < mStreamSetOutputBuffers.size(); ++i) {
     181            if (mStreamSetOutputBuffers[i] == buffer) {
     182                return getStreamOutput(i);
     183            }
     184        }
     185        throw std::runtime_error("no output binding found given buffer");
    168186    }
    169187
  • icGREP/icgrep-devel/icgrep/kernels/kernel_builder.cpp

    r5757 r5782  
    8282            itemCount = CreateExactUDiv(itemCount, ConstantInt::get(itemCount->getType(), r.denominator()));
    8383        }
     84    } else if (LLVM_UNLIKELY(rate.isPopCount())) {
     85        Port port; unsigned index;
     86        std::tie(port, index) = mKernel->getStreamPort(rate.getReference());
     87
     88
     89
     90
    8491    } else {
    8592        itemCount = getScalarField(name + suffix);
     
    133140Value * KernelBuilder::getLinearlyWritableItems(const std::string & name, Value * fromPosition, bool reverse) {
    134141    const StreamSetBuffer * const buf = mKernel->getOutputStreamSetBuffer(name);
    135     return buf->getLinearlyWritableItems(this, getStreamHandle(name), fromPosition, reverse);
     142    return buf->getLinearlyWritableItems(this, getStreamHandle(name), fromPosition, getConsumedItemCount(name), reverse);
    136143}
    137144
     
    195202    // (w.r.t the stream copy) would be n*m. By taking this into account we can optimize and simplify the copy code.
    196203    const auto fieldWidth = getFieldWidth(itemWidth * itemAlignment, blockWidth);
    197     assert ("overflow error" && is_power_2(fieldWidth) && (itemWidth <= fieldWidth) && (fieldWidth <= blockWidth));
     204
     205//    CallPrintInt(mKernel->getName() + "_" + name + "_target", target);
     206//    CallPrintInt(mKernel->getName() + "_" + name + "_targetOffset", targetOffset);
     207//    CallPrintInt(mKernel->getName() + "_" + name + "_source", source);
     208//    CallPrintInt(mKernel->getName() + "_" + name + "_sourceOffset", sourceOffset);
     209//    CallPrintInt(mKernel->getName() + "_" + name + "_itemsToCopy", itemsToCopy);
    198210
    199211    if (LLVM_LIKELY(itemWidth < fieldWidth)) {
     
    223235
    224236       So if we're copying the entire stream set block or our stream set has one element, we can use memcpy.
     237
     238       One complication here is when the BlockSize of a stream is not equal to the BitBlockWidth.
     239
    225240
    226241    */
     
    421436
    422437Value * KernelBuilder::loadInputStreamPack(const std::string & name, Value * streamIndex, Value * packIndex) {
    423 
    424 
    425 
    426438    return CreateBlockAlignedLoad(getInputStreamPackPtr(name, streamIndex, packIndex));
    427439}
     
    432444}
    433445
    434 Value * KernelBuilder::getAdjustedInputStreamBlockPtr(Value * blockAdjustment, const std::string & name, Value * streamIndex) {
     446Value * KernelBuilder::getInputStreamBlockPtr(const std::string & name, Value * const streamIndex, Value * const blockOffset) {
    435447    Value * const addr = mKernel->getStreamSetInputAddress(name);
    436448    if (addr) {
    437         return CreateGEP(addr, {blockAdjustment, streamIndex});
     449        return CreateGEP(addr, {blockOffset, streamIndex});
    438450    } else {
    439451        const StreamSetBuffer * const buf = mKernel->getInputStreamSetBuffer(name);
    440452        Value * blockIndex = CreateLShr(getProcessedItemCount(name), std::log2(getBitBlockWidth()));
    441         blockIndex = CreateAdd(blockIndex, blockAdjustment);
     453        blockIndex = CreateAdd(blockIndex, blockOffset);
    442454        return buf->getStreamBlockPtr(this, getStreamHandle(name), getBaseAddress(name), streamIndex, blockIndex, true);
    443455    }
  • icGREP/icgrep-devel/icgrep/kernels/kernel_builder.h

    r5755 r5782  
    9191    llvm::Value * getOutputStreamSetCount(const std::string & name);
    9292
    93     llvm::Value * getAdjustedInputStreamBlockPtr(llvm::Value * blockAdjustment, const std::string & name, llvm::Value * streamIndex);
     93    llvm::Value * getInputStreamBlockPtr(const std::string & name, llvm::Value * streamIndex, llvm::Value * blockOffset);
    9494
    9595    llvm::Value * getRawInputPointer(const std::string & name, llvm::Value * absolutePosition);
  • icGREP/icgrep-devel/icgrep/kernels/linebreak_kernel.cpp

    r5755 r5782  
    1414#include <kernels/kernel_builder.h>
    1515
     16#include <llvm/Support/raw_ostream.h>
     17
    1618using namespace cc;
    1719using namespace kernel;
     
    2022using namespace llvm;
    2123
    22 LineBreakKernelBuilder::LineBreakKernelBuilder(const std::unique_ptr<kernel::KernelBuilder> & b, unsigned basisBitsCount)
    23 : PabloKernel(b, "lb",
    24     {Binding{b->getStreamSetTy(basisBitsCount), "basis", FixedRate(), Principal()}},
    25     {Binding{b->getStreamSetTy(1), "linebreak", FixedRate(), Add1()}}) {
     24
     25LineFeedKernelBuilder::LineFeedKernelBuilder(const std::unique_ptr<kernel::KernelBuilder> & b, const unsigned basisBitsCount)
     26: PabloKernel(b, "lf" + std::to_string(basisBitsCount),
     27// input
     28{Binding{b->getStreamSetTy(basisBitsCount), "basis", FixedRate(), Principal()}},
     29// output
     30{Binding{b->getStreamSetTy(1), "lf"}}) {
     31
     32}
     33
     34void LineFeedKernelBuilder::generatePabloMethod() {
     35    CC_Compiler ccc(this, getInput(0));
     36    auto & pb = ccc.getBuilder();
     37    PabloAST * LF = ccc.compileCC("LF", makeCC(0x0A), pb);
     38    pb.createAssign(pb.createExtract(getOutput(0), pb.getInteger(0)), LF);
     39}
     40
     41LineBreakKernelBuilder::LineBreakKernelBuilder(const std::unique_ptr<kernel::KernelBuilder> & b, const unsigned basisBitsCount)
     42: PabloKernel(b, "lb" + std::to_string(basisBitsCount),
     43// inputs
     44{Binding{b->getStreamSetTy(basisBitsCount), "basis", FixedRate(), Principal()}
     45,Binding{b->getStreamSetTy(1), "lf", FixedRate(), LookAhead(1)}},
     46// outputs
     47{Binding{b->getStreamSetTy(1), "linebreak", FixedRate(), Add1()}
     48,Binding{b->getStreamSetTy(1), "cr+lf"}}) {
    2649
    2750}
    2851
    2952void LineBreakKernelBuilder::generatePabloMethod() {
    30 
    3153    CC_Compiler ccc(this, getInput(0));
    3254    auto & pb = ccc.getBuilder();
    3355
    34     PabloAST * LineBreak = nullptr;
    35     PabloAST * LF = ccc.compileCC("LF", makeCC(0x0A), pb);
    36     PabloAST * CR = ccc.compileCC(makeCC(0x0D));
    37     PabloAST * LF_VT_FF_CR = ccc.compileCC(makeCC(0x0A, 0x0D));
     56    Integer * const ZERO = pb.getInteger(0);
    3857
    39     Zeroes * const zero = pb.createZeroes();
    40     Var * crlf = pb.createVar("crlf", zero);
     58    PabloAST * const LF = pb.createExtract(getInput(1), ZERO, "LF");
     59    PabloAST * const CR = ccc.compileCC(makeCC(0x0D));
     60    PabloAST * const LF_VT_FF_CR = ccc.compileCC(makeCC(0x0A, 0x0D));
     61    Var * const LineBreak = pb.createVar("LineBreak", LF_VT_FF_CR);
     62
     63    // Remove the CR of any CR+LF
     64    Var * const CRLF = pb.createVar("CRLF", pb.createZeroes());
    4165    PabloBuilder crb = PabloBuilder::Create(pb);
    42 #ifndef USE_LOOKAHEAD_CRLF
    43     PabloAST * cr1 = crb.createAdvance(CR, 1, "cr1");
    44     crb.createAssign(crlf, crb.createAnd(cr1, LF));
    45 #else
    46     PabloAST * lookaheadLF = crb.createLookahead(LF, 1, "lookaheadLF");
    47     crb.createAssign(crlf, crb.createAnd(CR, lookaheadLF));
    48 #endif
    4966    pb.createIf(CR, crb);
     67    PabloAST * const lookaheadLF = crb.createLookahead(LF, 1, "lookaheadLF");
     68    PabloAST * const crlf = crb.createAnd(CR, lookaheadLF);
     69    crb.createAssign(CRLF, crlf);
     70    PabloAST * removedCRLF = crb.createAnd(LineBreak, crb.createNot(CRLF));
     71    crb.createAssign(LineBreak, removedCRLF);
     72    // Record the CR marker of any CR+LF
     73    pb.createAssign(pb.createExtract(getOutput(1), ZERO), CRLF);
    5074
    51     Var * NEL_LS_PS = pb.createVar("NEL_LS_PS", zero);
    52 
     75    // Check for Unicode Line Breaks
    5376    PabloAST * u8pfx = ccc.compileCC(makeCC(0xC0, 0xFF));
    5477    PabloBuilder it = PabloBuilder::Create(pb);
     
    5780    PabloAST * u8pfx3 = ccc.compileCC(makeCC(0xE0, 0xEF), it);
    5881
    59     //
    6082    // Two-byte sequences
    61     Var * NEL = it.createVar("NEL", zero);
    6283    PabloBuilder it2 = PabloBuilder::Create(it);
    63     it2.createAssign(NEL, it2.createAnd(it2.createAdvance(ccc.compileCC(makeCC(0xC2), it2), 1), ccc.compileCC(makeCC(0x85), it2)));
    6484    it.createIf(u8pfx2, it2);
     85    PabloAST * NEL = it2.createAnd(it2.createAdvance(ccc.compileCC(makeCC(0xC2), it2), 1), ccc.compileCC(makeCC(0x85), it2), "NEL");
     86    it2.createAssign(LineBreak, it2.createOr(LineBreak, NEL));
    6587
    66     //
    6788    // Three-byte sequences
    68     Var * LS_PS = it.createVar("LS_PS", zero);
    6989    PabloBuilder it3 = PabloBuilder::Create(it);
    7090    it.createIf(u8pfx3, it3);
    7191    PabloAST * E2_80 = it3.createAnd(it3.createAdvance(ccc.compileCC(makeCC(0xE2), it3), 1), ccc.compileCC(makeCC(0x80), it3));
    72     it3.createAssign(LS_PS, it3.createAnd(it3.createAdvance(E2_80, 1), ccc.compileCC(makeCC(0xA8,0xA9), it3)));
    73     it.createAssign(NEL_LS_PS, it.createOr(NEL, LS_PS));
     92    PabloAST * LS_PS = it3.createAnd(it3.createAdvance(E2_80, 1), ccc.compileCC(makeCC(0xA8,0xA9), it3), "LS_PS");
     93    it3.createAssign(LineBreak, it3.createOr(LineBreak, LS_PS));
    7494
    75     PabloAST * LB_chars = pb.createOr(LF_VT_FF_CR, NEL_LS_PS);
    76     PabloAST * lb = nullptr;
    77     if (AlgorithmOptionIsSet(DisableUnicodeLineBreak)) {
    78         lb = LF;
    79     } else {
    80         lb = pb.createAnd(LB_chars, pb.createNot(crlf));  // count the CR, but not CRLF
    81     }
    82 
    83     PabloAST * unterminatedLineAtEOF = pb.createAtEOF(pb.createAdvance(pb.createNot(LB_chars), 1));
    84     LineBreak = pb.createOr(lb, unterminatedLineAtEOF);
    85     PabloAST * const r = pb.createExtract(getOutput(0), pb.getInteger(0));
    86     pb.createAssign(r, LineBreak);
    87 #ifdef USE_LOOKAHEAD_CRLF
    88     setLookAhead(1);
    89 #endif
     95    PabloAST * unterminatedLineAtEOF = pb.createAtEOF(pb.createAdvance(pb.createNot(LineBreak), 1));
     96    pb.createAssign(pb.createExtract(getOutput(0), ZERO), pb.createOr(LineBreak, unterminatedLineAtEOF, "EOL"));
    9097}
  • icGREP/icgrep-devel/icgrep/kernels/linebreak_kernel.h

    r5464 r5782  
    1212namespace kernel {
    1313
     14class LineFeedKernelBuilder final : public pablo::PabloKernel {
     15public:
     16    LineFeedKernelBuilder(const std::unique_ptr<KernelBuilder> & b, unsigned basisBitsCount);
     17    bool isCachable() const override { return true; }
     18    bool hasSignature() const override { return false; }
     19protected:
     20    void generatePabloMethod() override;
     21};
     22
     23
    1424class LineBreakKernelBuilder final : public pablo::PabloKernel {
    1525public:
  • icGREP/icgrep-devel/icgrep/kernels/processing_rate.cpp

    r5755 r5782  
    1717}
    1818
     19/** ------------------------------------------------------------------------------------------------------------- *
     20 * @brief gcd
     21 ** ------------------------------------------------------------------------------------------------------------- */
    1922ProcessingRate::RateValue gcd(const ProcessingRate::RateValue & x, const ProcessingRate::RateValue & y) {
    2023    const auto n = boost::gcd(x.numerator(), y.numerator());
     
    2629}
    2730
     31/** ------------------------------------------------------------------------------------------------------------- *
     32 * @brief ceiling
     33 ** ------------------------------------------------------------------------------------------------------------- */
     34unsigned ceiling(const ProcessingRate::RateValue & r) {
     35    if (LLVM_LIKELY(r.denominator() == 1)) {
     36        return r.numerator();
     37    } else {
     38        return (r.numerator() + r.denominator() - 1) / r.denominator();
     39    }
    2840}
     41
     42
     43}
  • icGREP/icgrep-devel/icgrep/kernels/processing_rate.h

    r5756 r5782  
    88namespace kernel {
    99
    10 // Processing rate attributes are required for all stream set bindings for a kernel.
    11 // These attributes describe the number of items that are processed or produced as
    12 // a ratio in comparison to a reference stream set, normally the principal input stream set
    13 // by default (or the principal output stream set if there is no input).
     10// Processing rate attributes are required for all stream set bindings. They describe
     11// the relationship between processed items (inputs) and produced items (outputs).
    1412//
    15 // The default ratio is FixedRatio(1) which means that there is one item processed or
    16 // produced for every item of the reference stream.
    17 // FixedRatio(m, n) means that for every group of n items of the refrence stream,
    18 // there are m items in the output stream (rounding up).
     13// For example, the 3-to-4 kernel converts every 3 input items into 4 output items.
     14// Thus it has a FixedRate(3) for its input stream and FixedRate(4) for its output
     15// stream. Processing each group of 3 items individually would be time consuming. Instead,
     16// the kernel processes a stride's worth of "iterations" and automatically scales the
     17// FixedRates accordingly.
    1918//
    20 // Kernels which produce a variable number of items use MaxRatio(n), for a maximum
    21 // of n items produced or consumed per principal input or output item.  MaxRatio(m, n)
    22 // means there are at most m items for every n items of the reference stream.
    23 //
    24 // RoundUpToMultiple(n) means that number of items produced is the same as the
    25 // number of reference items, rounded up to an exact multiple of n.
    26 //
     19// NOTE: fixed and bounded rates should be the smallest number of input items for the
     20// smallest number of output items that can be logically produced by a kernel.
     21
     22
     23
    2724
    2825struct ProcessingRate  {
     
    145142ProcessingRate::RateValue gcd(const ProcessingRate::RateValue & x, const ProcessingRate::RateValue & y);
    146143
     144unsigned ceiling(const ProcessingRate::RateValue & r);
     145
    147146}
    148147
  • icGREP/icgrep-devel/icgrep/kernels/scanmatchgen.cpp

    r5758 r5782  
    210210ScanMatchKernel::ScanMatchKernel(const std::unique_ptr<kernel::KernelBuilder> & b)
    211211: MultiBlockKernel("scanMatch",
    212     {Binding{b->getStreamSetTy(1, 1), "matchResult", FixedRate(), Principal()}, Binding{b->getStreamSetTy(1, 1), "lineBreak"}, Binding{b->getStreamSetTy(1, 8), "InputStream", FixedRate(), Deferred() }},
    213     {},
    214     {Binding{b->getIntAddrTy(), "accumulator_address"}},
    215     {},
    216     {Binding{b->getSizeTy(), "BlockNo"}, Binding{b->getSizeTy(), "LineNum"}}) {}
    217 }
     212// inputs
     213{Binding{b->getStreamSetTy(1, 1), "matchResult", FixedRate(), Principal()}
     214,Binding{b->getStreamSetTy(1, 1), "lineBreak"}
     215,Binding{b->getStreamSetTy(1, 8), "InputStream", FixedRate(), Deferred()}},
     216// outputs
     217{},
     218// input scalars
     219{Binding{b->getIntAddrTy(), "accumulator_address"}},
     220// output scalars
     221{},
     222// kernel state
     223{Binding{b->getSizeTy(), "BlockNo"}
     224,Binding{b->getSizeTy(), "LineNum"}}) {
     225
     226}
     227
     228}
  • icGREP/icgrep-devel/icgrep/kernels/streamset.cpp

    r5757 r5782  
    144144}
    145145
    146 Value * StreamSetBuffer::getLinearlyAccessibleItems(IDISA::IDISA_Builder * const b, Value * const handle, Value * fromPosition, Value * availItems, bool reverse) const {
     146Value * StreamSetBuffer::getLinearlyAccessibleItems(IDISA::IDISA_Builder * const b, Value * const /* handle */, Value * fromPosition, Value * availItems, bool reverse) const {
    147147    Constant * bufSize = ConstantInt::get(fromPosition->getType(), mBufferBlocks * b->getStride());
    148148    Value * itemsFromBase = b->CreateURem(fromPosition, bufSize);
     
    156156}
    157157
    158 Value * StreamSetBuffer::getLinearlyWritableItems(IDISA::IDISA_Builder * const b, Value * const handle, Value * fromPosition, bool reverse) const {
    159     Constant * bufSize = ConstantInt::get(fromPosition->getType(), mBufferBlocks * b->getStride());
    160     Value * bufRem = b->CreateURem(fromPosition, bufSize);
     158Value * StreamSetBuffer::getLinearlyWritableItems(IDISA::IDISA_Builder * const b, Value * const /* handle */, Value * fromPosition, Value * consumed, bool reverse) const {
     159    Constant * const bufferSize = ConstantInt::get(fromPosition->getType(), mBufferBlocks * b->getStride());
     160    fromPosition = b->CreateURem(fromPosition, bufferSize);
    161161    if (reverse) {
    162         return b->CreateSelect(b->CreateICmpEQ(bufRem, b->getSize(0)), bufSize, bufRem);
    163     }
    164     return b->CreateSub(bufSize, bufRem, "linearSpace");
     162        return b->CreateSelect(b->CreateICmpEQ(fromPosition, b->getSize(0)), bufferSize, fromPosition);
     163    }
     164    consumed = b->CreateURem(consumed, bufferSize);
     165    Value * const limit = b->CreateSelect(b->CreateICmpULE(consumed, fromPosition), bufferSize, consumed);
     166    return b->CreateNUWSub(limit, fromPosition);
    165167}
    166168
     
    187189    Value * blockCopyBytes = b->CreateMul(blocksToCopy, b->getSize(b->getBitBlockWidth() * numStreams * fieldWidth/8));
    188190    b->CreateMemMove(b->CreateBitCast(targetBlockPtr, i8ptr), b->CreateBitCast(sourceBlockPtr, i8ptr), blockCopyBytes, alignment);
    189 }
    190 
    191 inline bool isConstantZero(Value * const v) {
    192     return isa<Constant>(v) && cast<Constant>(v)->isNullValue();
    193191}
    194192
     
    293291}
    294292
    295 Value * SourceBuffer::getLinearlyWritableItems(IDISA::IDISA_Builder * const b, Value * const handle, Value * fromPosition, bool reverse) const {
     293Value * SourceBuffer::getLinearlyWritableItems(IDISA::IDISA_Builder * const b, Value * const handle, Value * fromPosition, Value *consumed, bool reverse) const {
    296294    report_fatal_error("SourceBuffers cannot be written");
    297295}
     
    325323}
    326324
    327 Value * ExternalBuffer::getLinearlyWritableItems(IDISA::IDISA_Builder * const, Value *, Value * fromPosition, const bool reverse) const {
     325Value * ExternalBuffer::getLinearlyWritableItems(IDISA::IDISA_Builder * const, Value *, Value * fromPosition, Value *consumed, const bool reverse) const {
    328326    // Trust that the buffer is large enough to write any amount
    329327    return reverse ? fromPosition : ConstantInt::getAllOnesValue(fromPosition->getType());
     
    374372}
    375373
    376 Value * CircularCopybackBuffer::getLinearlyWritableItems(IDISA::IDISA_Builder * const b, Value * const handle, Value * fromPosition, bool reverse) const {
    377     Value * writableProper = StreamSetBuffer::getLinearlyWritableItems(b, handle, fromPosition, reverse);
     374Value * CircularCopybackBuffer::getLinearlyWritableItems(IDISA::IDISA_Builder * const b, Value * const handle, Value * fromPosition, Value * consumed, bool reverse) const {
     375    Value * writableProper = StreamSetBuffer::getLinearlyWritableItems(b, handle, fromPosition, consumed, reverse);
    378376    if (reverse) return writableProper;
    379377    return b->CreateAdd(writableProper, b->getSize(mOverflowBlocks * b->getBitBlockWidth()));
     
    452450}
    453451
    454 Value * SwizzledCopybackBuffer::getLinearlyWritableItems(IDISA::IDISA_Builder * const b, Value * const handle, Value * fromPosition, bool reverse) const {
    455     Value * writableProper = StreamSetBuffer::getLinearlyWritableItems(b, handle, fromPosition, reverse);
     452Value * SwizzledCopybackBuffer::getLinearlyWritableItems(IDISA::IDISA_Builder * const b, Value * const handle, Value * fromPosition, Value *consumed, bool reverse) const {
     453    Value * writableProper = StreamSetBuffer::getLinearlyWritableItems(b, handle, fromPosition, consumed, reverse);
    456454    if (reverse) return writableProper;
    457455    return b->CreateAdd(writableProper, b->getSize(mOverflowBlocks * b->getBitBlockWidth()));
     
    692690}
    693691
    694 Value * DynamicBuffer::getLinearlyWritableItems(IDISA::IDISA_Builder * const b, Value * const handle, Value * fromPosition, bool reverse) const {
     692Value * DynamicBuffer::getLinearlyWritableItems(IDISA::IDISA_Builder * const b, Value * const handle, Value * fromPosition, Value *consumed, bool reverse) const {
    695693    Value * bufBlocks = b->CreateLoad(b->CreateGEP(handle, {b->getInt32(0), b->getInt32(int(Field::WorkingBlocks))}));
    696694    Constant * blockSize = ConstantInt::get(bufBlocks->getType(), b->getBitBlockWidth());
  • icGREP/icgrep-devel/icgrep/kernels/streamset.h

    r5757 r5782  
    9797    virtual void createBlockAlignedCopy(IDISA::IDISA_Builder * const b, llvm::Value * targetBlockPtr, llvm::Value * sourceBlockPtr, llvm::Value * itemsToCopy, const unsigned alignment = 1) const;
    9898
    99     virtual llvm::Value * getLinearlyWritableItems(IDISA::IDISA_Builder * const b, llvm::Value * handle, llvm::Value * fromPosition, bool reverse = false) const;
     99    virtual llvm::Value * getLinearlyWritableItems(IDISA::IDISA_Builder * const b, llvm::Value * handle, llvm::Value * fromPosition, llvm::Value * consumed, bool reverse = false) const;
    100100   
    101101    bool supportsCopyBack() const {
     
    118118        return mConsumers;
    119119    }
     120
     121
    120122
    121123protected:
     
    174176    llvm::Value * getLinearlyAccessibleItems(IDISA::IDISA_Builder * const b, llvm::Value * handle, llvm::Value * fromPosition, llvm::Value * avail, bool reverse = false) const override;
    175177
    176     llvm::Value * getLinearlyWritableItems(IDISA::IDISA_Builder * const b, llvm::Value * handle, llvm::Value * fromPosition, bool reverse = false) const override;
     178    llvm::Value * getLinearlyWritableItems(IDISA::IDISA_Builder * const b, llvm::Value * handle, llvm::Value * fromPosition, llvm::Value * consumed, bool reverse = false) const override;
    177179
    178180    llvm::Type * getStreamSetBlockType() const override;
     
    200202    llvm::Value * getLinearlyAccessibleItems(IDISA::IDISA_Builder * const b, llvm::Value * handle, llvm::Value * fromPosition, llvm::Value * avail, bool reverse = false) const override;
    201203   
    202     llvm::Value * getLinearlyWritableItems(IDISA::IDISA_Builder * const b, llvm::Value * handle, llvm::Value * fromPosition, bool reverse = false) const override;
     204    llvm::Value * getLinearlyWritableItems(IDISA::IDISA_Builder * const b, llvm::Value * handle, llvm::Value * fromPosition, llvm::Value * consumed, bool reverse = false) const override;
    203205
    204206    void allocateBuffer(const std::unique_ptr<kernel::KernelBuilder> & b) override;
     
    242244    CircularCopybackBuffer(const std::unique_ptr<kernel::KernelBuilder> & b, llvm::Type * type, size_t bufferBlocks, size_t overflowBlocks, unsigned AddressSpace = 0);
    243245   
    244     llvm::Value * getLinearlyWritableItems(IDISA::IDISA_Builder * const b, llvm::Value * handle, llvm::Value * fromPosition, bool reverse = false) const override;
     246    llvm::Value * getLinearlyWritableItems(IDISA::IDISA_Builder * const b, llvm::Value * handle, llvm::Value * fromPosition, llvm::Value * consumed, bool reverse = false) const override;
    245247   
    246248    void allocateBuffer(const std::unique_ptr<kernel::KernelBuilder> & b) override;
     
    258260    void createBlockAlignedCopy(IDISA::IDISA_Builder * const b, llvm::Value * targetBlockPtr, llvm::Value * sourceBlockPtr, llvm::Value * itemsToCopy, const unsigned alignment = 1) const override;
    259261
    260     llvm::Value * getLinearlyWritableItems(IDISA::IDISA_Builder * const b, llvm::Value * handle, llvm::Value * fromPosition, bool reverse = false) const override;
     262    llvm::Value * getLinearlyWritableItems(IDISA::IDISA_Builder * const b, llvm::Value * handle, llvm::Value * fromPosition, llvm::Value * consumed, bool reverse = false) const override;
    261263   
    262264    void allocateBuffer(const std::unique_ptr<kernel::KernelBuilder> & b) override;
     
    317319    llvm::Value * getLinearlyAccessibleItems(IDISA::IDISA_Builder * const b, llvm::Value * handle, llvm::Value * fromPosition, llvm::Value * avail, bool reverse = false) const override;
    318320   
    319     llvm::Value * getLinearlyWritableItems(IDISA::IDISA_Builder * const b, llvm::Value * handle, llvm::Value * fromPosition, bool reverse = false) const override;
     321    llvm::Value * getLinearlyWritableItems(IDISA::IDISA_Builder * const b, llvm::Value * handle, llvm::Value * fromPosition, llvm::Value * consumed, bool reverse = false) const override;
    320322   
    321323    void allocateBuffer(const std::unique_ptr<kernel::KernelBuilder> & b) override;
Note: See TracChangeset for help on using the changeset viewer.