Ignore:
Timestamp:
Dec 21, 2017, 3:22:41 PM (18 months ago)
Author:
cameron
Message:

Adding Alphabet to CCs (character classes): initial check-in

Location:
icGREP/icgrep-devel/icgrep/re
Files:
4 edited

Legend:

Unmodified
Added
Removed
  • icGREP/icgrep-devel/icgrep/re/re_cc.cpp

    r5781 r5795  
    3333}
    3434   
    35 CC::CC()
     35CC::CC(const cc::Alphabet * alphabet)
    3636: RE(ClassTypeId::CC)
    37 , UnicodeSet() {
     37, UnicodeSet()
     38, mAlphabet(alphabet) {}
    3839
    39 }
    4040
    4141CC::CC(const CC & cc)
    4242: RE(ClassTypeId::CC)
    43 , UCD::UnicodeSet(cc) {
     43, UCD::UnicodeSet(cc)
     44, mAlphabet(cc.getAlphabet()) {}
    4445
    45 }
    4646
    47 CC::CC(const codepoint_t codepoint)
     47CC::CC(const codepoint_t codepoint, const cc::Alphabet * alphabet)
    4848: RE(ClassTypeId::CC)
    49 , UCD::UnicodeSet(codepoint) {
     49, UCD::UnicodeSet(codepoint)
     50, mAlphabet(alphabet) {}
    5051
    51 }
    5252
    53 CC::CC(const codepoint_t lo_codepoint, const codepoint_t hi_codepoint)
     53CC::CC(const codepoint_t lo_codepoint, const codepoint_t hi_codepoint, const cc::Alphabet * alphabet)
    5454: RE(ClassTypeId::CC)
    55 , UCD::UnicodeSet(lo_codepoint, hi_codepoint) {
     55, UCD::UnicodeSet(lo_codepoint, hi_codepoint)
     56, mAlphabet(alphabet) {}
    5657
    57 }
    5858
    5959CC::CC(const CC * cc1, const CC * cc2)
    6060: RE(ClassTypeId::CC)
    61 , UCD::UnicodeSet(std::move(*cc1 + *cc2)) {
     61, UCD::UnicodeSet(std::move(*cc1 + *cc2))
     62, mAlphabet(cc1->getAlphabet()) {
     63    assert (cc1->getAlphabet() == cc2->getAlphabet());
     64}
     65
     66
     67CC::CC(const UCD::UnicodeSet && set, const cc::Alphabet * alphabet)
     68: RE(ClassTypeId::CC)
     69, UCD::UnicodeSet(std::move(set))
     70, mAlphabet(alphabet) {}
     71
     72
     73CC::CC(std::initializer_list<interval_t>::iterator begin, std::initializer_list<interval_t>::iterator end, const cc::Alphabet * alphabet)
     74: RE(ClassTypeId::CC)
     75, UCD::UnicodeSet(begin, end)
     76, mAlphabet(alphabet) {}
     77
     78
     79CC::CC(const std::vector<interval_t>::iterator begin, const std::vector<interval_t>::iterator end, const cc::Alphabet * alphabet)
     80: RE(ClassTypeId::CC)
     81, UCD::UnicodeSet(begin, end)
     82, mAlphabet(alphabet) {}
    6283
    6384}
    64 
    65 CC::CC(const UCD::UnicodeSet && set)
    66 : RE(ClassTypeId::CC)
    67 , UCD::UnicodeSet(std::move(set)) {
    68 
    69 }
    70 
    71 CC::CC(std::initializer_list<interval_t>::iterator begin, std::initializer_list<interval_t>::iterator end)
    72 : RE(ClassTypeId::CC)
    73 , UCD::UnicodeSet(begin, end)
    74 {
    75 
    76 }
    77 
    78 CC::CC(const std::vector<interval_t>::iterator begin, const std::vector<interval_t>::iterator end)
    79 : RE(ClassTypeId::CC)
    80 , UCD::UnicodeSet(begin, end)
    81 {
    82 
    83 }
    84 
    85 }
  • icGREP/icgrep-devel/icgrep/re/re_cc.h

    r5781 r5795  
    1010#include "re_re.h"
    1111#include <UCD/unicode_set.h>
     12#include <cc/alphabet.h>
    1213
    1314namespace re {
     
    2829    }
    2930
     31    const cc::Alphabet * getAlphabet() const { return mAlphabet;}
    3032
    3133    std::string canonicalName(const CC_type type) const;
     
    4244
    4345protected:
    44     friend CC * makeCC();
    45     friend CC * makeCC(const codepoint_t codepoint);
    46     friend CC * makeCC(const codepoint_t lo, const codepoint_t hi);
     46    friend CC * makeCC(const cc::Alphabet * alphabet);
     47    friend CC * makeCC(const codepoint_t codepoint, const cc::Alphabet * alphabet);
     48    friend CC * makeCC(const codepoint_t lo, const codepoint_t hi, const cc::Alphabet * alphabet);
    4749    friend CC * makeCC(const CC * cc1, const CC * cc2);
    48     friend CC * makeCC(std::initializer_list<interval_t> list);
    49     friend CC * makeCC(std::vector<interval_t> && list);
    50     friend CC * makeCC(UCD::UnicodeSet && set);
     50    friend CC * makeCC(std::initializer_list<interval_t> list, const cc::Alphabet * alphabet);
     51    friend CC * makeCC(std::vector<interval_t> && list, const cc::Alphabet * alphabet);
     52    friend CC * makeCC(UCD::UnicodeSet && set, const cc::Alphabet * alphabet);
    5153    friend CC * subtractCC(const CC * a, const CC * b);
    5254    friend CC * intersectCC(const CC * a, const CC * b);
    5355
    54     CC();
     56    CC(const cc::Alphabet * alphabet);
    5557
    5658    CC(const CC & cc);
    5759
    58     CC(const codepoint_t codepoint);
     60    CC(const codepoint_t codepoint, const cc::Alphabet * alphabet);
    5961
    60     explicit CC(const codepoint_t lo_codepoint, const codepoint_t hi_codepoint);
     62    explicit CC(const codepoint_t lo_codepoint, const codepoint_t hi_codepoint, const cc::Alphabet * alphabet);
    6163
    6264    explicit CC(const CC * cc1, const CC * cc2);
    6365
    64     CC(const UCD::UnicodeSet && set);
     66    CC(const UCD::UnicodeSet && set, const cc::Alphabet * alphabet);
    6567
    66     CC(std::initializer_list<interval_t>::iterator begin, std::initializer_list<interval_t>::iterator end);
     68    CC(std::initializer_list<interval_t>::iterator begin, std::initializer_list<interval_t>::iterator end, const cc::Alphabet * alphabet);
    6769
    68     CC(const std::vector<interval_t>::iterator begin, const std::vector<interval_t>::iterator end);
     70    CC(const std::vector<interval_t>::iterator begin, const std::vector<interval_t>::iterator end, const cc::Alphabet * alphabet);
     71private:
     72    const cc::Alphabet * mAlphabet;
     73   
    6974
    7075};
     
    100105 */
    101106
    102 inline CC * makeCC() {
    103     return new CC();
     107inline CC * makeCC(const cc::Alphabet * alphabet = &cc::Unicode) {
     108    return new CC(alphabet);
    104109}
    105110
    106 inline CC * makeCC(const codepoint_t codepoint) {
    107     return new CC(codepoint);
     111    inline CC * makeCC(const codepoint_t codepoint, const cc::Alphabet * alphabet = &cc::Unicode) {
     112    return new CC(codepoint, alphabet);
    108113}
    109114
    110 inline CC * makeCC(const codepoint_t lo, const codepoint_t hi) {
    111     return new CC(lo, hi);
     115inline CC * makeCC(const codepoint_t lo, const codepoint_t hi, const cc::Alphabet * alphabet = &cc::Unicode) {
     116    return new CC(lo, hi, alphabet);
    112117}
    113118
     
    116121}
    117122
    118 inline CC * makeCC(std::initializer_list<interval_t> list) {
    119     return new CC(list.begin(), list.end());
     123inline CC * makeCC(std::initializer_list<interval_t> list, const cc::Alphabet * alphabet = &cc::Unicode) {
     124    return new CC(list.begin(), list.end(), alphabet);
    120125}
    121126
    122 inline CC * makeCC(std::vector<interval_t> && list) {
    123     return new CC(list.begin(), list.end());
     127inline CC * makeCC(std::vector<interval_t> && list, const cc::Alphabet * alphabet = &cc::Unicode) {
     128    return new CC(list.begin(), list.end(), alphabet);
    124129}
    125130
    126 inline CC * makeCC(UCD::UnicodeSet && set) {
    127     return new CC(std::move(set));
     131inline CC * makeCC(UCD::UnicodeSet && set, const cc::Alphabet * alphabet = &cc::Unicode) {
     132    return new CC(std::move(set), alphabet);
    128133}
    129134
    130135inline CC * subtractCC(const CC * a, const CC * b) {
    131     return new CC(*a - *b);
     136    //assert (a->getAlphabet() == b->getAlphabet());
     137    return new CC(*a - *b, a->getAlphabet());
    132138}
    133139
    134140inline CC * intersectCC(const CC * a, const CC * b) {
    135     return new CC(*a & *b);
     141    //assert (a->getAlphabet() == b->getAlphabet());
     142    return new CC(*a & *b, a->getAlphabet());
    136143}
    137144
  • icGREP/icgrep-devel/icgrep/re/re_toolchain.cpp

    r5792 r5795  
    2222#include <re/exclude_CC.h>
    2323#include <re/re_name_resolve.h>
    24 #include <re/re_collect_unicodesets.h>
    25 #include <re/re_multiplex.h>
    2624#include <re/grapheme_clusters.h>
    27 #include <cc/multiplex_CCs.h>
    2825#include <llvm/Support/raw_ostream.h>
    2926
     
    6562
    6663
    67 std::pair<RE *, std::vector<re::CC *>> multiplexing_passes(RE * r) {
     64RE * multiplexing_prepasses(RE * r) {
    6865    std::vector<re::CC *> charclasses;
    6966    if (PrintOptions.isSet(ShowAllREs) || PrintOptions.isSet(ShowREs)) {
     
    116113        errs() << "exclude_CC:\n" << Printer_RE::PrintRE(r) << '\n';
    117114    }
    118     const auto UnicodeSets = re::collectUnicodeSets(r);
    119     std::vector<std::vector<unsigned>> exclusiveSetIDs;
    120     doMultiplexCCs(UnicodeSets, exclusiveSetIDs, charclasses);
    121     r = multiplex(r, UnicodeSets, exclusiveSetIDs);
    122     if (PrintOptions.isSet(ShowAllREs)) {
    123         errs() << "multiplex:\n" << Printer_RE::PrintRE(r) << '\n';
    124     }
    125     return std::pair<RE *, std::vector<re::CC *>>(r, charclasses);
     115    return r;
    126116}
    127117
  • icGREP/icgrep-devel/icgrep/re/re_toolchain.h

    r5784 r5795  
    3232RE * regular_expression_passes(RE * re_ast);
    3333
    34 std::pair<RE *, std::vector<re::CC *>> multiplexing_passes(RE * r);
     34RE * multiplexing_prepasses(RE * r);
    3535
    3636pablo::PabloAST * re2pablo_compiler(pablo::PabloKernel * kernel, RE * re_ast);
Note: See TracChangeset for help on using the changeset viewer.