Ignore:
Timestamp:
Dec 31, 2017, 7:22:14 PM (13 months ago)
Author:
cameron
Message:

Supporting multiple alphabets in RE compilation - initial check-in

File:
1 edited

Legend:

Unmodified
Added
Removed
  • icGREP/icgrep-devel/icgrep/re/re_compiler.h

    r5812 r5816  
    12 12 #include <pablo/builder.hpp>
    13 13 #include <vector>       // for vector<>::iterator
    14 namespace cc { class CC_Compiler; }
     14 namespace cc { class CC_Compiler; class Alphabet;}
    15 15 namespace pablo { class PabloAST; }
    16 16 namespace pablo { class PabloBuilder; }
    17 17 namespace pablo { class PabloKernel; }
     18 namespace pablo { class Var; }
    18 19 namespace re { class Alt; }
    19 20 namespace re { class Assertion; }
     
    51 52
    52 53    RE_Compiler(pablo::PabloKernel * kernel, cc::CC_Compiler & ccCompiler);
     54   
     55    //
     56    // The CCs (character classes) within a regular expression are generally
     57    // expressed using a single alphabet.   But multiple alphabets may be
     58    // used under some circumstances.   For example, regular expressions for
     59    // Unicode may use both the Unicode alphabet for full Unicode characters
     60    // as well as the Byte alphabet for the individual code units of UTF-8.
     61    // In other cases, a multiplexed alphabet may be used for a certain
     62    // subexpression, for example, if the subexpression involves a local
     63    // language or a capture-backreference combination.
     64    //
     65    // Alphabets are added as needed using the addAlphabet method, giving both
     66    // the alphabet value and the set of parallel bit streams that comprise
     67    // a basis for the coded alphabet values.
     68   
     69    void addAlphabet(cc::Alphabet * a, pablo::Var * basis_set);
     70
    53 71    pablo::PabloAST * compile(RE * re, pablo::PabloAST * const initialCursors = nullptr);
    54 72
     
    113 131
    114 132    pablo::PabloKernel * const                      mKernel;
     133    std::vector<cc::Alphabet *>                     mAlphabets;
     134    std::vector<std::unique_ptr<cc::CC_Compiler>>   mAlphabetCompilers;
     135
    115 136    bool                                            mCountOnly;
    116 137    cc::CC_Compiler &                               mCCCompiler;
Note: See TracChangeset for help on using the changeset viewer.