Changeset 5278


Ignore:
Timestamp:
Jan 26, 2017, 10:06:16 AM (8 months ago)
Author:
cameron
Message:

Alphabet class: initial check-in

Location:
icGREP/icgrep-devel/icgrep
Files:
8 edited

Legend:

Unmodified
Added
Removed
  • icGREP/icgrep-devel/icgrep/CMakeLists.txt

    r5267 r5278  
    65 65 add_library(RegExpADT re/re_re.cpp re/re_cc.cpp re/re_rep.cpp re/re_diff.cpp re/re_intersect.cpp re/printer_re.cpp)
    66 66 add_library(RegExpCompiler re/re_parser.cpp re/re_nullable.cpp re/re_simplifier.cpp re/re_compiler.cpp re/re_analysis.cpp re/re_toolchain.cpp re/re_name_resolve.cpp re/re_parser_pcre.cpp re/re_parser_ere.cpp re/re_parser_bre.cpp re/re_parser_prosite.cpp re/re_utility.cpp)
    67    add_library(CCADT cc/cc_compiler.cpp utf8_encoder.cpp utf16_encoder.cpp UCD/CaseFolding_txt.cpp)
       67 add_library(CCADT cc/cc_compiler.cpp utf8_encoder.cpp utf16_encoder.cpp UCD/CaseFolding_txt.cpp cc/alphabet.cpp)
    68 68 add_library(UCDlib UCD/unicode_set.cpp UCD/ucd_compiler.cpp UCD/PropertyObjects.cpp UCD/resolve_properties.cpp UCD/UnicodeNameData.cpp)
    69 69
  • icGREP/icgrep-devel/icgrep/UCD/unicode_set.cpp

    r5267 r5278  
    29 29 using run_t = UnicodeSet::run_t;
    30 30 using interval_t = UnicodeSet::interval_t;
    31    using codepoint_t = UnicodeSet::codepoint_t;
    32 31
    33 32 //
  • icGREP/icgrep-devel/icgrep/UCD/unicode_set.h

    r5240 r5278  
     36  36 namespace UCD {
     37  37
         38 typedef unsigned codepoint_t;
         39 enum : codepoint_t { UNICODE_MAX = 0x10FFFF };
         40
     38  41 enum run_type_t : uint16_t {Empty, Mixed, Full};
     39  42
     
     46  49     using quad_iterator_return_t = std::pair<run_t, bitquad_t>;
     47  50
     48         using codepoint_t = unsigned;
     49  51     using interval_t = std::pair<codepoint_t, codepoint_t>;
     50  52
     
    197 199 };
    198 200
    199     enum : UnicodeSet::codepoint_t { UNICODE_MAX = 0x10FFFF };
    200 201
    201 202 inline void UnicodeSet::swap(UnicodeSet & other) {
  • icGREP/icgrep-devel/icgrep/re/re_cc.h

    r5267 r5278  
    13 13 namespace re {
    14 14
    15    using codepoint_t = UCD::UnicodeSet::codepoint_t;
       15 using codepoint_t = UCD::codepoint_t;
    16 16 using interval_t = UCD::UnicodeSet::interval_t;
    17 17
  • icGREP/icgrep-devel/icgrep/utf16_encoder.cpp

    r5046 r5278  
    10 10 #include <stdexcept>
    11 11
    12    using namespace re;
       12 using namespace UCD;
    13 13
    14 14 namespace cc {
     
    79 79 }
    80 80
    81    codepoint_t UTF16_Encoder::minCodePointWithCommonBytes(const re::codepoint_t cp, const unsigned n) {
       81 codepoint_t UTF16_Encoder::minCodePointWithCommonBytes(const codepoint_t cp, const unsigned n) {
    82 82     const auto len = length(cp);
    83 83     const auto mask = (static_cast<codepoint_t>(1) << (len - n) * 10) - 1;
     
    86 86 }
    87 87
    88    codepoint_t UTF16_Encoder::maxCodePointWithCommonBytes(const re::codepoint_t cp, const unsigned n) {
       88 codepoint_t UTF16_Encoder::maxCodePointWithCommonBytes(const codepoint_t cp, const unsigned n) {
    89 89     const auto len = length(cp);
    90 90     const auto mask = (static_cast<codepoint_t>(1) << (len - n) * 10) - 1;
  • icGREP/icgrep-devel/icgrep/utf16_encoder.h

    r5046 r5278  
     8  8 #define UTF16_ENCODER_H
     9  9
    10    #include <re/re_cc.h>
       10 #include <UCD/unicode_set.h>
    11 11
    12 12 namespace cc {
    13 13
    14 14 struct UTF16_Encoder {
    15        static bool isHi_Surrogate(const re::codepoint_t cp);
    16        static bool isLo_Surrogate(const re::codepoint_t cp);
    17        static unsigned length(const re::codepoint_t cp);
    18        static re::codepoint_t maxCodePoint(const unsigned length);
    19        static re::codepoint_t encodingByte(const re::codepoint_t cp, const unsigned n);
    20        static bool isLowCodePointAfterByte(const re::codepoint_t cp, const unsigned n);
    21        static bool isHighCodePointAfterByte(const re::codepoint_t cp, const unsigned n);
    22        static re::codepoint_t minCodePointWithCommonBytes(const re::codepoint_t cp, const unsigned n);
    23        static re::codepoint_t maxCodePointWithCommonBytes(const re::codepoint_t cp, const unsigned n);
       15     static bool isHi_Surrogate(const UCD::codepoint_t cp);
       16     static bool isLo_Surrogate(const UCD::codepoint_t cp);
       17     static unsigned length(const UCD::codepoint_t cp);
       18     static UCD::codepoint_t maxCodePoint(const unsigned length);
       19     static UCD::codepoint_t encodingByte(const UCD::codepoint_t cp, const unsigned n);
       20     static bool isLowCodePointAfterByte(const UCD::codepoint_t cp, const unsigned n);
       21     static bool isHighCodePointAfterByte(const UCD::codepoint_t cp, const unsigned n);
       22     static UCD::codepoint_t minCodePointWithCommonBytes(const UCD::codepoint_t cp, const unsigned n);
       23     static UCD::codepoint_t maxCodePointWithCommonBytes(const UCD::codepoint_t cp, const unsigned n);
    24 24 };
    25 25
  • icGREP/icgrep-devel/icgrep/utf8_encoder.cpp

    r4814 r5278  
    10 10 #include <stdexcept>
    11 11
    12    using namespace re;
       12 using namespace UCD;
    13 13
    14 14 namespace cc {
     
    86 86 }
    87 87
    88    codepoint_t UTF8_Encoder::minCodePointWithCommonBytes(const re::codepoint_t cp, const unsigned n) {
       88 codepoint_t UTF8_Encoder::minCodePointWithCommonBytes(const UCD::codepoint_t cp, const unsigned n) {
    89 89     const auto len = length(cp);
    90 90     const auto mask = (static_cast<codepoint_t>(1) << (len - n) * 6) - 1;
     
    93 93 }
    94 94
    95    codepoint_t UTF8_Encoder::maxCodePointWithCommonBytes(const re::codepoint_t cp, const unsigned n) {
       95 codepoint_t UTF8_Encoder::maxCodePointWithCommonBytes(const UCD::codepoint_t cp, const unsigned n) {
    96 96     const auto len = length(cp);
    97 97     const auto mask = (static_cast<codepoint_t>(1) << (len - n) * 6) - 1;
  • icGREP/icgrep-devel/icgrep/utf8_encoder.h

    r4819 r5278  
     8  8 #define UTF8_ENCODER_H
     9  9
    10    #include <re/re_cc.h>
       10 #include <UCD/unicode_set.h>
    11 11
    12 12 namespace cc {
    13 13
    14 14 struct UTF8_Encoder {
    15        static bool isPrefix(const re::codepoint_t cp);
    16        static unsigned length(const re::codepoint_t cp);
    17        static re::codepoint_t maxCodePoint(const unsigned length);
    18        static re::codepoint_t encodingByte(const re::codepoint_t cp, const unsigned n);
    19        static bool isLowCodePointAfterByte(const re::codepoint_t cp, const unsigned n);
    20        static bool isHighCodePointAfterByte(const re::codepoint_t cp, const unsigned n);
    21        static re::codepoint_t minCodePointWithCommonBytes(const re::codepoint_t cp, const unsigned n);
    22        static re::codepoint_t maxCodePointWithCommonBytes(const re::codepoint_t cp, const unsigned n);
       15     static bool isPrefix(const UCD::codepoint_t cp);
       16     static unsigned length(const UCD::codepoint_t cp);
       17     static UCD::codepoint_t maxCodePoint(const unsigned length);
       18     static UCD::codepoint_t encodingByte(const UCD::codepoint_t cp, const unsigned n);
       19     static bool isLowCodePointAfterByte(const UCD::codepoint_t cp, const unsigned n);
       20     static bool isHighCodePointAfterByte(const UCD::codepoint_t cp, const unsigned n);
       21     static UCD::codepoint_t minCodePointWithCommonBytes(const UCD::codepoint_t cp, const unsigned n);
       22     static UCD::codepoint_t maxCodePointWithCommonBytes(const UCD::codepoint_t cp, const unsigned n);
    23 23 };
    24 24
Note: See TracChangeset for help on using the changeset viewer.