Ignore:
Timestamp:
Nov 28, 2017, 1:45:19 AM (21 months ago)
Author:
nmedfort
Message:

Bug fix for segment pipeline parallel mode + memory management improvements.

File:
1 edited

Legend:

Unmodified
Added
Removed
  • icGREP/icgrep-devel/icgrep/UCD/CaseFolding.h

    r5673 r5748  
    1212#include "PropertyObjects.h"
    1313#include "PropertyValueAliases.h"
    14 #include "re/re_cc.h"
    1514#include "unicode_set.h"
    1615#include <vector>
    1716
    18 typedef unsigned codepoint_t;
    19 
    2017struct FoldEntry {
    21     re::codepoint_t range_lo;
    22     int fold_offset;
    23     std::vector<re::interval_t> fold_pairs;
     18    const UCD::codepoint_t range_lo;
     19    const int fold_offset;
     20    const std::vector<UCD::interval_t> fold_pairs;
    2421};
    2522
    26 
    27 void caseInsensitiveInsertRange(re::CC * cc, const re::codepoint_t lo, const re::codepoint_t hi);
    28 
    29 inline void caseInsensitiveInsert(re::CC * cc, const re::codepoint_t cp) {
     23void caseInsensitiveInsertRange(UCD::UnicodeSet * const cc, const UCD::codepoint_t lo, const UCD::codepoint_t hi);
     24
     25inline void caseInsensitiveInsert(UCD::UnicodeSet * const cc, const UCD::codepoint_t cp) {
    3026    caseInsensitiveInsertRange(cc, cp, cp);
    3127}
     
    312308        /** Code Point Ranges for scf mapping to <none>
    313309        **/
    314 
    315         const UnicodeSet null_codepoint_set
    316                     {{{Empty, 34816}},
    317              {}};
     310       
     311       
     312        namespace {
     313        const static UnicodeSet::run_t __null_codepoint_set_runs[] = {
     314        {Empty, 34816}};
     315        const static UnicodeSet::bitquad_t * const __null_codepoint_set_quads = nullptr;
     316        }
     317
     318        const static UnicodeSet null_codepoint_set{const_cast<UnicodeSet::run_t *>(__null_codepoint_set_runs), 1, 0, const_cast<UnicodeSet::bitquad_t *>(__null_codepoint_set_quads), 0, 0};
     319
     320
    318321
    319322        /** Code Point Ranges for scf mapping to <codepoint>
     
    468471        [a7b7, ab6f], [abc0, ff20], [ff3b, 103ff], [10428, 104af],
    469472        [104d4, 10c7f], [10cb3, 1189f], [118c0, 1e8ff], [1e922, 10ffff]**/
    470         const UnicodeSet reflexive_set
    471                     {{{Full, 2}, {Mixed, 1}, {Full, 2}, {Mixed, 2}, {Full, 1},
    472               {Mixed, 11}, {Full, 7}, {Mixed, 6}, {Empty, 1}, {Mixed, 1},
    473               {Full, 1}, {Mixed, 8}, {Full, 90}, {Empty, 1}, {Mixed, 1},
    474               {Full, 24}, {Mixed, 1}, {Full, 68}, {Mixed, 1}, {Full, 11},
    475               {Mixed, 16}, {Full, 9}, {Mixed, 1}, {Full, 1}, {Mixed, 2},
    476               {Full, 24}, {Mixed, 2}, {Full, 57}, {Empty, 1}, {Mixed, 1},
    477               {Full, 1}, {Mixed, 5}, {Full, 970}, {Mixed, 3}, {Full, 4},
    478               {Mixed, 5}, {Full, 29}, {Mixed, 1}, {Empty, 2}, {Full, 667},
    479               {Mixed, 1}, {Full, 38}, {Empty, 1}, {Mixed, 1}, {Full, 3},
    480               {Mixed, 2}, {Full, 61}, {Empty, 1}, {Mixed, 1}, {Full, 95},
    481               {Empty, 1}, {Full, 1666}, {Empty, 1}, {Mixed, 1},
    482               {Full, 30902}},
    483              {0xf8000001, 0xffdfffff, 0x80800000, 0xaaaaaaaa, 0x55abaaaa,
    484               0xaaaaab55, 0x54aaaaaa, 0x4e243129, 0xee512d2a, 0xb555524f,
    485               0xaa29aaaa, 0xaaaaaaaa, 0x93faaaaa, 0xffffaa85, 0xffffffdf,
    486               0x7fbaffff, 0x000128bf, 0xfffff004, 0xaa9c7ffb, 0x194caaaa,
    487               0xffff0000, 0xaaaaaaaa, 0xaaaaabfe, 0xaaaaaaaa, 0xaaaad554,
    488               0xaaaaaaaa, 0xaaaaaaaa, 0x0001aaaa, 0xff800000, 0xffffdf40,
    489               0xc0ffffff, 0xfffffe00, 0xaaaaaaaa, 0xaaaaaaaa, 0xaaaaaaaa,
    490               0xaaaaaaaa, 0xb7eaaaaa, 0xaaaaaaaa, 0xaaaaaaaa, 0xaaaaaaaa,
    491               0xc0ff00ff, 0x00ff00ff, 0x55ffc0ff, 0xffff00ff, 0x00ff00ff,
    492               0xa0ff00ff, 0xf0ffe0ff, 0xe0ffe0ff, 0xfffbf3bf, 0xffff0000,
    493               0xfffffff7, 0x003fffff, 0xffff0000, 0xffff8000, 0x3fda1562,
    494               0xaaaaaaaa, 0xaaaaaaaa, 0xaaaaaaaa, 0xfffbd7fa, 0xaaaaaaaa,
    495               0xffffeaaa, 0xfaaaaaaa, 0xaaabaaab, 0xaaaaaaaa, 0x95ffaaaa,
    496               0xaabad7aa, 0xffa082aa, 0x0000ffff, 0xf8000001, 0xffffff00,
    497               0x0000ffff, 0xfff00000, 0xfff80000, 0xfffffffc}};
     473       
     474       
     475        namespace {
     476        const static UnicodeSet::run_t __reflexive_set_runs[] = {
     477        {Full, 2}, {Mixed, 1}, {Full, 2}, {Mixed, 2}, {Full, 1},
     478        {Mixed, 11}, {Full, 7}, {Mixed, 6}, {Empty, 1}, {Mixed, 1},
     479        {Full, 1}, {Mixed, 8}, {Full, 90}, {Empty, 1}, {Mixed, 1},
     480        {Full, 24}, {Mixed, 1}, {Full, 68}, {Mixed, 1}, {Full, 11},
     481        {Mixed, 16}, {Full, 9}, {Mixed, 1}, {Full, 1}, {Mixed, 2},
     482        {Full, 24}, {Mixed, 2}, {Full, 57}, {Empty, 1}, {Mixed, 1},
     483        {Full, 1}, {Mixed, 5}, {Full, 970}, {Mixed, 3}, {Full, 4},
     484        {Mixed, 5}, {Full, 29}, {Mixed, 1}, {Empty, 2}, {Full, 667},
     485        {Mixed, 1}, {Full, 38}, {Empty, 1}, {Mixed, 1}, {Full, 3},
     486        {Mixed, 2}, {Full, 61}, {Empty, 1}, {Mixed, 1}, {Full, 95},
     487        {Empty, 1}, {Full, 1666}, {Empty, 1}, {Mixed, 1}, {Full, 30902}};
     488        const static UnicodeSet::bitquad_t  __reflexive_set_quads[] = {
     489        0xf8000001, 0xffdfffff, 0x80800000, 0xaaaaaaaa, 0x55abaaaa,
     490        0xaaaaab55, 0x54aaaaaa, 0x4e243129, 0xee512d2a, 0xb555524f,
     491        0xaa29aaaa, 0xaaaaaaaa, 0x93faaaaa, 0xffffaa85, 0xffffffdf,
     492        0x7fbaffff, 0x000128bf, 0xfffff004, 0xaa9c7ffb, 0x194caaaa,
     493        0xffff0000, 0xaaaaaaaa, 0xaaaaabfe, 0xaaaaaaaa, 0xaaaad554,
     494        0xaaaaaaaa, 0xaaaaaaaa, 0x0001aaaa, 0xff800000, 0xffffdf40,
     495        0xc0ffffff, 0xfffffe00, 0xaaaaaaaa, 0xaaaaaaaa, 0xaaaaaaaa,
     496        0xaaaaaaaa, 0xb7eaaaaa, 0xaaaaaaaa, 0xaaaaaaaa, 0xaaaaaaaa,
     497        0xc0ff00ff, 0x00ff00ff, 0x55ffc0ff, 0xffff00ff, 0x00ff00ff,
     498        0xa0ff00ff, 0xf0ffe0ff, 0xe0ffe0ff, 0xfffbf3bf, 0xffff0000,
     499        0xfffffff7, 0x003fffff, 0xffff0000, 0xffff8000, 0x3fda1562,
     500        0xaaaaaaaa, 0xaaaaaaaa, 0xaaaaaaaa, 0xfffbd7fa, 0xaaaaaaaa,
     501        0xffffeaaa, 0xfaaaaaaa, 0xaaabaaab, 0xaaaaaaaa, 0x95ffaaaa,
     502        0xaabad7aa, 0xffa082aa, 0x0000ffff, 0xf8000001, 0xffffff00,
     503        0x0000ffff, 0xfff00000, 0xfff80000, 0xfffffffc};
     504        }
     505
     506        const static UnicodeSet reflexive_set{const_cast<UnicodeSet::run_t *>(__reflexive_set_runs), 55, 0, const_cast<UnicodeSet::bitquad_t *>(__reflexive_set_quads), 74, 0};
     507
     508
    498509
    499510        const unsigned buffer_length = 4958;
    500         const static char __attribute__ ((aligned (32))) string_buffer[5120] = u8R"__(a
     511        const static char string_buffer[5120] LLVM_ALIGNAS(32) = u8R"__(a
    501512b
    502513c
     
    18691880)__";
    18701881
    1871         const static std::vector<codepoint_t> defined_cps = {
     1882        const static std::vector<codepoint_t> defined_cps{
    18721883        0x0041, 0x0042, 0x0043, 0x0044, 0x0045, 0x0046, 0x0047, 0x0048,
    18731884        0x0049, 0x004a, 0x004b, 0x004c, 0x004d, 0x004e, 0x004f, 0x0050,
     
    20402051        0x1e91b, 0x1e91c, 0x1e91d, 0x1e91e, 0x1e91f, 0x1e920, 0x1e921};
    20412052        static StringPropertyObject property_object(scf,
    2042                                                     null_codepoint_set,
    2043                                                     reflexive_set,
     2053                                                    std::move(null_codepoint_set),
     2054                                                    std::move(reflexive_set),
    20442055                                                    static_cast<const char *>(string_buffer),
    20452056                                                    buffer_length,
    2046                                                     defined_cps);
     2057                                                    std::move(defined_cps));
    20472058    }
    20482059    namespace CF_ns {
     
    20572068        [fb13, fb17]**/
    20582069
    2059         const UnicodeSet explicitly_defined_set
    2060                     {{{Empty, 6}, {Mixed, 1}, {Empty, 2}, {Mixed, 2}, {Empty, 4},
    2061               {Mixed, 1}, {Empty, 12}, {Mixed, 2}, {Empty, 14}, {Mixed, 1},
    2062               {Empty, 199}, {Mixed, 1}, {Empty, 5}, {Mixed, 1}, {Empty, 1},
    2063               {Full, 1}, {Mixed, 3}, {Empty, 1752}, {Mixed, 1},
    2064               {Empty, 32807}},
    2065              {0x80000000, 0x00010000, 0x00000200, 0x00010000, 0x00010000,
    2066               0x00010000, 0x00000080, 0x47c00000, 0x00550000, 0x10dcffff,
    2067               0x00cc10dc, 0x10dc00dc, 0x00f8007f}};
     2070       
     2071        namespace {
     2072        const static UnicodeSet::run_t __explicitly_defined_set_runs[] = {
     2073        {Empty, 6}, {Mixed, 1}, {Empty, 2}, {Mixed, 2}, {Empty, 4},
     2074        {Mixed, 1}, {Empty, 12}, {Mixed, 2}, {Empty, 14}, {Mixed, 1},
     2075        {Empty, 199}, {Mixed, 1}, {Empty, 5}, {Mixed, 1}, {Empty, 1},
     2076        {Full, 1}, {Mixed, 3}, {Empty, 1752}, {Mixed, 1}, {Empty, 32807}};
     2077        const static UnicodeSet::bitquad_t  __explicitly_defined_set_quads[] = {
     2078        0x80000000, 0x00010000, 0x00000200, 0x00010000, 0x00010000,
     2079        0x00010000, 0x00000080, 0x47c00000, 0x00550000, 0x10dcffff,
     2080        0x00cc10dc, 0x10dc00dc, 0x00f8007f};
     2081        }
     2082
     2083        const static UnicodeSet explicitly_defined_set{const_cast<UnicodeSet::run_t *>(__explicitly_defined_set_runs), 20, 0, const_cast<UnicodeSet::bitquad_t *>(__explicitly_defined_set_quads), 13, 0};
     2084
     2085
    20682086
    20692087        const unsigned buffer_length = 575;
    2070         const static char __attribute__ ((aligned (32))) string_buffer[768] = u8R"__(ss
     2088        const static char string_buffer[768] LLVM_ALIGNAS(32) = u8R"__(ss
    20712089i̇
    20722090ʼn
     
    21852203)__";
    21862204
    2187         const static std::vector<codepoint_t> defined_cps = {
     2205        const static std::vector<codepoint_t> defined_cps{
    21882206        0x00df, 0x0130, 0x0149, 0x01f0, 0x0390, 0x03b0, 0x0587, 0x1e96,
    21892207        0x1e97, 0x1e98, 0x1e99, 0x1e9a, 0x1e9e, 0x1f50, 0x1f52, 0x1f54,
     
    22012219        static StringOverridePropertyObject property_object(cf,
    22022220                                                    SCF_ns::property_object,
    2203                                                     explicitly_defined_set,
     2221                                                    std::move(explicitly_defined_set),
    22042222                                                    static_cast<const char *>(string_buffer),
    22052223                                                    buffer_length,
    2206                                                     defined_cps);
     2224                                                    std::move(defined_cps));
    22072225    }
    22082226}
Note: See TracChangeset for help on using the changeset viewer.