source: icGREP/icgrep-devel/icgrep/UCD/CaseFolding_txt.cpp @ 4627

Last change on this file since 4627 was 4615, checked in by nmedfort, 4 years ago

Temporary check-in

File size: 3.2 KB
Line 
1/*
2 *  Copyright (c) 2014 International Characters, Inc.
3 *  This software is licensed to the public under the Open Software License 3.0.
4 *  icgrep is a trademark of International Characters, Inc.
5 *
6 */
7
8#include "CaseFolding_txt.h"
9#include <algorithm>
10
11using namespace re;
12
13int findFoldEntry(const codepoint_t cp) {
14    int lo = 0;
15    int hi = foldTableSize;
16    while (hi - lo > 1) {
17        int mid = (lo + hi)/2;
18        if (cp < foldTable[mid].range_lo) {
19            hi = mid;
20        }
21        else {
22            lo = mid;
23        }
24    }
25    return lo;
26}
27
28void caseInsensitiveInsertRange(CC * cc, const codepoint_t lo, const codepoint_t hi) {
29    cc->insert_range(lo, hi);
30    // Find the first foldTable entry overlapping the (lo, hi) range.
31    int e = findFoldEntry(lo);
32    // There may be multiple consecutive entries overlapping the range.
33    // Keep processing until we are done.
34    while (foldTable[e].range_lo <= hi) {
35        const FoldEntry & fe = foldTable[e];
36        const FoldEntry & fnext = foldTable[e + 1];
37        // Constrain (lo, hi) to this entry only.
38        codepoint_t lo1 = std::max(lo, fe.range_lo);
39        codepoint_t hi1 = std::min(hi, fnext.range_lo - 1);
40        if (fe.fold_offset > 0 && fe.range_lo + fe.fold_offset < fnext.range_lo) {
41            //
42            // There are more than fold_offset values in the range, meaning that
43            // we have an extended range with alternating subranges of positive
44            // and negative offsets.
45            // First find the negative offset subrange.
46            codepoint_t subrange_lo = lo1 - ((lo1 - fe.range_lo) % (2 * fe.fold_offset));
47            codepoint_t negative_subrange_lo = subrange_lo + fe.fold_offset;
48            codepoint_t negative_subrange_hi = subrange_lo + 2 * fe.fold_offset - 1;
49            if ((lo1 <= negative_subrange_hi) && (hi1 >= negative_subrange_lo)) {
50                // negative offsets apply
51                cc->insert_range(std::max(negative_subrange_lo,lo1) - fe.fold_offset, std::min(negative_subrange_hi, hi1) - fe.fold_offset);
52            }
53            // Now the positive offset subrange.
54            codepoint_t positive_subrange_lo = hi1 - ((hi1 - fe.range_lo) % (2 * fe.fold_offset));
55            codepoint_t positive_subrange_hi = positive_subrange_lo + fe.fold_offset - 1;
56            if ((lo1 <= positive_subrange_hi) && (hi1 >= positive_subrange_lo)) {
57                cc->insert_range(std::max(positive_subrange_lo, lo1) + fe.fold_offset, std::min(positive_subrange_hi, hi1) + fe.fold_offset);
58            }
59        }
60        else if (fe.fold_offset != 0) {
61            // We have either a positive or negative offset, and all offsets for
62            // this entry have the same sign.
63            cc->insert_range(lo1 + fe.fold_offset, hi1 + fe.fold_offset);
64        }
65        // Now pick up any individual fold entries.
66        for (int i = 0; i < fe.fold_pairs.size(); i++) {
67            if (fe.fold_pairs[i].first < lo) continue;  // Only possible for first fold_entry.
68            if (fe.fold_pairs[i].first > hi) break;     // Only possible for last fold_entry.
69            cc->insert(fe.fold_pairs[i].second);
70        }
71        // Move on to the next fold_entry.
72        e++;
73    }
74}
75
76
Note: See TracBrowser for help on using the repository browser.