source: icGREP/icgrep-devel/icgrep/UCD/CaseFolding_txt.cpp @ 4612

Last change on this file since 4612 was 4612, checked in by nmedfort, 4 years ago

Multiplexing bug fix and some CC changes.

File size: 3.2 KB
Line 
1/*
2 *  Copyright (c) 2014 International Characters, Inc.
3 *  This software is licensed to the public under the Open Software License 3.0.
4 *  icgrep is a trademark of International Characters, Inc.
5 *
6 */
7
8#include "CaseFolding_txt.h"
9#include <algorithm>
10
11int findFoldEntry(codepoint_t cp) {
12    int lo = 0;
13    int hi = foldTableSize;
14    while (hi - lo > 1) {
15        int mid = (lo + hi)/2;
16        if (cp < foldTable[mid].range_lo) {
17            hi = mid;
18        }
19        else {
20            lo = mid;
21        }
22    }
23    return lo;
24}
25
26void caseInsensitiveInsert(re::CC * cc, codepoint_t cp) {
27    caseInsensitiveInsertRange(cc, cp, cp);
28}
29
30void caseInsensitiveInsertRange(re::CC * cc, codepoint_t lo, codepoint_t hi) {
31    cc->insert_range(lo, hi);
32    // Find the first foldTable entry overlapping the (lo, hi) range.
33    int e = findFoldEntry(lo);
34    // There may be multiple consecutive entries overlapping the range.
35    // Keep processing until we are done.
36    while (foldTable[e].range_lo <= hi) {
37        const FoldEntry & fe = foldTable[e];
38        const FoldEntry & fnext = foldTable[e + 1];
39        // Constrain (lo, hi) to this entry only.
40        codepoint_t lo1 = std::max(lo, fe.range_lo);
41        codepoint_t hi1 = std::min(hi, fnext.range_lo - 1);
42        if (fe.fold_offset > 0 && fe.range_lo + fe.fold_offset < fnext.range_lo) {
43            //
44            // There are more than fold_offset values in the range, meaning that
45            // we have an extended range with alternating subranges of positive
46            // and negative offsets.
47            // First find the negative offset subrange.
48            codepoint_t subrange_lo = lo1 - ((lo1 - fe.range_lo) % (2 * fe.fold_offset));
49            codepoint_t negative_subrange_lo = subrange_lo + fe.fold_offset;
50            codepoint_t negative_subrange_hi = subrange_lo + 2 * fe.fold_offset - 1;
51            if ((lo1 <= negative_subrange_hi) && (hi1 >= negative_subrange_lo)) {
52                // negative offsets apply
53                cc->insert_range(std::max(negative_subrange_lo,lo1) - fe.fold_offset, std::min(negative_subrange_hi, hi1) - fe.fold_offset);
54            }
55            // Now the positive offset subrange.
56            codepoint_t positive_subrange_lo = hi1 - ((hi1 - fe.range_lo) % (2 * fe.fold_offset));
57            codepoint_t positive_subrange_hi = positive_subrange_lo + fe.fold_offset - 1;
58            if ((lo1 <= positive_subrange_hi) && (hi1 >= positive_subrange_lo)) {
59                cc->insert_range(std::max(positive_subrange_lo, lo1) + fe.fold_offset, std::min(positive_subrange_hi, hi1) + fe.fold_offset);
60            }
61        }
62        else if (fe.fold_offset != 0) {
63            // We have either a positive or negative offset, and all offsets for
64            // this entry have the same sign.
65            cc->insert_range(lo1 + fe.fold_offset, hi1 + fe.fold_offset);
66        }
67        // Now pick up any individual fold entries.
68        for (int i = 0; i < fe.fold_pairs.size(); i++) {
69            if (fe.fold_pairs[i].first < lo) continue;  // Only possible for first fold_entry.
70            if (fe.fold_pairs[i].first > hi) break;     // Only possible for last fold_entry.
71            cc->insert(fe.fold_pairs[i].second);
72        }
73        // Move on to the next fold_entry.
74        e++;
75    }
76}
77
78
Note: See TracBrowser for help on using the repository browser.