source: icGREP/icgrep-devel/icgrep-1.00/UCD/CaseFolding_txt.cpp @ 4529

Last change on this file since 4529 was 4318, checked in by cameron, 5 years ago

Don't make impossible ranges

File size: 3.1 KB
Line 
1/*
2 *  Copyright (c) 2014 International Characters, Inc.
3 *  This software is licensed to the public under the Open Software License 3.0.
4 *  icgrep is a trademark of International Characters, Inc.
5 *
6 */
7
8#include "CaseFolding_txt.h"
9#include <algorithm>
10
11int findFoldEntry(codepoint_t cp) {
12    int lo = 0; 
13    int hi = foldTableSize;
14    while (hi - lo > 1) {
15      int mid = (lo + hi)/2;
16      if (cp < foldTable[mid].range_lo) {
17        hi = mid;
18      }
19      else {
20        lo = mid;
21      }
22    }
23    return lo;
24}
25
26void caseInsensitiveInsert(re::CC * cc, codepoint_t cp) {
27    caseInsensitiveInsertRange(cc, cp, cp);
28}
29
30void caseInsensitiveInsertRange(re::CC * base_cc, codepoint_t lo, codepoint_t hi) {
31    base_cc->insert_range(lo, hi);
32    // Find the first foldTable entry overlapping the (lo, hi) range.
33    int e = findFoldEntry(lo);
34    // There may be multiple consecutive entries overlapping the range.
35    // Keep processing until we are done.
36    while (foldTable[e].range_lo <= hi) {
37      const FoldEntry & fe = foldTable[e];
38      const FoldEntry & fnext = foldTable[e + 1];
39      // Constrain (lo, hi) to this entry only.
40      codepoint_t lo1 = std::max(lo, fe.range_lo);
41      codepoint_t hi1 = std::min(hi, fnext.range_lo - 1);
42      if (fe.fold_offset > 0 && fe.range_lo + fe.fold_offset < fnext.range_lo) {
43        //
44        // There are more than fold_offset values in the range, meaning that
45        // we have an extended range with alternating subranges of positive
46        // and negative offsets. 
47        // First find the negative offset subrange.
48        codepoint_t subrange_lo = lo1 - ((lo1 - fe.range_lo) % (2 * fe.fold_offset));
49        codepoint_t negative_subrange_lo = subrange_lo + fe.fold_offset;
50        codepoint_t negative_subrange_hi = subrange_lo + 2 * fe.fold_offset - 1;
51        if ((lo1 <= negative_subrange_hi) && (hi1 >= negative_subrange_lo)) {
52           // negative offsets apply
53           base_cc -> insert_range(std::max(negative_subrange_lo,lo1) - fe.fold_offset, std::min(negative_subrange_hi, hi1) - fe.fold_offset);
54        }
55        // Now the positive offset subrange.
56        codepoint_t positive_subrange_lo = hi1 - ((hi1 - fe.range_lo) % (2 * fe.fold_offset));
57        codepoint_t positive_subrange_hi = positive_subrange_lo + fe.fold_offset - 1;
58        if ((lo1 <= positive_subrange_hi) && (hi1 >= positive_subrange_lo)) {
59           base_cc -> insert_range(std::max(positive_subrange_lo, lo1) + fe.fold_offset, std::min(positive_subrange_hi, hi1) + fe.fold_offset);
60        }
61      }
62      else if (fe.fold_offset != 0) {
63        // We have either a positive or negative offset, and all offsets for
64        // this entry have the same sign.
65        base_cc -> insert_range(lo1 + fe.fold_offset, hi1 + fe.fold_offset);
66      }
67      // Now pick up any individual fold entries.
68      for (int i = 0; i < fe.fold_pairs.size(); i++) {
69        if (fe.fold_pairs[i].first < lo) continue;  // Only possible for first fold_entry.
70        if (fe.fold_pairs[i].first > hi) break;     // Only possible for last fold_entry.
71        base_cc->insert(fe.fold_pairs[i].second);
72      }
73      // Move on to the next fold_entry.
74      e++;
75    }
76}
77
78
Note: See TracBrowser for help on using the repository browser.