source: icGREP/icgrep-devel/icgrep/cc/multiplex_CCs.cpp @ 5630

Last change on this file since 5630 was 5630, checked in by nmedfort, 20 months ago

Partial check-in for avoidance of compiling Pablo/LLVM code to determine the Kernel struct type when using a cached object. Inactive RE alternation minimization check in.

File size: 4.1 KB
RevLine 
[5369]1/*
2 *  Copyright (c) 2017 International Characters.
3 *  This software is licensed to the public under the Open Software License 3.0.
4 */
5
6#include <map>
7#include <UCD/unicode_set.h>
8#include <re/re_cc.h>
9#include "boost/dynamic_bitset.hpp"
10
11//
12// Breakpoints of a set of character classes (CCs): each codepoint c such that
13// there is some CC in CCs such that either (a) c is in the CC and c-1 is not, or
14// (b) c-1 is in the CC and c is not.
15//
16// The breakpoints may be determined by iterating through the interval
17// representation of each CC.   For each interval (lo, hi), lo and hi+1
18// are breakpoints.
19//
20// For each breakpoint, a bitset is computed identifying the source CCs whose
21// status changes at the breakpoint.
22//
23
24std::map<UCD::codepoint_t, boost::dynamic_bitset<>> computeBreakpoints(std::vector<UCD::UnicodeSet> CCs) {
25    std::map<UCD::codepoint_t, boost::dynamic_bitset<>> breakpoints;
26    for (unsigned i = 0; i < CCs.size(); i++) {
27        for (const UCD::UnicodeSet::interval_t & range : CCs[i]) {
28            auto lo = re::lo_codepoint(range);
29            auto hi = re::hi_codepoint(range);
30            auto f = breakpoints.find(lo);
31            if (f == breakpoints.end()) {
32                breakpoints.emplace(lo, boost::dynamic_bitset<>(CCs.size()));
33            }
34            breakpoints[lo].set(i);
35            f = breakpoints.find(hi + 1);
36            if (f == breakpoints.end()) {
37                breakpoints.emplace(hi+1, boost::dynamic_bitset<>(CCs.size()));
38            }
39            breakpoints[hi+1].set(i);
40        }
41    }
42    return breakpoints;
43}
44
45
[5630]46void doMultiplexCCs(const std::vector<UCD::UnicodeSet> & CCs,
[5369]47                    std::vector<std::vector<unsigned>> & exclusiveSetIDs,
48                    std::vector<UCD::UnicodeSet> & multiplexedCCs) {
49   
50    std::map<UCD::codepoint_t, boost::dynamic_bitset<>> breakpoints = computeBreakpoints(CCs);
51    // Initialize the exclusiveSetIDs to have one empty vector per source CC.
52    exclusiveSetIDs.clear();
53    exclusiveSetIDs.resize(CCs.size());
54    //
55    // Exclusive set determination.   
56    //
57    // Set up a map from the set of source CCs for each exclusive set to the exclusive set index.
58
59    std::map<boost::dynamic_bitset<>, unsigned> CC_set_to_exclusive_set_map;
60
61    // Entry 0 is for the characters not in any of the CCs.
62    CC_set_to_exclusive_set_map.emplace(boost::dynamic_bitset<>(CCs.size()), 0);
63
64    unsigned current_exclusive_set_idx = 0;
65    unsigned multiplexed_bit_count = 0;
[5490]66    boost::dynamic_bitset<> current_set(CCs.size());
[5369]67   
68    unsigned range_lo = 0;
[5490]69    unsigned cur_index = 1;
[5369]70    for (auto & bkpt_entry : breakpoints) {
71        if (current_exclusive_set_idx > 0) {  // We have a range entry to close for a pending exclusive set.
72            unsigned range_hi = bkpt_entry.first - 1;
73            for (unsigned bit = 0; bit < multiplexed_bit_count; bit++) {
74                if (((current_exclusive_set_idx >> bit) & 1) == 1) {
75                    multiplexedCCs[bit].insert_range(range_lo, range_hi);
76                }
77            }
78        }
79        // Start a new range.
80        range_lo = bkpt_entry.first;
81        current_set ^= bkpt_entry.second;
82        auto idx_iter = CC_set_to_exclusive_set_map.find(current_set);
83        if (idx_iter == CC_set_to_exclusive_set_map.end()) {
84            // New exclusive class; assign the next sequential integer.
[5490]85            //current_exclusive_set_idx = exclusiveSetIDs.size();
86            current_exclusive_set_idx = cur_index;
87            cur_index++;
[5369]88            CC_set_to_exclusive_set_map.emplace(current_set, current_exclusive_set_idx);
89           
90            for (unsigned CC1 = current_set.find_first(); CC1 < CCs.size(); CC1 = current_set.find_next(CC1)) {
91                exclusiveSetIDs[CC1].push_back(current_exclusive_set_idx);
92            }
93            if ((current_exclusive_set_idx & (current_exclusive_set_idx - 1)) == 0) {
94                multiplexed_bit_count++;
95                multiplexedCCs.push_back(UCD::UnicodeSet());
96            }
97        }
98        else {
99            current_exclusive_set_idx = idx_iter->second;
100        }
101    }
102    assert (current_exclusive_set_idx == 0 && "Breakpoint for final interval not found.");
103}
Note: See TracBrowser for help on using the repository browser.