source: icGREP/icgrep-devel/icgrep/cc/multiplex_CCs.cpp @ 5748

Last change on this file since 5748 was 5748, checked in by nmedfort, 17 months ago

Bug fix for segment pipeline parallel mode + memory management improvements.

File size: 4.3 KB
RevLine 
[5369]1/*
2 *  Copyright (c) 2017 International Characters.
3 *  This software is licensed to the public under the Open Software License 3.0.
4 */
5
6#include <map>
7#include <UCD/unicode_set.h>
8#include <re/re_cc.h>
9#include "boost/dynamic_bitset.hpp"
10
11//
12// Breakpoints of a set of character classes (CCs): each codepoint c such that
13// there is some CC in CCs such that either (a) c is in the CC and c-1 is not, or
[5741]14// (b) c-1 is in the CC and c is not.  Boundary cases: if codepoint 0 is in
15// some CC, then 0 is a breakpoint (codepoint -1 is not in any CC).  If codepoint
16// 0x10FFFF is in some CC then 0x110000 is a breakpoint.
[5369]17//
18// The breakpoints may be determined by iterating through the interval
19// representation of each CC.   For each interval (lo, hi), lo and hi+1
[5727]20// are breakpoints.
[5369]21//
22// For each breakpoint, a bitset is computed identifying the source CCs whose
23// status changes at the breakpoint.
24//
25
[5748]26std::map<UCD::codepoint_t, boost::dynamic_bitset<>> computeBreakpoints(const std::vector<const re::CC *> & CCs) {
[5369]27    std::map<UCD::codepoint_t, boost::dynamic_bitset<>> breakpoints;
28    for (unsigned i = 0; i < CCs.size(); i++) {
[5748]29        for (const auto range : *CCs[i]) {
30            const auto lo = re::lo_codepoint(range);
31            const auto hi = re::hi_codepoint(range);
[5369]32            auto f = breakpoints.find(lo);
33            if (f == breakpoints.end()) {
34                breakpoints.emplace(lo, boost::dynamic_bitset<>(CCs.size()));
35            }
36            breakpoints[lo].set(i);
37            f = breakpoints.find(hi + 1);
38            if (f == breakpoints.end()) {
39                breakpoints.emplace(hi+1, boost::dynamic_bitset<>(CCs.size()));
40            }
41            breakpoints[hi+1].set(i);
42        }
43    }
44    return breakpoints;
45}
46
47
[5748]48void doMultiplexCCs(const std::vector<const re::CC *> & CCs,
[5369]49                    std::vector<std::vector<unsigned>> & exclusiveSetIDs,
[5748]50                    std::vector<re::CC *> & multiplexedCCs) {
[5369]51   
[5748]52    const auto breakpoints = computeBreakpoints(CCs);
[5369]53    // Initialize the exclusiveSetIDs to have one empty vector per source CC.
54    exclusiveSetIDs.clear();
55    exclusiveSetIDs.resize(CCs.size());
56    //
57    // Exclusive set determination.   
58    //
59    // Set up a map from the set of source CCs for each exclusive set to the exclusive set index.
60
61    std::map<boost::dynamic_bitset<>, unsigned> CC_set_to_exclusive_set_map;
62
63    // Entry 0 is for the characters not in any of the CCs.
64    CC_set_to_exclusive_set_map.emplace(boost::dynamic_bitset<>(CCs.size()), 0);
65
66    unsigned current_exclusive_set_idx = 0;
67    unsigned multiplexed_bit_count = 0;
[5490]68    boost::dynamic_bitset<> current_set(CCs.size());
[5369]69   
70    unsigned range_lo = 0;
[5741]71    unsigned next_set_index = 1;
[5369]72    for (auto & bkpt_entry : breakpoints) {
73        if (current_exclusive_set_idx > 0) {  // We have a range entry to close for a pending exclusive set.
74            unsigned range_hi = bkpt_entry.first - 1;
75            for (unsigned bit = 0; bit < multiplexed_bit_count; bit++) {
76                if (((current_exclusive_set_idx >> bit) & 1) == 1) {
[5748]77                    multiplexedCCs[bit]->insert_range(range_lo, range_hi);
[5369]78                }
79            }
80        }
81        // Start a new range.
82        range_lo = bkpt_entry.first;
[5741]83        if (range_lo > UCD::UNICODE_MAX) continue; // Nothing to do for bkpt 0x110000
[5369]84        current_set ^= bkpt_entry.second;
85        auto idx_iter = CC_set_to_exclusive_set_map.find(current_set);
86        if (idx_iter == CC_set_to_exclusive_set_map.end()) {
87            // New exclusive class; assign the next sequential integer.
[5490]88            //current_exclusive_set_idx = exclusiveSetIDs.size();
[5741]89            current_exclusive_set_idx = next_set_index;
90            next_set_index++;
[5369]91            CC_set_to_exclusive_set_map.emplace(current_set, current_exclusive_set_idx);
92           
93            for (unsigned CC1 = current_set.find_first(); CC1 < CCs.size(); CC1 = current_set.find_next(CC1)) {
94                exclusiveSetIDs[CC1].push_back(current_exclusive_set_idx);
95            }
96            if ((current_exclusive_set_idx & (current_exclusive_set_idx - 1)) == 0) {
97                multiplexed_bit_count++;
[5748]98                multiplexedCCs.push_back(re::makeCC());
[5369]99            }
100        }
101        else {
102            current_exclusive_set_idx = idx_iter->second;
103        }
104    }
105}
Note: See TracBrowser for help on using the repository browser.