Changeset 4618 for icGREP/icgrepdevel/icgrep/UCD/unicode_set.cpp
 Timestamp:
 Jun 26, 2015, 3:27:40 PM (4 years ago)
 File:

 1 edited
Legend:
 Unmodified
 Added
 Removed

icGREP/icgrepdevel/icgrep/UCD/unicode_set.cpp
r4617 r4618 21 21 #include "assert.h" 22 22 #include <string> 23 #include <llvm/Support/raw_ostream.h> 24 #include <include/simdlib/builtins.hpp> 23 25 #include <iostream> 24 #include <include/simdlib/builtins.hpp>25 26 26 27 const size_t QUAD_BITS = (8 * sizeof(bitquad_t)); … … 29 30 const bitquad_t FULL_QUAD_MASK = 1; 30 31 31 inline const RunStructure & get_run(UnicodeSet::quad_iterator i) { 32 return std::get<0>(*i); 33 } 34 35 inline bitquad_t get_quad(UnicodeSet::quad_iterator i) { 36 return std::get<1>(*i); 37 } 38 39 const std::pair<RunStructure, bitquad_t> UnicodeSet::quad_iterator::dereference() const { 40 const RunStructure & t = mUnicodeSet.runs[mRunIndex]; 41 RunStructure s(t.mType, t.mRunLength  mOffset); 42 const bitquad_t q = ((t.mType == Empty) ? 0 : (t.mType == Full) ? FULL_QUAD_MASK : mUnicodeSet.quads[mQuadIndex]); 43 return std::make_pair(s, q); 44 } 45 32 /**  * 33 * @brief append_run 34 **  */ 35 inline void UnicodeSet::append_run(const run_type_t type, const unsigned length) { 36 if (length == 0) { 37 return; 38 } 39 else if (runs.size() == 0) { 40 runs.emplace_back(type, length); 41 } 42 else { 43 RunStructure last_run = runs[runs.size()1]; 44 if (last_run.mType == type) { 45 runs.back().mRunLength += length; 46 } 47 else { 48 runs.emplace_back(type, length); 49 } 50 } 51 } 52 53 /**  * 54 * @brief append_quad 55 **  */ 56 inline void UnicodeSet::append_quad(const bitquad_t quad) { 57 if (quad == 0) { 58 append_run(Empty, 1); 59 } 60 else if (quad == FULL_QUAD_MASK) { 61 append_run(Full, 1); 62 } 63 else { 64 quads.push_back(quad); 65 append_run(Mixed, 1); 66 } 67 } 68 69 /**  * 70 * @brief dump 71 **  */ 72 void UnicodeSet::dump(llvm::raw_ostream & out) const { 73 auto quad_itr = quads.cbegin(); 74 for (const RunStructure & run : runs) { 75 if (run.mType == Empty) { 76 out << "Empty(" << run.mRunLength << ")\n"; 77 } 78 else if (run.mType == Empty) { 79 out << "Full(" << run.mRunLength << ")\n"; 80 } 81 else { 82 for (unsigned i = 0; i != run.mRunLength; ++i, ++quad_itr) { 83 assert (quad_itr != quads.cend()); 84 out << "Mixed("; out.write_hex(*quad_itr) << ")\n"; 85 } 86 } 87 } 88 } 89 90 /**  * 91 * @brief complement 92 **  */ 93 UnicodeSet UnicodeSet::complement() const { 94 UnicodeSet set; 95 auto quad_itr = quads.cbegin(); 96 for (const RunStructure & run : runs) { 97 if (run.mType == Empty) { 98 set.append_run(Full, run.mRunLength); 99 } 100 else if (run.mType == Empty) { 101 set.append_run(Empty, run.mRunLength); 102 } 103 else { 104 for (unsigned i = 0; i != run.mRunLength; ++i, ++quad_itr) { 105 assert (quad_itr != quads.cend()); 106 set.append_quad(FULL_QUAD_MASK ^ *quad_itr); 107 } 108 } 109 } 110 return set; 111 } 112 113 /**  * 114 * @brief intersection 115 **  */ 116 UnicodeSet UnicodeSet::operator&(const UnicodeSet & other) const { 117 UnicodeSet iset; 118 const auto e1 = quad_end(); 119 const auto e2 = other.quad_end(); 120 for (auto i1 = quad_begin(), i2 = other.quad_begin(); i1 != e1 && i2 != e2; ) { 121 const auto run1 = i1.getRun(); 122 const auto run2 = i2.getRun(); 123 const auto n = std::min(run1.mRunLength, run2.mRunLength); 124 if (run1.mType == run2.mType && run1.mType != Mixed) { 125 iset.append_run(run1.mType, n); 126 i1 += n; 127 i2 += n; 128 } 129 else if (run1.mType == Full) { 130 for (unsigned i = 0; i != n; ++i, ++i2) { 131 iset.append_quad(i2.getQuad()); 132 } 133 i1 += n; 134 } 135 else if (run2.mType == Full) { 136 for (unsigned i = 0; i != n; ++i, ++i1) { 137 iset.append_quad(i1.getQuad()); 138 } 139 i2 += n; 140 } 141 else { 142 for (unsigned i = 0; i < n; ++i, ++i1, ++i2) { 143 iset.append_quad(i1.getQuad() & i2.getQuad()); 144 } 145 } 146 } 147 return iset; 148 } 149 150 /**  * 151 * @brief union 152 **  */ 153 UnicodeSet UnicodeSet::operator+(const UnicodeSet & other) const { 154 UnicodeSet iset; 155 const auto e1 = quad_end(); 156 const auto e2 = other.quad_end(); 157 for (auto i1 = quad_begin(), i2 = other.quad_begin(); i1 != e1 && i2 != e2; ) { 158 const auto run1 = i1.getRun(); 159 const auto run2 = i2.getRun(); 160 const auto n = std::min(run1.mRunLength, run2.mRunLength); 161 if (run1.mType == run2.mType && run1.mType != Mixed) { 162 iset.append_run(run1.mType, n); 163 i1 += n; 164 i2 += n; 165 } 166 else if (run1.mType == Empty) { 167 for (unsigned i = 0; i != n; ++i, ++i2) { 168 iset.append_quad(i2.getQuad()); 169 } 170 i1 += n; 171 } 172 else if (run2.mType == Empty) { 173 for (unsigned i = 0; i != n; ++i, ++i1) { 174 iset.append_quad(i1.getQuad()); 175 } 176 i2 += n; 177 } 178 else { 179 for (unsigned i = 0; i < n; ++i, ++i1, ++i2) { 180 iset.append_quad(i1.getQuad()  i2.getQuad()); 181 } 182 } 183 } 184 return iset; 185 } 186 187 /**  * 188 * @brief difference 189 **  */ 190 UnicodeSet UnicodeSet::operator(const UnicodeSet & other) const { 191 UnicodeSet iset; 192 const auto e1 = quad_end(); 193 const auto e2 = other.quad_end(); 194 for (auto i1 = quad_begin(), i2 = other.quad_begin(); i1 != e1 && i2 != e2; ) { 195 const auto run1 = i1.getRun(); 196 const auto run2 = i2.getRun(); 197 unsigned n = std::min(run1.mRunLength, run2.mRunLength); 198 if ((run1.mType == Empty)  (run2.mType == Full)  (run1.mType == Full && run2.mType == Empty)) { 199 iset.append_run(run1.mType, n); 200 i1 += n; 201 i2 += n; 202 } 203 else if (run1.mType == Full) { 204 for (unsigned i = 0; i != n; ++i, ++i2) { 205 iset.append_quad(FULL_QUAD_MASK ^ i2.getQuad()); 206 } 207 i1 += n; 208 } 209 else if (run2.mType == Empty) { 210 for (unsigned i = 0; i != n; ++i, ++i1) { 211 iset.append_quad(i1.getQuad()); 212 } 213 i2 += n; 214 } 215 else { 216 for (unsigned i = 0; i != n; ++i, ++i1, ++i2) { 217 iset.append_quad(i1.getQuad() &~ i2.getQuad()); 218 } 219 } 220 } 221 return iset; 222 } 223 224 /**  * 225 * @brief symmetric difference 226 **  */ 227 UnicodeSet UnicodeSet::operator^(const UnicodeSet & other) const { 228 UnicodeSet iset; 229 const auto e1 = quad_end(); 230 const auto e2 = other.quad_end(); 231 for (auto i1 = quad_begin(), i2 = other.quad_begin(); i1 != e1 && i2 != e2; ) { 232 const auto run1 = i1.getRun(); 233 const auto run2 = i2.getRun(); 234 unsigned n = std::min(run1.mRunLength, run2.mRunLength); 235 if (run1.mType != Mixed && run2.mType != Mixed) { 236 iset.append_run(run1.mType == run2.mType ? Empty : Full, n); 237 i1 += n; 238 i2 += n; 239 } 240 else if (run1.mType == Empty) { 241 for (int i = 0; i < n; ++i, ++i2) { 242 iset.append_quad(i2.getQuad()); 243 } 244 i1 += n; 245 } 246 else if (run2.mType == Empty) { 247 for (int i = 0; i < n; ++i, ++i1) { 248 iset.append_quad(i1.getQuad()); 249 } 250 i2 += n; 251 } 252 else if (run1.mType == Full) { 253 for (int i = 0; i < n; ++i, ++i2) { 254 iset.append_quad(FULL_QUAD_MASK ^ i2.getQuad()); 255 } 256 i1 += n; 257 } 258 else if (run2.mType == Empty) { 259 for (unsigned i = 0; i < n; ++i, ++i1) { 260 iset.append_quad(FULL_QUAD_MASK ^ i1.getQuad()); 261 } 262 i2 += n; 263 } 264 else { 265 for (unsigned i = 0; i != n; ++i, ++i1, ++i2) { 266 iset.append_quad(i1.getQuad() ^ i2.getQuad()); 267 } 268 } 269 } 270 return iset; 271 } 272 273 /**  * 274 * @brief contains 275 * @param codepoint 276 * 277 * Return whether this UnicodeSet contains the specified code point 278 **  */ 279 bool UnicodeSet::contains(const codepoint_t codepoint) const { 280 281 auto n = codepoint / QUAD_BITS; 282 unsigned runIndex = 0; 283 unsigned quadIndex = 0; 284 285 for (;;) { 286 const RunStructure & t = runs[runIndex]; 287 if (t.mRunLength >= n) { 288 if (t.mType == Mixed) { 289 return (quads[quadIndex + n  1] & (static_cast<bitquad_t>(1) << (codepoint & MOD_QUAD_BIT_MASK))) != 0; 290 } 291 return (t.mType == Full); 292 } 293 if (t.mType == Mixed) { 294 quadIndex += n; 295 } 296 ++runIndex; 297 n = t.mRunLength; 298 } 299 300 } 301 302 /**  * 303 * @brief UnicodeSet::quad_iterator::advance 304 **  */ 46 305 void UnicodeSet::quad_iterator::advance(unsigned n) { 47 306 while (n > 0) { 48 307 const RunStructure & t = mUnicodeSet.runs[mRunIndex]; 49 intremain = t.mRunLength  mOffset;308 const unsigned remain = t.mRunLength  mOffset; 50 309 if (remain > n) { 51 310 mOffset += n; … … 74 333 } 75 334 76 void UnicodeSet::append_run(run_type_t run_type, int run_length) { 77 if (run_length == 0) { 78 return; 79 } 80 else if (runs.size() == 0) { 81 runs.emplace_back(run_type, run_length); 82 } 83 else { 84 RunStructure last_run = runs[runs.size()1]; 85 if (last_run.mType == run_type) { 86 runs.back().mRunLength += run_length; 87 } 88 else { 89 runs.emplace_back(run_type, run_length); 90 } 91 } 92 quad_count += run_length; 93 } 94 95 void UnicodeSet::append_quad(bitquad_t q) { 96 if (q == 0) { 97 append_run(Empty, 1); 98 } 99 else if (q == FULL_QUAD_MASK) { 100 append_run(Full, 1); 101 } 102 else { 103 quads.push_back(q); 104 append_run(Mixed, 1); 105 } 106 } 107 108 void Dump_uset(const UnicodeSet & s) { 109 for (auto it = s.quad_begin(); it != s.quad_end(); ++it) { 110 RunStructure this_run = get_run(it); 111 if (this_run.mType == Empty) { 112 std::cout << "Empty(" << this_run.mRunLength << ")\n"; 113 it += this_run.mRunLength; 114 } 115 else if (this_run.mType == Full) { 116 std::cout << "Full(" << this_run.mRunLength << ")\n"; 117 it += this_run.mRunLength; 118 } 119 else { 120 for (int i = 0; i != this_run.mRunLength; i++) { 121 std::cout << "Mixed(" << std::hex << get_quad(it) << std::dec << ")\n"; 122 ++it; 123 } 124 } 125 } 126 } 127 128 UnicodeSet empty_uset() { 129 UnicodeSet iset; 130 iset.runs.emplace_back(Empty, UNICODE_QUAD_COUNT); 131 iset.quad_count = UNICODE_QUAD_COUNT; 132 return iset; 133 } 134 135 // singleton set constructor 136 UnicodeSet singleton_uset(int codepoint) { 137 UnicodeSet iset; 138 int quad_no = codepoint / QUAD_BITS; 139 bitquad_t quad_val = 1 << (codepoint & MOD_QUAD_BIT_MASK); 140 if (quad_no > 0) iset.append_run(Empty, quad_no); 141 iset.append_run(Mixed, 1); 142 iset.quads.push_back(quad_val); 143 if (quad_no < UNICODE_QUAD_COUNT  1) iset.append_run(Empty, UNICODE_QUAD_COUNT  (quad_no + 1)); 144 iset.quad_count = UNICODE_QUAD_COUNT; 145 return iset; 146 } 147 148 // range set constructor 149 UnicodeSet range_uset(int lo_codepoint, int hi_codepoint) { 150 UnicodeSet iset; 151 int lo_quad_no = lo_codepoint / QUAD_BITS; 152 int hi_quad_no = hi_codepoint / QUAD_BITS; 153 int lo_offset = lo_codepoint & MOD_QUAD_BIT_MASK; 154 int hi_offset = hi_codepoint & MOD_QUAD_BIT_MASK; 155 if (lo_quad_no > 0) iset.append_run(Empty, lo_quad_no); 156 if (lo_quad_no == hi_quad_no) { 157 bitquad_t quad = (FULL_QUAD_MASK << lo_offset) & (FULL_QUAD_MASK >> (QUAD_BITS  1  hi_offset)); 158 iset.append_quad(quad); 159 } 160 else { 161 iset.append_quad((FULL_QUAD_MASK << lo_offset) & FULL_QUAD_MASK); 162 iset.append_run(Full, hi_quad_no  (lo_quad_no + 1)); 163 iset.append_quad((FULL_QUAD_MASK >> (QUAD_BITS  1  hi_offset)) & FULL_QUAD_MASK); 164 } 165 if (hi_quad_no < UNICODE_QUAD_COUNT  1) iset.append_run(Empty, UNICODE_QUAD_COUNT  (hi_quad_no + 1)); 166 return iset; 167 } 168 169 UnicodeSet uset_complement (const UnicodeSet & s) { 170 assert(s.quad_count == UNICODE_QUAD_COUNT); 171 UnicodeSet iset; 172 for (auto itr = s.quad_begin(); itr != s.quad_end(); ) { 173 auto run = get_run(itr); 174 if (run.mType == Empty) { 175 iset.append_run(Full, run.mRunLength); 176 itr += run.mRunLength; 177 } 178 else if (run.mType == Full) { 179 iset.append_run(Empty, run.mRunLength); 180 itr += run.mRunLength; 181 } 182 else { 183 for (unsigned i = 0; i != run.mRunLength; i++) { 184 iset.append_quad(FULL_QUAD_MASK ^ get_quad(itr++)); 185 } 186 } 187 } 188 return iset; 189 } 190 191 UnicodeSet uset_intersection (const UnicodeSet & s1, const UnicodeSet & s2) { 192 assert(s1.quad_count == UNICODE_QUAD_COUNT); 193 assert(s2.quad_count == UNICODE_QUAD_COUNT); 194 UnicodeSet iset; 195 for (auto i1 = s1.quad_begin(), i2 = s2.quad_begin(); i1 != s1.quad_end(); ) { 196 auto run1 = get_run(i1); 197 auto run2 = get_run(i2); 198 unsigned n = std::min(run1.mRunLength, run2.mRunLength); 199 if ((run1.mType == Empty)  (run2.mType == Empty)) { 200 iset.append_run(Empty, n); 201 i1 += n; 202 i2 += n; 203 } 204 else if ((run1.mType == Full) && (run2.mType == Full)) { 205 iset.append_run(Full, n); 206 i1 += n; 207 i2 += n; 208 } 209 else if (run1.mType == Full) { 210 for (unsigned i = 0; i != n; ++i, ++i2) { 211 iset.append_quad(get_quad(i2)); 212 } 213 i1 += n; 214 } 215 else if (run2.mType == Full) { 216 for (unsigned i = 0; i != n; ++i, ++i1) { 217 iset.append_quad(get_quad(i1)); 218 } 219 i2 += n; 220 } 221 else { 222 for (unsigned i = 0; i < n; ++i, ++i1, ++i2) { 223 iset.append_quad(get_quad(i1) & get_quad(i2)); 224 } 225 } 226 } 227 return iset; 228 } 229 230 UnicodeSet uset_union (const UnicodeSet & s1, const UnicodeSet & s2) { 231 assert(s1.quad_count == UNICODE_QUAD_COUNT); 232 assert(s2.quad_count == UNICODE_QUAD_COUNT); 233 UnicodeSet iset; 234 for (auto i1 = s1.quad_begin(), i2 = s2.quad_begin(); i1 != s1.quad_end(); ) { 235 auto run1 = get_run(i1); 236 auto run2 = get_run(i2); 237 unsigned n = std::min(run1.mRunLength, run2.mRunLength); 238 if ((run1.mType == Empty) && (run2.mType == Empty)) { 239 iset.append_run(Empty, n); 240 i1 += n; 241 i2 += n; 242 } 243 else if ((run1.mType == Full)  (run2.mType == Full)) { 244 iset.append_run(Full, n); 245 i1 += n; 246 i2 += n; 247 } 248 else if (run1.mType == Empty) { 249 for (unsigned i = 0; i < n; ++i, ++i2) { 250 iset.append_quad(get_quad(i2)); 251 } 252 i1 += n; 253 } 254 else if (run2.mType == Empty) { 255 for (unsigned i = 0; i < n; ++i, ++i1) { 256 iset.append_quad(get_quad(i1)); 257 } 258 i2 += n; 259 } 260 else { 261 for (unsigned i = 0; i != n; ++i, ++i1, ++i2) { 262 iset.append_quad(get_quad(i1)  get_quad(i2)); 263 } 264 } 265 } 266 return iset; 267 } 268 269 UnicodeSet uset_difference (const UnicodeSet & s1, const UnicodeSet & s2) { 270 assert(s1.quad_count == UNICODE_QUAD_COUNT); 271 assert(s2.quad_count == UNICODE_QUAD_COUNT); 272 UnicodeSet iset; 273 for (auto i1 = s1.quad_begin(), i2 = s2.quad_begin(); i1 != s1.quad_end(); ) { 274 auto run1 = get_run(i1); 275 auto run2 = get_run(i2); 276 unsigned n = std::min(run1.mRunLength, run2.mRunLength); 277 if ((run1.mType == Empty)  (run2.mType == Full)) { 278 iset.append_run(Empty, n); 279 i1 += n; 280 i2 += n; 281 } 282 else if ((run1.mType == Full) && (run2.mType == Empty)) { 283 iset.append_run(Full, n); 284 i1 += n; 285 i2 += n; 286 } 287 else if (run1.mType == Full) { 288 for (unsigned i = 0; i != n; ++i, ++i2) { 289 iset.append_quad(FULL_QUAD_MASK ^ get_quad(i2)); 290 } 291 i1 += n; 292 } 293 else if (run2.mType == Empty) { 294 for (unsigned i = 0; i != n; ++i, ++i1) { 295 iset.append_quad(get_quad(i1)); 296 } 297 i2 += n; 298 } 299 else { 300 for (unsigned i = 0; i != n; ++i, ++i1, ++i2) { 301 iset.append_quad(get_quad(i1) & ~get_quad(i2)); 302 } 303 } 304 } 305 return iset; 306 } 307 308 UnicodeSet uset_symmetric_difference (const UnicodeSet & s1, const UnicodeSet & s2) { 309 assert(s1.quad_count == UNICODE_QUAD_COUNT); 310 assert(s2.quad_count == UNICODE_QUAD_COUNT); 311 UnicodeSet iset; 312 for (auto i1 = s1.quad_begin(), i2 = s2.quad_begin(); i1 != s1.quad_end(); ) { 313 auto run1 = get_run(i1); 314 auto run2 = get_run(i2); 315 unsigned n = std::min(run1.mRunLength, run2.mRunLength); 316 if (((run1.mType == Empty) && (run2.mType == Full))  ((run1.mType == Full) && (run2.mType == Empty))) { 317 iset.append_run(Full, n); 318 i1 += n; 319 i2 += n; 320 } 321 else if (((run1.mType == Full) && (run2.mType == Full))  ((run1.mType == Empty) && (run2.mType == Empty))) { 322 iset.append_run(Empty, n); 323 i1 += n; 324 i2 += n; 325 } 326 else if (run1.mType == Empty) { 327 for (int i = 0; i < n; ++i, ++i2) { 328 iset.append_quad(get_quad(i2)); 329 } 330 i1 += n; 331 } 332 else if (run2.mType == Empty) { 333 for (int i = 0; i < n; ++i, ++i1) { 334 iset.append_quad(get_quad(i1)); 335 } 336 i2 += n; 337 } 338 else if (run1.mType == Full) { 339 for (int i = 0; i < n; ++i, ++i2) { 340 iset.append_quad(FULL_QUAD_MASK ^ get_quad(i2)); 341 } 342 i1 += n; 343 } 344 else if (run2.mType == Empty) { 345 for (unsigned i = 0; i < n; ++i, ++i1) { 346 iset.append_quad(FULL_QUAD_MASK ^ get_quad(i1)); 347 } 348 i2 += n; 349 } 350 else { 351 for (unsigned i = 0; i != n; ++i, ++i1, ++i2) { 352 iset.append_quad(get_quad(i1) ^ get_quad(i2)); 353 } 354 } 355 } 356 return iset; 357 } 358 359 bool uset_member(const UnicodeSet & s, int codepoint){ 360 int quad_no = codepoint / QUAD_BITS; 361 bitquad_t quad_val = 1 << (codepoint & MOD_QUAD_BIT_MASK); 362 return (get_quad(s.quad_begin() + quad_no) & quad_val) != 0; 363 } 364 335 /**  * 336 * @brief UnicodeSet::iterator::advance 337 **  */ 365 338 void UnicodeSet::iterator::advance(unsigned n) { 366 339 367 while (n) { 368 369 const RunStructure & t = mUnicodeSet.runs[mRunIndex]; 370 371 if (t.mType == Full) { 372 mRight = mBaseCodePoint + t.mRunLength * QUAD_BITS; 373 n; 374 } 340 std::cerr << "advance(" << n << ")\n"; 341 342 mMinCodePoint = mBaseCodePoint; 343 344 for ( ;n; ++mRunIterator) { 345 346 const RunStructure & t = *mRunIterator; 347 348 std::cerr << "Type:"; 349 switch (t.mType) { 350 case Empty: std::cerr << "Empty"; break; 351 case Full: std::cerr << "Full"; break; 352 case Mixed: std::cerr << "Mixed"; break; 353 } 354 std::cerr << " Length:" << t.mRunLength; 355 std::cerr << " BaseCodePoint:" << mBaseCodePoint; 356 357 358 std::cerr << std::endl; 375 359 376 360 if (t.mType != Mixed) { 377 ++mRunIndex;378 mBaseCodePoint += t.mRunLength * QUAD_BITS;361 mMaxCodePoint = mBaseCodePoint + t.mRunLength * QUAD_BITS; 362 mBaseCodePoint = mMaxCodePoint; 379 363 mQuadOffset = 0; 380 mQuadRunIndex = 0; 364 mQuadPosition = 0; 365 if (t.mType == Full) { 366 n; 367 } 381 368 continue; 382 369 } 383 370 384 while (mQuadRunIndex < t.mRunLength) { 385 386 const bitquad_t q = mUnicodeSet.quads[mQuadIndex]; 387 const bitquad_t m = q &(MOD_QUAD_BIT_MASK >> mQuadOffset); 371 while (mQuadPosition != t.mRunLength) { 372 373 const bitquad_t q = *mQuadIterator; 374 375 const bitquad_t m = q & ((1) >> mQuadOffset); 376 377 std::cerr << " q:" << std::hex << q << std::endl; 378 std::cerr << " +m:" << std::hex << m << std::dec << " (" << mQuadOffset << ")" << std::endl; 388 379 389 380 // Nothing left in this quad to add; skip to the next one. 390 381 if (m == 0) { 391 382 mBaseCodePoint += QUAD_BITS; 392 mLeft = mBaseCodePoint; 393 ++mQuadIndex; 394 if (++mQuadRunIndex == t.mRunLength) { 395 ++mRunIndex; 396 } 383 mMinCodePoint = mBaseCodePoint; 384 ++mQuadIterator; 397 385 continue; 398 386 } 399 387 400 388 mQuadOffset = scan_forward_zeroes(m); 401 mLeft = mBaseCodePoint + mQuadOffset; 389 mMinCodePoint = mBaseCodePoint + mQuadOffset; 390 391 392 402 393 break; 403 394 } 404 395 405 406 while (mQuadRunIndex < t.mRunLength) { 396 while (mQuadPosition != t.mRunLength) { 407 397 408 398 // Although the initial position was in this quad, the final position isn't … … 410 400 // Empty. 411 401 412 const bitquad_t q = mUnicodeSet.quads[mQuadIndex]; 413 const bitquad_t m = ~q & (MOD_QUAD_BIT_MASK >> mQuadOffset); 402 const bitquad_t q = *mQuadIterator; 403 const bitquad_t m = ~q & ((1) >> mQuadOffset); 404 405 std::cerr << " q:" << std::hex << q << std::endl; 406 std::cerr << " m:" << std::hex << m << std::dec << " (" << mQuadOffset << ")" << std::endl; 407 414 408 // Nothing left in this quad to add; skip to the next one. 415 409 if (m == 0) { 416 410 mBaseCodePoint += QUAD_BITS; 417 mRight = mBaseCodePoint; 418 ++mQuadIndex; 419 if (++mQuadRunIndex == t.mRunLength) { 420 ++mRunIndex; 421 } 411 mMaxCodePoint = mBaseCodePoint; 412 ++mQuadIterator; 422 413 continue; 423 414 } 424 415 425 416 mQuadOffset = scan_forward_zeroes(m); 426 m Right = mBaseCodePoint + mQuadOffset;417 mMaxCodePoint = mBaseCodePoint + mQuadOffset; 427 418 n; 428 419 break; … … 431 422 } 432 423 433 424 UnicodeSet::UnicodeSet() 425 : runs({{{Empty, UNICODE_QUAD_COUNT}}}) 426 { 427 428 } 429 430 // singleton set constructor 431 UnicodeSet::UnicodeSet(const codepoint_t codepoint) { 432 codepoint_t quad_no = codepoint / QUAD_BITS; 433 if (quad_no > 0) { 434 append_run(Empty, quad_no); 435 } 436 append_run(Mixed, 1); 437 quads.push_back(static_cast<bitquad_t>(1) << (codepoint & MOD_QUAD_BIT_MASK)); 438 if (quad_no < UNICODE_QUAD_COUNT  1) { 439 append_run(Empty, UNICODE_QUAD_COUNT  (quad_no + 1)); 440 } 441 } 442 443 // range set constructor 444 UnicodeSet::UnicodeSet(const codepoint_t lo_codepoint, const codepoint_t hi_codepoint) { 445 codepoint_t lo_quad_no = lo_codepoint / QUAD_BITS; 446 codepoint_t hi_quad_no = hi_codepoint / QUAD_BITS; 447 codepoint_t lo_offset = lo_codepoint & MOD_QUAD_BIT_MASK; 448 codepoint_t hi_offset = hi_codepoint & MOD_QUAD_BIT_MASK; 449 if (lo_quad_no > 0) { 450 append_run(Empty, lo_quad_no); 451 } 452 if (lo_quad_no == hi_quad_no) { 453 bitquad_t quad = (FULL_QUAD_MASK << lo_offset) & (FULL_QUAD_MASK >> (QUAD_BITS  1  hi_offset)); 454 append_quad(quad); 455 } 456 else { 457 append_quad((FULL_QUAD_MASK << lo_offset) & FULL_QUAD_MASK); 458 append_run(Full, hi_quad_no  (lo_quad_no + 1)); 459 append_quad((FULL_QUAD_MASK >> (QUAD_BITS  1  hi_offset)) & FULL_QUAD_MASK); 460 } 461 if (hi_quad_no < UNICODE_QUAD_COUNT  1) { 462 append_run(Empty, UNICODE_QUAD_COUNT  (hi_quad_no + 1)); 463 } 464 } 465
Note: See TracChangeset
for help on using the changeset viewer.