Changeset 5742 for icGREP/icgrepdevel/icgrep/UCD
 Timestamp:
 Nov 22, 2017, 3:32:58 PM (22 months ago)
 Location:
 icGREP/icgrepdevel/icgrep/UCD
 Files:

 2 edited
Legend:
 Unmodified
 Added
 Removed

icGREP/icgrepdevel/icgrep/UCD/unicode_set.cpp
r5740 r5742 44 44 45 45 46 SlabAllocator<> UnicodeSet:: mAllocator;46 SlabAllocator<> UnicodeSet::GlobalAllocator; 47 47 48 48 const uint64_t QUAD_BITS = (8 * sizeof(bitquad_t)); … … 101 101 unsigned mixedQuads = 0; 102 102 for (auto run : runs) { 103 const auto type = typeOf(run); 104 if (LLVM_UNLIKELY(type != Empty && type != Mixed && type != Full)) { 105 throw std::runtime_error("illegal run type " + std::to_string(type) + " found"); 106 } 103 107 const auto l = lengthOf(run); 104 if ( l == 0) {105 throw std::runtime_error(" Zerolength quad found!");106 } 107 if (type Of(run)== Mixed) {108 if (LLVM_UNLIKELY(l == 0)) { 109 throw std::runtime_error("zerolength quad found"); 110 } 111 if (type == Mixed) { 108 112 mixedQuads += l; 109 113 } 110 114 sum += l; 111 115 } 112 if ( sum != UNICODE_QUAD_COUNT) {113 throw std::runtime_error(" Invalid quad count: found " + std::to_string(sum) + "but expected " + std::to_string(UNICODE_QUAD_COUNT));114 } 115 if ( mixedQuads != quads.size()) {116 throw std::runtime_error(" Invalid mixed quad count: found " + std::to_string(quads.size()) + "but expected " + std::to_string(mixedQuads));117 } 118 for ( auto quad : quads) {119 if ( quad == 0) {116 if (LLVM_UNLIKELY(sum != UNICODE_QUAD_COUNT)) { 117 throw std::runtime_error("found " + std::to_string(sum) + " quads but expected " + std::to_string(UNICODE_QUAD_COUNT)); 118 } 119 if (LLVM_UNLIKELY(mixedQuads != quads.size())) { 120 throw std::runtime_error("found " + std::to_string(quads.size()) + " mixed quad but expected " + std::to_string(mixedQuads)); 121 } 122 for (const auto quad : quads) { 123 if (LLVM_UNLIKELY(quad == 0)) { 120 124 throw std::runtime_error("Empty quad found in Mixed quad array!"); 121 } else if ( quad == FULL_QUAD_MASK) {125 } else if (LLVM_UNLIKELY(quad == FULL_QUAD_MASK)) { 122 126 throw std::runtime_error("Full quad found in Mixed quad array!"); 123 127 } … … 286 290 *qi++ = ~quad; 287 291 } 288 return UnicodeSet(std::move(runs), std::move(quads) );292 return UnicodeSet(std::move(runs), std::move(quads), mRuns.get_allocator()); 289 293 } 290 294 … … 293 297 **  */ 294 298 UnicodeSet UnicodeSet::operator&(const UnicodeSet & other) const { 299 300 std::vector<run_t> runs; 301 std::vector<bitquad_t> quads; 302 303 auto i1 = quad_begin(), i2 = other.quad_begin(); 304 305 for (;;) { 306 assert ("neither run can be zero length unless both are of zero length" && ((i1.length() != 0) ^ (i2.length() == 0))); 307 const auto n = std::min(i1.length(), i2.length()); 308 if (LLVM_UNLIKELY(n == 0)) { 309 break; 310 } 311 if ((i1.type() == Full) && (i2.type() == Full)) { 312 append_run(Full, n, runs); 313 i1 += n; 314 i2 += n; 315 } else if ((i1.type() == Empty)  (i2.type() == Empty)) { 316 append_run(Empty, n, runs); 317 i1 += n; 318 i2 += n; 319 } else if (i1.type() == Full) { 320 for (unsigned i = 0; i != n; ++i, ++i2) { 321 append_quad(i2.quad(), quads, runs); 322 } 323 i1 += n; 324 } else if (i2.type() == Full) { 325 for (unsigned i = 0; i != n; ++i, ++i1) { 326 append_quad(i1.quad(), quads, runs); 327 } 328 i2 += n; 329 } else { // both Mixed 330 assert (i1.type() == Mixed && i2.type() == Mixed); 331 for (unsigned i = 0; i != n; ++i, ++i1, ++i2) { 332 append_quad(i1.quad() & i2.quad(), quads, runs); 333 } 334 } 335 } 336 assert (i1 == quad_end() && i2 == other.quad_end()); 337 return UnicodeSet(std::move(runs), std::move(quads), mRuns.get_allocator()); 338 } 339 340 /**  * 341 * @brief union 342 **  */ 343 UnicodeSet UnicodeSet::operator+(const UnicodeSet & other) const { 295 344 std::vector<run_t> runs; 296 345 std::vector<bitquad_t> quads; 297 const auto e1 = quad_end();298 const auto e2 = other.quad_end();299 346 auto i1 = quad_begin(), i2 = other.quad_begin(); 300 347 for (;;) { … … 304 351 break; 305 352 } 306 if ((i1.type() == Full) && (i2.type() == Full)) { 353 if ((i1.type() == Empty) && (i2.type() == Empty)) { 354 append_run(Empty, n, runs); 355 i1 += n; 356 i2 += n; 357 } else if ((i1.type() == Full)  (i2.type() == Full)) { 307 358 append_run(Full, n, runs); 308 359 i1 += n; 309 360 i2 += n; 310 } else if ((i1.type() == Empty)  (i2.type() == Empty)) { 311 append_run(Empty, n, runs); 312 i1 += n; 313 i2 += n; 314 } else if (i1.type() == Full) { 361 } else if (i1.type() == Empty) { 362 assert (i2.type() == Mixed); 315 363 for (unsigned i = 0; i != n; ++i, ++i2) { 316 364 append_quad(i2.quad(), quads, runs); 317 365 } 318 366 i1 += n; 319 } else if (i2.type() == Full) { 367 } else if (i2.type() == Empty) { 368 assert (i1.type() == Mixed); 320 369 for (unsigned i = 0; i != n; ++i, ++i1) { 321 370 append_quad(i1.quad(), quads, runs); … … 323 372 i2 += n; 324 373 } else { // both Mixed 325 326 for (unsigned i = 0; i !=n; ++i, ++i1, ++i2) {327 append_quad(i1.quad() &i2.quad(), quads, runs);328 } 329 } 330 } 331 assert (i1 == e1 && i2 == e2);332 return UnicodeSet(std::move(runs), std::move(quads) );333 } 334 335 /**  * 336 * @brief union337 **  */ 338 UnicodeSet UnicodeSet::operator +(const UnicodeSet & other) const {374 assert (i1.type() == Mixed && i2.type() == Mixed); 375 for (unsigned i = 0; i < n; ++i, ++i1, ++i2) { 376 append_quad(i1.quad()  i2.quad(), quads, runs); 377 } 378 } 379 } 380 assert (i1 == quad_end() && i2 == other.quad_end()); 381 return UnicodeSet(std::move(runs), std::move(quads), mRuns.get_allocator()); 382 } 383 384 /**  * 385 * @brief difference 386 **  */ 387 UnicodeSet UnicodeSet::operator(const UnicodeSet & other) const { 339 388 std::vector<run_t> runs; 340 389 std::vector<bitquad_t> quads; 341 const auto e1 = quad_end();342 const auto e2 = other.quad_end();343 390 auto i1 = quad_begin(), i2 = other.quad_begin(); 344 391 for (;;) { 345 392 assert ("neither run can be zero length unless both are of zero length" && ((i1.length() != 0) ^ (i2.length() == 0))); 346 393 const auto n = std::min(i1.length(), i2.length()); 347 if (LLVM_UNLIKELY(n == 0)) {348 break;349 }350 if ((i1.type() == Empty) && (i2.type() == Empty)) {351 append_run(Empty, n, runs);352 i1 += n;353 i2 += n;354 } else if ((i1.type() == Full)  (i2.type() == Full)) {355 append_run(Full, n, runs);356 i1 += n;357 i2 += n;358 } else if (i1.type() == Empty) {359 assert (i2.type() == Mixed);360 for (unsigned i = 0; i != n; ++i, ++i2) {361 append_quad(i2.quad(), quads, runs);362 }363 i1 += n;364 } else if (i2.type() == Empty) {365 assert (i1.type() == Mixed);366 for (unsigned i = 0; i != n; ++i, ++i1) {367 append_quad(i1.quad(), quads, runs);368 }369 i2 += n;370 } else { // both Mixed371 assert (i1.type() == Mixed && i2.type() == Mixed);372 for (unsigned i = 0; i < n; ++i, ++i1, ++i2) {373 append_quad(i1.quad()  i2.quad(), quads, runs);374 }375 }376 }377 378 assert (i1 == e1 && i2 == e2);379 return UnicodeSet(std::move(runs), std::move(quads));380 }381 382 /**  *383 * @brief difference384 **  */385 UnicodeSet UnicodeSet::operator(const UnicodeSet & other) const {386 std::vector<run_t> runs;387 std::vector<bitquad_t> quads;388 const auto e1 = quad_end();389 const auto e2 = other.quad_end();390 auto i1 = quad_begin(), i2 = other.quad_begin();391 for (;;) {392 assert ("neither run can be zero length unless both are of zero length" && ((i1.length() != 0) ^ (i2.length() == 0)));393 const auto n = std::min(i1.length(), i2.length());394 assert (n != 0  (i1 == e1 && i2 == e2));395 394 if (LLVM_UNLIKELY(n == 0)) { 396 395 break; … … 416 415 i2 += n; 417 416 } else { // both Mixed 418 417 assert (i1.type() == Mixed && i2.type() == Mixed); 419 418 for (unsigned i = 0; i != n; ++i, ++i1, ++i2) { 420 419 append_quad(i1.quad() &~ i2.quad(), quads, runs); … … 422 421 } 423 422 } 424 assert (i1 == e1 && i2 == e2);425 return UnicodeSet(std::move(runs), std::move(quads) );423 assert (i1 == quad_end() && i2 == other.quad_end()); 424 return UnicodeSet(std::move(runs), std::move(quads), mRuns.get_allocator()); 426 425 } 427 426 … … 432 431 std::vector<run_t> runs; 433 432 std::vector<bitquad_t> quads; 434 const auto e1 = quad_end();435 const auto e2 = other.quad_end();436 433 auto i1 = quad_begin(), i2 = other.quad_begin(); 437 434 for (;;) { … … 466 463 i2 += n; 467 464 } else { // both Mixed 468 465 assert (i1.type() == Mixed && i2.type() == Mixed); 469 466 for (unsigned i = 0; i != n; ++i, ++i1, ++i2) { 470 467 append_quad(i1.quad() ^ i2.quad(), quads, runs); … … 472 469 } 473 470 } 474 assert (i1 == e1 && i2 == e2);475 return UnicodeSet(std::move(runs), std::move(quads) );471 assert (i1 == quad_end() && i2 == other.quad_end()); 472 return UnicodeSet(std::move(runs), std::move(quads), mRuns.get_allocator()); 476 473 } 477 474 … … 739 736 return false; 740 737 } 741 742 738 743 739 /**  * 744 740 * @brief intersects … … 751 747 auto n = std::min(i1.length(), i2.length()); 752 748 if (LLVM_UNLIKELY(n == 0)) { 749 assert (i1 == quad_end() && i2 == other.quad_end()); 753 750 return false; 754 751 } … … 759 756 return true; 760 757 } else { //both Mixed 758 assert (i1.type() == Mixed && i2.type() == Mixed); 761 759 for (; n; n, ++i1, ++i2) { 762 760 if ((i1.quad() & i2.quad()) != 0) return true; … … 766 764 } 767 765 766 /**  * 767 * @brief isSubsetOf 768 * @param other 769 * 770 * Return true if this UnicodeSet is a subset of other 771 **  */ 772 bool UnicodeSet::subset(const UnicodeSet & other) const { 773 for (auto i1 = quad_begin(), i2 = other.quad_begin();; ) { 774 auto n = std::min(i1.length(), i2.length()); 775 if (LLVM_UNLIKELY(n == 0)) { 776 assert (i1 == quad_end() && i2 == other.quad_end()); 777 return true; 778 } 779 if (i1.type() == Empty  i2.type() == Full) { 780 i1 += n; 781 i2 += n; 782 } else if (i1.type() == Full  i2.type() == Empty) { 783 return false; 784 } else { //both Mixed 785 assert (i1.type() == Mixed && i2.type() == Mixed); 786 for (; n; n, ++i1, ++i2) { 787 if (i1.quad() &~ i2.quad()) return false; 788 } 789 } 790 } 791 } 768 792 769 793 /**  * … … 771 795 **  */ 772 796 void UnicodeSet::quad_iterator::advance(unsigned n) { 773 while (n > 0) {774 assert (mRemaining > 0);797 assert (mRemaining > 0); 798 while (n > 0) { 775 799 if (mRemaining > n) { 776 800 if (mType == Mixed) { … … 785 809 mQuadIterator += mRemaining; 786 810 } 787 n = mRemaining; 811 n = mRemaining; 788 812 ++mRunIterator; 789 813 if (LLVM_UNLIKELY(mRunIterator == mRunEnd)) { … … 905 929 * @brief Empty/Full Set Constructor 906 930 **  */ 907 UnicodeSet::UnicodeSet(run_type_t emptyOrFull) 908 : mRuns(mAllocator) 909 , mQuads(mAllocator) 910 { 931 UnicodeSet::UnicodeSet(run_type_t emptyOrFull, ProxyAllocator<> allocator) 932 : mRuns(allocator) 933 , mQuads(allocator) { 911 934 assert((emptyOrFull == Empty)  (emptyOrFull == Full)); 912 935 append_run(emptyOrFull, UNICODE_QUAD_COUNT, mRuns); … … 917 940 * @brief Singleton Set Constructor 918 941 **  */ 919 UnicodeSet::UnicodeSet(const codepoint_t codepoint) 920 : mRuns(mAllocator) 921 , mQuads(mAllocator) 922 { 942 UnicodeSet::UnicodeSet(const codepoint_t codepoint, ProxyAllocator<> allocator) 943 : mRuns(allocator) 944 , mQuads(allocator) { 923 945 const codepoint_t quad_no = codepoint / QUAD_BITS; 924 946 append_run(Empty, quad_no, mRuns); … … 931 953 * @brief Range Set Constructor 932 954 **  */ 933 UnicodeSet::UnicodeSet(const codepoint_t lo, const codepoint_t hi )934 : mRuns( mAllocator)935 , mQuads( mAllocator)955 UnicodeSet::UnicodeSet(const codepoint_t lo, const codepoint_t hi, ProxyAllocator<> allocator) 956 : mRuns(allocator) 957 , mQuads(allocator) 936 958 { 937 959 const codepoint_t lo_index = lo / QUAD_BITS; … … 959 981 template <typename itr> 960 982 void convertIntervalRangesToSparseSet(const itr begin, const itr end, UnicodeSet::RunVector & mRuns, UnicodeSet::QuadVector & mQuads) { 961 assert (std::is_sorted(begin, end, [](const interval_t l, const interval_t r) { 962 assert (l.first <= l.second); 963 assert (l.second <= UNICODE_MAX); 964 assert (r.first <= r.second); 965 assert (r.second <= UNICODE_MAX); 983 984 std::vector<run_t> runs; 985 std::vector<bitquad_t> quads; 986 987 assert ("interval list must be totally ordered" && std::is_sorted(begin, end, [](const interval_t l, const interval_t r) { 988 if (l.first > l.second) return false; 989 if (l.second > UNICODE_MAX) return false; 990 if (r.first > r.second) return false; 991 if (r.second > UNICODE_MAX) return false; 966 992 return l.second < r.first; 967 993 })); 968 969 std::vector<run_t> runs; 970 std::vector<bitquad_t> quads; 971 994 972 995 codepoint_t prior_index = 0; 973 996 bitquad_t mask = 0; … … 1004 1027 append_run(Empty, UNICODE_QUAD_COUNT  prior_index, runs); 1005 1028 } 1006 assert (verify(runs, quads));1007 1029 mRuns.assign(runs.begin(), runs.end()); 1008 1030 mQuads.assign(quads.begin(), quads.end()); … … 1012 1034 * @brief Interval Range Constructor 1013 1035 **  */ 1014 UnicodeSet::UnicodeSet(std::initializer_list<interval_t>::iterator begin, std::initializer_list<interval_t>::iterator end )1015 : mRuns( 0, {Empty, 0}, mAllocator)1016 , mQuads( 0, 0, mAllocator) {1036 UnicodeSet::UnicodeSet(std::initializer_list<interval_t>::iterator begin, std::initializer_list<interval_t>::iterator end, ProxyAllocator<> allocator) 1037 : mRuns(allocator) 1038 , mQuads(allocator) { 1017 1039 convertIntervalRangesToSparseSet(begin, end, mRuns, mQuads); 1040 assert (verify(mRuns, mQuads)); 1018 1041 } 1019 1042 … … 1021 1044 * @brief Interval Range Constructor 1022 1045 **  */ 1023 UnicodeSet::UnicodeSet(const std::vector<interval_t>::iterator begin, const std::vector<interval_t>::iterator end )1024 : mRuns( 0, {Empty, 0}, mAllocator)1025 , mQuads( 0, 0, mAllocator) {1046 UnicodeSet::UnicodeSet(const std::vector<interval_t>::iterator begin, const std::vector<interval_t>::iterator end, ProxyAllocator<> allocator) 1047 : mRuns(allocator) 1048 , mQuads(allocator) { 1026 1049 convertIntervalRangesToSparseSet(begin, end, mRuns, mQuads); 1050 assert (verify(mRuns, mQuads)); 1027 1051 } 1028 1052 … … 1030 1054 * @brief Copy Constructor 1031 1055 **  */ 1032 UnicodeSet::UnicodeSet(const UnicodeSet & other )1033 : mRuns(other.mRuns, mAllocator)1034 , mQuads(other.mQuads, mAllocator) {1056 UnicodeSet::UnicodeSet(const UnicodeSet & other, ProxyAllocator<> allocator) 1057 : mRuns(other.mRuns, allocator) 1058 , mQuads(other.mQuads, allocator) { 1035 1059 assert (verify(mRuns, mQuads)); 1036 1060 } … … 1039 1063 * @brief Initializer Constructor 1040 1064 **  */ 1041 UnicodeSet::UnicodeSet(std::initializer_list<run_t> r, std::initializer_list<bitquad_t> q )1042 : mRuns(r.begin(), r.end(), mAllocator)1043 , mQuads(q.begin(), q.end(), mAllocator) {1065 UnicodeSet::UnicodeSet(std::initializer_list<run_t> r, std::initializer_list<bitquad_t> q, ProxyAllocator<> allocator) 1066 : mRuns(r.begin(), r.end(), allocator) 1067 , mQuads(q.begin(), q.end(), allocator) { 1044 1068 assert (verify(mRuns, mQuads)); 1045 1069 } … … 1048 1072 * @brief Internal Vector Constructor 1049 1073 **  */ 1050 inline UnicodeSet::UnicodeSet(std::vector<run_t> && r, std::vector<bitquad_t> && q )1051 : mRuns(r.begin(), r.end(), mAllocator)1052 , mQuads(q.begin(), q.end(), mAllocator) {1074 inline UnicodeSet::UnicodeSet(std::vector<run_t> && r, std::vector<bitquad_t> && q, ProxyAllocator<> allocator) 1075 : mRuns(r.begin(), r.end(), allocator) 1076 , mQuads(q.begin(), q.end(), allocator) { 1053 1077 assert (verify(mRuns, mQuads)); 1054 1078 } 
icGREP/icgrepdevel/icgrep/UCD/unicode_set.h
r5740 r5742 112 112 113 113 bool intersects(const UnicodeSet & other) const; 114 115 bool subset(const UnicodeSet & other) const; 114 116 115 117 void insert(const codepoint_t cp); … … 140 142 bool operator<(const UnicodeSet & other) const; 141 143 142 UnicodeSet(run_type_t emptyOrFull = Empty );143 UnicodeSet(const codepoint_t codepoint );144 UnicodeSet(const codepoint_t lo, const codepoint_t hi );145 UnicodeSet(const UnicodeSet & other );146 UnicodeSet(std::initializer_list<run_t> r, std::initializer_list<bitquad_t> q );147 UnicodeSet(std::initializer_list<interval_t>::iterator begin, std::initializer_list<interval_t>::iterator end );148 UnicodeSet(const std::vector<interval_t>::iterator begin, const std::vector<interval_t>::iterator end );144 UnicodeSet(run_type_t emptyOrFull = Empty, ProxyAllocator<> allocator = GlobalAllocator); 145 UnicodeSet(const codepoint_t codepoint, ProxyAllocator<> allocator = GlobalAllocator); 146 UnicodeSet(const codepoint_t lo, const codepoint_t hi, ProxyAllocator<> allocator = GlobalAllocator); 147 UnicodeSet(const UnicodeSet & other, ProxyAllocator<> allocator = GlobalAllocator); 148 UnicodeSet(std::initializer_list<run_t> r, std::initializer_list<bitquad_t> q, ProxyAllocator<> allocator = GlobalAllocator); 149 UnicodeSet(std::initializer_list<interval_t>::iterator begin, std::initializer_list<interval_t>::iterator end, ProxyAllocator<> allocator = GlobalAllocator); 150 UnicodeSet(const std::vector<interval_t>::iterator begin, const std::vector<interval_t>::iterator end, ProxyAllocator<> allocator = GlobalAllocator); 149 151 150 152 inline void swap(UnicodeSet & other); 151 153 inline void swap(UnicodeSet && other); 152 154 155 inline static void Reset() { 156 GlobalAllocator.Reset(); 157 } 158 153 159 protected: 154 160 155 UnicodeSet(std::vector<run_t> && r, std::vector<bitquad_t> && q );156 161 UnicodeSet(std::vector<run_t> && r, std::vector<bitquad_t> && q, ProxyAllocator<> allocator = GlobalAllocator); 162 157 163 class quad_iterator : public boost::iterator_facade<quad_iterator, quad_iterator_return_t, boost::random_access_traversal_tag, quad_iterator_return_t> { 158 164 friend class UnicodeSet; 159 165 friend class boost::iterator_core_access; 160 166 public: 161 quad_iterator(RunIterator runIterator, RunIterator runEnd, QuadIterator quadIterator, QuadIterator quadEnd, const run_type_t type, const length_t remaining)167 explicit quad_iterator(RunIterator runIterator, RunIterator runEnd, QuadIterator quadIterator, QuadIterator quadEnd, const run_type_t type, const length_t remaining) 162 168 : mRunIterator(runIterator) 163 169 , mRunEnd(runEnd) … … 167 173 #endif 168 174 , mType(type) 169 , mRemaining(remaining) {} 175 , mRemaining(remaining) { 176 assert (type == Empty  type == Mixed  type == Full); 177 assert (remaining > 0  type == Empty); 178 assert (remaining <= UNICODE_MAX); 179 } 170 180 171 181 void advance(unsigned n); … … 210 220 211 221 inline quad_iterator quad_begin() const { 212 return quad_iterator(mRuns.cbegin(), mRuns.cend(), mQuads.cbegin(), mQuads.cend(), std::get<0>(*mRuns.cbegin()), std::get<1>(*mRuns.cbegin())); 222 assert (mRuns.cbegin() != mRuns.cend()); 223 return quad_iterator(mRuns.cbegin(), mRuns.cend(), mQuads.cbegin(), mQuads.cend(), mRuns.cbegin()>first, mRuns.cbegin()>second); 213 224 } 214 225 … … 221 232 RunVector mRuns; 222 233 QuadVector mQuads; 223 static SlabAllocator<> mAllocator;234 static SlabAllocator<> GlobalAllocator; 224 235 }; 225 236
Note: See TracChangeset
for help on using the changeset viewer.