Changeset 179
- Timestamp:
- 07/09/08 12:17:19 (1 year ago)
- Files:
- trunk/lib/lib_simd.h (modified) (2 diffs)
- trunk/lib/sse_simd_t.h (added)
- trunk/src/bitlex.c (modified) (16 diffs)
- trunk/src/bitplex.c (modified) (3 diffs)
- trunk/src/byteplex.c (modified) (24 diffs)
- trunk/src/engine.c (modified) (4 diffs)
- trunk/src/symtab.c (modified) (1 diff)
Legend:
- Unmodified
- Added
- Removed
- Modified
- Copied
- Moved
trunk/lib/lib_simd.h
r90 r179 17 17 18 18 #if (defined(__i386) || defined(__x86_64)) 19 #ifdef TEMPLATED_SIMD_LIB 20 #include "sse_simd_t.h" 21 #endif 22 #ifndef TEMPLATED_SIMD_LIB 19 23 #include "sse_simd.h" 24 #endif 20 25 #endif 21 26 #ifdef _ARCH_PPC … … 44 49 #endif 45 50 #if BYTE_ORDER == LITTLE_ENDIAN 46 #define sisd_sfl(blk, n) sisd_sll(blk, n) 47 #define sisd_sbl(blk, n) sisd_srl(blk, n) 51 static inline SIMD_type sisd_sfl(SIMD_type blk, SIMD_type n) { 52 return sisd_sll(blk, n); 53 } 54 //#define sisd_sbl(blk, n) sisd_srl(blk, n) 48 55 #define sisd_sfli(blk, n) sisd_slli(blk, n) 49 56 #define sisd_sbli(blk, n) sisd_srli(blk, n) trunk/src/bitlex.c
r169 r179 28 28 x8basis = (BitBlockBasis *) simd_new(BUFFER_SIZE/PACKSIZE); 29 29 validation_stream = (BitBlock *) simd_new(BUFFER_BLOCKS+SENTINEL_BLOCKS); 30 #ifdef TEMPLATED_SIMD_LIB 31 validation_stream[BUFFER_BLOCKS] = simd_const<1,1>(); 32 #endif 33 #ifndef TEMPLATED_SIMD_LIB 30 34 validation_stream[BUFFER_BLOCKS] = simd_const_1(1); 35 #endif 31 36 }; 32 37 … … 161 166 BitBlock temp1 = simd_or(bit[0], bit[1]); 162 167 BitBlock temp2 = simd_or(temp1, bit[2]); 168 #ifdef TEMPLATED_SIMD_LIB 169 Control = simd_andc(simd_const<1,1>(), temp2); 170 #endif 171 #ifndef TEMPLATED_SIMD_LIB 163 172 Control = simd_andc(simd_const_1(1), temp2); 173 #endif 164 174 BitBlock temp3 = simd_or(bit[2], bit[3]); 165 175 BitBlock temp4 = simd_or(temp1, temp3); … … 212 222 BitBlock temp26 = simd_or(bit[5], temp9); 213 223 BitBlock temp27 = simd_and(bit[4], temp26); 224 #ifdef TEMPLATED_SIMD_LIB 225 BitBlock temp28 = simd_andc(simd_const<1,1>(), temp4); 226 #endif 227 #ifndef TEMPLATED_SIMD_LIB 214 228 BitBlock temp28 = simd_andc(simd_const_1(1), temp4); 229 #endif 215 230 BitBlock temp29 = simd_if(bit[3], temp27, temp28); 216 231 BitBlock temp30 = simd_andc(temp29, temp25); … … 234 249 BitBlock temp48 = simd_and(temp21, temp15); 235 250 BitBlock temp49 = simd_andc(temp47, temp48); 251 #ifdef TEMPLATED_SIMD_LIB 252 Control = simd_andc(simd_const<1,1>(), temp49); 253 #endif 254 #ifndef TEMPLATED_SIMD_LIB 236 255 Control = simd_andc(simd_const_1(1), temp49); 256 #endif 237 257 BitBlock temp50 = simd_andc(bit[7], bit[6]); 238 258 BitBlock temp51 = simd_and(temp15, temp50); … … 321 341 BitBlock temp35 = simd_or(temp31, temp34); 322 342 BitBlock temp36 = simd_and(temp17, bit[4]); 343 #ifdef TEMPLATED_SIMD_LIB 344 BitBlock temp37 = simd_andc(simd_const<1,1>(), temp20); 345 #endif 346 #ifndef TEMPLATED_SIMD_LIB 323 347 BitBlock temp37 = simd_andc(simd_const_1(1), temp20); 348 #endif 324 349 BitBlock temp38 = simd_if(bit[5], temp37, temp20); 325 350 BitBlock temp39 = simd_and(temp36, temp38); 
… … 554 579 However, at the beginning of the buffer, no suffixes 555 580 are expected, so this value is initialized to zeroes. */ 581 #ifdef TEMPLATED_SIMD_LIB 582 BitBlock prefix_pending = simd_const<1,0>(); 583 /* If a suffix is pending, then it may involve one of 584 the special case prefixes E0, ED. F0, F4, or the 585 EF prefix or EF_BF combination for FFFF/FFFE detection.*/ 586 BitBlock E0ED_pending = simd_const<1,0>(); 587 BitBlock F0F4_pending = simd_const<1,0>(); 588 BitBlock bit5_pending = simd_const<1,0>(); 589 BitBlock EF_pending = simd_const<1,0>(); 590 BitBlock EF_BF_pending = simd_const<1,0>(); 591 #endif 592 #ifndef TEMPLATED_SIMD_LIB 556 593 BitBlock prefix_pending = simd_const_1(0); 557 594 /* If a suffix is pending, then it may involve one of … … 563 600 BitBlock EF_pending = simd_const_1(0); 564 601 BitBlock EF_BF_pending = simd_const_1(0); 565 602 #endif 566 603 /* Temporary variables used within the block. */ 567 604 BitBlock suffix_required_scope; … … 571 608 572 609 for (int i = 0; i < buffer_blocks; i++) { 610 #ifdef TEMPLATED_SIMD_LIB 611 validation_stream[i] = simd_const<1,0>(); 612 #endif 613 #ifndef TEMPLATED_SIMD_LIB 573 614 validation_stream[i] = simd_const_1(0); 615 #endif 574 616 /* If there is no pending suffix and no bit 0, then there 575 617 are no possible validation issues for this block. */ … … 598 640 599 641 /* Default values of pending variables for next iteration. 
*/ 642 #ifdef TEMPLATED_SIMD_LIB 643 E0ED_pending = simd_const<1,0>(); 644 F0F4_pending = simd_const<1,0>(); 645 bit5_pending = simd_const<1,0>(); 646 EF_pending = simd_const<1,0>(); 647 EF_BF_pending = simd_const<1,0>(); 648 #endif 649 #ifndef TEMPLATED_SIMD_LIB 600 650 E0ED_pending = simd_const_1(0); 601 651 F0F4_pending = simd_const_1(0); … … 603 653 EF_pending = simd_const_1(0); 604 654 EF_BF_pending = simd_const_1(0); 655 #endif 605 656 606 657 X111x = simd_and(simd_and(x8basis[i].bit[4], x8basis[i].bit[5]), x8basis[i].bit[6]); … … 693 744 /* Nothing required for most charsets - but perhaps should have tables. */ 694 745 for (int i = 0; i < buffer_blocks; i++) { 746 #ifdef TEMPLATED_SIMD_LIB 747 validation_stream[i] = simd_const<1,0>(); 748 #endif 749 #ifndef TEMPLATED_SIMD_LIB 695 750 validation_stream[i] = simd_const_1(0); 751 #endif 696 752 } 697 753 }; … … 713 769 /* Nothing required for most cases - but perhaps should have tables. */ 714 770 for (int i = 0; i < buffer_blocks; i++) { 771 #ifdef TEMPLATED_SIMD_LIB 772 validation_stream[i] = simd_const<1,0>(); 773 #endif 774 #ifndef TEMPLATED_SIMD_LIB 715 775 validation_stream[i] = simd_const_1(0); 776 #endif 716 777 } 717 778 } … … 739 800 BitBlock temp10 = simd_andc(temp2, temp9); 740 801 BitBlock temp11 = simd_andc(temp10, bit[0]); 802 #ifdef TEMPLATED_SIMD_LIB 803 Control = simd_andc(simd_const<1,1>(), temp11); 804 #endif 805 #ifndef TEMPLATED_SIMD_LIB 741 806 Control = simd_andc(simd_const_1(1), temp11); 807 #endif 742 808 BitBlock temp12 = simd_or(bit[2], bit[3]); 743 809 BitBlock temp13 = simd_or(temp1, temp12); … … 784 850 BitBlock temp12 = simd_andc(temp11, bit[2]); 785 851 BitBlock temp13 = simd_andc(temp10, temp12); 852 #ifdef TEMPLATED_SIMD_LIB 853 Control = simd_andc(simd_const<1,1>(), temp13); 854 #endif 855 #ifndef TEMPLATED_SIMD_LIB 786 856 Control = simd_andc(simd_const_1(1), temp13); 857 #endif 787 858 BitBlock temp14 = simd_or(bit[2], bit[3]); 788 859 BitBlock temp15 = simd_or(temp1, 
temp14); … … 840 911 BitBlock temp8 = simd_and(temp4, temp7); 841 912 BitBlock temp9 = simd_andc(temp1, temp8); 913 #ifdef TEMPLATED_SIMD_LIB 914 Control = simd_andc(simd_const<1,1>(), temp9); 915 #endif 916 #ifndef TEMPLATED_SIMD_LIB 842 917 Control = simd_andc(simd_const_1(1), temp9); 918 #endif 843 919 BitBlock temp10 = simd_or(bit[2], bit[3]); 844 920 BitBlock temp11 = simd_or(temp1, temp10); … … 934 1010 935 1011 if (buffer_units < BUFFER_SIZE) { 1012 #ifdef TEMPLATED_SIMD_LIB 1013 BitBlock final_block_mask = 1014 sisd_sfl(simd_const<1,1>(), sisd_from_int(buffer_units % BLOCKSIZE)); 1015 #endif 1016 #ifndef TEMPLATED_SIMD_LIB 936 1017 BitBlock final_block_mask = 937 1018 sisd_sfl(simd_const_1(1), sisd_from_int(buffer_units % BLOCKSIZE)); 1019 #endif 938 1020 int lastblk = buffer_units/BLOCKSIZE; 939 1021 for (int j = minLexicalItem; j < LexicalItemCount; j++) { trunk/src/bitplex.c
r113 r179 20 20 */ 21 21 #endif 22 23 24 22 #ifdef TEMPLATED_SIMD_LIB 23 #define s2p_step(s0, s1, hi_mask, shift, p0, p1) \ 24 {\ 25 BitBlock t0, t1;\ 26 t0 = simd_pack<16,h,h>(s0, s1);\ 27 t1 = simd_pack<16,l,l>(s0, s1);\ 28 p0 = simd_if(hi_mask, t0, simd_srli<16>(t1, shift));\ 29 p1 = simd_if(hi_mask, simd_slli<16>(t0, shift), t1);\ 30 } 31 #endif 32 #ifndef TEMPLATED_SIMD_LIB 25 33 #define s2p_step(s0, s1, hi_mask, shift, p0, p1) \ 26 34 {\ … … 31 39 p1 = simd_if(hi_mask, simd_slli_16(t0, shift), t1);\ 32 40 } 41 #endif 33 42 34 43 static inline void s2p_bytepack(BytePack s[], BitBlock p[]) { 44 #ifdef TEMPLATED_SIMD_LIB 45 BitBlock mask_2 = simd_himask<2>(); 46 BitBlock mask_4 = simd_himask<4>(); 47 BitBlock mask_8 = simd_himask<8>(); 48 #endif 49 #ifndef TEMPLATED_SIMD_LIB 35 50 BitBlock mask_2 = simd_himask_2; 36 51 BitBlock mask_4 = simd_himask_4; 37 52 BitBlock mask_8 = simd_himask_8; 53 #endif 38 54 BitBlock bit00224466_0, bit00224466_1, bit00224466_2, bit00224466_3; 39 55 BitBlock bit11335577_0, bit11335577_1, bit11335577_2, bit11335577_3; … … 61 77 s2p_step(bit33337777_0, bit33337777_1, mask_8, 4, p[3], p[7]); 62 78 } 63 64 65 79 Bitplex::Bitplex () { 66 80 x8basis = (BitBlockBasis *) simd_new(BUFFER_SIZE/PACKSIZE); trunk/src/byteplex.c
r178 r179 119 119 120 120 121 inlinevoid DoDuplex(BytePack * src_data, int packs_in_buffer,121 void DoDuplex(BytePack * src_data, int packs_in_buffer, 122 122 BytePack * p0, BytePack * p1) { 123 123 … … 126 126 BytePack s1 = src_data[2*pk+1]; 127 127 #if (BYTE_ORDER == LITTLE_ENDIAN) 128 #ifdef TEMPLATED_SIMD_LIB 129 p0[pk] = simd_pack<16,l,l>(s1, s0); 130 p1[pk] = simd_pack<16,h,h>(s1, s0); 131 #endif 132 #ifndef TEMPLATED_SIMD_LIB 128 133 p0[pk] = simd_pack_16_ll(s1, s0); 129 134 p1[pk] = simd_pack_16_hh(s1, s0); 130 135 #endif 136 #endif 131 137 #if (BYTE_ORDER == BIG_ENDIAN) 132 p0[pk] = simd_pack_16_hh(s0, s1); 133 p1[pk] = simd_pack_16_ll(s0, s1); 134 #endif 135 } 136 } 137 138 #ifdef TEMPLATED_SIMD_LIB 139 p0[pk] = simd_pack<16,l,l>(s0, s1); 140 p1[pk] = simd_pack<16,h,h>(s0, s1); 141 #endif 142 #ifndef TEMPLATED_SIMD_LIB 143 p0[pk] = simd_pack_16_ll(s0, s1); 144 p1[pk] = simd_pack_16_hh(s0, s1); 145 #endif 146 #endif 147 } 148 } 149 138 150 void U16LE_Buffer::DoByteplex() { 139 151 DoDuplex(src_buffer, packs_in_buffer, x16lo, x16hi); … … 153 165 BytePack s3 = src_data[4*pk+3]; 154 166 #if (BYTE_ORDER == LITTLE_ENDIAN) 167 #ifdef TEMPLATED_SIMD_LIB 168 BytePack p02_0 = simd_pack<16,l,l>(s1, s0); 169 BytePack p13_0 = simd_pack<16,h,h>(s1, s0); 170 BytePack p02_1 = simd_pack<16,l,l>(s3, s2); 171 BytePack p13_1 = simd_pack<16,h,h>(s3, s2); 172 p0[pk] = simd_pack<16,l,l>(p02_1, p02_0); 173 p1[pk] = simd_pack<16,l,l>(p13_1, p13_0); 174 p2[pk] = simd_pack<16,h,h>(p02_1, p02_0); 175 p3[pk] = simd_pack<16,h,h>(p13_1, p13_0); 176 #endif 177 #ifndef TEMPLATED_SIMD_LIB 155 178 BytePack p02_0 = simd_pack_16_ll(s1, s0); 156 179 BytePack p13_0 = simd_pack_16_hh(s1, s0); … … 162 185 p3[pk] = simd_pack_16_hh(p13_1, p13_0); 163 186 #endif 187 #endif 164 188 #if (BYTE_ORDER == BIG_ENDIAN) 189 #ifdef TEMPLATED_SIMD_LIB 190 BytePack p02_0 = simd_pack<16,h,h>(s0, s1); 191 BytePack p13_0 = simd_pack<16,l,l>(s0, s1); 192 BytePack p02_1 = simd_pack<16,h,h>(s2, s3); 193 BytePack 
p13_1 = simd_pack<16,l,l>(s2, s3); 194 p0[pk] = simd_pack<16,h,h>(p02_0, p02_1); 195 p1[pk] = simd_pack<16,h,h>(p13_0, p13_1); 196 p2[pk] = simd_pack<16,l,l>(p02_0, p02_1); 197 p3[pk] = simd_pack<16,l,l>(p13_0, p13_1); 198 #endif 199 #ifndef TEMPLATED_SIMD_LIB 165 200 BytePack p02_0 = simd_pack_16_hh(s0, s1); 166 201 BytePack p13_0 = simd_pack_16_ll(s0, s1); … … 172 207 p3[pk] = simd_pack_16_ll(p13_0, p13_1); 173 208 #endif 209 #endif 174 210 } 175 211 } … … 201 237 void U16_Buffer::PreparePseudoASCII_Stream() { 202 238 for (int pk = 0; pk < packs_in_buffer; pk++) { 203 x8data[pk] = simd_or(x16lo[pk], 204 simd_andc(simd_const_8(0x80), 239 #ifdef TEMPLATED_SIMD_LIB 240 x8data[pk] = simd_or(x16lo[pk], simd_andc(simd_const<8>(0x80), 241 simd_eq<8>(x16hi[pk], simd_const<8>(0)))); 242 #endif 243 #ifndef TEMPLATED_SIMD_LIB 244 x8data[pk] = simd_or(x16lo[pk], simd_andc(simd_const_8(0x80), 205 245 simd_eq_8(x16hi[pk], simd_const_8(0)))); 246 #endif 206 247 } 207 248 } … … 210 251 for (int pk = 0; pk < packs_in_buffer; pk++) { 211 252 BytePack hi = simd_or(simd_or(x32hh[pk], x32hl[pk]), x32lh[pk]); 212 x8data[pk] = simd_or(x32ll[pk], 213 simd_andc(simd_const_8(0x80), 253 #ifdef TEMPLATED_SIMD_LIB 254 x8data[pk] = simd_or(x32ll[pk], simd_andc(simd_const<8>(0x80), 255 simd_eq<8>(hi, simd_const<8>(0)))); 256 #endif 257 #ifndef TEMPLATED_SIMD_LIB 258 x8data[pk] = simd_or(x32ll[pk], simd_andc(simd_const_8(0x80), 214 259 simd_eq_8(hi, simd_const_8(0)))); 260 #endif 215 261 } 216 262 } … … 238 284 239 285 template <CodeUnit_Base C> 240 void X8_Buffer<C>::InitializeBuffer(unsigned char * src, int lgth){ 241 int byte_advance = BYTEPLEX_SIZE - lgth; 286 void X8_Buffer<C>::InitializeBuffer(unsigned char * src, int lgth){ 287 int byte_advance = BYTEPLEX_SIZE - lgth; 242 288 int bytes_read = CopyAndFill(src, lgth, byte_advance); 243 289 Set_limits(bytes_read + lgth); 244 290 } 245 291 246 void U16_Buffer::InitializeBuffer(unsigned char * src, int lgth){ 247 int byte_advance = 
BYTEPLEX_SIZE * 2 - lgth; 292 void U16_Buffer::InitializeBuffer(unsigned char * src, int lgth){ 293 int byte_advance = BYTEPLEX_SIZE * 2 - lgth; 248 294 int bytes_read = CopyAndFill(src, lgth, byte_advance); 249 295 if (bytes_read % 2 != 0) { … … 253 299 } 254 300 255 void U32_Buffer::InitializeBuffer(unsigned char * src, int lgth){ 256 int byte_advance = BYTEPLEX_SIZE * 4 - lgth; 301 void U32_Buffer::InitializeBuffer(unsigned char * src, int lgth){ 302 int byte_advance = BYTEPLEX_SIZE * 4 - lgth; 257 303 int bytes_read = CopyAndFill(src, lgth, byte_advance); 258 304 if (bytes_read % 4 != 0) { … … 265 311 266 312 template <CodeUnit_Base C> 267 void X8_Buffer<C>::AdvanceInputBuffer(int advance_amt){ 268 int bytes_to_keep = units_in_buffer - advance_amt; 313 void X8_Buffer<C>::AdvanceInputBuffer(int advance_amt){ 314 int bytes_to_keep = units_in_buffer - advance_amt; 269 315 int bytes_read = CopyAndFill(&((unsigned char *)src_buffer)[advance_amt], 270 316 bytes_to_keep, advance_amt); … … 272 318 } 273 319 274 void U16_Buffer::AdvanceInputBuffer(int advance_amt){ 275 int bytes_to_keep = (units_in_buffer - advance_amt)*2; 320 void U16_Buffer::AdvanceInputBuffer(int advance_amt){ 321 int bytes_to_keep = (units_in_buffer - advance_amt)*2; 276 322 int bytes_read = CopyAndFill(&((unsigned char *)src_buffer)[advance_amt*2], 277 323 bytes_to_keep, advance_amt*2); … … 282 328 } 283 329 284 void U32_Buffer::AdvanceInputBuffer(int advance_amt){ 285 int bytes_to_keep = (units_in_buffer - advance_amt)*4; 330 void U32_Buffer::AdvanceInputBuffer(int advance_amt){ 331 int bytes_to_keep = (units_in_buffer - advance_amt)*4; 286 332 int bytes_read = CopyAndFill(&((unsigned char *)src_buffer)[advance_amt*4], 287 333 bytes_to_keep, advance_amt*4); … … 296 342 BytePack hi_surrogate; 297 343 BytePack lo_surrogate; 344 #ifdef TEMPLATED_SIMD_LIB 345 BytePack hi_surrogate_pending = simd_const<8>(0); 346 #endif 347 #ifndef TEMPLATED_SIMD_LIB 298 348 BytePack hi_surrogate_pending = 
simd_const_8(0); 349 #endif 299 350 BytePack surrogate_scope; 300 351 BytePack u16_surrogate_error; 301 // BytePack u16_surrogate_accum = simd_const _8(0);302 // BytePack u16_FFFE_FFFF_accum = simd_const _8(0);352 // BytePack u16_surrogate_accum = simd_const<8>(0); 353 // BytePack u16_FFFE_FFFF_accum = simd_const<8>(0); 303 354 BytePack u16_FFFE_FFFF; 304 355 for (int pk = 0; pk < packs_in_buffer; pk++) { … … 307 358 of surrogate pairs. Validation requires that these values 308 359 only occur in well-formed pairs. */ 360 #ifdef TEMPLATED_SIMD_LIB 361 surrogate_select = simd_and(x16hi[pk], simd_const<8>(0xDC)); 362 hi_surrogate = simd_eq<8>(surrogate_select, simd_const<8>(0xD8)); 363 lo_surrogate = simd_eq<8>(surrogate_select, simd_const<8>(0xDC)); 364 surrogate_scope = simd_or(hi_surrogate_pending, 365 sisd_sfli(hi_surrogate, 8)); 366 367 u16_surrogate_error = simd_xor(surrogate_scope, lo_surrogate); 368 hi_surrogate_pending = sisd_sbli(hi_surrogate, 8 * (PACKSIZE-1)); 369 /* The values FFFE and FFFF are excluded. 
*/ 370 u16_FFFE_FFFF = simd_eq<8>(simd_and(x16hi[pk], 371 simd_or(x16lo[pk], simd_const<8>(1))), 372 simd_const<8>(0xFF)); 373 #endif 374 #ifndef TEMPLATED_SIMD_LIB 309 375 surrogate_select = simd_and(x16hi[pk], simd_const_8(0xDC)); 310 376 hi_surrogate = simd_eq_8(surrogate_select, simd_const_8(0xD8)); … … 312 378 surrogate_scope = simd_or(hi_surrogate_pending, 313 379 sisd_sfli(hi_surrogate, 8)); 314 380 315 381 u16_surrogate_error = simd_xor(surrogate_scope, lo_surrogate); 316 382 hi_surrogate_pending = sisd_sbli(hi_surrogate, 8 * (PACKSIZE-1)); … … 319 385 simd_or(x16lo[pk], simd_const_8(1))), 320 386 simd_const_8(0xFF)); 387 #endif 321 388 // u16_FFFE_FFFF_accum = simd_or(u16_FFFE_FFFF_accum, u16_FFFE_FFFF); 322 389 u16_surrogate_error = simd_or(u16_surrogate_error, u16_FFFE_FFFF); 323 390 324 391 if (bitblock_has_bit(u16_surrogate_error)) { 325 392 CharSetValidationError("UTF-16 (relative position reported)", … … 333 400 #ifdef X16HILO_ACCESS 334 401 int packs = (buffer_units - 1)/PACKSIZE + 1; 402 #ifdef TEMPLATED_SIMD_LIB 403 BytePack u16_surrogate_accum = simd_const<8>(0); 404 BytePack u16_FFFE_FFFF_accum = simd_const<8>(0); 405 #endif 406 #ifndef TEMPLATED_SIMD_LIB 335 407 BytePack u16_surrogate_accum = simd_const_8(0); 336 408 BytePack u16_FFFE_FFFF_accum = simd_const_8(0); 409 #endif 337 410 BytePack u16_FFFE_FFFF; 338 411 for (int pk = 0; pk < packs; pk++) { 339 412 /* The high byte of UCS-2 code units cannot be in the range D8-DF. 340 413 This corresponds to the D800-DFFF range of illegal codepoints 341 reserved for UTF-16 surrogate pairs. Accumulate the results. 414 reserved for UTF-16 surrogate pairs. Accumulate the results. 342 415 To check, 0x20 is added to each such octet, mapping the D8-DF 343 416 range to F8-FF and wrapping E0-FF values around. The max value 344 is then accumulated. */ 417 is then accumulated. 
*/ 418 #ifdef TEMPLATED_SIMD_LIB 345 419 u16_surrogate_accum = 346 simd_max_8(u16_surrogate_accum, 347 simd_add_8(x16hi[pk], simd_const_8(0x20))); 420 simd_max_8(u16_surrogate_accum, simd_add<8>(x16hi[pk], simd_const<8>(0x20))); 421 /* The values FFFE and FFFF are excluded. */ 422 u16_FFFE_FFFF = simd_eq<8>(simd_and(x16hi[pk], 423 simd_or(x16lo[pk], simd_const<8>(1))), simd_const<8>(0xFF)); 424 u16_FFFE_FFFF_accum = simd_or(u16_FFFE_FFFF_accum, u16_FFFE_FFFF); 425 #endif 426 #ifndef TEMPLATED_SIMD_LIB 427 u16_surrogate_accum = 428 simd_max_8(u16_surrogate_accum, simd_add_8(x16hi[pk], simd_const_8(0x20))); 348 429 /* The values FFFE and FFFF are excluded. */ 349 430 u16_FFFE_FFFF = simd_eq_8(simd_and(x16hi[pk], 350 simd_or(x16lo[pk], simd_const_8(1))), 351 simd_const_8(0xFF)); 431 simd_or(x16lo[pk], simd_const_8(1))), simd_const_8(0xFF)); 352 432 u16_FFFE_FFFF_accum = simd_or(u16_FFFE_FFFF_accum, u16_FFFE_FFFF); 353 } 433 #endif 434 } 435 #ifdef TEMPLATED_SIMD_LIB 436 u16_surrogate_accum = simd_eq<8>(simd_or(u16_surrogate_accum, simd_const<8>(0x07)), 437 simd_const<8>(0xFF)); 438 #endif 439 #ifndef TEMPLATED_SIMD_LIB 354 440 u16_surrogate_accum = simd_eq_8(simd_or(u16_surrogate_accum, simd_const_8(0x07)), 355 441 simd_const_8(0xFF)); 442 #endif 443 356 444 if (bitblock_has_bit(simd_or(u16_surrogate_accum, u16_FFFE_FFFF_accum))) 357 445 CharSetValidationError("UCS-2"); … … 367 455 #ifdef X32BYTEPLEX_ACCESS 368 456 int packs = (buffer_units - 1)/PACKSIZE + 1; 457 #ifdef TEMPLATED_SIMD_LIB 458 BytePack u32hh_accum = simd_const<8>(0); 459 BytePack u32hl_accum = simd_const<8>(0); 460 BytePack u32_surrogate_accum = simd_const<8>(0); 461 BytePack u32_FFFE_FFFF_accum = simd_const<8>(0); 462 #endif 463 #ifndef TEMPLATED_SIMD_LIB 369 464 BytePack u32hh_accum = simd_const_8(0); 370 465 BytePack u32hl_accum = simd_const_8(0); 371 466 BytePack u32_surrogate_accum = simd_const_8(0); 372 467 BytePack u32_FFFE_FFFF_accum = simd_const_8(0); 468 #endif 373 469 BytePack 
u32_BMP_select; 374 470 BytePack u32l_FFFE_FFFF; … … 379 475 /* The second octet has a max value of 0x10, corresponding to the 380 476 maximum Unicode code point value of 0x10FFFF. Accumulate the 381 maximum of all u32hl values observed. */ 477 maximum of all u32hl values observed. */ 382 478 u32hl_accum = simd_max_8(u32hl_accum, x32hl[pk]); 383 479 /* The third octet cannot be in the range D8-DF if the second octet 384 480 is 0. This corresponds to the D800-DFFF range of illegal codepoints 385 reserved for UTF-16 surrogate pairs. Accumulate the results. 481 reserved for UTF-16 surrogate pairs. Accumulate the results. 386 482 To check, 0x20 is added to each such octet, mapping the D8-DF 387 483 range to F8-FF and wrapping E0-FF values around. The max value 388 484 is then accumulated. */ 485 #ifdef TEMPLATED_SIMD_LIB 486 u32_BMP_select = simd_eq<8>(x32hl[pk], simd_const<8>(0)); 487 u32_surrogate_accum = simd_max_8(u32_surrogate_accum, 488 simd_and(u32_BMP_select, simd_add<8>(x32lh[pk], simd_const<8>(0x20)))); 489 /* The low two octets cannot have the value FFFE or FFFF if 490 we're in the BMP (second octet is 0). */ 491 u32l_FFFE_FFFF = simd_eq<8>(simd_and(x32lh[pk], 492 simd_or(x32ll[pk], simd_const<8>(1))),simd_const<8>(0xFF)); 493 u32_FFFE_FFFF_accum = simd_or(u32_FFFE_FFFF_accum, 494 simd_and(u32_BMP_select, u32l_FFFE_FFFF)); 495 #endif 496 #ifndef TEMPLATED_SIMD_LIB 389 497 u32_BMP_select = simd_eq_8(x32hl[pk], simd_const_8(0)); 390 u32_surrogate_accum = 391 simd_max_8(u32_surrogate_accum, 392 simd_and(u32_BMP_select, 393 simd_add_8(x32lh[pk], simd_const_8(0x20)))); 498 u32_surrogate_accum = simd_max_8(u32_surrogate_accum, 499 simd_and(u32_BMP_select, simd_add<8>(x32lh[pk], simd_const_8(0x20)))); 394 500 /* The low two octets cannot have the value FFFE or FFFF if 395 501 we're in the BMP (second octet is 0). 
*/ 396 502 u32l_FFFE_FFFF = simd_eq_8(simd_and(x32lh[pk], 397 simd_or(x32ll[pk], simd_const_8(1))), 398 simd_const_8(0xFF)); 503 simd_or(x32ll[pk], simd_const_8(1))),simd_const_8(0xFF)); 399 504 u32_FFFE_FFFF_accum = simd_or(u32_FFFE_FFFF_accum, 400 505 simd_and(u32_BMP_select, u32l_FFFE_FFFF)); 401 } 506 #endif 507 } 508 #ifdef TEMPLATED_SIMD_LIB 509 u32hl_accum = simd_gt_8(u32hl_accum, simd_const<8>(0x10)); 510 u32_surrogate_accum = simd_eq<8>(simd_or(u32_surrogate_accum, simd_const<8>(0x07)), 511 simd_const<8>(0xFF)); 512 #endif 513 #ifndef TEMPLATED_SIMD_LIB 402 514 u32hl_accum = simd_gt_8(u32hl_accum, simd_const_8(0x10)); 403 515 u32_surrogate_accum = simd_eq_8(simd_or(u32_surrogate_accum, simd_const_8(0x07)), 404 516 simd_const_8(0xFF)); 517 #endif 405 518 if (bitblock_has_bit(simd_or(simd_or(u32hh_accum, u32hl_accum), 406 519 simd_or(u32_surrogate_accum, u32_FFFE_FFFF_accum)))) { … … 416 529 Byteplex * b; 417 530 if (likely(e->code_unit_size == SingleByte)) { 418 if (likely(e->code_unit_base == ASCII)) 531 if (likely(e->code_unit_base == ASCII)) 419 532 b = new X8_Buffer<ASCII>(); 420 533 else b = new X8_Buffer<EBCDIC>(); … … 430 543 case Unusual_2143: b = new U32_2143_Buffer(); break; 431 544 case Unusual_3412: b = new U32_3412_Buffer(); break; 432 } 545 } 433 546 return b; 434 547 } … … 439 552 return b; 440 553 } 441 554 442 555 Byteplex * Byteplex::ByteplexFactory(Entity_Info * e, unsigned char * buffer_bytes, int buffer_size) { 443 556 Byteplex * b = ByteplexFactory(e); … … 511 624 template <> 512 625 void X8_Buffer<EBCDIC>::to_UTF8(int name_pos, int lgth, char * u8_ptr){ 513 626 514 627 } 515 628 void U16_Buffer::to_UTF8(int name_pos, int lgth, char * u8_ptr){ … … 562 675 u8_ptr[u8_lgth+1] = 0x80 + ((u32hl[i] & 0x03) << 4) + (u32lh[i] >> 4); 563 676 u8_ptr[u8_lgth+2] = 0x80 + ((u32lh[i] & 0x0F) << 2) + (u32ll[i] >> 6); 564 u8_ptr[u8_lgth+3] = 0x80 + (u32ll[i] & 0x3F); 677 u8_ptr[u8_lgth+3] = 0x80 + (u32ll[i] & 0x3F); 565 678 u8_lgth += 4; 566 679 } 
trunk/src/engine.c
r176 r179 205 205 206 206 /* Install sentinels for every lexical item stream*/ 207 #ifndef OPTIMIZE_SHORT_SCAN 207 #ifdef TEMPLATED_SIMD_LIB 208 BitBlock sentinel_value = simd_const<1,1>(); 209 #endif 210 #ifndef TEMPLATED_SIMD_LIB 208 211 BitBlock sentinel_value = simd_const_1(1); 209 212 #endif 210 #ifdef OPTIMIZE_SHORT_SCAN 211 BitBlock sentinel_value = sisd_sfli(simd_const_1(1), 8*sizeof(unsigned long)); 212 #endif 213 213 214 for (int j = minLexicalItem; j < LexicalItemCount; j++) { 214 215 buf->item_stream[j][BUFFER_BLOCKS] = sentinel_value; … … 934 935 char * end_elem_name = &((char *) x8data)[buffer_rel_pos]; 935 936 937 #ifdef TEMPLATED_SIMD_LIB 938 BytePack byte_compare = simd_eq<8>(sisd_load_unaligned((BytePack *) end_elem_name), 939 sisd_load_unaligned((BytePack *) start_elem_name)); 940 #endif 941 #ifndef TEMPLATED_SIMD_LIB 936 942 BytePack byte_compare = simd_eq_8(sisd_load_unaligned((BytePack *) end_elem_name), 937 943 sisd_load_unaligned((BytePack *) start_elem_name)); 944 #endif 938 945 if (lgth < 16) { 939 946 int expected_bits = ~(-1 << lgth); … … 947 954 overlapping with the first byte_compare. 
*/ 948 955 int pos = (lgth - 1) % PACKSIZE + 1; 956 #ifdef TEMPLATED_SIMD_LIB 957 byte_compare = simd_or(byte_compare, simd_eq<8>(sisd_load_unaligned((BytePack *) &end_elem_name[pos]), 958 sisd_load_unaligned((BytePack *) &start_elem_name[pos]))); 959 #endif 960 #ifndef TEMPLATED_SIMD_LIB 949 961 byte_compare = simd_or(byte_compare, simd_eq_8(sisd_load_unaligned((BytePack *) &end_elem_name[pos]), 950 962 sisd_load_unaligned((BytePack *) &start_elem_name[pos]))); 963 #endif 951 964 pos += 16; 952 965 while (pos < lgth) { … … 954 967 WF_Error(wfErr_GIMatch); 955 968 } 969 #ifdef TEMPLATED_SIMD_LIB 970 byte_compare = simd_eq<8>(sisd_load_unaligned((BytePack *) &end_elem_name[pos]), 971 sisd_load_unaligned((BytePack *) &start_elem_name[pos])); 972 #endif 973 #ifndef TEMPLATED_SIMD_LIB 956 974 byte_compare = simd_eq_8(sisd_load_unaligned((BytePack *) &end_elem_name[pos]), 957 975 sisd_load_unaligned((BytePack *) &start_elem_name[pos])); 976 #endif 958 977 pos += 16; 959 978 } trunk/src/symtab.c
r174 r179 248 248 } 249 249 } 250 #ifdef TEMPLATED_SIMD_LIB 251 return !simd_any_sign_bit<8>(b); 252 #endif 253 #ifndef TEMPLATED_SIMD_LIB 250 254 return !simd_any_sign_bit_8(b); 255 #endif 251 256 } 252 257
