- Timestamp:
- Aug 29, 2014, 2:56:16 AM (4 years ago)
- File:
-
- 1 edited
Legend:
- Unmodified
- Added
- Removed
-
proto/s2k/trunk/demo/transpose/transpose.cpp
r4092 r4093 52 52 #define LE 53 53 #define BE 54 #define LE_S2K54 #define S2K 55 55 56 56 #define s2p_ideal_le(s_7, s_6, s_5, s_4, s_3, s_2, s_1, s_0, r_0, r_1, r_2, r_3, r_4, r_5, r_6, r_7) \ … … 86 86 } while(0) 87 87 88 static void s2p_ le(BytePack b76543210_7, BytePack b76543210_6, BytePack b76543210_5, BytePack b76543210_4,88 static void s2p_be(BytePack b76543210_7, BytePack b76543210_6, BytePack b76543210_5, BytePack b76543210_4, 89 89 BytePack b76543210_3, BytePack b76543210_2, BytePack b76543210_1, BytePack b76543210_0, 90 90 BitBlock & b0, BitBlock & b1, BitBlock & b2, BitBlock & b3, … … 124 124 } 125 125 126 127 static void s2p_be(BytePack b76543210_7, BytePack b76543210_6, BytePack b76543210_5, BytePack b76543210_4, 126 static void s2p_le(BytePack b76543210_7, BytePack b76543210_6, BytePack b76543210_5, BytePack b76543210_4, 128 127 BytePack b76543210_3, BytePack b76543210_2, BytePack b76543210_1, BytePack b76543210_0, 129 BitBlock & b 0, BitBlock & b1, BitBlock & b2, BitBlock & b3,130 BitBlock & b 4, BitBlock & b5, BitBlock & b6, BitBlock & b7)128 BitBlock & b7, BitBlock & b6, BitBlock & b5, BitBlock & b4, 129 BitBlock & b3, BitBlock & b2, BitBlock & b1, BitBlock & b0) 131 130 { 132 131 BitBlock b7654_0, b7654_1, b7654_2, b7654_3; … … 153 152 b10_0 = hsimd<4>::packl(b3210_1, b3210_0); 154 153 155 b0= hsimd<2>::packh(b76_1, b76_0);156 b1= hsimd<2>::packl(b76_1, b76_0);157 b2= hsimd<2>::packh(b54_1, b54_0);158 b3= hsimd<2>::packl(b54_1, b54_0);159 b4= hsimd<2>::packh(b32_1, b32_0);160 b5= hsimd<2>::packl(b32_1, b32_0);161 b6= hsimd<2>::packh(b10_1, b10_0);162 b7= hsimd<2>::packl(b10_1, b10_0);154 b7 = hsimd<2>::packh(b76_1, b76_0); 155 b6 = hsimd<2>::packl(b76_1, b76_0); 156 b5 = hsimd<2>::packh(b54_1, b54_0); 157 b4 = hsimd<2>::packl(b54_1, b54_0); 158 b3 = hsimd<2>::packh(b32_1, b32_0); 159 b2 = hsimd<2>::packl(b32_1, b32_0); 160 b1 = hsimd<2>::packh(b10_1, b10_0); 161 b0 = hsimd<2>::packl(b10_1, b10_0); 163 162 } 164 163 … … 250 249 BitBlock b3, b2, b1, b0; 251 250 252 253 251 int k = 0; 254 252 int bytes_avail = infile_size; 253 255 254 256 255 ////////////////////////////////////////////////////////////////////////////////////////// … … 258 257 ////////////////////////////////////////////////////////////////////////////////////////// 259 258 260 while (bytes_avail >= BLOCK_SIZE) 259 260 while (bytes_avail >= BLOCK_SIZE) 261 261 { 262 { 263 BitBlock b76543210 [8]; 264 memcpy(&b76543210, &infile_buffer[k], BLOCK_SIZE); 265 266 BitBlock b7654 [4]; 267 BitBlock b3210 [4]; 268 BitBlock b76 [2]; 269 BitBlock b54 [2]; 270 BitBlock b32 [2]; 271 BitBlock b10 [2]; 272 273 BitBlock b7[1]; 274 BitBlock b6[1]; 275 BitBlock b5[1]; 276 BitBlock b4[1]; 277 BitBlock b3[1]; 278 BitBlock b2[1]; 279 BitBlock b1[1]; 280 BitBlock b0[1]; 281 282 for(int i=0; i<8/2; i++) { 283 b7654[i] = hsimd<8>::packh(b76543210[i+i+1], b76543210[i+i]); 284 } 285 286 for(int i=0; i<8/2; i++) { 287 b3210[i] = hsimd<8>::packl(b76543210[i+i+1], b76543210[i+i]); 288 } 289 290 for(int i=0; i<4/2; i++) { 291 b76[i] = hsimd<4>::packh(b7654[i+i+1], b7654[i+i]); 292 } 293 294 for(int i=0; i<4/2; i++) { 295 b54[i] = hsimd<4>::packl(b7654[i+i+1], b7654[i+i]); 296 } 297 298 for(int i=0; i<4/2; i++) { 299 b32[i] = hsimd<4>::packh(b3210[i+i+1], b3210[i+i]); 300 } 301 302 for(int i=0; i<4/2; i++) { 303 b10[i] = hsimd<4>::packl(b3210[i+i+1], b3210[i+i]); 304 } 305 306 for(int i=0; i<2/2; i++) { 307 b7[0] = hsimd<2>::packh(b76[i+i+1], b76[i+i]); 308 } 309 310 for(int i=0; i<2/2; i++) { 311 b6[0] = hsimd<2>::packl(b76[i+i+1], b76[i+i]); 312 } 313 314 for(int i=0; i<2/2; i++) { 315 b5[0] = hsimd<2>::packh(b54[i+i+1], b54[i+i]); 316 } 317 318 for(int i=0; i<2/2; i++) { 319 b4[0] = hsimd<2>::packl(b54[i+i+1], b54[i+i]); 320 } 321 322 for(int i=0; i<2/2; i++) { 323 b3[0] = hsimd<2>::packh(b32[i+i+1], b32[i+i]); 324 } 325 326 for(int i=0; i<2/2; i++) { 327 b2[0] = hsimd<2>::packl(b32[i+i+1], b32[i+i]); 328 } 329 330 for(int i=0; i<2/2; i++) { 331 b1[0] = hsimd<2>::packh(b10[i+i+1], b10[i+i]); 332 } 333 334 for(int i=0; i<2/2; i++) { 335 b0[0] = hsimd<2>::packl(b10[i+i+1], b10[i+i]); 336 } 337 338 printf("\n"); 339 print_register<BitBlock>("b7", b7[0]); 340 print_register<BitBlock>("b6", b6[0]); 341 print_register<BitBlock>("b5", b5[0]); 342 print_register<BitBlock>("b4", b4[0]); 343 print_register<BitBlock>("b3", b3[0]); 344 print_register<BitBlock>("b2", b2[0]); 345 print_register<BitBlock>("b1", b1[0]); 346 print_register<BitBlock>("b0", b0[0]); 347 348 printf("---"); 349 } 350 ////////////////////////////////////////////////////////////////////////////////////////// 351 { 262 352 Byte = (BytePack *) &infile_buffer[k]; 263 353 264 #ifdef BE 265 s2p_ be(Byte[7], Byte[6], Byte[5], Byte[4],354 #ifdef BE 355 s2p_le(Byte[7], Byte[6], Byte[5], Byte[4], 266 356 Byte[3], Byte[2], Byte[1], Byte[0], 267 b7, b6, b5, b4, 357 b7, b6, b5, b4, 268 358 b3, b2, b1, b0); 269 359 … … 279 369 #endif 280 370 281 #ifdef LE 282 s2p(Byte[0], Byte[1], Byte[2], Byte[3], 283 Byte[4], Byte[5], Byte[6], Byte[7], 284 b0, b1, b2, b3, b4, b5, b6, b7); 371 k += BLOCK_SIZE; 372 bytes_avail -= BLOCK_SIZE; 373 } 374 } 375 ////////////////////////////////////////////////////////////////////////////////////////// 376 // Final Partial Block. 377 ////////////////////////////////////////////////////////////////////////////////////////// 378 if(bytes_avail > 0) 379 { 380 381 Byte = (BytePack *) &infile_buffer[k]; 382 BitBlock EOF_mask = eof_mask(bytes_avail); 383 384 #ifdef BE 385 s2p_le(Byte[7], Byte[6], Byte[5], Byte[4], 386 Byte[3], Byte[2], Byte[1], Byte[0], 387 b7, b6, b5, b4, 388 b3, b2, b1, b0); 389 390 391 392 b7 = simd_and(b7, EOF_mask); 393 b6 = simd_and(b6, EOF_mask); 394 b5 = simd_and(b5, EOF_mask); 395 b4 = simd_and(b4, EOF_mask); 396 b3 = simd_and(b3, EOF_mask); 397 b2 = simd_and(b2, EOF_mask); 398 b1 = simd_and(b1, EOF_mask); 399 b0 = simd_and(b0, EOF_mask); 285 400 286 401 printf("\n"); … … 295 410 #endif 296 411 297 k += BLOCK_SIZE; 298 bytes_avail -= BLOCK_SIZE; 299 } 300 301 ////////////////////////////////////////////////////////////////////////////////////////// 302 // Final Partial Block. 303 ////////////////////////////////////////////////////////////////////////////////////////// 304 if(bytes_avail > 0) 305 { 306 307 Byte = (BytePack *) &infile_buffer[k]; 308 BitBlock EOF_mask = eof_mask(bytes_avail); 309 310 #ifdef BE 311 s2p_be(Byte[7], Byte[6], Byte[5], Byte[4], 312 Byte[3], Byte[2], Byte[1], Byte[0], 313 b7, b6, b5, b4, 314 b3, b2, b1, b0); 315 316 317 318 b7 = simd_and(b7, EOF_mask); 319 b6 = simd_and(b6, EOF_mask); 320 b5 = simd_and(b5, EOF_mask); 321 b4 = simd_and(b4, EOF_mask); 322 b3 = simd_and(b3, EOF_mask); 323 b2 = simd_and(b2, EOF_mask); 324 b1 = simd_and(b1, EOF_mask); 325 b0 = simd_and(b0, EOF_mask); 326 327 printf("\n"); 328 print_register<BitBlock>("b7", b7); 329 print_register<BitBlock>("b6", b6); 330 print_register<BitBlock>("b5", b5); 331 print_register<BitBlock>("b4", b4); 332 print_register<BitBlock>("b3", b3); 333 print_register<BitBlock>("b2", b2); 334 print_register<BitBlock>("b1", b1); 335 print_register<BitBlock>("b0", b0); 336 #endif 337 338 #ifdef LE 339 s2p(Byte[0], Byte[1], Byte[2], Byte[3], 340 Byte[4], Byte[5], Byte[6], Byte[7], 341 b0, b1, b2, b3, 342 b4, b5, b6, b7); 343 344 b7 = simd_and(b7, EOF_mask); 345 b6 = simd_and(b6, EOF_mask); 346 b5 = simd_and(b5, EOF_mask); 347 b4 = simd_and(b4, EOF_mask); 348 b3 = simd_and(b3, EOF_mask); 349 b2 = simd_and(b2, EOF_mask); 350 b1 = simd_and(b1, EOF_mask); 351 b0 = simd_and(b0, EOF_mask); 352 353 printf("\n"); 354 print_register<BitBlock>("b7", b7); 355 print_register<BitBlock>("b6", b6); 356 print_register<BitBlock>("b5", b5); 357 print_register<BitBlock>("b4", b4); 358 print_register<BitBlock>("b3", b3); 359 print_register<BitBlock>("b2", b2); 360 print_register<BitBlock>("b1", b1); 361 print_register<BitBlock>("b0", b0); 362 #endif 363 364 } 365 366 } 367 368 369 412 413 } 414 415 } 416 417 418
Note: See TracChangeset
for help on using the changeset viewer.