Changeset 6098 for icGREP/icgrep-devel/icgrep/IR_Gen/idisa_avx_builder.cpp
 Timestamp:
 Jun 16, 2018, 6:52:23 PM (10 months ago)
 File:

 1 edited
Legend:
 Unmodified
 Added
 Removed

icGREP/icgrep-devel/icgrep/IR_Gen/idisa_avx_builder.cpp
r6087 r6098 751 751 } 752 752 753 Value * IDISA_AVX512F_Builder::esimd_mergeh(unsigned fw, Value * a, Value * b) { 754 if (hostCPUFeatures.hasAVX512BW && ((fw == 1)  (fw == 2))) { 755 // Bit interleave using shuffle. 756 Value * shufFn = Intrinsic::getDeclaration(getModule(), Intrinsic::x86_avx512_mask_pshuf_b_512); 757 // Make a shuffle table that translates the lower 4 bits of each byte in 758 // order to spread out the bits: xxxxdcba => .d.c.b.a 759 // We use two copies of the table for the AVX2 _mm256_shuffle_epi8 760 Constant * interleave_table = bit_interleave_byteshuffle_table(fw); 761 // Merge the bytes. 762 Value * byte_merge = esimd_mergeh(8, a, b); 763 Value * zeroByteSplat = fwCast(8, allZeroes()); 764 Constant * mask = ConstantInt::getAllOnesValue(getInt64Ty()); 765 Value * low_bits = CreateCall(shufFn, {interleave_table, fwCast(8, simd_and(byte_merge, simd_lomask(8))), zeroByteSplat, mask}); 766 Value * high_bits = simd_slli(16, CreateCall(shufFn, {interleave_table, fwCast(8, simd_srli(8, byte_merge, 4)), zeroByteSplat, mask}), fw); 767 // For each 16bit field, interleave the low bits of the two bytes. 768 low_bits = simd_or(simd_and(low_bits, simd_lomask(16)), simd_srli(16, low_bits, 8fw)); 769 // For each 16bit field, interleave the high bits of the two bytes. 770 high_bits = simd_or(simd_and(high_bits, simd_himask(16)), simd_slli(16, high_bits, 8fw)); 771 return simd_or(low_bits, high_bits); 772 } 773 // Otherwise use default AVX2 logic. 774 return IDISA_AVX2_Builder::esimd_mergeh(fw, a, b); 775 } 776 777 Value * IDISA_AVX512F_Builder::esimd_mergel(unsigned fw, Value * a, Value * b) { 778 if (hostCPUFeatures.hasAVX512BW && ((fw == 1)  (fw == 2))) { 779 // Bit interleave using shuffle. 
780 Value * shufFn = Intrinsic::getDeclaration(getModule(), Intrinsic::x86_avx512_mask_pshuf_b_512); 781 // Make a shuffle table that translates the lower 4 bits of each byte in 782 // order to spread out the bits: xxxxdcba => .d.c.b.a 783 // We use two copies of the table for the AVX2 _mm256_shuffle_epi8 784 Constant * interleave_table = bit_interleave_byteshuffle_table(fw); 785 // Merge the bytes. 786 Value * byte_merge = esimd_mergel(8, a, b); 787 Value * zeroByteSplat = fwCast(8, allZeroes()); 788 Constant * mask = ConstantInt::getAllOnesValue(getInt64Ty()); 789 Value * low_bits = CreateCall(shufFn, {interleave_table, fwCast(8, simd_and(byte_merge, simd_lomask(8))), zeroByteSplat, mask}); 790 Value * high_bits = simd_slli(16, CreateCall(shufFn, {interleave_table, fwCast(8, simd_srli(8, byte_merge, 4)), zeroByteSplat, mask}), fw); 791 // For each 16bit field, interleave the low bits of the two bytes. 792 low_bits = simd_or(simd_and(low_bits, simd_lomask(16)), simd_srli(16, low_bits, 8fw)); 793 // For each 16bit field, interleave the high bits of the two bytes. 794 high_bits = simd_or(simd_and(high_bits, simd_himask(16)), simd_slli(16, high_bits, 8fw)); 795 return simd_or(low_bits, high_bits); 796 } 797 // Otherwise use default AVX2 logic. 798 return IDISA_AVX2_Builder::esimd_mergel(fw, a, b); 799 } 800 801 753 802 void IDISA_AVX512F_Builder::getAVX512Features() { 754 803 llvm::StringMap<bool> features;
Note: See TracChangeset
for help on using the changeset viewer.