Changeset 6076 for icGREP/icgrep-devel/icgrep/IR_Gen/idisa_sse_builder.cpp
 Timestamp:
 Jun 9, 2018, 1:51:10 PM (11 months ago)
 File:

 1 edited
Legend:
 Unmodified
 Added
 Removed

icGREP/icgrep-devel/icgrep/IR_Gen/idisa_sse_builder.cpp
r6057 r6076 1 1 /* 2 * Copyright (c) 2016 International Characters. 2 * Copyright (c) 2018 International Characters. 3 3 * This software is licensed to the public under the Open Software License 3.0. 4 4 * icgrep is a trademark of International Characters. … … 14 14 std::string IDISA_SSE_Builder::getBuilderUniqueName() { return mBitBlockWidth != 128 ? "SSE_" + std::to_string(mBitBlockWidth) : "SSE";} 15 15 std::string IDISA_SSE2_Builder::getBuilderUniqueName() { return mBitBlockWidth != 128 ? "SSE2_" + std::to_string(mBitBlockWidth) : "SSE2";} 16 std::string IDISA_SSSE3_Builder::getBuilderUniqueName() { return mBitBlockWidth != 128 ? "SSSE3_" + std::to_string(mBitBlockWidth) : "SSSE3";} 16 17 17 18 Value * IDISA_SSE2_Builder::hsimd_packh(unsigned fw, Value * a, Value * b) { … … 21 22 } 22 23 // Otherwise use default logic. 23 return IDISA_Builder::hsimd_packh(fw, a, b); 24 return IDISA_SSE_Builder::hsimd_packh(fw, a, b); 24 25 } 25 26 … … 154 155 } 155 156 156 Value * IDISA_SSE2_Builder::mvmd_shuffle(unsigned fw, Value * a, Value * shuffle_table) { 157 Value * IDISA_SSE2_Builder::mvmd_shuffle(unsigned fw, Value * a, Value * index_vector) { 157 158 if ((mBitBlockWidth == 128) && (fw == 64)) { 158 159 // First create a vector with exchanged values of the 2 fields. 
… … 165 166 Value * xchg_vec = ConstantVector::get({xchg, 2}); 166 167 Constant * oneSplat = ConstantVector::getSplat(2, ConstantInt::get(getInt64Ty(), 1)); 167 Value * exchange_mask = simd_eq(fw, simd_and(shuffle_table, oneSplat), xchg_vec); 168 Value * exchange_mask = simd_eq(fw, simd_and(index_vector, oneSplat), xchg_vec); 168 169 Value * rslt = simd_xor(simd_and(changed, exchange_mask), a); 169 170 return rslt; 170 171 } 171 return IDISA_Builder::mvmd_shuffle(fw, a, shuffle_table); 172 return IDISA_SSE_Builder::mvmd_shuffle(fw, a, index_vector); 172 173 } 173 174 … … 187 188 } 188 189 189 190 } 190 Constant * IDISA_SSSE3_Builder::bit_interleave_byteshuffle_table(unsigned fw) { 191 const unsigned fieldCount = mBitBlockWidth/8; 192 if (fw > 2) llvm::report_fatal_error("bit_interleave_byteshuffle_table requires fw == 1 or fw == 2"); 193 // Bit interleave using shuffle. 194 // Make a shuffle table that translates the lower 4 bits of each byte in 195 // order to spread out the bits: xxxxdcba => .d.c.b.a (fw = 1) 196 Constant * bit_interleave[fieldCount]; 197 for (unsigned i = 0; i < fieldCount; i++) { 198 if (fw == 1) 199 bit_interleave[i] = getInt8((i & 1) | ((i & 2) << 1) | ((i & 4) << 2) | ((i & 8) << 3)); 200 else bit_interleave[i] = getInt8((i & 3) | ((i & 0x0C) << 2)); 201 } 202 return ConstantVector::get({bit_interleave, fieldCount}); 203 } 204 205 Value * IDISA_SSSE3_Builder::esimd_mergeh(unsigned fw, Value * a, Value * b) { 206 if ((fw == 1) || (fw == 2)) { 207 Constant * interleave_table = bit_interleave_byteshuffle_table(fw); 208 // Merge the bytes. 209 Value * byte_merge = esimd_mergeh(8, a, b); 210 Value * low_bits = mvmd_shuffle(8, interleave_table, fwCast(8, simd_and(byte_merge, simd_lomask(8)))); 211 Value * high_bits = simd_slli(16, mvmd_shuffle(8, interleave_table, fwCast(8, simd_srli(8, byte_merge, 4))), fw); 212 // For each 16-bit field, interleave the low bits of the two bytes. 
213 low_bits = simd_or(simd_and(low_bits, simd_lomask(16)), simd_srli(16, low_bits, 8-fw)); 214 // For each 16-bit field, interleave the high bits of the two bytes. 215 high_bits = simd_or(simd_and(high_bits, simd_himask(16)), simd_slli(16, high_bits, 8-fw)); 216 return simd_or(low_bits, high_bits); 217 } 218 // Otherwise use default SSE logic. 219 return IDISA_SSE2_Builder::esimd_mergeh(fw, a, b); 220 } 221 222 Value * IDISA_SSSE3_Builder::esimd_mergel(unsigned fw, Value * a, Value * b) { 223 if ((fw == 1) || (fw == 2)) { 224 Constant * interleave_table = bit_interleave_byteshuffle_table(fw); 225 // Merge the bytes. 226 Value * byte_merge = esimd_mergel(8, a, b); 227 Value * low_bits = mvmd_shuffle(8, interleave_table, fwCast(8, simd_and(byte_merge, simd_lomask(8)))); 228 Value * high_bits = simd_slli(16, mvmd_shuffle(8, interleave_table, fwCast(8, simd_srli(8, byte_merge, 4))), fw); 229 // For each 16-bit field, interleave the low bits of the two bytes. 230 low_bits = simd_or(simd_and(low_bits, simd_lomask(16)), simd_srli(16, low_bits, 8-fw)); 231 // For each 16-bit field, interleave the high bits of the two bytes. 232 high_bits = simd_or(simd_and(high_bits, simd_himask(16)), simd_slli(16, high_bits, 8-fw)); 233 return simd_or(low_bits, high_bits); 234 } 235 // Otherwise use default SSE2 logic. 236 return IDISA_SSE2_Builder::esimd_mergel(fw, a, b); 237 } 238 239 llvm::Value * IDISA_SSSE3_Builder::mvmd_shuffle(unsigned fw, llvm::Value * data_table, llvm::Value * index_vector) { 240 if (mBitBlockWidth == 128 && fw > 8) { 241 // Create a table for shuffling with smaller field widths. 242 const unsigned fieldCount = mBitBlockWidth/fw; 243 Constant * idxMask = ConstantVector::getSplat(fieldCount, ConstantInt::get(getIntNTy(fw), fieldCount-1)); 244 Value * idx = simd_and(index_vector, idxMask); 245 unsigned half_fw = fw/2; 246 unsigned field_count = mBitBlockWidth/half_fw; 247 // Build a ConstantVector of alternating 0 and 1 values. 
248 Constant * Idxs[field_count]; 249 for (unsigned int i = 0; i < field_count; i++) { 250 Idxs[i] = ConstantInt::get(getIntNTy(fw/2), i & 1); 251 } 252 Constant * splat01 = ConstantVector::get({Idxs, field_count}); 253 254 Value * half_fw_indexes = simd_or(idx, mvmd_slli(half_fw, idx, 1)); 255 half_fw_indexes = simd_add(fw, simd_add(fw, half_fw_indexes, half_fw_indexes), splat01); 256 Value * rslt = mvmd_shuffle(half_fw, data_table, half_fw_indexes); 257 return rslt; 258 } 259 if (mBitBlockWidth == 128 && fw == 8) { 260 Value * shuf8Func = Intrinsic::getDeclaration(getModule(), Intrinsic::x86_ssse3_pshuf_b_128); 261 return CreateCall(shuf8Func, {fwCast(8, data_table), fwCast(8, simd_and(index_vector, simd_lomask(8)))}); 262 } 263 return IDISA_SSE2_Builder::mvmd_shuffle(fw, data_table, index_vector); 264 } 265 266 267 }
Note: See TracChangeset
for help on using the changeset viewer.