Changeset 6076


Ignore:
Timestamp:
Jun 9, 2018, 1:51:10 PM (4 months ago)
Author:
cameron
Message:

IDISA_SSSE3 builder; other IDISA operations

Location:
icGREP/icgrep-devel/icgrep/IR_Gen
Files:
7 edited

Legend:

Unmodified
Added
Removed
  • icGREP/icgrep-devel/icgrep/IR_Gen/idisa_avx_builder.cpp

    r6063 r6076  
    387387
    388388   
    389 llvm::Value * IDISA_AVX2_Builder::mvmd_shuffle(unsigned fw, llvm::Value * a, llvm::Value * shuffle_table) {
     389llvm::Value * IDISA_AVX2_Builder::mvmd_shuffle(unsigned fw, llvm::Value * a, llvm::Value * index_vector) {
    390390    if (mBitBlockWidth == 256 && fw > 32) {
     391        const unsigned fieldCount = mBitBlockWidth/fw;
    391392        // Create a table for shuffling with smaller field widths.
     393        Constant * idxMask = ConstantVector::getSplat(fieldCount, ConstantInt::get(getIntNTy(fw), fieldCount-1));
     394        Value * idx = simd_and(index_vector, idxMask);
    392395        unsigned half_fw = fw/2;
    393396        unsigned field_count = mBitBlockWidth/half_fw;
     
    395398        Constant * Idxs[field_count];
    396399        for (unsigned int i = 0; i < field_count; i++) {
    397             Idxs[i] = getInt32(i & 1);
     400            Idxs[i] = ConstantInt::get(getIntNTy(fw/2), i & 1);
    398401        }
    399402        Constant * splat01 = ConstantVector::get({Idxs, field_count});
    400         Value * half_shuffle_table = simd_or(shuffle_table, mvmd_slli(half_fw, shuffle_table, 1));
    401         half_shuffle_table = simd_add(fw, simd_add(fw, half_shuffle_table, half_shuffle_table), splat01);
    402         Value * rslt = mvmd_shuffle(half_fw, a, half_shuffle_table);
     403        Value * half_fw_indexes = simd_or(idx, mvmd_slli(half_fw, idx, 1));
     404        half_fw_indexes = simd_add(fw, simd_add(fw, half_fw_indexes, half_fw_indexes), splat01);
     405        Value * rslt = mvmd_shuffle(half_fw, a, half_fw_indexes);
    403406        return rslt;
    404407    }
    405408    if (mBitBlockWidth == 256 && fw == 32) {
    406409        Value * shuf32Func = Intrinsic::getDeclaration(getModule(), Intrinsic::x86_avx2_permd);
    407         return CreateCall(shuf32Func, {fwCast(32, a), fwCast(32, shuffle_table)});
    408     }
    409     return IDISA_Builder::mvmd_shuffle(fw, a, shuffle_table);
     410        return CreateCall(shuf32Func, {fwCast(32, a), fwCast(32, index_vector)});
     411    }
     412    return IDISA_Builder::mvmd_shuffle(fw, a, index_vector);
    410413}
    411414
     
    580583}
    581584
    582 llvm::Value * IDISA_AVX512F_Builder::mvmd_shuffle(unsigned fw, llvm::Value * a, llvm::Value * shuffle_table) {
     585llvm::Value * IDISA_AVX512F_Builder::mvmd_shuffle(unsigned fw, llvm::Value * data_table, llvm::Value * index_vector) {
    583586    const unsigned fieldCount = mBitBlockWidth/fw;
    584587    if (mBitBlockWidth == 512 && fw == 32) {
    585588        Value * permuteFunc = Intrinsic::getDeclaration(getModule(), Intrinsic::x86_avx512_mask_vpermt2var_d_512);
    586589        Constant * mask = ConstantInt::getAllOnesValue(getIntNTy(fieldCount));
    587         return CreateCall(permuteFunc, {fwCast(fw, shuffle_table), fwCast(fw, a), fwCast(fw, a), mask});
     590        return CreateCall(permuteFunc, {fwCast(fw, index_vector), fwCast(fw, data_table), fwCast(fw, data_table), mask});
    588591    }
    589592    if (mBitBlockWidth == 512 && fw == 64) {
    590593        Value * permuteFunc = Intrinsic::getDeclaration(getModule(), Intrinsic::x86_avx512_mask_vpermt2var_q_512);
    591594        Constant * mask = ConstantInt::getAllOnesValue(getIntNTy(fieldCount));
    592         return CreateCall(permuteFunc, {fwCast(fw, shuffle_table), fwCast(fw, a), fwCast(fw, a), mask});
     595        return CreateCall(permuteFunc, {fwCast(fw, index_vector), fwCast(fw, data_table), fwCast(fw, data_table), mask});
    593596    }
    594597    if (mBitBlockWidth == 512 && fw == 16 && hostCPUFeatures.hasAVX512BW) {
    595598        Value * permuteFunc = Intrinsic::getDeclaration(getModule(), Intrinsic::x86_avx512_maskz_vpermt2var_hi_512);
    596599        Constant * mask = ConstantInt::getAllOnesValue(getIntNTy(fieldCount));
    597         return CreateCall(permuteFunc, {fwCast(fw, shuffle_table), fwCast(fw, a), fwCast(fw, a), mask});
    598     }
    599     return IDISA_Builder::mvmd_shuffle(fw, a, shuffle_table);
    600 }
    601 
    602 llvm::Value * IDISA_AVX512F_Builder::mvmd_shuffle2(unsigned fw, Value * a, Value * b, llvm::Value * shuffle_table) {
     600        return CreateCall(permuteFunc, {fwCast(fw, index_vector), fwCast(fw, data_table), fwCast(fw, data_table), mask});
     601    }
     602    return IDISA_Builder::mvmd_shuffle(fw, data_table, index_vector);
     603}
     604
     605llvm::Value * IDISA_AVX512F_Builder::mvmd_shuffle2(unsigned fw, Value * table0, llvm::Value * table1, llvm::Value * index_vector) {
    603606    const unsigned fieldCount = mBitBlockWidth/fw;
    604607    if (mBitBlockWidth == 512 && fw == 32) {
    605608        Value * permuteFunc = Intrinsic::getDeclaration(getModule(), Intrinsic::x86_avx512_mask_vpermt2var_d_512);
    606609        Constant * mask = ConstantInt::getAllOnesValue(getIntNTy(fieldCount));
    607         return CreateCall(permuteFunc, {fwCast(fw, shuffle_table), fwCast(fw, a), fwCast(fw, b), mask});
     610        return CreateCall(permuteFunc, {fwCast(fw, index_vector), fwCast(fw, table0), fwCast(fw, table1), mask});
    608611    }
    609612    if (mBitBlockWidth == 512 && fw == 64) {
    610613        Value * permuteFunc = Intrinsic::getDeclaration(getModule(), Intrinsic::x86_avx512_mask_vpermt2var_q_512);
    611614        Constant * mask = ConstantInt::getAllOnesValue(getIntNTy(fieldCount));
    612         return CreateCall(permuteFunc, {fwCast(fw, shuffle_table), fwCast(fw, a), fwCast(fw, b), mask});
     615        return CreateCall(permuteFunc, {fwCast(fw, index_vector), fwCast(fw, table0), fwCast(fw, table1), mask});
    613616    }
    614617    if (mBitBlockWidth == 512 && fw == 16 && hostCPUFeatures.hasAVX512BW) {
    615618        Value * permuteFunc = Intrinsic::getDeclaration(getModule(), Intrinsic::x86_avx512_maskz_vpermt2var_hi_512);
    616619        Constant * mask = ConstantInt::getAllOnesValue(getIntNTy(fieldCount));
    617         return CreateCall(permuteFunc, {fwCast(fw, shuffle_table), fwCast(fw, a), fwCast(fw, b), mask});
    618     }
    619     return IDISA_Builder::mvmd_shuffle2(fw, a, b, shuffle_table);
     620        return CreateCall(permuteFunc, {fwCast(fw, index_vector), fwCast(fw, table0), fwCast(fw, table1), mask});
     621    }
     622    return IDISA_Builder::mvmd_shuffle2(fw, table0, table1, index_vector);
    620623}
    621624
  • icGREP/icgrep-devel/icgrep/IR_Gen/idisa_avx_builder.h

    r6055 r6076  
    5151    llvm::Value * mvmd_srl(unsigned fw, llvm::Value * a, llvm::Value * shift) override;
    5252    llvm::Value * mvmd_sll(unsigned fw, llvm::Value * a, llvm::Value * shift) override;
    53     llvm::Value * mvmd_shuffle(unsigned fw, llvm::Value * a, llvm::Value * shuffle_table) override;
     53    llvm::Value * mvmd_shuffle(unsigned fw, llvm::Value * data_table, llvm::Value * index_vector) override;
    5454    llvm::Value * mvmd_compress(unsigned fw, llvm::Value * a, llvm::Value * select_mask) override;
    5555    llvm::Value * simd_pext(unsigned fw, llvm::Value * v, llvm::Value * extract_mask) override;
     
    7979    llvm::Value * mvmd_dslli(unsigned fw, llvm::Value * a, llvm::Value * b, unsigned shift) override;
    8080    llvm::Value * hsimd_signmask(unsigned fw, llvm::Value * a) override;
    81     llvm::Value * mvmd_shuffle(unsigned fw, llvm::Value * a, llvm::Value * shuffle_table) override;
    82     llvm::Value * mvmd_shuffle2(unsigned fw, llvm::Value * a, llvm::Value * b, llvm::Value * shuffle_table) override;
     81    llvm::Value * mvmd_shuffle(unsigned fw, llvm::Value * data_table, llvm::Value * index_vector) override;
     82    llvm::Value * mvmd_shuffle2(unsigned fw, llvm::Value * table0, llvm::Value * table1, llvm::Value * index_vector) override;
    8383    llvm::Value * mvmd_compress(unsigned fw, llvm::Value * a, llvm::Value * select_mask) override;
    8484    llvm::Value * mvmd_srl(unsigned fw, llvm::Value * a, llvm::Value * shift) override;
  • icGREP/icgrep-devel/icgrep/IR_Gen/idisa_builder.cpp

    r6071 r6076  
    139139Value * IDISA_Builder::simd_ugt(unsigned fw, Value * a, Value * b) {
    140140    if (fw == 1) return simd_and(a, simd_not(b));
     141    if (fw < 8) {
     142        Value * half_ugt = simd_ugt(fw/2, a, b);
     143        Value * half_eq = simd_eq(fw/2, a, b);
     144        Value * ugt_0 = simd_or(simd_srli(fw, half_ugt, fw/2), simd_and(half_ugt, simd_srli(fw, half_eq, fw/2)));
     145        return simd_or(ugt_0, simd_slli(32, ugt_0, fw/2));
     146    }
    141147    if (fw < 8) report_fatal_error("Unsupported field width: ugt " + std::to_string(fw));
    142148    return CreateSExt(CreateICmpUGT(fwCast(fw, a), fwCast(fw, b)), fwVectorType(fw));
     
    235241        Idxs[i] = ConstantInt::get(fwTy, i + field_count);
    236242    }
    237     Value * shuffle = simd_sub(fw, ConstantVector::get({Idxs, field_count}), simd_fill(fw, shift));
    238     Value * rslt = mvmd_shuffle2(fw, fwCast(fw, b), fwCast(fw, a), shuffle);
     243    Value * shuffle_indexes = simd_sub(fw, ConstantVector::get({Idxs, field_count}), simd_fill(fw, shift));
     244    Value * rslt = mvmd_shuffle2(fw, fwCast(fw, b), fwCast(fw, a), shuffle_indexes);
    239245    return rslt;
    240246}
     
    593599}
    594600
    595 Value * IDISA_Builder::mvmd_shuffle(unsigned fw, Value * a, Value * shuffle_table) {
     601Value * IDISA_Builder::mvmd_shuffle(unsigned fw, Value * table, Value * index_vector) {
    596602    report_fatal_error("Unsupported field width: mvmd_shuffle " + std::to_string(fw));
    597603}
    598604   
    599 Value * IDISA_Builder::mvmd_shuffle2(unsigned fw, Value * a, Value *b, Value * shuffle_table) {
     605Value * IDISA_Builder::mvmd_shuffle2(unsigned fw, Value * table0, Value * table1, Value * index_vector) {
    600606    //  Use two shuffles, selecting between their results by the table-selector bit (value field_count) of each index_vector field.
    601607    const auto field_count = mBitBlockWidth/fw;
    602608    Constant * selectorSplat = ConstantVector::getSplat(field_count, ConstantInt::get(getIntNTy(fw), field_count));
    603     Value * selectMask = simd_eq(fw, simd_and(shuffle_table, selectorSplat), selectorSplat);
    604     Value * tbl = simd_and(shuffle_table, simd_not(selectorSplat));
    605     Value * rslt= simd_or(simd_and(mvmd_shuffle(fw, a, tbl), simd_not(selectMask)), simd_and(mvmd_shuffle(fw, b, tbl), selectMask));
     609    Value * selectMask = simd_eq(fw, simd_and(index_vector, selectorSplat), selectorSplat);
     610    Value * tbl = simd_and(index_vector, simd_not(selectorSplat));
     611    Value * rslt= simd_or(simd_and(mvmd_shuffle(fw, table0, index_vector), simd_not(selectMask)), simd_and(mvmd_shuffle(fw, table1, index_vector), selectMask));
    606612    return rslt;
    607613}
  • icGREP/icgrep-devel/icgrep/IR_Gen/idisa_builder.h

    r6059 r6076  
    152152    virtual llvm::Value * mvmd_dslli(unsigned fw, llvm::Value * a, llvm::Value * b, unsigned shift);
    153153    virtual llvm::Value * mvmd_dsll(unsigned fw, llvm::Value * a, llvm::Value * b, llvm::Value * shift);
    154     virtual llvm::Value * mvmd_shuffle(unsigned fw, llvm::Value * a, llvm::Value * shuffle_table);
    155     virtual llvm::Value * mvmd_shuffle2(unsigned fw, llvm::Value * a, llvm::Value *b, llvm::Value * shuffle_table);
     154    virtual llvm::Value * mvmd_shuffle(unsigned fw, llvm::Value * data_table, llvm::Value * index_vector);
     155    virtual llvm::Value * mvmd_shuffle2(unsigned fw, llvm::Value * table0, llvm::Value * table1, llvm::Value * index_vector);
    156156    virtual llvm::Value * mvmd_compress(unsigned fw, llvm::Value * a, llvm::Value * select_mask);
    157157
  • icGREP/icgrep-devel/icgrep/IR_Gen/idisa_sse_builder.cpp

    r6057 r6076  
    11/*
    2  *  Copyright (c) 2016 International Characters.
     2 *  Copyright (c) 2018 International Characters.
    33 *  This software is licensed to the public under the Open Software License 3.0.
    44 *  icgrep is a trademark of International Characters.
     
    1414std::string IDISA_SSE_Builder::getBuilderUniqueName() { return mBitBlockWidth != 128 ? "SSE_" + std::to_string(mBitBlockWidth) : "SSE";}
    1515std::string IDISA_SSE2_Builder::getBuilderUniqueName() { return mBitBlockWidth != 128 ? "SSE2_" + std::to_string(mBitBlockWidth) : "SSE2";}
     16std::string IDISA_SSSE3_Builder::getBuilderUniqueName() { return mBitBlockWidth != 128 ? "SSSE3_" + std::to_string(mBitBlockWidth) : "SSSE3";}
    1617
    1718Value * IDISA_SSE2_Builder::hsimd_packh(unsigned fw, Value * a, Value * b) {   
     
    2122    }
    2223    // Otherwise use default logic.
    23     return IDISA_Builder::hsimd_packh(fw, a, b);
     24    return IDISA_SSE_Builder::hsimd_packh(fw, a, b);
    2425}
    2526
     
    154155}
    155156   
    156 Value * IDISA_SSE2_Builder::mvmd_shuffle(unsigned fw, Value * a, Value * shuffle_table) {
     157Value * IDISA_SSE2_Builder::mvmd_shuffle(unsigned fw, Value * a, Value * index_vector) {
    157158    if ((mBitBlockWidth == 128) && (fw == 64)) {
    158159        // First create a vector with exchanged values of the 2 fields.
     
    165166        Value * xchg_vec = ConstantVector::get({xchg, 2});
    166167        Constant * oneSplat = ConstantVector::getSplat(2, ConstantInt::get(getInt64Ty(), 1));
    167         Value * exchange_mask = simd_eq(fw, simd_and(shuffle_table, oneSplat), xchg_vec);
     168        Value * exchange_mask = simd_eq(fw, simd_and(index_vector, oneSplat), xchg_vec);
    168169        Value * rslt = simd_xor(simd_and(changed, exchange_mask), a);
    169170        return rslt;
    170171    }
    171     return IDISA_Builder::mvmd_shuffle(fw, a, shuffle_table);
     172    return IDISA_SSE_Builder::mvmd_shuffle(fw, a, index_vector);
    172173}
    173174
     
    187188}
    188189
    189 
    190 }
     190Constant * IDISA_SSSE3_Builder::bit_interleave_byteshuffle_table(unsigned fw) {
     191    const unsigned fieldCount = mBitBlockWidth/8;
     192    if (fw > 2) llvm::report_fatal_error("bit_interleave_byteshuffle_table requires fw == 1 or fw == 2");
     193    // Bit interleave using shuffle.
     194    // Make a shuffle table that translates the lower 4 bits of each byte in
     195    // order to spread out the bits: xxxxdcba => .d.c.b.a (fw = 1)
     196    Constant * bit_interleave[fieldCount];
     197    for (unsigned i = 0; i < fieldCount; i++) {
     198        if (fw == 1)
     199            bit_interleave[i] = getInt8((i & 1) | ((i & 2) << 1) | ((i & 4) << 2) | ((i & 8) << 3));
     200        else bit_interleave[i] = getInt8((i & 3) | ((i & 0x0C) << 2));
     201    }
     202    return ConstantVector::get({bit_interleave, fieldCount});
     203}
     204
     205Value * IDISA_SSSE3_Builder::esimd_mergeh(unsigned fw, Value * a, Value * b) {
     206    if ((fw == 1) || (fw == 2)) {
     207        Constant * interleave_table = bit_interleave_byteshuffle_table(fw);
     208        // Merge the bytes.
     209        Value * byte_merge = esimd_mergeh(8, a, b);
     210        Value * low_bits = mvmd_shuffle(8, interleave_table, fwCast(8, simd_and(byte_merge, simd_lomask(8))));
     211        Value * high_bits = simd_slli(16, mvmd_shuffle(8, interleave_table, fwCast(8, simd_srli(8, byte_merge, 4))), fw);
     212        // For each 16-bit field, interleave the low bits of the two bytes.
     213        low_bits = simd_or(simd_and(low_bits, simd_lomask(16)), simd_srli(16, low_bits, 8-fw));
     214        // For each 16-bit field, interleave the high bits of the two bytes.
     215        high_bits = simd_or(simd_and(high_bits, simd_himask(16)), simd_slli(16, high_bits, 8-fw));
     216        return simd_or(low_bits, high_bits);
     217    }
     218    // Otherwise use default SSE2 logic.
     219    return IDISA_SSE2_Builder::esimd_mergeh(fw, a, b);
     220}
     221
     222Value * IDISA_SSSE3_Builder::esimd_mergel(unsigned fw, Value * a, Value * b) {
     223    if ((fw == 1) || (fw == 2)) {
     224        Constant * interleave_table = bit_interleave_byteshuffle_table(fw);
     225        // Merge the bytes.
     226        Value * byte_merge = esimd_mergel(8, a, b);
     227        Value * low_bits = mvmd_shuffle(8, interleave_table, fwCast(8, simd_and(byte_merge, simd_lomask(8))));
     228        Value * high_bits = simd_slli(16, mvmd_shuffle(8, interleave_table, fwCast(8, simd_srli(8, byte_merge, 4))), fw);
     229        // For each 16-bit field, interleave the low bits of the two bytes.
     230        low_bits = simd_or(simd_and(low_bits, simd_lomask(16)), simd_srli(16, low_bits, 8-fw));
     231        // For each 16-bit field, interleave the high bits of the two bytes.
     232        high_bits = simd_or(simd_and(high_bits, simd_himask(16)), simd_slli(16, high_bits, 8-fw));
     233        return simd_or(low_bits, high_bits);
     234    }
     235    // Otherwise use default SSE2 logic.
     236    return IDISA_SSE2_Builder::esimd_mergel(fw, a, b);
     237}
     238
     239llvm::Value * IDISA_SSSE3_Builder::mvmd_shuffle(unsigned fw, llvm::Value * data_table, llvm::Value * index_vector) {
     240    if (mBitBlockWidth == 128 && fw > 8) {
     241        // Create a table for shuffling with smaller field widths.
     242        const unsigned fieldCount = mBitBlockWidth/fw;
     243        Constant * idxMask = ConstantVector::getSplat(fieldCount, ConstantInt::get(getIntNTy(fw), fieldCount-1));
     244        Value * idx = simd_and(index_vector, idxMask);
     245        unsigned half_fw = fw/2;
     246        unsigned field_count = mBitBlockWidth/half_fw;
     247        // Build a ConstantVector of alternating 0 and 1 values.
     248        Constant * Idxs[field_count];
     249        for (unsigned int i = 0; i < field_count; i++) {
     250            Idxs[i] = ConstantInt::get(getIntNTy(fw/2), i & 1);
     251        }
     252        Constant * splat01 = ConstantVector::get({Idxs, field_count});
     253       
     254        Value * half_fw_indexes = simd_or(idx, mvmd_slli(half_fw, idx, 1));
     255        half_fw_indexes = simd_add(fw, simd_add(fw, half_fw_indexes, half_fw_indexes), splat01);
     256        Value * rslt = mvmd_shuffle(half_fw, data_table, half_fw_indexes);
     257        return rslt;
     258    }
     259    if (mBitBlockWidth == 128 && fw == 8) {
     260        Value * shuf8Func = Intrinsic::getDeclaration(getModule(), Intrinsic::x86_ssse3_pshuf_b_128);
     261        return CreateCall(shuf8Func, {fwCast(8, data_table), fwCast(8, simd_and(index_vector, simd_lomask(8)))});
     262    }
     263    return IDISA_SSE2_Builder::mvmd_shuffle(fw, data_table, index_vector);
     264}
     265
     266
     267}
  • icGREP/icgrep-devel/icgrep/IR_Gen/idisa_sse_builder.h

    r6050 r6076  
    55 *  Copyright (c) 2018 International Characters.
    66 *  This software is licensed to the public under the Open Software License 3.0.
    7  *  icgrep is a trademark of International Characters.
    87 */
    98
     
    4039    llvm::Value * hsimd_packl(unsigned fw, llvm::Value * a, llvm::Value * b) override;
    4140    std::pair<llvm::Value *, llvm::Value *> bitblock_advance(llvm::Value * a, llvm::Value * shiftin, unsigned shift) override;
    42     llvm::Value * mvmd_shuffle(unsigned fw, llvm::Value * a, llvm::Value * shuffle_table) override;
     41    llvm::Value * mvmd_shuffle(unsigned fw, llvm::Value * data_table, llvm::Value * index_vector) override;
    4342    ~IDISA_SSE2_Builder() {}
     43};
     44
     45class IDISA_SSSE3_Builder : public IDISA_SSE2_Builder {
     46public:
     47   
     48    IDISA_SSSE3_Builder(llvm::LLVMContext & C, unsigned bitBlockWidth, unsigned stride)
     49    : IDISA_Builder(C, bitBlockWidth, stride)
     50    , IDISA_SSE2_Builder(C, bitBlockWidth, stride)
     51    {
     52
     53    }
     54   
     55    virtual std::string getBuilderUniqueName() override;
     56    llvm::Value * esimd_mergeh(unsigned fw, llvm::Value * a, llvm::Value * b) override;
     57    llvm::Value * esimd_mergel(unsigned fw, llvm::Value * a, llvm::Value * b) override;
     58    llvm::Value * mvmd_shuffle(unsigned fw, llvm::Value * data_table, llvm::Value * index_vector) override;
     59    ~IDISA_SSSE3_Builder() {}
     60
     61private:
     62    llvm::Constant * bit_interleave_byteshuffle_table(unsigned fw);
    4463};
    4564
  • icGREP/icgrep-devel/icgrep/IR_Gen/idisa_target.cpp

    r6055 r6076  
    3535    }
    3636    return hostCPUFeatures;
     37}
     38
     39bool SSSE3_available() {
     40    StringMap<bool> features;
     41    if (sys::getHostCPUFeatures(features)) {
     42        return features.lookup("ssse3");
     43    }
     44    return false;
    3745}
    3846
     
    8795        return new KernelBuilderImpl<IDISA_I64_Builder>(C, codegen::BlockSize, codegen::BlockSize);
    8896    }
     97    if (SSSE3_available()) return new KernelBuilderImpl<IDISA_SSSE3_Builder>(C, codegen::BlockSize, codegen::BlockSize);
    8998    return new KernelBuilderImpl<IDISA_SSE2_Builder>(C, codegen::BlockSize, codegen::BlockSize);
    9099}
Note: See TracChangeset for help on using the changeset viewer.