Changeset 3939 for trunk


Ignore:
Timestamp:
Jul 31, 2014, 8:41:10 PM (4 years ago)
Author:
linmengl
Message:

test out PEXT with s2p_ideal, put down results

Location:
trunk/lib_ir
Files:
1 added
5 edited

Legend:

Unmodified
Added
Removed
  • trunk/lib_ir/CMakeLists.txt

    r3938 r3939  
    11cmake_minimum_required (VERSION 2.8)
    22project (IR_Library)
     3
     4enable_testing()
    35
    46option (USE_AVX2 "Compile for Haswell arch with AVX2, BMI, BMI2")
     
    2527add_executable(test_pack test_pack.cpp s2p_optimized.o)
    2628
     29# test packh_2,4,8 only on avx2
     30if (USE_AVX2)
     31    add_executable(test_pack_avx2 test_pack_avx2.cpp s2p_optimized.o)
     32    add_test(
     33      NAME ctest_pack_avx2
     34      COMMAND ./test_pack_avx2)
     35endif (USE_AVX2)
     36
    2737include_directories("xmlwf/util" "xmlwf/lib")
    2838add_executable(xmlwf xmlwf/src/xmlwf.cpp s2p_optimized.o)
     
    4555  DEPENDS ${PROJECT_SOURCE_DIR}/s2p.ll)
    4656
    47 enable_testing()
    4857add_test(
    4958  NAME ctest_s2p
     
    6271  COMMAND ${CMAKE_CTEST_COMMAND}
    6372  DEPENDS test_s2p test_link test_pack xmlwf)
     73
     74# `make check` also depends on test_pack_avx2
     75if (USE_AVX2)
     76  add_dependencies(check test_pack_avx2)
     77endif (USE_AVX2)
    6478
    6579add_custom_target (perf_xmlwf
  • trunk/lib_ir/README.md

    r3925 r3939  
    1010make
    1111"""
     12
     13Compile on Haswell arch with AVX2 and BMI2
     14===================
     15For an Intel Haswell target, append `-DUSE_AVX2=on` to the `cmake` flags. This adds a new test and enables `s2p_ideal`.
    1216
    1317Check
  • trunk/lib_ir/s2p.h

    r3933 r3939  
    55
    66extern "C" {
    7   //void s2p_ideal(BitBlock s0, BitBlock s1, BitBlock s2, BitBlock s3, BitBlock s4, BitBlock s5, BitBlock s6, BitBlock s7, BitBlock *p0, BitBlock *p1, BitBlock *p2, BitBlock *p3, BitBlock *p4, BitBlock *p5, BitBlock *p6, BitBlock *p7);
    87
    98  BitBlock packh_16(BitBlock a, BitBlock b);
     
    1615  BitBlock packl_4(BitBlock a, BitBlock b);
    1716
     17  BitBlock packh_2(BitBlock a, BitBlock b);
     18  BitBlock packl_2(BitBlock a, BitBlock b);
     19
    1820  BitBlock ifh_1(BitBlock a, BitBlock b, BitBlock c);
    1921  BitBlock srli_16(BitBlock a, BitBlock shift_mask);
     
    2325
    2426  void s2p_bytepack_ir(BitBlock s0, BitBlock s1, BitBlock s2, BitBlock s3, BitBlock s4, BitBlock s5, BitBlock s6, BitBlock s7, BitBlock* p0, BitBlock* p1, BitBlock* p2, BitBlock* p3, BitBlock* p4, BitBlock* p5, BitBlock* p6, BitBlock* p7);
     27
     28  void s2p_ideal_ir(BitBlock s0, BitBlock s1, BitBlock s2, BitBlock s3, BitBlock s4, BitBlock s5, BitBlock s6, BitBlock s7, BitBlock *p0, BitBlock *p1, BitBlock *p2, BitBlock *p3, BitBlock *p4, BitBlock *p5, BitBlock *p6, BitBlock *p7);
    2529
    2630  BitBlock const16_1();
     
    3539//S2P_ALGORITHM: s2p_bytepack_inline, written in pure IR in order to use
    3640//immediate constants in shifting.
    37 #define S2P_ALGORITHM s2p_bytepack_inline
     41//s2p_ideal: use 3 stage packh2,4,8
     42#define S2P_ALGORITHM s2p_ideal
     43
     44#define s2p_ideal(s0, s1, s2, s3, s4, s5, s6, s7, p0, p1, p2, p3, p4, p5, p6, p7) \
     45  s2p_ideal_ir(s0, s1, s2, s3, s4, s5, s6, s7, &p0, &p1, &p2, &p3, &p4, &p5, &p6, &p7)
    3846
    3947#define s2p_bytepack_inline(s0, s1, s2, s3, s4, s5, s6, s7, p0, p1, p2, p3, p4, p5, p6, p7) \
  • trunk/lib_ir/s2p.ll

    r3938 r3939  
    241241}
    242242
    243 define void @s2p_ideal(<4 x i32> %s0, <4 x i32> %s1, <4 x i32> %s2, <4 x i32> %s3,
    244                        <4 x i32> %s4, <4 x i32> %s5, <4 x i32> %s6, <4 x i32> %s7,
    245                        <4 x i32>* %p0, <4 x i32>* %p1, <4 x i32>* %p2, <4 x i32>* %p3,
    246                        <4 x i32>* %p4, <4 x i32>* %p5, <4 x i32>* %p6, <4 x i32>* %p7) {
     243define void @s2p_ideal_ir(<4 x i32> %s0, <4 x i32> %s1, <4 x i32> %s2, <4 x i32> %s3,
     244                          <4 x i32> %s4, <4 x i32> %s5, <4 x i32> %s6, <4 x i32> %s7,
     245                          <4 x i32>* %p0, <4 x i32>* %p1, <4 x i32>* %p2, <4 x i32>* %p3,
     246                          <4 x i32>* %p4, <4 x i32>* %p5, <4 x i32>* %p6, <4 x i32>* %p7) {
    247247entry:
    248248
  • trunk/lib_ir/xmlwf/perf.txt

    r3937 r3939  
    4343only compile s2p with avx2/bmi2. The rest is SSE2
    4444xmlwf_perf   &  3.807   &   4.241   &   4.452   &   4.762   &   5.078 \\ \hline
     45
     46===========================================
     47use PEXT for packh_2/4/8
     48xmlwf_perf   &  4.349   &   4.81   &    4.979   &   5.342   &   5.661 \\ \hline
Note: See TracChangeset for help on using the changeset viewer.