source: trunk/lib_ir/AgnerTestP/PMCTest/PMCTestB.pytemplate.cpp @ 4232

Last change on this file since 4232 was 4232, checked in by linmengl, 5 years ago

micro benchmark with Agner's testp ready

File size: 9.7 KB
Line 
//                       PMCTestB.cpp                    2014-04-15 Agner Fog
//
//          Multithread PMC Test program for Windows and Linux
//
// This program is intended for testing the performance of a little piece of
// code written in C or C++. The code to test is inserted at the place marked
// "Test code start".
// All sections that can be modified by the user are marked with ###########.
//
// The code to test will be executed REPETITIONS times and the test results
// will be output for each repetition. This program measures how many clock
// cycles the code to test takes in each repetition. Furthermore, it is
// possible to set a number of Performance Monitor Counters (PMC) to count
// the number of micro-operations (uops), cache misses, branch mispredictions,
// etc.
//
// The setup of the Performance Monitor Counters is microprocessor-specific.
// The specifications for PMC setup for each microprocessor family are defined
// in the tables CounterDefinitions and CounterTypesDesired.
//
// See PMCTest.txt for instructions.

//////////////////////////////////////////////////////////////////////////////
24
#include <cstddef>

#include "PMCTest.h"
#include <utility.h>
27extern "C" {
28  SIMD_type {{DeclaredIRFunc}}(SIMD_type a, SIMD_type b);
29}
30
31
/*############################################################################
#
#        Define constants
#
############################################################################*/

// How many times the code to test is run and measured.
// May be raised, but not above MAXREPEAT.
#define REPETITIONS  8

// How many threads run the test
#define NUM_THREADS  1

// 1 = read the performance monitor counters, 0 = do not use them
#define USE_PERFORMANCE_COUNTERS  1

// 1 = subtract the measured overhead from the counts, 0 = do not subtract
#define SUBTRACT_OVERHEAD 1

// How many times the empty measurement loop is run to find the overhead
#define OVERHEAD_REPETITIONS  5

// Cache line size; used to keep different threads off the same cache line
#define CACHELINESIZE  64
55
56
57/*############################################################################
58#
59#        list of desired counter types
60#
61############################################################################*/
62//
63// Here you can select which performance monitor counters you want for your test.
64// Select id numbers from the table CounterDefinitions[] in PMCTestA.cpp.
65// The maximum number of counters you can have is MAXCOUNTERS.
66// Insert zeroes if you have less than MAXCOUNTERS counters.
67
68extern "C" {
69    int CounterTypesDesired[MAXCOUNTERS] = {
70        1,      // core clock cycles (Intel Core 2 and later)
71        9,      // instructions (not P4)
72        100,    // micro-operations
73        311     // data cache mises
74    };
75}
76
77
78/*############################################################################
79#
80#        Thread data
81#
82############################################################################*/
83// Align SThreadData structure by cache line size to avoid multiple threads
84// writing to the same cache line
85ALIGNEDSTRUCTURE(SThreadData, CACHELINESIZE) {
86    //__declspec(align(CACHELINESIZE)) struct SThreadData {
87    // Data for each thread
88    int CountTemp[MAXCOUNTERS+1];      // temporary storage of clock counts and PMC counts
89    int CountOverhead[MAXCOUNTERS+1];  // temporary storage of count overhead
90    int ClockResults[REPETITIONS];     // clock count results
91    int PMCResults[REPETITIONS*MAXCOUNTERS]; // PMC count results
92};
93
94extern "C" {
95    SThreadData ThreadData[NUM_THREADS];// Results for all threads
96    int NumThreads = NUM_THREADS;       // Number of threads
97    int NumCounters = 0;                // Number of valid PMC counters in Counters[]
98    int MaxNumCounters = MAXCOUNTERS;   // Maximum number of PMC counters
99    int UsePMC = USE_PERFORMANCE_COUNTERS;// 0 if no PMC counters used
100    int *PThreadData = (int*)ThreadData;// Pointer to measured data for all threads
101    int ThreadDataSize = sizeof(SThreadData);// Size of per-thread counter data block (bytes)
102    // offset of clock results of first thread into ThreadData (bytes)
103    int ClockResultsOS = int(ThreadData[0].ClockResults-ThreadData[0].CountTemp)*sizeof(int);
104    // offset of PMC results of first thread into ThreadData (bytes)
105    int PMCResultsOS = int(ThreadData[0].PMCResults-ThreadData[0].CountTemp)*sizeof(int);
106    // counter register numbers used
107    int Counters[MAXCOUNTERS] = {0};
108    int EventRegistersUsed[MAXCOUNTERS] = {0};
109    // optional extra output
110    int RatioOut[4] = {0};              // See PMCTest.h for explanation
111    int TempOut = 0;                    // See PMCTest.h for explanation
112        const char * RatioOutTitle = "?";   // Column heading for optional extra output of ratio
113    const char * TempOutTitle = "?";    // Column heading for optional arbitrary output
114}
115
116
/*############################################################################
#
#        User data
#
############################################################################*/

// Put any data definitions your test code needs here:

// Round up A to nearest multiple of B (integer arithmetic, B > 0).
// Both parameters are parenthesized so that expression arguments such as
// ROUND_UP(n, 2+2) expand with the intended precedence.
// Note that each argument is evaluated more than once; pass side-effect-free
// expressions only.
#define ROUND_UP(A,B)  (((A)+(B)-1)/(B)*(B))
126
127#define DATA_SIZE {{DataSize}}
128// Make sure USER_DATA_SIZE is a multiple of the cache line size, because there
129// is a penalty if multiple threads access the same cache line:
130#define USER_DATA_SIZE  ROUND_UP(DATA_SIZE,CACHELINESIZE)
131
132SIMD_type UserData[NUM_THREADS][USER_DATA_SIZE];
133SIMD_type one_constant = simd<2>::himask();
134
135
136
137//////////////////////////////////////////////////////////////////////////////
138//    Test Loop
139//////////////////////////////////////////////////////////////////////////////
140
141int TestLoop (int thread) {
142    // this function runs the code to test REPETITIONS times
143    // and reads the counters before and after each run:
144    int i;                        // counter index
145    int repi;                     // repetition index
146
147    for (i = 0; i < MAXCOUNTERS+1; i++) {
148        ThreadData[thread].CountOverhead[i] = 0x7FFFFFFF;
149    }
150
151    /*############################################################################
152    #
153    #        Initializations
154    #
155    ############################################################################*/
156
157    // place any user initializations here:
158    for (int i = 0; i < DATA_SIZE; i++) UserData[thread][i] = {{UserDataInit}};
159
160
161    /*############################################################################
162    #
163    #        Initializations end
164    #
165    ############################################################################*/
166
167    // first test loop.
168    // Measure overhead = the test count produced by the test program itself
169    for (repi = 0; repi < OVERHEAD_REPETITIONS; repi++) {
170
171        Serialize();
172
173#if USE_PERFORMANCE_COUNTERS
174        // Read counters
175        for (i = 0; i < MAXCOUNTERS; i++) {
176            ThreadData[thread].CountTemp[i+1] = (int)Readpmc(Counters[i]);
177        }
178#endif
179
180        Serialize();
181        ThreadData[thread].CountTemp[0] = (int)Readtsc();
182        Serialize();
183
184        // no test code here
185
186        Serialize();
187        ThreadData[thread].CountTemp[0] -= (int)Readtsc();
188        Serialize();
189
190#if USE_PERFORMANCE_COUNTERS
191        // Read counters
192        for (i = 0; i < MAXCOUNTERS; i++) {
193            ThreadData[thread].CountTemp[i+1] -= (int)Readpmc(Counters[i]);
194        }
195#endif
196        Serialize();
197
198        // find minimum counts
199        for (i = 0; i < MAXCOUNTERS+1; i++) {
200            if (-ThreadData[thread].CountTemp[i] < ThreadData[thread].CountOverhead[i]) {
201                ThreadData[thread].CountOverhead[i] = -ThreadData[thread].CountTemp[i];
202            }
203        }
204    }
205
206
207    // Second test loop. Includes code to test.
208    // This must be identical to first test loop, except for the test code
209    for (repi = 0; repi < REPETITIONS; repi++) {
210
211        Serialize();
212
213#if USE_PERFORMANCE_COUNTERS
214        // Read counters
215        for (i = 0; i < MAXCOUNTERS; i++) {
216            ThreadData[thread].CountTemp[i+1] = (int)Readpmc(Counters[i]);
217        }
218#endif
219
220        Serialize();
221        ThreadData[thread].CountTemp[0] = (int)Readtsc();
222        Serialize();
223
224
225        /*############################################################################
226        #
227        #        Test code start
228        #
229        ############################################################################*/
230
231        // Put the code to test here,
232        // or a call to a function defined in a separate module       
233        {% if NoLoop %}
234          {% for ii in range(0, TestInstrCount) %}
235          UserData[thread][{{ii}}] = {{ TestInstr | replace("[i]", "[" ~ ii ~ "]") }};
236          {% endfor %}
237        {% else %}
238        for (i = 0; i < {{TestInstrCount}}; i++) UserData[thread][i] = {{TestInstr}};
239        {% endif %}
240
241
242        /*############################################################################
243        #
244        #        Test code end
245        #
246        ############################################################################*/
247
248        Serialize();
249        ThreadData[thread].CountTemp[0] -= (int)Readtsc();
250        Serialize();
251
252#if USE_PERFORMANCE_COUNTERS
253        // Read counters
254        for (i = 0; i < MAXCOUNTERS; i++) {
255            ThreadData[thread].CountTemp[i+1] -= (int)Readpmc(Counters[i]);
256        }
257#endif
258        Serialize();
259
260        // subtract overhead
261        ThreadData[thread].ClockResults[repi] = -ThreadData[thread].CountTemp[0] - ThreadData[thread].CountOverhead[0];
262        for (i = 0; i < MAXCOUNTERS; i++) {
263            ThreadData[thread].PMCResults[repi+i*REPETITIONS] = -ThreadData[thread].CountTemp[i+1] - ThreadData[thread].CountOverhead[i+1];
264        }
265    }
266
267    // return
268    return REPETITIONS;
269}
Note: See TracBrowser for help on using the repository browser.