source: trunk/lib_ir/AgnerTestP/PMCTest/PMCTestB.cpp @ 4221

Last change on this file since 4221 was 4221, checked in by linmengl, 5 years ago

initial checkin of Agner Fog's performance script

File size: 9.2 KB
RevLine 
[4221]1//                       PMCTestB.cpp                    2014-04-15 Agner Fog
2//
3//          Multithread PMC Test program for Windows and Linux
4//
5// This program is intended for testing the performance of a little piece of
6// code written in C or C++. The code to test is inserted at the place marked
7// "Test code start".
8// All sections that can be modified by the user are marked with ###########.
9//
10// The code to test will be executed REPETITIONS times and the test results
11// will be output for each repetition. This program measures how many clock
12// cycles the code to test takes in each repetition. Furthermore, it is
13// possible to set a number of Performance Monitor Counters (PMC) to count
14// the number of micro-operations (uops), cache misses, branch mispredictions,
15// etc.
16//
17// The setup of the Performance Monitor Counters is microprocessor-specific.
// The specifications for PMC setup for each microprocessor family are defined
19// in the tables CounterDefinitions and CounterTypesDesired.
20//
21// See PMCTest.txt for instructions.
22//
23// © 2000-2014 GNU General Public License www.gnu.org/licences
24//////////////////////////////////////////////////////////////////////////////
25
#include "PMCTest.h"
#include <stddef.h>
27
28
29/*############################################################################
30#
31#        Define constants
32#
33############################################################################*/
34
// How many times the code to test is run and measured (at most MAXREPEAT)
#define REPETITIONS               8

// How many threads run the test
#define NUM_THREADS               1

// 1 = use the performance monitor counters, 0 = do not use them
#define USE_PERFORMANCE_COUNTERS  1

// 1 = subtract the measurement overhead from the counts, 0 = do not
#define SUBTRACT_OVERHEAD         1

// How many times the loop that finds the overhead is repeated
#define OVERHEAD_REPETITIONS      5

// Cache line size, used to keep threads from sharing cache lines
#define CACHELINESIZE             64
52
53
54/*############################################################################
55#
56#        list of desired counter types
57#
58############################################################################*/
59//
60// Here you can select which performance monitor counters you want for your test.
61// Select id numbers from the table CounterDefinitions[] in PMCTestA.cpp.
62// The maximum number of counters you can have is MAXCOUNTERS.
// Insert zeroes if you have fewer than MAXCOUNTERS counters.
64
65extern "C" {
66    int CounterTypesDesired[MAXCOUNTERS] = {
67        1,      // core clock cycles (Intel Core 2 and later)
68        9,      // instructions (not P4)
69        100,    // micro-operations
70        311     // data cache mises
71    };
72}
73
74
75/*############################################################################
76#
77#        Thread data
78#
79############################################################################*/
80// Align SThreadData structure by cache line size to avoid multiple threads
81// writing to the same cache line
82ALIGNEDSTRUCTURE(SThreadData, CACHELINESIZE) {
83    //__declspec(align(CACHELINESIZE)) struct SThreadData {
84    // Data for each thread
85    int CountTemp[MAXCOUNTERS+1];      // temporary storage of clock counts and PMC counts
86    int CountOverhead[MAXCOUNTERS+1];  // temporary storage of count overhead
87    int ClockResults[REPETITIONS];     // clock count results
88    int PMCResults[REPETITIONS*MAXCOUNTERS]; // PMC count results
89};
90
91extern "C" {
92    SThreadData ThreadData[NUM_THREADS];// Results for all threads
93    int NumThreads = NUM_THREADS;       // Number of threads
94    int NumCounters = 0;                // Number of valid PMC counters in Counters[]
95    int MaxNumCounters = MAXCOUNTERS;   // Maximum number of PMC counters
96    int UsePMC = USE_PERFORMANCE_COUNTERS;// 0 if no PMC counters used
97    int *PThreadData = (int*)ThreadData;// Pointer to measured data for all threads
98    int ThreadDataSize = sizeof(SThreadData);// Size of per-thread counter data block (bytes)
99    // offset of clock results of first thread into ThreadData (bytes)
100    int ClockResultsOS = int(ThreadData[0].ClockResults-ThreadData[0].CountTemp)*sizeof(int);
101    // offset of PMC results of first thread into ThreadData (bytes)
102    int PMCResultsOS = int(ThreadData[0].PMCResults-ThreadData[0].CountTemp)*sizeof(int);
103    // counter register numbers used
104    int Counters[MAXCOUNTERS] = {0};
105    int EventRegistersUsed[MAXCOUNTERS] = {0};
106    // optional extra output
107    int RatioOut[4] = {0};              // See PMCTest.h for explanation
108    int TempOut = 0;                    // See PMCTest.h for explanation
109        const char * RatioOutTitle = "?";   // Column heading for optional extra output of ratio
110    const char * TempOutTitle = "?";    // Column heading for optional arbitrary output
111}
112
113
114/*############################################################################
115#
116#        User data
117#
118############################################################################*/
119
120// Put any data definitions your test code needs here:
121
// Round A up to the nearest multiple of B, using integer arithmetic.
// Intended for A >= 0 and B > 0.
// Every argument is parenthesized so that expression arguments such as
// ROUND_UP(x+1, n*2) expand with the intended precedence.
// B appears three times in the expansion, so avoid arguments with side effects.
#define ROUND_UP(A,B)  (((A)+(B)-1)/(B)*(B))
123
124// Make sure USER_DATA_SIZE is a multiple of the cache line size, because there
125// is a penalty if multiple threads access the same cache line:
126#define USER_DATA_SIZE  ROUND_UP(1000,CACHELINESIZE)
127
128int UserData[NUM_THREADS][USER_DATA_SIZE];
129
130
131
132//////////////////////////////////////////////////////////////////////////////
133//    Test Loop
134//////////////////////////////////////////////////////////////////////////////
135
136int TestLoop (int thread) {
137    // this function runs the code to test REPETITIONS times
138    // and reads the counters before and after each run:
139    int i;                        // counter index
140    int repi;                     // repetition index
141
142    for (i = 0; i < MAXCOUNTERS+1; i++) {
143        ThreadData[thread].CountOverhead[i] = 0x7FFFFFFF;
144    }
145
146    /*############################################################################
147    #
148    #        Initializations
149    #
150    ############################################################################*/
151
152    // place any user initializations here:
153
154
155    /*############################################################################
156    #
157    #        Initializations end
158    #
159    ############################################################################*/
160
161    // first test loop.
162    // Measure overhead = the test count produced by the test program itself
163    for (repi = 0; repi < OVERHEAD_REPETITIONS; repi++) {
164
165        Serialize();
166
167#if USE_PERFORMANCE_COUNTERS
168        // Read counters
169        for (i = 0; i < MAXCOUNTERS; i++) {
170            ThreadData[thread].CountTemp[i+1] = (int)Readpmc(Counters[i]);
171        }
172#endif
173
174        Serialize();
175        ThreadData[thread].CountTemp[0] = (int)Readtsc();
176        Serialize();
177
178        // no test code here
179
180        Serialize();
181        ThreadData[thread].CountTemp[0] -= (int)Readtsc();
182        Serialize();
183
184#if USE_PERFORMANCE_COUNTERS
185        // Read counters
186        for (i = 0; i < MAXCOUNTERS; i++) {
187            ThreadData[thread].CountTemp[i+1] -= (int)Readpmc(Counters[i]);
188        }
189#endif
190        Serialize();
191
192        // find minimum counts
193        for (i = 0; i < MAXCOUNTERS+1; i++) {
194            if (-ThreadData[thread].CountTemp[i] < ThreadData[thread].CountOverhead[i]) {
195                ThreadData[thread].CountOverhead[i] = -ThreadData[thread].CountTemp[i];
196            }
197        }
198    }
199
200
201    // Second test loop. Includes code to test.
202    // This must be identical to first test loop, except for the test code
203    for (repi = 0; repi < REPETITIONS; repi++) {
204
205        Serialize();
206
207#if USE_PERFORMANCE_COUNTERS
208        // Read counters
209        for (i = 0; i < MAXCOUNTERS; i++) {
210            ThreadData[thread].CountTemp[i+1] = (int)Readpmc(Counters[i]);
211        }
212#endif
213
214        Serialize();
215        ThreadData[thread].CountTemp[0] = (int)Readtsc();
216        Serialize();
217
218
219        /*############################################################################
220        #
221        #        Test code start
222        #
223        ############################################################################*/
224
225        // Put the code to test here,
226        // or a call to a function defined in a separate module
227        //œœ
228
229        for (i = 0; i < 1000; i++) UserData[thread][i] *= 99;
230
231
232        /*############################################################################
233        #
234        #        Test code end
235        #
236        ############################################################################*/
237
238        Serialize();
239        ThreadData[thread].CountTemp[0] -= (int)Readtsc();
240        Serialize();
241
242#if USE_PERFORMANCE_COUNTERS
243        // Read counters
244        for (i = 0; i < MAXCOUNTERS; i++) {
245            ThreadData[thread].CountTemp[i+1] -= (int)Readpmc(Counters[i]);
246        }
247#endif
248        Serialize();
249
250        // subtract overhead
251        ThreadData[thread].ClockResults[repi] = -ThreadData[thread].CountTemp[0] - ThreadData[thread].CountOverhead[0];
252        for (i = 0; i < MAXCOUNTERS; i++) {
253            ThreadData[thread].PMCResults[repi+i*REPETITIONS] = -ThreadData[thread].CountTemp[i+1] - ThreadData[thread].CountOverhead[i+1];
254        }
255    }
256
257    // return
258    return REPETITIONS;
259}
Note: See TracBrowser for help on using the repository browser.