source: icGREP/icgrep-devel/performance/perf.py @ 4127

Last change on this file since 4127 was 4126, checked in by linmengl, 5 years ago

Initial check-in of the performance script, based on Ben's script. It performs a correctness check first and records performance data in JSON format. A Python script will be developed to visualize performance differences.

File size: 13.8 KB
Line 
1from subprocess import *
2from collections import *
3import csv
4import StringIO
5import os.path
6import sys
7import shutil
8import argparse
9import json
10import re
11
# Repetition count handed to ``perf stat -r``: the command is run this many
# times and the average is reported.
RUNS = 1

# Expected match counts, generated with grep and used for the correctness
# check.  The check is necessary to make sure the commands launched by this
# script are executed correctly.
ground_truth_file = 'ground_truth.json'
ground_truth = dict()

# Command-line switches; these defaults apply until the arguments are parsed.
is_reground = False
is_verbose = False
23
def escape_quotes(e):
    r"""
    Prepare *e* for embedding inside a single-quoted shell word.

    Each ' is replaced by '\'' (leave the quoted word, emit an escaped quote,
    re-enter the quoted word).
    """
    pieces = e.split("'")
    return "'\\''".join(pieces)
26
27
def gen_ground_truth(execution):
    """
    Make sure a ground-truth match count exists for this execution.

    The count lives in the global ``ground_truth`` table under
    ``[file description][expression description]``.  When it is missing,
    ``grep -c -E`` is run on the file and the reported count is stored.

    Raises CalledProcessError when grep fails for any reason other than
    "no line matched" (exit status 1).
    """
    file_des = execution.file.description
    exp_des = execution.expression.description

    counts = ground_truth.setdefault(file_des, {})
    if exp_des in counts:
        return

    # The pattern travels to grep as its own argv entry, with no shell in
    # between, so it must NOT be shell-escaped: escaping would change every
    # pattern that contains a single quote.
    cmd = ["grep", "-c", "-E", execution.expression.expression, execution.file.file]
    try:
        res = check_output(cmd)
    except CalledProcessError as e:
        if e.returncode != 1:
            print("grep execution error with {0}, {1}".format(file_des, exp_des))
            raise
        # Exit status 1: grep ran fine but selected no lines.
        res = "0"

    count = int(res.strip())
    print("GROUND TRUTH: {0} {1} {2}".format(file_des, exp_des, count))
    counts[exp_des] = count
51
52
def execute(execution):
    """
    Measure one execution and persist the counter value.

    Ensures the ground truth for the execution exists, creates the directory
    ``output/<file description>/<expression description>/<program>`` if
    needed, runs the measurement RUNS times through execute_one() and writes
    the resulting number to a file named after the perf event in that
    directory.

    Returns the measured counter value.
    """
    gen_ground_truth(execution)

    outputpath = 'output/'+execution.file.description+'/'+execution.expression.description+'/'+execution.program
    if not os.path.exists(outputpath):
        os.makedirs(outputpath)

    one = execute_one(execution, RUNS)
    # 'with' closes the results file even when the write fails.
    with open(outputpath+'/'+execution.stat, 'w') as results_file:
        results_file.write(str(one))

    return one
66
67
def _parse_perf_counter(output, stat):
    """Return the counter value reported for *stat* in ``perf stat -x,`` output."""
    # Use the last line that mentions the event; the counter value is the
    # first comma-separated field of that line.
    for line in reversed(output.splitlines()):
        if line.startswith('#') or stat not in line:
            continue
        return int(line.split(',')[0])
    raise ValueError("no '{0}' counter found in perf output".format(stat))


def execute_one(execution, count):
    """
    Run one program on one file/expression under ``perf stat``.

    perf repeats the command *count* times (``-r``), samples the event
    ``execution.stat`` and writes its CSV report to the file ``perfoutput``.
    Everything the command prints (stdout and stderr) is saved to
    ``output/<file>/<expression>/<program>/output.txt`` and appended to
    ``perflog.txt``; the output directory must already exist.

    If the captured output starts with ``Matching Lines:<n>`` (the format
    this check was written for, i.e. icgrep), <n> is compared with the stored
    ground truth and a message is printed on mismatch.

    Returns the counter value as an int.
    Raises ValueError for an unknown program name or when the counter cannot
    be found in the perf report.
    """
    file_des = execution.file.description
    exp_des = execution.expression.description
    program = execution.program
    stat = execution.stat

    # program name -> (executable, options, table of pattern rewrites or None)
    launchers = {
        'grep': ('grep', ['-c', '-E'], None),
        'nrgrep112': ('nrgrep-1.1.2/nrgrep', ['-c'], nrgrep_override),
        'icgrep': ('../icgrep-build/icgrep', ['-c'], None),
        'nrgrep': ('nrgrep-1.0/nrgrep', ['-c'], nrgrep_override),
        'pcregrep': ('pcre-8.12/pcregrep', ['-c'], None),
        'gre2p': ('gre2p/gre2p', ['-c'], None),
        'agrep': ('agrep-3.41/agrep', ['-0', '-c'], agrep_override),
        'h2cv5': ('h2cv5', ['-c'], None),
    }
    if program not in launchers:
        # Without this guard nothing would run and a stale 'perfoutput' from
        # a previous execution would be reported as this one's result.
        raise ValueError("unknown program '{0}'".format(program))
    binary, options, overrides = launchers[program]

    pattern = execution.expression.expression
    if overrides is not None:
        pattern = overrides.get(pattern, pattern)

    outputpath = 'output/' + file_des + '/' + exp_des + '/' + program

    with open('perflog.txt', 'a') as log_file:
        log_file.write('\nEXECUTION: ' + file_des + ' ' + exp_des + ' ' + program + ' ' + stat + '\n')

    # An argv list (no shell) keeps patterns and paths intact whatever
    # characters they contain, so no quoting or escaping is required.
    argv = ['perf', 'stat', '-x,', '-o', 'perfoutput', '-r', str(count), '-e', stat, binary]
    argv += options + [pattern, execution.file.file]
    proc = Popen(argv, stdout=PIPE, stderr=STDOUT)
    captured = proc.communicate()[0]

    with open(outputpath + '/output.txt', 'wb') as out_file:
        out_file.write(captured)
    with open('perflog.txt', 'ab') as log_file:
        log_file.write(captured)

    with open('perfoutput', 'r') as perf_file:
        value = _parse_perf_counter(perf_file.read(), stat)

    #check execution result with ground truth.
    found = captured.decode('utf-8', 'replace').strip()
    #this regex is designed for icgrep, other program may need more regex
    m = re.match(r'Matching Lines:(\d+)', found)
    if m and int(m.group(1)) != ground_truth[file_des][exp_des]:
        print("Correctness check error: {0} : {1}".format(file_des, exp_des))

    return value
148
149
# Record types of the benchmark matrix.  'types' is a tuple of group tags; a
# file and an expression are only combined when they share a tag.
Expression = namedtuple('Expression', ['expression', 'description', 'types'])
File = namedtuple('File', ['file', 'description', 'types'])
Execution = namedtuple('Execution', ['stat', 'program', 'file', 'expression'])

expressions = []
files = []
executions = []

#http://sljit.sourceforge.net/regex_perf.html
files.append(File('data/mark.txt', 'mark_twain', ('regex_perf',)))

# (pattern, description) pairs of the 'regex_perf' group.
expressions.extend(Expression(regex, label, ('regex_perf',)) for regex, label in [
    ('Twain', 'Twain'),
    ('^Twain', '^Twain'),
    ('Twain$', 'Twain$'),
    ('Huck[a-zA-Z]+|Finn[a-zA-Z]+', 'Huck_or_Finn_'),
    ('a[^x]{20}b', 'a----b'),
    ('Tom|Sawyer|Huckleberry|Finn', 'Tom_or_Huck'),
    ('.{0,3}(Tom|Sawyer|Huckleberry|Finn)', '__Tom_or_Huck'),
    ('[a-zA-Z]+ing', '___ing'),
    ('^[a-zA-Z]{0,4}ing[^a-zA-Z]', '....ing_'),
    ('[a-zA-Z]+ing$', '___ing$'),
    ('^[a-zA-Z ]{5,}$', 'symline5+$'),
    ('^.{16,20}$', 'line16-20'),
    ('([a-f](.[d-m].){0,2}[h-n]){2}', 'af_hnaf_hn'),
    ('([A-Za-z]awyer|[A-Za-z]inn)[^a-zA-Z]', '_awyer_or_inn'),
    ('"[^"]{0,30}[?!.]"', 'quoted_sentence0-30'),
    ('Tom.{10,25}river|river.{10,25}Tom', 'Tom_near_river'),
])

# The 'regex_perf2' group: some of the patterns above with the counted
# repetitions written out explicitly.
expressions.extend(Expression(regex, label, ('regex_perf2',)) for regex, label in [
    ('a[^x][^x][^x][^x][^x][^x][^x][^x][^x][^x][^x][^x][^x][^x][^x][^x][^x][^x][^x][^x]b', 'a----b'),
    ('.?.?.?(Tom|Sawyer|Huckleberry|Finn)', '__Tom_or_Huck'),
    ('^[a-zA-Z]?[a-zA-Z]?[a-zA-Z]?[a-zA-Z]?ing[^a-zA-Z]', '....ing_'),
    ('^[a-zA-Z ][a-zA-Z ][a-zA-Z ][a-zA-Z ][a-zA-Z ][a-zA-Z ]*$', 'symline5+$'),
    ('^.................?.?.?.?$', 'line16-20'),
    ('([a-f](.[d-m].)?(.[d-m].)?[h-n])([a-f](.[d-m].)?(.[d-m].)?[h-n])', 'af_hnaf_hn'),
    ('"[^"]?[^"]?[^"]?[^"]?[^"]?[^"]?[^"]?[^"]?[^"]?[^"]?[^"]?[^"]?[^"]?[^"]?[^"]?[^"]?[^"]?[^"]?[^"]?[^"]?[^"]?[^"]?[^"]?[^"]?[^"]?[^"]?[^"]?[^"]?[^"]?[^"]?[?!.]"', 'quoted_sentence0-30'),
    ('Tom...........?.?.?.?.?.?.?.?.?.?.?.?.?.?.?river|river...........?.?.?.?.?.?.?.?.?.?.?.?.?.?.?Tom', 'Tom_near_river'),
])
185
# Pattern rewrites for the nrgrep runs: when a benchmark expression appears
# here as a key, the value is passed to nrgrep / nrgrep112 in its place.  The
# replacements spell counted repetitions ({m,n}) out explicitly -- presumably
# because nrgrep lacks that syntax; confirm against the nrgrep manual.
nrgrep_override = {
    'a[^x]{20}b': 'a[^x][^x][^x][^x][^x][^x][^x][^x][^x][^x][^x][^x][^x][^x][^x][^x][^x][^x][^x][^x]b',
    '.{0,3}(Tom|Sawyer|Huckleberry|Finn)': '.?.?.?(Tom|Sawyer|Huckleberry|Finn)',
    '^[a-zA-Z]{0,4}ing[^a-zA-Z]': '^[a-zA-Z]?[a-zA-Z]?[a-zA-Z]?[a-zA-Z]?ing[^a-zA-Z]',
    '^[a-zA-Z ]{5,}$': '^[a-zA-Z ][a-zA-Z ][a-zA-Z ][a-zA-Z ][a-zA-Z ][a-zA-Z ]*[\x0d]$',
    '^.{16,20}$': '^................?.?.?.?[\x0d]$',
    '([a-f](.[d-m].){0,2}[h-n]){2}': '[a-f](.[d-m].)?(.[d-m].)?[h-n][a-f](.[d-m].)?(.[d-m].)?[h-n]',
    '"[^"]{0,30}[?!.]"': '"[^"]?[^"]?[^"]?[^"]?[^"]?[^"]?[^"]?[^"]?[^"]?[^"]?[^"]?[^"]?[^"]?[^"]?[^"]?[^"]?[^"]?[^"]?[^"]?[^"]?[^"]?[^"]?[^"]?[^"]?[^"]?[^"]?[^"]?[^"]?[^"]?[^"]?[?!.]"',
    'Tom.{10,25}river|river.{10,25}Tom': 'Tom...........?.?.?.?.?.?.?.?.?.?.?.?.?.?.?river|river...........?.?.?.?.?.?.?.?.?.?.?.?.?.?.?Tom',
}

# agrep uses all nrgrep rewrites plus its own, which additionally replace
# 'x+' by 'xx*' and escape the comma.  Start from a COPY: a plain assignment
# would alias the two names to one dict and leak the agrep-only rewrites
# into the nrgrep runs.
agrep_override = dict(nrgrep_override)
agrep_override.update({
    'Huck[a-zA-Z]+|Finn[a-zA-Z]+': 'Huck[a-zA-Z][a-zA-Z]*|Finn[a-zA-Z][a-zA-Z]*',
    '[a-zA-Z]+ing': '[a-zA-Z][a-zA-Z]*ing',
    '[a-zA-Z]+ing$': '[a-zA-Z][a-zA-Z]*ing$',
    '([a-zA-Z][a-zA-Z0-9]*)://([^ /]+)(/[^ ]*)?': '([a-zA-Z][a-zA-Z0-9]*)://([^ /][^ /]*)(/[^ ]*)?',
    '([^ @]+)@([^ @]+)': '([^ @][^ @]*)@([^ @][^ @]*)',
    '([a-zA-Z][a-zA-Z0-9]*)://([^ /]+)(/[^ ]*)?|([^ @]+)@([^ @]+)': '([a-zA-Z][a-zA-Z0-9]*)://([^ /][^ /]*)(/[^ ]*)?|([^ @][^ @]*)@([^ @][^ @]*)',
    # '\\,' is a literal backslash followed by a comma.
    '[0-9]+[.][0-9]+, *[0-9]+[.][0-9]+': '[0-9][0-9]*[.][0-9][0-9]*\\, *[0-9][0-9]*[.][0-9][0-9]*',
})
204
205
#http://lh3lh3.users.sourceforge.net/reb.shtml
files.append(File('data/howto', 'howto', ('reb',)))

# (pattern, description) pairs of the 'reb' group; the commented-out entries
# are currently disabled.
expressions.extend(Expression(regex, label, ('reb',)) for regex, label in [
    ('@', 'at'),
    #('([a-zA-Z][a-zA-Z0-9]*)://([^ /]+)(/[^ ]*)?', 'URI'),
    ('([^ @]+)@([^ @]+)', 'Email'),
    ('([0-9][0-9]?)/([0-9][0-9]?)/([0-9][0-9]([0-9][0-9])?)', 'Date'),
    ('(([a-zA-Z][a-zA-Z0-9]*)://|mailto:)([^ /]+)(/[^ ]*)?|([^ @]+)@([^ @]+)', 'URIorEmail'),
    ('[ ](0x)?([a-fA-F0-9][a-fA-F0-9])+[.:,?! ]', 'xHexBytes'),
    ('[A-Z]((([a-zA-Z]*a[a-zA-Z]*[ ])*[a-zA-Z]*e[a-zA-Z]*[ ])*[a-zA-Z]*s[a-zA-Z]*[ ])*[.?!]', 'zStarHeight'),
    #('0x([a-fA-F0-9][a-fA-F0-9])+', 'HexBytes'),
])


files.append(File('../../../trunk/QA/TD/roads-2.gml', 'roads', ('gml',)))

# (pattern, description) pairs of the 'gml' group.
expressions.extend(Expression(regex, label, ('gml',)) for regex, label in [
    ('<!--([^-]|-[^-])*-->', 'comment'),
    ('<[^!?][^>]*>', 'tag'),
    ('<gml:[^>]* xmlns:gml="[^"]*"[^>]*>', 'xmlns:gml'),
    ('[0-9]+[.][0-9]+, *[0-9]+[.][0-9]+', 'coord'),
    (r"""["']|&quot;|&apos;|&#0*3[49];|&#x0*2[27];""", 'xquote'),
])


#expressions.append(Expression('(^|[^A-Z0-9:])([A-F0-9]{1,4}:){7}[A-F0-9]{1,4}($|[^A-Z0-9:])', 'IPv6', ('reb',)))
227
# Programs to benchmark; each name must be one that execute_one() knows how
# to launch.
programs = ['icgrep']

# perf events sampled for every selected file/expression/program combination.
# A wider set that can be enabled instead:
#stats = ['cycles:u', 'instructions:u', 'cache-misses:u', 'L1-dcache-load-misses:u', 'L1-dcache-store-misses:u', 'L1-dcache-prefetch-misses:u', 'L1-icache-load-misses:u']
stats = ['cycles:u', 'instructions:u']
232
233
# Optional group filter: the first command-line word that is not an option
# flag (e.g. 'regex_perf').  Flags such as -v / -r are handled by argparse
# and must not be mistaken for a group name -- otherwise no execution at all
# would be selected.
type_filter = next((arg for arg in sys.argv[1:] if not arg.startswith('-')), None)
typeargument = type_filter is not None

# Combine every file with every expression that shares a group tag with it
# (restricted to the requested group when a filter was given) and schedule
# one execution per program and perf event.
for bench_file in files:
    for bench_expression in expressions:
        shared_types = set(bench_file.types) & set(bench_expression.types)
        if typeargument:
            selected = type_filter in shared_types
        else:
            selected = bool(shared_types)
        if not selected:
            continue
        for program in programs:
            for stat in stats:
                executions.append(Execution(stat, program, bench_file, bench_expression))
# In-memory CSV table; one row per execution is written to it and the whole
# table is printed once all measurements are done.
csv_output = StringIO.StringIO()
writer = csv.writer(csv_output)

# Start every run with an empty perf log; execute_one() appends to it.
with open('perflog.txt', 'w') as log_file:
    pass
259
if __name__ == "__main__":
    # Parse args
    parser = argparse.ArgumentParser()
    parser.add_argument("-r", "--reground", help="regenerate groundtruth (grep) results", action="store_true")
    parser.add_argument("-v", "--verbose", help="more output", action="store_true")
    # The group filter is consumed from sys.argv while the execution list is
    # built at module level; it is declared here only so that argparse accepts
    # it instead of aborting with "unrecognized arguments".
    parser.add_argument("type", nargs="?", help="only run the benchmark group with this tag, e.g. regex_perf")
    args = parser.parse_args()
    is_reground = args.reground
    is_verbose = args.verbose

    # Reuse stored grep results unless asked to regenerate them.  The file is
    # not touched before the final dump, so an interrupted run cannot leave
    # an empty (unparsable) file behind.
    if os.path.isfile(ground_truth_file) and not is_reground:
        with open(ground_truth_file, 'r') as f:
            try:
                ground_truth = json.load(f)
            except ValueError:
                # Empty or corrupt file: fall back to regenerating everything.
                print("ignoring unreadable {0}; regenerating ground truth".format(ground_truth_file))
                ground_truth = {}

    for execution in executions:
        calc = execute(execution)
        writer.writerow((execution.file.description, execution.expression.description, execution.program, execution.stat, calc))

    print(csv_output.getvalue())

    #dump ground truth
    with open(ground_truth_file, 'w') as f:
        json.dump(obj=ground_truth, fp=f, indent=4, separators=(',', ': '))
Note: See TracBrowser for help on using the repository browser.