source: icGREP/icgrep-devel/performance/perf.py @ 4140

Last change on this file since 4140 was 4140, checked in by linmengl, 5 years ago

remove ipdb; error handle in perf.py

File size: 14.9 KB
Line 
1from subprocess import *
2from collections import *
3import os.path
4import sys
5import shutil
6import argparse
7import json
8import re
9import time
10
# Number of repetitions per measurement. execute() passes this to
# execute_one(), which forwards it to `perf stat -r`.
RUNS = 1

# Expected match counts used for the correctness check, generated by grep.
# The check is necessary to make sure the commands launched by this script
# were executed correctly.
# Layout: ground_truth[file description][expression description] = int count
ground_truth = {}
# JSON file the ground truth is loaded from and dumped back to by the
# __main__ section of this script.
ground_truth_file = 'ground_truth.json'
19
20
def escape_quotes(e):
    """
    Make a string safe to embed between single quotes on a shell command line.

    Every single quote in e is replaced by the four characters '\'' (close
    the quoted string, emit an escaped quote, reopen the quoted string).
    """
    pieces = e.split("'")
    return "'\\''".join(pieces)
23
24
def gen_ground_truth(execution):
    """
    Make sure a ground-truth match count exists for this execution.

    Looks up ground_truth[file description][expression description]; when
    the entry is missing, runs `grep -c -E` on the execution's file and
    stores the resulting line count.

    Raises CalledProcessError when grep fails with an exit status other
    than 1 (status 1 only means "no line matched" and is recorded as 0).
    """
    file_des = execution.file.description
    exp_des = execution.expression.description

    per_file = ground_truth.setdefault(file_des, {})
    if exp_des in per_file:
        return

    # grep is started from an argument list, so no shell ever sees the
    # pattern: it must be passed verbatim. Shell-escaping it here would
    # change the regex whenever it contains a single quote.
    pattern = execution.expression.expression
    try:
        res = check_output(["grep", "-c", "-E", pattern, execution.file.file])
    except CalledProcessError as e:
        if e.returncode != 1:
            print("grep execution error with {0}, {1}".format(file_des, exp_des))
            # There is no count to record; propagate the failure instead of
            # continuing with an undefined result.
            raise
        res = "0"

    count = int(res.strip())
    print("GROUND TRUTH: {0} {1} {2}".format(file_des, exp_des, count))
    per_file[exp_des] = count
48
49
def check_count_result(found_string, truth_number):
    """
    Compare a program's reported match count with the ground truth.

    found_string is the text the program printed; truth_number is the
    expected count as an int. Nothing is returned: a mismatch, or output
    in which no count can be recognised, is reported on stdout.
    """
    # The first regex is designed for icgrep ("Matching Lines:<n>"), the
    # second for grep (a bare "<n>"); other programs may need more regexes.
    for pattern in (r'Matching Lines:(\d+)', r'(\d+)'):
        m = re.match(pattern, found_string)
        if m:
            found_number = int(m.group(1))
            if found_number != truth_number:
                # Report only values available in this function; the file
                # and expression descriptions are not passed in.
                print("Correctness check error: expected {0}, found {1}".format(
                    truth_number, found_number))
            return

    print("Warning: '{0}' and '{1}' may not match".format(found_string, truth_number))
66
67
def execute(execution):
    """
    Run one execution and store its measured counter value on disk.

    Ensures the ground truth for the execution exists, creates the
    directory output/<file description>/<expression description>/<program>
    if needed, measures the execution with execute_one() using RUNS
    repetitions, and writes the value to a file named after the stat inside
    that directory.

    Returns the value produced by execute_one().
    """
    if args.verbose:
        print("Executing: regex {0} on file {1}".format(execution.expression.description,
                                                        execution.file.description))

    gen_ground_truth(execution)

    outputpath = 'output/'+execution.file.description+'/'+execution.expression.description+'/'+execution.program
    if not os.path.exists(outputpath):
        os.makedirs(outputpath)

    one = execute_one(execution, RUNS)
    # The context manager closes the file even when the write fails.
    with open(outputpath+'/'+execution.stat, 'w') as results_file:
        results_file.write(str(one))

    return one
85
86
def _shell_quote(text):
    """Wrap text in single quotes so the shell passes it through literally."""
    return "'" + escape_quotes(text) + "'"


def _program_invocation(program):
    """
    Return (command prefix, pattern override table or None) for a program.

    The override table maps an expression to the rewritten form that must
    be handed to that program instead. Raises ValueError for a program this
    script does not know how to launch.
    """
    invocations = {
        'grep': ('grep -c -E', None),
        'nrgrep112': ('nrgrep-1.1.2/nrgrep -c', nrgrep_override),
        'icgrep': ('../icgrep-build/icgrep -c', None),
        'nrgrep': ('nrgrep-1.0/nrgrep -c', nrgrep_override),
        'pcregrep': ('pcre-8.12/pcregrep -c', None),
        'gre2p': ('gre2p/gre2p -c', None),
        'agrep': ('agrep-3.41/agrep -0 -c', agrep_override),
        'h2cv5': ('h2cv5 -c', None),
    }
    if program not in invocations:
        raise ValueError("unsupported program: {0}".format(program))
    return invocations[program]


def _parse_perf_counter(output, stat):
    """
    Extract the counter value for stat from `perf stat -x,` output.

    The last line mentioning stat is used and its first comma-separated
    field is taken as the value. Raises ValueError when no such line exists
    or the field is not an integer.
    """
    for line in reversed(output.splitlines()):
        if stat in line and not line.startswith('#'):
            return int(line.split(',')[0])
    raise ValueError("no counter value for {0} found in perfoutput".format(stat))


def execute_one(execution, count):
    """
    Measure one execution with `perf stat` and verify its match count.

    execution supplies the stat (perf event), program, file and expression;
    count is the number of repetitions handed to `perf stat -r`. The
    program's own output is written to output.txt in the execution's output
    directory and appended to perflog.txt; perf writes its counters to the
    file perfoutput in the current directory.

    Returns the counter value as an int. Raises ValueError for an
    unsupported program or when the counter cannot be found in perfoutput.
    """
    file_des = execution.file.description
    exp_des = execution.expression.description
    outputpath = 'output/'+file_des+'/'+exp_des+'/'+execution.program

    # Validate the program before touching any file, so an unknown program
    # cannot silently fall through and report stale numbers.
    command_prefix, overrides = _program_invocation(execution.program)
    pattern = execution.expression.expression
    if overrides is not None:
        pattern = overrides.get(pattern, pattern)

    with open('perflog.txt', 'a') as log_file:
        log_file.write('\nEXECUTION: '+file_des+' '+exp_des+' '+execution.program+' '+execution.stat+'\n')

    # Drop the previous measurement: if perf fails to start, reading
    # perfoutput below must fail rather than return an old value.
    if os.path.exists('perfoutput'):
        os.remove('perfoutput')

    redirectoutput = ' 2>&1 | tee '+_shell_quote(outputpath+'/output.txt')+' >> perflog.txt'
    cmd = ('perf stat -x, -o perfoutput -r '+str(count)+' -e '+execution.stat
           + ' '+command_prefix+' '+_shell_quote(pattern)
           + ' '+_shell_quote(execution.file.file)+redirectoutput)
    Popen(cmd, shell=True).wait()

    with open('perfoutput', 'r') as output_file:
        value = _parse_perf_counter(output_file.read(), execution.stat)

    #check execution result with ground truth.
    with open(outputpath + '/output.txt') as res_file:
        found = res_file.read().strip()
    check_count_result(found, ground_truth[file_des][exp_des])

    return value
163
164
# Record types and the global work lists used by this script.
# Expression: a regex, a short description (also used as a directory name
#             and as a key in the result dicts) and a tuple of type tags.
# File:       an input file path, a description and a tuple of type tags.
# Execution:  one measurement = (perf stat, program, File, Expression).
# init_executions() pairs a File with an Expression when they share a tag.
Expression = namedtuple('Expression', ['expression', 'description', 'types'])
expressions = []
File = namedtuple('File',['file','description','types'])
files = []
executions = []
Execution = namedtuple('Execution', ['stat', 'program', 'file', 'expression'])

# 'regex_perf' benchmark set, taken from:
#http://sljit.sourceforge.net/regex_perf.html
files.append(File('data/mark.txt','mark_twain',('regex_perf',)))
expressions.append(Expression('Twain', 'Twain', ('regex_perf',)))
expressions.append(Expression('^Twain', '^Twain', ('regex_perf',)))
expressions.append(Expression('Twain$', 'Twain$', ('regex_perf',)))
expressions.append(Expression('Huck[a-zA-Z]+|Finn[a-zA-Z]+', 'Huck_or_Finn_', ('regex_perf',)))
expressions.append(Expression('a[^x]{20}b', 'a----b', ('regex_perf',)))
expressions.append(Expression('Tom|Sawyer|Huckleberry|Finn', 'Tom_or_Huck', ('regex_perf',)))
expressions.append(Expression('.{0,3}(Tom|Sawyer|Huckleberry|Finn)', '__Tom_or_Huck', ('regex_perf',)))
expressions.append(Expression('[a-zA-Z]+ing', '___ing', ('regex_perf',)))
expressions.append(Expression('^[a-zA-Z]{0,4}ing[^a-zA-Z]', '....ing_', ('regex_perf',)))
expressions.append(Expression('[a-zA-Z]+ing$', '___ing$', ('regex_perf',)))
expressions.append(Expression('^[a-zA-Z ]{5,}$', 'symline5+$', ('regex_perf',)))
expressions.append(Expression('^.{16,20}$', 'line16-20', ('regex_perf',)))
expressions.append(Expression('([a-f](.[d-m].){0,2}[h-n]){2}', 'af_hnaf_hn', ('regex_perf',)))
expressions.append(Expression('([A-Za-z]awyer|[A-Za-z]inn)[^a-zA-Z]', '_awyer_or_inn', ('regex_perf',)))
expressions.append(Expression('"[^"]{0,30}[?!.]"', 'quoted_sentence0-30', ('regex_perf',)))
expressions.append(Expression('Tom.{10,25}river|river.{10,25}Tom', 'Tom_near_river', ('regex_perf',)))


# 'regex_perf2' set: the bounded-repetition expressions above written out
# without {m,n} syntax, under the same descriptions.
# NOTE(review): no File in this script carries the 'regex_perf2' tag, so
# init_executions() never schedules these as written -- confirm intent.
expressions.append(Expression('a[^x][^x][^x][^x][^x][^x][^x][^x][^x][^x][^x][^x][^x][^x][^x][^x][^x][^x][^x][^x]b', 'a----b', ('regex_perf2',)))
expressions.append(Expression('.?.?.?(Tom|Sawyer|Huckleberry|Finn)', '__Tom_or_Huck', ('regex_perf2',)))
expressions.append(Expression('^[a-zA-Z]?[a-zA-Z]?[a-zA-Z]?[a-zA-Z]?ing[^a-zA-Z]', '....ing_', ('regex_perf2',)))
expressions.append(Expression('^[a-zA-Z ][a-zA-Z ][a-zA-Z ][a-zA-Z ][a-zA-Z ][a-zA-Z ]*$', 'symline5+$', ('regex_perf2',)))
expressions.append(Expression('^.................?.?.?.?$', 'line16-20', ('regex_perf2',)))
expressions.append(Expression('([a-f](.[d-m].)?(.[d-m].)?[h-n])([a-f](.[d-m].)?(.[d-m].)?[h-n])', 'af_hnaf_hn', ('regex_perf2',)))
expressions.append(Expression('"[^"]?[^"]?[^"]?[^"]?[^"]?[^"]?[^"]?[^"]?[^"]?[^"]?[^"]?[^"]?[^"]?[^"]?[^"]?[^"]?[^"]?[^"]?[^"]?[^"]?[^"]?[^"]?[^"]?[^"]?[^"]?[^"]?[^"]?[^"]?[^"]?[^"]?[?!.]"', 'quoted_sentence0-30', ('regex_perf2',)))
expressions.append(Expression('Tom...........?.?.?.?.?.?.?.?.?.?.?.?.?.?.?river|river...........?.?.?.?.?.?.?.?.?.?.?.?.?.?.?Tom', 'Tom_near_river', ('regex_perf2',)))
200
# Per-program pattern rewrites, keyed by the original expression string.
# execute_one() looks the expression up here for nrgrep/nrgrep112 and passes
# the replacement to the program instead. The replacements spell out the
# {m,n} repetitions explicitly.
# Two of them end in [\x0d]$ (a literal carriage return before end of line);
# presumably the input uses CRLF line endings -- TODO confirm.
nrgrep_override = {}
nrgrep_override['a[^x]{20}b'] = 'a[^x][^x][^x][^x][^x][^x][^x][^x][^x][^x][^x][^x][^x][^x][^x][^x][^x][^x][^x][^x]b'
nrgrep_override['.{0,3}(Tom|Sawyer|Huckleberry|Finn)'] = '.?.?.?(Tom|Sawyer|Huckleberry|Finn)'
nrgrep_override['^[a-zA-Z]{0,4}ing[^a-zA-Z]'] = '^[a-zA-Z]?[a-zA-Z]?[a-zA-Z]?[a-zA-Z]?ing[^a-zA-Z]'
nrgrep_override['^[a-zA-Z ]{5,}$'] = '^[a-zA-Z ][a-zA-Z ][a-zA-Z ][a-zA-Z ][a-zA-Z ][a-zA-Z ]*[\x0d]$'
nrgrep_override['^.{16,20}$'] = '^................?.?.?.?[\x0d]$'
nrgrep_override['([a-f](.[d-m].){0,2}[h-n]){2}'] = '[a-f](.[d-m].)?(.[d-m].)?[h-n][a-f](.[d-m].)?(.[d-m].)?[h-n]'
nrgrep_override['"[^"]{0,30}[?!.]"'] = '"[^"]?[^"]?[^"]?[^"]?[^"]?[^"]?[^"]?[^"]?[^"]?[^"]?[^"]?[^"]?[^"]?[^"]?[^"]?[^"]?[^"]?[^"]?[^"]?[^"]?[^"]?[^"]?[^"]?[^"]?[^"]?[^"]?[^"]?[^"]?[^"]?[^"]?[?!.]"'
nrgrep_override['Tom.{10,25}river|river.{10,25}Tom'] = 'Tom...........?.?.?.?.?.?.?.?.?.?.?.?.?.?.?river|river...........?.?.?.?.?.?.?.?.?.?.?.?.?.?.?Tom'

# Rewrites for agrep: everything above plus '+' expanded to 'xx*'.
# NOTE(review): this binds agrep_override to the SAME dict object as
# nrgrep_override (no copy is made), so the entries added below are also
# applied to nrgrep/nrgrep112 -- confirm whether that is intended.
agrep_override = nrgrep_override
agrep_override['Huck[a-zA-Z]+|Finn[a-zA-Z]+'] = 'Huck[a-zA-Z][a-zA-Z]*|Finn[a-zA-Z][a-zA-Z]*'
agrep_override['[a-zA-Z]+ing'] = '[a-zA-Z][a-zA-Z]*ing'
agrep_override['[a-zA-Z]+ing$'] = '[a-zA-Z][a-zA-Z]*ing$'
agrep_override['([a-zA-Z][a-zA-Z0-9]*)://([^ /]+)(/[^ ]*)?'] = '([a-zA-Z][a-zA-Z0-9]*)://([^ /][^ /]*)(/[^ ]*)?'
agrep_override['([^ @]+)@([^ @]+)'] = '([^ @][^ @]*)@([^ @][^ @]*)'
agrep_override['([a-zA-Z][a-zA-Z0-9]*)://([^ /]+)(/[^ ]*)?|([^ @]+)@([^ @]+)'] = '([a-zA-Z][a-zA-Z0-9]*)://([^ /][^ /]*)(/[^ ]*)?|([^ @][^ @]*)@([^ @][^ @]*)'
agrep_override['[0-9]+[.][0-9]+, *[0-9]+[.][0-9]+'] = '[0-9][0-9]*[.][0-9][0-9]*\, *[0-9][0-9]*[.][0-9][0-9]*'
219
220
# 'reb' benchmark set, taken from:
#http://lh3lh3.users.sourceforge.net/reb.shtml
files.append(File('data/howto','howto',('reb',)))
expressions.append(Expression('@', 'at', ('reb',)))
#expressions.append(Expression('([a-zA-Z][a-zA-Z0-9]*)://([^ /]+)(/[^ ]*)?', 'URI', ('reb',)))
expressions.append(Expression('([^ @]+)@([^ @]+)', 'Email', ('reb',)))
expressions.append(Expression('([0-9][0-9]?)/([0-9][0-9]?)/([0-9][0-9]([0-9][0-9])?)', 'Date', ('reb',)))
expressions.append(Expression('(([a-zA-Z][a-zA-Z0-9]*)://|mailto:)([^ /]+)(/[^ ]*)?|([^ @]+)@([^ @]+)', 'URIorEmail', ('reb',)))
expressions.append(Expression('[ ](0x)?([a-fA-F0-9][a-fA-F0-9])+[.:,?! ]', 'xHexBytes', ('reb',)))
expressions.append(Expression('[A-Z]((([a-zA-Z]*a[a-zA-Z]*[ ])*[a-zA-Z]*e[a-zA-Z]*[ ])*[a-zA-Z]*s[a-zA-Z]*[ ])*[.?!]', 'zStarHeight', ('reb',)))
#expressions.append(Expression('0x([a-fA-F0-9][a-fA-F0-9])+', 'HexBytes', ('reb',)))


# 'gml' set: XML-oriented expressions run against a GML file that lives
# outside this directory (path is relative to the working directory).
files.append(File('../../../trunk/QA/TD/roads-2.gml', 'roads', ('gml',)))
expressions.append(Expression('<!--([^-]|-[^-])*-->', 'comment', ('gml',)))
expressions.append(Expression('<[^!?][^>]*>', 'tag', ('gml',)))
expressions.append(Expression('<gml:[^>]* xmlns:gml="[^"]*"[^>]*>', 'xmlns:gml', ('gml',)))
expressions.append(Expression('[0-9]+[.][0-9]+, *[0-9]+[.][0-9]+', 'coord', ('gml',)))
expressions.append(Expression(r"""["']|&quot;|&apos;|&#0*3[49];|&#x0*2[27];""", 'xquote', ('gml',)))

#expressions.append(Expression('(^|[^A-Z0-9:])([A-F0-9]{1,4}:){7}[A-F0-9]{1,4}($|[^A-Z0-9:])', 'IPv6', ('reb',)))
241
# perf events to measure; each one yields a separate Execution (and thus a
# separate `perf stat` run) per file/expression pair.
#stats = ['cycles:u', 'instructions:u', 'cache-misses:u', 'L1-dcache-load-misses:u', 'L1-dcache-store-misses:u', 'L1-dcache-prefetch-misses:u', 'L1-icache-load-misses:u']
stats = ['cycles:u','instructions:u']

#init perf result, which is a dict,
#perf[program][file][expression][stat] = cycles/instructions etc.
perf = {}

# Truncate the log at start-up; execute_one() appends to it afterwards.
# Note this runs whenever the module is loaded, not only when it is run as
# a script.
log_file = open('perflog.txt', 'w')
log_file.close()
251
def init_executions():
    """
    Fill the global executions list.

    For every file/expression pair that shares at least one type tag, one
    Execution per entry in stats is appended, using the program selected
    on the command line (args.program).
    """
    for data_file in files:
        for expr in expressions:
            shares_type = any(file_type == expr_type
                              for file_type in data_file.types
                              for expr_type in expr.types)
            if not shares_type:
                continue
            for stat in stats:
                executions.append(Execution(stat, args.program,
                                            data_file, expr))
264
if __name__ == "__main__":
    # Parse args
    parser = argparse.ArgumentParser()
    parser.add_argument("-r", "--reground", help="regenerate groundtruth (grep) results", action="store_true")
    parser.add_argument("-v", "--verbose", help="more output", action="store_true")
    # Offer exactly the programs execute_one() knows how to launch.
    parser.add_argument("program", help="program to test performance",
                        choices=["icgrep", "grep", "nrgrep", "nrgrep112",
                                 "pcregrep", "gre2p", "agrep", "h2cv5"])
    args = parser.parse_args()

    if not os.path.isfile(ground_truth_file) or args.reground:
        # Start from an empty ground truth; it is filled in lazily by
        # gen_ground_truth() and dumped at the end of the run.
        with open(ground_truth_file, 'w'):
            pass
    else:
        try:
            with open(ground_truth_file, 'r') as f:
                ground_truth = json.load(f)
        except (IOError, ValueError):
            # Unreadable or malformed cache (e.g. the empty file created
            # above by an interrupted run): regenerate everything.
            ground_truth = {}

    init_executions()

    for execution in executions:
        calc = execute(execution)

        file_des = execution.file.description
        exp_des = execution.expression.description
        program_des = execution.program
        stat = execution.stat

        # perf[program][file][expression][stat] = measured value
        per_expression = (perf.setdefault(program_des, {})
                              .setdefault(file_des, {})
                              .setdefault(exp_des, {}))
        per_expression[stat] = calc

    #dump ground truth
    with open(ground_truth_file, 'w') as f:
        json.dump(obj=ground_truth, fp=f, indent=4, separators=(',', ': '))

    #dump perf result
    print("Please input a name for perf data (press Enter to use default={0}): ".format(args.program))
    name = raw_input()
    if name.strip() == '':
        name = args.program

    perf_file = name + time.strftime("-%Y%m%d-%H%M%S") + "-perf.json"
    with open(perf_file, 'w') as f:
        json.dump(obj=perf, fp=f, indent=4, separators=(',', ': '))
    print("Perf result dumped to {0}.".format(perf_file))
Note: See TracBrowser for help on using the repository browser.