source: icGREP/icgrep-devel/performance/perf.py @ 4128

Last change on this file since 4128 was 4128, checked in by linmengl, 5 years ago

modify perf.py; add an interactive perf data viewer; add make target 'perf_icgrep' and 'viewer';

File size: 14.8 KB
Line 
1from subprocess import *
2from collections import *
3import os.path
4import sys
5import shutil
6import argparse
7import json
8import re
9import ipdb
10import time
11
# Number of repetitions handed to `perf stat -r`, which runs the command that
# many times and reports averaged data.
RUNS = 1

# File in which the reference results are kept between runs.
ground_truth_file = 'ground_truth.json'

# Reference match counts, generated by grep.  They are used for a correctness
# check, which is necessary to make sure that the commands launched by this
# script were executed correctly.
ground_truth = {}
20
21
def escape_quotes(e):
    """
    Prepare `e` for being placed between single quotes on a shell command line.

    Every single quote in `e` is replaced by a four character sequence:
    closing quote, backslash, quote, reopening quote.
    """
    pieces = e.split("'")
    return "'\\''".join(pieces)
24
25
def gen_ground_truth(execution):
    """
    Check if we have ground truth of this execution.
    If not, generate with grep.

    The count is kept in the global `ground_truth` dict under
    ground_truth[file description][expression description].

    Raises CalledProcessError when grep fails for any reason other than
    "no line matched" (exit status 1).
    """
    file_des = execution.file.description
    exp_des = execution.expression.description

    per_file = ground_truth.setdefault(file_des, {})
    if exp_des in per_file:
        return

    # grep is started without a shell, so the expression is passed unchanged:
    # shell-style quote escaping would alter every expression that contains a
    # single quote.  "-e" keeps an expression starting with "-" from being
    # taken for an option.
    try:
        res = check_output(["grep", "-c", "-E", "-e",
                            execution.expression.expression,
                            execution.file.file])
    except CalledProcessError as e:
        if e.returncode != 1:
            print("grep execution error with {0}, {1}".format(file_des, exp_des))
            # No count is available; do not continue with an undefined result.
            raise
        # Exit status 1 only means that no line matched.
        res = "0"

    count = int(res.strip())
    print("GROUND TRUTH: {0} {1} {2}".format(file_des, exp_des, count))
    per_file[exp_des] = count
49
50
def check_count_result(found_string, truth_number):
    """
    Compare the match count printed by a program with the expected count.

    found_string -- captured output of the program.  Two forms are
                    understood, both at the very start of the string:
                    "Matching Lines:<n>" (icgrep) and a bare number (grep).
    truth_number -- expected number of matching lines.

    An error is printed when the counts differ, a warning when no count can
    be found in the output.  Nothing is returned.
    """
    #this regex is designed for icgrep and grep, other program may need more regex
    m = re.match(r'(?:Matching Lines:)?(\d+)', found_string)
    if not m:
        print("Warning: '{0}' and '{1}' may not match".format(found_string, truth_number))
        return

    found_number = int(m.group(1))
    if found_number != truth_number:
        # Only the two counts are reported: the file and expression
        # descriptions are not parameters of this function.
        print("Correctness check error: expected {0}, found {1}".format(truth_number, found_number))
67
68
def execute(execution):
    """
    Run one execution and store the measured value on disk.

    The ground truth of the (file, expression) pair is generated first when
    it is missing.  The value measured by execute_one() is written to
    output/<file description>/<expression description>/<program>/<stat>
    and returned.
    """
    if args.verbose:
        print("Executing: regex {0} on file {1}".format(execution.expression.description,
                                                        execution.file.description))

    gen_ground_truth(execution)

    outputpath = 'output/'+execution.file.description+'/'+execution.expression.description+'/'+execution.program
    if not os.path.exists(outputpath):
        os.makedirs(outputpath)

    one = execute_one(execution, RUNS)
    # The context manager closes the results file even when the write fails.
    with open(outputpath+'/'+execution.stat, 'w') as results_file:
        results_file.write(str(one))

    return one
86
87
def _shell_quote(text):
    """Return `text` wrapped in single quotes for use in a shell command."""
    return "'" + escape_quotes(text) + "'"


def _build_perf_command(execution, count, outputpath):
    """Return the shell command line that measures `execution` with perf stat."""
    # program name -> (command with its fixed options, expression overrides)
    programs = {
        'grep': ('grep -c -E', None),
        'nrgrep112': ('nrgrep-1.1.2/nrgrep -c', nrgrep_override),
        'icgrep': ('../icgrep-build/icgrep -c', None),
        'nrgrep': ('nrgrep-1.0/nrgrep -c', nrgrep_override),
        'pcregrep': ('pcre-8.12/pcregrep -c', None),
        'gre2p': ('gre2p/gre2p -c', None),
        'agrep': ('agrep-3.41/agrep -0 -c', agrep_override),
        'h2cv5': ('h2cv5 -c', None),
    }
    if execution.program not in programs:
        raise ValueError("unknown program: {0}".format(execution.program))
    command, overrides = programs[execution.program]

    e = execution.expression.expression
    if overrides is not None:
        # Some programs need an equivalent, differently spelled expression.
        e = overrides.get(e, e)

    # Program output (stdout and stderr) goes to output.txt and is appended
    # to perflog.txt; perf writes its own report to the file "perfoutput".
    return ('perf stat -x, -o perfoutput -r ' + str(count) +
            ' -e ' + execution.stat +
            ' ' + command +
            ' ' + _shell_quote(e) +
            ' ' + _shell_quote(execution.file.file) +
            ' 2>&1 | tee ' + _shell_quote(outputpath + '/output.txt') +
            ' >> perflog.txt')


def _parse_perf_value(output, stat):
    """Return the integer value that perf's CSV report `output` gives for `stat`."""
    # In "perf stat -x," output the counter value is the first field of the
    # line naming the event; further fields may sit between the two.
    for line in reversed(output.splitlines()):
        fields = line.strip().split(',')
        if stat in fields[1:]:
            return int(fields[0])
    raise ValueError("no value for {0} in perf output: {1!r}".format(stat, output))


def execute_one(execution, count):
    """
    Measure one perf event for one program / file / expression combination.

    execution -- the Execution to run (stat, program, file, expression).
    count     -- number of repetitions, passed to `perf stat -r`.

    The program is run under `perf stat` through the shell.  Its output is
    written to <outputpath>/output.txt (the directory must already exist) and
    appended to perflog.txt, and the reported match count is compared with the
    ground truth.  The counter value reported by perf is returned as an int.

    Raises ValueError for an unknown program or when no integer value for the
    event is found in perf's report.
    """
    file_des = execution.file.description
    exp_des = execution.expression.description
    outputpath = 'output/'+file_des+'/'+exp_des+'/'+execution.program

    cmd = _build_perf_command(execution, count, outputpath)

    with open('perflog.txt', 'a') as log_file:
        log_file.write('\nEXECUTION: '+file_des+' '+exp_des+' '+execution.program+' '+execution.stat+'\n')

    # Remove the report of an earlier run so that a failing perf cannot be
    # mistaken for a successful measurement.
    if os.path.exists('perfoutput'):
        os.remove('perfoutput')

    Popen(cmd, shell=True).wait()

    with open('perfoutput', 'r') as output_file:
        output = output_file.read()

    #check execution result with ground truth.
    with open(outputpath + '/output.txt') as res_file:
        found = res_file.read().strip()
    check_count_result(found, ground_truth[file_des][exp_des])

    return _parse_perf_value(output, execution.stat)
164
165
# Record types of the benchmark matrix.
#   Expression: a regular expression, its description and its type tags.
#   File:       an input file, its description and its type tags.
#   Execution:  one measurement: perf stat, program, File and Expression.
Expression = namedtuple('Expression', 'expression description types')
File = namedtuple('File', 'file description types')
Execution = namedtuple('Execution', 'stat program file expression')

# The three work lists of the script, all empty to begin with.
expressions, files, executions = [], [], []
172
#http://sljit.sourceforge.net/regex_perf.html
files.append(File(file='data/mark.txt', description='mark_twain', types=('regex_perf',)))

# (expression, description) pairs tagged 'regex_perf'.
expressions.extend(Expression(regex, label, ('regex_perf',)) for regex, label in [
    ('Twain', 'Twain'),
    ('^Twain', '^Twain'),
    ('Twain$', 'Twain$'),
    ('Huck[a-zA-Z]+|Finn[a-zA-Z]+', 'Huck_or_Finn_'),
    ('a[^x]{20}b', 'a----b'),
    ('Tom|Sawyer|Huckleberry|Finn', 'Tom_or_Huck'),
    ('.{0,3}(Tom|Sawyer|Huckleberry|Finn)', '__Tom_or_Huck'),
    ('[a-zA-Z]+ing', '___ing'),
    ('^[a-zA-Z]{0,4}ing[^a-zA-Z]', '....ing_'),
    ('[a-zA-Z]+ing$', '___ing$'),
    ('^[a-zA-Z ]{5,}$', 'symline5+$'),
    ('^.{16,20}$', 'line16-20'),
    ('([a-f](.[d-m].){0,2}[h-n]){2}', 'af_hnaf_hn'),
    ('([A-Za-z]awyer|[A-Za-z]inn)[^a-zA-Z]', '_awyer_or_inn'),
    ('"[^"]{0,30}[?!.]"', 'quoted_sentence0-30'),
    ('Tom.{10,25}river|river.{10,25}Tom', 'Tom_near_river'),
])

# (expression, description) pairs tagged 'regex_perf2': spellings without
# {m,n} repetition that reuse the descriptions of the entries above.  No file
# visible in this script carries the 'regex_perf2' tag.
expressions.extend(Expression(regex, label, ('regex_perf2',)) for regex, label in [
    ('a[^x][^x][^x][^x][^x][^x][^x][^x][^x][^x][^x][^x][^x][^x][^x][^x][^x][^x][^x][^x]b', 'a----b'),
    ('.?.?.?(Tom|Sawyer|Huckleberry|Finn)', '__Tom_or_Huck'),
    ('^[a-zA-Z]?[a-zA-Z]?[a-zA-Z]?[a-zA-Z]?ing[^a-zA-Z]', '....ing_'),
    ('^[a-zA-Z ][a-zA-Z ][a-zA-Z ][a-zA-Z ][a-zA-Z ][a-zA-Z ]*$', 'symline5+$'),
    ('^.................?.?.?.?$', 'line16-20'),
    ('([a-f](.[d-m].)?(.[d-m].)?[h-n])([a-f](.[d-m].)?(.[d-m].)?[h-n])', 'af_hnaf_hn'),
    ('"[^"]?[^"]?[^"]?[^"]?[^"]?[^"]?[^"]?[^"]?[^"]?[^"]?[^"]?[^"]?[^"]?[^"]?[^"]?[^"]?[^"]?[^"]?[^"]?[^"]?[^"]?[^"]?[^"]?[^"]?[^"]?[^"]?[^"]?[^"]?[^"]?[^"]?[?!.]"', 'quoted_sentence0-30'),
    ('Tom...........?.?.?.?.?.?.?.?.?.?.?.?.?.?.?river|river...........?.?.?.?.?.?.?.?.?.?.?.?.?.?.?Tom', 'Tom_near_river'),
])
201
# Replacement expressions used when running nrgrep: maps an expression of the
# benchmark to the spelling handed to the program instead.
nrgrep_override = {
    'a[^x]{20}b': 'a[^x][^x][^x][^x][^x][^x][^x][^x][^x][^x][^x][^x][^x][^x][^x][^x][^x][^x][^x][^x]b',
    '.{0,3}(Tom|Sawyer|Huckleberry|Finn)': '.?.?.?(Tom|Sawyer|Huckleberry|Finn)',
    '^[a-zA-Z]{0,4}ing[^a-zA-Z]': '^[a-zA-Z]?[a-zA-Z]?[a-zA-Z]?[a-zA-Z]?ing[^a-zA-Z]',
    '^[a-zA-Z ]{5,}$': '^[a-zA-Z ][a-zA-Z ][a-zA-Z ][a-zA-Z ][a-zA-Z ][a-zA-Z ]*[\x0d]$',
    '^.{16,20}$': '^................?.?.?.?[\x0d]$',
    '([a-f](.[d-m].){0,2}[h-n]){2}': '[a-f](.[d-m].)?(.[d-m].)?[h-n][a-f](.[d-m].)?(.[d-m].)?[h-n]',
    '"[^"]{0,30}[?!.]"': '"[^"]?[^"]?[^"]?[^"]?[^"]?[^"]?[^"]?[^"]?[^"]?[^"]?[^"]?[^"]?[^"]?[^"]?[^"]?[^"]?[^"]?[^"]?[^"]?[^"]?[^"]?[^"]?[^"]?[^"]?[^"]?[^"]?[^"]?[^"]?[^"]?[^"]?[?!.]"',
    'Tom.{10,25}river|river.{10,25}Tom': 'Tom...........?.?.?.?.?.?.?.?.?.?.?.?.?.?.?river|river...........?.?.?.?.?.?.?.?.?.?.?.?.?.?.?Tom',
}

# agrep uses every nrgrep replacement plus the ones below.  The table is a
# copy: with a plain assignment both names would refer to one dict and the
# agrep-only entries would be applied to nrgrep as well.
agrep_override = dict(nrgrep_override)
agrep_override.update({
    'Huck[a-zA-Z]+|Finn[a-zA-Z]+': 'Huck[a-zA-Z][a-zA-Z]*|Finn[a-zA-Z][a-zA-Z]*',
    '[a-zA-Z]+ing': '[a-zA-Z][a-zA-Z]*ing',
    '[a-zA-Z]+ing$': '[a-zA-Z][a-zA-Z]*ing$',
    '([a-zA-Z][a-zA-Z0-9]*)://([^ /]+)(/[^ ]*)?': '([a-zA-Z][a-zA-Z0-9]*)://([^ /][^ /]*)(/[^ ]*)?',
    '([^ @]+)@([^ @]+)': '([^ @][^ @]*)@([^ @][^ @]*)',
    '([a-zA-Z][a-zA-Z0-9]*)://([^ /]+)(/[^ ]*)?|([^ @]+)@([^ @]+)': '([a-zA-Z][a-zA-Z0-9]*)://([^ /][^ /]*)(/[^ ]*)?|([^ @][^ @]*)@([^ @][^ @]*)',
    # The value contains a backslash followed by a comma; the backslash is
    # written doubled because "\," is not a valid string escape sequence.
    '[0-9]+[.][0-9]+, *[0-9]+[.][0-9]+': '[0-9][0-9]*[.][0-9][0-9]*\\, *[0-9][0-9]*[.][0-9][0-9]*',
})
220
221
#http://lh3lh3.users.sourceforge.net/reb.shtml
files.append(File(file='data/howto', description='howto', types=('reb',)))

# (expression, description) pairs tagged 'reb'; commented entries are disabled.
expressions.extend(Expression(regex, label, ('reb',)) for regex, label in [
    ('@', 'at'),
    #('([a-zA-Z][a-zA-Z0-9]*)://([^ /]+)(/[^ ]*)?', 'URI'),
    ('([^ @]+)@([^ @]+)', 'Email'),
    ('([0-9][0-9]?)/([0-9][0-9]?)/([0-9][0-9]([0-9][0-9])?)', 'Date'),
    ('(([a-zA-Z][a-zA-Z0-9]*)://|mailto:)([^ /]+)(/[^ ]*)?|([^ @]+)@([^ @]+)', 'URIorEmail'),
    ('[ ](0x)?([a-fA-F0-9][a-fA-F0-9])+[.:,?! ]', 'xHexBytes'),
    ('[A-Z]((([a-zA-Z]*a[a-zA-Z]*[ ])*[a-zA-Z]*e[a-zA-Z]*[ ])*[a-zA-Z]*s[a-zA-Z]*[ ])*[.?!]', 'zStarHeight'),
    #('0x([a-fA-F0-9][a-fA-F0-9])+', 'HexBytes'),
])


files.append(File(file='../../../trunk/QA/TD/roads-2.gml', description='roads', types=('gml',)))

# (expression, description) pairs tagged 'gml'.
expressions.extend(Expression(regex, label, ('gml',)) for regex, label in [
    ('<!--([^-]|-[^-])*-->', 'comment'),
    ('<[^!?][^>]*>', 'tag'),
    ('<gml:[^>]* xmlns:gml="[^"]*"[^>]*>', 'xmlns:gml'),
    ('[0-9]+[.][0-9]+, *[0-9]+[.][0-9]+', 'coord'),
    (r"""["']|&quot;|&apos;|&#0*3[49];|&#x0*2[27];""", 'xquote'),
])

# Disabled 'reb' entry:
#expressions.append(Expression('(^|[^A-Z0-9:])([A-F0-9]{1,4}:){7}[A-F0-9]{1,4}($|[^A-Z0-9:])', 'IPv6', ('reb',)))
242
# Perf events to measure.  A longer list, kept for reference:
#stats = ['cycles:u', 'instructions:u', 'cache-misses:u', 'L1-dcache-load-misses:u', 'L1-dcache-store-misses:u', 'L1-dcache-prefetch-misses:u', 'L1-icache-load-misses:u']
stats = ['cycles:u', 'instructions:u']

# Result of the whole run, a nested dict:
# perf[program][file][expression][stat] = cycles/instructions etc.
perf = dict()

# Empty perflog.txt (or create it) before anything is appended to it.
with open('perflog.txt', 'w') as log_file:
    pass
252
def init_executions():
    """
    Fill the global `executions` list.

    Every file is combined with every expression that shares at least one
    type tag with it; for each such pair one Execution per entry of `stats`
    is appended, using the program given on the command line.
    """
    for candidate in files:
        for expression in expressions:
            shares_type = any(tag in expression.types for tag in candidate.types)
            if not shares_type:
                continue
            executions.extend(Execution(stat, args.program, candidate, expression)
                              for stat in stats)
265
if __name__ == "__main__":
    # Parse args
    parser = argparse.ArgumentParser()
    parser.add_argument("-r", "--reground", help="regenerate groundtruth (grep) results", action="store_true")
    parser.add_argument("-v", "--verbose", help="more output", action="store_true")
    # The choices are the program names that execute_one() can launch.
    parser.add_argument("program", help="program to test performance",
                        choices=["icgrep", "grep", "nrgrep", "nrgrep112",
                                 "pcregrep", "gre2p", "agrep", "h2cv5"])
    args = parser.parse_args()

    # Reuse the stored ground truth unless it has to be regenerated.  A
    # missing or empty file means that nothing is known yet; the file is
    # (re)written once all executions are done.
    if not args.reground and os.path.isfile(ground_truth_file):
        with open(ground_truth_file, 'r') as f:
            stored = f.read()
        if stored.strip():
            ground_truth = json.loads(stored)

    init_executions()

    for execution in executions:
        # Module-level description of the execution in progress, set before
        # it runs so that these names never describe an earlier execution.
        file_des = execution.file.description
        exp_des = execution.expression.description
        program_des = execution.program
        stat = execution.stat

        calc = execute(execution)

        # perf[program][file][expression][stat] = measured value
        by_stat = perf.setdefault(program_des, {})
        by_stat = by_stat.setdefault(file_des, {})
        by_stat = by_stat.setdefault(exp_des, {})
        by_stat[stat] = calc

    #dump ground truth
    with open(ground_truth_file, 'w') as f:
        json.dump(obj=ground_truth, fp=f, indent=4, separators=(',', ': '))

    #dump perf result
    print("Please input a name for perf data (press Enter to use default={0}): ".format(args.program))
    try:
        read_line = raw_input
    except NameError:
        # Python 3: raw_input() is called input()
        read_line = input
    name = read_line()
    if name.strip() == '':
        name = args.program

    perf_file = name + time.strftime("-%Y%m%d-%H%M%S") + "-perf.json"
    with open(perf_file, 'w') as f:
        json.dump(obj=perf, fp=f, indent=4, separators=(',', ': '))
    print("Perf result dumped to {0}.".format(perf_file))
Note: See TracBrowser for help on using the repository browser.