source: icGREP/icgrep-devel/performance/perf.py @ 4899

Last change on this file since 4899 was 4618, checked in by nmedfort, 4 years ago

More modifications to UnicodeSet class.

File size: 15.0 KB
Line 
1from subprocess import *
2from collections import *
3import os.path
4import sys
5import shutil
6import argparse
7import json
8import re
9import time
10
# Number of repetitions handed to `perf stat -r`; the perf data is averaged
# over that many runs.
RUNS = 1

# Reference match counts used for the correctness check, generated by grep.
# The correctness check is necessary to make sure the commands launched by
# this script are executed correctly.
ground_truth_file = 'ground_truth.json'
ground_truth = dict()
19
20
def escape_quotes(e):
    r"""
    Return `e` with every single quote replaced by the sequence '\''.

    Inside a single-quoted shell word that sequence closes the quote, emits
    an escaped quote and reopens the quote.
    """
    pieces = e.split("'")
    return "'\\''".join(pieces)
23
24
def gen_ground_truth(execution):
    """
    Make sure the ground truth table holds a match count for this execution.

    The count lives in the module-level `ground_truth` dict under
    [file description][expression description].  If it is missing, it is
    generated by running `grep -c -E` with the execution's expression on the
    execution's file, and stored.

    Raises CalledProcessError when grep fails with any exit status other
    than 1 (exit status 1 is treated as "zero matching lines").
    """
    file_des = execution.file.description
    exp_des = execution.expression.description

    per_file = ground_truth.setdefault(file_des, {})
    if exp_des in per_file:
        return

    # grep is started directly (no shell), so the expression has to be passed
    # verbatim: shell-style quote escaping would change the pattern grep sees.
    grep_cmd = ["grep", "-c", "-E", execution.expression.expression,
                execution.file.file]
    try:
        res = check_output(grep_cmd)
    except CalledProcessError as e:
        if e.returncode != 1:
            print("grep execution error with {0}, {1}".format(file_des, exp_des))
            # There is no count to record; propagate the failure rather than
            # carrying on with an undefined result.
            raise
        res = "0"

    count = int(res.strip())
    print("GROUND TRUTH: {0} {1} {2}".format(file_des, exp_des, count))
    per_file[exp_des] = count
48
49
def check_count_result(found_string, truth_number):
    """
    Compare the count reported by a program with the expected count.

    `found_string` is matched from its start against the known output
    formats.  Returns False (after printing the mismatch) when a count is
    recognised and differs from `truth_number`, True when it is equal.  When
    no format is recognised a warning is printed and True is returned.
    """
    # First the format designed for icgrep, then the bare number printed by
    # grep; other programs may need more patterns.
    count_patterns = (r'Matching Lines:(\d+)', r'(\d+)')

    for pattern in count_patterns:
        hit = re.match(pattern, found_string)
        if hit is None:
            continue
        if int(hit.group(1)) == truth_number:
            return True
        print("Expect {0}, but get {1}".format(truth_number, found_string))
        return False

    print("Warning: '{0}' and '{1}' may not match".format(found_string, truth_number))
    return True
69
70
def execute(execution):
    """
    Run one execution and store the measured counter value on disk.

    Makes sure a ground truth count exists for the execution, creates the
    directory output/<file description>/<expression description>/<program>
    if needed, measures the execution with execute_one() (RUNS repetitions)
    and writes the value to a file named after the stat in that directory.

    Returns the value returned by execute_one().
    """
    if args.verbose:
        print("Executing: regex {0} on file {1}".format(execution.expression.description,
                                                        execution.file.description))

    gen_ground_truth(execution)

    outputpath = '/'.join(['output',
                           execution.file.description,
                           execution.expression.description,
                           execution.program])
    if not os.path.exists(outputpath):
        os.makedirs(outputpath)

    one = execute_one(execution, RUNS)

    # The context manager closes the results file even if the write fails.
    with open(outputpath + '/' + execution.stat, 'w') as results_file:
        results_file.write(str(one))

    return one
88
89
def _perf_target(execution):
    """Return the 'program flags quoted-expression' part of the perf command."""
    # program name -> (executable with its flags, expression overrides or None)
    known_programs = {
        'grep': ('grep -c -E', None),
        'nrgrep112': ('nrgrep-1.1.2/nrgrep -c', nrgrep_override),
        'icgrep': ('../icgrep-build/icgrep -c', None),
        'nrgrep': ('nrgrep-1.0/nrgrep -c', nrgrep_override),
        'pcregrep': ('pcre-8.12/pcregrep -c', None),
        'gre2p': ('gre2p/gre2p -c', None),
        'agrep': ('agrep-3.41/agrep -0 -c', agrep_override),
        'h2cv5': ('h2cv5 -c', None),
    }
    if execution.program not in known_programs:
        raise ValueError("unsupported program: {0}".format(execution.program))

    invocation, overrides = known_programs[execution.program]
    e = execution.expression.expression
    if overrides is not None:
        # Some programs need the expression rewritten into a form they accept.
        e = overrides.get(e, e)
    return invocation + " '" + escape_quotes(e) + "'"


def _parse_perf_counter(output, stat):
    """Return the integer counter value reported for `stat` in `perf stat -x,` output."""
    # The counter value is the first field of the line that names the event.
    # Searching by field works whether or not a unit column sits between the
    # value and the event name.  The last matching line wins.
    for line in reversed(output.splitlines()):
        fields = line.split(',')
        if stat in fields[1:]:
            return int(fields[0])
    raise ValueError("no '{0}' counter found in perf output".format(stat))


def execute_one(execution, count):
    """
    Measure one execution with `perf stat` and return the counter value.

    The program named by `execution.program` is run under
    `perf stat -x, -o perfoutput -r <count> -e <execution.stat>` on the
    execution's file.  The program's own output (stdout and stderr) is
    written to output.txt in the execution's output directory and appended
    to perflog.txt.  The directory is expected to exist already.  The count
    found in output.txt is then checked against the ground truth; a mismatch
    is only reported, it does not stop the measurement.

    Returns the counter value for `execution.stat` as an int.

    Raises ValueError if the program is not supported or the counter cannot
    be found in the perf output.
    """
    file_des = execution.file.description
    exp_des = execution.expression.description
    outputpath = 'output/' + file_des + '/' + exp_des + '/' + execution.program

    # Resolve the command first so that an unsupported program fails before
    # anything is logged or a stale perfoutput gets parsed.
    target = _perf_target(execution)

    with open('perflog.txt', 'a') as log_file:
        log_file.write('\nEXECUTION: ' + file_des + ' ' + exp_des + ' '
                       + execution.program + ' ' + execution.stat + '\n')

    redirectoutput = ' 2>&1 | tee ' + outputpath + '/output.txt >> perflog.txt'
    cmd = ('perf stat -x, -o perfoutput -r ' + str(count)
           + ' -e ' + execution.stat
           + ' ' + target
           + ' ' + execution.file.file
           + redirectoutput)
    # A shell is required for the redirection and the pipe; the expression is
    # embedded single-quoted with its quotes escaped by escape_quotes().
    Popen(cmd, shell=True).wait()

    with open('perfoutput', 'r') as output_file:
        output = output_file.read()

    #check execution result with ground truth.
    with open(outputpath + '/output.txt') as res_file:
        found = res_file.read().strip()
    if not check_count_result(found, ground_truth[file_des][exp_des]):
        print("Correctness check error: {0} : {1}".format(file_des, exp_des))

    return _parse_perf_counter(output, execution.stat)
167
168
# Record types describing what is measured.  `types` is a tuple of tags; a
# file and an expression are paired when they have a tag in common.
Expression = namedtuple('Expression', 'expression description types')
File = namedtuple('File', 'file description types')
Execution = namedtuple('Execution', 'stat program file expression')

# Module-level tables that the rest of the script appends to.
expressions = list()
files = list()
executions = list()
175
# Input file and expressions tagged 'regex_perf'.  Each Expression is
# (regular expression, description, tags); the description is also used as a
# directory name for the results.
#http://sljit.sourceforge.net/regex_perf.html
files.append(File('data/mark.txt','mark_twain',('regex_perf',)))
expressions.append(Expression('Twain', 'Twain', ('regex_perf',)))
expressions.append(Expression('^Twain', '^Twain', ('regex_perf',)))
expressions.append(Expression('Twain$', 'Twain$', ('regex_perf',)))
expressions.append(Expression('Huck[a-zA-Z]+|Finn[a-zA-Z]+', 'Huck_or_Finn_', ('regex_perf',)))
expressions.append(Expression('a[^x]{20}b', 'a----b', ('regex_perf',)))
expressions.append(Expression('Tom|Sawyer|Huckleberry|Finn', 'Tom_or_Huck', ('regex_perf',)))
expressions.append(Expression('.{0,3}(Tom|Sawyer|Huckleberry|Finn)', '__Tom_or_Huck', ('regex_perf',)))
expressions.append(Expression('[a-zA-Z]+ing', '___ing', ('regex_perf',)))
expressions.append(Expression('^[a-zA-Z]{0,4}ing[^a-zA-Z]', '....ing_', ('regex_perf',)))
expressions.append(Expression('[a-zA-Z]+ing$', '___ing$', ('regex_perf',)))
expressions.append(Expression('^[a-zA-Z ]{5,}$', 'symline5+$', ('regex_perf',)))
expressions.append(Expression('^.{16,20}$', 'line16-20', ('regex_perf',)))
expressions.append(Expression('([a-f](.[d-m].){0,2}[h-n]){2}', 'af_hnaf_hn', ('regex_perf',)))
expressions.append(Expression('([A-Za-z]awyer|[A-Za-z]inn)[^a-zA-Z]', '_awyer_or_inn', ('regex_perf',)))
expressions.append(Expression('"[^"]{0,30}[?!.]"', 'quoted_sentence0-30', ('regex_perf',)))
expressions.append(Expression('Tom.{10,25}river|river.{10,25}Tom', 'Tom_near_river', ('regex_perf',)))


# Expressions tagged 'regex_perf2'.  They reuse descriptions from the
# 'regex_perf' list and look like the same patterns with the {m,n}
# repetitions written out by hand -- NOTE(review): confirm that is the intent.
# No File in the visible part of this script carries the 'regex_perf2' tag,
# so these are not paired with any file here.
expressions.append(Expression('a[^x][^x][^x][^x][^x][^x][^x][^x][^x][^x][^x][^x][^x][^x][^x][^x][^x][^x][^x][^x]b', 'a----b', ('regex_perf2',)))
expressions.append(Expression('.?.?.?(Tom|Sawyer|Huckleberry|Finn)', '__Tom_or_Huck', ('regex_perf2',)))
expressions.append(Expression('^[a-zA-Z]?[a-zA-Z]?[a-zA-Z]?[a-zA-Z]?ing[^a-zA-Z]', '....ing_', ('regex_perf2',)))
expressions.append(Expression('^[a-zA-Z ][a-zA-Z ][a-zA-Z ][a-zA-Z ][a-zA-Z ][a-zA-Z ]*$', 'symline5+$', ('regex_perf2',)))
expressions.append(Expression('^.................?.?.?.?$', 'line16-20', ('regex_perf2',)))
expressions.append(Expression('([a-f](.[d-m].)?(.[d-m].)?[h-n])([a-f](.[d-m].)?(.[d-m].)?[h-n])', 'af_hnaf_hn', ('regex_perf2',)))
expressions.append(Expression('"[^"]?[^"]?[^"]?[^"]?[^"]?[^"]?[^"]?[^"]?[^"]?[^"]?[^"]?[^"]?[^"]?[^"]?[^"]?[^"]?[^"]?[^"]?[^"]?[^"]?[^"]?[^"]?[^"]?[^"]?[^"]?[^"]?[^"]?[^"]?[^"]?[^"]?[?!.]"', 'quoted_sentence0-30', ('regex_perf2',)))
expressions.append(Expression('Tom...........?.?.?.?.?.?.?.?.?.?.?.?.?.?.?river|river...........?.?.?.?.?.?.?.?.?.?.?.?.?.?.?Tom', 'Tom_near_river', ('regex_perf2',)))
204
# Expression rewrites applied when running nrgrep/nrgrep112: the key is the
# expression as listed in `expressions`, the value is what is passed to the
# program instead.  (Presumably needed because nrgrep does not accept the
# {m,n} repetition syntax -- confirm against the nrgrep documentation.)
nrgrep_override = {}
nrgrep_override['a[^x]{20}b'] = 'a[^x][^x][^x][^x][^x][^x][^x][^x][^x][^x][^x][^x][^x][^x][^x][^x][^x][^x][^x][^x]b'
nrgrep_override['.{0,3}(Tom|Sawyer|Huckleberry|Finn)'] = '.?.?.?(Tom|Sawyer|Huckleberry|Finn)'
nrgrep_override['^[a-zA-Z]{0,4}ing[^a-zA-Z]'] = '^[a-zA-Z]?[a-zA-Z]?[a-zA-Z]?[a-zA-Z]?ing[^a-zA-Z]'
nrgrep_override['^[a-zA-Z ]{5,}$'] = '^[a-zA-Z ][a-zA-Z ][a-zA-Z ][a-zA-Z ][a-zA-Z ][a-zA-Z ]*[\x0d]$'
nrgrep_override['^.{16,20}$'] = '^................?.?.?.?[\x0d]$'
nrgrep_override['([a-f](.[d-m].){0,2}[h-n]){2}'] = '[a-f](.[d-m].)?(.[d-m].)?[h-n][a-f](.[d-m].)?(.[d-m].)?[h-n]'
nrgrep_override['"[^"]{0,30}[?!.]"'] = '"[^"]?[^"]?[^"]?[^"]?[^"]?[^"]?[^"]?[^"]?[^"]?[^"]?[^"]?[^"]?[^"]?[^"]?[^"]?[^"]?[^"]?[^"]?[^"]?[^"]?[^"]?[^"]?[^"]?[^"]?[^"]?[^"]?[^"]?[^"]?[^"]?[^"]?[?!.]"'
nrgrep_override['Tom.{10,25}river|river.{10,25}Tom'] = 'Tom...........?.?.?.?.?.?.?.?.?.?.?.?.?.?.?river|river...........?.?.?.?.?.?.?.?.?.?.?.?.?.?.?Tom'

# agrep starts from nrgrep's rewrites and adds its own.  The dict is copied:
# binding the same object under both names would add the agrep-only rewrites
# below to nrgrep_override as well.
agrep_override = dict(nrgrep_override)
agrep_override['Huck[a-zA-Z]+|Finn[a-zA-Z]+'] = 'Huck[a-zA-Z][a-zA-Z]*|Finn[a-zA-Z][a-zA-Z]*'
agrep_override['[a-zA-Z]+ing'] = '[a-zA-Z][a-zA-Z]*ing'
agrep_override['[a-zA-Z]+ing$'] = '[a-zA-Z][a-zA-Z]*ing$'
agrep_override['([a-zA-Z][a-zA-Z0-9]*)://([^ /]+)(/[^ ]*)?'] = '([a-zA-Z][a-zA-Z0-9]*)://([^ /][^ /]*)(/[^ ]*)?'
agrep_override['([^ @]+)@([^ @]+)'] = '([^ @][^ @]*)@([^ @][^ @]*)'
agrep_override['([a-zA-Z][a-zA-Z0-9]*)://([^ /]+)(/[^ ]*)?|([^ @]+)@([^ @]+)'] = '([a-zA-Z][a-zA-Z0-9]*)://([^ /][^ /]*)(/[^ ]*)?|([^ @][^ @]*)@([^ @][^ @]*)'
# The backslash before the comma is spelled '\\,' so the literal holds the
# same two characters without relying on an unrecognised escape sequence.
agrep_override['[0-9]+[.][0-9]+, *[0-9]+[.][0-9]+'] = '[0-9][0-9]*[.][0-9][0-9]*\\, *[0-9][0-9]*[.][0-9][0-9]*'
223
224
# Input file and expressions tagged 'reb'.  Lines that are commented out are
# expressions currently excluded from the run.
#http://lh3lh3.users.sourceforge.net/reb.shtml
files.append(File('data/howto','howto',('reb',)))
expressions.append(Expression('@', 'at', ('reb',)))
#expressions.append(Expression('([a-zA-Z][a-zA-Z0-9]*)://([^ /]+)(/[^ ]*)?', 'URI', ('reb',)))
expressions.append(Expression('([^ @]+)@([^ @]+)', 'Email', ('reb',)))
expressions.append(Expression('([0-9][0-9]?)/([0-9][0-9]?)/([0-9][0-9]([0-9][0-9])?)', 'Date', ('reb',)))
expressions.append(Expression('(([a-zA-Z][a-zA-Z0-9]*)://|mailto:)([^ /]+)(/[^ ]*)?|([^ @]+)@([^ @]+)', 'URIorEmail', ('reb',)))
expressions.append(Expression('[ ](0x)?([a-fA-F0-9][a-fA-F0-9])+[.:,?! ]', 'xHexBytes', ('reb',)))
expressions.append(Expression('[A-Z]((([a-zA-Z]*a[a-zA-Z]*[ ])*[a-zA-Z]*e[a-zA-Z]*[ ])*[a-zA-Z]*s[a-zA-Z]*[ ])*[.?!]', 'zStarHeight', ('reb',)))
#expressions.append(Expression('0x([a-fA-F0-9][a-fA-F0-9])+', 'HexBytes', ('reb',)))


# Input file and expressions tagged 'gml'.
files.append(File('data/roads-2.gml', 'roads', ('gml',)))
expressions.append(Expression('<!--([^-]|-[^-])*-->', 'comment', ('gml',)))
expressions.append(Expression('<[^!?][^>]*>', 'tag', ('gml',)))
expressions.append(Expression('<gml:[^>]* xmlns:gml="[^"]*"[^>]*>', 'xmlns:gml', ('gml',)))
expressions.append(Expression('[0-9]+[.][0-9]+, *[0-9]+[.][0-9]+', 'coord', ('gml',)))
# Raw triple-quoted literal because the expression contains both quote characters.
expressions.append(Expression(r"""["']|&quot;|&apos;|&#0*3[49];|&#x0*2[27];""", 'xquote', ('gml',)))

#expressions.append(Expression('(^|[^A-Z0-9:])([A-F0-9]{1,4}:){7}[A-F0-9]{1,4}($|[^A-Z0-9:])', 'IPv6', ('reb',)))
245
# perf events to measure; one execution is created per event.
#stats = ['cycles:u', 'instructions:u', 'cache-misses:u', 'L1-dcache-load-misses:u', 'L1-dcache-store-misses:u', 'L1-dcache-prefetch-misses:u', 'L1-icache-load-misses:u']
stats = [
    'cycles:u',
    'instructions:u',
]

# Collected results, a nested dict:
# perf[program][file][expression][stat] = cycles/instructions etc.
perf = dict()

# Start with an empty perflog.txt; later writes append to it.
with open('perflog.txt', 'w') as log_file:
    pass
255
def init_executions():
    """
    Fill the module-level `executions` list.

    For every file/expression pair that has at least one type tag in common,
    one Execution per entry of `stats` is appended, using the program chosen
    on the command line (`args.program`).
    """
    for data_file in files:
        for expr in expressions:
            shares_tag = any(tag in expr.types for tag in data_file.types)
            if not shares_tag:
                continue
            executions.extend(Execution(stat, args.program, data_file, expr)
                              for stat in stats)
268
if __name__ == "__main__":
    # Script entry point: parse the command line, load (or reset) the ground
    # truth table, run every execution, then write the ground truth and the
    # collected perf data to JSON files.

    # Parse args
    parser = argparse.ArgumentParser()
    parser.add_argument("-r", "--reground", help="regenerate groundtruth (grep) results", action="store_true")
    parser.add_argument("-v", "--verbose", help="more output", action="store_true")
    # Every program execute_one() knows how to launch.
    parser.add_argument("program", help="program to test performance",
                        choices=["icgrep", "grep", "nrgrep", "nrgrep112",
                                 "pcregrep", "gre2p", "agrep", "h2cv5"])
    args = parser.parse_args()

    if not os.path.isfile(ground_truth_file) or args.reground:
        # Create/empty the file; ground_truth stays empty and is regenerated
        # on demand, then written back after the runs.
        with open(ground_truth_file, 'w'):
            pass
    else:
        try:
            with open(ground_truth_file, 'r') as f:
                ground_truth = json.load(f)
        except (IOError, ValueError):
            # Unreadable or malformed file: fall back to an empty table.
            ground_truth = {}

    init_executions()

    for execution in executions:
        calc = execute(execution)

        # perf[program][file][expression][stat] = measured value
        per_expression = (perf.setdefault(execution.program, {})
                              .setdefault(execution.file.description, {})
                              .setdefault(execution.expression.description, {}))
        per_expression[execution.stat] = calc

    #dump ground truth
    with open(ground_truth_file, 'w') as f:
        json.dump(obj=ground_truth, fp=f, indent=4, separators=(',', ': '))

    #dump perf result
    print("Please input a name for perf data (press Enter to use default={0}): ".format(args.program))
    try:
        read_line = raw_input
    except NameError:
        # raw_input does not exist on Python 3, where input() reads a line.
        read_line = input
    name = read_line()
    if name.strip() == '':
        name = args.program

    perf_file = name + time.strftime("-%Y%m%d-%H%M%S") + "-perf.json"
    with open(perf_file, 'w') as f:
        json.dump(obj=perf, fp=f, indent=4, separators=(',', ': '))
    print("Perf result dumped to {0}.".format(perf_file))
Note: See TracBrowser for help on using the repository browser.