source: icGREP/icgrep-devel/QA/TestCorpora/SpamAssassin/getREs.py @ 5093

Last change on this file since 5093 was 5093, checked in by cameron, 3 years ago

SpamAssassin rules/regular expressions

File size: 2.3 KB
Line 
1import re, sys
2
# All patterns are raw strings so that regex escapes such as backreferences
# reach the re module intact ('\1' in a plain string literal is the control
# character \x01, not a backreference).

# "header <rule> exists:<Header-Name>"; group 1 = header name.
header_exists_RE = re.compile(r'^ *header .* exists:([-A-Za-z0-9:]+) *(?:#.*)?$')

# "header <rule> <Header-Name> =~ <regex literal>".  In every variant:
#   group 1 = header name, group 2 = opening delimiter,
#   group 3 = regex body,  group 4 = trailing flags drawn from [misx].
header_match_RE1 = re.compile(r'^ *header .* ([-A-Za-z0-9:]+) *=~ *(/)(.*)/([misx]*) *(?:#.*)?$')
header_match_RE2 = re.compile(r'^ *header .* ([-A-Za-z0-9:]+) *=~ *m([{])(.*)[}]([misx]*) *(?:#.*)?$')
# The closing delimiter must repeat the opening one, which is group 2 here
# (group 1 is the header name), hence the \2 backreference.
header_match_RE3 = re.compile(r'^ *header .* ([-A-Za-z0-9:]+) *=~ *m([^A-Za-z0-9])(.*)\2([misx]*) *(?:#.*)?$')

# These are currently ignored
header_nomatch_RE1 = re.compile(r'^ *header .* ([-A-Za-z0-9:]+) *=!')


# A regex literal at the end of any line.  In every variant:
#   group 1 = opening delimiter, group 2 = regex body, group 3 = flags.
pattern_RE1 = re.compile(r'.*(/)(.*)/([misx]*) *(?:#.*)?$')
pattern_RE2 = re.compile(r'.*m([{])(.*)[}]([misx]*) *(?:#.*)?$')
# Here the opening delimiter is group 1, so \1 is the right backreference.
pattern_RE3 = re.compile(r'.*m([^A-Za-z0-9])(.*)\1([misx]*) *(?:#.*)?$')
16
17
18
def report_pattern(p):
    """Write the pattern string p to standard output, followed by a newline."""
    sys.stdout.write(p + "\n")
21
def _apply_flags(pattern, flags):
    """Return pattern prefixed with an inline "(?flags)" group if flags is non-empty."""
    if flags:
        return '(?' + flags + ')' + pattern
    return pattern


def get_patterns(fileName):
    """Report one or more regular expressions for each rule line in fileName.

    Every line is handled by the first case below that applies:

    * A line matching header_exists_RE is reported as "^" + header name.
    * A line matching one of the header_match_RE* patterns is reported as
      the rule's regex prefixed by an anchored header name.  The header
      name "ToCc" produces one pattern for "To" and one for "Cc"; "ALL"
      produces a pattern with a generic header-name expression.  A regex
      that starts with "^" is joined to the header with ": *" (and the "^"
      is dropped); otherwise the join is ":.*".
    * A line matching one of the pattern_RE* patterns is reported as the
      bare regex.

    Trailing regex flags are turned into an inline "(?flags)" prefix.
    Lines matching none of the cases are skipped.  All output goes through
    report_pattern.
    """
    # The with-block closes the file even if matching or reporting raises.
    with open(fileName) as f:
        for t in f:
            match = header_exists_RE.match(t)
            if match:
                report_pattern("^" + match.group(1))
                continue

            header_match = (header_match_RE1.match(t)      # /.../
                            or header_match_RE2.match(t)   # m{...}
                            or header_match_RE3.match(t))  # m with other delimiters
            if header_match:
                header = header_match.group(1)
                pattern = header_match.group(3)
                join = ":.*"
                # startswith() rather than pattern[0]: the regex body may be
                # empty (e.g. "=~ //"), which must not raise IndexError.
                if pattern.startswith('^'):
                    join = ": *"
                    pattern = pattern[1:]
                pattern = _apply_flags(pattern, header_match.group(4))
                if header == 'ToCc':
                    report_pattern("^To:" + join + pattern)
                    report_pattern("^Cc:" + join + pattern)
                elif header == 'ALL':
                    report_pattern("^[-A-Za-z0-9]+" + join + pattern)
                else:
                    report_pattern("^" + header + join + pattern)
                # A header rule is fully handled here; without this the
                # ordinary-header case would fall through and be reported a
                # second time as a bare pattern, unlike ToCc and ALL.
                continue

            match = (pattern_RE1.match(t)      # /.../
                     or pattern_RE2.match(t)   # m{...}
                     or pattern_RE3.match(t))  # m with other delimiters
            if not match:
                continue
            report_pattern(_apply_flags(match.group(2), match.group(3)))
58
if __name__ == "__main__":
    # The rules file is the first command-line argument.  Exit with a usage
    # message instead of an IndexError traceback when it is missing.
    if len(sys.argv) < 2:
        sys.exit("Usage: " + sys.argv[0] + " <rules-file>")
    get_patterns(sys.argv[1])
Note: See TracBrowser for help on using the repository browser.