source: icGREP/icgrep-devel/icgrep/pbix_compiler.cpp @ 3958

Last change on this file since 3958 was 3958, checked in by daled, 5 years ago

Minor update.

File size: 14.3 KB
Line 
1/*
2 *  Copyright (c) 2014 International Characters.
3 *  This software is licensed to the public under the Open Software License 3.0.
4 *  icgrep is a trademark of International Characters.
5 */
6
7#include "pbix_compiler.h"
8
9Pbix_Compiler::Pbix_Compiler(std::map<std::string, std::string> name_map)
10{
11    m_name_map = name_map;
12    symgen = SymbolGenerator();
13}
14
15CodeGenState Pbix_Compiler::compile_subexpressions(const std::map<std::string, RE*>& re_map)
16{
17    CodeGenState cg_state;
18
19    for (auto it =  re_map.rbegin(); it != re_map.rend(); ++it)
20    {
21        //This is specifically for the utf8 multibyte character classes.
22        if (Seq* seq = dynamic_cast<Seq*>(it->second))
23        {
24            if (seq->getType() == Seq::Byte)
25            {
26                std::string gs_retVal = symgen.gensym("start_marker");
27                cg_state.stmtsl.push_back(new Assign(gs_retVal, new All(1)));
28                cg_state.newsym = gs_retVal;
29
30                std::list<RE*>::iterator endit;
31                endit = seq->GetREList()->end();
32                --endit;
33                std::list<RE*>::iterator it;
34                for (it = seq->GetREList()->begin(); it != seq->GetREList()->end(); ++it)
35                {
36                    Name* name = dynamic_cast<Name*>(*it);
37                    if (it != endit)
38                    {
39                        gs_retVal = symgen.gensym("marker");
40                        cg_state.stmtsl.push_back(new Assign(gs_retVal, new Advance(new And(new Var(cg_state.newsym), new CharClass(name->getName())))));
41                        cg_state.newsym = gs_retVal;
42                    }
43                    else
44                    {
45                        cg_state.stmtsl.push_back(new Assign(seq->getName(), new And(new Var(cg_state.newsym), new CharClass(name->getName()))));
46                    }
47                }
48            }
49        }
50    }
51
52    return cg_state;
53}
54
55CodeGenState Pbix_Compiler::compile(RE *re)
56{   
57    std::string gs_retVal;
58    CodeGenState cg_state;
59
60    //Set the 'internal.initial' bit stream for the utf-8 multi-byte encoding.
61    gs_retVal = symgen.gensym("start_marker");
62    cg_state.stmtsl.push_back(new Assign(gs_retVal, new All(1)));
63    cg_state.newsym = gs_retVal;
64
65    std::string gs_retVal_m1 = symgen.gensym("marker");
66    cg_state.stmtsl.push_back(new Assign(gs_retVal_m1, new And(new Var(m_name_map.find("UTF8-SingleByte")->second), new Var(cg_state.newsym))));
67
68    std::string gs_retVal_m2 = symgen.gensym("marker");
69    cg_state.stmtsl.push_back(new Assign(gs_retVal_m2, new And(new Var(m_name_map.find("UTF8-Prefix2")->second), new Var(cg_state.newsym))));
70
71    std::string gs_retVal_m3 = symgen.gensym("marker");
72    cg_state.stmtsl.push_back(new Assign(gs_retVal_m3, new And(new Var(m_name_map.find("UTF8-Prefix3")->second), new Var(cg_state.newsym))));
73
74    std::string gs_retVal_m4 = symgen.gensym("marker");
75    cg_state.stmtsl.push_back(new Assign(gs_retVal_m4, new And(new Var(m_name_map.find("UTF8-Prefix4")->second), new Var(cg_state.newsym))));
76
77    std::string gs_retVal_m5 = symgen.gensym("marker");
78    cg_state.stmtsl.push_back(new Assign(gs_retVal_m5, new Or(new Var(gs_retVal_m2), new Var(gs_retVal_m1))));
79
80    std::string gs_retVal_m6 = symgen.gensym("marker");
81    cg_state.stmtsl.push_back(new Assign(gs_retVal_m6, new Or(new Var(gs_retVal_m5), new Var(gs_retVal_m3))));
82
83    gs_retVal = symgen.gensym("internal.initial");
84    m_name_map.insert(make_pair("internal.initial", gs_retVal));
85    cg_state.stmtsl.push_back(new Assign(gs_retVal, new Or(new Var(gs_retVal_m6), new Var(gs_retVal_m4))));
86    cg_state.newsym = gs_retVal;
87
88    //Set the 'internal.nonfinal' bit stream for the utf-8 multi-byte encoding.
89    gs_retVal = symgen.gensym("start_marker");
90    cg_state.stmtsl.push_back(new Assign(gs_retVal, new All(1)));
91    cg_state.newsym = gs_retVal;
92
93    gs_retVal_m1 = symgen.gensym("marker");
94    cg_state.stmtsl.push_back(new Assign(gs_retVal_m1, new And(new Var(m_name_map.find("UTF8-Prefix2")->second), new Var(cg_state.newsym))));
95
96    gs_retVal_m2 = symgen.gensym("marker");
97    cg_state.stmtsl.push_back(new Assign(gs_retVal_m2, new And(new Var(m_name_map.find("UTF8-Prefix3")->second), new Var(cg_state.newsym))));
98
99    gs_retVal_m3 = symgen.gensym("marker");
100    cg_state.stmtsl.push_back(new Assign(gs_retVal_m3, new Advance(new Var(gs_retVal_m2))));
101
102    gs_retVal_m4 = symgen.gensym("marker");
103    cg_state.stmtsl.push_back(new Assign(gs_retVal_m4, new And(new Var(m_name_map.find("UTF8-Prefix4")->second), new Var(cg_state.newsym))));
104
105    gs_retVal_m5 = symgen.gensym("marker");
106    cg_state.stmtsl.push_back(new Assign(gs_retVal_m5, new Advance(new Var(gs_retVal_m4))));
107
108    gs_retVal_m6 = symgen.gensym("marker");
109    cg_state.stmtsl.push_back(new Assign(gs_retVal_m6, new Advance(new Var(gs_retVal_m5))));
110
111    std::string gs_retVal_m7 = symgen.gensym("marker");
112    cg_state.stmtsl.push_back(new Assign(gs_retVal_m7, new Or(new Var(gs_retVal_m2), new Var(gs_retVal_m1))));
113
114    std::string gs_retVal_m8 = symgen.gensym("marker");
115    cg_state.stmtsl.push_back(new Assign(gs_retVal_m8, new Or(new Var(gs_retVal_m7), new Var(gs_retVal_m3))));
116
117    std::string gs_retVal_m9 = symgen.gensym("marker");
118    cg_state.stmtsl.push_back(new Assign(gs_retVal_m9, new Or(new Var(gs_retVal_m8), new Var(gs_retVal_m4))));
119
120    std::string gs_retVal_m10 = symgen.gensym("marker");
121    cg_state.stmtsl.push_back(new Assign(gs_retVal_m10, new Or(new Var(gs_retVal_m9), new Var(gs_retVal_m5))));
122
123    gs_retVal = symgen.gensym("internal.nonfinal");
124    m_name_map.insert(make_pair("internal.nonfinal", gs_retVal));
125    cg_state.stmtsl.push_back(new Assign(gs_retVal, new Or(new Var(gs_retVal_m10), new Var(gs_retVal_m6))));
126    cg_state.newsym = gs_retVal;
127
128
129    gs_retVal = symgen.gensym("start_marker");
130    cg_state.stmtsl.push_back(new Assign(gs_retVal, new All(1)));
131    cg_state.newsym = gs_retVal;
132    cg_state = re2pablo_helper(re, cg_state);
133
134    //These three lines are specifically for grep.
135    gs_retVal = symgen.gensym("marker");
136    cg_state.stmtsl.push_back(new Assign(gs_retVal, new And(new MatchStar(new Var(cg_state.newsym),
137                                new Not(new Var(m_name_map.find("LineFeed")->second))), new Var(m_name_map.find("LineFeed")->second))));
138    cg_state.newsym = gs_retVal;
139
140    return cg_state;
141}
142
143CodeGenState Pbix_Compiler::re2pablo_helper(RE *re, CodeGenState cg_state)
144{
145    if (Name* name = dynamic_cast<Name*>(re))
146    {
147        std::string gs_retVal = symgen.gensym("marker");
148
149        if (name->getType() == Name::FixedLength)
150        {
151            cg_state.stmtsl.push_back(new Assign(gs_retVal, new Advance(new And(new Var(cg_state.newsym), new CharClass(name->getName())))));
152        }
153        else if (name->getType() == Name::UnicodeCategory)
154        {
155            cg_state.stmtsl.push_back(new Assign(gs_retVal, new Advance(new And(new Var(cg_state.newsym), new Call(name->getName())))));
156        }
157        else //Name::Unicode
158        {
159            cg_state.stmtsl.push_back(new Assign(gs_retVal, new Advance(new And(new CharClass(name->getName()), new ScanThru(new Var(cg_state.newsym), new CharClass(m_name_map.find("internal.nonfinal")->second))))));
160        }
161        cg_state.newsym = gs_retVal;
162
163        //cout << "\n" << "(" << StatementPrinter::PrintStmts(cg_state) << ")" << "\n" << endl;
164    }
165    else if (Start* start = dynamic_cast<Start*>(re))
166    {
167        std::string gs_retVal = symgen.gensym("start_of_line_marker");
168        cg_state.stmtsl.push_back(new Assign(gs_retVal, new And(new Var(cg_state.newsym), new Not(new Advance(new Not(new CharClass(m_name_map.find("LineFeed")->second)))))));
169        cg_state.newsym = gs_retVal;
170    }
171    else if (End* end = dynamic_cast<End*>(re))
172    {
173        std::string gs_retVal = symgen.gensym("end_of_line_marker");
174        cg_state.stmtsl.push_back(new Assign(gs_retVal, new And(new Var(cg_state.newsym), new CharClass(m_name_map.find("LineFeed")->second))));
175        cg_state.newsym = gs_retVal;
176    }
177    else if (Seq* seq = dynamic_cast<Seq*>(re))
178    {
179        std::list<RE*>::iterator it = seq->GetREList()->begin();
180        if (it != seq->GetREList()->end())
181        {
182            cg_state = Seq_helper(seq->GetREList(), it, cg_state);
183        }
184    //cout << "\n" << "Seq => (" << StatementPrinter::PrintStmts(cg_state) << ")" << "\n" << endl;
185    }
186    else if (Alt* alt = dynamic_cast<Alt*>(re))
187    {
188        if (alt->GetREList() == 0)
189        {
190
191            std::string gs_retVal = symgen.gensym("always_fail_marker");
192            cg_state.stmtsl.push_back(new Assign(gs_retVal, new All(0)));
193            cg_state.newsym = gs_retVal;
194        }
195        else
196        {
197            if (alt->GetREList()->size() == 1)
198            {
199                cg_state = re2pablo_helper(alt->GetREList()->front(), cg_state);
200            }
201            else
202            {
203                std::list<RE*>::iterator it = alt->GetREList()->begin();
204                cg_state = Alt_helper(alt->GetREList(), it, cg_state);
205            }
206        }
207    //cout << "\n" << "Alt => (" << StatementPrinter::PrintStmts(cg_state) << ")" << "\n" << endl;
208    }
209    else if (Rep* rep = dynamic_cast<Rep*>(re))
210    {
211        if ((dynamic_cast<Name*>(rep->getRE()) != 0) && (rep->getLB() == 0) && (rep->getUB()== unboundedRep))
212        {
213            Name* rep_name = dynamic_cast<Name*>(rep->getRE());
214            std::string gs_retVal = symgen.gensym("marker");
215
216            if (rep_name->getType() == Name::FixedLength)
217            {
218                cg_state.stmtsl.push_back(new Assign(gs_retVal, new MatchStar(new Var(cg_state.newsym), new CharClass(rep_name->getName()))));
219            }
220            else if (rep_name->getType() == Name::UnicodeCategory)
221            {
222                // TODO:  ?? not too sure....
223            }
224            else //Name::unicode
225            {
226                std::string t_retVal = symgen.gensym("t");
227                std::string u_retVal = symgen.gensym("u");
228                std::string v_retVal = symgen.gensym("v");
229                std::string new_cur_retVal = symgen.gensym("new_cur");
230
231                cg_state.stmtsl.push_back(new Assign(t_retVal, new Or(new CharClass(m_name_map.find("internal.nonfinal")->second), new CharClass(rep_name->getName()))));
232                cg_state.stmtsl.push_back(new Assign(u_retVal, new MatchStar(new Var(cg_state.newsym), new Var(t_retVal))));
233                cg_state.stmtsl.push_back(new Assign(v_retVal, new And(new Var(u_retVal), new CharClass(m_name_map.find("internal.initial")->second))));
234                cg_state.stmtsl.push_back(new Assign(new_cur_retVal, new And(new Var(u_retVal), new Not(new Var(t_retVal)))));
235
236                cg_state.stmtsl.push_back(new Assign(gs_retVal, new Or(new Var(v_retVal), new Var(new_cur_retVal))));
237            }
238
239            cg_state.newsym = gs_retVal;
240        }
241        else if (rep->getUB() == unboundedRep)
242        {
243            if (rep->getLB() == 0)
244            {
245                //std::cout << "While, no lb." << std::endl;
246
247                std::string while_test_gs_retVal = symgen.gensym("while_test");
248                std::string while_accum_gs_retVal = symgen.gensym("while_accum");
249                CodeGenState while_test_state;
250                while_test_state.newsym = while_test_gs_retVal;
251                CodeGenState t1_cg_state = re2pablo_helper(rep->getRE(), while_test_state);
252                cg_state.stmtsl.push_back(new Assign(while_test_gs_retVal, new Var(cg_state.newsym)));
253                cg_state.stmtsl.push_back(new Assign(while_accum_gs_retVal, new Var(cg_state.newsym)));
254                std::list<PabloS*> stmtList;
255                stmtList = t1_cg_state.stmtsl;
256                stmtList.push_back(new Assign(while_test_gs_retVal, new And(new Var(t1_cg_state.newsym), new Not(new Var(while_accum_gs_retVal)))));
257                stmtList.push_back(new Assign(while_accum_gs_retVal, new Or(new Var(while_accum_gs_retVal), new Var(t1_cg_state.newsym))));
258                cg_state.stmtsl.push_back( new While(new Var(while_test_gs_retVal), stmtList));
259                cg_state.newsym = while_accum_gs_retVal;
260            }
261            else //if (rep->getLB() > 1)
262            {
263                CodeGenState t1_cg_state = re2pablo_helper(rep->getRE(), cg_state);
264                rep->setLB(rep->getLB() - 1);
265                cg_state = re2pablo_helper(rep, t1_cg_state);
266            }
267        }
268        else if (rep->getUB() != unboundedRep)
269        {
270            if ((rep->getLB() == 0) && (rep->getUB() == 0))
271            {
272                //Just fall through...do nothing.
273            }
274            else if ((rep->getLB() == 0) && (rep->getUB() > 0))
275            {
276                CodeGenState t1_cg_state = re2pablo_helper(rep->getRE(), cg_state);
277                rep->setUB(rep->getUB() - 1);
278                CodeGenState t2_cg_state = re2pablo_helper(re, t1_cg_state);
279                std::string gs_retVal = symgen.gensym("alt_marker");
280                cg_state.stmtsl = t2_cg_state.stmtsl;
281                cg_state.stmtsl.push_back(new Assign(gs_retVal, new Or(new Var(cg_state.newsym), new Var(t2_cg_state.newsym))));
282                cg_state.newsym = gs_retVal;
283            }
284            else //if ((rep->getLB() > 0) && (rep->getUB() > 0))
285            {
286                CodeGenState t1_cg_state = re2pablo_helper(rep->getRE(), cg_state);
287                rep->setLB(rep->getLB() - 1);
288                rep->setUB(rep->getUB() - 1);
289                cg_state = re2pablo_helper(rep, t1_cg_state);
290            }
291        }
292    }
293
294    return cg_state;
295}
296
297CodeGenState Pbix_Compiler::Seq_helper(std::list<RE*>* lst, std::list<RE*>::const_iterator it, CodeGenState cg_state)
298{
299    if (it != lst->end())
300    {
301        cg_state = re2pablo_helper(*it, cg_state);
302        cg_state = Seq_helper(lst, ++it, cg_state);
303    }
304
305    return cg_state;
306}
307
308CodeGenState Pbix_Compiler::Alt_helper(std::list<RE*>* lst, std::list<RE*>::const_iterator it, CodeGenState cg_state)
309{
310    CodeGenState t1_cg_state = re2pablo_helper(*it, cg_state);
311    cg_state.stmtsl = t1_cg_state.stmtsl;
312    ++it;
313    if (it != lst->end())
314    {
315        CodeGenState t2_cg_state = Alt_helper(lst, it, cg_state);
316        cg_state.stmtsl = t2_cg_state.stmtsl;
317        std::string gs_retVal = symgen.gensym("alt_marker");
318        cg_state.stmtsl.push_back(new Assign(gs_retVal, new Or(new Var(t1_cg_state.newsym), new Var(t2_cg_state.newsym))));
319        cg_state.newsym = gs_retVal;
320    }
321    else
322    {
323        cg_state.newsym = t1_cg_state.newsym;
324    }
325
326    return cg_state;
327}
328
Note: See TracBrowser for help on using the repository browser.