source: icGREP/icgrep-devel/icgrep/pbix_compiler.cpp @ 3965

Last change on this file since 3965 was 3965, checked in by daled, 5 years ago

The Unicode category 'Nd' is in place and it is working.

File size: 11.5 KB
Line 
1/*
2 *  Copyright (c) 2014 International Characters.
3 *  This software is licensed to the public under the Open Software License 3.0.
4 *  icgrep is a trademark of International Characters.
5 */
6
7#include "pbix_compiler.h"
8
9Pbix_Compiler::Pbix_Compiler(std::map<std::string, std::string> name_map)
10{
11    m_name_map = name_map;
12    symgen = SymbolGenerator();
13}
14
15CodeGenState Pbix_Compiler::compile_subexpressions(const std::map<std::string, RE*>& re_map)
16{
17    CodeGenState cg_state;
18
19    for (auto it =  re_map.rbegin(); it != re_map.rend(); ++it)
20    {
21        //This is specifically for the utf8 multibyte character classes.
22        if (Seq* seq = dynamic_cast<Seq*>(it->second))
23        {
24            if (seq->getType() == Seq::Byte)
25            {
26                std::string gs_retVal = symgen.gensym("start_marker");
27                cg_state.stmtsl.push_back(new Assign(gs_retVal, new All(1)));
28                cg_state.newsym = gs_retVal;
29
30                std::list<RE*>::iterator endit;
31                endit = seq->GetREList()->end();
32                --endit;
33                std::list<RE*>::iterator it;
34                for (it = seq->GetREList()->begin(); it != seq->GetREList()->end(); ++it)
35                {
36                    Name* name = dynamic_cast<Name*>(*it);
37                    if (it != endit)
38                    {
39                        gs_retVal = symgen.gensym("marker");
40                        cg_state.stmtsl.push_back(new Assign(gs_retVal, new Advance(new And(new Var(cg_state.newsym), new CharClass(name->getName())))));
41                        cg_state.newsym = gs_retVal;
42                    }
43                    else
44                    {
45                        cg_state.stmtsl.push_back(new Assign(seq->getName(), new And(new Var(cg_state.newsym), new CharClass(name->getName()))));
46                    }
47                }
48            }
49        }
50    }
51
52    return cg_state;
53}
54
55CodeGenState Pbix_Compiler::compile(RE *re)
56{   
57    CodeGenState cg_state;
58
59    std::string gs_m0 = symgen.gensym("start_marker");
60    cg_state.stmtsl.push_back(new Assign(gs_m0, new All(1)));
61    cg_state.newsym = gs_m0;
62
63    //Set the 'internal.initial' bit stream for the utf-8 multi-byte encoding.
64    std::string gs_initial = symgen.gensym("internal.initial");
65    m_name_map.insert(make_pair("internal.initial", gs_initial));
66    cg_state.stmtsl.push_back(new Assign(gs_initial, new Or(new Or( new Or( new And(new Var(m_name_map.find("UTF8-Prefix2")->second),
67        new Var(cg_state.newsym)),  new And(new Var(m_name_map.find("UTF8-SingleByte")->second), new Var(cg_state.newsym))),
68        new And(new Var(m_name_map.find("UTF8-Prefix3")->second), new Var(cg_state.newsym))),
69        new And(new Var(m_name_map.find("UTF8-Prefix4")->second), new Var(cg_state.newsym)))));
70    cg_state.newsym = gs_initial;
71
72    //Set the 'internal.nonfinal' bit stream for the utf-8 multi-byte encoding.
73    cg_state.newsym = gs_m0;
74    std::string gs_nonfinal = symgen.gensym("internal.nonfinal");
75    m_name_map.insert(make_pair("internal.nonfinal", gs_nonfinal));
76    cg_state.stmtsl.push_back(new Assign(gs_nonfinal, new Or(new Or(new Or(new Or(new Or( new And(new Var(m_name_map.find("UTF8-Prefix3")->second),
77        new Var(cg_state.newsym)),  new And(new Var(m_name_map.find("UTF8-Prefix2")->second), new Var(cg_state.newsym))),
78        new Advance( new And(new Var(m_name_map.find("UTF8-Prefix3")->second), new Var(cg_state.newsym)))),
79        new And(new Var(m_name_map.find("UTF8-Prefix4")->second), new Var(cg_state.newsym))), new Advance(
80        new And(new Var(m_name_map.find("UTF8-Prefix4")->second), new Var(cg_state.newsym)))), new Advance(
81        new Advance( new And(new Var(m_name_map.find("UTF8-Prefix4")->second), new Var(cg_state.newsym)))))));
82    cg_state.newsym = gs_nonfinal;
83
84    cg_state.newsym = gs_m0;
85    cg_state = re2pablo_helper(re, cg_state);
86
87    //These three lines are specifically for grep.
88    std::string gs_retVal = symgen.gensym("marker");
89    cg_state.stmtsl.push_back(new Assign(gs_retVal, new And(new MatchStar(new Var(cg_state.newsym),
90        new Not(new Var(m_name_map.find("LineFeed")->second))), new Var(m_name_map.find("LineFeed")->second))));
91    cg_state.newsym = gs_retVal;
92
93    return cg_state;
94}
95
96CodeGenState Pbix_Compiler::re2pablo_helper(RE *re, CodeGenState cg_state)
97{
98    if (Name* name = dynamic_cast<Name*>(re))
99    {
100        std::string gs_retVal = symgen.gensym("marker");
101
102        if (name->getType() == Name::FixedLength)
103        {
104            cg_state.stmtsl.push_back(new Assign(gs_retVal, new Advance(new And(new Var(cg_state.newsym), new CharClass(name->getName())))));
105        }
106        else if (name->getType() == Name::UnicodeCategory)
107        {
108            cg_state.stmtsl.push_back(new Assign(gs_retVal, new Advance(new And(new Var(cg_state.newsym), new Call(name->getName())))));
109        }
110        else //Name::Unicode
111        {
112            cg_state.stmtsl.push_back(new Assign(gs_retVal, new Advance(new And(new CharClass(name->getName()), new ScanThru(new Var(cg_state.newsym), new CharClass(m_name_map.find("internal.nonfinal")->second))))));
113        }
114        cg_state.newsym = gs_retVal;
115
116        //cout << "\n" << "(" << StatementPrinter::PrintStmts(cg_state) << ")" << "\n" << endl;
117    }
118    else if (Start* start = dynamic_cast<Start*>(re))
119    {
120        std::string gs_retVal = symgen.gensym("start_of_line_marker");
121        cg_state.stmtsl.push_back(new Assign(gs_retVal, new And(new Var(cg_state.newsym), new Not(new Advance(new Not(new CharClass(m_name_map.find("LineFeed")->second)))))));
122        cg_state.newsym = gs_retVal;
123    }
124    else if (End* end = dynamic_cast<End*>(re))
125    {
126        std::string gs_retVal = symgen.gensym("end_of_line_marker");
127        cg_state.stmtsl.push_back(new Assign(gs_retVal, new And(new Var(cg_state.newsym), new CharClass(m_name_map.find("LineFeed")->second))));
128        cg_state.newsym = gs_retVal;
129    }
130    else if (Seq* seq = dynamic_cast<Seq*>(re))
131    {
132        std::list<RE*>::iterator it = seq->GetREList()->begin();
133        if (it != seq->GetREList()->end())
134        {
135            cg_state = Seq_helper(seq->GetREList(), it, cg_state);
136        }
137    //cout << "\n" << "Seq => (" << StatementPrinter::PrintStmts(cg_state) << ")" << "\n" << endl;
138    }
139    else if (Alt* alt = dynamic_cast<Alt*>(re))
140    {
141        if (alt->GetREList() == 0)
142        {
143
144            std::string gs_retVal = symgen.gensym("always_fail_marker");
145            cg_state.stmtsl.push_back(new Assign(gs_retVal, new All(0)));
146            cg_state.newsym = gs_retVal;
147        }
148        else
149        {
150            if (alt->GetREList()->size() == 1)
151            {
152                cg_state = re2pablo_helper(alt->GetREList()->front(), cg_state);
153            }
154            else
155            {
156                std::list<RE*>::iterator it = alt->GetREList()->begin();
157                cg_state = Alt_helper(alt->GetREList(), it, cg_state);
158            }
159        }
160    //cout << "\n" << "Alt => (" << StatementPrinter::PrintStmts(cg_state) << ")" << "\n" << endl;
161    }
162    else if (Rep* rep = dynamic_cast<Rep*>(re))
163    {
164        if ((dynamic_cast<Name*>(rep->getRE()) != 0) && (rep->getLB() == 0) && (rep->getUB()== unboundedRep))
165        {
166            Name* rep_name = dynamic_cast<Name*>(rep->getRE());
167            std::string gs_retVal = symgen.gensym("marker");
168
169            if (rep_name->getType() == Name::FixedLength)
170            {
171                cg_state.stmtsl.push_back(new Assign(gs_retVal, new MatchStar(new Var(cg_state.newsym), new CharClass(rep_name->getName()))));
172            }
173            else //Name::Unicode and Name::UnicodeCategory
174            {
175                cg_state.stmtsl.push_back(new Assign(gs_retVal,
176                    new And(new MatchStar(new Var(cg_state.newsym), new Or(new CharClass(m_name_map.find("internal.nonfinal")->second),
177                    new CharClass(rep_name->getName()))), new CharClass(m_name_map.find("internal.initial")->second))));
178            }
179
180            cg_state.newsym = gs_retVal;
181        }
182        else if (rep->getUB() == unboundedRep)
183        {
184            if (rep->getLB() == 0)
185            {
186                //std::cout << "While, no lb." << std::endl;
187
188                std::string while_test_gs_retVal = symgen.gensym("while_test");
189                std::string while_accum_gs_retVal = symgen.gensym("while_accum");
190                CodeGenState while_test_state;
191                while_test_state.newsym = while_test_gs_retVal;
192                CodeGenState t1_cg_state = re2pablo_helper(rep->getRE(), while_test_state);
193                cg_state.stmtsl.push_back(new Assign(while_test_gs_retVal, new Var(cg_state.newsym)));
194                cg_state.stmtsl.push_back(new Assign(while_accum_gs_retVal, new Var(cg_state.newsym)));
195                std::list<PabloS*> stmtList;
196                stmtList = t1_cg_state.stmtsl;
197                stmtList.push_back(new Assign(while_test_gs_retVal, new And(new Var(t1_cg_state.newsym), new Not(new Var(while_accum_gs_retVal)))));
198                stmtList.push_back(new Assign(while_accum_gs_retVal, new Or(new Var(while_accum_gs_retVal), new Var(t1_cg_state.newsym))));
199                cg_state.stmtsl.push_back( new While(new Var(while_test_gs_retVal), stmtList));
200                cg_state.newsym = while_accum_gs_retVal;
201            }
202            else //if (rep->getLB() > 1)
203            {
204                CodeGenState t1_cg_state = re2pablo_helper(rep->getRE(), cg_state);
205                rep->setLB(rep->getLB() - 1);
206                cg_state = re2pablo_helper(rep, t1_cg_state);
207            }
208        }
209        else if (rep->getUB() != unboundedRep)
210        {
211            if ((rep->getLB() == 0) && (rep->getUB() == 0))
212            {
213                //Just fall through...do nothing.
214            }
215            else if ((rep->getLB() == 0) && (rep->getUB() > 0))
216            {
217                CodeGenState t1_cg_state = re2pablo_helper(rep->getRE(), cg_state);
218                rep->setUB(rep->getUB() - 1);
219                CodeGenState t2_cg_state = re2pablo_helper(re, t1_cg_state);
220                std::string gs_retVal = symgen.gensym("alt_marker");
221                cg_state.stmtsl = t2_cg_state.stmtsl;
222                cg_state.stmtsl.push_back(new Assign(gs_retVal, new Or(new Var(cg_state.newsym), new Var(t2_cg_state.newsym))));
223                cg_state.newsym = gs_retVal;
224            }
225            else //if ((rep->getLB() > 0) && (rep->getUB() > 0))
226            {
227                CodeGenState t1_cg_state = re2pablo_helper(rep->getRE(), cg_state);
228                rep->setLB(rep->getLB() - 1);
229                rep->setUB(rep->getUB() - 1);
230                cg_state = re2pablo_helper(rep, t1_cg_state);
231            }
232        }
233    }
234
235    return cg_state;
236}
237
238CodeGenState Pbix_Compiler::Seq_helper(std::list<RE*>* lst, std::list<RE*>::const_iterator it, CodeGenState cg_state)
239{
240    if (it != lst->end())
241    {
242        cg_state = re2pablo_helper(*it, cg_state);
243        cg_state = Seq_helper(lst, ++it, cg_state);
244    }
245
246    return cg_state;
247}
248
249CodeGenState Pbix_Compiler::Alt_helper(std::list<RE*>* lst, std::list<RE*>::const_iterator it, CodeGenState cg_state)
250{
251    CodeGenState t1_cg_state = re2pablo_helper(*it, cg_state);
252    cg_state.stmtsl = t1_cg_state.stmtsl;
253    ++it;
254    if (it != lst->end())
255    {
256        CodeGenState t2_cg_state = Alt_helper(lst, it, cg_state);
257        cg_state.stmtsl = t2_cg_state.stmtsl;
258        std::string gs_retVal = symgen.gensym("alt_marker");
259        cg_state.stmtsl.push_back(new Assign(gs_retVal, new Or(new Var(t1_cg_state.newsym), new Var(t2_cg_state.newsym))));
260        cg_state.newsym = gs_retVal;
261    }
262    else
263    {
264        cg_state.newsym = t1_cg_state.newsym;
265    }
266
267    return cg_state;
268}
269
Note: See TracBrowser for help on using the repository browser.