Changeset 4000


Ignore:
Timestamp:
Aug 13, 2014, 10:16:11 AM (5 years ago)
Author:
cameron
Message:

Fix Unicode match with ScanThru; fix negated Unicode class

File:
1 edited

Legend:

Unmodified
Added
Removed
  • icGREP/icgrep-devel/icgrep/pbix_compiler.cpp

    r3995 r4000  
    6767        std::string gs_initial = symgen.gensym("internal.initial");
    6868        m_name_map.insert(make_pair("internal.initial", gs_initial));
     69        PabloE * u8single = new Var(m_name_map.find("UTF8-SingleByte")->second);
     70        PabloE * u8pfx2 = new Var(m_name_map.find("UTF8-Prefix2")->second);
     71        PabloE * u8pfx3 = new Var(m_name_map.find("UTF8-Prefix3")->second);
     72        PabloE * u8pfx4 = new Var(m_name_map.find("UTF8-Prefix4")->second);
     73        PabloE * u8pfx = new Or(new Or(u8pfx2, u8pfx3), u8pfx4);
     74        cg_state.stmtsl.push_back(new Assign(gs_initial, new Or(u8pfx, u8single)));
     75
     76#if 0
    6977        cg_state.stmtsl.push_back(new Assign(gs_initial, new Or(new Or( new Or( new And(new Var(m_name_map.find("UTF8-Prefix2")->second),
    7078            new Var(cg_state.newsym)),  new And(new Var(m_name_map.find("UTF8-SingleByte")->second), new Var(cg_state.newsym))),
    7179            new And(new Var(m_name_map.find("UTF8-Prefix3")->second), new Var(cg_state.newsym))),
    7280            new And(new Var(m_name_map.find("UTF8-Prefix4")->second), new Var(cg_state.newsym)))));
     81#endif
    7382        cg_state.newsym = gs_initial;
    7483
     
    7786        std::string gs_nonfinal = symgen.gensym("internal.nonfinal");
    7887        m_name_map.insert(make_pair("internal.nonfinal", gs_nonfinal));
     88        PabloE * u8scope32 = new Advance(u8pfx3);
     89        PabloE * u8scope42 = new Advance(u8pfx4);
     90        PabloE * u8scope43 = new Advance(u8scope42);
     91     
     92        cg_state.stmtsl.push_back(new Assign(gs_nonfinal, new Or(new Or(u8pfx, u8scope32), new Or(u8scope42, u8scope43))));
     93#if 0
    7994        cg_state.stmtsl.push_back(new Assign(gs_nonfinal, new Or(new Or(new Or(new Or(new Or( new And(new Var(m_name_map.find("UTF8-Prefix3")->second),
    8095            new Var(cg_state.newsym)),  new And(new Var(m_name_map.find("UTF8-Prefix2")->second), new Var(cg_state.newsym))),
     
    8398            new And(new Var(m_name_map.find("UTF8-Prefix4")->second), new Var(cg_state.newsym)))), new Advance(
    8499            new Advance( new And(new Var(m_name_map.find("UTF8-Prefix4")->second), new Var(cg_state.newsym)))))));
     100#endif
    85101        cg_state.newsym = gs_nonfinal;
    86102    }
     
    106122        if (name->getType() != Name::FixedLength) {
    107123            // Move the markers forward through any nonfinal UTF-8 bytes to the final position of each character.
     124            markerExpr = new And(markerExpr, new CharClass(m_name_map.find("internal.initial")->second));
    108125            markerExpr = new ScanThru(markerExpr, new CharClass(m_name_map.find("internal.nonfinal")->second));
    109126        }       
     
    118135        }
    119136        if (name->isNegated()) {
    120             ccExpr = new And(new Not(new Or(ccExpr, new CharClass(m_name_map.find("LineFeed")->second))),
    121                              new CharClass(m_name_map.find("internal.initial")->second));
     137            ccExpr = new Not(new Or(new Or(ccExpr, new CharClass(m_name_map.find("LineFeed")->second)),
     138                                    new CharClass(m_name_map.find("internal.nonfinal")->second)));
    122139        }
    123140        cg_state.stmtsl.push_back(new Assign(gs_retVal, new Advance(new And(ccExpr, markerExpr))));
     
    188205
    189206            if (rep_name->isNegated()) {
    190                 ccExpr = new And(new Not(new Or(ccExpr, new CharClass(m_name_map.find("LineFeed")->second))),
    191                                  new CharClass(m_name_map.find("internal.initial")->second));
     207                ccExpr = new Not(new Or(new Or(ccExpr, new CharClass(m_name_map.find("LineFeed")->second)),
     208                                        new CharClass(m_name_map.find("internal.nonfinal")->second)));
    192209            }
    193210            if (rep_name->getType() == Name::FixedLength)
Note: See TracChangeset for help on using the changeset viewer.