Changeset 4246 for icGREP


Ignore:
Timestamp:
Oct 17, 2014, 1:49:15 PM (5 years ago)
Author:
nmedfort
Message:

Transitory work towards using CC_Namer.

Location:
icGREP/icgrep-devel/icgrep
Files:
2 added
5 edited

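Legend:

Unmodified (line is prefixed with both its old-revision and new-revision line numbers, run together with no separator)
Added (line is prefixed with its new-revision line number only)
Removed (line is prefixed with its old-revision line number only)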
  • icGREP/icgrep-devel/icgrep/CMakeLists.txt

    r4244 r4246  
    5555add_library(PabloADT pablo/pe_advance.cpp pablo/pe_all.cpp pablo/pe_and.cpp pablo/pe_call.cpp pablo/pe_charclass.cpp  pablo/pe_matchstar.cpp pablo/pe_scanthru.cpp pablo/pe_not.cpp  pablo/pe_or.cpp  pablo/pabloAST.cpp  pablo/pe_sel.cpp  pablo/pe_var.cpp  pablo/pe_xor.cpp pablo/ps_assign.cpp  pablo/ps_if.cpp  pablo/codegenstate.cpp  pablo/symbol_generator.cpp pablo/ps_while.cpp pablo/printer_pablos.cpp pablo/pablo_compiler.cpp)
    5656add_library(RegExpADT re/re_alt.cpp re/re_cc.cpp re/re_end.cpp re/re_name.cpp re/re_parser.cpp re/re_re.cpp re/re_rep.cpp re/re_seq.cpp re/re_start.cpp re/parsefailure.cpp re/re_reducer.cpp re/re_nullable.cpp re/re_simplifier.cpp re/re_compiler.cpp re/printer_re.cpp)
    57 add_library(CCADT cc/cc_compiler.cpp utf_encoding.cpp utf8_encoder.cpp unicode_categories.h)
     57add_library(CCADT cc/cc_namemap.cpp cc/cc_compiler.cpp utf_encoding.cpp utf8_encoder.cpp unicode_categories.h)
    5858
    5959include_directories("${PROJECT_SOURCE_DIR}")
  • icGREP/icgrep-devel/icgrep/compiler.cpp

    r4242 r4246  
    6060    #endif
    6161
     62    //Optimization passes to simplify the AST.
     63    re_ast = RE_Nullable::removeNullablePrefix(re_ast);
     64    #ifdef DEBUG_PRINT_RE_AST
     65    std::cerr << "RemoveNullablePrefix:" << std::endl << Printer_RE::PrintRE(re_ast) << std::endl;
     66    #endif
     67
     68    re_ast = RE_Nullable::removeNullableSuffix(re_ast);
     69    #ifdef DEBUG_PRINT_RE_AST
     70    std::cerr << "RemoveNullableSuffix:" << std::endl << Printer_RE::PrintRE(re_ast) << std::endl;
     71    #endif
     72
    6273    //Add the UTF encoding.
    6374    if (encoding.getType() == Encoding::Type::UTF_8)
     
    6980    //Print to the terminal the AST that was generated by the utf8 encoder.
    7081    std::cerr << "UTF8-encoder:" << std::endl << Printer_RE::PrintRE(re_ast) << std::endl;
    71     #endif
    72 
    73     //Optimization passes to simplify the AST.
    74     re_ast = RE_Nullable::removeNullablePrefix(re_ast);
    75     #ifdef DEBUG_PRINT_RE_AST
    76     std::cerr << "RemoveNullablePrefix:" << std::endl << Printer_RE::PrintRE(re_ast) << std::endl;
    77     #endif
    78 
    79     re_ast = RE_Nullable::removeNullableSuffix(re_ast);
    80     #ifdef DEBUG_PRINT_RE_AST
    81     std::cerr << "RemoveNullableSuffix:" << std::endl << Printer_RE::PrintRE(re_ast) << std::endl;
    8282    #endif
    8383
  • icGREP/icgrep-devel/icgrep/re/re_compiler.cpp

    r4245 r4246  
    3939#include <stdexcept>
    4040
     41//Set the 'internal.nonfinal' bit stream for the utf-8 multi-byte encoding.
     42//#define USE_IF_FOR_NONFINAL
     43
    4144using namespace pablo;
    4245
     
    4548RE_Compiler::RE_Compiler(PabloBlock & baseCG, std::map<std::string, std::string> name_map)
    4649: mCG(baseCG)
     50, mLineFeed(nullptr)
     51, mInitial(nullptr)
     52, mNonFinal(nullptr)
    4753, m_name_map(name_map)
    4854{
     
    5258void RE_Compiler::compile(RE * re, PabloBlock & cg) {
    5359
     60    mLineFeed = cg.createVar(m_name_map.find("LineFeed")->second);
     61
     62    const std::string initial = "initial";
     63    const std::string nonfinal = "nonfinal";
     64
    5465    if (hasUnicode(re)) {
    55         //Set the 'internal.initial' bit stream for the utf-8 multi-byte encoding.
    56         std::string gs_initial = cg.ssa("initial");
    57         m_name_map.insert(make_pair("initial", gs_initial));
     66        //Set the 'internal.initial' bit stream for the utf-8 multi-byte encoding.       
    5867        PabloAST * u8single = cg.createVar(m_name_map.find("UTF8-SingleByte")->second);
    5968        PabloAST * u8pfx2 = cg.createVar(m_name_map.find("UTF8-Prefix2")->second);
     
    6170        PabloAST * u8pfx4 = cg.createVar(m_name_map.find("UTF8-Prefix4")->second);
    6271        PabloAST * u8pfx = cg.createOr(cg.createOr(u8pfx2, u8pfx3), u8pfx4);
    63         cg.createAssign(gs_initial, cg.createOr(u8pfx, u8single));
    64 
    65         //Set the 'internal.nonfinal' bit stream for the utf-8 multi-byte encoding.
    66         std::string gs_nonfinal = cg.ssa("nonfinal");
    67         m_name_map.insert(make_pair("nonfinal", gs_nonfinal));
    68         //#define USE_IF_FOR_NONFINAL
     72        mInitial = cg.createVar(cg.createAssign(initial, cg.createOr(u8pfx, u8single)));
    6973        #ifdef USE_IF_FOR_NONFINAL
    70         cg.createAssign(gs_nonfinal, cg.createAll(0));
     74        mNonFinal = cg.createVar(cg.createAssign(gs_nonfinal, cg.createAll(0)));
    7175        #endif
    7276        PabloAST * u8scope32 = cg.createAdvance(u8pfx3);
     
    7882        cg.createIf(u8pfx, std::move(it));
    7983        #else       
    80         cg.createAssign(gs_nonfinal, cg.createOr(cg.createOr(u8pfx, u8scope32), cg.createOr(u8scope42, u8scope43)));
     84        mNonFinal = cg.createVar(cg.createAssign(nonfinal, cg.createOr(cg.createOr(u8pfx, u8scope32), cg.createOr(u8scope42, u8scope43))));
    8185        #endif
     86    }
     87    else {
     88        mInitial = cg.createAll(0);
     89        mNonFinal = cg.createAll(0);
    8290    }
    8391
     
    8694
    8795    //These three lines are specifically for grep.
    88     cg.createAssign(cg.ssa("marker"), cg.createAnd(cg.createMatchStar(cg.createVarIfAssign(result),
    89                     cg.createNot(cg.createVar(m_name_map.find("LineFeed")->second))), cg.createVar(m_name_map.find("LineFeed")->second)));
     96    cg.createAssign(cg.ssa("marker"), cg.createAnd(cg.createMatchStar(cg.createVarIfAssign(result), cg.createNot(mLineFeed)), mLineFeed));
    9097}
    9198
     
    107114        // Move the markers forward through any nonfinal UTF-8 bytes to the final position of each character.
    108115                PabloAST * marker = cg.createVar(target);
    109         marker = cg.createAnd(marker, cg.createCharClass(m_name_map.find("initial")->second));
    110         marker = cg.createScanThru(marker, cg.createCharClass(m_name_map.find("nonfinal")->second));
    111         PabloAST * dot = cg.createNot(cg.createCharClass(m_name_map.find("LineFeed")->second));
     116        marker = cg.createAnd(marker, mInitial);
     117        marker = cg.createScanThru(marker, mNonFinal);
     118        PabloAST * dot = cg.createNot(mLineFeed);
    112119        target = cg.createAssign(cg.ssa("dot"), cg.createAdvance(cg.createAnd(marker, dot)));
    113120    }
    114121    else if (isa<Start>(re)) {
    115         PabloAST * sol = cg.createNot(cg.createAdvance(cg.createNot(cg.createCharClass(m_name_map.find("LineFeed")->second))));
     122        PabloAST * sol = cg.createNot(cg.createAdvance(cg.createNot(mLineFeed)));
    116123        target = cg.createAssign(cg.ssa("sol"), cg.createAnd(cg.createVarIfAssign(target), sol));
    117124    }
    118125    else if (isa<End>(re)) {
    119         PabloAST * eol = cg.createCharClass(m_name_map.find("LineFeed")->second);
     126        PabloAST * eol = mLineFeed;
    120127        target = cg.createAssign(cg.ssa("eol"), cg.createAnd(cg.createVarIfAssign(target), eol));
    121128    }
     
    128135    if (name->getType() != Name::Type::FixedLength) {
    129136        // Move the markers forward through any nonfinal UTF-8 bytes to the final position of each character.
    130         marker = cg.createAnd(marker, cg.createCharClass(m_name_map.find("initial")->second));
    131         marker = cg.createScanThru(marker, cg.createCharClass(m_name_map.find("nonfinal")->second));
     137        marker = cg.createAnd(marker, mInitial);
     138        marker = cg.createScanThru(marker, mNonFinal);
    132139    }
    133140    PabloAST * cc = nullptr;
     
    139146    }
    140147    if (name->isNegated()) {
    141         cc = cg.createNot(cg.createOr(cg.createOr(cc, cg.createCharClass(m_name_map.find("LineFeed")->second)),
    142                                 cg.createCharClass(m_name_map.find("nonfinal")->second)));
     148        cc = cg.createNot(cg.createOr(cg.createOr(cc, mLineFeed), mNonFinal));
    143149    }
    144150    return cg.createAssign(cg.ssa("marker"), cg.createAdvance(cg.createAnd(cc, marker)));
     
    186192        Name * rep_name = dyn_cast<Name>(repeated);
    187193
    188         PabloAST * ccExpr;
     194        PabloAST * cc;
    189195        if (rep_name->getType() == Name::Type::UnicodeCategory) {
    190             ccExpr = cg.createCall(rep_name->getName());
     196            cc = cg.createCall(rep_name->getName());
    191197        }
    192198        else {
    193             ccExpr = cg.createCharClass(rep_name->getName());
     199            cc = cg.createCharClass(rep_name->getName());
    194200        }
    195201
    196202        if (rep_name->isNegated()) {
    197             ccExpr = cg.createNot(cg.createOr(cg.createOr(ccExpr, cg.createCharClass(m_name_map.find("LineFeed")->second)), cg.createCharClass(m_name_map.find("nonfinal")->second)));
     203            cc = cg.createNot(cg.createOr(cg.createOr(cc, mLineFeed), mNonFinal));
    198204        }
    199205
    200206        PabloAST * unbounded = cg.createVar(target);
    201207        if (rep_name->getType() == Name::Type::FixedLength) {
    202             unbounded = cg.createMatchStar(unbounded, ccExpr);
     208            unbounded = cg.createMatchStar(unbounded, cc);
    203209        }
    204210        else { // Name::Unicode and Name::UnicodeCategory
    205             unbounded = cg.createAnd(cg.createMatchStar(unbounded, cg.createOr(cg.createCharClass(m_name_map.find("nonfinal")->second), ccExpr)), cg.createCharClass(m_name_map.find("initial")->second));
     211            unbounded = cg.createAnd(cg.createMatchStar(unbounded, cg.createOr(mNonFinal, cc)), mInitial);
    206212        }
    207213        target = cg.createAssign(cg.ssa("marker"), unbounded);
    208214    }
    209215    else if (isa<Any>(repeated)) {
    210         PabloAST * dot = cg.createNot(cg.createCharClass(m_name_map.find("LineFeed")->second));
     216        PabloAST * dot = cg.createNot(mLineFeed);
    211217        PabloAST * unbounded = cg.createVar(target);
    212         unbounded = cg.createAnd(cg.createMatchStar(unbounded, cg.createOr(cg.createCharClass(m_name_map.find("nonfinal")->second), dot)), cg.createCharClass(m_name_map.find("initial")->second));
     218        unbounded = cg.createAnd(cg.createMatchStar(unbounded, cg.createOr(mNonFinal, dot)), mInitial);
    213219        target = cg.createAssign(cg.ssa("marker"), unbounded);
    214220    }
     
    249255        throw std::runtime_error("Unexpected Null Value passed to RE Compiler!");
    250256    }
    251     else if (isa<Any>(re)) found = true;
     257    else if (isa<Any>(re)) {
     258        found = true;
     259    }
    252260    else if (const Name * name = dyn_cast<const Name>(re)) {
    253261        if ((name->getType() == Name::Type::UnicodeCategory) || (name->getType() == Name::Type::Unicode)) {
  • icGREP/icgrep-devel/icgrep/re/re_compiler.h

    r4234 r4246  
    1717
    1818class PabloBlock;
     19class PabloAST;
    1920class Assign;
    2021class Var;
     
    4849
    4950    pablo::PabloBlock &                             mCG;
     51    pablo::Var *                                    mLineFeed;
     52    pablo::PabloAST *                               mInitial;
     53    pablo::PabloAST *                               mNonFinal;
    5054    std::map<std::string, std::string>              m_name_map;
    5155};
  • icGREP/icgrep-devel/icgrep/re/re_name.h

    r4206 r4246  
    22#define RE_NAME_H
    33
    4 #include "re_re.h"
     4#include <re/re_re.h>
    55#include <string>
    66
     7namespace pablo {
     8    class Var;
     9}
     10
     11
    712namespace re {
     13
     14class CC;
    815
    916class Name : public RE {
     
    1724    enum class Type {
    1825        FixedLength
    19         ,Unicode
    20         ,UnicodeCategory
     26        , Unicode
     27        , UnicodeCategory
    2128    };
    22     void setName(std::string name);
    23     std::string getName() const;
    24     void setNegated(const bool is_negated);
     29    const std::string & getName() const;
    2530    bool isNegated() const;
    26     void setType(const Type type);
    2731    Type getType() const;
     32    RE *getCC() const;
     33    pablo::Var * getVar() const {
     34        return mVar;
     35    }
     36    void setVar(pablo::Var * var) {
     37        mVar = var;
     38    }
     39    void setCC(RE *cc);
    2840    virtual ~Name() {}
    2941protected:
    3042    friend Name * makeName();
    31     friend Name * makeName(const Name *);
    32     friend Name * makeName(std::string, const bool, const Type);
     43    friend Name * makeName(const std::string, RE *);
     44    friend Name * makeName(const std::string, const bool, const Type);
    3345    Name();
    34     Name(std::string name, const bool negated, const Type type);
     46    Name(const std::string && name, const bool negated, const Type type);
     47    Name(const std::string && name, RE * cc);
    3548private:
    36     std::string mName;
    37     bool mNegated;
    38     Type mType;
     49    const std::string   mName;
     50    const bool          mNegated;
     51    const Type          mType;
     52    RE *                mCC;
     53    pablo::Var *        mVar;
    3954};
    4055
     
    4358, mName()
    4459, mNegated(false)
    45 , mType(Type::FixedLength) {
     60, mType(Type::FixedLength)
     61, mCC(nullptr)
     62, mVar(nullptr)
     63{
    4664
    4765}
    4866
    49 inline Name::Name(std::string name, const bool negated, const Type type)
     67inline Name::Name(const std::string && name, const bool negated, const Type type)
    5068: RE(ClassTypeId::Name)
    51 , mName(name)
     69, mName(std::move(name))
    5270, mNegated(negated)
    53 , mType(type) {
     71, mType(type)
     72, mCC(nullptr)
     73, mVar(nullptr)
     74{
    5475
    5576}
    5677
    57 inline void Name::setName(std::string name) {
    58     mName = name;
     78inline Name::Name(const std::string && name, RE * cc)
     79: RE(ClassTypeId::Name)
     80, mName(std::move(name))
     81, mNegated(false)
     82, mType(Type::FixedLength)
     83, mCC(cc)
     84, mVar(nullptr)
     85{
     86
    5987}
    6088
    61 inline std::string Name::getName() const {
     89inline const std::string & Name::getName() const {
    6290    return mName;
    6391}
     
    6795}
    6896
    69 inline void Name::setNegated(const bool is_negated) {
    70     mNegated = is_negated;
     97inline Name::Type Name::getType() const {
     98    return mType;
    7199}
    72100
    73 inline void Name::setType(const Type type) {
    74     mType = type;
     101inline RE * Name::getCC() const {
     102    return mCC;
    75103}
    76104
    77 inline Name::Type Name::getType() const {
    78     return mType;
     105inline void Name::setCC(RE * cc) {
     106    mCC = cc;
    79107}
    80108
     
    83111}
    84112
    85 inline Name * makeName(const Name * name) {
    86     return new Name(*name);
     113inline Name * makeName(const std::string name, const bool negated = false, const Name::Type type = Name::Type::FixedLength) {
     114    return new Name(std::move(name), negated, type);
    87115}
    88116
    89 inline Name * makeName(std::string name, const bool negated = false, const Name::Type type = Name::Type::FixedLength) {
    90     return new Name(name, negated, type);
     117inline Name * makeName(const std::string name, RE * cc) {
     118    return new Name(std::move(name), cc);
    91119}
    92120
Note: See TracChangeset for help on using the changeset viewer.