Changeset 4337 for icGREP/icgrep-devel


Ignore:
Timestamp:
Dec 12, 2014, 1:21:03 PM (5 years ago)
Author:
cameron
Message:

Compile only byteclasses with cc_compiler; resolve CC/BC ambiguity

Location:
icGREP/icgrep-devel/icgrep
Files:
11 edited

Legend:

Unmodified
Added
Removed
  • icGREP/icgrep-devel/icgrep/cc/cc_compiler.cpp

    r4336 r4337  
    1616#include <re/re_rep.h>
    1717#include <re/re_name.h>
     18#include <re/re_diff.h>
     19#include <re/re_intersect.h>
    1820#include <re/printer_re.h>
    1921#include <cc/cc_namemap.hpp>
     
    4749
    4850pablo::Var * CC_Compiler::compileCC(const re::CC *cc) {
    49      return mCG.createVar(mCG.createAssign(cc->canonicalName(), charset_expr(cc)));
    50 }
    51 
    52 std::vector<Var *> CC_Compiler::compile(const CC_NameMap & nameMap) {
    53     for (Name * name : nameMap) {
    54         compile_re(name);
    55     }
    56     if (mAnnotateVariableConstraints) {
    57         computeVariableConstraints();
    58     }
     51     return mCG.createVar(mCG.createAssign(cc->canonicalName(ByteClass), charset_expr(cc)));
     52}
     53
     54std::vector<Var *> CC_Compiler::getBasisBits(const CC_NameMap & nameMap) {
    5955    return mBasisBit;
    6056}
    6157
    62 PabloAST * CC_Compiler::compile_re(RE * re) {
    63     if (isa<Name>(re)) {
    64         return compile_re(cast<Name>(re));
    65     }
    66     else if (isa<Alt>(re)) {
    67         return compile_re(cast<Alt>(re));
    68     }
    69     else if (isa<Seq>(re)) {
    70         return compile_re(cast<Seq>(re));
    71     }
    72     throw std::runtime_error("Unexpected RE node given to CC_Compiler: " + Printer_RE::PrintRE(re));
    73 }
    74 
    75 PabloAST * CC_Compiler::compile_re(Name * name) {
    76     assert(name);
    77     Var * var = name->getCompiled();
    78     if (var == nullptr) {
    79         if (name->getType() != Name::Type::UnicodeCategory) {
    80             RE * def = name->getDefinition();
    81             assert (def);
    82             PabloAST * value = nullptr;
    83             if (isa<CC>(def)) {
    84                 value = charset_expr(cast<CC>(def));
    85             }
    86             else if (isa<Seq>(def)) {
    87                 value = compile_re(cast<Seq>(def));
    88             }
    89             else if (isa<Alt>(def)) {
    90                 value = compile_re(cast<Alt>(def));
    91             }
    92             if (value == nullptr) {
    93                 throw std::runtime_error("Unexpected CC node given to CC_Compiler: " + Printer_RE::PrintRE(name) + " : " + Printer_RE::PrintRE(def));
    94             }
    95             Assign * assign = mCG.createAssign(name->getName(), value);
    96             if (mAnnotateVariableConstraints && isa<CC>(def)) {
    97                 mVariableVector.push_back(std::make_pair(cast<CC>(def), assign));
    98             }
    99             var = mCG.createVar(assign);
    100         }
    101         else {
    102             var = mCG.createVar(name->getName());
    103         }
    104         name->setCompiled(var);
    105     }
    106     return var;
    107 }
    108 
    109 PabloAST * CC_Compiler::compile_re(const Seq * seq) {
    110     Assign * assignment = nullptr;
    111     PabloAST * result = nullptr;
    112     auto i = seq->begin();
    113     while (true) {
    114         PabloAST * cc = compile_re(*i);
    115         result = assignment ? mCG.createAnd(mCG.createVar(assignment), cc) : cc;
    116         if (++i == seq->end()) {
    117             break;
    118         }
    119         assignment = mCG.createAssign("seq", mCG.createAdvance(result, 1));
    120     }
    121     return result;
    122 }
    123 
    124 PabloAST * CC_Compiler::compile_re(const Alt *alt) {
    125     Assign * assignment = nullptr;
    126     PabloAST * result = nullptr;
    127     auto i = alt->begin();
    128     while (true) {
    129         PabloAST * cc = compile_re(*i);
    130         result = assignment ? mCG.createOr(mCG.createVar(assignment), cc) : cc;
    131         if (++i == alt->end()) {
    132             break;
    133         }
    134         assignment = mCG.createAssign("alt", result);
    135     }
    136     return result;
    137 }
     58void CC_Compiler::compileByteClasses(RE * re) {
     59    if (Alt * alt = dyn_cast<Alt>(re)) {
     60        for (auto i = alt->begin(); i != alt->end(); ++i) {
     61            compileByteClasses(*i);
     62        }
     63    }
     64    else if (Seq * seq = dyn_cast<Seq>(re)) {
     65        for (auto i = seq->begin(); i != seq->end(); ++i) {
     66            compileByteClasses(*i);
     67        }
     68    }
     69    else if (Rep * rep = dyn_cast<Rep>(re)) {
     70        compileByteClasses(rep->getRE());
     71    }
     72    else if (Diff * diff = dyn_cast<Diff>(re)) {
     73        compileByteClasses(diff->getRH());
     74        compileByteClasses(diff->getLH());
     75    }
     76    else if (Intersect * e = dyn_cast<Intersect>(re)) {
     77        compileByteClasses(e->getRH());
     78        compileByteClasses(e->getLH());
     79    }
     80    else if (Name * name = dyn_cast<Name>(re)) {
     81        RE * d = name->getDefinition();
     82        if (d && !isa<CC>(d)) {
     83            compileByteClasses(d);
     84        }
     85        else if (d && isa<CC>(d)) {
     86            name->setCompiled(compileCC(cast<CC>(d)));
     87        }
     88    }
     89    else if (CC * cc = dyn_cast<CC>(re)) {
     90        std::cerr << "Shouldn't get here\n";
     91        exit(-1);
     92    }
     93}
     94
    13895
    13996
  • icGREP/icgrep-devel/icgrep/cc/cc_compiler.h

    r4334 r4337  
    2626    CC_Compiler(pablo::PabloBlock & cg, const Encoding encoding, const bool annotateVariableConstraints = false, const std::string basis_pattern = "basis");
    2727
    28     std::vector<pablo::Var *> compile(const CC_NameMap & nameMap);
     28    std::vector<pablo::Var *> getBasisBits(const CC_NameMap & nameMap);
    2929
    3030    pablo::Var * compileCC(const re::CC *cc);
     31
     32    void compileByteClasses(re::RE * re);
     33
    3134private:
    32 
    33 
    34     pablo::PabloAST * compile_re(re::RE * re);
    35     pablo::PabloAST * compile_re(re::Name * name);
    36     pablo::PabloAST * compile_re(const re::Alt * alt);
    37     pablo::PabloAST * compile_re(const re::Seq *seq);
    38 
    3935    pablo::Var * getBasisVar(const int n) const;
    4036    pablo::PabloAST * bit_pattern_expr(const unsigned pattern, unsigned selected_bits);
  • icGREP/icgrep-devel/icgrep/cc/cc_namemap.cpp

    r4336 r4337  
    1414namespace cc {
    1515
    16 RE * CC_NameMap::process(RE * re) {
     16RE * CC_NameMap::process(RE * re, const CC_type t) {
    1717    if (Alt * alt = dyn_cast<Alt>(re)) {
    1818        for (auto i = alt->begin(); i != alt->end(); ++i) {
    19             *i = process(*i);
     19            *i = process(*i, t);
    2020        }
    2121    }
    2222    else if (Seq * seq = dyn_cast<Seq>(re)) {
    2323        for (auto i = seq->begin(); i != seq->end(); ++i) {
    24             *i = process(*i);
     24            *i = process(*i, t);
    2525        }
    2626    }
    2727    else if (Rep * rep = dyn_cast<Rep>(re)) {
    28         rep->setRE(process(rep->getRE()));
     28        rep->setRE(process(rep->getRE(), t));
    2929    }
    3030    else if (Diff * diff = dyn_cast<Diff>(re)) {
    31         diff->setRH(process(diff->getRH()));
    32         diff->setLH(process(diff->getLH()));
     31        diff->setRH(process(diff->getRH(), t));
     32        diff->setLH(process(diff->getLH(), t));
    3333    }
    3434    else if (Intersect * e = dyn_cast<Intersect>(re)) {
    35         e->setRH(process(e->getRH()));
    36         e->setLH(process(e->getLH()));
     35        e->setRH(process(e->getRH(), t));
     36        e->setLH(process(e->getLH(), t));
    3737    }
    3838    else if (Name * nameNode = dyn_cast<Name>(re)) {
    3939        RE * def = nameNode->getDefinition();
    4040        if (def && !isa<CC>(def)) {
    41             nameNode->setDefinition(process(def));
     41            nameNode->setDefinition(process(def, t));
    4242        }
    4343        std::string classname = nameNode->getName();
     
    5050    }
    5151    else if (CC * cc = dyn_cast<CC>(re)) {
    52         std::string classname = cc->canonicalName();
     52        std::string classname = cc->canonicalName(t);
    5353        auto f = mNameMap.find(classname);
    5454        if (f == mNameMap.end()) {
    55             return insert(std::move(classname), makeName(classname, cc));
     55            Name * n;
     56            if (t == ByteClass) {
     57              n = makeByteName(classname, cc);
     58            }
     59            else {
     60              n = makeName(classname, cc);
     61            }
     62            return insert(std::move(classname), n);
    5663        }
    5764        return f->second;
  • icGREP/icgrep-devel/icgrep/cc/cc_namemap.hpp

    r4334 r4337  
    55#include <vector>
    66#include <string>
     7#include "re/re_name.h"
     8#include "re/re_cc.h"
    79
    810namespace re {
    911    class CC;
    10     class Name;
    1112    class RE;
    1213}
     
    2324    CC_NameMap() {}
    2425
    25     re::RE * process(re::RE * re);
     26    re::RE * process(re::RE * re, const re::CC_type t);
    2627
    2728    inline const re::Name * operator[](const std::string & name) const {
  • icGREP/icgrep-devel/icgrep/compiler.cpp

    r4335 r4337  
    1212
    1313#include <compiler.h>
     14#include <re/re_cc.h>
    1415#include <re/re_nullable.h>
    1516#include <re/re_simplifier.h>
     
    7778
    7879    CC_NameMap nameMap;
    79     re_ast = nameMap.process(re_ast);
     80    re_ast = nameMap.process(re_ast, UnicodeClass);
    8081
    8182    #ifdef DEBUG_PRINT_RE_AST
    8283    std::cerr << "Namer:" << std::endl << Printer_RE::PrintRE(re_ast) << std::endl;
     84    std::cerr << "NameMap:\n" << nameMap.printMap() << std::endl;
    8385    #endif
    8486
     
    9193        #endif
    9294    }
    93 
    94     // note: system is cumbersome at the moment; this ought to be done AFTER toUTF8.
    95     //nameMap.addPredefined("LineFeed", makeCC(0x0A));
    9695    #ifdef DEBUG_PRINT_RE_AST
    9796    std::cerr << "NameMap:\n" << nameMap.printMap() << std::endl;
    9897    #endif
    99 
    100    
    10198   
    10299    re_ast = RE_Simplifier::simplify(re_ast);
     
    110107
    111108    CC_Compiler cc_compiler(main, encoding, enable_multiplexing);
    112     auto basisBits = cc_compiler.compile(nameMap);
     109   
     110    cc_compiler.compileByteClasses(re_ast);
     111   
     112    auto basisBits = cc_compiler.getBasisBits(nameMap);
    113113    #ifdef DEBUG_PRINT_PBIX_AST
    114114    //Print to the terminal the AST that was generated by the character class compiler.
     
    116116    #endif
    117117
     118   
     119   
     120   
    118121    RE_Compiler re_compiler(main, nameMap);
    119122    re_compiler.initializeRequiredStreams(cc_compiler);
  • icGREP/icgrep-devel/icgrep/re/printer_re.cpp

    r4329 r4337  
    4545    {
    4646        retVal = "CC \"";
    47         retVal += re_cc->canonicalName();
     47        retVal += re_cc->canonicalName(UnicodeClass);
    4848        retVal += "\" ";
    4949
  • icGREP/icgrep-devel/icgrep/re/re_cc.cpp

    r4335 r4337  
    2525}
    2626
    27 std::string CC::canonicalName() const {
     27std::string CC::canonicalName(CC_type t) const {
    2828    std::string name = "CC";
     29    if ((t == ByteClass) && (mSparseCharSet.back().hi_codepoint >= 0x80)) {
     30      name = "BC";
     31    }
    2932    char separator = '_';
    3033    for (const CharSetItem & i : mSparseCharSet) {
  • icGREP/icgrep-devel/icgrep/re/re_cc.h

    r4335 r4337  
    2626typedef std::vector<CharSetItem> CharSetVector;
    2727
     28enum CC_type {UnicodeClass, ByteClass};
     29
    2830class CC : public RE {
    2931public:
     
    5254    static const CodePointType UNICODE_MAX = 0x10FFFF;
    5355
    54     std::string canonicalName() const;
     56    std::string canonicalName(CC_type t) const;
    5557
    5658    CodePointType max_codepoint();
  • icGREP/icgrep-devel/icgrep/re/re_compiler.cpp

    r4335 r4337  
    8686    }
    8787    else {
     88        Var * var = name->getCompiled();
     89        if (var == nullptr) {
     90            RE * def = name->getDefinition();
     91            assert(!isa<CC>(def));  //  Names mapping to CCs should have been compiled.
     92            assert(name->getType == Name::Type::Unicode);  //
     93            // compile in top-level block
     94            name -> setCompiled(pb.createVar(compile(def, mCG)));
     95        }
    8896        return name->getCompiled();
    8997    }
     
    129137inline Assign * RE_Compiler::process(Name * name, Assign * marker, PabloBlock & pb) {
    130138    PabloAST * markerVar = pb.createVar(marker);
    131     if (name->getType() != Name::Type::ASCII) {
     139    if (name->getType() != Name::Type::Byte) {
    132140        // Move the markers forward through any nonfinal UTF-8 bytes to the final position of each character.
    133141        markerVar = pb.createAnd(markerVar, mInitial);
     
    221229               
    222230inline bool RE_Compiler::isFixedLength(RE * regexp) {
    223     return isa<Name>(regexp) && ((cast<Name>(regexp)->getType()) == Name::Type::ASCII);
     231    return isa<Name>(regexp) && ((cast<Name>(regexp)->getType()) == Name::Type::Byte);
    224232}
    225233
     
    265273
    266274        unbounded = pb.createVar(marker);
    267         if (name->getType() == Name::Type::ASCII) {
     275        if (name->getType() == Name::Type::Byte) {
    268276            unbounded = pb.createMatchStar(unbounded, cc);
    269277        }
  • icGREP/icgrep-devel/icgrep/re/re_name.h

    r4336 r4337  
    2424    }
    2525    enum class Type {
    26         ASCII
     26        Byte
    2727        , Unicode
    2828        , UnicodeCategory
     
    4141    virtual ~Name() {}
    4242protected:
    43     friend Name * makeName(const std::string, RE *);
     43    friend Name * makeName(const std::string, RE *);   
     44    friend Name * makeByteName(const std::string, RE *);
    4445    friend Name * makeName(const std::string, const Type);
    4546    void* operator new (std::size_t size) noexcept {
     
    8889    }
    8990    else if (isa<CC>(cc)) {
    90         Name::Type ccType = cast<CC>(cc)->max_codepoint() <= 0x7F ? Name::Type::ASCII : Name::Type::Unicode;
     91        Name::Type ccType = cast<CC>(cc)->max_codepoint() <= 0x7F ? Name::Type::Byte : Name::Type::Unicode;
    9192        return new Name(std::move(name), ccType, cc);
    9293    }
     
    9495}
    9596
     97inline Name * makeByteName(const std::string name, RE * cc) {
     98    if (isa<Name>(cc)) {
     99        return cast<Name>(cc);
     100    }
     101    else {
     102        return new Name(std::move(name), Name::Type::Byte, cc);
     103    }
     104}
     105
    96106}
    97107
  • icGREP/icgrep-devel/icgrep/utf8_encoder.cpp

    r4336 r4337  
    3434        }
    3535    }
    36     ast = nameMap.process(ast);
     36    ast = nameMap.process(ast, ByteClass);
    3737    return ast;
    3838}
Note: See TracChangeset for help on using the changeset viewer.