Changeset 4335 for icGREP


Ignore:
Timestamp:
Dec 11, 2014, 3:58:43 PM (5 years ago)
Author:
cameron
Message:

Ensure Name::Type::ASCII is used only for character classes (CCs) whose maximum code point is at most 0x7F

Location:
icGREP/icgrep-devel/icgrep
Files:
6 edited

Legend:

Unmodified
Added
Removed
  • icGREP/icgrep-devel/icgrep/cc/cc_compiler.cpp

    r4334 r4335  
    7777    Var * var = name->getCompiled();
    7878    if (var == nullptr) {
    79         if (name->getType() == Name::Type::FixedLength) {
     79        if (name->getType() != Name::Type::UnicodeCategory) {
    8080            RE * cc = name->getCC();
    8181            assert (cc);
  • icGREP/icgrep-devel/icgrep/compiler.cpp

    r4334 r4335  
    124124    #endif
    125125
    126     RE::release_memory();
    127 
    128126    // Scan through the pablo code and perform DCE and CSE
    129127    UseAnalysis::optimize(main);
     
    138136    LLVM_Gen_RetVal retVal = pablo_compiler.compile(main);
    139137
     138    RE::release_memory();
    140139    PabloAST::release_memory();
    141140
  • icGREP/icgrep-devel/icgrep/re/re_cc.cpp

    r4329 r4335  
    3939    }
    4040    return name;
     41}
     42
     43CodePointType CC::max_codepoint() {
     44    return mSparseCharSet.size() == 0 ? 0 : mSparseCharSet.back().hi_codepoint;
    4145}
    4246
  • icGREP/icgrep-devel/icgrep/re/re_cc.h

    r4329 r4335  
    5353
    5454    std::string canonicalName() const;
     55
     56    CodePointType max_codepoint();
    5557
    5658    void insert_range(const CodePointType lo_codepoint, const CodePointType hi_codepoint);
  • icGREP/icgrep-devel/icgrep/re/re_compiler.cpp

    r4334 r4335  
    129129inline Assign * RE_Compiler::process(Name * name, Assign * marker, PabloBlock & pb) {
    130130    PabloAST * markerVar = pb.createVar(marker);
    131     if (name->getType() != Name::Type::FixedLength) {
     131    if (name->getType() != Name::Type::ASCII) {
    132132        // Move the markers forward through any nonfinal UTF-8 bytes to the final position of each character.
    133133        markerVar = pb.createAnd(markerVar, mInitial);
     
    221221               
    222222inline bool RE_Compiler::isFixedLength(RE * regexp) {
    223     return isa<Name>(regexp) && ((cast<Name>(regexp)->getType()) == Name::Type::FixedLength);
     223    return isa<Name>(regexp) && ((cast<Name>(regexp)->getType()) == Name::Type::ASCII);
    224224}
    225225
     
    265265
    266266        unbounded = pb.createVar(marker);
    267         if (name->getType() == Name::Type::FixedLength) {
     267        if (name->getType() == Name::Type::ASCII) {
    268268            unbounded = pb.createMatchStar(unbounded, cc);
    269269        }
  • icGREP/icgrep-devel/icgrep/re/re_name.h

    r4329 r4335  
    33
    44#include <re/re_re.h>
     5#include <re/re_cc.h>
    56#include <string>
    67#include <iostream>
     
    1415namespace re {
    1516
    16 class CC;
    17 
    1817class Name : public RE {
    1918public:
     
    2524    }
    2625    enum class Type {
    27         FixedLength
     26        ASCII
    2827        , Unicode
    2928        , UnicodeCategory
     29        , Unknown
    3030    };
    3131    const std::string & getName() const;
     
    7979}
    8080
    81 inline Name * makeName(const std::string name, const Name::Type type = Name::Type::FixedLength) {
     81inline Name * makeName(const std::string name, const Name::Type type = Name::Type::Unicode) {
    8282    return new Name(std::move(name), type, nullptr);
    8383}
     
    8787        return cast<Name>(cc);
    8888    }
    89     return new Name(std::move(name), Name::Type::FixedLength, cc);
     89    else if (isa<CC>(cc)) {
     90        Name::Type ccType = cast<CC>(cc)->max_codepoint() <= 0x7F ? Name::Type::ASCII : Name::Type::Unicode;
     91        return new Name(std::move(name), ccType, cc);
     92    }
     93    else return new Name(std::move(name), Name::Type::Unknown, cc);
    9094}
    9195
Note: See TracChangeset for help on using the changeset viewer.