Changeset 4808


Ignore:
Timestamp:
Sep 30, 2015, 12:26:23 PM (2 years ago)
Author:
nmedfort
Message:

Progress on multi-target UCD compilation

Location:
icGREP/icgrep-devel/icgrep
Files:
12 edited

Legend:

Unmodified
Added
Removed
  • icGREP/icgrep-devel/icgrep/UCD/ucd_compiler.cpp

    r4804 r4808  
    99
    1010namespace UCD {
    11 
    12 /** ------------------------------------------------------------------------------------------------------------- *
    13  * @brief addTarget
    14  ** ------------------------------------------------------------------------------------------------------------- */
    15 inline void UCDCompiler::addTarget(const UnicodeSet & set) {
    16     mTargetMap.emplace(&set, PabloBlock::createZeroes());
    17 }
    1811
    1912/** ------------------------------------------------------------------------------------------------------------- *
     
    369362 * @return the output stream with a 1-bit in any position of a character in the unicode set
    370363 ** ------------------------------------------------------------------------------------------------------------- */
    371 PabloAST * UCDCompiler::generateWithDefaultIfHierarchy(const UnicodeSet & set, PabloBuilder & entry) {
     364std::vector<PabloAST *> UCDCompiler::generateWithDefaultIfHierarchy(const std::vector<UnicodeSet> & sets, PabloBuilder & entry) {
    372365
    373366    const RangeList defaultIfHierachy = {
     
    458451        {0x10000, 0x10FFFF}};
    459452
    460     addTarget(set);
     453    addTargets(sets);
    461454    generateRange(defaultIfHierachy, entry);
    462     return mTargetMap[&set];
     455    return std::move(returnMarkers(sets));
    463456}
    464457
     
    469462 * @return the output stream with a 1-bit in any position of a character in the unicode set
    470463 ** ------------------------------------------------------------------------------------------------------------- */
    471 PabloAST * UCDCompiler::generateWithoutIfHierarchy(const UnicodeSet & set, PabloBuilder & entry) {
    472     const RangeList defaultIfHierachy = {{0x10000, 0x10FFFF}};
    473     addTarget(set);
    474     generateRange(defaultIfHierachy, entry);
    475     return mTargetMap[&set];
     464std::vector<PabloAST *> UCDCompiler::generateWithoutIfHierarchy(const std::vector<UnicodeSet> & sets, PabloBuilder & entry) {
     465    const RangeList noIfHierachy = {{0x10000, 0x10FFFF}};
     466
     467    addTargets(sets);
     468    generateRange(noIfHierachy, entry);
     469    return std::move(returnMarkers(sets));
     470}
     471
     472/** ------------------------------------------------------------------------------------------------------------- *
     473 * @brief addTargets
     474 ** ------------------------------------------------------------------------------------------------------------- */
     475inline void UCDCompiler::addTargets(const std::vector<UnicodeSet> &sets) {
     476    for (const UnicodeSet & set : sets) {
     477        mTargetMap.emplace(&set, PabloBlock::createZeroes());
     478    }
     479}
     480
     481/** ------------------------------------------------------------------------------------------------------------- *
     482 * @brief returnMarkers
     483 ** ------------------------------------------------------------------------------------------------------------- */
     484inline std::vector<PabloAST *> UCDCompiler::returnMarkers(const std::vector<UnicodeSet> & sets) const {
     485    std::vector<PabloAST *> markers(sets.size());
     486    unsigned i = 0;
     487    for (const UnicodeSet & set : sets) {
     488        auto f = mTargetMap.find(&set);
     489        assert (f != mTargetMap.end());
     490        assert (f->second);
     491        markers[i++] = f->second;
     492    }
     493    return std::move(markers);
    476494}
    477495
  • icGREP/icgrep-devel/icgrep/UCD/ucd_compiler.hpp

    r4804 r4808  
    4141    UCDCompiler(cc::CC_Compiler & ccCompiler);
    4242
    43     PabloAST * generateWithDefaultIfHierarchy(const UnicodeSet & set, PabloBuilder & entry);
     43    std::vector<PabloAST *> generateWithDefaultIfHierarchy(const std::vector<UnicodeSet> &sets, PabloBuilder & entry);
    4444
    45     PabloAST * generateWithoutIfHierarchy(const UnicodeSet & set, PabloBuilder & entry);
     45    std::vector<PabloAST *> generateWithoutIfHierarchy(const std::vector<UnicodeSet> & sets, PabloBuilder & entry);
    4646
    4747protected:
     
    6363    PabloAST * makePrefix(const codepoint_t cp, const unsigned byte_no, PabloBuilder & builder, PabloAST * prefix);
    6464
    65     void addTarget(const UnicodeSet & set);
    66 
    6765    static RangeList byteDefinitions(const RangeList & list, const unsigned byte_no);
    6866
     
    7674    static RangeList innerRanges(const RangeList & list);
    7775
     76    void addTargets(const std::vector<UnicodeSet> & sets);
     77
     78    std::vector<PabloAST *> returnMarkers(const std::vector<UnicodeSet> &sets) const;
     79
    7880private:
    7981    cc::CC_Compiler &       mCharacterClassCompiler;
  • icGREP/icgrep-devel/icgrep/generate_predefined_ucd_functions.cpp

    r4797 r4808  
    233233 * @brief compileUnicodeSet
    234234 ** ------------------------------------------------------------------------------------------------------------- */
    235 void compileUnicodeSet(std::string name, const UnicodeSet & set, PabloCompiler & pc, Module * module) {
     235void compileUnicodeSet(std::string name, UnicodeSet && set, PabloCompiler & pc, Module * module) {
    236236    #ifdef ENABLE_MULTIPLEXING
    237237    if (MultiplexingDistributionFile) {
     
    252252    // Build the unicode set function
    253253    PabloAST * result = nullptr;
     254    std::vector<UnicodeSet> sets;
     255    sets.push_back(std::move(set));
    254256    if (IfHierarchyStrategy == IfHierarchy::DefaultIfHierarchy) {
    255         result = ucdCompiler.generateWithDefaultIfHierarchy(set, builder);
     257        result = ucdCompiler.generateWithDefaultIfHierarchy(sets, builder).front();
    256258    } else if (IfHierarchyStrategy == IfHierarchy::NoIfHierarchy) {
    257         result = ucdCompiler.generateWithoutIfHierarchy(set, builder);
     259        result = ucdCompiler.generateWithoutIfHierarchy(sets, builder).front();
    258260    } else {
    259261        throw std::runtime_error("Unknown if hierarchy strategy!");
     
    391393        if (EnumeratedPropertyObject * enumObj = dyn_cast<EnumeratedPropertyObject>(obj)) {
    392394            for (const std::string value : *enumObj) {
    393                 const UnicodeSet & set = enumObj->GetCodepointSet(canonicalize_value_name(value));
     395                UnicodeSet set = enumObj->GetCodepointSet(canonicalize_value_name(value));
    394396                std::string name = "__get_" + property_enum_name[enumObj->getPropertyCode()] + "_" + value;
    395                 compileUnicodeSet(name, set, pc, module);
     397                compileUnicodeSet(name, std::move(set), pc, module);
    396398                properties.emplace_back(name);
    397399            }
     
    399401        else if (ExtensionPropertyObject * extObj = dyn_cast<ExtensionPropertyObject>(obj)) {
    400402            for (const std::string value : *extObj) {
    401                 const UnicodeSet & set = extObj->GetCodepointSet(canonicalize_value_name(value));
     403                UnicodeSet set = extObj->GetCodepointSet(canonicalize_value_name(value));
    402404                std::string name = "__get_" + property_enum_name[extObj->getPropertyCode()] + "_" + value;
    403                 compileUnicodeSet(name, set, pc, module);
     405                compileUnicodeSet(name, std::move(set), pc, module);
    404406                properties.emplace_back(name);
    405407            }
    406408        }
    407409        else if (BinaryPropertyObject * binObj = dyn_cast<BinaryPropertyObject>(obj)) {
    408             const UnicodeSet & set = binObj->GetCodepointSet(Binary_ns::Y);
     410            UnicodeSet set = binObj->GetCodepointSet(Binary_ns::Y);
    409411            std::string name = "__get_" + property_enum_name[binObj->getPropertyCode()] + "_Y";
    410             compileUnicodeSet(name, set, pc, module);
     412            compileUnicodeSet(name, std::move(set), pc, module);
    411413            properties.emplace_back(name);
    412414        }
  • icGREP/icgrep-devel/icgrep/icgrep-devel.files

    r4773 r4808  
    340340pablo/analysis/pabloverifier.hpp
    341341pablo/analysis/pabloverifier.cpp
     342basis_bits.h
     343toolchain.h
     344toolchain.cpp
  • icGREP/icgrep-devel/icgrep/pablo/optimizers/booleanreassociationpass.cpp

    r4804 r4808  
    1212#include <queue>
    1313#include <set>
    14 #include <iostream>
    1514#include <pablo/printer_pablos.h>
    1615
  • icGREP/icgrep-devel/icgrep/pablo/optimizers/pablo_automultiplexing.cpp

    r4797 r4808  
    1717#include <unordered_set>
    1818#include <pablo/optimizers/pablo_simplifier.hpp>
     19#include <pablo/optimizers/booleanreassociationpass.h>
    1920#include <pablo/analysis/pabloverifier.hpp>
    2021
     
    2425using namespace boost::numeric::ublas;
    2526
    26 // #define PRINT_DEBUG_OUTPUT
     27#define PRINT_DEBUG_OUTPUT
    2728
    2829#if !defined(NDEBUG) && !defined(PRINT_DEBUG_OUTPUT)
  • icGREP/icgrep-devel/icgrep/pablo/optimizers/pablo_codesinking.cpp

    r4804 r4808  
    3838
    3939/** ------------------------------------------------------------------------------------------------------------- *
     40 * @brief findScopeUsages
     41 ** ------------------------------------------------------------------------------------------------------------- */
     42template <class ScopeSet>
     43inline bool findScopeUsages(Statement * stmt, ScopeSet & scopeSet, const PabloBlock & block) {
     44    for (PabloAST * use : stmt->users()) {
     45        assert (isa<Statement>(use));
     46        PabloBlock * const parent = cast<Statement>(use)->getParent();
     47        if (LLVM_LIKELY(parent == &block)) {
     48            return false;
     49        }
     50        scopeSet.insert(parent);
     51    }
     52    return true;
     53}
     54
     55/** ------------------------------------------------------------------------------------------------------------- *
     56 * @brief findScopeUsages
     57 ** ------------------------------------------------------------------------------------------------------------- */
     58template <class ScopeSet>
     59inline bool findScopeUsages(Statement * stmt, ScopeSet & scopeSet, const PabloBlock & block, const PabloBlock & ignored) {
     60    for (PabloAST * use : stmt->users()) {
     61        assert (isa<Statement>(use));
     62        PabloBlock * const parent = cast<Statement>(use)->getParent();
     63        if (LLVM_LIKELY(parent == &block)) {
     64            return false;
     65        }
     66        if (parent != &ignored) {
     67            scopeSet.insert(parent);
     68        }
     69    }
     70    return true;
     71}
     72
     73/** ------------------------------------------------------------------------------------------------------------- *
    4074 * @brief sink
    4175 ** ------------------------------------------------------------------------------------------------------------- */
    4276void CodeSinking::sink(PabloBlock & block) {
    4377
     78    ScopeSet scopes;
    4479    Statement * stmt = block.back(); // note: reverse AST traversal
    4580    while (stmt) {
    46         Statement * next = stmt->getPrevNode();
     81        Statement * prevNode = stmt->getPrevNode();
     82
     83        bool sinkable = true;
     84        // Scan through this statement's users to see if they're all in a nested scope. If so,
     85        // find the least common ancestor of the scope blocks. If it is not the current scope,
     86        // then we can sink the instruction.
    4787        if (isa<If>(stmt)) {
    48             sink(cast<If>(stmt)->getBody());
    49         } else if (isa<While>(stmt)) {
    50             sink(cast<While>(stmt)->getBody());
    51         } else if (isSafeToMove(stmt)) {
    52 
    53             // Scan through this statement's users to see if they're all in a nested scope. If so,
    54             // find the least comon ancestor of the scope blocks. If it is not the current scope,
    55             // then we can sink the instruction.
    56 
    57             // (Note: the current scope is added to the list of processed ones AFTER we've traversed it.)
    58 
    59             ScopeSet scopes;
    60             bool sinkable = false;
    61             for (const PabloAST * use : stmt->users()) {
    62                 if (const Statement * user = dyn_cast<Statement>(use)) {
    63                     if (mProcessed.count(user->getParent())) {
    64                         sinkable = true;
    65                         scopes.insert(user->getParent());
    66                         continue;
    67                     }
     88            PabloBlock & nested = cast<If>(stmt)->getBody();
     89            sink(nested);
     90            for (Assign * def : cast<const If>(stmt)->getDefined()) {
     91                if (!findScopeUsages(def, scopes, block, nested)) {
    6892                    sinkable = false;
    6993                    break;
    7094                }
    7195            }
    72             if (sinkable) {
     96        } else if (isa<While>(stmt)) {
     97            PabloBlock & nested = cast<While>(stmt)->getBody();
     98            sink(nested);
     99            for (Next * var : cast<const While>(stmt)->getVariants()) {
     100                if (escapes(var) && !findScopeUsages(var, scopes, block, nested)) {
     101                    sinkable = false;
     102                    break;
     103                }
     104            }
     105        } else {
     106            sinkable = isSafeToMove(stmt) ? findScopeUsages(stmt, scopes, block) : false;
     107        }
    73108
    74                 while (scopes.size() > 1) {
    75                     // Find the LCA of both scopes then add the LCA back to the list of scopes.
    76                     PabloBlock * scope1 = scopes.back();
    77                     scopes.pop_back();
    78                     unsigned depth1 = calculateDepthToCurrentBlock(scope1, block);
     109        if (sinkable) {
     110            assert (scopes.size() > 0);
     111            while (scopes.size() > 1) {
     112                // Find the LCA of both scopes then add the LCA back to the list of scopes.
     113                PabloBlock * scope1 = scopes.back(); scopes.pop_back();
     114                unsigned depth1 = calculateDepthToCurrentBlock(scope1, block);
    79115
    80                     PabloBlock * scope2 = scopes.back();
    81                     scopes.pop_back();
    82                     unsigned depth2 = calculateDepthToCurrentBlock(scope2, block);
     116                PabloBlock * scope2 = scopes.back(); scopes.pop_back();
     117                unsigned depth2 = calculateDepthToCurrentBlock(scope2, block);
    83118
    84                     // If one of these scopes is nested deeper than the other, scan upwards through
    85                     // the scope tree until both scopes are at the same depth.
    86                     while (depth1 > depth2) {
    87                         scope1 = scope1->getParent();
    88                         --depth1;
    89                     }
    90                     while (depth1 < depth2) {
    91                         scope2 = scope2->getParent();
    92                         --depth2;
    93                     }
    94 
    95                     // Then iteratively step backwards until we find a matching set of scopes; this
    96                     // must be the LCA of our original scopes.
    97                     while (scope1 != scope2) {
    98                         scope1 = scope1->getParent();
    99                         scope2 = scope2->getParent();
    100                     }
    101                     assert (scope1 && scope2);
    102                     // But if the LCA is the current block, we can't sink the statement.
    103                     if (scope1 == &block) {
    104                         sinkable = false;
    105                         break;
    106                     }
    107                     scopes.push_back(scope1);
     119                // If one of these scopes is nested deeper than the other, scan upwards through
     120                // the scope tree until both scopes are at the same depth.
     121                while (depth1 > depth2) {
     122                    scope1 = scope1->getParent();
     123                    --depth1;
     124                }
     125                while (depth1 < depth2) {
     126                    scope2 = scope2->getParent();
     127                    --depth2;
    108128                }
    109129
    110                 if (sinkable) {
    111                     assert (scopes.size() == 1);
    112                     stmt->insertBefore(scopes.front()->front());
     130                // Then iteratively step backwards until we find a matching set of scopes; this
     131                // must be the LCA of our original scopes.
     132                while (scope1 != scope2) {
     133                    scope1 = scope1->getParent();
     134                    scope2 = scope2->getParent();
    113135                }
     136                assert (scope1 && scope2);
     137                // But if the LCA is the current block, we can't sink the statement.
     138                if (scope1 == &block) {
     139                    sinkable = false;
     140                    break;
     141                }
     142                scopes.push_back(scope1);
     143            }
     144            if (sinkable) {
     145                assert (scopes.size() == 1);
     146                assert (isa<If>(stmt) ? &(cast<If>(stmt)->getBody()) != scopes.front() : true);
     147                assert (isa<While>(stmt) ? &(cast<While>(stmt)->getBody()) != scopes.front() : true);
     148                stmt->insertBefore(scopes.front()->front());
    114149            }
    115150        }
    116         stmt = next;
     151        scopes.clear();
     152        stmt = prevNode;
    117153    }
    118     mProcessed.insert(&block);
    119154}
    120155
  • icGREP/icgrep-devel/icgrep/pablo/optimizers/pablo_codesinking.hpp

    r4799 r4808  
    1111
    1212class CodeSinking {
    13 
    1413    struct ScopeSet : public std::vector<PabloBlock *> {
    1514        inline bool insert(PabloBlock * block) {
     
    3332    void sink(PabloBlock & block);
    3433    CodeSinking() { }
    35 private:
    36     ScopeSet mProcessed;
    3734};
    3835
  • icGREP/icgrep-devel/icgrep/re/re_compiler.cpp

    r4803 r4808  
    2828#include <stdexcept>
    2929#include <iostream>
     30#include <pablo/printer_pablos.h>
    3031
    3132#include "llvm/Support/CommandLine.h"
     
    4546                     cl::desc("set mod64 approximate mode"), cl::cat(fREcompilationOptions));
    4647#ifndef DISABLE_PREGENERATED_UCD_FUNCTIONS
    47 static cl::opt<bool> DisablePregeneratedUnicode("disable-pregenerated-unicode", cl::init(false),
    48                      cl::desc("disable use of pregenerated Unicode character class sets"), cl::cat(fREcompilationOptions));
     48static cl::opt<bool> UsePregeneratedUnicode("use-pregenerated-unicode", cl::init(false),
     49                     cl::desc("use fixed pregenerated Unicode character class sets instead"), cl::cat(fREcompilationOptions));
    4950#endif
    5051using namespace pablo;
     
    7273    if (m.pos == newpos) return m;
    7374    PabloAST * a = m.stream;
    74     if (m.pos == FinalMatchByte) {
     75    if (m.pos == MarkerPosition::FinalMatchByte) {
    7576        // Must advance at least to InitialPostPositionByte
    7677        a = pb.createAdvance(a, 1, "adv");
    7778    }
    7879    // Now at InitialPostPositionByte; is a further advance needed?
    79     if (newpos == FinalPostPositionByte) {
     80    if (newpos == MarkerPosition::FinalPostPositionByte) {
    8081        // Must advance through nonfinal bytes
    8182        a = pb.createScanThru(pb.createAnd(mInitial, a), mNonFinal, "scanToFinal");
     
    174175    mFinal = mPB.createNot(mPB.createOr(mNonFinal, u8invalid), "final");
    175176    mUnicodeLineBreak = mPB.createAnd(LB_chars, mPB.createNot(mCRLF));  // count the CR, but not CRLF
     177    PabloAST * const lb = UNICODE_LINE_BREAK ? mUnicodeLineBreak : mLineFeed;
     178    mFunction.setResult(1, mPB.createAssign("lf", mPB.createAnd(lb, mPB.createNot(mCRLF))));
     179}
     180
     181void RE_Compiler::gatherUnicodePropertyNames(RE * re, NameSet & nameSet) {
     182    if (Name * name = dyn_cast<Name>(re)) {
     183        if (name->getDefinition()) {
     184            gatherUnicodePropertyNames(name->getDefinition(), nameSet);
     185        } else if (name->getType() == Name::Type::UnicodeProperty) {
     186            nameSet.insert(name);
     187        }
     188    } else if (Seq* seq = dyn_cast<Seq>(re)) {
     189        for (RE * re : *seq) {
     190            gatherUnicodePropertyNames(re, nameSet);
     191        }
     192    } else if (Alt * alt = dyn_cast<Alt>(re)) {
     193        for (RE * re : *alt) {
     194            gatherUnicodePropertyNames(re, nameSet);
     195        }
     196    } else if (Rep * rep = dyn_cast<Rep>(re)) {
     197        gatherUnicodePropertyNames(rep->getRE(), nameSet);
     198    } else if (Assertion * a = dyn_cast<Assertion>(re)) {
     199        gatherUnicodePropertyNames(a->getAsserted(), nameSet);
     200    } else if (Diff * diff = dyn_cast<Diff>(re)) {
     201        gatherUnicodePropertyNames(diff->getLH(), nameSet);
     202        gatherUnicodePropertyNames(diff->getRH(), nameSet);
     203    } else if (Intersect * ix = dyn_cast<Intersect>(re)) {
     204        gatherUnicodePropertyNames(ix->getLH(), nameSet);
     205        gatherUnicodePropertyNames(ix->getRH(), nameSet);
     206    }
     207}
     208
     209void RE_Compiler::compileUnicodeNames(RE * re) {
     210    NameSet nameSet;
     211    gatherUnicodePropertyNames(re, nameSet);
     212#ifndef DISABLE_PREGENERATED_UCD_FUNCTIONS
     213    if (UsePregeneratedUnicode) {
     214        for (Name * name : nameSet) {
     215            const UCD::ExternalProperty & ep = UCD::resolveExternalProperty(name->getFunctionName());
     216            Call * call = mPB.createCall(Prototype::Create(name->getFunctionName(), std::get<1>(ep), std::get<2>(ep), std::get<0>(ep)), mCCCompiler.getBasisBits());
     217            name->setCompiled(mPB.createAnd(call, mPB.createNot(UNICODE_LINE_BREAK ? mUnicodeLineBreak : mLineFeed)));
     218        }
     219    } else {
     220#endif
     221        std::vector<UCD::UnicodeSet> sets;
     222        for (Name * name : nameSet) {
     223            sets.push_back(std::move(UCD::resolveUnicodeSet(name)));
     224        }
     225        if (sets.size() > 0) {
     226            UCD::UCDCompiler ucdCompiler(mCCCompiler);
     227            std::vector<PabloAST *> classes(std::move(ucdCompiler.generateWithDefaultIfHierarchy(sets, mPB)));
     228            auto value = classes.begin();
     229            for (Name * name : nameSet) {
     230                name->setCompiled(mPB.createAnd(*value++, mPB.createNot(UNICODE_LINE_BREAK ? mUnicodeLineBreak : mLineFeed)));
     231            }
     232        }
     233#ifndef DISABLE_PREGENERATED_UCD_FUNCTIONS
     234    }
     235#endif
    176236}
    177237
     
    181241    PabloAST * v = markerVar(match_result);
    182242    mFunction.setResult(0, mPB.createAssign("matches", mPB.createAnd(mPB.createMatchStar(v, mPB.createNot(lb)), lb)));
    183     mFunction.setResult(1, mPB.createAssign("lf", mPB.createAnd(lb, mPB.createNot(mCRLF))));
    184243}
    185244
    186245MarkerType RE_Compiler::compile(RE * re, PabloBuilder & pb) {
    187     return process(re, makeMarker(FinalPostPositionByte, pb.createOnes()), pb);
     246    return process(re, makeMarker(MarkerPosition::FinalPostPositionByte, pb.createOnes()), pb);
    188247}
    189248
    190249PabloAST * RE_Compiler::nextUnicodePosition(MarkerType m, PabloBuilder & pb) {
    191     if (markerPos(m) == FinalPostPositionByte) {
     250    if (markerPos(m) == MarkerPosition::FinalPostPositionByte) {
    192251        return markerVar(m);
    193252    }
    194     else if (markerPos(m) == InitialPostPositionByte) {
     253    else if (markerPos(m) == MarkerPosition::InitialPostPositionByte) {
    195254        return pb.createScanThru(pb.createAnd(mInitial, markerVar(m)), mNonFinal);
    196255    }
     
    219278        PabloAST * nextPos = nextUnicodePosition(marker, pb);
    220279        PabloAST * dot = pb.createNot(UNICODE_LINE_BREAK ? pb.createOr(mUnicodeLineBreak, mCRLF) : mLineFeed);
    221         return makeMarker(FinalMatchByte, pb.createAnd(nextPos, dot, "dot"));
     280        return makeMarker(MarkerPosition::FinalMatchByte, pb.createAnd(nextPos, dot, "dot"));
    222281    }
    223282    else if (Diff * diff = dyn_cast<Diff>(re)) {
     
    228287    }
    229288    else if (isa<Start>(re)) {
    230         MarkerType m = AdvanceMarker(marker, InitialPostPositionByte, pb);
     289        MarkerType m = AdvanceMarker(marker, MarkerPosition::InitialPostPositionByte, pb);
    231290        if (UNICODE_LINE_BREAK) {
    232291            PabloAST * line_end = mPB.createOr(mUnicodeLineBreak, mCRLF);
    233292            PabloAST * sol = pb.createNot(pb.createOr(pb.createAdvance(pb.createNot(line_end), 1), mCRLF));
    234             return makeMarker(InitialPostPositionByte, pb.createAnd(markerVar(m), sol, "sol"));
     293            return makeMarker(MarkerPosition::InitialPostPositionByte, pb.createAnd(markerVar(m), sol, "sol"));
    235294        }
    236295        else {
    237296            PabloAST * sol = pb.createNot(pb.createAdvance(pb.createNot(mLineFeed), 1));
    238             return makeMarker(FinalPostPositionByte, pb.createAnd(markerVar(m), sol, "sol"));
     297            return makeMarker(MarkerPosition::FinalPostPositionByte, pb.createAnd(markerVar(m), sol, "sol"));
    239298        }
    240299    }
    241300    else if (isa<End>(re)) {
    242301        if (UNICODE_LINE_BREAK) {
    243             PabloAST * nextPos = markerVar(AdvanceMarker(marker, FinalPostPositionByte, pb));
    244             return makeMarker(FinalPostPositionByte, pb.createAnd(nextPos, mUnicodeLineBreak, "end"));
    245         }
    246         PabloAST * nextPos = markerVar(AdvanceMarker(marker, InitialPostPositionByte, pb));  // For LF match
    247         return makeMarker(FinalPostPositionByte, pb.createAnd(nextPos, mLineFeed, "eol"));
     302            PabloAST * nextPos = markerVar(AdvanceMarker(marker, MarkerPosition::FinalPostPositionByte, pb));
     303            return makeMarker(MarkerPosition::FinalPostPositionByte, pb.createAnd(nextPos, mUnicodeLineBreak, "end"));
     304        }
     305        PabloAST * nextPos = markerVar(AdvanceMarker(marker, MarkerPosition::InitialPostPositionByte, pb));  // For LF match
     306        return makeMarker(MarkerPosition::FinalPostPositionByte, pb.createAnd(nextPos, mLineFeed, "eol"));
    248307    }
    249308    return marker;
     
    252311MarkerType RE_Compiler::process(Name * name, MarkerType marker, PabloBuilder & pb) {
    253312    MarkerType nextPos;
    254     if (markerPos(marker) == FinalPostPositionByte) {
     313    if (markerPos(marker) == MarkerPosition::FinalPostPositionByte) {
    255314        nextPos = marker;
    256315    }
    257316    else if (name->getType() == Name::Type::Byte) {
    258         nextPos = AdvanceMarker(marker, InitialPostPositionByte, pb);
     317        nextPos = AdvanceMarker(marker, MarkerPosition::InitialPostPositionByte, pb);
    259318    }
    260319    else {
    261         nextPos = AdvanceMarker(marker, FinalPostPositionByte, pb);
    262     }
    263     return makeMarker(FinalMatchByte, pb.createAnd(markerVar(nextPos), getNamedCharacterClassStream(name, pb), "m"));
     320        nextPos = AdvanceMarker(marker, MarkerPosition::FinalPostPositionByte, pb);
     321    }
     322    return makeMarker(MarkerPosition::FinalMatchByte, pb.createAnd(markerVar(nextPos), getNamedCharacterClassStream(name, pb), "m"));
    264323}
    265324
     
    268327    if (LLVM_LIKELY(var != nullptr)) {
    269328        return var;
    270     }
    271     else if (name->getDefinition() != nullptr) {
     329    } else if (name->getDefinition() != nullptr) {
    272330        MarkerType m = compile(name->getDefinition(), pb);
    273         assert(markerPos(m) == FinalMatchByte);
     331        assert(markerPos(m) == MarkerPosition::FinalMatchByte);
    274332        var = markerVar(m);
    275     }
    276     else if (name->getType() == Name::Type::UnicodeProperty) {
    277         #ifndef DISABLE_PREGENERATED_UCD_FUNCTIONS
    278         if (DisablePregeneratedUnicode) {
    279         #endif
    280             UCD::UCDCompiler ucdCompiler(mCCCompiler);
    281             var = ucdCompiler.generateWithDefaultIfHierarchy(UCD::resolveUnicodeSet(name), pb);
    282         #ifndef DISABLE_PREGENERATED_UCD_FUNCTIONS
    283         } else {
    284             const UCD::ExternalProperty & ep = UCD::resolveExternalProperty(name->getFunctionName());
    285             var = pb.createCall(Prototype::Create(name->getFunctionName(), std::get<1>(ep), std::get<2>(ep), std::get<0>(ep)), mCCCompiler.getBasisBits());
    286         }
    287         #endif
    288     }
    289     else {
     333    } else {
    290334        throw std::runtime_error("Unresolved name " + name->getName());
    291335    }
     
    336380        accum[p] = pb.createOr(accum[p], markerVar(rslt), "alt");
    337381    }
    338     if (isa<Zeroes>(accum[InitialPostPositionByte]) && isa<Zeroes>(accum[FinalPostPositionByte])) {
    339         return makeMarker(FinalMatchByte, accum[FinalMatchByte]);
    340     }
    341     PabloAST * combine = pb.createOr(accum[InitialPostPositionByte], pb.createAdvance(accum[FinalMatchByte], 1), "alt");
     382    if (isa<Zeroes>(accum[MarkerPosition::InitialPostPositionByte]) && isa<Zeroes>(accum[MarkerPosition::FinalPostPositionByte])) {
     383        return makeMarker(MarkerPosition::FinalMatchByte, accum[MarkerPosition::FinalMatchByte]);
     384    }
     385    PabloAST * combine = pb.createOr(accum[InitialPostPositionByte], pb.createAdvance(accum[MarkerPosition::FinalMatchByte], 1), "alt");
    342386    if (isa<Zeroes>(accum[FinalPostPositionByte])) {
    343387        return makeMarker(InitialPostPositionByte, combine);
    344388    }
    345     combine = pb.createOr(pb.createScanThru(pb.createAnd(mInitial, combine), mNonFinal), accum[FinalPostPositionByte], "alt");
    346     return makeMarker(FinalPostPositionByte, combine);
     389    combine = pb.createOr(pb.createScanThru(pb.createAnd(mInitial, combine), mNonFinal), accum[MarkerPosition::FinalPostPositionByte], "alt");
     390    return makeMarker(MarkerPosition::FinalPostPositionByte, combine);
    347391}
    348392
     
    361405    else if (isUnicodeUnitLength(asserted)) {
    362406        MarkerType lookahead = compile(asserted, pb);
    363         assert(markerPos(lookahead) == FinalMatchByte);
     407        assert(markerPos(lookahead) == MarkerPosition::FinalMatchByte);
    364408        PabloAST * la = markerVar(lookahead);
    365409        if (a->getSense() == Assertion::Sense::Negative) {
    366410            la = pb.createNot(la);
    367411        }
    368         MarkerType fbyte = AdvanceMarker(marker, FinalPostPositionByte, pb);
    369         return makeMarker(FinalPostPositionByte, pb.createAnd(markerVar(fbyte), la, "lookahead"));
     412        MarkerType fbyte = AdvanceMarker(marker, MarkerPosition::FinalPostPositionByte, pb);
     413        return makeMarker(MarkerPosition::FinalPostPositionByte, pb.createAnd(markerVar(fbyte), la, "lookahead"));
    370414    }
    371415    else {
     
    462506        PabloAST * cc = markerVar(compile(repeated, pb));
    463507        PabloAST * cc_lb = consecutive1(cc, 1, lb, pb);
    464         PabloAST * marker_fwd = pb.createAdvance(markerVar(marker), markerPos(marker) == FinalMatchByte ? lb : lb-1);
    465         return makeMarker(FinalMatchByte, pb.createAnd(marker_fwd, cc_lb, "lowerbound"));
     508        PabloAST * marker_fwd = pb.createAdvance(markerVar(marker), markerPos(marker) == MarkerPosition::FinalMatchByte ? lb : lb - 1);
     509        return makeMarker(MarkerPosition::FinalMatchByte, pb.createAnd(marker_fwd, cc_lb, "lowerbound"));
    466510    }
    467511    // Fall through to general case.
     
    477521        // Create a mask of positions reachable within ub from current marker.
    478522        // Use matchstar, then apply filter.
    479         PabloAST * match = markerVar(AdvanceMarker(marker, InitialPostPositionByte, pb));
     523        PabloAST * match = markerVar(AdvanceMarker(marker, MarkerPosition::InitialPostPositionByte, pb));
    480524        PabloAST * upperLimitMask = reachable(match, 1, ub, pb);
    481         PabloAST * cursor = markerVar(AdvanceMarker(marker, InitialPostPositionByte, pb));
     525        PabloAST * cursor = markerVar(AdvanceMarker(marker, MarkerPosition::InitialPostPositionByte, pb));
    482526        PabloAST * rep_class_var = markerVar(compile(repeated, pb));
    483         return makeMarker(InitialPostPositionByte, pb.createAnd(pb.createMatchStar(cursor, rep_class_var), upperLimitMask, "bounded"));
     527        return makeMarker(MarkerPosition::InitialPostPositionByte, pb.createAnd(pb.createMatchStar(cursor, rep_class_var), upperLimitMask, "bounded"));
    484528    }
    485529    // Fall through to general case.
     
    495539MarkerType RE_Compiler::processUnboundedRep(RE * repeated, MarkerType marker, PabloBuilder & pb) {
    496540    // always use PostPosition markers for unbounded repetition.
    497     PabloAST * base = markerVar(AdvanceMarker(marker, InitialPostPositionByte, pb));
     541    PabloAST * base = markerVar(AdvanceMarker(marker, MarkerPosition::InitialPostPositionByte, pb));
    498542   
    499543    if (isByteLength(repeated)  && !DisableMatchStar) {
    500544        PabloAST * cc = markerVar(compile(repeated, pb)); 
    501545        PabloAST * mstar = SetMod64Approximation ? pb.createMod64MatchStar(base, cc) : pb.createMatchStar(base, cc, "unbounded");
    502         return makeMarker(InitialPostPositionByte, mstar);
     546        return makeMarker(MarkerPosition::InitialPostPositionByte, mstar);
    503547    }
    504548    else if (isUnicodeUnitLength(repeated) && !DisableMatchStar && !DisableUnicodeMatchStar) {
    505549        PabloAST * cc = markerVar(compile(repeated, pb));
    506550        PabloAST * mstar = SetMod64Approximation ? pb.createMod64MatchStar(base, pb.createOr(mNonFinal, cc)) : pb.createMatchStar(base, pb.createOr(mNonFinal, cc));
    507         return makeMarker(FinalPostPositionByte, pb.createAnd(mstar, mFinal, "unbounded"));
     551        return makeMarker(MarkerPosition::FinalPostPositionByte, pb.createAnd(mstar, mFinal, "unbounded"));
    508552    }
    509553    else if (mStarDepth > 0){
     
    517561        PabloAST * m1 = pb.createOr(base, starPending);
    518562        PabloAST * m2 = pb.createOr(base, starAccum);
    519         PabloAST * loopComputation = markerVar(AdvanceMarker(process(repeated, makeMarker(InitialPostPositionByte, m1), pb), InitialPostPositionByte, pb));
     563        PabloAST * loopComputation = markerVar(AdvanceMarker(process(repeated, makeMarker(MarkerPosition::InitialPostPositionByte, m1), pb), MarkerPosition::InitialPostPositionByte, pb));
    520564        Next * nextPending = pb.createNext(starPending, pb.createAnd(loopComputation, pb.createNot(m2)));
    521565        Next * nextStarAccum = pb.createNext(starAccum, pb.createOr(loopComputation, m2));
     
    525569        mStarDepth--;
    526570       
    527         return makeMarker(InitialPostPositionByte, pb.createAssign("unbounded", pb.createOr(base, nextStarAccum)));
     571        return makeMarker(MarkerPosition::InitialPostPositionByte, pb.createAssign("unbounded", pb.createOr(base, nextStarAccum)));
    528572    }   
    529573    else {
     
    536580        mStarDepth++;
    537581
    538         PabloAST * loopComputation = markerVar(AdvanceMarker(process(repeated, makeMarker(InitialPostPositionByte, whilePending), wb), InitialPostPositionByte, wb));
     582        PabloAST * loopComputation = markerVar(AdvanceMarker(process(repeated, makeMarker(MarkerPosition::InitialPostPositionByte, whilePending), wb), MarkerPosition::InitialPostPositionByte, wb));
    539583        Next * nextWhilePending = wb.createNext(whilePending, wb.createAnd(loopComputation, wb.createNot(whileAccum)));
    540584        Next * nextWhileAccum = wb.createNext(whileAccum, wb.createOr(loopComputation, whileAccum));
     
    546590        mStarDepth--;
    547591        mLoopVariants.clear();
    548         return makeMarker(InitialPostPositionByte, pb.createAssign("unbounded", nextWhileAccum));
     592        return makeMarker(MarkerPosition::InitialPostPositionByte, pb.createAssign("unbounded", nextWhileAccum));
    549593    }   
    550594} // end of namespace re
  • icGREP/icgrep-devel/icgrep/re/re_compiler.h

    r4684 r4808  
    1212#include <cc/cc_compiler.h>
    1313#include <pablo/builder.hpp>
    14 #include <string>
    15 #include <list>
    16 #include <map>
    17 
    18 namespace cc {
    19 class CC_NameMap;
    20 }
     14#ifdef USE_BOOST
     15#include <boost/container/flat_set.hpp>
     16#else
     17#include <unordered_set>
     18#endif
    2119
    2220namespace pablo {
     
    2422}
    2523
     24namespace UCD {
     25class UnicodeSet;
     26}
    2627
    2728/*   Marker streams represent the results of matching steps.
     
    5859    RE_Compiler(pablo::PabloFunction & function, cc::CC_Compiler & ccCompiler);
    5960    void initializeRequiredStreams();
     61    void compileUnicodeNames(RE * re);
    6062    void finalizeMatchResult(MarkerType match_result);
    6163    MarkerType compile(RE * re) {
     
    6466
    6567private:
     68
     69    #ifdef USE_BOOST
     70    using NameSet = boost::container::flat_set<Name *>;
     71    #else
     72    using NameSet = std::unordered_set<Name *>;
     73    #endif
    6674
    6775    MarkerType compile(RE * re, pablo::PabloBuilder & cg);
     
    8795    MarkerType processUnboundedRep(RE * repeated, MarkerType marker, pablo::PabloBuilder & pb);
    8896    MarkerType processBoundedRep(RE * repeated, int ub, MarkerType marker, pablo::PabloBuilder & pb);
     97    static void gatherUnicodePropertyNames(RE * re, NameSet & nameSet);
    8998
    9099private:
  • icGREP/icgrep-devel/icgrep/re/re_name.h

    r4660 r4808  
    1111}
    1212
     13namespace UCD {
     14    class UnicodeSet;
     15}
    1316
    1417namespace re {
     
    3538
    3639    Type getType() const;
    37     RE *getDefinition() const;
     40    RE * getDefinition() const;
    3841    pablo::PabloAST * getCompiled() const {
    3942        return mCompiled;
  • icGREP/icgrep-devel/icgrep/toolchain.cpp

    r4803 r4808  
    143143        PabloPrinter::print(function->getEntryBlock().statements(), cerr);
    144144    }
    145    
     145
    146146    re::RE_Compiler re_compiler(*function, cc_compiler);
    147147    re_compiler.initializeRequiredStreams();
     148    re_compiler.compileUnicodeNames(re_ast);
    148149    re_compiler.finalizeMatchResult(re_compiler.compile(re_ast));
    149150
Note: See TracChangeset for help on using the changeset viewer.