Changeset 5801


Ignore:
Timestamp:
Dec 23, 2017, 9:16:39 PM (9 months ago)
Author:
cameron
Message:

Additional Alphabet analysis and transformation

Location:
icGREP/icgrep-devel/icgrep
Files:
8 edited

Legend:

Unmodified
Added
Removed
  • icGREP/icgrep-devel/icgrep/CMakeLists.txt

    r5793 r5801  
    248248  COMMAND ./run_all "${CMAKE_BINARY_DIR}/u8u16 -segment-size=16 -enable-segment-pipeline-parallel")
    249249
    250 add_test(
    251   NAME lz4d_test
    252   WORKING_DIRECTORY ${PROJECT_SOURCE_DIR}/../QA/lz4d
    253   COMMAND ./run_all ${CMAKE_BINARY_DIR}/lz4d)
     250#add_test(
     251#  NAME lz4d_test
     252#  WORKING_DIRECTORY ${PROJECT_SOURCE_DIR}/../QA/lz4d
     253#  COMMAND ./run_all ${CMAKE_BINARY_DIR}/lz4d)
    254254
    255255add_test(
  • icGREP/icgrep-devel/icgrep/cc/multiplex_CCs.cpp

    r5800 r5801  
    134134}
    135135   
    136 re::CC * MultiplexedAlphabet::transformCC(re::CC * sourceCC) {
     136re::CC * MultiplexedAlphabet::transformCC(const re::CC * sourceCC) const {
    137137    if (sourceCC->getAlphabet() != mSourceAlphabet) llvm::report_fatal_error("Mismatched source alphabets for transformCC");
    138138   
     
    145145    return CC_union;
    146146}
     147
     148re::CC * MultiplexedAlphabet::invertCC(const re::CC * transformedCC) const {
     149    if (transformedCC->getAlphabet() != this) llvm::report_fatal_error("invertCC applied to non-transformed CC");
     150    re::CC * CC_union = re::makeCC(mSourceAlphabet);
     151    for (const UCD::interval_t i : *transformedCC) {
     152        for (unsigned cp = re::lo_codepoint(i); cp <= re::hi_codepoint(i); cp++) {
     153            CC_union = re::makeCC(mUnicodeSets[cp], CC_union);
     154        }
     155    }
     156    return CC_union;
     157}
     158   
     159
     160   
    147161}
    148162
  • icGREP/icgrep-devel/icgrep/cc/multiplex_CCs.h

    r5800 r5801  
    2727    std::vector<re::CC *> getMultiplexedCCs();
    2828   
    29     re::CC * transformCC(re::CC * sourceCC);
     29    re::CC * transformCC(const re::CC * sourceCC) const;
     30   
     31    re::CC * invertCC(const re::CC * transformedCC) const;
    3032private:
    3133    const Alphabet * mSourceAlphabet;
  • icGREP/icgrep-devel/icgrep/grep_engine.cpp

    r5795 r5801  
    3131#include <re/re_multiplex.h>
    3232#include <re/grapheme_clusters.h>
     33#include <re/printer_re.h>
    3334#include <toolchain/toolchain.h>
    3435#include <toolchain/cpudriver.h>
     
    131132        const std::vector<const re::CC *> UnicodeSets = re::collectUnicodeSets(REs[i]);
    132133        std::unique_ptr<cc::MultiplexedAlphabet> mpx = make_unique<MultiplexedAlphabet>("mpx", UnicodeSets);
    133         REs[i] = multiplex(REs[i], UnicodeSets, mpx->getExclusiveSetIDs());
     134        REs[i] = transformCCs(mpx.get(), REs[i]);
     135        //llvm::errs() << Printer_RE::PrintRE(REs[i]) << '\n';
    134136        std::vector<re::CC *> mpx_basis = mpx->getMultiplexedCCs();
    135137        auto numOfCharacterClasses = mpx_basis.size();
  • icGREP/icgrep-devel/icgrep/re/re_analysis.cpp

    r5770 r5801  
    1515#include <re/re_nullable.h>
    1616#include <re/printer_re.h>
     17#include <cc/alphabet.h>
     18#include <cc/multiplex_CCs.h>
    1719#include <limits.h>
    1820#include <llvm/Support/ErrorHandling.h>
     
    119121    } else if (const Intersect * e = dyn_cast<Intersect>(re)) {
    120122        return isByteLength(e->getLH()) && isByteLength(e->getRH());
    121     } else if (isa<CC>(re)) {
    122         return cast<CC>(re)->max_codepoint() <= 0x7F;
    123     } else if (const Name * n = dyn_cast<Name>(re)) {
    124         if (n->getType() == Name::Type::Byte) {
    125             return true;
    126         } else if (n->getType() == Name::Type::Capture || n->getType() == Name::Type::Reference) {
    127             return isByteLength(n->getDefinition());
    128         }
    129         return false;
     123    } else if (const CC * cc = dyn_cast<CC>(re)) {
     124        const cc::Alphabet * a = cc->getAlphabet();
     125        if (a == &cc::Unicode) return (cc->max_codepoint() <= 0x7F);
     126        else if (a == &cc::Byte) return true;
     127        else if (isa<cc::MultiplexedAlphabet>(a)) {
     128            const cc::Alphabet * srcA = cast<cc::MultiplexedAlphabet>(a)->getSourceAlphabet();
     129            if (srcA == &cc::Byte) {
     130                return true;
     131//            } else if (srcA == &cc::Unicode) {
     132//                return cast<cc::MultiplexedAlphabet>(a)->invertCC(cc)->max_codepoint() <= 0x7F;
     133            } else return (a == &cc::Byte);
     134        }
     135        return false;
     136    } else if (const Name * n = dyn_cast<Name>(re)) {
     137        if (n->getType() == Name::Type::ZeroWidth) {
     138            return false;
     139        }
     140        return isByteLength(n->getDefinition());
    130141    }
    131142    return false; // otherwise
  • icGREP/icgrep-devel/icgrep/re/re_compiler.cpp

    r5787 r5801  
    8686
    8787MarkerType RE_Compiler::compileCC(CC * cc, MarkerType marker, PabloBuilder & pb) {
    88     PabloAST * const nextPos = markerVar(AdvanceMarker(marker, MarkerPosition::FinalPostPositionUnit, pb));
     88    PabloAST * nextPos = markerVar(marker);
     89    if (isByteLength(cc)) {
     90        if (marker.pos == MarkerPosition::FinalMatchUnit) nextPos = pb.createAdvance(nextPos, 1);
     91    }
     92    else {
     93        nextPos = markerVar(AdvanceMarker(marker, MarkerPosition::FinalPostPositionUnit, pb));
     94    }
    8995    return makeMarker(MarkerPosition::FinalMatchUnit, pb.createAnd(nextPos, mCCCompiler.compileCC(cc, pb)));
    9096}
  • icGREP/icgrep-devel/icgrep/re/re_multiplex.cpp

    r5787 r5801  
    1111#include <re/re_analysis.h>
    1212#include <re/re_memoizer.hpp>
     13#include <re/printer_re.h>
    1314#include <UCD/ucd_compiler.hpp>
    1415#include <UCD/resolve_properties.h>
     
    1819#include <iostream>
    1920#include <functional>
     21#include <llvm/Support/raw_ostream.h>
    2022
    2123using namespace boost::container;
     
    8890}   
    8991
     92
     93RE * transformCCs(cc::MultiplexedAlphabet * mpx, RE * re) {
     94    if (CC * cc = dyn_cast<CC>(re)) {
     95        if (cc->getAlphabet() == mpx->getSourceAlphabet()) {
     96            re = mpx->transformCC(cc);
     97        }
     98    } else if (Name * name = dyn_cast<Name>(re)) {
     99        if (LLVM_LIKELY(name->getDefinition() != nullptr)) {
     100            RE * xfrm = transformCCs(mpx, name->getDefinition());
     101            if (name->getType() == Name::Type::ZeroWidth)
     102                re = makeZeroWidth(name->getName(), xfrm);
     103            else
     104                re = makeName(name->getName(), xfrm);
     105        } else {
     106            UndefinedNameError(name);
     107        }
     108    } else if (Seq * seq = dyn_cast<Seq>(re)) {
     109        std::vector<RE *> list;
     110        list.reserve(seq->size());
     111        for (RE * item : *seq) {
     112            item = transformCCs(mpx, item);
     113            list.push_back(item);
     114        }
     115        re = makeSeq(list.begin(), list.end());
     116    } else if (Alt * alt = dyn_cast<Alt>(re)) {
     117        std::vector<RE *> list;
     118        list.reserve(alt->size());
     119        for (RE * item : *alt) {
     120            item = transformCCs(mpx, item);
     121            list.push_back(item);
     122        }
     123        re = makeAlt(list.begin(), list.end());
     124    } else if (Assertion * a = dyn_cast<Assertion>(re)) {
     125        re = makeAssertion(transformCCs(mpx, a->getAsserted()), a->getKind(), a->getSense());
     126    } else if (Rep * rep = dyn_cast<Rep>(re)) {
     127        RE * expr = transformCCs(mpx, rep->getRE());
     128        re = makeRep(expr, rep->getLB(), rep->getUB());
     129    } else if (Diff * diff = dyn_cast<Diff>(re)) {
     130        re = makeDiff(transformCCs(mpx, diff->getLH()), transformCCs(mpx, diff->getRH()));
     131    } else if (Intersect * e = dyn_cast<Intersect>(re)) {
     132        re = makeIntersect(transformCCs(mpx, e->getLH()), transformCCs(mpx, e->getRH()));
     133    } else if (Group * g = dyn_cast<Group>(re)) {
     134        re = makeGroup(g->getMode(), transformCCs(mpx, g->getRE()), g->getSense());
     135    }
     136    return re;
     137};
     138
     139
    90140}
  • icGREP/icgrep-devel/icgrep/re/re_multiplex.h

    r5748 r5801  
    33
    44#include <UCD/ucd_compiler.hpp>
     5#include <cc/multiplex_CCs.h>
    56
    67namespace re {
     
    1415                   const std::vector<std::vector<unsigned>> & exclusiveSetIDs);
    1516
     17    RE * transformCCs(cc::MultiplexedAlphabet * mpx, RE * r);
     18
     19
    1620}
    1721#endif
Note: See TracChangeset for help on using the changeset viewer.