Ignore:
Timestamp:
Dec 31, 2017, 7:22:14 PM (15 months ago)
Author:
cameron
Message:

Supporting multiple alphabets in RE compilation - initial check-in

File:
1 edited

Legend:

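Unmodified (both line numbers shown, old then new, fused together: e.g. `2929` = old line 29, new line 29; `3233` = old line 32, new line 33)
Added (only the new line number shown, e.g. ` 31#include <cc/alphabet.h>`)
Removed (only the old line number shown, followed by a space, e.g. `31 #include "cc/cc_compiler.h"`)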
  • icGREP/icgrep-devel/icgrep/re/re_compiler.cpp

    r5813 r5816  
    2929#include <re/to_utf8.h>
    3030#include <re/re_toolchain.h>        // for AlgorithmOptionIsSet, RE_Algorith...
    31 #include "cc/cc_compiler.h"         // for CC_Compiler
     31#include <cc/alphabet.h>
     32#include <cc/cc_compiler.h>
    3233#include "pablo/builder.hpp"        // for PabloBuilder
    3334#include <IR_Gen/idisa_target.h>    // for AVX2_available
     35#include <llvm/ADT/STLExtras.h> // for make_unique
     36#include <llvm/Support/raw_ostream.h>
    3437#include <llvm/Support/ErrorHandling.h>
    3538
    3639namespace pablo { class PabloAST; }
     40namespace pablo { class Var; }
    3741namespace pablo { class PabloKernel; }
    3842namespace re { class Alt; }
     
    4549
    4650namespace re {
     51
     52   
     53void RE_Compiler::addAlphabet(cc::Alphabet * a, pablo::Var * basis_set) {
     54    mAlphabets.push_back(a);
     55    mAlphabetCompilers.push_back(make_unique<cc::CC_Compiler>(mKernel, basis_set));
     56}
    4757
    4858using MarkerType = RE_Compiler::MarkerType;
     
    97107MarkerType RE_Compiler::compileCC(CC * cc, MarkerType marker, PabloBuilder & pb) {
    98108    PabloAST * nextPos = markerVar(marker);
    99     // If Unicode CCs weren't pulled out earlier, we generate the equivalent
    100     // byte sequence as an RE.
    101     if (cc->getAlphabet() == &cc::Unicode) {
    102          MarkerType m = compile(toUTF8(cc), pb);
    103          nextPos = markerVar(AdvanceMarker(marker, FinalPostPositionUnit, pb));
    104          return makeMarker(FinalMatchUnit, pb.createAnd(markerVar(m), nextPos));
    105     }
    106     if (isByteLength(cc)) {
     109    const cc::Alphabet * a = cc->getAlphabet();
     110    if (a == &cc::Byte) {
    107111        if (marker.pos == FinalMatchUnit) {
    108112            nextPos = pb.createAdvance(nextPos, 1);
    109113        }
     114        return makeMarker(FinalMatchUnit, pb.createAnd(nextPos, mCCCompiler.compileCC(cc, pb)));
     115    } else if (a == &cc::Unicode) {
     116        MarkerType m = compile(toUTF8(cc), pb);
     117        nextPos = markerVar(AdvanceMarker(marker, FinalPostPositionUnit, pb));
     118        return makeMarker(FinalMatchUnit, pb.createAnd(markerVar(m), nextPos));
    110119    } else {
    111         nextPos = markerVar(AdvanceMarker(marker, FinalPostPositionUnit, pb));
    112     }
    113     return makeMarker(FinalMatchUnit, pb.createAnd(nextPos, mCCCompiler.compileCC(cc, pb)));
     120        if (isByteLength(cc)) {
     121            if (marker.pos == FinalMatchUnit) {
     122                nextPos = pb.createAdvance(nextPos, 1);
     123            }
     124        } else {
     125            nextPos = markerVar(AdvanceMarker(marker, FinalPostPositionUnit, pb));
     126        }
     127        unsigned i = 0;
     128        while (i < mAlphabets.size() && (a != mAlphabets[i])) i++;
     129        if (i == mAlphabets.size()) llvm::report_fatal_error("Alphabet " + a->getName() + " has no CC compiler");
     130        return makeMarker(FinalMatchUnit, pb.createAnd(nextPos, mAlphabetCompilers[i]->compileCC(cc, pb)));
     131    }
    114132}
    115133
Note: See TracChangeset for help on using the changeset viewer.