Ignore:
Timestamp:
Jul 12, 2015, 3:52:43 PM (4 years ago)
Author:
nmedfort
Message:

Start of work to make the pablo compiler reentrant. Fixed a bug that prevented it from using the Less optimization level.

Location:
icGREP/icgrep-devel/icgrep
Files:
9 edited

Legend:

Unmodified
Added
Removed
  • icGREP/icgrep-devel/icgrep/CMakeLists.txt

    r4660 r4661  
    107107# add the executable
    108108add_executable(icgrep icgrep.cpp do_grep.cpp compiler.cpp)
     109add_executable(generate_predefined_ucd_functions generate_predefined_ucd_functions.cpp)
    109110
    110111target_link_libraries (PabloADT ${REQ_LLVM_LIBRARIES})
     
    113114target_link_libraries (RegExpADT PabloADT CCADT UCDlib)
    114115target_link_libraries (icgrep UCDlib PabloADT RegExpADT CCADT ${REQ_LLVM_LIBRARIES})
     116target_link_libraries (generate_predefined_ucd_functions UCDlib PabloADT CCADT ${REQ_LLVM_LIBRARIES})
    115117
    116118#Check compiler support for 0x / 11
  • icGREP/icgrep-devel/icgrep/UCD/PropertyObjects.cpp

    r4633 r4661  
    6767}
    6868
     69PropertyObject::iterator ExtensionPropertyObject::begin() const {
     70    if (const auto * obj = dyn_cast<EnumeratedPropertyObject>(property_object_table[base_property])) {
     71        return obj->begin();
     72    }
     73    throw std::runtime_error("Iterators unsupported for this type of PropertyObject.");
     74}
     75
     76PropertyObject::iterator ExtensionPropertyObject::end() const {
     77    if (const auto * obj = dyn_cast<EnumeratedPropertyObject>(property_object_table[base_property])) {
     78        return obj->end();
     79    }
     80    throw std::runtime_error("Iterators unsupported for this type of PropertyObject.");
     81}
     82
    6983const UnicodeSet & ExtensionPropertyObject::GetCodepointSet(const std::string & value_spec) {
    7084    int property_enum_val = GetPropertyValueEnumCode(value_spec);
     
    8195
    8296int ExtensionPropertyObject::GetPropertyValueEnumCode(const std::string & value_spec) {
    83     int c = property_object_table[base_property]->GetPropertyValueEnumCode(value_spec);
    84     return c;
     97    return property_object_table[base_property]->GetPropertyValueEnumCode(value_spec);
    8598}
    8699
  • icGREP/icgrep-devel/icgrep/UCD/PropertyObjects.h

    r4633 r4661  
    1717namespace UCD {
    1818
    19     std::string canonicalize_value_name(const std::string & prop_or_val);
     19std::string canonicalize_value_name(const std::string & prop_or_val);
    2020
    21         class PropertyObject {
    22         public:
    23         enum class ClassTypeId : unsigned {
    24             NumericProperty, CodepointProperty, StringProperty, MiscellaneousProperty, EnumeratedProperty, ExtensionProperty, CatalogProperty, BinaryProperty,  UnsupportedProperty
    25         };
    26         inline ClassTypeId getClassTypeId() const {
    27             return the_kind;
    28         }
    29                 PropertyObject(property_t p, ClassTypeId k) : the_property(p), the_kind(k) {}
    30         virtual int GetPropertyValueEnumCode(const std::string & value_spec);
    31                 property_t the_property;
    32                 ClassTypeId the_kind;
    33         };
    34        
    35         class UnsupportedPropertyObject : public PropertyObject {
    36         public:
    37         static inline bool classof(const PropertyObject * p) {
    38             return p->getClassTypeId() == ClassTypeId::UnsupportedProperty;
    39         }
    40         static inline bool classof(const void *) {
    41             return false;
    42         }
    43        
    44                 UnsupportedPropertyObject(property_t p, ClassTypeId k) : PropertyObject(p, k) {}
    45         UnicodeSet GetCodepointSet(const std::string &);
    46         UnicodeSet GetCodepointSet(const int);
    47         };
    48        
    49         class EnumeratedPropertyObject : public PropertyObject {
    50         public:
    51         static inline bool classof(const PropertyObject * p) {
    52             return p->getClassTypeId() == ClassTypeId::EnumeratedProperty;
    53         }
    54         static inline bool classof(const void *) {
    55             return false;
    56         }
    57                
     21class PropertyObject {
     22public:
     23    enum class ClassTypeId : unsigned {
     24        NumericProperty
     25        , CodepointProperty
     26        , StringProperty
     27        , MiscellaneousProperty
     28        , EnumeratedProperty
     29        , ExtensionProperty
     30        , CatalogProperty
     31        , BinaryProperty
     32        , UnsupportedProperty
     33    };
     34    using iterator = const std::vector<std::string>::const_iterator;
     35    inline ClassTypeId getClassTypeId() const {
     36        return the_kind;
     37    }
     38    inline property_t getPropertyCode() const {
     39        return the_property;
     40    }
     41    PropertyObject(property_t p, ClassTypeId k) : the_property(p), the_kind(k) {}
     42    virtual int GetPropertyValueEnumCode(const std::string & value_spec);
     43    property_t the_property;
     44    ClassTypeId the_kind;
     45};
    5846
    59                 EnumeratedPropertyObject(UCD::property_t p,
    60                                  const std::vector<std::string> & enum_names,
    61                                  const std::vector<std::string> & names,
    62                                  std::unordered_map<std::string, int> & aliases,
    63                                  std::vector<const UnicodeSet *> && sets)
    64         : PropertyObject(p, ClassTypeId::EnumeratedProperty)
    65         , property_value_enum_names(enum_names)
    66         , property_value_full_names(names)
    67         , property_value_aliases(aliases)
    68         , uninitialized(true)
    69         , property_value_sets(sets) {
     47class UnsupportedPropertyObject : public PropertyObject {
     48public:
     49    static inline bool classof(const PropertyObject * p) {
     50        return p->getClassTypeId() == ClassTypeId::UnsupportedProperty;
     51    }
     52    static inline bool classof(const void *) {
     53        return false;
     54    }
     55
     56    UnsupportedPropertyObject(property_t p, ClassTypeId k) : PropertyObject(p, k) {}
     57    UnicodeSet GetCodepointSet(const std::string &);
     58    UnicodeSet GetCodepointSet(const int);
     59};
     60
     61class EnumeratedPropertyObject : public PropertyObject {
     62public:
     63    static inline bool classof(const PropertyObject * p) {
     64        return p->getClassTypeId() == ClassTypeId::EnumeratedProperty;
     65    }
     66    static inline bool classof(const void *) {
     67        return false;
     68    }
     69
     70    EnumeratedPropertyObject(UCD::property_t p,
     71                             const std::vector<std::string> & enum_names,
     72                             const std::vector<std::string> & names,
     73                             std::unordered_map<std::string, int> & aliases,
     74                             std::vector<const UnicodeSet *> && sets)
     75    : PropertyObject(p, ClassTypeId::EnumeratedProperty)
     76    , property_value_enum_names(enum_names)
     77    , property_value_full_names(names)
     78    , property_value_aliases(aliases)
     79    , uninitialized(true)
     80    , property_value_sets(sets) {
     81
     82    }
     83
     84    virtual int GetPropertyValueEnumCode(const std::string & value_spec);
     85    const UnicodeSet & GetCodepointSet(const std::string & value_spec);
     86    const UnicodeSet & GetCodepointSet(const int property_enum_val) const;
     87
     88    iterator begin() const {
     89        return property_value_enum_names.cbegin();
     90    }
     91
     92     iterator end() const {
     93        return property_value_enum_names.cend();
     94    }
     95
     96private:
     97    const std::vector<std::string> & property_value_enum_names;  // never changes
     98    const std::vector<std::string> & property_value_full_names;  // never changes
     99    std::unordered_map<std::string, int> & property_value_aliases;
     100    bool uninitialized; // full names must be added dynamically.
     101    const std::vector<const UnicodeSet *> property_value_sets;
     102};
     103
     104class ExtensionPropertyObject : public PropertyObject {
     105public:
     106    static inline bool classof(const PropertyObject * p) {
     107        return p->getClassTypeId() == ClassTypeId::ExtensionProperty;
     108    }
     109    static inline bool classof(const void *) {
     110        return false;
     111    }
     112
     113    ExtensionPropertyObject(UCD::property_t p,
     114                            UCD::property_t base,
     115                            std::vector<const UnicodeSet *> && sets)
     116    : PropertyObject(p, ClassTypeId::ExtensionProperty)
     117    , base_property(base)
     118    , property_value_sets(sets) {
    70119
    71120
    72         }
     121    }
    73122
    74         virtual int GetPropertyValueEnumCode(const std::string & value_spec);
    75         const UnicodeSet & GetCodepointSet(const std::string & value_spec);
    76         const UnicodeSet & GetCodepointSet(const int property_enum_val) const;
    77                
    78         private:
    79         const std::vector<std::string> & property_value_enum_names;  // never changes
    80         const std::vector<std::string> & property_value_full_names;  // never changes
    81         std::unordered_map<std::string, int> & property_value_aliases;
    82         bool uninitialized; // full names must be added dynamically.
    83         const std::vector<const UnicodeSet *> property_value_sets;
    84         };
    85        
    86         class ExtensionPropertyObject : public PropertyObject {
    87         public:
    88         static inline bool classof(const PropertyObject * p) {
    89             return p->getClassTypeId() == ClassTypeId::ExtensionProperty;
    90         }
    91         static inline bool classof(const void *) {
    92             return false;
    93         }
    94                
     123    iterator begin() const;
    95124
    96                 ExtensionPropertyObject(UCD::property_t p,
    97                                  UCD::property_t base,
    98                                  std::vector<const UnicodeSet *> && sets)
    99         : PropertyObject(p, ClassTypeId::ExtensionProperty)
    100         , base_property(base)
    101         , property_value_sets(sets) {
     125    iterator end() const;
    102126
     127    virtual int GetPropertyValueEnumCode(const std::string & value_spec);
     128    const UnicodeSet & GetCodepointSet(const std::string & value_spec);
     129    const UnicodeSet & GetCodepointSet(const int property_enum_val) const;
    103130
    104         }
     131private:
     132    const property_t base_property;
     133    const std::vector<const UnicodeSet *> property_value_sets;
     134};
    105135
    106         virtual int GetPropertyValueEnumCode(const std::string & value_spec);
    107         const UnicodeSet & GetCodepointSet(const std::string & value_spec);
    108         const UnicodeSet & GetCodepointSet(const int property_enum_val) const;
    109                
    110         private:
    111         const property_t base_property;
    112         const std::vector<const UnicodeSet *> property_value_sets;
    113         };
    114        
    115         class BinaryPropertyObject : public PropertyObject {
    116         public:
    117         static inline bool classof(const PropertyObject * p) {
    118             return p->getClassTypeId() == ClassTypeId::BinaryProperty;
    119         }
    120         static inline bool classof(const void *) {
    121             return false;
    122         }
    123                
    124         BinaryPropertyObject(UCD::property_t p, UnicodeSet s)
     136class BinaryPropertyObject : public PropertyObject {
     137public:
     138    static inline bool classof(const PropertyObject * p) {
     139        return p->getClassTypeId() == ClassTypeId::BinaryProperty;
     140    }
     141    static inline bool classof(const void *) {
     142        return false;
     143    }
     144
     145    BinaryPropertyObject(UCD::property_t p, UnicodeSet s)
    125146        : PropertyObject(p, ClassTypeId::BinaryProperty)
    126147        , the_codepoint_set(s) {
    127148
    128         }
    129         UnicodeSet GetCodepointSet(const std::string & value_spec) const;
    130         UnicodeSet GetCodepointSet(const int property_enum_val) const;
    131     private:
    132                 UnicodeSet the_codepoint_set;       
    133         };
    134        
     149    }
     150    UnicodeSet GetCodepointSet(const std::string & value_spec) const;
     151    UnicodeSet GetCodepointSet(const int property_enum_val) const;
     152private:
     153    UnicodeSet the_codepoint_set;
     154};
     155
    135156}
    136        
     157
    137158#endif
  • icGREP/icgrep-devel/icgrep/UCD/resolve_properties.cpp

    r4660 r4661  
    9595            if (valit->second == Binary_ns::Y) {
    9696                name->setFunctionName("__get_" + lowercase(property_enum_name[theprop]) + "_Y");
    97                 return;
    9897            }
    9998            else {
    10099                Name * binprop = makeName("__get_" + lowercase(property_enum_name[theprop]) + "_Y", Name::Type::UnicodeProperty);
    101100                name->setDefinition(makeDiff(makeAny(), binprop));
    102                 return;
    103101            }
    104102        }
     
    110108
    111109        // No namespace (property) name.   Try as a general category.
    112         int valcode = GetPropertyValueEnumCode(gc, value);
    113         if (valcode >= 0) {
     110
     111        int valcode;
     112
     113        if ((valcode = GetPropertyValueEnumCode(gc, value)) >= 0) {
    114114            name->setFunctionName("__get_gc_" + GC_ns::enum_names[valcode]);
    115115            return;
    116116        }
    117         valcode = GetPropertyValueEnumCode(sc, value);
    118         if (valcode >= 0) {
     117
     118        if ((valcode = GetPropertyValueEnumCode(sc, value)) >= 0) {
    119119            name->setFunctionName("__get_sc_" + SC_ns::enum_names[valcode]);
    120120            return;
    121121        }
     122
    122123        // Try as a binary property.
    123124        auto propit = alias_map.find(value);
     
    126127            if (isa<BinaryPropertyObject>(property_object_table[theprop])) {
    127128                name->setFunctionName("__get_" + lowercase(property_enum_name[theprop]) + "_Y");
    128                 return;
    129129            }
    130130            else {
     
    135135        else if (value == "any") {
    136136            name->setDefinition(makeAny());
    137             return;
    138137        }
    139138        else if (value == "assigned") {
    140139            Name * Cn = makeName("Cn", Name::Type::UnicodeProperty);
    141140            name->setDefinition(makeDiff(makeAny(), Cn));
    142             return;
    143141        }
    144142        else if (value == "ascii") {
    145143            name->setFunctionName("__get_blk_ASCII");
    146             return;
    147144        }
    148145        // Now compatibility properties of UTR #18 Annex C
     
    151148            Name * hexdigit = makeName("Hex_digit", Name::Type::UnicodeProperty);
    152149            name->setDefinition(makeAlt({Nd, hexdigit}));
    153             return;
    154150        }
    155151        else if (value == "alnum") {
     
    157153            Name * alpha = makeName("alphabetic", Name::Type::UnicodeProperty);
    158154            name->setDefinition(makeAlt({digit, alpha}));
    159             return;
    160155        }
    161156        else if (value == "blank") {
     
    163158            CC * tab = makeCC(0x09);
    164159            name->setDefinition(makeAlt({space_sep, tab}));
    165             return;
    166160        }
    167161        else if (value == "graph") {
     
    173167            nongraph->setDefinition(makeAlt({space, ctrl, surr, unassigned}));
    174168            name->setDefinition(makeDiff(makeAny(), nongraph));
    175             return;
    176169        }
    177170        else if (value == "print") {
     
    179172            Name * space_sep = makeName("space_separator", Name::Type::UnicodeProperty);
    180173            name->setDefinition(makeAlt({graph, space_sep}));
    181             return;
    182174        }
    183175        else if (value == "word") {
     
    187179            Name * join = makeName("Join_Control", Name::Type::UnicodeProperty);
    188180            name->setDefinition(makeAlt({alnum, mark, conn, join}));
    189             return;
    190181        }
    191182        else {
  • icGREP/icgrep-devel/icgrep/UCD/ucd_compiler.cpp

    r4638 r4661  
    2727 * @param ifRangeList
    2828 ** ------------------------------------------------------------------------------------------------------------- */
    29 PabloAST * UCDCompiler::generateWithIfHierarchy(const RangeList & ifRanges, const UnicodeSet & set, const codepoint_t lo, const codepoint_t hi, PabloBuilder & block) {
    30 
    31     PabloAST * target = block.createZeroes();
     29PabloAST * UCDCompiler::generateWithIfHierarchy(const RangeList & ifRanges, const UnicodeSet & set, const codepoint_t lo, const codepoint_t hi, PabloBuilder & builder) {
     30
     31    PabloAST * target = builder.createZeroes();
    3232    // Codepoints in unenclosed ranges will be computed unconditionally.
    3333    // Generate them first so that computed subexpressions may be shared
     
    3737
    3838    for (const auto rg : rangeGaps(enclosed, lo, hi)) {
    39         target = generateSubRanges(set, lo_codepoint(rg), hi_codepoint(rg), block, target);
     39        target = generateSubRanges(set, lo_codepoint(rg), hi_codepoint(rg), builder, target);
    4040    }
    4141
     
    4646        std::tie(lo, hi) = range;
    4747        if (set.intersects(lo, hi)) {
    48             PabloBuilder inner_block = PabloBuilder::Create(block);
     48            PabloBuilder inner_block = PabloBuilder::Create(builder);
    4949            PabloAST * inner_target = generateWithIfHierarchy(inner, set, lo, hi, inner_block);
    5050            // If this range is empty, just skip creating the if block
     
    5353            }
    5454            Assign * matches = inner_block.createAssign("m", inner_target);
    55             block.createIf(ifTestCompiler(lo, hi, block), {matches}, inner_block);
    56             target = block.createOr(target, matches);
     55            builder.createIf(ifTestCompiler(lo, hi, builder), {matches}, inner_block);
     56            target = builder.createOr(target, matches);
    5757        }
    5858    }
     
    6565 * @param ifRangeList
    6666 ** ------------------------------------------------------------------------------------------------------------- */
    67 PabloAST * UCDCompiler::generateSubRanges(const UnicodeSet & set, const codepoint_t lo, const codepoint_t hi, PabloBuilder & block, PabloAST * target) {
     67PabloAST * UCDCompiler::generateSubRanges(const UnicodeSet & set, const codepoint_t lo, const codepoint_t hi, PabloBuilder & builder, PabloAST * target) {
    6868    const auto range = rangeIntersect(set, lo, hi);
    6969    // Divide by UTF-8 length, separating out E0, ED, F0 and F4 ranges
     
    7373    for (auto r : ranges) {
    7474        const auto subrange = rangeIntersect(range, lo_codepoint(r), hi_codepoint(r));
    75         target = sequenceGenerator(std::move(subrange), 1, block, target, nullptr);
     75        target = sequenceGenerator(std::move(subrange), 1, builder, target, nullptr);
    7676    }
    7777    return target;
     
    8686 * matching the sequences up to byte number byte_no have been generated.
    8787 ** ------------------------------------------------------------------------------------------------------------- */
    88 PabloAST * UCDCompiler::sequenceGenerator(const RangeList && ranges, const unsigned byte_no, PabloBuilder & block, PabloAST * target, PabloAST * prefix) {
     88PabloAST * UCDCompiler::sequenceGenerator(const RangeList && ranges, const unsigned byte_no, PabloBuilder & builder, PabloAST * target, PabloAST * prefix) {
    8989
    9090    if (LLVM_LIKELY(!ranges.empty())) {
     
    9898        if (min != max) {
    9999            const auto mid = UTF8_Encoder::maxCodePoint(min);
    100             target = sequenceGenerator(std::move(rangeIntersect(ranges, lo, mid)), byte_no, block, target, prefix);
    101             target = sequenceGenerator(std::move(rangeIntersect(ranges, mid + 1, hi)), byte_no, block, target, prefix);
     100            target = sequenceGenerator(std::move(rangeIntersect(ranges, lo, mid)), byte_no, builder, target, prefix);
     101            target = sequenceGenerator(std::move(rangeIntersect(ranges, mid + 1, hi)), byte_no, builder, target, prefix);
    102102        }
    103103        else if (min == byte_no) {
     
    105105            // Use the byte class compiler to generate matches for these codepoints.
    106106            const auto bytes = byteDefinitions(ranges, byte_no);
    107             PabloAST * var = mCharacterClassCompiler.compileCC(makeCC(bytes), block);
     107            PabloAST * var = mCharacterClassCompiler.compileCC(makeCC(bytes), builder);
    108108            if (byte_no > 1) {
    109                 var = block.createAnd(var, block.createAdvance(makePrefix(lo, byte_no, block, prefix), 1));
    110             }
    111             target = block.createOr(target, var);
     109                var = builder.createAnd(var, builder.createAdvance(makePrefix(lo, byte_no, builder, prefix), 1));
     110            }
     111            target = builder.createOr(target, var);
    112112        }
    113113        else {
     
    120120                    if (!UTF8_Encoder::isLowCodePointAfterByte(lo, byte_no)) {
    121121                        const codepoint_t mid = lo | ((1 << (6 * (min - byte_no))) - 1);
    122                         target = sequenceGenerator(lo, mid, byte_no, block, target, prefix);
    123                         target = sequenceGenerator(mid + 1, hi, byte_no, block, target, prefix);
     122                        target = sequenceGenerator(lo, mid, byte_no, builder, target, prefix);
     123                        target = sequenceGenerator(mid + 1, hi, byte_no, builder, target, prefix);
    124124                    }
    125125                    else if (!UTF8_Encoder::isHighCodePointAfterByte(hi, byte_no)) {
    126126                        const codepoint_t mid = hi & ~((1 << (6 * (min - byte_no))) - 1);
    127                         target = sequenceGenerator(lo, mid - 1, byte_no, block, target, prefix);
    128                         target = sequenceGenerator(mid, hi, byte_no, block, target, prefix);
     127                        target = sequenceGenerator(lo, mid - 1, byte_no, builder, target, prefix);
     128                        target = sequenceGenerator(mid, hi, byte_no, builder, target, prefix);
    129129                    }
    130130                    else { // we have a prefix group of type (a)
    131                         PabloAST * var = mCharacterClassCompiler.compileCC(makeCC(lo_byte, hi_byte), block);
     131                        PabloAST * var = mCharacterClassCompiler.compileCC(makeCC(lo_byte, hi_byte), builder);
    132132                        if (byte_no > 1) {
    133                             var = block.createAnd(block.createAdvance(prefix, 1), var);
     133                            var = builder.createAnd(builder.createAdvance(prefix, 1), var);
    134134                        }
    135135                        for (unsigned i = byte_no; i != UTF8_Encoder::length(lo); ++i) {
    136                             var = block.createAnd(mSuffixVar, block.createAdvance(var, 1));
     136                            var = builder.createAnd(mSuffixVar, builder.createAdvance(var, 1));
    137137                        }
    138                         target = block.createOr(target, var);
     138                        target = builder.createOr(target, var);
    139139                    }
    140140                }
    141141                else { // lbyte == hbyte
    142                     PabloAST * var = mCharacterClassCompiler.compileCC(makeCC(lo_byte, hi_byte), block);
     142                    PabloAST * var = mCharacterClassCompiler.compileCC(makeCC(lo_byte, hi_byte), builder);
    143143                    if (byte_no > 1) {
    144                         var = block.createAnd(block.createAdvance(prefix ? prefix : var, 1), var);
     144                        var = builder.createAnd(builder.createAdvance(prefix ? prefix : var, 1), var);
    145145                    }
    146146                    if (byte_no < UTF8_Encoder::length(lo)) {
    147                         target = sequenceGenerator(lo, hi, byte_no + 1, block, target, var);
     147                        target = sequenceGenerator(lo, hi, byte_no + 1, builder, target, var);
    148148                    }
    149149                }
     
    157157 * @brief sequenceGenerator
    158158 ** ------------------------------------------------------------------------------------------------------------- */
    159 inline PabloAST * UCDCompiler::sequenceGenerator(const codepoint_t lo, const codepoint_t hi, const unsigned byte_no, PabloBuilder & block, PabloAST * target, PabloAST * prefix) {
    160     return sequenceGenerator({{ lo, hi }}, byte_no, block, target, prefix);
     159inline PabloAST * UCDCompiler::sequenceGenerator(const codepoint_t lo, const codepoint_t hi, const unsigned byte_no, PabloBuilder & builder, PabloAST * target, PabloAST * prefix) {
     160    return sequenceGenerator({{ lo, hi }}, byte_no, builder, target, prefix);
    161161}
    162162
     
    164164 * @brief ifTestCompiler
    165165 ** ------------------------------------------------------------------------------------------------------------- */
    166 inline PabloAST * UCDCompiler::ifTestCompiler(const codepoint_t lo, const codepoint_t hi, PabloBuilder & block) {
    167     return ifTestCompiler(lo, hi, 1, block, block.createOnes());
     166inline PabloAST * UCDCompiler::ifTestCompiler(const codepoint_t lo, const codepoint_t hi, PabloBuilder & builder) {
     167    return ifTestCompiler(lo, hi, 1, builder, builder.createOnes());
    168168}
    169169
     
    171171 * @brief ifTestCompiler
    172172 ** ------------------------------------------------------------------------------------------------------------- */
    173 PabloAST * UCDCompiler::ifTestCompiler(const codepoint_t lo, const codepoint_t hi, const unsigned byte_no, PabloBuilder & block, PabloAST * target) {
     173PabloAST * UCDCompiler::ifTestCompiler(const codepoint_t lo, const codepoint_t hi, const unsigned byte_no, PabloBuilder & builder, PabloAST * target) {
    174174
    175175    codepoint_t lo_byte = UTF8_Encoder::encodingByte(lo, byte_no);
     
    183183            if (hi == 0x10FFFF) hi_byte = 0xFF;
    184184        }
    185         PabloAST * cc = mCharacterClassCompiler.compileCC(makeCC(lo_byte, hi_byte), block);
    186         target = block.createAnd(cc, target);
     185        PabloAST * cc = mCharacterClassCompiler.compileCC(makeCC(lo_byte, hi_byte), builder);
     186        target = builder.createAnd(cc, target);
    187187    }
    188188    else if (lo_byte == hi_byte) {
    189         PabloAST * cc = mCharacterClassCompiler.compileCC(makeCC(lo_byte, hi_byte), block);
    190         target = block.createAnd(cc, target);
    191         target = block.createAdvance(target, 1);
    192         target = ifTestCompiler(lo, hi, byte_no + 1, block, target);
     189        PabloAST * cc = mCharacterClassCompiler.compileCC(makeCC(lo_byte, hi_byte), builder);
     190        target = builder.createAnd(cc, target);
     191        target = builder.createAdvance(target, 1);
     192        target = ifTestCompiler(lo, hi, byte_no + 1, builder, target);
    193193    }
    194194    else if (!at_hi_boundary) {
    195195        const auto mid = UTF8_Encoder::minCodePointWithCommonBytes(hi, byte_no);
    196         PabloAST * e1 = ifTestCompiler(lo, mid - 1, byte_no, block, target);
    197         PabloAST * e2 = ifTestCompiler(mid, hi, byte_no, block, target);
    198         target = block.createOr(e1, e2);
     196        PabloAST * e1 = ifTestCompiler(lo, mid - 1, byte_no, builder, target);
     197        PabloAST * e2 = ifTestCompiler(mid, hi, byte_no, builder, target);
     198        target = builder.createOr(e1, e2);
    199199    }
    200200    else {
    201201        const auto mid = UTF8_Encoder::maxCodePointWithCommonBytes(lo, byte_no);
    202         PabloAST * e1 = ifTestCompiler(lo, mid, byte_no, block, target);
    203         PabloAST * e2 = ifTestCompiler(mid + 1, hi, byte_no, block, target);
    204         target = block.createOr(e1, e2);
     202        PabloAST * e1 = ifTestCompiler(lo, mid, byte_no, builder, target);
     203        PabloAST * e2 = ifTestCompiler(mid + 1, hi, byte_no, builder, target);
     204        target = builder.createOr(e1, e2);
    205205    }
    206206    return target;
     
    214214 * Ensure the sequence of preceding bytes is defined, up to, but not including the given byte_no
    215215 ** ------------------------------------------------------------------------------------------------------------- */
    216 PabloAST * UCDCompiler::makePrefix(const codepoint_t cp, const unsigned byte_no, PabloBuilder & pb, PabloAST * prefix) {
     216PabloAST * UCDCompiler::makePrefix(const codepoint_t cp, const unsigned byte_no, PabloBuilder & builder, PabloAST * prefix) {
    217217    assert (byte_no >= 1 && byte_no <= 4);
    218218    assert (byte_no == 1 || prefix != nullptr);
    219219    for (unsigned i = 1; i != byte_no; ++i) {
    220220        const CC * const cc = makeCC(UTF8_Encoder::encodingByte(cp, i));
    221         PabloAST * var = mCharacterClassCompiler.compileCC(cc, pb);
     221        PabloAST * var = mCharacterClassCompiler.compileCC(cc, builder);
    222222        if (i > 1) {
    223             var = pb.createAnd(var, pb.createAdvance(prefix, 1));
     223            var = builder.createAnd(var, builder.createAdvance(prefix, 1));
    224224        }
    225225        prefix = var;
     
    427427        {0x10000, 0x10FFFF}};
    428428
    429 //    llvm::raw_os_ostream out(std::cerr);
    430 
    431 //    set.dump(out);
    432 
    433 //    for (auto range : set) {
    434 //        out << range.first << ',' << range.second << "\n";
    435 //    }
    436 
    437 //    out.flush();
    438 
    439429    return generateWithIfHierarchy(defaultIfHierachy, set, entry);
    440430}
  • icGREP/icgrep-devel/icgrep/UCD/ucd_compiler.hpp

    r4631 r4661  
    3535protected:
    3636
    37     PabloAST * generateWithIfHierarchy(const RangeList & ifRanges, const UnicodeSet & set, const codepoint_t lo, const codepoint_t hi, PabloBuilder & block);
     37    PabloAST * generateWithIfHierarchy(const RangeList & ifRanges, const UnicodeSet & set, const codepoint_t lo, const codepoint_t hi, PabloBuilder & builder);
    3838
    39     PabloAST * generateSubRanges(const UnicodeSet & set, const codepoint_t lo, const codepoint_t hi, PabloBuilder & block, PabloAST * target);
     39    PabloAST * generateSubRanges(const UnicodeSet & set, const codepoint_t lo, const codepoint_t hi, PabloBuilder & builder, PabloAST * target);
    4040
    41     PabloAST * sequenceGenerator(const RangeList && ranges, const unsigned byte_no, PabloBuilder & block, PabloAST * target, PabloAST * prefix);
     41    PabloAST * sequenceGenerator(const RangeList && ranges, const unsigned byte_no, PabloBuilder & builder, PabloAST * target, PabloAST * prefix);
    4242
    43     PabloAST * sequenceGenerator(const codepoint_t lo, const codepoint_t hi, const unsigned byte_no, PabloBuilder & block, PabloAST * target, PabloAST * prefix);
     43    PabloAST * sequenceGenerator(const codepoint_t lo, const codepoint_t hi, const unsigned byte_no, PabloBuilder & builder, PabloAST * target, PabloAST * prefix);
    4444
    45     PabloAST * ifTestCompiler(const codepoint_t lo, const codepoint_t hi, PabloBuilder & block);
     45    PabloAST * ifTestCompiler(const codepoint_t lo, const codepoint_t hi, PabloBuilder & builder);
    4646
    47     PabloAST * ifTestCompiler(const codepoint_t lo, const codepoint_t hi, const unsigned byte_no, PabloBuilder & block, PabloAST * target);
     47    PabloAST * ifTestCompiler(const codepoint_t lo, const codepoint_t hi, const unsigned byte_no, PabloBuilder & builder, PabloAST * target);
    4848
    49     PabloAST * makePrefix(const codepoint_t cp, const unsigned byte_no, PabloBuilder & pb, PabloAST * prefix);
     49    PabloAST * makePrefix(const codepoint_t cp, const unsigned byte_no, PabloBuilder & builder, PabloAST * prefix);
    5050
    5151    static RangeList byteDefinitions(const RangeList & list, const unsigned byte_no);
  • icGREP/icgrep-devel/icgrep/generate_predefined_ucd_functions.cpp

    r4657 r4661  
    99#include <fstream>
    1010
     11#include <cc/cc_compiler.h>
    1112#include <UCD/unicode_set.h>
     13#include <UCD/PropertyObjectTable.h>
    1214#include <UCD/ucd_compiler.hpp>
    1315#include <pablo/pablo_compiler.h>
     16#include <pablo/builder.hpp>
    1417#include <pablo/function.h>
    1518#include <llvm/Support/CommandLine.h>
     19#include <utf_encoding.h>
     20#include <pablo/optimizers/pablo_simplifier.hpp>
     21#include <pablo/optimizers/pablo_codesinking.hpp>
     22#include <pablo/optimizers/pablo_automultiplexing.hpp>
     23#include <llvm/ExecutionEngine/ExecutionEngine.h>
     24#include <boost/algorithm/string/case_conv.hpp>
     25
     26#include <iostream>
    1627
    1728using namespace pablo;
     29using namespace UCD;
     30using namespace cc;
     31
     32inline std::string lowercase(const std::string & name) {
     33    std::locale loc;
     34    return boost::algorithm::to_lower_copy(name, loc);
     35}
     36
     37ExecutionEngine * compile(std::string name, const UnicodeSet & set, PabloCompiler & pc, ExecutionEngine * engine) {
     38
     39    PabloFunction function = PabloFunction::Create(std::move(name));
     40    Encoding encoding(Encoding::Type::UTF_8, 8);
     41    CC_Compiler ccCompiler(function, encoding);
     42    UCDCompiler ucdCompiler(ccCompiler);
     43    PabloBuilder builder(function.getEntryBlock());
     44
     45    std::cerr << "Compiling " << name << std::endl;
     46
     47    // Build the unicode set function
     48    ucdCompiler.generateWithDefaultIfHierarchy(set, builder);
     49    // Optimize it at the pablo level
     50    Simplifier::optimize(function);
     51    CodeSinking::optimize(function);
     52    // AutoMultiplexing::optimize(function);
     53
     54
     55    if (engine) {
     56        engine->removeModule(pc.getModule());
     57    }
     58
     59    // Now compile the function ...
     60    return pc.compile(function).getExecutionEngine();
     61}
    1862
    1963int main(int argc, char *argv[]) {
    2064
    2165    PabloCompiler pc;
     66    ExecutionEngine * engine = nullptr;
    2267
    23     // Make the property generator create a for each loop that iterates through all properties
     68    for (PropertyObject * obj : property_object_table) {
    2469
     70        if (isa<UnsupportedPropertyObject>(obj)) continue;
    2571
     72        if (auto * enumObj = dyn_cast<EnumeratedPropertyObject>(obj)) {
     73            for (const std::string value : *enumObj) {
     74                const UnicodeSet & set = enumObj->GetCodepointSet(canonicalize_value_name(value));
     75                engine = compile("__get_" + property_enum_name[enumObj->getPropertyCode()] + "_" + value, set, pc, engine);
     76            }
     77            break;
     78        }
    2679
     80//        if (auto * extObj = dyn_cast<ExtensionPropertyObject>(obj)) {
     81//            for (const std::string value : *extObj) {
     82//                const UnicodeSet & set = extObj->GetCodepointSet(canonicalize_value_name(value));
     83//                engine = compile("__get_" + property_enum_name[extObj->getPropertyCode()] + "_" + value, set, pc, engine);
     84//            }
     85//        }
    2786
     87//        if (auto * binObj = dyn_cast<BinaryPropertyObject>(obj)) {
     88//            const UnicodeSet & set = binObj->GetCodepointSet(Binary_ns::Y);
     89//            compile("__get_" + property_enum_name[binObj->getPropertyCode()] + "_Y", set, pc);
     90//        }
     91    }
    2892
     93    pc.getModule()->dump();
    2994
    30 
    31 
    32 
    33 
    34 
    35 
     95    return 0;
    3696}
  • icGREP/icgrep-devel/icgrep/pablo/pablo_compiler.cpp

    r4659 r4661  
    7474, mBuilder(&LLVM_Builder)
    7575, mCarryManager(nullptr)
    76 , mExecutionEngine(nullptr)
    7776, mBitBlockType(VectorType::get(IntegerType::get(mMod->getContext(), 64), BLOCK_SIZE / 64))
    7877, mInputPtr(nullptr)
     
    8685, mOutputAddressPtr(nullptr)
    8786, mMaxWhileDepth(0)
    88 , mPrintRegisterFunction(nullptr)
    89 {
    90     //Create the jit execution engine.up
    91     InitializeNativeTarget();
    92     InitializeNativeTargetAsmPrinter();
    93     InitializeNativeTargetAsmParser();
     87, mPrintRegisterFunction(nullptr) {
     88
    9489}
    9590
     
    114109}
    115110
    116 CompiledPabloFunction PabloCompiler::compile(PabloFunction & function)
    117 {
     111CompiledPabloFunction PabloCompiler::compile(PabloFunction & function) {
    118112    mWhileDepth = 0;
    119113    mIfDepth = 0;
    120114    mMaxWhileDepth = 0;
    121115    mCarryManager = new CarryManager(mMod, mBuilder, mBitBlockType, mZeroInitializer, mOneInitializer);
    122    
     116
     117    Examine(function.getEntryBlock());
     118   
     119    InitializeNativeTarget();
     120    InitializeNativeTargetAsmPrinter();
     121    InitializeNativeTargetAsmParser();
     122
    123123    std::string errMessage;
    124124#ifdef USE_LLVM_3_5
     
    133133#endif
    134134    builder.setOptLevel(mMaxWhileDepth ? CodeGenOpt::Level::Less : CodeGenOpt::Level::None);
    135     mExecutionEngine = builder.create();
    136     if (mExecutionEngine == nullptr) {
     135    ExecutionEngine * ee = builder.create();
     136    if (ee == nullptr) {
    137137        throw std::runtime_error("Could not create ExecutionEngine: " + errMessage);
    138138    }
    139139
    140140    GenerateFunction(function);
    141     DeclareFunctions();
    142 
    143     Examine(function.getEntryBlock());
    144     DeclareCallFunctions();
     141    DeclareFunctions(ee);
     142    DeclareCallFunctions(ee);
    145143
    146144    mWhileDepth = 0;
     
    191189    verifyModule(*mMod, &dbgs());
    192190
    193     mExecutionEngine->finalizeObject();
     191    ee->finalizeObject();
    194192
    195193    delete mCarryManager;
     
    197195
    198196    //Return the required size of the carry data area to the process_block function.
    199     return CompiledPabloFunction(totalCarryDataSize * sizeof(BitBlock), mFunction, mExecutionEngine);
     197    return CompiledPabloFunction(totalCarryDataSize * sizeof(BitBlock), mFunction, ee);
    200198}
    201199
     
    314312}
    315313
    316 inline void PabloCompiler::DeclareFunctions() {
     314inline void PabloCompiler::DeclareFunctions(ExecutionEngine * ee) {
    317315    if (DumpTrace || TraceNext) {
    318316        //This function can be used for testing to print the contents of a register from JIT'd code to the terminal window.
    319317        mPrintRegisterFunction = mMod->getOrInsertFunction("wrapped_print_register", Type::getVoidTy(getGlobalContext()), Type::getInt8PtrTy(getGlobalContext()), mBitBlockType, NULL);
    320         mExecutionEngine->addGlobalMapping(cast<GlobalValue>(mPrintRegisterFunction), (void *)&wrapped_print_register);
     318        ee->addGlobalMapping(cast<GlobalValue>(mPrintRegisterFunction), (void *)&wrapped_print_register);
    321319    }
    322320}
     
    340338}
    341339
    342 void PabloCompiler::DeclareCallFunctions() {
     340void PabloCompiler::DeclareCallFunctions(ExecutionEngine * ee) {
    343341    for (auto mapping : mCalleeMap) {
    344342        const String * callee = mapping.first;
     
    351349                throw std::runtime_error("Could not create static method call for external function \"" + callee->to_string() + "\"");
    352350            }
    353             mExecutionEngine->addGlobalMapping(cast<GlobalValue>(externalValue), fn_ptr);
     351            ee->addGlobalMapping(cast<GlobalValue>(externalValue), fn_ptr);
    354352            mCalleeMap[callee] = externalValue;
    355353        }
  • icGREP/icgrep-devel/icgrep/pablo/pablo_compiler.h

    r4659 r4661  
    6767    CompiledPabloFunction(size_t carryDataSize, Function * function, ExecutionEngine * executionEngine);
    6868
     69    inline Function * getLLVMFunction() const {
     70        return mFunction;
     71    }
     72
     73    inline ExecutionEngine * getExecutionEngine() const {
     74        return mExecutionEngine;
     75    }
     76
    6977    inline CompiledPabloFunction(CompiledPabloFunction && cpf)
    7078    : CarryDataSize(cpf.CarryDataSize)
     
    101109    ~PabloCompiler();
    102110    void InstallExternalFunction(std::string C_fn_name, void * fn_ptr);
    103     CompiledPabloFunction compile(pablo::PabloFunction &function);
     111    CompiledPabloFunction compile(pablo::PabloFunction & function);
     112    Module * getModule();
    104113private:
    105114    void GenerateFunction(PabloFunction & function);
    106     void DeclareFunctions();
     115    void DeclareFunctions(ExecutionEngine * ee);
    107116    void Examine(PabloBlock & blk);
    108     void DeclareCallFunctions();
     117    void DeclareCallFunctions(ExecutionEngine * ee);
    109118    void SetOutputValue(Value * marker, const unsigned index);
    110119
     
    147156    IRBuilder <> *                      mBuilder;
    148157    CarryManager *                      mCarryManager;
    149     ExecutionEngine*                    mExecutionEngine;
    150158
    151159    VectorType* const                   mBitBlockType;
     
    173181};
    174182
     183inline Module * PabloCompiler::getModule() {
     184    return mMod;
     185}
     186
    175187}
    176188
Note: See TracChangeset for help on using the changeset viewer.