Changeset 4316 for icGREP


Ignore:
Timestamp:
Dec 3, 2014, 6:47:44 AM (5 years ago)
Author:
cameron
Message:

Case-insensitive matching - initial check-in.

Location:
icGREP/icgrep-devel/icgrep
Files:
2 added
3 edited

Legend:

Unmodified
Added
Removed
  • icGREP/icgrep-devel/icgrep/CMakeLists.txt

    r4300 r4316  
    5555add_library(PabloADT pablo/pe_and.cpp pablo/pe_not.cpp pablo/pe_or.cpp  pablo/pabloAST.cpp  pablo/pe_sel.cpp  pablo/pe_xor.cpp pablo/codegenstate.cpp  pablo/symbol_generator.cpp pablo/analysis/useanalysis.cpp pablo/printer_pablos.cpp pablo/pablo_compiler.cpp)
    5656add_library(RegExpADT re/re_re.cpp re/re_cc.cpp re/re_parser.cpp re/re_rep.cpp re/parsefailure.cpp re/re_nullable.cpp re/re_simplifier.cpp re/re_compiler.cpp re/printer_re.cpp)
    57 add_library(CCADT cc/cc_namemap.cpp cc/cc_compiler.cpp utf_encoding.cpp utf8_encoder.cpp unicode_categories.h)
     57add_library(CCADT cc/cc_namemap.cpp cc/cc_compiler.cpp utf_encoding.cpp utf8_encoder.cpp unicode_categories.h UCD/CaseFolding_txt.cpp)
    5858
    5959include_directories("${PROJECT_SOURCE_DIR}")
     
    6161include_directories("${PROJECT_SOURCE_DIR}/cc")
    6262include_directories("${PROJECT_SOURCE_DIR}/pablo")
     63include_directories("${PROJECT_SOURCE_DIR}/UCD")
    6364include_directories("${PROJECT_SOURCE_DIR}/include")
    6465include_directories("${PROJECT_SOURCE_DIR}/include/simd-lib")
  • icGREP/icgrep-devel/icgrep/re/re_parser.cpp

    r4312 r4316  
    1515#include <re/re_intersect.h>
    1616#include <re/parsefailure.h>
     17#include <UCD/CaseFolding_txt.h>
    1718#include <algorithm>
    1819
     
    130131            case ']': case '}':
    131132                if (LEGACY_UNESCAPED_RBRAK_RBRACE_ALLOWED) {
    132                     return makeCC(parse_utf8_codepoint());
     133                    return build_CC(parse_utf8_codepoint());
    133134                }
    134135                else throw ParseFailure("Use  \\] or \\} for literal ] or }.");
     
    143144                return parse_escaped();
    144145            default:
    145                 return makeCC(parse_utf8_codepoint());
     146                return build_CC(parse_utf8_codepoint());
    146147        }
    147148    }
     
    361362      return parse_escaped_set();
    362363    else
    363       return makeCC(parse_escaped_codepoint());
     364      return build_CC(parse_escaped_codepoint());
    364365}
    365366
     
    633634            case rangeHyphen:
    634635                if (lastItemKind != CodepointItem) throw ParseFailure("Range operator - has illegal left operand.");
    635                 cc->insert_range(lastCodepointItem, parse_codepoint());
     636                CC_add_range(cc, lastCodepointItem, parse_codepoint());
    636637                lastItemKind = RangeItem;
    637638                break;
     
    654655                else {
    655656                    lastCodepointItem = parse_escaped_codepoint();
    656                     cc->insert(lastCodepointItem);
     657                    CC_add_codepoint(cc, lastCodepointItem);
    657658                    lastItemKind = CodepointItem;
    658659                }
     
    660661            case emptyOperator:
    661662                lastCodepointItem = parse_utf8_codepoint();
    662                 cc->insert(lastCodepointItem);
     663                CC_add_codepoint(cc, lastCodepointItem);
    663664                lastItemKind = CodepointItem;
    664665                break;
     
    812813}
    813814
    814 }
     815CC * RE_Parser::build_CC(codepoint_t cp) {
     816    CC * cc = makeCC();
     817    CC_add_codepoint(cc, cp);
     818    return cc;
     819}
     820
     821void RE_Parser::CC_add_codepoint(CC * cc, codepoint_t cp) {
     822    if (fModeFlagSet & CASE_INSENSITIVE_MODE_FLAG) {
     823        caseInsensitiveInsert(cc, cp);
     824    }
     825    else cc->insert(cp);
     826}
     827
     828void RE_Parser::CC_add_range(CC * cc, codepoint_t lo, codepoint_t hi) {
     829    if (fModeFlagSet & CASE_INSENSITIVE_MODE_FLAG) {
     830        caseInsensitiveInsertRange(cc, lo, hi);
     831    }
     832    else cc-> insert_range(lo, hi);
     833}
     834   
     835   
     836}
  • icGREP/icgrep-devel/icgrep/re/re_parser.h

    r4312 r4316  
    8080
    8181    inline void throw_incomplete_expression_error_if_end_of_stream() const;
     82   
     83    // CC insertion dependent on case-insensitive flag.
     84    CC * build_CC(codepoint_t cp);
     85   
     86    void CC_add_codepoint(CC * cc, codepoint_t cp);
     87   
     88    void CC_add_range(CC * cc, codepoint_t lo, codepoint_t hi);
    8289
    8390private:
Note: See TracChangeset for help on using the changeset viewer.