Changeset 5161


Ignore:
Timestamp:
Sep 24, 2016, 10:26:59 AM (3 years ago)
Author:
cameron
Message:

Override LLVM error_handler for return code 2; convert ParseFailure to LLVM fatal error.

Location:
icGREP/icgrep-devel/icgrep
Files:
2 deleted
4 edited

Legend:

Unmodified
Added
Removed
  • icGREP/icgrep-devel/icgrep/CMakeLists.txt

    r5160 r5161  
    8282add_library(PabloADT ${PABLO_SRC})
    8383add_library(RegExpADT re/re_re.cpp re/re_cc.cpp re/re_rep.cpp re/re_diff.cpp re/re_intersect.cpp re/printer_re.cpp)
    84 add_library(RegExpCompiler re/re_parser.cpp re/parsefailure.cpp re/re_nullable.cpp re/re_simplifier.cpp re/re_compiler.cpp re/re_analysis.cpp re/re_toolchain.cpp re/re_name_resolve.cpp)
     84add_library(RegExpCompiler re/re_parser.cpp re/re_nullable.cpp re/re_simplifier.cpp re/re_compiler.cpp re/re_analysis.cpp re/re_toolchain.cpp re/re_name_resolve.cpp)
    8585add_library(CCADT cc/cc_compiler.cpp utf8_encoder.cpp utf16_encoder.cpp UCD/CaseFolding_txt.cpp)
    8686add_library(UCDlib UCD/unicode_set.cpp UCD/ucd_compiler.cpp UCD/PropertyObjects.cpp UCD/resolve_properties.cpp UCD/UnicodeNameData.cpp)
  • icGREP/icgrep-devel/icgrep/icgrep.cpp

    r5156 r5161  
    88#include <vector>
    99#include <llvm/Support/CommandLine.h>
     10#include <llvm/Support/ErrorHandling.h>
     11#include <llvm/Support/Signals.h>
    1012#include <re/re_alt.h>
    1113#include <re/re_parser.h>
     
    1921#include <pablo/pablo_toolchain.h>
    2022#include <mutex>
     23
    2124
    2225#include <iostream> // MEEE
     
    4952         Gives you colored output + back-referencing capability."), cl::cat(EnhancedGrepOptions));
    5053
     54//
     55// Handler for errors reported through llvm::report_fatal_error.  Report
     56// and signal error code 2 (grep convention).
     57//
     58static void icgrep_error_handler(void *UserData, const std::string &Message,
     59                             bool GenCrashDiag) {
     60
     61    // Modified from LLVM's internal report_fatal_error logic.
     62    SmallVector<char, 64> Buffer;
     63    raw_svector_ostream OS(Buffer);
     64    OS << "icgrep ERROR: " << Message << "\n";
     65    StringRef MessageStr = OS.str();
     66    ssize_t written = ::write(2, MessageStr.data(), MessageStr.size());
     67    (void)written; // If something went wrong, we deliberately just give up.
     68
     69    // Run the interrupt handlers to make sure any special cleanups get done, in
     70    // particular that we remove files registered with RemoveFileOnSignal.
     71    llvm::sys::RunInterruptHandlers();
     72    exit(2);
     73}
    5174
    5275static std::string allREs;
     
    212235
    213236int main(int argc, char *argv[]) {
     237    llvm::install_fatal_error_handler(&icgrep_error_handler);
    214238    cl::HideUnrelatedOptions(ArrayRef<const cl::OptionCategory *>{&LegacyGrepOptions, &EnhancedGrepOptions, re::re_toolchain_flags(), pablo::pablo_toolchain_flags(), codegen::codegen_flags()});
    215239    cl::ParseCommandLineOptions(argc, argv);
     
    259283            const int rc = pthread_create(&threads[i], NULL, DoGrep, (void *)&grepEngine);
    260284            if (rc) {
    261                 throw std::runtime_error("Failed to create thread: code " + std::to_string(rc));
     285                llvm::report_fatal_error("Failed to create thread: code " + std::to_string(rc));
    262286            }
    263287        }
     
    267291            const int rc = pthread_join(threads[i], &status);
    268292            if (rc) {
    269                 throw std::runtime_error("Failed to join thread: code " + std::to_string(rc));
     293                llvm::report_fatal_error("Failed to join thread: code " + std::to_string(rc));
    270294            }
    271295        }
  • icGREP/icgrep-devel/icgrep/re/re_parser.cpp

    r5132 r5161  
    2020#include <grep_engine.h>
    2121#include <sstream>
    22 #include <iostream>
    2322#include <string>
    2423#include <algorithm>
     
    3231
    3332
     33
     34
    3435namespace re {
     36   
    3537
    3638RE * RE_Parser::parse(const std::string & regular_expression, ModeFlagSet initialFlags) {
     
    4244    RE * re = parser.parse_RE();
    4345    if (re == nullptr) {
    44         throw ParseFailure("An unexpected parsing error occurred!");
     46        ParseFailure("An unexpected parsing error occurred!");
    4547    }
    4648    return re;
     
    5860
    5961RE * makeAtomicGroup(RE * r) {
    60     throw ParseFailure("Atomic grouping not supported.");
     62    RE_Parser::ParseFailure("Atomic grouping not supported.");
    6163}
    6264
     
    8183    }
    8284    if (alt.empty()) {
    83         throw NoRegularExpressionFound();
     85        ParseFailure("No regular expression found!");
    8486    }
    8587    return makeAlt(alt.begin(), alt.end());
     
    119121                break;
    120122            case '*': case '+': case '?': case '{':
    121                 throw NothingToRepeat();
     123                ParseFailure("Need something to repeat before *, +, ? or {.");
    122124            case ']':
    123125                if (LEGACY_UNESCAPED_RBRAK_RBRACE_ALLOWED) {
    124126                    return createCC(parse_utf8_codepoint());
    125127                }
    126                 throw ParseFailure("Use  \\] for literal ].");
     128                ParseFailure("Use  \\] for literal ].");
    127129            case '}':
    128130                if (fNested) {
     
    131133                    return createCC(parse_utf8_codepoint());
    132134                }
    133                 throw ParseFailure("Use \\} for literal }.");
     135                ParseFailure("Use \\} for literal }.");
    134136            case '[':
    135137                mCursor++;
     
    198200                    group_expr = makeNegativeLookBehindAssertion(parse_alt());
    199201                } else {
    200                     throw ParseFailure("Illegal lookbehind assertion syntax.");
     202                    ParseFailure("Illegal lookbehind assertion syntax.");
    201203                }
    202204                break;
     
    216218                        //case 'x': modeBit = IGNORE_SPACE_MODE_FLAG; break;
    217219                        //case 'd': modeBit = UNIX_LINES_MODE_FLAG; break;
    218                         default: throw ParseFailure("Unsupported mode flag.");
     220                        default: ParseFailure("Unsupported mode flag.");
    219221                    }
    220222                    ++mCursor;
     
    236238                }
    237239            default:
    238                 throw ParseFailure("Illegal (? syntax.");
     240                ParseFailure("Illegal (? syntax.");
    239241        }
    240242    } else { // Capturing paren group.
     
    248250    }
    249251    if (*mCursor != ')') {
    250         throw ParseFailure("Closing parenthesis required.");
     252        ParseFailure("Closing parenthesis required.");
    251253    }
    252254    ++mCursor;
     
    279281        if (hasRep) {
    280282            if (lb > MAX_REPETITION_LOWER_BOUND || ub > MAX_REPETITION_UPPER_BOUND) {
    281                 throw ParseFailure("Bounded repetition exceeds icgrep implementation limit");
     283                ParseFailure("Bounded repetition exceeds icgrep implementation limit");
    282284            }
    283285            if ((ub != Rep::UNBOUNDED_REP) && (lb > ub)) {
    284                 throw ParseFailure("Lower bound cannot exceed upper bound in bounded repetition");
     286                ParseFailure("Lower bound cannot exceed upper bound in bounded repetition");
    285287            }
    286288            ++mCursor;
     
    290292            } else if (*mCursor == '+') {
    291293                ++mCursor;
    292                 throw ParseFailure("Possessive repetition is not supported in icgrep 1.0");
     294                ParseFailure("Possessive repetition is not supported in icgrep 1.0");
    293295            }
    294296            re = makeRep(re, lb, ub);
     
    308310        upper_bound = lower_bound;
    309311    } else if (*mCursor != ',') {
    310         throw BadLowerBound();
     312        ParseFailure("Bad lower bound!");
    311313    } else if (*++mCursor == '}') {
    312314        upper_bound = Rep::UNBOUNDED_REP;
     
    314316        upper_bound = parse_int();
    315317        if (*mCursor != '}') {
    316             throw BadUpperBound();
     318            ParseFailure("Bad upper bound!");
    317319        }
    318320    }
     
    353355        }
    354356        else {
    355             throw ParseFailure("Back reference " + backref + " without prior capture group.");
     357            ParseFailure("Back reference " + backref + " without prior capture group.");
    356358        }
    357359    }
     
    374376                        re = complemented ? makeZeroWidth("NonGCB") : makeZeroWidth("GCB");
    375377                        break;
    376                     case 'w': throw ParseFailure("\\b{w} not yet supported.");
    377                     case 'l': throw ParseFailure("\\b{l} not yet supported.");
    378                     case 's': throw ParseFailure("\\b{s} not yet supported.");
    379                     default: throw ParseFailure("Unrecognized boundary assertion");
     378                    case 'w': ParseFailure("\\b{w} not yet supported.");
     379                    case 'l': ParseFailure("\\b{l} not yet supported.");
     380                    case 's': ParseFailure("\\b{s} not yet supported.");
     381                    default: ParseFailure("Unrecognized boundary assertion");
    380382                }
    381383                if (*++mCursor != '}') {
    382                     throw ParseFailure("Malformed boundary assertion");
     384                    ParseFailure("Malformed boundary assertion");
    383385                }
    384386                ++mCursor;
     
    407409        case 'q':
    408410            if (*++mCursor != '{') {
    409                 throw ParseFailure("Malformed grapheme-boundary property expression");
    410             }
    411             ++mCursor;
    412             throw ParseFailure("Literal grapheme cluster expressions not yet supported.");
     411                ParseFailure("Malformed grapheme-boundary property expression");
     412            }
     413            ++mCursor;
     414            ParseFailure("Literal grapheme cluster expressions not yet supported.");
    413415            if (*mCursor != '}') {
    414                 throw ParseFailure("Malformed grapheme-boundary property expression");
     416                ParseFailure("Malformed grapheme-boundary property expression");
    415417            }
    416418            ++mCursor;
     
    420422        case 'p':
    421423            if (*++mCursor != '{') {
    422                 throw ParseFailure("Malformed property expression");
     424                ParseFailure("Malformed property expression");
    423425            }
    424426            ++mCursor;
    425427            re = parsePropertyExpression();
    426428            if (*mCursor != '}') {
    427                 throw ParseFailure("Malformed property expression");
     429                ParseFailure("Malformed property expression");
    428430            }
    429431            ++mCursor;
     
    436438        case 'N':
    437439            if (*++mCursor != '{') {
    438                 throw ParseFailure("Malformed \\N expression");
     440                ParseFailure("Malformed \\N expression");
    439441            }
    440442            ++mCursor;
    441443            re = parseNamePatternExpression();
    442444            if (*mCursor != '}') {
    443                 throw ParseFailure("Malformed \\N expression");
     445                ParseFailure("Malformed \\N expression");
    444446            }
    445447            ++mCursor;
     
    453455            return makeWordEnd();
    454456        default:
    455             throw ParseFailure("Internal error");
    456     }
     457            ParseFailure("Internal error");
     458    }
     459}
     460   
     461void InvalidUTF8Encoding() {
     462    RE_Parser::ParseFailure("Invalid UTF-8 encoding!");
    457463}
    458464
     
    465471    if (pfx < 0xE0) {
    466472        if (pfx < 0xC2) {  // bare suffix or illegal prefix 0xC0 or 0xC1
    467             throw InvalidUTF8Encoding();
     473            InvalidUTF8Encoding();
    468474        }
    469475        suffix_bytes = 1;
     
    478484    while (suffix_bytes--) {
    479485        if (mCursor.noMore()) {
    480             throw InvalidUTF8Encoding();
     486            InvalidUTF8Encoding();
    481487        }
    482488        char_t sfx = *mCursor++;
    483489        if ((sfx & 0xC0) != 0x80) {
    484             throw InvalidUTF8Encoding();
     490            InvalidUTF8Encoding();
    485491        }
    486492        cp = (cp << 6) | (sfx & 0x3F);
     
    489495    // or a 4-byte sequence is used to encode a codepoint < 0x10000.
    490496    if ((pfx == 0xE0 && cp < 0x800) || (pfx == 0xF0 && cp < 0x10000)) {
    491         throw InvalidUTF8Encoding();
     497        InvalidUTF8Encoding();
    492498    }
    493499    // It is an error if a 4-byte sequence is used to encode a codepoint
    494500    // above the Unicode maximum.
    495501    if (cp > UCD::UNICODE_MAX) {
    496         throw InvalidUTF8Encoding();
     502        InvalidUTF8Encoding();
    497503    }
    498504    return cp;
     
    653659        lastCodepointItem = static_cast<codepoint_t> ('-');
    654660        if (*mCursor == '-') {
    655             throw ParseFailure("Set operator has no left operand.");
     661            ParseFailure("Set operator has no left operand.");
    656662        }
    657663    }
     
    662668            case setDiffOp: {
    663669                if (lastItemKind == NoItem) {
    664                     throw ParseFailure("Set operator has no left operand.");
     670                    ParseFailure("Set operator has no left operand.");
    665671                }
    666672                if (!cc->empty()) {
     
    688694            case setCloser: {
    689695                if (lastItemKind == NoItem) {
    690                     throw ParseFailure("Set operator has no right operand.");
     696                    ParseFailure("Set operator has no right operand.");
    691697                }
    692698                if (!cc->empty()) {
     
    745751                    lastItemKind = BrackettedSetItem;
    746752                    if (*mCursor++ != ':' || *mCursor++ != ']')
    747                         throw ParseFailure("Posix set expression improperly terminated.");
     753                        ParseFailure("Posix set expression improperly terminated.");
    748754                }
    749755            }
     
    751757            case rangeHyphen:
    752758                if (lastItemKind != CodepointItem) {
    753                     throw ParseFailure("Range operator - has illegal left operand.");
     759                    ParseFailure("Range operator - has illegal left operand.");
    754760                }
    755761                insert_range(cc, lastCodepointItem, parse_codepoint());
     
    784790        }
    785791    }
    786     throw ParseFailure("Set expression not properly terminated.");
     792    ParseFailure("Set expression not properly terminated.");
    787793}
    788794
     
    828834            }
    829835            else if (*mCursor++ == '?') return 0x7F;  // \c? ==> DEL
    830             else throw("Illegal \\c escape sequence");
     836            else ParseFailure("Illegal \\c escape sequence");
    831837        case '0': // Octal escape:  0 - 0377
    832838            ++mCursor;
     
    837843                ++mCursor;
    838844                cp_value = parse_octal_codepoint(1, 7);
    839                 if (*mCursor++ != '}') throw ParseFailure("Malformed octal escape sequence");
     845                if (*mCursor++ != '}') ParseFailure("Malformed octal escape sequence");
    840846                return cp_value;
    841847            }
    842848            else {
    843                 throw ParseFailure("Malformed octal escape sequence");
     849                ParseFailure("Malformed octal escape sequence");
    844850            }
    845851        case 'x':
     
    848854              ++mCursor;
    849855              cp_value = parse_hex_codepoint(1, 6);
    850               if (*mCursor++ != '}') throw ParseFailure("Malformed hex escape sequence");
     856              if (*mCursor++ != '}') ParseFailure("Malformed hex escape sequence");
    851857              return cp_value;
    852858            }
     
    859865                ++mCursor;
    860866                cp_value = parse_hex_codepoint(1, 6);
    861                 if (*mCursor++ != '}') throw ParseFailure("Malformed hex escape sequence");
     867                if (*mCursor++ != '}') ParseFailure("Malformed hex escape sequence");
    862868                return cp_value;
    863869            }
     
    871877            // Escaped letters should be reserved for special functions.
    872878            if (((*mCursor >= 'A') && (*mCursor <= 'Z')) || ((*mCursor >= 'a') && (*mCursor <= 'z')))
    873                 throw ParseFailure("Undefined or unsupported escape sequence");
     879                ParseFailure("Undefined or unsupported escape sequence");
    874880            else if ((*mCursor < 0x20) || (*mCursor >= 0x7F))
    875                 throw ParseFailure("Illegal escape sequence");
     881                ParseFailure("Illegal escape sequence");
    876882            else return static_cast<codepoint_t>(*mCursor++);
    877883    }
     
    890896        ++count;
    891897    }
    892     if (count < mindigits) throw ParseFailure("Octal sequence has too few digits");
    893     if (value > UCD::UNICODE_MAX) throw ParseFailure("Octal value too large");
     898    if (count < mindigits) ParseFailure("Octal sequence has too few digits");
     899    if (value > UCD::UNICODE_MAX) ParseFailure("Octal value too large");
    894900    return value;
    895901}
     
    909915        ++count;
    910916    }
    911     if (count < mindigits) throw ParseFailure("Hexadecimal sequence has too few digits");
    912     if (value > UCD::UNICODE_MAX) throw ParseFailure("Hexadecimal value too large");
     917    if (count < mindigits) ParseFailure("Hexadecimal sequence has too few digits");
     918    if (value > UCD::UNICODE_MAX) ParseFailure("Hexadecimal value too large");
    913919    return value;
    914920}
  • icGREP/icgrep-devel/icgrep/re/re_parser.h

    r5132 r5161  
    11/*
    2  *  Copyright (c) 2014 International Characters.
     2 *  Copyright (c) 2014-6 International Characters.
    33 *  This software is licensed to the public under the Open Software License 3.0.
    44 *  icgrep is a trademark of International Characters.
     
    1717#include <map>
    1818#include <re/re_memoizer.hpp>
    19 #include <re/parsefailure.h>
     19#include <llvm/Support/ErrorHandling.h>
    2020
    2121namespace re {
     
    3939typedef unsigned ModeFlagSet;
    4040
     41
    4142class RE_Parser
    4243{
     
    4546    static RE * parse(const std::string &input_string, ModeFlagSet initialFlags);
    4647
     48   
     49    static LLVM_ATTRIBUTE_NORETURN void ParseFailure(std::string errmsg) {
     50        llvm::report_fatal_error(errmsg);
     51    }
     52   
    4753private:
    48 
    4954    using NameMap = std::map<std::pair<std::string, std::string>, re::Name *>;
    5055
     
    5762        inline Cursor & operator++() {
    5863            if (LLVM_UNLIKELY(mCursor == mEnd)) {
    59                 throw IncompleteRegularExpression();
     64                ParseFailure("Incomplete regular expression!");
    6065            }
    6166            ++mCursor;
     
    6570        inline Cursor operator++(int) {
    6671            if (LLVM_UNLIKELY(mCursor == mEnd)) {
    67                 throw IncompleteRegularExpression();
     72                ParseFailure("Incomplete regular expression!");
    6873            }
    6974            Cursor tmp(*this);
Note: See TracChangeset for help on using the changeset viewer.