Changeset 5900


Ignore:
Timestamp: Mar 11, 2018, 9:30:05 AM (13 months ago)
Author: cameron
Message: Unicode-lines option; set default linebreak to LF

Location:
icGREP/icgrep-devel
Files:
7 edited

Legend:

Unmodified
Added
Removed
  • icGREP/icgrep-devel/QA/greptest.xml

    r5862 r5900  
    371371</datafile>
    372372
    373 <grepcase regexp="^$" datafile="CRLF" grepcount="1"/>
    374 <grepcase regexp="^$" datafile="CRLF" flags="-v" grepcount="3"/>
    375 <grepcase regexp="^.*$" datafile="CRLF" grepcount="4"/>
    376 <grepcase regexp="" datafile="CRLF" grepcount="4"/>
     373<grepcase regexp="^$" datafile="CRLF" flags="-Unicode-lines" grepcount="1"/>
     374<grepcase regexp="^$" datafile="CRLF" flags="-v -Unicode-lines" grepcount="3"/>
     375<grepcase regexp="^.*$" datafile="CRLF" flags="-Unicode-lines" grepcount="4"/>
     376<grepcase regexp="" datafile="CRLF" flags="-Unicode-lines" grepcount="4"/>
    377377
    378378 <datafile id = "LU_test">
     
    584584Unterminated</datafile>
    585585
    586 <grepcase regexp="^.*$" datafile="LineBreaking" grepcount="19"/>
    587 <grepcase regexp="^\X*$" datafile="LineBreaking" grepcount="19"/>
    588 <grepcase regexp="(?g)^.*$" datafile="LineBreaking" grepcount="19"/>
    589 <grepcase regexp="Unterminated$" datafile="LineBreaking" grepcount="1"/>
    590 <grepcase regexp="^CRLF.$" datafile="LineBreaking" grepcount="5"/>
    591 <grepcase regexp="LS[0-9]*" datafile="LineBreaking" grepcount="6"/>
    592 <grepcase regexp="PS" datafile="LineBreaking" grepcount="4"/>
    593 <grepcase regexp="\S" datafile="LineBreaking" grepcount="16"/>
    594 <grepcase regexp="$" datafile="LineBreaking" grepcount="19"/>
    595 <grepcase regexp="\p{ascii}" datafile="LineBreaking" grepcount="16"/>
     586<grepcase regexp="^.*$" datafile="LineBreaking" flags="-Unicode-lines" grepcount="19"/>
     587<grepcase regexp="^\X*$" datafile="LineBreaking" flags="-Unicode-lines" grepcount="19"/>
     588<grepcase regexp="(?g)^.*$" datafile="LineBreaking" flags="-Unicode-lines" grepcount="19"/>
     589<grepcase regexp="Unterminated$" datafile="LineBreaking" flags="-Unicode-lines" grepcount="1"/>
     590<grepcase regexp="^CRLF.$" datafile="LineBreaking" flags="-Unicode-lines" grepcount="5"/>
     591<grepcase regexp="LS[0-9]*" datafile="LineBreaking" flags="-Unicode-lines" grepcount="6"/>
     592<grepcase regexp="PS" datafile="LineBreaking" flags="-Unicode-lines" grepcount="4"/>
     593<grepcase regexp="\S" datafile="LineBreaking" flags="-Unicode-lines" grepcount="16"/>
     594<grepcase regexp="$" datafile="LineBreaking" flags="-Unicode-lines" grepcount="19"/>
     595<grepcase regexp="$" datafile="LineBreaking" grepcount="9"/>
     596<grepcase regexp="\p{ascii}" datafile="LineBreaking" flags="-Unicode-lines" grepcount="16"/>
    596597
    597598<grepcase regexp="[a-z]{20}" datafile="4KiB-onepage" grepcount="77"/>
     
    816817</datafile>
    817818
    818 <grepcase regexp="^\X$" datafile="graphemebreaktest" grepcount="55"/>
     819<grepcase regexp="^\X$" datafile="graphemebreaktest" flags="-Unicode-lines" grepcount="55"/>
    819820<!--<grepcase regexp="^\X\X$" datafile="graphemebreaktest" grepcount="153"/>
    820821<grepcase regexp="^\X{3}$" datafile="graphemebreaktest" grepcount="2"/>-->
     
    854855
    855856<grepcase regexp="\p{name=/AIRPLANE/}" datafile="../All_good" flags="-enable-object-cache=0 -enable-segment-pipeline-parallel" grepcount="8"/>
    856 <grepcase regexp="\xc4[\x80-\x85]" datafile="../All_good" grepcount="5"/>
     857<grepcase regexp="\xc4[\x80-\x85]" datafile="../All_good" flags="-Unicode-lines" grepcount="5"/>
     858<grepcase regexp="\xc4[\x80-\x85]" datafile="../All_good" flags="-Unicode-lines=0" grepcount="6"/>
    857859<grepcase regexp="[\N{GREEK CAPITAL LETTER ALPHA}-\N{Greek capital letter UPSILON with DIALYTIKA}]" datafile="../All_good" grepcount="27"/>
    858860</greptest>
  • icGREP/icgrep-devel/icgrep/grep/grep_engine.cpp

    r5897 r5900  
    147147    mNextFileToPrint(0),
    148148    grepMatchFound(false),
    149     mGrepRecordBreak(GrepRecordBreakKind::Unicode),
     149    mGrepRecordBreak(GrepRecordBreakKind::LF),
    150150    mMoveMatchesToEOL(true),
    151151    mEngineThread(pthread_self()) {}
  • icGREP/icgrep-devel/icgrep/grep_interface.cpp

    r5753 r5900  
    9191static cl::opt<bool, true> NullDataOption("z", cl::location(NullDataFlag), cl::desc("Use the NUL character (codepoint 00) as the line-break character for input."), cl::cat(Input_Options), cl::Grouping);
    9292static cl::alias NullDataAlias("null-data", cl::desc("Alias for -z"), cl::aliasopt(NullDataOption));
     93
     94bool UnicodeLinesFlag;
     95static cl::opt<bool, true> UnicodeLinesOption("Unicode-lines", cl::location(UnicodeLinesFlag), cl::desc("Enable Unicode line breaks (LF/VT/FF/CR/NEL/LS/PS/CRLF)"), cl::cat(Input_Options));
    9396
    9497bool MmapFlag;
  • icGREP/icgrep-devel/icgrep/grep_interface.h

    r5705 r5900  
    5050extern bool BinaryFlag; // -U
    5151extern bool NullDataFlag; // -z
     52extern bool UnicodeLinesFlag; // -Unicode-lines
     53
    5254extern bool MmapFlag; // -mmap
     55
    5356extern std::string ExcludeFlag; // -exclude
    5457extern std::string ExcludeFromFlag; // -exclude-from
  • icGREP/icgrep-devel/icgrep/icgrep.cpp

    r5894 r5900  
    3737static cl::list<std::string> inputFiles(cl::Positional, cl::desc("<regex> <input file ...>"), cl::OneOrMore);
    3838
    39 static cl::opt<bool> UnixBreaks("Unix-line-breaks", cl::desc("Enable Unix line breaks"));
    4039static cl::opt<bool> ByteMode("enable-byte-mode", cl::desc("Process regular expressions in byte mode"));
    4140
     
    198197    }
    199198               
    200     if (UnixBreaks) {
    201         grepEngine->setRecordBreak(grep::GrepRecordBreakKind::LF);
     199    if (grep::UnicodeLinesFlag) {
     200        grepEngine->setRecordBreak(grep::GrepRecordBreakKind::Unicode);
    202201    } else if (grep::NullDataFlag) {
    203202        grepEngine->setRecordBreak(grep::GrepRecordBreakKind::Null);
     203    } else {
     204        grepEngine->setRecordBreak(grep::GrepRecordBreakKind::LF);
    204205    }
    205206   
  • icGREP/icgrep-devel/icgrep/kernels/grep_kernel.cpp

    r5890 r5900  
    11/*
    2  *  Copyright (c) 2017 International Characters.
     2 *  Copyright (c) 2018 International Characters.
    33 *  This software is licensed to the public under the Open Software License 3.0.
    44 */
     
    135135    it.createAssign(nonFinal, it.createAnd(nonFinal, u8valid));
    136136    pb.createAssign(nonFinal, pb.createOr(nonFinal, CRLF));
    137     PabloAST * unterminatedLineAtEOF = pb.createAtEOF(pb.createAdvance(pb.createNot(LineBreak), 1), "unterminatedLineAtEOF");
     137    //PabloAST * unterminatedLineAtEOF = pb.createAtEOF(pb.createAdvance(pb.createNot(LineBreak), 1), "unterminatedLineAtEOF");
    138138   
    139139    Var * const required = getOutputStreamVar("nonFinal");
    140140    pb.createAssign(pb.createExtract(required, pb.getInteger(0)), nonFinal);
    141     pb.createAssign(pb.createExtract(getOutputStreamVar("UnicodeLB"), pb.getInteger(0)), pb.createOr(LineBreak, unterminatedLineAtEOF, "EOL"));
     141    pb.createAssign(pb.createExtract(getOutputStreamVar("UnicodeLB"), pb.getInteger(0)), LineBreak);//pb.createOr(LineBreak, unterminatedLineAtEOF, "EOL"));
    142142}
    143143
  • icGREP/icgrep-devel/icgrep/re/re_compiler.cpp

    r5899 r5900  
    8686
    8787inline MarkerType RE_Compiler::compile(RE * const re, PabloBuilder & pb) {
    88     return process(re, makeMarker(FinalPostPositionUnit, pb.createOnes()), pb);
     88    return process(re, makeMarker(InitialPostPositionUnit, pb.createOnes()), pb);
    8989}
    9090   
Note: See TracChangeset for help on using the changeset viewer.