Changeset 5046 for icGREP


Ignore:
Timestamp:
Jun 9, 2016, 4:15:48 PM (3 years ago)
Author:
xuedongx
Message:

fix index

Location:
icGREP/icgrep-devel/icgrep
Files:
11 edited

Legend:

Unmodified
Added
Removed
  • icGREP/icgrep-devel/icgrep/UCD/ucd_compiler.cpp

    r5045 r5046  
    1717 ** ------------------------------------------------------------------------------------------------------------- */
    1818inline codepoint_t encodingByte(const codepoint_t cp, const unsigned n, bool UTF_16) {
    19         return UTF_16 ? UTF16_Encoder::encodingByte(cp, n) : UTF8_Encoder::encodingByte(cp, n);
     19    return UTF_16 ? UTF16_Encoder::encodingByte(cp, n) : UTF8_Encoder::encodingByte(cp, n);
    2020}
    2121
    2222inline unsigned length(const codepoint_t cp, bool UTF_16) {
    23         return UTF_16 ? UTF16_Encoder::length(cp) : UTF8_Encoder::length(cp);
     23    return UTF_16 ? UTF16_Encoder::length(cp) : UTF8_Encoder::length(cp);
    2424}
    2525
    2626inline codepoint_t maxCodePoint(const unsigned length, bool UTF_16) {
    27         return UTF_16 ?  UTF16_Encoder::maxCodePoint(length) : UTF8_Encoder::maxCodePoint(length);
     27    return UTF_16 ?  UTF16_Encoder::maxCodePoint(length) : UTF8_Encoder::maxCodePoint(length);
    2828}
    2929
    3030inline bool isLowCodePointAfterByte(const codepoint_t cp, const unsigned n, bool UTF_16) {
    31         return UTF_16 ? UTF16_Encoder::isLowCodePointAfterByte(cp, n) : UTF8_Encoder::isLowCodePointAfterByte(cp, n);
     31    return UTF_16 ? UTF16_Encoder::isLowCodePointAfterByte(cp, n) : UTF8_Encoder::isLowCodePointAfterByte(cp, n);
    3232}
    3333inline bool isHighCodePointAfterByte(const codepoint_t cp, const unsigned n, bool UTF_16) {
    34         return UTF_16 ? UTF16_Encoder::isHighCodePointAfterByte(cp, n) : UTF8_Encoder::isHighCodePointAfterByte(cp, n);
     34    return UTF_16 ? UTF16_Encoder::isHighCodePointAfterByte(cp, n) : UTF8_Encoder::isHighCodePointAfterByte(cp, n);
    3535}
    3636inline codepoint_t minCodePointWithCommonBytes(const re::codepoint_t cp, const unsigned n, bool UTF_16) {
    37         return UTF_16 ? UTF16_Encoder::minCodePointWithCommonBytes(cp, n) : UTF8_Encoder::minCodePointWithCommonBytes(cp, n);
     37    return UTF_16 ? UTF16_Encoder::minCodePointWithCommonBytes(cp, n) : UTF8_Encoder::minCodePointWithCommonBytes(cp, n);
    3838}
    3939inline codepoint_t maxCodePointWithCommonBytes(const re::codepoint_t cp, const unsigned n, bool UTF_16) {
    40         return UTF_16 ? UTF16_Encoder::maxCodePointWithCommonBytes(cp, n) : UTF8_Encoder::maxCodePointWithCommonBytes(cp, n);
     40    return UTF_16 ? UTF16_Encoder::maxCodePointWithCommonBytes(cp, n) : UTF8_Encoder::maxCodePointWithCommonBytes(cp, n);
    4141}
    4242
     
    271271                const auto lo_byte = encodingByte(lo, byte_no, isUTF_16);
    272272                const auto hi_byte = encodingByte(hi, byte_no, isUTF_16);
    273                 //std::cout << "lo_byte: " << std::hex << lo_byte << " hi_byte: " << std::hex << hi_byte << std::endl;
    274                                 if (lo_byte != hi_byte) {
    275                                         unsigned num = isUTF_16 ? 10 : 6;
     273                if (lo_byte != hi_byte) {
     274                    unsigned num = isUTF_16 ? 10 : 6;
    276275                    if (!isLowCodePointAfterByte(lo, byte_no, isUTF_16)) {
    277276                        const codepoint_t mid = lo | ((1 << (num * (min - byte_no))) - 1);
     
    334333
    335334    if (at_lo_boundary && at_hi_boundary) {
    336                 if (!isUTF_16) {
    337                         if (lo_byte != hi_byte) {
    338                                 if (lo == 0x80) lo_byte = 0xC0;
    339                                 if (hi == 0x10FFFF) hi_byte = 0xFF;
    340                         }
    341                 }
     335        if (!isUTF_16) {
     336            if (lo_byte != hi_byte) {
     337                if (lo == 0x80) lo_byte = 0xC0;
     338                if (hi == 0x10FFFF) hi_byte = 0xFF;
     339            }
     340        }
    342341        PabloAST * cc = mCharacterClassCompiler.compileCC(makeCC(lo_byte, hi_byte), builder);
    343342        target = builder.createAnd(cc, target);
     
    371370    assert (byte_no >= 1 && byte_no <= 4);
    372371    assert (byte_no == 1 || prefix != nullptr);
    373         bool isUTF_16 = mCharacterClassCompiler.isUTF_16();
     372    bool isUTF_16 = mCharacterClassCompiler.isUTF_16();
    374373    for (unsigned i = 1; i != byte_no; ++i) {
    375374        const CC * const cc = makeCC(encodingByte(cp, i, isUTF_16));
  • icGREP/icgrep-devel/icgrep/cc/cc_compiler.h

    r5045 r5046  
    4545    }
    4646
    47         bool isUTF_16() {
    48                 return mEncoding.getBits() == 16;
    49         }
     47    bool isUTF_16() {
     48        return mEncoding.getBits() == 16;
     49    }
    5050
    5151private:
  • icGREP/icgrep-devel/icgrep/grep_engine.cpp

    r5045 r5046  
    8282    if ((end_byte < 0xA8) || (end_byte > 0xA9)) return true;
    8383        if (!UTF_16) {
    84         return (static_cast<unsigned char>(fileBuffer[fileSize-3]) != 0xE2) || (penult_byte != 0x80);
     84            return (static_cast<unsigned char>(fileBuffer[fileSize-3]) != 0xE2) || (penult_byte != 0x80);
    8585        }
    8686        else {// UTF_16
    87                 return (penult_byte != 0x20);
     87            return (penult_byte != 0x20);
    8888        }
    8989}
     
    126126void GrepEngine::grepCodeGen(std::string moduleName, re::RE * re_ast, bool CountOnly, bool UTF_16, bool isNameExpression) {
    127127    isUTF_16 = UTF_16;
    128         Module * M = new Module(moduleName, getGlobalContext());
     128    Module * M = new Module(moduleName, getGlobalContext());
    129129   
    130130    IDISA::IDISA_Builder * idb = IDISA::GetIDISA_Builder(M);
     
    132132    kernel::PipelineBuilder pipelineBuilder(M, idb);
    133133
    134         Encoding::Type type;
    135         type = UTF_16 ? Encoding::Type::UTF_16 : Encoding::Type::UTF_8;
    136         unsigned bits;
    137         bits = UTF_16 ? 16 : 8;
    138 
    139         Encoding encoding(type, bits);
    140 
    141         mIsNameExpression = isNameExpression;
     134    Encoding::Type type;
     135    type = UTF_16 ? Encoding::Type::UTF_16 : Encoding::Type::UTF_8;
     136    unsigned bits;
     137    bits = UTF_16 ? 16 : 8;
     138
     139    Encoding encoding(type, bits);
     140
     141    mIsNameExpression = isNameExpression;
    142142    re_ast = re::regular_expression_passes(encoding, re_ast);   
    143143    pablo::PabloFunction * function = re::re2pablo_compiler(encoding, re_ast);
     
    178178    uint64_t finalLineUnterminated = 0;
    179179    if(finalLineIsUnterminated(mFileBuffer, mFileSize, isUTF_16))
    180         finalLineUnterminated = 1;   
     180    finalLineUnterminated = 1;   
    181181    mGrepFunction(mFileBuffer, mFileSize, 0, finalLineUnterminated);
    182182
     
    209209extern "C" {
    210210    void wrapped_report_match(uint64_t lineNum, uint64_t line_start, uint64_t line_end, const char * buffer, uint64_t filesize, int fileIdx) {
    211                 int index = isUTF_16 ? 2 : 1;
    212                 int idx = fileIdx;
    213        
     211        int index = isUTF_16 ? 2 : 1;
     212        int idx = fileIdx;
     213     
    214214        if (ShowFileNames) {
    215215            resultStrs[idx] << inputFiles[idx] << ':';
     
    236236        }
    237237        unsigned char end_byte = (unsigned char)buffer[line_end];
    238                 unsigned char penult_byte = (unsigned char)(buffer[line_end - 1]);
     238        unsigned char penult_byte = (unsigned char)(buffer[line_end - 1]);
    239239        if (NormalizeLineBreaks) {
    240240            if (end_byte == 0x85) {
     
    252252                // Check for line_end on first byte of CRLF;  note that we don't
    253253                // want to access past the end of buffer.
    254                                 if (line_end + 1 < filesize) {
    255                                         if (!isUTF_16 && buffer[line_end + 1] == 0x0A) {
    256                     // Found CRLF; preserve both bytes.
    257                                                 line_end++;;
    258                                         }
    259                                         if (isUTF_16 && buffer[line_end + 1] == 0x0 && buffer[line_end + 2] == 0x0A) {
    260                     // Found CRLF; preserve both bytes.
    261                                                 line_end += 2;
    262                                         }
    263                                 }
     254                if (line_end + 1 < filesize) {
     255                    if (!isUTF_16 && buffer[line_end + 1] == 0x0A) {
     256                    // Found CRLF; preserve both bytes.
     257                        line_end++;;
     258                    }
     259                    if (isUTF_16 && buffer[line_end + 1] == 0x0 && buffer[line_end + 2] == 0x0A) {
     260                    // Found CRLF; preserve both bytes.
     261                        line_end += 2;
     262                    }
     263                }
    264264            }
    265265            resultStrs[idx].write(&buffer[line_start * index], (line_end - line_start + 1) * index);
  • icGREP/icgrep-devel/icgrep/kernels/pipeline.cpp

    r5045 r5046  
    4040    mICgrepKernel = new KernelBuilder(iBuilder, "icgrep", codegen::SegmentSize);
    4141    mScanMatchKernel = new KernelBuilder(iBuilder, "scanMatch", codegen::SegmentSize);
    42         if (UTF_16) {
    43                 generateS2P_16Kernel(mMod, iBuilder, mS2PKernel);
    44         }
    45         else {
    46                 generateS2PKernel(mMod, iBuilder, mS2PKernel);
    47         }
     42    if (UTF_16) {
     43        generateS2P_16Kernel(mMod, iBuilder, mS2PKernel);
     44    }
     45    else {
     46        generateS2PKernel(mMod, iBuilder, mS2PKernel);
     47    }
    4848    generateScanMatch(mMod, iBuilder, 64, mScanMatchKernel, isNameExpression);
    4949    pablo_function_passes(function);
     
    208208
    209209    Value * remainingByte = iBuilder->CreateZExt(remainingBytes, iBuilder->getIntNTy(mBlockSize));
    210         Value * remainingUnit = iBuilder->CreateLShr(remainingByte, ConstantInt::get(iBuilder->getIntNTy(mBlockSize), 1));
     210    Value * remainingUnit = iBuilder->CreateLShr(remainingByte, ConstantInt::get(iBuilder->getIntNTy(mBlockSize), 1));
    211211    Value * EOFmark = iBuilder->CreateShl(ConstantInt::get(iBuilder->getIntNTy(mBlockSize), 1), UTF_16 ? remainingUnit : remainingByte);
    212         icGrepInstance->setInternalState("EOFmark", iBuilder->CreateBitCast(EOFmark, mBitBlockType));
     212    icGrepInstance->setInternalState("EOFmark", iBuilder->CreateBitCast(EOFmark, mBitBlockType));
    213213
    214214    icGrepInstance->CreateDoBlockCall();
  • icGREP/icgrep-devel/icgrep/kernels/pipeline.h

    r5045 r5046  
    2828    PipelineBuilder(llvm::Module * m, IDISA::IDISA_Builder * b);
    2929
    30         ~PipelineBuilder();
     30    ~PipelineBuilder();
    3131
    32         void CreateKernels(pablo::PabloFunction * function, bool UTF_16, bool isNameExpression);
     32    void CreateKernels(pablo::PabloFunction * function, bool UTF_16, bool isNameExpression);
    3333    llvm::Function * ExecuteKernels(bool CountOnly, bool UTF_16);
    3434
  • icGREP/icgrep-devel/icgrep/kernels/s2p_kernel.cpp

    r5045 r5046  
    8080
    8181void generateS2P_16Kernel(Module *, IDISA::IDISA_Builder * iBuilder, KernelBuilder * kBuilder) {
    82         kBuilder->addInputStream(16, "unit_pack");
    83         for(unsigned i = 0; i < 16; i++) {
    84                 kBuilder->addOutputStream(1);
    85         }
    86         kBuilder->prepareFunction();
     82    kBuilder->addInputStream(16, "unit_pack");
     83    for(unsigned i = 0; i < 16; i++) {
     84            kBuilder->addOutputStream(1);
     85    }
     86    kBuilder->prepareFunction();
    8787
    8888    Value * ptr = kBuilder->getInputStream(0);
  • icGREP/icgrep-devel/icgrep/kernels/s2p_kernel.h

    r5045 r5046  
    1515
    1616    void generateS2PKernel(llvm::Module *, IDISA::IDISA_Builder * iBuilder, KernelBuilder * kBuilder);
    17         void generateS2P_16Kernel(llvm::Module *, IDISA::IDISA_Builder * iBuilder, KernelBuilder * kBuilder);
     17    void generateS2P_16Kernel(llvm::Module *, IDISA::IDISA_Builder * iBuilder, KernelBuilder * kBuilder);
    1818    void generateS2P_idealKernel(llvm::Module *, IDISA::IDISA_Builder * iBuilder, KernelBuilder * kBuilder);
    1919
  • icGREP/icgrep-devel/icgrep/re/re_compiler.cpp

    r5045 r5046  
    4444
    4545void RE_Compiler::initializeRequiredStreams(Encoding encoding) {
    46         if (encoding.getType() == Encoding::Type::UTF_8) {
    47                 RE_Compiler::initializeRequiredStreams_utf8();
    48         }
    49         else if (encoding.getType() == Encoding::Type::UTF_16) {
    50                 RE_Compiler::initializeRequiredStreams_utf16();
    51         }
     46    if (encoding.getType() == Encoding::Type::UTF_8) {
     47            RE_Compiler::initializeRequiredStreams_utf8();
     48    }
     49    else if (encoding.getType() == Encoding::Type::UTF_16) {
     50            RE_Compiler::initializeRequiredStreams_utf16();
     51    }
    5252}
    5353               
     
    6464    mCRLF = acrlf;
    6565
    66         PabloAST * hi_surrogate = mCCCompiler.compileCC(makeCC(0xD800, 0xDBFF));
    67         //PabloAST * lo_surrogate = mCCCompiler.compileCC(makeCC(0xDC00, 0xDFFF));
    68         PabloAST * u16hi_hi_surrogate = mCCCompiler.compileCC(makeCC(0xD800, 0xDB00));    //u16hi_hi_surrogate = [\xD8-\xDB]
    69         PabloAST * u16hi_lo_surrogate = mCCCompiler.compileCC(makeCC(0xDC00, 0xDF00));    //u16hi_lo_surrogate = [\xDC-\xDF]
    70 
    71         PabloAST * invalidTemp = mPB.createAdvance(u16hi_hi_surrogate, 1, "InvalidTemp");
     66    PabloAST * hi_surrogate = mCCCompiler.compileCC(makeCC(0xD800, 0xDBFF));
     67    //PabloAST * lo_surrogate = mCCCompiler.compileCC(makeCC(0xDC00, 0xDFFF));
     68    PabloAST * u16hi_hi_surrogate = mCCCompiler.compileCC(makeCC(0xD800, 0xDB00));    //u16hi_hi_surrogate = [\xD8-\xDB]
     69    PabloAST * u16hi_lo_surrogate = mCCCompiler.compileCC(makeCC(0xDC00, 0xDF00));    //u16hi_lo_surrogate = [\xDC-\xDF]
     70
     71    PabloAST * invalidTemp = mPB.createAdvance(u16hi_hi_surrogate, 1, "InvalidTemp");
    7272    Assign * u16invalid = mPB.createAssign("u16invalid", mPB.createXor(invalidTemp, u16hi_lo_surrogate));//errors.Unicode=pablo.Advance(u16hi_hi_surrogate) ^ u16hi_lo_surrogate
    7373    Assign * u16valid = mPB.createAssign("u16valid", mPB.createNot(u16invalid));
    7474
    7575    PabloAST * u16single_temp = mPB.createOr(mCCCompiler.compileCC(makeCC(0x0000, 0xD7FF)), mCCCompiler.compileCC(makeCC(0xE000, 0xFFFF)));
    76         PabloAST * u16single = mPB.createAnd(u16single_temp, mPB.createNot(u16invalid));
     76    PabloAST * u16single = mPB.createAnd(u16single_temp, mPB.createNot(u16invalid));
    7777   
    7878    mNonFinal = mPB.createAssign("nonfinal", mPB.createAnd(hi_surrogate, u16valid));
    7979    mFinal = mPB.createNot(mPB.createOr(mNonFinal, u16invalid), "final");
    80         mInitial = mPB.createOr(u16single, hi_surrogate, "initial");
     80    mInitial = mPB.createOr(u16single, hi_surrogate, "initial");
    8181   
    8282    PabloAST * LB_chars = mPB.createOr(LF_VT_FF_CR, NEL_LS_PS);
     
    8787    mAny = mPB.createNot(lb, "any");
    8888    mFunction.setResult(1, mPB.createAssign("lf", mLineBreak));
    89         return;
     89    return;
    9090}
    9191void RE_Compiler::initializeRequiredStreams_utf8() {
  • icGREP/icgrep-devel/icgrep/utf16_encoder.cpp

    r5045 r5046  
    1919
    2020bool UTF16_Encoder::isLo_Surrogate(const codepoint_t cp) {
    21         return (cp >= 0xDC00) && (cp <= 0xDFFF);
     21    return (cp >= 0xDC00) && (cp <= 0xDFFF);
    2222}
    2323
     
    2525    codepoint_t retVal = 0;
    2626    const unsigned len = length(cp);
    27         if (len == 1) {
    28                 retVal = cp;
     27    if (len == 1) {
     28        retVal = cp;
     29    }
     30    else {
     31        codepoint_t code = cp - 0x010000;
     32        if (n == 1) {
     33                retVal = (code >> 10) | 0xD800;
    2934        }
    30         else {
    31                 codepoint_t code = cp - 0x010000;
    32                 if (n == 1) {
    33                         retVal = (code >> 10) | 0xD800;
    34                 }
    35                 if (n == 2) {
    36                         retVal = (code & 0x3FF) | 0xDC00;
    37                 }
     35        if (n == 2) {
     36                retVal = (code & 0x3FF) | 0xDC00;
    3837        }
    39         return retVal;
     38    }
     39    return retVal;
    4040}
    4141
    4242unsigned UTF16_Encoder::length(const codepoint_t cp) {
    43         if (cp <= 0xFFFF) {
    44                 return 1;
    45         }
    46         else {
    47                 return 2;
    48         }
     43    if (cp <= 0xFFFF) {
     44        return 1;
     45    }
     46    else {
     47        return 2;
     48    }
    4949}
    5050
    5151codepoint_t UTF16_Encoder::maxCodePoint(const unsigned length) {
    52         if (length == 1) {
    53                 return 0xFFFF;
    54         }
    55         else if (length == 2) {
    56                 return 0x10FFFF;
    57         }
     52    if (length == 1) {
     53        return 0xFFFF;
     54    }
     55    else if (length == 2) {
     56        return 0x10FFFF;
     57    }
    5858    throw std::runtime_error("Unexpected UTF16 Length: " + std::to_string(length));
    5959}
  • icGREP/icgrep-devel/icgrep/utf16_encoder.h

    r5045 r5046  
    1515    static bool isHi_Surrogate(const re::codepoint_t cp);
    1616    static bool isLo_Surrogate(const re::codepoint_t cp);
    17         static unsigned length(const re::codepoint_t cp);
     17    static unsigned length(const re::codepoint_t cp);
    1818    static re::codepoint_t maxCodePoint(const unsigned length);
    1919    static re::codepoint_t encodingByte(const re::codepoint_t cp, const unsigned n);
  • icGREP/icgrep-devel/icgrep/utf_encoding.h

    r5045 r5046  
    1818        ASCII
    1919        , UTF_8
    20                 , UTF_16
     20        , UTF_16
    2121    };
    2222
Note: See TracChangeset for help on using the changeset viewer.