Ignore:
Timestamp:
Aug 5, 2014, 2:37:07 PM (5 years ago)
Author:
daled
Message:

Multibyte character code classes parsed from hex notation are now using named byte sequences.

File:
1 edited

Legend:

Unmodified
Added
Removed
  • icGREP/icgrep-devel/icgrep/utf8_encoder.cpp

    r3940 r3961  
    129129    {
    130130        Seq* seq = new Seq();
     131        seq->setType((u8Prefix(hbyte) ? Seq::Byte : Seq::Normal));
    131132        seq->AddREListItem(makeByteClass(hbyte));
    132133        seq->AddREListItem(rangeToUTF8_helper(lo, hi, n+1, hlen));
     
    165166}
    166167
     168bool UTF8_Encoder::u8Prefix(int cp)
     169{
     170    bool retVal = false;
     171
     172    if ((cp >= 0xC2) && (cp <= 0xDF))
     173    {
     174        retVal = true;
     175    }
     176    else if ((cp >= 0xE0) && (cp <= 0xEF))
     177    {
     178        retVal = true;
     179    }
     180    else if ((cp >= 0xF0) && (cp <= 0xF4))
     181    {
     182        retVal = true;
     183    }
     184
     185    return retVal;
     186}
     187
    167188CC* UTF8_Encoder::makeByteRange(int lo, int hi)
    168189{
Note: See TracChangeset for help on using the changeset viewer.