Ignore:
Timestamp:
Jul 28, 2018, 6:26:56 PM (10 months ago)
Author:
cameron
Message:

GetStringValue method for string properties

File:
1 edited

Legend:

Unmodified
Added
Removed
  • icGREP/icgrep-devel/icgrep/UCD/PropertyObjects.cpp

    r5954 r6134  
    1515#include <re/re_analysis.h>
    1616#include <re/re_cc.h>
     17#include <codecvt>
    1718
    1819using namespace llvm;
     
    4344    llvm::report_fatal_error("GetCodepointSetMatchingPattern unsupported");
    4445}
    45    
     46
     // PropertyObject's own GetStringValue: the codepoint argument is not examined; the call
     // always reports the fatal error "GetStringValue unsupported" through
     // llvm::report_fatal_error instead of producing a string.
     // NOTE(review): presumably property kinds that hold string data supply their own
     // GetStringValue (StringPropertyObject and StringOverridePropertyObject define one in this file).
     47const std::string PropertyObject::GetStringValue(codepoint_t cp) {
     48    llvm::report_fatal_error("GetStringValue unsupported");
     49}
     50
    4651const UnicodeSet EnumeratedPropertyObject::GetCodepointSet(const std::string & value_spec) {
    4752    const int property_enum_val = GetPropertyValueEnumCode(value_spec);
     
    390395    engine.setRecordBreak(grep::GrepRecordBreakKind::LF);
    391396    engine.grepCodeGen(pattern, nullptr, & accum);
    392     engine.doGrep(mStringBuffer, mBufSize);
    393 
     397    const unsigned bufSize = mStringOffsets[mExplicitCps.size()];
     398    engine.doGrep(mStringBuffer, bufSize);
    394399    matched.insert(accum.getAccumulatedSet());
    395400    return matched;
     
    // Returns mSelfCodepointSet by value. StringPropertyObject::GetStringValue treats the
    // members of this set as codepoints whose string value is the codepoint itself.
    398403const UnicodeSet StringPropertyObject::GetReflexiveSet() {
    399404    return mSelfCodepointSet;
     405}
     406
     407const std::string StringPropertyObject::GetStringValue(codepoint_t cp) {
     408    if (mNullCodepointSet.contains(cp)) return "";
     409    if (mSelfCodepointSet.contains(cp)) {
     410        std::u32string s(1, cp);
     411        std::wstring_convert<std::codecvt_utf8<char32_t>, char32_t> conv;
     412        return conv.to_bytes(s);
     413    }
     414    // Otherwise, binary search through the explicit cps to find the index.
     415    // string index.
     416    unsigned lo = 0;
     417    unsigned hi = mExplicitCps.size()-1;
     418    while (lo < hi) {
     419        unsigned mid = (lo + hi)/2;
     420        if (cp <= mExplicitCps[mid]) hi = mid;
     421        else lo = mid + 1;
     422    }
     423    // Now lo == hi is the index of the desired string.
     424    unsigned offset = mStringOffsets[lo];
     425    unsigned lgth = mStringOffsets[lo+1] - offset - 1;
     426    return std::string(&mStringBuffer[offset], lgth);
    400427}
    401428
     
    427454    engine.setRecordBreak(grep::GrepRecordBreakKind::LF);
    428455    engine.grepCodeGen(pattern, nullptr, & accum);
    429     engine.doGrep(mStringBuffer, mBufSize);
     456    const unsigned bufSize = mStringOffsets[mExplicitCps.size()];
     457    engine.doGrep(mStringBuffer, bufSize);
    430458    base_set.insert(accum.getAccumulatedSet());
    431459    return base_set;
     
    436464}
    437465
     466const std::string StringOverridePropertyObject::GetStringValue(codepoint_t cp) {
     467    if (!mOverriddenSet.contains(cp)) return mBaseObject.GetStringValue(cp);
     468    // Otherwise, binary search through the explicit cps to find the index.
     469    // string index.
     470    unsigned lo = 0;
     471    unsigned hi = mExplicitCps.size()-1;
     472    while (lo < hi) {
     473        unsigned mid = (lo + hi)/2;
     474        if (cp <= mExplicitCps[mid]) hi = mid;
     475        else lo = mid + 1;
     476    }
     477    // Now lo == hi is the index of the desired string.
     478    unsigned offset = mStringOffsets[lo];
     479    unsigned lgth = mStringOffsets[lo+1] - offset - 1;
     480    return std::string(&mStringBuffer[offset], lgth);
     481}
    438482
    439483const std::string & ObsoletePropertyObject::GetPropertyValueGrepString() {
Note: See TracChangeset for help on using the changeset viewer.