source: icGREP/icgrep-devel/icgrep-1.00/utf8_encoder.h @ 4529

Last change on this file since 4529 was 4249, checked in by nmedfort, 5 years ago

Big update to use CC_NameMap; removed CharClass and RE_Reducer.

File size: 1.0 KB
Line 
1/*
2 *  Copyright (c) 2014 International Characters.
3 *  This software is licensed to the public under the Open Software License 3.0.
4 *  icgrep is a trademark of International Characters.
5 */
6
7#ifndef UTF8_ENCODER_H
8#define UTF8_ENCODER_H
9
10//Regular Expressions
11#include <re/re_cc.h>
12#include <cc/cc_namemap.hpp>
13
14namespace cc {
15
  // Forward declaration of CC_NameMap, the type toUTF8() takes by reference.
  // NOTE(review): <cc/cc_namemap.hpp> is already included by this header, so
  // this declaration may be redundant -- confirm before removing it.
16class CC_NameMap;
17
  // UTF8_Encoder groups a set of static functions; the class declares no data
  // members and no constructors. toUTF8() is the only public member, everything
  // else is a private helper. Only declarations appear in this header; the
  // definitions live elsewhere.
  // NOTE(review): going by the names, this rewrites a regular-expression AST
  // into UTF-8 byte-level form -- confirm against the implementation.
18class UTF8_Encoder
19{
20public:
      // Public entry point.
      //   nameMap - CC_NameMap passed by non-const reference, so the callee is
      //             allowed to modify it (whether it does is not visible here).
      //   ast     - pointer to the re::RE to process.
      // Returns a pointer to an re::RE.
      // NOTE(review): ownership of the returned pointer, and whether `ast` is
      // modified in place or may be null, cannot be told from this declaration.
21    static re::RE * toUTF8(CC_NameMap & nameMap, re::RE * ast);
22private:
      // Overload taking one re::CharSetItem by const reference; returns an
      // re::RE pointer. Presumably expands the item's code point range into
      // UTF-8 byte sequences -- TODO confirm.
23    static re::RE * rangeToUTF8(const re::CharSetItem & item);
      // Overload taking explicit bounds `lo` and `hi` plus two unsigned values,
      // `index` and `max`; returns an re::RE pointer.
      // NOTE(review): `index`/`max` look like the current byte position and the
      // total encoded length, and the bounds look inclusive -- verify both.
24    static re::RE * rangeToUTF8(const re::CodePointType lo, const re::CodePointType hi, const unsigned index, const unsigned max);
      // Builds an re::CC from a single value `cp`. Presumably a class matching
      // exactly that byte -- TODO confirm.
25    static re::CC * makeByteClass(const re::CodePointType cp);
      // Builds an re::CC from the pair `lo`, `hi`. Presumably a class matching
      // the byte range lo..hi -- TODO confirm inclusivity.
26    static re::CC * makeByteRange(const re::CodePointType lo, const re::CodePointType hi);
      // Predicate over an unsigned value `cp`.
      // NOTE(review): the name suggests it tests for a UTF-8 prefix (lead)
      // byte -- confirm what values are expected.
27    static bool isUTF8Prefix(const unsigned cp);
      // Maps an unsigned value `cp` to an unsigned result. Presumably the number
      // of bytes in the UTF-8 encoding of `cp` -- TODO confirm.
28    static unsigned lenUTF8(const unsigned cp);
      // Maps an unsigned `length` to an unsigned result. Presumably the largest
      // code point encodable in `length` UTF-8 bytes -- TODO confirm.
29    static unsigned maxCodePoint(const unsigned length);
      // Takes `codepoint` and an unsigned `n`; returns an re::CodePointType.
      // Presumably the n-th byte of the UTF-8 encoding of `codepoint`; whether
      // `n` is 0- or 1-based is not visible here -- TODO confirm.
30    static re::CodePointType u8byte(const re::CodePointType codepoint, const unsigned n);
31};
32
33}
34
35#endif // UTF8_ENCODER_H
Note: See TracBrowser for help on using the repository browser.