source: icGREP/icgrep-devel/icgrep/utf8_encoder.h @ 4814

Last change on this file since 4814 was 4814, checked in by nmedfort, 4 years ago

Added union/diff/intersection functionality to RE_Compiler. Removed toUTF8 pass in favour of using the UCD_Compiler.

File size: 1.0 KB
Line 
1/*
2 *  Copyright (c) 2014 International Characters.
3 *  This software is licensed to the public under the Open Software License 3.0.
4 *  icgrep is a trademark of International Characters.
5 */
6
7#ifndef UTF8_ENCODER_H
8#define UTF8_ENCODER_H
9
10//Regular Expressions
11#include <re/re_cc.h>
12#include <cc/cc_namemap.hpp>
13
// Namespace cc: this header only declares the entities below; no function
// bodies are present here, so the definitions live in another file.
14namespace cc {
15
// Forward declaration of CC_NameMap. Nothing declared in this header refers
// to it, and <cc/cc_namemap.hpp> is already included above.
// NOTE(review): possibly a leftover from earlier code - confirm before removing.
16class CC_NameMap;
17
// UTF8_Encoder groups static helper functions that take and/or return
// re::codepoint_t values; no data members or non-static members are declared.
// The per-function notes below are inferred from the names alone (the
// implementation is not visible from this header) - confirm each against
// the definitions before relying on it.
18struct UTF8_Encoder {
    // Presumably reports whether cp is a UTF-8 prefix (lead) byte.
    // NOTE(review): the parameter is typed as a code point, but the name
    // suggests a byte value is expected - confirm.
19    static bool isPrefix(const re::codepoint_t cp);
    // Presumably the number of bytes in the UTF-8 encoding of cp - confirm.
20    static unsigned length(const re::codepoint_t cp);
    // Presumably the largest code point whose UTF-8 encoding fits in
    // `length` bytes - confirm, including behaviour for out-of-range lengths.
21    static re::codepoint_t maxCodePoint(const unsigned length);
    // Presumably the n-th byte of the UTF-8 encoding of cp, returned as a
    // codepoint_t. Whether n is 0- or 1-based is not visible here - confirm.
22    static re::codepoint_t encodingByte(const re::codepoint_t cp, const unsigned n);
    // NOTE(review): exact semantics are not evident from the header.
    // Presumably tests whether the encoding bytes of cp after byte n are all
    // at their lowest possible values - confirm.
23    static bool isLowCodePointAfterByte(const re::codepoint_t cp, const unsigned n);
    // Presumably the counterpart of isLowCodePointAfterByte for the highest
    // possible trailing byte values - confirm.
24    static bool isHighCodePointAfterByte(const re::codepoint_t cp, const unsigned n);
    // Presumably the smallest code point whose encoding shares its first n
    // bytes with the encoding of cp - confirm.
25    static re::codepoint_t minCodePointWithCommonBytes(const re::codepoint_t cp, const unsigned n);
    // Presumably the largest code point whose encoding shares its first n
    // bytes with the encoding of cp - confirm.
26    static re::codepoint_t maxCodePointWithCommonBytes(const re::codepoint_t cp, const unsigned n);
27};
28
29}
30
31#endif // UTF8_ENCODER_H
Note: See TracBrowser for help on using the repository browser.