source: icGREP/icgrep-devel/icgrep/utf8_encoder.h @ 4793

Last change on this file since 4793 was 4615, checked in by nmedfort, 4 years ago

Temporary check-in

File size: 1.4 KB
Rev Line
[3850]1/*
2 *  Copyright (c) 2014 International Characters.
3 *  This software is licensed to the public under the Open Software License 3.0.
4 *  icgrep is a trademark of International Characters.
5 */
6
7#ifndef UTF8_ENCODER_H
8#define UTF8_ENCODER_H
9
10//Regular Expressions
[4249]11#include <re/re_cc.h>
12#include <cc/cc_namemap.hpp>
[3850]13
[4249]14namespace cc {
[3850]15
[4249]16class CC_NameMap;
17
[4614]18class UTF8_Encoder {
[3850]19public:
[4249]20    static re::RE * toUTF8(CC_NameMap & nameMap, re::RE * ast);
[4614]21
[4615]22    static bool isPrefix(const re::codepoint_t cp);
23    static unsigned length(const re::codepoint_t cp);
[4614]24    static re::codepoint_t maxCodePoint(const unsigned length);
[4615]25    static re::codepoint_t encodingByte(const re::codepoint_t cp, const unsigned n);
26    static bool isLowCodePointAfterByte(const re::codepoint_t cp, const unsigned n);
27    static bool isHighCodePointAfterByte(const re::codepoint_t cp, const unsigned n);
28    static re::codepoint_t minCodePointWithCommonBytes(const re::codepoint_t cp, const unsigned n);
29    static re::codepoint_t maxCodePointWithCommonBytes(const re::codepoint_t cp, const unsigned n);
[3850]30private:
[4614]31    static re::RE * rangeToUTF8(const re::interval_t & item);
[4612]32    static re::RE * rangeToUTF8(const re::codepoint_t lo, const re::codepoint_t hi, const unsigned index, const unsigned max);
33    static re::CC * makeByteClass(const re::codepoint_t cp);
34    static re::CC * makeByteRange(const re::codepoint_t lo, const re::codepoint_t hi);
[3850]35};
36
[4249]37}
38
[3850]39#endif // UTF8_ENCODER_H
Note: See TracBrowser for help on using the repository browser.