EvEmu  0.8.4
11 September 2021
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Modules Pages
UTFCodeParser Class Reference

Public Member Functions

 UTFCodeParser ()
 
 UTFCodeParser (const UTFCodeParser &orig)
 
virtual ~UTFCodeParser ()
 
int addChar (char c)
 
int addChar (char16_t c)
 
int addChar (char32_t c)
 
std::string getUTF8 ()
 
std::u16string getUTF16 ()
 
std::u32string getUTF32 ()
 
void reset ()
 
char32_t getCode ()
 
bool valid ()
 

Private Attributes

char32_t code = 0
 
int byte = 0
 

Detailed Description

Definition at line 101 of file utfUtils.cpp.

Constructor & Destructor Documentation

UTFCodeParser::UTFCodeParser ( )
inline

Definition at line 105 of file utfUtils.cpp.

105 { };
UTFCodeParser::UTFCodeParser ( const UTFCodeParser & orig)
inline

Definition at line 107 of file utfUtils.cpp.

107 { };
virtual UTFCodeParser::~UTFCodeParser ( )
inline virtual

Definition at line 109 of file utfUtils.cpp.

109 { };

Member Function Documentation

int UTFCodeParser::addChar ( char  c)
inline

Add a UTF8 character to the code.

Parameters
c    The UTF8 character.
Returns
Number of additional characters needed: 0 = complete, -1 on error.

Definition at line 116 of file utfUtils.cpp.

References byte, and code.

Referenced by utf16to8(), and utf8to16().

117  {
118  if (byte == 0)
119  {
120  // First byte of sequence.
121  if ((c & 0x80) == 0)
122  {
123  // Single byte code.
124  code = c;
125  return byte;
126  }
127  // Find number of bytes in code.
128  while (c & 0x80)
129  {
130  c <<= 1;
131  byte++;
132  }
133  code = c >> byte;
134  byte--;
135  if (byte == 0)
136  {
137  // Error: in middle of character.
138  code = 0;
139  return -1;
140  }
141  return byte;
142  }
143  // Continuing character.
144  if ((c & 0xC0) != 0x80)
145  {
146  // Error: continuation code expected and not found.
147  code = 0;
148  return -1;
149  }
150  byte--;
151  code <<= 6;
152  code += c & 0x3f;
153  return byte;
154  }
char32_t code
Definition: utfUtils.cpp:283

Here is the caller graph for this function:

int UTFCodeParser::addChar ( char16_t  c)
inline

Add a UTF16 character to the code.

Parameters
c    The UTF16 character.
Returns
Number of additional characters needed: 0 = complete, -1 on error.

Definition at line 161 of file utfUtils.cpp.

References byte, and code.

162  {
163  if (byte == 0)
164  {
165  if (c < 0xD800 || c >= 0xE000)
166  {
167  code = c;
168  return byte;
169  }
170  if (c >= 0xDC00)
171  {
172  // Error! This is a continuation character.
173  code = 0;
174  return -1;
175  }
176  byte = 1;
177  code = c & 0x3FF;
178  return byte;
179  }
180  if (c < 0xDC00 || c >= 0xE000)
181  {
182  // Error: Expected continuation character not found.
183  if (c < 0xD800)
184  {
185  // Assume, good code following bad character.
186  code = c;
187  byte = 0;
188  return 0;
189  }
190  // Assume, good start code following bad character.
191  byte = 1;
192  code = c & 0x3FF;
193  return byte;
194  }
195  byte--;
196  code <<= 10;
197  code += c & 0x3FF;
198  code += 0x010000;
199  return byte;
200  }
char32_t code
Definition: utfUtils.cpp:283
int UTFCodeParser::addChar ( char32_t  c)
inline

Add a UTF32 character to the code.

Parameters
c    The UTF32 character.
Returns
Number of additional characters needed: 0 = complete, -1 on error.

Definition at line 207 of file utfUtils.cpp.

References byte, and code.

208  {
209  code = c;
210  byte = 0;
211  return byte;
212  }
char32_t code
Definition: utfUtils.cpp:283
char32_t UTFCodeParser::getCode ( )
inline

Get the current character code value.

Returns
The current character code value.

Definition at line 268 of file utfUtils.cpp.

References code.

269  {
270  return code;
271  }
char32_t code
Definition: utfUtils.cpp:283
std::u16string UTFCodeParser::getUTF16 ( )
inline

Get the current code as a UTF16 character string.

Returns
The UTF16 representation of the code, or an empty string if the code is not valid.

Definition at line 231 of file utfUtils.cpp.

References byte, code, and codeToUTF16().

Referenced by utf8to16().

232  {
233  if (byte == 0)
234  {
235  return codeToUTF16(code);
236  }
237  return u"";
238  }
std::u16string codeToUTF16(char32_t code)
Definition: utfUtils.cpp:85
char32_t code
Definition: utfUtils.cpp:283

Here is the call graph for this function:

Here is the caller graph for this function:

std::u32string UTFCodeParser::getUTF32 ( )
inline

Get the current code as a UTF32 character string.

Returns
The UTF32 representation of the code, or an empty string if the code is not valid.

Definition at line 244 of file utfUtils.cpp.

References byte, and code.

245  {
246  if (byte == 0)
247  {
248  std::u32string out;
249  out += code;
250  return out;
251  }
252  return U"";
253  }
char32_t code
Definition: utfUtils.cpp:283
std::string UTFCodeParser::getUTF8 ( )
inline

Get the current code as a UTF8 character string.

Returns
The UTF8 representation of the code, or an empty string if the code is not valid.

Definition at line 218 of file utfUtils.cpp.

References byte, code, and codeToUTF8().

Referenced by utf16to8().

219  {
220  if (byte == 0)
221  {
222  return codeToUTF8(code);
223  }
224  return "";
225  }
std::string codeToUTF8(char32_t code)
Definition: utfUtils.cpp:33
char32_t code
Definition: utfUtils.cpp:283

Here is the call graph for this function:

Here is the caller graph for this function:

void UTFCodeParser::reset ( )
inline

Reset the state.

Definition at line 258 of file utfUtils.cpp.

References byte, and code.

259  {
260  code = 0;
261  byte = 0;
262  }
char32_t code
Definition: utfUtils.cpp:283
bool UTFCodeParser::valid ( )
inline

Is this current state a valid code?

Returns
True if code is valid.

Definition at line 277 of file utfUtils.cpp.

References byte.

278  {
279  return byte == 0;
280  }

Member Data Documentation

int UTFCodeParser::byte = 0
private

Definition at line 284 of file utfUtils.cpp.

Referenced by addChar(), getUTF16(), getUTF32(), getUTF8(), reset(), and valid().

char32_t UTFCodeParser::code = 0
private

Definition at line 283 of file utfUtils.cpp.

Referenced by addChar(), getCode(), getUTF16(), getUTF32(), getUTF8(), and reset().


The documentation for this class was generated from the following file: