EvEmu  0.8.4
11 September 2021
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Modules Pages
utfUtils.cpp
Go to the documentation of this file.
1 /*
2  ------------------------------------------------------------------------------------
3  LICENSE:
4  ------------------------------------------------------------------------------------
5  This file is part of EVEmu: EVE Online Server Emulator
6  Copyright 2006 - 2021 The EVEmu Team
7  For the latest information visit https://evemu.dev
8  ------------------------------------------------------------------------------------
9  This program is free software; you can redistribute it and/or modify it under
10  the terms of the GNU Lesser General Public License as published by the Free Software
11  Foundation; either version 2 of the License, or (at your option) any later
12  version.
13 
14  This program is distributed in the hope that it will be useful, but WITHOUT
15  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
16  FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more details.
17 
18  You should have received a copy of the GNU Lesser General Public License along with
19  this program; if not, write to the Free Software Foundation, Inc., 59 Temple
20  Place - Suite 330, Boston, MA 02111-1307, USA, or go to
21  http://www.gnu.org/copyleft/lesser.txt.
22  ------------------------------------------------------------------------------------
23  Author: eve-moo
24  */
25 
26 #include "utfUtils.h"
27 
/**
 * Encode a single code point as UTF-8.
 *
 * Well-formed UTF-8 (RFC 3629) only covers Unicode scalar values, i.e.
 * U+0000..U+10FFFF excluding the surrogate range U+D800..U+DFFF.  Any other
 * value is encoded as U+FFFD REPLACEMENT CHARACTER rather than producing the
 * obsolete 5/6-byte forms or the never-valid 0xFE/0xFF lead bytes.
 *
 * @param code Code point to encode.
 * @return The 1 to 4 byte UTF-8 sequence for the code point.
 */
std::string codeToUTF8(char32_t code)
{
    // Substitute anything that is not a Unicode scalar value.
    if ((code >= 0xD800 && code <= 0xDFFF) || code > 0x10FFFF)
    {
        code = 0xFFFD;
    }

    std::string out;
    if (code < 0x80)
    {
        // 0xxxxxxx
        out += static_cast<char>(code);
    }
    else if (code < 0x800)
    {
        // 110xxxxx 10xxxxxx
        out += static_cast<char>(0xC0 | (code >> 6));
        out += static_cast<char>(0x80 | (code & 0x3F));
    }
    else if (code < 0x10000)
    {
        // 1110xxxx 10xxxxxx 10xxxxxx
        out += static_cast<char>(0xE0 | (code >> 12));
        out += static_cast<char>(0x80 | ((code >> 6) & 0x3F));
        out += static_cast<char>(0x80 | (code & 0x3F));
    }
    else
    {
        // 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
        out += static_cast<char>(0xF0 | (code >> 18));
        out += static_cast<char>(0x80 | ((code >> 12) & 0x3F));
        out += static_cast<char>(0x80 | ((code >> 6) & 0x3F));
        out += static_cast<char>(0x80 | (code & 0x3F));
    }
    return out;
}
79 
/**
 * Encode a single code point as UTF-16.
 *
 * Code points below U+10000 become one code unit; the rest become a
 * high/low surrogate pair.  Values that are not Unicode scalar values
 * (U+D800..U+DFFF or above U+10FFFF) cannot be represented: the former would
 * yield a lone surrogate and the latter would overflow the 10-bit high
 * surrogate field, so both are encoded as U+FFFD REPLACEMENT CHARACTER.
 *
 * @param code Code point to encode.
 * @return One or two UTF-16 code units.
 */
std::u16string codeToUTF16(char32_t code)
{
    if ((code >= 0xD800 && code <= 0xDFFF) || code > 0x10FFFF)
    {
        code = 0xFFFD;
    }

    std::u16string out;
    if (code < 0x10000)
    {
        out.push_back(static_cast<char16_t>(code));
        return out;
    }

    // Split the 20-bit offset above the BMP into two 10-bit halves.
    const char32_t offset = code - 0x10000;
    out.push_back(static_cast<char16_t>(0xD800 + (offset >> 10)));
    out.push_back(static_cast<char16_t>(0xDC00 + (offset & 0x03FF)));
    return out;
}
100 
102 {
103 public:
104 
106 
107  UTFCodeParser(const UTFCodeParser& orig) { };
108 
109  virtual ~UTFCodeParser() { };
110 
116  int addChar(char c)
117  {
118  if (byte == 0)
119  {
120  // First byte of sequence.
121  if ((c & 0x80) == 0)
122  {
123  // Single byte code.
124  code = c;
125  return byte;
126  }
127  // Find number of bytes in code.
128  while (c & 0x80)
129  {
130  c <<= 1;
131  byte++;
132  }
133  code = c >> byte;
134  byte--;
135  if (byte == 0)
136  {
137  // Error: in middle of character.
138  code = 0;
139  return -1;
140  }
141  return byte;
142  }
143  // Continuing character.
144  if ((c & 0xC0) != 0x80)
145  {
146  // Error: continuation code expected and not found.
147  code = 0;
148  return -1;
149  }
150  byte--;
151  code <<= 6;
152  code += c & 0x3f;
153  return byte;
154  }
155 
161  int addChar(char16_t c)
162  {
163  if (byte == 0)
164  {
165  if (c < 0xD800 || c >= 0xE000)
166  {
167  code = c;
168  return byte;
169  }
170  if (c >= 0xDC00)
171  {
172  // Error! This is a continuation character.
173  code = 0;
174  return -1;
175  }
176  byte = 1;
177  code = c & 0x3FF;
178  return byte;
179  }
180  if (c < 0xDC00 || c >= 0xE000)
181  {
182  // Error: Expected continuation character not found.
183  if (c < 0xD800)
184  {
185  // Assume, good code following bad character.
186  code = c;
187  byte = 0;
188  return 0;
189  }
190  // Assume, good start code following bad character.
191  byte = 1;
192  code = c & 0x3FF;
193  return byte;
194  }
195  byte--;
196  code <<= 10;
197  code += c & 0x3FF;
198  code += 0x010000;
199  return byte;
200  }
201 
207  int addChar(char32_t c)
208  {
209  code = c;
210  byte = 0;
211  return byte;
212  }
213 
218  std::string getUTF8()
219  {
220  if (byte == 0)
221  {
222  return codeToUTF8(code);
223  }
224  return "";
225  }
226 
231  std::u16string getUTF16()
232  {
233  if (byte == 0)
234  {
235  return codeToUTF16(code);
236  }
237  return u"";
238  }
239 
244  std::u32string getUTF32()
245  {
246  if (byte == 0)
247  {
248  std::u32string out;
249  out += code;
250  return out;
251  }
252  return U"";
253  }
254 
258  void reset()
259  {
260  code = 0;
261  byte = 0;
262  }
263 
268  char32_t getCode()
269  {
270  return code;
271  }
272 
277  bool valid()
278  {
279  return byte == 0;
280  }
281 
282 private:
/** Code point decoded so far (complete only while byte is 0). */
char32_t code{0};
/** Number of input units still required to finish the current sequence. */
int byte{0};
285 
286 };
287 
288 
289 std::u16string utf8to16(std::string &str)
290 {
291  std::u16string out;
292  UTFCodeParser code;
293  for (char16_t c : str)
294  {
295  int res = code.addChar(c);
296  if (res == -1)
297  {
298  // There was an error.
299  return u"";
300  }
301  if (res == 0)
302  {
303  out += code.getUTF16();
304  }
305  }
306  return out;
307 }
308 
309 std::string utf16to8(std::u16string &str)
310 {
311  std::string out;
312  UTFCodeParser code;
313  for (char c : str)
314  {
315  int res = code.addChar(c);
316  if (res == -1)
317  {
318  // There was an error.
319  return "";
320  }
321  if (res == 0)
322  {
323  out += code.getUTF8();
324  }
325  }
326  return out;
327 }
328 
329 
330 
virtual ~UTFCodeParser()
Definition: utfUtils.cpp:109
bool valid()
Definition: utfUtils.cpp:277
std::u16string codeToUTF16(char32_t code)
Definition: utfUtils.cpp:85
std::string codeToUTF8(char32_t code)
Definition: utfUtils.cpp:33
UTFCodeParser(const UTFCodeParser &orig)
Definition: utfUtils.cpp:107
char32_t code
Definition: utfUtils.cpp:283
std::u16string utf8to16(std::string &str)
Definition: utfUtils.cpp:289
std::string getUTF8()
Definition: utfUtils.cpp:218
int addChar(char c)
Definition: utfUtils.cpp:116
std::u16string getUTF16()
Definition: utfUtils.cpp:231
char32_t getCode()
Definition: utfUtils.cpp:268
int addChar(char32_t c)
Definition: utfUtils.cpp:207
std::u32string getUTF32()
Definition: utfUtils.cpp:244
void reset()
Definition: utfUtils.cpp:258
int addChar(char16_t c)
Definition: utfUtils.cpp:161
std::string utf16to8(std::u16string &str)
Definition: utfUtils.cpp:309