summaryrefslogtreecommitdiff
path: root/src/common/string_util.cpp
diff options
context:
space:
mode:
authorGravatar bunnei2014-04-08 19:25:03 -0400
committerGravatar bunnei2014-04-08 19:25:03 -0400
commit63e46abdb8764bc97e91bae862c8d461e61b1965 (patch)
treee73f4aa25d7b4015a265e7bbfb6004dab7561027 /src/common/string_util.cpp
parentfixed some license headers that I missed (diff)
downloadyuzu-63e46abdb8764bc97e91bae862c8d461e61b1965.tar.gz
yuzu-63e46abdb8764bc97e91bae862c8d461e61b1965.tar.xz
yuzu-63e46abdb8764bc97e91bae862c8d461e61b1965.zip
got rid of 'src' folders in each sub-project
Diffstat (limited to 'src/common/string_util.cpp')
-rw-r--r--src/common/string_util.cpp531
1 files changed, 531 insertions, 0 deletions
diff --git a/src/common/string_util.cpp b/src/common/string_util.cpp
new file mode 100644
index 000000000..415dcbbc7
--- /dev/null
+++ b/src/common/string_util.cpp
@@ -0,0 +1,531 @@
1// Copyright 2013 Dolphin Emulator Project
2// Licensed under GPLv2
3// Refer to the license.txt file included.
4
5#include <stdlib.h>
6#include <stdio.h>
7#include <algorithm>
8
9#include "common.h"
10#include "common_paths.h"
11#include "string_util.h"
12
13#ifdef _WIN32
14 #include <Windows.h>
15#else
16 #include <iconv.h>
17 #include <errno.h>
18#endif
19
20// faster than sscanf
21bool AsciiToHex(const char* _szValue, u32& result)
22{
23 char *endptr = NULL;
24 const u32 value = strtoul(_szValue, &endptr, 16);
25
26 if (!endptr || *endptr)
27 return false;
28
29 result = value;
30 return true;
31}
32
// Formats `format`/`args` printf-style into the caller-provided buffer.
//
// out:     destination buffer.
// outsize: capacity of `out` in bytes, including the terminating NUL.
//
// Returns true when the complete result (possibly empty) fit into `out`.
// Returns false on a formatting error or truncation; unless the buffer is
// unusable (`out` null or `outsize <= 0`) it is still NUL-terminated.
bool CharArrayFromFormatV(char* out, int outsize, const char* format, va_list args)
{
    // Without at least one byte of storage there is nowhere to put the
    // terminator, and out[outsize - 1] below would be out of bounds.
    if (!out || outsize <= 0)
        return false;

    int writtenCount;

#ifdef _WIN32
    // You would think *printf are simple, right? Iterate on each character,
    // if it's a format specifier handle it properly, etc.
    //
    // Nooooo. Not according to the C standard.
    //
    // According to the C99 standard (7.19.6.1 "The fprintf function")
    //     The format shall be a multibyte character sequence
    //
    // Because some character encodings might have '%' signs in the middle of
    // a multibyte sequence (SJIS for example only specifies that the first
    // byte of a 2 byte sequence is "high", the second byte can be anything),
    // printf functions have to decode the multibyte sequences and try their
    // best to not screw up.
    //
    // Unfortunately, on Windows, the locale for most languages is not UTF-8
    // as we would need. Notably, for zh_TW, Windows chooses EUC-CN as the
    // locale, and completely fails when trying to decode UTF-8 as EUC-CN.
    //
    // On the other hand, the fix is simple: because we use UTF-8, no such
    // multibyte handling is required as we can simply assume that no '%' char
    // will be present in the middle of a multibyte sequence.
    //
    // This is why we lookup an ANSI (cp1252) locale here and use _vsnprintf_l.
    static locale_t c_locale = NULL;
    if (!c_locale)
        c_locale = _create_locale(LC_ALL, ".1252");
    writtenCount = _vsnprintf_l(out, outsize, format, c_locale, args);
#else
    writtenCount = vsnprintf(out, outsize, format, args);
#endif

    // Zero characters written is a valid result (e.g. "%s" with an empty
    // string); only a negative count or a count that does not fit is a failure.
    if (writtenCount >= 0 && writtenCount < outsize)
    {
        out[writtenCount] = '\0';
        return true;
    }

    // Error or truncation: force termination so callers never see an
    // unterminated buffer.
    out[outsize - 1] = '\0';
    return false;
}
80
81std::string StringFromFormat(const char* format, ...)
82{
83 va_list args;
84 char *buf = NULL;
85#ifdef _WIN32
86 int required = 0;
87
88 va_start(args, format);
89 required = _vscprintf(format, args);
90 buf = new char[required + 1];
91 CharArrayFromFormatV(buf, required + 1, format, args);
92 va_end(args);
93
94 std::string temp = buf;
95 delete[] buf;
96#else
97 va_start(args, format);
98 if (vasprintf(&buf, format, args) < 0)
99 ERROR_LOG(COMMON, "Unable to allocate memory for string");
100 va_end(args);
101
102 std::string temp = buf;
103 free(buf);
104#endif
105 return temp;
106}
107
108// For Debugging. Read out an u8 array.
109std::string ArrayToString(const u8 *data, u32 size, int line_len, bool spaces)
110{
111 std::ostringstream oss;
112 oss << std::setfill('0') << std::hex;
113
114 for (int line = 0; size; ++data, --size)
115 {
116 oss << std::setw(2) << (int)*data;
117
118 if (line_len == ++line)
119 {
120 oss << '\n';
121 line = 0;
122 }
123 else if (spaces)
124 oss << ' ';
125 }
126
127 return oss.str();
128}
129
// Removes leading and trailing whitespace (space, tab, CR, LF), so that
// "  hej  " becomes "hej". A string made only of whitespace yields "".
std::string StripSpaces(const std::string &str)
{
    const char* const whitespace = " \t\r\n";

    const size_t first = str.find_first_not_of(whitespace);
    if (first == std::string::npos)
        return std::string();

    const size_t last = str.find_last_not_of(whitespace);
    return str.substr(first, last - first + 1);
}
140
// "\"hello\"" is turned to "hello".
// This one assumes that the string has already been space stripped in both
// ends, as done by StripSpaces above, for example.
//
// Only a matching pair of surrounding double quotes is removed; any other
// input, including a lone '"', is returned unchanged.
std::string StripQuotes(const std::string& s)
{
    // Two characters are needed for an opening and a closing quote. Without
    // the length check a single '"' would count as both and be stripped.
    if (s.size() >= 2 && '\"' == s[0] && '\"' == *s.rbegin())
        return s.substr(1, s.size() - 2);

    return s;
}
151
152bool TryParse(const std::string &str, u32 *const output)
153{
154 char *endptr = NULL;
155
156 // Reset errno to a value other than ERANGE
157 errno = 0;
158
159 unsigned long value = strtoul(str.c_str(), &endptr, 0);
160
161 if (!endptr || *endptr)
162 return false;
163
164 if (errno == ERANGE)
165 return false;
166
167#if ULONG_MAX > UINT_MAX
168 if (value >= 0x100000000ull
169 && value <= 0xFFFFFFFF00000000ull)
170 return false;
171#endif
172
173 *output = static_cast<u32>(value);
174 return true;
175}
176
// Parses `str` as a boolean: "1" or "true" gives true, "0" or "false" gives
// false. The words are matched case-insensitively.
//
// Returns false, leaving *output untouched, for any other input.
bool TryParse(const std::string &str, bool *const output)
{
    const char* const text = str.c_str();

    if (str == "1" || strcasecmp("true", text) == 0)
    {
        *output = true;
        return true;
    }

    if (str == "0" || strcasecmp("false", text) == 0)
    {
        *output = false;
        return true;
    }

    return false;
}
188
// Returns the decimal text form of `value`.
std::string StringFromInt(int value)
{
    // 16 bytes hold the sign, the digits of a 32-bit int and the NUL.
    char digits[16];
    sprintf(digits, "%i", value);

    return std::string(digits);
}
195
// Returns "True" or "False" for the given flag.
std::string StringFromBool(bool value)
{
    if (value)
        return "True";

    return "False";
}
200
// Splits `full_path` into directory, file name and extension.
//
// _pPath:      receives everything up to and including the last separator.
// _pFilename:  receives the name between the directory and the extension.
// _pExtension: receives the extension including its leading '.', or "" when
//              the last '.' sits in the directory part or there is none.
// Any of the three output pointers may be null to skip that component.
//
// Returns false only when `full_path` is empty.
bool SplitPath(const std::string& full_path, std::string* _pPath, std::string* _pFilename, std::string* _pExtension)
{
    if (full_path.empty())
        return false;

    // windows needs the : included for something like just "C:" to be considered a directory
#ifdef _WIN32
    const char* const separators = "/:";
#else
    const char* const separators = "/";
#endif

    // Index one past the last separator, or 0 when there is no directory.
    const size_t last_sep = full_path.find_last_of(separators);
    const size_t dir_end = (std::string::npos == last_sep) ? 0 : last_sep + 1;

    // A dot only starts an extension when it belongs to the file name.
    const size_t last_dot = full_path.rfind('.');
    const bool has_extension = (std::string::npos != last_dot) && (last_dot >= dir_end);
    const size_t fname_end = has_extension ? last_dot : full_path.size();

    if (_pPath)
        *_pPath = full_path.substr(0, dir_end);

    if (_pFilename)
        *_pFilename = full_path.substr(dir_end, fname_end - dir_end);

    if (_pExtension)
        *_pExtension = full_path.substr(fname_end);

    return true;
}
232
233void BuildCompleteFilename(std::string& _CompleteFilename, const std::string& _Path, const std::string& _Filename)
234{
235 _CompleteFilename = _Path;
236
237 // check for seperator
238 if (DIR_SEP_CHR != *_CompleteFilename.rbegin())
239 _CompleteFilename += DIR_SEP_CHR;
240
241 // add the filename
242 _CompleteFilename += _Filename;
243}
244
// Splits `str` at every `delim` and stores the pieces in `output`, replacing
// its previous contents. Empty pieces between delimiters are kept; an empty
// piece after a trailing delimiter is not produced.
void SplitString(const std::string& str, const char delim, std::vector<std::string>& output)
{
    std::istringstream stream(str);
    output.clear();

    std::string token;
    while (std::getline(stream, token, delim))
        output.push_back(token);
}
255
// Returns a copy of `in` with every tab replaced by `tab_size` spaces.
//
// tab_size: number of spaces per tab. Zero or a negative value removes the
//           tabs without inserting anything.
std::string TabsToSpaces(int tab_size, const std::string &in)
{
    // Clamp so that a negative size cannot turn into a huge unsigned count.
    const size_t width = tab_size > 0 ? static_cast<size_t>(tab_size) : 0;

    std::string out;
    out.reserve(in.size());

    // Single pass over the input; the result is built once instead of
    // searching the whole string again after every replacement.
    for (size_t i = 0; i < in.size(); ++i)
    {
        if (in[i] == '\t')
            out.append(width, ' ');
        else
            out += in[i];
    }

    return out;
}
267
// Returns `result` with every occurrence of `src` replaced by `dest`.
//
// Occurrences are found left to right, and text inserted by a replacement is
// never scanned again, so the call terminates even when `dest` contains
// `src`. An empty `src` matches nothing and the input is returned unchanged.
std::string ReplaceAll(std::string result, const std::string& src, const std::string& dest)
{
    // An empty pattern is found at every position and would loop forever.
    if (src.empty())
        return result;

    size_t pos = 0;
    while ((pos = result.find(src, pos)) != std::string::npos)
    {
        result.replace(pos, src.size(), dest);
        // Resume after the inserted text so it is not matched again.
        pos += dest.size();
    }

    return result;
}
278
279// UriDecode and UriEncode are from http://www.codeguru.com/cpp/cpp/string/conversions/print.php/c12759
280// by jinq0123 (November 2, 2006)
281
282// Uri encode and decode.
283// RFC1630, RFC1738, RFC2396
284
285//#include <string>
286//#include <assert.h>
287
// Maps a byte to the value of the hexadecimal digit it represents
// ('0'-'9', 'A'-'F', 'a'-'f'); every other byte maps to 16 ("not a digit").
const char HEX2DEC[256] =
{
    /*       0  1  2  3   4  5  6  7   8  9  A  B   C  D  E  F */
    /* 0 */ 16,16,16,16, 16,16,16,16, 16,16,16,16, 16,16,16,16,
    /* 1 */ 16,16,16,16, 16,16,16,16, 16,16,16,16, 16,16,16,16,
    /* 2 */ 16,16,16,16, 16,16,16,16, 16,16,16,16, 16,16,16,16,
    /* 3 */  0, 1, 2, 3,  4, 5, 6, 7,  8, 9,16,16, 16,16,16,16,

    /* 4 */ 16,10,11,12, 13,14,15,16, 16,16,16,16, 16,16,16,16,
    /* 5 */ 16,16,16,16, 16,16,16,16, 16,16,16,16, 16,16,16,16,
    /* 6 */ 16,10,11,12, 13,14,15,16, 16,16,16,16, 16,16,16,16,
    /* 7 */ 16,16,16,16, 16,16,16,16, 16,16,16,16, 16,16,16,16,

    /* 8 */ 16,16,16,16, 16,16,16,16, 16,16,16,16, 16,16,16,16,
    /* 9 */ 16,16,16,16, 16,16,16,16, 16,16,16,16, 16,16,16,16,
    /* A */ 16,16,16,16, 16,16,16,16, 16,16,16,16, 16,16,16,16,
    /* B */ 16,16,16,16, 16,16,16,16, 16,16,16,16, 16,16,16,16,

    /* C */ 16,16,16,16, 16,16,16,16, 16,16,16,16, 16,16,16,16,
    /* D */ 16,16,16,16, 16,16,16,16, 16,16,16,16, 16,16,16,16,
    /* E */ 16,16,16,16, 16,16,16,16, 16,16,16,16, 16,16,16,16,
    /* F */ 16,16,16,16, 16,16,16,16, 16,16,16,16, 16,16,16,16
};

// Decodes %XX escape sequences in `sSrc`.
//
// Note from RFC1630: "Sequences which start with a percent sign
// but are not followed by two hexadecimal characters (0-9, A-F) are reserved
// for future extension". Such sequences, including a '%' too close to the
// end of the input, are copied through unchanged. Lowercase hex digits are
// accepted as well. '+' is not translated to a space.
std::string UriDecode(const std::string & sSrc)
{
    const size_t length = sSrc.length();

    // Decoding never makes the text longer, so one reservation is enough.
    std::string decoded;
    decoded.reserve(length);

    size_t i = 0;
    while (i < length)
    {
        // An escape needs the '%' plus two more characters inside the input.
        if (sSrc[i] == '%' && length - i >= 3)
        {
            const char high = HEX2DEC[static_cast<unsigned char>(sSrc[i + 1])];
            const char low = HEX2DEC[static_cast<unsigned char>(sSrc[i + 2])];

            if (16 != high && 16 != low)
            {
                decoded += static_cast<char>((high << 4) + low);
                i += 3;
                continue;
            }
        }

        decoded += sSrc[i];
        ++i;
    }

    return decoded;
}
351
// Only alphanum is safe.
// Non-zero entries mark the bytes UriEncode copies through unescaped:
// '0'-'9', 'A'-'Z' and 'a'-'z'.
const char SAFE[256] =
{
    /*      0 1 2 3  4 5 6 7  8 9 A B  C D E F */
    /* 0 */ 0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0,
    /* 1 */ 0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0,
    /* 2 */ 0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0,
    /* 3 */ 1,1,1,1, 1,1,1,1, 1,1,0,0, 0,0,0,0,

    /* 4 */ 0,1,1,1, 1,1,1,1, 1,1,1,1, 1,1,1,1,
    /* 5 */ 1,1,1,1, 1,1,1,1, 1,1,1,0, 0,0,0,0,
    /* 6 */ 0,1,1,1, 1,1,1,1, 1,1,1,1, 1,1,1,1,
    /* 7 */ 1,1,1,1, 1,1,1,1, 1,1,1,0, 0,0,0,0,

    /* 8 */ 0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0,
    /* 9 */ 0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0,
    /* A */ 0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0,
    /* B */ 0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0,

    /* C */ 0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0,
    /* D */ 0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0,
    /* E */ 0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0,
    /* F */ 0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0
};

// Percent-encodes `sSrc`: bytes marked in SAFE are copied as they are, every
// other byte becomes "%XX" with uppercase hexadecimal digits.
std::string UriEncode(const std::string & sSrc)
{
    const char DEC2HEX[16 + 1] = "0123456789ABCDEF";

    // Worst case every byte is escaped into three characters.
    std::string encoded;
    encoded.reserve(sSrc.length() * 3);

    for (size_t i = 0; i < sSrc.length(); ++i)
    {
        // Index the tables with the unsigned byte value; plain char may be
        // negative for bytes >= 0x80.
        const unsigned char byte = static_cast<unsigned char>(sSrc[i]);

        if (SAFE[byte])
        {
            encoded += static_cast<char>(byte);
        }
        else
        {
            // escape this char
            encoded += '%';
            encoded += DEC2HEX[byte >> 4];
            encoded += DEC2HEX[byte & 0x0F];
        }
    }

    return encoded;
}
403
404#ifdef _WIN32
405
406std::string UTF16ToUTF8(const std::wstring& input)
407{
408 auto const size = WideCharToMultiByte(CP_UTF8, 0, input.data(), input.size(), nullptr, 0, nullptr, nullptr);
409
410 std::string output;
411 output.resize(size);
412
413 if (size == 0 || size != WideCharToMultiByte(CP_UTF8, 0, input.data(), input.size(), &output[0], output.size(), nullptr, nullptr))
414 output.clear();
415
416 return output;
417}
418
419std::wstring CPToUTF16(u32 code_page, const std::string& input)
420{
421 auto const size = MultiByteToWideChar(code_page, 0, input.data(), input.size(), nullptr, 0);
422
423 std::wstring output;
424 output.resize(size);
425
426 if (size == 0 || size != MultiByteToWideChar(code_page, 0, input.data(), input.size(), &output[0], output.size()))
427 output.clear();
428
429 return output;
430}
431
432std::wstring UTF8ToUTF16(const std::string& input)
433{
434 return CPToUTF16(CP_UTF8, input);
435}
436
437std::string SHIFTJISToUTF8(const std::string& input)
438{
439 return UTF16ToUTF8(CPToUTF16(932, input));
440}
441
442std::string CP1252ToUTF8(const std::string& input)
443{
444 return UTF16ToUTF8(CPToUTF16(1252, input));
445}
446
447#else
448
449template <typename T>
450std::string CodeToUTF8(const char* fromcode, const std::basic_string<T>& input)
451{
452 std::string result;
453
454 iconv_t const conv_desc = iconv_open("UTF-8", fromcode);
455 if ((iconv_t)-1 == conv_desc)
456 {
457 ERROR_LOG(COMMON, "Iconv initialization failure [%s]: %s", fromcode, strerror(errno));
458 }
459 else
460 {
461 size_t const in_bytes = sizeof(T) * input.size();
462 size_t const out_buffer_size = 4 * in_bytes;
463
464 std::string out_buffer;
465 out_buffer.resize(out_buffer_size);
466
467 auto src_buffer = &input[0];
468 size_t src_bytes = in_bytes;
469 auto dst_buffer = &out_buffer[0];
470 size_t dst_bytes = out_buffer.size();
471
472 while (src_bytes != 0)
473 {
474 size_t const iconv_result = iconv(conv_desc, (char**)(&src_buffer), &src_bytes,
475 &dst_buffer, &dst_bytes);
476
477 if ((size_t)-1 == iconv_result)
478 {
479 if (EILSEQ == errno || EINVAL == errno)
480 {
481 // Try to skip the bad character
482 if (src_bytes != 0)
483 {
484 --src_bytes;
485 ++src_buffer;
486 }
487 }
488 else
489 {
490 ERROR_LOG(COMMON, "iconv failure [%s]: %s", fromcode, strerror(errno));
491 break;
492 }
493 }
494 }
495
496 out_buffer.resize(out_buffer_size - dst_bytes);
497 out_buffer.swap(result);
498
499 iconv_close(conv_desc);
500 }
501
502 return result;
503}
504
505std::string CP1252ToUTF8(const std::string& input)
506{
507 //return CodeToUTF8("CP1252//TRANSLIT", input);
508 //return CodeToUTF8("CP1252//IGNORE", input);
509 return CodeToUTF8("CP1252", input);
510}
511
512std::string SHIFTJISToUTF8(const std::string& input)
513{
514 //return CodeToUTF8("CP932", input);
515 return CodeToUTF8("SJIS", input);
516}
517
518std::string UTF16ToUTF8(const std::wstring& input)
519{
520 std::string result =
521 // CodeToUTF8("UCS-2", input);
522 // CodeToUTF8("UCS-2LE", input);
523 // CodeToUTF8("UTF-16", input);
524 CodeToUTF8("UTF-16LE", input);
525
526 // TODO: why is this needed?
527 result.erase(std::remove(result.begin(), result.end(), 0x00), result.end());
528 return result;
529}
530
531#endif