summaryrefslogtreecommitdiff
path: root/src/common/string_util.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'src/common/string_util.cpp')
-rw-r--r--src/common/string_util.cpp204
1 files changed, 130 insertions, 74 deletions
diff --git a/src/common/string_util.cpp b/src/common/string_util.cpp
index c1f22bda3..54943d306 100644
--- a/src/common/string_util.cpp
+++ b/src/common/string_util.cpp
@@ -3,34 +3,29 @@
3// Refer to the license.txt file included. 3// Refer to the license.txt file included.
4 4
5#include <algorithm> 5#include <algorithm>
6#include <cstdlib>
7#include <cstdio>
8 6
9#include "common/common.h" 7#include "common/common.h"
10#include "common/common_paths.h"
11#include "common/string_util.h" 8#include "common/string_util.h"
12 9
13#ifdef _WIN32 10#ifdef _WIN32
14 #include <Windows.h> 11 #include <Windows.h>
12 #include <codecvt>
15#else 13#else
16 #include <cerrno>
17 #include <iconv.h> 14 #include <iconv.h>
18#endif 15#endif
19 16
17namespace Common {
18
20/// Make a string lowercase 19/// Make a string lowercase
21void LowerStr(char* str) { 20std::string ToLower(std::string str) {
22 for (int i = 0; str[i]; i++) { 21 std::transform(str.begin(), str.end(), str.begin(), ::tolower);
23 str[i] = tolower(str[ i ]); 22 return str;
24 }
25} 23}
26 24
27/// Make a string uppercase 25/// Make a string uppercase
28void UpperStr(char* str) { 26std::string ToUpper(std::string str) {
29 for (int i=0; i < strlen(str); i++) { 27 std::transform(str.begin(), str.end(), str.begin(), ::toupper);
30 if(str[i] >= 'a' && str[i] <= 'z') { 28 return str;
31 str[i] &= 0xDF;
32 }
33 }
34} 29}
35 30
36// faster than sscanf 31// faster than sscanf
@@ -192,9 +187,9 @@ bool TryParse(const std::string &str, u32 *const output)
192 187
193bool TryParse(const std::string &str, bool *const output) 188bool TryParse(const std::string &str, bool *const output)
194{ 189{
195 if ("1" == str || !strcasecmp("true", str.c_str())) 190 if ("1" == str || "true" == ToLower(str))
196 *output = true; 191 *output = true;
197 else if ("0" == str || !strcasecmp("false", str.c_str())) 192 else if ("0" == str || "false" == ToLower(str))
198 *output = false; 193 *output = false;
199 else 194 else
200 return false; 195 return false;
@@ -202,13 +197,6 @@ bool TryParse(const std::string &str, bool *const output)
202 return true; 197 return true;
203} 198}
204 199
205std::string StringFromInt(int value)
206{
207 char temp[16];
208 sprintf(temp, "%i", value);
209 return temp;
210}
211
212std::string StringFromBool(bool value) 200std::string StringFromBool(bool value)
213{ 201{
214 return value ? "True" : "False"; 202 return value ? "True" : "False";
@@ -283,12 +271,17 @@ std::string TabsToSpaces(int tab_size, const std::string &in)
283 271
284std::string ReplaceAll(std::string result, const std::string& src, const std::string& dest) 272std::string ReplaceAll(std::string result, const std::string& src, const std::string& dest)
285{ 273{
286 while(1) 274 size_t pos = 0;
275
276 if (src == dest)
277 return result;
278
279 while ((pos = result.find(src, pos)) != std::string::npos)
287 { 280 {
288 size_t pos = result.find(src);
289 if (pos == std::string::npos) break;
290 result.replace(pos, src.size(), dest); 281 result.replace(pos, src.size(), dest);
282 pos += dest.length();
291 } 283 }
284
292 return result; 285 return result;
293} 286}
294 287
@@ -419,7 +412,19 @@ std::string UriEncode(const std::string & sSrc)
419 412
420#ifdef _WIN32 413#ifdef _WIN32
421 414
422std::string UTF16ToUTF8(const std::wstring& input) 415std::string UTF16ToUTF8(const std::u16string& input)
416{
417 std::wstring_convert<std::codecvt_utf8_utf16<char16_t>, char16_t> convert;
418 return convert.to_bytes(input);
419}
420
421std::u16string UTF8ToUTF16(const std::string& input)
422{
423 std::wstring_convert<std::codecvt_utf8_utf16<char16_t>, char16_t> convert;
424 return convert.from_bytes(input);
425}
426
427static std::string UTF16ToUTF8(const std::wstring& input)
423{ 428{
424 auto const size = WideCharToMultiByte(CP_UTF8, 0, input.data(), input.size(), nullptr, 0, nullptr, nullptr); 429 auto const size = WideCharToMultiByte(CP_UTF8, 0, input.data(), input.size(), nullptr, 0, nullptr, nullptr);
425 430
@@ -432,7 +437,7 @@ std::string UTF16ToUTF8(const std::wstring& input)
432 return output; 437 return output;
433} 438}
434 439
435std::wstring CPToUTF16(u32 code_page, const std::string& input) 440static std::wstring CPToUTF16(u32 code_page, const std::string& input)
436{ 441{
437 auto const size = MultiByteToWideChar(code_page, 0, input.data(), input.size(), nullptr, 0); 442 auto const size = MultiByteToWideChar(code_page, 0, input.data(), input.size(), nullptr, 0);
438 443
@@ -445,7 +450,7 @@ std::wstring CPToUTF16(u32 code_page, const std::string& input)
445 return output; 450 return output;
446} 451}
447 452
448std::wstring UTF8ToUTF16(const std::string& input) 453std::wstring UTF8ToUTF16W(const std::string &input)
449{ 454{
450 return CPToUTF16(CP_UTF8, input); 455 return CPToUTF16(CP_UTF8, input);
451} 456}
@@ -463,61 +468,123 @@ std::string CP1252ToUTF8(const std::string& input)
463#else 468#else
464 469
465template <typename T> 470template <typename T>
466std::string CodeToUTF8(const char* fromcode, const std::basic_string<T>& input) 471static std::string CodeToUTF8(const char* fromcode, const std::basic_string<T>& input)
467{ 472{
468 std::string result; 473 std::string result;
469 474
470 iconv_t const conv_desc = iconv_open("UTF-8", fromcode); 475 iconv_t const conv_desc = iconv_open("UTF-8", fromcode);
471 if ((iconv_t)-1 == conv_desc) 476 if ((iconv_t)(-1) == conv_desc)
472 { 477 {
473 ERROR_LOG(COMMON, "Iconv initialization failure [%s]: %s", fromcode, strerror(errno)); 478 ERROR_LOG(COMMON, "Iconv initialization failure [%s]: %s", fromcode, strerror(errno));
479 iconv_close(conv_desc);
480 return {};
474 } 481 }
475 else
476 {
477 size_t const in_bytes = sizeof(T) * input.size();
478 size_t const out_buffer_size = 4 * in_bytes;
479 482
480 std::string out_buffer; 483 const size_t in_bytes = sizeof(T) * input.size();
481 out_buffer.resize(out_buffer_size); 484 // Multiply by 4, which is the max number of bytes to encode a codepoint
485 const size_t out_buffer_size = 4 * in_bytes;
482 486
483 auto src_buffer = &input[0]; 487 std::string out_buffer;
484 size_t src_bytes = in_bytes; 488 out_buffer.resize(out_buffer_size);
485 auto dst_buffer = &out_buffer[0];
486 size_t dst_bytes = out_buffer.size();
487 489
488 while (src_bytes != 0) 490 auto src_buffer = &input[0];
489 { 491 size_t src_bytes = in_bytes;
490 size_t const iconv_result = iconv(conv_desc, (char**)(&src_buffer), &src_bytes, 492 auto dst_buffer = &out_buffer[0];
491 &dst_buffer, &dst_bytes); 493 size_t dst_bytes = out_buffer.size();
494
495 while (0 != src_bytes)
496 {
497 size_t const iconv_result = iconv(conv_desc, (char**)(&src_buffer), &src_bytes,
498 &dst_buffer, &dst_bytes);
492 499
493 if ((size_t)-1 == iconv_result) 500 if (static_cast<size_t>(-1) == iconv_result)
501 {
502 if (EILSEQ == errno || EINVAL == errno)
494 { 503 {
495 if (EILSEQ == errno || EINVAL == errno) 504 // Try to skip the bad character
505 if (0 != src_bytes)
496 { 506 {
497 // Try to skip the bad character 507 --src_bytes;
498 if (src_bytes != 0) 508 ++src_buffer;
499 {
500 --src_bytes;
501 ++src_buffer;
502 }
503 }
504 else
505 {
506 ERROR_LOG(COMMON, "iconv failure [%s]: %s", fromcode, strerror(errno));
507 break;
508 } 509 }
509 } 510 }
511 else
512 {
513 ERROR_LOG(COMMON, "iconv failure [%s]: %s", fromcode, strerror(errno));
514 break;
515 }
510 } 516 }
517 }
511 518
512 out_buffer.resize(out_buffer_size - dst_bytes); 519 out_buffer.resize(out_buffer_size - dst_bytes);
513 out_buffer.swap(result); 520 out_buffer.swap(result);
514 521
522 iconv_close(conv_desc);
523
524 return result;
525}
526
527std::u16string UTF8ToUTF16(const std::string& input)
528{
529 std::u16string result;
530
531 iconv_t const conv_desc = iconv_open("UTF-16", "UTF-8");
532 if ((iconv_t)(-1) == conv_desc)
533 {
534 ERROR_LOG(COMMON, "Iconv initialization failure [UTF-8]: %s", strerror(errno));
515 iconv_close(conv_desc); 535 iconv_close(conv_desc);
536 return {};
537 }
538
539 const size_t in_bytes = sizeof(char) * input.size();
540 // Multiply by 4, which is the max number of bytes to encode a codepoint
541 const size_t out_buffer_size = 4 * sizeof(char16_t) * in_bytes;
542
543 std::u16string out_buffer;
544 out_buffer.resize(out_buffer_size);
545
546 char* src_buffer = const_cast<char*>(&input[0]);
547 size_t src_bytes = in_bytes;
548 char* dst_buffer = (char*)(&out_buffer[0]);
549 size_t dst_bytes = out_buffer.size();
550
551 while (0 != src_bytes)
552 {
553 size_t const iconv_result = iconv(conv_desc, &src_buffer, &src_bytes,
554 &dst_buffer, &dst_bytes);
555
556 if (static_cast<size_t>(-1) == iconv_result)
557 {
558 if (EILSEQ == errno || EINVAL == errno)
559 {
560 // Try to skip the bad character
561 if (0 != src_bytes)
562 {
563 --src_bytes;
564 ++src_buffer;
565 }
566 }
567 else
568 {
569 ERROR_LOG(COMMON, "iconv failure [UTF-8]: %s", strerror(errno));
570 break;
571 }
572 }
516 } 573 }
574
575 out_buffer.resize(out_buffer_size - dst_bytes);
576 out_buffer.swap(result);
577
578 iconv_close(conv_desc);
517 579
518 return result; 580 return result;
519} 581}
520 582
583std::string UTF16ToUTF8(const std::u16string& input)
584{
585 return CodeToUTF8("UTF-16", input);
586}
587
521std::string CP1252ToUTF8(const std::string& input) 588std::string CP1252ToUTF8(const std::string& input)
522{ 589{
523 //return CodeToUTF8("CP1252//TRANSLIT", input); 590 //return CodeToUTF8("CP1252//TRANSLIT", input);
@@ -531,17 +598,6 @@ std::string SHIFTJISToUTF8(const std::string& input)
531 return CodeToUTF8("SJIS", input); 598 return CodeToUTF8("SJIS", input);
532} 599}
533 600
534std::string UTF16ToUTF8(const std::wstring& input)
535{
536 std::string result =
537 // CodeToUTF8("UCS-2", input);
538 // CodeToUTF8("UCS-2LE", input);
539 // CodeToUTF8("UTF-16", input);
540 CodeToUTF8("UTF-16LE", input);
541
542 // TODO: why is this needed?
543 result.erase(std::remove(result.begin(), result.end(), 0x00), result.end());
544 return result;
545}
546
547#endif 601#endif
602
603}