diff options
Diffstat (limited to 'src/common/string_util.cpp')
| -rw-r--r-- | src/common/string_util.cpp | 204 |
1 files changed, 130 insertions, 74 deletions
diff --git a/src/common/string_util.cpp b/src/common/string_util.cpp index c1f22bda3..54943d306 100644 --- a/src/common/string_util.cpp +++ b/src/common/string_util.cpp | |||
| @@ -3,34 +3,29 @@ | |||
| 3 | // Refer to the license.txt file included. | 3 | // Refer to the license.txt file included. |
| 4 | 4 | ||
| 5 | #include <algorithm> | 5 | #include <algorithm> |
| 6 | #include <cstdlib> | ||
| 7 | #include <cstdio> | ||
| 8 | 6 | ||
| 9 | #include "common/common.h" | 7 | #include "common/common.h" |
| 10 | #include "common/common_paths.h" | ||
| 11 | #include "common/string_util.h" | 8 | #include "common/string_util.h" |
| 12 | 9 | ||
| 13 | #ifdef _WIN32 | 10 | #ifdef _WIN32 |
| 14 | #include <Windows.h> | 11 | #include <Windows.h> |
| 12 | #include <codecvt> | ||
| 15 | #else | 13 | #else |
| 16 | #include <cerrno> | ||
| 17 | #include <iconv.h> | 14 | #include <iconv.h> |
| 18 | #endif | 15 | #endif |
| 19 | 16 | ||
| 17 | namespace Common { | ||
| 18 | |||
| 20 | /// Make a string lowercase | 19 | /// Make a string lowercase |
| 21 | void LowerStr(char* str) { | 20 | std::string ToLower(std::string str) { |
| 22 | for (int i = 0; str[i]; i++) { | 21 | std::transform(str.begin(), str.end(), str.begin(), ::tolower); |
| 23 | str[i] = tolower(str[ i ]); | 22 | return str; |
| 24 | } | ||
| 25 | } | 23 | } |
| 26 | 24 | ||
| 27 | /// Make a string uppercase | 25 | /// Make a string uppercase |
| 28 | void UpperStr(char* str) { | 26 | std::string ToUpper(std::string str) { |
| 29 | for (int i=0; i < strlen(str); i++) { | 27 | std::transform(str.begin(), str.end(), str.begin(), ::toupper); |
| 30 | if(str[i] >= 'a' && str[i] <= 'z') { | 28 | return str; |
| 31 | str[i] &= 0xDF; | ||
| 32 | } | ||
| 33 | } | ||
| 34 | } | 29 | } |
| 35 | 30 | ||
| 36 | // faster than sscanf | 31 | // faster than sscanf |
| @@ -192,9 +187,9 @@ bool TryParse(const std::string &str, u32 *const output) | |||
| 192 | 187 | ||
| 193 | bool TryParse(const std::string &str, bool *const output) | 188 | bool TryParse(const std::string &str, bool *const output) |
| 194 | { | 189 | { |
| 195 | if ("1" == str || !strcasecmp("true", str.c_str())) | 190 | if ("1" == str || "true" == ToLower(str)) |
| 196 | *output = true; | 191 | *output = true; |
| 197 | else if ("0" == str || !strcasecmp("false", str.c_str())) | 192 | else if ("0" == str || "false" == ToLower(str)) |
| 198 | *output = false; | 193 | *output = false; |
| 199 | else | 194 | else |
| 200 | return false; | 195 | return false; |
| @@ -202,13 +197,6 @@ bool TryParse(const std::string &str, bool *const output) | |||
| 202 | return true; | 197 | return true; |
| 203 | } | 198 | } |
| 204 | 199 | ||
| 205 | std::string StringFromInt(int value) | ||
| 206 | { | ||
| 207 | char temp[16]; | ||
| 208 | sprintf(temp, "%i", value); | ||
| 209 | return temp; | ||
| 210 | } | ||
| 211 | |||
| 212 | std::string StringFromBool(bool value) | 200 | std::string StringFromBool(bool value) |
| 213 | { | 201 | { |
| 214 | return value ? "True" : "False"; | 202 | return value ? "True" : "False"; |
| @@ -283,12 +271,17 @@ std::string TabsToSpaces(int tab_size, const std::string &in) | |||
| 283 | 271 | ||
| 284 | std::string ReplaceAll(std::string result, const std::string& src, const std::string& dest) | 272 | std::string ReplaceAll(std::string result, const std::string& src, const std::string& dest) |
| 285 | { | 273 | { |
| 286 | while(1) | 274 | size_t pos = 0; |
| 275 | |||
| 276 | if (src == dest) | ||
| 277 | return result; | ||
| 278 | |||
| 279 | while ((pos = result.find(src, pos)) != std::string::npos) | ||
| 287 | { | 280 | { |
| 288 | size_t pos = result.find(src); | ||
| 289 | if (pos == std::string::npos) break; | ||
| 290 | result.replace(pos, src.size(), dest); | 281 | result.replace(pos, src.size(), dest); |
| 282 | pos += dest.length(); | ||
| 291 | } | 283 | } |
| 284 | |||
| 292 | return result; | 285 | return result; |
| 293 | } | 286 | } |
| 294 | 287 | ||
| @@ -419,7 +412,19 @@ std::string UriEncode(const std::string & sSrc) | |||
| 419 | 412 | ||
| 420 | #ifdef _WIN32 | 413 | #ifdef _WIN32 |
| 421 | 414 | ||
| 422 | std::string UTF16ToUTF8(const std::wstring& input) | 415 | std::string UTF16ToUTF8(const std::u16string& input) |
| 416 | { | ||
| 417 | std::wstring_convert<std::codecvt_utf8_utf16<char16_t>, char16_t> convert; | ||
| 418 | return convert.to_bytes(input); | ||
| 419 | } | ||
| 420 | |||
| 421 | std::u16string UTF8ToUTF16(const std::string& input) | ||
| 422 | { | ||
| 423 | std::wstring_convert<std::codecvt_utf8_utf16<char16_t>, char16_t> convert; | ||
| 424 | return convert.from_bytes(input); | ||
| 425 | } | ||
| 426 | |||
| 427 | static std::string UTF16ToUTF8(const std::wstring& input) | ||
| 423 | { | 428 | { |
| 424 | auto const size = WideCharToMultiByte(CP_UTF8, 0, input.data(), input.size(), nullptr, 0, nullptr, nullptr); | 429 | auto const size = WideCharToMultiByte(CP_UTF8, 0, input.data(), input.size(), nullptr, 0, nullptr, nullptr); |
| 425 | 430 | ||
| @@ -432,7 +437,7 @@ std::string UTF16ToUTF8(const std::wstring& input) | |||
| 432 | return output; | 437 | return output; |
| 433 | } | 438 | } |
| 434 | 439 | ||
| 435 | std::wstring CPToUTF16(u32 code_page, const std::string& input) | 440 | static std::wstring CPToUTF16(u32 code_page, const std::string& input) |
| 436 | { | 441 | { |
| 437 | auto const size = MultiByteToWideChar(code_page, 0, input.data(), input.size(), nullptr, 0); | 442 | auto const size = MultiByteToWideChar(code_page, 0, input.data(), input.size(), nullptr, 0); |
| 438 | 443 | ||
| @@ -445,7 +450,7 @@ std::wstring CPToUTF16(u32 code_page, const std::string& input) | |||
| 445 | return output; | 450 | return output; |
| 446 | } | 451 | } |
| 447 | 452 | ||
| 448 | std::wstring UTF8ToUTF16(const std::string& input) | 453 | std::wstring UTF8ToUTF16W(const std::string &input) |
| 449 | { | 454 | { |
| 450 | return CPToUTF16(CP_UTF8, input); | 455 | return CPToUTF16(CP_UTF8, input); |
| 451 | } | 456 | } |
| @@ -463,61 +468,123 @@ std::string CP1252ToUTF8(const std::string& input) | |||
| 463 | #else | 468 | #else |
| 464 | 469 | ||
| 465 | template <typename T> | 470 | template <typename T> |
| 466 | std::string CodeToUTF8(const char* fromcode, const std::basic_string<T>& input) | 471 | static std::string CodeToUTF8(const char* fromcode, const std::basic_string<T>& input) |
| 467 | { | 472 | { |
| 468 | std::string result; | 473 | std::string result; |
| 469 | 474 | ||
| 470 | iconv_t const conv_desc = iconv_open("UTF-8", fromcode); | 475 | iconv_t const conv_desc = iconv_open("UTF-8", fromcode); |
| 471 | if ((iconv_t)-1 == conv_desc) | 476 | if ((iconv_t)(-1) == conv_desc) |
| 472 | { | 477 | { |
| 473 | ERROR_LOG(COMMON, "Iconv initialization failure [%s]: %s", fromcode, strerror(errno)); | 478 | ERROR_LOG(COMMON, "Iconv initialization failure [%s]: %s", fromcode, strerror(errno)); |
| 479 | iconv_close(conv_desc); | ||
| 480 | return {}; | ||
| 474 | } | 481 | } |
| 475 | else | ||
| 476 | { | ||
| 477 | size_t const in_bytes = sizeof(T) * input.size(); | ||
| 478 | size_t const out_buffer_size = 4 * in_bytes; | ||
| 479 | 482 | ||
| 480 | std::string out_buffer; | 483 | const size_t in_bytes = sizeof(T) * input.size(); |
| 481 | out_buffer.resize(out_buffer_size); | 484 | // Multiply by 4, which is the max number of bytes to encode a codepoint |
| 485 | const size_t out_buffer_size = 4 * in_bytes; | ||
| 482 | 486 | ||
| 483 | auto src_buffer = &input[0]; | 487 | std::string out_buffer; |
| 484 | size_t src_bytes = in_bytes; | 488 | out_buffer.resize(out_buffer_size); |
| 485 | auto dst_buffer = &out_buffer[0]; | ||
| 486 | size_t dst_bytes = out_buffer.size(); | ||
| 487 | 489 | ||
| 488 | while (src_bytes != 0) | 490 | auto src_buffer = &input[0]; |
| 489 | { | 491 | size_t src_bytes = in_bytes; |
| 490 | size_t const iconv_result = iconv(conv_desc, (char**)(&src_buffer), &src_bytes, | 492 | auto dst_buffer = &out_buffer[0]; |
| 491 | &dst_buffer, &dst_bytes); | 493 | size_t dst_bytes = out_buffer.size(); |
| 494 | |||
| 495 | while (0 != src_bytes) | ||
| 496 | { | ||
| 497 | size_t const iconv_result = iconv(conv_desc, (char**)(&src_buffer), &src_bytes, | ||
| 498 | &dst_buffer, &dst_bytes); | ||
| 492 | 499 | ||
| 493 | if ((size_t)-1 == iconv_result) | 500 | if (static_cast<size_t>(-1) == iconv_result) |
| 501 | { | ||
| 502 | if (EILSEQ == errno || EINVAL == errno) | ||
| 494 | { | 503 | { |
| 495 | if (EILSEQ == errno || EINVAL == errno) | 504 | // Try to skip the bad character |
| 505 | if (0 != src_bytes) | ||
| 496 | { | 506 | { |
| 497 | // Try to skip the bad character | 507 | --src_bytes; |
| 498 | if (src_bytes != 0) | 508 | ++src_buffer; |
| 499 | { | ||
| 500 | --src_bytes; | ||
| 501 | ++src_buffer; | ||
| 502 | } | ||
| 503 | } | ||
| 504 | else | ||
| 505 | { | ||
| 506 | ERROR_LOG(COMMON, "iconv failure [%s]: %s", fromcode, strerror(errno)); | ||
| 507 | break; | ||
| 508 | } | 509 | } |
| 509 | } | 510 | } |
| 511 | else | ||
| 512 | { | ||
| 513 | ERROR_LOG(COMMON, "iconv failure [%s]: %s", fromcode, strerror(errno)); | ||
| 514 | break; | ||
| 515 | } | ||
| 510 | } | 516 | } |
| 517 | } | ||
| 511 | 518 | ||
| 512 | out_buffer.resize(out_buffer_size - dst_bytes); | 519 | out_buffer.resize(out_buffer_size - dst_bytes); |
| 513 | out_buffer.swap(result); | 520 | out_buffer.swap(result); |
| 514 | 521 | ||
| 522 | iconv_close(conv_desc); | ||
| 523 | |||
| 524 | return result; | ||
| 525 | } | ||
| 526 | |||
| 527 | std::u16string UTF8ToUTF16(const std::string& input) | ||
| 528 | { | ||
| 529 | std::u16string result; | ||
| 530 | |||
| 531 | iconv_t const conv_desc = iconv_open("UTF-16", "UTF-8"); | ||
| 532 | if ((iconv_t)(-1) == conv_desc) | ||
| 533 | { | ||
| 534 | ERROR_LOG(COMMON, "Iconv initialization failure [UTF-8]: %s", strerror(errno)); | ||
| 515 | iconv_close(conv_desc); | 535 | iconv_close(conv_desc); |
| 536 | return {}; | ||
| 537 | } | ||
| 538 | |||
| 539 | const size_t in_bytes = sizeof(char) * input.size(); | ||
| 540 | // Multiply by 4, which is the max number of bytes to encode a codepoint | ||
| 541 | const size_t out_buffer_size = 4 * sizeof(char16_t) * in_bytes; | ||
| 542 | |||
| 543 | std::u16string out_buffer; | ||
| 544 | out_buffer.resize(out_buffer_size); | ||
| 545 | |||
| 546 | char* src_buffer = const_cast<char*>(&input[0]); | ||
| 547 | size_t src_bytes = in_bytes; | ||
| 548 | char* dst_buffer = (char*)(&out_buffer[0]); | ||
| 549 | size_t dst_bytes = out_buffer.size(); | ||
| 550 | |||
| 551 | while (0 != src_bytes) | ||
| 552 | { | ||
| 553 | size_t const iconv_result = iconv(conv_desc, &src_buffer, &src_bytes, | ||
| 554 | &dst_buffer, &dst_bytes); | ||
| 555 | |||
| 556 | if (static_cast<size_t>(-1) == iconv_result) | ||
| 557 | { | ||
| 558 | if (EILSEQ == errno || EINVAL == errno) | ||
| 559 | { | ||
| 560 | // Try to skip the bad character | ||
| 561 | if (0 != src_bytes) | ||
| 562 | { | ||
| 563 | --src_bytes; | ||
| 564 | ++src_buffer; | ||
| 565 | } | ||
| 566 | } | ||
| 567 | else | ||
| 568 | { | ||
| 569 | ERROR_LOG(COMMON, "iconv failure [UTF-8]: %s", strerror(errno)); | ||
| 570 | break; | ||
| 571 | } | ||
| 572 | } | ||
| 516 | } | 573 | } |
| 574 | |||
| 575 | out_buffer.resize(out_buffer_size - dst_bytes); | ||
| 576 | out_buffer.swap(result); | ||
| 577 | |||
| 578 | iconv_close(conv_desc); | ||
| 517 | 579 | ||
| 518 | return result; | 580 | return result; |
| 519 | } | 581 | } |
| 520 | 582 | ||
| 583 | std::string UTF16ToUTF8(const std::u16string& input) | ||
| 584 | { | ||
| 585 | return CodeToUTF8("UTF-16", input); | ||
| 586 | } | ||
| 587 | |||
| 521 | std::string CP1252ToUTF8(const std::string& input) | 588 | std::string CP1252ToUTF8(const std::string& input) |
| 522 | { | 589 | { |
| 523 | //return CodeToUTF8("CP1252//TRANSLIT", input); | 590 | //return CodeToUTF8("CP1252//TRANSLIT", input); |
| @@ -531,17 +598,6 @@ std::string SHIFTJISToUTF8(const std::string& input) | |||
| 531 | return CodeToUTF8("SJIS", input); | 598 | return CodeToUTF8("SJIS", input); |
| 532 | } | 599 | } |
| 533 | 600 | ||
| 534 | std::string UTF16ToUTF8(const std::wstring& input) | ||
| 535 | { | ||
| 536 | std::string result = | ||
| 537 | // CodeToUTF8("UCS-2", input); | ||
| 538 | // CodeToUTF8("UCS-2LE", input); | ||
| 539 | // CodeToUTF8("UTF-16", input); | ||
| 540 | CodeToUTF8("UTF-16LE", input); | ||
| 541 | |||
| 542 | // TODO: why is this needed? | ||
| 543 | result.erase(std::remove(result.begin(), result.end(), 0x00), result.end()); | ||
| 544 | return result; | ||
| 545 | } | ||
| 546 | |||
| 547 | #endif | 601 | #endif |
| 602 | |||
| 603 | } | ||