diff options
| author | 2018-09-22 01:03:47 -0400 | |
|---|---|---|
| committer | 2018-10-02 16:10:34 +0200 | |
| commit | fdb35760a77719a9f79c04c2b55cb5a7ba6c1d8c (patch) | |
| tree | 4b382fd446ab69ab73bb3077966f41ddf4a749c1 /src/common/string_util.cpp | |
| parent | string_util: remove TString conversion for windows (diff) | |
| download | yuzu-fdb35760a77719a9f79c04c2b55cb5a7ba6c1d8c.tar.gz yuzu-fdb35760a77719a9f79c04c2b55cb5a7ba6c1d8c.tar.xz yuzu-fdb35760a77719a9f79c04c2b55cb5a7ba6c1d8c.zip | |
string_util: unify UTF8<->UTF16 conversion to codecvt
Diffstat (limited to 'src/common/string_util.cpp')
| -rw-r--r-- | src/common/string_util.cpp | 115 |
1 files changed, 6 insertions, 109 deletions
diff --git a/src/common/string_util.cpp b/src/common/string_util.cpp index b5e28e34f..731d1db34 100644 --- a/src/common/string_util.cpp +++ b/src/common/string_util.cpp | |||
| @@ -5,6 +5,7 @@ | |||
| 5 | #include <algorithm> | 5 | #include <algorithm> |
| 6 | #include <cctype> | 6 | #include <cctype> |
| 7 | #include <cerrno> | 7 | #include <cerrno> |
| 8 | #include <codecvt> | ||
| 8 | #include <cstdio> | 9 | #include <cstdio> |
| 9 | #include <cstdlib> | 10 | #include <cstdlib> |
| 10 | #include <cstring> | 11 | #include <cstring> |
| @@ -13,11 +14,7 @@ | |||
| 13 | #include "common/string_util.h" | 14 | #include "common/string_util.h" |
| 14 | 15 | ||
| 15 | #ifdef _WIN32 | 16 | #ifdef _WIN32 |
| 16 | #include <codecvt> | ||
| 17 | #include <windows.h> | 17 | #include <windows.h> |
| 18 | #include "common/common_funcs.h" | ||
| 19 | #else | ||
| 20 | #include <iconv.h> | ||
| 21 | #endif | 18 | #endif |
| 22 | 19 | ||
| 23 | namespace Common { | 20 | namespace Common { |
| @@ -195,11 +192,9 @@ std::string ReplaceAll(std::string result, const std::string& src, const std::st | |||
| 195 | return result; | 192 | return result; |
| 196 | } | 193 | } |
| 197 | 194 | ||
| 198 | #ifdef _WIN32 | ||
| 199 | |||
| 200 | std::string UTF16ToUTF8(const std::u16string& input) { | 195 | std::string UTF16ToUTF8(const std::u16string& input) { |
| 201 | #if _MSC_VER >= 1900 | 196 | #ifdef _MSC_VER |
| 202 | // Workaround for missing char16_t/char32_t instantiations in MSVC2015 | 197 | // Workaround for missing char16_t/char32_t instantiations in MSVC2017 |
| 203 | std::wstring_convert<std::codecvt_utf8_utf16<__int16>, __int16> convert; | 198 | std::wstring_convert<std::codecvt_utf8_utf16<__int16>, __int16> convert; |
| 204 | std::basic_string<__int16> tmp_buffer(input.cbegin(), input.cend()); | 199 | std::basic_string<__int16> tmp_buffer(input.cbegin(), input.cend()); |
| 205 | return convert.to_bytes(tmp_buffer); | 200 | return convert.to_bytes(tmp_buffer); |
| @@ -210,8 +205,8 @@ std::string UTF16ToUTF8(const std::u16string& input) { | |||
| 210 | } | 205 | } |
| 211 | 206 | ||
| 212 | std::u16string UTF8ToUTF16(const std::string& input) { | 207 | std::u16string UTF8ToUTF16(const std::string& input) { |
| 213 | #if _MSC_VER >= 1900 | 208 | #ifdef _MSC_VER |
| 214 | // Workaround for missing char16_t/char32_t instantiations in MSVC2015 | 209 | // Workaround for missing char16_t/char32_t instantiations in MSVC2017 |
| 215 | std::wstring_convert<std::codecvt_utf8_utf16<__int16>, __int16> convert; | 210 | std::wstring_convert<std::codecvt_utf8_utf16<__int16>, __int16> convert; |
| 216 | auto tmp_buffer = convert.from_bytes(input); | 211 | auto tmp_buffer = convert.from_bytes(input); |
| 217 | return std::u16string(tmp_buffer.cbegin(), tmp_buffer.cend()); | 212 | return std::u16string(tmp_buffer.cbegin(), tmp_buffer.cend()); |
| @@ -221,6 +216,7 @@ std::u16string UTF8ToUTF16(const std::string& input) { | |||
| 221 | #endif | 216 | #endif |
| 222 | } | 217 | } |
| 223 | 218 | ||
| 219 | #ifdef _WIN32 | ||
| 224 | static std::wstring CPToUTF16(u32 code_page, const std::string& input) { | 220 | static std::wstring CPToUTF16(u32 code_page, const std::string& input) { |
| 225 | const auto size = | 221 | const auto size = |
| 226 | MultiByteToWideChar(code_page, 0, input.data(), static_cast<int>(input.size()), nullptr, 0); | 222 | MultiByteToWideChar(code_page, 0, input.data(), static_cast<int>(input.size()), nullptr, 0); |
| @@ -261,105 +257,6 @@ std::wstring UTF8ToUTF16W(const std::string& input) { | |||
| 261 | return CPToUTF16(CP_UTF8, input); | 257 | return CPToUTF16(CP_UTF8, input); |
| 262 | } | 258 | } |
| 263 | 259 | ||
| 264 | #else | ||
| 265 | |||
| 266 | template <typename T> | ||
| 267 | static std::string CodeToUTF8(const char* fromcode, const std::basic_string<T>& input) { | ||
| 268 | iconv_t const conv_desc = iconv_open("UTF-8", fromcode); | ||
| 269 | if ((iconv_t)(-1) == conv_desc) { | ||
| 270 | LOG_ERROR(Common, "Iconv initialization failure [{}]: {}", fromcode, strerror(errno)); | ||
| 271 | iconv_close(conv_desc); | ||
| 272 | return {}; | ||
| 273 | } | ||
| 274 | |||
| 275 | const std::size_t in_bytes = sizeof(T) * input.size(); | ||
| 276 | // Multiply by 4, which is the max number of bytes to encode a codepoint | ||
| 277 | const std::size_t out_buffer_size = 4 * in_bytes; | ||
| 278 | |||
| 279 | std::string out_buffer(out_buffer_size, '\0'); | ||
| 280 | |||
| 281 | auto src_buffer = &input[0]; | ||
| 282 | std::size_t src_bytes = in_bytes; | ||
| 283 | auto dst_buffer = &out_buffer[0]; | ||
| 284 | std::size_t dst_bytes = out_buffer.size(); | ||
| 285 | |||
| 286 | while (0 != src_bytes) { | ||
| 287 | std::size_t const iconv_result = | ||
| 288 | iconv(conv_desc, (char**)(&src_buffer), &src_bytes, &dst_buffer, &dst_bytes); | ||
| 289 | |||
| 290 | if (static_cast<std::size_t>(-1) == iconv_result) { | ||
| 291 | if (EILSEQ == errno || EINVAL == errno) { | ||
| 292 | // Try to skip the bad character | ||
| 293 | if (0 != src_bytes) { | ||
| 294 | --src_bytes; | ||
| 295 | ++src_buffer; | ||
| 296 | } | ||
| 297 | } else { | ||
| 298 | LOG_ERROR(Common, "iconv failure [{}]: {}", fromcode, strerror(errno)); | ||
| 299 | break; | ||
| 300 | } | ||
| 301 | } | ||
| 302 | } | ||
| 303 | |||
| 304 | std::string result; | ||
| 305 | out_buffer.resize(out_buffer_size - dst_bytes); | ||
| 306 | out_buffer.swap(result); | ||
| 307 | |||
| 308 | iconv_close(conv_desc); | ||
| 309 | |||
| 310 | return result; | ||
| 311 | } | ||
| 312 | |||
| 313 | std::u16string UTF8ToUTF16(const std::string& input) { | ||
| 314 | iconv_t const conv_desc = iconv_open("UTF-16LE", "UTF-8"); | ||
| 315 | if ((iconv_t)(-1) == conv_desc) { | ||
| 316 | LOG_ERROR(Common, "Iconv initialization failure [UTF-8]: {}", strerror(errno)); | ||
| 317 | iconv_close(conv_desc); | ||
| 318 | return {}; | ||
| 319 | } | ||
| 320 | |||
| 321 | const std::size_t in_bytes = sizeof(char) * input.size(); | ||
| 322 | // Multiply by 4, which is the max number of bytes to encode a codepoint | ||
| 323 | const std::size_t out_buffer_size = 4 * sizeof(char16_t) * in_bytes; | ||
| 324 | |||
| 325 | std::u16string out_buffer(out_buffer_size, char16_t{}); | ||
| 326 | |||
| 327 | char* src_buffer = const_cast<char*>(&input[0]); | ||
| 328 | std::size_t src_bytes = in_bytes; | ||
| 329 | char* dst_buffer = (char*)(&out_buffer[0]); | ||
| 330 | std::size_t dst_bytes = out_buffer.size(); | ||
| 331 | |||
| 332 | while (0 != src_bytes) { | ||
| 333 | std::size_t const iconv_result = | ||
| 334 | iconv(conv_desc, &src_buffer, &src_bytes, &dst_buffer, &dst_bytes); | ||
| 335 | |||
| 336 | if (static_cast<std::size_t>(-1) == iconv_result) { | ||
| 337 | if (EILSEQ == errno || EINVAL == errno) { | ||
| 338 | // Try to skip the bad character | ||
| 339 | if (0 != src_bytes) { | ||
| 340 | --src_bytes; | ||
| 341 | ++src_buffer; | ||
| 342 | } | ||
| 343 | } else { | ||
| 344 | LOG_ERROR(Common, "iconv failure [UTF-8]: {}", strerror(errno)); | ||
| 345 | break; | ||
| 346 | } | ||
| 347 | } | ||
| 348 | } | ||
| 349 | |||
| 350 | std::u16string result; | ||
| 351 | out_buffer.resize(out_buffer_size - dst_bytes); | ||
| 352 | out_buffer.swap(result); | ||
| 353 | |||
| 354 | iconv_close(conv_desc); | ||
| 355 | |||
| 356 | return result; | ||
| 357 | } | ||
| 358 | |||
| 359 | std::string UTF16ToUTF8(const std::u16string& input) { | ||
| 360 | return CodeToUTF8("UTF-16LE", input); | ||
| 361 | } | ||
| 362 | |||
| 363 | #endif | 260 | #endif |
| 364 | 261 | ||
| 365 | std::string StringFromFixedZeroTerminatedBuffer(const char* buffer, std::size_t max_len) { | 262 | std::string StringFromFixedZeroTerminatedBuffer(const char* buffer, std::size_t max_len) { |