summary | refs | log | tree | commit | diff
path: root/src/common/string_util.cpp
diff options
context:
space:
mode:
author: Weiyi Wang, 2018-09-22 01:03:47 -0400
committer: fearlessTobi, 2018-10-02 16:10:34 +0200
commit: fdb35760a77719a9f79c04c2b55cb5a7ba6c1d8c (patch)
tree: 4b382fd446ab69ab73bb3077966f41ddf4a749c1 /src/common/string_util.cpp
parent: string_util: remove TString conversion for windows (diff)
download: yuzu-fdb35760a77719a9f79c04c2b55cb5a7ba6c1d8c.tar.gz
yuzu-fdb35760a77719a9f79c04c2b55cb5a7ba6c1d8c.tar.xz
yuzu-fdb35760a77719a9f79c04c2b55cb5a7ba6c1d8c.zip
string_util: unify UTF8<->UTF16 conversion to codecvt
Diffstat (limited to 'src/common/string_util.cpp')
-rw-r--r-- src/common/string_util.cpp 115
1 files changed, 6 insertions, 109 deletions
diff --git a/src/common/string_util.cpp b/src/common/string_util.cpp
index b5e28e34f..731d1db34 100644
--- a/src/common/string_util.cpp
+++ b/src/common/string_util.cpp
@@ -5,6 +5,7 @@
5#include <algorithm> 5#include <algorithm>
6#include <cctype> 6#include <cctype>
7#include <cerrno> 7#include <cerrno>
8#include <codecvt>
8#include <cstdio> 9#include <cstdio>
9#include <cstdlib> 10#include <cstdlib>
10#include <cstring> 11#include <cstring>
@@ -13,11 +14,7 @@
13#include "common/string_util.h" 14#include "common/string_util.h"
14 15
15#ifdef _WIN32 16#ifdef _WIN32
16#include <codecvt>
17#include <windows.h> 17#include <windows.h>
18#include "common/common_funcs.h"
19#else
20#include <iconv.h>
21#endif 18#endif
22 19
23namespace Common { 20namespace Common {
@@ -195,11 +192,9 @@ std::string ReplaceAll(std::string result, const std::string& src, const std::st
195 return result; 192 return result;
196} 193}
197 194
198#ifdef _WIN32
199
200std::string UTF16ToUTF8(const std::u16string& input) { 195std::string UTF16ToUTF8(const std::u16string& input) {
201#if _MSC_VER >= 1900 196#ifdef _MSC_VER
202 // Workaround for missing char16_t/char32_t instantiations in MSVC2015 197 // Workaround for missing char16_t/char32_t instantiations in MSVC2017
203 std::wstring_convert<std::codecvt_utf8_utf16<__int16>, __int16> convert; 198 std::wstring_convert<std::codecvt_utf8_utf16<__int16>, __int16> convert;
204 std::basic_string<__int16> tmp_buffer(input.cbegin(), input.cend()); 199 std::basic_string<__int16> tmp_buffer(input.cbegin(), input.cend());
205 return convert.to_bytes(tmp_buffer); 200 return convert.to_bytes(tmp_buffer);
@@ -210,8 +205,8 @@ std::string UTF16ToUTF8(const std::u16string& input) {
210} 205}
211 206
212std::u16string UTF8ToUTF16(const std::string& input) { 207std::u16string UTF8ToUTF16(const std::string& input) {
213#if _MSC_VER >= 1900 208#ifdef _MSC_VER
214 // Workaround for missing char16_t/char32_t instantiations in MSVC2015 209 // Workaround for missing char16_t/char32_t instantiations in MSVC2017
215 std::wstring_convert<std::codecvt_utf8_utf16<__int16>, __int16> convert; 210 std::wstring_convert<std::codecvt_utf8_utf16<__int16>, __int16> convert;
216 auto tmp_buffer = convert.from_bytes(input); 211 auto tmp_buffer = convert.from_bytes(input);
217 return std::u16string(tmp_buffer.cbegin(), tmp_buffer.cend()); 212 return std::u16string(tmp_buffer.cbegin(), tmp_buffer.cend());
@@ -221,6 +216,7 @@ std::u16string UTF8ToUTF16(const std::string& input) {
221#endif 216#endif
222} 217}
223 218
219#ifdef _WIN32
224static std::wstring CPToUTF16(u32 code_page, const std::string& input) { 220static std::wstring CPToUTF16(u32 code_page, const std::string& input) {
225 const auto size = 221 const auto size =
226 MultiByteToWideChar(code_page, 0, input.data(), static_cast<int>(input.size()), nullptr, 0); 222 MultiByteToWideChar(code_page, 0, input.data(), static_cast<int>(input.size()), nullptr, 0);
@@ -261,105 +257,6 @@ std::wstring UTF8ToUTF16W(const std::string& input) {
261 return CPToUTF16(CP_UTF8, input); 257 return CPToUTF16(CP_UTF8, input);
262} 258}
263 259
264#else
265
266template <typename T>
267static std::string CodeToUTF8(const char* fromcode, const std::basic_string<T>& input) {
268 iconv_t const conv_desc = iconv_open("UTF-8", fromcode);
269 if ((iconv_t)(-1) == conv_desc) {
270 LOG_ERROR(Common, "Iconv initialization failure [{}]: {}", fromcode, strerror(errno));
271 iconv_close(conv_desc);
272 return {};
273 }
274
275 const std::size_t in_bytes = sizeof(T) * input.size();
276 // Multiply by 4, which is the max number of bytes to encode a codepoint
277 const std::size_t out_buffer_size = 4 * in_bytes;
278
279 std::string out_buffer(out_buffer_size, '\0');
280
281 auto src_buffer = &input[0];
282 std::size_t src_bytes = in_bytes;
283 auto dst_buffer = &out_buffer[0];
284 std::size_t dst_bytes = out_buffer.size();
285
286 while (0 != src_bytes) {
287 std::size_t const iconv_result =
288 iconv(conv_desc, (char**)(&src_buffer), &src_bytes, &dst_buffer, &dst_bytes);
289
290 if (static_cast<std::size_t>(-1) == iconv_result) {
291 if (EILSEQ == errno || EINVAL == errno) {
292 // Try to skip the bad character
293 if (0 != src_bytes) {
294 --src_bytes;
295 ++src_buffer;
296 }
297 } else {
298 LOG_ERROR(Common, "iconv failure [{}]: {}", fromcode, strerror(errno));
299 break;
300 }
301 }
302 }
303
304 std::string result;
305 out_buffer.resize(out_buffer_size - dst_bytes);
306 out_buffer.swap(result);
307
308 iconv_close(conv_desc);
309
310 return result;
311}
312
313std::u16string UTF8ToUTF16(const std::string& input) {
314 iconv_t const conv_desc = iconv_open("UTF-16LE", "UTF-8");
315 if ((iconv_t)(-1) == conv_desc) {
316 LOG_ERROR(Common, "Iconv initialization failure [UTF-8]: {}", strerror(errno));
317 iconv_close(conv_desc);
318 return {};
319 }
320
321 const std::size_t in_bytes = sizeof(char) * input.size();
322 // Multiply by 4, which is the max number of bytes to encode a codepoint
323 const std::size_t out_buffer_size = 4 * sizeof(char16_t) * in_bytes;
324
325 std::u16string out_buffer(out_buffer_size, char16_t{});
326
327 char* src_buffer = const_cast<char*>(&input[0]);
328 std::size_t src_bytes = in_bytes;
329 char* dst_buffer = (char*)(&out_buffer[0]);
330 std::size_t dst_bytes = out_buffer.size();
331
332 while (0 != src_bytes) {
333 std::size_t const iconv_result =
334 iconv(conv_desc, &src_buffer, &src_bytes, &dst_buffer, &dst_bytes);
335
336 if (static_cast<std::size_t>(-1) == iconv_result) {
337 if (EILSEQ == errno || EINVAL == errno) {
338 // Try to skip the bad character
339 if (0 != src_bytes) {
340 --src_bytes;
341 ++src_buffer;
342 }
343 } else {
344 LOG_ERROR(Common, "iconv failure [UTF-8]: {}", strerror(errno));
345 break;
346 }
347 }
348 }
349
350 std::u16string result;
351 out_buffer.resize(out_buffer_size - dst_bytes);
352 out_buffer.swap(result);
353
354 iconv_close(conv_desc);
355
356 return result;
357}
358
359std::string UTF16ToUTF8(const std::u16string& input) {
360 return CodeToUTF8("UTF-16LE", input);
361}
362
363#endif 260#endif
364 261
365std::string StringFromFixedZeroTerminatedBuffer(const char* buffer, std::size_t max_len) { 262std::string StringFromFixedZeroTerminatedBuffer(const char* buffer, std::size_t max_len) {