diff options
Diffstat (limited to 'src/common/utf8.h')
| -rw-r--r-- | src/common/utf8.h | 67 |
1 files changed, 67 insertions, 0 deletions
diff --git a/src/common/utf8.h b/src/common/utf8.h new file mode 100644 index 000000000..36cf75713 --- /dev/null +++ b/src/common/utf8.h | |||
| @@ -0,0 +1,67 @@ | |||
| 1 | /* | ||
| 2 | Basic UTF-8 manipulation routines | ||
| 3 | by Jeff Bezanson | ||
| 4 | placed in the public domain Fall 2005 | ||
| 5 | |||
| 6 | This code is designed to provide the utilities you need to manipulate | ||
| 7 | UTF-8 as an internal string encoding. These functions do not perform the | ||
| 8 | error checking normally needed when handling UTF-8 data, so if you happen | ||
| 9 | to be from the Unicode Consortium you will want to flay me alive. | ||
| 10 | I do this because error checking can be performed at the boundaries (I/O), | ||
| 11 | with these routines reserved for higher performance on data known to be | ||
| 12 | valid. | ||
| 13 | */ | ||
| 14 | |||
| 15 | // Further modified, and C++ stuff added, by hrydgard@gmail.com. | ||
| 16 | |||
| 17 | #pragma once | ||
| 18 | |||
| 19 | #include "common_types.h" | ||
| 20 | #include <string> | ||
| 21 | |||
// Declaration only; the definition is not in this header.
// Takes the string `s` and a pointer `i` to an int position. The UTF8 class
// in this header passes the address of its byte index as `i`.
// NOTE(review): presumably decodes the code point starting at s[*i] and
// advances *i past it - confirm against the definition.
// Per the file header comment, these routines do not validate their input.
| 22 | u32 u8_nextchar(const char *s, int *i); | ||
// Declaration only; the definition is not in this header.
// NOTE(review): presumably writes the UTF-8 encoding of `ch` into `dest` and
// returns the number of bytes written - confirm. The capacity required of
// `dest` and whether a terminator is written are not visible from here.
| 23 | int u8_wc_toutf8(char *dest, u32 ch); | ||
// Declaration only; the definition is not in this header.
// NOTE(review): presumably returns the number of characters (code points) in
// `s` rather than its byte count - confirm against the definition.
| 24 | int u8_strlen(const char *s); | ||
| 25 | |||
| 26 | class UTF8 { | ||
| 27 | public: | ||
| 28 | static const u32 INVALID = (u32)-1; | ||
| 29 | UTF8(const char *c) : c_(c), index_(0) {} | ||
| 30 | bool end() const { return c_[index_] == 0; } | ||
| 31 | u32 next() { | ||
| 32 | return u8_nextchar(c_, &index_); | ||
| 33 | } | ||
| 34 | u32 peek() { | ||
| 35 | int tempIndex = index_; | ||
| 36 | return u8_nextchar(c_, &tempIndex); | ||
| 37 | } | ||
| 38 | int length() const { | ||
| 39 | return u8_strlen(c_); | ||
| 40 | } | ||
| 41 | int byteIndex() const { | ||
| 42 | return index_; | ||
| 43 | } | ||
| 44 | static int encode(char *dest, u32 ch) { | ||
| 45 | return u8_wc_toutf8(dest, ch); | ||
| 46 | } | ||
| 47 | |||
| 48 | private: | ||
| 49 | const char *c_; | ||
| 50 | int index_; | ||
| 51 | }; | ||
| 52 | |||
// Declaration only; the definition is not in this header.
// NOTE(review): by its name this presumably counts the non-ASCII characters
// in `utf8string`; whether it counts bytes or code points is not visible
// here - confirm.
| 53 | int UTF8StringNonASCIICount(const char *utf8string); | ||
| 54 | |||
// Declaration only; the definition is not in this header.
// NOTE(review): by its name this presumably reports whether `utf8string`
// contains anything outside the ASCII range - confirm against the definition.
| 55 | bool UTF8StringHasNonASCII(const char *utf8string); | ||
| 56 | |||
| 57 | |||
| 58 | // Conversions between UTF-8 and Win32 UTF-16 (wide) strings. | ||
| 59 | // Use these when calling Win32 API functions. | ||
| 60 | #ifdef _WIN32 | ||
| 61 | |||
// Wide string -> std::string, compiled only under _WIN32. Two overloads: one
// takes a std::wstring, the other a raw wchar_t pointer.
// NOTE(review): the pointer overload presumably expects a NUL-terminated
// string - confirm against the definition.
| 62 | std::string ConvertWStringToUTF8(const std::wstring &wstr); | ||
| 63 | std::string ConvertWStringToUTF8(const wchar_t *wstr); | ||
// std::string -> wide string, compiled only under _WIN32. The first overload
// writes into the caller-supplied buffer `dest`; the second returns a new
// std::wstring.
// NOTE(review): whether `destSize` is measured in wchar_t elements or bytes,
// and how truncation/termination is handled, is not visible here - confirm.
| 64 | void ConvertUTF8ToWString(wchar_t *dest, size_t destSize, const std::string &source); | ||
| 65 | std::wstring ConvertUTF8ToWString(const std::string &source); | ||
| 66 | |||
| 67 | #endif \ No newline at end of file | ||