#include "VrCodeFormatUtils.h"

#ifdef _WIN32
#include "windows.h"
#endif

#include <cstring>
#include <vector>

#ifdef _WIN32
namespace
{
    // Windows code page identifier for GB2312 (Simplified Chinese).
    const UINT kCodePageGB2312 = 936;

    // Decodes the NUL-terminated multi-byte string `src` (encoded in `codePage`)
    // into wide characters. The result includes the terminating NUL; it is empty
    // when `src` is NULL or the conversion fails.
    std::vector<wchar_t> DecodeToWide(UINT codePage, const char* src)
    {
        std::vector<wchar_t> wide;
        if (src == NULL)
        {
            return wide;
        }

        // First call only queries the required number of wide characters
        // (terminator included, because the input length is -1).
        const int count = MultiByteToWideChar(codePage, 0, src, -1, NULL, 0);
        if (count <= 0)
        {
            return wide;
        }

        wide.resize(static_cast<size_t>(count), L'\0');
        if (MultiByteToWideChar(codePage, 0, src, -1, &wide[0], count) <= 0)
        {
            wide.clear();
        }
        return wide;
    }

    // Copies `size` bytes of raw wide-character data into a buffer that is
    // guaranteed to be NUL-terminated. A trailing odd byte (an incomplete code
    // unit) is ignored so that it cannot overwrite the terminator.
    std::vector<wchar_t> CopyRawWide(const char* unicode, int size)
    {
        std::vector<wchar_t> wide;
        if (unicode == NULL || size < 0)
        {
            return wide;
        }

        const size_t units = static_cast<size_t>(size) / sizeof(wchar_t);
        wide.assign(units + 1, L'\0');
        if (units > 0)
        {
            std::memcpy(&wide[0], unicode, units * sizeof(wchar_t));
        }
        return wide;
    }

    // Encodes the NUL-terminated wide string `wide` into `codePage`.
    // Returns the number of bytes required (terminator included), or 0 on
    // failure. When `dst` is NULL nothing is written and only the required
    // size is returned.
    int EncodeFromWide(UINT codePage, const std::vector<wchar_t>& wide, char* dst)
    {
        if (wide.empty())
        {
            return 0;
        }

        const int count = WideCharToMultiByte(codePage, 0, &wide[0], -1, NULL, 0, NULL, NULL);
        if (count <= 0)
        {
            return 0;
        }

        if (dst != NULL)
        {
            WideCharToMultiByte(codePage, 0, &wide[0], -1, dst, count, NULL, NULL);
        }
        return count;
    }

    // Copies the wide string (terminator included) to `dst` as raw bytes.
    // Returns the byte count; when `dst` is NULL nothing is written.
    int CopyWideToBytes(const std::vector<wchar_t>& wide, char* dst)
    {
        const int byteCount = static_cast<int>(wide.size() * sizeof(wchar_t));
        if (byteCount > 0 && dst != NULL)
        {
            std::memcpy(dst, &wide[0], static_cast<size_t>(byteCount));
        }
        return byteCount;
    }
}
#endif

// GB2312 -> UTF-8.
// Converts the NUL-terminated string `gb2312` and writes the NUL-terminated
// result to `utf8`. The caller must supply a buffer that is large enough;
// pass NULL to query the required size only.
// Returns the number of bytes required (terminator included), 0 on failure,
// and always 0 on non-Windows builds.
// NOTE(review): the source is decoded with CP_ACP (the system ANSI code page),
// not code page 936, so it is GB2312 only on systems whose ANSI code page is 936.
int CVrCodeFormatUtils::GB2312ToUtf8(const char* gb2312, char* utf8)
{
#ifdef _WIN32
    return EncodeFromWide(CP_UTF8, DecodeToWide(CP_ACP, gb2312), utf8);
#else
    (void)gb2312;
    (void)utf8;
    return 0;
#endif
}

// Heuristically checks whether the first `length` bytes of `str` are UTF-8.
// Returns true only when every multi-byte sequence is well formed (lead byte
// followed by the right number of 10xxxxxx continuation bytes) AND at least
// one non-ASCII byte is present. A pure-ASCII, empty, or NULL buffer yields
// false on purpose: such text gives no evidence of being UTF-8.
// The legacy 5- and 6-byte forms are still accepted for compatibility.
bool CVrCodeFormatUtils::IsTextUTF8(const char* str, long length)
{
    if (str == NULL)
    {
        return false;
    }

    int pending = 0;          // continuation bytes still expected for the current sequence
    bool sawNonAscii = false; // set once any byte with the high bit is seen

    for (long i = 0; i < length; ++i)
    {
        const unsigned char byte = static_cast<unsigned char>(str[i]);

        if (pending > 0)
        {
            // Inside a multi-byte sequence: every byte must look like 10xxxxxx.
            if ((byte & 0xC0) != 0x80)
            {
                return false;
            }
            --pending;
            continue;
        }

        if (byte < 0x80)
        {
            continue; // plain ASCII, stored as 0xxxxxxx
        }

        sawNonAscii = true;

        // Lead byte: derive the number of continuation bytes that must follow.
        if (byte >= 0xFE)
        {
            return false; // 0xFE and 0xFF never occur in UTF-8
        }
        else if (byte >= 0xFC)
        {
            pending = 5;
        }
        else if (byte >= 0xF8)
        {
            pending = 4;
        }
        else if (byte >= 0xF0)
        {
            pending = 3;
        }
        else if (byte >= 0xE0)
        {
            pending = 2;
        }
        else if (byte >= 0xC0)
        {
            pending = 1;
        }
        else
        {
            return false; // stray continuation byte without a lead byte
        }
    }

    // A sequence cut off at the end of the buffer is a format error.
    return pending == 0 && sawNonAscii;
}

// UTF-8 -> GB2312.
// Converts the NUL-terminated string `utf8` and writes the NUL-terminated
// result to `gb2312`. The caller must supply a buffer that is large enough;
// pass NULL to query the required size only.
// Returns the number of bytes required (terminator included), 0 on failure,
// and always 0 on non-Windows builds.
// NOTE(review): the target is CP_ACP (the system ANSI code page), not code
// page 936, so it is GB2312 only on systems whose ANSI code page is 936.
int CVrCodeFormatUtils::Utf8ToGB2312(const char* utf8, char* gb2312)
{
#ifdef _WIN32
    return EncodeFromWide(CP_ACP, DecodeToWide(CP_UTF8, utf8), gb2312);
#else
    (void)utf8;
    (void)gb2312;
    return 0;
#endif
}

// GB2312 -> Unicode (wchar_t data stored in a byte buffer).
// Decodes the NUL-terminated string `gb2312` with code page 936 and copies
// the wide characters, terminator included, to `unicode`. The caller must
// supply a buffer that is large enough; pass NULL to query the size only.
// Returns the size in BYTES, 0 on failure, and always 0 on non-Windows builds.
int CVrCodeFormatUtils::GB2312ToUnicode(const char* gb2312, char* unicode)
{
#ifdef _WIN32
    return CopyWideToBytes(DecodeToWide(kCodePageGB2312, gb2312), unicode);
#else
    (void)gb2312;
    (void)unicode;
    return 0;
#endif
}

// Unicode -> GB2312.
// `unicode` points to `size` bytes of wchar_t data; it does not need to be
// NUL-terminated, and conversion stops at the first NUL character if there is
// one. The NUL-terminated result is written to `gb2312` (pass NULL to query
// the required size only).
// Returns the number of bytes required (terminator included), 0 on failure,
// and always 0 on non-Windows builds.
int CVrCodeFormatUtils::UnicodeToGB2312(const char* unicode, int size, char*gb2312)
{
#ifdef _WIN32
    return EncodeFromWide(kCodePageGB2312, CopyRawWide(unicode, size), gb2312);
#else
    (void)unicode;
    (void)size;
    (void)gb2312;
    return 0;
#endif
}

// UTF-8 -> Unicode (wchar_t data stored in a byte buffer).
// Decodes the NUL-terminated string `utf8` and copies the wide characters,
// terminator included, to `unicode`. The caller must supply a buffer that is
// large enough; pass NULL to query the size only.
// Returns the size in BYTES (same unit as GB2312ToUnicode), 0 on failure, and
// always 0 on non-Windows builds.
int CVrCodeFormatUtils::Utf8ToUnicode(const char* utf8, char*unicode)
{
#ifdef _WIN32
    return CopyWideToBytes(DecodeToWide(CP_UTF8, utf8), unicode);
#else
    (void)utf8;
    (void)unicode;
    return 0;
#endif
}

// Unicode -> UTF-8.
// `unicode` points to `size` bytes of wchar_t data; it does not need to be
// NUL-terminated, and conversion stops at the first NUL character if there is
// one. The NUL-terminated result is written to `utf8` (pass NULL to query the
// required size only).
// Returns the number of bytes required (terminator included), 0 on failure,
// and always 0 on non-Windows builds.
int CVrCodeFormatUtils::UnicodeToUtf8(const char* unicode, int size, char* utf8)
{
#ifdef _WIN32
    return EncodeFromWide(CP_UTF8, CopyRawWide(unicode, size), utf8);
#else
    (void)unicode;
    (void)size;
    (void)utf8;
    return 0;
#endif
}