2025-07-23 01:35:14 +08:00
|
|
|
|
#include "VrCodeFormatUtils.h"

#include <cstring>
#include <vector>

#ifdef _WIN32
#include "windows.h"
#endif
|
|
|
|
|
|
// GB2312 to UTF-8 conversion
|
|
|
|
|
|
/// @brief Converts a NUL-terminated ANSI-code-page string to UTF-8 (Windows only).
///
/// The source is decoded with CP_ACP, i.e. the system's active ANSI code page,
/// so the input is interpreted as GB2312/GBK only when that code page is 936.
/// NOTE(review): GB2312ToUnicode() in this file uses code page 936 explicitly;
/// confirm whether relying on CP_ACP here is intentional.
///
/// @param gb2312 NUL-terminated source string. A null pointer yields 0.
/// @param utf8   Caller-owned destination buffer. No capacity is passed in, so
///               the caller must make it large enough. When null, nothing is
///               written and the required size is returned.
/// @return Size of the UTF-8 result in bytes, including the terminating NUL;
///         0 if a conversion step fails. Builds without _WIN32 perform no
///         conversion and always return 0.
int CVrCodeFormatUtils::GB2312ToUtf8(const char* gb2312, char* utf8)
{
    int len = 0;
#ifdef _WIN32
    if (gb2312 == NULL)
        return 0;

    // Step 1: ANSI -> UTF-16. The count includes the terminating NUL.
    const int wideLen = MultiByteToWideChar(CP_ACP, 0, gb2312, -1, NULL, 0);
    if (wideLen <= 0)
        return 0;

    // Zero-filled with one spare element, so the buffer is always terminated.
    std::vector<wchar_t> wide(static_cast<size_t>(wideLen) + 1, L'\0');
    if (MultiByteToWideChar(CP_ACP, 0, gb2312, -1, &wide[0], wideLen) <= 0)
        return 0;

    // Step 2: UTF-16 -> UTF-8. Query the size first, then convert.
    len = WideCharToMultiByte(CP_UTF8, 0, &wide[0], -1, NULL, 0, NULL, NULL);
    if (len <= 0)
        return 0;

    if (utf8 != NULL)
        len = WideCharToMultiByte(CP_UTF8, 0, &wide[0], -1, utf8, len, NULL, NULL);
#else
    (void)gb2312;
    (void)utf8;
#endif
    return len;
}
|
|
|
|
|
|
|
|
|
|
|
|
// Check whether a buffer is UTF-8 encoded
|
|
|
|
|
|
/// @brief Heuristically decides whether a byte buffer is UTF-8 encoded text.
///
/// The scan checks only the lead-byte / continuation-byte structure. It accepts
/// the legacy 5- and 6-byte forms and does not reject overlong encodings.
///
/// @param str    Buffer to inspect; it need not be NUL-terminated. A null
///               pointer yields false.
/// @param length Number of bytes of @p str to inspect.
/// @return true when the buffer contains at least one multi-byte sequence and
///         every sequence is structurally well formed. false for malformed or
///         truncated sequences, and also for empty or pure-ASCII buffers, which
///         are deliberately not reported as UTF-8.
bool CVrCodeFormatUtils::IsTextUTF8(const char* str, long length)
{
    if (!str)
        return false;

    int pending = 0;      // continuation bytes still expected for the current character
    bool allAscii = true; // stays true until a byte with the high bit set is seen

    // The index is a long to match the type of `length`.
    for (long i = 0; i < length; ++i)
    {
        const unsigned char chr = static_cast<unsigned char>(str[i]);

        if (pending > 0)
        {
            // Inside a multi-byte character: every byte must look like 10xxxxxx.
            if ((chr & 0xC0) != 0x80)
                return false;
            --pending;
            continue;
        }

        if (chr < 0x80)
            continue; // plain 7-bit ASCII (0xxxxxxx)

        allAscii = false;

        // Lead byte: its high bits give the total length of the sequence.
        if (chr >= 0xFE)
            return false; // 0xFE / 0xFF are never valid lead bytes
        else if (chr >= 0xFC)
            pending = 5;  // 1111110x: 6-byte form
        else if (chr >= 0xF8)
            pending = 4;  // 111110xx: 5-byte form
        else if (chr >= 0xF0)
            pending = 3;  // 11110xxx: 4-byte form
        else if (chr >= 0xE0)
            pending = 2;  // 1110xxxx: 3-byte form
        else if (chr >= 0xC0)
            pending = 1;  // 110xxxxx: 2-byte form
        else
            return false; // stray continuation byte where a lead byte is required
    }

    // The buffer ended in the middle of a multi-byte character.
    if (pending > 0)
        return false;

    // If all bytes are ASCII the buffer is not treated as UTF-8.
    return !allAscii;
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
// UTF-8 to GB2312 conversion
|
|
|
|
|
|
/// @brief Converts a NUL-terminated UTF-8 string to the ANSI code page (Windows only).
///
/// The result is encoded with CP_ACP, i.e. the system's active ANSI code page,
/// so the output is GB2312/GBK only when that code page is 936.
/// NOTE(review): UnicodeToGB2312() in this file uses code page 936 explicitly;
/// confirm whether relying on CP_ACP here is intentional.
///
/// @param utf8   NUL-terminated UTF-8 source string. A null pointer yields 0.
/// @param gb2312 Caller-owned destination buffer. No capacity is passed in, so
///               the caller must make it large enough. When null, nothing is
///               written and the required size is returned.
/// @return Size of the converted result in bytes, including the terminating
///         NUL; 0 if a conversion step fails. Builds without _WIN32 perform no
///         conversion and always return 0.
int CVrCodeFormatUtils::Utf8ToGB2312(const char* utf8, char* gb2312)
{
    int len = 0;
#ifdef _WIN32
    if (utf8 == NULL)
        return 0;

    // Step 1: UTF-8 -> UTF-16. The count includes the terminating NUL.
    const int wideLen = MultiByteToWideChar(CP_UTF8, 0, utf8, -1, NULL, 0);
    if (wideLen <= 0)
        return 0;

    // Zero-filled with one spare element, so the buffer is always terminated.
    std::vector<wchar_t> wide(static_cast<size_t>(wideLen) + 1, L'\0');
    if (MultiByteToWideChar(CP_UTF8, 0, utf8, -1, &wide[0], wideLen) <= 0)
        return 0;

    // Step 2: UTF-16 -> ANSI. Query the size first, then convert.
    len = WideCharToMultiByte(CP_ACP, 0, &wide[0], -1, NULL, 0, NULL, NULL);
    if (len <= 0)
        return 0;

    if (gb2312 != NULL)
        len = WideCharToMultiByte(CP_ACP, 0, &wide[0], -1, gb2312, len, NULL, NULL);
#else
    (void)utf8;
    (void)gb2312;
#endif
    return len;
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
// GB2312 to Unicode (UTF-16) conversion
|
|
|
|
|
|
/// @brief Converts a NUL-terminated code-page-936 (GB2312/GBK) string to wide
///        characters and copies them out as raw bytes (Windows only).
///
/// @param gb2312  NUL-terminated source string. A null pointer yields 0.
/// @param unicode Caller-owned destination that receives the wchar_t data as
///                bytes, terminating NUL included. No capacity is passed in, so
///                the caller must make it large enough. When null, nothing is
///                written and the required size is returned.
/// @return Size of the result in BYTES (character count * sizeof(wchar_t)),
///         including the terminating NUL; 0 if the conversion fails. Builds
///         without _WIN32 perform no conversion and always return 0.
int CVrCodeFormatUtils::GB2312ToUnicode(const char* gb2312, char* unicode)
{
    int len = 0;
#ifdef _WIN32
    if (gb2312 == NULL)
        return 0;

    const UINT nCodePage = 936; // GB2312

    // Size in wide characters, terminating NUL included.
    const int wideLen = MultiByteToWideChar(nCodePage, 0, gb2312, -1, NULL, 0);
    if (wideLen <= 0)
        return 0;

    len = static_cast<int>(wideLen * sizeof(wchar_t));
    if (unicode == NULL)
        return len;

    // Convert into a wchar_t buffer first: `unicode` is a char* and may not be
    // suitably aligned for direct wchar_t stores.
    std::vector<wchar_t> wide(static_cast<size_t>(wideLen) + 1, L'\0');
    if (MultiByteToWideChar(nCodePage, 0, gb2312, -1, &wide[0], wideLen) <= 0)
        return 0;

    memcpy(unicode, &wide[0], static_cast<size_t>(len));
#else
    (void)gb2312;
    (void)unicode;
#endif
    return len;
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
// Unicode (UTF-16) to GB2312 conversion
|
|
|
|
|
|
/// @brief Converts wide-character data, supplied as raw bytes, to a code-page-936
///        (GB2312/GBK) string (Windows only).
///
/// @param unicode Source bytes holding wchar_t code units. A null pointer
///                yields 0.
/// @param size    Size of @p unicode in BYTES. A trailing partial code unit is
///                ignored; a negative value yields 0. Conversion stops at the
///                first NUL code unit, or at the end of the data if there is none.
/// @param gb2312  Caller-owned destination buffer. No capacity is passed in, so
///                the caller must make it large enough. When null, nothing is
///                written and the required size is returned.
/// @return Size of the converted result in bytes, including the terminating
///         NUL; 0 if the conversion fails. Builds without _WIN32 perform no
///         conversion and always return 0.
int CVrCodeFormatUtils::UnicodeToGB2312(const char* unicode, int size, char*gb2312)
{
    int len = 0;
#ifdef _WIN32
    if (unicode == NULL || size < 0)
        return 0;

    const UINT nCodePage = 936; // GB2312

    // Copy whole code units into a zero-filled buffer that is one element
    // longer than the data. The conversion below uses length -1 (scan to NUL),
    // so the buffer must be terminated even when the caller's data is not.
    const size_t units = static_cast<size_t>(size) / sizeof(wchar_t);
    std::vector<wchar_t> wide(units + 1, L'\0');
    if (units > 0)
        memcpy(&wide[0], unicode, units * sizeof(wchar_t));

    // Query the size first, then convert.
    len = WideCharToMultiByte(nCodePage, 0, &wide[0], -1, NULL, 0, NULL, NULL);
    if (len <= 0)
        return 0;

    if (gb2312 != NULL)
        len = WideCharToMultiByte(nCodePage, 0, &wide[0], -1, gb2312, len, NULL, NULL);
#else
    (void)unicode;
    (void)size;
    (void)gb2312;
#endif
    return len;
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
// UTF-8 to Unicode (UTF-16) conversion
|
|
|
|
|
|
/// @brief Converts a NUL-terminated UTF-8 string to wide characters and copies
///        them out as raw bytes (Windows only).
///
/// The result of the sizing call is captured and scaled to bytes, matching
/// GB2312ToUnicode(). Previously it was discarded, so nothing was converted
/// and 0 was always returned.
///
/// @param utf8    NUL-terminated UTF-8 source string. A null pointer yields 0.
/// @param unicode Caller-owned destination that receives the wchar_t data as
///                bytes, terminating NUL included. No capacity is passed in, so
///                the caller must make it large enough. When null, nothing is
///                written and the required size is returned.
/// @return Size of the result in BYTES (character count * sizeof(wchar_t)),
///         including the terminating NUL; 0 if the conversion fails. Builds
///         without _WIN32 perform no conversion and always return 0.
int CVrCodeFormatUtils::Utf8ToUnicode(const char* utf8, char*unicode)
{
    int len = 0;
#ifdef _WIN32
    if (utf8 == NULL)
        return 0;

    // Size in wide characters, terminating NUL included.
    const int wideLen = MultiByteToWideChar(CP_UTF8, 0, utf8, -1, NULL, 0);
    if (wideLen <= 0)
        return 0;

    len = static_cast<int>(wideLen * sizeof(wchar_t));
    if (unicode == NULL)
        return len;

    // Convert into a wchar_t buffer first: `unicode` is a char* and may not be
    // suitably aligned for direct wchar_t stores.
    std::vector<wchar_t> wide(static_cast<size_t>(wideLen) + 1, L'\0');
    if (MultiByteToWideChar(CP_UTF8, 0, utf8, -1, &wide[0], wideLen) <= 0)
        return 0;

    memcpy(unicode, &wide[0], static_cast<size_t>(len));
#else
    (void)utf8;
    (void)unicode;
#endif
    return len;
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
// Unicode (UTF-16) to UTF-8 conversion
|
|
|
|
|
|
/// @brief Converts wide-character data, supplied as raw bytes, to a UTF-8
///        string (Windows only).
///
/// @param unicode Source bytes holding wchar_t code units. A null pointer
///                yields 0.
/// @param size    Size of @p unicode in BYTES. A trailing partial code unit is
///                ignored; a negative value yields 0. Conversion stops at the
///                first NUL code unit, or at the end of the data if there is none.
/// @param utf8    Caller-owned destination buffer. No capacity is passed in, so
///                the caller must make it large enough. When null, nothing is
///                written and the required size is returned.
/// @return Size of the UTF-8 result in bytes, including the terminating NUL;
///         0 if the conversion fails. Builds without _WIN32 perform no
///         conversion and always return 0.
int CVrCodeFormatUtils::UnicodeToUtf8(const char* unicode, int size, char* utf8)
{
    int len = 0;
#ifdef _WIN32
    if (unicode == NULL || size < 0)
        return 0;

    // Copy whole code units into a zero-filled buffer that is one element
    // longer than the data. The conversion below uses length -1 (scan to NUL),
    // so the buffer must be terminated even when the caller's data is not.
    const size_t units = static_cast<size_t>(size) / sizeof(wchar_t);
    std::vector<wchar_t> wide(units + 1, L'\0');
    if (units > 0)
        memcpy(&wide[0], unicode, units * sizeof(wchar_t));

    // Query the size first, then convert.
    len = WideCharToMultiByte(CP_UTF8, 0, &wide[0], -1, NULL, 0, NULL, NULL);
    if (len <= 0)
        return 0;

    if (utf8 != NULL)
        len = WideCharToMultiByte(CP_UTF8, 0, &wide[0], -1, utf8, len, NULL, NULL);
#else
    (void)unicode;
    (void)size;
    (void)utf8;
#endif
    return len;
}
|