Windows下,C++ 操作 MySQL、URL、UTF-8 文件的编码问题(读取和写入)

Windows下,C++ 操作 MySQL、URL、UTF-8 文件的编码问题(读取和写入),第1张

Mysql

读取 MySQL 表,中文乱码

在sql语句执行前执行mysql_query(&mysql, "SET NAMES GB2312");

    // Issue SET NAMES GB2312 on this connection before running the actual query,
    // so that Chinese text read from the table is not garbled. The return status
    // of mysql_query is not checked here.
    mysql_query(&mysql, "SET NAMES GB2312");

写入中文到mysql表,中文乱码

在sql语句执行前执行mysql_query(&mysql, "SET NAMES UTF8");

    // Issue SET NAMES UTF8 on this connection before running the INSERT/UPDATE,
    // so that Chinese text written to the table is not garbled. The return status
    // of mysql_query is not checked here.
    mysql_query(&mysql, "SET NAMES UTF8");
URL

把 UTF-8 编码的 URL 数据解码成普通字符串(本地 ANSI/GBK 编码)时,用 UrlDecode_UTF8

把 UTF-8 编码的字符串转成普通的 std::string(本地 ANSI/GBK 编码)时,用 UTF8_To_string

反之(普通字符串转 UTF-8),分别使用 string_To_UTF8 和 UrlEncode_UTF8

以下还提供GBK格式url的编码解码

Url:

/**
 * Converts a 4-bit value to its uppercase hexadecimal ASCII digit.
 *
 * The type is spelled `unsigned char` rather than `byte` (assumed to be the
 * Windows SDK alias for unsigned char) so the helper also builds where that
 * alias is missing or clashes with C++17 `std::byte`.
 *
 * @param x Value to convert; only the low four bits are used.
 * @return One of '0'..'9' or 'A'..'F'.
 */
unsigned char toHex(const unsigned char& x)
{
    static const char kHexDigits[] = "0123456789ABCDEF";
    // Masking keeps the lookup in range even if the caller passes a full byte.
    return static_cast<unsigned char>(kHexDigits[x & 0x0F]);
}

/**
 * Converts one hexadecimal ASCII digit to its numeric value.
 *
 * Accepts both uppercase and lowercase digits, since percent-escapes such as
 * "%e4" are as valid as "%E4". The type is spelled `unsigned char` rather than
 * `byte` (assumed to be the Windows SDK alias for unsigned char).
 *
 * @param x Character to convert.
 * @return 0..15 for a valid hex digit; 0 for any other character.
 */
unsigned char fromHex(const unsigned char& x)
{
    if (x >= '0' && x <= '9')
        return static_cast<unsigned char>(x - '0');
    if (x >= 'A' && x <= 'F')
        return static_cast<unsigned char>(x - 'A' + 10);
    if (x >= 'a' && x <= 'f')
        return static_cast<unsigned char>(x - 'a' + 10);
    return 0; // not a hex digit
}

// Percent-decodes a URL-encoded string and returns the decoded bytes unchanged
// (no charset conversion happens here; the bytes keep whatever encoding the
// sender used, e.g. GBK).
//
// "%XX" (hex digits in either case) becomes the byte 0xXX and '+' becomes a
// space. A '%' that is not followed by two hex digits is copied through
// literally instead of reading past the end of the input.
    std::string URLDecode(std::string strOrg)
    {
        // Maps one hex digit to 0..15, or -1 when the character is not a hex digit.
        const auto hexValue = [](char c) -> int {
            if (c >= '0' && c <= '9') return c - '0';
            if (c >= 'A' && c <= 'F') return c - 'A' + 10;
            if (c >= 'a' && c <= 'f') return c - 'a' + 10;
            return -1;
        };

        std::string sOut;
        sOut.reserve(strOrg.size());
        for (size_t ix = 0; ix < strOrg.size(); ++ix)
        {
            const char cur = strOrg[ix];
            if (cur == '%' && ix + 2 < strOrg.size())
            {
                const int hi = hexValue(strOrg[ix + 1]);
                const int lo = hexValue(strOrg[ix + 2]);
                if (hi >= 0 && lo >= 0)
                {
                    sOut += static_cast<char>((hi << 4) | lo);
                    ix += 2; // skip the two digits just consumed
                    continue;
                }
            }

            // Plain character, '+', or a malformed escape kept as-is.
            sOut += (cur == '+') ? ' ' : cur;
        }
        return sOut;
    }
    
    // Percent-encodes a byte string for use in a URL. No charset conversion is
    // done: the bytes are escaped in whatever encoding the caller supplies
    // (e.g. GBK).
    //
    // ASCII letters and digits are copied through; every other byte, including
    // space (emitted as "%20", not '+'), is written as "%XX" with uppercase hex.
    // The alphanumeric test is done on explicit ASCII ranges rather than with
    // isalnum(), whose answer for bytes >= 0x80 depends on the current C locale.
    std::string URLEncode(std::string strOrg)
    {
        static const char kHexDigits[] = "0123456789ABCDEF";

        std::string sOut;
        sOut.reserve(strOrg.size());
        for (const char c : strOrg)
        {
            const unsigned char ch = static_cast<unsigned char>(c);
            const bool isAsciiAlnum = (ch >= '0' && ch <= '9')
                                   || (ch >= 'A' && ch <= 'Z')
                                   || (ch >= 'a' && ch <= 'z');
            if (isAsciiAlnum)
            {
                sOut += c;
            }
            else
            {
                sOut += '%';
                sOut += kHexDigits[ch >> 4];
                sOut += kHexDigits[ch & 0x0F];
            }
        }
        return sOut;
    }
    
    // Converts text in the system ANSI code page (CP_ACP; GBK on a Simplified
    // Chinese Windows) to UTF-8 by going through UTF-16.
    //
    // Returns an empty string for empty input or when either Win32 conversion
    // fails. The whole input is converted using its explicit length, and the
    // buffers are owned by std::wstring/std::string so nothing leaks if an
    // allocation throws.
    std::string GBKToUTF8(const std::string strGBK)
    {
        const int srcLen = static_cast<int>(strGBK.size());
        // Also rejects inputs too long for the int-based Win32 API.
        if (srcLen <= 0 || static_cast<std::string::size_type>(srcLen) != strGBK.size())
            return std::string();

        // Step 1: ANSI -> UTF-16.
        const int wideLen = MultiByteToWideChar(CP_ACP, 0, strGBK.data(), srcLen, nullptr, 0);
        if (wideLen <= 0)
            return std::string();
        std::wstring wide(static_cast<std::wstring::size_type>(wideLen), L'\0');
        if (MultiByteToWideChar(CP_ACP, 0, strGBK.data(), srcLen, &wide[0], wideLen) <= 0)
            return std::string();

        // Step 2: UTF-16 -> UTF-8.
        const int utf8Len = WideCharToMultiByte(CP_UTF8, 0, wide.data(), wideLen, nullptr, 0, nullptr, nullptr);
        if (utf8Len <= 0)
            return std::string();
        std::string strOutUTF8(static_cast<std::string::size_type>(utf8Len), '\0');
        const int written = WideCharToMultiByte(CP_UTF8, 0, wide.data(), wideLen, &strOutUTF8[0], utf8Len, nullptr, nullptr);
        if (written <= 0)
            return std::string();
        strOutUTF8.resize(static_cast<std::string::size_type>(written));
        return strOutUTF8;
    }
    
    // Converts UTF-8 text to the system ANSI code page (CP_ACP; GBK on a
    // Simplified Chinese Windows) by going through UTF-16.
    //
    // Returns an empty string for empty input or when either Win32 conversion
    // fails. The whole input is converted using its explicit length, and the
    // buffers are owned by std::wstring/std::string so nothing leaks if an
    // allocation throws.
    std::string UTF8ToGBK(const std::string strUTF8)
    {
        const int srcLen = static_cast<int>(strUTF8.size());
        // Also rejects inputs too long for the int-based Win32 API.
        if (srcLen <= 0 || static_cast<std::string::size_type>(srcLen) != strUTF8.size())
            return std::string();

        // Step 1: UTF-8 -> UTF-16.
        const int wideLen = MultiByteToWideChar(CP_UTF8, 0, strUTF8.data(), srcLen, nullptr, 0);
        if (wideLen <= 0)
            return std::string();
        std::wstring wide(static_cast<std::wstring::size_type>(wideLen), L'\0');
        if (MultiByteToWideChar(CP_UTF8, 0, strUTF8.data(), srcLen, &wide[0], wideLen) <= 0)
            return std::string();

        // Step 2: UTF-16 -> ANSI.
        const int ansiLen = WideCharToMultiByte(CP_ACP, 0, wide.data(), wideLen, nullptr, 0, nullptr, nullptr);
        if (ansiLen <= 0)
            return std::string();
        std::string strTemp(static_cast<std::string::size_type>(ansiLen), '\0');
        const int written = WideCharToMultiByte(CP_ACP, 0, wide.data(), wideLen, &strTemp[0], ansiLen, nullptr, nullptr);
        if (written <= 0)
            return std::string();
        strTemp.resize(static_cast<std::string::size_type>(written));
        return strTemp;
    }
    
    // URL编码,编码为GBK
    std::string UrlEncode_GBK(std::string strOrg)
    {
        return URLEncode(strOrg);
    }
    
    // Produces a UTF-8 percent-encoded string: strOrg is first converted with
    // GBKToUTF8, then the resulting bytes are escaped with URLEncode.
    std::string UrlEncode_UTF8(std::string strOrg)
    {
        const std::string utf8Bytes = GBKToUTF8(strOrg);
        return URLEncode(utf8Bytes);
    }
    
    // URL解码,解码为GBK
    std::string UrlDecode_GBK(std::string strOrg)
    {
        return URLDecode(strOrg);
    }
    
    // Decodes a URL whose escaped bytes are UTF-8: strOrg is percent-decoded
    // with URLDecode and the resulting bytes are then passed through UTF8ToGBK.
    std::string UrlDecode_UTF8(std::string strOrg)
    {
        const std::string utf8Bytes = URLDecode(strOrg);
        return UTF8ToGBK(utf8Bytes);
    }

std::string 和 utf-8 string 相互转换:

   // Converts a string in the system ANSI code page (CP_ACP) to UTF-8 by going
   // through UTF-16.
   //
   // Returns an empty string for empty input or when either Win32 conversion
   // fails. The whole input is converted using its explicit length, and the
   // buffers are owned by std::wstring/std::string, so nothing leaks if an
   // allocation throws and no buffer size depends on sizeof(wchar_t).
   std::string string_To_UTF8(const std::string& str)
    {
        const int srcLen = static_cast<int>(str.size());
        // Also rejects inputs too long for the int-based Win32 API.
        if (srcLen <= 0 || static_cast<std::string::size_type>(srcLen) != str.size())
            return std::string();

        // Step 1: ANSI -> UTF-16.
        const int nwLen = ::MultiByteToWideChar(CP_ACP, 0, str.data(), srcLen, nullptr, 0);
        if (nwLen <= 0)
            return std::string();
        std::wstring wide(static_cast<std::wstring::size_type>(nwLen), L'\0');
        if (::MultiByteToWideChar(CP_ACP, 0, str.data(), srcLen, &wide[0], nwLen) <= 0)
            return std::string();

        // Step 2: UTF-16 -> UTF-8.
        const int nLen = ::WideCharToMultiByte(CP_UTF8, 0, wide.data(), nwLen, nullptr, 0, nullptr, nullptr);
        if (nLen <= 0)
            return std::string();
        std::string retStr(static_cast<std::string::size_type>(nLen), '\0');
        const int written = ::WideCharToMultiByte(CP_UTF8, 0, wide.data(), nwLen, &retStr[0], nLen, nullptr, nullptr);
        if (written <= 0)
            return std::string();
        retStr.resize(static_cast<std::string::size_type>(written));
        return retStr;
    }
    
    // Converts a UTF-8 string to the system ANSI code page (CP_ACP) by going
    // through UTF-16.
    //
    // Returns an empty string for empty input or when either Win32 conversion
    // fails. The whole input is converted using its explicit length, and the
    // buffers are owned by std::wstring/std::string, so nothing leaks if an
    // allocation throws and no buffer size depends on sizeof(wchar_t).
    std::string UTF8_To_string(const std::string& str)
    {
        const int srcLen = static_cast<int>(str.size());
        // Also rejects inputs too long for the int-based Win32 API.
        if (srcLen <= 0 || static_cast<std::string::size_type>(srcLen) != str.size())
            return std::string();

        // Step 1: UTF-8 -> UTF-16.
        const int nwLen = MultiByteToWideChar(CP_UTF8, 0, str.data(), srcLen, nullptr, 0);
        if (nwLen <= 0)
            return std::string();
        std::wstring wide(static_cast<std::wstring::size_type>(nwLen), L'\0');
        if (MultiByteToWideChar(CP_UTF8, 0, str.data(), srcLen, &wide[0], nwLen) <= 0)
            return std::string();

        // Step 2: UTF-16 -> ANSI.
        const int nLen = WideCharToMultiByte(CP_ACP, 0, wide.data(), nwLen, nullptr, 0, nullptr, nullptr);
        if (nLen <= 0)
            return std::string();
        std::string retStr(static_cast<std::string::size_type>(nLen), '\0');
        const int written = WideCharToMultiByte(CP_ACP, 0, wide.data(), nwLen, &retStr[0], nLen, nullptr, nullptr);
        if (written <= 0)
            return std::string();
        retStr.resize(static_cast<std::string::size_type>(written));
        return retStr;
    }
std::fstream

用 fstream 读取 UTF-8 文件时,先把每行内容从 UTF-8 转为 Unicode(宽字符)并保存到 std::wstring 中;写回文件时,再把 std::wstring 转成 UTF-8 后交给 fstream 写出。

示例:

    #include <clocale>
    #include <fstream>
    #include <iostream>
    #include <locale>
    #include <string>
    
    // Example fragment (meant to live inside a function returning int; `tool`
    // is an object exposing the conversion helpers shown in this article).
    
    // Read the UTF-8 file line by line, converting each line to a wide string
    // and collecting everything in wstr for later writing.
    std::fstream  ifile("path", std::ios::in | std::ios::out);
    std::string buffer;
    std::wstring wstr = L"";
    if (!ifile.is_open())
    {
        std::cout << "文件打开失败!" << std::endl;
        return -1;
    }
    while (std::getline(ifile, buffer))
    {
        // getline strips the newline, so it is re-added after conversion.
        wstr += tool.UTF8ToUnicode(buffer) + L'\n';
    }
    ifile.close();
    
    // Write the collected wide string back out as UTF-8.
    std::fstream ofile("path", std::ios::out);
    if (!ofile.is_open())
    {
        std::cout << "文件打开失败!" << std::endl;
        return -1;
    }
    // Only touch the stream's locale once we know the file is open. There is
    // no need to restore the old locale afterwards: the stream is closed
    // immediately after the write.
    ofile.imbue(std::locale(std::locale("chs"), "", LC_CTYPE));
    
    ofile << tool.ToUTF8(wstr );
    ofile.close();
    // 转码函数
    
    // 1
    // Converts a wide string to UTF-8 using std::wstring_convert.
    //
    // On Windows wchar_t holds UTF-16 code units, so the UTF-8 <-> UTF-16
    // facet is used there; codecvt_utf8<wchar_t> would treat the input as
    // UCS-2 and mishandle characters outside the Basic Multilingual Plane.
    // Elsewhere the plain UTF-8 facet is kept.
    //
    // Best-effort: returns an empty string if the conversion throws.
    std::string UnicodeToUTF8(const std::wstring& wstr)
    {
    #if defined(_WIN32)
        using Utf8Facet = std::codecvt_utf8_utf16<wchar_t>;
    #else
        using Utf8Facet = std::codecvt_utf8<wchar_t>;
    #endif

        std::string ret;
        try {
            std::wstring_convert<Utf8Facet> wcv;
            ret = wcv.to_bytes(wstr);
        }
        catch (const std::exception&) {
            // Conversion failed (e.g. std::range_error on invalid input):
            // report it to the caller as an empty result.
            ret.clear();
        }
        return ret;
    }
    
    // Converts a UTF-8 string to a wide string using std::wstring_convert.
    //
    // On Windows wchar_t holds UTF-16 code units, so the UTF-8 <-> UTF-16
    // facet is used there; codecvt_utf8<wchar_t> would produce UCS-2 and
    // cannot represent characters outside the Basic Multilingual Plane.
    // Elsewhere the plain UTF-8 facet is kept.
    //
    // Best-effort: returns an empty string if the conversion throws.
    std::wstring UTF8ToUnicode(const std::string& str)
    {
    #if defined(_WIN32)
        using Utf8Facet = std::codecvt_utf8_utf16<wchar_t>;
    #else
        using Utf8Facet = std::codecvt_utf8<wchar_t>;
    #endif

        std::wstring ret;
        try {
            std::wstring_convert<Utf8Facet> wcv;
            ret = wcv.from_bytes(str);
        }
        catch (const std::exception&) {
            // Conversion failed (e.g. std::range_error on malformed UTF-8):
            // report it to the caller as an empty result.
            ret.clear();
        }
        return ret;
    }
    
    // Converts `len` UTF-16 code units starting at `buffer` to UTF-8 with
    // WideCharToMultiByte.
    //
    // buffer: source characters; a null pointer yields an empty string.
    // len:    number of wchar_t to convert. A negative value lets the API
    //         treat the input as NUL-terminated; the terminator it emits is
    //         removed from the result.
    // Returns the UTF-8 bytes, or an empty string on failure or empty input.
    std::string ToUTF8(const wchar_t* buffer, int len)
    {
        if (buffer == nullptr || len == 0)
            return "";

        const int size = ::WideCharToMultiByte(CP_UTF8, 0, buffer, len, nullptr, 0, nullptr, nullptr);
        if (size <= 0)
            return "";

        std::string newbuffer(static_cast<std::string::size_type>(size), '\0');
        // Write through &newbuffer[0]; c_str() is a read-only view and must
        // not be written through.
        const int written = ::WideCharToMultiByte(CP_UTF8, 0, buffer, len,
            &newbuffer[0], size, nullptr, nullptr);
        if (written <= 0)
            return "";
        newbuffer.resize(static_cast<std::string::size_type>(written));

        // With a NUL-terminated source the API also converts the terminator;
        // drop it so the std::string does not end in an embedded '\0'.
        if (len < 0 && !newbuffer.empty() && newbuffer.back() == '\0')
            newbuffer.pop_back();

        return newbuffer;
    }
    
    // 2
    // Convenience overload: forwards the wide string's character data and its
    // length (narrowed to int) to ToUTF8(const wchar_t*, int).
    std::string ToUTF8(const std::wstring& str)
    {
        const wchar_t* const chars = str.c_str();
        const int count = static_cast<int>(str.size());
        return ToUTF8(chars, count);
    }

欢迎分享,转载请注明来源:内存溢出

原文地址: http://outofmemory.cn/langs/2990182.html

(0)
打赏 微信扫一扫 微信扫一扫 支付宝扫一扫 支付宝扫一扫
上一篇 2022-09-23
下一篇 2022-09-23

发表评论

登录后才能评论

评论列表(0条)

保存