G++测试通过
#include<iostream>
#include<fstream>
#include <string>
#include <map>
#include <iterator>
using namespace std;

// Counts the two-byte characters found in "input.txt" and prints the total
// followed by the frequency of each distinct character.
//
// A byte with its high bit set is treated as the lead byte of a two-byte
// character (presumably GBK-encoded text -- confirm against the real input);
// that byte and the one after it form the map key. Bytes without the high
// bit are skipped.
//
// Returns 0 on success and 1 when the input file cannot be opened.
int main()
{
    ifstream is("input.txt");
    if (!is)
    {
        cerr << "无法打开 input.txt" << endl;
        return 1;
    }

    // Read the whole file into a string. There is no fixed-size buffer to
    // overflow, and end-of-file is detected by the stream iterator rather than
    // by comparing a char against EOF (which misfires on a 0xFF byte).
    const string s((istreambuf_iterator<char>(is)), istreambuf_iterator<char>());
    is.close();

    map<string, int> counter;
    int iChi = 0;
    for (string::size_type i = 0; i < s.size(); ++i)
    {
        if (!(s[i] & 0x80))
            continue;  // single-byte character: not counted
        if (i + 1 >= s.size())
            break;     // lead byte with no trail byte at end of input
        ++counter[s.substr(i, 2)];
        ++iChi;
        ++i;           // step over the trail byte just consumed
    }

    cout << "汉字总数:" << iChi << endl;
    cout << "字频:" << endl;
    for (map<string, int>::const_iterator iter = counter.begin(); iter != counter.end(); ++iter)
    {
        cout << iter->first << ":" << iter->second << '\n';
    }
    cout.flush();
    return 0;
}
假设你的系统为 Windows,即中文环境编码为 gbk。看代码:
# -*- encoding: gbk -*-
def is_chinese(uchar):
    """Return True if ``uchar`` lies in the range U+4E00..U+9FA5 (inclusive).

    Args:
        uchar: A single unicode character.

    Returns:
        True when the character compares within the two bounds, else False.
    """
    return u'\u4E00' <= uchar <= u'\u9FA5'
def count_chinese_word(filepath, encoding):
    """Count how often each Chinese character occurs in a text file.

    Args:
        filepath: Path of the text file to read.
        encoding: Name of the codec used to decode the file, e.g. 'gbk'.

    Returns:
        A dict mapping every character accepted by ``is_chinese`` to the
        number of times it occurs. If reading fails with IOError, a message
        is printed and whatever was counted so far (possibly an empty dict)
        is returned.
    """
    # Imported here so the function carries its own dependency. io.open
    # decodes while reading on both Python 2 and Python 3, which replaces the
    # Python-2-only ``line.decode(encoding)`` call on each raw line.
    import io

    _dict = {}
    try:
        with io.open(filepath, 'r', encoding=encoding) as txt_file:
            for line in txt_file:
                for uchar in line:
                    if is_chinese(uchar):
                        # dict.get replaces has_key, which Python 3 removed.
                        _dict[uchar] = _dict.get(uchar, 0) + 1
    except IOError:
        # One pre-formatted string prints the same text under the Python 2
        # print statement and the Python 3 print function.
        print("文件 %s 不存在" % filepath)
    return _dict
if __name__ == '__main__':
    import json

    # Count the characters of the sample file, which is decoded as GBK.
    _dict = count_chinese_word('内容.txt', 'gbk')
    # ensure_ascii=False keeps the characters readable instead of \uXXXX
    # escapes. The Python-2-only ``encoding`` keyword is dropped: it only
    # restated the Python 2 default and raises TypeError on Python 3.
    print(json.dumps(_dict, indent=4, ensure_ascii=False))
内容.txt:
运行:
欢迎分享,转载请注明来源:内存溢出
评论列表(0条)