相比之下,用一些文本编辑软件的词频统计功能比较现实,但高频词就需要自己去找了。这类软件的统计功能只能显示词频而不是高频词。
其实word里面的查找/替换功能就类似……
比如,在一篇文章里,点“替换”,输入“词频软件”,在替换目标中也输入“词频软件”,点“全部替换”就能显示该词在全文中的词频。
#include<IOSTREAM.H>
#include<STDIO.H>
#include<STRING.H>
#include<STDLIB.H>
#include<CONIO.H>
#include <IO.H>
#include<TIME.H>
#include <ctype.h>
#define WORDLEN 50
#define PATH 2000
/*
 * One element of the circular, doubly linked word list.
 *
 * The list head (headptr) is a node of this same type: its `word` is a
 * placeholder and its `num` holds the total number of tokens counted so far.
 */
struct node {
    char word[WORDLEN];        /* NUL-terminated token text */
    float num;                 /* occurrence count (running total on the head node) */
    struct node *pre, *next;   /* previous / next node in the ring */
};
clock_t start,end
struct node *headptr=(struct node*)malloc(sizeof(struct node))
int main(int argc,char *argv[])
{
int output(char*)
void searchAllFiles(char*)
strcpy(headptr->word," ")
headptr->num=0//用来存储单词的总数
headptr->pre=headptr
headptr->next=headptr
char paths[PATH],patht[PATH]
// printf("喊埋信请输入要查找的文件路径:\n")
//gets(paths)
if(argc<3)
{
printf("请输入程序名,搜索路径及结果文件路径:\n")
return 0
}
strcpy(paths,argv[1])
//printf("请输入要输出结果的文件路径:\n")
//gets(patht)
strcpy(patht,argv[2])
start=clock()
searchAllFiles(paths)
output(patht)
return 0
}
int searchword(char *fpath)
{
char filepath[PATH]
strcpy(filepath,fpath)
FILE *fptr
fptr=fopen(filepath,"r")
char word[20]=""
char sword[2]
char ch
int InsertAndSort(char*)
while((ch=fgetc(fptr))!=EOF)
{
if(!((ch>='a'&&ch<='z')||(ch>='A'&&ch<='Z')||(ch=='?')||(ch=='!')||(ch=='-'))&&(!strcmp(word,"")))//所得到的字符不是要处理的字符且word数组为郑轮空
continue
if((ch>='a'&&ch<='z')||(ch>='A'&&ch<='Z')||(ch=='-'))//得到的字符是字母或连字符
{
sword[0]=tolower(ch)//将其变成小写
strncat(word,sword,1)
continue
}
if(!((ch>='a'&&ch<='z')||(ch>='A'&&ch<='Z')||(ch=='-')||(ch=='?')||(ch=='!'))&&(strcmp(word,"")))//所得到的字符不是要处理的字符且word数组为非空
{
InsertAndSort(word)
strcpy(word,"")//重置word数组为空
continue
}
if(((ch=='?')||(ch=='!'))&&(!strcmp(word,"")))//得到'!'或'?'且word数组为空
{
sword[0]=ch
strncat(word,sword,1)
InsertAndSort(word)
strcpy(word,"")//重置word数组为空
continue
}
if(((ch=='?')||(ch=='!'))&&(strcmp(word,"")))//得到'!'或'?'且word数组非空
{
InsertAndSort(word)//先处理word数组
strcpy(word,"")//重置word数组为空
sword[0]=ch
strncat(word,sword,1)
InsertAndSort(word)
strcpy(word,"")//重置word数组为空
continue
}
continue
}
return 0
}
int InsertAndSort(char* word)
{
struct node *nodeptr//以下部分应该抽象成一个函数
nodeptr=headptr->next
while(nodeptr!=headptr)
{
if(!strcmp(nodeptr->word,word))//这个单词已经存在
{
nodeptr->num+=1
headptr->num+=1
if((nodeptr->num>nodeptr->pre->num)&&(nodeptr->pre!=headptr))//与之前的单词的数目进行对比,找到一个比其数目小的最大单词数目所属单词进行交换
{
struct node* searchptr//新建一个指针用于查找
searchptr=nodeptr->pre->pre
while((nodeptr->num>searchptr->num)&&(searchptr!=headptr))
searchptr=searchptr->pre
searchptr=searchptr->next//回退一个结点
float temp //交换单词数目
temp=nodeptr->num //变量的生存期?
nodeptr->num=searchptr->num
searchptr->num=temp
char tempword[WORDLEN] //交换单词
strcpy(tempword,nodeptr->word)
strcpy(nodeptr->word,searchptr->word)
strcpy(searchptr->word,tempword)
}
break//跳出第二个while循环
}
else
nodeptr=nodeptr->next//继续搜索下一个单词
}
if(nodeptr!=headptr) //判断是否已经找到这个单词
return 0 //已经找到
else
{
struct node* lastptr=(struct node*)malloc(sizeof(struct node))//在链表的尾部新建一个结点存放该单词
strcpy(lastptr->word,word)
lastptr->num=1
lastptr->pre=nodeptr->pre
lastptr->next=headptr
headptr->pre->next=lastptr
headptr->pre=lastptr
headptr->num+=1
}
return 0
}
int output(char* temppath)//输出链表的数据
{
struct node *tempnode
tempnode=headptr->next
FILE *target=fopen(temppath,"w")
if(!target)
{
printf("输入结果失败!\n")
return 0
}
else
{
while(tempnode!=headptr)
{
fprintf(target,"%4.2f",(tempnode->num/headptr->num)*100)
fputs("%",target)
fprintf(target," %s",tempnode->word)
fputs("\n",target)
tempnode=tempnode->next
}
end=clock()
fprintf(target,"the process time is:%.2f seconds\n",(double)(end-start)/(double)CLOCKS_PER_SEC)
return 0
}
}
void searchAllFiles( char *filePath )
{
struct _finddata_t fileInfo
char filePathCpy[PATH]
strcpy(filePathCpy, filePath)
int hfind = _findfirst(filePath, &fileInfo)
if( hfind == -1 ) //打开路径失败返回
{
return
}
else
{
int tag=0
while( tag != -1 ) //从第一个文件..第N个文件
{
if( strcmp( fileInfo.name, "." )==0 || strcmp( fileInfo.name, ".." )==0 ) //一点和两点分别是根目录和当前目录。
{
tag=_findnext( hfind, &fileInfo )//查找下一个配匹的文件
continue
}
//取得全路径
char fullPath[PATH]
strcpy( fullPath, filePathCpy ) //把上一个文夹的路径赋给当前的全部路径
fullPath[ strlen( fullPath ) - strlen( "*" ) ] = '\0'//去掉\\*
strcat( fullPath, fileInfo.name )
if ( fileInfo.attrib &_A_SUBDIR ) //非0,是一个文件夹
{
strcat( fullPath, "\\*" )
searchAllFiles( fullPath )//递归扫描该文件夹的子文件
}
else
{
searchword(fullPath)
}
tag=_findnext(hfind,&fileInfo)//查找下一个配匹的文件
}
_findclose(hfind)
}
}
欢迎分享,转载请注明来源:内存溢出
评论列表(0条)