#include<stdio.h>
#include<math.h>
int x[15][3]={{3,1,0},{3,2,0},{2,2,0},{3,3,0},{2,3,0},{8,8,0},{8,9,0},{9,8,0},{9,7,0},{9,9,0},{16,5,0},{16,4,0},{15,5,0},{15,6,0},{16,6,0}}
double oldcentral[3][3]//旧的中心点的坐标
double newcentral[3][3]//计算距离,分类后求平均值得到的新的中心点的坐标
int clas[15]//15个点各属于哪个类,类的编号从0开始
int clsno
int minDist(double x,double y,double z) //计算三个数的最小值,返回其序号。
{
if(x<=y&&x<=z)
return 0
if(y<x&&y<=z)
return 1
if(z<x&&z<y)
return 2
}
double distA(int i,int j)//计算两个点的距离,i和j分别是数组x和newcentral中的序号
{
double distx,disty,distz,dist
distx=(double)x[i][0]-(double)newcentral[j][0]
disty=(double)x[i][1]-(double)newcentral[j][1]
distz=(double)x[i][2]-(double)newcentral[j][2]
dist=sqrt(distx*distx+disty*disty+distz*distz)
return dist
}
void main()
{
int count[3]//记录每一类的个数;
int i,j
double dist[3]//求坐标点距离3个中心点距离时用到的变量,
double cenL,cenLx,cenLy,cenLz//最后求新旧中心点距离的时候用到的变量
printf("The 15 points are:\n")//把所以点的坐标打印一遍,非必要语句
for(i=0i<15i++)
printf("%d %d %d \n",x[i][0],x[i][1],x[i][2])
for(i=0i<3i++)//新旧中心点赋初值。
for(j=0j<3j++)
{
newcentral[i][j]=(double)x[i][j]
oldcentral[i][j]=-9999.0
}
for(i=0i<3i++)//头3个点作为初始的中心点。
{
clas[i]=i
cls[i]=i
}
while(1)//无限循环,退出条件是新旧中心点的距离小于0.005.
{
for(i=0i<3i++)//记录每一类的个数的数组赋初值
count[i]=0
for(i=0i<3i++)//新中心点的坐标拷贝到旧中心点数组中,因新中心点需重新计算。
for(j=0j<3j++)
oldcentral[i][j]=newcentral[i][j]
for(i=0i<15i++)//对15个点分别计算到中心点的距离。
{
for(j=0j<3j++)
dist[j]=distA(i,j)
clsno=minDist(dist[0],dist[1],dist[2])//求距离最小值,返回距离最小的对应中心点坐标。
clas[i]=clsno//将此点归到距离最小的那一类。
count[clsno]++
}
for(i=0i<3i++)//对新中心点坐标赋初值,进行下面的计算。
for(j=0j<3j++)
newcentral[i][j]=0.0
for(i=0i<15i++)//对每一类的坐标点计算其坐标之和。
for(j=0j<3j++)
newcentral[clas[i]][j]+=x[i][j]
for(i=0i<3i++)//坐标之和除以count数组元素,即得中心点坐标
for(j=0j<3j++)
newcentral[i][j]=newcentral[i][j]/count[i]
int flag=0//标志
for(i=0i<3i++)//求新旧中心点的距离
{
cenLx=newcentral[i][0]-oldcentral[i][0]
cenLy=newcentral[i][1]-oldcentral[i][1]
cenLz=newcentral[i][2]-oldcentral[i][2]
cenL=sqrt(cenLx*cenLx+cenLy*cenLy+cenLz*cenLz)
if(cenL>0.005)//只要有一个距离过大,表明未收敛,重新开始循环
flag=1
}
if(flag!=1)//只有当标志未被设置,退出while循环。
break
}
for(i=0i<15i++) //打印15个点各自所属的类。
{
printf("point %d(%d,%d,%d) belongs to class %d\n",i,x[i][0],x[i][1],x[i][2],clas[i])
}
}
/***************************************************************************** *
* KMEANS *
* *
*****************************************************************************/
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <conio.h>
#include <math.h>
// FUNCTION PROTOTYPES
// DEFINES
#define SUCCESS 1
#define FAILURE 0
#define TRUE1
#define FALSE 0
#define MAXVECTDIM 20
#define MAXPATTERN 20
#define MAXCLUSTER 10
char *f2a(double x, int width){
char cbuf[255]
char *cp
int i,k
int d,s
cp=fcvt(x,width,&d,&s)
if (s) {
strcpy(cbuf,"-")
}
else {
strcpy(cbuf," ")
} /* endif */
if (d>0) {
for (i=0i<di++) {
cbuf[i+1]=cp[i]
} /* endfor */
cbuf[d+1]=0
cp+=d
strcat(cbuf,".")
strcat(cbuf,cp)
} else {
if (d==0) {
strcat(cbuf,".")
strcat(cbuf,cp)
}
else {
k=-d
strcat(cbuf,".")
for (i=0i<ki++) {
strcat(cbuf,"0")
} /* endfor */
strcat(cbuf,cp)
} /* endif */
} /* endif */
cp=&cbuf[0]
return cp
}
// ***** Defined structures &classes *****
struct aCluster {
double Center[MAXVECTDIM]
int Member[MAXPATTERN] //Index of Vectors belonging to this cluster
int NumMembers
}
struct aVector {
double Center[MAXVECTDIM]
int Size
}
class System {
private:
double Pattern[MAXPATTERN][MAXVECTDIM+1]
aCluster Cluster[MAXCLUSTER]
int NumPatterns // Number of patterns
int SizeVector // Number of dimensions in vector
int NumClusters // Number of clusters
void DistributeSamples() // Step 2 of K-means algorithm
int CalcNewClustCenters()// Step 3 of K-means algorithm
double EucNorm(int, int) // Calc Euclidean norm vector
int FindClosestCluster(int)//ret indx of clust closest to pattern
//whose index is arg
public:
system()
int LoadPatterns(char *fname) // Get pattern data to be clustered
void InitClusters() // Step 1 of K-means algorithm
void RunKMeans() // Overall control K-means process
void ShowClusters() // Show results on screen
void SaveClusters(char *fname)// Save results to file
void ShowCenters()
}
void System::ShowCenters(){
int i,j
printf("Cluster centers:\n")
for (i=0i<NumClustersi++) {
Cluster[i].Member[0]=i
printf("ClusterCenter[%d]=(%f,%f)\n",i,Cluster[i].Center[0],Cluster[i].Center[1])
} /* endfor */
printf("\n")
}
int System::LoadPatterns(char *fname){
FILE *InFilePtr
inti,j
double x
if((InFilePtr = fopen(fname, "r")) == NULL)
return FAILURE
fscanf(InFilePtr, "%d", &NumPatterns) // Read # of patterns
fscanf(InFilePtr, "%d", &SizeVector) // Read dimension of vector
fscanf(InFilePtr, "%d", &NumClusters) // Read # of clusters for K-Means
for (i=0i<NumPatternsi++) { // For each vector
for (j=0j<SizeVectorj++) { // create a pattern
fscanf(InFilePtr,"%lg",&x) // consisting of all elements
Pattern[i][j]=x
} /* endfor */
} /* endfor */
printf("Input patterns:\n")
for (i=0i<NumPatternsi++) {
printf("Pattern[%d]=(%2.3f,%2.3f)\n",i,Pattern[i][0],Pattern[i][1])
} /* endfor */
printf("\n--------------------\n")
return SUCCESS
}
//***************************************************************************
// InitClusters *
// Arbitrarily assign a vector to each of the K clusters *
// We choose the first K vectors to do this *
//***************************************************************************
void System::InitClusters(){
int i,j
printf("Initial cluster centers:\n")
for (i=0i<NumClustersi++) {
Cluster[i].Member[0]=i
for (j=0j<SizeVectorj++) {
Cluster[i].Center[j]=Pattern[i][j]
} /* endfor */
} /* endfor */
for (i=0i<NumClustersi++) {
printf("ClusterCenter[%d]=(%f,%f)\n",i,Cluster[i].Center[0],Cluster[i].Center[1])
} /* endfor */
printf("\n")
}
void System::RunKMeans(){
int converged
int pass
pass=1
converged=FALSE
while (converged==FALSE) {
printf("PASS=%d\n",pass++)
DistributeSamples()
converged=CalcNewClustCenters()
ShowCenters()
} /* endwhile */
}
double System::EucNorm(int p, int c){ // Calc Euclidean norm of vector difference
double dist,x // between pattern vector, p, and cluster
int i // center, c.
char zout[128]
char znum[40]
char *pnum
pnum=&znum[0]
strcpy(zout,"d=sqrt(")
printf("The distance from pattern %d to cluster %d is calculated as:\n",c,p)
dist=0
for (i=0i<SizeVector i++){
x=(Cluster[c].Center[i]-Pattern[p][i])*(Cluster[c].Center[i]-Pattern[p][i])
strcat(zout,f2a(x,4))
if (i==0)
strcat(zout,"+")
dist += (Cluster[c].Center[i]-Pattern[p][i])*(Cluster[c].Center[i]-Pattern[p][i])
} /* endfor */
printf("%s)\n",zout)
return dist
}
int System::FindClosestCluster(int pat){
int i, ClustID
double MinDist, d
MinDist =9.9e+99
ClustID=-1
for (i=0i<NumClustersi++) {
d=EucNorm(pat,i)
printf("Distance from pattern %d to cluster %d is %f\n\n",pat,i,sqrt(d))
if (d<MinDist) {
MinDist=d
ClustID=i
} /* endif */
} /* endfor */
if (ClustID<0) {
printf("Aaargh")
exit(0)
} /* endif */
return ClustID
}
void System::DistributeSamples(){
int i,pat,Clustid,MemberIndex
//Clear membership list for all current clusters
for (i=0i<NumClustersi++){
Cluster[i].NumMembers=0
}
for (pat=0pat<NumPatternspat++) {
//Find cluster center to which the pattern is closest
Clustid= FindClosestCluster(pat)
printf("patern %d assigned to cluster %d\n\n",pat,Clustid)
//post this pattern to the cluster
MemberIndex=Cluster[Clustid].NumMembers
Cluster[Clustid].Member[MemberIndex]=pat
Cluster[Clustid].NumMembers++
} /* endfor */
}
int System::CalcNewClustCenters(){
int ConvFlag,VectID,i,j,k
double tmp[MAXVECTDIM]
char xs[255]
char ys[255]
char nc1[20]
char nc2[20]
char *pnc1
char *pnc2
char *fpv
pnc1=&nc1[0]
pnc2=&nc2[0]
ConvFlag=TRUE
printf("The new cluster centers are now calculated as:\n")
for (i=0i<NumClustersi++) { //for each cluster
pnc1=itoa(Cluster[i].NumMembers,nc1,10)
pnc2=itoa(i,nc2,10)
strcpy(xs,"Cluster Center")
strcat(xs,nc2)
strcat(xs,"(1/")
strcpy(ys,"(1/")
strcat(xs,nc1)
strcat(ys,nc1)
strcat(xs,")(")
strcat(ys,")(")
for (j=0j<SizeVectorj++) {// clear workspace
tmp[j]=0.0
} /* endfor */
for (j=0j<Cluster[i].NumMembersj++) { //traverse member vectors
VectID=Cluster[i].Member[j]
for (k=0k<SizeVectork++) { //traverse elements of vector
tmp[k] += Pattern[VectID][k] // add (member) pattern elmnt into temp
if (k==0) {
strcat(xs,f2a(Pattern[VectID][k],3))
} else {
strcat(ys,f2a(Pattern[VectID][k],3))
} /* endif */
} /* endfor */
if(j<Cluster[i].NumMembers-1){
strcat(xs,"+")
strcat(ys,"+")
}
else {
strcat(xs,")")
strcat(ys,")")
}
} /* endfor */
for (k=0k<SizeVectork++) {//traverse elements of vector
tmp[k]=tmp[k]/Cluster[i].NumMembers
if (tmp[k] != Cluster[i].Center[k])
ConvFlag=FALSE
Cluster[i].Center[k]=tmp[k]
} /* endfor */
printf("%s,\n",xs)
printf("%s\n",ys)
} /* endfor */
return ConvFlag
}
void System::ShowClusters(){
int cl
for (cl=0cl<NumClusterscl++) {
printf("\nCLUSTER %d ==>[%f,%f]\n", cl,Cluster[cl].Center[0],Cluster[cl].Center[1])
} /* endfor */
}
void System::SaveClusters(char *fname){
}
main(int argc, char *argv[]) {
System kmeans
if (argc<2) {
printf("USAGE: KMEANS PATTERN_FILE\n")
exit(0)
}
if (kmeans.LoadPatterns(argv[1])==FAILURE ){
printf("UNABLE TO READ PATTERN_FILE:%s\n",argv[1])
exit(0)
}
kmeans.InitClusters()
kmeans.RunKMeans()
kmeans.ShowClusters()
}
程序需要一个数据文件格式如下:
5 2 3
2 3 4 5 10 12 5 1 12 10
其中,5表示数据的数量,2表示数据的维度,3表示聚类的数量。
后面每两个实数对应一个数据点。
不过这个程序始数据维数固定为2,后来想改成4以下的任意维度,但没有改完,所以其实只能为2维。
我已经对程序做了注释。
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <conio.h>
#include <math.h>
#define SUCCESS 1
#define FAILURE 0
#define TRUE 1
#define FALSE 0
#define MAXVECTDIM 4 // 数据最大维数 (看来这个程序写了一半,维数实际上只能为2)
#define MAXPATTERN 1588 // 数据数量最大值
#define MAXCLUSTER 10 // 聚类最大值
// 聚类结构
struct aCluster {
double Center[MAXVECTDIM]// 中心/引力数据对象
int Member[MAXPATTERN] // 该聚类中数据在Pattern中的索引
int NumMembers // 数据的数量
}
struct aVector {
double Center[MAXVECTDIM]
int Size
}
static double Pattern[MAXPATTERN][MAXVECTDIM + 1]// 所有数据存放在这个数组中
// 所以的东西都搁System类里面了
class System {
private :
aCluster Cluster[MAXCLUSTER]// 聚类数组
int NumPatterns // 输入数据的数量
int SizeVector // 数据的维数
int NumClusters // 数据的聚类数
void DistributeSamples()// 根据中心聚类,重新分配数据到不同的聚类
int CalcNewClustCenters() // 重新计算新的聚类中心
double EucNorm(int, int)// 误差准则
int FindClosestCluster(int) // 查找最接近的聚类
public :
System() {}
int LoadPatterns(char * fname) // 从文件中读取数据
void InitClusters()// 初始化聚类
void RunKMeans() // 执行K-Means算法
void ShowClusters()// 显示聚类
void SaveClusters(char * fname)// 保存聚类到文件中
void ShowCenters() // 显示聚类中心数据
}
void System::ShowCenters() {
int i
printf("Cluster centers:\n")
for (i = 0i <NumClustersi++) {
Cluster[i].Member[0] = i
printf("ClusterCenter[%d]=(%f,%f)\n", i, Cluster[i].Center[0],Cluster[i].Center[1])
int b=0
}
printf("\n")
}
int System::LoadPatterns(char * fname) {
FILE * InFilePtr
int i, j
double x
if ( (InFilePtr = fopen(fname, "r")) == NULL)
return FAILURE
// 读入数据的数量,维度和聚类数的数量
fscanf(InFilePtr, "%d", &NumPatterns)
fscanf(InFilePtr, "%d", &SizeVector)
fscanf(InFilePtr, "%d", &NumClusters)
// 读入数据
for (i = 0i <NumPatternsi++) {
for (j = 0j <SizeVectorj++) {
fscanf(InFilePtr, "%lg", &x)
Pattern[i][j] = x
}
}
// 打印读入的数据
printf("Input patterns:\n")
for (i = 0i <NumPatternsi++) {
printf("Pattern[%d]=(%2.3f,%2.3f,%d,%d)\n", i, Pattern[i][0], Pattern[i][1],Pattern[i][2], Pattern[i][3])
}
printf("\n--------------------\n")
return SUCCESS
}
void System::InitClusters() {
int i, j
SizeVector=2// 看来开始数据维数固定为2,后来想改成4以下的任意维度,但没有改完
printf("Initial cluster centers:\n")
// 把前NumClusters个数据作为NumClusters个聚类的数据中心
for (i = 0i <NumClustersi++) {
Cluster[i].Member[0] = i// 记录这个数据的索引到第i个聚类中
for (j = 0j <SizeVectorj++) {
Cluster[i].Center[j] = Pattern[i][j]// 把这个数据作为数据中心
}
}
// 打印聚类的数据中心
for (i = 0i <NumClustersi++) {
printf("ClusterCenter[%d]=(%f,%f)\n", i, Cluster[i].Center[0], Cluster[i].Center[1])
}
printf("\n")
}
void System::RunKMeans() {
int converged// 是否收敛
int pass // 计算的趟数
pass = 1 // 第一趟
converged = FALSE// 没有收敛
while (converged == FALSE) { // 没有收敛的情况下反复跑
printf("PASS=%d\n", pass++)// 打印趟数
DistributeSamples()// 重新分配数据
converged = CalcNewClustCenters()// 计算新的聚类中心,如果计算结果和上次相同,认为已经收敛
ShowCenters()// 显示聚类中心
}
}
// 第p个数据到第c个聚类中心的误差准则(距离的平方)
double System::EucNorm(int p, int c) {
double dist, x// x可能是调试用,没有实际作用
int i
dist = 0
// 将每个维度的差的平方相加,得到距离的平方
for (i = 0i <SizeVectori++) {
x = (Cluster[c].Center[i] - Pattern[p][i]) *
(Cluster[c].Center[i] - Pattern[p][i])
dist += (Cluster[c].Center[i] - Pattern[p][i]) *
(Cluster[c].Center[i] - Pattern[p][i])
}
return dist
}
// 查找第pat个数据的最近聚类
int System ::FindClosestCluster(int pat) {
int i, ClustID/*最近聚类的ID*/
double MinDist/*最小误差*/, d
MinDist = 9.9e+99// 初始为一个极大的值
ClustID = -1
// 依次计算3个聚类到第pat个数据的误差,找出最小值
for (i = 0i <NumClustersi++) {
d = EucNorm(pat, i)
printf("Distance from pattern %d to cluster %d is %f\n", pat, i, sqrt(d))
if (d <MinDist) { // 如果小于前最小值,将改值作为当前最小值
MinDist = d
ClustID = i
}
}
if (ClustID <0) { // 没有找到??! —— 这个应该不可能发生,如果出现表示出现了不可思议的错误
printf("Aaargh")
exit(0)
}
return ClustID
}
void System ::DistributeSamples() {
int i, pat, Clustid, MemberIndex
// 将上次的记录的该聚类中的数据数量清0,重新开始分配数据
for (i = 0i <NumClustersi++) {
Cluster[i].NumMembers = 0
}
// 找出每个数据的最近聚类数据中心,并将该数据分配到该聚类
for (pat = 0pat <NumPatternspat++) {
Clustid = FindClosestCluster(pat)
printf("patern %d assigned to cluster %d\n\n", pat, Clustid)
MemberIndex = Cluster[Clustid].NumMembers // MemberIndex是当前记录的数据数量,也是新加入数据在数组中的位置
Cluster[Clustid].Member[MemberIndex] = pat// 将数据索引加入Member数组
Cluster[Clustid].NumMembers++ // 聚类中的数据数量加1
}
}
int System::CalcNewClustCenters() {
int ConvFlag, VectID, i, j, k
double tmp[MAXVECTDIM]// 临时记录新的聚类中心,因为需要比较,不能直接记录
char xs[255]
char szBuf[255]
ConvFlag = TRUE
printf("The new cluster centers are now calculated as:\n")
// 逐个计算NumClusters个聚类
for (i = 0i <NumClustersi++) {
strcpy(xs,"")
printf("Cluster Center %d (1/%d):\n",i,Cluster[i].NumMembers)
// tmp所有维度数值清0
for (j = 0j <SizeVectorj++) {
tmp[j] = 0.0
}
// 计算聚类中所有数据的所有维度数值的和,为下一步计算均值做准备
for (j = 0j <Cluster[i].NumMembersj++) {
VectID = Cluster[i].Member[j]
for (k = 0k <SizeVectork++) {
tmp[k] += Pattern[VectID][k]
sprintf(szBuf,"%d ",Pattern[VectID][k])
strcat(xs,szBuf)
}
printf("%s\n",xs)// 输出刚才计算的数据
strcpy(xs,"")
}
// 求出均值,并判断是否和上次相同
for (k = 0k <SizeVectork++) {
if(Cluster[i].NumMembers!=0)
tmp[k] = tmp[k] / Cluster[i].NumMembers
if (tmp[k] != Cluster[i].Center[k]) // 如果不同,则认为没有收敛
ConvFlag = FALSE
Cluster[i].Center[k] = tmp[k]// 用新值代替旧值
}
printf("%s,\n", xs)
}
return ConvFlag// 返回收敛情况
}
// 显示聚类中心数据
void System::ShowClusters() {
int cl
for (cl = 0cl <NumClusterscl++) {
printf("\nCLUSTER %d ==>[%f,%f]\n", cl, Cluster[cl].Center[0],
Cluster[cl].Center[1])
}
}
// 没有实现的函数
void System::SaveClusters(char * fname) {
}
#include <windows.h>
void main(int argc, char * argv[]) {
System kmeans
// 如果没有提供参数,显示用法
if (argc <2) {
printf("USAGE: KMEANS PATTERN_FILE\n")
exit(0)
}
// 参数1 为数据文件名,从中读入数据
if (kmeans.LoadPatterns(argv[1]) == FAILURE) {
printf("UNABLE TO READ PATTERN_FILE:%s\n", argv[1])
exit(0)
}
kmeans.InitClusters()
kmeans.RunKMeans()
kmeans.ShowClusters()
kmeans.ShowCenters()
return
}
欢迎分享,转载请注明来源:内存溢出
评论列表(0条)