伪代码:
将所有点看成一个簇
当簇数目小于 k 时:
    对于每一个簇:
        计算总误差
        在给定的簇上面进行 k-均值聚类(k=2)
        计算将该簇一分为二之后的总误差
    选择使得误差最小的那个簇进行划分操作
def biKmeans(dataSet,k): m=np.shape(dataSet)[0] clusterAssment=np.mat(np.zeros((m,2))) centroid0=np.mean(dataSet,axis=0).tolist() centList=[centroid0] for j in range(m): clusterAssment[j,1]=distEclud(centroid0, dataSet[j,:])**2 while (len(centList) < k): lowsetSSE=np.inf for i in range(len(centList)): ptsInCurrCluster=dataSet[np.nonzero(clusterAssment[:,0].A==i)[0],:] centroidMat,splitClusterAss=kMeans(ptsInCurrCluster, 2) sseSplit=np.sum(splitClusterAss[:,1]) sseNotSplit=np.sum(clusterAssment[np.nonzero(clusterAssment[:,0].A!=i)[0],1]) if sseSplit+sseNotSplit