目标检测的Tricks | 【Trick13】使用kmeans与遗传算法聚类anchor_python

如有错误，恳请指出。

这篇博客的代码来着博主：太阳花的小绿豆，具体的解释说明可以见参考资料，这里只贴上代码留作笔记使用。

ps：参考资料解释得非常的详细

文章目录

1. kmeans聚类中心
2. kmeans聚类anchor
3. kmeans+遗传算法聚类anchor
4. 遗传算法补充介绍

1. kmeans聚类中心

参考代码：plot_kmeans.py

import numpy as np
from matplotlib import pyplot as plt

np.random.seed(0)
colors = np.array(['blue', 'black'])

def plot_clusters(data, cls, clusters, title=""):
    # 对每一个数据点分配一个颜色
    if cls is None:
        c = [colors[0]] * data.shape[0]
    else:
        c = colors[cls].tolist()

    # 画出所有数据点
    plt.scatter(data[:, 0], data[:, 1], c=c)
    # 画出两个聚类中心点
    for i, clus in enumerate(clusters):
        plt.scatter(clus[0], clus[1], c='gold', marker='*', s=150)
    plt.title(title)
    plt.show()
    plt.close()


def distances(data, clusters):
    xy1 = data[:, None]  # [N,1,2]
    xy2 = clusters[None]  # [1,K,2]
    # xy2 - xy1: (300, 2, 2)
    # np.power(xy2 - xy1, 2): (300, 2, 2)
    # d: (300, 2)
    d = np.sum(np.power(xy2 - xy1, 2), axis=-1)
    return d


# k-means算法: 坐标系上两个点的欧氏距离作为样本之间的距离进行聚类,流程如下:
# 1 手动设定簇的个数k，假设k=2
# 2 在所有样本中随机选取k个样本作为簇的初始中心
# 3 计算每个样本离每个簇中心的距离（这里以欧式距离为例），然后将样本划分到离它最近的簇中
# 4 更新簇的中心，计算每个簇中所有样本的均值（方法不唯一）作为新的簇中心
# 5 重复第3步到第4步直到簇中心不在变化或者簇中心变化很小满足给定终止条件
def k_means(data, k, dist=np.mean):
    """
    k-means methods
    Args:
        data: 需要聚类的data
        k: 簇数(聚成几类)
        dist: 更新簇坐标的方法
    """
    data_number = data.shape[0]
    last_nearest = np.zeros((data_number,))

    # 随机在数据中选择k个聚类中心
    clusters = data[np.random.choice(data_number, k, replace=False)]
    print(f"random cluster: \n {clusters}")
    # plot
    plot_clusters(data, None, clusters, "random clusters")

    step = 0
    while True:
        # 距离为个数据点分别与聚类中心的平方和(不开方)
        d = distances(data, clusters)
        # 对全部数据点按照距离分类,离哪个聚类中心近就分配到哪里
        current_nearest = np.argmin(d, axis=1)

        # plot
        plot_clusters(data, current_nearest, clusters, f"step {step}")

        # 如果上一次判断最邻近位置与当前的判断完全一致表示聚类分类完成
        # (list/tuple).all()表示当前元祖全部为True
        if (last_nearest == current_nearest).all():
            break  # clusters won't change
        for cluster in range(k):
            # use np.mean update clusters
            # 简单来说就是对一类的全部数据点求一个均值,该均值就是聚类中心: (k1, 2) -> (2)
            clusters[cluster] = dist(data[current_nearest == cluster], axis=0)

        # 更新类别
        last_nearest = current_nearest
        step += 1

    return clusters


def main():
    x1, y1 = [np.random.normal(loc=1., size=150) for _ in range(2)]
    x2, y2 = [np.random.normal(loc=5., size=150) for _ in range(2)]

    x = np.concatenate([x1, x2])
    y = np.concatenate([y1, y2])

    plt.scatter(x, y, c='blue')
    plt.title("initial data")
    plt.show()
    plt.close()

    clusters = k_means(np.concatenate([x[:, None], y[:, None]], axis=-1), k=2)
    print(f"k-means fluster: \n {clusters}")


if __name__ == '__main__':
    main()

2. kmeans聚类anchor

参考代码：yolo_kmeans.py

import numpy as np


def wh_iou(wh1, wh2):
    # Returns the nxm IoU matrix. wh1 is nx2, wh2 is mx2
    wh1 = wh1[:, None]  # [N,1,2]
    wh2 = wh2[None]     # [1,M,2]
    # prod用于两个最小边的相乘，也就是交集面积
    inter = np.minimum(wh1, wh2).prod(2)  # [N,M]
    return inter / (wh1.prod(2) + wh2.prod(2) - inter)  # iou = inter / (area1 + area2 - inter)


# k-means算法: 1-IOU(bboxes, anchors)作为样本之间的距离进行聚类,流程如下:
# 1 在所有的bboxes中随机挑选k个作为簇的中心。
# 2 计算每个bboxes离每个簇的距离1-IOU(bboxes, anchors)
# 3 计算每个bboxes距离最近的簇中心，并分配到离它最近的簇中
# 4 根据每个簇中的bboxes重新计算簇中心，这里默认使用的是计算中值，自己也可以改成其他方法
# 5 重复3到4直到每个簇中元素不在发生变化
def k_means(boxes, k, dist=np.median):
    """
    yolo k-means methods
    refer: https://github.com/qqwweee/keras-yolo3/blob/master/kmeans.py
    Args:
        boxes: 需要聚类的bboxes
        k: 簇数(聚成几类)
        dist: 更新簇坐标的方法(默认使用中位数，比均值效果略好)
    """
    box_number = boxes.shape[0]
    last_nearest = np.zeros((box_number,))
    # np.random.seed(0)  # 固定随机数种子

    # 在所有的bboxes中随机挑选k个作为簇的中心
    clusters = boxes[np.random.choice(box_number, k, replace=False)]

    while True:
        # 计算每个bboxes离每个簇的距离 1-IOU(bboxes, anchors)
        distances = 1 - wh_iou(boxes, clusters)
        # 计算每个bboxes距离最近的簇中心,其实也就是对每一个bbox进行分类(k个聚类中心中选一个)
        current_nearest = np.argmin(distances, axis=1)
        # 每个簇中元素不在发生变化说明以及聚类完毕
        if (last_nearest == current_nearest).all():
            break  # clusters won't change
        for cluster in range(k):
            # 根据每个簇中的bboxes重新计算簇中心, 这里选择用中位数来更新簇中心
            clusters[cluster] = dist(boxes[current_nearest == cluster], axis=0)

        last_nearest = current_nearest

    return clusters

3. kmeans+遗传算法聚类anchor

首先获取数据集的ground ture，获取样本尺寸，然后根据这些样本来对3个特征层分别聚类3个anchor，所以一共就是输出9个anchor，将anchor按面积的大小由小到大排序，依次分配给3个预测特征层即可。

其中聚类的过程中，配合遗传算法，使得anchor存在小幅度的变动，增强了多样性与泛化性。

ps：以下代码的主要说明见注释

读取数据集中的gtbox参考代码，read_voc.py：

import os
from tqdm import tqdm
from lxml import etree


class VOCDataSet(object):
    def __init__(self, voc_root, year="2012", txt_name: str = "train.txt"):
        assert year in ["2007", "2012"], "year must be in ['2007', '2012']"
        self.root = os.path.join(voc_root, "VOCdevkit", f"VOC{year}")
        self.annotations_root = os.path.join(self.root, "Annotations")

        # read train.txt or val.txt file
        txt_path = os.path.join(self.root, "ImageSets", "Main", txt_name)
        assert os.path.exists(txt_path), "not found {} file.".format(txt_name)

        with open(txt_path) as read:
            self.xml_list = [os.path.join(self.annotations_root, line.strip() + ".xml")
                             for line in read.readlines() if len(line.strip()) > 0]

        # check file
        assert len(self.xml_list) > 0, "in '{}' file does not find any information.".format(txt_path)
        for xml_path in self.xml_list:
            assert os.path.exists(xml_path), "not found '{}' file.".format(xml_path)

    def __len__(self):
        return len(self.xml_list)

    def parse_xml_to_dict(self, xml):
        """
        将xml文件解析成字典形式，参考tensorflow的recursive_parse_xml_to_dict
        Args:
            xml: xml tree obtained by parsing XML file contents using lxml.etree

        Returns:
            Python dictionary holding XML contents.
        """

        if len(xml) == 0:  # 遍历到底层，直接返回tag对应的信息
            return {xml.tag: xml.text}

        result = {}
        for child in xml:
            child_result = self.parse_xml_to_dict(child)  # 递归遍历标签信息
            if child.tag != 'object':
                result[child.tag] = child_result[child.tag]
            else:
                if child.tag not in result:  # 因为object可能有多个，所以需要放入列表里
                    result[child.tag] = []
                result[child.tag].append(child_result[child.tag])
        return {xml.tag: result}

    def get_info(self):
        im_wh_list = []
        boxes_wh_list = []
        for xml_path in tqdm(self.xml_list, desc="read data info."):
            # read xml
            with open(xml_path) as fid:
                xml_str = fid.read()
            xml = etree.fromstring(xml_str)
            data = self.parse_xml_to_dict(xml)["annotation"]
            im_height = int(data["size"]["height"])
            im_width = int(data["size"]["width"])

            wh = []
            for obj in data["object"]:
                xmin = float(obj["bndbox"]["xmin"])
                xmax = float(obj["bndbox"]["xmax"])
                ymin = float(obj["bndbox"]["ymin"])
                ymax = float(obj["bndbox"]["ymax"])
                wh.append([(xmax - xmin) / im_width, (ymax - ymin) / im_height])

            if len(wh) == 0:
                continue

            im_wh_list.append([im_width, im_height])
            boxes_wh_list.append(wh)

        return im_wh_list, boxes_wh_list

kmeans+遗传算法聚类anchor参考代码：

import random
import numpy as np
from tqdm import tqdm
from scipy.cluster.vq import kmeans

from read_voc import VOCDataSet
from yolo_kmeans import k_means, wh_iou

# 简要分析:
# 首先计算每个gt与9个anchor的宽高比，获取最小的宽高比, 宽高比较小才说明gt与anchor是比较匹配的
# 然后再挑选其中宽高比最大也就是为每个gt挑选最合适的anchor
# 这里的适应度设置为种群中满足阈值条件的宽高比的平均值，回召率就是查看满足条件的宽高比的数目多少
def anchor_fitness(k: np.ndarray, wh: np.ndarray, thr: float):  # mutation fitness
    r = wh[:, None] / k[None]         # r: {ndrray:(15774, 9, 2)}
    x = np.minimum(r, 1. / r).min(2)  # x: {ndrray:(15774, 9)}
    # x = wh_iou(wh, k)     # iou metric
    best = x.max(1)         # best: {ndrray:(15774,)} 表示在最小宽高比重选择值最大的那一个, 选择的anchor即为最适应
    f = (best * (best > thr).astype(np.float32)).mean()  # fitness
    bpr = (best > thr).astype(np.float32).mean()  # best possible recall
    return f, bpr


# k-means聚类 + Genetic Algorithm遗传算法，在k-means聚类的结果上进行mutation变异，流程如下:
# 1 获取数据集中每个样本的长宽im_wh以及每个bounding boxes的长宽boxes_wh
# 2 将每张图片中长宽的最大值等比例缩放到指定大小img_size
# 3 对bounding boxes由相对大小(小数)到绝对大小的转换(整数)，获取完整的bounding boxes列表wh0
# 4 筛选bounding boxes，保留wh都大于等于两个像素的bounding boxes，得到wh列表
# 5 使用k-means聚类得到k个anchors
# 6 使用遗传算法随机对聚类出的k个anchors的长宽进行变异，如果变异后效果变得更好就将变异后的结果赋值给anchors，
# 如果变异后效果变差就跳过，默认变异1000次。 ps:这里使用anchor_fitness方法计算得到的fitness(适应度)进行对变异效果评估
# 7 将最终变异得到的anchors按照面积进行排序并返回
def main(img_size=512, n=9, thr=0.25, gen=1000):
    # 从数据集中读取所有图片的wh以及对应bboxes的wh
    dataset = VOCDataSet(voc_root="E:\学习\机器学习\数据集\VOC2012", year="2012", txt_name="train.txt")
    im_wh, boxes_wh = dataset.get_info()

    # 最大边缩放到img_size
    im_wh = np.array(im_wh, dtype=np.float32)
    shapes = img_size * im_wh / im_wh.max(1, keepdims=True)
    # [l * s for s, l in zip(shapes, boxes_wh)]: {list: 5717}
    wh0 = np.concatenate([l * s for s, l in zip(shapes, boxes_wh)])  # wh

    # Filter 过滤掉小目标
    i = (wh0 < 3.0).any(1).sum()
    if i:
        print(f'WARNING: Extremely small objects found. {i} of {len(wh0)} labels are < 3 pixels in size.')
    wh = wh0[(wh0 >= 2.0).any(1)]  # 只保留wh都大于等于2个像素的box

    # Kmeans calculation
    k = k_means(wh, n)

    # 按面积排序
    k = k[np.argsort(k.prod(1))]  # sort small to large
    f, bpr = anchor_fitness(k, wh, thr)
    print("kmeans: " + " ".join([f"[{int(i[0])}, {int(i[1])}]" for i in k]))
    print(f"fitness: {f:.5f}, best possible recall: {bpr:.5f}")

    # Evolve
    # 遗传算法(在kmeans的结果基础上变异mutation)
    npr = np.random
    f, sh, mp, s = anchor_fitness(k, wh, thr)[0], k.shape, 0.9, 0.1  # fitness, generations, mutation prob, sigma
    pbar = tqdm(range(gen), desc=f'Evolving anchors with Genetic Algorithm:')  # progress bar
    for _ in pbar:
        # 对v矩阵[9,2]进行变异来对聚类出来的k个中心进行些许变化，其中v的数据在1左右轻微浮动
        v = np.ones(sh)  # v：{ndarray: (9, 2)}
        while (v == 1).all():  # mutate until a change occurs (prevent duplicates)
            v = ((npr.random(sh) < mp) * random.random() * npr.randn(*sh) * s + 1).clip(0.3, 3.0)  # min=0.3 / max=3.0
        kg = (k.copy() * v).clip(min=2.0)
        # 重新计算适应度，如果更好则进化
        fg, bpr = anchor_fitness(kg, wh, thr)
        if fg > f:  # 如果变异后的适用度比上一轮的好，那么就进化，这里是用宽高比的平均值来表述适应度的选择
            f, k = fg, kg.copy()
            pbar.desc = f'Evolving anchors with Genetic Algorithm: fitness = {f:.4f}'

    # 按面积排序
    k = k[np.argsort(k.prod(1))]  # sort small to large
    print("genetic: " + " ".join([f"[{int(i[0])}, {int(i[1])}]" for i in k]))
    print(f"fitness: {f:.5f}, best possible recall: {bpr:.5f}")


if __name__ == "__main__":
    main()

4. 遗传算法补充介绍

如果简单的看代码，可以还不了解遗传算法的精髓，因为这里没有涉及编码、解码与染色体的概念。只是简单的变异然后计算适应度，如果适应度更高则变异成功。

这里更详细的遗传算法介绍可以查看参考资料2，博主：重学CS，同样写得非常的详细。这里贴上其使用遗传算法计算一个二元函数的最优求解过程

参考代码：


import numpy as np
import matplotlib.pyplot as plt
from matplotlib import cm
from mpl_toolkits.mplot3d import Axes3D

DNA_SIZE = 24
POP_SIZE = 200
CROSSOVER_RATE = 0.8
MUTATION_RATE = 0.005
N_GENERATIONS = 50
X_BOUND = [-3, 3]
Y_BOUND = [-3, 3]

# 定义的一个优化问题
def F(x, y):
	return 3*(1-x)**2*np.exp(-(x**2)-(y+1)**2)- 10*(x/5 - x**3 - y**5)*np.exp(-x**2-y**2)- 1/3**np.exp(-(x+1)**2 - y**2)


# 绘制3D图像
def plot_3d(ax):
	X = np.linspace(*X_BOUND, 100)
	Y = np.linspace(*Y_BOUND, 100)
	X,Y = np.meshgrid(X, Y)
	Z = F(X, Y)
	ax.plot_surface(X,Y,Z,rstride=1,cstride=1,cmap=cm.coolwarm)
	ax.set_zlim(-10,10)
	ax.set_xlabel('x')
	ax.set_ylabel('y')
	ax.set_zlabel('z')
	plt.pause(3)
	plt.show()


# 解码过程
# 说明: 设置DNA_SIZE=24（一个实数DNA长度）,两个实数x,y一共用48位二进制编码，我同时将x和y编码到同一个48位的二进制串里，每一个变量用24位表示
# 		奇数24列为x的编码表示，偶数24列为y的编码表示
def translateDNA(pop): #pop表示种群矩阵，一行表示一个二进制编码表示的DNA，矩阵的行数为种群数目
	x_pop = pop[:,1::2]#奇数列表示X
	y_pop = pop[:,::2] #偶数列表示y
	
	#pop:(POP_SIZE,DNA_SIZE)*(DNA_SIZE,1) --> (POP_SIZE,1)
	x = x_pop.dot(2**np.arange(DNA_SIZE)[::-1])/float(2**DNA_SIZE-1)*(X_BOUND[1]-X_BOUND[0])+X_BOUND[0]
	y = y_pop.dot(2**np.arange(DNA_SIZE)[::-1])/float(2**DNA_SIZE-1)*(Y_BOUND[1]-Y_BOUND[0])+Y_BOUND[0]
	return x,y


# 交叉变异 *** 作
# 说明: 交叉是指每一个个体是由父亲和母亲两个个体繁殖产生，子代个体的DNA（二进制串）获得了一半父亲的DNA，一半母亲的DNA.
#		但是这里的一半并不是真正的一半，这个位置叫做交配点，是随机产生的，可以是染色体的任意位置。通过交叉子代获得了一半来自父亲一半来自母亲的DNA
def crossover_and_mutation(pop, CROSSOVER_RATE = 0.8):
	new_pop = []
	for father in pop:		#遍历种群中的每一个个体，将该个体作为父亲
		child = father		#孩子先得到父亲的全部基因（这里我把一串二进制串的那些0，1称为基因）
		if np.random.rand() < CROSSOVER_RATE:			#产生子代时不是必然发生交叉，而是以一定的概率发生交叉
			mother = pop[np.random.randint(POP_SIZE)]	#再种群中选择另一个个体，并将该个体作为母亲
			cross_points = np.random.randint(low=0, high=DNA_SIZE*2)	#随机产生交叉的点
			child[cross_points:] = mother[cross_points:]		#孩子得到位于交叉点后的母亲的基因
		mutation(child)	#每个后代有一定的机率发生变异
		new_pop.append(child)

	return new_pop

# 变异 *** 作
# 说明: 在交叉过程外，子代自身可能发生变异，使得其DNA即不来自父亲，也不来自母亲，在某个位置上发生随机改变
#		通常就是改变DNA的一个二进制位（0变到1，或者1变到0）
def mutation(child, MUTATION_RATE=0.003):
	if np.random.rand() < MUTATION_RATE: 				#以MUTATION_RATE的概率进行变异
		mutate_point = np.random.randint(0, DNA_SIZE*2)	#随机产生一个实数，代表要变异基因的位置
		child[mutate_point] = child[mutate_point]^1 	#将变异点的二进制为反转


# 适应度
# 说明: 求最大值的问题中可以直接用可能解（个体）对应的函数的函数值的大小来评估，这样可能解对应的函数值越大越有可能被保留下来
def get_fitness(pop): 
    x,y = translateDNA(pop)  # 解码, 将二进制串映射到指定范围内（也就是区间[-3, 3]）
    # pred是将可能解带入函数F中得到的预测值，因为后面的选择过程需要根据个体适应度确定每个个体被保留下来的概率，而概率不能是负值，所以减去预测中的最小值把适应度值的最小区间提升到从0开始
    # 但是如果适应度为0，其对应的概率也为0，表示该个体不可能在选择中保留下来，这不符合算法思想，遗传算法不绝对否定谁也不绝对肯定谁，所以最后加上了一个很小的正数。
	pred = F(x, y)
	return (pred - np.min(pred)) + 1e-3 #减去最小的适应度是为了防止适应度出现负数，通过这一步fitness的范围为[0, np.max(pred)-np.min(pred)],最后在加上一个很小的数防止出现为0的适应度


# 种群选择
# 说明: 选择则是根据新个体的适应度进行，但同时不意味着完全以适应度高低为导向（选择top k个适应度最高的个体，容易陷入局部最优解）
#		因为单纯选择适应度高的个体将可能导致算法快速收敛到局部最优解而非全局最优解，我们称之为早熟
#		作为折中，遗传算法依据原则：适应度越高，被选择的机会越高，而适应度低的，被选择的机会就低
def select(pop, fitness):    # nature selection wrt pop's fitness
	# 这里主要是使用了choice里的最后一个参数p，参数p描述了从np.arange(POP_SIZE)里选择每一个元素的概率，概率越高约有可能被选中
    idx = np.random.choice(np.arange(POP_SIZE), size=POP_SIZE, replace=True,
                           p=(fitness)/(fitness.sum()) )	# 返回的是被选择种群的所以，这里可能是重复的索引
    # 最后返回被选中的个体即可
    return pop[idx]


# 打印相关信息
def print_info(pop):
	fitness = get_fitness(pop)
	max_fitness_index = np.argmax(fitness)
	print("max_fitness:", fitness[max_fitness_index])
	x,y = translateDNA(pop)
	print("最优的基因型：", pop[max_fitness_index])
	print("(x, y):", (x[max_fitness_index], y[max_fitness_index]))


if __name__ == "__main__":
	fig = plt.figure()
	ax = Axes3D(fig)	
	plt.ion()#将画图模式改为交互模式，程序遇到plt.show不会暂停，而是继续执行
	plot_3d(ax)

	pop = np.random.randint(2, size=(POP_SIZE, DNA_SIZE*2)) #matrix (POP_SIZE, DNA_SIZE)
	for _ in range(N_GENERATIONS):#迭代N代
		x,y = translateDNA(pop)
		if 'sca' in locals(): 
			sca.remove()
		sca = ax.scatter(x, y, F(x,y), c='black', marker='o');plt.show();plt.pause(0.1)
		pop = np.array(crossover_and_mutation(pop, CROSSOVER_RATE))
		#F_values = F(translateDNA(pop)[0], translateDNA(pop)[1])#x, y --> Z matrix
		fitness = get_fitness(pop)
		pop = select(pop, fitness) #选择生成新的种群
	
	print_info(pop)
	plt.ioff()
	plot_3d(ax)

以上为完整的代码，解码、适应度与选择、交叉变异这些步骤是遗传算法的核心模块，将这些模块在主函数中迭代起来，让种群去进化，核心的迭代代码如下所示：

# 随机生成种群 matrix (POP_SIZE, DNA_SIZE)
pop = np.random.randint(2, size=(POP_SIZE, DNA_SIZE*2)) 
for _ in range(N_GENERATIONS):	# 种群迭代进化N_GENERATIONS代
	crossover_and_mutation(pop, CROSSOVER_RATE)	# 种群通过交叉变异产生后代
	fitness = get_fitness(pop)	# 对种群中的每个个体进行评估
	pop = select(pop, fitness) 	# 选择生成新的种群

参考资料：

使用k-means聚类anchors：https://blog.csdn.net/qq_37541097/article/details/119647026
遗传算法详解附python代码实现：https://blog.csdn.net/ha_ha_ha233/article/details/91364937

欢迎分享，转载请注明来源：内存溢出

原文地址: http://outofmemory.cn/langs/735162.html

目标检测的Tricks | 【Trick13】使用kmeans与遗传算法聚类anchor

发表评论

评论列表（0条）