本文文件仓库 本文所用到的所有内容均保存在该仓库中:https://github.com/chmoe/FaceRecognize_Superpixel_Colorblock
运行环境 项目 版本 Device MacMini(2020) M1 OS macOS Big [email protected] RAM 16G SSD 1T Python 3.8.6 based conda IDE PyCharm 2021.2.2(Community Edition)
面部识别 具体要求 2021年9月24日,教授为我布置了研究生的第一个课题,目的是测试我的编程能力的样子。内容是使用以下四种面容检测器框出人脸。
Haar特徴量 + Cascade識別器 HOG特徴量 + SVM識別器 CNN MTCNN 参考链接:https://iatom.hatenablog.com/entry/2020/11/01/152307
# Implementation - Haar features + Cascade classifier
import copy
import os

import cv2

cascade_fn = "./haarcascade_frontalface_alt.xml"
face_cascade = cv2.CascadeClassifier(cascade_fn)

# cv2.imwrite does not create missing directories, so make sure the target exists.
os.makedirs('./result/haar_face', exist_ok=True)

for i in range(37):
    img = cv2.imread(r'./picture/' + str(i) + '.jpg')
    if img is None:
        # imread reports a missing/unreadable file by returning None;
        # skip it instead of crashing inside cv2.resize.
        print('skip ./picture/{}.jpg: cannot be read'.format(i))
        continue
    img = cv2.resize(img, dsize=(480, 640))
    face_frame = copy.deepcopy(img)  # clean copy, so crops carry no drawings

    # The cascade runs on a histogram-equalised grayscale image.
    gray_img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    gray_img = cv2.equalizeHist(gray_img)

    dets = face_cascade.detectMultiScale(gray_img,
                                         scaleFactor=1.3,
                                         minNeighbors=3,
                                         minSize=(30, 30),
                                         flags=cv2.CASCADE_SCALE_IMAGE)

    for (x, y, w, h) in dets:
        # Crop of the detected face (not used further by this script).
        face_image = face_frame[y:y + h, x:x + w]
        cv2.putText(img, "Haar", (x, y - 4), cv2.FONT_HERSHEY_SIMPLEX,
                    0.5, (0, 255, 0), 1, cv2.LINE_AA)
        cv2.rectangle(img, (x, y), (x + w, y + h), (0, 255, 0), 2)

    cv2.imwrite('./result/haar_face/' + str(i) + '.png', img)
# HOG features + SVM classifier (dlib frontal face detector)
import copy
import os

import cv2
import dlib

print(dlib.__file__)

detector = dlib.get_frontal_face_detector()

# cv2.imwrite does not create missing directories, so make sure the target exists.
os.makedirs('./result/Dlib_HOG_CSM', exist_ok=True)

for i in range(37):
    img = cv2.imread(r'./picture/' + str(i) + '.jpg')
    if img is None:
        # imread reports a missing/unreadable file by returning None.
        print('skip ./picture/{}.jpg: cannot be read'.format(i))
        continue
    img = cv2.resize(img, dsize=(480, 640))
    face_frame = copy.deepcopy(img)  # clean copy, so crops carry no drawings

    # Second argument: number of times the image is upsampled before detection.
    dets = detector(img, 1)

    for d in dets:
        left, top = int(d.left()), int(d.top())
        right, bottom = int(d.right()), int(d.bottom())
        # Crop of the detected face (not used further by this script).
        face_image = face_frame[d.top():d.bottom(), d.left():d.right()]
        cv2.putText(img, "Dlib", (left, top - 4), cv2.FONT_HERSHEY_SIMPLEX,
                    0.5, (0, 255, 0), 1, cv2.LINE_AA)
        cv2.rectangle(img, (left, top), (right, bottom), (0, 255, 0), 2)

    cv2.imwrite('./result/Dlib_HOG_CSM/' + str(i) + '.png', img)
# CNN (dlib MMOD face detector)
import copy
import os

import cv2
import dlib

cnn_fn = './mmod_human_face_detector.dat'
cnn_face_detector = dlib.cnn_face_detection_model_v1(cnn_fn)

# cv2.imwrite does not create missing directories, so make sure the target exists.
os.makedirs('./result/CNN', exist_ok=True)

for i in range(37):
    img = cv2.imread(r'./picture/' + str(i) + '.jpg')
    if img is None:
        # imread reports a missing/unreadable file by returning None.
        print('skip ./picture/{}.jpg: cannot be read'.format(i))
        continue
    img = cv2.resize(img, dsize=(480, 640))
    face_frame = copy.deepcopy(img)  # clean copy, so crops carry no drawings

    dets = cnn_face_detector(img, 1)

    for face in dets:
        rect = face.rect
        # Crop of the detected face (not used further by this script).
        face_image = face_frame[rect.top():rect.bottom(), rect.left():rect.right()]
        cv2.putText(img, "CNN", (int(rect.left()), int(rect.top()) - 4),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 1, cv2.LINE_AA)
        cv2.rectangle(img, (rect.left(), rect.top()),
                      (rect.right(), rect.bottom()), (0, 255, 0), 2)

    cv2.imwrite('./result/CNN/' + str(i) + '.png', img)
# MTCNN
import copy
import os

import cv2
from mtcnn import MTCNN

detector = MTCNN()

# cv2.imwrite does not create missing directories, so make sure the target exists.
os.makedirs('./result/MTCNN', exist_ok=True)

for i in range(37):
    img = cv2.imread(r'./picture/' + str(i) + '.jpg')
    if img is None:
        # imread reports a missing/unreadable file by returning None.
        print('skip ./picture/{}.jpg: cannot be read'.format(i))
        continue
    img = cv2.resize(img, dsize=(480, 640))
    face_frame = copy.deepcopy(img)

    # OpenCV loads BGR; the detector is fed an RGB copy.
    img_rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    dets = detector.detect_faces(img_rgb)

    for face in dets:
        box_x, box_y, box_w, box_h = face['box']
        # Clamp the crop origin: a negative start index would make the slice
        # wrap around to the other side of the image.
        face_image = face_frame[max(box_y, 0):box_y + box_h,
                                max(box_x, 0):box_x + box_w]
        cv2.putText(face_frame, "MTCNN", (box_x, box_y - 4),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 1, cv2.LINE_AA)
        cv2.rectangle(face_frame, (box_x, box_y),
                      (box_x + box_w, box_y + box_h), (0, 255, 0), 2)
        # Mark every facial keypoint returned for this face with a red dot.
        for value in face['keypoints'].values():
            cv2.circle(face_frame, value, 1, (0, 0, 255), -1)

    cv2.imwrite('./result/MTCNN/' + str(i) + '.png', face_frame)
总结 上述四个方式的实现并不需要过多的代码,仅仅是使用网络上下载的已经训练好的人脸识别的模型进行应用一下即可。
超像素处理 具体要求 2021年9月27日,教授布置了第二个研究生课题,内容是使用面部识别 中第四种方式(MTCNN)所得到的人脸,将框处的人脸部分使用SLIC(Simple Linear Iterative Clustering,简单的线性迭代聚类)进行超像素化处理,并且将结果输出。
解析及想法 想法 看过众多的解释之后,在这里我想写下自己的理解。
这里说的某种意义上是因为其并非真正降低了图片所存在的像素数目,只是将相同颜色的像素数理解成为同一像素。例如经过超像素化后的图片(如下图),其输出结果分辨率为 $192 \times 226$,但是在肉眼看上去,其色块只有 $6 \times 7$(图片中每一个色块中心的点为颜色的定位点,在这里需要忽略考虑)。
具体步骤 为了实现超像素(我真的不想使用这个名字),根据上文分析的原理,可以分为下述几个步骤进行实现。
均匀播种 查询周边 找到同类 交给时间 下面对此进行一一介绍
均匀播种 首先为了实现超像素化,需要在图片中确定需要划分的超像素个数 $K$(图片的总像素数记为 $N$),这里的每个超像素应近似为正方形,因此每个种子相邻的距离(步长)可以近似为 $S=\sqrt{\frac{N}{K}}$。
def init_clusters(self):
    """
    Seed the superpixel centres on a regular grid.

    Centres start half a step (S / 2) from the top-left corner and are spaced
    S apart in both directions; each one is created through
    ``self.make_cluster`` and appended to ``self.clusters``.

    :return: None
    """
    half_step = self.S / 2
    row = half_step
    while row < self.image_height:
        col = half_step
        while col < self.image_width:
            self.clusters.append(self.make_cluster(row, col))
            col += self.S
        row += self.S
查询周边 经过上一个步骤的处理,我们成功的在图片上放置了K
def get_gradient(self, h, w):
    """
    Return the gradient measure used to place cluster centres.

    The value is the sum, over the three colour channels, of the signed
    difference between the pixel at (h + 1, w + 1) and the pixel at (h, w).
    When (h + 1) or (w + 1) would fall outside the image, the coordinate is
    pulled back to the last position that still has a diagonal neighbour.

    :param h: row of the pixel
    :param w: column of the pixel
    :return: the signed channel-difference sum
    """
    if w + 1 >= self.image_width:
        w = self.image_width - 2
    if h + 1 >= self.image_height:
        h = self.image_height - 2

    here = self.data[h][w]
    diagonal = self.data[h + 1][w + 1]
    gradient = diagonal[0] - here[0] + \
               diagonal[1] - here[1] + \
               diagonal[2] - here[2]
    return gradient


def move_clusters(self):
    """
    Move every cluster centre to the lowest-gradient pixel of its 3x3
    neighbourhood.

    The neighbourhood is anchored at the centre's position *before* any move.
    The earlier version re-read ``cluster.h`` / ``cluster.w`` after calling
    ``cluster.update`` inside the loop, so the window drifted with every
    improvement and could reach negative or out-of-range coordinates.
    Neighbours that lie outside the image are skipped.
    """
    for cluster in self.clusters:
        base_h, base_w = cluster.h, cluster.w
        cluster_gradient = self.get_gradient(base_h, base_w)
        for dh in (-1, 0, 1):
            for dw in (-1, 0, 1):
                _h = base_h + dh
                _w = base_w + dw
                inside = 0 <= _h < self.image_height and 0 <= _w < self.image_width
                if not inside:
                    continue
                new_gradient = self.get_gradient(_h, _w)
                if new_gradient < cluster_gradient:
                    cluster.update(_h, _w,
                                   self.data[_h][_w][0],
                                   self.data[_h][_w][1],
                                   self.data[_h][_w][2])
                    cluster_gradient = new_gradient
找到同类 到了这一步,种子点已经找到了一个相对较好的位置了。
$$d_{c(olor)}=\sqrt{(l_j-l_i)^2+(a_j-a_i)^2+(b_j-b_i)^2}$$
$$d_s=\sqrt{(x_j-x_i)^2+(y_j-y_i)^2}$$
$$D'=\sqrt{\left(\frac{d_c}{N_c}\right)^2+\left(\frac{d_s}{N_s}\right)^2}$$
并且 $N_s$ 表示距离空间中的最大值,也就是超像素之间的步长即 $S$,而 $N_c$ 所代表的是颜色距离的最大值,常会使用常数 $m$ 代替。$m$ 是可以自主修改的,范围是[1, 40],一般会取10。
def assignment(self):
    """
    Assign pixels to the cluster with the smallest combined distance.

    For every cluster, the pixels in the window of +/- 2S around its centre
    (clipped to the image) are examined. The distance combines the colour
    distance scaled by ``self.M`` and the spatial distance scaled by
    ``self.S``. A pixel is (re)assigned when the distance beats the best
    value stored for it in ``self.dis``.
    """
    for cluster in self.clusters:
        # Clip the search window to the image once, rather than per pixel.
        h_start = max(cluster.h - 2 * self.S, 0)
        h_stop = min(cluster.h + 2 * self.S, self.image_height)
        w_start = max(cluster.w - 2 * self.S, 0)
        w_stop = min(cluster.w + 2 * self.S, self.image_width)
        for h in range(h_start, h_stop):
            for w in range(w_start, w_stop):
                L, A, B = self.data[h][w]
                Dc = math.sqrt(
                    math.pow(L - cluster.l, 2) +
                    math.pow(A - cluster.a, 2) +
                    math.pow(B - cluster.b, 2))
                Ds = math.sqrt(
                    math.pow(h - cluster.h, 2) +
                    math.pow(w - cluster.w, 2))
                D = math.sqrt(math.pow(Dc / self.M, 2) + math.pow(Ds / self.S, 2))
                if D < self.dis[h][w]:
                    previous = self.label.get((h, w))
                    if previous is not None:
                        # Detach the pixel from the cluster that owned it so far.
                        previous.pixels.remove((h, w))
                    self.label[(h, w)] = cluster
                    cluster.pixels.append((h, w))
                    self.dis[h][w] = D


def update_cluster(self):
    """
    Move every cluster centre to the mean position of its member pixels and
    take the Lab value found at that position.

    A cluster that currently owns no pixels is left where it is; the earlier
    version divided by zero in that case.

    :return: None
    """
    for cluster in self.clusters:
        number = len(cluster.pixels)
        if number == 0:
            continue
        sum_h = sum(p[0] for p in cluster.pixels)
        sum_w = sum(p[1] for p in cluster.pixels)
        _h = int(sum_h / number)
        _w = int(sum_w / number)
        cluster.update(_h, _w,
                       self.data[_h][_w][0],
                       self.data[_h][_w][1],
                       self.data[_h][_w][2])
交给时间 上述过程如果只进行一次的话,很有可能会不准确。
The L2 norm is used to compute a residual error E between the new cluster center locations and previous cluster center locations. The assignment and update steps can be repeated iteratively until the error converges, but we have found that 10 iterations suffices for most images, and report all results in this paper using this criteria.
1 2 3 for _ in trange(10 ): self.assignment() self.update_cluster()
实现代码 MTCNN部分 超像素处理部分所需要使用的图片需要经过MTCNN方式的检测,并且和第三部分面部识别
import copy
import os

import cv2
from mtcnn import MTCNN

detector = MTCNN()

# cv2.imwrite does not create missing directories, so make sure the target exists.
os.makedirs('./result', exist_ok=True)

for i in range(37):
    img = cv2.imread(r'./picture/' + str(i) + '.jpg')
    if img is None:
        # imread reports a missing/unreadable file by returning None.
        print('skip ./picture/{}.jpg: cannot be read'.format(i))
        continue
    img = cv2.resize(img, dsize=(480, 640))
    face_frame = copy.deepcopy(img)

    # OpenCV loads BGR; the detector is fed an RGB copy.
    img_rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    dets = detector.detect_faces(img_rgb)

    for face in dets:
        box_x, box_y, box_w, box_h = face['box']
        # Clamp the crop origin: a negative start index would make the slice
        # wrap around and yield a wrong or empty crop.
        face_image = face_frame[max(box_y, 0):box_y + box_h,
                                max(box_x, 0):box_x + box_w]
        if face_image.size == 0:
            # Nothing left to save for a degenerate box.
            continue
        # Every face of image i goes to the same path, so the last one wins.
        cv2.imwrite('./result/' + str(i) + '.png', face_image)
SLIC部分 该算法的伪代码如下图所示
cluster[]: 用于保存为超像素位置 label{}: 用于保存每个像素对应的超像素 具体实现如下
import math

import numpy as np
from skimage import io, color
from tqdm import trange


class Cluster(object):
    """One superpixel: centre position, centre Lab colour and member pixels."""

    # Running counter; each new cluster takes the current value as its number.
    cluster_index = 1

    def __init__(self, h, w, l=0, a=0, b=0):
        self.update(h, w, l, a, b)
        self.pixels = []  # (row, col) tuples currently assigned to this cluster
        self.no = self.cluster_index
        Cluster.cluster_index += 1

    def update(self, h, w, l, a, b):
        """Set the centre position (h, w) and the centre colour (l, a, b)."""
        self.h = h
        self.w = w
        self.l = l
        self.a = a
        self.b = b

    def __str__(self):
        return "{},{}:{} {} {} ".format(self.h, self.w, self.l, self.a, self.b)

    def __repr__(self):
        return self.__str__()


class SLICProcessor(object):
    """SLIC superpixel segmentation of a single image, computed in Lab space."""

    @staticmethod
    def open_image(path):
        """
        Read the image at ``path`` and convert it from RGB to Lab.

        :param path: image file to read
        :return: array indexed as [row][col] -> [l, a, b]
        """
        rgb = io.imread(path)
        lab_arr = color.rgb2lab(rgb)
        return lab_arr

    @staticmethod
    def save_lab_image(path, lab_arr):
        """
        Convert ``lab_arr`` from Lab back to RGB and save it to ``path``.

        :param path: output file
        :param lab_arr: image in Lab colour space
        :return: None
        """
        rgb_arr = color.lab2rgb(lab_arr)
        io.imsave(path, rgb_arr)

    def make_cluster(self, h, w):
        """Create a cluster centred on pixel (int(h), int(w)) with that pixel's colour."""
        h = int(h)
        w = int(w)
        return Cluster(h, w,
                       self.data[h][w][0],
                       self.data[h][w][1],
                       self.data[h][w][2])

    def __init__(self, filename, K, M):
        """
        :param filename: image to segment
        :param K: requested number of superpixels
        :param M: constant that scales the colour distance in ``assignment``
        """
        self.K = K
        self.M = M

        self.data = self.open_image(filename)
        self.image_height = self.data.shape[0]
        self.image_width = self.data.shape[1]
        self.N = self.image_height * self.image_width
        # Grid step S = sqrt(N / K). Kept at >= 1: a step of 0 (K > N) would
        # make init_clusters loop forever and assignment divide by zero.
        self.S = max(1, int(math.sqrt(self.N / self.K)))

        self.clusters = []
        self.label = {}  # (row, col) -> Cluster that currently owns the pixel
        # Best distance found so far for every pixel.
        self.dis = np.full((self.image_height, self.image_width), np.inf)

    def init_clusters(self):
        """
        Seed the cluster centres on a regular grid, starting S / 2 from the
        top-left corner and spaced S apart.

        :return: None
        """
        h = self.S / 2
        while h < self.image_height:
            w = self.S / 2
            while w < self.image_width:
                self.clusters.append(self.make_cluster(h, w))
                w += self.S
            h += self.S

    def get_gradient(self, h, w):
        """
        Return the sum over the three channels of the signed difference
        between pixel (h + 1, w + 1) and pixel (h, w).

        Coordinates whose diagonal neighbour would fall outside the image are
        pulled back to the last position that has one.

        :param h: row of the pixel
        :param w: column of the pixel
        :return: the signed channel-difference sum
        """
        if w + 1 >= self.image_width:
            w = self.image_width - 2
        if h + 1 >= self.image_height:
            h = self.image_height - 2

        here = self.data[h][w]
        diagonal = self.data[h + 1][w + 1]
        gradient = diagonal[0] - here[0] + \
                   diagonal[1] - here[1] + \
                   diagonal[2] - here[2]
        return gradient

    def move_clusters(self):
        """
        Move every centre to the lowest-gradient pixel of its 3x3 neighbourhood.

        The neighbourhood is anchored at the position held before any move
        (re-reading cluster.h / cluster.w after an update made the window
        drift). Neighbours outside the image are skipped.
        """
        for cluster in self.clusters:
            base_h, base_w = cluster.h, cluster.w
            cluster_gradient = self.get_gradient(base_h, base_w)
            for dh in (-1, 0, 1):
                for dw in (-1, 0, 1):
                    _h = base_h + dh
                    _w = base_w + dw
                    inside = 0 <= _h < self.image_height and 0 <= _w < self.image_width
                    if not inside:
                        continue
                    new_gradient = self.get_gradient(_h, _w)
                    if new_gradient < cluster_gradient:
                        cluster.update(_h, _w,
                                       self.data[_h][_w][0],
                                       self.data[_h][_w][1],
                                       self.data[_h][_w][2])
                        cluster_gradient = new_gradient

    def assignment(self):
        """
        Assign pixels to the cluster with the smallest combined distance.

        Each cluster examines the window of +/- 2S around its centre (clipped
        to the image). The distance is sqrt((Dc / M)^2 + (Ds / S)^2); a pixel
        is (re)assigned when that beats the value stored in ``self.dis``.
        """
        for cluster in self.clusters:
            h_start = max(cluster.h - 2 * self.S, 0)
            h_stop = min(cluster.h + 2 * self.S, self.image_height)
            w_start = max(cluster.w - 2 * self.S, 0)
            w_stop = min(cluster.w + 2 * self.S, self.image_width)
            for h in range(h_start, h_stop):
                for w in range(w_start, w_stop):
                    L, A, B = self.data[h][w]
                    Dc = math.sqrt(
                        math.pow(L - cluster.l, 2) +
                        math.pow(A - cluster.a, 2) +
                        math.pow(B - cluster.b, 2))
                    Ds = math.sqrt(
                        math.pow(h - cluster.h, 2) +
                        math.pow(w - cluster.w, 2))
                    D = math.sqrt(math.pow(Dc / self.M, 2) + math.pow(Ds / self.S, 2))
                    if D < self.dis[h][w]:
                        previous = self.label.get((h, w))
                        if previous is not None:
                            # Detach the pixel from its previous owner.
                            previous.pixels.remove((h, w))
                        self.label[(h, w)] = cluster
                        cluster.pixels.append((h, w))
                        self.dis[h][w] = D

    def update_cluster(self):
        """
        Move every centre to the mean position of its member pixels and take
        the Lab value found there. Clusters without pixels are left unchanged
        (avoids a division by zero).

        :return: None
        """
        for cluster in self.clusters:
            number = len(cluster.pixels)
            if number == 0:
                continue
            sum_h = sum(p[0] for p in cluster.pixels)
            sum_w = sum(p[1] for p in cluster.pixels)
            _h = int(sum_h / number)
            _w = int(sum_w / number)
            cluster.update(_h, _w,
                           self.data[_h][_w][0],
                           self.data[_h][_w][1],
                           self.data[_h][_w][2])

    def save_current_image(self, name):
        """
        Paint every pixel with its cluster's centre colour, mark each centre
        with a (0, 0, 0) Lab pixel, and save the result to ``name``.
        """
        image_arr = np.copy(self.data)
        for cluster in self.clusters:
            for p in cluster.pixels:
                image_arr[p[0]][p[1]][0] = cluster.l
                image_arr[p[0]][p[1]][1] = cluster.a
                image_arr[p[0]][p[1]][2] = cluster.b
            # Centre marker.
            image_arr[cluster.h][cluster.w][0] = 0
            image_arr[cluster.h][cluster.w][1] = 0
            image_arr[cluster.h][cluster.w][2] = 0
        self.save_lab_image(name, image_arr)

    def iterate_10times(self, j):
        """
        Run the whole pipeline: seed, nudge the seeds, then 10 rounds of
        assignment + centre update, and save to ./SLIC_result/<j>.png.

        :param j: index used in the output file name
        """
        self.init_clusters()
        self.move_clusters()
        for _ in trange(10):
            self.assignment()
            self.update_cluster()
        name = './SLIC_result/{}.png'.format(j)
        self.save_current_image(name)


if __name__ == '__main__':
    for i in range(37):
        print("第{}/{}个正在运行".format(i + 1, 37))
        p = SLICProcessor('./result/{}.png'.format(i), 40, 40)
        p.iterate_10times(i)
参考内容 SLIC超像素分割详解(一):简介: https://blog.csdn.net/electech6/article/details/45509779
超像素SLIC算法: https://www.jianshu.com/p/f2bc9dbbd9b2
SLIC算法分割超像素原理及Python实现: https://www.kawabangga.com/posts/1923
图像梯度的基本原理: https://blog.csdn.net/saltriver/article/details/78987096
ACHANTA, Radhakrishna, et al. SLIC superpixels compared to state-of-the-art superpixel methods. IEEE transactions on pattern analysis and machine intelligence, 2012, 34.11: 2274-2282.
色块提出 具体要求 2021年9月29日,教授布置了第三个课题,内容是使用超像素处理 超像素化后的图片,在其中找到每个小块中颜色≥95%的最小矩形框,并且将矩形块进行输出。
这次没有参考链接,由于中间隔着一个国庆,需要去处理一些非常麻烦的事情(具体详情日记篇 ),所以再次认真完成的时候中间已经过去了6天
解析及想法 解析 因为是需要使用超像素化处理后的图片,因此按照正常思路来说设想是需要使用处理好的图片作为输入进行处理。
识别出每一张超像素处理后的图片的每个色块 对于每一个色块进行精准定位 按照色块对图像进行划分 找到覆盖范围≥95%的最小矩形 不过在考量处理的数据量之后,我判断循环的速度要远远快于处理SLIC,因此决定重新写一段算法来满足本次的需求。
虽然在大学的课程和实践中曾经多次听过这一名词,但是我却从来没有亲自用代码实现过 ,毕竟编程的精髓在于cmd + c和cmd + v 。
具体步骤 识别记录色块 首先需要读取图片进入内存,这里使用的是OpenCV的imread
的,大小为(image_height, image_width)的二维数组,此处使用numpy进行实现,因此使用的是np.full()
def rgb2hex(rgb):
    """
    Convert an RGB triple into an upper-case hex colour string.

    Every channel is formatted as two hex digits, so (255, 0, 10) becomes
    '#FF000A'. This replaces the earlier slice-and-replace trick on the
    output of ``hex()``, which gave the same result for channel values in
    0..255 but silently dropped digits for anything larger.

    :param rgb: iterable of channel values (converted with ``int``)
    :return: string of the form '#RRGGBB'
    """
    return '#' + ''.join('{:02X}'.format(int(channel)) for channel in rgb)
滑动窗口方法 在使用滑动窗口方式处理时,面临的最大问题便是如何处理边界的问题。
当方块的初始边长大于边界块的长度时该如何处理,如下图所示,该图像中的黄色区域占14像素,而根据 $d=\lceil\sqrt{pixel\ number}\,\rceil$ 公式可以判断,该窗口的边长应为 $d=\lceil\sqrt{14}\,\rceil=4$
最后的处理 根据要求,最后输出的内容需要是检测出的方块框住的原图部分,因此需要使用如下方法进行处理。
将上一步中获取到的边界值应用于一张原图,将该边界之外的部分的颜色值变换为RGB(0, 0, 0)
def save_image(self, path, point):
    """
    Cut the rectangle ``point`` out of the source image, black out every
    pixel that does not belong to the current colour block, and save it.

    :param path: output file path
    :param point: [top, bottom, left, right]; bottom/right are exclusive
    :return: None
    """
    top, bottom, left, right = point
    # .copy() matters: a plain slice is a view, so blacking out pixels would
    # also modify self.source_file and corrupt every later region.
    data_copy = self.source_file[top:bottom, left:right, :].copy()
    # Pixels of the window whose recorded colour differs from the current block.
    outside = self.img_ndarray[top:bottom, left:right] != self.last_color
    data_copy[outside] = (0, 0, 0)
    # The data is held as RGB; reorder the channels to BGR for OpenCV.
    cv2.imwrite(path, data_copy[:, :, (2, 1, 0)])
实现代码 该程序的全部代码如下所示。
import math
import os

import cv2
import numpy as np


class ColorProcessor(object):
    """
    Extract, for every connected same-colour region of a superpixel image,
    the smallest square window holding at least 95% of the region's pixels,
    and save that window cut from the original image.
    """

    # Marker stored in img_ndarray for pixels that were not visited yet.
    UNVISITED = '0000000'
    # Share of a region's pixels the output window has to contain.
    COVERAGE = 0.95

    def __init__(self, file_path, source_file_path):
        """
        :param file_path: superpixel image whose colour blocks are analysed
        :param source_file_path: original image the output windows are cut from
        """
        self.source_file = self.open_image(source_file_path)
        self.data = self.open_image(file_path)
        self.image_height = self.data.shape[0]
        self.image_width = self.data.shape[1]
        # Per-pixel colour string ('#RRGGBB') once visited, UNVISITED before.
        self.img_ndarray = np.full((self.image_height, self.image_width), self.UNVISITED)
        self.last_color = "#"
        self.color_set = set()
        # Bounding box (inclusive) and size of the region found last.
        self.most_left = self.image_width
        self.most_right = 0
        self.most_top = self.image_height
        self.most_bottom = 0
        self.color_count = 0

    @staticmethod
    def open_image(path):
        """
        Read an image and return it as RGB.

        :param path: image file
        :return: array indexed as [row][col] -> [r, g, b]
        :raises FileNotFoundError: if the file cannot be read
        """
        bgr = cv2.imread(path)
        if bgr is None:
            # imread returns None instead of raising on a missing/unreadable file.
            raise FileNotFoundError("cannot read image: {}".format(path))
        rgb = cv2.cvtColor(bgr, cv2.COLOR_BGR2RGB)
        return rgb

    @staticmethod
    def rgb2hex(rgb):
        """Format an RGB triple as an upper-case '#RRGGBB' string."""
        return '#' + ''.join('{:02X}'.format(int(channel)) for channel in rgb)

    def traversal(self, i):
        """
        Scan the image row by row; for every pixel not visited yet, label its
        connected region, find the covering window and save it to
        ./Process_result1/<i>_<counter>.png.

        :param i: image index used in the output file names
        :return: None
        """
        # cv2.imwrite does not create missing directories.
        os.makedirs('./Process_result1', exist_ok=True)
        counter = 0
        for row in range(self.image_height):
            for col in range(self.image_width):
                if self.img_ndarray[row, col] != self.UNVISITED:
                    continue
                self.last_color = self.rgb2hex(self.data[row, col])
                self.img_ndarray[row, col] = self.last_color
                self.get_connected(row, col)
                point = self.move_rectangle()
                print("point: \n", point)
                if point:
                    path = "./Process_result1/{}_{}.png".format(i, counter)
                    print("last_color: {}".format(self.last_color))
                    self.save_image(path, point)
                    counter += 1

    def save_image(self, path, point):
        """
        Cut the rectangle ``point`` out of the source image, black out every
        pixel that does not belong to the current colour block, and save it.

        :param path: output file path
        :param point: [top, bottom, left, right]; bottom/right are exclusive
        :return: None
        """
        top, bottom, left, right = point
        # .copy() matters: a plain slice is a view, so blacking out pixels
        # would also modify self.source_file and corrupt later regions.
        data_copy = self.source_file[top:bottom, left:right, :].copy()
        outside = self.img_ndarray[top:bottom, left:right] != self.last_color
        data_copy[outside] = (0, 0, 0)
        # The data is held as RGB; reorder the channels to BGR for OpenCV.
        cv2.imwrite(path, data_copy[:, :, (2, 1, 0)])

    def move_rectangle(self):
        """
        Slide a square window over the region's bounding box, growing its side
        until one position holds at least 95% of the region's pixels.

        The side starts at ceil(sqrt(required pixel count)). When the side
        exceeds one dimension of the bounding box, the window is clipped to
        the box in that dimension. Pixel counts come from a summed-area table,
        so every window is evaluated independently (the earlier version kept
        one running counter across all windows and indexed past the box).

        :return: [top, bottom, left, right] with exclusive bottom/right,
                 or None if no region has been collected
        """
        if self.color_count == 0:
            return None

        top, left = self.most_top, self.most_left
        box_h = self.most_bottom - top + 1
        box_w = self.most_right - left + 1
        min_sqa = math.ceil(self.color_count * self.COVERAGE)
        print("最小像素数: ", min_sqa)

        # Summed-area table of "pixel has the current colour" over the box;
        # row 0 and column 0 are zero padding.
        mask = self.img_ndarray[top:top + box_h, left:left + box_w] == self.last_color
        integral = np.zeros((box_h + 1, box_w + 1), dtype=np.int64)
        integral[1:, 1:] = mask.cumsum(axis=0).cumsum(axis=1)

        min_rec = math.ceil(math.sqrt(min_sqa))
        max_rec = max(box_h, box_w)
        for d in range(min_rec, max_rec + 1):
            win_h = min(d, box_h)
            win_w = min(d, box_w)
            for y in range(box_h - win_h + 1):
                for x in range(box_w - win_w + 1):
                    inside = (integral[y + win_h, x + win_w]
                              - integral[y, x + win_w]
                              - integral[y + win_h, x]
                              + integral[y, x])
                    if inside >= min_sqa:
                        return [top + y, top + y + win_h,
                                left + x, left + x + win_w]
        # Safety net: the whole bounding box contains the entire region.
        return [top, top + box_h, left, left + box_w]

    def get_connected(self, _row, _col):
        """
        Flood-fill the 4-connected region of ``self.last_color`` that contains
        (_row, _col), recording the colour in img_ndarray and tracking the
        region's bounding box and pixel count.

        :param _row: row of the seed pixel
        :param _col: column of the seed pixel
        :return: None
        """
        self.color_set = set()
        self.color_set.add((_row, _col))
        # The bounding box starts at the seed itself; before, the seed was
        # never included, which broke the box for small regions.
        self.most_left = _col
        self.most_right = _col
        self.most_top = _row
        self.most_bottom = _row
        self.color_count = 0
        while self.color_set:
            row, col = self.color_set.pop()
            self.color_count += 1
            if row - 1 >= 0:
                self.set_color(row - 1, col)
            if row + 1 <= self.image_height - 1:
                self.set_color(row + 1, col)
            if col - 1 >= 0:
                self.set_color(row, col - 1)
            if col + 1 <= self.image_width - 1:
                self.set_color(row, col + 1)

    def set_color(self, row, col):
        """
        If pixel (row, col) has the current colour and is not labelled yet,
        label it, queue it for expansion and widen the bounding box.
        """
        if self.rgb2hex(self.data[row, col]) != self.last_color:
            return
        if self.img_ndarray[row, col] == self.last_color:
            return
        self.img_ndarray[row, col] = self.last_color
        self.color_set.add((row, col))
        self.most_bottom = max(self.most_bottom, row)
        self.most_top = min(self.most_top, row)
        self.most_left = min(self.most_left, col)
        self.most_right = max(self.most_right, col)


if __name__ == '__main__':
    for i in range(37):
        print("第{}/{}个正在运行".format(i + 1, 37))
        p = ColorProcessor('./SLIC_result/{}.png'.format(i), './result/{}.png'.format(i))
        p.traversal(i)
参考内容 滑动窗口的概念: https://baike.baidu.com/item/滑动窗口
什么是「滑动窗口算法」(sliding window algorithm),有哪些应用场景?: https://www.zhihu.com/question/314669016
总结 以上便是第一次教授布置的演習課題的全部内容,虽然是分为三次进行布置的,但是综合起来我认为可以成为一个问题,因此就放在一篇文章中了。