因为之前正好看了CMU在CVPR2017上的论文《Realtime Multi-Person 2D Pose Estimation using Part Affinity Fields》,而且他们提供了训练好的模型,所以就直接用CMU训练的模型在AI challenge的数据集上做了测试。
最后,这个没有使用AI challenge训练集训练的模型在AI challenge上的得分是0.1667,可以看作是一个baseline。
以下是预处理的说明以及加入预处理程序的源代码。openpose的源代码使用#openpose ##openpose标注出来了,剩下的就是AI challenge的预处理程序。
在Google Cloud上使用1块NVIDIA Tesla K80跑完AI challenge的测试集大约需要24小时,处理一幅图大约需要4秒。
AI challenge测试要求的关键点顺序是:1右肩,2右肘,3右腕,4左肩,5左肘,6左腕,7右髋,8右膝,9右踝,10左髋,11左膝,12左踝,13头顶,14脖子
openpose源码中subset输出的关键点顺序是:1鼻子,2脖子,3右肩,4右肘,5右腕,6左肩,7左肘,8左腕,9右髋,10右膝,11右踝,12左髋,13左膝,14左踝,15左眼,16右眼,17左耳,18右耳,19 pt19
函数 subset2AIsubset, all_peaks2all_peaks_1d, listMultiKeypoints 负责把openpose的关键点转换成AI challenge 的关键点。
当然还得按照官网上的要求输出特定格式的JSON文件,如下所示:
[ { "image_id": "a0f6bdc065a602b7b84a67fb8d14ce403d902e0d", "keypoint_annotations": { "human1": [261, 294, 1, 281, 328, 1, 0, 0, 0, 213, 295, 1, 208, 346, 1, 192, 335, 1, 245, 375, 1, 255, 432, 1, 244, 494, 1, 221, 379, 1, 219, 442, 1, 226, 491, 1, 226, 256, 1, 231, 284, 1], "human2": [313, 301, 1, 305, 337, 1, 321, 345, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 313, 359, 1, 320, 409, 1, 311, 454, 1, 0, 0, 0, 330, 409, 1, 324, 446, 1, 337, 284, 1, 327, 302, 1], "human3": [373, 304, 1, 346, 286, 1, 332, 263, 1, 0, 0, 0, 0, 0, 0, 345, 313, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 363, 386, 1, 361, 424, 1, 361, 475, 1, 365, 273, 1, 369, 297, 1], ... } } ...]
#import numpy as np
import json
import os
#openpose
import keras
from keras.models import Sequential
from keras.models import Model
from keras.layers import Input, Dense, Activation
from keras.layers.convolutional import Conv2D
from keras.layers.pooling import MaxPooling2D
from keras.layers.normalization import BatchNormalization
from keras.layers.merge import Concatenate
from config_reader import config_reader
import scipy

import cv2
import numpy as np
np.seterr(divide='ignore', invalid='ignore')
import util
import math
from numpy import ma
from scipy.ndimage.filters import gaussian_filter
##openpose
#openpose
def relu(x):
    """Apply a ReLU ``Activation`` layer to tensor ``x`` and return the result."""
    return Activation('relu')(x)


def conv(x, nf, ks, name):
    """Apply a ``Conv2D`` layer named ``name`` to ``x``.

    The layer has ``nf`` filters, a ``ks`` x ``ks`` kernel and
    ``padding='same'``.
    """
    return Conv2D(nf, (ks, ks), padding='same', name=name)(x)


def pooling(x, ks, st, name):
    """Apply a ``MaxPooling2D`` layer named ``name`` to ``x``.

    The pool window is ``ks`` x ``ks`` and the stride is ``st`` in both
    directions.
    """
    return MaxPooling2D((ks, ks), strides=(st, st), name=name)(x)


def vgg_block(x):
    """Stack the shared feature extractor on top of ``x`` and return its output.

    The stack is made of conv+ReLU layers "conv1_1" .. "conv4_2" with three
    2x2 max-pooling layers in between, followed by the two "*_CPM" conv+ReLU
    layers.
    """
    # Block 1
    x = relu(conv(x, 64, 3, "conv1_1"))
    x = relu(conv(x, 64, 3, "conv1_2"))
    x = pooling(x, 2, 2, "pool1_1")

    # Block 2
    x = relu(conv(x, 128, 3, "conv2_1"))
    x = relu(conv(x, 128, 3, "conv2_2"))
    x = pooling(x, 2, 2, "pool2_1")

    # Block 3
    x = relu(conv(x, 256, 3, "conv3_1"))
    x = relu(conv(x, 256, 3, "conv3_2"))
    x = relu(conv(x, 256, 3, "conv3_3"))
    x = relu(conv(x, 256, 3, "conv3_4"))
    x = pooling(x, 2, 2, "pool3_1")

    # Block 4
    x = relu(conv(x, 512, 3, "conv4_1"))
    x = relu(conv(x, 512, 3, "conv4_2"))

    # Additional non vgg layers
    x = relu(conv(x, 256, 3, "conv4_3_CPM"))
    x = relu(conv(x, 128, 3, "conv4_4_CPM"))

    return x


def stage1_block(x, num_p, branch):
    """Build one branch of stage 1 on top of ``x``.

    Three 3x3 conv+ReLU layers and one 1x1 conv+ReLU layer are followed by a
    final 1x1 convolution with ``num_p`` output channels and no activation.
    ``branch`` is only used to build the layer names ("..._L<branch>").
    """
    x = relu(conv(x, 128, 3, "conv5_1_CPM_L%d" % branch))
    x = relu(conv(x, 128, 3, "conv5_2_CPM_L%d" % branch))
    x = relu(conv(x, 128, 3, "conv5_3_CPM_L%d" % branch))
    x = relu(conv(x, 512, 1, "conv5_4_CPM_L%d" % branch))
    x = conv(x, num_p, 1, "conv5_5_CPM_L%d" % branch)

    return x


def stageT_block(x, num_p, stage, branch):
    """Build one branch of a refinement stage (stage >= 2) on top of ``x``.

    Five 7x7 conv+ReLU layers and one 1x1 conv+ReLU layer are followed by a
    final 1x1 convolution with ``num_p`` output channels and no activation.
    ``stage`` and ``branch`` are only used to build the layer names.
    """
    # Mconv1 .. Mconv5 share the same shape: 128 filters, 7x7 kernel.
    for idx in range(1, 6):
        x = relu(conv(x, 128, 7, "Mconv%d_stage%d_L%d" % (idx, stage, branch)))
    x = relu(conv(x, 128, 1, "Mconv6_stage%d_L%d" % (stage, branch)))
    x = conv(x, num_p, 1, "Mconv7_stage%d_L%d" % (stage, branch))

    return x
##openpose


def subset2AIsubset(t, numPersons):
    """Reorder the per-person part columns into the AI challenge order.

    For each of the first ``numPersons`` rows of ``t`` the result holds
    columns 2..13 (right shoulder .. left ankle in the openpose order),
    followed by column 0 (nose, used as the "head top" point) and column 1
    (neck), i.e. 14 entries per person.

    Returns a list of ``numPersons`` lists.
    """
    ai_subset = []
    # ``range`` instead of the Python-2-only ``xrange``.
    for j in range(numPersons):
        person = [t[j][i + 2] for i in range(12)]
        person.append(t[j][0])
        person.append(t[j][1])
        ai_subset.append(person)
    return ai_subset


def all_peaks2all_peaks_1d(all_peaks):
    """Flatten the per-part peak lists into a single list, keeping the order."""
    return [peak for part_peaks in all_peaks for peak in part_peaks]


def listMultiKeypoints(all_peaks_1d, numPersons, ai_subset=None):
    """Build the flat ``[x, y, v, ...]`` keypoint list of every person.

    Args:
        all_peaks_1d: flat list of peaks; entry ``k`` is looked up with the
            indices stored in ``ai_subset`` and its first two items are used
            as x and y.
        numPersons: number of persons (rows of ``ai_subset``) to convert.
        ai_subset: per-person lists of 14 peak indices, ``-1`` meaning "not
            detected". When omitted, the module-level ``AIsubset`` set by the
            calling script is used, which is what the original two-argument
            version always did.

    Returns:
        One list of 42 ints per person: ``x, y, 1`` for a detected keypoint
        and ``0, 0, 0`` for a missing one.
    """
    if ai_subset is None:
        # Backward compatibility with callers that rely on the global table.
        ai_subset = AIsubset

    multi_keypoints = []
    for i in range(numPersons):
        sp_keypoints = []
        for j in range(14):
            peak_index = ai_subset[i][j]
            if peak_index == -1.:
                sp_keypoints.extend([0, 0, 0])
            else:
                peak = all_peaks_1d[int(peak_index)]
                # Cast to plain ints: NumPy integer scalars are rejected by
                # json.dump under Python 3.
                sp_keypoints.extend([int(peak[0]), int(peak[1]), 1])
        multi_keypoints.append(sp_keypoints)
    return multi_keypoints


def nPersons(t):
    """Return the number of rows (persons) in ``t``."""
    return len(t)


def listHuman(nPersons):
    """Return the labels ``'human1'`` .. ``'human<nPersons>'`` as a list.

    The count is taken from the argument; the original body read the
    module-level ``numPersons`` instead and ignored its parameter.
    """
    return ['human' + str(i + 1) for i in range(nPersons)]
#openpose
weights_path = "model/keras/model.h5"

input_shape = (None, None, 3)

img_input = Input(shape=input_shape)

stages = 6
np_branch1 = 38
np_branch2 = 19

# VGG
stage0_out = vgg_block(img_input)

# stage 1
stage1_branch1_out = stage1_block(stage0_out, np_branch1, 1)
stage1_branch2_out = stage1_block(stage0_out, np_branch2, 2)
x = Concatenate()([stage1_branch1_out, stage1_branch2_out, stage0_out])

# stage t >= 2
for sn in range(2, stages + 1):
    stageT_branch1_out = stageT_block(x, np_branch1, sn, 1)
    stageT_branch2_out = stageT_block(x, np_branch2, sn, 2)
    if sn < stages:
        x = Concatenate()([stageT_branch1_out, stageT_branch2_out, stage0_out])

model = Model(img_input, [stageT_branch1_out, stageT_branch2_out])
model.load_weights(weights_path)
##openpose

#openpose
# find connection in the specified sequence, center 29 is in the position 15
limbSeq = [[2, 3], [2, 6], [3, 4], [4, 5], [6, 7], [7, 8], [2, 9], [9, 10],
           [10, 11], [2, 12], [12, 13], [13, 14], [2, 1], [1, 15], [15, 17],
           [1, 16], [16, 18], [3, 17], [6, 18]]
# the middle joints heatmap correspondence
mapIdx = [[31, 32], [39, 40], [33, 34], [35, 36], [41, 42], [43, 44], [19, 20], [21, 22],
          [23, 24], [25, 26], [27, 28], [29, 30], [47, 48], [49, 50], [53, 54], [51, 52],
          [55, 56], [37, 38], [45, 46]]
##openpose


def _predict_average_maps(oriImg, param, model_params):
    """Run ``model`` at every search scale and average the resized outputs.

    Returns ``(heatmap_avg, paf_avg)``: arrays with the height and width of
    ``oriImg`` and 19 and 38 channels respectively.
    """
    height, width = oriImg.shape[0], oriImg.shape[1]
    stride = model_params['stride']
    multiplier = [s * model_params['boxsize'] / height for s in param['scale_search']]
    heatmap_avg = np.zeros((height, width, 19))
    paf_avg = np.zeros((height, width, 38))

    for scale in multiplier:
        imageToTest = cv2.resize(oriImg, (0, 0), fx=scale, fy=scale, interpolation=cv2.INTER_CUBIC)
        imageToTest_padded, pad = util.padRightDownCorner(imageToTest, stride, model_params['padValue'])

        # Add a leading batch axis: (rows, cols, channels) -> (1, rows, cols, channels).
        input_img = np.transpose(np.float32(imageToTest_padded[:, :, :, np.newaxis]), (3, 0, 1, 2)) / 256 - 0.5
        print("Input shape: " + str(input_img.shape))

        output_blobs = model.predict(input_img)
        print("Output shape (heatmap): " + str(output_blobs[1].shape))

        # extract outputs, resize, and remove padding
        heatmap = np.squeeze(output_blobs[1])  # output 1 is heatmaps
        heatmap = cv2.resize(heatmap, (0, 0), fx=stride, fy=stride, interpolation=cv2.INTER_CUBIC)
        heatmap = heatmap[:imageToTest_padded.shape[0] - pad[2], :imageToTest_padded.shape[1] - pad[3], :]
        heatmap = cv2.resize(heatmap, (width, height), interpolation=cv2.INTER_CUBIC)

        paf = np.squeeze(output_blobs[0])  # output 0 is PAFs
        paf = cv2.resize(paf, (0, 0), fx=stride, fy=stride, interpolation=cv2.INTER_CUBIC)
        paf = paf[:imageToTest_padded.shape[0] - pad[2], :imageToTest_padded.shape[1] - pad[3], :]
        paf = cv2.resize(paf, (width, height), interpolation=cv2.INTER_CUBIC)

        heatmap_avg = heatmap_avg + heatmap / len(multiplier)
        paf_avg = paf_avg + paf / len(multiplier)

    return heatmap_avg, paf_avg


def _find_peaks(heatmap_avg, thre1):
    """Find the local maxima of the first 18 heatmap channels.

    A pixel is a peak when its Gaussian-smoothed value is not smaller than
    its four direct neighbours and is greater than ``thre1``.

    Returns a list with one entry per channel; each entry is a list of
    ``(x, y, score, peak_id)`` tuples, where the score comes from the
    unsmoothed map and ``peak_id`` counts peaks across all channels.
    """
    all_peaks = []
    peak_counter = 0

    for part in range(19 - 1):
        map_ori = heatmap_avg[:, :, part]
        smoothed = gaussian_filter(map_ori, sigma=3)

        map_left = np.zeros(smoothed.shape)
        map_left[1:, :] = smoothed[:-1, :]
        map_right = np.zeros(smoothed.shape)
        map_right[:-1, :] = smoothed[1:, :]
        map_up = np.zeros(smoothed.shape)
        map_up[:, 1:] = smoothed[:, :-1]
        map_down = np.zeros(smoothed.shape)
        map_down[:, :-1] = smoothed[:, 1:]

        peaks_binary = np.logical_and.reduce(
            (smoothed >= map_left, smoothed >= map_right,
             smoothed >= map_up, smoothed >= map_down, smoothed > thre1))
        rows, cols = np.nonzero(peaks_binary)
        peaks = list(zip(cols, rows))  # note reverse: (x, y) = (column, row)
        peaks_with_score = [p + (map_ori[p[1], p[0]],) for p in peaks]
        peaks_with_score_and_id = [p + (peak_counter + n,)
                                   for n, p in enumerate(peaks_with_score)]

        all_peaks.append(peaks_with_score_and_id)
        peak_counter += len(peaks)

    return all_peaks


def _find_connections(all_peaks, paf_avg, thre2, img_height):
    """Score and greedily select the limb connections for every limb type.

    For each limb in ``limbSeq`` every pair of candidate end points is scored
    by sampling the two PAF channels given by ``mapIdx`` at 10 points on the
    segment between them.

    Returns ``(connection_all, special_k)``. ``connection_all[k]`` is an
    array whose rows are ``[peak_id_A, peak_id_B, score, index_A, index_B]``,
    or ``[]`` when one end of limb ``k`` has no candidate; those ``k`` are
    listed in ``special_k``.
    """
    connection_all = []
    special_k = []
    mid_num = 10

    for k in range(len(mapIdx)):
        score_mid = paf_avg[:, :, [c - 19 for c in mapIdx[k]]]
        candA = all_peaks[limbSeq[k][0] - 1]
        candB = all_peaks[limbSeq[k][1] - 1]
        nA = len(candA)
        nB = len(candB)

        if nA == 0 or nB == 0:
            special_k.append(k)
            connection_all.append([])
            continue

        connection_candidate = []
        for i in range(nA):
            for j in range(nB):
                vec = np.subtract(candB[j][:2], candA[i][:2])
                # The +0.1 keeps the norm non-zero when both peaks coincide.
                norm = math.sqrt(vec[0] * vec[0] + vec[1] * vec[1] + 0.1)
                vec = np.divide(vec, norm)

                startend = list(zip(np.linspace(candA[i][0], candB[j][0], num=mid_num),
                                    np.linspace(candA[i][1], candB[j][1], num=mid_num)))

                vec_x = np.array([score_mid[int(round(pt[1])), int(round(pt[0])), 0]
                                  for pt in startend])
                vec_y = np.array([score_mid[int(round(pt[1])), int(round(pt[0])), 1]
                                  for pt in startend])

                score_midpts = np.multiply(vec_x, vec[0]) + np.multiply(vec_y, vec[1])
                score_with_dist_prior = (sum(score_midpts) / len(score_midpts)
                                         + min(0.5 * img_height / norm - 1, 0))

                criterion1 = len(np.nonzero(score_midpts > thre2)[0]) > 0.8 * len(score_midpts)
                criterion2 = score_with_dist_prior > 0
                if criterion1 and criterion2:
                    connection_candidate.append(
                        [i, j, score_with_dist_prior,
                         score_with_dist_prior + candA[i][2] + candB[j][2]])

        connection_candidate = sorted(connection_candidate, key=lambda c: c[2], reverse=True)
        connection = np.zeros((0, 5))
        for cand in connection_candidate:
            i, j, s = cand[0:3]
            # Each end point may take part in at most one connection per limb.
            if i not in connection[:, 3] and j not in connection[:, 4]:
                connection = np.vstack([connection, [candA[i][3], candB[j][3], s, i, j]])
                if len(connection) >= min(nA, nB):
                    break

        connection_all.append(connection)

    return connection_all, special_k


def _group_into_people(all_peaks, connection_all, special_k):
    """Assemble the selected limb connections into per-person rows.

    Returns an array with 20 columns per person: columns 0..17 hold the peak
    id of each part (``-1`` when missing), the second last column is the
    score of the overall configuration and the last column is the number of
    parts of that person. Rows with fewer than 4 parts or an average score
    below 0.4 are removed.
    """
    subset = -1 * np.ones((0, 20))
    candidate = np.array([item for sublist in all_peaks for item in sublist])

    for k in range(len(mapIdx)):
        if k in special_k:
            continue

        partAs = connection_all[k][:, 0]
        partBs = connection_all[k][:, 1]
        indexA, indexB = np.array(limbSeq[k]) - 1

        for i in range(len(connection_all[k])):
            found = 0
            subset_idx = [-1, -1]
            for j in range(len(subset)):
                if subset[j][indexA] == partAs[i] or subset[j][indexB] == partBs[i]:
                    subset_idx[found] = j
                    found += 1

            if found == 1:
                j = subset_idx[0]
                if subset[j][indexB] != partBs[i]:
                    subset[j][indexB] = partBs[i]
                    subset[j][-1] += 1
                    subset[j][-2] += candidate[partBs[i].astype(int), 2] + connection_all[k][i][2]
            elif found == 2:  # if found 2 and disjoint, merge them
                j1, j2 = subset_idx
                print ("found = 2")
                membership = ((subset[j1] >= 0).astype(int) + (subset[j2] >= 0).astype(int))[:-2]
                if len(np.nonzero(membership == 2)[0]) == 0:  # merge
                    # Missing parts are -1, so adding (other + 1) keeps the
                    # ids of whichever row has the part.
                    subset[j1][:-2] += (subset[j2][:-2] + 1)
                    subset[j1][-2:] += subset[j2][-2:]
                    subset[j1][-2] += connection_all[k][i][2]
                    subset = np.delete(subset, j2, 0)
                else:  # as like found == 1
                    subset[j1][indexB] = partBs[i]
                    subset[j1][-1] += 1
                    subset[j1][-2] += candidate[partBs[i].astype(int), 2] + connection_all[k][i][2]

            # if find no partA in the subset, create a new subset
            elif not found and k < 17:
                row = -1 * np.ones(20)
                row[indexA] = partAs[i]
                row[indexB] = partBs[i]
                row[-1] = 2
                row[-2] = sum(candidate[connection_all[k][i, :2].astype(int), 2]) + connection_all[k][i][2]
                subset = np.vstack([subset, row])

    # delete some rows of subset which has few parts occur
    deleteIdx = [i for i in range(len(subset))
                 if subset[i][-1] < 4 or subset[i][-2] / subset[i][-1] < 0.4]
    return np.delete(subset, deleteIdx, axis=0)
##openpose


path = "./test0"
files = os.listdir(path)
list_image_names = []
final_results = []
# Progress is reported against the real number of directory entries instead
# of a hard-coded 30000.
total_images = float(len(files))

# The configuration is the same for every image, so it is read only once.
param, model_params = config_reader()

for num_processed_images, file_name in enumerate(files, 1):
    print('file:', file_name)
    print('number of image:', num_processed_images)
    print('%.2f%%' % (num_processed_images / total_images * 100))

    #openpose
    oriImg = cv2.imread(os.path.join(path, file_name))
    if oriImg is None:
        # cv2.imread returns None for a file it cannot read; skip it
        # instead of failing on ``oriImg.shape``.
        print('skipping unreadable file:', file_name)
        continue

    # Strip the extension whatever its length (".jpg", ".jpeg", ...).
    image_id = os.path.splitext(file_name)[0]
    list_image_names.append(image_id)

    heatmap_avg, paf_avg = _predict_average_maps(oriImg, param, model_params)
    all_peaks = _find_peaks(heatmap_avg, param['thre1'])
    connection_all, special_k = _find_connections(
        all_peaks, paf_avg, param['thre2'], oriImg.shape[0])
    subset = _group_into_people(all_peaks, connection_all, special_k)
    ##openpose

    # ``numPersons`` and ``AIsubset`` stay module-level names: the keypoint
    # conversion helpers may read them as globals.
    numPersons = nPersons(subset)
    AIsubset = subset2AIsubset(subset, numPersons)
    all_peaks_1d = all_peaks2all_peaks_1d(all_peaks)

    keypoint_lists = listMultiKeypoints(all_peaks_1d, numPersons)
    # Plain ints only: NumPy integer scalars are not JSON serialisable
    # under Python 3.
    keypoint_annotations = {
        human: [int(value) for value in keypoints]
        for human, keypoints in zip(listHuman(numPersons), keypoint_lists)
    }

    final_results.append({
        'image_id': image_id,
        'keypoint_annotations': keypoint_annotations,
    })

print(final_results)
with open('data.json', 'w') as f:
    json.dump(final_results, f)

print(list_image_names)
[ { "image_id": "a0f6bdc065a602b7b84a67fb8d14ce403d902e0d", "keypoint_annotations": { "human1": [261, 294, 1, 281, 328, 1, 0, 0, 0, 213, 295, 1, 208, 346, 1, 192, 335, 1, 245, 375, 1, 255, 432, 1, 244, 494, 1, 221, 379, 1, 219, 442, 1, 226, 491, 1, 226, 256, 1, 231, 284, 1], "human2": [313, 301, 1, 305, 337, 1, 321, 345, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 313, 359, 1, 320, 409, 1, 311, 454, 1, 0, 0, 0, 330, 409, 1, 324, 446, 1, 337, 284, 1, 327, 302, 1], "human3": [373, 304, 1, 346, 286, 1, 332, 263, 1, 0, 0, 0, 0, 0, 0, 345, 313, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 363, 386, 1, 361, 424, 1, 361, 475, 1, 365, 273, 1, 369, 297, 1], ... } } ... ]