lfw_eval.py

from __future__ import print_function

import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.autograd import Variable
torch.backends.cudnn.bencmark = True

import os,sys,cv2,random,datetime
import argparse
import numpy as np
import zipfile

#from dataset import ImageDataset
from matlab_cp2tform import get_similarity_transform_for_cv2
import CNN_R

def alignment(src_img,src_pts):
    ref_pts = [ [30.2946, 51.6963],[65.5318, 51.5014],
        [48.0252, 71.7366],[33.5493, 92.3655],[62.7299, 92.2041] ]
    crop_size = (96, 112)
    src_pts = np.array(src_pts).reshape(5,2)

    s = np.array(src_pts).astype(np.float32)
    r = np.array(ref_pts).astype(np.float32)

    tfm = get_similarity_transform_for_cv2(s, r)
    face_img = cv2.warpAffine(src_img, tfm, crop_size)
    return face_img


# 10 folds. 5400 imgs for training, 600 imgs for testing
def KFold(n=6000, n_folds=10, shuffle=False):  
    folds = []
    base = list(range(n))
    for i in range(n_folds):
        test = base[int(i*n/n_folds):int((i+1)*n/n_folds)]
        train = list(set(base)-set(test))
        folds.append([train,test])
    return folds


def eval_acc(threshold, diff):
    y_true = []
    y_predict = []
    for d in diff:
        same = 1 if float(d[2]) > threshold else 0
        y_predict.append(same)
        y_true.append(int(d[3]))
    y_true = np.array(y_true)
    y_predict = np.array(y_predict)
    accuracy = 1.0*np.count_nonzero(y_true==y_predict)/len(y_true)
    return accuracy


def find_best_threshold(thresholds, predicts):
    best_threshold = best_acc = 0
    for threshold in thresholds:
        accuracy = eval_acc(threshold, predicts)
        if accuracy >= best_acc:
            best_acc = accuracy
            best_threshold = threshold
    return best_threshold


parser = argparse.ArgumentParser(description='PyTorch sphereface lfw')
parser.add_argument('--net','-n', default='sphere64a', type=str)
parser.add_argument('--lfw', default='../datasets/lfw.zip', type=str)
parser.add_argument('--model','-m', default='./weights/CNN_R_sphere64a_14.pth', type=str)
args = parser.parse_args()

predicts=[]
net = getattr(CNN_R,args.net)()
net.load_state_dict(torch.load(args.model))
net.cuda()
net.eval()
net.feature = True   # produce features instead similarities

zfile = zipfile.ZipFile(args.lfw)

landmark = {}
with open('../datasets/lfw_landmark.txt') as f:
    landmark_lines = f.readlines()
for line in landmark_lines:
    l = line.replace('\n','').split('\t')
    landmark[l[0]] = [int(k) for k in l[1:]]

with open('../datasets/pairs.txt') as f:
    pairs_lines = f.readlines()[1:]

for i in range(6000):
    p = pairs_lines[i].replace('\n','').split('\t')

    if 3==len(p):     # the two imgs of the same person
        sameflag = 1    
        name1 = p[0]+'/'+p[0]+'_'+'{:04}.jpg'.format(int(p[1]))
        name2 = p[0]+'/'+p[0]+'_'+'{:04}.jpg'.format(int(p[2]))
    if 4==len(p):     # the two imggs of two diffrent person
        sameflag = 0
        name1 = p[0]+'/'+p[0]+'_'+'{:04}.jpg'.format(int(p[1]))
        name2 = p[2]+'/'+p[2]+'_'+'{:04}.jpg'.format(int(p[3]))
   
    img1 = alignment(cv2.imdecode(np.frombuffer(zfile.read('lfw/'+name1),np.uint8),1),landmark[name1])
    img2 = alignment(cv2.imdecode(np.frombuffer(zfile.read('lfw/'+name2),np.uint8),1),landmark[name2])

    imglist = [img1,cv2.flip(img1,1),img2,cv2.flip(img2,1)]   # flip horizontally
    for i in range(len(imglist)):
        imglist[i] = imglist[i].transpose(2, 0, 1).reshape((1,3,112,96))  # BGR2RGB
        imglist[i] = (imglist[i]-127.5)/128.0

    img = np.vstack(imglist)
    img = Variable(torch.from_numpy(img).float(),volatile=True).cuda()
    output = net(img)
    f = output.data
    f1,f2 = f[0],f[2]   # original features. but should be the concatenating of ori and aug features?
    cosdistance = f1.dot(f2)/(f1.norm()*f2.norm()+1e-5)
    predicts.append('{}\t{}\t{}\t{}\n'.format(name1,name2,cosdistance,sameflag))


accuracy = []
thd = []
folds = KFold(n=6000, n_folds=10, shuffle=False)
thresholds = np.arange(-1.0, 1.0, 0.005)    # find the best thresholds
predicts = np.array(list(map(lambda line:line.strip('\n').split(), predicts)))
for idx, (train, test) in enumerate(folds):
    best_thresh = find_best_threshold(thresholds, predicts[train])   # 5400 imgs to find the best thresh
    accuracy.append(eval_acc(best_thresh, predicts[test]))   # 600 imgs to test the performance
    thd.append(best_thresh)   # 10 different best thresh for training sets(5400 imgs)
    print('Fold {:d}: Acc:{:.4f}'.format(idx+1, accuracy[idx])) 
print('Model:{:s} LFWACC={:.4f} std={:.4f} thd={:.4f}'.format(args.model, np.mean(accuracy), np.std(accuracy), np.mean(thd)))