smart-interactive-display/Assets/StreamingAssets/MergeFace/Facenet/examples/lfw_evaluate.ipynb

522 lines
16 KiB
Plaintext
Raw Normal View History

2024-06-21 01:20:01 -07:00
{
"cells": [
{
"cell_type": "markdown",
"source": [
"### facenet-pytorch LFW evaluation\n",
"This notebook demonstrates how to evaluate performance against the LFW dataset."
],
"metadata": {
"collapsed": false
}
},
{
"cell_type": "code",
"execution_count": 1,
"outputs": [],
"source": [
"from facenet_pytorch import MTCNN, InceptionResnetV1, fixed_image_standardization, training, extract_face\n",
"import torch\n",
"from torch.utils.data import DataLoader, SubsetRandomSampler, SequentialSampler\n",
"from torchvision import datasets, transforms\n",
"import numpy as np\n",
"import os"
],
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%%\n"
}
}
},
{
"cell_type": "code",
"execution_count": 2,
"outputs": [],
"source": [
"data_dir = 'data/lfw/lfw'\n",
"pairs_path = 'data/lfw/pairs.txt'\n",
"\n",
"batch_size = 16\n",
"epochs = 15\n",
"workers = 0 if os.name == 'nt' else 8"
],
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%%\n"
}
}
},
{
"cell_type": "code",
"execution_count": 3,
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Running on device: cuda:0\n"
]
}
],
"source": [
"device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')\n",
"print('Running on device: {}'.format(device))"
],
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%%\n"
}
}
},
{
"cell_type": "code",
"execution_count": 4,
"outputs": [],
"source": [
"mtcnn = MTCNN(\n",
" image_size=160,\n",
" margin=14,\n",
" device=device,\n",
" selection_method='center_weighted_size'\n",
")"
],
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%%\n"
}
}
},
{
"cell_type": "code",
"execution_count": 5,
"outputs": [],
"source": [
"# Define the data loader for the input set of images\n",
"orig_img_ds = datasets.ImageFolder(data_dir, transform=None)"
],
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%%\n"
}
}
},
{
"cell_type": "code",
"execution_count": 6,
"outputs": [],
"source": [
"\n",
"# overwrites class labels in dataset with path so path can be used for saving output in mtcnn batches\n",
"orig_img_ds.samples = [\n",
" (p, p)\n",
" for p, _ in orig_img_ds.samples\n",
"]\n",
"\n",
"loader = DataLoader(\n",
" orig_img_ds,\n",
" num_workers=workers,\n",
" batch_size=batch_size,\n",
" collate_fn=training.collate_pil\n",
")\n"
],
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%%\n"
}
}
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"crop_paths = []\n",
"box_probs = []\n",
"\n",
"for i, (x, b_paths) in enumerate(loader):\n",
" crops = [p.replace(data_dir, data_dir + '_cropped') for p in b_paths]\n",
" mtcnn(x, save_path=crops)\n",
" crop_paths.extend(crops)\n",
" print('\\rBatch {} of {}'.format(i + 1, len(loader)), end='')"
]
},
{
"cell_type": "code",
"execution_count": 8,
"outputs": [],
"source": [
"# Remove mtcnn to reduce GPU memory usage\n",
"del mtcnn\n",
"torch.cuda.empty_cache()"
],
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%%\n"
}
}
},
{
"cell_type": "code",
"execution_count": 9,
"outputs": [],
"source": [
"# create dataset and data loaders from cropped images output from MTCNN\n",
"\n",
"trans = transforms.Compose([\n",
" np.float32,\n",
" transforms.ToTensor(),\n",
" fixed_image_standardization\n",
"])\n",
"\n",
"dataset = datasets.ImageFolder(data_dir + '_cropped', transform=trans)\n",
"\n",
"embed_loader = DataLoader(\n",
" dataset,\n",
" num_workers=workers,\n",
" batch_size=batch_size,\n",
" sampler=SequentialSampler(dataset)\n",
")"
],
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%%\n"
}
}
},
{
"cell_type": "code",
"execution_count": 10,
"outputs": [],
"source": [
"# Load pretrained resnet model\n",
"resnet = InceptionResnetV1(\n",
" classify=False,\n",
" pretrained='vggface2'\n",
").to(device)"
],
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%%\n"
}
}
},
{
"cell_type": "code",
"execution_count": 11,
"outputs": [],
"source": [
"classes = []\n",
"embeddings = []\n",
"resnet.eval()\n",
"with torch.no_grad():\n",
" for xb, yb in embed_loader:\n",
" xb = xb.to(device)\n",
" b_embeddings = resnet(xb)\n",
" b_embeddings = b_embeddings.to('cpu').numpy()\n",
" classes.extend(yb.numpy())\n",
" embeddings.extend(b_embeddings)"
],
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%%\n"
}
}
},
{
"cell_type": "code",
"execution_count": 12,
"outputs": [],
"source": [
"# Key each embedding by the file it was computed from. embed_loader walks\n",
"# dataset.samples in order (SequentialSampler), so this pairing is exact, whereas\n",
"# crop_paths lists every input image and would silently misalign if some crop is\n",
"# missing from the cropped folder. It also lets this cell run when the cropping\n",
"# cell was skipped because the crops already exist on disk.\n",
"embedded_paths = [path for path, _ in dataset.samples]\n",
"assert len(embedded_paths) == len(embeddings), 'embedding count does not match dataset size'\n",
"embeddings_dict = dict(zip(embedded_paths, embeddings))\n",
"\n"
],
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%%\n"
}
}
},
{
"cell_type": "markdown",
"source": [
"#### Evaluate embeddings by using distance metrics to perform verification on the official LFW test set.\n",
"\n",
"The functions in the next block are copied from `facenet.src.lfw`. Unfortunately, that module has an absolute import from `facenet`, so it can't be imported from the submodule.\n",
"\n",
"The copied functions have been extended to also return the false positives and false negatives."
],
"metadata": {
"collapsed": false
}
},
{
"cell_type": "code",
"execution_count": 13,
"outputs": [],
"source": [
"from sklearn.model_selection import KFold\n",
"from scipy import interpolate\n",
"\n",
"# LFW functions taken from David Sandberg's FaceNet implementation\n",
"def distance(embeddings1, embeddings2, distance_metric=0):\n",
"    \"\"\"Return the row-wise distance between two arrays of embeddings.\n",
"\n",
"    Args:\n",
"        embeddings1: 2-D array with one embedding per row.\n",
"        embeddings2: 2-D array with the same shape as `embeddings1`.\n",
"        distance_metric: 0 for the squared Euclidean distance, 1 for the angle\n",
"            between the embeddings divided by pi.\n",
"\n",
"    Returns:\n",
"        1-D array holding one distance per row.\n",
"\n",
"    Raises:\n",
"        ValueError: if `distance_metric` is neither 0 nor 1.\n",
"    \"\"\"\n",
"    if distance_metric == 0:\n",
"        # Squared Euclidean distance (no square root is taken)\n",
"        diff = np.subtract(embeddings1, embeddings2)\n",
"        dist = np.sum(np.square(diff), 1)\n",
"    elif distance_metric == 1:\n",
"        # Distance based on cosine similarity\n",
"        dot = np.sum(np.multiply(embeddings1, embeddings2), axis=1)\n",
"        norm = np.linalg.norm(embeddings1, axis=1) * np.linalg.norm(embeddings2, axis=1)\n",
"        # Rounding can push the ratio marginally outside [-1, 1], where arccos returns NaN\n",
"        similarity = np.clip(dot / norm, -1.0, 1.0)\n",
"        # np.pi rather than math.pi: `math` is never imported in this notebook\n",
"        dist = np.arccos(similarity) / np.pi\n",
"    else:\n",
"        # Raising a plain string is itself a TypeError in Python 3\n",
"        raise ValueError('Undefined distance metric %d' % distance_metric)\n",
"\n",
"    return dist\n",
"\n",
"def calculate_roc(thresholds, embeddings1, embeddings2, actual_issame, nrof_folds=10, distance_metric=0, subtract_mean=False):\n",
"    \"\"\"Cross-validated ROC data and accuracy for a set of embedding pairs.\n",
"\n",
"    For every fold, the threshold with the highest accuracy on the training split\n",
"    is selected and then scored on the test split.\n",
"\n",
"    Args:\n",
"        thresholds: sequence of distance thresholds to sweep.\n",
"        embeddings1: 2-D array holding the first embedding of each pair, one per row.\n",
"        embeddings2: 2-D array holding the second embedding of each pair, same shape.\n",
"        actual_issame: boolean array, True where a pair shows the same identity.\n",
"        nrof_folds: number of cross-validation folds.\n",
"        distance_metric: forwarded to `distance`.\n",
"        subtract_mean: if True, subtract the mean training-split embedding before\n",
"            computing distances.\n",
"\n",
"    Returns:\n",
"        Tuple `(tpr, fpr, accuracy, is_false_positive, is_false_negative)`: `tpr`\n",
"        and `fpr` are averaged over the folds (one value per threshold), `accuracy`\n",
"        has one entry per fold, and the two lists hold one flag per test pair,\n",
"        collected fold by fold at that fold's best threshold.\n",
"    \"\"\"\n",
"    assert(embeddings1.shape[0] == embeddings2.shape[0])\n",
"    assert(embeddings1.shape[1] == embeddings2.shape[1])\n",
"    nrof_pairs = min(len(actual_issame), embeddings1.shape[0])\n",
"    nrof_thresholds = len(thresholds)\n",
"    k_fold = KFold(n_splits=nrof_folds, shuffle=False)\n",
"\n",
"    tprs = np.zeros((nrof_folds, nrof_thresholds))\n",
"    fprs = np.zeros((nrof_folds, nrof_thresholds))\n",
"    accuracy = np.zeros((nrof_folds))\n",
"\n",
"    is_false_positive = []\n",
"    is_false_negative = []\n",
"\n",
"    indices = np.arange(nrof_pairs)\n",
"\n",
"    for fold_idx, (train_set, test_set) in enumerate(k_fold.split(indices)):\n",
"        if subtract_mean:\n",
"            mean = np.mean(np.concatenate([embeddings1[train_set], embeddings2[train_set]]), axis=0)\n",
"        else:\n",
"            mean = 0.0\n",
"        dist = distance(embeddings1-mean, embeddings2-mean, distance_metric)\n",
"\n",
"        # Find the best threshold for the fold\n",
"        acc_train = np.zeros((nrof_thresholds))\n",
"        for threshold_idx, threshold in enumerate(thresholds):\n",
"            _, _, acc_train[threshold_idx], _, _ = calculate_accuracy(threshold, dist[train_set], actual_issame[train_set])\n",
"        best_threshold_index = np.argmax(acc_train)\n",
"\n",
"        # ROC points for this fold come from the held-out split\n",
"        for threshold_idx, threshold in enumerate(thresholds):\n",
"            tprs[fold_idx, threshold_idx], fprs[fold_idx, threshold_idx], _, _, _ = calculate_accuracy(threshold, dist[test_set], actual_issame[test_set])\n",
"        _, _, accuracy[fold_idx], is_fp, is_fn = calculate_accuracy(thresholds[best_threshold_index], dist[test_set], actual_issame[test_set])\n",
"\n",
"        is_false_positive.extend(is_fp)\n",
"        is_false_negative.extend(is_fn)\n",
"\n",
"    # Average over folds once, after every row of tprs/fprs has been filled\n",
"    tpr = np.mean(tprs, 0)\n",
"    fpr = np.mean(fprs, 0)\n",
"\n",
"    return tpr, fpr, accuracy, is_false_positive, is_false_negative\n",
"\n",
"def calculate_accuracy(threshold, dist, actual_issame):\n",
"    \"\"\"Score a single distance threshold against the ground-truth labels.\n",
"\n",
"    A pair is predicted to be the same identity when its distance is strictly\n",
"    below `threshold`.\n",
"\n",
"    Returns:\n",
"        Tuple `(tpr, fpr, acc, is_fp, is_fn)`: the true and false positive rates\n",
"        (0 when the respective denominator is 0), the accuracy, and boolean masks\n",
"        marking the false-positive and false-negative pairs.\n",
"    \"\"\"\n",
"    accepted = np.less(dist, threshold)\n",
"    rejected = np.logical_not(accepted)\n",
"    different = np.logical_not(actual_issame)\n",
"\n",
"    # Per-pair error masks; their sums are the fp / fn counts\n",
"    is_fp = np.logical_and(accepted, different)\n",
"    is_fn = np.logical_and(rejected, actual_issame)\n",
"\n",
"    tp = np.sum(np.logical_and(accepted, actual_issame))\n",
"    tn = np.sum(np.logical_and(rejected, different))\n",
"    fp = np.sum(is_fp)\n",
"    fn = np.sum(is_fn)\n",
"\n",
"    positives = tp + fn\n",
"    negatives = fp + tn\n",
"    tpr = float(tp) / float(positives) if positives != 0 else 0\n",
"    fpr = float(fp) / float(negatives) if negatives != 0 else 0\n",
"    acc = float(tp + tn) / dist.size\n",
"    return tpr, fpr, acc, is_fp, is_fn\n",
"\n",
"def calculate_val(thresholds, embeddings1, embeddings2, actual_issame, far_target, nrof_folds=10, distance_metric=0, subtract_mean=False):\n",
"    \"\"\"Cross-validated validation rate at a fixed false accept rate.\n",
"\n",
"    For every fold, the threshold whose training-split FAR equals `far_target` is\n",
"    found by linear interpolation and then scored on the test split.\n",
"\n",
"    Args:\n",
"        thresholds: sequence of distance thresholds to sweep.\n",
"        embeddings1: 2-D array holding the first embedding of each pair, one per row.\n",
"        embeddings2: 2-D array holding the second embedding of each pair, same shape.\n",
"        actual_issame: boolean array, True where a pair shows the same identity.\n",
"        far_target: false accept rate at which the validation rate is reported.\n",
"        nrof_folds: number of cross-validation folds.\n",
"        distance_metric: forwarded to `distance`.\n",
"        subtract_mean: if True, subtract the mean training-split embedding before\n",
"            computing distances.\n",
"\n",
"    Returns:\n",
"        Tuple `(val_mean, val_std, far_mean)` aggregated over the folds.\n",
"    \"\"\"\n",
"    assert(embeddings1.shape[0] == embeddings2.shape[0])\n",
"    assert(embeddings1.shape[1] == embeddings2.shape[1])\n",
"    nrof_pairs = min(len(actual_issame), embeddings1.shape[0])\n",
"    thresholds = np.asarray(thresholds)\n",
"    nrof_thresholds = len(thresholds)\n",
"    k_fold = KFold(n_splits=nrof_folds, shuffle=False)\n",
"\n",
"    val = np.zeros(nrof_folds)\n",
"    far = np.zeros(nrof_folds)\n",
"\n",
"    indices = np.arange(nrof_pairs)\n",
"\n",
"    for fold_idx, (train_set, test_set) in enumerate(k_fold.split(indices)):\n",
"        if subtract_mean:\n",
"            mean = np.mean(np.concatenate([embeddings1[train_set], embeddings2[train_set]]), axis=0)\n",
"        else:\n",
"            mean = 0.0\n",
"        dist = distance(embeddings1-mean, embeddings2-mean, distance_metric)\n",
"\n",
"        # Find the threshold that gives FAR = far_target\n",
"        far_train = np.zeros(nrof_thresholds)\n",
"        for threshold_idx, threshold in enumerate(thresholds):\n",
"            _, far_train[threshold_idx] = calculate_val_far(threshold, dist[train_set], actual_issame[train_set])\n",
"        if np.max(far_train) >= far_target:\n",
"            # Many thresholds share the same FAR, and interp1d(kind='slinear') rejects\n",
"            # duplicate x values in recent SciPy releases. Keep the first (smallest)\n",
"            # threshold for every distinct FAR; np.unique also returns them sorted.\n",
"            far_unique, first_idx = np.unique(far_train, return_index=True)\n",
"            f = interpolate.interp1d(far_unique, thresholds[first_idx], kind='slinear')\n",
"            threshold = f(far_target)\n",
"        else:\n",
"            threshold = 0.0\n",
"\n",
"        val[fold_idx], far[fold_idx] = calculate_val_far(threshold, dist[test_set], actual_issame[test_set])\n",
"\n",
"    val_mean = np.mean(val)\n",
"    far_mean = np.mean(far)\n",
"    val_std = np.std(val)\n",
"    return val_mean, val_std, far_mean\n",
"\n",
"def calculate_val_far(threshold, dist, actual_issame):\n",
"    \"\"\"Return the validation rate and false accept rate at one threshold.\n",
"\n",
"    A pair is accepted when its distance is strictly below `threshold`.\n",
"\n",
"    Returns:\n",
"        Tuple `(val, far)`: the fraction of same-identity pairs that are accepted\n",
"        and the fraction of different-identity pairs that are accepted. A rate is\n",
"        reported as 0.0 when there are no pairs of that kind.\n",
"    \"\"\"\n",
"    predict_issame = np.less(dist, threshold)\n",
"    true_accept = np.sum(np.logical_and(predict_issame, actual_issame))\n",
"    false_accept = np.sum(np.logical_and(predict_issame, np.logical_not(actual_issame)))\n",
"    n_same = np.sum(actual_issame)\n",
"    n_diff = np.sum(np.logical_not(actual_issame))\n",
"    # Guard the denominators the same way calculate_accuracy does: a split without\n",
"    # same (or different) pairs would otherwise raise ZeroDivisionError\n",
"    val = 0.0 if n_same == 0 else float(true_accept) / float(n_same)\n",
"    far = 0.0 if n_diff == 0 else float(false_accept) / float(n_diff)\n",
"    return val, far\n",
"\n",
"\n",
"\n",
"def evaluate(embeddings, actual_issame, nrof_folds=10, distance_metric=0, subtract_mean=False):\n",
"    \"\"\"Compute ROC, accuracy and validation-rate metrics for interleaved pair embeddings.\n",
"\n",
"    Args:\n",
"        embeddings: array in which rows 0, 2, 4, ... hold the first image of each\n",
"            pair and rows 1, 3, 5, ... the second.\n",
"        actual_issame: one boolean per pair, True for same-identity pairs.\n",
"        nrof_folds, distance_metric, subtract_mean: forwarded to `calculate_roc`\n",
"            and `calculate_val`.\n",
"\n",
"    Returns:\n",
"        Tuple `(tpr, fpr, accuracy, val, val_std, far, fp, fn)`.\n",
"    \"\"\"\n",
"    issame = np.asarray(actual_issame)\n",
"    first, second = embeddings[0::2], embeddings[1::2]\n",
"    shared = dict(nrof_folds=nrof_folds, distance_metric=distance_metric, subtract_mean=subtract_mean)\n",
"\n",
"    # ROC and accuracy on a coarse threshold grid\n",
"    roc_thresholds = np.arange(0, 4, 0.01)\n",
"    tpr, fpr, accuracy, fp, fn = calculate_roc(roc_thresholds, first, second, issame, **shared)\n",
"\n",
"    # Validation rate at a target FAR of 1e-3 on a finer grid\n",
"    val_thresholds = np.arange(0, 4, 0.001)\n",
"    val, val_std, far = calculate_val(val_thresholds, first, second, issame, 1e-3, **shared)\n",
"\n",
"    return tpr, fpr, accuracy, val, val_std, far, fp, fn\n",
"\n",
"def add_extension(path):\n",
"    \"\"\"Return `path` with the extension of the image file that exists on disk.\n",
"\n",
"    `.jpg` is tried first, then `.png`.\n",
"\n",
"    Raises:\n",
"        RuntimeError: if neither file exists.\n",
"    \"\"\"\n",
"    for extension in ('.jpg', '.png'):\n",
"        candidate = path + extension\n",
"        if os.path.exists(candidate):\n",
"            return candidate\n",
"    raise RuntimeError('No file \"%s\" with extension png or jpg.' % path)\n",
"\n",
"def get_paths(lfw_dir, pairs):\n",
"    \"\"\"Turn parsed pairs entries into image paths and same/different labels.\n",
"\n",
"    Args:\n",
"        lfw_dir: directory containing one sub-directory of images per person.\n",
"        pairs: iterable of entries as produced by `read_pairs`. A 3-field entry\n",
"            `(name, idx1, idx2)` is a same-identity pair; a 4-field entry\n",
"            `(name1, idx1, name2, idx2)` is a different-identity pair.\n",
"\n",
"    Returns:\n",
"        Tuple `(path_list, issame_list)`: `path_list` is flat, with the two paths of\n",
"        pair `i` at positions `2*i` and `2*i + 1`; `issame_list` has one boolean per\n",
"        pair.\n",
"\n",
"    Raises:\n",
"        RuntimeError: from `add_extension` when an image file cannot be found.\n",
"    \"\"\"\n",
"    nrof_skipped_pairs = 0\n",
"    path_list = []\n",
"    issame_list = []\n",
"    for pair in pairs:\n",
"        if len(pair) == 3:\n",
"            path0 = add_extension(os.path.join(lfw_dir, pair[0], pair[0] + '_' + '%04d' % int(pair[1])))\n",
"            path1 = add_extension(os.path.join(lfw_dir, pair[0], pair[0] + '_' + '%04d' % int(pair[2])))\n",
"            issame = True\n",
"        elif len(pair) == 4:\n",
"            path0 = add_extension(os.path.join(lfw_dir, pair[0], pair[0] + '_' + '%04d' % int(pair[1])))\n",
"            path1 = add_extension(os.path.join(lfw_dir, pair[2], pair[2] + '_' + '%04d' % int(pair[3])))\n",
"            issame = False\n",
"        else:\n",
"            # Malformed entry (e.g. a blank line in the pairs file). Without this\n",
"            # branch the previous pair's paths and label would be appended a second\n",
"            # time, or the names would be unbound on the very first entry.\n",
"            nrof_skipped_pairs += 1\n",
"            continue\n",
"        if os.path.exists(path0) and os.path.exists(path1):    # Only add the pair if both paths exist\n",
"            path_list.extend((path0, path1))\n",
"            issame_list.append(issame)\n",
"        else:\n",
"            nrof_skipped_pairs += 1\n",
"    if nrof_skipped_pairs > 0:\n",
"        print('Skipped %d image pairs' % nrof_skipped_pairs)\n",
"\n",
"    return path_list, issame_list\n",
"\n",
"def read_pairs(pairs_filename):\n",
"    \"\"\"Read a pairs file into an object array of whitespace-split entries.\n",
"\n",
"    The first line of the file is skipped; every remaining line becomes one list\n",
"    of string fields.\n",
"    \"\"\"\n",
"    with open(pairs_filename, 'r') as f:\n",
"        body_lines = f.readlines()[1:]\n",
"    entries = [line.strip().split() for line in body_lines]\n",
"    return np.array(entries, dtype=object)"
],
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%%\n"
}
}
},
{
"cell_type": "code",
"execution_count": 14,
"outputs": [],
"source": [
"pairs = read_pairs(pairs_path)\n",
"path_list, issame_list = get_paths(data_dir+'_cropped', pairs)\n",
"embeddings = np.array([embeddings_dict[path] for path in path_list])\n",
"\n",
"tpr, fpr, accuracy, val, val_std, far, fp, fn = evaluate(embeddings, issame_list)"
],
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%%\n"
}
}
},
{
"cell_type": "code",
"execution_count": 15,
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"[0.995 0.995 0.99166667 0.99 0.99 0.99666667\n",
" 0.99 0.995 0.99666667 0.99666667]\n"
]
},
{
"data": {
"text/plain": "0.9936666666666666"
},
"execution_count": 15,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"print(accuracy)\n",
"np.mean(accuracy)\n",
"\n"
],
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%%\n"
}
}
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 2
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython2",
"version": "2.7.6"
}
},
"nbformat": 4,
"nbformat_minor": 0
}