Adding re-ranking for image retrieval (microsoft#515)

PatrickBue · web-flow · commit 735f7ff4747e · 2020-03-18T19:00:32.000-04:00
diff --git a/NOTICE.txt b/NOTICE.txt
@@ -500,3 +500,29 @@ Permission is hereby granted, free of charge, to any person obtaining a copy of
 The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 
+--
+
+https://github.com/layumi/Person_reID_baseline_pytorch
+
+
+MIT License
+
+Copyright (c) 2018 Zhedong Zheng
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
diff --git a/scenarios/similarity/02_state_of_the_art.ipynb b/scenarios/similarity/02_state_of_the_art.ipynb
diff --git a/scenarios/similarity/README.md b/scenarios/similarity/README.md
@@ -27,6 +27,11 @@ Below are a subset of popular papers in the field with reported accuracies on st
 | [Classification is a Strong Baseline for DeepMetric Learning](https://arxiv.org/abs/1811.12649) <br> (Implemented in this repository)  | BMVC 2019	| No  | **84%** (512-dim) <br> **89%** (2048-dim) |	61% (512-dim) <br> **65%** (2048-dim) |	**78%** (512-dim) <br> **80%** (2048-dim) |			
 
 
+## Re-ranking
+
+In addition to the SOTA method introduced above, we provide an implementation of a popular re-ranking approach published in the CVPR 2017 paper [Re-ranking Person Re-identification with k-reciprocal Encoding](http://openaccess.thecvf.com/content_cvpr_2017/papers/Zhong_Re-Ranking_Person_Re-Identification_CVPR_2017_paper.pdf). Re-ranking is a post-processing step to improve retrieval accuracy. The proposed approach is fast, fully automatic, unsupervised, and has been shown to outperform other state-of-the-art methods with regard to accuracy.
+
+
 ## Frequently asked questions
 
 Answers to Frequently Asked Questions such as "How many images do I need to train a model?" or "How to annotate images?" can be found in the [FAQ.md](FAQ.md) file. For image classification specified questions, see the [FAQ.md](../classification/FAQ.md) in the classification folder.
diff --git a/tests/conftest.py b/tests/conftest.py
@@ -16,7 +16,7 @@
 from PIL import Image
 from torch import tensor
 from pathlib import Path
-from fastai.vision import cnn_learner, models
+from fastai.vision import cnn_learner, DatasetType, models
 from fastai.vision.data import ImageList, imagenet_stats
 from typing import List, Tuple
 from tempfile import TemporaryDirectory
@@ -35,6 +35,7 @@
     _apply_threshold,
 )
 from utils_cv.similarity.data import Urls as is_urls
+from utils_cv.similarity.model import compute_features_learner
 
 
 def path_classification_notebooks():
@@ -279,7 +280,7 @@ def tiny_ic_databunch(tmp_session):
         .split_by_rand_pct(valid_pct=0.1, seed=20)
         .label_from_folder()
         .transform(size=50)
-        .databunch(bs=16, num_workers = db_num_workers())
+        .databunch(bs=16, num_workers=db_num_workers())
         .normalize(imagenet_stats)
     )
 
@@ -351,7 +352,7 @@ def testing_databunch(tmp_session):
         .split_by_rand_pct(valid_pct=0.2, seed=20)
         .label_from_folder()
         .transform(size=300)
-        .databunch(bs=16, num_workers = db_num_workers())
+        .databunch(bs=16, num_workers=db_num_workers())
         .normalize(imagenet_stats)
     )
 
@@ -735,6 +736,7 @@ def workspace_region(request):
 
 # ------|-- Similarity ---------------------------------------------
 
+
 @pytest.fixture(scope="session")
 def tiny_is_data_path(tmp_session) -> str:
     """ Returns the path to the tiny fridge objects dataset. """
@@ -743,4 +745,14 @@ def tiny_is_data_path(tmp_session) -> str:
         fpath=tmp_session,
         dest=tmp_session,
         exist_ok=True,
-    )
+    )
+
+
+@pytest.fixture(scope="session")
+def tiny_ic_databunch_valid_features(tiny_ic_databunch):
+    """ Returns the features computed for the validation set of the tiny databunch. """
+    learner = cnn_learner(tiny_ic_databunch, models.resnet18)
+    # The features are taken from the layer at position [1][6] of the learner's model.
+    layer = learner.model[1][6]
+    return compute_features_learner(
+        tiny_ic_databunch, DatasetType.Valid, learner, layer
+    )
diff --git a/tests/unit/similarity/test_similarity_metrics.py b/tests/unit/similarity/test_similarity_metrics.py
@@ -8,6 +8,7 @@
 from utils_cv.similarity.data import comparative_set_builder
 from utils_cv.similarity.metrics import (
     compute_distances,
+    evaluate,
     positive_image_ranks,
     recall_at_k,
     vector_distance,
@@ -64,3 +65,31 @@ def test_recall_at_k():
     assert recall_at_k(rank_list, 3) == 60
     assert recall_at_k(rank_list, 6) == 100
     assert recall_at_k(rank_list, 10) == 100
+
+
+def test_evaluate(tiny_ic_databunch, tiny_ic_databunch_valid_features):
+    """ Checks the value ranges returned by evaluate(), without and with re-ranking. """
+    # Without re-ranking
+    (rank_accs, mAP) = evaluate(
+        tiny_ic_databunch.valid_ds,
+        tiny_ic_databunch_valid_features,
+        use_rerank=False,
+    )
+    assert 0 <= mAP <= 1.0
+    assert len(rank_accs) == 6
+    assert max(rank_accs) <= 1.001
+    assert min(rank_accs) >= -0.001
+    # Rank accuracies are cumulative and hence must not decrease with increasing rank.
+    for i in range(len(rank_accs) - 1):
+        assert rank_accs[i] <= rank_accs[i + 1]
+
+    # With re-ranking. The result is bound to the same names as above so that the
+    # assertions below verify this second call rather than the first one.
+    (rank_accs, mAP) = evaluate(
+        tiny_ic_databunch.valid_ds,
+        tiny_ic_databunch_valid_features,
+        use_rerank=True,
+        rerank_k1=2,
+        rerank_k2=3,
+    )
+    assert 0 <= mAP <= 1.0
+    assert len(rank_accs) == 6
+    assert max(rank_accs) <= 1.001
+    assert min(rank_accs) >= -0.001
+    for i in range(len(rank_accs) - 1):
+        assert rank_accs[i] <= rank_accs[i + 1]
diff --git a/utils_cv/similarity/metrics.py b/utils_cv/similarity/metrics.py
@@ -1,11 +1,12 @@
 # Copyright (c) Microsoft Corporation. All rights reserved.
 # Licensed under the MIT License.
-
-from typing import List
-
+from typing import Dict, List
 import numpy as np
 import scipy
 
+from fastai.vision import LabelList
+from .references.evaluate import evaluate_with_query_set
+
 
 def vector_distance(
     vec1: np.ndarray,
@@ -105,3 +106,52 @@ def recall_at_k(ranks: List[int], k: int) -> float:
     below_threshold = [x for x in ranks if x <= k]
     percent_in_top_k = round(100.0 * len(below_threshold) / len(ranks), 1)
     return percent_in_top_k
+
+
+def evaluate(
+    data: LabelList,
+    features: Dict[str, np.array],
+    use_rerank: bool = False,
+    rerank_k1: int = 20,
+    rerank_k2: int = 6,
+    rerank_lambda: float = 0.3,
+):
+    """
+    Computes the rank accuracies as well as mAP, optionally with re-ranking
+    post-processor to improve accuracy (see the re-ranking implementation for more info).
+
+    Each image in `data` is used both as query image and as reference image.
+
+    Args:
+        data: Fastai's image labellist
+        features: Dictionary of DNN features for each image. The keys are the string
+            representations of the entries in data.items.
+        use_rerank: use re-ranking
+        rerank_k1, rerank_k2, rerank_lambda: re-ranking parameters
+    Returns:
+        rank_accs: accuracy at rank 1, 2, ..., N, where N is the number of images in data
+        mAP: mean average precision
+
+    """
+
+    labels = np.array([data.y[i].obj for i in range(len(data.y))])
+    feature_matrix = np.array([features[str(s)] for s in data.items])
+
+    # Assign each image into its own group. This serves as id during evaluation to
+    # ensure a query image is not compared to itself during rank computation.
+    # For the market-1501 dataset, the group ids can be used to ensure that a query
+    # can not match to an image taken from the same camera.
+    groups = np.arange(len(labels))
+    assert len(labels) == len(groups) == feature_matrix.shape[0]
+
+    # Run evaluation, using the same images as gallery and as query set
+    rank_accs, mAP = evaluate_with_query_set(
+        labels,
+        groups,
+        feature_matrix,
+        labels,
+        groups,
+        feature_matrix,
+        use_rerank,
+        rerank_k1,
+        rerank_k2,
+        rerank_lambda,
+    )
+    return rank_accs, mAP
diff --git a/utils_cv/similarity/references/evaluate.py b/utils_cv/similarity/references/evaluate.py
@@ -0,0 +1,141 @@
+# Most of the code in this file is copied and slightly modified from:
+# https://github.com/layumi/Person_reID_baseline_pytorch/blob/master/evaluate.py
+
+import numpy as np
+import time
+import torch
+
+from .re_ranking import re_ranking
+
+
+# Note: the Market1501 dataset has a slightly different evaluation procedure which can be used
+#       by setting is_market1501=True.
+def evaluate_with_query_set(
+    gallery_labels,
+    gallery_groups,
+    gallery_features,
+    query_labels,
+    query_groups,
+    query_features,
+    use_rerank=False,
+    rerank_k1=20,
+    rerank_k2=6,
+    rerank_lambda=0.3,
+    is_market1501=False,
+):
+    """
+    Computes the rank accuracies (CMC) and the mean average precision obtained when
+    matching each query image against the gallery images.
+
+    Args:
+        gallery_labels: numpy array with the label of each gallery image
+        gallery_groups: numpy array with the group id of each gallery image
+        gallery_features: feature matrix with one row per gallery image
+        query_labels: label of each query image
+        query_groups: group id of each query image
+        query_features: feature matrix with one row per query image
+        use_rerank: if True, compute the distances using re_ranking()
+        rerank_k1, rerank_k2, rerank_lambda: parameters passed on to re_ranking()
+        is_market1501: use the Market1501 evaluation procedure
+
+    Returns:
+        CMC: float tensor with len(gallery_labels) entries, where entry k-1 is the
+            accuracy at rank k
+        ap: average precision, averaged over the queries
+
+    Raises:
+        ZeroDivisionError: if not a single query could be evaluated.
+    """
+
+    # Init
+    ap = 0.0
+    CMC = torch.IntTensor(len(gallery_labels)).zero_()
+
+    # Compute pairwise dot products. Despite the "dist" name, larger values mean more
+    # similar, which is why the values are negated below when re-ranking is not used.
+    q_g_dist = np.dot(query_features, np.transpose(gallery_features))
+
+    # Improve pairwise distances using re-ranking
+    if use_rerank:
+        print("Calculate re-ranked distances..")
+        q_q_dist = np.dot(query_features, np.transpose(query_features))
+        g_g_dist = np.dot(gallery_features, np.transpose(gallery_features))
+        since = time.time()
+        distances = re_ranking(
+            q_g_dist, q_q_dist, g_g_dist, k1=rerank_k1, k2=rerank_k2, lambda_value=rerank_lambda,
+        )
+        time_elapsed = time.time() - since
+        print(
+            "Reranking complete in {:.0f}m {:.0f}s".format(
+                time_elapsed // 60, time_elapsed % 60
+            )
+        )
+    else:
+        distances = -q_g_dist
+
+    # Compute accuracies
+    norm = 0
+    skip = 1  # set to >1 to only consider a subset of the query images
+    for i in range(0, len(query_labels), skip):
+        ap_tmp, CMC_tmp = evaluate_helper(
+            distances[i, :],
+            query_labels[i],
+            query_groups[i],
+            gallery_labels,
+            gallery_groups,
+            is_market1501,
+        )
+        # A value of -1 marks a query without any true match in the gallery
+        if CMC_tmp[0] == -1:
+            continue
+        norm += 1
+        ap += ap_tmp
+        CMC = CMC + CMC_tmp
+
+    # Print accuracy. Note that Market1501 normalizes by dividing over number of query images.
+    if is_market1501:
+        norm = len(query_labels) / float(skip)
+    ap = ap / norm
+    CMC = CMC.float()
+    CMC = CMC / norm
+
+    # The gallery can contain fewer than 5 images. The CMC curve is cumulative, hence
+    # in that case rank@5 is identical to the last entry of the curve.
+    rank5 = CMC[min(4, len(CMC) - 1)]
+    print(
+        "Rank@1:{:.1f}, rank@5:{:.1f}, mAP:{:.2f}".format(100 * CMC[0], 100 * rank5, ap)
+    )
+
+    return (CMC, ap)
+
+
+# Explanation:
+# - query_index: all images in the reference set with the same label as the query image.
+# - camera_index: all images which share the same group (called "camera" since the code was originally written for the Market-1501 dataset).
+# - good_index: reference images with the same label but a different group than the query ("true match").
+# - junk_index2: reference images with the same label and the same group ("camera") as the query. These are ignored, i.e. removed from the ranking.
+# - junk_index1: for the market1501 dataset, images with label -1 should be ignored.
+def evaluate_helper(score, ql, qc, gl, gc, is_market1501=False):
+    """
+    Computes the average precision and the CMC curve for a single query image.
+
+    Args:
+        score: distance of the query to each reference image (smaller ranks first)
+        ql: label of the query image
+        qc: group of the query image
+        gl: numpy array with the labels of the reference images
+        gc: numpy array with the groups of the reference images
+        is_market1501: if True, also ignore reference images with label -1
+
+    Returns:
+        Tuple (ap, cmc) as returned by compute_mAP().
+    """
+    assert isinstance(gl, np.ndarray), "Input gl has to be a numpy ndarray"
+    assert isinstance(gc, np.ndarray), "Input gc has to be a numpy ndarray"
+
+    # Sort scores
+    index = np.argsort(score)  # from small to large
+
+    # Compare reference images to the query image.
+    query_index = np.argwhere(gl == ql)
+    camera_index = np.argwhere(gc == qc)
+    good_index = np.setdiff1d(query_index, camera_index, assume_unique=True)
+    junk_index2 = np.intersect1d(query_index, camera_index)
+
+    # For market 1501 dataset, ignore images with label -1 (stored as number or as string)
+    if is_market1501:
+        junk_index1a = np.argwhere(gl == -1)
+        junk_index1b = np.argwhere(gl == "-1")
+        junk_index1 = np.append(junk_index1a, junk_index1b)
+        junk_index = np.append(junk_index2, junk_index1)
+    else:
+        junk_index = junk_index2
+
+    return compute_mAP(index, good_index, junk_index)
+
+
+def compute_mAP(index, good_index, junk_index):
+    """
+    Computes the average precision and the CMC curve for a single ranking.
+
+    Args:
+        index: 1-dim numpy array of reference image indices, sorted from best to worst match
+        good_index: numpy array with the indices of the true matches
+        junk_index: numpy array with the indices which are removed from the ranking
+
+    Returns:
+        ap: average precision. This is 0 if good_index is empty.
+        cmc: int tensor of length len(index) which is 0 before the position of the first
+            true match and 1 from there on. If good_index is empty then cmc[0] is set
+            to -1 to signal that the query can not be evaluated.
+    """
+    ap = 0
+    cmc = torch.IntTensor(len(index)).zero_()
+    if good_index.size == 0:  # if empty
+        cmc[0] = -1
+        return ap, cmc
+
+    # remove junk_index (np.isin replaces np.in1d which is deprecated since NumPy 2.0)
+    keep = np.isin(index, junk_index, invert=True)
+    index = index[keep]
+
+    # find the positions of the true matches within the ranking
+    ngood = len(good_index)
+    rows_good = np.argwhere(np.isin(index, good_index)).flatten()
+
+    cmc[rows_good[0] :] = 1
+
+    # Each true match adds the same recall increment, weighted by the mean of the
+    # precision just before and just after that match.
+    d_recall = 1.0 / ngood
+    for i in range(ngood):
+        precision = (i + 1) * 1.0 / (rows_good[i] + 1)
+        if rows_good[i] != 0:
+            old_precision = i * 1.0 / rows_good[i]
+        else:
+            old_precision = 1.0
+        ap = ap + d_recall * (old_precision + precision) / 2
+
+    return ap, cmc
diff --git a/utils_cv/similarity/references/re_ranking.py b/utils_cv/similarity/references/re_ranking.py