ENH: Reimplement DataFrame.lookup #61185

Open · wants to merge 20 commits into main
19 changes: 6 additions & 13 deletions doc/source/user_guide/indexing.rst
@@ -1456,24 +1456,17 @@ default value.

.. _indexing.lookup:

-Looking up values by index/column labels
-----------------------------------------
+The :meth:`~pandas.DataFrame.lookup` method
+-------------------------------------------

 Sometimes you want to extract a set of values given a sequence of row labels
-and column labels, this can be achieved by ``pandas.factorize`` and NumPy indexing.
-For instance:
+and column labels, and the ``lookup`` method allows for this and returns a
+NumPy array. For instance:
Member

Do we have other places in our API where we return a NumPy array? With the prevalence of the Arrow type system, this doesn't seem desirable to be locked into returning a NumPy array.

Author

It looks like ``values`` also does this.

Member

Agreed, I think this API should return an ExtensionArray or NumPy array depending on the initial type or result type.

Member

``values`` only returns a NumPy array for NumPy dtypes. For extension types or Arrow-backed types you get something different:

>>> pd.Series([1, 2, 3], dtype="int64[pyarrow]").values
<ArrowExtensionArray>
[1, 2, 3]
Length: 3, dtype: int64[pyarrow]

I don't think we should force a NumPy array return here; particularly for string data, that could be non-performant and expensive
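
As a rough illustration of that cost concern (not part of the PR itself), forcing an Arrow-backed string column into NumPy boxes every element as a Python object, whereas ``.values`` hands back the extension array without conversion:

```python
import pandas as pd

s = pd.Series(["x", "y", "z"], dtype="string[pyarrow]")

s.values      # ArrowStringArray -- the Arrow-backed data, no conversion
s.to_numpy()  # object-dtype ndarray: each element boxed as a Python str
```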

Author (@stevenae, Mar 31, 2025)

Thought it through and did a bit more of a heavy-handed rewrite.

Now using ``melt`` to achieve the outcome of ``values`` or ``to_numpy``.

Performance does take a hit; however, we still outperform the naive ``to_numpy`` lookup for mixed-type lookups.

               Old PR               New PR
2 100          0.1964133749715984   0.5150299999950221
               0.274302874924615    0.5055611249990761
3 100          0.15044220816344023  0.48040162499819417
               0.2768622918520123   0.5237024579982972
4 100          0.15489325020462275  0.49075670799356885
               0.26732829213142395  0.5079907500039553
5 100          0.1546538749244064   0.4678692500019679
               0.2721201251260936   0.5082256250025239
2 100000       0.8096102089621127   2.114792499996838
               1.9508202918805182   2.619460332993185
3 100000       0.8242515418678522   2.2221941250027157
               1.9535491249989718   2.6292148750071647
4 100000       0.8302762501407415   2.3314981659932528
               1.9240409170743078   2.711707041991758
5 100000       0.8654224998317659   2.201970291993348
               2.0630989999044687   2.674396375005017
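
For readers skimming the thread, here is a minimal sketch (not the PR code itself) of the melt-based indexing idea on a toy frame; the frame and labels are illustrative:

```python
import numpy as np
import pandas as pd

df = pd.DataFrame({"A": [1, 2], "B": [3, 4]})

stacked = df.melt()["value"]               # column-major flattening: A's values, then B's
rows = df.index.get_indexer([0, 1])        # positional row indices
cols = df.columns.get_indexer(["B", "A"])  # positional column indices
flat = rows + cols * len(df)               # pair (i, j) lives at position i + j * len(df)

print(stacked[flat].to_numpy())            # [3 2]
```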

Member

> Do we have other places in our API where we return a NumPy array?

``factorize``

> With the prevalence of the Arrow type system this doesn't seem desirable to be locked into returning a NumPy array

This function can be operating on multiple columns of different dtypes. I think the only option in such a case is to return a NumPy array.
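
To make the mixed-dtype point concrete, a small example (illustrative, not from the PR): values drawn from columns with different dtypes have no common container other than an object-dtype NumPy array:

```python
import pandas as pd

df = pd.DataFrame({"x": [1, 2], "y": ["a", "b"]})

# one value from the int column, one from the string column:
# the result can only be held as object dtype
print(df.to_numpy()[[0, 1], [0, 1]])  # [1 'b']
```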

Member

That's true for ``factorize``, but that isn't 100% an equivalent comparison. For sure the indexer is a NumPy array, but the values in the two-tuple are an ``Index`` that should be type-preserving.

That's also a great point on the mixed column types, but it makes me wary of re-implementing this function. With all of the work going towards clarifying our nullability handling and implementing more than just NumPy types, it seems like this function is going to have a ton of edge cases.


 .. ipython:: python

-   df = pd.DataFrame({'col': ["A", "A", "B", "B"],
-                      'A': [80, 23, np.nan, 22],
-                      'B': [80, 55, 76, 67]})
-   df
-   idx, cols = pd.factorize(df['col'])
-   df.reindex(cols, axis=1).to_numpy()[np.arange(len(df)), idx]
-
-Formerly this could be achieved with the dedicated ``DataFrame.lookup`` method
-which was deprecated in version 1.2.0 and removed in version 2.0.0.
+   dflookup = pd.DataFrame(np.random.rand(20, 4), columns = ['A', 'B', 'C', 'D'])
+   dflookup.lookup(list(range(0, 10, 2)), ['B', 'C', 'A', 'B', 'D'])

.. _indexing.class:

1 change: 1 addition & 0 deletions doc/source/whatsnew/v3.0.0.rst
@@ -30,6 +30,7 @@ Other enhancements
^^^^^^^^^^^^^^^^^^
- :class:`pandas.api.typing.FrozenList` is available for typing the outputs of :attr:`MultiIndex.names`, :attr:`MultiIndex.codes` and :attr:`MultiIndex.levels` (:issue:`58237`)
- :class:`pandas.api.typing.SASReader` is available for typing the output of :func:`read_sas` (:issue:`55689`)
- :meth:`pandas.DataFrame.lookup` has been reintroduced with optimizations for looking up values by lists of row/column label pairs (:issue:`40140`)
- :meth:`pandas.api.interchange.from_dataframe` now uses the `PyCapsule Interface <https://arrow.apache.org/docs/format/CDataInterface/PyCapsuleInterface.html>`_ if available, only falling back to the Dataframe Interchange Protocol if that fails (:issue:`60739`)
- Added :meth:`.Styler.to_typst` to write Styler objects to file, buffer or string in Typst format (:issue:`57617`)
- Added missing :meth:`pandas.Series.info` to API reference (:issue:`60926`)
78 changes: 78 additions & 0 deletions pandas/core/frame.py
@@ -5135,6 +5135,84 @@ def _series(self):
# ----------------------------------------------------------------------
# Reindexing and alignment

def lookup(self, row_labels, col_labels) -> ExtensionArray | np.ndarray:
    """
    Label-based "fancy indexing" function for DataFrame.

    Given equal-length arrays of row and column labels, return an
    array of the values corresponding to each (row, col) pair.

    Parameters
    ----------
    row_labels : sequence
        The row labels to use for lookup.
    col_labels : sequence
        The column labels to use for lookup.

    Returns
    -------
    numpy.ndarray or ExtensionArray
        The found values.

    Examples
    --------
    >>> grades = pd.DataFrame(
    ...     {
    ...         "Math": [85, 92, 78, 88, 95],
    ...         "Science": [90, 85, 92, 79, 87],
    ...     },
    ...     index=["Alice", "Bob", "Charlie", "David", "Eve"],
    ... )
    >>> feedback = pd.DataFrame(
    ...     {
    ...         "Math": [
    ...             "Strong analytical skills",
    ...             "Excellent problem-solving",
    ...             "Needs more practice",
    ...             "Solid understanding",
    ...             "Exceptional reasoning",
    ...         ],
    ...         "Science": [
    ...             "Excellent inquiry skills",
    ...             "Good theoretical concepts",
    ...             "Strong methodological interest",
    ...             "Needs focus",
    ...             "Outstanding curiosity",
    ...         ],
    ...     },
    ...     index=["Alice", "Bob", "Charlie", "David", "Eve"],
    ... )
    >>> student_top = grades.rank(1).idxmax(1)  # each student's top subject
    >>> feedback.lookup(student_top.index, student_top)
    array(['Excellent inquiry skills', 'Excellent problem-solving',
           'Strong methodological interest', 'Solid understanding',
           'Exceptional reasoning'], dtype=object)
    """
    n = len(row_labels)
    if n != len(col_labels):
        raise ValueError("Row labels must have same size as column labels")
    if not (self.index.is_unique and self.columns.is_unique):
        # GH#33041
        raise ValueError("DataFrame.lookup requires unique index and columns")

    ridx = self.index.get_indexer(row_labels)
    cidx = self.columns.get_indexer(col_labels)
    if (ridx == -1).any():
        raise KeyError("One or more row labels was not found")
    if (cidx == -1).any():
        raise KeyError("One or more column labels was not found")

    # Restrict to the rows/columns actually involved in the lookup, then
    # flatten the values column-by-column via melt so that mixed dtypes go
    # through the usual concat-based common-type resolution.
    sub = self.take(np.unique(cidx), axis=1)
    sub = sub.take(np.unique(ridx), axis=0)
    ridx = sub.index.get_indexer(row_labels)
    values = sub.melt()["value"]
    cidx = sub.columns.get_indexer(col_labels)
    # melt stacks columns vertically, so the value for pair (i, j)
    # sits at flat position i + j * len(sub)
    flat_index = ridx + cidx * len(sub)

    result = values[flat_index]

    return result

def _reindex_multi(self, axes: dict[str, Index], fill_value) -> DataFrame:
"""
We are guaranteed non-Nones in the axes.
68 changes: 68 additions & 0 deletions pandas/tests/frame/indexing/test_indexing.py
@@ -1414,6 +1414,74 @@ def test_loc_named_tuple_for_midx(self):
)
tm.assert_frame_equal(result, expected)

def test_lookup_float(self, float_frame):
    df = float_frame
    rows = list(df.index) * len(df.columns)
    cols = list(df.columns) * len(df.index)
    result = df.lookup(rows, cols)

    expected = Series([df.loc[r, c] for r, c in zip(rows, cols)])
    tm.assert_series_equal(result, expected, check_index=False, check_names=False)

def test_lookup_mixed(self, float_string_frame):
    df = float_string_frame
    rows = list(df.index) * len(df.columns)
    cols = list(df.columns) * len(df.index)
    result = df.lookup(rows, cols)

    expected = Series([df.loc[r, c] for r, c in zip(rows, cols)], dtype=np.object_)
    tm.assert_series_equal(result, expected, check_index=False, check_names=False)

def test_lookup_bool(self):
    df = DataFrame(
        {
            "label": ["a", "b", "a", "c"],
            "mask_a": [True, True, False, True],
            "mask_b": [True, False, False, False],
            "mask_c": [False, True, False, True],
        }
    )
    df_mask = df.lookup(df.index, "mask_" + df["label"])

    exp_mask = Series(
        [df.loc[r, c] for r, c in zip(df.index, "mask_" + df["label"])]
    )

    tm.assert_series_equal(
        df_mask, Series(exp_mask, name="mask"), check_index=False, check_names=False
    )
    assert df_mask.dtype == np.bool_

def test_lookup_raises(self, float_frame):
    with pytest.raises(KeyError, match="'One or more row labels was not found'"):
        float_frame.lookup(["xyz"], ["A"])

    with pytest.raises(KeyError, match="'One or more column labels was not found'"):
        float_frame.lookup([float_frame.index[0]], ["xyz"])

    with pytest.raises(ValueError, match="same size"):
        float_frame.lookup(["a", "b", "c"], ["a"])

def test_lookup_requires_unique_axes(self):
    # GH#33041 raise with a helpful error message
    df = DataFrame(
        np.random.default_rng(2).standard_normal((3, 2)), columns=["A", "A"]
    )

    rows = [0, 1]
    cols = ["A", "A"]

    # homogeneous-dtype case
    with pytest.raises(ValueError, match="requires unique index and columns"):
        df.lookup(rows, cols)
    with pytest.raises(ValueError, match="requires unique index and columns"):
        df.T.lookup(cols, rows)

    # heterogeneous dtype
    df["B"] = 0
    with pytest.raises(ValueError, match="requires unique index and columns"):
        df.lookup(rows, cols)

@pytest.mark.parametrize("indexer", [["a"], "a"])
@pytest.mark.parametrize("col", [{}, {"b": 1}])
def test_set_2d_casting_date_to_int(self, col, indexer):