From ac745c09962cba79f28552b229b5c3a602149fc1 Mon Sep 17 00:00:00 2001 From: Matthias Feurer Date: Tue, 20 Sep 2022 13:09:25 +0200 Subject: [PATCH 01/16] Bump version --- autosklearn/__version__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/autosklearn/__version__.py b/autosklearn/__version__.py index a6b8488c18..810f877681 100644 --- a/autosklearn/__version__.py +++ b/autosklearn/__version__.py @@ -1,4 +1,4 @@ """Version information.""" # The following line *must* be the last in the module, exactly as formatted: -__version__ = "0.15.0" +__version__ = "0.16.0dev" From f160d7d82b402c0297cf9d1c6fd22ca47ff117be Mon Sep 17 00:00:00 2001 From: Shantam Gilra <64306405+shantam-8@users.noreply.github.com> Date: Mon, 10 Oct 2022 11:03:36 +0100 Subject: [PATCH 02/16] Proposed changes for ``test_metrics.py`` (#1577) * Trial pytest changes * Updated tests * Fixing errors and repetition * Updating tests * Proposed updates * Removing TestMetrics class * Update test_metrics.py --- test/test_metric/test_metrics.py | 859 ++++++++++++++----------------- 1 file changed, 380 insertions(+), 479 deletions(-) diff --git a/test/test_metric/test_metrics.py b/test/test_metric/test_metrics.py index 4443024c4b..2edc7c066c 100644 --- a/test/test_metric/test_metrics.py +++ b/test/test_metric/test_metrics.py @@ -2,7 +2,6 @@ import numpy as np import sklearn.metrics -from smac.utils.constants import MAXINT import autosklearn.metrics from autosklearn.constants import BINARY_CLASSIFICATION, REGRESSION @@ -45,425 +44,340 @@ def dummy_metric(y_true, y_pred, X_data=None, **kwargs): ) scorer_nox(y_true, y_pred, X_data=np.array([32])) - def test_predict_scorer_binary(self): - y_true = np.array([0, 0, 1, 1]) - y_pred = np.array([[1.0, 0.0], [1.0, 0.0], [0.0, 1.0], [0.0, 1.0]]) - - scorer = autosklearn.metrics._PredictScorer( - "accuracy", sklearn.metrics.accuracy_score, 1, 0, 1, {} - ) - - score = scorer(y_true, y_pred) - self.assertAlmostEqual(score, 1.0) - - y_pred = np.array([[0.0, 0.0], [0.0, 0.0], [0.0, 0.0], [0.0, 0.0]]) - score = scorer(y_true, y_pred) - self.assertAlmostEqual(score, 0.5) - - y_pred = np.array([[1.0, 1.0], [1.0, 1.0], [1.0, 1.0], [1.0, 1.0]]) - score = scorer(y_true, y_pred) - self.assertAlmostEqual(score, 0.5) - - scorer = autosklearn.metrics._PredictScorer( - "bac", sklearn.metrics.balanced_accuracy_score, 1, 0, 1, {} - ) - - score = scorer(y_true, y_pred) - self.assertAlmostEqual(score, 0.5) - - scorer = autosklearn.metrics._PredictScorer( - name="accuracy", - score_func=sklearn.metrics.accuracy_score, - optimum=1, - worst_possible_result=0, - sign=-1, - kwargs={}, - ) - - y_pred = np.array([[1.0, 0.0], [1.0, 0.0], [0.0, 1.0], [0.0, 1.0]]) - score = scorer(y_true, y_pred) - self.assertAlmostEqual(score, -1.0) - - def test_predict_scorer_multiclass(self): - y_true = np.array([0, 1, 2]) - y_pred = np.array([[1.0, 0.0, 0.0], [0.0, 1.0, 0.0], [0.0, 0.0, 1.0]]) - - scorer = autosklearn.metrics._PredictScorer( - "accuracy", sklearn.metrics.accuracy_score, 1, 0, 1, {} - ) - - score = scorer(y_true, y_pred) - self.assertAlmostEqual(score, 1.0) - - y_pred = np.array([[0.0, 0.0, 0.0], [0.0, 0.0, 0.0], [0.0, 0.0, 0.0]]) - score = scorer(y_true, y_pred) - self.assertAlmostEqual(score, 0.333333333) - - y_pred = np.array([[1.0, 1.0, 1.0], [1.0, 1.0, 1.0], [1.0, 1.0, 1.0]]) - score = scorer(y_true, y_pred) - self.assertAlmostEqual(score, 0.333333333) - - scorer = autosklearn.metrics._PredictScorer( - "bac", sklearn.metrics.balanced_accuracy_score, 1, 0, 1, {} - ) - - score = scorer(y_true, 
y_pred) - self.assertAlmostEqual(score, 0.333333333) - - scorer = autosklearn.metrics._PredictScorer( - "accuracy", sklearn.metrics.accuracy_score, 1, 0, -1, {} - ) - - y_pred = np.array([[1.0, 0.0, 0.0], [0.0, 1.0, 0.0], [0.0, 0.0, 1.0]]) - score = scorer(y_true, y_pred) - self.assertAlmostEqual(score, -1.0) - - def test_predict_scorer_multilabel(self): - y_true = np.array([[0, 0], [0, 1], [1, 0], [1, 1]]) - y_pred = np.array([[0.0, 0.0], [0.0, 1.0], [1.0, 0.0], [1.0, 1.0]]) - - scorer = autosklearn.metrics._PredictScorer( - "accuracy", sklearn.metrics.accuracy_score, 1, 0, 1, {} - ) - - score = scorer(y_true, y_pred) - self.assertAlmostEqual(score, 1.0) - - y_pred = np.array([[0.0, 0.0], [0.0, 0.0], [0.0, 0.0], [0.0, 0.0]]) - score = scorer(y_true, y_pred) - self.assertAlmostEqual(score, 0.25) - - y_pred = np.array([[1.0, 1.0], [1.0, 1.0], [1.0, 1.0], [1.0, 1.0]]) - score = scorer(y_true, y_pred) - self.assertAlmostEqual(score, 0.25) - - scorer = autosklearn.metrics._PredictScorer( - "accuracy", sklearn.metrics.accuracy_score, 1, 0, -1, {} - ) - - y_pred = np.array([[0.0, 0.0], [0.0, 1.0], [1.0, 0.0], [1.0, 1.0]]) - score = scorer(y_true, y_pred) - self.assertAlmostEqual(score, -1.0) - - def test_predict_scorer_regression(self): - y_true = np.arange(0, 1.01, 0.1) - y_pred = y_true.copy() - - scorer = autosklearn.metrics._PredictScorer( - "r2", sklearn.metrics.r2_score, 1, 0, 1, {} - ) - - score = scorer(y_true, y_pred) - self.assertAlmostEqual(score, 1.0) - - y_pred = np.ones(y_true.shape) * np.mean(y_true) - score = scorer(y_true, y_pred) - self.assertAlmostEqual(score, 0.0) - - def test_proba_scorer_binary(self): - y_true = [0, 0, 1, 1] - y_pred = [[1.0, 0.0], [1.0, 0.0], [0.0, 1.0], [0.0, 1.0]] - - scorer = autosklearn.metrics._ProbaScorer( - "log_loss", sklearn.metrics.log_loss, 0, MAXINT, 1, {} - ) - - score = scorer(y_true, y_pred) - self.assertAlmostEqual(score, 0.0) - - y_pred = [[0.0, 0.0], [0.0, 0.0], [0.0, 0.0], [0.0, 0.0]] - score = scorer(y_true, y_pred) - self.assertAlmostEqual(score, 0.69314718055994529) - - y_pred = [[1.0, 1.0], [1.0, 1.0], [1.0, 1.0], [1.0, 1.0]] - score = scorer(y_true, y_pred) - self.assertAlmostEqual(score, 0.69314718055994529) - - scorer = autosklearn.metrics._ProbaScorer( - "log_loss", sklearn.metrics.log_loss, 0, MAXINT, -1, {} - ) - - y_pred = [[1.0, 1.0], [1.0, 1.0], [1.0, 1.0], [1.0, 1.0]] - score = scorer(y_true, y_pred) - self.assertAlmostEqual(score, -0.69314718055994529) - - def test_proba_scorer_multiclass(self): - y_true = [0, 1, 2] - y_pred = [[1.0, 0.0, 0.0], [0.0, 1.0, 0.0], [0.0, 0.0, 1.0]] - - scorer = autosklearn.metrics._ProbaScorer( - "log_loss", sklearn.metrics.log_loss, 0, MAXINT, 1, {} - ) - - score = scorer(y_true, y_pred) - self.assertAlmostEqual(score, 0.0) - - y_pred = [[0.0, 0.0, 0.0], [0.0, 0.0, 0.0], [0.0, 0.0, 0.0]] - score = scorer(y_true, y_pred) - self.assertAlmostEqual(score, 1.0986122886681098) - - y_pred = [[1.0, 1.0, 1.0], [1.0, 1.0, 1.0], [1.0, 1.0, 1.0]] - score = scorer(y_true, y_pred) - self.assertAlmostEqual(score, 1.0986122886681096) - - scorer = autosklearn.metrics._ProbaScorer( - "log_loss", sklearn.metrics.log_loss, 0, MAXINT, -1, {} - ) - - y_pred = [[1.0, 1.0, 1.0], [1.0, 1.0, 1.0], [1.0, 1.0, 1.0]] - score = scorer(y_true, y_pred) - self.assertAlmostEqual(score, -1.0986122886681096) - - def test_proba_scorer_multilabel(self): - y_true = np.array([[0, 0], [0, 1], [1, 0], [1, 1]]) - y_pred = np.array([[0.0, 0.0], [0.0, 1.0], [1.0, 0.0], [1.0, 1.0]]) - - scorer = autosklearn.metrics._ProbaScorer( - 
"log_loss", sklearn.metrics.log_loss, 0, MAXINT, 1, {} - ) - - score = scorer(y_true, y_pred) - self.assertAlmostEqual(score, 0.34657359027997314) - - y_pred = np.array([[0.0, 0.0], [0.0, 0.0], [0.0, 0.0], [0.0, 0.0]]) - score = scorer(y_true, y_pred) - self.assertAlmostEqual(score, 0.69314718055994529) - - y_pred = np.array([[1.0, 1.0], [1.0, 1.0], [1.0, 1.0], [1.0, 1.0]]) - score = scorer(y_true, y_pred) - self.assertAlmostEqual(score, 0.69314718055994529) - - scorer = autosklearn.metrics._ProbaScorer( - "log_loss", sklearn.metrics.log_loss, 0, MAXINT, -1, {} - ) - - y_pred = np.array([[0.0, 0.0], [0.0, 1.0], [1.0, 0.0], [1.0, 1.0]]) - score = scorer(y_true, y_pred) - self.assertAlmostEqual(score, -0.34657359027997314) - def test_threshold_scorer_binary(self): - y_true = [0, 0, 1, 1] - y_pred = np.array([[1.0, 0.0], [1.0, 0.0], [0.0, 1.0], [0.0, 1.0]]) - - scorer = autosklearn.metrics._ThresholdScorer( - "roc_auc", sklearn.metrics.roc_auc_score, 1, 0, 1, {} - ) - - score = scorer(y_true, y_pred) - self.assertAlmostEqual(score, 1.0) - - y_pred = np.array([[0.0, 0.0], [0.0, 0.0], [0.0, 0.0], [0.0, 0.0]]) - score = scorer(y_true, y_pred) - self.assertAlmostEqual(score, 0.5) - - y_pred = np.array([[1.0, 1.0], [1.0, 1.0], [1.0, 1.0], [1.0, 1.0]]) - score = scorer(y_true, y_pred) - self.assertAlmostEqual(score, 0.5) - - scorer = autosklearn.metrics._ThresholdScorer( - "roc_auc", sklearn.metrics.roc_auc_score, 1, 0, -1, {} - ) - - y_pred = np.array([[1.0, 0.0], [1.0, 0.0], [0.0, 1.0], [0.0, 1.0]]) - score = scorer(y_true, y_pred) - self.assertAlmostEqual(score, -1.0) - - def test_threshold_scorer_multilabel(self): - y_true = np.array([[0, 0], [0, 1], [1, 0], [1, 1]]) - y_pred = np.array([[0.0, 0.0], [0.0, 1.0], [1.0, 0.0], [1.0, 1.0]]) - - scorer = autosklearn.metrics._ThresholdScorer( - "roc_auc", sklearn.metrics.roc_auc_score, 1, 0, 1, {} - ) - - score = scorer(y_true, y_pred) - self.assertAlmostEqual(score, 1.0) - - y_pred = np.array([[0.0, 0.0], [0.0, 0.0], [0.0, 0.0], [0.0, 0.0]]) - score = scorer(y_true, y_pred) - self.assertAlmostEqual(score, 0.5) - - y_pred = np.array([[1.0, 1.0], [1.0, 1.0], [1.0, 1.0], [1.0, 1.0]]) - score = scorer(y_true, y_pred) - self.assertAlmostEqual(score, 0.5) - - scorer = autosklearn.metrics._ThresholdScorer( - "roc_auc", sklearn.metrics.roc_auc_score, 1, 0, -1, {} - ) - - y_pred = np.array([[0.0, 0.0], [0.0, 1.0], [1.0, 0.0], [1.0, 1.0]]) - score = scorer(y_true, y_pred) - self.assertAlmostEqual(score, -1.0) - - def test_sign_flip(self): - y_true = np.arange(0, 1.01, 0.1) - y_pred = y_true.copy() - - scorer = autosklearn.metrics.make_scorer( - "r2", sklearn.metrics.r2_score, greater_is_better=True - ) - - score = scorer(y_true, y_pred + 1.0) - self.assertAlmostEqual(score, -9.0) - - score = scorer(y_true, y_pred + 0.5) - self.assertAlmostEqual(score, -1.5) - - score = scorer(y_true, y_pred) - self.assertAlmostEqual(score, 1.0) - - scorer = autosklearn.metrics.make_scorer( - "r2", sklearn.metrics.r2_score, greater_is_better=False - ) - - score = scorer(y_true, y_pred + 1.0) - self.assertAlmostEqual(score, 9.0) - - score = scorer(y_true, y_pred + 0.5) - self.assertAlmostEqual(score, 1.5) +@pytest.mark.parametrize( + "y_pred, y_true, scorer, expected_score", + [ + ( + np.array([[1.0, 0.0], [1.0, 0.0], [0.0, 1.0], [0.0, 1.0]]), + np.array([0, 0, 1, 1]), + autosklearn.metrics.accuracy, + 1.0, + ), + ( + np.array([[0.0, 0.0], [0.0, 0.0], [0.0, 0.0], [0.0, 0.0]]), + np.array([0, 0, 1, 1]), + autosklearn.metrics.accuracy, + 0.5, + ), + ( + np.array([[1.0, 1.0], [1.0, 
1.0], [1.0, 1.0]]),
+            np.array([0, 0, 1, 1]),
+            autosklearn.metrics.balanced_accuracy,
+            0.5,
+        ),
+        (
+            np.array([[1.0, 0.0, 0.0], [0.0, 1.0, 0.0], [0.0, 0.0, 1.0]]),
+            np.array([0, 1, 2]),
+            autosklearn.metrics.accuracy,
+            1.0,
+        ),
+        (
+            np.array([[0.0, 0.0, 0.0], [0.0, 0.0, 0.0], [0.0, 0.0, 0.0]]),
+            np.array([0, 1, 2]),
+            autosklearn.metrics.accuracy,
+            0.333333333,
+        ),
+        (
+            np.array([[1.0, 1.0, 1.0], [1.0, 1.0, 1.0], [1.0, 1.0, 1.0]]),
+            np.array([0, 1, 2]),
+            autosklearn.metrics.accuracy,
+            0.333333333,
+        ),
+        (
+            np.array([[1.0, 1.0, 1.0], [1.0, 1.0, 1.0], [1.0, 1.0, 1.0]]),
+            np.array([0, 1, 2]),
+            autosklearn.metrics.balanced_accuracy,
+            0.333333333,
+        ),
+        (
+            np.array([[0.0, 0.0], [0.0, 1.0], [1.0, 0.0], [1.0, 1.0]]),
+            np.array([[0, 0], [0, 1], [1, 0], [1, 1]]),
+            autosklearn.metrics.accuracy,
+            1.0,
+        ),
+        (
+            np.array([[0.0, 0.0], [0.0, 0.0], [0.0, 0.0], [0.0, 0.0]]),
+            np.array([[0, 0], [0, 1], [1, 0], [1, 1]]),
+            autosklearn.metrics.accuracy,
+            0.25,
+        ),
+        (
+            np.array([[1.0, 1.0], [1.0, 1.0], [1.0, 1.0], [1.0, 1.0]]),
+            np.array([[0, 0], [0, 1], [1, 0], [1, 1]]),
+            autosklearn.metrics.accuracy,
+            0.25,
+        ),
+        (
+            np.arange(0, 1.01, 0.1),
+            np.arange(0, 1.01, 0.1),
+            autosklearn.metrics.r2,
+            1.0,
+        ),
+        (
+            np.ones(np.arange(0, 1.01, 0.1).shape) * np.mean(np.arange(0, 1.01, 0.1)),
+            np.arange(0, 1.01, 0.1),
+            autosklearn.metrics.r2,
+            0.0,
+        ),
+        (
+            np.array([[1.0, 0.0], [1.0, 0.0], [0.0, 1.0], [0.0, 1.0]]),
+            np.array([0, 0, 1, 1]),
+            autosklearn.metrics.log_loss,
+            0.0,
+        ),
+        (
+            np.array([[1.0, 0.0, 0.0], [0.0, 1.0, 0.0], [0.0, 0.0, 1.0]]),
+            np.array([0, 1, 2]),
+            autosklearn.metrics.log_loss,
+            0.0,
+        ),
+        (
+            np.array([[1.0, 0.0], [1.0, 0.0], [0.0, 1.0], [0.0, 1.0]]),
+            np.array([0, 0, 1, 1]),
+            autosklearn.metrics.roc_auc,
+            1.0,
+        ),
+        (
+            np.array([[0.0, 0.0], [0.0, 0.0], [0.0, 0.0], [0.0, 0.0]]),
+            np.array([0, 0, 1, 1]),
+            autosklearn.metrics.roc_auc,
+            0.5,
+        ),
+        (
+            np.array([[1.0, 1.0], [1.0, 1.0], [1.0, 1.0], [1.0, 1.0]]),
+            np.array([0, 0, 1, 1]),
+            autosklearn.metrics.roc_auc,
+            0.5,
+        ),
+        (
+            np.array([[0.0, 0.0], [0.0, 1.0], [1.0, 0.0], [1.0, 1.0]]),
+            np.array([[0, 0], [0, 1], [1, 0], [1, 1]]),
+            autosklearn.metrics.roc_auc,
+            1.0,
+        ),
+        (
+            np.array([[0.0, 0.0], [0.0, 0.0], [0.0, 0.0], [0.0, 0.0]]),
+            np.array([[0, 0], [0, 1], [1, 0], [1, 1]]),
+            autosklearn.metrics.roc_auc,
+            0.5,
+        ),
+        (
+            np.array([[1.0, 1.0], [1.0, 1.0], [1.0, 1.0], [1.0, 1.0]]),
+            np.array([[0, 0], [0, 1], [1, 0], [1, 1]]),
+            autosklearn.metrics.roc_auc,
+            0.5,
+        ),
+    ],
+)
+def test_scorer(
+    y_pred: np.ndarray,
+    y_true: np.ndarray,
+    scorer: autosklearn.metrics.Scorer,
+    expected_score: float,
+) -> None:
+    """
+    Expects
+    -------
+    * The expected scores match those produced by the corresponding scorers.
+    """
+    result_score = scorer(y_true, y_pred)
+    assert expected_score == pytest.approx(result_score)
+
+
+@pytest.mark.parametrize(
+    "y_pred, y_true, expected_score",
+    [
+        (
+            np.arange(0, 1.01, 0.1) + 1.0,
+            np.arange(0, 1.01, 0.1),
+            -9.0,
+        ),
+        (
+            np.arange(0, 1.01, 0.1) + 0.5,
+            np.arange(0, 1.01, 0.1),
+            -1.5,
+        ),
+        (
+            np.arange(0, 1.01, 0.1),
+            np.arange(0, 1.01, 0.1),
+            1.0,
+        ),
+    ],
+)
+def test_sign_flip(
+    y_pred: np.ndarray,
+    y_true: np.ndarray,
+    expected_score: float,
+) -> None:
+    """
+    Expects
+    -------
+    * Flipping greater_is_better for r2_score results in flipped signs of its output.
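+    * I.e. greater_false_scorer(y_true, y_pred) == -greater_true_scorer(y_true, y_pred).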
+ """ + greater_true_scorer = autosklearn.metrics.make_scorer( + "r2", sklearn.metrics.r2_score, greater_is_better=True + ) + greater_true_score = greater_true_scorer(y_true, y_pred) + assert expected_score == pytest.approx(greater_true_score) - score = scorer(y_true, y_pred) - self.assertAlmostEqual(score, -1.0) + greater_false_scorer = autosklearn.metrics.make_scorer( + "r2", sklearn.metrics.r2_score, greater_is_better=False + ) + greater_false_score = greater_false_scorer(y_true, y_pred) + assert (expected_score * -1.0) == pytest.approx(greater_false_score) + + +def test_regression_metrics(): + """ + Expects + ------- + * Test metrics do not change output for autosklearn.metrics.REGRESSION_METRICS. + """ + for metric, scorer in autosklearn.metrics.REGRESSION_METRICS.items(): + y_true = np.random.random(100).reshape((-1, 1)) + y_pred = y_true.copy() + np.random.randn(100, 1) * 0.1 + + if metric == "mean_squared_log_error": + y_true = np.abs(y_true) + y_pred = np.abs(y_pred) + + y_true_2 = y_true.copy() + y_pred_2 = y_pred.copy() + assert np.isfinite(scorer(y_true_2, y_pred_2)) + np.testing.assert_array_almost_equal(y_true, y_true_2, err_msg=metric) + np.testing.assert_array_almost_equal(y_pred, y_pred_2, err_msg=metric) + + +def test_classification_metrics(): + """ + Expects + ------- + * Test metrics do not change output for autosklearn.metrics.CLASSIFICATION_METRICS. + """ + for metric, scorer in autosklearn.metrics.CLASSIFICATION_METRICS.items(): + y_true = np.random.randint(0, 2, size=(100, 1)) + y_pred = np.random.random(200).reshape((-1, 2)) + y_pred = np.array([y_pred[i] / np.sum(y_pred[i]) for i in range(100)]) + + y_true_2 = y_true.copy() + y_pred_2 = y_pred.copy() + try: + assert np.isfinite(scorer(y_true_2, y_pred_2)) + np.testing.assert_array_almost_equal(y_true, y_true_2, err_msg=metric) + np.testing.assert_array_almost_equal(y_pred, y_pred_2, err_msg=metric) + except ValueError as e: + if ( + e.args[0] == "Samplewise metrics are not available outside" + " of multilabel classification." + ): + pass + else: + raise e + + +def test_regression_all(): + """ + Expects + ------- + * Correct scores from REGRESSION_METRICS. + """ + for metric, scorer in autosklearn.metrics.REGRESSION_METRICS.items(): + if scorer.name == "mean_squared_log_error": + continue + y_true = np.array([1, 2, 3, 4]) -class TestMetricsDoNotAlterInput(unittest.TestCase): - def test_regression_metrics(self): - for metric, scorer in autosklearn.metrics.REGRESSION_METRICS.items(): - y_true = np.random.random(100).reshape((-1, 1)) - y_pred = y_true.copy() + np.random.randn(100, 1) * 0.1 + y_pred_list = [ + np.array([1, 2, 3, 4]), + np.array([3, 4, 5, 6]), + np.array([-1, 0, -1, 0]), + np.array([-5, 10, 7, -3]), + ] - if metric == "mean_squared_log_error": - y_true = np.abs(y_true) - y_pred = np.abs(y_pred) + score_list = [scorer(y_true, y_pred) for y_pred in y_pred_list] + + assert scorer._optimum == pytest.approx(score_list[0]) + assert score_list == sorted(score_list, reverse=True) + + +def test_classification_binary(): + """ + Expects + ------- + * Correct scores from CLASSIFICATION_METRICS for binary classification. + """ + for metric, scorer in autosklearn.metrics.CLASSIFICATION_METRICS.items(): + # Skip functions not applicable for binary classification. + # TODO: Average precision should work for binary classification, + # TODO: but its behavior is not right. When y_pred is completely + # TODO: wrong, it does return 0.5, but when it is not completely + # TODO: wrong, it returns value smaller than 0.5. 
+ if metric in [ + "average_precision", + "precision_samples", + "recall_samples", + "f1_samples", + ]: + continue - y_true_2 = y_true.copy() - y_pred_2 = y_pred.copy() - self.assertTrue(np.isfinite(scorer(y_true_2, y_pred_2))) - np.testing.assert_array_almost_equal(y_true, y_true_2, err_msg=metric) - np.testing.assert_array_almost_equal(y_pred, y_pred_2, err_msg=metric) + y_true = np.array([1.0, 1.0, 1.0, 0.0, 0.0, 0.0]) - def test_classification_metrics(self): - for metric, scorer in autosklearn.metrics.CLASSIFICATION_METRICS.items(): - y_true = np.random.randint(0, 2, size=(100, 1)) - y_pred = np.random.random(200).reshape((-1, 2)) - y_pred = np.array([y_pred[i] / np.sum(y_pred[i]) for i in range(100)]) - - y_true_2 = y_true.copy() - y_pred_2 = y_pred.copy() - try: - self.assertTrue(np.isfinite(scorer(y_true_2, y_pred_2))) - np.testing.assert_array_almost_equal(y_true, y_true_2, err_msg=metric) - np.testing.assert_array_almost_equal(y_pred, y_pred_2, err_msg=metric) - except ValueError as e: - if ( - e.args[0] == "Samplewise metrics are not available outside" - " of multilabel classification." - ): - pass - else: - raise e - - -class TestMetric(unittest.TestCase): - def test_regression_all(self): - - for metric, scorer in autosklearn.metrics.REGRESSION_METRICS.items(): - y_true = np.array([1, 2, 3, 4]) - y_pred = y_true.copy() - previous_score = scorer._optimum - current_score = scorer(y_true, y_pred) - self.assertAlmostEqual(current_score, previous_score) - - y_pred = np.array([3, 4, 5, 6]) - current_score = scorer(y_true, y_pred) - self.assertLess(current_score, previous_score) - - if scorer.name == "mean_squared_log_error": - continue - - y_pred = np.array([-1, 0, -1, 0]) - previous_score = current_score - current_score = scorer(y_true, y_pred) - self.assertLess(current_score, previous_score) - - y_pred = np.array([-5, 10, 7, -3]) - previous_score = current_score - current_score = scorer(y_true, y_pred) - self.assertLess(current_score, previous_score) - - def test_classification_binary(self): - - for metric, scorer in autosklearn.metrics.CLASSIFICATION_METRICS.items(): - # Skip functions not applicable for binary classification. - # TODO: Average precision should work for binary classification, - # TODO: but its behavior is not right. When y_pred is completely - # TODO: wrong, it does return 0.5, but when it is not completely - # TODO: wrong, it returns value smaller than 0.5. 
- if metric in [ - "average_precision", - "precision_samples", - "recall_samples", - "f1_samples", - ]: - continue - - y_true = np.array([1.0, 1.0, 1.0, 0.0, 0.0, 0.0]) - y_pred = np.array( + y_pred_list = [ + np.array( [[0.0, 1.0], [0.0, 1.0], [0.0, 1.0], [1.0, 0.0], [1.0, 0.0], [1.0, 0.0]] - ) - previous_score = scorer._optimum - current_score = scorer(y_true, y_pred) - self.assertAlmostEqual(current_score, previous_score) - - y_pred = np.array( + ), + np.array( [[0.0, 1.0], [1.0, 0.0], [0.0, 1.0], [1.0, 0.0], [0.0, 1.0], [1.0, 0.0]] - ) - previous_score = current_score - current_score = scorer(y_true, y_pred) - self.assertLess(current_score, previous_score) - - y_pred = np.array( + ), + np.array( [[0.0, 1.0], [1.0, 0.0], [1.0, 0.0], [0.0, 1.0], [0.0, 1.0], [0.0, 1.0]] - ) - previous_score = current_score - current_score = scorer(y_true, y_pred) - self.assertLess(current_score, previous_score) - - y_pred = np.array( + ), + np.array( [[1.0, 0.0], [1.0, 0.0], [1.0, 0.0], [0.0, 1.0], [0.0, 1.0], [0.0, 1.0]] - ) - previous_score = current_score - current_score = scorer(y_true, y_pred) - self.assertLess(current_score, previous_score) - - def test_classification_multiclass(self): - # The last check in this test has a mismatch between the number of - # labels predicted in y_pred and the number of labels in y_true. - # This triggers several warnings but we are aware. - # - # TODO convert to pytest with fixture - # - # This test should be parameterized so we can identify which metrics - # cause which warning specifically and rectify if needed. - ignored_warnings = [(UserWarning, "y_pred contains classes not in y_true")] - - for metric, scorer in autosklearn.metrics.CLASSIFICATION_METRICS.items(): - # Skip functions not applicable for multiclass classification. - if metric in [ - "roc_auc", - "average_precision", - "precision", - "recall", - "f1", - "precision_samples", - "recall_samples", - "f1_samples", - ]: - continue - - y_true = np.array([0.0, 0.0, 1.0, 1.0, 2.0]) - - y_pred = np.array( + ), + ] + + score_list = [scorer(y_true, y_pred) for y_pred in y_pred_list] + + assert scorer._optimum == pytest.approx(score_list[0]) + assert score_list == sorted(score_list, reverse=True) + + +def test_classification_multiclass(): + """ + Expects + ------- + * Correct scores from CLASSIFICATION_METRICS for multiclass classification. + """ + # The last check in this test has a mismatch between the number of + # labels predicted in y_pred and the number of labels in y_true. + # This triggers several warnings but we are aware. + # + # TODO convert to pytest with fixture + # + # This test should be parameterized so we can identify which metrics + # cause which warning specifically and rectify if needed. + ignored_warnings = [(UserWarning, "y_pred contains classes not in y_true")] + + for metric, scorer in autosklearn.metrics.CLASSIFICATION_METRICS.items(): + # Skip functions not applicable for multiclass classification. 
+ if metric in [ + "roc_auc", + "average_precision", + "precision", + "recall", + "f1", + "precision_samples", + "recall_samples", + "f1_samples", + ]: + continue + + y_true = np.array([0.0, 0.0, 1.0, 1.0, 2.0]) + + y_pred_list = [ + np.array( [ [1.0, 0.0, 0.0], [1.0, 0.0, 0.0], @@ -471,12 +385,8 @@ def test_classification_multiclass(self): [0.0, 1.0, 0.0], [0.0, 0.0, 1.0], ] - ) - previous_score = scorer._optimum - current_score = scorer(y_true, y_pred) - self.assertAlmostEqual(current_score, previous_score) - - y_pred = np.array( + ), + np.array( [ [1.0, 0.0, 0.0], [1.0, 0.0, 0.0], @@ -484,12 +394,8 @@ def test_classification_multiclass(self): [0.0, 1.0, 0.0], [0.0, 0.0, 1.0], ] - ) - previous_score = current_score - current_score = scorer(y_true, y_pred) - self.assertLess(current_score, previous_score) - - y_pred = np.array( + ), + np.array( [ [0.0, 0.0, 1.0], [0.0, 1.0, 0.0], @@ -497,12 +403,8 @@ def test_classification_multiclass(self): [0.0, 1.0, 0.0], [0.0, 1.0, 0.0], ] - ) - previous_score = current_score - current_score = scorer(y_true, y_pred) - self.assertLess(current_score, previous_score) - - y_pred = np.array( + ), + np.array( [ [0.0, 0.0, 1.0], [0.0, 0.0, 1.0], @@ -510,59 +412,58 @@ def test_classification_multiclass(self): [1.0, 0.0, 0.0], [0.0, 1.0, 0.0], ] - ) - previous_score = current_score - current_score = scorer(y_true, y_pred) - self.assertLess(current_score, previous_score) - - # less labels in the targets than in the predictions - y_true = np.array([0.0, 0.0, 1.0, 1.0]) - y_pred = np.array( - [[1.0, 0.0, 0.0], [1.0, 0.0, 0.0], [0.0, 1.0, 0.0], [0.0, 0.0, 1.0]] - ) + ), + ] + + score_list = [scorer(y_true, y_pred) for y_pred in y_pred_list] + + assert scorer._optimum == pytest.approx(score_list[0]) + assert score_list == sorted(score_list, reverse=True) + + # less labels in the targets than in the predictions + y_true = np.array([0.0, 0.0, 1.0, 1.0]) + y_pred = np.array( + [[1.0, 0.0, 0.0], [1.0, 0.0, 0.0], [0.0, 1.0, 0.0], [0.0, 0.0, 1.0]] + ) + + with warnings.catch_warnings(): + for category, message in ignored_warnings: + warnings.filterwarnings("ignore", category=category, message=message) + + score = scorer(y_true, y_pred) + assert np.isfinite(score) + + +def test_classification_multilabel(): + """ + Expects + ------- + * Correct scores from CLASSIFICATION_METRICS for multi-label classification. + """ + for metric, scorer in autosklearn.metrics.CLASSIFICATION_METRICS.items(): + # Skip functions not applicable for multi-label classification. + if metric in [ + "roc_auc", + "log_loss", + "precision", + "recall", + "f1", + "balanced_accuracy", + ]: + continue + y_true = np.array([[1, 0, 0], [1, 1, 0], [0, 1, 1], [1, 1, 1]]) + + y_pred_list = [ + np.array([[1, 0, 0], [1, 1, 0], [0, 1, 1], [1, 1, 1]]), + np.array([[1, 0, 0], [0, 0, 1], [0, 1, 1], [1, 1, 1]]), + np.array([[1, 0, 0], [0, 0, 1], [1, 0, 1], [1, 1, 0]]), + np.array([[0, 1, 1], [0, 0, 1], [1, 0, 0], [0, 0, 0]]), + ] + + score_list = [scorer(y_true, y_pred) for y_pred in y_pred_list] - with warnings.catch_warnings(): - for category, message in ignored_warnings: - warnings.filterwarnings( - "ignore", category=category, message=message - ) - - score = scorer(y_true, y_pred) - self.assertTrue(np.isfinite(score)) - - def test_classification_multilabel(self): - - for metric, scorer in autosklearn.metrics.CLASSIFICATION_METRICS.items(): - # Skip functions not applicable for multi-label classification. 
- if metric in [ - "roc_auc", - "log_loss", - "precision", - "recall", - "f1", - "balanced_accuracy", - ]: - continue - y_true = np.array([[1, 0, 0], [1, 1, 0], [0, 1, 1], [1, 1, 1]]) - y_pred = y_true.copy() - previous_score = scorer._optimum - current_score = scorer(y_true, y_pred) - self.assertAlmostEqual(current_score, previous_score) - - y_pred = np.array([[1, 0, 0], [0, 0, 1], [0, 1, 1], [1, 1, 1]]) - previous_score = current_score - current_score = scorer(y_true, y_pred) - self.assertLess(current_score, previous_score) - - y_pred = np.array([[1, 0, 0], [0, 0, 1], [1, 0, 1], [1, 1, 0]]) - previous_score = current_score - current_score = scorer(y_true, y_pred) - self.assertLess(current_score, previous_score) - - y_pred = np.array([[0, 1, 1], [0, 0, 1], [1, 0, 0], [0, 0, 0]]) - previous_score = current_score - current_score = scorer(y_true, y_pred) - self.assertLess(current_score, previous_score) + assert scorer._optimum == pytest.approx(score_list[0]) + assert score_list == sorted(score_list, reverse=True) class TestCalculateScore(unittest.TestCase): From 313f5fbc131b888070404e9e05c105eb0d2a6c6a Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 10 Oct 2022 12:05:08 +0200 Subject: [PATCH 03/16] Bump actions/stale from 5 to 6 (#1588) Bumps [actions/stale](https://github.com/actions/stale) from 5 to 6. - [Release notes](https://github.com/actions/stale/releases) - [Changelog](https://github.com/actions/stale/blob/main/CHANGELOG.md) - [Commits](https://github.com/actions/stale/compare/v5...v6) --- updated-dependencies: - dependency-name: actions/stale dependency-type: direct:production update-type: version-update:semver-major ... Signed-off-by: dependabot[bot] Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- .github/workflows/stale.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/stale.yaml b/.github/workflows/stale.yaml index 5d24ae0627..f5232d347e 100644 --- a/.github/workflows/stale.yaml +++ b/.github/workflows/stale.yaml @@ -9,7 +9,7 @@ jobs: stale: runs-on: ubuntu-latest steps: - - uses: actions/stale@v5 + - uses: actions/stale@v6 with: days-before-stale: 60 days-before-close: 7 From cc047d651373cc4c3c34afa555f242274565e33b Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 10 Oct 2022 12:05:21 +0200 Subject: [PATCH 04/16] Bump actions/checkout from 2 to 3.1.0 (#1592) Bumps [actions/checkout](https://github.com/actions/checkout) from 2 to 3.1.0. - [Release notes](https://github.com/actions/checkout/releases) - [Changelog](https://github.com/actions/checkout/blob/main/CHANGELOG.md) - [Commits](https://github.com/actions/checkout/compare/v2...v3.1.0) --- updated-dependencies: - dependency-name: actions/checkout dependency-type: direct:production update-type: version-update:semver-major ... 
Signed-off-by: dependabot[bot] Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- .github/workflows/citation_cff.yml | 2 +- .github/workflows/dist.yml | 2 +- .github/workflows/docker-publish.yml | 2 +- .github/workflows/docs.yml | 2 +- .github/workflows/generate-baselines.yml | 2 +- .github/workflows/pre-commit-update.yml | 2 +- .github/workflows/pre-commit.yaml | 2 +- .github/workflows/pytest.yml | 2 +- .github/workflows/regressions.yml | 2 +- 9 files changed, 9 insertions(+), 9 deletions(-) diff --git a/.github/workflows/citation_cff.yml b/.github/workflows/citation_cff.yml index 6851c52d38..d3a5659aa8 100644 --- a/.github/workflows/citation_cff.yml +++ b/.github/workflows/citation_cff.yml @@ -11,7 +11,7 @@ jobs: runs-on: ubuntu-latest steps: - name: Check out a copy of the repository - uses: actions/checkout@v3 + uses: actions/checkout@v3.1.0 - name: Check whether the citation metadata from CITATION.cff is valid uses: citation-file-format/cffconvert-github-action@2.0.0 diff --git a/.github/workflows/dist.yml b/.github/workflows/dist.yml index 07ad9366a2..1053f3dda9 100644 --- a/.github/workflows/dist.yml +++ b/.github/workflows/dist.yml @@ -22,7 +22,7 @@ jobs: steps: - name: Check out the repo - uses: actions/checkout@v3 + uses: actions/checkout@v3.1.0 with: submodules: recursive diff --git a/.github/workflows/docker-publish.yml b/.github/workflows/docker-publish.yml index a884e5f613..1c849db2a1 100644 --- a/.github/workflows/docker-publish.yml +++ b/.github/workflows/docker-publish.yml @@ -22,7 +22,7 @@ jobs: steps: - name: Check out the repo - uses: actions/checkout@v3 + uses: actions/checkout@v3.1.0 with: submodules: recursive diff --git a/.github/workflows/docs.yml b/.github/workflows/docs.yml index 83510c5483..c87f900302 100644 --- a/.github/workflows/docs.yml +++ b/.github/workflows/docs.yml @@ -23,7 +23,7 @@ jobs: steps: - name: Checkout - uses: actions/checkout@v3 + uses: actions/checkout@v3.1.0 with: submodules: recursive diff --git a/.github/workflows/generate-baselines.yml b/.github/workflows/generate-baselines.yml index 5149dd57d8..fd6f45b381 100644 --- a/.github/workflows/generate-baselines.yml +++ b/.github/workflows/generate-baselines.yml @@ -64,7 +64,7 @@ jobs: python-version: ${{ steps.python-version.outputs.value }} - name: Checkout Automlbenchmark - uses: actions/checkout@v2 + uses: actions/checkout@v3.1.0 with: repository: ${{ env.AUTOMLBENCHMARK_REPO }} ref: ${{ env.AUTOMLBENCHMARK_REF }} diff --git a/.github/workflows/pre-commit-update.yml b/.github/workflows/pre-commit-update.yml index 3bfede916f..09db790a7f 100644 --- a/.github/workflows/pre-commit-update.yml +++ b/.github/workflows/pre-commit-update.yml @@ -11,7 +11,7 @@ jobs: auto-update: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v3.1.0 - uses: actions/setup-python@v2 diff --git a/.github/workflows/pre-commit.yaml b/.github/workflows/pre-commit.yaml index c7e5b94438..9964a287e7 100644 --- a/.github/workflows/pre-commit.yaml +++ b/.github/workflows/pre-commit.yaml @@ -20,7 +20,7 @@ jobs: run-all-files: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v3.1.0 with: submodules: recursive diff --git a/.github/workflows/pytest.yml b/.github/workflows/pytest.yml index 64e1f26b5b..76c8aac3ed 100644 --- a/.github/workflows/pytest.yml +++ b/.github/workflows/pytest.yml @@ -74,7 +74,7 @@ jobs: steps: - name: Checkout - uses: actions/checkout@v3 + uses: 
actions/checkout@v3.1.0 with: submodules: recursive diff --git a/.github/workflows/regressions.yml b/.github/workflows/regressions.yml index 8bb0addcf4..942b0253d2 100644 --- a/.github/workflows/regressions.yml +++ b/.github/workflows/regressions.yml @@ -82,7 +82,7 @@ jobs: # branch: the branch name - name: Checkout Automlbenchmark - uses: actions/checkout@v3 + uses: actions/checkout@v3.1.0 with: repository: ${{ env.AUTOMLBENCHMARK_REPO }} ref: ${{ env.AUTOMLBENCHMARK_REF }} From 5c69ddf4584c5c7c4977203a1a579d042c6e3716 Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" <41898282+github-actions[bot]@users.noreply.github.com> Date: Mon, 10 Oct 2022 12:06:13 +0200 Subject: [PATCH 05/16] chore: update pre-commit hooks (#1580) Co-authored-by: eddiebergman --- .pre-commit-config.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index b296a920cb..baf26a9ee3 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -15,7 +15,7 @@ repos: files: test/.* - repo: https://github.com/psf/black - rev: 22.6.0 + rev: 22.10.0 hooks: - id: black name: black formatter autosklearn @@ -39,7 +39,7 @@ repos: additional_dependencies: ["toml"] # Needed to parse pyproject.toml - repo: https://github.com/pre-commit/mirrors-mypy - rev: v0.971 + rev: v0.982 hooks: - id: mypy name: mypy auto-sklearn From 305a3ab152241c1f97b7ac239ad09b66cfe81c57 Mon Sep 17 00:00:00 2001 From: Aron Bahram Date: Mon, 14 Nov 2022 09:26:21 +0100 Subject: [PATCH 06/16] Fix link checker make command in CONTRIBUTE.md (#1608) --- CONTRIBUTING.md | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 73ce781618..6408e56628 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -252,10 +252,11 @@ Lastly, if the feature really is a game changer or you're very proud of it, cons make doc ``` * If you're unfamiliar with sphinx, it's a documentation generator which can read comments and docstrings from within the code and generate html documentation. - * If you've added documentation, we also has a command `linkcheck` for making sure all the links correctly go to some destination. + * If you've added documentation, we also have a command `links` for making sure + all the links correctly go to some destination. This helps tests for dead links or accidental typos. ```bash - make linkcheck + make links ``` * We also use sphinx-gallery which can take python files (such as those in the `examples` folder) and run them, creating html which shows the code and the output it generates. ```bash @@ -396,7 +397,7 @@ Lastly, if the feature really is a game changer or you're very proud of it, cons # If you changed documentation: # This will generate all documentation and check links make doc - make linkcheck + make links make examples # mainly needed if you modified some examples # ... 
fix any issues

From 6a97f729b971df0db3c1d590b94985c7349a1c5e Mon Sep 17 00:00:00 2001
From: Aron Bahram
Date: Mon, 14 Nov 2022 09:30:36 +0100
Subject: [PATCH 07/16] Show progress bar while fitting to training data
 (#1606)

* Show progress bar while fitting to training data
* Minor fixes for progress bar
* Revert accidental changes to requirements.txt
* Document changes
* Skip type checks for tqdm
* Make progress bar more flexible with kwargs
* Fix link checker make command in CONTRIBUTE.md
* Update doc link to be sphinx compatible
* Switch to pytest-forked from pytest-xdist

Co-authored-by: Eddie Bergman
---
 CONTRIBUTING.md                   |  4 +-
 autosklearn/automl.py             | 10 +++++
 autosklearn/estimators.py         |  7 ++++
 autosklearn/experimental/askl2.py |  6 +++
 autosklearn/util/progress_bar.py  | 68 +++++++++++++++++++++++++++++++
 pyproject.toml                    |  3 +-
 requirements.txt                  |  3 +-
 setup.py                          |  2 +-
 8 files changed, 98 insertions(+), 5 deletions(-)
 create mode 100644 autosklearn/util/progress_bar.py

diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
index 6408e56628..dfffc2fcf1 100644
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@@ -252,8 +252,8 @@ Lastly, if the feature really is a game changer or you're very proud of it, cons
   make doc
   ```
 * If you're unfamiliar with sphinx, it's a documentation generator which can read comments and docstrings from within the code and generate html documentation.
-  * If you've added documentation, we also have a command `links` for making sure
-    all the links correctly go to some destination.
+  * If you've added documentation, we also have a command `links` for making
+    sure all the links correctly go to some destination.
     This helps tests for dead links or accidental typos.
diff --git a/autosklearn/automl.py b/autosklearn/automl.py
index e242fbbc08..93fde84330 100644
--- a/autosklearn/automl.py
+++ b/autosklearn/automl.py
@@ -120,6 +120,7 @@
     warnings_to,
 )
 from autosklearn.util.parallel import preload_modules
+from autosklearn.util.progress_bar import ProgressBar
 from autosklearn.util.smac_wrap import SMACCallback, SmacRunCallback
 from autosklearn.util.stopwatch import StopWatch
@@ -239,6 +240,7 @@ def __init__(
         get_trials_callback: SMACCallback | None = None,
         dataset_compression: bool | Mapping[str, Any] = True,
         allow_string_features: bool = True,
+        disable_progress_bar: bool = False,
     ):
         super().__init__()
@@ -295,6 +297,7 @@
         self.logging_config = logging_config
         self.precision = precision
         self.allow_string_features = allow_string_features
+        self.disable_progress_bar = disable_progress_bar
         self._initial_configurations_via_metalearning = (
             initial_configurations_via_metalearning
         )
@@ -626,6 +629,12 @@
         # By default try to use the TCP logging port or get a new port
         self._logger_port = logging.handlers.DEFAULT_TCP_LOGGING_PORT
+        progress_bar = ProgressBar(
+            total=self._time_for_task,
+            disable=self.disable_progress_bar,
+            desc="Fitting to the training data",
+            colour="green",
+        )
         # Once we start the logging server, it starts in a new process
         # If an error occurs then we want to make sure that we exit cleanly
         # and shut it down, else it might hang
@@ -961,6 +970,7 @@
             self._logger.exception(e)
             raise e
         finally:
+            progress_bar.stop()
             self._fit_cleanup()

         self.fitted = True
diff --git a/autosklearn/estimators.py b/autosklearn/estimators.py
index 1a094d2582..577265239e 100644
--- a/autosklearn/estimators.py
+++ b/autosklearn/estimators.py
@@ -76,6 +76,7 @@ def __init__(
         get_trials_callback: SMACCallback | None = None,
         dataset_compression: Union[bool, 
Mapping[str, Any]] = True, allow_string_features: bool = True, + disable_progress_bar: bool = False, ): """ Parameters @@ -381,6 +382,10 @@ def __init__( Whether autosklearn should process string features. By default the textpreprocessing is enabled. + disable_progress_bar: bool = False + Whether to disable the progress bar that is displayed in the console + while fitting to the training data. + Attributes ---------- cv_results_ : dict of numpy (masked) ndarrays @@ -475,6 +480,7 @@ def __init__( self.get_trials_callback = get_trials_callback self.dataset_compression = dataset_compression self.allow_string_features = allow_string_features + self.disable_progress_bar = disable_progress_bar self.automl_ = None # type: Optional[AutoML] @@ -525,6 +531,7 @@ def build_automl(self): get_trials_callback=self.get_trials_callback, dataset_compression=self.dataset_compression, allow_string_features=self.allow_string_features, + disable_progress_bar=self.disable_progress_bar, ) return automl diff --git a/autosklearn/experimental/askl2.py b/autosklearn/experimental/askl2.py index 317f0be5b1..b712ba484e 100644 --- a/autosklearn/experimental/askl2.py +++ b/autosklearn/experimental/askl2.py @@ -166,6 +166,7 @@ def __init__( load_models: bool = True, dataset_compression: Union[bool, Mapping[str, Any]] = True, allow_string_features: bool = True, + disable_progress_bar: bool = False, ): """ @@ -284,6 +285,10 @@ def __init__( load_models : bool, optional (True) Whether to load the models after fitting Auto-sklearn. + disable_progress_bar: bool = False + Whether to disable the progress bar that is displayed in the console + while fitting to the training data. + Attributes ---------- @@ -337,6 +342,7 @@ def __init__( scoring_functions=scoring_functions, load_models=load_models, allow_string_features=allow_string_features, + disable_progress_bar=disable_progress_bar, ) def train_selectors(self, selected_metric=None): diff --git a/autosklearn/util/progress_bar.py b/autosklearn/util/progress_bar.py new file mode 100644 index 0000000000..7ccd3bc153 --- /dev/null +++ b/autosklearn/util/progress_bar.py @@ -0,0 +1,68 @@ +from typing import Any + +import datetime +import time +from threading import Thread + +from tqdm import trange + + +class ProgressBar(Thread): + """A Thread that displays a tqdm progress bar in the console. + + It is specialized to display information relevant to fitting to the training data + with auto-sklearn. + + Parameters + ---------- + total : int + The total amount that should be reached by the progress bar once it finishes + update_interval : float + Specifies how frequently the progress bar is updated (in seconds) + disable : bool + Turns on or off the progress bar. If True, this thread won't be started or + initialized. + kwargs : Any + Keyword arguments that are passed into tqdm's constructor. Refer to: + `tqdm `_. Note that postfix can not be + specified in the kwargs since it is already passed into tqdm by this class. + """ + + def __init__( + self, + total: int, + update_interval: float = 1.0, + disable: bool = False, + **kwargs: Any, + ): + self.disable = disable + if not disable: + super().__init__(name="_progressbar_") + self.total = total + self.update_interval = update_interval + self.terminated: bool = False + self.kwargs = kwargs + # start this thread + self.start() + + def run(self) -> None: + """Display a tqdm progress bar in the console. + + Additionally, it shows useful information related to the task. This method + overrides the run method of Thread. 
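+
+        The bar advances one step per ``update_interval`` seconds until ``total``
+        steps are reached; once ``terminated`` is set, the remaining steps are
+        consumed without sleeping, which maxes the bar out quickly.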
+ """ + if not self.disable: + for _ in trange( + self.total, + postfix=f"The total time budget for this task is " + f"{datetime.timedelta(seconds=self.total)}", + **self.kwargs, + ): + if not self.terminated: + time.sleep(self.update_interval) + + def stop(self) -> None: + """Terminates the thread.""" + if not self.disable: + self.terminated = True + super().join() diff --git a/pyproject.toml b/pyproject.toml index 40ea854030..a696c0fb46 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -155,7 +155,8 @@ module = [ "setuptools.*", "pkg_resources.*", "yaml.*", - "psutil.*" + "psutil.*", + "tqdm.*", ] ignore_missing_imports = true diff --git a/requirements.txt b/requirements.txt index 76af7f4a06..d47fb91474 100644 --- a/requirements.txt +++ b/requirements.txt @@ -14,8 +14,9 @@ pyyaml pandas>=1.0 liac-arff threadpoolctl +tqdm ConfigSpace>=0.4.21,<0.5 pynisher>=0.6.3,<0.7 pyrfr>=0.8.1,<0.9 -smac>=1.2,<1.3 +smac>=1.2,<1.3 \ No newline at end of file diff --git a/setup.py b/setup.py index aa6e42669e..6e37e0e711 100644 --- a/setup.py +++ b/setup.py @@ -32,7 +32,7 @@ "test": [ "pytest>=4.6", "pytest-cov", - "pytest-xdist", + "pytest-forked", "pytest-timeout", "pytest-cases>=3.6.11", "mypy", From 5a90a19bf8f9342ce9cad7d28ce230bdbb33ead1 Mon Sep 17 00:00:00 2001 From: Eddie Bergman Date: Tue, 15 Nov 2022 14:53:25 +0100 Subject: [PATCH 08/16] fix(multiprocessing): Use list instead of key-view (#1609) --- autosklearn/util/parallel.py | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/autosklearn/util/parallel.py b/autosklearn/util/parallel.py index 0804588a61..2bbe474abb 100644 --- a/autosklearn/util/parallel.py +++ b/autosklearn/util/parallel.py @@ -3,7 +3,16 @@ def preload_modules(context: multiprocessing.context.BaseContext) -> None: - all_loaded_modules = sys.modules.keys() + """Attempt to preload modules when using forkserver""" + # NOTE: preloading and docstring + # + # This is just a best guess at why this is used, coming from this blogpost + # https://bnikolic.co.uk/blog/python/parallelism/2019/11/13/python-forkserver-preload.html + # Ideally we should identify subprocesses that get run with this and try limit the + # necessity to use all of these modules + # + # @eddiebergman + all_loaded_modules = list(sys.modules.keys()) preload = [ loaded_module for loaded_module in all_loaded_modules From 40f1111f7e733833ce59e038695e0b90452cebe9 Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" <41898282+github-actions[bot]@users.noreply.github.com> Date: Tue, 15 Nov 2022 16:50:25 +0100 Subject: [PATCH 09/16] chore: update pre-commit hooks (#1605) Co-authored-by: eddiebergman --- .pre-commit-config.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index baf26a9ee3..95b8c00f51 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -39,7 +39,7 @@ repos: additional_dependencies: ["toml"] # Needed to parse pyproject.toml - repo: https://github.com/pre-commit/mirrors-mypy - rev: v0.982 + rev: v0.990 hooks: - id: mypy name: mypy auto-sklearn From 1abd1f95eeb68d695cc66c60b2ccfa9a0b24b1ae Mon Sep 17 00:00:00 2001 From: Eddie Bergman Date: Tue, 15 Nov 2022 17:04:44 +0100 Subject: [PATCH 10/16] doc(smac): Update link for `get_smac_object_callback` (#1610) * doc(smac): Update link for `get_smac_object_callback` * doc(links): Update more smac links --- autosklearn/estimators.py | 4 ++-- autosklearn/experimental/askl2.py | 2 +- examples/40_advanced/example_multi_objective.py | 2 +- 3 files changed, 4 
insertions(+), 4 deletions(-) diff --git a/autosklearn/estimators.py b/autosklearn/estimators.py index 577265239e..68300b4a29 100644 --- a/autosklearn/estimators.py +++ b/autosklearn/estimators.py @@ -276,12 +276,12 @@ def __init__( smac_scenario_args : dict, optional (None) Additional arguments inserted into the scenario of SMAC. See the - `SMAC documentation `_ + `SMAC documentation `_ for a list of available arguments. get_smac_object_callback : callable Callback function to create an object of class - `smac.optimizer.smbo.SMBO `_. + `smac.facade.AbstractFacade `_. The function must accept the arguments ``scenario_dict``, ``instances``, ``num_params``, ``runhistory``, ``seed`` and ``ta``. This is an advanced feature. Use only if you are familiar with diff --git a/autosklearn/experimental/askl2.py b/autosklearn/experimental/askl2.py index b712ba484e..abe43ff254 100644 --- a/autosklearn/experimental/askl2.py +++ b/autosklearn/experimental/askl2.py @@ -264,7 +264,7 @@ def __init__( smac_scenario_args : dict, optional (None) Additional arguments inserted into the scenario of SMAC. See the - `SMAC documentation `_ + `SMAC documentation `_ for a list of available arguments. logging_config : dict, optional (None) diff --git a/examples/40_advanced/example_multi_objective.py b/examples/40_advanced/example_multi_objective.py index 2e4ceb1c7e..d61ce8b17a 100644 --- a/examples/40_advanced/example_multi_objective.py +++ b/examples/40_advanced/example_multi_objective.py @@ -8,7 +8,7 @@ competing metrics: `precision` and `recall` (read more on this tradeoff in the `scikit-learn docs `_. -Auto-sklearn uses `SMAC3's implementation of ParEGO `_. +Auto-sklearn uses `SMAC3's implementation of ParEGO `_. Multi-objective ensembling and proper access to the full Pareto set will be added in the near future. """ From 59ea4b0a8010e7a621503b4e1be2ca7c9d34fc03 Mon Sep 17 00:00:00 2001 From: Aron Bahram Date: Thu, 24 Nov 2022 13:12:14 +0100 Subject: [PATCH 11/16] refactor: use progress_bar more explicitly as a thread (#1622) --- autosklearn/automl.py | 3 +- autosklearn/util/progress_bar.py | 72 +++++++++++++++++++++----------- 2 files changed, 50 insertions(+), 25 deletions(-) diff --git a/autosklearn/automl.py b/autosklearn/automl.py index 93fde84330..1d37cb2321 100644 --- a/autosklearn/automl.py +++ b/autosklearn/automl.py @@ -652,6 +652,7 @@ def fit( # space self._backend.save_start_time(self._seed) + progress_bar.start() self._stopwatch = StopWatch() # Make sure that input is valid @@ -970,7 +971,7 @@ def fit( self._logger.exception(e) raise e finally: - progress_bar.stop() + progress_bar.join() self._fit_cleanup() self.fitted = True diff --git a/autosklearn/util/progress_bar.py b/autosklearn/util/progress_bar.py index 7ccd3bc153..c1eb3139f8 100644 --- a/autosklearn/util/progress_bar.py +++ b/autosklearn/util/progress_bar.py @@ -1,3 +1,5 @@ +from __future__ import annotations + from typing import Any import datetime @@ -10,22 +12,45 @@ class ProgressBar(Thread): """A Thread that displays a tqdm progress bar in the console. - It is specialized to display information relevant to fitting to the training data - with auto-sklearn. + Treat this class as an ordinary thread. So to display a progress bar, + call start() on an instance of this class. To wait for the thread to + terminate call join(), which will max out the progress bar, + therefore terminate this thread immediately. 
Parameters ---------- total : int - The total amount that should be reached by the progress bar once it finishes - update_interval : float - Specifies how frequently the progress bar is updated (in seconds) - disable : bool - Turns on or off the progress bar. If True, this thread won't be started or - initialized. - kwargs : Any + The total amount that should be reached by the progress bar once it finishes. + update_interval : float, default=1.0 + Specifies how frequently the progress bar is updated (in seconds). + disable : bool, default=False + Turns on or off the progress bar. If True, this thread does not get + initialized and won't be started if start() is called. + tqdm_kwargs : Any, optional Keyword arguments that are passed into tqdm's constructor. Refer to: - `tqdm `_. Note that postfix can not be - specified in the kwargs since it is already passed into tqdm by this class. + `tqdm `_ for a list of parameters that + tqdm accepts. Note that 'postfix' cannot be specified in the kwargs since it is + already passed into tqdm by this class. + + Examples + -------- + + .. code:: python + + progress_bar = ProgressBar( + total=10, + desc="Executing code that runs for 10 seconds", + colour="green", + ) + # colour is a tqdm parameter passed as a tqdm_kwargs + try: + progress_bar.start() + # some code that runs for 10 seconds + except SomeException: + # something went wrong + finally: + progress_bar.join() + # perform some cleanup """ def __init__( @@ -33,7 +58,7 @@ def __init__( total: int, update_interval: float = 1.0, disable: bool = False, - **kwargs: Any, + **tqdm_kwargs: Any, ): self.disable = disable if not disable: @@ -41,28 +66,27 @@ def __init__( self.total = total self.update_interval = update_interval self.terminated: bool = False - self.kwargs = kwargs - # start this thread - self.start() + self.tqdm_kwargs = tqdm_kwargs - def run(self) -> None: - """Display a tqdm progress bar in the console. + def start(self) -> None: + """Start a new thread that calls the run() method.""" + if not self.disable: + super().start() - Additionally, it shows useful information related to the task. This method - overrides the run method of Thread. 
- """ + def run(self) -> None: + """Display a tqdm progress bar in the console.""" if not self.disable: for _ in trange( self.total, postfix=f"The total time budget for this task is " f"{datetime.timedelta(seconds=self.total)}", - **self.kwargs, + **self.tqdm_kwargs, ): if not self.terminated: time.sleep(self.update_interval) - def stop(self) -> None: - """Terminates the thread.""" + def join(self, timeout: float | None = None) -> None: + """Maxes out the progress bar and thereby terminating this thread.""" if not self.disable: self.terminated = True - super().join() + super().join(timeout) From a978478f6053f2e966955347e32b20d6d35c0a61 Mon Sep 17 00:00:00 2001 From: Aron Bahram Date: Thu, 24 Nov 2022 13:14:44 +0100 Subject: [PATCH 12/16] fix: modify show_models() to display same ranks as leaderboard (#1621) --- autosklearn/automl.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/autosklearn/automl.py b/autosklearn/automl.py index 1d37cb2321..ffcf1fb033 100644 --- a/autosklearn/automl.py +++ b/autosklearn/automl.py @@ -2185,21 +2185,20 @@ def has_key(rv, key): table = pd.DataFrame.from_dict(table_dict, orient="index") table.sort_values(by="cost", inplace=True) + table["rank"] = np.arange(1, len(table.index) + 1) # Check which resampling strategy is chosen and selecting the appropriate models is_cv = self._resampling_strategy == "cv" models = self.cv_models_ if is_cv else self.models_ - rank = 1 # Initializing rank for the first model for (_, model_id, _), model in models.items(): model_dict = {} # Declaring model dictionary # Inserting model_id, rank, cost and ensemble weight model_dict["model_id"] = table.loc[model_id]["model_id"].astype(int) - model_dict["rank"] = rank + model_dict["rank"] = table.loc[model_id]["rank"].astype(int) model_dict["cost"] = table.loc[model_id]["cost"] model_dict["ensemble_weight"] = table.loc[model_id]["ensemble_weight"] - rank += 1 # Incrementing rank by 1 for the next model # The steps in the models pipeline are as follows: # 'data_preprocessor': DataPreprocessor, From 63bfbebbd288c8669d6bce7f44f8c9a3a82facd5 Mon Sep 17 00:00:00 2001 From: Aron Bahram Date: Wed, 7 Dec 2022 09:59:21 +0100 Subject: [PATCH 13/16] refactor: track model_ids in cv_results (#1628) --- autosklearn/automl.py | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/autosklearn/automl.py b/autosklearn/automl.py index ffcf1fb033..1b2b08f74f 100644 --- a/autosklearn/automl.py +++ b/autosklearn/automl.py @@ -1921,15 +1921,17 @@ def cv_results_(self): metric_dict[metric.name] = [] metric_mask[metric.name] = [] + model_ids = [] mean_fit_time = [] params = [] status = [] budgets = [] - for run_key in self.runhistory_.data: - run_value = self.runhistory_.data[run_key] + for run_key, run_value in self.runhistory_.data.items(): config_id = run_key.config_id config = self.runhistory_.ids_config[config_id] + if run_value.additional_info and "num_run" in run_value.additional_info: + model_ids.append(run_value.additional_info["num_run"]) s = run_value.status if s == StatusType.SUCCESS: @@ -1990,6 +1992,8 @@ def cv_results_(self): metric_dict[metric.name].append(metric_value) metric_mask[metric.name].append(mask_value) + results["model_ids"] = model_ids + if len(self._metrics) == 1: results["mean_test_score"] = np.array(metric_dict[self._metrics[0].name]) rank_order = -1 * self._metrics[0]._sign * results["mean_test_score"] @@ -2165,14 +2169,11 @@ def show_models(self) -> dict[int, Any]: warnings.warn("No ensemble found. 
Returning empty dictionary.") return ensemble_dict - def has_key(rv, key): - return rv.additional_info and key in rv.additional_info - table_dict = {} - for run_key, run_val in self.runhistory_.data.items(): - if has_key(run_val, "num_run"): - model_id = run_val.additional_info["num_run"] - table_dict[model_id] = {"model_id": model_id, "cost": run_val.cost} + for run_key, run_value in self.runhistory_.data.items(): + if run_value.additional_info and "num_run" in run_value.additional_info: + model_id = run_value.additional_info["num_run"] + table_dict[model_id] = {"model_id": model_id, "cost": run_value.cost} # Checking if the dictionary is empty if not table_dict: From 673211252ca508b6f5bb92cf5fa87c6455bbad99 Mon Sep 17 00:00:00 2001 From: Aron Bahram Date: Tue, 18 Apr 2023 13:08:13 +0200 Subject: [PATCH 14/16] fix(regressor): correctly cap the labels in predict (#1662) updates pre-commit --- .pre-commit-config.yaml | 8 ++++---- autosklearn/pipeline/regression.py | 25 ++++++++++++++++++++++++- 2 files changed, 28 insertions(+), 5 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 95b8c00f51..af0ec72b29 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -4,7 +4,7 @@ repos: - repo: https://github.com/pycqa/isort - rev: 5.10.1 + rev: 5.11.5 hooks: - id: isort name: isort imports autosklearn @@ -15,7 +15,7 @@ repos: files: test/.* - repo: https://github.com/psf/black - rev: 22.10.0 + rev: 23.3.0 hooks: - id: black name: black formatter autosklearn @@ -31,7 +31,7 @@ repos: # This is disabled as most modules fail this - repo: https://github.com/pycqa/pydocstyle - rev: 6.1.1 + rev: 6.3.0 hooks: - id: pydocstyle files: DISABLED # autosklearn/.* @@ -39,7 +39,7 @@ repos: additional_dependencies: ["toml"] # Needed to parse pyproject.toml - repo: https://github.com/pre-commit/mirrors-mypy - rev: v0.990 + rev: v1.2.0 hooks: - id: mypy name: mypy auto-sklearn diff --git a/autosklearn/pipeline/regression.py b/autosklearn/pipeline/regression.py index dcc2fa3fcf..85f5ed70ab 100644 --- a/autosklearn/pipeline/regression.py +++ b/autosklearn/pipeline/regression.py @@ -106,12 +106,35 @@ def iterative_fit(self, X, y, n_iter=1, **fit_params): ) def predict(self, X, batch_size=None): + """Predict the classes using the selected model. + + Predicted values are capped to approximately the maximum and minimum labels + seen during training. + + Parameters + ---------- + X : array-like, shape = (n_samples, n_features) + + batch_size: int or None, defaults to None + batch_size controls whether the pipeline will be + called on small chunks of the data. Useful when calling the + predict method on the whole array X results in a MemoryError. 
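+
+        For example, with training labels spanning [0, 10], a raw model output
+        of 25 is returned as 20 (i.e. ``2 * self.y_max_``, since ``y_max_`` is
+        positive here).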
---
 .pre-commit-config.yaml            |  8 ++++----
 autosklearn/pipeline/regression.py | 25 ++++++++++++++++++++++++-
 2 files changed, 28 insertions(+), 5 deletions(-)

diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index 95b8c00f51..af0ec72b29 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -4,7 +4,7 @@ repos:
 
   - repo: https://github.com/pycqa/isort
-    rev: 5.10.1
+    rev: 5.11.5
     hooks:
       - id: isort
         name: isort imports autosklearn
@@ -15,7 +15,7 @@
         files: test/.*
 
   - repo: https://github.com/psf/black
-    rev: 22.10.0
+    rev: 23.3.0
     hooks:
       - id: black
         name: black formatter autosklearn
@@ -31,7 +31,7 @@
 
   # This is disabled as most modules fail this
   - repo: https://github.com/pycqa/pydocstyle
-    rev: 6.1.1
+    rev: 6.3.0
     hooks:
       - id: pydocstyle
         files: DISABLED # autosklearn/.*
         additional_dependencies: ["toml"]  # Needed to parse pyproject.toml
 
   - repo: https://github.com/pre-commit/mirrors-mypy
-    rev: v0.990
+    rev: v1.2.0
     hooks:
       - id: mypy
         name: mypy auto-sklearn

diff --git a/autosklearn/pipeline/regression.py b/autosklearn/pipeline/regression.py
index dcc2fa3fcf..85f5ed70ab 100644
--- a/autosklearn/pipeline/regression.py
+++ b/autosklearn/pipeline/regression.py
@@ -106,12 +106,35 @@ def iterative_fit(self, X, y, n_iter=1, **fit_params):
         )
 
     def predict(self, X, batch_size=None):
+        """Predict the targets using the selected model.
+
+        Predicted values are capped to approximately the maximum and minimum labels
+        seen during training.
+
+        Parameters
+        ----------
+        X : array-like, shape = (n_samples, n_features)
+
+        batch_size: int or None, defaults to None
+            batch_size controls whether the pipeline will be
+            called on small chunks of the data. Useful when calling the
+            predict method on the whole array X results in a MemoryError.
+
+        Returns
+        -------
+        array, shape = (n_samples,) or (n_samples, n_targets)
+            Returns the predicted values."""
         y = super().predict(X, batch_size=batch_size)
-        y[y > (2 * self.y_max_)] = 2 * self.y_max_
+
+        if self.y_max_ > 0:
+            y[y > (2 * self.y_max_)] = 2 * self.y_max_
+        elif self.y_max_ < 0:
+            y[y > (0.5 * self.y_max_)] = 0.5 * self.y_max_
         if self.y_min_ < 0:
             y[y < (2 * self.y_min_)] = 2 * self.y_min_
         elif self.y_min_ > 0:
             y[y < (0.5 * self.y_min_)] = 0.5 * self.y_min_
+
         return y
 
     def _get_hyperparameter_search_space(

From 87a10eec249d61912a4ecd742329925c7a36633d Mon Sep 17 00:00:00 2001
From: agentmarketbot
Date: Thu, 9 Jan 2025 22:56:30 +0000
Subject: [PATCH 15/16] agent bot commit
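
Document how to disable the data preprocessing step: the NoPreprocessing
component has to be registered before it can be selected via ``include``.
Put together, the documented steps read as follows (a sketch; the time
budget is an arbitrary placeholder):

    from autosklearn.classification import AutoSklearnClassifier
    from autosklearn.pipeline.components.data_preprocessing import add_preprocessor
    from autosklearn.pipeline.components.data_preprocessing.NoPreprocessing import NoPreprocessing

    add_preprocessor(NoPreprocessing)  # step 1: register the component
    automl = AutoSklearnClassifier(    # step 2: include it in the search space
        time_left_for_this_task=60,
        include={"data_preprocessor": ["NoPreprocessing"]},
    )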
""" from typing import Optional from pprint import pprint From 31dc88a62fff22959a4b9e53c40f9a8fa0b29538 Mon Sep 17 00:00:00 2001 From: agentmarketbot Date: Thu, 9 Jan 2025 23:01:20 +0000 Subject: [PATCH 16/16] agent bot commit --- .../data_preprocessing/NoPreprocessing.py | 83 +++++++++++++++++++ doc/manual.rst | 3 + .../example_extending_data_preprocessor.py | 3 + .../test_NoPreprocessing.py | 47 +++++++++++ 4 files changed, 136 insertions(+) create mode 100644 autosklearn/pipeline/components/data_preprocessing/NoPreprocessing.py create mode 100644 test/test_pipeline/components/data_preprocessing/test_NoPreprocessing.py diff --git a/autosklearn/pipeline/components/data_preprocessing/NoPreprocessing.py b/autosklearn/pipeline/components/data_preprocessing/NoPreprocessing.py new file mode 100644 index 0000000000..61eb3d60e1 --- /dev/null +++ b/autosklearn/pipeline/components/data_preprocessing/NoPreprocessing.py @@ -0,0 +1,83 @@ +from typing import Optional, Dict, Any, Union, Tuple + +from ConfigSpace.configuration_space import ConfigurationSpace +import numpy as np + +from autosklearn.pipeline.base import DATASET_PROPERTIES_TYPE +from autosklearn.pipeline.components.base import AutoSklearnPreprocessingAlgorithm +from autosklearn.pipeline.constants import DENSE, SPARSE, UNSIGNED_DATA, INPUT + + +class NoPreprocessing(AutoSklearnPreprocessingAlgorithm): + def __init__( + self, + random_state: Optional[Union[int, np.random.RandomState]] = None, + ) -> None: + """A component that does no preprocessing, passing the data through unchanged.""" + self.random_state = random_state + + def fit(self, X: DENSE, y: Optional[DENSE] = None) -> "NoPreprocessing": + """Fit the NoPreprocessing component. + + Parameters + ---------- + X : {array-like, sparse matrix}, shape = (n_samples, n_features) + Training data + y : array-like, shape = (n_samples,), optional + Targets for supervised learning + + Returns + ------- + self : NoPreprocessing + This estimator + """ + self.fitted_ = True + return self + + def transform(self, X: DENSE) -> DENSE: + """Transform the data by doing nothing. + + Parameters + ---------- + X : {array-like, sparse matrix}, shape = (n_samples, n_features) + Data to transform + + Returns + ------- + X : {array-like, sparse matrix}, shape = (n_samples, n_features) + Transformed data (identical to input) + """ + if self.fitted_ is False: + raise NotImplementedError() + return X + + @staticmethod + def get_properties( + dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None, + ) -> Dict[str, Optional[Union[str, int, bool, Tuple]]]: + return { + 'shortname': 'NoPreprocessing', + 'name': 'No Preprocessing', + 'handles_regression': True, + 'handles_classification': True, + 'handles_multiclass': True, + 'handles_multilabel': True, + 'handles_multioutput': True, + 'is_deterministic': True, + 'input': (DENSE, SPARSE, UNSIGNED_DATA), + 'output': (INPUT,) + } + + @staticmethod + def get_hyperparameter_search_space( + dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None, + ) -> ConfigurationSpace: + """Return the configuration space for this component. + + Returns + ------- + cs : ConfigurationSpace + The configuration space describing all hyperparameters of this component. 
---
 .../data_preprocessing/NoPreprocessing.py    | 85 ++++++++++++++++++++
 doc/manual.rst                               |  3 +
 .../example_extending_data_preprocessor.py   |  3 +
 .../test_NoPreprocessing.py                  | 47 +++++++++++
 4 files changed, 138 insertions(+)
 create mode 100644 autosklearn/pipeline/components/data_preprocessing/NoPreprocessing.py
 create mode 100644 test/test_pipeline/components/data_preprocessing/test_NoPreprocessing.py

diff --git a/autosklearn/pipeline/components/data_preprocessing/NoPreprocessing.py b/autosklearn/pipeline/components/data_preprocessing/NoPreprocessing.py
new file mode 100644
index 0000000000..61eb3d60e1
--- /dev/null
+++ b/autosklearn/pipeline/components/data_preprocessing/NoPreprocessing.py
@@ -0,0 +1,85 @@
+from typing import Dict, Optional, Tuple, Union
+
+from ConfigSpace.configuration_space import ConfigurationSpace
+import numpy as np
+from scipy import sparse
+
+from autosklearn.pipeline.base import DATASET_PROPERTIES_TYPE
+from autosklearn.pipeline.components.base import AutoSklearnPreprocessingAlgorithm
+from autosklearn.pipeline.constants import DENSE, SPARSE, UNSIGNED_DATA, INPUT
+
+
+class NoPreprocessing(AutoSklearnPreprocessingAlgorithm):
+    def __init__(
+        self,
+        random_state: Optional[Union[int, np.random.RandomState]] = None,
+    ) -> None:
+        """A component that does no preprocessing, passing the data through unchanged."""
+        self.random_state = random_state
+        self.fitted_ = False  # so transform() can detect calls before fit()
+
+    def fit(self, X: Union[np.ndarray, sparse.spmatrix], y: Optional[np.ndarray] = None) -> "NoPreprocessing":
+        """Fit the NoPreprocessing component.
+
+        Parameters
+        ----------
+        X : {array-like, sparse matrix}, shape = (n_samples, n_features)
+            Training data
+        y : array-like, shape = (n_samples,), optional
+            Targets for supervised learning
+
+        Returns
+        -------
+        self : NoPreprocessing
+            This estimator
+        """
+        self.fitted_ = True
+        return self
+
+    def transform(self, X: Union[np.ndarray, sparse.spmatrix]) -> Union[np.ndarray, sparse.spmatrix]:
+        """Transform the data by doing nothing.
+
+        Parameters
+        ----------
+        X : {array-like, sparse matrix}, shape = (n_samples, n_features)
+            Data to transform
+
+        Returns
+        -------
+        X : {array-like, sparse matrix}, shape = (n_samples, n_features)
+            Transformed data (identical to input)
+        """
+        if not self.fitted_:
+            raise NotImplementedError()
+        return X
+
+    @staticmethod
+    def get_properties(
+        dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None,
+    ) -> Dict[str, Optional[Union[str, int, bool, Tuple]]]:
+        return {
+            'shortname': 'NoPreprocessing',
+            'name': 'No Preprocessing',
+            'handles_regression': True,
+            'handles_classification': True,
+            'handles_multiclass': True,
+            'handles_multilabel': True,
+            'handles_multioutput': True,
+            'is_deterministic': True,
+            'input': (DENSE, SPARSE, UNSIGNED_DATA),
+            'output': (INPUT,)
+        }
+
+    @staticmethod
+    def get_hyperparameter_search_space(
+        dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None,
+    ) -> ConfigurationSpace:
+        """Return the configuration space for this component.
+
+        Returns
+        -------
+        cs : ConfigurationSpace
+            The configuration space describing all hyperparameters of this component.
+        """
+        cs = ConfigurationSpace()
+        return cs
\ No newline at end of file
diff --git a/doc/manual.rst b/doc/manual.rst
index c5be838e94..a6483698a6 100644
--- a/doc/manual.rst
+++ b/doc/manual.rst
@@ -174,6 +174,9 @@ to restrict the searchspace:
             include={"data_preprocessor": ["NoPreprocessing"]},
         )
 
+    Note: Register the NoPreprocessing component BEFORE creating the AutoSklearnClassifier;
+    otherwise auto-sklearn raises a ValueError stating that the component is not valid.
+
     For a complete example, refer to :ref:`example `.
 
 .. collapse:: Turn off feature preprocessing

diff --git a/examples/80_extending/example_extending_data_preprocessor.py b/examples/80_extending/example_extending_data_preprocessor.py
index 095ad803be..4a8af872fd 100644
--- a/examples/80_extending/example_extending_data_preprocessor.py
+++ b/examples/80_extending/example_extending_data_preprocessor.py
@@ -10,6 +10,9 @@
 3. Ensure the data remains unchanged before reaching the models
 
 Note: You must register the NoPreprocessing component before using it in include={}.
+The full registration workflow is demonstrated in the code below.
+
+Fixes #1745
 """
 from typing import Optional
 from pprint import pprint

diff --git a/test/test_pipeline/components/data_preprocessing/test_NoPreprocessing.py b/test/test_pipeline/components/data_preprocessing/test_NoPreprocessing.py
new file mode 100644
index 0000000000..fcd876f702
--- /dev/null
+++ b/test/test_pipeline/components/data_preprocessing/test_NoPreprocessing.py
@@ -0,0 +1,47 @@
+import unittest
+
+import numpy as np
+from scipy import sparse
+
+from autosklearn.pipeline.components.data_preprocessing.NoPreprocessing import NoPreprocessing
+
+
+class NoPreprocessingTest(unittest.TestCase):
+    def test_preprocessing_dtype_transform(self):
+        # Dense
+        X = np.random.rand(3, 2)
+        Y = np.random.randint(0, 2, (3,))
+        no_preprocessing = NoPreprocessing()
+        no_preprocessing.fit(X, Y)
+        X_transformed = no_preprocessing.transform(X)
+        self.assertIsInstance(X_transformed, np.ndarray)
+        np.testing.assert_array_equal(X_transformed, X)
+
+        # Sparse
+        X = sparse.csr_matrix(X)
+        Y = np.random.randint(0, 2, (3,))
+        no_preprocessing = NoPreprocessing()
+        no_preprocessing.fit(X, Y)
+        X_transformed = no_preprocessing.transform(X)
+        self.assertIsInstance(X_transformed, sparse.csr_matrix)
+        np.testing.assert_array_equal(X_transformed.toarray(), X.toarray())
+
+    def test_preprocessing_dtype_transform_no_fit(self):
+        X = np.random.rand(3, 2)
+        no_preprocessing = NoPreprocessing()
+        with self.assertRaises(NotImplementedError):
+            no_preprocessing.transform(X)
+
+    def test_preprocessing_properties(self):
+        props = NoPreprocessing.get_properties()
+        self.assertEqual(props['shortname'], 'NoPreprocessing')
+        self.assertTrue(props['handles_regression'])
+        self.assertTrue(props['handles_classification'])
+        self.assertTrue(props['handles_multiclass'])
+        self.assertTrue(props['handles_multilabel'])
+        self.assertTrue(props['handles_multioutput'])
+        self.assertTrue(props['is_deterministic'])
+
+    def test_hyperparameter_search_space(self):
+        cs = NoPreprocessing.get_hyperparameter_search_space()
+        self.assertEqual(len(cs.get_hyperparameters()), 0)
\ No newline at end of file