From d3af5592d54d66629bb6965b5a10550f267d2b05 Mon Sep 17 00:00:00 2001 From: Dan Date: Wed, 24 Nov 2021 15:01:55 +0100 Subject: [PATCH 1/3] version bump to 1.6.2 --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 7483ebf..be81714 100644 --- a/setup.py +++ b/setup.py @@ -46,7 +46,7 @@ setup( name="skorecard", - version="1.6.1", + version="1.6.2", description="Tools for building scorecard models in python, with a sklearn-compatible API", long_description=long_description, long_description_content_type="text/markdown", From 6d2ce4c24da7bf1a67a0f477f66158987d073095 Mon Sep 17 00:00:00 2001 From: Dan Date: Wed, 24 Nov 2021 15:02:11 +0100 Subject: [PATCH 2/3] add new columns to assertion --- tests/test_reports.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/test_reports.py b/tests/test_reports.py index e3c55cc..7d766cf 100644 --- a/tests/test_reports.py +++ b/tests/test_reports.py @@ -14,7 +14,7 @@ def test_report_decision_tree(df): tbt.transform(X) df_out = build_bucket_table(X, y, column="LIMIT_BAL", bucketer=tbt) - assert df_out.shape == (5, 9) + assert df_out.shape == (5, 11) # Make sure bucket table equals feature bucket mapping dict assert ( dict(zip(df_out["bucket_id"].values, df_out["label"].values)) @@ -36,9 +36,9 @@ def test_report_decision_tree(df): "Count (%)", "Non-event", "Event", + "% Event", + "% Non-event", "Event Rate", - # "% Event", - # "% Non Event", "WoE", "IV", ] From 10e43bb492dd8f4e4e259291bf3630f0970a639e Mon Sep 17 00:00:00 2001 From: Dan Date: Wed, 24 Nov 2021 15:02:26 +0100 Subject: [PATCH 3/3] add %-event and non-event to bucket table --- skorecard/reporting/report.py | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/skorecard/reporting/report.py b/skorecard/reporting/report.py index 6d83e87..bc2b8a7 100644 --- a/skorecard/reporting/report.py +++ b/skorecard/reporting/report.py @@ -126,6 +126,10 @@ def build_bucket_table( 
stats["WoE"] = ((stats["% Non-event"] + epsilon) / (stats["% Event"] + epsilon)).apply(lambda x: np.log(x)) stats["IV"] = (stats["% Non-event"] - stats["% Event"]) * stats["WoE"] + stats["% Event"] = np.round(100 * stats["% Event"], 2) + stats["% Non-event"] = np.round(100 * stats["% Non-event"], 2) + stats["Event Rate"] = np.round(stats["Event Rate"], 3) + stats["WoE"] = np.round(stats["WoE"], 3) stats["IV"] = np.round(stats["IV"], 3) @@ -140,6 +144,8 @@ def build_bucket_table( "Count (%)", "Non-event", "Event", + "% Event", + "% Non-event", "Event Rate", "WoE", "IV", @@ -167,10 +173,10 @@ def bucket_table(self, column): the BucketingProcess ends up with the final buckets. An example: - bucket | label | Count | Count (%) | Non-event | Event | Event Rate | WoE | IV - -----------|--------------------|-------|-----------|-----------|-------|------------|------|----- - 0 | (-inf, 25000.0) | 479.0 | 7.98 | 300.0 | 179.0 | 37.37 | 0.73 | 0.05 - 1 | [25000.0, 45000.0) | 370.0 | 6.17 | 233.0 | 137.0 | 37.03 | 0.71 | 0.04 + bucket | label | Count | Count (%) | Non-event | Event | % Event | % Non-event | Event Rate | WoE | IV + -------|--------------|-------|-----------|-----------|-------|---------|-------------|------------|------|----- + 0 | (-inf, 25.0) | 61.0 | 1.36 | 57.0 | 4.0 | 0.41 | 1.62 | 0.066 | 1.380 | 0.017 + 1 | [25.0, 45.0) | 2024.0 | 44.98 | 1536.0 | 488.0 | 49.64 | 43.67 | 0.241 | -0.128 | 0.008 Args: column: The column we wish to analyse