Skip to content

Commit 6e32db5

Browse files
authored
Merge pull request #178 from lvgig/feature/log_issue
Change log(x+1) to log1p(x) in LogTransformer
2 parents 8efb2af + 47e54eb commit 6e32db5

File tree

4 files changed

+40
-7
lines changed

4 files changed

+40
-7
lines changed

CHANGELOG.rst

+11
Original file line number | Diff line number | Diff line change
@@ -16,6 +16,17 @@ Subsections for each version can be one of the following;
1616

1717
Each individual change should have a link to the pull request after the description of the change.
1818

19+
1.2.2 (2024-02-20)
20+
------------------
21+
Added
22+
^^^^^
23+
- Created unit test checking that log1p works as intended and is well-conditioned for small x `#178 <https://github.com/lvgig/tubular/pull/178>`_
24+
25+
Changed
26+
^^^^^^^
27+
- Changed LogTransformer to use log1p(x) instead of log(x+1) `#178 <https://github.com/lvgig/tubular/pull/178>`_
28+
- Changed unit tests that computed expected values with log(x+1) to use log1p(x) `#178 <https://github.com/lvgig/tubular/pull/178>`_
29+
1930
1.2.1 (2024-02-08)
2031
------------------
2132
Added

tests/numeric/test_LogTransformer.py

+26-4
Original file line number | Diff line number | Diff line change
@@ -96,8 +96,8 @@ def expected_df_2():
9696
"""Expected output of test_expected_output_2."""
9797
df = d.create_df_3()
9898

99-
df["a_new_col"] = np.log(df["a"] + 1)
100-
df["b_new_col"] = np.log(df["b"] + 1)
99+
df["a_new_col"] = np.log1p(df["a"])
100+
df["b_new_col"] = np.log1p(df["b"])
101101

102102
return df.drop(columns=["a", "b"])
103103

@@ -114,8 +114,8 @@ def expected_df_4():
114114
"""Expected output of test_expected_output_4."""
115115
df = d.create_df_3()
116116

117-
df["a_new_col"] = np.log(df["a"] + 1)
118-
df["b_new_col"] = np.log(df["b"] + 1)
117+
df["a_new_col"] = np.log1p(df["a"])
118+
df["b_new_col"] = np.log1p(df["b"])
119119

120120
return df
121121

@@ -135,6 +135,28 @@ def expected_df_6():
135135

136136
return df.drop("a", axis=1)
137137

138+
def test_log1p(self):
139+
"""Test that log1p is working as intended."""
140+
df = pd.DataFrame(
141+
{
142+
"a": [0.00001, 0.00002, 0.00003],
143+
"b": [0.00004, 0.00005, 0.00006],
144+
},
145+
)
146+
# Values created using np.log1p() of original df
147+
expected = pd.DataFrame(
148+
{
149+
"a_log": [9.999950e-06, 1.999980e-05, 2.999955e-05],
150+
"b_log": [3.99992000e-05, 4.99987500e-05, 5.99982001e-05],
151+
},
152+
)
153+
log_transformer = LogTransformer(
154+
columns=["a", "b"],
155+
add_1=True,
156+
)
157+
actual = log_transformer.transform(df)
158+
pd.testing.assert_frame_equal(actual, expected)
159+
138160
def test_super_transform_called(self, mocker):
139161
"""Test that BaseTransformer.transform called."""
140162
df = d.create_df_3()

tubular/_version.py

+1-1
Original file line number | Diff line number | Diff line change
@@ -1 +1 @@
1-
__version__ = "1.2.1"
1+
__version__ = "1.2.2"

tubular/numeric.py

+2-2
Original file line number | Diff line number | Diff line change
@@ -122,10 +122,10 @@ def transform(self, X: pd.DataFrame) -> pd.DataFrame:
122122
raise ValueError(msg)
123123

124124
if self.base is None:
125-
X[new_column_names] = np.log(X[self.columns] + 1)
125+
X[new_column_names] = np.log1p(X[self.columns])
126126

127127
else:
128-
X[new_column_names] = np.log(X[self.columns] + 1) / np.log(self.base)
128+
X[new_column_names] = np.log1p(X[self.columns]) / np.log(self.base)
129129

130130
else:
131131
if (X[self.columns] <= 0).sum().sum() > 0:

0 commit comments

Comments
 (0)