Skip to content

Commit

Permalink
use unit test lib
Browse files Browse the repository at this point in the history
  • Loading branch information
mn-mikke committed Jan 22, 2024
1 parent afcb251 commit 4a791f3
Show file tree
Hide file tree
Showing 2 changed files with 99 additions and 94 deletions.
65 changes: 36 additions & 29 deletions h2o-py/tests/testdir_algos/gbm/pyunit_gbm_pojo_import.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,37 +2,44 @@

sys.path.insert(1, "../../../")
import h2o
import unittest
from tests import pyunit_utils
from h2o.estimators.gbm import H2OGradientBoostingEstimator
from pandas.util.testing import assert_frame_equal


def prostate_pojo_import():
    """Round-trip a GBM through its POJO and compare it with the original.

    A default GBM is trained on the prostate data (``ID`` dropped, ``CAPSULE``
    converted to a factor and used as the response). Its POJO is downloaded
    into the location resolved for ``"results"`` and loaded back with
    ``h2o.import_mojo``. The test then asserts that both models produce equal
    predictions and equal partial dependence data for ``AGE``.
    """
    data_path = pyunit_utils.locate("smalldata/logreg/prostate.csv")
    frame = h2o.import_file(path=data_path).drop("ID")
    frame['CAPSULE'] = frame['CAPSULE'].asfactor()

    gbm = H2OGradientBoostingEstimator()
    gbm.train(y="CAPSULE", training_frame=frame)

    pojo_file = h2o.download_pojo(gbm, path=pyunit_utils.locate("results"))
    gbm_from_pojo = h2o.import_mojo(pojo_file)
    print(gbm_from_pojo)

    # 1. check scoring
    expected_preds = gbm.predict(frame)
    actual_preds = gbm_from_pojo.predict(frame)
    assert_frame_equal(expected_preds.as_data_frame(), actual_preds.as_data_frame())

    # 2. check we can get PDPs
    expected_pdp = gbm.partial_plot(frame=frame, cols=['AGE'], server=True, plot=False)
    actual_pdp = gbm_from_pojo.partial_plot(frame=frame, cols=['AGE'], server=True, plot=False)
    assert_frame_equal(expected_pdp[0].as_data_frame(), actual_pdp[0].as_data_frame())


# Hand the test function to the pyunit harness along with the custom JVM arguments.
pyunit_utils.standalone_test(
    prostate_pojo_import,
    dict(jvm_custom_args=["-Dsys.ai.h2o.pojo.import.enabled=true"]),
)
class TestGBMPojoImport(unittest.TestCase):
    """Checks that a GBM exported as a POJO can be imported back and used."""

    def test(self):
        """Train a GBM, re-import its POJO and compare both models.

        The test starts its own H2O connection with the POJO-import JVM
        argument and shuts the cluster down when done, whether the checks
        pass or fail. It asserts that the original and the imported model
        give equal predictions and equal partial dependence data for ``AGE``.
        """
        # Connect *before* entering the try block: if init fails there is no
        # cluster to shut down, and attempting that in ``finally`` would only
        # raise a second error on top of the real one.
        h2o.init(strict_version_check=False, jvm_custom_args=["-Dsys.ai.h2o.pojo.import.enabled=true", ])
        try:
            prostate = h2o.import_file(path=pyunit_utils.locate("smalldata/logreg/prostate.csv"))
            prostate = prostate.drop("ID")
            prostate['CAPSULE'] = prostate['CAPSULE'].asfactor()

            model = H2OGradientBoostingEstimator()
            model.train(
                y="CAPSULE",
                training_frame=prostate
            )

            sandbox_dir = pyunit_utils.locate("results")
            pojo_path = h2o.download_pojo(model, path=sandbox_dir)

            model_imported = h2o.import_mojo(pojo_path)
            print(model_imported)

            # 1. check scoring
            preds_original = model.predict(prostate)
            preds_imported = model_imported.predict(prostate)
            assert_frame_equal(preds_original.as_data_frame(), preds_imported.as_data_frame())

            # 2. check we can get PDPs
            pdp_original = model.partial_plot(frame=prostate, cols=['AGE'], server=True, plot=False)
            pdp_imported = model_imported.partial_plot(frame=prostate, cols=['AGE'], server=True, plot=False)
            assert_frame_equal(pdp_original[0].as_data_frame(), pdp_imported[0].as_data_frame())
        finally:
            # Always release the cluster this test started.
            h2o.cluster().shutdown()


# Run the test case when the module is loaded. TextTestRunner.run() only
# returns the result object, so turn a failed run into a non-zero exit status;
# otherwise a caller that checks the return code would see success.
suite = unittest.TestLoader().loadTestsFromTestCase(TestGBMPojoImport)
if not unittest.TextTestRunner().run(suite).wasSuccessful():
    raise SystemExit(1)
128 changes: 63 additions & 65 deletions h2o-py/tests/testdir_generic_model/pyunit_combined_pojo_import.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
from h2o.estimators import H2OGradientBoostingEstimator, H2OGeneralizedLinearEstimator
from tests import pyunit_utils
import os
import sys
import unittest
from pandas.testing import assert_frame_equal


Expand Down Expand Up @@ -282,67 +282,65 @@ def make_pojo_embeddable(pojo_path):
return "".join(pojo_lines)


def generate_and_import_combined_pojo():
    """Build a combined GLM + GBM POJO, upload it and verify its predictions.

    A GLM and a 5-tree GBM are trained on the weather data with ``RISK_MM``
    as the response. A combined POJO is generated from both models and
    uploaded, and its predictions are compared against the individual models:
    first with ``Bias`` forcing GLM-like and GBM-like behaviour, then per
    ``ChangeWindDirect`` segment. On Python 2 the function prints a notice
    and returns without running anything.
    """
    if sys.version_info[0] < 3:  # Python 2
        print("This example needs Python 3.x+")
        return

    weather_orig = h2o.import_file(path=pyunit_utils.locate("smalldata/junit/weather.csv"))

    response = "RISK_MM"
    predictors = sorted(set(weather_orig.names) - {"Date", "RainTomorrow", "Sunshine"})

    glm = H2OGeneralizedLinearEstimator()
    glm.train(x=predictors, y=response, training_frame=weather_orig)
    glm_reference = glm.predict(weather_orig)

    gbm = H2OGradientBoostingEstimator(ntrees=5)
    gbm.train(x=predictors, y=response, training_frame=weather_orig)
    gbm_reference = gbm.predict(weather_orig)

    # Working copy without the columns that we will calculate in POJO manually
    # (we will recreate them in POJO to be the exact same)
    scoring_frame = weather_orig.drop("ChangeTemp").drop("ChangeTempDir")

    pojo_name, pojo_path = generate_combined_pojo(glm, gbm)
    print("Combined POJO was stored in: " + pojo_path)

    # Note: when using upload_mojo - always specify model_id=<POJO class name>
    pojo_model = h2o.upload_mojo(pojo_path, model_id=pojo_name)

    # Sanity tests - Bias 1 delegates to GLM, Bias 0 delegates to GBM
    for bias, reference in ((1, glm_reference), (0, gbm_reference)):
        scoring_frame["Bias"] = bias
        pojo_preds = pojo_model.predict(scoring_frame)
        assert_frame_equal(pojo_preds.as_data_frame(), reference.as_data_frame())

    # Per-segment behavior, segments are defined by ChangeWindDirect
    scoring_frame["Bias"] = float("NaN")
    for level in scoring_frame["ChangeWindDirect"].levels()[0]:
        segment = scoring_frame[scoring_frame["ChangeWindDirect"] == level]
        segment_orig = weather_orig[weather_orig["ChangeWindDirect"] == level]
        actual = pojo_model.predict(segment)
        if level in ("c", "l"):
            expected = glm.predict(segment_orig) * 2
        elif level == "n":
            expected = (glm.predict(segment_orig) + gbm.predict(segment_orig)) / 2
        elif level == "s":
            expected = gbm.predict(segment_orig)
        else:
            # No expectation is defined for any other level.
            continue
        assert_frame_equal(actual.as_data_frame(), expected.as_data_frame())


# Hand the test function to the pyunit harness along with the custom JVM arguments.
pyunit_utils.standalone_test(
    generate_and_import_combined_pojo,
    dict(jvm_custom_args=["-Dsys.ai.h2o.pojo.import.enabled=true"]),
)

class TestCombinedPojoImport(unittest.TestCase):
    """Checks that a hand-combined GLM + GBM POJO can be uploaded and scored."""

    def test(self):
        """Generate a combined POJO, upload it and compare its predictions.

        The test starts its own H2O connection with the POJO-import JVM
        argument and shuts the cluster down when done, whether the checks
        pass or fail. Predictions of the uploaded POJO are compared with the
        GLM and GBM it was built from, first via the ``Bias`` column and then
        per ``ChangeWindDirect`` segment.
        """
        # Connect *before* entering the try block: if init fails there is no
        # cluster to shut down, and attempting that in ``finally`` would only
        # raise a second error on top of the real one.
        h2o.init(strict_version_check=False, jvm_custom_args=["-Dsys.ai.h2o.pojo.import.enabled=true", ])
        try:
            weather_orig = h2o.import_file(path=pyunit_utils.locate("smalldata/junit/weather.csv"))
            weather = weather_orig  # working copy

            features = list(set(weather.names) - {"Date", "RainTomorrow", "Sunshine"})
            features.sort()
            response = "RISK_MM"

            glm_model = H2OGeneralizedLinearEstimator()
            glm_model.train(x=features, y=response, training_frame=weather)
            glm_preds = glm_model.predict(weather)

            gbm_model = H2OGradientBoostingEstimator(ntrees=5)
            gbm_model.train(x=features, y=response, training_frame=weather)
            gbm_preds = gbm_model.predict(weather)

            # Drop columns that we will calculate in POJO manually (we will recreate them in POJO to be the exact same)
            weather = weather.drop("ChangeTemp")
            weather = weather.drop("ChangeTempDir")

            (combined_pojo_name, combined_pojo_path) = generate_combined_pojo(glm_model, gbm_model)
            print("Combined POJO was stored in: " + combined_pojo_path)

            # Note: when using upload_mojo - always specify model_id=<POJO class name>
            pojo_model = h2o.upload_mojo(combined_pojo_path, model_id=combined_pojo_name)

            # Testing begins

            # Sanity test - test parameterization that delegates to GLM
            weather["Bias"] = 1  # behave like GLM
            pojo_glm_preds = pojo_model.predict(weather)
            assert_frame_equal(pojo_glm_preds.as_data_frame(), glm_preds.as_data_frame())

            # Sanity test - test parameterization that delegates to GBM
            weather["Bias"] = 0  # behave like GBM
            pojo_gbm_preds = pojo_model.predict(weather)
            assert_frame_equal(pojo_gbm_preds.as_data_frame(), gbm_preds.as_data_frame())

            # Test per-segment specific behavior, segments are defined by ChangeWindDirect
            weather["Bias"] = float("NaN")
            for change_wind_dir in weather["ChangeWindDirect"].levels()[0]:
                weather_cwd = weather[weather["ChangeWindDirect"] == change_wind_dir]
                weather_orig_cwd = weather_orig[weather_orig["ChangeWindDirect"] == change_wind_dir]
                pojo_weather_cwd_preds = pojo_model.predict(weather_cwd)
                if change_wind_dir == "c" or change_wind_dir == "l":
                    expected = glm_model.predict(weather_orig_cwd) * 2
                    assert_frame_equal(pojo_weather_cwd_preds.as_data_frame(), expected.as_data_frame())
                elif change_wind_dir == "n":
                    expected = (glm_model.predict(weather_orig_cwd) + gbm_model.predict(weather_orig_cwd)) / 2
                    assert_frame_equal(pojo_weather_cwd_preds.as_data_frame(), expected.as_data_frame())
                elif change_wind_dir == "s":
                    expected = gbm_model.predict(weather_orig_cwd)
                    assert_frame_equal(pojo_weather_cwd_preds.as_data_frame(), expected.as_data_frame())
        finally:
            # Always release the cluster this test started.
            h2o.cluster().shutdown()


# Run the test case when the module is loaded. TextTestRunner.run() only
# returns the result object, so turn a failed run into a non-zero exit status;
# otherwise a caller that checks the return code would see success.
suite = unittest.TestLoader().loadTestsFromTestCase(TestCombinedPojoImport)
if not unittest.TextTestRunner().run(suite).wasSuccessful():
    raise SystemExit(1)

0 comments on commit 4a791f3

Please sign in to comment.