Example: Multioutput regression¶
This example shows how to use ATOM to make predictions on a multioutput regression dataset. One of the models used is an MLP regressor implemented with Keras using scikeras.
The data used is a synthetic dataset created using sklearn's make_regression function.
Load the data¶
In [1]:
Copied!
# Disable annoying tf warnings
import os
os.environ["TF_CPP_MIN_LOG_LEVEL"] = "3"
from tensorflow import get_logger
get_logger().setLevel('ERROR')
import numpy as np
from atom import ATOMRegressor, ATOMModel
from sklearn.datasets import make_regression
from scikeras.wrappers import KerasRegressor
from keras.models import Sequential
from keras.layers import Dense
# Disable annoying tf warnings
import os
os.environ["TF_CPP_MIN_LOG_LEVEL"] = "3"
from tensorflow import get_logger
get_logger().setLevel('ERROR')
import numpy as np
from atom import ATOMRegressor, ATOMModel
from sklearn.datasets import make_regression
from scikeras.wrappers import KerasRegressor
from keras.models import Sequential
from keras.layers import Dense
WARNING:tensorflow:From C:\Users\Mavs\Documents\Python\ATOM\venv311\Lib\site-packages\keras\src\losses.py:2976: The name tf.losses.sparse_softmax_cross_entropy is deprecated. Please use tf.compat.v1.losses.sparse_softmax_cross_entropy instead.
In [2]:
Copied!
# Create a synthetic dataset: 1000 samples, 10 features (5 of them informative)
# and 3 target columns, so y has one column per target
X, y = make_regression(n_samples=1000, n_features=10, n_informative=5, n_targets=3)
# Create a synthetic dataset: 1000 samples, 10 features (5 of them informative)
# and 3 target columns, so y has one column per target
X, y = make_regression(n_samples=1000, n_features=10, n_informative=5, n_targets=3)
In [3]:
Copied!
# Create the neural network
class NeuralNetwork(KerasRegressor):
    """Keras multilayer perceptron that predicts all targets at once.

    The network stacks two hidden layers of 20 ReLU units and an output
    layer of `n_outputs` units with no activation specified.
    """

    @staticmethod
    def _keras_build_fn(n_inputs, n_outputs, **kwargs):
        """Build and compile the network.

        Parameters
        ----------
        n_inputs: int
            Number of input features expected by the first layer.
        n_outputs: int
            Number of units in the output layer.
        **kwargs
            Accepted but not used by this function.

        Returns
        -------
        Sequential
            Model compiled with the "mse" loss and the "adam" optimizer.

        """
        layers = [
            Dense(20, input_dim=n_inputs, activation="relu"),
            Dense(20, activation="relu"),
            Dense(n_outputs),
        ]
        network = Sequential(layers)
        network.compile(loss="mse", optimizer="adam")
        return network

    def __repr__(self):
        """Return a fixed, compact representation of the estimator."""
        return "NeuralNetwork()"
# Create the neural network
class NeuralNetwork(KerasRegressor):
    """Keras multilayer perceptron that predicts all targets at once.

    The network stacks two hidden layers of 20 ReLU units and an output
    layer of `n_outputs` units with no activation specified.
    """

    @staticmethod
    def _keras_build_fn(n_inputs, n_outputs, **kwargs):
        """Build and compile the network.

        Parameters
        ----------
        n_inputs: int
            Number of input features expected by the first layer.
        n_outputs: int
            Number of units in the output layer.
        **kwargs
            Accepted but not used by this function.

        Returns
        -------
        Sequential
            Model compiled with the "mse" loss and the "adam" optimizer.

        """
        layers = [
            Dense(20, input_dim=n_inputs, activation="relu"),
            Dense(20, activation="relu"),
            Dense(n_outputs),
        ]
        network = Sequential(layers)
        network.compile(loss="mse", optimizer="adam")
        return network

    def __repr__(self):
        """Return a fixed, compact representation of the estimator."""
        return "NeuralNetwork()"
In [4]:
Copied!
# Convert the model to an ATOM model
# n_inputs=5 matches the number of features kept by the feature selection
# step further down (n_features=5); n_outputs is the number of target columns
model = ATOMModel(
estimator=NeuralNetwork(n_inputs=5, n_outputs=y.shape[1], epochs=100, verbose=0),
name="NN",
needs_scaling=True, # Applies automated feature scaling before fitting
native_multioutput=True, # Do not use a multioutput meta-estimator wrapper
)
# Convert the model to an ATOM model
# n_inputs=5 matches the number of features kept by the feature selection
# step further down (n_features=5); n_outputs is the number of target columns
model = ATOMModel(
estimator=NeuralNetwork(n_inputs=5, n_outputs=y.shape[1], epochs=100, verbose=0),
name="NN",
needs_scaling=True, # Applies automated feature scaling before fitting
native_multioutput=True, # Do not use a multioutput meta-estimator wrapper
)
Run the pipeline¶
In [5]:
Copied!
# Initialize atom with the features and the multi-column target
atom = ATOMRegressor(X, y=y, verbose=2, random_state=1)
# Initialize atom with the features and the multi-column target
atom = ATOMRegressor(X, y=y, verbose=2, random_state=1)
<< ================== ATOM ================== >> Configuration ==================== >> Algorithm task: Multioutput regression. Dataset stats ==================== >> Shape: (1000, 13) Train set size: 800 Test set size: 200 ------------------------------------- Memory: 104.13 kB Scaled: True Outlier values: 29 (0.3%)
In [6]:
Copied!
# Show the predefined models that natively support multioutput tasks,
# i.e., those that don't need a multioutput meta-estimator wrapper
atom.available_models(native_multioutput=True)
# Show the predefined models that natively support multioutput tasks,
# i.e., those that don't need a multioutput meta-estimator wrapper
atom.available_models(native_multioutput=True)
Out[6]:
acronym | fullname | estimator | module | handles_missing | needs_scaling | accepts_sparse | native_multilabel | native_multioutput | validation | supports_engines | |
---|---|---|---|---|---|---|---|---|---|---|---|
0 | Tree | DecisionTree | DecisionTreeRegressor | sklearn.tree._classes | True | False | True | True | True | None | sklearn |
1 | ETree | ExtraTree | ExtraTreeRegressor | sklearn.tree._classes | False | False | True | True | True | None | sklearn |
2 | ET | ExtraTrees | ExtraTreesRegressor | sklearn.ensemble._forest | False | False | True | True | True | None | sklearn |
3 | KNN | KNearestNeighbors | KNeighborsRegressor | sklearn.neighbors._regression | False | True | True | True | True | None | sklearn, sklearnex, cuml |
4 | RNN | RadiusNearestNeighbors | RadiusNeighborsRegressor | sklearn.neighbors._regression | False | True | True | True | True | None | sklearn |
5 | RF | RandomForest | RandomForestRegressor | sklearn.ensemble._forest | False | False | True | True | True | None | sklearn, sklearnex, cuml |
In [7]:
Copied!
# Note we only added 5 informative features to the dataset, let's remove the rest
# If we use a model with no native support for multioutput as solver, specify the
# rfe's importance_getter parameter and return the mean of the coefficients over the
# target columns
atom.feature_selection(
strategy="rfe",
solver="ols", # This becomes MultiOutputRegressor(OLS)
n_features=5,
# x is the fitted solver; average the coef_ of the estimators in x.estimators_
# (axis=0) to get a single importance value per feature
importance_getter=lambda x: np.mean([e.coef_ for e in x.estimators_], axis=0),
)
# Note we only added 5 informative features to the dataset, let's remove the rest
# If we use a model with no native support for multioutput as solver, specify the
# rfe's importance_getter parameter and return the mean of the coefficients over the
# target columns
atom.feature_selection(
strategy="rfe",
solver="ols", # This becomes MultiOutputRegressor(OLS)
n_features=5,
# x is the fitted solver; average the coef_ of the estimators in x.estimators_
# (axis=0) to get a single importance value per feature
importance_getter=lambda x: np.mean([e.coef_ for e in x.estimators_], axis=0),
)
Fitting FeatureSelector... Performing feature selection ... --> rfe selected 5 features from the dataset. --> Dropping feature x0 (rank 5). --> Dropping feature x4 (rank 6). --> Dropping feature x5 (rank 2). --> Dropping feature x7 (rank 3). --> Dropping feature x9 (rank 4).
In [8]:
Copied!
# Let's train a non-native model (Lasso), a native model (RF) and our custom model (NN)
# NOTE(review): errors="raise" presumably re-raises any exception during training - confirm
atom.run(models=["Lasso", "RF", model], metric="mse", errors="raise")
# Let's train a non-native model (Lasso), a native model (RF) and our custom model (NN)
# NOTE(review): errors="raise" presumably re-raises any exception during training - confirm
atom.run(models=["Lasso", "RF", model], metric="mse", errors="raise")
Training ========================= >> Models: Lasso, RF, NN Metric: mse Results for Lasso: Fit --------------------------------------------- Train evaluation --> mse: -4.6709 Test evaluation --> mse: -4.4039 Time elapsed: 0.031s ------------------------------------------------- Time: 0.031s Results for RandomForest: Fit --------------------------------------------- Train evaluation --> mse: -197.8132 Test evaluation --> mse: -1436.9264 Time elapsed: 0.385s ------------------------------------------------- Time: 0.385s Results for NeuralNetwork: Fit --------------------------------------------- Train evaluation --> mse: -113.9745 Test evaluation --> mse: -131.0996 Time elapsed: 4.949s ------------------------------------------------- Time: 4.949s Final results ==================== >> Total time: 5.367s ------------------------------------- Lasso --> mse: -4.4039 ! RandomForest --> mse: -1436.9264 ~ NeuralNetwork --> mse: -131.0996
In [9]:
Copied!
# And check which of the models used a meta-estimator wrapper
# (the printed estimator shows the wrapper, if any, around the model)
for m in atom.models:
    print(f"Estimator for {m} is: {atom[m].estimator}")
# And check which of the models used a meta-estimator wrapper
# (the printed estimator shows the wrapper, if any, around the model)
for m in atom.models:
    print(f"Estimator for {m} is: {atom[m].estimator}")
Estimator for Lasso is: MultiOutputRegressor(estimator=Lasso(), n_jobs=1) Estimator for RF is: RandomForestRegressor(n_jobs=1, random_state=1) Estimator for NN is: NeuralNetwork()
Analyze the results¶
In [10]:
Copied!
# Use the target parameter in plots to specify which target column to use
# (here target=2; presumably a 0-based index, i.e., the last of the 3 target columns)
atom.plot_residuals(target=2)
# Use the target parameter in plots to specify which target column to use
# (here target=2; presumably a 0-based index, i.e., the last of the 3 target columns)
atom.plot_residuals(target=2)
In [11]:
Copied!
# Draw one error plot per target column (0, 1 and 2) inside a shared canvas
with atom.canvas(3, 1, figsize=(900, 1300)):
    atom.plot_errors(target=0)
    atom.plot_errors(target=1)
    atom.plot_errors(target=2)
# Draw one error plot per target column (0, 1 and 2) inside a shared canvas
with atom.canvas(3, 1, figsize=(900, 1300)):
    atom.plot_errors(target=0)
    atom.plot_errors(target=1)
    atom.plot_errors(target=2)