Example: Multioutput regression
This example shows how to use ATOM to make predictions on a multioutput regression dataset. One of the models used is an MLP regressor implemented with Keras via scikeras.
The data used is a synthetic dataset created using sklearn's make_regression function.
Load the data
In [1]:
# Disable annoying tf warnings
import os
os.environ["TF_CPP_MIN_LOG_LEVEL"] = "3"
from tensorflow import get_logger
get_logger().setLevel('ERROR')
import numpy as np
from atom import ATOMRegressor, ATOMModel
from sklearn.datasets import make_regression
from scikeras.wrappers import KerasRegressor
from keras.models import Sequential
from keras.layers import Dense
In [2]:
# Create data
X, y = make_regression(n_samples=1000, n_features=10, n_informative=5, n_targets=3)
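A quick shape check confirms the setup: with these parameters, make_regression returns 10 feature columns and 3 target columns.

# Sanity check: 10 features per sample, 3 target columns
print(X.shape, y.shape)  # (1000, 10) (1000, 3)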
In [3]:
# Create the neural network
class NeuralNetwork(KerasRegressor):
    """Multioutput multilayer perceptron."""

    @staticmethod
    def _keras_build_fn(n_inputs, n_outputs, **kwargs):
        """Create the model's architecture."""
        model = Sequential()
        model.add(Dense(20, input_dim=n_inputs, activation="relu"))
        model.add(Dense(20, activation="relu"))
        model.add(Dense(n_outputs))
        model.compile(loss="mse", optimizer="adam")
        return model
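Before handing the wrapper to ATOM, it can be smoke-tested on its own. A minimal sketch, assuming scikeras routes the n_inputs and n_outputs keyword arguments to _keras_build_fn just as in the cell above:

# Standalone fit on the raw data (illustrative; only a few epochs)
nn = NeuralNetwork(n_inputs=10, n_outputs=3, epochs=5, verbose=0)
nn.fit(X, y)
print(nn.predict(X).shape)  # (1000, 3)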
In [4]:
# Convert the model to an ATOM model
model = ATOMModel(
    estimator=NeuralNetwork(n_inputs=5, n_outputs=y.shape[1], epochs=100, verbose=0),
    name="NN",
    needs_scaling=True,  # Applies automated feature scaling before fitting
    native_multioutput=True,  # Do not use a multioutput meta-estimator wrapper
)
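For contrast: when native_multioutput=False, ATOM falls back to sklearn's meta-estimator, which fits one independent regressor per target column. Roughly (an illustrative sketch of the wrapping, not ATOM's exact internals):

from sklearn.multioutput import MultiOutputRegressor
from sklearn.linear_model import Lasso

# One clone of the base estimator is fitted per target column
wrapped = MultiOutputRegressor(Lasso())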
Run the pipeline
In [5]:
atom = ATOMRegressor(X, y=y, verbose=2, random_state=1)
<< ================== ATOM ================== >>

Algorithm task: multioutput regression.

Dataset stats ==================== >>
Shape: (1000, 13)
Train set size: 800
Test set size: 200
-------------------------------------
Memory: 104.13 kB
Scaled: True
Outlier values: 36 (0.3%)
In [6]:
# Show the models that natively support multioutput tasks
atom.available_models()[["acronym", "model", "native_multioutput"]]
Out[6]:
|    | acronym | model | native_multioutput |
|----|---------|-------|--------------------|
| 0  | AdaB    | AdaBoost | False |
| 1  | ARD     | AutomaticRelevanceDetermination | False |
| 2  | Bag     | Bagging | False |
| 3  | BR      | BayesianRidge | False |
| 4  | CatB    | CatBoost | False |
| 5  | Tree    | DecisionTree | True |
| 6  | Dummy   | Dummy | False |
| 7  | EN      | ElasticNet | False |
| 8  | ETree   | ExtraTree | True |
| 9  | ET      | ExtraTrees | True |
| 10 | GP      | GaussianProcess | False |
| 11 | GBM     | GradientBoosting | False |
| 12 | Huber   | HuberRegression | False |
| 13 | hGBM    | HistGradientBoosting | False |
| 14 | KNN     | KNearestNeighbors | True |
| 15 | Lasso   | Lasso | False |
| 16 | Lars    | LeastAngleRegression | False |
| 17 | LGB     | LightGBM | False |
| 18 | lSVM    | LinearSVM | False |
| 19 | MLP     | MultiLayerPerceptron | False |
| 20 | OLS     | OrdinaryLeastSquares | False |
| 21 | OMP     | OrthogonalMatchingPursuit | False |
| 22 | PA      | PassiveAggressive | False |
| 23 | RNN     | RadiusNearestNeighbors | True |
| 24 | RF      | RandomForest | True |
| 25 | Ridge   | Ridge | False |
| 26 | SGD     | StochasticGradientDescent | False |
| 27 | SVM     | SupportVectorMachine | False |
| 28 | XGB     | XGBoost | False |
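Since available_models returns a pandas DataFrame, the overview is easy to narrow down, e.g. to only the natively multioutput models:

# Keep only the models with native multioutput support
models_df = atom.available_models()
print(models_df[models_df["native_multioutput"]][["acronym", "model"]])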
In [7]:
# The dataset only has 5 informative features; let's remove the rest
# When the solver has no native multioutput support, specify RFE's
# importance_getter parameter and return the mean of the coefficients
# over the target columns
atom.feature_selection(
    strategy="rfe",
    solver="OLS",  # This becomes MultiOutputRegressor(OLS)
    n_features=5,
    importance_getter=lambda x: np.mean([e.coef_ for e in x.estimators_], axis=0),
)
Fitting FeatureSelector...
Performing feature selection ...
--> rfe selected 5 features from the dataset.
   --> Dropping feature x5 (rank 2).
   --> Dropping feature x6 (rank 3).
   --> Dropping feature x7 (rank 5).
   --> Dropping feature x8 (rank 4).
   --> Dropping feature x9 (rank 6).
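The same selection can be reproduced with scikit-learn directly. A rough standalone equivalent, assuming OLS maps to LinearRegression (which, as noted in the cell above, gets wrapped in MultiOutputRegressor):

from sklearn.feature_selection import RFE
from sklearn.linear_model import LinearRegression
from sklearn.multioutput import MultiOutputRegressor

rfe = RFE(
    estimator=MultiOutputRegressor(LinearRegression()),
    n_features_to_select=5,
    # Average the coefficients of the per-target estimators
    importance_getter=lambda x: np.mean([e.coef_ for e in x.estimators_], axis=0),
)
rfe.fit(X, y)
print(rfe.support_)  # Boolean mask over the 10 original features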
In [8]:
# Let's train a natively multioutput model, a non-native one, and our custom model
atom.run(models=["Lasso", "RF", model], metric="mse")
Training ========================= >>
Models: Lasso, RF, NN
Metric: neg_mean_squared_error


Results for Lasso:
Fit ---------------------------------------------
Train evaluation --> neg_mean_squared_error: -4.4466
Test evaluation --> neg_mean_squared_error: -4.653
Time elapsed: 0.021s
-------------------------------------------------
Total time: 0.021s


Results for RandomForest:
Fit ---------------------------------------------
Train evaluation --> neg_mean_squared_error: -256.5602
Test evaluation --> neg_mean_squared_error: -1756.3085
Time elapsed: 0.429s
-------------------------------------------------
Total time: 0.429s


Results for NeuralNetwork:
Fit ---------------------------------------------
Train evaluation --> neg_mean_squared_error: -87.5797
Test evaluation --> neg_mean_squared_error: -89.1182
Time elapsed: 2.596s
-------------------------------------------------
Total time: 2.596s


Final results ==================== >>
Total time: 3.053s
-------------------------------------
Lasso         --> neg_mean_squared_error: -4.653 !
RandomForest  --> neg_mean_squared_error: -1756.3085 ~
NeuralNetwork --> neg_mean_squared_error: -89.1182
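A score can also be recomputed by hand on the test set. A sketch, assuming the model's predict method applies the fitted pipeline (including the automated scaling) before the estimator:

from sklearn.metrics import mean_squared_error

# Recompute the test MSE for the custom model (sign flipped vs. the log above)
print(mean_squared_error(atom.y_test, atom["NN"].predict(atom.X_test)))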
In [9]:
# And check which of the models used a meta-estimator wrapper
for m in atom.models:
    print(f"Estimator for {m} is: {atom[m].estimator}")
Estimator for Lasso is: MultiOutputRegressor(estimator=Lasso(random_state=1), n_jobs=1)
Estimator for RF is: RandomForestRegressor(n_jobs=1, random_state=1)
Estimator for NN is: NeuralNetwork(
    model=None
    build_fn=None
    warm_start=False
    random_state=None
    optimizer=rmsprop
    loss=None
    metrics=None
    batch_size=None
    validation_batch_size=None
    verbose=0
    callbacks=None
    validation_split=0.0
    shuffle=True
    run_eagerly=False
    epochs=100
    n_inputs=5
    n_outputs=3
    name=NN
    needs_scaling=True
    native_multioutput=True
    has_validation=None
)
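The same check can be made programmatic, building on the estimators printed above:

from sklearn.multioutput import MultiOutputRegressor

# Lasso lacks native multioutput support, so ATOM wrapped it
print(isinstance(atom["Lasso"].estimator, MultiOutputRegressor))  # True
print(isinstance(atom["RF"].estimator, MultiOutputRegressor))    # False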
Analyze the results
In [10]:
# Use the target parameter in plots to specify which target column to use
atom.plot_residuals(target=2)
In [11]:
with atom.canvas(3, 1, figsize=(900, 1300)):
    atom.plot_errors(target=0)
    atom.plot_errors(target=1)
    atom.plot_errors(target=2)