Example: Deep learning
This example shows how to use ATOM to train and validate a convolutional neural network implemented with Keras and wrapped with scikeras.
We use the MNIST dataset from keras.datasets, a well-known image dataset where the task is to classify handwritten digits.
Load the data
In [1]:
# Disable annoying tf warnings
import os
os.environ["TF_CPP_MIN_LOG_LEVEL"] = "3"
from tensorflow import get_logger
get_logger().setLevel('ERROR')
import absl.logging
absl.logging.set_verbosity(absl.logging.ERROR)
from atom import ATOMClassifier, ATOMModel
from sklearn.preprocessing import FunctionTransformer
from optuna.pruners import PatientPruner
from optuna.distributions import CategoricalDistribution, IntDistribution
from scikeras.wrappers import KerasClassifier
from keras.datasets import mnist
from keras.models import Sequential
from keras.layers import Dense, Flatten, Conv2D, Dropout
In [2]:
# Download the MNIST dataset
(X_train, y_train), (X_test, y_test) = mnist.load_data()
# Flatten data to follow sklearn's API (2d input)
X_train = X_train.reshape(len(X_train), -1)
X_test = X_test.reshape(len(X_test), -1)
data = (X_train, y_train), (X_test, y_test)
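After flattening, each image becomes a single row of 784 pixel values. A quick sanity check (plain NumPy shapes, nothing ATOM-specific):

print(X_train.shape)  # (60000, 784): standard MNIST split of 60,000 training images, 28x28 = 784 pixels
print(X_test.shape)   # (10000, 784): 10,000 test images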
In [3]:
# Create the convolutional neural network
class ConvNN(KerasClassifier):
    """Convolutional neural network model."""

    @property
    def feature_encoder(self):
        """Convert the 2d input to the image's format (len(X), 28, 28, 1)."""
        return FunctionTransformer(
            func=lambda X: X.reshape(X.shape[0], 28, 28, 1),
        )

    @staticmethod
    def _keras_build_fn(**kwargs):
        """Create the model's architecture."""
        model = Sequential()
        model.add(
            Conv2D(
                filters=8,
                kernel_size=3,
                activation="relu",
                input_shape=(28, 28, 1),
            )
        )
        model.add(Conv2D(filters=4, kernel_size=5, activation="relu"))
        model.add(Flatten())
        model.add(Dense(units=10, activation="softmax"))
        model.compile(
            optimizer="adam",
            loss="sparse_categorical_crossentropy",
        )

        return model
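Because scikeras wrappers follow the scikit-learn estimator API (feature_encoder is scikeras's hook for transforming X before it reaches Keras), the class above can also be fitted on its own, outside ATOM. A minimal sketch, with illustrative (not tuned) parameter values:

# Optional standalone check of the wrapper on a small subset
clf = ConvNN(epochs=1, batch_size=128, verbose=0)
clf.fit(X_train[:1000], y_train[:1000])
print(clf.score(X_test[:1000], y_test[:1000]))  # mean accuracy, per the sklearn classifier API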
In [4]:
# Convert the model to an ATOM model
model = ATOMModel(
    estimator=ConvNN(verbose=0),
    acronym="CNN",
    needs_scaling=True,  # Applies automated feature scaling before fitting
    has_validation="epochs",  # Applies in-training validation on parameter epochs
)
Run the pipeline
In [5]:
atom = ATOMClassifier(*data, n_rows=0.1, verbose=2, random_state=1)
<< ================== ATOM ================== >>

Algorithm task: multiclass classification.

Dataset stats ==================== >>
Shape: (7000, 785)
Train set size: 6000
Test set size: 1000
-------------------------------------
Memory: 5.54 MB
Scaled: False
Outlier values: 41839 (0.9%)
In [6]:
# Like any other model, we can define custom distributions for hyperparameter tuning
atom.run(
    models=model,
    metric="f1_weighted",
    n_trials=12,
    ht_params={
        "distributions": {
            "epochs": IntDistribution(2, 10),
            "batch_size": CategoricalDistribution([128, 256, 512]),
        },
    }
)
Training ========================= >>
Models: CNN
Metric: f1_weighted

Running hyperparameter tuning for ConvNN...

| trial | epochs | batch_size | f1_weighted | best_f1_weighted | time_trial | time_ht | state    |
| ----- | ------ | ---------- | ----------- | ---------------- | ---------- | ------- | -------- |
| 0     | 5      | 128        | 0.9112      | 0.9112           | 8.853s     | 8.853s  | COMPLETE |
| 1     | 3      | 512        | 0.8685      | 0.9112           | 5.241s     | 14.094s | COMPLETE |
| 2     | 5      | 512        | 0.9109      | 0.9112           | 7.273s     | 21.367s | COMPLETE |
| 3     | 3      | 128        | 0.9021      | 0.9112           | 6.097s     | 27.465s | COMPLETE |
| 4     | 5      | 128        | 0.9112      | 0.9112           | 1.242s     | 28.707s | COMPLETE |
| 5     | 9      | 128        | 0.9302      | 0.9302           | 13.197s    | 41.904s | COMPLETE |
| 6     | 9      | 128        | 0.9302      | 0.9302           | 1.465s     | 43.369s | COMPLETE |
| 7     | 3      | 128        | 0.9021      | 0.9302           | 1.198s     | 44.566s | COMPLETE |
| 8     | 0/10   | 256        | 0.813       | 0.9302           | 2.820s     | 47.386s | PRUNED   |
| 9     | 8      | 128        | 0.9382      | 0.9382           | 12.674s    | 01m:00s | COMPLETE |
| 10    | 0/7    | 256        | 0.7842      | 0.9382           | 2.854s     | 01m:03s | PRUNED   |
| 11    | 8      | 128        | 0.9382      | 0.9382           | 1.199s     | 01m:04s | COMPLETE |

Hyperparameter tuning ---------------------------
Best trial --> 9
Best parameters:
 --> epochs: 8
 --> batch_size: 128
Best evaluation --> f1_weighted: 0.9382
Time elapsed: 01m:04s

Fit ---------------------------------------------
Train evaluation --> f1_weighted: 0.9922
Test evaluation --> f1_weighted: 0.95
Time elapsed: 16.514s
-------------------------------------------------
Total time: 01m:21s

Final results ==================== >>
Total time: 01m:28s
-------------------------------------
ConvNN --> f1_weighted: 0.95
Analyze the results
In [7]:
atom.cnn.trials
Out[7]:
| trial | params | estimator | score | time_trial | time_ht | state |
|---|---|---|---|---|---|---|
| 0 | {'epochs': 5, 'batch_size': 128} | ConvNN(\n\tmodel=None\n\tbuild_fn=None\n\twarm... | 0.911245 | 8.853131 | 8.853131 | COMPLETE |
| 1 | {'epochs': 3, 'batch_size': 512} | ConvNN(\n\tmodel=None\n\tbuild_fn=None\n\twarm... | 0.868467 | 5.240928 | 14.094059 | COMPLETE |
| 2 | {'epochs': 5, 'batch_size': 512} | ConvNN(\n\tmodel=None\n\tbuild_fn=None\n\twarm... | 0.910901 | 7.273163 | 21.367222 | COMPLETE |
| 3 | {'epochs': 3, 'batch_size': 128} | ConvNN(\n\tmodel=None\n\tbuild_fn=None\n\twarm... | 0.902138 | 6.097407 | 27.464629 | COMPLETE |
| 4 | {'epochs': 5, 'batch_size': 128} | ConvNN(\n\tmodel=None\n\tbuild_fn=None\n\twarm... | 0.911245 | 1.241957 | 28.706586 | COMPLETE |
| 5 | {'epochs': 9, 'batch_size': 128} | ConvNN(\n\tmodel=None\n\tbuild_fn=None\n\twarm... | 0.930152 | 13.196992 | 41.903578 | COMPLETE |
| 6 | {'epochs': 9, 'batch_size': 128} | ConvNN(\n\tmodel=None\n\tbuild_fn=None\n\twarm... | 0.930152 | 1.464945 | 43.368523 | COMPLETE |
| 7 | {'epochs': 3, 'batch_size': 128} | ConvNN(\n\tmodel=None\n\tbuild_fn=None\n\twarm... | 0.902138 | 1.197684 | 44.566207 | COMPLETE |
| 8 | {'epochs': '0/10', 'batch_size': 256} | ConvNN(\n\tmodel=None\n\tbuild_fn=None\n\twarm... | 0.812996 | 2.819621 | 47.385828 | PRUNED |
| 9 | {'epochs': 8, 'batch_size': 128} | ConvNN(\n\tmodel=None\n\tbuild_fn=None\n\twarm... | 0.938189 | 12.674373 | 60.060201 | COMPLETE |
| 10 | {'epochs': '0/7', 'batch_size': 256} | ConvNN(\n\tmodel=None\n\tbuild_fn=None\n\twarm... | 0.784233 | 2.854331 | 62.914532 | PRUNED |
| 11 | {'epochs': 8, 'batch_size': 128} | ConvNN(\n\tmodel=None\n\tbuild_fn=None\n\twarm... | 0.938189 | 1.198821 | 64.113353 | COMPLETE |
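The winning trial can also be queried directly from the model. A short sketch, assuming ATOM's usual model attributes for hyperparameter tuning:

atom.cnn.best_trial   # the Optuna trial that scored highest
atom.cnn.best_params  # {'epochs': 8, 'batch_size': 128}, matching trial 9 above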
In [8]:
atom.plot_evals(dataset="test+train")
In [9]:
# Use the prediction methods like any other model
atom.cnn.predict_proba(X_train)
Out[9]:
| | 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 |
|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 8.299488e-10 | 2.628172e-12 | 1.256850e-08 | 8.065028e-02 | 4.577434e-15 | 9.193031e-01 | 6.504651e-16 | 8.309912e-06 | 1.605456e-10 | 3.829821e-05 |
| 1 | 9.999994e-01 | 2.084938e-14 | 1.802991e-07 | 9.142812e-08 | 2.974247e-13 | 1.974890e-08 | 1.239963e-07 | 6.981888e-10 | 7.344214e-08 | 4.191539e-09 |
| 2 | 1.046616e-13 | 2.978779e-14 | 1.024991e-06 | 9.082198e-06 | 9.996915e-01 | 7.763704e-12 | 3.392148e-10 | 3.846663e-05 | 1.698541e-07 | 2.597982e-04 |
| 3 | 2.370126e-06 | 9.976953e-01 | 1.409927e-03 | 1.626272e-04 | 2.117811e-04 | 3.694122e-06 | 1.530232e-07 | 2.715720e-04 | 2.421655e-04 | 3.429227e-07 |
| 4 | 1.274777e-09 | 7.030934e-09 | 3.012307e-11 | 2.483380e-07 | 1.353990e-03 | 1.034125e-06 | 1.383914e-12 | 4.425891e-04 | 1.408601e-04 | 9.980612e-01 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 59995 | 3.820039e-09 | 1.538996e-09 | 8.258637e-07 | 2.749041e-04 | 6.960596e-12 | 5.976085e-05 | 3.534885e-13 | 1.802701e-09 | 9.996623e-01 | 2.221414e-06 |
| 59996 | 1.172233e-08 | 8.490205e-13 | 6.097040e-05 | 9.994088e-01 | 5.236348e-15 | 2.233540e-04 | 7.571549e-17 | 1.496442e-10 | 3.029155e-04 | 3.923683e-06 |
| 59997 | 1.421463e-12 | 1.254891e-12 | 6.674051e-17 | 3.223426e-08 | 7.350002e-10 | 9.999967e-01 | 1.345264e-13 | 3.264989e-12 | 3.094598e-08 | 3.277646e-06 |
| 59998 | 8.901869e-05 | 5.740756e-11 | 1.169889e-06 | 6.678182e-07 | 9.668894e-06 | 2.655783e-04 | 9.996337e-01 | 2.608537e-08 | 2.759978e-07 | 1.828723e-08 |
| 59999 | 2.615832e-05 | 5.634042e-08 | 1.794981e-05 | 4.938689e-06 | 8.963324e-07 | 1.321769e-01 | 4.557107e-08 | 2.386909e-07 | 8.638437e-01 | 3.929073e-03 |

60000 rows × 10 columns
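Each row holds the class probabilities for one training image, so the predicted digit is the column with the highest probability. Since the output above is a pandas DataFrame, this can be verified with plain pandas (no additional ATOM API assumed):

probs = atom.cnn.predict_proba(X_train)
print(probs.idxmax(axis=1).head())  # predicted digit per image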
In [10]:
# Or make plots...
atom.cnn.plot_hyperparameters()
In [11]:
atom.plot_parallel_coordinate()