Example: Deep learning
This example shows how to use ATOM to train and validate a convolutional neural network implemented with Keras and wrapped with scikeras.
We import the MNIST dataset from keras.datasets, a well-known image dataset where the goal is to classify handwritten digits.
Load the data
In [1]:
# Disable annoying tf warnings
import os
os.environ["TF_CPP_MIN_LOG_LEVEL"] = "3"
from tensorflow import get_logger
get_logger().setLevel("ERROR")
import absl.logging
absl.logging.set_verbosity(absl.logging.ERROR)
from atom import ATOMClassifier, ATOMModel
from sklearn.preprocessing import FunctionTransformer
from optuna.pruners import PatientPruner
from optuna.distributions import CategoricalDistribution, IntDistribution
from scikeras.wrappers import KerasClassifier
from tensorflow.keras.datasets import mnist
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Flatten, Conv2D, Dropout
In [2]:
# Download the MNIST dataset
(X_train, y_train), (X_test, y_test) = mnist.load_data()
# Flatten data to follow sklearn's API (2d input)
X_train = X_train.reshape(len(X_train), -1)
X_test = X_test.reshape(len(X_test), -1)
data = (X_train, y_train), (X_test, y_test)
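As a quick sanity check, the arrays are now two-dimensional, as scikit-learn expects. The shapes below assume the standard mnist.load_data() split of 60,000 training and 10,000 test images:

# Each 28x28 image is flattened into a 784-dimensional row vector
print(X_train.shape)  # (60000, 784)
print(X_test.shape)   # (10000, 784)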
In [3]:
# Create the convolutional neural network
class ConvNN(KerasClassifier):
    """Convolutional neural network model."""

    def __repr__(self):
        return f"ConvCNN(epochs={self.epochs}, batch_size={self.batch_size})"

    @property
    def feature_encoder(self):
        """Convert the 2d input to the image's format (len(X), 28, 28, 1)."""
        return FunctionTransformer(
            func=lambda X: X.reshape(X.shape[0], 28, 28, 1),
        )

    @staticmethod
    def _keras_build_fn(**kwargs):
        """Create the model's architecture."""
        model = Sequential()
        model.add(
            Conv2D(
                filters=8,
                kernel_size=3,
                activation="relu",
                input_shape=(28, 28, 1),
            )
        )
        model.add(Conv2D(filters=4, kernel_size=5, activation="relu"))
        model.add(Flatten())
        model.add(Dense(units=10, activation="softmax"))
        model.compile(
            optimizer="adam",
            loss="sparse_categorical_crossentropy",
        )

        return model
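Before handing the wrapper to ATOM, it can be useful to verify that it behaves like a regular scikit-learn estimator. The snippet below is only a minimal sketch (the subset sizes and epochs=1 are arbitrary choices, not part of the example above):

# Standalone sanity check: fit the scikeras wrapper on a small slice of the data
clf = ConvNN(epochs=1, batch_size=128, verbose=0)
clf.fit(X_train[:1000], y_train[:1000])
print(clf.score(X_test[:500], y_test[:500]))  # mean accuracy on a held-out slice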
In [4]:
# Convert the model to an ATOM model
model = ATOMModel(
    estimator=ConvNN(verbose=0),
    acronym="CNN",
    needs_scaling=True,  # Applies automated feature scaling before fitting
    validation="epochs",  # Applies in-training validation on parameter epochs
)
Run the pipeline
In [5]:
atom = ATOMClassifier(*data, n_rows=0.1, verbose=2, random_state=1)
<< ================== ATOM ================== >>

Configuration ==================== >>
Algorithm task: Multiclass classification.

Dataset stats ==================== >>
Shape: (7000, 785)
Train set size: 6000
Test set size: 1000
-------------------------------------
Memory: 5.54 MB
Scaled: False
Outlier values: 41839 (0.9%)
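Because n_rows=0.1, only 10% of the rows are kept. The subsets are stored as attributes on the trainer (target column included), which we can use to confirm the sizes reported above:

# Train and test sets as stored by atom (784 pixel columns + 1 target column)
print(atom.train.shape)  # (6000, 785)
print(atom.test.shape)   # (1000, 785)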
In [6]:
# Like any other model, we can define custom distributions for hyperparameter tuning
atom.run(
    models=model,
    metric="f1_weighted",
    n_trials=12,
    ht_params={
        "distributions": {
            "epochs": IntDistribution(2, 10),
            "batch_size": CategoricalDistribution([128, 256, 512]),
        },
    },
    errors="raise",
)
Training ========================= >>
Models: CNN
Metric: f1_weighted


Running hyperparameter tuning for ConvNN...
| trial | epochs | batch_size | f1_weighted | best_f1_weighted | time_trial | time_ht | state |
| ----- | ------ | ---------- | ----------- | ---------------- | ---------- | ------- | -------- |
| 0 | 5 | 128 | 0.9256 | 0.9256 | 4.756s | 4.756s | COMPLETE |
| 1 | 3 | 512 | 0.8566 | 0.9256 | 3.864s | 8.620s | COMPLETE |
| 2 | 5 | 512 | 0.8968 | 0.9256 | 4.704s | 13.324s | COMPLETE |
| 3 | 3 | 128 | 0.9045 | 0.9256 | 4.009s | 17.333s | COMPLETE |
| 4 | 5 | 128 | 0.9256 | 0.9256 | 0.048s | 17.381s | COMPLETE |
| 5 | 9 | 128 | 0.9277 | 0.9277 | 6.059s | 23.440s | COMPLETE |
| 6 | 9 | 128 | 0.9277 | 0.9277 | 0.041s | 23.481s | COMPLETE |
| 7 | 3 | 128 | 0.9045 | 0.9277 | 0.046s | 23.527s | COMPLETE |
| 8 | 10 | 256 | 0.7922 | 0.9277 | 3.483s | 27.010s | PRUNED |
| 9 | 8 | 128 | 0.8496 | 0.9277 | 3.190s | 30.200s | PRUNED |
| 10 | 7 | 256 | 0.8165 | 0.9277 | 3.153s | 33.353s | PRUNED |
| 11 | 10 | 128 | 0.821 | 0.9277 | 3.498s | 36.852s | PRUNED |
Hyperparameter tuning ---------------------------
Best trial --> 5
Best parameters:
 --> epochs: 9
 --> batch_size: 128
Best evaluation --> f1_weighted: 0.9277
Time elapsed: 36.852s
Fit ---------------------------------------------
Train evaluation --> f1_weighted: 0.9795
Test evaluation --> f1_weighted: 0.9531
Time elapsed: 46.786s
-------------------------------------------------
Time: 01m:24s


Final results ==================== >>
Total time: 01m:31s
-------------------------------------
ConvNN --> f1_weighted: 0.9531
Analyze the results
In [7]:
atom.cnn.trials
Out[7]:
| trial | epochs | batch_size | estimator | f1_weighted | best_f1_weighted | time_trial | time_ht | state |
| --- | --- | --- | --- | --- | --- | --- | --- | --- |
0 | 5 | 128 | ConvCNN(epochs=5, batch_size=128) | 0.925570 | 0.927664 | 4.756187 | 4.756187 | COMPLETE |
1 | 3 | 512 | ConvCNN(epochs=3, batch_size=512) | 0.856597 | 0.927664 | 3.863504 | 8.619691 | COMPLETE |
2 | 5 | 512 | ConvCNN(epochs=5, batch_size=512) | 0.896831 | 0.927664 | 4.704265 | 13.323956 | COMPLETE |
3 | 3 | 128 | ConvCNN(epochs=3, batch_size=128) | 0.904526 | 0.927664 | 4.008634 | 17.332590 | COMPLETE |
4 | 5 | 128 | ConvCNN(epochs=5, batch_size=128) | 0.925570 | 0.927664 | 0.048043 | 17.380633 | COMPLETE |
5 | 9 | 128 | ConvCNN(epochs=9, batch_size=128) | 0.927664 | 0.927664 | 6.059495 | 23.440128 | COMPLETE |
6 | 9 | 128 | ConvCNN(epochs=9, batch_size=128) | 0.927664 | 0.927664 | 0.041037 | 23.481165 | COMPLETE |
7 | 3 | 128 | ConvCNN(epochs=3, batch_size=128) | 0.904526 | 0.927664 | 0.046042 | 23.527207 | COMPLETE |
8 | 10 | 256 | ConvCNN(epochs=10, batch_size=256) | 0.792157 | 0.927664 | 3.483158 | 27.010365 | PRUNED |
9 | 8 | 128 | ConvCNN(epochs=8, batch_size=128) | 0.849556 | 0.927664 | 3.189892 | 30.200257 | PRUNED |
10 | 7 | 256 | ConvCNN(epochs=7, batch_size=256) | 0.816544 | 0.927664 | 3.153120 | 33.353377 | PRUNED |
11 | 10 | 128 | ConvCNN(epochs=10, batch_size=128) | 0.821044 | 0.927664 | 3.498174 | 36.851551 | PRUNED |
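Besides the full trial overview, the winning configuration is stored on the model itself. The attribute names below follow ATOM's hyperparameter tuning API (a short sketch, not executed above):

# Best trial found during hyperparameter tuning
print(atom.cnn.best_trial)
print(atom.cnn.best_params)  # {'epochs': 9, 'batch_size': 128}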
In [8]:
atom.plot_evals(dataset="test+train")
In [9]:
# Use the prediction methods like any other model
atom.cnn.predict_proba(X_train)
Out[9]:
| | 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 |
| --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- |
0 | 2.312403e-05 | 4.813091e-07 | 9.877838e-07 | 1.521687e-02 | 1.477957e-10 | 9.845214e-01 | 1.979216e-09 | 1.859409e-04 | 1.672720e-07 | 5.092833e-05 |
1 | 9.999985e-01 | 1.913526e-14 | 6.021650e-07 | 4.580108e-07 | 1.116983e-10 | 3.819218e-07 | 5.251873e-08 | 3.225117e-08 | 3.185537e-08 | 8.395847e-08 |
2 | 9.198901e-12 | 7.791282e-12 | 2.474698e-05 | 3.879381e-03 | 9.823568e-01 | 9.732957e-10 | 6.507817e-14 | 1.241411e-02 | 6.393744e-07 | 1.324400e-03 |
3 | 2.723569e-06 | 9.855152e-01 | 4.268080e-03 | 8.463531e-04 | 4.408682e-05 | 8.344805e-05 | 8.088706e-06 | 1.071986e-04 | 9.105954e-03 | 1.884496e-05 |
4 | 7.993587e-09 | 3.200859e-09 | 2.385552e-08 | 4.573155e-07 | 1.899138e-03 | 6.267613e-07 | 6.418151e-10 | 2.177684e-03 | 5.593851e-05 | 9.958662e-01 |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
59995 | 7.141304e-07 | 8.722313e-07 | 3.200821e-06 | 5.318429e-04 | 1.037134e-07 | 1.061244e-04 | 1.843534e-09 | 1.376003e-08 | 9.993328e-01 | 2.418581e-05 |
59996 | 5.651224e-06 | 2.765106e-08 | 3.561635e-04 | 9.909723e-01 | 8.901452e-09 | 3.109492e-04 | 1.283445e-09 | 4.298236e-08 | 8.242389e-03 | 1.125573e-04 |
59997 | 1.372623e-07 | 3.574151e-07 | 1.871507e-10 | 4.340361e-05 | 4.912128e-07 | 9.997080e-01 | 2.914571e-08 | 1.305577e-08 | 9.207054e-05 | 1.555610e-04 |
59998 | 2.379551e-04 | 1.090190e-08 | 8.885978e-05 | 4.208754e-05 | 2.906187e-03 | 1.157583e-03 | 9.955516e-01 | 8.701521e-06 | 3.407994e-06 | 3.484051e-06 |
59999 | 3.986048e-03 | 1.135677e-06 | 2.533903e-05 | 1.527677e-05 | 2.258937e-06 | 4.415337e-02 | 1.476940e-06 | 5.445668e-07 | 9.514875e-01 | 3.271214e-04 |
60000 rows × 10 columns
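The predicted labels can also be scored directly with plain scikit-learn metrics. A small sketch that evaluates the model on the original Keras test split with the same weighted F1 metric (note this is the full 10,000-image split, not ATOM's internal test set):

from sklearn.metrics import f1_score

# Predict the holdout digits and score them manually
y_pred = atom.cnn.predict(X_test)
print(f1_score(y_test, y_pred, average="weighted"))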
In [10]:
# Or make plots...
atom.cnn.plot_hyperparameters()
In [11]:
atom.plot_parallel_coordinate()
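As an optional last step, the trained instance or the fitted pipeline can be stored for later reuse. A minimal sketch, assuming ATOM's save and export_pipeline utilities (check the documentation for the exact signatures in your version):

# Persist the atom instance (without the data) and extract the fitted pipeline
atom.save("atom_mnist", save_data=False)
pipeline = atom.cnn.export_pipeline()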