Example: Deep learning
This example shows how to use ATOM to train and validate a convolutional neural network implemented with Keras and wrapped with scikeras.
We import the MNIST dataset from keras.datasets, a well-known image dataset where the goal is to classify handwritten digits.
Load the data
In [1]:
# Disable annoying tf warnings
import os
os.environ["TF_CPP_MIN_LOG_LEVEL"] = "3"
from tensorflow import get_logger
get_logger().setLevel("ERROR")
import absl.logging
absl.logging.set_verbosity(absl.logging.ERROR)
from atom import ATOMClassifier, ATOMModel
from sklearn.preprocessing import FunctionTransformer
from optuna.pruners import PatientPruner
from optuna.distributions import CategoricalDistribution, IntDistribution
from scikeras.wrappers import KerasClassifier
from tensorflow.keras.datasets import mnist
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Flatten, Conv2D, Dropout
In [2]:
# Download the MNIST dataset
(X_train, y_train), (X_test, y_test) = mnist.load_data()
# Flatten data to follow sklearn's API (2d input)
X_train = X_train.reshape(len(X_train), -1)
X_test = X_test.reshape(len(X_test), -1)
data = (X_train, y_train), (X_test, y_test)
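As a quick sanity check, the arrays are now two-dimensional, as scikit-learn expects. The shapes below assume the standard mnist.load_data() split of 60,000 training and 10,000 test images:

# Each 28x28 image is flattened into a 784-dimensional row vector
print(X_train.shape)  # (60000, 784)
print(X_test.shape)   # (10000, 784)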
In [3]:
# Create the convolutional neural network
class ConvNN(KerasClassifier):
    """Convolutional neural network model."""

    def __repr__(self):
        return f"ConvCNN(epochs={self.epochs}, batch_size={self.batch_size})"

    @property
    def feature_encoder(self):
        """Convert the 2d input to the image's format (len(X), 28, 28, 1)."""
        return FunctionTransformer(
            func=lambda X: X.reshape(X.shape[0], 28, 28, 1),
        )

    @staticmethod
    def _keras_build_fn(**kwargs):
        """Create the model's architecture."""
        model = Sequential()
        model.add(
            Conv2D(
                filters=8,
                kernel_size=3,
                activation="relu",
                input_shape=(28, 28, 1),
            )
        )
        model.add(Conv2D(filters=4, kernel_size=5, activation="relu"))
        model.add(Flatten())
        model.add(Dense(units=10, activation="softmax"))
        model.compile(
            optimizer="adam",
            loss="sparse_categorical_crossentropy",
        )

        return model
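Before handing the wrapper to ATOM, it can be useful to verify that it behaves like a regular scikit-learn estimator. The snippet below is only a minimal sketch (the subset sizes and epochs=1 are arbitrary choices, not part of the example above):

# Standalone sanity check: fit the scikeras wrapper on a small slice of the data
clf = ConvNN(epochs=1, batch_size=128, verbose=0)
clf.fit(X_train[:1000], y_train[:1000])
print(clf.score(X_test[:500], y_test[:500]))  # mean accuracy on a held-out slice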
In [4]:
# Convert the model to an ATOM model
model = ATOMModel(
    estimator=ConvNN(verbose=0),
    acronym="CNN",
    needs_scaling=True,  # Applies automated feature scaling before fitting
    validation="epochs",  # Applies in-training validation on parameter epochs
)
Run the pipeline
In [5]:
atom = ATOMClassifier(*data, n_rows=0.1, verbose=2, random_state=1)
<< ================== ATOM ================== >>

Configuration ==================== >>
Algorithm task: Multiclass classification.

Dataset stats ==================== >>
Shape: (7000, 785)
Train set size: 6000
Test set size: 1000
-------------------------------------
Memory: 5.54 MB
Scaled: False
Outlier values: 41839 (0.9%)
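Because n_rows=0.1, only 10% of the rows are kept. The subsets are stored as attributes on the trainer (target column included), which we can use to confirm the sizes reported above:

# Train and test sets as stored by atom (784 pixel columns + 1 target column)
print(atom.train.shape)  # (6000, 785)
print(atom.test.shape)   # (1000, 785)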
In [6]:
# Like any other model, we can define custom distributions for hyperparameter tuning
atom.run(
    models=model,
    metric="f1_weighted",
    n_trials=12,
    ht_params={
        "distributions": {
            "epochs": IntDistribution(2, 10),
            "batch_size": CategoricalDistribution([128, 256, 512]),
        },
    },
    errors="raise",
)
Training ========================= >>
Models: CNN
Metric: f1_weighted


Running hyperparameter tuning for ConvNN...
| trial | epochs | batch_size | f1_weighted | best_f1_weighted | time_trial | time_ht | state |
| ----- | ------ | ---------- | ----------- | ---------------- | ---------- | ------- | -------- |
| 0 | 5 | 128 | 0.9256 | 0.9256 | 4.756s | 4.756s | COMPLETE |
| 1 | 3 | 512 | 0.8566 | 0.9256 | 3.864s | 8.620s | COMPLETE |
| 2 | 5 | 512 | 0.8968 | 0.9256 | 4.704s | 13.324s | COMPLETE |
| 3 | 3 | 128 | 0.9045 | 0.9256 | 4.009s | 17.333s | COMPLETE |
| 4 | 5 | 128 | 0.9256 | 0.9256 | 0.048s | 17.381s | COMPLETE |
| 5 | 9 | 128 | 0.9277 | 0.9277 | 6.059s | 23.440s | COMPLETE |
| 6 | 9 | 128 | 0.9277 | 0.9277 | 0.041s | 23.481s | COMPLETE |
| 7 | 3 | 128 | 0.9045 | 0.9277 | 0.046s | 23.527s | COMPLETE |
| 8 | 10 | 256 | 0.7922 | 0.9277 | 3.483s | 27.010s | PRUNED |
| 9 | 8 | 128 | 0.8496 | 0.9277 | 3.190s | 30.200s | PRUNED |
| 10 | 7 | 256 | 0.8165 | 0.9277 | 3.153s | 33.353s | PRUNED |
| 11 | 10 | 128 | 0.821 | 0.9277 | 3.498s | 36.852s | PRUNED |
Hyperparameter tuning ---------------------------
Best trial --> 5
Best parameters:
 --> epochs: 9
 --> batch_size: 128
Best evaluation --> f1_weighted: 0.9277
Time elapsed: 36.852s
Fit ---------------------------------------------
Train evaluation --> f1_weighted: 0.9795
Test evaluation --> f1_weighted: 0.9531
Time elapsed: 46.786s
-------------------------------------------------
Time: 01m:24s


Final results ==================== >>
Total time: 01m:31s
-------------------------------------
ConvNN --> f1_weighted: 0.9531
Analyze the results
In [7]:
atom.cnn.trials
Out[7]:
| trial | epochs | batch_size | estimator | f1_weighted | best_f1_weighted | time_trial | time_ht | state |
| --- | --- | --- | --- | --- | --- | --- | --- | --- |
0 | 5 | 128 | ConvCNN(epochs=5, batch_size=128) | 0.925570 | 0.927664 | 4.756187 | 4.756187 | COMPLETE |
1 | 3 | 512 | ConvCNN(epochs=3, batch_size=512) | 0.856597 | 0.927664 | 3.863504 | 8.619691 | COMPLETE |
2 | 5 | 512 | ConvCNN(epochs=5, batch_size=512) | 0.896831 | 0.927664 | 4.704265 | 13.323956 | COMPLETE |
3 | 3 | 128 | ConvCNN(epochs=3, batch_size=128) | 0.904526 | 0.927664 | 4.008634 | 17.332590 | COMPLETE |
4 | 5 | 128 | ConvCNN(epochs=5, batch_size=128) | 0.925570 | 0.927664 | 0.048043 | 17.380633 | COMPLETE |
5 | 9 | 128 | ConvCNN(epochs=9, batch_size=128) | 0.927664 | 0.927664 | 6.059495 | 23.440128 | COMPLETE |
6 | 9 | 128 | ConvCNN(epochs=9, batch_size=128) | 0.927664 | 0.927664 | 0.041037 | 23.481165 | COMPLETE |
7 | 3 | 128 | ConvCNN(epochs=3, batch_size=128) | 0.904526 | 0.927664 | 0.046042 | 23.527207 | COMPLETE |
8 | 10 | 256 | ConvCNN(epochs=10, batch_size=256) | 0.792157 | 0.927664 | 3.483158 | 27.010365 | PRUNED |
9 | 8 | 128 | ConvCNN(epochs=8, batch_size=128) | 0.849556 | 0.927664 | 3.189892 | 30.200257 | PRUNED |
10 | 7 | 256 | ConvCNN(epochs=7, batch_size=256) | 0.816544 | 0.927664 | 3.153120 | 33.353377 | PRUNED |
11 | 10 | 128 | ConvCNN(epochs=10, batch_size=128) | 0.821044 | 0.927664 | 3.498174 | 36.851551 | PRUNED |
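Besides the full trial overview, the winning configuration is stored on the model itself. The attribute names below follow ATOM's hyperparameter tuning API (a short sketch, not executed above):

# Best trial found during hyperparameter tuning
print(atom.cnn.best_trial)
print(atom.cnn.best_params)  # {'epochs': 9, 'batch_size': 128}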
In [8]:
atom.plot_evals(dataset="test+train")
In [9]:
# Use the prediction methods like any other model
atom.cnn.predict_proba(X_train)
Out[9]:
| | 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 |
| --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- |
0 | 2.312403e-05 | 4.813091e-07 | 9.877838e-07 | 1.521687e-02 | 1.477957e-10 | 9.845214e-01 | 1.979216e-09 | 1.859409e-04 | 1.672720e-07 | 5.092833e-05 |
1 | 9.999985e-01 | 1.913526e-14 | 6.021650e-07 | 4.580108e-07 | 1.116983e-10 | 3.819218e-07 | 5.251873e-08 | 3.225117e-08 | 3.185537e-08 | 8.395847e-08 |
2 | 9.198901e-12 | 7.791282e-12 | 2.474698e-05 | 3.879381e-03 | 9.823568e-01 | 9.732957e-10 | 6.507817e-14 | 1.241411e-02 | 6.393744e-07 | 1.324400e-03 |
3 | 2.723569e-06 | 9.855152e-01 | 4.268080e-03 | 8.463531e-04 | 4.408682e-05 | 8.344805e-05 | 8.088706e-06 | 1.071986e-04 | 9.105954e-03 | 1.884496e-05 |
4 | 7.993587e-09 | 3.200859e-09 | 2.385552e-08 | 4.573155e-07 | 1.899138e-03 | 6.267613e-07 | 6.418151e-10 | 2.177684e-03 | 5.593851e-05 | 9.958662e-01 |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
59995 | 7.141304e-07 | 8.722313e-07 | 3.200821e-06 | 5.318429e-04 | 1.037134e-07 | 1.061244e-04 | 1.843534e-09 | 1.376003e-08 | 9.993328e-01 | 2.418581e-05 |
59996 | 5.651224e-06 | 2.765106e-08 | 3.561635e-04 | 9.909723e-01 | 8.901452e-09 | 3.109492e-04 | 1.283445e-09 | 4.298236e-08 | 8.242389e-03 | 1.125573e-04 |
59997 | 1.372623e-07 | 3.574151e-07 | 1.871507e-10 | 4.340361e-05 | 4.912128e-07 | 9.997080e-01 | 2.914571e-08 | 1.305577e-08 | 9.207054e-05 | 1.555610e-04 |
59998 | 2.379551e-04 | 1.090190e-08 | 8.885978e-05 | 4.208754e-05 | 2.906187e-03 | 1.157583e-03 | 9.955516e-01 | 8.701521e-06 | 3.407994e-06 | 3.484051e-06 |
59999 | 3.986048e-03 | 1.135677e-06 | 2.533903e-05 | 1.527677e-05 | 2.258937e-06 | 4.415337e-02 | 1.476940e-06 | 5.445668e-07 | 9.514875e-01 | 3.271214e-04 |
60000 rows × 10 columns
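The predicted labels can also be scored directly with plain scikit-learn metrics. A small sketch that evaluates the model on the original Keras test split with the same weighted F1 metric (note this is the full 10,000-image split, not ATOM's internal test set):

from sklearn.metrics import f1_score

# Predict the holdout digits and score them manually
y_pred = atom.cnn.predict(X_test)
print(f1_score(y_test, y_pred, average="weighted"))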
In [10]:
# Or make plots...
atom.cnn.plot_hyperparameters()
In [11]:
atom.plot_parallel_coordinate()
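As an optional last step, the trained instance or the fitted pipeline can be stored for later reuse. A minimal sketch, assuming ATOM's save and export_pipeline utilities (check the documentation for the exact signatures in your version):

# Persist the atom instance (without the data) and extract the fitted pipeline
atom.save("atom_mnist", save_data=False)
pipeline = atom.cnn.export_pipeline()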