Deep learning
This example shows how to use ATOM to train and validate a convolutional neural network implemented with Keras, in two different ways: with scikeras (recommended) and with keras.wrappers.scikit_learn (the old way, which will be deprecated in a future release).
We import the MNIST dataset from keras.datasets, a well-known image dataset where the task is to classify handwritten digits.
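Before wrapping anything, it helps to know what the raw data looks like. A minimal sketch (the printed shapes are the standard MNIST dimensions):

from keras.datasets import mnist

# MNIST ships as 28x28 grayscale images with integer labels 0-9
(X_train, y_train), (X_test, y_test) = mnist.load_data()
print(X_train.shape, y_train.shape)  # (60000, 28, 28) (60000,)
print(X_test.shape, y_test.shape)    # (10000, 28, 28) (10000,)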
Using scikeras
In [1]:
# Disable annoying tf warnings
import logging
import tensorflow as tf
tf.get_logger().setLevel(logging.ERROR)
from atom import ATOMClassifier, ATOMModel
from sklearn.preprocessing import FunctionTransformer
from skopt.space.space import Categorical
from scikeras.wrappers import KerasClassifier
from keras.datasets import mnist
from keras.models import Sequential
from keras.layers import Dense, Flatten, Conv2D, Dropout
In [2]:
# Create the convolutional neural network
class ConvNN(KerasClassifier):
    """Convolutional neural network model."""

    @property
    def feature_encoder(self):
        """Convert the 2d input to the image's format (len(X), 28, 28, 1)."""
        return FunctionTransformer(
            func=lambda X: X.reshape(X.shape[0], 28, 28, 1),
        )

    @staticmethod
    def _keras_build_fn():
        """Create the model's architecture."""
        model = Sequential()
        model.add(
            Conv2D(
                filters=64,
                kernel_size=3,
                activation="relu",
                input_shape=(28, 28, 1),
            )
        )
        model.add(Conv2D(filters=64, kernel_size=3, activation="relu"))
        model.add(Flatten())
        model.add(Dense(units=10, activation="softmax"))
        model.compile(
            optimizer="adam",
            loss="sparse_categorical_crossentropy",
            metrics=["accuracy"],
        )
        return model
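To make the role of feature_encoder concrete, here is a minimal standalone sketch (the array names are illustrative) of the same reshape it applies before the data reaches the Conv2D layers:

import numpy as np

# ATOM hands the estimator a flattened 2d batch, e.g. two blank images
X_flat = np.zeros((2, 784))

# The encoder restores the (n_samples, 28, 28, 1) image format
X_img = X_flat.reshape(X_flat.shape[0], 28, 28, 1)
print(X_img.shape)  # (2, 28, 28, 1)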
In [3]:
# Convert the model to an ATOM model
model = ATOMModel(
    estimator=ConvNN(epochs=5, verbose=0),
    acronym="NN",
    fullname="Neural network",
)
In [4]:
# Download the MNIST dataset
(X_train, y_train), (X_test, y_test) = mnist.load_data()
# Flatten data to follow sklearn's API (2d input)
X_train = X_train.reshape(len(X_train), -1)
X_test = X_test.reshape(len(X_test), -1)
data = (X_train, y_train), (X_test, y_test)
In [5]:
atom = ATOMClassifier(*data, n_rows=0.1, n_jobs=6, verbose=2)
<< ================== ATOM ================== >>
Algorithm task: multiclass classification.
Parallel processing with 6 cores.

Dataset stats ==================== >>
Shape: (7000, 785)
Memory: 5.50 MB
Scaled: False
Outlier values: 41387 (0.9%)
-------------------------------------
Train set size: 6000
Test set size: 1000
-------------------------------------
|   |     dataset |       train |        test |
| - | ----------- | ----------- | ----------- |
| 0 |   721 (1.1) |   614 (1.1) |   107 (1.2) |
| 1 |   756 (1.2) |   637 (1.2) |   119 (1.4) |
| 2 |   705 (1.1) |   603 (1.1) |   102 (1.2) |
| 3 |   748 (1.1) |   650 (1.2) |    98 (1.1) |
| 4 |   667 (1.0) |   575 (1.1) |    92 (1.1) |
| 5 |   653 (1.0) |   558 (1.0) |    95 (1.1) |
| 6 |   661 (1.0) |   574 (1.0) |    87 (1.0) |
| 7 |   733 (1.1) |   635 (1.2) |    98 (1.1) |
| 8 |   653 (1.0) |   547 (1.0) |   106 (1.2) |
| 9 |   703 (1.1) |   607 (1.1) |    96 (1.1) |
In [6]:
# Like any other model, we can define custom dimensions for the bayesian optimization
atom.run(
    models=model,
    metric="f1_weighted",
    n_calls=5,
    bo_params={
        "dimensions": [Categorical([32, 64, 128, 256], name="batch_size")],
        "max_time": 120,
    }
)
Training ========================= >>
Models: NN
Metric: f1_weighted

Running BO for Neural network...
| call             | batch_size | f1_weighted | best_f1_weighted | time    | total_time |
| ---------------- | ---------- | ----------- | ---------------- | ------- | ---------- |
| Initial point 1  | 32         | 0.9311      | 0.9311           | 28.916s | 28.916s    |
| Initial point 2  | 64         | 0.958       | 0.958            | 25.337s | 54.253s    |
| Initial point 3  | 128        | 0.9685      | 0.9685           | 22.706s | 1m:17s     |
| Initial point 4  | 32         | 0.9311      | 0.9685           | 0.000s  | 1m:17s     |
| Initial point 5  | 256        | 0.9651      | 0.9685           | 21.374s | 1m:38s     |
Bayesian Optimization ---------------------------
Best call --> Initial point 3
Best parameters --> {'batch_size': 128}
Best evaluation --> f1_weighted: 0.9685
Time elapsed: 1m:38s
Fit ---------------------------------------------
Train evaluation --> f1_weighted: 0.9995
Test evaluation --> f1_weighted: 0.967
Time elapsed: 27.298s
-------------------------------------------------
Total time: 2m:06s

Final results ==================== >>
Duration: 2m:06s
-------------------------------------
Neural network --> f1_weighted: 0.967
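The scores above can also be retrieved programmatically; a quick check, assuming ATOM's results attribute (a dataframe with one row per trained model):

# Overview of the trained models and their scores
print(atom.results)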
Using keras.wrappers.scikit_learn
In [7]:
# Disable annoying tf warnings
import logging
import tensorflow as tf
tf.get_logger().setLevel(logging.ERROR)
# Import standard packages
from atom import ATOMClassifier, ATOMModel
from skopt.space.space import Categorical
# Keras
from keras.datasets import mnist
from keras.models import Sequential
from keras.layers import Dense, Flatten, Conv2D
from keras.wrappers.scikit_learn import KerasClassifier
In [8]:
# Create the convolutional neural network
def neural_network():
    """Create the model's architecture."""
    model = Sequential()
    model.add(
        Conv2D(
            filters=64,
            kernel_size=3,
            activation="relu",
            input_shape=(28, 28, 1),
        )
    )
    model.add(Conv2D(filters=64, kernel_size=3, activation="relu"))
    model.add(Flatten())
    model.add(Dense(units=10, activation="softmax"))
    model.compile(
        optimizer="adam",
        loss="sparse_categorical_crossentropy",
        metrics=["accuracy"],
    )
    return model
In [9]:
# Since ATOM uses sklearn's API, use Keras' wrapper
model = KerasClassifier(neural_network, epochs=5, verbose=0)
# Convert the model to an ATOM model
model = ATOMModel(model, acronym="NN", fullname="Neural network")
In [10]:
# Download the MNIST dataset
(X_train, y_train), (X_test, y_test) = mnist.load_data()
# Reshape data to fit model
X_train = X_train.reshape(len(X_train), 28, 28, 1)
X_test = X_test.reshape(len(X_test), 28, 28, 1)
data = (X_train, y_train), (X_test, y_test)
In [11]:
atom = ATOMClassifier(*data, n_rows=0.1, n_jobs=6, verbose=2)
<< ================== ATOM ================== >>
Algorithm task: multiclass classification.
Parallel processing with 6 cores.

Dataset stats ==================== >>
Shape: (7000, (28, 28, 1), 2)
Memory: 1.07 MB
-------------------------------------
Train set size: 6000
Test set size: 1000
-------------------------------------
|   |     dataset |       train |        test |
| - | ----------- | ----------- | ----------- |
| 0 |   716 (1.1) |   610 (1.1) |   106 (1.5) |
| 1 |   780 (1.2) |   648 (1.2) |   132 (1.9) |
| 2 |   667 (1.1) |   564 (1.0) |   103 (1.5) |
| 3 |   716 (1.1) |   616 (1.1) |   100 (1.4) |
| 4 |   655 (1.0) |   575 (1.0) |    80 (1.1) |
| 5 |   634 (1.0) |   549 (1.0) |    85 (1.2) |
| 6 |   723 (1.1) |   616 (1.1) |   107 (1.5) |
| 7 |   736 (1.2) |   638 (1.2) |    98 (1.4) |
| 8 |   646 (1.0) |   575 (1.0) |    71 (1.0) |
| 9 |   727 (1.1) |   609 (1.1) |   118 (1.7) |
In [12]:
# When the input data has more than 2 dimensions, ATOM creates a
# dataset with just one column of shape (n_samples, shape_sample)
atom.head()
Out[12]:
|   | multidim feature                                   | target |
| - | -------------------------------------------------- | ------ |
| 0 | [[[0], [0], [0], [0], [0], [0], [0], [0], [0],... | 3      |
| 1 | [[[0], [0], [0], [0], [0], [0], [0], [0], [0],... | 6      |
| 2 | [[[0], [0], [0], [0], [0], [0], [0], [0], [0],... | 2      |
| 3 | [[[0], [0], [0], [0], [0], [0], [0], [0], [0],... | 0      |
| 4 | [[[0], [0], [0], [0], [0], [0], [0], [0], [0],... | 9      |
In [13]:
# Every row in the column contains the data of one image
print(f"Shape of one image: {atom.iloc[0, 0].shape}")
print(f"atom's shape (n_rows, (shape_image), n_cols): {atom.shape}")
Shape of one image: (28, 28, 1)
atom's shape (n_rows, (shape_image), n_cols): (7000, (28, 28, 1), 2)
In [14]:
# Like any other model, we can define custom dimensions for the bayesian optimization
atom.run(
    models=model,
    metric="f1_weighted",
    n_calls=5,
    bo_params={
        "dimensions": [Categorical([32, 64, 128, 256], name="batch_size")],
        "max_time": 120,
    }
)
Training ========================= >>
Models: NN
Metric: f1_weighted

Running BO for Neural network...
| call             | batch_size | f1_weighted | best_f1_weighted | time    | total_time |
| ---------------- | ---------- | ----------- | ---------------- | ------- | ---------- |
27/27 [==============================] - 0s 8ms/step
| Initial point 1  | 128        | 0.9686      | 0.9686           | 25.543s | 25.543s    |
27/27 [==============================] - 0s 7ms/step
| Initial point 2  | 64         | 0.9592      | 0.9686           | 25.226s | 50.769s    |
27/27 [==============================] - 0s 7ms/step
| Initial point 3  | 32         | 0.9486      | 0.9686           | 28.972s | 1m:20s     |
27/27 [==============================] - 0s 7ms/step
| Initial point 4  | 256        | 0.9578      | 0.9686           | 22.066s | 1m:42s     |
Bayesian Optimization ---------------------------
Best call --> Initial point 1
Best parameters --> {'batch_size': 128}
Best evaluation --> f1_weighted: 0.9686
Time elapsed: 1m:42s
Fit ---------------------------------------------
188/188 [==============================] - 1s 7ms/step
32/32 [==============================] - 0s 7ms/step
Train evaluation --> f1_weighted: 0.9997
Test evaluation --> f1_weighted: 0.965
Time elapsed: 28.000s
-------------------------------------------------
Total time: 2m:10s

Final results ==================== >>
Duration: 2m:10s
-------------------------------------
Neural network --> f1_weighted: 0.965
Analyze the results
In [15]:
# Use the prediction methods like any other model
atom.nn.predict_proba(X_train)
1875/1875 [==============================] - 11s 6ms/step
Out[15]:
array([[5.4039018e-09, 1.6364370e-14, 1.3189693e-09, ..., 1.3525283e-06,
        1.1704481e-07, 5.0089360e-10],
       [9.9999988e-01, 7.7574962e-16, 2.4149596e-09, ..., 9.6689301e-10,
        7.5766078e-09, 1.7431129e-13],
       [2.2981137e-15, 1.5083846e-12, 7.1241735e-11, ..., 2.3667360e-05,
        1.2634129e-07, 1.3805793e-04],
       ...,
       [3.6765248e-14, 5.7221899e-20, 9.5585592e-18, ..., 3.6969964e-17,
        2.0815643e-08, 7.3205825e-08],
       [2.2618442e-08, 1.0618017e-14, 6.3257083e-10, ..., 2.8880539e-11,
        1.0065155e-11, 3.1087099e-16],
       [5.3646733e-07, 1.3232229e-11, 8.2509573e-08, ..., 1.0255779e-07,
        9.9999702e-01, 5.6972289e-07]], dtype=float32)
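Each row is a probability distribution over the ten digit classes. A minimal sketch using numpy's argmax (an addition for illustration, not part of the original notebook) to turn the probabilities into hard class predictions:

import numpy as np

# Pick the class with the highest probability for each sample
probabilities = atom.nn.predict_proba(X_train)
predictions = np.argmax(probabilities, axis=1)
print(predictions[:5])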
In [16]:
# Or make plots...
atom.nn.plot_confusion_matrix()