Deep learning
This example shows how to use ATOM to train and validate a convolutional neural network implemented with Keras, in two different ways: with scikeras (recommended) and with keras.wrappers.scikit_learn (the old way, which will be deprecated in a future release).
We import the MNIST dataset from keras.datasets, a well-known image dataset where the task is to classify handwritten digits.
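Before wrapping anything, it helps to know what the raw data looks like. A minimal sketch (the printed shapes are the standard MNIST dimensions):

from keras.datasets import mnist

# MNIST ships as 28x28 grayscale images with integer labels 0-9
(X_train, y_train), (X_test, y_test) = mnist.load_data()
print(X_train.shape, y_train.shape)  # (60000, 28, 28) (60000,)
print(X_test.shape, y_test.shape)    # (10000, 28, 28) (10000,)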
Using scikeras
In [1]:
# Disable annoying tf warnings
import logging
import tensorflow as tf
tf.get_logger().setLevel(logging.ERROR)
from atom import ATOMClassifier, ATOMModel
from sklearn.preprocessing import FunctionTransformer
from skopt.space.space import Categorical
from scikeras.wrappers import KerasClassifier
from keras.datasets import mnist
from keras.models import Sequential
from keras.layers import Dense, Flatten, Conv2D, Dropout
In [2]:
# Create the convolutional neural network
class ConvNN(KerasClassifier):
    """Convolutional neural network model."""

    @property
    def feature_encoder(self):
        """Convert the 2d input to the image's format (len(X), 28, 28, 1)."""
        return FunctionTransformer(
            func=lambda X: X.reshape(X.shape[0], 28, 28, 1),
        )

    @staticmethod
    def _keras_build_fn():
        """Create the model's architecture."""
        model = Sequential()
        model.add(
            Conv2D(
                filters=64,
                kernel_size=3,
                activation="relu",
                input_shape=(28, 28, 1),
            )
        )
        model.add(Conv2D(filters=64, kernel_size=3, activation="relu"))
        model.add(Flatten())
        model.add(Dense(units=10, activation="softmax"))
        model.compile(
            optimizer="adam",
            loss="sparse_categorical_crossentropy",
            metrics=["accuracy"],
        )
        return model
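To make the role of feature_encoder concrete, here is a minimal standalone sketch (the array names are illustrative) of the same reshape it applies before the data reaches the Conv2D layers:

import numpy as np

# ATOM hands the estimator a flattened 2d batch, e.g. two blank images
X_flat = np.zeros((2, 784))

# The encoder restores the (n_samples, 28, 28, 1) image format
X_img = X_flat.reshape(X_flat.shape[0], 28, 28, 1)
print(X_img.shape)  # (2, 28, 28, 1)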
In [3]:
# Convert the model to an ATOM model
model = ATOMModel(
    estimator=ConvNN(epochs=5, verbose=0),
    acronym="NN",
    fullname="Neural network",
)
In [4]:
# Download the MNIST dataset
(X_train, y_train), (X_test, y_test) = mnist.load_data()
# Flatten data to follow sklearn's API (2d input)
X_train = X_train.reshape(len(X_train), -1)
X_test = X_test.reshape(len(X_test), -1)
data = (X_train, y_train), (X_test, y_test)
In [5]:
atom = ATOMClassifier(*data, n_rows=0.1, n_jobs=6, verbose=2)
<< ================== ATOM ================== >>
Algorithm task: multiclass classification.
Parallel processing with 6 cores.

Dataset stats ==================== >>
Shape: (7000, 785)
Memory: 5.50 MB
Scaled: False
Outlier values: 41387 (0.9%)
-------------------------------------
Train set size: 6000
Test set size: 1000
-------------------------------------
|   |     dataset |       train |        test |
| - | ----------- | ----------- | ----------- |
| 0 |   721 (1.1) |   614 (1.1) |   107 (1.2) |
| 1 |   756 (1.2) |   637 (1.2) |   119 (1.4) |
| 2 |   705 (1.1) |   603 (1.1) |   102 (1.2) |
| 3 |   748 (1.1) |   650 (1.2) |    98 (1.1) |
| 4 |   667 (1.0) |   575 (1.1) |    92 (1.1) |
| 5 |   653 (1.0) |   558 (1.0) |    95 (1.1) |
| 6 |   661 (1.0) |   574 (1.0) |    87 (1.0) |
| 7 |   733 (1.1) |   635 (1.2) |    98 (1.1) |
| 8 |   653 (1.0) |   547 (1.0) |   106 (1.2) |
| 9 |   703 (1.1) |   607 (1.1) |    96 (1.1) |
In [6]:
# Like any other model, we can define custom dimensions for the bayesian optimization
atom.run(
    models=model,
    metric="f1_weighted",
    n_calls=5,
    bo_params={
        "dimensions": [Categorical([32, 64, 128, 256], name="batch_size")],
        "max_time": 120,
    }
)
Training ========================= >>
Models: NN
Metric: f1_weighted

Running BO for Neural network...
| call             | batch_size | f1_weighted | best_f1_weighted | time    | total_time |
| ---------------- | ---------- | ----------- | ---------------- | ------- | ---------- |
| Initial point 1  | 32         | 0.9311      | 0.9311           | 28.916s | 28.916s    |
| Initial point 2  | 64         | 0.958       | 0.958            | 25.337s | 54.253s    |
| Initial point 3  | 128        | 0.9685      | 0.9685           | 22.706s | 1m:17s     |
| Initial point 4  | 32         | 0.9311      | 0.9685           | 0.000s  | 1m:17s     |
| Initial point 5  | 256        | 0.9651      | 0.9685           | 21.374s | 1m:38s     |
Bayesian Optimization ---------------------------
Best call --> Initial point 3
Best parameters --> {'batch_size': 128}
Best evaluation --> f1_weighted: 0.9685
Time elapsed: 1m:38s
Fit ---------------------------------------------
Train evaluation --> f1_weighted: 0.9995
Test evaluation --> f1_weighted: 0.967
Time elapsed: 27.298s
-------------------------------------------------
Total time: 2m:06s

Final results ==================== >>
Duration: 2m:06s
-------------------------------------
Neural network --> f1_weighted: 0.967
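The scores above can also be retrieved programmatically; a quick check, assuming ATOM's results attribute (a dataframe with one row per trained model):

# Overview of the trained models and their scores
print(atom.results)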
Using keras.wrappers.scikit_learn
In [7]:
# Disable annoying tf warnings
import logging
import tensorflow as tf
tf.get_logger().setLevel(logging.ERROR)
# Import standard packages
from atom import ATOMClassifier, ATOMModel
from skopt.space.space import Categorical
# Keras
from keras.datasets import mnist
from keras.models import Sequential
from keras.layers import Dense, Flatten, Conv2D
from keras.wrappers.scikit_learn import KerasClassifier
In [8]:
# Create the convolutional neural network
def neural_network():
    """Create the model's architecture."""
    model = Sequential()
    model.add(
        Conv2D(
            filters=64,
            kernel_size=3,
            activation="relu",
            input_shape=(28, 28, 1),
        )
    )
    model.add(Conv2D(filters=64, kernel_size=3, activation="relu"))
    model.add(Flatten())
    model.add(Dense(units=10, activation="softmax"))
    model.compile(
        optimizer="adam",
        loss="sparse_categorical_crossentropy",
        metrics=["accuracy"],
    )
    return model
In [9]:
# Since ATOM uses sklearn's API, use Keras' wrapper
model = KerasClassifier(neural_network, epochs=5, verbose=0)
# Convert the model to an ATOM model
model = ATOMModel(model, acronym="NN", fullname="Neural network")
In [10]:
# Download the MNIST dataset
(X_train, y_train), (X_test, y_test) = mnist.load_data()
# Reshape data to fit model
X_train = X_train.reshape(len(X_train), 28, 28, 1)
X_test = X_test.reshape(len(X_test), 28, 28, 1)
data = (X_train, y_train), (X_test, y_test)
In [11]:
atom = ATOMClassifier(*data, n_rows=0.1, n_jobs=6, verbose=2)
<< ================== ATOM ================== >>
Algorithm task: multiclass classification.
Parallel processing with 6 cores.

Dataset stats ==================== >>
Shape: (7000, (28, 28, 1), 2)
Memory: 1.07 MB
-------------------------------------
Train set size: 6000
Test set size: 1000
-------------------------------------
|   |     dataset |       train |        test |
| - | ----------- | ----------- | ----------- |
| 0 |   716 (1.1) |   610 (1.1) |   106 (1.5) |
| 1 |   780 (1.2) |   648 (1.2) |   132 (1.9) |
| 2 |   667 (1.1) |   564 (1.0) |   103 (1.5) |
| 3 |   716 (1.1) |   616 (1.1) |   100 (1.4) |
| 4 |   655 (1.0) |   575 (1.0) |    80 (1.1) |
| 5 |   634 (1.0) |   549 (1.0) |    85 (1.2) |
| 6 |   723 (1.1) |   616 (1.1) |   107 (1.5) |
| 7 |   736 (1.2) |   638 (1.2) |    98 (1.4) |
| 8 |   646 (1.0) |   575 (1.0) |    71 (1.0) |
| 9 |   727 (1.1) |   609 (1.1) |   118 (1.7) |
In [12]:
# When the input data has more than 2 dimensions, ATOM creates a
# dataset with just one column of shape (n_samples, shape_sample)
atom.head()
Out[12]:
|   | multidim feature                                   | target |
| - | -------------------------------------------------- | ------ |
| 0 | [[[0], [0], [0], [0], [0], [0], [0], [0], [0],... | 3      |
| 1 | [[[0], [0], [0], [0], [0], [0], [0], [0], [0],... | 6      |
| 2 | [[[0], [0], [0], [0], [0], [0], [0], [0], [0],... | 2      |
| 3 | [[[0], [0], [0], [0], [0], [0], [0], [0], [0],... | 0      |
| 4 | [[[0], [0], [0], [0], [0], [0], [0], [0], [0],... | 9      |
In [13]:
# Every row in the column contains the data of one image
print(f"Shape of one image: {atom.iloc[0, 0].shape}")
print(f"atom's shape (n_rows, (shape_image), n_cols): {atom.shape}")
Shape of one image: (28, 28, 1)
atom's shape (n_rows, (shape_image), n_cols): (7000, (28, 28, 1), 2)
In [14]:
# Like any other model, we can define custom dimensions for the bayesian optimization
atom.run(
    models=model,
    metric="f1_weighted",
    n_calls=5,
    bo_params={
        "dimensions": [Categorical([32, 64, 128, 256], name="batch_size")],
        "max_time": 120,
    }
)
Training ========================= >>
Models: NN
Metric: f1_weighted

Running BO for Neural network...
| call             | batch_size | f1_weighted | best_f1_weighted | time    | total_time |
| ---------------- | ---------- | ----------- | ---------------- | ------- | ---------- |
27/27 [==============================] - 0s 8ms/step
| Initial point 1  | 128        | 0.9686      | 0.9686           | 25.543s | 25.543s    |
27/27 [==============================] - 0s 7ms/step
| Initial point 2  | 64         | 0.9592      | 0.9686           | 25.226s | 50.769s    |
27/27 [==============================] - 0s 7ms/step
| Initial point 3  | 32         | 0.9486      | 0.9686           | 28.972s | 1m:20s     |
27/27 [==============================] - 0s 7ms/step
| Initial point 4  | 256        | 0.9578      | 0.9686           | 22.066s | 1m:42s     |
Bayesian Optimization ---------------------------
Best call --> Initial point 1
Best parameters --> {'batch_size': 128}
Best evaluation --> f1_weighted: 0.9686
Time elapsed: 1m:42s
Fit ---------------------------------------------
188/188 [==============================] - 1s 7ms/step
32/32 [==============================] - 0s 7ms/step
Train evaluation --> f1_weighted: 0.9997
Test evaluation --> f1_weighted: 0.965
Time elapsed: 28.000s
-------------------------------------------------
Total time: 2m:10s

Final results ==================== >>
Duration: 2m:10s
-------------------------------------
Neural network --> f1_weighted: 0.965
Analyze the results
In [15]:
# Use the prediction methods like any other model
atom.nn.predict_proba(X_train)
1875/1875 [==============================] - 11s 6ms/step
Out[15]:
array([[5.4039018e-09, 1.6364370e-14, 1.3189693e-09, ..., 1.3525283e-06,
        1.1704481e-07, 5.0089360e-10],
       [9.9999988e-01, 7.7574962e-16, 2.4149596e-09, ..., 9.6689301e-10,
        7.5766078e-09, 1.7431129e-13],
       [2.2981137e-15, 1.5083846e-12, 7.1241735e-11, ..., 2.3667360e-05,
        1.2634129e-07, 1.3805793e-04],
       ...,
       [3.6765248e-14, 5.7221899e-20, 9.5585592e-18, ..., 3.6969964e-17,
        2.0815643e-08, 7.3205825e-08],
       [2.2618442e-08, 1.0618017e-14, 6.3257083e-10, ..., 2.8880539e-11,
        1.0065155e-11, 3.1087099e-16],
       [5.3646733e-07, 1.3232229e-11, 8.2509573e-08, ..., 1.0255779e-07,
        9.9999702e-01, 5.6972289e-07]], dtype=float32)
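Each row is a probability distribution over the ten digit classes. A minimal sketch using numpy's argmax (an addition for illustration, not part of the original notebook) to turn the probabilities into hard class predictions:

import numpy as np

# Pick the class with the highest probability for each sample
probabilities = atom.nn.predict_proba(X_train)
predictions = np.argmax(probabilities, axis=1)
print(predictions[:5])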
In [16]:
# Or make plots...
atom.nn.plot_confusion_matrix()