Example: Deep learning¶
This example shows how to use ATOM to train and validate a Convolutional Neural Network that is implemented with Keras and wrapped as a scikit-learn style estimator with scikeras.
Import the MNIST dataset from keras.datasets. This is a well-known image dataset of handwritten digits, where the goal is to classify each image as the digit it shows.
Load the data¶
In [1]:
                    Copied!
                    
                    
                # Disable annoying tf warnings
import os
os.environ["TF_CPP_MIN_LOG_LEVEL"] = "3"
from tensorflow import get_logger
get_logger().setLevel('ERROR')
import absl.logging
absl.logging.set_verbosity(absl.logging.ERROR)
from atom import ATOMClassifier, ATOMModel
from sklearn.preprocessing import FunctionTransformer
from optuna.pruners import PatientPruner
from optuna.distributions import CategoricalDistribution, IntDistribution
from scikeras.wrappers import KerasClassifier
from keras.datasets import mnist
from keras.models import Sequential
from keras.layers import Dense, Flatten, Conv2D, Dropout
# Disable annoying tf warnings
import os
os.environ["TF_CPP_MIN_LOG_LEVEL"] = "3"
from tensorflow import get_logger
get_logger().setLevel('ERROR')
import absl.logging
absl.logging.set_verbosity(absl.logging.ERROR)
from atom import ATOMClassifier, ATOMModel
from sklearn.preprocessing import FunctionTransformer
from optuna.pruners import PatientPruner
from optuna.distributions import CategoricalDistribution, IntDistribution
from scikeras.wrappers import KerasClassifier
from keras.datasets import mnist
from keras.models import Sequential
from keras.layers import Dense, Flatten, Conv2D, Dropout
        
        In [2]:
                    Copied!
                    
                    
                # Fetch the MNIST train and test splits through keras
(X_train, y_train), (X_test, y_test) = mnist.load_data()
# sklearn's API expects 2d input, so collapse every image into a single row
X_train = X_train.reshape(X_train.shape[0], -1)
X_test = X_test.reshape(X_test.shape[0], -1)
# Bundle both splits so they can be unpacked into ATOMClassifier
data = ((X_train, y_train), (X_test, y_test))
# Fetch the MNIST train and test splits through keras
(X_train, y_train), (X_test, y_test) = mnist.load_data()
# sklearn's API expects 2d input, so collapse every image into a single row
X_train = X_train.reshape(X_train.shape[0], -1)
X_test = X_test.reshape(X_test.shape[0], -1)
# Bundle both splits so they can be unpacked into ATOMClassifier
data = ((X_train, y_train), (X_test, y_test))
        
        In [3]:
                    Copied!
                    
                    
                # Create the convolutional neural network
class ConvNN(KerasClassifier):
    """Keras convolutional network exposed through scikeras' classifier API."""

    @property
    def feature_encoder(self):
        """Return the transformer that restores the image layout.

        The flat 2d input is reshaped to (n_samples, 28, 28, 1) before it
        reaches the network.
        """
        encoder = FunctionTransformer(
            func=lambda flat: flat.reshape(flat.shape[0], 28, 28, 1),
        )
        return encoder

    @staticmethod
    def _keras_build_fn(**kwargs):
        """Assemble and compile the network.

        The architecture is two convolutional layers, a flatten step and a
        10-unit softmax output layer. The keyword arguments are accepted
        but not used.
        """
        layers = [
            Conv2D(
                filters=8,
                kernel_size=3,
                activation="relu",
                input_shape=(28, 28, 1),
            ),
            Conv2D(filters=4, kernel_size=5, activation="relu"),
            Flatten(),
            Dense(units=10, activation="softmax"),
        ]
        # Passing the list to the constructor stacks the layers in order
        network = Sequential(layers)
        network.compile(
            loss="sparse_categorical_crossentropy",
            optimizer="adam",
        )
        return network
# Create the convolutional neural network
class ConvNN(KerasClassifier):
    """Keras convolutional network exposed through scikeras' classifier API."""

    @property
    def feature_encoder(self):
        """Return the transformer that restores the image layout.

        The flat 2d input is reshaped to (n_samples, 28, 28, 1) before it
        reaches the network.
        """
        encoder = FunctionTransformer(
            func=lambda flat: flat.reshape(flat.shape[0], 28, 28, 1),
        )
        return encoder

    @staticmethod
    def _keras_build_fn(**kwargs):
        """Assemble and compile the network.

        The architecture is two convolutional layers, a flatten step and a
        10-unit softmax output layer. The keyword arguments are accepted
        but not used.
        """
        layers = [
            Conv2D(
                filters=8,
                kernel_size=3,
                activation="relu",
                input_shape=(28, 28, 1),
            ),
            Conv2D(filters=4, kernel_size=5, activation="relu"),
            Flatten(),
            Dense(units=10, activation="softmax"),
        ]
        # Passing the list to the constructor stacks the layers in order
        network = Sequential(layers)
        network.compile(
            loss="sparse_categorical_crossentropy",
            optimizer="adam",
        )
        return network
        
        In [4]:
                    Copied!
                    
                    
                # Wrap the Keras classifier in an ATOM model so it can be passed to atom.run
model = ATOMModel(
    estimator=ConvNN(verbose=0),
    acronym="CNN",  # Short name for the model; later cells access it as atom.cnn
    needs_scaling=True,  # Applies automated feature scaling before fitting
    has_validation="epochs",  # Applies in-training validation on parameter epochs
)
# Wrap the Keras classifier in an ATOM model so it can be passed to atom.run
model = ATOMModel(
    estimator=ConvNN(verbose=0),
    acronym="CNN",  # Short name for the model; later cells access it as atom.cnn
    needs_scaling=True,  # Applies automated feature scaling before fitting
    has_validation="epochs",  # Applies in-training validation on parameter epochs
)
        
        Run the pipeline¶
In [5]:
                    Copied!
                    
                    
                # Initialize atom from the (train, test) tuples; n_rows=0.1 subsamples
                # the data (the printed summary reports 7000 rows in total)
                atom = ATOMClassifier(*data, n_rows=0.1, verbose=2, random_state=1)
# Initialize atom from the (train, test) tuples; n_rows=0.1 subsamples
# the data (the printed summary reports 7000 rows in total)
atom = ATOMClassifier(*data, n_rows=0.1, verbose=2, random_state=1)
        
        << ================== ATOM ================== >> Algorithm task: multiclass classification. Dataset stats ==================== >> Shape: (7000, 785) Train set size: 6000 Test set size: 1000 ------------------------------------- Memory: 5.54 MB Scaled: False Outlier values: 41839 (0.9%)
In [6]:
                    Copied!
                    
                    
                # Like any other model, we can define custom distributions for hyperparameter tuning
atom.run(
    models=model,
    metric="f1_weighted",
    n_trials=12,  # Number of hyperparameter tuning trials (0-11 in the output table)
    ht_params={
        # Custom search space: one optuna distribution per tuned parameter
        "distributions": {
            "epochs": IntDistribution(2, 10),
            "batch_size": CategoricalDistribution([128, 256, 512]),
        },
    }
)
# Like any other model, we can define custom distributions for hyperparameter tuning
atom.run(
    models=model,
    metric="f1_weighted",
    n_trials=12,  # Number of hyperparameter tuning trials (0-11 in the output table)
    ht_params={
        # Custom search space: one optuna distribution per tuned parameter
        "distributions": {
            "epochs": IntDistribution(2, 10),
            "batch_size": CategoricalDistribution([128, 256, 512]),
        },
    }
)
        
        Training ========================= >> Models: CNN Metric: f1_weighted Running hyperparameter tuning for ConvNN... | trial | epochs | batch_size | f1_weighted | best_f1_weighted | time_trial | time_ht | state | | ----- | ------- | ---------- | ----------- | ---------------- | ---------- | ------- | -------- | | 0 | 5 | 128 | 0.9112 | 0.9112 | 8.853s | 8.853s | COMPLETE | | 1 | 3 | 512 | 0.8685 | 0.9112 | 5.241s | 14.094s | COMPLETE | | 2 | 5 | 512 | 0.9109 | 0.9112 | 7.273s | 21.367s | COMPLETE | | 3 | 3 | 128 | 0.9021 | 0.9112 | 6.097s | 27.465s | COMPLETE | | 4 | 5 | 128 | 0.9112 | 0.9112 | 1.242s | 28.707s | COMPLETE | | 5 | 9 | 128 | 0.9302 | 0.9302 | 13.197s | 41.904s | COMPLETE | | 6 | 9 | 128 | 0.9302 | 0.9302 | 1.465s | 43.369s | COMPLETE | | 7 | 3 | 128 | 0.9021 | 0.9302 | 1.198s | 44.566s | COMPLETE | | 8 | 0/10 | 256 | 0.813 | 0.9302 | 2.820s | 47.386s | PRUNED | | 9 | 8 | 128 | 0.9382 | 0.9382 | 12.674s | 01m:00s | COMPLETE | | 10 | 0/7 | 256 | 0.7842 | 0.9382 | 2.854s | 01m:03s | PRUNED | | 11 | 8 | 128 | 0.9382 | 0.9382 | 1.199s | 01m:04s | COMPLETE | Hyperparameter tuning --------------------------- Best trial --> 9 Best parameters: --> epochs: 8 --> batch_size: 128 Best evaluation --> f1_weighted: 0.9382 Time elapsed: 01m:04s Fit --------------------------------------------- Train evaluation --> f1_weighted: 0.9922 Test evaluation --> f1_weighted: 0.95 Time elapsed: 16.514s ------------------------------------------------- Total time: 01m:21s Final results ==================== >> Total time: 01m:28s ------------------------------------- ConvNN --> f1_weighted: 0.95
Analyze the results¶
In [7]:
                    Copied!
                    
                    
                atom.cnn.trials
atom.cnn.trials
        
        Out[7]:
| params | estimator | score | time_trial | time_ht | state | |
|---|---|---|---|---|---|---|
| trial | ||||||
| 0 | {'epochs': 5, 'batch_size': 128} | ConvNN(\n\tmodel=None\n\tbuild_fn=None\n\twarm... | 0.911245 | 8.853131 | 8.853131 | COMPLETE | 
| 1 | {'epochs': 3, 'batch_size': 512} | ConvNN(\n\tmodel=None\n\tbuild_fn=None\n\twarm... | 0.868467 | 5.240928 | 14.094059 | COMPLETE | 
| 2 | {'epochs': 5, 'batch_size': 512} | ConvNN(\n\tmodel=None\n\tbuild_fn=None\n\twarm... | 0.910901 | 7.273163 | 21.367222 | COMPLETE | 
| 3 | {'epochs': 3, 'batch_size': 128} | ConvNN(\n\tmodel=None\n\tbuild_fn=None\n\twarm... | 0.902138 | 6.097407 | 27.464629 | COMPLETE | 
| 4 | {'epochs': 5, 'batch_size': 128} | ConvNN(\n\tmodel=None\n\tbuild_fn=None\n\twarm... | 0.911245 | 1.241957 | 28.706586 | COMPLETE | 
| 5 | {'epochs': 9, 'batch_size': 128} | ConvNN(\n\tmodel=None\n\tbuild_fn=None\n\twarm... | 0.930152 | 13.196992 | 41.903578 | COMPLETE | 
| 6 | {'epochs': 9, 'batch_size': 128} | ConvNN(\n\tmodel=None\n\tbuild_fn=None\n\twarm... | 0.930152 | 1.464945 | 43.368523 | COMPLETE | 
| 7 | {'epochs': 3, 'batch_size': 128} | ConvNN(\n\tmodel=None\n\tbuild_fn=None\n\twarm... | 0.902138 | 1.197684 | 44.566207 | COMPLETE | 
| 8 | {'epochs': '0/10', 'batch_size': 256} | ConvNN(\n\tmodel=None\n\tbuild_fn=None\n\twarm... | 0.812996 | 2.819621 | 47.385828 | PRUNED | 
| 9 | {'epochs': 8, 'batch_size': 128} | ConvNN(\n\tmodel=None\n\tbuild_fn=None\n\twarm... | 0.938189 | 12.674373 | 60.060201 | COMPLETE | 
| 10 | {'epochs': '0/7', 'batch_size': 256} | ConvNN(\n\tmodel=None\n\tbuild_fn=None\n\twarm... | 0.784233 | 2.854331 | 62.914532 | PRUNED | 
| 11 | {'epochs': 8, 'batch_size': 128} | ConvNN(\n\tmodel=None\n\tbuild_fn=None\n\twarm... | 0.938189 | 1.198821 | 64.113353 | COMPLETE | 
In [8]:
                    Copied!
                    
                    
                atom.plot_evals(dataset="test+train")
atom.plot_evals(dataset="test+train")
        
        In [9]:
                    Copied!
                    
                    
                # Use the prediction methods like any other model
atom.cnn.predict_proba(X_train)
# Use the prediction methods like any other model
atom.cnn.predict_proba(X_train)
        
        Out[9]:
| 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | |
|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 8.299488e-10 | 2.628172e-12 | 1.256850e-08 | 8.065028e-02 | 4.577434e-15 | 9.193031e-01 | 6.504651e-16 | 8.309912e-06 | 1.605456e-10 | 3.829821e-05 | 
| 1 | 9.999994e-01 | 2.084938e-14 | 1.802991e-07 | 9.142812e-08 | 2.974247e-13 | 1.974890e-08 | 1.239963e-07 | 6.981888e-10 | 7.344214e-08 | 4.191539e-09 | 
| 2 | 1.046616e-13 | 2.978779e-14 | 1.024991e-06 | 9.082198e-06 | 9.996915e-01 | 7.763704e-12 | 3.392148e-10 | 3.846663e-05 | 1.698541e-07 | 2.597982e-04 | 
| 3 | 2.370126e-06 | 9.976953e-01 | 1.409927e-03 | 1.626272e-04 | 2.117811e-04 | 3.694122e-06 | 1.530232e-07 | 2.715720e-04 | 2.421655e-04 | 3.429227e-07 | 
| 4 | 1.274777e-09 | 7.030934e-09 | 3.012307e-11 | 2.483380e-07 | 1.353990e-03 | 1.034125e-06 | 1.383914e-12 | 4.425891e-04 | 1.408601e-04 | 9.980612e-01 | 
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | 
| 59995 | 3.820039e-09 | 1.538996e-09 | 8.258637e-07 | 2.749041e-04 | 6.960596e-12 | 5.976085e-05 | 3.534885e-13 | 1.802701e-09 | 9.996623e-01 | 2.221414e-06 | 
| 59996 | 1.172233e-08 | 8.490205e-13 | 6.097040e-05 | 9.994088e-01 | 5.236348e-15 | 2.233540e-04 | 7.571549e-17 | 1.496442e-10 | 3.029155e-04 | 3.923683e-06 | 
| 59997 | 1.421463e-12 | 1.254891e-12 | 6.674051e-17 | 3.223426e-08 | 7.350002e-10 | 9.999967e-01 | 1.345264e-13 | 3.264989e-12 | 3.094598e-08 | 3.277646e-06 | 
| 59998 | 8.901869e-05 | 5.740756e-11 | 1.169889e-06 | 6.678182e-07 | 9.668894e-06 | 2.655783e-04 | 9.996337e-01 | 2.608537e-08 | 2.759978e-07 | 1.828723e-08 | 
| 59999 | 2.615832e-05 | 5.634042e-08 | 1.794981e-05 | 4.938689e-06 | 8.963324e-07 | 1.321769e-01 | 4.557107e-08 | 2.386909e-07 | 8.638437e-01 | 3.929073e-03 | 
60000 rows × 10 columns
In [10]:
                    Copied!
                    
                    
                # Or make plots...
atom.cnn.plot_hyperparameters()
# Or make plots...
atom.cnn.plot_hyperparameters()
        
        In [11]:
                    Copied!
                    
                    
                atom.plot_parallel_coordinate()
atom.plot_parallel_coordinate()