{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# Example: Multilabel classification\n", "--------------------------------\n", "\n", "This example shows how to use ATOM to solve a multilabel classification problem.\n", "\n", "The data used is a synthetic dataset created using sklearn's [make_multilabel_classification](https://scikit-learn.org/stable/modules/generated/sklearn.datasets.make_multilabel_classification.html) function." ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Load the data" ] }, { "cell_type": "code", "execution_count": 1, "metadata": { "tags": [] }, "outputs": [], "source": [ "# Import packages\n", "import pandas as pd\n", "from atom import ATOMClassifier\n", "from sklearn.datasets import make_multilabel_classification" ] }, { "cell_type": "code", "execution_count": 2, "metadata": { "tags": [] }, "outputs": [], "source": [ "# Create data\n", "X, y = make_multilabel_classification(n_samples=300, n_classes=3, random_state=1)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Run the pipeline" ] }, { "cell_type": "code", "execution_count": 3, "metadata": { "tags": [] }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "<< ================== ATOM ================== >>\n", "\n", "Configuration ==================== >>\n", "Algorithm task: Multilabel classification.\n", "\n", "Dataset stats ==================== >>\n", "Shape: (300, 23)\n", "Train set size: 240\n", "Test set size: 60\n", "-------------------------------------\n", "Memory: 51.73 kB\n", "Scaled: False\n", "Outlier values: 29 (0.5%)\n", "\n" ] } ], "source": [ "# Note that for multioutput tasks, you must specify the `y` keyword\n", "atom = ATOMClassifier(X, y=y, verbose=2, random_state=1)" ] }, { "cell_type": "code", "execution_count": 4, "metadata": { "tags": [] }, "outputs": [ { "data": { "text/html": [ "
\n", " | acronym | \n", "fullname | \n", "estimator | \n", "module | \n", "handles_missing | \n", "needs_scaling | \n", "accepts_sparse | \n", "native_multilabel | \n", "native_multioutput | \n", "validation | \n", "supports_engines | \n", "
---|---|---|---|---|---|---|---|---|---|---|---|
0 | \n", "Tree | \n", "DecisionTree | \n", "DecisionTreeClassifier | \n", "sklearn.tree._classes | \n", "True | \n", "False | \n", "True | \n", "True | \n", "True | \n", "None | \n", "sklearn | \n", "
1 | \n", "ETree | \n", "ExtraTree | \n", "ExtraTreeClassifier | \n", "sklearn.tree._classes | \n", "False | \n", "False | \n", "True | \n", "True | \n", "True | \n", "None | \n", "sklearn | \n", "
2 | \n", "ET | \n", "ExtraTrees | \n", "ExtraTreesClassifier | \n", "sklearn.ensemble._forest | \n", "False | \n", "False | \n", "True | \n", "True | \n", "True | \n", "None | \n", "sklearn | \n", "
3 | \n", "KNN | \n", "KNearestNeighbors | \n", "KNeighborsClassifier | \n", "sklearn.neighbors._classification | \n", "False | \n", "True | \n", "True | \n", "True | \n", "True | \n", "None | \n", "sklearn, sklearnex, cuml | \n", "
4 | \n", "MLP | \n", "MultiLayerPerceptron | \n", "MLPClassifier | \n", "sklearn.neural_network._multilayer_perceptron | \n", "False | \n", "True | \n", "True | \n", "True | \n", "False | \n", "max_iter | \n", "sklearn | \n", "
5 | \n", "RNN | \n", "RadiusNearestNeighbors | \n", "RadiusNeighborsClassifier | \n", "sklearn.neighbors._classification | \n", "False | \n", "True | \n", "True | \n", "True | \n", "True | \n", "None | \n", "sklearn | \n", "
6 | \n", "RF | \n", "RandomForest | \n", "RandomForestClassifier | \n", "sklearn.ensemble._forest | \n", "False | \n", "False | \n", "True | \n", "True | \n", "True | \n", "None | \n", "sklearn, sklearnex, cuml | \n", "
7 | \n", "Ridge | \n", "Ridge | \n", "RidgeClassifier | \n", "sklearn.linear_model._ridge | \n", "False | \n", "True | \n", "True | \n", "True | \n", "False | \n", "None | \n", "sklearn, sklearnex, cuml | \n", "