{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# Example: Holdout set\n", "----------------------\n", "\n", "This example shows when and how to use ATOM's holdout set in an exploration pipeline.\n", "\n", "The data used is a variation on the [Australian weather dataset](https://www.kaggle.com/jsphyg/weather-dataset-rattle-package) from Kaggle. You can download it from [here](https://github.com/tvdboom/ATOM/blob/master/examples/datasets/weatherAUS.csv). The goal of this dataset is to predict whether or not it will rain tomorrow by training a binary classifier on the target `RainTomorrow`." ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Load the data" ] }, { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "UserWarning: The pandas version installed (1.5.3) does not match the supported pandas version in Modin (1.5.2). This may cause undesired side effects!\n" ] } ], "source": [ "# Import packages\n", "import pandas as pd\n", "from atom import ATOMClassifier" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", " | Location | \n", "MinTemp | \n", "MaxTemp | \n", "Rainfall | \n", "Evaporation | \n", "Sunshine | \n", "WindGustDir | \n", "WindGustSpeed | \n", "WindDir9am | \n", "WindDir3pm | \n", "... | \n", "Humidity9am | \n", "Humidity3pm | \n", "Pressure9am | \n", "Pressure3pm | \n", "Cloud9am | \n", "Cloud3pm | \n", "Temp9am | \n", "Temp3pm | \n", "RainToday | \n", "RainTomorrow | \n", "
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | \n", "MelbourneAirport | \n", "18.0 | \n", "26.9 | \n", "21.4 | \n", "7.0 | \n", "8.9 | \n", "SSE | \n", "41.0 | \n", "W | \n", "SSE | \n", "... | \n", "95.0 | \n", "54.0 | \n", "1019.5 | \n", "1017.0 | \n", "8.0 | \n", "5.0 | \n", "18.5 | \n", "26.0 | \n", "Yes | \n", "0 | \n", "
1 | \n", "Adelaide | \n", "17.2 | \n", "23.4 | \n", "0.0 | \n", "NaN | \n", "NaN | \n", "S | \n", "41.0 | \n", "S | \n", "WSW | \n", "... | \n", "59.0 | \n", "36.0 | \n", "1015.7 | \n", "1015.7 | \n", "NaN | \n", "NaN | \n", "17.7 | \n", "21.9 | \n", "No | \n", "0 | \n", "
2 | \n", "Cairns | \n", "18.6 | \n", "24.6 | \n", "7.4 | \n", "3.0 | \n", "6.1 | \n", "SSE | \n", "54.0 | \n", "SSE | \n", "SE | \n", "... | \n", "78.0 | \n", "57.0 | \n", "1018.7 | \n", "1016.6 | \n", "3.0 | \n", "3.0 | \n", "20.8 | \n", "24.1 | \n", "Yes | \n", "0 | \n", "
3 | \n", "Portland | \n", "13.6 | \n", "16.8 | \n", "4.2 | \n", "1.2 | \n", "0.0 | \n", "ESE | \n", "39.0 | \n", "ESE | \n", "ESE | \n", "... | \n", "76.0 | \n", "74.0 | \n", "1021.4 | \n", "1020.5 | \n", "7.0 | \n", "8.0 | \n", "15.6 | \n", "16.0 | \n", "Yes | \n", "1 | \n", "
4 | \n", "Walpole | \n", "16.4 | \n", "19.9 | \n", "0.0 | \n", "NaN | \n", "NaN | \n", "SE | \n", "44.0 | \n", "SE | \n", "SE | \n", "... | \n", "78.0 | \n", "70.0 | \n", "1019.4 | \n", "1018.9 | \n", "NaN | \n", "NaN | \n", "17.4 | \n", "18.1 | \n", "No | \n", "0 | \n", "
5 rows × 22 columns
\n", "\n", " | Location | \n", "MinTemp | \n", "MaxTemp | \n", "Rainfall | \n", "Evaporation | \n", "Sunshine | \n", "WindGustDir | \n", "WindGustSpeed | \n", "WindDir9am | \n", "WindDir3pm | \n", "... | \n", "Humidity9am | \n", "Humidity3pm | \n", "Pressure9am | \n", "Pressure3pm | \n", "Cloud9am | \n", "Cloud3pm | \n", "Temp9am | \n", "Temp3pm | \n", "RainToday | \n", "RainTomorrow | \n", "
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | \n", "PearceRAAF | \n", "13.3 | \n", "25.6 | \n", "0.0 | \n", "NaN | \n", "5.8 | \n", "WNW | \n", "52.0 | \n", "WNW | \n", "WNW | \n", "... | \n", "58.0 | \n", "51.0 | \n", "1010.8 | \n", "1009.6 | \n", "7.0 | \n", "7.0 | \n", "21.6 | \n", "24.8 | \n", "No | \n", "1 | \n", "
1 | \n", "Darwin | \n", "25.2 | \n", "30.2 | \n", "4.2 | \n", "3.4 | \n", "2.1 | \n", "WNW | \n", "76.0 | \n", "WNW | \n", "WNW | \n", "... | \n", "83.0 | \n", "85.0 | \n", "1005.9 | \n", "1003.4 | \n", "7.0 | \n", "7.0 | \n", "28.6 | \n", "27.4 | \n", "Yes | \n", "1 | \n", "
2 | \n", "Portland | \n", "8.1 | \n", "15.1 | \n", "4.4 | \n", "3.8 | \n", "7.5 | \n", "W | \n", "52.0 | \n", "SW | \n", "SSW | \n", "... | \n", "66.0 | \n", "60.0 | \n", "1013.6 | \n", "1017.4 | \n", "8.0 | \n", "7.0 | \n", "13.4 | \n", "13.2 | \n", "Yes | \n", "0 | \n", "
3 | \n", "Perth | \n", "10.5 | \n", "22.7 | \n", "0.0 | \n", "2.4 | \n", "9.2 | \n", "WNW | \n", "26.0 | \n", "NNE | \n", "NW | \n", "... | \n", "86.0 | \n", "68.0 | \n", "1016.2 | \n", "1014.3 | \n", "1.0 | \n", "3.0 | \n", "15.8 | \n", "20.8 | \n", "No | \n", "1 | \n", "
4 | \n", "MountGinini | \n", "14.7 | \n", "24.1 | \n", "102.2 | \n", "NaN | \n", "NaN | \n", "NW | \n", "52.0 | \n", "SW | \n", "W | \n", "... | \n", "100.0 | \n", "78.0 | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "16.5 | \n", "21.0 | \n", "Yes | \n", "0 | \n", "
... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "
14214 | \n", "Walpole | \n", "10.3 | \n", "16.1 | \n", "2.4 | \n", "NaN | \n", "NaN | \n", "NW | \n", "20.0 | \n", "NW | \n", "SSE | \n", "... | \n", "97.0 | \n", "78.0 | \n", "1024.4 | \n", "1022.9 | \n", "NaN | \n", "NaN | \n", "12.3 | \n", "14.7 | \n", "Yes | \n", "0 | \n", "
14215 | \n", "AliceSprings | \n", "24.8 | \n", "29.0 | \n", "0.2 | \n", "9.4 | \n", "0.0 | \n", "NNW | \n", "31.0 | \n", "NNE | \n", "SE | \n", "... | \n", "69.0 | \n", "56.0 | \n", "1009.4 | \n", "1007.1 | \n", "8.0 | \n", "8.0 | \n", "26.1 | \n", "27.1 | \n", "No | \n", "0 | \n", "
14216 | \n", "Darwin | \n", "26.2 | \n", "34.4 | \n", "0.0 | \n", "4.4 | \n", "9.0 | \n", "NW | \n", "33.0 | \n", "E | \n", "WNW | \n", "... | \n", "69.0 | \n", "65.0 | \n", "1009.5 | \n", "1006.0 | \n", "3.0 | \n", "6.0 | \n", "30.4 | \n", "32.0 | \n", "No | \n", "0 | \n", "
14217 | \n", "Cairns | \n", "23.9 | \n", "28.4 | \n", "2.6 | \n", "2.6 | \n", "0.6 | \n", "ENE | \n", "28.0 | \n", "NE | \n", "NE | \n", "... | \n", "72.0 | \n", "78.0 | \n", "1014.4 | \n", "1011.8 | \n", "7.0 | \n", "8.0 | \n", "27.5 | \n", "27.0 | \n", "Yes | \n", "1 | \n", "
14218 | \n", "Tuggeranong | \n", "9.5 | \n", "23.6 | \n", "0.0 | \n", "NaN | \n", "NaN | \n", "W | \n", "30.0 | \n", "NaN | \n", "W | \n", "... | \n", "68.0 | \n", "40.0 | \n", "1010.9 | \n", "1008.4 | \n", "NaN | \n", "NaN | \n", "14.9 | \n", "22.6 | \n", "No | \n", "1 | \n", "
14219 rows × 22 columns
\n", "