{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# Example: Memory considerations\n", "--------------------------------\n", "\n", "This example shows how to use the `memory` parameter to make efficient use of the available memory.\n", "\n", "The data used is a variation on the [Australian weather dataset](https://www.kaggle.com/jsphyg/weather-dataset-rattle-package) from Kaggle. You can download it from [here](https://github.com/tvdboom/ATOM/blob/master/examples/datasets/weatherAUS.csv). The goal of this dataset is to predict whether or not it will rain tomorrow by training a binary classifier on the target `RainTomorrow`." ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Load the data" ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [], "source": [ "# Import packages\n", "import os\n", "import tempfile\n", "import pandas as pd\n", "from atom import ATOMClassifier" ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", " | Location | \n", "MinTemp | \n", "MaxTemp | \n", "Rainfall | \n", "Evaporation | \n", "Sunshine | \n", "WindGustDir | \n", "WindGustSpeed | \n", "WindDir9am | \n", "WindDir3pm | \n", "... | \n", "Humidity9am | \n", "Humidity3pm | \n", "Pressure9am | \n", "Pressure3pm | \n", "Cloud9am | \n", "Cloud3pm | \n", "Temp9am | \n", "Temp3pm | \n", "RainToday | \n", "RainTomorrow | \n", "
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | \n", "MelbourneAirport | \n", "18.0 | \n", "26.9 | \n", "21.4 | \n", "7.0 | \n", "8.9 | \n", "SSE | \n", "41.0 | \n", "W | \n", "SSE | \n", "... | \n", "95.0 | \n", "54.0 | \n", "1019.5 | \n", "1017.0 | \n", "8.0 | \n", "5.0 | \n", "18.5 | \n", "26.0 | \n", "Yes | \n", "0 | \n", "
1 | \n", "Adelaide | \n", "17.2 | \n", "23.4 | \n", "0.0 | \n", "NaN | \n", "NaN | \n", "S | \n", "41.0 | \n", "S | \n", "WSW | \n", "... | \n", "59.0 | \n", "36.0 | \n", "1015.7 | \n", "1015.7 | \n", "NaN | \n", "NaN | \n", "17.7 | \n", "21.9 | \n", "No | \n", "0 | \n", "
2 | \n", "Cairns | \n", "18.6 | \n", "24.6 | \n", "7.4 | \n", "3.0 | \n", "6.1 | \n", "SSE | \n", "54.0 | \n", "SSE | \n", "SE | \n", "... | \n", "78.0 | \n", "57.0 | \n", "1018.7 | \n", "1016.6 | \n", "3.0 | \n", "3.0 | \n", "20.8 | \n", "24.1 | \n", "Yes | \n", "0 | \n", "
3 | \n", "Portland | \n", "13.6 | \n", "16.8 | \n", "4.2 | \n", "1.2 | \n", "0.0 | \n", "ESE | \n", "39.0 | \n", "ESE | \n", "ESE | \n", "... | \n", "76.0 | \n", "74.0 | \n", "1021.4 | \n", "1020.5 | \n", "7.0 | \n", "8.0 | \n", "15.6 | \n", "16.0 | \n", "Yes | \n", "1 | \n", "
4 | \n", "Walpole | \n", "16.4 | \n", "19.9 | \n", "0.0 | \n", "NaN | \n", "NaN | \n", "SE | \n", "44.0 | \n", "SE | \n", "SE | \n", "... | \n", "78.0 | \n", "70.0 | \n", "1019.4 | \n", "1018.9 | \n", "NaN | \n", "NaN | \n", "17.4 | \n", "18.1 | \n", "No | \n", "0 | \n", "
5 rows × 22 columns
\n", "\n", " | Location | \n", "MinTemp | \n", "MaxTemp | \n", "Rainfall | \n", "Evaporation | \n", "Sunshine | \n", "WindGustDir | \n", "WindGustSpeed | \n", "WindDir9am | \n", "WindDir3pm | \n", "... | \n", "Humidity9am | \n", "Humidity3pm | \n", "Pressure9am | \n", "Pressure3pm | \n", "Cloud9am | \n", "Cloud3pm | \n", "Temp9am | \n", "Temp3pm | \n", "RainToday | \n", "RainTomorrow | \n", "
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | \n", "0.070767 | \n", "13.0 | \n", "30.500000 | \n", "0.000000 | \n", "6.80000 | \n", "10.000000 | \n", "0.272677 | \n", "59.0 | \n", "0.254995 | \n", "0.282496 | \n", "... | \n", "19.000000 | \n", "8.00000 | \n", "1013.599976 | \n", "1008.000000 | \n", "0.000000 | \n", "2.00000 | \n", "19.600000 | \n", "29.900000 | \n", "0.0 | \n", "0 | \n", "
1 | \n", "0.130163 | \n", "8.8 | \n", "25.200001 | \n", "0.000000 | \n", "5.00000 | \n", "7.614201 | \n", "0.285167 | \n", "50.0 | \n", "0.26967 | \n", "0.278696 | \n", "... | \n", "68.842218 | \n", "51.50239 | \n", "1011.200012 | \n", "1006.500000 | \n", "4.446657 | \n", "3.00000 | \n", "15.900000 | \n", "23.700001 | \n", "0.0 | \n", "1 | \n", "
2 | \n", "0.262043 | \n", "19.9 | \n", "26.600000 | \n", "8.000000 | \n", "5.46491 | \n", "7.614201 | \n", "0.26658 | \n", "57.0 | \n", "0.254995 | \n", "0.250291 | \n", "... | \n", "81.000000 | \n", "81.00000 | \n", "1013.099976 | \n", "1008.599976 | \n", "4.446657 | \n", "4.50922 | \n", "24.500000 | \n", "24.700001 | \n", "1.0 | \n", "1 | \n", "
3 | \n", "0.183912 | \n", "19.6 | \n", "31.900000 | \n", "2.600000 | \n", "5.46491 | \n", "7.614201 | \n", "0.26658 | \n", "59.0 | \n", "0.269775 | \n", "0.220975 | \n", "... | \n", "70.000000 | \n", "42.00000 | \n", "1001.200012 | \n", "1002.400024 | \n", "2.000000 | \n", "8.00000 | \n", "25.799999 | \n", "22.000000 | \n", "1.0 | \n", "0 | \n", "
4 | \n", "0.258569 | \n", "15.3 | \n", "22.400000 | \n", "16.000000 | \n", "4.20000 | \n", "3.300000 | \n", "0.194464 | \n", "39.0 | \n", "0.245824 | \n", "0.189182 | \n", "... | \n", "83.000000 | \n", "63.00000 | \n", "1025.500000 | \n", "1023.599976 | \n", "6.000000 | \n", "6.00000 | \n", "16.900000 | \n", "21.100000 | \n", "1.0 | \n", "1 | \n", "
... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "
142188 | \n", "0.278746 | \n", "9.0 | \n", "21.799999 | \n", "0.000000 | \n", "5.46491 | \n", "7.614201 | \n", "0.158276 | \n", "33.0 | \n", "0.203597 | \n", "0.277443 | \n", "... | \n", "44.000000 | \n", "38.00000 | \n", "1017.660981 | \n", "1015.270396 | \n", "4.446657 | \n", "4.50922 | \n", "16.600000 | \n", "21.100000 | \n", "0.0 | \n", "1 | \n", "
142189 | \n", "0.307562 | \n", "11.5 | \n", "19.200001 | \n", "0.800000 | \n", "2.00000 | \n", "7.000000 | \n", "0.158276 | \n", "22.0 | \n", "0.143946 | \n", "0.187433 | \n", "... | \n", "73.000000 | \n", "52.00000 | \n", "1021.299988 | \n", "1018.799988 | \n", "3.000000 | \n", "4.00000 | \n", "17.100000 | \n", "18.400000 | \n", "0.0 | \n", "0 | \n", "
142190 | \n", "0.197839 | \n", "17.5 | \n", "29.100000 | \n", "35.599998 | \n", "5.46491 | \n", "7.614201 | \n", "0.158276 | \n", "33.0 | \n", "0.203597 | \n", "0.180537 | \n", "... | \n", "77.000000 | \n", "46.00000 | \n", "1015.200012 | \n", "1013.700012 | \n", "4.446657 | \n", "4.50922 | \n", "21.000000 | \n", "28.799999 | \n", "1.0 | \n", "0 | \n", "
142191 | \n", "0.371853 | \n", "5.9 | \n", "18.000000 | \n", "0.400000 | \n", "0.80000 | \n", "6.700000 | \n", "0.285167 | \n", "26.0 | \n", "0.254995 | \n", "0.278696 | \n", "... | \n", "92.000000 | \n", "65.00000 | \n", "1028.000000 | \n", "1025.300049 | \n", "3.000000 | \n", "2.00000 | \n", "9.400000 | \n", "16.600000 | \n", "0.0 | \n", "0 | \n", "
142192 | \n", "0.297818 | \n", "10.2 | \n", "18.100000 | \n", "0.200000 | \n", "5.46491 | \n", "7.614201 | \n", "0.205887 | \n", "24.0 | \n", "0.150067 | \n", "0.221562 | \n", "... | \n", "84.000000 | \n", "94.00000 | \n", "1018.099976 | \n", "1016.000000 | \n", "4.446657 | \n", "4.50922 | \n", "15.300000 | \n", "16.000000 | \n", "0.0 | \n", "0 | \n", "
142193 rows × 22 columns
\n", "