{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# Example: Memory considerations\n", "--------------------------------\n", "\n", "This example shows how to use the `memory` parameter to make efficient use of the available memory.\n", "\n", "The data used is a variation on the [Australian weather dataset](https://www.kaggle.com/jsphyg/weather-dataset-rattle-package) from Kaggle. You can download it from [here](https://github.com/tvdboom/ATOM/blob/master/examples/datasets/weatherAUS.csv). The goal of this dataset is to predict whether or not it will rain tomorrow training a binary classifier on target `RainTomorrow`." ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Load the data" ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [], "source": [ "# Import packages\n", "import os\n", "import tempfile\n", "import pandas as pd\n", "from atom import ATOMClassifier" ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
LocationMinTempMaxTempRainfallEvaporationSunshineWindGustDirWindGustSpeedWindDir9amWindDir3pm...Humidity9amHumidity3pmPressure9amPressure3pmCloud9amCloud3pmTemp9amTemp3pmRainTodayRainTomorrow
0MelbourneAirport18.026.921.47.08.9SSE41.0WSSE...95.054.01019.51017.08.05.018.526.0Yes0
1Adelaide17.223.40.0NaNNaNS41.0SWSW...59.036.01015.71015.7NaNNaN17.721.9No0
2Cairns18.624.67.43.06.1SSE54.0SSESE...78.057.01018.71016.63.03.020.824.1Yes0
3Portland13.616.84.21.20.0ESE39.0ESEESE...76.074.01021.41020.57.08.015.616.0Yes1
4Walpole16.419.90.0NaNNaNSE44.0SESE...78.070.01019.41018.9NaNNaN17.418.1No0
\n", "

5 rows × 22 columns

\n", "
" ], "text/plain": [ " Location MinTemp MaxTemp Rainfall Evaporation Sunshine \\\n", "0 MelbourneAirport 18.0 26.9 21.4 7.0 8.9 \n", "1 Adelaide 17.2 23.4 0.0 NaN NaN \n", "2 Cairns 18.6 24.6 7.4 3.0 6.1 \n", "3 Portland 13.6 16.8 4.2 1.2 0.0 \n", "4 Walpole 16.4 19.9 0.0 NaN NaN \n", "\n", " WindGustDir WindGustSpeed WindDir9am WindDir3pm ... Humidity9am \\\n", "0 SSE 41.0 W SSE ... 95.0 \n", "1 S 41.0 S WSW ... 59.0 \n", "2 SSE 54.0 SSE SE ... 78.0 \n", "3 ESE 39.0 ESE ESE ... 76.0 \n", "4 SE 44.0 SE SE ... 78.0 \n", "\n", " Humidity3pm Pressure9am Pressure3pm Cloud9am Cloud3pm Temp9am \\\n", "0 54.0 1019.5 1017.0 8.0 5.0 18.5 \n", "1 36.0 1015.7 1015.7 NaN NaN 17.7 \n", "2 57.0 1018.7 1016.6 3.0 3.0 20.8 \n", "3 74.0 1021.4 1020.5 7.0 8.0 15.6 \n", "4 70.0 1019.4 1018.9 NaN NaN 17.4 \n", "\n", " Temp3pm RainToday RainTomorrow \n", "0 26.0 Yes 0 \n", "1 21.9 No 0 \n", "2 24.1 Yes 0 \n", "3 16.0 Yes 1 \n", "4 18.1 No 0 \n", "\n", "[5 rows x 22 columns]" ] }, "execution_count": 6, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Load data\n", "X = pd.read_csv(\"./datasets/weatherAUS.csv\")\n", "\n", "# Let's have a look\n", "X.head()" ] }, { "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [], "source": [ "# Define a temp directory to store the files in this example\n", "tempdir = tempfile.gettempdir()" ] }, { "cell_type": "code", "execution_count": 8, "metadata": {}, "outputs": [], "source": [ "def get_size(filepath):\n", " \"\"\"Return the size of the object in MB.\"\"\"\n", " return f\"{os.path.getsize(filepath + '.pkl') / 1e6:.2f}MB\"" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Run the pipeline" ] }, { "cell_type": "code", "execution_count": 9, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "<< ================== ATOM ================== >>\n", "\n", "Configuration ==================== >>\n", "Algorithm task: Binary classification.\n", "\n", "Dataset stats 
==================== >>\n", "Shape: (142193, 22)\n", "Train set size: 113755\n", "Test set size: 28438\n", "-------------------------------------\n", "Memory: 25.03 MB\n", "Scaled: False\n", "Missing values: 316559 (10.1%)\n", "Categorical features: 5 (23.8%)\n", "Duplicates: 45 (0.0%)\n", "\n" ] } ], "source": [ "atom = ATOMClassifier(X, y=\"RainTomorrow\", verbose=2)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Note that the dataset takes ~25MB. We can reduce the size of the dataset using \n", "the shrink method, which reduces the dtypes to their smallest possible value." ] }, { "cell_type": "code", "execution_count": 10, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "Location            object\n", "MinTemp            float64\n", "MaxTemp            float64\n", "Rainfall           float64\n", "Evaporation        float64\n", "Sunshine           float64\n", "WindGustDir         object\n", "WindGustSpeed      float64\n", "WindDir9am          object\n", "WindDir3pm          object\n", "WindSpeed9am       float64\n", "WindSpeed3pm       float64\n", "Humidity9am        float64\n", "Humidity3pm        float64\n", "Pressure9am        float64\n", "Pressure3pm        float64\n", "Cloud9am           float64\n", "Cloud3pm           float64\n", "Temp9am            float64\n", "Temp3pm            float64\n", "RainToday           object\n", "RainTomorrow         int64\n", "dtype: object" ] }, "execution_count": 10, "metadata": {}, "output_type": "execute_result" } ], "source": [ "atom.dtypes" ] }, { "cell_type": "code", "execution_count": 11, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "The column dtypes are successfully converted.\n" ] } ], "source": [ "atom.shrink(str2cat=True)" ] }, { "cell_type": "code", "execution_count": 12, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "Location         category\n", "MinTemp           Float32\n", "MaxTemp           Float32\n", "Rainfall          Float32\n", "Evaporation       Float32\n", "Sunshine          Float32\n", "WindGustDir      category\n", "WindGustSpeed       Int16\n", "WindDir9am       category\n", "WindDir3pm       category\n", "WindSpeed9am        Int16\n", "WindSpeed3pm         Int8\n", "Humidity9am          Int8\n", "Humidity3pm          
Int8\n", "Pressure9am Float32\n", "Pressure3pm Float32\n", "Cloud9am Int8\n", "Cloud3pm Int8\n", "Temp9am Float32\n", "Temp3pm Float32\n", "RainToday category\n", "RainTomorrow Int8\n", "dtype: object" ] }, "execution_count": 12, "metadata": {}, "output_type": "execute_result" } ], "source": [ "atom.dtypes" ] }, { "cell_type": "code", "execution_count": 13, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Dataset stats ==================== >>\n", "Shape: (142193, 22)\n", "Train set size: 113755\n", "Test set size: 28438\n", "-------------------------------------\n", "Memory: 9.67 MB\n", "Scaled: False\n", "Missing values: 316559 (10.1%)\n", "Categorical features: 5 (23.8%)\n", "Duplicates: 45 (0.0%)\n" ] } ], "source": [ "# Let's check the memory usage again...\n", "# Notice the huge drop!\n", "atom.stats()" ] }, { "cell_type": "code", "execution_count": 14, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Fitting Imputer...\n", "Imputing missing values...\n", " --> Imputing 637 missing values with mean (12.19) in column MinTemp.\n", " --> Imputing 322 missing values with mean (23.23) in column MaxTemp.\n", " --> Imputing 1406 missing values with mean (2.37) in column Rainfall.\n", " --> Imputing 60843 missing values with mean (5.48) in column Evaporation.\n", " --> Imputing 67816 missing values with mean (7.63) in column Sunshine.\n", " --> Imputing 9330 missing values with most_frequent (W) in column WindGustDir.\n", " --> Imputing 9270 missing values with mean (40.0) in column WindGustSpeed.\n", " --> Imputing 10013 missing values with most_frequent (N) in column WindDir9am.\n", " --> Imputing 3778 missing values with most_frequent (SE) in column WindDir3pm.\n", " --> Imputing 1348 missing values with mean (14.02) in column WindSpeed9am.\n", " --> Imputing 2630 missing values with mean (18.64) in column WindSpeed3pm.\n", " --> Imputing 1774 missing values with mean (68.82) in column 
Humidity9am.\n", " --> Imputing 3610 missing values with mean (51.45) in column Humidity3pm.\n", " --> Imputing 14014 missing values with mean (1017.64) in column Pressure9am.\n", " --> Imputing 13981 missing values with mean (1015.25) in column Pressure3pm.\n", " --> Imputing 53657 missing values with mean (4.44) in column Cloud9am.\n", " --> Imputing 57094 missing values with mean (4.5) in column Cloud3pm.\n", " --> Imputing 904 missing values with mean (16.99) in column Temp9am.\n", " --> Imputing 2726 missing values with mean (21.69) in column Temp3pm.\n", " --> Imputing 1406 missing values with most_frequent (No) in column RainToday.\n", "Fitting Encoder...\n", "Encoding categorical columns...\n", " --> Target-encoding feature Location. Contains 49 classes.\n", " --> Target-encoding feature WindGustDir. Contains 16 classes.\n", " --> Target-encoding feature WindDir9am. Contains 16 classes.\n", " --> Target-encoding feature WindDir3pm. Contains 16 classes.\n", " --> Ordinal-encoding feature RainToday. 
Contains 2 classes.\n", "\n", "Training ========================= >>\n", "Models: LDA\n", "Metric: f1\n", "\n", "\n", "Results for LinearDiscriminantAnalysis:\n", "Fit ---------------------------------------------\n", "Train evaluation --> f1: 0.5906\n", "Test evaluation --> f1: 0.5904\n", "Time elapsed: 0.942s\n", "-------------------------------------------------\n", "Time: 0.942s\n", "\n", "\n", "Final results ==================== >>\n", "Total time: 1.005s\n", "-------------------------------------\n", "LinearDiscriminantAnalysis --> f1: 0.5904\n", "Successfully created new branch: b2.\n", "Fitting Scaler...\n", "Scaling features...\n", "\n", "Training ========================= >>\n", "Models: LDA_scaled\n", "Metric: f1\n", "\n", "\n", "Results for LinearDiscriminantAnalysis:\n", "Fit ---------------------------------------------\n", "Train evaluation --> f1: 0.5906\n", "Test evaluation --> f1: 0.5904\n", "Time elapsed: 0.956s\n", "-------------------------------------------------\n", "Time: 0.956s\n", "\n", "\n", "Final results ==================== >>\n", "Total time: 1.017s\n", "-------------------------------------\n", "LinearDiscriminantAnalysis --> f1: 0.5904\n", "Successfully created new branch: b3.\n", "Fitting Normalizer...\n", "Normalizing features...\n", "\n", "Training ========================= >>\n", "Models: LDA_norm\n", "Metric: f1\n", "\n", "\n", "Results for LinearDiscriminantAnalysis:\n", "Fit ---------------------------------------------\n", "Train evaluation --> f1: 0.5955\n", "Test evaluation --> f1: 0.594\n", "Time elapsed: 0.929s\n", "-------------------------------------------------\n", "Time: 0.929s\n", "\n", "\n", "Final results ==================== >>\n", "Total time: 0.991s\n", "-------------------------------------\n", "LinearDiscriminantAnalysis --> f1: 0.594\n" ] } ], "source": [ "# Now, we create some new branches to train models with different transformers\n", "atom.impute()\n", "atom.encode()\n", "atom.run(\"LDA\")\n", "\n", 
"atom.branch = \"b2\"\n", "atom.scale()\n", "atom.run(\"LDA_scaled\")\n", "\n", "atom.branch = \"b3_from_main\"\n", "atom.normalize()\n", "atom.run(\"LDA_norm\")" ] }, { "cell_type": "code", "execution_count": 15, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "ATOMClassifier successfully saved.\n" ] }, { "data": { "text/plain": [ "'83.93MB'" ] }, "execution_count": 15, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# If we save atom now, notice the size\n", "# This is because atom keeps a copy of every branch in memory\n", "filename = os.path.join(tempdir, \"atom1\")\n", "atom.save(filename)\n", "get_size(filename)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "To avoid large memory usages, set the `memory` parameter." ] }, { "cell_type": "code", "execution_count": 16, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "<< ================== ATOM ================== >>\n", "\n", "Configuration ==================== >>\n", "Algorithm task: Binary classification.\n", "Cache storage: C:\\Users\\Mavs\\AppData\\Local\\Temp\\joblib\n", "\n", "Dataset stats ==================== >>\n", "Shape: (142193, 22)\n", "Train set size: 113755\n", "Test set size: 28438\n", "-------------------------------------\n", "Memory: 25.03 MB\n", "Scaled: False\n", "Missing values: 316559 (10.1%)\n", "Categorical features: 5 (23.8%)\n", "Duplicates: 45 (0.0%)\n", "\n", "The column dtypes are successfully converted.\n", "Loading cached results for Imputer...\n", "Loading cached results for Encoder...\n", "\n", "Training ========================= >>\n", "Models: LDA\n", "Metric: f1\n", "\n", "\n", "Results for LinearDiscriminantAnalysis:\n", "Fit ---------------------------------------------\n", "Train evaluation --> f1: 0.5914\n", "Test evaluation --> f1: 0.5892\n", "Time elapsed: 0.953s\n", "-------------------------------------------------\n", "Time: 0.953s\n", "\n", "\n", "Final results 
==================== >>\n", "Total time: 1.015s\n", "-------------------------------------\n", "LinearDiscriminantAnalysis --> f1: 0.5892\n", "Successfully created new branch: b2.\n", "Loading cached results for Scaler...\n", "\n", "Training ========================= >>\n", "Models: LDA_scaled\n", "Metric: f1\n", "\n", "\n", "Results for LinearDiscriminantAnalysis:\n", "Fit ---------------------------------------------\n", "Train evaluation --> f1: 0.5914\n", "Test evaluation --> f1: 0.5892\n", "Time elapsed: 0.971s\n", "-------------------------------------------------\n", "Time: 0.971s\n", "\n", "\n", "Final results ==================== >>\n", "Total time: 1.028s\n", "-------------------------------------\n", "LinearDiscriminantAnalysis --> f1: 0.5892\n", "Successfully created new branch: b3.\n", "Loading cached results for Normalizer...\n", "\n", "Training ========================= >>\n", "Models: LDA_norm\n", "Metric: f1\n", "\n", "\n", "Results for LinearDiscriminantAnalysis:\n", "Fit ---------------------------------------------\n", "Train evaluation --> f1: 0.5957\n", "Test evaluation --> f1: 0.5935\n", "Time elapsed: 0.924s\n", "-------------------------------------------------\n", "Time: 0.924s\n", "\n", "\n", "Final results ==================== >>\n", "Total time: 0.985s\n", "-------------------------------------\n", "LinearDiscriminantAnalysis --> f1: 0.5935\n" ] } ], "source": [ "atom = ATOMClassifier(X, y=\"RainTomorrow\", memory=tempdir, verbose=1, random_state=1)\n", "atom.shrink(str2cat=True)\n", "atom.impute()\n", "atom.encode()\n", "atom.run(\"LDA\")\n", "\n", "atom.branch = \"b2\"\n", "atom.scale()\n", "atom.run(\"LDA_scaled\")\n", "\n", "atom.branch = \"b3_from_main\"\n", "atom.normalize()\n", "atom.run(\"LDA_norm\")" ] }, { "cell_type": "code", "execution_count": 17, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "ATOMClassifier successfully saved.\n" ] }, { "data": { "text/plain": [ "'24.78MB'" ] }, 
"execution_count": 17, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# And now, it only takes a fraction of the previous size\n", "# This is because the data of inactive branches is now stored locally\n", "filename = os.path.join(tempdir, \"atom2\")\n", "atom.save(filename)\n", "get_size(filename)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Additionally, repeated calls to the same transformers with the same data will use the cached results. \n", "Don't forget to specify the `random_state` parameter to ensure the data remains exactly the same." ] }, { "cell_type": "code", "execution_count": 18, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "<< ================== ATOM ================== >>\n", "\n", "Configuration ==================== >>\n", "Algorithm task: Binary classification.\n", "Cache storage: C:\\Users\\Mavs\\AppData\\Local\\Temp\\joblib\n", "\n", "Dataset stats ==================== >>\n", "Shape: (142193, 22)\n", "Train set size: 113755\n", "Test set size: 28438\n", "-------------------------------------\n", "Memory: 25.03 MB\n", "Scaled: False\n", "Missing values: 316559 (10.1%)\n", "Categorical features: 5 (23.8%)\n", "Duplicates: 45 (0.0%)\n", "\n", "The column dtypes are successfully converted.\n" ] } ], "source": [ "atom = ATOMClassifier(X, y=\"RainTomorrow\", memory=tempdir, verbose=1, random_state=1)\n", "atom.shrink(str2cat=True)" ] }, { "cell_type": "code", "execution_count": 19, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Loading cached results for Imputer...\n", "Loading cached results for Encoder...\n" ] } ], "source": [ "# Note the transformers are no longer fitted,\n", "# instead the results are immediately read from cache\n", "atom.impute()\n", "atom.encode()" ] }, { "cell_type": "code", "execution_count": 20, "metadata": {}, "outputs": [ { "data": { "text/html": [ "<div>
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " 
\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
LocationMinTempMaxTempRainfallEvaporationSunshineWindGustDirWindGustSpeedWindDir9amWindDir3pm...Humidity9amHumidity3pmPressure9amPressure3pmCloud9amCloud3pmTemp9amTemp3pmRainTodayRainTomorrow
00.07076713.030.5000000.0000006.8000010.0000000.27267759.00.2549950.282496...19.0000008.000001013.5999761008.0000000.0000002.0000019.60000029.9000000.00
10.1301638.825.2000010.0000005.000007.6142010.28516750.00.269670.278696...68.84221851.502391011.2000121006.5000004.4466573.0000015.90000023.7000010.01
20.26204319.926.6000008.0000005.464917.6142010.2665857.00.2549950.250291...81.00000081.000001013.0999761008.5999764.4466574.5092224.50000024.7000011.01
30.18391219.631.9000002.6000005.464917.6142010.2665859.00.2697750.220975...70.00000042.000001001.2000121002.4000242.0000008.0000025.79999922.0000001.00
40.25856915.322.40000016.0000004.200003.3000000.19446439.00.2458240.189182...83.00000063.000001025.5000001023.5999766.0000006.0000016.90000021.1000001.01
..................................................................
1421880.2787469.021.7999990.0000005.464917.6142010.15827633.00.2035970.277443...44.00000038.000001017.6609811015.2703964.4466574.5092216.60000021.1000000.01
1421890.30756211.519.2000010.8000002.000007.0000000.15827622.00.1439460.187433...73.00000052.000001021.2999881018.7999883.0000004.0000017.10000018.4000000.00
1421900.19783917.529.10000035.5999985.464917.6142010.15827633.00.2035970.180537...77.00000046.000001015.2000121013.7000124.4466574.5092221.00000028.7999991.00
1421910.3718535.918.0000000.4000000.800006.7000000.28516726.00.2549950.278696...92.00000065.000001028.0000001025.3000493.0000002.000009.40000016.6000000.00
1421920.29781810.218.1000000.2000005.464917.6142010.20588724.00.1500670.221562...84.00000094.000001018.0999761016.0000004.4466574.5092215.30000016.0000000.00
\n", "

142193 rows × 22 columns

\n", "
" ], "text/plain": [ " Location MinTemp MaxTemp Rainfall Evaporation Sunshine \\\n", "0 0.070767 13.0 30.500000 0.000000 6.80000 10.000000 \n", "1 0.130163 8.8 25.200001 0.000000 5.00000 7.614201 \n", "2 0.262043 19.9 26.600000 8.000000 5.46491 7.614201 \n", "3 0.183912 19.6 31.900000 2.600000 5.46491 7.614201 \n", "4 0.258569 15.3 22.400000 16.000000 4.20000 3.300000 \n", "... ... ... ... ... ... ... \n", "142188 0.278746 9.0 21.799999 0.000000 5.46491 7.614201 \n", "142189 0.307562 11.5 19.200001 0.800000 2.00000 7.000000 \n", "142190 0.197839 17.5 29.100000 35.599998 5.46491 7.614201 \n", "142191 0.371853 5.9 18.000000 0.400000 0.80000 6.700000 \n", "142192 0.297818 10.2 18.100000 0.200000 5.46491 7.614201 \n", "\n", " WindGustDir WindGustSpeed WindDir9am WindDir3pm ... Humidity9am \\\n", "0 0.272677 59.0 0.254995 0.282496 ... 19.000000 \n", "1 0.285167 50.0 0.26967 0.278696 ... 68.842218 \n", "2 0.26658 57.0 0.254995 0.250291 ... 81.000000 \n", "3 0.26658 59.0 0.269775 0.220975 ... 70.000000 \n", "4 0.194464 39.0 0.245824 0.189182 ... 83.000000 \n", "... ... ... ... ... ... ... \n", "142188 0.158276 33.0 0.203597 0.277443 ... 44.000000 \n", "142189 0.158276 22.0 0.143946 0.187433 ... 73.000000 \n", "142190 0.158276 33.0 0.203597 0.180537 ... 77.000000 \n", "142191 0.285167 26.0 0.254995 0.278696 ... 92.000000 \n", "142192 0.205887 24.0 0.150067 0.221562 ... 84.000000 \n", "\n", " Humidity3pm Pressure9am Pressure3pm Cloud9am Cloud3pm Temp9am \\\n", "0 8.00000 1013.599976 1008.000000 0.000000 2.00000 19.600000 \n", "1 51.50239 1011.200012 1006.500000 4.446657 3.00000 15.900000 \n", "2 81.00000 1013.099976 1008.599976 4.446657 4.50922 24.500000 \n", "3 42.00000 1001.200012 1002.400024 2.000000 8.00000 25.799999 \n", "4 63.00000 1025.500000 1023.599976 6.000000 6.00000 16.900000 \n", "... ... ... ... ... ... ... 
\n", "142188 38.00000 1017.660981 1015.270396 4.446657 4.50922 16.600000 \n", "142189 52.00000 1021.299988 1018.799988 3.000000 4.00000 17.100000 \n", "142190 46.00000 1015.200012 1013.700012 4.446657 4.50922 21.000000 \n", "142191 65.00000 1028.000000 1025.300049 3.000000 2.00000 9.400000 \n", "142192 94.00000 1018.099976 1016.000000 4.446657 4.50922 15.300000 \n", "\n", " Temp3pm RainToday RainTomorrow \n", "0 29.900000 0.0 0 \n", "1 23.700001 0.0 1 \n", "2 24.700001 1.0 1 \n", "3 22.000000 1.0 0 \n", "4 21.100000 1.0 1 \n", "... ... ... ... \n", "142188 21.100000 0.0 1 \n", "142189 18.400000 0.0 0 \n", "142190 28.799999 1.0 0 \n", "142191 16.600000 0.0 0 \n", "142192 16.000000 0.0 0 \n", "\n", "[142193 rows x 22 columns]" ] }, "execution_count": 20, "metadata": {}, "output_type": "execute_result" } ], "source": [ "atom.dataset" ] } ], "metadata": { "kernelspec": { "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.11.2" }, "toc": { "base_numbering": 1, "nav_menu": {}, "number_sections": true, "sideBar": true, "skip_h1_title": false, "title_cell": "Table of Contents", "title_sidebar": "Contents", "toc_cell": false, "toc_position": {}, "toc_section_display": true, "toc_window_display": false } }, "nbformat": 4, "nbformat_minor": 4 }