Example: Accelerating pipelines on GPU
This example shows how to accelerate a pipeline on a GPU using cuML.
The data used is a synthetic dataset created with sklearn's make_classification function.
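GPU acceleration with cuML requires a RAPIDS installation and a compatible NVIDIA GPU. A minimal sketch (not part of the original example) for choosing the `device` and `engine` arguments based on whether cuML is importable, falling back to scikit-learn on CPU otherwise:

```python
# Hypothetical helper: use cuML on GPU when available, else scikit-learn on CPU.
try:
    import cuml  # noqa: F401  (ships with RAPIDS; GPU-only)
    device, engine = "gpu", "cuml"
except ImportError:
    device, engine = "cpu", "sklearn"

print(f"device={device}, engine={engine}")
```

The resulting `device` and `engine` strings can then be passed straight to `ATOMClassifier`, as shown below.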
In [1]:
from atom import ATOMClassifier
from sklearn.datasets import make_classification
# Create a dummy dataset
X, y = make_classification(n_samples=100000, n_features=40)
In [2]:
atom = ATOMClassifier(X, y, device="gpu", engine="cuml", verbose=2)
<< ================== ATOM ================== >>
Algorithm task: binary classification.
GPU training enabled.
Backend engine: cuml.

Dataset stats ==================== >>
Shape: (100000, 41)
Memory: 32.80 MB
Scaled: True
Outlier values: 8127 (0.2%)
-------------------------------------
Train set size: 80000
Test set size: 20000
-------------------------------------

|   | dataset     | train       | test        |
| - | ----------- | ----------- | ----------- |
| 0 | 50006 (1.0) | 40005 (1.0) | 10001 (1.0) |
| 1 | 49994 (1.0) | 39995 (1.0) | 9999 (1.0)  |
In [3]:
atom.scale()
Fitting Scaler...
Scaling features...
In [13]:
atom.dataset
Out[13]:
| | x0 | x1 | x2 | x3 | x4 | x5 | x6 | x7 | x8 | x9 | ... | x31 | x32 | x33 | x34 | x35 | x36 | x37 | x38 | x39 | target |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 2.021646 | -0.634557 | -0.867811 | 1.103642 | 1.559011 | 0.122284 | -0.864821 | 1.411657 | 0.147997 | -2.269082 | ... | -0.489864 | 1.861048 | -0.353861 | 0.720823 | -1.522117 | -0.737707 | -1.573936 | -0.832174 | 0.203154 | 0 |
| 1 | -0.019885 | 0.846568 | -0.364059 | -1.091604 | -1.336692 | 0.186689 | -0.274142 | 0.020563 | 0.693235 | -1.908658 | ... | -1.610058 | -0.365231 | 0.284908 | 0.170156 | -0.236553 | -0.573761 | -0.107317 | -2.480178 | 0.420341 | 0 |
| 2 | 0.516618 | -0.013420 | -0.753879 | -0.488243 | 0.560051 | 0.395817 | -0.522523 | -1.083503 | -0.073398 | 0.383061 | ... | 0.966283 | 1.405546 | -0.658654 | 0.339090 | -1.615997 | -1.312444 | 0.984578 | 0.602858 | -1.110684 | 1 |
| 3 | 0.111861 | -0.966334 | 0.208509 | 0.494328 | -0.766835 | -0.003399 | -0.500449 | -0.530622 | -0.481663 | -1.146132 | ... | -0.304896 | 2.030211 | -1.189488 | -1.238600 | 1.658765 | -0.255644 | 0.572194 | 0.195496 | 0.617734 | 1 |
| 4 | 0.160135 | -0.873517 | 0.719142 | -2.020767 | 0.421435 | -1.941230 | 0.835615 | -1.178845 | 0.235273 | -0.328574 | ... | 1.633662 | -0.631118 | 1.814046 | 1.031754 | 0.328665 | 1.704483 | 2.153710 | -1.430552 | -0.543915 | 1 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 99995 | 1.100240 | 0.092581 | -0.346265 | 0.234024 | 0.590199 | 0.755019 | -1.688456 | -1.031070 | -0.620193 | -0.283336 | ... | 0.356480 | 1.346821 | -0.299087 | 2.343587 | -2.003646 | -0.933179 | 0.764255 | -0.233526 | -1.462311 | 1 |
| 99996 | -1.142596 | 0.321843 | -0.974006 | 0.390418 | 0.404722 | -0.324256 | -0.288176 | 1.009458 | 0.860912 | -0.191313 | ... | 0.044618 | -2.030135 | 1.448640 | -0.854798 | 1.441451 | 1.347461 | -0.937607 | 0.572504 | -0.787673 | 0 |
| 99997 | 1.658252 | 0.303637 | -0.020324 | 0.225917 | 0.154092 | -1.208507 | -0.199919 | 1.063016 | -0.395696 | -0.060886 | ... | 1.563345 | -1.261853 | -0.810122 | -0.503823 | 1.565602 | -1.264792 | -0.591644 | 1.588397 | 0.601721 | 0 |
| 99998 | -0.288042 | -1.139792 | 1.548338 | 0.501413 | 0.361604 | -0.315720 | -0.564607 | 1.500870 | 0.501768 | 0.649079 | ... | 0.344663 | 1.734476 | 0.660177 | 0.767554 | 1.461940 | 0.310189 | -1.469978 | 0.900132 | 1.114330 | 0 |
| 99999 | -3.093351 | -0.636463 | -0.449575 | 1.169980 | -1.041870 | -0.257173 | 2.072777 | -0.101111 | -0.956916 | -0.251162 | ... | 2.250647 | 0.746250 | -0.610311 | 0.445467 | -0.636288 | -0.187444 | 0.226108 | -0.186927 | -1.024960 | 1 |
100000 rows × 41 columns
In [4]:
print(f"Scaler used: {atom.standard}")
print(f"Scaler's module: {atom.standard.__class__.__module__}")
Scaler used: StandardScaler()
Scaler's module: cuml._thirdparty.sklearn.preprocessing._data
In [5]:
atom.run(models=["RF", "SGD", "XGB"])
Training ========================= >>
Models: RF, SGD, XGB
Metric: f1


Results for RandomForest:
Fit ---------------------------------------------
Train evaluation --> f1: 0.9726
Test evaluation --> f1: 0.9431
Time elapsed: 1.935s
-------------------------------------------------
Total time: 1.935s


Results for StochasticGradientDescent:
Fit ---------------------------------------------
Train evaluation --> f1: 0.9236
Test evaluation --> f1: 0.9219
Time elapsed: 02m:16s
-------------------------------------------------
Total time: 02m:16s


Results for XGBoost:
Fit ---------------------------------------------
Train evaluation --> f1: 0.9749
Test evaluation --> f1: 0.9437
Time elapsed: 6.394s
-------------------------------------------------
Total time: 6.394s


Final results ==================== >>
Total time: 02m:24s
-------------------------------------
RandomForest              --> f1: 0.9431
StochasticGradientDescent --> f1: 0.9219
XGBoost                   --> f1: 0.9437 !
In [6]:
atom.results
Out[6]:
| | score_train | score_test | time_fit | time |
|---|---|---|---|---|
| RF | 0.9726 | 0.9431 | 1.934512 | 1.934512 |
| SGD | 0.9236 | 0.9219 | 135.871493 | 135.871493 |
| XGB | 0.9749 | 0.9437 | 6.394416 | 6.394416 |
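The `time_fit` column makes the cost differences between the models explicit. As a quick sanity check (the numbers below are copied from the results table above, not recomputed):

```python
# Fit times in seconds, copied from atom.results above.
time_fit = {"RF": 1.934512, "SGD": 135.871493, "XGB": 6.394416}

fastest = min(time_fit, key=time_fit.get)
for model, seconds in sorted(time_fit.items(), key=lambda kv: kv[1]):
    print(f"{model:>3}: {seconds:8.2f}s ({seconds / time_fit[fastest]:5.1f}x {fastest})")
```

Note that the slow outlier here is StochasticGradientDescent, which (as the module check below shows) ran on the CPU via scikit-learn rather than on the GPU.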
In [7]:
for m in atom.models:
print(f"{m}'s module: {atom[m].estimator.__class__.__module__}")
RF's module: cuml.ensemble.randomforestclassifier
SGD's module: sklearn.linear_model._stochastic_gradient
XGB's module: xgboost.sklearn
In [8]:
atom.evaluate()
Out[8]:
| | accuracy | average_precision | balanced_accuracy | f1 | jaccard | matthews_corrcoef | precision | recall | roc_auc |
|---|---|---|---|---|---|---|---|---|---|
| RF | 0.9429 | 0.9741 | 0.9429 | 0.9431 | 0.8924 | 0.8858 | 0.9391 | 0.9472 | 0.9792 |
| SGD | 0.9217 | 0.9635 | 0.9218 | 0.9219 | 0.8551 | 0.8435 | 0.9203 | 0.9235 | 0.9676 |
| XGB | 0.9434 | 0.9753 | 0.9434 | 0.9437 | 0.8933 | 0.8868 | 0.9385 | 0.9489 | 0.9798 |
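With the full metric overview in hand, a final model can be picked programmatically, for instance by test-set f1 (the values below are copied from the evaluation table above):

```python
# Test-set f1 scores copied from atom.evaluate() above.
f1_test = {"RF": 0.9431, "SGD": 0.9219, "XGB": 0.9437}

best = max(f1_test, key=f1_test.get)
print(f"Best model by f1: {best}")
```

This matches the model that the training log above flags with a trailing `!` in the final results.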