import numpy as np
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import RandomForestRegressor
from bayes_opt import BayesianOptimization
from sklearn.datasets import make_classification, make_regression
from sklearn.model_selection import cross_val_score

import warnings
warnings.simplefilter('ignore')

Classification Problem: Bayesian Optimization

# Synthetic binary-classification data set: 10,000 samples with 10 features.
X, y = make_classification(
    n_samples=10000,
    n_features=10,
    n_classes=2,
)

We use the default hyperparameters to fit the data.

# Baseline: a forest with library-default hyperparameters, scored as the
# mean ROC AUC over 5 cross-validation folds.
rfc = RandomForestClassifier()
np.mean(
    cross_val_score(rfc, X, y, scoring='roc_auc', cv=5)
)
0.9897763781114314

The default hyperparameters reach about 0.99 ROC AUC (0.9898). Next, we use Bayesian optimization to fine-tune the hyperparameters.

Define the black-box function

def rfc_cv(n_estimators, min_samples_split, max_features, max_depth):
    """Score one random-forest classifier configuration by cross-validation.

    This is the objective handed to the Bayesian optimizer, which proposes
    real-valued arguments; the integer-valued hyperparameters are therefore
    truncated with ``int()`` before the model is built.

    Args:
        n_estimators: Number of trees; truncated to an int.
        min_samples_split: Minimum samples needed to split a node;
            truncated to an int.
        max_features: Fraction of features considered per split; capped
            at 0.999.
        max_depth: Maximum tree depth; truncated to an int.

    Returns:
        The mean ROC AUC over 5 cross-validation folds, computed on the
        module-level ``X`` and ``y`` as bound at call time.
    """
    forest = RandomForestClassifier(
        n_estimators=int(n_estimators),
        min_samples_split=int(min_samples_split),
        max_features=min(max_features, 0.999),
        max_depth=int(max_depth),
        # Fixed seed so repeated evaluations of the same point agree.
        random_state=42,
    )
    fold_scores = cross_val_score(forest, X, y, scoring='roc_auc', cv=5)
    return np.mean(fold_scores)
# Optimizer over rfc_cv; each tuple is the (lower, upper) bound of the
# search interval for the argument of the same name.
rfc_bo = BayesianOptimization(
    f=rfc_cv,
    pbounds={
        'n_estimators': (10, 250),
        'min_samples_split': (2, 25),
        'max_features': (0.1, 0.999),
        'max_depth': (5, 30),
    },
)
# Run the search with the library's default settings.
rfc_bo.maximize()
|   iter    |  target   | max_depth | max_fe... | min_sa... | n_esti... |
-------------------------------------------------------------------------
|  1        |  0.9904   |  18.08    |  0.3725   |  2.766    |  249.1    |
|  2        |  0.9898   |  15.31    |  0.6883   |  24.87    |  84.16    |
|  3        |  0.99     |  12.87    |  0.416    |  24.62    |  51.27    |
|  4        |  0.9903   |  19.94    |  0.4735   |  5.652    |  244.5    |
|  5        |  0.9895   |  25.96    |  0.9587   |  12.6     |  244.6    |
|  6        |  0.9901   |  16.89    |  0.7025   |  2.421    |  244.6    |
|  7        |  0.9904   |  16.8     |  0.4017   |  6.605    |  248.0    |
|  8        |  0.9901   |  21.43    |  0.5085   |  6.751    |  249.4    |
|  9        |  0.9901   |  11.14    |  0.6435   |  4.991    |  248.4    |
|  10       |  0.9902   |  16.72    |  0.5588   |  9.387    |  242.5    |
|  11       |  0.9897   |  13.73    |  0.7405   |  12.09    |  248.8    |
|  12       |  0.9895   |  19.62    |  0.5513   |  4.749    |  237.9    |
|  13       |  0.9901   |  23.44    |  0.4172   |  2.239    |  247.5    |
|  14       |  0.99     |  5.734    |  0.5701   |  3.207    |  241.1    |
|  15       |  0.9878   |  19.07    |  0.1093   |  4.803    |  246.5    |
|  16       |  0.9896   |  17.28    |  0.7474   |  21.93    |  203.7    |
|  17       |  0.9894   |  9.389    |  0.7191   |  7.547    |  34.61    |
|  18       |  0.9904   |  24.09    |  0.5541   |  22.82    |  39.02    |
|  19       |  0.9905   |  6.456    |  0.7869   |  22.56    |  17.81    |
|  20       |  0.9896   |  22.42    |  0.3757   |  9.123    |  42.82    |
|  21       |  0.9882   |  25.08    |  0.6047   |  4.696    |  19.99    |
|  22       |  0.9898   |  18.56    |  0.2698   |  6.795    |  65.66    |
|  23       |  0.9889   |  5.348    |  0.4807   |  19.61    |  69.79    |
|  24       |  0.99     |  14.29    |  0.2061   |  13.03    |  224.5    |
|  25       |  0.9902   |  10.75    |  0.4831   |  5.161    |  248.3    |
|  26       |  0.9902   |  17.2     |  0.676    |  7.752    |  246.8    |
|  27       |  0.9892   |  23.25    |  0.8133   |  22.96    |  39.82    |
|  28       |  0.99     |  6.223    |  0.8502   |  22.09    |  17.42    |
|  29       |  0.9904   |  15.85    |  0.4259   |  6.132    |  247.7    |
|  30       |  0.9891   |  24.09    |  0.2956   |  23.08    |  38.13    |
=========================================================================
# Best trial found by the search: its target (mean ROC AUC) and the raw,
# un-truncated parameter values the optimizer proposed.
rfc_bo.max
{'target': 0.9905380799798376,
 'params': {'max_depth': 6.456055231994655,
  'max_features': 0.7869473158265811,
  'min_samples_split': 22.558253615710782,
  'n_estimators': 17.814015466174588}}
# Re-score a forest built from hand-rounded values of the best parameters.
# NOTE(review): rfc_cv truncates with int() and fixes random_state=42, so the
# best trial was actually evaluated with n_estimators=17 and
# max_features~=0.787; the values below (18, 0.78, unseeded) differ slightly,
# so this score need not match the optimizer's reported target -- confirm
# this is intended.
rfc_Optimazed = RandomForestClassifier(
    n_estimators=18,
    max_depth=6,
    max_features=0.78,
    min_samples_split=22,
)
np.mean(
    cross_val_score(rfc_Optimazed, X, y, scoring='roc_auc', cv=5)
)
0.9900614797906387
  • Original roc_auc: 0.989776
  • Optimized roc_auc: 0.99006

Regression Problem: Bayesian Optimization

# Synthetic regression data set: 10,000 samples with 10 features.
# NOTE: this rebinds the module-level X and y that rfc_cv also reads, so
# calling rfc_cv after this point would score against the regression data.
X, y = make_regression(
    n_samples=10000,
    n_features=10,
)
# Baseline: default-hyperparameter forest, scored as the mean negated MSE
# over 5 cross-validation folds (closer to zero is better).
rfe = RandomForestRegressor()
np.mean(
    cross_val_score(rfe, X, y, scoring='neg_mean_squared_error', cv=5)
)
-1409.2889528620326

Define the black-box function

def rfe_cv(n_estimators, min_samples_split, max_features, max_depth):
    """Score one random-forest regressor configuration by cross-validation.

    This is the objective handed to the Bayesian optimizer, which proposes
    real-valued arguments; the integer-valued hyperparameters are therefore
    truncated with ``int()`` before the model is built.

    Args:
        n_estimators: Number of trees; truncated to an int.
        min_samples_split: Minimum samples needed to split a node;
            truncated to an int.
        max_features: Fraction of features considered per split; capped
            at 0.999.
        max_depth: Maximum tree depth; truncated to an int.

    Returns:
        The mean negated mean-squared error over 5 cross-validation folds,
        computed on the module-level ``X`` and ``y`` as bound at call time.
        Larger (closer to zero) is better, so it can be maximized directly.
    """
    forest = RandomForestRegressor(
        n_estimators=int(n_estimators),
        min_samples_split=int(min_samples_split),
        max_features=min(max_features, 0.999),
        max_depth=int(max_depth),
        # Fixed seed so repeated evaluations of the same point agree.
        random_state=42,
    )
    fold_scores = cross_val_score(
        forest, X, y, scoring='neg_mean_squared_error', cv=5
    )
    return np.mean(fold_scores)
# Smoke-test the objective on one hand-picked configuration before searching.
score = rfe_cv(
    n_estimators=100,
    min_samples_split=10,
    max_depth=6,
    max_features=0.78,
)
score
# Optimizer over rfe_cv; each tuple is the (lower, upper) bound of the
# search interval for the argument of the same name.
rfe_bo = BayesianOptimization(
    f=rfe_cv,
    pbounds={
        'n_estimators': (10, 250),
        'min_samples_split': (2, 25),
        'max_features': (0.1, 0.999),
        'max_depth': (5, 30),
    },
)
# Run the search with the library's default settings.
rfe_bo.maximize()
|   iter    |  target   | max_depth | max_fe... | min_sa... | n_esti... |
-------------------------------------------------------------------------
|  1        | -2.702e+0 |  22.15    |  0.2902   |  20.4     |  211.9    |
|  2        | -2.788e+0 |  25.96    |  0.2216   |  22.83    |  166.3    |
|  3        | -1.651e+0 |  11.15    |  0.8612   |  10.86    |  153.0    |
|  4        | -5.608e+0 |  5.331    |  0.4747   |  10.79    |  49.97    |
|  5        | -1.862e+0 |  12.6     |  0.9883   |  21.26    |  124.8    |
|  6        | -5.684e+0 |  5.0      |  0.999    |  2.0      |  250.0    |
|  7        | -1.568e+0 |  12.54    |  0.9491   |  10.55    |  149.3    |
|  8        | -4.212e+0 |  30.0     |  0.1      |  2.0      |  117.6    |
|  9        | -5.682e+0 |  5.0      |  0.999    |  25.0     |  144.0    |
|  10       | -1.443e+0 |  20.34    |  0.758    |  8.014    |  154.0    |
|  11       | -1.412e+0 |  14.26    |  0.999    |  2.0      |  165.3    |
|  12       | -1.399e+0 |  30.0     |  0.999    |  2.0      |  172.4    |
|  13       | -4.4e+03  |  17.47    |  0.1      |  2.0      |  186.3    |
|  14       | -1.402e+0 |  30.0     |  0.999    |  2.0      |  160.2    |
|  15       | -4.4e+03  |  6.677    |  0.999    |  25.0     |  106.8    |
|  16       | -5.683e+0 |  5.477    |  0.999    |  2.0      |  149.3    |
|  17       | -1.443e+0 |  22.98    |  0.999    |  7.414    |  165.7    |
|  18       | -1.635e+0 |  12.45    |  0.999    |  13.44    |  165.8    |
|  19       | -1.654e+0 |  24.78    |  0.999    |  15.16    |  144.4    |
|  20       | -1.864e+0 |  26.15    |  0.6263   |  23.02    |  129.2    |
|  21       | -1.391e+0 |  28.7     |  0.8468   |  2.122    |  140.7    |
|  22       | -1.46e+03 |  18.32    |  0.6642   |  9.14     |  133.6    |
|  23       | -2.242e+0 |  30.0     |  0.999    |  25.0     |  10.0     |
|  24       | -1.758e+0 |  30.0     |  0.999    |  4.534    |  10.0     |
|  25       | -1.859e+0 |  14.99    |  0.5897   |  10.89    |  10.73    |
|  26       | -5.012e+0 |  27.24    |  0.1      |  11.51    |  23.43    |
|  27       | -5.892e+0 |  5.0      |  0.999    |  2.0      |  10.0     |
|  28       | -1.568e+0 |  28.81    |  0.9138   |  12.15    |  134.3    |
|  29       | -2.17e+03 |  15.28    |  0.6976   |  23.56    |  10.48    |
|  30       | -6.18e+03 |  5.125    |  0.352    |  23.98    |  176.2    |
=========================================================================
# Best trial found by the search: its target (mean negated MSE) and the raw,
# un-truncated parameter values the optimizer proposed.
rfe_bo.max
{'target': -1390.7849548765093,
 'params': {'max_depth': 28.70255259053527,
  'max_features': 0.8468279746142502,
  'min_samples_split': 2.1219418980976834,
  'n_estimators': 140.748505191585}}
# Re-score a forest built from hand-rounded values of the best parameters.
# NOTE(review): rfe_cv truncates with int() and fixes random_state=42, so the
# best trial was actually evaluated with max_depth=28 and
# max_features~=0.847; the values below (29, 0.84, unseeded) differ slightly,
# so this score need not match the optimizer's reported target -- confirm
# this is intended.
rfe = RandomForestRegressor(
    n_estimators=140,
    max_depth=29,
    max_features=0.84,
    min_samples_split=2,
)
np.mean(
    cross_val_score(rfe, X, y, scoring='neg_mean_squared_error', cv=5)
)
-1383.4479089516929
  • Original neg_mean_squared_error: -1409.2889528620326
  • Optimized neg_mean_squared_error: -1383.4479089516929