Bayesian Optimization for Classification and Regression
From now on, stop using GridSearch and RandomSearch
import numpy as np
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import RandomForestRegressor
from bayes_opt import BayesianOptimization
from sklearn.datasets import make_classification, make_regression
from sklearn.model_selection import cross_val_score
import warnings
# Install a blanket 'ignore' filter so library warnings are not printed.
warnings.simplefilter('ignore')
# Synthetic two-class dataset: 10000 samples with 10 features each.
# No random_state is given, so the data differs from run to run.
X, y = make_classification(
    n_classes=2,
    n_features=10,
    n_samples=10000,
)
We use the default hyperparameters to fit the data.
# Baseline: a random forest classifier with default settings, scored as the
# mean ROC AUC over 5 cross-validation folds.
rfc = RandomForestClassifier()
cross_val_score(rfc, X, y, scoring='roc_auc', cv=5).mean()
The default hyperparameters achieve around 0.98
ROC AUC. Next, we use Bayesian Optimization
to fine-tune the hyperparameters.
def rfc_cv(n_estimators, min_samples_split, max_features, max_depth):
    """Return the mean 5-fold ROC AUC of a random forest classifier.

    The model is evaluated on the module-level ``X`` and ``y``.

    Args:
        n_estimators: Number of trees; truncated to ``int``.
        min_samples_split: Minimum samples to split a node; truncated to
            ``int``.
        max_features: Fraction of features per split; capped at 0.999.
        max_depth: Maximum tree depth; truncated to ``int``.

    Returns:
        The mean of the per-fold ``roc_auc`` scores.
    """
    # The caller may pass floats, so the integer-valued settings are cast here.
    model = RandomForestClassifier(
        n_estimators=int(n_estimators),
        min_samples_split=int(min_samples_split),
        max_features=min(max_features, 0.999),
        max_depth=int(max_depth),
        random_state=42,
    )
    fold_scores = cross_val_score(model, X, y, scoring='roc_auc', cv=5)
    return np.mean(fold_scores)
# Optimiser over rfc_cv; each entry gives the range searched for that argument.
rfc_bo = BayesianOptimization(
    f=rfc_cv,
    pbounds={
        'n_estimators': (10, 250),
        'min_samples_split': (2, 25),
        'max_features': (0.1, 0.999),
        'max_depth': (5, 30),
    },
)
# Run the search with the library's default settings, then show the best result.
rfc_bo.maximize()
rfc_bo.max
# Classifier with hand-entered hyperparameters, scored the same way as the
# baseline (mean ROC AUC over 5 folds).
# NOTE(review): values presumably copied from an earlier rfc_bo.max - confirm.
rfc_Optimazed = RandomForestClassifier(
    n_estimators=18,
    min_samples_split=22,
    max_features=0.78,
    max_depth=6,
)
cross_val_score(rfc_Optimazed, X, y, scoring='roc_auc', cv=5).mean()
- Original roc_auc: 0.989776
- Optimized roc_auc: 0.99006
# Rebind the module-level X, y to a synthetic regression dataset
# (10000 samples, 10 features; unseeded, so it differs from run to run).
X, y = make_regression(
    n_features=10,
    n_samples=10000,
)
# Baseline: a default random forest regressor, scored as the mean negated MSE
# over 5 cross-validation folds.
rfe = RandomForestRegressor()
cross_val_score(rfe, X, y, scoring='neg_mean_squared_error', cv=5).mean()
def rfe_cv(n_estimators, min_samples_split, max_features, max_depth):
    """Return the mean 5-fold negated MSE of a random forest regressor.

    The model is evaluated on the module-level ``X`` and ``y``.

    Args:
        n_estimators: Number of trees; truncated to ``int``.
        min_samples_split: Minimum samples to split a node; truncated to
            ``int``.
        max_features: Fraction of features per split; capped at 0.999.
        max_depth: Maximum tree depth; truncated to ``int``.

    Returns:
        The mean of the per-fold ``neg_mean_squared_error`` scores.
    """
    # The caller may pass floats, so the integer-valued settings are cast here.
    model = RandomForestRegressor(
        n_estimators=int(n_estimators),
        min_samples_split=int(min_samples_split),
        max_features=min(max_features, 0.999),
        max_depth=int(max_depth),
        random_state=42,
    )
    fold_scores = cross_val_score(
        model, X, y, scoring='neg_mean_squared_error', cv=5
    )
    return np.mean(fold_scores)
# Spot-check the objective at one fixed hyperparameter setting.
score = rfe_cv(
    n_estimators=100,
    min_samples_split=10,
    max_features=0.78,
    max_depth=6,
)
score
# Optimiser over rfe_cv; each entry gives the range searched for that argument.
rfe_bo = BayesianOptimization(
    f=rfe_cv,
    pbounds={
        'n_estimators': (10, 250),
        'min_samples_split': (2, 25),
        'max_features': (0.1, 0.999),
        'max_depth': (5, 30),
    },
)
# Run the search with the library's default settings, then show the best result.
rfe_bo.maximize()
rfe_bo.max
# Rebind rfe to a regressor with hand-entered hyperparameters and score it the
# same way as the baseline (mean negated MSE over 5 folds).
# NOTE(review): values presumably copied from an earlier rfe_bo.max - confirm.
rfe = RandomForestRegressor(
    n_estimators=140,
    min_samples_split=2,
    max_features=0.84,
    max_depth=29,
)
cross_val_score(rfe, X, y, scoring='neg_mean_squared_error', cv=5).mean()
- Original neg_mean_squared_error: -1409.2889528620326
- Optimized neg_mean_squared_error: -1383.4479089516929