Ensemble Methods Comparison: Bagging, Boosting, and Stacking Techniques
Posted on Oct 4, 2025 in Statistics
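Common Setup (Assumed)
All snippets below assume a binary classification problem already split into train and test sets; the original post does not show that step. Here is a minimal, hypothetical setup using scikit-learn's breast cancer dataset so the code runs end to end. The preprocessed matrices X_train_tr / X_test_tr used in the boosting and stacking sections are assumed here to be a simple standardization:

from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

# Hypothetical stand-in data; replace with your own dataset
X, y = load_breast_cancer(return_X_y=True)
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y)

# Assumption: "_tr" denotes a scaled copy of the feature matrices
scaler = StandardScaler().fit(X_train)
X_train_tr = scaler.transform(X_train)
X_test_tr = scaler.transform(X_test)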
Bagging Classifier Implementation
Base Model Performance
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import recall_score

# Baseline: a single decision tree to compare the ensemble against
base_model = DecisionTreeClassifier(random_state=42)
base_model.fit(X_train, y_train)
y_pred_base = base_model.predict(X_test)
base_recall = recall_score(y_test, y_pred_base)
print("Base model recall: {:.4f}".format(base_recall))
Hyperparameter Tuning (Grid Search)
param_grid = {
    "n_estimators": [10, 50, 100],
    "max_samples": [0.5, 0.8, 1.0],
    "max_features": [0.5, 0.8, 1.0],
    "bootstrap": [True]
}
from sklearn.ensemble import BaggingClassifier
from sklearn.model_selection import GridSearchCV

# 5-fold cross-validated grid search, optimizing recall
bagging = BaggingClassifier(estimator=base_model, random_state=42)
grid_search = GridSearchCV(estimator=bagging,
                           param_grid=param_grid,
                           cv=5,
                           n_jobs=-1,
                           scoring="recall")
grid_search.fit(X_train, y_train)
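GridSearchCV stores per-combination scores in cv_results_, which is handy for seeing how sensitive recall is to each bagging knob rather than looking only at the single winner. A small sketch of that inspection (pandas is assumed to be available):

import pandas as pd

# Top 5 parameter combinations by mean cross-validated recall
results = pd.DataFrame(grid_search.cv_results_)
cols = ['param_n_estimators', 'param_max_samples', 'param_max_features',
        'mean_test_score', 'std_test_score']
print(results[cols].sort_values('mean_test_score', ascending=False).head())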
Results and Comparison
print(grid_search.best_params_)
print("Best CV recall: {:.4f}".format(grid_search.best_score_))
best_bagging = grid_search.best_estimator_
y_pred_bagging = best_bagging.predict(X_test)
bagging_recall = recall_score(y_test, y_pred_bagging)
print("\nBagging recall on test: {:.4f}".format(bagging_recall))
if bagging_recall > base_recall:
    print("Bagging improves on the base model.")
XGBoost Classifier Implementation
Model Definition and Training
from xgboost import XGBClassifier
from sklearn.metrics import accuracy_score, classification_report

# Conservative defaults: shallow trees, moderate learning rate, row/column subsampling
xgb = XGBClassifier(
    n_estimators=100, learning_rate=0.1,
    max_depth=3, subsample=0.8,
    colsample_bytree=0.8, gamma=0, reg_alpha=0,
    reg_lambda=1, eval_metric='logloss', random_state=42)
xgb.fit(X_train_tr, y_train)   # X_train_tr: the preprocessed feature matrix
y_pred = xgb.predict(X_test_tr)
Evaluation Results
print("Accuracy:", accuracy_score(y_test, y_pred))
print(classification_report(y_test, y_pred))
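A fixed n_estimators is easy to overshoot; XGBoost can instead stop adding trees once a held-out metric stalls. A minimal sketch, assuming you carve a validation split out of the training data (the split itself is illustrative, and early_stopping_rounds is a constructor argument in xgboost >= 1.6):

from sklearn.model_selection import train_test_split

# Hold out part of the training data to monitor logloss per boosting round
X_tr, X_val, y_tr, y_val = train_test_split(
    X_train_tr, y_train, test_size=0.2, random_state=42, stratify=y_train)

xgb_es = XGBClassifier(
    n_estimators=1000,            # upper bound; early stopping picks the real count
    learning_rate=0.1, max_depth=3,
    eval_metric='logloss',
    early_stopping_rounds=20,
    random_state=42)
xgb_es.fit(X_tr, y_tr, eval_set=[(X_val, y_val)], verbose=False)
print("Best iteration:", xgb_es.best_iteration)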
AdaBoost Classifier Implementation
Base Estimator and Model Setup
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import AdaBoostClassifier
base_model = LogisticRegression(solver='liblinear', penalty='l2')
ada = AdaBoostClassifier(estimator=base_model, random_state=42)
Hyperparameter Tuning
param_grid = {
    'n_estimators': [50, 100, 200],
    'learning_rate': [0.01, 0.1, 0.5],
    'estimator__C': [0.01, 0.1, 1, 10]   # nested: C of the LogisticRegression base
}
grid_search = GridSearchCV(estimator=ada, param_grid=param_grid, cv=5, scoring='recall', n_jobs=-1)
grid_search.fit(X_train_tr, y_train)
Optimized Model Results
print("Mejores parámetros:", grid_search.best_params_)
print("Mejor Recall en CV:", grid_search.best_score_)
best_model = grid_search.best_estimator_
y_pred = best_model.predict(X_test_tr)
print(confusion_matrix(y_test, y_pred))
print(classification_report(y_test, y_pred))
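AdaBoost exposes its per-round predictions, which makes it easy to see how test recall evolves as weak learners are added. A minimal sketch using scikit-learn's staged_predict on the tuned best_model from above:

# Recall after each boosting round of the tuned AdaBoost model
for i, y_stage in enumerate(best_model.staged_predict(X_test_tr), start=1):
    if i % 50 == 0:   # print every 50 rounds to keep the output short
        print("Round {:3d} - test recall: {:.4f}".format(
            i, recall_score(y_test, y_stage)))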
Stacking Classifier Implementation
Defining Base and Meta Models
from sklearn.ensemble import StackingClassifier

# Heterogeneous base learners; their out-of-fold predictions feed the meta-model
# (use_label_encoder is deprecated in recent xgboost and dropped here)
base_models_1 = [
    ('bagging', BaggingClassifier(estimator=DecisionTreeClassifier(), random_state=42)),
    ('xgb', XGBClassifier(eval_metric='logloss', random_state=42)),
    ('ada', AdaBoostClassifier(estimator=LogisticRegression(solver='liblinear'), random_state=42))
]
meta_model = LogisticRegression(solver='liblinear')
modelo_apilado = StackingClassifier(estimators=base_models_1, final_estimator=meta_model, cv=5)
Tuning the Stacking Model
param_grid = {
    'final_estimator__C': [0.1, 1],   # regularization strength of the meta-model
}
grid_search = GridSearchCV(estimator=modelo_apilado, param_grid=param_grid, cv=5, scoring='recall', n_jobs=-1)
grid_search.fit(X_train_tr, y_train)
Evaluation Results
y_pred = grid_search.predict(X_test_tr)   # same preprocessing as the training matrix
sensibilidad = recall_score(y_test, y_pred)
print(f"Recall of the tuned stacking classifier: {sensibilidad:.2f}")
print(f"Best parameters found: {grid_search.best_params_}")
Gradient Boosting Implementation
Initial Model Training
from sklearn.ensemble import GradientBoostingClassifier

# Deliberately small ensemble (5 shallow trees) as a starting point
gb_model = GradientBoostingClassifier(min_samples_split=2, max_depth=2,
                                      n_estimators=5, learning_rate=0.1,
                                      random_state=42)
gb_model.fit(X_train, y_train)
y_pred_gb = gb_model.predict(X_test)
print("Accuracy (Gradient Boosting):", accuracy_score(y_test, y_pred_gb))
print("Feature importances:", gb_model.feature_importances_)
Hyperparameter Optimization (Grid Search)
param_grid = {
    'n_estimators': [50, 100],
    'learning_rate': [0.05, 0.1, 0.3],
    'max_depth': [1, 3, 5],
    'subsample': [0.8, 1.0]
}
grid_search = GridSearchCV(
    estimator=GradientBoostingClassifier(random_state=42),
    param_grid=param_grid,
    cv=5,
    scoring='accuracy',
    n_jobs=-1,
    verbose=1
)
grid_search.fit(X_train, y_train)
Optimized Model Evaluation
print("Mejores hiperparámetros:", grid_search.best_params_)
best_model = grid_search.best_estimator_
y_pred = best_model.predict(X_test)
print("Accuracy:", accuracy_score(y_test, y_pred))
print(classification_report(y_test, y_pred))
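Like AdaBoost, GradientBoostingClassifier can replay its predictions stage by stage, which shows whether the tuned n_estimators actually sits near the accuracy plateau or whether more (or fewer) trees would help. A minimal sketch using the fitted best_model from the grid search above:

import numpy as np

# Test accuracy after each boosting stage of the tuned model
staged_acc = [accuracy_score(y_test, y_stage)
              for y_stage in best_model.staged_predict(X_test)]
best_stage = int(np.argmax(staged_acc)) + 1
print("Best stage: {} of {} (accuracy {:.4f})".format(
    best_stage, len(staged_acc), max(staged_acc)))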