Ensemble Methods Comparison: Bagging, Boosting, and Stacking Techniques

Bagging Classifier Implementation

Base Model Performance

# Train a single decision tree as the baseline to beat
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import recall_score

base_model = DecisionTreeClassifier(random_state=42)
base_model.fit(X_train, y_train)

y_pred_base = base_model.predict(X_test)
base_recall = recall_score(y_test, y_pred_base)
print("Base model recall: {:.4f}".format(base_recall))

Hyperparameter Tuning (Grid Search)

# Search over ensemble size and the row/column subsampling rates
param_grid = {
    "n_estimators": [10, 50, 100],    # number of trees in the ensemble
    "max_samples": [0.5, 0.8, 1.0],   # fraction of rows drawn for each tree
    "max_features": [0.5, 0.8, 1.0],  # fraction of features drawn for each tree
    "bootstrap": [True]               # sample rows with replacement
}

from sklearn.ensemble import BaggingClassifier
from sklearn.model_selection import GridSearchCV

# Bag the decision tree from above; the `estimator` argument requires scikit-learn >= 1.2
bagging = BaggingClassifier(estimator=base_model, random_state=42)

grid_search = GridSearchCV(estimator=bagging,
                           param_grid=param_grid,
                           cv=5,
                           n_jobs=-1,
                           scoring="recall")
grid_search.fit(X_train, y_train)

Results and Comparison

print("Best parameters:", grid_search.best_params_)
print("Best CV recall: {:.4f}".format(grid_search.best_score_))

best_bagging = grid_search.best_estimator_
y_pred_bagging = best_bagging.predict(X_test)
bagging_recall = recall_score(y_test, y_pred_bagging)
print("\nRecall en test del Bagging: {:.4f}".format(bagging_recall))

if bagging_recall > base_recall:
    print("Bagging improves on the base model.")
else:
    print("Bagging does not improve on the base model.")

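Because bagging samples rows with replacement, each tree sees only part of the training data, and the held-out rows give a built-in validation signal. A minimal sketch of that out-of-bag estimate using scikit-learn's oob_score option (not part of the grid search above):

# Out-of-bag estimate: each tree is scored on the rows it never saw during
# training, giving a validation-style accuracy without a separate hold-out set
oob_bagging = BaggingClassifier(
    estimator=DecisionTreeClassifier(random_state=42),
    n_estimators=100,
    bootstrap=True,
    oob_score=True,
    random_state=42,
)
oob_bagging.fit(X_train, y_train)
print("OOB accuracy: {:.4f}".format(oob_bagging.oob_score_))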
XGBoost Classifier Implementation

Model Definition and Training

from xgboost import XGBClassifier

# Regularized gradient boosting on the preprocessed features (X_train_tr / X_test_tr)
xgb = XGBClassifier(
    n_estimators=100, learning_rate=0.1,
    max_depth=3, subsample=0.8,
    colsample_bytree=0.8, gamma=0, reg_alpha=0,
    reg_lambda=1, eval_metric='logloss', random_state=42)

xgb.fit(X_train_tr, y_train)
y_pred = xgb.predict(X_test_tr)

Evaluation Results

print("Accuracy:", accuracy_score(y_test, y_pred))
print(classification_report(y_test, y_pred))
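The model above fixes n_estimators=100; XGBoost can instead pick the number of boosting rounds automatically via early stopping. A minimal sketch, assuming a validation split carved out of the training data (the split itself is hypothetical, not part of the original pipeline) and xgboost >= 1.6, where early_stopping_rounds is a constructor argument:

from sklearn.model_selection import train_test_split

# Hold out part of the training data to monitor logloss during boosting
X_tr, X_val, y_tr, y_val = train_test_split(
    X_train_tr, y_train, test_size=0.2, random_state=42, stratify=y_train)

xgb_es = XGBClassifier(
    n_estimators=1000,             # upper bound; early stopping picks the actual count
    learning_rate=0.1, max_depth=3,
    early_stopping_rounds=10,      # stop after 10 rounds without logloss improvement
    eval_metric='logloss', random_state=42)
xgb_es.fit(X_tr, y_tr, eval_set=[(X_val, y_val)], verbose=False)
print("Best iteration:", xgb_es.best_iteration)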

AdaBoost Classifier Implementation

Base Estimator and Model Setup

from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import AdaBoostClassifier
base_model = LogisticRegression(solver='liblinear', penalty='l2')
ada = AdaBoostClassifier(estimator=base_model, random_state=42)

Hyperparameter Tuning

param_grid = {
    'n_estimators': [50, 100, 200],
    'learning_rate': [0.01, 0.1, 0.5],
    'estimator__C': [0.01, 0.1, 1, 10]  # regularization strength of the base logistic regression
}

grid_search = GridSearchCV(estimator=ada, param_grid=param_grid, cv=5, scoring='recall', n_jobs=-1)
grid_search.fit(X_train_tr, y_train)

Optimized Model Results

print("Mejores parámetros:", grid_search.best_params_)
print("Mejor Recall en CV:", grid_search.best_score_)

best_model = grid_search.best_estimator_
y_pred = best_model.predict(X_test_tr)

from sklearn.metrics import confusion_matrix
print(confusion_matrix(y_test, y_pred))
print(classification_report(y_test, y_pred))
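Since AdaBoost builds its ensemble sequentially, intermediate ensembles can be scored without refitting. A minimal sketch using staged_predict on the tuned model to see how test recall evolves as estimators are added:

# Score the ensemble after each boosting stage to spot where recall plateaus
staged_recalls = [
    recall_score(y_test, y_stage)
    for y_stage in best_model.staged_predict(X_test_tr)
]
best_stage, best_rec = max(enumerate(staged_recalls, start=1), key=lambda t: t[1])
print("Best recall {:.4f} with {} estimators".format(best_rec, best_stage))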

Stacking Classifier Implementation

Defining Base and Meta Models

from sklearn.ensemble import StackingClassifier

base_models_1 = [
    ('bagging', BaggingClassifier(estimator=DecisionTreeClassifier(), random_state=42)),
    # use_label_encoder was removed in XGBoost 2.0, so it is no longer passed here
    ('xgb', XGBClassifier(eval_metric='logloss', random_state=42)),
    ('ada', AdaBoostClassifier(estimator=LogisticRegression(solver='liblinear'), random_state=42))
]

# A logistic regression meta-model learns from the base models' out-of-fold predictions
meta_model = LogisticRegression(solver='liblinear')
modelo_apilado = StackingClassifier(estimators=base_models_1, final_estimator=meta_model, cv=5)

Tuning the Stacking Model

param_grid = {
    'final_estimator__C': [0.1, 1],  # regularization strength of the meta-model
}

grid_search = GridSearchCV(estimator=modelo_apilado, param_grid=param_grid, cv=5, scoring='recall', n_jobs=-1)
grid_search.fit(X_train_tr, y_train)

Evaluation Results

# Predict on the same preprocessed features the model was trained on
y_pred = grid_search.predict(X_test_tr)

sensibilidad = recall_score(y_test, y_pred)
print(f"Recall of the optimized stacking classifier: {sensibilidad:.2f}")
print(f"Best parameters found: {grid_search.best_params_}")
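Because the meta-model is a logistic regression, its fitted coefficients show how much weight the stack gives each base model's probability output (for a binary problem, scikit-learn keeps one probability column per base estimator). A minimal sketch of that inspection:

# Inspect the weight the meta-model assigns to each base model
best_stack = grid_search.best_estimator_
for (name, _), coef in zip(base_models_1, best_stack.final_estimator_.coef_[0]):
    print("{}: {:.3f}".format(name, coef))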

Gradient Boosting Implementation

Initial Model Training

from sklearn.ensemble import GradientBoostingClassifier

# A deliberately small ensemble (5 shallow trees) as a first look before tuning
gb_model = GradientBoostingClassifier(min_samples_split=2, max_depth=2, n_estimators=5,
                                      learning_rate=0.1, random_state=42)
gb_model.fit(X_train, y_train)

y_pred_gb = gb_model.predict(X_test)
print("Accuracy (Gradient Boosting):", accuracy_score(y_test, y_pred_gb))
print("Feature importances:", gb_model.feature_importances_)
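feature_importances_ is just an array in feature order; pairing it with column names makes it readable. A minimal sketch, assuming X_train is a pandas DataFrame (with a NumPy array, substitute an explicit list of feature names):

import pandas as pd

# Rank features by their contribution to the boosted trees' splits
importances = pd.Series(gb_model.feature_importances_, index=X_train.columns)
print(importances.sort_values(ascending=False))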

Hyperparameter Optimization (Grid Search)

param_grid = {
    'n_estimators': [50, 100],
    'learning_rate': [0.05, 0.1, 0.3],
    'max_depth': [1, 3, 5],
    'subsample': [0.8, 1.0]
}

grid_search = GridSearchCV(
    estimator=GradientBoostingClassifier(random_state=42),
    param_grid=param_grid,
    cv=5,
    scoring='accuracy',
    n_jobs=-1,
    verbose=1
)

grid_search.fit(X_train, y_train)

Optimized Model Evaluation

print("Mejores hiperparámetros:", grid_search.best_params_)
best_model = grid_search.best_estimator_
y_pred = best_model.predict(X_test)

print("Accuracy:", accuracy_score(y_test, y_pred))
print(classification_report(y_test, y_pred))
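To close the comparison, the test recalls computed above can be printed side by side. A minimal sketch, assuming the variables from the earlier sections are still in scope; the tuned AdaBoost is omitted here because the best_model name was later reused for gradient boosting, so it would need to be kept under a distinct name to join the table:

# Side-by-side test recall; recomputed where only predictions were printed earlier
summary = {
    "Decision tree (base)": base_recall,
    "Bagging (tuned)": bagging_recall,
    "XGBoost": recall_score(y_test, xgb.predict(X_test_tr)),
    "Stacking (tuned)": sensibilidad,
    "Gradient boosting (tuned)": recall_score(y_test, best_model.predict(X_test)),
}
for name, rec in sorted(summary.items(), key=lambda kv: kv[1], reverse=True):
    print("{:<28s} {:.4f}".format(name, rec))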