Cross-Validation with Multiple Metrics
Perform cross-validation with multiple scoring metrics and detailed statistics.
from sklearn.model_selection import cross_val_score, cross_validate
from sklearn.metrics import (
make_scorer, accuracy_score, precision_score,
recall_score, f1_score, roc_auc_score
)
import numpy as np
import pandas as pd
def cv_evaluate(model, X, y, cv=5, metrics=None, return_train_score=False,
                verbose=True):
    """
    Comprehensive cross-validation evaluation with multiple metrics.

    Args:
        model: Scikit-learn estimator (must implement fit/predict).
        X: Feature matrix.
        y: Target vector.
        cv: Number of folds (or any CV splitter accepted by cross_validate).
        metrics: Optional dict mapping metric name -> scorer. When None,
            weighted accuracy/precision/recall/f1 scorers are used.
        return_train_score: When True, train-fold scores are computed and
            per-metric train statistics are included under the 'train' key
            of each metric's result entry.
        verbose: When True (default, preserving prior behavior), print a
            formatted summary of the results.

    Returns:
        Tuple of (results, cv_results) where results maps each metric name
        to a dict of statistics ('scores', 'mean', 'std', 'min', 'max',
        and optionally 'train') and cv_results is the raw output of
        sklearn.model_selection.cross_validate.
    """
    # Default metrics: weighted averaging so multiclass targets work
    # out of the box.
    if metrics is None:
        metrics = {
            'accuracy': make_scorer(accuracy_score),
            'precision': make_scorer(precision_score, average='weighted'),
            'recall': make_scorer(recall_score, average='weighted'),
            'f1': make_scorer(f1_score, average='weighted'),
        }

    # Perform cross-validation across all requested scorers in one pass.
    cv_results = cross_validate(
        model, X, y,
        cv=cv,
        scoring=metrics,
        return_train_score=return_train_score,
        n_jobs=-1
    )

    def _summarize(scores):
        # One-line helper: fold raw per-fold scores into summary stats.
        return {
            'scores': scores,
            'mean': np.mean(scores),
            'std': np.std(scores),
            'min': np.min(scores),
            'max': np.max(scores),
        }

    # Calculate statistics per metric; include train-side stats when the
    # caller asked for them (previously they were computed but discarded).
    results = {}
    for metric_name in metrics:
        stats = _summarize(cv_results[f'test_{metric_name}'])
        if return_train_score:
            stats['train'] = _summarize(cv_results[f'train_{metric_name}'])
        results[metric_name] = stats

    # Print a human-readable summary unless the caller silenced it.
    if verbose:
        print("=" * 70)
        print("CROSS-VALIDATION RESULTS")
        print("=" * 70)
        for metric_name, stats in results.items():
            print(f"\n{metric_name.upper()}:")
            print(f"  Mean: {stats['mean']:.4f} (+/- {stats['std']:.4f})")
            print(f"  Range: [{stats['min']:.4f}, {stats['max']:.4f}]")
            print(f"  Scores: {stats['scores']}")

    return results, cv_results
# Usage Example
# results, cv_results = cv_evaluate(
# model, X_train, y_train,
# cv=5
# )