Cross-Validation with Metrics

Advanced cross-validation with multiple metrics and detailed scoring

cross-validation, model-selection, sklearn, python

Cross-Validation with Multiple Metrics

Perform cross-validation with multiple scoring metrics and detailed statistics.

from sklearn.model_selection import cross_validate
from sklearn.metrics import (
    make_scorer, accuracy_score, precision_score,
    recall_score, f1_score
)
import numpy as np

def cv_evaluate(model, X, y, cv=5, metrics=None, return_train_score=False):
    """
    Comprehensive cross-validation evaluation with multiple metrics.

    Args:
        model: Scikit-learn model
        X: Feature matrix
        y: Target vector
        cv: Number of folds or a scikit-learn CV splitter
        metrics: Dict mapping metric names to scorers; defaults to
            accuracy, precision, recall and F1 if None
        return_train_score: Whether to also compute training-set scores

    Returns:
        Tuple of (per-metric summary statistics, raw cross_validate output)
    """
    # Default metrics; 'weighted' averaging keeps precision/recall/F1 valid for multiclass targets
    if metrics is None:
        metrics = {
            'accuracy': make_scorer(accuracy_score),
            'precision': make_scorer(precision_score, average='weighted'),
            'recall': make_scorer(recall_score, average='weighted'),
            'f1': make_scorer(f1_score, average='weighted'),
        }

    # Perform cross-validation
    cv_results = cross_validate(
        model, X, y,
        cv=cv,
        scoring=metrics,
        return_train_score=return_train_score,
        n_jobs=-1
    )

    # Calculate statistics
    results = {}
    for metric_name in metrics.keys():
        test_scores = cv_results[f'test_{metric_name}']
        results[metric_name] = {
            'scores': test_scores,
            'mean': np.mean(test_scores),
            'std': np.std(test_scores),
            'min': np.min(test_scores),
            'max': np.max(test_scores)
        }

    # Print results
    print("=" * 70)
    print("CROSS-VALIDATION RESULTS")
    print("=" * 70)
    for metric_name, stats in results.items():
        print(f"\n{metric_name.upper()}:")
        print(f"  Mean: {stats['mean']:.4f} (+/- {stats['std']:.4f})")
        print(f"  Range: [{stats['min']:.4f}, {stats['max']:.4f}]")
        print(f"  Scores: {stats['scores']}")

    return results, cv_results
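
# Note: because `cv` is forwarded unchanged to cross_validate, it accepts
# either an integer fold count or a scikit-learn splitter object.
# A minimal sketch, assuming shuffled stratified folds are wanted for a
# classification target (the splitter settings below are illustrative):
#
# from sklearn.model_selection import StratifiedKFold
# skf = StratifiedKFold(n_splits=5, shuffle=True, random_state=0)
# results, cv_results = cv_evaluate(model, X_train, y_train, cv=skf)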

# Usage Example
# results, cv_results = cv_evaluate(
#     model, X_train, y_train,
#     cv=5
# )
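
# Self-contained sketch of a full run on synthetic data. The dataset
# parameters, the LogisticRegression model, and the metric names below are
# illustrative assumptions, not part of the snippet above. It also shows
# that the metrics dict may mix make_scorer objects with built-in scorer
# strings (both are accepted by cross_validate's `scoring`), and how the
# raw per-fold output can be tabulated with pandas.
from sklearn.datasets import make_classification
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import make_scorer, f1_score
import pandas as pd

X_demo, y_demo = make_classification(
    n_samples=500, n_features=20, n_informative=5, random_state=42
)
clf = LogisticRegression(max_iter=1000)

# Custom metrics: a scorer object alongside a built-in scorer string
custom_metrics = {
    'f1_macro': make_scorer(f1_score, average='macro'),
    'roc_auc': 'roc_auc',  # built-in scorer string (binary target here)
}

demo_results, demo_raw = cv_evaluate(
    clf, X_demo, y_demo,
    cv=5,
    metrics=custom_metrics,
    return_train_score=True
)

# Per-fold timings and scores as a table for closer inspection
print(pd.DataFrame(demo_raw).round(4))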