Data Visualization Utilities

Common plotting functions for data science with matplotlib and seaborn

Tags: visualization, matplotlib, seaborn, python

Data Visualization Utilities

Common plotting functions for exploratory data analysis.

import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
import numpy as np

def plot_correlation_matrix(df, figsize=(12, 10), annot=True):
    """
    Plot a heatmap of the pairwise correlations between numeric columns.

    Non-numeric columns are ignored. The figure is displayed with
    ``plt.show()`` and the computed correlation matrix is returned.

    Args:
        df: DataFrame; only its numeric columns are used
        figsize: Figure size passed to ``plt.figure``
        annot: Whether to annotate with correlation values

    Returns:
        The correlation matrix produced by ``DataFrame.corr()`` on the
        numeric columns of ``df``.

    Raises:
        ValueError: If ``df`` has no numeric columns.
    """
    numeric_df = df.select_dtypes(include=[np.number])
    if numeric_df.shape[1] == 0:
        # Fail before any figure is created: there is nothing to correlate,
        # and a clear message beats an error raised from inside the plot call.
        raise ValueError(
            "plot_correlation_matrix requires at least one numeric column"
        )

    corr_matrix = numeric_df.corr()

    plt.figure(figsize=figsize)
    sns.heatmap(
        corr_matrix,
        annot=annot,
        fmt='.2f',
        cmap='coolwarm',
        center=0,  # keep zero correlation at the midpoint of the colormap
        square=True,
        linewidths=1,
        cbar_kws={"shrink": 0.8},
    )
    plt.title('Correlation Matrix', fontsize=14, fontweight='bold')
    plt.tight_layout()
    plt.show()

    return corr_matrix

def plot_distribution(df, columns=None, figsize=(15, 5)):
    """
    Plot a histogram for each selected column on a three-wide subplot grid.

    Missing values are dropped per column before plotting. Grid cells that
    have no column assigned are hidden. The figure is displayed with
    ``plt.show()``.

    Args:
        df: DataFrame
        columns: List of columns to plot (default: all numeric). A single
            column name given as a string is treated as a one-item list.
        figsize: Figure size for the whole grid

    Raises:
        ValueError: If there are no columns to plot.
    """
    if columns is None:
        columns = df.select_dtypes(include=[np.number]).columns.tolist()
    elif isinstance(columns, str):
        # A bare string would otherwise be iterated character by character.
        columns = [columns]
    else:
        columns = list(columns)

    if not columns:
        # A zero-row grid cannot be created; fail early with a clear message
        # instead of an error from inside the subplot machinery.
        raise ValueError("plot_distribution: no columns to plot")

    grid_width = 3
    n_rows = (len(columns) + grid_width - 1) // grid_width  # ceiling division

    # squeeze=False always yields a 2-D array of Axes, so flattening works
    # for every grid size, including a single row for a single column.
    _, axes = plt.subplots(n_rows, grid_width, figsize=figsize, squeeze=False)
    axes = axes.flatten()

    for ax, col in zip(axes, columns):
        ax.hist(df[col].dropna(), bins=30, edgecolor='black', alpha=0.7)
        ax.set_title(f'Distribution of {col}')
        ax.set_xlabel(col)
        ax.set_ylabel('Frequency')
        ax.grid(True, alpha=0.3)

    # Hide unused subplots
    for ax in axes[len(columns):]:
        ax.axis('off')

    plt.tight_layout()
    plt.show()

def plot_boxplots(df, columns=None, figsize=(15, 5)):
    """
    Plot a boxplot for each selected column on a three-wide subplot grid.

    Missing values are dropped per column before plotting. Grid cells that
    have no column assigned are hidden. The figure is displayed with
    ``plt.show()``.

    Args:
        df: DataFrame
        columns: List of columns to plot (default: all numeric). A single
            column name given as a string is treated as a one-item list.
        figsize: Figure size for the whole grid

    Raises:
        ValueError: If there are no columns to plot.
    """
    if columns is None:
        columns = df.select_dtypes(include=[np.number]).columns.tolist()
    elif isinstance(columns, str):
        # A bare string would otherwise be iterated character by character.
        columns = [columns]
    else:
        columns = list(columns)

    if not columns:
        # A zero-row grid cannot be created; fail early with a clear message
        # instead of an error from inside the subplot machinery.
        raise ValueError("plot_boxplots: no columns to plot")

    grid_width = 3
    n_rows = (len(columns) + grid_width - 1) // grid_width  # ceiling division

    # squeeze=False always yields a 2-D array of Axes, so flattening works
    # for every grid size, including a single row for a single column.
    _, axes = plt.subplots(n_rows, grid_width, figsize=figsize, squeeze=False)
    axes = axes.flatten()

    for ax, col in zip(axes, columns):
        ax.boxplot(df[col].dropna())
        ax.set_title(f'Boxplot of {col}')
        ax.set_ylabel(col)
        ax.grid(True, alpha=0.3)

    # Hide unused subplots
    for ax in axes[len(columns):]:
        ax.axis('off')

    plt.tight_layout()
    plt.show()

# Usage Example
# plot_correlation_matrix(df)
# plot_distribution(df, columns=['age', 'income', 'score'])