diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
new file mode 100644
index 0000000..2ffe63b
--- /dev/null
+++ b/.github/workflows/ci.yml
@@ -0,0 +1,63 @@
+# Continuous integration: run the test suite on each Python version in the
+# matrix and lint the code base once, on pushes and pull requests to main.
+name: CI
+
+on:
+  push:
+    branches: [main]
+  pull_request:
+    branches: [main]
+
+# Both jobs only read the repository, so keep GITHUB_TOKEN read-only.
+permissions:
+  contents: read
+
+jobs:
+  test:
+    runs-on: ubuntu-latest
+    env:
+      # Pin uv to the matrix interpreter. Installing it alone does not force
+      # `uv sync` / `uv run` to use it (e.g. a .python-version file wins).
+      UV_PYTHON: ${{ matrix.python-version }}
+    strategy:
+      matrix:
+        # Quoted so that "3.10" is not parsed as the float 3.1.
+        python-version: ["3.10", "3.11", "3.12"]
+
+    steps:
+      - uses: actions/checkout@v4
+
+      - name: Install uv
+        uses: astral-sh/setup-uv@v4
+
+      - name: Set up Python ${{ matrix.python-version }}
+        run: uv python install ${{ matrix.python-version }}
+
+      - name: Install dependencies
+        run: uv sync --dev
+
+      - name: Run tests
+        run: uv run pytest tests/ -v --tb=short
+
+  lint:
+    runs-on: ubuntu-latest
+    env:
+      # Pin uv to the interpreter installed in the "Set up Python" step.
+      UV_PYTHON: "3.12"
+    steps:
+      - uses: actions/checkout@v4
+
+      - name: Install uv
+        uses: astral-sh/setup-uv@v4
+
+      - name: Set up Python
+        run: uv python install 3.12
+
+      - name: Install dependencies
+        run: uv sync --dev
+
+      - name: Run ruff check
+        run: uv run ruff check .
+
+      - name: Run ruff format check
+        run: uv run ruff format --check .
diff --git a/examples/reports/train_hashprep_report_fixes.py b/examples/reports/train_hashprep_report_fixes.py
index 235c9ab..61e21a6 100644
--- a/examples/reports/train_hashprep_report_fixes.py
+++ b/examples/reports/train_hashprep_report_fixes.py
@@ -3,9 +3,6 @@
 Review and adapt before production use.
 """
 
-from sklearn.preprocessing import OneHotEncoder, OrdinalEncoder
-from sklearn.preprocessing import RobustScaler
-import numpy as np
 import pandas as pd
 
 
@@ -14,51 +11,51 @@ def apply_fixes(df):
     df = df.copy()
 
     # Column 'Cabin' has 77% missing values
-    df = df.drop(columns=['Cabin'])
+    df = df.drop(columns=["Cabin"])
 
     # Frequency encode high-cardinality column 'Name'
-    freq_Name = df['Name'].value_counts(normalize=True)
-    df['Name_encoded'] = df['Name'].map(freq_Name)
+    freq_Name = df["Name"].value_counts(normalize=True)
+    df["Name_encoded"] = df["Name"].map(freq_Name)
 
     # Frequency encode high-cardinality column 'Ticket'
-    freq_Ticket = df['Ticket'].value_counts(normalize=True)
-    df['Ticket_encoded'] = df['Ticket'].map(freq_Ticket)
+    freq_Ticket = df["Ticket"].value_counts(normalize=True)
+    df["Ticket_encoded"] = df["Ticket"].map(freq_Ticket)
 
     # Clip outliers in 'Fare' using IQR method
-    q1_Fare, q3_Fare = df['Fare'].quantile([0.25, 0.75])
+    q1_Fare, q3_Fare = df["Fare"].quantile([0.25, 0.75])
     iqr_Fare = q3_Fare - q1_Fare
     lower_Fare, upper_Fare = q1_Fare - 1.5 * iqr_Fare, q3_Fare + 1.5 * iqr_Fare
-    df['Fare'] = df['Fare'].clip(lower=lower_Fare, upper=upper_Fare)
+    df["Fare"] = df["Fare"].clip(lower=lower_Fare, upper=upper_Fare)
 
     # Clip outliers in 'Parch' using IQR method
-    q1_Parch, q3_Parch = df['Parch'].quantile([0.25, 0.75])
+    q1_Parch, q3_Parch = df["Parch"].quantile([0.25, 0.75])
     iqr_Parch = q3_Parch - q1_Parch
     lower_Parch, upper_Parch = q1_Parch - 1.5 * iqr_Parch, q3_Parch + 1.5 * iqr_Parch
-    df['Parch'] = df['Parch'].clip(lower=lower_Parch, upper=upper_Parch)
+    df["Parch"] = df["Parch"].clip(lower=lower_Parch, upper=upper_Parch)
 
     # Clip outliers in 'SibSp' using IQR method
-    q1_SibSp, q3_SibSp = df['SibSp'].quantile([0.25, 0.75])
+    q1_SibSp, q3_SibSp = df["SibSp"].quantile([0.25, 0.75])
     iqr_SibSp = q3_SibSp - q1_SibSp
     lower_SibSp, upper_SibSp = q1_SibSp - 1.5 * iqr_SibSp, q3_SibSp + 1.5 * iqr_SibSp
-    df['SibSp'] = df['SibSp'].clip(lower=lower_SibSp, upper=upper_SibSp)
+    df["SibSp"] = df["SibSp"].clip(lower=lower_SibSp, upper=upper_SibSp)
 
     # Drop highly correlated column 'Survived,Sex'
-    df = df.drop(columns=['Survived,Sex'])
+    df = df.drop(columns=["Survived,Sex"])
 
     return df
 
 
-if __name__ == '__main__':
+if __name__ == "__main__":
     import sys
 
     if len(sys.argv) < 2:
-        print('Usage: python fixes.py <input.csv> [output.csv]')
+        print("Usage: python fixes.py <input.csv> [output.csv]")
         sys.exit(1)
 
     input_file = sys.argv[1]
-    output_file = sys.argv[2] if len(sys.argv) > 2 else 'cleaned_data.csv'
+    output_file = sys.argv[2] if len(sys.argv) > 2 else "cleaned_data.csv"
 
     df = pd.read_csv(input_file)
     df_clean = apply_fixes(df)
     df_clean.to_csv(output_file, index=False)
-    print(f'Cleaned data saved to {output_file}')
\ No newline at end of file
+    print(f"Cleaned data saved to {output_file}")
diff --git a/examples/reports/train_hashprep_report_pipeline.py b/examples/reports/train_hashprep_report_pipeline.py
index 81c0478..3e19d09 100644
--- a/examples/reports/train_hashprep_report_pipeline.py
+++ b/examples/reports/train_hashprep_report_pipeline.py
@@ -5,27 +5,27 @@
 
 from sklearn.compose import ColumnTransformer
 from sklearn.pipeline import Pipeline
-from sklearn.preprocessing import OneHotEncoder, OrdinalEncoder
-import numpy as np
 
 
 def build_preprocessing_pipeline():
     """Build sklearn preprocessing pipeline."""
 
     transformers = [
-        ('drop_column_Cabin', 'drop', ['Cabin']),
-        ('drop_column_Survived,S', 'drop', ['Survived,Sex']),
+        ("drop_column_Cabin", "drop", ["Cabin"]),
+        ("drop_column_Survived,S", "drop", ["Survived,Sex"]),
     ]
 
     preprocessor = ColumnTransformer(
         transformers=transformers,
-        remainder='passthrough',
+        remainder="passthrough",
         verbose_feature_names_out=False,
     )
 
-    pipeline = Pipeline([
-        ('preprocessor', preprocessor),
-    ])
+    pipeline = Pipeline(
+        [
+            ("preprocessor", preprocessor),
+        ]
+    )
 
     return pipeline
 
@@ -37,20 +37,18 @@ def get_pre_pipeline_steps():
     """
     steps = []
     # Outlier clipping for ['Fare']
-    steps.append(('clip_outliers_Fare', None))  # Implement manually
+    steps.append(("clip_outliers_Fare", None))  # Implement manually
     # Outlier clipping for ['Parch']
-    steps.append(('clip_outliers_Parch', None))  # Implement manually
+    steps.append(("clip_outliers_Parch", None))  # Implement manually
     # Outlier clipping for ['SibSp']
-    steps.append(('clip_outliers_SibSp', None))  # Implement manually
+    steps.append(("clip_outliers_SibSp", None))  # Implement manually
     return steps
 
 
-if __name__ == '__main__':
-    import joblib
-
+if __name__ == "__main__":
     pipeline = build_preprocessing_pipeline()
     if pipeline:
-        print('Pipeline created successfully')
+        print("Pipeline created successfully")
         print(pipeline)
         # Example: Save pipeline
-        # joblib.dump(pipeline, 'preprocessing_pipeline.joblib')
\ No newline at end of file
+        # import joblib; joblib.dump(pipeline, "preprocessing_pipeline.joblib")
diff --git a/hashprep/__init__.py b/hashprep/__init__.py
index dd2743f..231a028 100644
--- a/hashprep/__init__.py
+++ b/hashprep/__init__.py
@@ -1,3 +1,3 @@
-from .core.analyzer import DatasetAnalyzer
+from .core.analyzer import DatasetAnalyzer as DatasetAnalyzer
 
-__version__ = "0.1.0b1"
\ No newline at end of file
+__version__ = "0.1.0b1"
diff --git a/hashprep/checks/__init__.py b/hashprep/checks/__init__.py
index 094bc46..f44230f 100644
--- a/hashprep/checks/__init__.py
+++ b/hashprep/checks/__init__.py
@@ -1,29 +1,31 @@
-from typing import List, Optional
-
-from .core import Issue
+from .columns import _check_duplicates, _check_high_cardinality, _check_mixed_data_types, _check_single_value_columns
+from .core import Issue as Issue
+from .correlations import calculate_correlations
+from .distribution import _check_uniform_distribution, _check_unique_values
 from .drift import check_drift
+from .imbalance import _check_class_imbalance
 from .leakage import _check_data_leakage, _check_target_leakage_patterns
-from .missing_values import _check_high_missing_values, _check_empty_columns, _check_dataset_missingness, \
-    _check_missing_patterns
-from .columns import _check_single_value_columns, _check_high_cardinality, _check_duplicates, _check_mixed_data_types
+from .missing_values import (
+    _check_dataset_missingness,
+    _check_empty_columns,
+    _check_high_missing_values,
+    _check_missing_patterns,
+)
 from .outliers import (
-    _check_outliers,
-    _check_high_zero_counts,
-    _check_extreme_text_lengths,
-    _check_datetime_skew,
-    _check_skewness,
-    _check_infinite_values,
     _check_constant_length,
+    _check_datetime_skew,
     _check_empty_dataset,
+    _check_extreme_text_lengths,
+    _check_high_zero_counts,
+    _check_infinite_values,
+    _check_outliers,
+    _check_skewness,
 )
-from .correlations import calculate_correlations
-from .imbalance import _check_class_imbalance
-from .distribution import _check_uniform_distribution, _check_unique_values
 
 
 def _check_dataset_drift(analyzer):
     """Wrapper for drift detection that uses analyzer's comparison_df."""
-    if hasattr(analyzer, 'comparison_df') and analyzer.comparison_df is not None:
+    if hasattr(analyzer, "comparison_df") and analyzer.comparison_df is not None:
         return check_drift(analyzer.df, analyzer.comparison_df)
     return []
 
@@ -56,7 +58,7 @@ def _check_dataset_drift(analyzer):
 CORRELATION_CHECKS = {"feature_correlation", "categorical_correlation", "mixed_correlation"}
 
 
-def run_checks(analyzer, checks_to_run: List[str]):
+def run_checks(analyzer, checks_to_run: list[str]):
     issues = []
     correlation_requested = False
 
@@ -70,4 +72,4 @@ def run_checks(analyzer, checks_to_run: List[str]):
     if correlation_requested:
         issues.extend(calculate_correlations(analyzer))
 
-    return issues
\ No newline at end of file
+    return issues
diff --git a/hashprep/checks/columns.py b/hashprep/checks/columns.py
index b37a5eb..55881cc 100644
--- a/hashprep/checks/columns.py
+++ b/hashprep/checks/columns.py
@@ -1,8 +1,9 @@
-from .core import Issue
 from ..config import DEFAULT_CONFIG
+from .core import Issue
 
 _COL_THRESHOLDS = DEFAULT_CONFIG.columns
 
+
 def _check_single_value_columns(analyzer):
     issues = []
     for col in analyzer.df.columns:
@@ -26,7 +27,12 @@ def _check_single_value_columns(analyzer):
             )
     return issues
 
-def _check_high_cardinality(analyzer, threshold: int = _COL_THRESHOLDS.high_cardinality_count, critical_threshold: float = _COL_THRESHOLDS.high_cardinality_ratio_critical):
+
+def _check_high_cardinality(
+    analyzer,
+    threshold: int = _COL_THRESHOLDS.high_cardinality_count,
+    critical_threshold: float = _COL_THRESHOLDS.high_cardinality_ratio_critical,
+):
     issues = []
     categorical_cols = analyzer.df.select_dtypes(include="object").columns.tolist()
     for col in categorical_cols:
@@ -52,6 +58,7 @@ def _check_high_cardinality(analyzer, threshold: int = _COL_THRESHOLDS.high_card
             )
     return issues
 
+
 def _check_duplicates(analyzer):
     issues = []
     duplicate_rows = int(analyzer.df.duplicated().sum())
@@ -76,6 +83,7 @@ def _check_duplicates(analyzer):
         )
     return issues
 
+
 def _check_mixed_data_types(analyzer):
     issues = []
     for col in analyzer.df.columns:
@@ -91,4 +99,4 @@ def _check_mixed_data_types(analyzer):
                     quick_fix="Options: \n- Cast to single type: Ensure consistency (Pros: Simplifies processing; Cons: May lose nuance).\n- Split column: Separate types into new features (Pros: Preserves info; Cons: Adds complexity).\n- Investigate source: Check data collection errors (Pros: Improves quality; Cons: Time-consuming).",
                 )
             )
-    return issues
\ No newline at end of file
+    return issues
diff --git a/hashprep/checks/core.py b/hashprep/checks/core.py
index 6f73640..cb03481 100644
--- a/hashprep/checks/core.py
+++ b/hashprep/checks/core.py
@@ -1,9 +1,8 @@
 from dataclasses import dataclass
 
-@dataclass
 
+@dataclass
 class Issue:
-
     category: str
 
     severity: str  # critical or warning
diff --git a/hashprep/checks/correlations.py b/hashprep/checks/correlations.py
index 4854403..0955ba3 100644
--- a/hashprep/checks/correlations.py
+++ b/hashprep/checks/correlations.py
@@ -1,17 +1,20 @@
-from .core import Issue
-import pandas as pd
-import numpy as np
-from scipy.stats import spearmanr, pearsonr, kendalltau, chi2_contingency
 from itertools import combinations
-from .discretizer import Discretizer, DiscretizationType
-from ..utils.type_inference import is_usable_for_corr
+
+import numpy as np
+import pandas as pd
+from scipy.stats import chi2_contingency, kendalltau, pearsonr, spearmanr
+
 from ..config import DEFAULT_CONFIG
+from ..utils.type_inference import is_usable_for_corr
+from .core import Issue
+from .discretizer import DiscretizationType, Discretizer
 
 _CORR = DEFAULT_CONFIG.correlations
 CORR_THRESHOLDS = _CORR.as_nested_dict()
 CAT_MAX_DISTINCT = _CORR.max_distinct_categories
 LOW_CARD_NUM_THRESHOLD = _CORR.low_cardinality_numeric
 
+
 def _cramers_v_corrected(table: pd.DataFrame) -> float:
     if table.empty or (table.shape[0] == 1 or table.shape[1] == 1):
         return 0.0
@@ -19,11 +22,11 @@ def _cramers_v_corrected(table: pd.DataFrame) -> float:
     n = table.sum().sum()
     phi2 = chi2 / n
     r, k = table.shape
-    with np.errstate(divide='ignore', invalid='ignore'):
-        phi2corr = max(0, phi2 - ((k-1)*(r-1))/(n-1))
-        rcorr = r - ((r-1)**2)/(n-1)
-        kcorr = k - ((k-1)**2)/(n-1)
-        rkcorr = min((kcorr-1), (rcorr-1))
+    with np.errstate(divide="ignore", invalid="ignore"):
+        phi2corr = max(0, phi2 - ((k - 1) * (r - 1)) / (n - 1))
+        rcorr = r - ((r - 1) ** 2) / (n - 1)
+        kcorr = k - ((k - 1) ** 2) / (n - 1)
+        rkcorr = min((kcorr - 1), (rcorr - 1))
         if rkcorr == 0:
             return 1.0
         return np.sqrt(phi2corr / rkcorr)
@@ -40,14 +43,20 @@ def calculate_correlations(analyzer, thresholds=None):
     inferred_types = analyzer.column_types  # Use analyzer.column_types for inferred types dict
     issues = []
 
-    numeric_cols = [col for col, typ in inferred_types.items() if
-                    typ == 'Numeric' and is_usable_for_corr(analyzer.df[col])]
-    cat_cols = [col for col, typ in inferred_types.items() if typ == 'Categorical' and
-                1 < analyzer.df[col].nunique() <= CAT_MAX_DISTINCT and is_usable_for_corr(analyzer.df[col])]
-
-    issues.extend(_check_numeric_correlation(analyzer, numeric_cols, thresholds['numeric']))
-    issues.extend(_check_categorical_correlation(analyzer, cat_cols, thresholds['categorical']))
-    issues.extend(_check_mixed_correlation(analyzer, numeric_cols, cat_cols, thresholds['mixed']))
+    numeric_cols = [
+        col for col, typ in inferred_types.items() if typ == "Numeric" and is_usable_for_corr(analyzer.df[col])
+    ]
+    cat_cols = [
+        col
+        for col, typ in inferred_types.items()
+        if typ == "Categorical"
+        and 1 < analyzer.df[col].nunique() <= CAT_MAX_DISTINCT
+        and is_usable_for_corr(analyzer.df[col])
+    ]
+
+    issues.extend(_check_numeric_correlation(analyzer, numeric_cols, thresholds["numeric"]))
+    issues.extend(_check_categorical_correlation(analyzer, cat_cols, thresholds["categorical"]))
+    issues.extend(_check_mixed_correlation(analyzer, numeric_cols, cat_cols, thresholds["mixed"]))
 
     return issues
 
@@ -57,7 +66,7 @@ def _check_numeric_correlation(analyzer, numeric_cols: list, thresholds: dict):
     if len(numeric_cols) < 2:
         return issues
 
-    num_df = analyzer.df[numeric_cols].dropna(how='all')
+    num_df = analyzer.df[numeric_cols].dropna(how="all")
 
     for col1, col2 in combinations(numeric_cols, 2):
         series1, series2 = num_df[col1].dropna(), num_df[col2].dropna()
@@ -76,35 +85,38 @@ def _check_numeric_correlation(analyzer, numeric_cols: list, thresholds: dict):
 
         # Kendall (only for low-cardinality numerics)
         kendall_corr, kendall_p = None, None
-        is_low_card = (series1.nunique() <= LOW_CARD_NUM_THRESHOLD or
-                       series2.nunique() <= LOW_CARD_NUM_THRESHOLD)
+        is_low_card = series1.nunique() <= LOW_CARD_NUM_THRESHOLD or series2.nunique() <= LOW_CARD_NUM_THRESHOLD
         if is_low_card:
             kendall_corr, kendall_p = kendalltau(series1, series2)
             kendall_corr = abs(kendall_corr)
 
         # Flag if any metric exceeds threshold
-        metrics = [('Spearman', spearman_corr, spearman_p, thresholds['spearman']),
-                   ('Pearson', pearson_corr, pearson_p, thresholds['pearson'])]
+        metrics = [
+            ("Spearman", spearman_corr, spearman_p, thresholds["spearman"]),
+            ("Pearson", pearson_corr, pearson_p, thresholds["pearson"]),
+        ]
         if kendall_corr is not None:
-            metrics.append(('Kendall', kendall_corr, kendall_p, thresholds['kendall']))
+            metrics.append(("Kendall", kendall_corr, kendall_p, thresholds["kendall"]))
 
         for method, corr, p_val, thresh in metrics:
-            if corr > thresh['warning']:
-                severity = 'critical' if corr > thresh['critical'] else 'warning'
-                impact = 'high' if severity == 'critical' else 'medium'
+            if corr > thresh["warning"]:
+                severity = "critical" if corr > thresh["critical"] else "warning"
+                impact = "high" if severity == "critical" else "medium"
                 quick_fix = (
                     f"Options: \n- Drop one feature (e.g., {col2}): Reduces multicollinearity.\n- PCA/combine: Retains info.\n- Use tree-based models."
-                    if severity == 'critical' else
-                    f"Options: \n- Monitor in modeling.\n- Drop if redundant."
+                    if severity == "critical"
+                    else "Options: \n- Monitor in modeling.\n- Drop if redundant."
+                )
+                issues.append(
+                    Issue(
+                        category="feature_correlation",
+                        severity=severity,
+                        column=f"{col1},{col2}",
+                        description=f"Numeric columns '{col1}' and '{col2}' highly correlated ({method}: {corr:.3f}, p={p_val:.4f})",
+                        impact_score=impact,
+                        quick_fix=quick_fix,
+                    )
                 )
-                issues.append(Issue(
-                    category="feature_correlation",
-                    severity=severity,
-                    column=f"{col1},{col2}",
-                    description=f"Numeric columns '{col1}' and '{col2}' highly correlated ({method}: {corr:.3f}, p={p_val:.4f})",
-                    impact_score=impact,
-                    quick_fix=quick_fix,
-                ))
 
     return issues
 
@@ -117,22 +129,24 @@ def _check_categorical_correlation(analyzer, cat_cols: list, thresholds: dict):
     for col1, col2 in combinations(cat_cols, 2):
         table = pd.crosstab(analyzer.df[col1], analyzer.df[col2])
         cramers_v = _cramers_v_corrected(table)
-        if cramers_v > thresholds['warning']:
-            severity = 'critical' if cramers_v > thresholds['critical'] else 'warning'
-            impact = 'high' if severity == 'critical' else 'medium'
+        if cramers_v > thresholds["warning"]:
+            severity = "critical" if cramers_v > thresholds["critical"] else "warning"
+            impact = "high" if severity == "critical" else "medium"
             quick_fix = (
                 "Options: \n- Drop one (less predictive). \n- Group categories. \n- Use trees (robust to assoc.)."
-                if severity == 'critical' else
-                "Options: \n- Monitor redundancy. \n- Re-encode."
+                if severity == "critical"
+                else "Options: \n- Monitor redundancy. \n- Re-encode."
+            )
+            issues.append(
+                Issue(
+                    category="feature_correlation",
+                    severity=severity,
+                    column=f"{col1},{col2}",
+                    description=f"Categorical columns '{col1}' and '{col2}' highly associated (Cramer's V: {cramers_v:.3f})",
+                    impact_score=impact,
+                    quick_fix=quick_fix,
+                )
             )
-            issues.append(Issue(
-                category="feature_correlation",
-                severity=severity,
-                column=f"{col1},{col2}",
-                description=f"Categorical columns '{col1}' and '{col2}' highly associated (Cramer's V: {cramers_v:.3f})",
-                impact_score=impact,
-                quick_fix=quick_fix,
-            ))
     return issues
 
 
@@ -147,21 +161,23 @@ def _check_mixed_correlation(analyzer, numeric_cols: list, cat_cols: list, thres
     for num_col, cat_col in [(n, c) for n in numeric_cols for c in cat_cols]:
         table = pd.crosstab(df_disc[cat_col], df_disc[num_col])
         cramers_v = _cramers_v_corrected(table)
-        if cramers_v > thresholds['warning']:
-            severity = 'critical' if cramers_v > thresholds['critical'] else 'warning'
-            impact = 'high' if severity == 'critical' else 'medium'
+        if cramers_v > thresholds["warning"]:
+            severity = "critical" if cramers_v > thresholds["critical"] else "warning"
+            impact = "high" if severity == "critical" else "medium"
             quick_fix = (
                 "Options: \n- Drop one. \n- Discretize/encode differently. \n- Use robust models."
-                if severity == 'critical' else
-                "Options: \n- Monitor in modeling."
+                if severity == "critical"
+                else "Options: \n- Monitor in modeling."
             )
-            issues.append(Issue(
-                category="feature_correlation",
-                severity=severity,
-                column=f"{cat_col},{num_col}",
-                description=f"Mixed columns '{cat_col}' (cat) and '{num_col}' (num) associated (Discretized Cramer's V: {cramers_v:.3f})",
-                impact_score=impact,
-                quick_fix=quick_fix,
-            ))
-
-    return issues
\ No newline at end of file
+            issues.append(
+                Issue(
+                    category="feature_correlation",
+                    severity=severity,
+                    column=f"{cat_col},{num_col}",
+                    description=f"Mixed columns '{cat_col}' (cat) and '{num_col}' (num) associated (Discretized Cramer's V: {cramers_v:.3f})",
+                    impact_score=impact,
+                    quick_fix=quick_fix,
+                )
+            )
+
+    return issues
diff --git a/hashprep/checks/discretizer.py b/hashprep/checks/discretizer.py
index ade7064..889a303 100644
--- a/hashprep/checks/discretizer.py
+++ b/hashprep/checks/discretizer.py
@@ -1,11 +1,14 @@
-import pandas as pd
-import numpy as np
 from enum import Enum
 
+import numpy as np
+import pandas as pd
+
+
 class DiscretizationType(Enum):
     UNIFORM = "uniform"
     QUANTILE = "quantile"
 
+
 class Discretizer:
     def __init__(self, method=DiscretizationType.UNIFORM, n_bins=10):
         self.method = method
diff --git a/hashprep/checks/distribution.py b/hashprep/checks/distribution.py
index 82fa866..7446d5e 100644
--- a/hashprep/checks/distribution.py
+++ b/hashprep/checks/distribution.py
@@ -1,13 +1,12 @@
-from typing import List
-
 from scipy.stats import kstest
 
-from .core import Issue
 from ..config import DEFAULT_CONFIG
+from .core import Issue
 
 _DIST = DEFAULT_CONFIG.distribution
 
-def _check_uniform_distribution(analyzer, p_threshold: float = _DIST.uniform_p_value) -> List[Issue]:
+
+def _check_uniform_distribution(analyzer, p_threshold: float = _DIST.uniform_p_value) -> list[Issue]:
     """
     Detect uniformly distributed numeric columns using Kolmogorov-Smirnov test.
     Uniform distributions often indicate synthetic IDs or sequential data.
@@ -48,7 +47,7 @@ def _check_uniform_distribution(analyzer, p_threshold: float = _DIST.uniform_p_v
     return issues
 
 
-def _check_unique_values(analyzer, threshold: float = _DIST.unique_value_ratio) -> List[Issue]:
+def _check_unique_values(analyzer, threshold: float = _DIST.unique_value_ratio) -> list[Issue]:
     """
     Detect columns where nearly all values are unique.
     High uniqueness often indicates identifiers, names, or free-text fields.
diff --git a/hashprep/checks/drift.py b/hashprep/checks/drift.py
index 4e5798b..332a4bc 100644
--- a/hashprep/checks/drift.py
+++ b/hashprep/checks/drift.py
@@ -2,9 +2,9 @@
 import pandas as pd
 from scipy.stats import chisquare, ks_2samp
 
-from .core import Issue
 from ..config import DEFAULT_CONFIG
 from ..utils.logging import get_logger
+from .core import Issue
 
 _log = get_logger("checks.drift")
 
@@ -88,7 +88,7 @@ def _check_categorical_drift(
 
         new_categories = set(test_counts.index) - set(train_counts.index)
         if new_categories:
-            sample_new = list(new_categories)[:_DRIFT.max_new_category_samples]
+            sample_new = list(new_categories)[: _DRIFT.max_new_category_samples]
             issues.append(
                 Issue(
                     category="dataset_drift",
diff --git a/hashprep/checks/imbalance.py b/hashprep/checks/imbalance.py
index 081ee62..19a2cd3 100644
--- a/hashprep/checks/imbalance.py
+++ b/hashprep/checks/imbalance.py
@@ -1,5 +1,6 @@
-from .core import Issue
 from ..config import DEFAULT_CONFIG
+from .core import Issue
+
 
 def _check_class_imbalance(analyzer, threshold: float = DEFAULT_CONFIG.imbalance.majority_class_ratio):
     issues = []
@@ -16,4 +17,4 @@ def _check_class_imbalance(analyzer, threshold: float = DEFAULT_CONFIG.imbalance
                     quick_fix="Options: \n- Resample data: Use oversampling (e.g., SMOTE) or undersampling (Pros: Balances classes; Cons: May introduce bias or lose data).\n- Use class weights: Adjust model weights for imbalance (Pros: Simple; Cons: Model-dependent).\n- Stratified sampling: Ensure balanced splits in training (Pros: Improves evaluation; Cons: Requires careful implementation).",
                 )
             )
-    return issues
\ No newline at end of file
+    return issues
diff --git a/hashprep/checks/leakage.py b/hashprep/checks/leakage.py
index ecbbc1b..cdfa429 100644
--- a/hashprep/checks/leakage.py
+++ b/hashprep/checks/leakage.py
@@ -1,13 +1,15 @@
-from .core import Issue
+import numpy as np
 import pandas as pd
 from scipy.stats import chi2_contingency, f_oneway
-import numpy as np
+
 from ..config import DEFAULT_CONFIG
 from ..utils.logging import get_logger
+from .core import Issue
 
 _LEAK = DEFAULT_CONFIG.leakage
 _log = get_logger("checks.leakage")
 
+
 def _check_data_leakage(analyzer):
     issues = []
     if analyzer.target_col and analyzer.target_col in analyzer.df.columns:
@@ -28,6 +30,7 @@ def _check_data_leakage(analyzer):
                 )
     return issues
 
+
 def _check_target_leakage_patterns(analyzer):
     issues = []
     if analyzer.target_col and analyzer.target_col in analyzer.df.columns:
@@ -41,7 +44,11 @@ def _check_target_leakage_patterns(analyzer):
                 corrs = numeric_cols.corrwith(target).abs()
                 for col, corr in corrs.items():
                     severity = (
-                        "critical" if corr > _LEAK.numeric_critical else "warning" if corr > _LEAK.numeric_warning else None
+                        "critical"
+                        if corr > _LEAK.numeric_critical
+                        else "warning"
+                        if corr > _LEAK.numeric_warning
+                        else None
                     )
                     if severity:
                         impact = "high" if severity == "critical" else "medium"
@@ -62,9 +69,7 @@ def _check_target_leakage_patterns(analyzer):
                         )
         # Categorical target
         else:
-            cat_cols = analyzer.df.select_dtypes(include="object").drop(
-                columns=[analyzer.target_col], errors="ignore"
-            )
+            cat_cols = analyzer.df.select_dtypes(include="object").drop(columns=[analyzer.target_col], errors="ignore")
             for col in cat_cols.columns:
                 try:
                     table = pd.crosstab(target, analyzer.df[col])
@@ -74,7 +79,11 @@ def _check_target_leakage_patterns(analyzer):
                     r, k = table.shape
                     cramers_v = np.sqrt(phi2 / min(k - 1, r - 1))
                     severity = (
-                        "critical" if cramers_v > _LEAK.categorical_critical else "warning" if cramers_v > _LEAK.categorical_warning else None
+                        "critical"
+                        if cramers_v > _LEAK.categorical_critical
+                        else "warning"
+                        if cramers_v > _LEAK.categorical_warning
+                        else None
                     )
                     if severity:
                         impact = "high" if severity == "critical" else "medium"
@@ -110,8 +119,11 @@ def _check_target_leakage_patterns(analyzer):
                 try:
                     f_stat, p_val = f_oneway(*groups)
                     severity = (
-                        "critical" if f_stat > _LEAK.f_stat_critical and p_val < _LEAK.f_stat_p_value
-                        else "warning" if f_stat > _LEAK.f_stat_warning and p_val < _LEAK.f_stat_p_value else None
+                        "critical"
+                        if f_stat > _LEAK.f_stat_critical and p_val < _LEAK.f_stat_p_value
+                        else "warning"
+                        if f_stat > _LEAK.f_stat_warning and p_val < _LEAK.f_stat_p_value
+                        else None
                     )
                     if severity:
                         impact = "high" if severity == "critical" else "medium"
@@ -133,4 +145,4 @@ def _check_target_leakage_patterns(analyzer):
                 except (ValueError, RuntimeWarning) as e:
                     _log.debug("F-test leakage check failed for '%s': %s", col, e)
                     continue
-    return issues
\ No newline at end of file
+    return issues
diff --git a/hashprep/checks/missing_values.py b/hashprep/checks/missing_values.py
index f407360..58e758c 100644
--- a/hashprep/checks/missing_values.py
+++ b/hashprep/checks/missing_values.py
@@ -1,17 +1,22 @@
-from scipy.stats import chi2_contingency, mannwhitneyu
-from .core import Issue
-import pandas as pd
 from collections import defaultdict
+
 import numpy as np
+import pandas as pd
 from numpy.linalg import LinAlgError
+from scipy.stats import chi2_contingency, mannwhitneyu
+
 from ..config import DEFAULT_CONFIG
 from ..utils.logging import get_logger
+from .core import Issue
 
 _log = get_logger("checks.missing_values")
 
 _THRESHOLDS = DEFAULT_CONFIG.missing_values
 
-def _check_high_missing_values(analyzer, threshold: float = _THRESHOLDS.warning, critical_threshold: float = _THRESHOLDS.critical):
+
+def _check_high_missing_values(
+    analyzer, threshold: float = _THRESHOLDS.warning, critical_threshold: float = _THRESHOLDS.critical
+):
     issues = []
     for col in analyzer.df.columns:
         missing_pct = float(analyzer.df[col].isna().mean())
@@ -35,6 +40,7 @@ def _check_high_missing_values(analyzer, threshold: float = _THRESHOLDS.warning,
             )
     return issues
 
+
 def _check_empty_columns(analyzer):
     issues = []
     for col in analyzer.df.columns:
@@ -51,11 +57,14 @@ def _check_empty_columns(analyzer):
             )
     return issues
 
-def _check_dataset_missingness(analyzer, threshold: float = _THRESHOLDS.dataset_warning_pct, critical_threshold: float = _THRESHOLDS.dataset_critical_pct):
+
+def _check_dataset_missingness(
+    analyzer,
+    threshold: float = _THRESHOLDS.dataset_warning_pct,
+    critical_threshold: float = _THRESHOLDS.dataset_critical_pct,
+):
     issues = []
-    missing_pct = float(
-        (analyzer.df.isnull().sum().sum() / (analyzer.df.shape[0] * analyzer.df.shape[1])) * 100
-    )
+    missing_pct = float((analyzer.df.isnull().sum().sum() / (analyzer.df.shape[0] * analyzer.df.shape[1])) * 100)
     if missing_pct > threshold:
         severity = "critical" if missing_pct > critical_threshold else "warning"
         impact = "high" if severity == "critical" else "medium"
@@ -77,11 +86,16 @@ def _check_dataset_missingness(analyzer, threshold: float = _THRESHOLDS.dataset_
     return issues
 
 
-def _check_missing_patterns(analyzer, threshold: float = _THRESHOLDS.pattern_p_value,
-                            critical_p_threshold: float = _THRESHOLDS.pattern_critical_p_value):
+def _check_missing_patterns(
+    analyzer,
+    threshold: float = _THRESHOLDS.pattern_p_value,
+    critical_p_threshold: float = _THRESHOLDS.pattern_critical_p_value,
+):
     issues = []
     missing_cols = [
-        col for col in analyzer.df.columns if int(analyzer.df[col].isna().sum()) >= _THRESHOLDS.pattern_min_missing_count
+        col
+        for col in analyzer.df.columns
+        if int(analyzer.df[col].isna().sum()) >= _THRESHOLDS.pattern_min_missing_count
     ]
 
     # grouping logic
@@ -89,9 +103,7 @@ def _check_missing_patterns(analyzer, threshold: float = _THRESHOLDS.pattern_p_v
     num_patterns = defaultdict(list)  # (missing_col, correlated_col, p_val, cohens_d)
 
     for col in missing_cols:
-        for other_col in analyzer.df.select_dtypes(
-                include=["object", "category"]
-        ).columns:
+        for other_col in analyzer.df.select_dtypes(include=["object", "category"]).columns:
             if col == other_col:
                 continue
             try:
@@ -125,19 +137,20 @@ def cramers_v(table):
                 _log.debug("Chi-square test failed for '%s' vs '%s': %s", col, other_col, e)
                 continue
 
-        for other_col in analyzer.df.select_dtypes(
-                include=["int64", "float64"]
-        ).columns:
+        for other_col in analyzer.df.select_dtypes(include=["int64", "float64"]).columns:
             if col == other_col:
                 continue
             try:
                 missing = analyzer.df[analyzer.df[col].isna()][other_col].dropna()
                 non_missing = analyzer.df[analyzer.df[col].notna()][other_col].dropna()
-                if len(missing) < _THRESHOLDS.pattern_min_group_size or len(non_missing) < _THRESHOLDS.pattern_min_group_size:
+                if (
+                    len(missing) < _THRESHOLDS.pattern_min_group_size
+                    or len(non_missing) < _THRESHOLDS.pattern_min_group_size
+                ):
                     continue
 
                 # Replaced f_oneway with mannwhitneyu
-                u_stat, p_val = mannwhitneyu(missing, non_missing, alternative='two-sided')
+                u_stat, p_val = mannwhitneyu(missing, non_missing, alternative="two-sided")
 
                 # Cohen's d proxy as effect size
                 pooled_std = np.sqrt((np.std(missing) ** 2 + np.std(non_missing) ** 2) / 2)
@@ -160,7 +173,7 @@ def cramers_v(table):
         if all_patterns:
             # Sort by effect size (descending) and take top 3
             all_patterns.sort(key=lambda x: x[2], reverse=True)  # x[2] is effect size
-            top_corrs = [pat[0] for pat in all_patterns[:_THRESHOLDS.pattern_top_correlations]]
+            top_corrs = [pat[0] for pat in all_patterns[: _THRESHOLDS.pattern_top_correlations]]
             total_count = len(all_patterns)
 
             desc = f"Missingness in '{col}' correlates with {total_count} columns ({', '.join(top_corrs)})"
@@ -170,7 +183,9 @@ def cramers_v(table):
             is_target_correlated = any(pat[0] == analyzer.target_col for pat in all_patterns)
             severity = (
                 "critical"
-                if p_val < critical_p_threshold and is_target_correlated and max_effect > _THRESHOLDS.pattern_effect_critical
+                if p_val < critical_p_threshold
+                and is_target_correlated
+                and max_effect > _THRESHOLDS.pattern_effect_critical
                 else "warning"
             )
             impact = "high" if severity == "critical" else "medium"
@@ -191,4 +206,4 @@ def cramers_v(table):
                 )
             )
 
-    return issues
\ No newline at end of file
+    return issues
diff --git a/hashprep/checks/outliers.py b/hashprep/checks/outliers.py
index 9f79638..1cd3217 100644
--- a/hashprep/checks/outliers.py
+++ b/hashprep/checks/outliers.py
@@ -1,10 +1,12 @@
-from .core import Issue
-import pandas as pd
 import numpy as np
+import pandas as pd
+
 from ..config import DEFAULT_CONFIG
+from .core import Issue
 
 _THRESHOLDS = DEFAULT_CONFIG.outliers
 
+
 def _check_outliers(analyzer, z_threshold: float = _THRESHOLDS.z_score):
     issues = []
     for col in analyzer.df.select_dtypes(include="number").columns:
@@ -34,7 +36,12 @@ def _check_outliers(analyzer, z_threshold: float = _THRESHOLDS.z_score):
             )
     return issues
 
-def _check_high_zero_counts(analyzer, threshold: float = _THRESHOLDS.zero_count_warning, critical_threshold: float = _THRESHOLDS.zero_count_critical):
+
+def _check_high_zero_counts(
+    analyzer,
+    threshold: float = _THRESHOLDS.zero_count_warning,
+    critical_threshold: float = _THRESHOLDS.zero_count_critical,
+):
     issues = []
     for col in analyzer.df.select_dtypes(include="number").columns:
         series = analyzer.df[col].dropna()
@@ -61,7 +68,10 @@ def _check_high_zero_counts(analyzer, threshold: float = _THRESHOLDS.zero_count_
             )
     return issues
 
-def _check_extreme_text_lengths(analyzer, max_threshold: int = _THRESHOLDS.text_length_max, min_threshold: int = _THRESHOLDS.text_length_min):
+
+def _check_extreme_text_lengths(
+    analyzer, max_threshold: int = _THRESHOLDS.text_length_max, min_threshold: int = _THRESHOLDS.text_length_min
+):
     issues = []
     for col in analyzer.df.select_dtypes(include="object").columns:
         series = analyzer.df[col].dropna().astype(str)
@@ -69,9 +79,7 @@ def _check_extreme_text_lengths(analyzer, max_threshold: int = _THRESHOLDS.text_
             continue
         lengths = series.str.len()
         if lengths.max() > max_threshold or lengths.min() < min_threshold:
-            extreme_ratio = float(
-                ((lengths > max_threshold) | (lengths < min_threshold)).mean()
-            )
+            extreme_ratio = float(((lengths > max_threshold) | (lengths < min_threshold)).mean())
             severity = "critical" if extreme_ratio > _THRESHOLDS.extreme_ratio_critical else "warning"
             impact = "high" if severity == "critical" else "medium"
             quick_fix = (
@@ -91,7 +99,12 @@ def _check_extreme_text_lengths(analyzer, max_threshold: int = _THRESHOLDS.text_
             )
     return issues
 
-def _check_skewness(analyzer, skew_threshold: float = _THRESHOLDS.skewness_warning, critical_skew_threshold: float = _THRESHOLDS.skewness_critical):
+
+def _check_skewness(
+    analyzer,
+    skew_threshold: float = _THRESHOLDS.skewness_warning,
+    critical_skew_threshold: float = _THRESHOLDS.skewness_critical,
+):
     issues = []
     for col in analyzer.df.select_dtypes(include="number").columns:
         series = analyzer.df[col].dropna()
@@ -99,7 +112,7 @@ def _check_skewness(analyzer, skew_threshold: float = _THRESHOLDS.skewness_warni
             continue
         skewness = float(series.skew())
         abs_skew = abs(skewness)
-        
+
         if abs_skew > skew_threshold:
             severity = "critical" if abs_skew > critical_skew_threshold else "warning"
             impact = "high" if severity == "critical" else "medium"
@@ -120,6 +133,7 @@ def _check_skewness(analyzer, skew_threshold: float = _THRESHOLDS.skewness_warni
             )
     return issues
 
+
 def _check_datetime_skew(analyzer, threshold: float = _THRESHOLDS.datetime_skew):
     issues = []
     for col in analyzer.df.select_dtypes(include="datetime64").columns:
@@ -224,4 +238,4 @@ def _check_empty_dataset(analyzer):
                 quick_fix="All data is missing. Check data extraction and verify the source.",
             )
         )
-    return issues
\ No newline at end of file
+    return issues
diff --git a/hashprep/config.py b/hashprep/config.py
index 9c73c00..d62d3d9 100644
--- a/hashprep/config.py
+++ b/hashprep/config.py
@@ -7,12 +7,12 @@
 """
 
 from dataclasses import dataclass, field
-from typing import Dict
 
 
 @dataclass(frozen=True)
 class MissingValueThresholds:
     """Thresholds for missing value detection."""
+
     warning: float = 0.4
     critical: float = 0.7
     dataset_warning_pct: float = 20.0
@@ -31,6 +31,7 @@ class MissingValueThresholds:
 @dataclass(frozen=True)
 class OutlierThresholds:
     """Thresholds for outlier detection."""
+
     z_score: float = 4.0
     outlier_ratio_critical: float = 0.1
     zero_count_warning: float = 0.5
@@ -49,6 +50,7 @@ class OutlierThresholds:
 @dataclass(frozen=True)
 class ColumnThresholds:
     """Thresholds for column-level checks."""
+
     high_cardinality_count: int = 100
     high_cardinality_ratio_critical: float = 0.9
     duplicate_ratio_critical: float = 0.1
@@ -57,6 +59,7 @@ class ColumnThresholds:
 @dataclass(frozen=True)
 class CorrelationThresholds:
     """Thresholds for correlation analysis."""
+
     spearman_warning: float = 0.7
     spearman_critical: float = 0.95
     pearson_warning: float = 0.7
@@ -73,19 +76,20 @@ class CorrelationThresholds:
     def as_nested_dict(self) -> dict:
         """Return thresholds in the nested dict format used by correlation checks."""
         return {
-            'numeric': {
-                'spearman': {'warning': self.spearman_warning, 'critical': self.spearman_critical},
-                'pearson': {'warning': self.pearson_warning, 'critical': self.pearson_critical},
-                'kendall': {'warning': self.kendall_warning, 'critical': self.kendall_critical},
+            "numeric": {
+                "spearman": {"warning": self.spearman_warning, "critical": self.spearman_critical},
+                "pearson": {"warning": self.pearson_warning, "critical": self.pearson_critical},
+                "kendall": {"warning": self.kendall_warning, "critical": self.kendall_critical},
             },
-            'categorical': {'warning': self.categorical_warning, 'critical': self.categorical_critical},
-            'mixed': {'warning': self.mixed_warning, 'critical': self.mixed_critical},
+            "categorical": {"warning": self.categorical_warning, "critical": self.categorical_critical},
+            "mixed": {"warning": self.mixed_warning, "critical": self.mixed_critical},
         }
 
 
 @dataclass(frozen=True)
 class LeakageThresholds:
     """Thresholds for data leakage detection."""
+
     numeric_critical: float = 0.98
     numeric_warning: float = 0.95
     categorical_critical: float = 0.95
@@ -98,6 +102,7 @@ class LeakageThresholds:
 @dataclass(frozen=True)
 class DriftThresholds:
     """Thresholds for dataset drift detection."""
+
     p_value: float = 0.05
     critical_p_value: float = 0.001
     max_categories_for_chi2: int = 50
@@ -107,6 +112,7 @@ class DriftThresholds:
 @dataclass(frozen=True)
 class DistributionThresholds:
     """Thresholds for distribution checks."""
+
     uniform_p_value: float = 0.1
     uniform_min_samples: int = 20
     unique_value_ratio: float = 0.95
@@ -116,25 +122,33 @@ class DistributionThresholds:
 @dataclass(frozen=True)
 class ImbalanceThresholds:
     """Thresholds for class imbalance detection."""
+
     majority_class_ratio: float = 0.9
 
 
 @dataclass(frozen=True)
 class TypeInferenceConfig:
     """Configuration for type inference."""
+
     cat_cardinality_threshold: int = 50
     cat_percentage_threshold: float = 0.05
     num_low_cat_threshold: int = 10
-    bool_mappings: Dict[str, bool] = field(default_factory=lambda: {
-        'true': True, 'false': False,
-        'yes': True, 'no': False,
-        't': True, 'f': False,
-    })
+    bool_mappings: dict[str, bool] = field(
+        default_factory=lambda: {
+            "true": True,
+            "false": False,
+            "yes": True,
+            "no": False,
+            "t": True,
+            "f": False,
+        }
+    )
 
 
 @dataclass(frozen=True)
 class SamplingDefaults:
     """Default values for dataset sampling."""
+
     max_rows: int = 100_000
     memory_threshold_mb: float = 500.0
 
@@ -142,6 +156,7 @@ class SamplingDefaults:
 @dataclass(frozen=True)
 class SummaryDefaults:
     """Defaults for summary generation."""
+
     histogram_bins: int = 10
     top_n_values: int = 10
     extreme_values_count: int = 10
@@ -151,6 +166,7 @@ class SummaryDefaults:
 @dataclass(frozen=True)
 class HashPrepConfig:
     """Root configuration aggregating all threshold groups."""
+
     missing_values: MissingValueThresholds = field(default_factory=MissingValueThresholds)
     outliers: OutlierThresholds = field(default_factory=OutlierThresholds)
     columns: ColumnThresholds = field(default_factory=ColumnThresholds)
diff --git a/hashprep/core/analyzer.py b/hashprep/core/analyzer.py
index bdbbc59..31e4161 100644
--- a/hashprep/core/analyzer.py
+++ b/hashprep/core/analyzer.py
@@ -1,7 +1,6 @@
 import time
 import warnings
 from datetime import datetime
-from typing import Dict, List, Optional
 
 import pandas as pd
 from scipy.stats import ConstantInputWarning
@@ -62,11 +61,11 @@ class DatasetAnalyzer:
     def __init__(
         self,
         df: pd.DataFrame,
-        target_col: Optional[str] = None,
-        selected_checks: Optional[List[str]] = None,
+        target_col: str | None = None,
+        selected_checks: list[str] | None = None,
         include_plots: bool = False,
-        comparison_df: Optional[pd.DataFrame] = None,
-        sampling_config: Optional[SamplingConfig] = None,
+        comparison_df: pd.DataFrame | None = None,
+        sampling_config: SamplingConfig | None = None,
         auto_sample: bool = True,
     ):
         if not isinstance(df, pd.DataFrame):
@@ -82,10 +81,10 @@ def __init__(
         self.target_col = target_col
         self.selected_checks = selected_checks
         self.include_plots = include_plots
-        self.issues: List = []
-        self.summaries: Dict = {}
+        self.issues: list = []
+        self.summaries: dict = {}
 
-        self.sampler: Optional[DatasetSampler] = None
+        self.sampler: DatasetSampler | None = None
         if auto_sample:
             self.sampler = DatasetSampler(sampling_config)
             if self.sampler.should_sample(df):
@@ -100,10 +99,10 @@ def __init__(
 
         self.column_types = infer_types(self.df)
 
-    def analyze(self) -> Dict:
+    def analyze(self) -> dict:
         """Run all summaries and checks, return summary."""
         # Suppress scipy warnings about constant input arrays
-        warnings.filterwarnings('ignore', category=ConstantInputWarning)
+        warnings.filterwarnings("ignore", category=ConstantInputWarning)
 
         analysis_start = datetime.now()
         start_time = time.time()
@@ -114,12 +113,8 @@ def analyze(self) -> Dict:
         duplicate_info = get_duplicate_info(self.df)
         self.summaries["dataset_info"].update(duplicate_info)
 
-        self.summaries["variable_types"] = summarize_variable_types(
-            self.df, column_types=self.column_types
-        )
-        self.summaries["variable_type_counts"] = summarize_variable_type_counts(
-            self.df, column_types=self.column_types
-        )
+        self.summaries["variable_types"] = summarize_variable_types(self.df, column_types=self.column_types)
+        self.summaries["variable_type_counts"] = summarize_variable_type_counts(self.df, column_types=self.column_types)
         self.summaries["reproduction_info"] = add_reproduction_info(self.df)
         self.summaries["variables"] = summarize_variables(self.df, column_types=self.column_types)
         self.summaries.update(summarize_interactions(self.df))
@@ -151,22 +146,16 @@ def _generate_plots(self):
             plots = {}
             if stats["category"] == "Numeric":
                 if stats["histogram"]["counts"]:
-                    plots["histogram"] = plot_histogram(
-                        self.df[col].dropna(), f"Histogram of {col}"
-                    )
+                    plots["histogram"] = plot_histogram(self.df[col].dropna(), f"Histogram of {col}")
             elif stats["category"] in ["Categorical", "Boolean"]:
                 if stats["categories"].get("common_values"):
                     series = self.df[col].dropna().astype(str).value_counts().head(10)
-                    plots["common_values_bar"] = plot_bar(
-                        series, f"Top Values of {col}", col, "Count"
-                    )
+                    plots["common_values_bar"] = plot_bar(series, f"Top Values of {col}", col, "Count")
             elif stats["category"] == "Text":
                 if stats["words"]:
                     word_counts = {w: d["count"] for w, d in stats["words"].items()}
                     series = pd.Series(word_counts).head(10)
-                    plots["word_bar"] = plot_bar(
-                        series, f"Top Words in {col}", "Words", "Count"
-                    )
+                    plots["word_bar"] = plot_bar(series, f"Top Words in {col}", "Words", "Count")
 
             stats["plots"] = plots
 
@@ -178,8 +167,8 @@ def _generate_plots(self):
 
                 for method in ["pearson", "spearman", "kendall"]:
                     corr = numeric_df.corr(method=method)
-                    self.summaries["numeric_correlations"]["plots"][method] = (
-                        plot_heatmap(corr, f"{method.capitalize()} Correlation")
+                    self.summaries["numeric_correlations"]["plots"][method] = plot_heatmap(
+                        corr, f"{method.capitalize()} Correlation"
                     )
 
         pairs = self.summaries.get("scatter_pairs", [])
diff --git a/hashprep/core/visualizations.py b/hashprep/core/visualizations.py
index c38280f..eda1f6a 100644
--- a/hashprep/core/visualizations.py
+++ b/hashprep/core/visualizations.py
@@ -1,22 +1,24 @@
+import base64
+import io
+
 import matplotlib.pyplot as plt
-import seaborn as sns
 import pandas as pd
-import io
-import base64
-from typing import Dict, Optional, List, Any
+import seaborn as sns
 
 # Set style
-plt.style.use('ggplot')
+plt.style.use("ggplot")
 sns.set_palette("husl")
 
+
 def _fig_to_base64(fig) -> str:
     buf = io.BytesIO()
-    fig.savefig(buf, format='png', bbox_inches='tight')
+    fig.savefig(buf, format="png", bbox_inches="tight")
     buf.seek(0)
-    data = base64.b64encode(buf.read()).decode('utf-8')
+    data = base64.b64encode(buf.read()).decode("utf-8")
     plt.close(fig)
     return data
 
+
 def plot_histogram(series: pd.Series, title: str) -> str:
     fig, ax = plt.subplots(figsize=(4, 3))
     sns.histplot(series, bins=10, ax=ax)
@@ -25,6 +27,7 @@ def plot_histogram(series: pd.Series, title: str) -> str:
     ax.set_ylabel("Count")
     return _fig_to_base64(fig)
 
+
 def plot_bar(series: pd.Series, title: str, xlabel: str, ylabel: str) -> str:
     fig, ax = plt.subplots(figsize=(4, 3))
     series.plot(kind="bar", ax=ax)
@@ -34,12 +37,14 @@ def plot_bar(series: pd.Series, title: str, xlabel: str, ylabel: str) -> str:
     plt.xticks(rotation=45, ha="right")
     return _fig_to_base64(fig)
 
+
 def plot_heatmap(corr_matrix: pd.DataFrame, title: str, vmin: float = -1, vmax: float = 1) -> str:
     fig, ax = plt.subplots(figsize=(5, 4))
     sns.heatmap(corr_matrix, annot=True, cmap="coolwarm", vmin=vmin, vmax=vmax, ax=ax)
     ax.set_title(title)
     return _fig_to_base64(fig)
 
+
 def plot_scatter(df: pd.DataFrame, x: str, y: str) -> str:
     fig, ax = plt.subplots(figsize=(4, 3))
     sns.scatterplot(data=df, x=x, y=y, ax=ax)
@@ -48,6 +53,7 @@ def plot_scatter(df: pd.DataFrame, x: str, y: str) -> str:
     ax.set_ylabel(y)
     return _fig_to_base64(fig)
 
+
 def plot_missing_bar(missing_data: pd.Series) -> str:
     if missing_data.sum() == 0:
         return ""
@@ -59,6 +65,7 @@ def plot_missing_bar(missing_data: pd.Series) -> str:
     plt.xticks(rotation=45, ha="right")
     return _fig_to_base64(fig)
 
+
 def plot_missing_heatmap(df: pd.DataFrame) -> str:
     fig, ax = plt.subplots(figsize=(5, 3))
     sns.heatmap(df.isnull(), cbar=False, cmap="viridis", ax=ax)
diff --git a/hashprep/interfaces/cli/main.py b/hashprep/interfaces/cli/main.py
index 6083aae..d5e216f 100644
--- a/hashprep/interfaces/cli/main.py
+++ b/hashprep/interfaces/cli/main.py
@@ -30,13 +30,7 @@ def json_numpy_handler(obj):
 def suggest_check_names(invalid_check, valid_checks, cutoff=0.4):
     """Suggest similar check names for an invalid check using fuzzybunny."""
     # Use fuzzybunny to find the top 3 most similar check names
-    results = fuzzybunny.rank(
-        invalid_check,
-        valid_checks,
-        scorer='levenshtein',
-        threshold=cutoff,
-        top_n=3
-    )
+    results = fuzzybunny.rank(invalid_check, valid_checks, scorer="levenshtein", threshold=cutoff, top_n=3)
     # Extract just the matched strings from the results
     suggestions = [match[0] for match in results]
     return suggestions
@@ -76,9 +70,7 @@ def version():
     help="Max rows for sampling (default: 100000)",
 )
 @click.option("--no-sample", is_flag=True, help="Disable automatic sampling")
-def scan(
-    file_path, critical_only, quiet, json_out, target, checks, comparison, sample_size, no_sample
-):
+def scan(file_path, critical_only, quiet, json_out, target, checks, comparison, sample_size, no_sample):
     df = pd.read_csv(file_path)
     comparison_df = pd.read_csv(comparison) if comparison else None
 
@@ -135,9 +127,7 @@ def scan(
 
     if "sampling_info" in summary and summary["sampling_info"].get("was_sampled"):
         info = summary["sampling_info"]
-        click.echo(
-            f"Sampled: {info['sample_fraction']*100:.1f}% of {info['original_rows']} rows"
-        )
+        click.echo(f"Sampled: {info['sample_fraction'] * 100:.1f}% of {info['original_rows']} rows")
 
     if critical_only:
         click.echo("Critical Issues:")
@@ -214,7 +204,7 @@ def details(file_path, target, checks, comparison, sample_size, no_sample):
     if "sampling_info" in summary and summary["sampling_info"].get("was_sampled"):
         info = summary["sampling_info"]
         click.echo(
-            f"Note: Analysis performed on {info['sample_fraction']*100:.1f}% sample ({int(info['original_rows'] * info['sample_fraction'])} of {info['original_rows']} rows)"
+            f"Note: Analysis performed on {info['sample_fraction'] * 100:.1f}% sample ({int(info['original_rows'] * info['sample_fraction'])} of {info['original_rows']} rows)"
         )
 
     click.echo("\nCritical Issues:")
@@ -259,9 +249,7 @@ def details(file_path, target, checks, comparison, sample_size, no_sample):
 @cli.command()
 @click.argument("file_path", type=click.Path(exists=True))
 @click.option("--with-code", is_flag=True, help="Generate fixes.py and pipeline.py scripts")
-@click.option(
-    "--full/--no-full", default=True, help="Include full summaries in report (default: True)"
-)
+@click.option("--full/--no-full", default=True, help="Include full summaries in report (default: True)")
 @click.option("--format", default="md", help="Report format: md, json, html, pdf")
 @click.option("--theme", default="minimal", help="HTML report theme: minimal, neubrutalism")
 @click.option("--target", default=None, help="Target column for relevant checks")
@@ -343,15 +331,11 @@ def report(
         theme=theme,
     )
     click.echo(f"Report saved to: {report_file}")
-    click.echo(
-        f"Summary: {summary['critical_count']} critical, {summary['warning_count']} warnings"
-    )
+    click.echo(f"Summary: {summary['critical_count']} critical, {summary['warning_count']} warnings")
 
     if "sampling_info" in summary and summary["sampling_info"].get("was_sampled"):
         info = summary["sampling_info"]
-        click.echo(
-            f"Note: Analysis performed on {info['sample_fraction']*100:.1f}% sample"
-        )
+        click.echo(f"Note: Analysis performed on {info['sample_fraction'] * 100:.1f}% sample")
 
     if with_code:
         issues = [Issue(**i) for i in summary["issues"]]
diff --git a/hashprep/preparers/codegen.py b/hashprep/preparers/codegen.py
index 02d4d23..14cb819 100644
--- a/hashprep/preparers/codegen.py
+++ b/hashprep/preparers/codegen.py
@@ -1,5 +1,3 @@
-from typing import Dict, List, Set
-
 from .models import FixSuggestion, FixType
 from .strategies import (
     ColumnDropStrategy,
@@ -16,7 +14,7 @@
 class CodeGenerator:
     """Generates executable Python code from fix suggestions."""
 
-    STRATEGY_MAP: Dict[FixType, FixStrategy] = {
+    STRATEGY_MAP: dict[FixType, FixStrategy] = {
         FixType.DROP_COLUMN: ColumnDropStrategy(),
         FixType.DROP_DUPLICATES: DuplicateRemovalStrategy(),
         FixType.IMPUTE: ImputationStrategy(),
@@ -26,13 +24,13 @@ class CodeGenerator:
         FixType.CLIP_OUTLIERS: OutlierStrategy(),
     }
 
-    def __init__(self, suggestions: List[FixSuggestion]):
+    def __init__(self, suggestions: list[FixSuggestion]):
         self.suggestions = suggestions
 
     def generate_pandas_script(self) -> str:
         """Generate a complete, runnable pandas script."""
         imports = self._collect_imports()
-        code_blocks: List[str] = []
+        code_blocks: list[str] = []
 
         code_blocks.append('"""')
         code_blocks.append("Auto-generated data cleaning script by HashPrep.")
@@ -82,9 +80,9 @@ def _generate_code_for_suggestion(self, suggestion: FixSuggestion) -> str:
             return strategy.generate_pandas_code(suggestion)
         return ""
 
-    def _collect_imports(self) -> List[str]:
+    def _collect_imports(self) -> list[str]:
         """Collect all required imports."""
-        imports: Set[str] = {"import pandas as pd", "import numpy as np"}
+        imports: set[str] = {"import pandas as pd", "import numpy as np"}
 
         for suggestion in self.suggestions:
             strategy = self.STRATEGY_MAP.get(suggestion.fix_type)
diff --git a/hashprep/preparers/fix_registry.py b/hashprep/preparers/fix_registry.py
index 0100091..5bbee4f 100644
--- a/hashprep/preparers/fix_registry.py
+++ b/hashprep/preparers/fix_registry.py
@@ -1,4 +1,4 @@
-from typing import Callable, Dict, List, Optional
+from collections.abc import Callable
 
 from ..checks.core import Issue
 from .models import (
@@ -6,7 +6,6 @@
     FixSuggestion,
     FixType,
     ImputeMethod,
-    ScaleMethod,
     TransformMethod,
 )
 
@@ -16,15 +15,15 @@ class FixRegistry:
 
     def __init__(
         self,
-        column_types: Dict[str, str],
-        target_col: Optional[str] = None,
-        column_stats: Optional[Dict[str, Dict]] = None,
+        column_types: dict[str, str],
+        target_col: str | None = None,
+        column_stats: dict[str, dict] | None = None,
     ):
         self.column_types = column_types
         self.target_col = target_col
         self.column_stats = column_stats or {}
 
-        self._handlers: Dict[str, Callable[[Issue], List[FixSuggestion]]] = {
+        self._handlers: dict[str, Callable[[Issue], list[FixSuggestion]]] = {
             "missing_values": self._suggest_missing_fix,
             "high_missing_values": self._suggest_missing_fix,
             "empty_column": self._suggest_drop,
@@ -42,7 +41,7 @@ def __init__(
             "feature_correlation": self._suggest_drop_correlated,
         }
 
-    def get_suggestions(self, issue: Issue) -> List[FixSuggestion]:
+    def get_suggestions(self, issue: Issue) -> list[FixSuggestion]:
         """Get fix suggestions for an issue."""
         handler = self._handlers.get(issue.category)
         if handler:
@@ -64,7 +63,7 @@ def _get_missing_pct(self, issue: Issue) -> float:
                 pass
         return 50.0
 
-    def _suggest_missing_fix(self, issue: Issue) -> List[FixSuggestion]:
+    def _suggest_missing_fix(self, issue: Issue) -> list[FixSuggestion]:
         col = issue.column
         col_type = self._get_column_type(col)
         missing_pct = self._get_missing_pct(issue)
@@ -115,7 +114,7 @@ def _suggest_missing_fix(self, issue: Issue) -> List[FixSuggestion]:
             )
         ]
 
-    def _suggest_drop(self, issue: Issue) -> List[FixSuggestion]:
+    def _suggest_drop(self, issue: Issue) -> list[FixSuggestion]:
         return [
             FixSuggestion(
                 fix_type=FixType.DROP_COLUMN,
@@ -126,7 +125,7 @@ def _suggest_drop(self, issue: Issue) -> List[FixSuggestion]:
             )
         ]
 
-    def _suggest_drop_with_warning(self, issue: Issue) -> List[FixSuggestion]:
+    def _suggest_drop_with_warning(self, issue: Issue) -> list[FixSuggestion]:
         return [
             FixSuggestion(
                 fix_type=FixType.DROP_COLUMN,
@@ -137,7 +136,7 @@ def _suggest_drop_with_warning(self, issue: Issue) -> List[FixSuggestion]:
             )
         ]
 
-    def _suggest_encoding(self, issue: Issue) -> List[FixSuggestion]:
+    def _suggest_encoding(self, issue: Issue) -> list[FixSuggestion]:
         col = issue.column
         desc = issue.description.lower()
 
@@ -185,7 +184,7 @@ def _suggest_encoding(self, issue: Issue) -> List[FixSuggestion]:
             )
         ]
 
-    def _suggest_dedupe(self, issue: Issue) -> List[FixSuggestion]:
+    def _suggest_dedupe(self, issue: Issue) -> list[FixSuggestion]:
         return [
             FixSuggestion(
                 fix_type=FixType.DROP_DUPLICATES,
@@ -197,7 +196,7 @@ def _suggest_dedupe(self, issue: Issue) -> List[FixSuggestion]:
             )
         ]
 
-    def _suggest_outlier_fix(self, issue: Issue) -> List[FixSuggestion]:
+    def _suggest_outlier_fix(self, issue: Issue) -> list[FixSuggestion]:
         return [
             FixSuggestion(
                 fix_type=FixType.CLIP_OUTLIERS,
@@ -209,7 +208,7 @@ def _suggest_outlier_fix(self, issue: Issue) -> List[FixSuggestion]:
             )
         ]
 
-    def _suggest_transform(self, issue: Issue) -> List[FixSuggestion]:
+    def _suggest_transform(self, issue: Issue) -> list[FixSuggestion]:
         col = issue.column
         desc = issue.description.lower()
 
@@ -250,7 +249,7 @@ def _suggest_transform(self, issue: Issue) -> List[FixSuggestion]:
             )
         ]
 
-    def _suggest_drop_correlated(self, issue: Issue) -> List[FixSuggestion]:
+    def _suggest_drop_correlated(self, issue: Issue) -> list[FixSuggestion]:
         col = issue.column
         return [
             FixSuggestion(
diff --git a/hashprep/preparers/models.py b/hashprep/preparers/models.py
index 1def025..1d96e06 100644
--- a/hashprep/preparers/models.py
+++ b/hashprep/preparers/models.py
@@ -1,6 +1,6 @@
 from dataclasses import dataclass, field
 from enum import Enum
-from typing import Any, Dict, List, Optional
+from typing import Any
 
 
 class FixType(Enum):
@@ -59,9 +59,9 @@ class FixSuggestion:
     """Structured representation of a data fix action."""
 
     fix_type: FixType
-    columns: List[str]
-    method: Optional[str] = None
-    parameters: Dict[str, Any] = field(default_factory=dict)
+    columns: list[str]
+    method: str | None = None
+    parameters: dict[str, Any] = field(default_factory=dict)
     priority: int = 0
     reason: str = ""
     source_issue_category: str = ""
diff --git a/hashprep/preparers/pipeline_builder.py b/hashprep/preparers/pipeline_builder.py
index 0783403..faa58b0 100644
--- a/hashprep/preparers/pipeline_builder.py
+++ b/hashprep/preparers/pipeline_builder.py
@@ -1,4 +1,4 @@
-from typing import Any, Dict, List, Optional, Set, Tuple
+from typing import Any
 
 from .models import FixSuggestion, FixType
 from .strategies import (
@@ -16,7 +16,7 @@ class PipelineBuilder:
     Generates both code and actual pipeline objects.
     """
 
-    STRATEGY_MAP: Dict[FixType, Any] = {
+    STRATEGY_MAP: dict[FixType, Any] = {
         FixType.DROP_COLUMN: ColumnDropStrategy(),
         FixType.IMPUTE: ImputationStrategy(),
         FixType.ENCODE: EncodingStrategy(),
@@ -24,7 +24,7 @@ class PipelineBuilder:
         FixType.TRANSFORM: TransformationStrategy(),
     }
 
-    def __init__(self, suggestions: List[FixSuggestion]):
+    def __init__(self, suggestions: list[FixSuggestion]):
         self.suggestions = suggestions
         self._validate_suggestions()
 
@@ -38,7 +38,7 @@ def _validate_suggestions(self) -> None:
 
     def generate_pipeline_code(self) -> str:
         """Generate sklearn pipeline code as a string."""
-        code: List[str] = []
+        code: list[str] = []
 
         code.append('"""')
         code.append("Auto-generated sklearn preprocessing pipeline by HashPrep.")
@@ -111,9 +111,9 @@ def generate_pipeline_code(self) -> str:
 
         return "\n".join(code)
 
-    def _collect_all_imports(self) -> List[str]:
+    def _collect_all_imports(self) -> list[str]:
         """Collect all required imports for the pipeline."""
-        imports: Set[str] = {
+        imports: set[str] = {
             "from sklearn.pipeline import Pipeline",
             "from sklearn.compose import ColumnTransformer",
             "import numpy as np",
@@ -128,10 +128,10 @@ def _collect_all_imports(self) -> List[str]:
 
         return sorted(imports)
 
-    def _build_transformer_list(self) -> List[Tuple[str, str, List[str]]]:
+    def _build_transformer_list(self) -> list[tuple[str, str, list[str]]]:
         """Build list of (name, transformer_code, columns) tuples."""
-        transformers: List[Tuple[str, str, List[str]]] = []
-        seen_names: Set[str] = set()
+        transformers: list[tuple[str, str, list[str]]] = []
+        seen_names: set[str] = set()
 
         for suggestion in self.suggestions:
             if suggestion.parameters.get("pre_pipeline"):
@@ -160,7 +160,7 @@ def _build_transformer_list(self) -> List[Tuple[str, str, List[str]]]:
 
         return transformers
 
-    def build_pipeline_object(self) -> Optional[Any]:
+    def build_pipeline_object(self) -> Any | None:
         """
         Return an actual sklearn Pipeline object.
         Can be serialized with joblib.
@@ -172,7 +172,7 @@ def build_pipeline_object(self) -> Optional[Any]:
             return None
 
         transformers = []
-        seen_names: Set[str] = set()
+        seen_names: set[str] = set()
 
         for suggestion in self.suggestions:
             if suggestion.parameters.get("pre_pipeline"):
@@ -207,7 +207,7 @@ def build_pipeline_object(self) -> Optional[Any]:
 
         return Pipeline([("preprocessor", preprocessor)])
 
-    def _get_transformer_instance(self, suggestion: FixSuggestion) -> Optional[Any]:
+    def _get_transformer_instance(self, suggestion: FixSuggestion) -> Any | None:
         """Return actual transformer instance for a suggestion."""
         try:
             from sklearn.impute import KNNImputer, SimpleImputer
@@ -261,9 +261,7 @@ def _get_transformer_instance(self, suggestion: FixSuggestion) -> Optional[Any]:
             if method == "onehot":
                 return OneHotEncoder(handle_unknown="ignore", sparse_output=False)
             if method in ("ordinal", "label"):
-                return OrdinalEncoder(
-                    handle_unknown="use_encoded_value", unknown_value=-1
-                )
+                return OrdinalEncoder(handle_unknown="use_encoded_value", unknown_value=-1)
             return None
 
         if fix_type == FixType.TRANSFORM:
@@ -274,8 +272,6 @@ def _get_transformer_instance(self, suggestion: FixSuggestion) -> Optional[Any]:
             if method == "log1p":
                 return FunctionTransformer(np.log1p, validate=True)
             if method == "sqrt":
-                return FunctionTransformer(
-                    lambda x: np.sqrt(np.clip(x, 0, None)), validate=True
-                )
+                return FunctionTransformer(lambda x: np.sqrt(np.clip(x, 0, None)), validate=True)
 
         return None
diff --git a/hashprep/preparers/strategies/__init__.py b/hashprep/preparers/strategies/__init__.py
index 5c9c0dc..0bc31be 100644
--- a/hashprep/preparers/strategies/__init__.py
+++ b/hashprep/preparers/strategies/__init__.py
@@ -1,10 +1,10 @@
 from .base import FixStrategy
-from .imputation import ImputationStrategy
+from .column_ops import ColumnDropStrategy, DuplicateRemovalStrategy
 from .encoding import EncodingStrategy
+from .imputation import ImputationStrategy
+from .outlier import OutlierStrategy
 from .scaling import ScalingStrategy
 from .transformation import TransformationStrategy
-from .outlier import OutlierStrategy
-from .column_ops import ColumnDropStrategy, DuplicateRemovalStrategy
 
 __all__ = [
     "FixStrategy",
diff --git a/hashprep/preparers/strategies/base.py b/hashprep/preparers/strategies/base.py
index 06c194d..9b6d6b0 100644
--- a/hashprep/preparers/strategies/base.py
+++ b/hashprep/preparers/strategies/base.py
@@ -1,5 +1,4 @@
 from abc import ABC, abstractmethod
-from typing import List, Optional, Tuple
 
 from ..models import FixSuggestion
 
@@ -7,7 +6,7 @@
 class FixStrategy(ABC):
     """Base class for all fix strategies."""
 
-    SKLEARN_IMPORTS: List[str] = []
+    SKLEARN_IMPORTS: list[str] = []
 
     @abstractmethod
     def generate_pandas_code(self, suggestion: FixSuggestion) -> str:
@@ -15,20 +14,18 @@ def generate_pandas_code(self, suggestion: FixSuggestion) -> str:
         pass
 
     @abstractmethod
-    def get_sklearn_transformer(
-        self, suggestion: FixSuggestion
-    ) -> Tuple[Optional[str], List[str]]:
+    def get_sklearn_transformer(self, suggestion: FixSuggestion) -> tuple[str | None, list[str]]:
         """
         Return (transformer_instance_code, column_list) for sklearn pipeline.
         Returns (None, []) if not applicable to sklearn pipelines.
         """
         pass
 
-    def get_sklearn_imports(self) -> List[str]:
+    def get_sklearn_imports(self) -> list[str]:
         """Return required sklearn import statements."""
         return self.SKLEARN_IMPORTS
 
-    def _format_column_list(self, columns: List[str]) -> str:
+    def _format_column_list(self, columns: list[str]) -> str:
         """Format column list as Python literal."""
         if len(columns) == 1:
             return f"['{columns[0]}']"
diff --git a/hashprep/preparers/strategies/column_ops.py b/hashprep/preparers/strategies/column_ops.py
index 87ec891..0681ed8 100644
--- a/hashprep/preparers/strategies/column_ops.py
+++ b/hashprep/preparers/strategies/column_ops.py
@@ -1,5 +1,3 @@
-from typing import List, Optional, Tuple
-
 from ..models import FixSuggestion
 from .base import FixStrategy
 
@@ -7,28 +5,24 @@
 class ColumnDropStrategy(FixStrategy):
     """Strategy for dropping columns."""
 
-    SKLEARN_IMPORTS: List[str] = []
+    SKLEARN_IMPORTS: list[str] = []
 
     def generate_pandas_code(self, suggestion: FixSuggestion) -> str:
         cols = self._format_column_list(suggestion.columns)
         return f"df = df.drop(columns={cols})"
 
-    def get_sklearn_transformer(
-        self, suggestion: FixSuggestion
-    ) -> Tuple[Optional[str], List[str]]:
+    def get_sklearn_transformer(self, suggestion: FixSuggestion) -> tuple[str | None, list[str]]:
         return "'drop'", suggestion.columns
 
 
 class DuplicateRemovalStrategy(FixStrategy):
     """Strategy for removing duplicate rows."""
 
-    SKLEARN_IMPORTS: List[str] = []
+    SKLEARN_IMPORTS: list[str] = []
 
     def generate_pandas_code(self, suggestion: FixSuggestion) -> str:
         keep = suggestion.parameters.get("keep", "first")
         return f"df = df.drop_duplicates(keep='{keep}')"
 
-    def get_sklearn_transformer(
-        self, suggestion: FixSuggestion
-    ) -> Tuple[Optional[str], List[str]]:
+    def get_sklearn_transformer(self, suggestion: FixSuggestion) -> tuple[str | None, list[str]]:
         return None, []
diff --git a/hashprep/preparers/strategies/encoding.py b/hashprep/preparers/strategies/encoding.py
index a33703d..234faaf 100644
--- a/hashprep/preparers/strategies/encoding.py
+++ b/hashprep/preparers/strategies/encoding.py
@@ -1,5 +1,3 @@
-from typing import List, Optional, Tuple
-
 from ..models import EncodeMethod, FixSuggestion
 from .base import FixStrategy
 
@@ -40,9 +38,7 @@ def generate_pandas_code(self, suggestion: FixSuggestion) -> str:
 
         return f"df = pd.get_dummies(df, columns={self._format_column_list(cols)})"
 
-    def get_sklearn_transformer(
-        self, suggestion: FixSuggestion
-    ) -> Tuple[Optional[str], List[str]]:
+    def get_sklearn_transformer(self, suggestion: FixSuggestion) -> tuple[str | None, list[str]]:
         method = suggestion.method
         cols = suggestion.columns
 
diff --git a/hashprep/preparers/strategies/imputation.py b/hashprep/preparers/strategies/imputation.py
index a4171ae..56342f8 100644
--- a/hashprep/preparers/strategies/imputation.py
+++ b/hashprep/preparers/strategies/imputation.py
@@ -1,5 +1,3 @@
-from typing import List, Optional, Tuple
-
 from ..models import FixSuggestion, ImputeMethod
 from .base import FixStrategy
 
@@ -24,9 +22,7 @@ def generate_pandas_code(self, suggestion: FixSuggestion) -> str:
         if method == ImputeMethod.MODE.value:
             lines = []
             for col in suggestion.columns:
-                lines.append(
-                    f"df['{col}'] = df['{col}'].fillna(df['{col}'].mode().iloc[0])"
-                )
+                lines.append(f"df['{col}'] = df['{col}'].fillna(df['{col}'].mode().iloc[0])")
             return "\n".join(lines)
 
         if method == ImputeMethod.CONSTANT.value:
@@ -44,9 +40,7 @@ def generate_pandas_code(self, suggestion: FixSuggestion) -> str:
 
         return f"df[{cols}] = df[{cols}].fillna(df[{cols}].median())"
 
-    def get_sklearn_transformer(
-        self, suggestion: FixSuggestion
-    ) -> Tuple[Optional[str], List[str]]:
+    def get_sklearn_transformer(self, suggestion: FixSuggestion) -> tuple[str | None, list[str]]:
         method = suggestion.method
         cols = suggestion.columns
 
diff --git a/hashprep/preparers/strategies/outlier.py b/hashprep/preparers/strategies/outlier.py
index 81e0367..7ac3a89 100644
--- a/hashprep/preparers/strategies/outlier.py
+++ b/hashprep/preparers/strategies/outlier.py
@@ -1,5 +1,3 @@
-from typing import List, Optional, Tuple
-
 from ..models import FixSuggestion
 from .base import FixStrategy
 
@@ -20,12 +18,8 @@ def generate_pandas_code(self, suggestion: FixSuggestion) -> str:
             for col in cols:
                 lines.append(f"q1_{col}, q3_{col} = df['{col}'].quantile([0.25, 0.75])")
                 lines.append(f"iqr_{col} = q3_{col} - q1_{col}")
-                lines.append(
-                    f"lower_{col}, upper_{col} = q1_{col} - 1.5 * iqr_{col}, q3_{col} + 1.5 * iqr_{col}"
-                )
-                lines.append(
-                    f"df['{col}'] = df['{col}'].clip(lower=lower_{col}, upper=upper_{col})"
-                )
+                lines.append(f"lower_{col}, upper_{col} = q1_{col} - 1.5 * iqr_{col}, q3_{col} + 1.5 * iqr_{col}")
+                lines.append(f"df['{col}'] = df['{col}'].clip(lower=lower_{col}, upper=upper_{col})")
             return "\n".join(lines)
 
         if clip_method == "percentile":
@@ -33,12 +27,8 @@ def generate_pandas_code(self, suggestion: FixSuggestion) -> str:
             upper_pct = suggestion.parameters.get("upper_pct", 0.99)
             lines = []
             for col in cols:
-                lines.append(
-                    f"low_{col}, high_{col} = df['{col}'].quantile([{lower_pct}, {upper_pct}])"
-                )
-                lines.append(
-                    f"df['{col}'] = df['{col}'].clip(lower=low_{col}, upper=high_{col})"
-                )
+                lines.append(f"low_{col}, high_{col} = df['{col}'].quantile([{lower_pct}, {upper_pct}])")
+                lines.append(f"df['{col}'] = df['{col}'].clip(lower=low_{col}, upper=high_{col})")
             return "\n".join(lines)
 
         if clip_method == "zscore":
@@ -47,20 +37,14 @@ def generate_pandas_code(self, suggestion: FixSuggestion) -> str:
             for col in cols:
                 lines.append(f"mean_{col} = df['{col}'].mean()")
                 lines.append(f"std_{col} = df['{col}'].std()")
-                lines.append(
-                    f"lower_{col} = mean_{col} - {z_threshold} * std_{col}"
-                )
-                lines.append(
-                    f"upper_{col} = mean_{col} + {z_threshold} * std_{col}"
-                )
-                lines.append(
-                    f"df['{col}'] = df['{col}'].clip(lower=lower_{col}, upper=upper_{col})"
-                )
+                lines.append(f"lower_{col} = mean_{col} - {z_threshold} * std_{col}")
+                lines.append(f"upper_{col} = mean_{col} + {z_threshold} * std_{col}")
+                lines.append(f"df['{col}'] = df['{col}'].clip(lower=lower_{col}, upper=upper_{col})")
             return "\n".join(lines)
 
         return self._generate_iqr_code(cols)
 
-    def _generate_iqr_code(self, cols: List[str]) -> str:
+    def _generate_iqr_code(self, cols: list[str]) -> str:
         lines = []
         for col in cols:
             lines.append(f"q1_{col}, q3_{col} = df['{col}'].quantile([0.25, 0.75])")
@@ -70,7 +54,5 @@ def _generate_iqr_code(self, cols: List[str]) -> str:
             )
         return "\n".join(lines)
 
-    def get_sklearn_transformer(
-        self, suggestion: FixSuggestion
-    ) -> Tuple[Optional[str], List[str]]:
+    def get_sklearn_transformer(self, suggestion: FixSuggestion) -> tuple[str | None, list[str]]:
         return None, suggestion.columns
diff --git a/hashprep/preparers/strategies/scaling.py b/hashprep/preparers/strategies/scaling.py
index 0140f88..6b05178 100644
--- a/hashprep/preparers/strategies/scaling.py
+++ b/hashprep/preparers/strategies/scaling.py
@@ -1,5 +1,3 @@
-from typing import List, Optional, Tuple
-
 from ..models import FixSuggestion, ScaleMethod
 from .base import FixStrategy
 
@@ -27,9 +25,7 @@ def generate_pandas_code(self, suggestion: FixSuggestion) -> str:
                 lines.append(f"q1_{col} = df['{col}'].quantile(0.25)")
                 lines.append(f"q3_{col} = df['{col}'].quantile(0.75)")
                 lines.append(f"iqr_{col} = q3_{col} - q1_{col}")
-                lines.append(
-                    f"df['{col}'] = (df['{col}'] - df['{col}'].median()) / iqr_{col}"
-                )
+                lines.append(f"df['{col}'] = (df['{col}'] - df['{col}'].median()) / iqr_{col}")
             return "\n".join(lines)
 
         if method == ScaleMethod.MAXABS.value:
@@ -37,9 +33,7 @@ def generate_pandas_code(self, suggestion: FixSuggestion) -> str:
 
         return f"df[{cols}] = (df[{cols}] - df[{cols}].mean()) / df[{cols}].std()"
 
-    def get_sklearn_transformer(
-        self, suggestion: FixSuggestion
-    ) -> Tuple[Optional[str], List[str]]:
+    def get_sklearn_transformer(self, suggestion: FixSuggestion) -> tuple[str | None, list[str]]:
         method = suggestion.method
         cols = suggestion.columns
 
diff --git a/hashprep/preparers/strategies/transformation.py b/hashprep/preparers/strategies/transformation.py
index 15b3997..b00a405 100644
--- a/hashprep/preparers/strategies/transformation.py
+++ b/hashprep/preparers/strategies/transformation.py
@@ -1,5 +1,3 @@
-from typing import List, Optional, Tuple
-
 from ..models import FixSuggestion, TransformMethod
 from .base import FixStrategy
 
@@ -30,9 +28,7 @@ def generate_pandas_code(self, suggestion: FixSuggestion) -> str:
                 "from scipy.stats import boxcox",
             ]
             for col in suggestion.columns:
-                lines.append(
-                    f"df['{col}'], _ = boxcox(df['{col}'].clip(lower=1e-10).values)"
-                )
+                lines.append(f"df['{col}'], _ = boxcox(df['{col}'].clip(lower=1e-10).values)")
             return "\n".join(lines)
 
         if method == TransformMethod.YEOJOHNSON.value:
@@ -41,16 +37,12 @@ def generate_pandas_code(self, suggestion: FixSuggestion) -> str:
                 "pt = PowerTransformer(method='yeo-johnson')",
             ]
             for col in suggestion.columns:
-                lines.append(
-                    f"df[['{col}']] = pt.fit_transform(df[['{col}']])"
-                )
+                lines.append(f"df[['{col}']] = pt.fit_transform(df[['{col}']])")
             return "\n".join(lines)
 
         return f"df[{cols}] = np.log1p(df[{cols}].clip(lower=0))"
 
-    def get_sklearn_transformer(
-        self, suggestion: FixSuggestion
-    ) -> Tuple[Optional[str], List[str]]:
+    def get_sklearn_transformer(self, suggestion: FixSuggestion) -> tuple[str | None, list[str]]:
         method = suggestion.method
         cols = suggestion.columns
 
diff --git a/hashprep/preparers/suggestions.py b/hashprep/preparers/suggestions.py
index 7a1fa8d..bb9dc3e 100644
--- a/hashprep/preparers/suggestions.py
+++ b/hashprep/preparers/suggestions.py
@@ -1,5 +1,3 @@
-from typing import Dict, List, Optional
-
 from ..checks.core import Issue
 from .fix_registry import FixRegistry
 from .models import FixSuggestion
@@ -13,10 +11,10 @@ class SuggestionProvider:
 
     def __init__(
         self,
-        issues: List[Issue],
-        column_types: Optional[Dict[str, str]] = None,
-        target_col: Optional[str] = None,
-        column_stats: Optional[Dict[str, Dict]] = None,
+        issues: list[Issue],
+        column_types: dict[str, str] | None = None,
+        target_col: str | None = None,
+        column_stats: dict[str, dict] | None = None,
     ):
         self.issues = issues
         self.column_types = column_types or {}
@@ -24,9 +22,9 @@ def __init__(
         self.column_stats = column_stats or {}
         self.registry = FixRegistry(self.column_types, target_col, column_stats)
 
-    def get_suggestions(self) -> List[FixSuggestion]:
+    def get_suggestions(self) -> list[FixSuggestion]:
         """Generate all fix suggestions, deduplicated and prioritized."""
-        suggestions: List[FixSuggestion] = []
+        suggestions: list[FixSuggestion] = []
         seen_columns: set = set()
 
         sorted_issues = sorted(
@@ -44,9 +42,9 @@ def get_suggestions(self) -> List[FixSuggestion]:
 
         return sorted(suggestions, key=lambda s: s.priority)
 
-    def get_suggestions_by_type(self) -> Dict[str, List[FixSuggestion]]:
+    def get_suggestions_by_type(self) -> dict[str, list[FixSuggestion]]:
         """Group suggestions by fix type for organized output."""
-        grouped: Dict[str, List[FixSuggestion]] = {}
+        grouped: dict[str, list[FixSuggestion]] = {}
         for suggestion in self.get_suggestions():
             key = suggestion.fix_type.value
             if key not in grouped:
@@ -54,7 +52,7 @@ def get_suggestions_by_type(self) -> Dict[str, List[FixSuggestion]]:
             grouped[key].append(suggestion)
         return grouped
 
-    def get_legacy_suggestions(self) -> List[Dict]:
+    def get_legacy_suggestions(self) -> list[dict]:
         """
         Return suggestions in legacy format for backward compatibility.
         Maps to the old {issue, code} dict format.
diff --git a/hashprep/reports/__init__.py b/hashprep/reports/__init__.py
index 7743704..eb3709f 100644
--- a/hashprep/reports/__init__.py
+++ b/hashprep/reports/__init__.py
@@ -1 +1 @@
-from .generators import generate_report
\ No newline at end of file
+from .generators import generate_report as generate_report
diff --git a/hashprep/reports/generators.py b/hashprep/reports/generators.py
index a1d67da..a52f8af 100644
--- a/hashprep/reports/generators.py
+++ b/hashprep/reports/generators.py
@@ -9,9 +9,9 @@ def generate(self, summary, full=False, output_file=None):
 
 # Lazy loading report classes
 def _load_generators():
-    from .markdown import MarkdownReport
-    from .json import JsonReport
     from .html import HtmlReport
+    from .json import JsonReport
+    from .markdown import MarkdownReport
     from .pdf import PdfReport
 
     return {
@@ -21,17 +21,19 @@ def _load_generators():
         "pdf": PdfReport(),
     }
 
+
 # get generators dictionary
 def get_generators():
-    if not hasattr(get_generators, 'cache'):
+    if not hasattr(get_generators, "cache"):
         get_generators.cache = _load_generators()
     return get_generators.cache
 
+
 def generate_report(summary, format="md", full=False, output_file=None, theme="minimal"):
     generators = get_generators()
     if format not in generators:
         raise ValueError(f"Unsupported format: {format}")
-    
+
     if format in ["html", "pdf"]:
         return generators[format].generate(summary, full, output_file, theme=theme)
     return generators[format].generate(summary, full, output_file)
diff --git a/hashprep/reports/html.py b/hashprep/reports/html.py
index f07853f..a86d27f 100644
--- a/hashprep/reports/html.py
+++ b/hashprep/reports/html.py
@@ -1,6 +1,5 @@
 import json
 from datetime import datetime
-from typing import Dict, List
 
 import pandas as pd
 import yaml
@@ -102,9 +101,9 @@ def generate(self, summary, full=False, output_file=None, theme="minimal", pdf_m
                 f.write(html_content)
         return html_content
 
-    def _group_alerts_by_type(self, issues: List[Dict]) -> Dict[str, List[Dict]]:
+    def _group_alerts_by_type(self, issues: list[dict]) -> dict[str, list[dict]]:
         """Group issues into display categories for the alerts section."""
-        groups: Dict[str, List[Dict]] = {}
+        groups: dict[str, list[dict]] = {}
 
         for issue in issues:
             alert_type = self.ALERT_TYPE_MAPPING.get(issue["category"], "Other")
@@ -114,7 +113,7 @@ def _group_alerts_by_type(self, issues: List[Dict]) -> Dict[str, List[Dict]]:
 
         return groups
 
-    def _generate_config(self, summary) -> Dict:
+    def _generate_config(self, summary) -> dict:
         """Generate configuration dict for download."""
         reproduction_info = summary["summaries"].get("reproduction_info", {})
         return {
diff --git a/hashprep/reports/json.py b/hashprep/reports/json.py
index 6594c1d..ec9b92a 100644
--- a/hashprep/reports/json.py
+++ b/hashprep/reports/json.py
@@ -1,6 +1,5 @@
 import json
 from datetime import datetime
-from typing import Dict
 
 import numpy as np
 
@@ -22,7 +21,7 @@ def generate(self, summary, full=False, output_file=None):
         dataset_info = summary["summaries"]["dataset_info"]
         reproduction_info = summary["summaries"].get("reproduction_info", {})
 
-        report: Dict = {
+        report: dict = {
             "metadata": {
                 "generated": datetime.now().isoformat(),
                 "version": hashprep.__version__,
diff --git a/hashprep/reports/markdown.py b/hashprep/reports/markdown.py
index 2b48dea..39282fc 100644
--- a/hashprep/reports/markdown.py
+++ b/hashprep/reports/markdown.py
@@ -1,14 +1,14 @@
 import base64
 import os
-from typing import Dict, List
 
 import pandas as pd
+
+import hashprep
+
 from ..utils.logging import get_logger
 
 _log = get_logger("reports.markdown")
 
-import hashprep
-
 
 class MarkdownReport:
     ALERT_TYPE_MAPPING = {
@@ -114,8 +114,12 @@ def generate(self, summary, full=False, output_file=None):
 
                 # Summary line
                 content += "| Metric | Value |\n|--------|-------|\n"
-                content += f"| Distinct | {stats.get('distinct_count', 0)} ({stats.get('distinct_percentage', 0):.1f}%) |\n"
-                content += f"| Missing | {stats.get('missing_count', 0)} ({stats.get('missing_percentage', 0):.1f}%) |\n"
+                content += (
+                    f"| Distinct | {stats.get('distinct_count', 0)} ({stats.get('distinct_percentage', 0):.1f}%) |\n"
+                )
+                content += (
+                    f"| Missing | {stats.get('missing_count', 0)} ({stats.get('missing_percentage', 0):.1f}%) |\n"
+                )
                 if cat == "Numeric":
                     mean_val = stats.get("mean")
                     content += f"| Mean | {f'{mean_val:.6g}' if mean_val is not None else 'N/A'} |\n"
@@ -272,9 +276,9 @@ def generate(self, summary, full=False, output_file=None):
                 f.write(content)
         return content
 
-    def _group_alerts_by_type(self, issues: List[Dict]) -> Dict[str, List[Dict]]:
+    def _group_alerts_by_type(self, issues: list[dict]) -> dict[str, list[dict]]:
         """Group issues into display categories."""
-        groups: Dict[str, List[Dict]] = {}
+        groups: dict[str, list[dict]] = {}
         for issue in issues:
             alert_type = self.ALERT_TYPE_MAPPING.get(issue["category"], "Other")
             if alert_type not in groups:
diff --git a/hashprep/reports/pdf.py b/hashprep/reports/pdf.py
index b6330f8..db8dccf 100644
--- a/hashprep/reports/pdf.py
+++ b/hashprep/reports/pdf.py
@@ -1,5 +1,4 @@
 from datetime import datetime
-from typing import Dict, List
 
 import pandas as pd
 from jinja2 import Template
@@ -88,8 +87,8 @@ def generate(self, summary, full=False, output_file=None, **kwargs):
                 f.write(pdf_content)
         return pdf_content
 
-    def _group_alerts_by_type(self, issues: List[Dict]) -> Dict[str, List[Dict]]:
-        groups: Dict[str, List[Dict]] = {}
+    def _group_alerts_by_type(self, issues: list[dict]) -> dict[str, list[dict]]:
+        groups: dict[str, list[dict]] = {}
         for issue in issues:
             alert_type = self.ALERT_TYPE_MAPPING.get(issue["category"], "Other")
             if alert_type not in groups:
diff --git a/hashprep/summaries/__init__.py b/hashprep/summaries/__init__.py
index dba2c9d..fb3da47 100644
--- a/hashprep/summaries/__init__.py
+++ b/hashprep/summaries/__init__.py
@@ -1,11 +1,21 @@
 from .dataset import (
-    get_dataset_preview,
-    summarize_dataset_info,
-    summarize_variable_types,
-    add_reproduction_info,
-    get_duplicate_info,
-    summarize_variable_type_counts,
-)
-from .variables import summarize_variables
-from .interactions import summarize_interactions
-from .missing import summarize_missing_values
\ No newline at end of file
+    add_reproduction_info as add_reproduction_info,
+)
+from .dataset import (
+    get_dataset_preview as get_dataset_preview,
+)
+from .dataset import (
+    get_duplicate_info as get_duplicate_info,
+)
+from .dataset import (
+    summarize_dataset_info as summarize_dataset_info,
+)
+from .dataset import (
+    summarize_variable_type_counts as summarize_variable_type_counts,
+)
+from .dataset import (
+    summarize_variable_types as summarize_variable_types,
+)
+from .interactions import summarize_interactions as summarize_interactions
+from .missing import summarize_missing_values as summarize_missing_values
+from .variables import summarize_variables as summarize_variables
diff --git a/hashprep/summaries/dataset.py b/hashprep/summaries/dataset.py
index 9aba682..92ae295 100644
--- a/hashprep/summaries/dataset.py
+++ b/hashprep/summaries/dataset.py
@@ -1,8 +1,7 @@
-from typing import Optional, Dict
+import hashlib
 
-import pandas as pd
 import numpy as np
-import hashlib
+import pandas as pd
 
 import hashprep
 
@@ -16,7 +15,7 @@ def get_dataset_preview(df):
     return {"head": head, "tail": tail, "sample": sample}
 
 
-def summarize_dataset_info(df: pd.DataFrame) -> Dict:
+def summarize_dataset_info(df: pd.DataFrame) -> dict:
     rows = df.shape[0]
     cols = df.shape[1]
     total_cells = rows * cols
@@ -33,14 +32,12 @@ def summarize_dataset_info(df: pd.DataFrame) -> Dict:
             "average_record_size_bytes": float(round(total_memory_bytes / rows, 1)) if rows > 0 else 0.0,
             "missing_cells": missing_cells,
             "total_cells": int(total_cells),
-            "missing_percentage": float(
-                round(missing_cells / total_cells * 100, 1)
-            ) if total_cells > 0 else 0.0,
+            "missing_percentage": float(round(missing_cells / total_cells * 100, 1)) if total_cells > 0 else 0.0,
         }
     }
 
 
-def get_duplicate_info(df: pd.DataFrame) -> Dict:
+def get_duplicate_info(df: pd.DataFrame) -> dict:
     """Return duplicate row count and percentage."""
     rows = len(df)
     duplicate_count = int(df.duplicated().sum())
@@ -51,7 +48,7 @@ def get_duplicate_info(df: pd.DataFrame) -> Dict:
     }
 
 
-def summarize_variable_type_counts(df: pd.DataFrame, column_types: Dict[str, str]) -> Dict[str, int]:
+def summarize_variable_type_counts(df: pd.DataFrame, column_types: dict[str, str]) -> dict[str, int]:
     """Count variables by inferred type."""
     type_counts = {
         "Numeric": 0,
@@ -61,7 +58,7 @@ def summarize_variable_type_counts(df: pd.DataFrame, column_types: Dict[str, str
         "Boolean": 0,
         "Unsupported": 0,
     }
-    for col, typ in column_types.items():
+    for _col, typ in column_types.items():
         if typ in type_counts:
             type_counts[typ] += 1
         else:
@@ -69,25 +66,21 @@ def summarize_variable_type_counts(df: pd.DataFrame, column_types: Dict[str, str
     return type_counts
 
 
-def summarize_variable_types(df: pd.DataFrame, column_types: Optional[Dict[str, str]] = None) -> Dict[str, str]:
+def summarize_variable_types(df: pd.DataFrame, column_types: dict[str, str] | None = None) -> dict[str, str]:
     """
     Summarize column types using infer_types if column_types not provided.
     """
     if column_types is None:
         from ..utils.type_inference import infer_types
+
         column_types = infer_types(df)
     return column_types
 
 
-def add_reproduction_info(df: pd.DataFrame) -> Dict:
+def add_reproduction_info(df: pd.DataFrame) -> dict:
     """Generate reproduction metadata for the analysis."""
-    dataset_hash = hashlib.md5(
-        pd.util.hash_pandas_object(df, index=True).values
-    ).hexdigest()
+    dataset_hash = hashlib.md5(pd.util.hash_pandas_object(df, index=True).values).hexdigest()
     return {
         "dataset_hash": dataset_hash,
         "software_version": hashprep.__version__,
     }
-
-
-
diff --git a/hashprep/summaries/interactions.py b/hashprep/summaries/interactions.py
index 2ef9ae3..775c897 100644
--- a/hashprep/summaries/interactions.py
+++ b/hashprep/summaries/interactions.py
@@ -1,6 +1,7 @@
+import numpy as np
 import pandas as pd
 from scipy.stats import chi2_contingency, f_oneway
-import numpy as np
+
 from ..utils.logging import get_logger
 
 _log = get_logger("summaries.interactions")
@@ -17,11 +18,7 @@ def summarize_interactions(df):
 
 def _scatter_plots_numeric(df):
     numeric_columns = df.select_dtypes(include="number").columns.tolist()
-    pairs = [
-        (c1, c2)
-        for i, c1 in enumerate(numeric_columns)
-        for c2 in numeric_columns[i + 1 :]
-    ]
+    pairs = [(c1, c2) for i, c1 in enumerate(numeric_columns) for c2 in numeric_columns[i + 1 :]]
     return pairs
 
 
diff --git a/hashprep/summaries/missing.py b/hashprep/summaries/missing.py
index 263a82d..5908cec 100644
--- a/hashprep/summaries/missing.py
+++ b/hashprep/summaries/missing.py
@@ -1,17 +1,7 @@
-import pandas as pd
-
-
 def summarize_missing_values(df):
     missing_count = {col: int(val) for col, val in df.isnull().sum().to_dict().items()}
-    missing_percentage = {
-        col: float(val)
-        for col, val in (df.isnull().mean() * 100).round(2).to_dict().items()
-    }
-    missing_patterns = {
-        col: df[df[col].isna()].index.tolist()
-        for col in df.columns
-        if df[col].isna().any()
-    }
+    missing_percentage = {col: float(val) for col, val in (df.isnull().mean() * 100).round(2).to_dict().items()}
+    missing_patterns = {col: df[df[col].isna()].index.tolist() for col in df.columns if df[col].isna().any()}
 
     missing_data = {}
     missing_data["missing_values"] = {"count": missing_count, "percentage": missing_percentage}
diff --git a/hashprep/summaries/variables.py b/hashprep/summaries/variables.py
index 6b5243f..ba019e0 100644
--- a/hashprep/summaries/variables.py
+++ b/hashprep/summaries/variables.py
@@ -1,13 +1,16 @@
-import pandas as pd
-import numpy as np
-import unicodedata
 import re
+import unicodedata
 from collections import defaultdict
+
+import numpy as np
+import pandas as pd
 from scipy.stats import median_abs_deviation
+
 from ..config import DEFAULT_CONFIG
 
 _SUMMARY = DEFAULT_CONFIG.summaries
 
+
 def get_monotonicity(series: pd.Series) -> str:
     if series.is_monotonic_increasing:
         return "increasing"
@@ -20,6 +23,7 @@ def get_monotonicity(series: pd.Series) -> str:
 def summarize_variables(df, column_types=None):
     if column_types is None:
         from ..utils.type_inference import infer_types
+
         column_types = infer_types(df)
     inferred_types = column_types
     variables = {}
@@ -27,9 +31,7 @@ def summarize_variables(df, column_types=None):
         typ = inferred_types.get(column, "Unsupported")
         non_missing_count = df[column].notna().sum()
         distinct_count = df[column].nunique()
-        distinct_percentage = (
-            (distinct_count / non_missing_count * 100) if non_missing_count > 0 else 0
-        )
+        distinct_percentage = (distinct_count / non_missing_count * 100) if non_missing_count > 0 else 0
         missing_count = int(df[column].isna().sum())
         missing_percentage = (missing_count / len(df) * 100) if len(df) > 0 else 0
         memory_size = df[column].memory_usage(deep=True)
@@ -116,13 +118,10 @@ def _summarize_numeric(df, col):
         "counts": [int(x) for x in hist],
     }
     vc = series.value_counts().head(_SUMMARY.top_n_values)
-    common_values = {
-        str(v): {"count": int(c), "percentage": float(c / n * 100)}
-        for v, c in vc.items()
-    }
+    common_values = {str(v): {"count": int(c), "percentage": float(c / n * 100)} for v, c in vc.items()}
     extremes = {
-        "minimum_10": [float(x) for x in sorted(series)[:_SUMMARY.extreme_values_count]],
-        "maximum_10": [float(x) for x in sorted(series)[-_SUMMARY.extreme_values_count:]],
+        "minimum_10": [float(x) for x in sorted(series)[: _SUMMARY.extreme_values_count]],
+        "maximum_10": [float(x) for x in sorted(series)[-_SUMMARY.extreme_values_count :]],
     }
     stats = {
         "infinite_count": infinite_count,
@@ -180,7 +179,6 @@ def _summarize_text(df, col):
             },
         }
     lengths = series.str.len()
-    n = len(series)
     all_text = "".join(series)
     total_chars = len(all_text)
     distinct_chars = len(set(all_text))
@@ -309,9 +307,7 @@ def _summarize_categorical(df, col):
     text_summary = _summarize_text(df, col)
     n = len(series)
     vc = series.value_counts().head(10)
-    common_values = {
-        v: {"count": int(c), "percentage": float(c / n * 100)} for v, c in vc.items()
-    }
+    common_values = {v: {"count": int(c), "percentage": float(c / n * 100)} for v, c in vc.items()}
     stats = {
         "overview": text_summary["overview"],
         "categories": {
@@ -372,9 +368,6 @@ def _summarize_boolean(df, col):
         bool_series = pd.to_numeric(series, errors="coerce").notna().astype(bool)
         vc = bool_series.value_counts()
     n = len(series)
-    common_values = {
-        str(k): {"count": int(v), "percentage": float(v / n * 100)}
-        for k, v in vc.items()
-    }
+    common_values = {str(k): {"count": int(v), "percentage": float(v / n * 100)} for k, v in vc.items()}
     stats = {"common_values": common_values}
     return stats
diff --git a/hashprep/utils/sampling.py b/hashprep/utils/sampling.py
index 8c7aa5f..7c2215b 100644
--- a/hashprep/utils/sampling.py
+++ b/hashprep/utils/sampling.py
@@ -1,5 +1,5 @@
-from dataclasses import dataclass, field
-from typing import Dict, Literal, Optional, Tuple
+from dataclasses import dataclass
+from typing import Literal
 
 import pandas as pd
 
@@ -16,8 +16,8 @@ class SamplingConfig:
 
     max_rows: int = DEFAULT_MAX_ROWS
     sample_method: Literal["random", "stratified", "systematic", "head"] = "random"
-    random_state: Optional[int] = 42
-    stratify_column: Optional[str] = None
+    random_state: int | None = 42
+    stratify_column: str | None = None
     memory_threshold_mb: float = DEFAULT_MEMORY_THRESHOLD_MB
     enabled: bool = True
 
@@ -25,10 +25,10 @@ class SamplingConfig:
 class DatasetSampler:
     """Handles sampling of large datasets for efficient analysis."""
 
-    def __init__(self, config: Optional[SamplingConfig] = None):
+    def __init__(self, config: SamplingConfig | None = None):
         self.config = config or SamplingConfig()
-        self.original_shape: Optional[Tuple[int, int]] = None
-        self.sample_fraction: Optional[float] = None
+        self.original_shape: tuple[int, int] | None = None
+        self.sample_fraction: float | None = None
         self.was_sampled: bool = False
 
     def should_sample(self, df: pd.DataFrame) -> bool:
@@ -91,9 +91,7 @@ def _stratified_sample(self, df: pd.DataFrame, target_rows: int) -> pd.DataFrame
             n_samples = max(1, int(proportions[name] * target_rows))
             n_samples = min(n_samples, len(group), remaining)
             if n_samples > 0:
-                samples.append(
-                    group.sample(n=n_samples, random_state=self.config.random_state)
-                )
+                samples.append(group.sample(n=n_samples, random_state=self.config.random_state))
                 remaining -= n_samples
             if remaining <= 0:
                 break
@@ -106,14 +104,12 @@ def _stratified_sample(self, df: pd.DataFrame, target_rows: int) -> pd.DataFrame
         if len(result) < target_rows and len(result) < len(df):
             additional_needed = min(target_rows - len(result), len(df) - len(result))
             remaining_indices = df.index.difference(result.index)
-            additional = df.loc[remaining_indices].sample(
-                n=additional_needed, random_state=self.config.random_state
-            )
+            additional = df.loc[remaining_indices].sample(n=additional_needed, random_state=self.config.random_state)
             result = pd.concat([result, additional])
 
         return result.sample(frac=1, random_state=self.config.random_state)
 
-    def get_sampling_info(self) -> Dict:
+    def get_sampling_info(self) -> dict:
         """Return metadata about sampling performed."""
         return {
             "was_sampled": self.was_sampled,
diff --git a/hashprep/utils/type_inference.py b/hashprep/utils/type_inference.py
index ded5d3f..49676ed 100644
--- a/hashprep/utils/type_inference.py
+++ b/hashprep/utils/type_inference.py
@@ -1,17 +1,17 @@
 import pandas as pd
-from typing import Dict
 
 from ..config import DEFAULT_CONFIG
 
 _TYPE_CFG = DEFAULT_CONFIG.type_inference
 CONFIG = {
-    'cat_cardinality_threshold': _TYPE_CFG.cat_cardinality_threshold,
-    'cat_percentage_threshold': _TYPE_CFG.cat_percentage_threshold,
-    'num_low_cat_threshold': _TYPE_CFG.num_low_cat_threshold,
-    'bool_mappings': _TYPE_CFG.bool_mappings,
+    "cat_cardinality_threshold": _TYPE_CFG.cat_cardinality_threshold,
+    "cat_percentage_threshold": _TYPE_CFG.cat_percentage_threshold,
+    "num_low_cat_threshold": _TYPE_CFG.num_low_cat_threshold,
+    "bool_mappings": _TYPE_CFG.bool_mappings,
 }
 
-def infer_types(df: pd.DataFrame) -> Dict[str, str]:
+
+def infer_types(df: pd.DataFrame) -> dict[str, str]:
     """
     Infer semantic types per ydata logic.
     Returns: {col: 'Numeric' | 'Categorical' | 'Text' | 'Unsupported'}
@@ -20,38 +20,41 @@ def infer_types(df: pd.DataFrame) -> Dict[str, str]:
     for col in df.columns:
         series = df[col].dropna()
         if series.empty:
-            types[col] = 'Unsupported'
+            types[col] = "Unsupported"
             continue
 
         # Numeric inference (ydata's Numeric.contains_op + numeric_is_category)
         if pd.api.types.is_numeric_dtype(series) and not pd.api.types.is_bool_dtype(series):
             n_unique = series.nunique()
-            if 1 <= n_unique <= CONFIG['num_low_cat_threshold']:
-                types[col] = 'Categorical'  # Low-card numeric → Categorical (e.g., SibSp, Parch)
+            if 1 <= n_unique <= CONFIG["num_low_cat_threshold"]:
+                types[col] = "Categorical"  # Low-card numeric → Categorical (e.g., SibSp, Parch)
             else:
-                types[col] = 'Numeric'  # High-card numeric (e.g., Age, Fare)
+                types[col] = "Numeric"  # High-card numeric (e.g., Age, Fare)
 
         # String/Text inference (ydata's Text.contains_op + string_is_category)
         elif pd.api.types.is_string_dtype(series) or pd.api.types.is_object_dtype(series):
             n_unique = series.nunique()
             unique_pct = n_unique / len(series)
-            is_bool = all(s.lower() in CONFIG['bool_mappings'] for s in series[:5])  # Quick bool check
+            is_bool = all(s.lower() in CONFIG["bool_mappings"] for s in series[:5])  # Quick bool check
             if is_bool:
-                types[col] = 'Categorical'  # Bool-like → Categorical
-            elif 1 <= n_unique <= CONFIG['cat_cardinality_threshold'] and unique_pct < CONFIG['cat_percentage_threshold']:
-                types[col] = 'Categorical'  # Low-card string → Categorical (e.g., Sex, Embarked)
+                types[col] = "Categorical"  # Bool-like → Categorical
+            elif (
+                1 <= n_unique <= CONFIG["cat_cardinality_threshold"] and unique_pct < CONFIG["cat_percentage_threshold"]
+            ):
+                types[col] = "Categorical"  # Low-card string → Categorical (e.g., Sex, Embarked)
             else:
-                types[col] = 'Text'  # High-card/unique → Text (e.g., Name, Cabin, Ticket)
+                types[col] = "Text"  # High-card/unique → Text (e.g., Name, Cabin, Ticket)
 
         # Categorical dtype
         elif pd.api.types.is_categorical_dtype(series):
-            types[col] = 'Categorical'
+            types[col] = "Categorical"
 
         else:
-            types[col] = 'Unsupported'
+            types[col] = "Unsupported"
 
     return types
 
+
 # Helper: Check if series is constant/empty (skip corr)
 def is_usable_for_corr(series: pd.Series) -> bool:
-    return series.nunique() > 1 and len(series.dropna()) > 1
\ No newline at end of file
+    return series.nunique() > 1 and len(series.dropna()) > 1
diff --git a/pyproject.toml b/pyproject.toml
index 9438449..62f5d2c 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -46,7 +46,33 @@ include = ["hashprep*"]
 [dependency-groups]
 dev = [
     "pytest>=9.0.2",
+    "ruff>=0.8.0",
 ]
 
 [project.scripts]
 hashprep = "hashprep.interfaces.cli.main:cli"
+
+[tool.ruff]
+target-version = "py310"
+line-length = 120
+
+[tool.ruff.lint]
+select = [
+    "E",    # pycodestyle errors
+    "W",    # pycodestyle warnings
+    "F",    # pyflakes
+    "I",    # isort
+    "UP",   # pyupgrade
+    "B",    # flake8-bugbear
+    "SIM",  # flake8-simplify
+]
+ignore = [
+    "E501",   # line too long (handled by formatter)
+    "B905",   # zip() without strict= (strict= exists on 3.10+, i.e. our target; TODO confirm ignore is still wanted)
+    "SIM102", # collapsible-if (elif + nested if is often more readable)
+    "SIM108", # ternary operator (readability preference)
+    "UP007",  # Optional[X] -> X | None (PEP 604 syntax works on 3.10+; TODO confirm ignore is still wanted)
+]
+
+[tool.ruff.lint.isort]
+known-first-party = ["hashprep"]
diff --git a/tests/test.py b/tests/test.py
index 7936eca..81b023c 100755
--- a/tests/test.py
+++ b/tests/test.py
@@ -1,7 +1,8 @@
 #!/usr/bin/env python3
 
-import hashprep
 import pandas as pd
+
+import hashprep
 from hashprep import DatasetAnalyzer
 
 TARGET_COLUMN = "Survived"
diff --git a/tests/test_cli.py b/tests/test_cli.py
index d19e7ac..b2b1e91 100644
--- a/tests/test_cli.py
+++ b/tests/test_cli.py
@@ -26,15 +26,10 @@ def temp_output_dir():
 
 def run_cli(args, cwd=None):
     """Helper to run CLI commands."""
-    cmd = ['uv', 'run', 'hashprep'] + args
+    cmd = ["uv", "run", "hashprep"] + args
     if cwd is None:
         cwd = Path(__file__).parent.parent
-    result = subprocess.run(
-        cmd,
-        capture_output=True,
-        text=True,
-        cwd=cwd
-    )
+    result = subprocess.run(cmd, capture_output=True, text=True, cwd=cwd)
     return result
 
 
@@ -43,63 +38,60 @@ class TestCLIScan:
 
     def test_scan_basic(self, titanic_csv):
         """Test basic scan command."""
-        result = run_cli(['scan', titanic_csv])
+        result = run_cli(["scan", titanic_csv])
 
         assert result.returncode == 0
-        assert 'Dataset Health Check' in result.stdout
-        assert 'Critical Issues:' in result.stdout
-        assert 'Warnings:' in result.stdout
+        assert "Dataset Health Check" in result.stdout
+        assert "Critical Issues:" in result.stdout
+        assert "Warnings:" in result.stdout
 
     def test_scan_critical_only(self, titanic_csv):
         """Test scan with --critical-only flag."""
-        result = run_cli(['scan', titanic_csv, '--critical-only'])
+        result = run_cli(["scan", titanic_csv, "--critical-only"])
 
         assert result.returncode == 0
-        assert 'Critical Issues:' in result.stdout
+        assert "Critical Issues:" in result.stdout
 
     def test_scan_quiet(self, titanic_csv):
         """Test scan with --quiet flag."""
-        result = run_cli(['scan', titanic_csv, '--quiet'])
+        result = run_cli(["scan", titanic_csv, "--quiet"])
 
         assert result.returncode == 0
         # Should only show counts
-        assert 'critical' in result.stdout.lower()
+        assert "critical" in result.stdout.lower()
 
     def test_scan_json_output(self, titanic_csv):
         """Test scan with --json flag."""
-        result = run_cli(['scan', titanic_csv, '--json'])
+        result = run_cli(["scan", titanic_csv, "--json"])
 
         assert result.returncode == 0
         # Should be valid JSON
         data = json.loads(result.stdout)
-        assert 'critical_issues' in data or 'critical_count' in data
-        assert 'warnings' in data or 'warning_count' in data
-        assert 'issues' in data
+        assert "critical_issues" in data or "critical_count" in data
+        assert "warnings" in data or "warning_count" in data
+        assert "issues" in data
 
     def test_scan_with_target(self, titanic_csv):
         """Test scan with target column."""
-        result = run_cli(['scan', titanic_csv, '--target', 'Survived'])
+        result = run_cli(["scan", titanic_csv, "--target", "Survived"])
 
         assert result.returncode == 0
-        assert 'Dataset Health Check' in result.stdout
+        assert "Dataset Health Check" in result.stdout
 
     def test_scan_specific_checks(self, titanic_csv):
         """Test scan with specific checks."""
-        result = run_cli([
-            'scan', titanic_csv,
-            '--checks', 'outliers,duplicates,high_missing_values'
-        ])
+        result = run_cli(["scan", titanic_csv, "--checks", "outliers,duplicates,high_missing_values"])
 
         assert result.returncode == 0
-        assert 'Dataset Health Check' in result.stdout
+        assert "Dataset Health Check" in result.stdout
 
     def test_scan_with_sampling(self, titanic_csv):
         """Test scan with custom sample size."""
-        result = run_cli(['scan', titanic_csv, '--sample-size', '500'])
+        result = run_cli(["scan", titanic_csv, "--sample-size", "500"])
 
         assert result.returncode == 0
-        if 'sample' in result.stdout.lower():
-            assert '56.1%' in result.stdout  # 500/891
+        if "sample" in result.stdout.lower():
+            assert "56.1%" in result.stdout  # 500/891
 
 
 class TestCLIDetails:
@@ -107,27 +99,24 @@ class TestCLIDetails:
 
     def test_details_basic(self, titanic_csv):
         """Test basic details command."""
-        result = run_cli(['details', titanic_csv])
+        result = run_cli(["details", titanic_csv])
 
         assert result.returncode == 0
-        assert 'Detailed Analysis' in result.stdout
-        assert 'Critical Issues:' in result.stdout
-        assert 'Warnings:' in result.stdout
-        assert 'Dataset Summary:' in result.stdout
+        assert "Detailed Analysis" in result.stdout
+        assert "Critical Issues:" in result.stdout
+        assert "Warnings:" in result.stdout
+        assert "Dataset Summary:" in result.stdout
 
     def test_details_with_target(self, titanic_csv):
         """Test details with target column."""
-        result = run_cli(['details', titanic_csv, '--target', 'Survived'])
+        result = run_cli(["details", titanic_csv, "--target", "Survived"])
 
         assert result.returncode == 0
-        assert 'Detailed Analysis' in result.stdout
+        assert "Detailed Analysis" in result.stdout
 
     def test_details_specific_checks(self, titanic_csv):
         """Test details with specific checks."""
-        result = run_cli([
-            'details', titanic_csv,
-            '--checks', 'high_missing_values,outliers'
-        ])
+        result = run_cli(["details", titanic_csv, "--checks", "high_missing_values,outliers"])
 
         assert result.returncode == 0
 
@@ -137,120 +126,101 @@ class TestCLIReport:
 
     def test_report_markdown(self, titanic_csv, temp_output_dir):
         """Test Markdown report generation."""
-        result = run_cli(['report', titanic_csv, '--format', 'md'], cwd=temp_output_dir)
+        result = run_cli(["report", titanic_csv, "--format", "md"], cwd=temp_output_dir)
 
         assert result.returncode == 0
-        assert 'Report saved to:' in result.stdout
-        assert 'train_hashprep_report.md' in result.stdout
+        assert "Report saved to:" in result.stdout
+        assert "train_hashprep_report.md" in result.stdout
 
         # Check file was created
-        report_file = os.path.join(temp_output_dir, 'train_hashprep_report.md')
+        report_file = os.path.join(temp_output_dir, "train_hashprep_report.md")
         assert os.path.exists(report_file)
 
     def test_report_json(self, titanic_csv, temp_output_dir):
         """Test JSON report generation."""
-        result = run_cli(['report', titanic_csv, '--format', 'json'], cwd=temp_output_dir)
+        result = run_cli(["report", titanic_csv, "--format", "json"], cwd=temp_output_dir)
 
         assert result.returncode == 0
-        assert 'train_hashprep_report.json' in result.stdout
+        assert "train_hashprep_report.json" in result.stdout
 
         # Verify JSON is valid
-        report_file = os.path.join(temp_output_dir, 'train_hashprep_report.json')
+        report_file = os.path.join(temp_output_dir, "train_hashprep_report.json")
         assert os.path.exists(report_file)
         with open(report_file) as f:
             data = json.load(f)
-            assert 'metadata' in data
-            assert 'dataset_overview' in data
+            assert "metadata" in data
+            assert "dataset_overview" in data
 
     def test_report_html_minimal(self, titanic_csv, temp_output_dir):
         """Test HTML report with minimal theme."""
-        result = run_cli([
-            'report', titanic_csv,
-            '--format', 'html',
-            '--theme', 'minimal',
-            '--full'
-        ], cwd=temp_output_dir)
+        result = run_cli(
+            ["report", titanic_csv, "--format", "html", "--theme", "minimal", "--full"], cwd=temp_output_dir
+        )
 
         assert result.returncode == 0
-        assert 'train_hashprep_report.html' in result.stdout
+        assert "train_hashprep_report.html" in result.stdout
 
-        report_file = os.path.join(temp_output_dir, 'train_hashprep_report.html')
+        report_file = os.path.join(temp_output_dir, "train_hashprep_report.html")
         assert os.path.exists(report_file)
 
     def test_report_html_neubrutalism(self, titanic_csv, temp_output_dir):
         """Test HTML report with neubrutalism theme."""
-        result = run_cli([
-            'report', titanic_csv,
-            '--format', 'html',
-            '--theme', 'neubrutalism',
-            '--full'
-        ], cwd=temp_output_dir)
+        result = run_cli(
+            ["report", titanic_csv, "--format", "html", "--theme", "neubrutalism", "--full"], cwd=temp_output_dir
+        )
 
         assert result.returncode == 0
-        assert 'train_hashprep_report.html' in result.stdout
+        assert "train_hashprep_report.html" in result.stdout
 
     def test_report_pdf(self, titanic_csv, temp_output_dir):
         """Test PDF report generation."""
-        result = run_cli(['report', titanic_csv, '--format', 'pdf', '--full'], cwd=temp_output_dir)
+        result = run_cli(["report", titanic_csv, "--format", "pdf", "--full"], cwd=temp_output_dir)
 
         assert result.returncode == 0
-        assert 'train_hashprep_report.pdf' in result.stdout
+        assert "train_hashprep_report.pdf" in result.stdout
 
-        report_file = os.path.join(temp_output_dir, 'train_hashprep_report.pdf')
+        report_file = os.path.join(temp_output_dir, "train_hashprep_report.pdf")
         assert os.path.exists(report_file)
         # Check PDF magic number
-        with open(report_file, 'rb') as f:
-            assert f.read(4) == b'%PDF'
+        with open(report_file, "rb") as f:
+            assert f.read(4) == b"%PDF"
 
     def test_report_with_code_generation(self, titanic_csv, temp_output_dir):
         """Test report with code generation."""
-        result = run_cli(['report', titanic_csv, '--with-code'], cwd=temp_output_dir)
+        result = run_cli(["report", titanic_csv, "--with-code"], cwd=temp_output_dir)
 
         assert result.returncode == 0
-        assert 'fixes script saved' in result.stdout
-        assert 'pipeline script saved' in result.stdout
+        assert "fixes script saved" in result.stdout
+        assert "pipeline script saved" in result.stdout
 
         # Check files were created
-        assert os.path.exists(os.path.join(temp_output_dir, 'train_hashprep_report_fixes.py'))
-        assert os.path.exists(os.path.join(temp_output_dir, 'train_hashprep_report_pipeline.py'))
+        assert os.path.exists(os.path.join(temp_output_dir, "train_hashprep_report_fixes.py"))
+        assert os.path.exists(os.path.join(temp_output_dir, "train_hashprep_report_pipeline.py"))
 
     def test_report_no_visualizations(self, titanic_csv, temp_output_dir):
         """Test report without visualizations."""
-        result = run_cli([
-            'report', titanic_csv,
-            '--format', 'html',
-            '--no-visualizations'
-        ], cwd=temp_output_dir)
+        result = run_cli(["report", titanic_csv, "--format", "html", "--no-visualizations"], cwd=temp_output_dir)
 
         assert result.returncode == 0
 
     def test_report_no_full(self, titanic_csv, temp_output_dir):
         """Test summary-only report."""
-        result = run_cli([
-            'report', titanic_csv,
-            '--format', 'md',
-            '--no-full'
-        ], cwd=temp_output_dir)
+        result = run_cli(["report", titanic_csv, "--format", "md", "--no-full"], cwd=temp_output_dir)
 
         assert result.returncode == 0
 
     def test_report_with_target(self, titanic_csv, temp_output_dir):
         """Test report with target column."""
-        result = run_cli([
-            'report', titanic_csv,
-            '--target', 'Survived',
-            '--format', 'json'
-        ], cwd=temp_output_dir)
+        result = run_cli(["report", titanic_csv, "--target", "Survived", "--format", "json"], cwd=temp_output_dir)
 
         assert result.returncode == 0
 
     def test_report_specific_checks(self, titanic_csv, temp_output_dir):
         """Test report with specific checks."""
-        result = run_cli([
-            'report', titanic_csv,
-            '--checks', 'outliers,high_missing_values,duplicates',
-            '--format', 'md'
-        ], cwd=temp_output_dir)
+        result = run_cli(
+            ["report", titanic_csv, "--checks", "outliers,high_missing_values,duplicates", "--format", "md"],
+            cwd=temp_output_dir,
+        )
 
         assert result.returncode == 0
 
@@ -260,10 +230,10 @@ class TestCLIVersion:
 
     def test_version(self):
         """Test version command."""
-        result = run_cli(['version'])
+        result = run_cli(["version"])
 
         assert result.returncode == 0
-        assert 'hashprep' in result.stdout.lower()
+        assert "hashprep" in result.stdout.lower()
         # Should show version number
         assert any(char.isdigit() for char in result.stdout)
 
@@ -273,26 +243,23 @@ class TestCLIErrorHandling:
 
     def test_invalid_file(self):
         """Test with non-existent file."""
-        result = run_cli(['scan', 'nonexistent.csv'])
+        result = run_cli(["scan", "nonexistent.csv"])
 
         assert result.returncode != 0
 
     def test_invalid_format(self, titanic_csv):
         """Test with invalid report format."""
-        result = run_cli(['report', titanic_csv, '--format', 'invalid'])
+        result = run_cli(["report", titanic_csv, "--format", "invalid"])
 
         # Should handle gracefully or error
-        assert result.returncode != 0 or 'error' in result.stderr.lower()
+        assert result.returncode != 0 or "error" in result.stderr.lower()
 
     def test_invalid_check_name(self, titanic_csv):
         """Test with invalid check name."""
-        result = run_cli([
-            'report', titanic_csv,
-            '--checks', 'invalid_check_name'
-        ])
+        result = run_cli(["report", titanic_csv, "--checks", "invalid_check_name"])
 
         assert result.returncode == 0
-        assert 'Warning: Invalid checks ignored' in result.stdout
+        assert "Warning: Invalid checks ignored" in result.stdout
         # Fuzzy suggestion feature (if merged)
         # assert 'Did you mean' in result.stdout
 
@@ -303,43 +270,46 @@ class TestCLIIntegration:
     def test_full_workflow(self, titanic_csv, temp_output_dir):
         """Test complete workflow: scan -> details -> report."""
         # Step 1: Scan
-        result = run_cli(['scan', titanic_csv])
+        result = run_cli(["scan", titanic_csv])
         assert result.returncode == 0
 
         # Step 2: Details
-        result = run_cli(['details', titanic_csv])
+        result = run_cli(["details", titanic_csv])
         assert result.returncode == 0
 
         # Step 3: Generate all report formats
-        for fmt in ['md', 'json', 'html', 'pdf']:
-            result = run_cli([
-                'report', titanic_csv,
-                '--format', fmt,
-                '--full'
-            ], cwd=temp_output_dir)
+        for fmt in ["md", "json", "html", "pdf"]:
+            result = run_cli(["report", titanic_csv, "--format", fmt, "--full"], cwd=temp_output_dir)
             assert result.returncode == 0
 
         # Step 4: Generate code
-        result = run_cli(['report', titanic_csv, '--with-code'], cwd=temp_output_dir)
+        result = run_cli(["report", titanic_csv, "--with-code"], cwd=temp_output_dir)
         assert result.returncode == 0
 
     def test_ml_workflow_with_target(self, titanic_csv, temp_output_dir):
         """Test ML-focused workflow with target column."""
         # Generate report with target and code
-        result = run_cli([
-            'report', titanic_csv,
-            '--target', 'Survived',
-            '--with-code',
-            '--format', 'html',
-            '--theme', 'minimal',
-            '--full'
-        ], cwd=temp_output_dir)
+        result = run_cli(
+            [
+                "report",
+                titanic_csv,
+                "--target",
+                "Survived",
+                "--with-code",
+                "--format",
+                "html",
+                "--theme",
+                "minimal",
+                "--full",
+            ],
+            cwd=temp_output_dir,
+        )
 
         assert result.returncode == 0
-        assert os.path.exists(os.path.join(temp_output_dir, 'train_hashprep_report.html'))
-        assert os.path.exists(os.path.join(temp_output_dir, 'train_hashprep_report_fixes.py'))
-        assert os.path.exists(os.path.join(temp_output_dir, 'train_hashprep_report_pipeline.py'))
+        assert os.path.exists(os.path.join(temp_output_dir, "train_hashprep_report.html"))
+        assert os.path.exists(os.path.join(temp_output_dir, "train_hashprep_report_fixes.py"))
+        assert os.path.exists(os.path.join(temp_output_dir, "train_hashprep_report_pipeline.py"))
 
 
-if __name__ == '__main__':
-    pytest.main([__file__, '-v'])
+if __name__ == "__main__":
+    pytest.main([__file__, "-v"])
diff --git a/tests/test_codegen.py b/tests/test_codegen.py
index a20964e..2cc8776 100644
--- a/tests/test_codegen.py
+++ b/tests/test_codegen.py
@@ -1,7 +1,5 @@
 """Tests for code generation module."""
 
-import pytest
-
 from hashprep.preparers.codegen import CodeGenerator
 from hashprep.preparers.models import (
     EncodeMethod,
diff --git a/tests/test_drift.py b/tests/test_drift.py
index 9cd8fd6..1401a1a 100644
--- a/tests/test_drift.py
+++ b/tests/test_drift.py
@@ -2,7 +2,6 @@
 
 import numpy as np
 import pandas as pd
-import pytest
 
 from hashprep.checks.drift import check_drift
 
@@ -62,9 +61,7 @@ def test_no_drift_same_categories(self):
 
         issues = check_drift(train, test)
 
-        drift_issues = [
-            i for i in issues if "Drift" in i.description and "cat" in i.column
-        ]
+        drift_issues = [i for i in issues if "Drift" in i.description and "cat" in i.column]
         assert len(drift_issues) == 0
 
     def test_drift_different_category_distribution(self):
@@ -73,9 +70,7 @@ def test_drift_different_category_distribution(self):
 
         issues = check_drift(train, test)
 
-        drift_issues = [
-            i for i in issues if "Drift" in i.description and "cat" in i.column
-        ]
+        drift_issues = [i for i in issues if "Drift" in i.description and "cat" in i.column]
         assert len(drift_issues) >= 1
 
     def test_new_categories_detected(self):
@@ -94,9 +89,5 @@ def test_skips_high_cardinality(self):
 
         issues = check_drift(train, test)
 
-        chi2_issues = [
-            i
-            for i in issues
-            if "Chi-square" in i.description and i.column == "cat"
-        ]
+        chi2_issues = [i for i in issues if "Chi-square" in i.description and i.column == "cat"]
         assert len(chi2_issues) == 0
diff --git a/tests/test_edge_cases.py b/tests/test_edge_cases.py
new file mode 100644
index 0000000..71cf1cb
--- /dev/null
+++ b/tests/test_edge_cases.py
@@ -0,0 +1,294 @@
+"""Tests for edge cases, failure paths, and boundary conditions."""
+
+import numpy as np
+import pandas as pd
+
+from hashprep import DatasetAnalyzer
+from hashprep.checks.drift import check_drift
+
+
+class TestEmptyDataframes:
+    """Smoke tests for empty, single-row, single-column, and all-NaN inputs."""
+
+    def test_empty_dataframe_analysis(self):
+        """Run only the empty_dataset check on a frame with no rows or columns."""
+        frame = pd.DataFrame()
+        result = DatasetAnalyzer(frame, selected_checks=["empty_dataset"]).analyze()
+        assert result is not None
+
+    def test_single_row_dataframe(self):
+        """Analyze a one-row frame without selected_checks; expect an issues key."""
+        frame = pd.DataFrame({"a": [1], "b": ["x"]})
+        result = DatasetAnalyzer(frame).analyze()
+        assert result is not None
+        assert "issues" in result
+
+    def test_single_column_dataframe(self):
+        """Analyze a frame with exactly one column without selected_checks."""
+        frame = pd.DataFrame({"only_col": range(100)})
+        result = DatasetAnalyzer(frame).analyze()
+        assert result is not None
+
+    def test_all_nan_dataframe(self):
+        """Expect at least one issue when every cell in the frame is NaN."""
+        frame = pd.DataFrame({"a": [np.nan] * 10, "b": [np.nan] * 10})
+        result = DatasetAnalyzer(frame, selected_checks=["empty_dataset", "high_missing_values"]).analyze()
+        assert result["total_issues"] > 0
+
+    def test_all_nan_numeric_column_outliers(self):
+        """Run the outliers check with one all-NaN column next to a populated one."""
+        frame = pd.DataFrame({"a": [np.nan] * 10, "b": range(10)})
+        result = DatasetAnalyzer(frame, selected_checks=["outliers"]).analyze()
+        assert result is not None
+
+    def test_empty_drift_check(self):
+        """Expect an empty issue list when both frames are empty."""
+        assert check_drift(pd.DataFrame(), pd.DataFrame()) == []
+
+    def test_drift_with_all_nan_columns(self):
+        """Expect a list back when the shared column is all NaN on both sides."""
+        train = pd.DataFrame({"col": [np.nan] * 10})
+        holdout = pd.DataFrame({"col": [np.nan] * 10})
+        assert isinstance(check_drift(train, holdout), list)
+
+
+class TestConstantAndDegenerateColumns:
+    """Cover all-zero, constant, single-category, and infinite-valued columns."""
+
+    def test_all_zeros_column(self):
+        """Run outliers and high_zero_counts with an all-zero column present."""
+        frame = pd.DataFrame({"zeros": [0] * 100, "normal": range(100)})
+        result = DatasetAnalyzer(frame, selected_checks=["outliers", "high_zero_counts"]).analyze()
+        assert result is not None
+
+    def test_constant_numeric_column_outliers(self):
+        """Expect a single_value issue when a numeric column never varies."""
+        frame = pd.DataFrame({"const": [42] * 100, "var": range(100)})
+        result = DatasetAnalyzer(frame, selected_checks=["outliers", "single_value_columns"]).analyze()
+        constant_hits = [issue for issue in result["issues"] if issue["category"] == "single_value"]
+        assert len(constant_hits) >= 1
+
+    def test_constant_string_column(self):
+        """Run single_value_columns and high_cardinality on one repeated string."""
+        frame = pd.DataFrame({"const": ["same"] * 100})
+        result = DatasetAnalyzer(frame, selected_checks=["single_value_columns", "high_cardinality"]).analyze()
+        assert result is not None
+
+    def test_single_category(self):
+        """Run class_imbalance against a target column holding a single class."""
+        frame = pd.DataFrame({"cat": ["A"] * 100, "num": range(100)})
+        result = DatasetAnalyzer(frame, target_col="cat", selected_checks=["class_imbalance"]).analyze()
+        assert result is not None
+
+    def test_infinite_values(self):
+        """Expect an infinite_values issue when a column contains +inf and -inf."""
+        frame = pd.DataFrame({"a": [1, 2, np.inf, -np.inf, 5], "b": range(5)})
+        result = DatasetAnalyzer(frame, selected_checks=["infinite_values"]).analyze()
+        inf_hits = [issue for issue in result["issues"] if issue["category"] == "infinite_values"]
+        assert len(inf_hits) >= 1
+
+
+class TestMixedAndEdgeCaseTypes:
+    """Smoke tests for mixed strings, booleans, datetimes, and unusual text."""
+
+    def test_mixed_types_column(self):
+        """Run mixed_data_types on strings that are only partly numeric-looking."""
+        # Every value is a str: "1", "3.0" and "5" look numeric, "two" and "four" do not.
+        frame = pd.DataFrame({"mixed": ["1", "two", "3.0", "four", "5"] * 20})
+        result = DatasetAnalyzer(frame, selected_checks=["mixed_data_types"]).analyze()
+        assert result is not None
+
+    def test_boolean_column(self):
+        """Analyze a frame with a boolean column without selected_checks."""
+        frame = pd.DataFrame({"flag": [True, False] * 50, "num": range(100)})
+        result = DatasetAnalyzer(frame).analyze()
+        assert result is not None
+
+    def test_datetime_column(self):
+        """Run datetime_skew on 100 consecutive daily timestamps."""
+        daily = pd.date_range("2020-01-01", periods=100, freq="D")
+        frame = pd.DataFrame({"date": daily, "val": range(100)})
+        result = DatasetAnalyzer(frame, selected_checks=["datetime_skew"]).analyze()
+        assert result is not None
+
+    def test_very_long_strings(self):
+        """Run extreme_text_lengths with ten 10000-character values in the column."""
+        frame = pd.DataFrame({"text": ["x" * 10000] * 10 + ["short"] * 90})
+        result = DatasetAnalyzer(frame, selected_checks=["extreme_text_lengths"]).analyze()
+        assert result is not None
+
+    def test_empty_strings(self):
+        """Run extreme_text_lengths with half of the values being empty strings."""
+        frame = pd.DataFrame({"text": [""] * 50 + ["hello"] * 50})
+        result = DatasetAnalyzer(frame, selected_checks=["extreme_text_lengths"]).analyze()
+        assert result is not None
+
+
+class TestCorrelationEdgeCases:
+    """Smoke tests for the correlation checks on degenerate inputs."""
+
+    def test_single_numeric_column_correlation(self):
+        """Run feature_correlation when there is only one column to correlate."""
+        frame = pd.DataFrame({"x": range(100)})
+        result = DatasetAnalyzer(frame, selected_checks=["feature_correlation"]).analyze()
+        assert result is not None
+
+    def test_all_constant_columns_correlation(self):
+        """Run feature_correlation when both numeric columns are constant."""
+        frame = pd.DataFrame({"a": [1] * 100, "b": [2] * 100})
+        result = DatasetAnalyzer(frame, selected_checks=["feature_correlation"]).analyze()
+        assert result is not None
+
+    def test_categorical_correlation_single_category(self):
+        """Run categorical_correlation when each column holds a single category."""
+        frame = pd.DataFrame({"cat1": ["A"] * 100, "cat2": ["X"] * 100})
+        result = DatasetAnalyzer(frame, selected_checks=["categorical_correlation"]).analyze()
+        assert result is not None
+
+    def test_mixed_correlation_no_variance(self):
+        """Run mixed_correlation with a constant numeric column."""
+        frame = pd.DataFrame({"cat": ["A", "B"] * 50, "num": [42] * 100})
+        result = DatasetAnalyzer(frame, selected_checks=["mixed_correlation"]).analyze()
+        assert result is not None
+
+
+class TestLeakageEdgeCases:
+    """Exercise the leakage checks with cloned, constant, and missing targets."""
+
+    def test_leakage_target_identical_column(self):
+        """Expect a data_leakage issue when a feature is a copy of the target."""
+        frame = pd.DataFrame({"target": [0, 1] * 50, "clone": [0, 1] * 50})
+        result = DatasetAnalyzer(frame, target_col="target", selected_checks=["data_leakage"]).analyze()
+        leak_hits = [issue for issue in result["issues"] if issue["category"] == "data_leakage"]
+        assert len(leak_hits) >= 1
+
+    def test_leakage_no_target(self):
+        """Run data_leakage without passing a target column."""
+        frame = pd.DataFrame({"a": range(100), "b": range(100)})
+        result = DatasetAnalyzer(frame, selected_checks=["data_leakage"]).analyze()
+        assert result is not None
+
+    def test_target_leakage_constant_feature(self):
+        """Run target_leakage_patterns when the only feature is constant."""
+        frame = pd.DataFrame(
+            {
+                "target": [0, 1] * 50,
+                "const": [42] * 100,
+            }
+        )
+        result = DatasetAnalyzer(
+            frame,
+            target_col="target",
+            selected_checks=["target_leakage_patterns"],
+        ).analyze()
+        assert result is not None
+
+    def test_categorical_target_leakage(self):
+        """Run target_leakage_patterns with a string target and string predictor."""
+        frame = pd.DataFrame(
+            {
+                "target": ["yes", "no"] * 50,
+                "predictor": ["y", "n"] * 50,
+                "num": range(100),
+            }
+        )
+        result = DatasetAnalyzer(
+            frame,
+            target_col="target",
+            selected_checks=["target_leakage_patterns"],
+        ).analyze()
+        assert result is not None
+
+
+class TestSelectedChecksFiltering:
+    """Verify how selected_checks narrows the set of reported issues."""
+
+    def test_unknown_checks_ignored(self):
+        """Expect zero issues when only unrecognized check names are selected."""
+        frame = pd.DataFrame({"a": range(100)})
+        result = DatasetAnalyzer(frame, selected_checks=["nonexistent_check", "also_fake"]).analyze()
+        assert result["total_issues"] == 0
+
+    def test_empty_selected_checks(self):
+        """Expect zero issues when selected_checks is an empty list."""
+        frame = pd.DataFrame({"a": range(100)})
+        result = DatasetAnalyzer(frame, selected_checks=[]).analyze()
+        assert result["total_issues"] == 0
+
+    def test_single_check_selected(self):
+        """Expect every reported issue to carry the one selected category."""
+        frame = pd.DataFrame({"a": [0] * 95 + [999] * 5})
+        result = DatasetAnalyzer(frame, selected_checks=["outliers"]).analyze()
+        for reported in result["issues"]:
+            assert reported["category"] == "outliers"
+
+
+class TestDistributionEdgeCases:
+    """Smoke tests for the distribution checks on tiny or constant columns."""
+
+    def test_uniform_with_few_samples(self):
+        """Run uniform_distribution on a three-row column."""
+        frame = pd.DataFrame({"x": [1, 2, 3]})
+        result = DatasetAnalyzer(frame, selected_checks=["uniform_distribution"]).analyze()
+        assert result is not None
+
+    def test_unique_values_few_rows(self):
+        """Run unique_values on a three-row column."""
+        frame = pd.DataFrame({"x": [1, 2, 3]})
+        result = DatasetAnalyzer(frame, selected_checks=["unique_values"]).analyze()
+        assert result is not None
+
+    def test_skewness_constant_column(self):
+        """Run skewness on a column whose values are all identical."""
+        frame = pd.DataFrame({"a": [5] * 100})
+        result = DatasetAnalyzer(frame, selected_checks=["skewness"]).analyze()
+        assert result is not None
+
+
+class TestMissingPatternsEdgeCases:
+    """Exercise the missing-value checks at the 0% and 100% extremes."""
+
+    def test_no_missing_values(self):
+        """Expect no missing-related issue when every cell is populated."""
+        frame = pd.DataFrame({"a": range(100), "b": range(100)})
+        checks = ["high_missing_values", "dataset_missingness", "missing_patterns"]
+        result = DatasetAnalyzer(frame, selected_checks=checks).analyze()
+        # Substring match, so any category containing "missing" counts.
+        missing_hits = [issue for issue in result["issues"] if "missing" in issue["category"].lower()]
+        assert len(missing_hits) == 0
+
+    def test_completely_missing_column(self):
+        """Expect at least one issue when a column is entirely None."""
+        frame = pd.DataFrame({"empty": [None] * 100, "full": range(100)})
+        result = DatasetAnalyzer(frame, selected_checks=["high_missing_values", "empty_columns"]).analyze()
+        assert result["total_issues"] > 0
+
+
+class TestDriftEdgeCases:
+    """Exercise check_drift with disjoint, constant, and high-cardinality data."""
+
+    def test_drift_disjoint_columns(self):
+        """Expect an empty issue list when the frames share no column names."""
+        train = pd.DataFrame({"a": [1, 2, 3]})
+        holdout = pd.DataFrame({"b": [4, 5, 6]})
+        assert check_drift(train, holdout) == []
+
+    def test_drift_single_value_columns(self):
+        """Expect a list back when both frames hold the same constant column."""
+        train = pd.DataFrame({"x": [1.0] * 100})
+        holdout = pd.DataFrame({"x": [1.0] * 100})
+        assert isinstance(check_drift(train, holdout), list)
+
+    def test_drift_mixed_column_types(self):
+        """Expect at least one issue when numeric and categorical columns shift."""
+        train = pd.DataFrame({"num": [1, 2, 3] * 100, "cat": ["a", "b", "c"] * 100})
+        holdout = pd.DataFrame({"num": [10, 20, 30] * 100, "cat": ["a", "a", "a"] * 100})
+        assert len(check_drift(train, holdout)) > 0
+
+    def test_drift_many_categories_skipped(self):
+        """Expect no Chi-square issue when the column has 200 distinct categories."""
+        labels = [f"v{i}" for i in range(200)]
+        train = pd.DataFrame({"cat": labels})
+        holdout = pd.DataFrame({"cat": list(labels)})
+        chi2_hits = [issue for issue in check_drift(train, holdout) if "Chi-square" in issue.description]
+        assert len(chi2_hits) == 0
diff --git a/tests/test_library_api.py b/tests/test_library_api.py
index 02db03e..2dfdfa8 100644
--- a/tests/test_library_api.py
+++ b/tests/test_library_api.py
@@ -18,14 +18,16 @@
 @pytest.fixture
 def sample_dataframe():
     """Create a sample DataFrame for testing."""
-    return pd.DataFrame({
-        'id': range(1, 101),
-        'category': ['A', 'B', 'C'] * 33 + ['A'],
-        'value': [i * 1.5 for i in range(100)],
-        'target': [0, 1] * 50,
-        'missing_col': [None] * 50 + list(range(50)),
-        'constant': [42] * 100,
-    })
+    return pd.DataFrame(
+        {
+            "id": range(1, 101),
+            "category": ["A", "B", "C"] * 33 + ["A"],
+            "value": [i * 1.5 for i in range(100)],
+            "target": [0, 1] * 50,
+            "missing_col": [None] * 50 + list(range(50)),
+            "constant": [42] * 100,
+        }
+    )
 
 
 @pytest.fixture
@@ -43,25 +45,25 @@ def test_basic_analysis(self, sample_dataframe):
         summary = analyzer.analyze()
 
         # Check summary structure
-        assert 'summaries' in summary
-        assert 'issues' in summary
-        assert 'critical_count' in summary
-        assert 'warning_count' in summary
-        assert 'total_issues' in summary
+        assert "summaries" in summary
+        assert "issues" in summary
+        assert "critical_count" in summary
+        assert "warning_count" in summary
+        assert "total_issues" in summary
 
         # Check summaries
-        assert 'dataset_info' in summary['summaries']
-        assert 'variables' in summary['summaries']
-        assert 'missing_values' in summary['summaries']
+        assert "dataset_info" in summary["summaries"]
+        assert "variables" in summary["summaries"]
+        assert "missing_values" in summary["summaries"]
 
     def test_analysis_with_target(self, sample_dataframe):
         """Test analysis with target column specified."""
-        analyzer = DatasetAnalyzer(sample_dataframe, target_col='target')
+        analyzer = DatasetAnalyzer(sample_dataframe, target_col="target")
         summary = analyzer.analyze()
 
         # Should detect issues related to target
         assert summary is not None
-        assert 'issues' in summary
+        assert "issues" in summary
 
     def test_analysis_with_plots(self, sample_dataframe):
         """Test analysis with visualizations enabled."""
@@ -69,20 +71,17 @@ def test_analysis_with_plots(self, sample_dataframe):
         summary = analyzer.analyze()
 
         # Check for plots in summaries
-        assert 'plots' in summary['summaries']
+        assert "plots" in summary["summaries"]
 
     def test_specific_checks(self, sample_dataframe):
         """Test running specific checks only."""
-        selected_checks = ['outliers', 'duplicates', 'high_missing_values']
-        analyzer = DatasetAnalyzer(
-            sample_dataframe,
-            selected_checks=selected_checks
-        )
+        selected_checks = ["outliers", "duplicates", "high_missing_values"]
+        analyzer = DatasetAnalyzer(sample_dataframe, selected_checks=selected_checks)
         summary = analyzer.analyze()
 
         # Should only run specified checks
         assert summary is not None
-        assert 'issues' in summary
+        assert "issues" in summary
 
     def test_sampling(self, sample_dataframe):
         """Test automatic sampling for large datasets."""
@@ -90,42 +89,34 @@ def test_sampling(self, sample_dataframe):
         large_df = pd.concat([sample_dataframe] * 1000, ignore_index=True)
 
         sampling_config = SamplingConfig(max_rows=1000)
-        analyzer = DatasetAnalyzer(
-            large_df,
-            sampling_config=sampling_config,
-            auto_sample=True
-        )
+        analyzer = DatasetAnalyzer(large_df, sampling_config=sampling_config, auto_sample=True)
         summary = analyzer.analyze()
 
         # Check if sampling occurred
-        if 'sampling_info' in summary:
-            assert summary['sampling_info']['was_sampled']
+        if "sampling_info" in summary:
+            assert summary["sampling_info"]["was_sampled"]
 
     def test_drift_detection(self, sample_dataframe):
         """Test drift detection with comparison dataset."""
         # Create a drifted comparison dataset
         comparison_df = sample_dataframe.copy()
-        comparison_df['value'] = comparison_df['value'] * 2  # Drift in value
+        comparison_df["value"] = comparison_df["value"] * 2  # Drift in value
 
-        analyzer = DatasetAnalyzer(
-            sample_dataframe,
-            comparison_df=comparison_df,
-            selected_checks=['dataset_drift']
-        )
+        analyzer = DatasetAnalyzer(sample_dataframe, comparison_df=comparison_df, selected_checks=["dataset_drift"])
         summary = analyzer.analyze()
 
         # Should detect drift
-        drift_issues = [i for i in summary['issues'] if i['category'] == 'dataset_drift']
+        drift_issues = [i for i in summary["issues"] if i["category"] == "dataset_drift"]
         assert len(drift_issues) > 0
 
     def test_all_checks(self, sample_dataframe):
         """Test that all available checks can run."""
-        analyzer = DatasetAnalyzer(sample_dataframe, target_col='target')
+        analyzer = DatasetAnalyzer(sample_dataframe, target_col="target")
         summary = analyzer.analyze()
 
         # All checks should complete without error
-        assert 'issues' in summary
-        assert isinstance(summary['issues'], list)
+        assert "issues" in summary
+        assert isinstance(summary["issues"], list)
 
 
 class TestReportGeneration:
@@ -136,26 +127,21 @@ def test_markdown_report(self, sample_dataframe):
         analyzer = DatasetAnalyzer(sample_dataframe)
         summary = analyzer.analyze()
 
-        with tempfile.NamedTemporaryFile(mode='w', suffix='.md', delete=False) as f:
+        with tempfile.NamedTemporaryFile(mode="w", suffix=".md", delete=False) as f:
             output_file = f.name
 
         try:
-            report = generate_report(
-                summary,
-                format='md',
-                full=True,
-                output_file=output_file
-            )
+            report = generate_report(summary, format="md", full=True, output_file=output_file)
 
             assert report is not None
             assert os.path.exists(output_file)
             assert os.path.getsize(output_file) > 0
 
             # Check content
-            with open(output_file, 'r') as f:
+            with open(output_file) as f:
                 content = f.read()
-                assert '# Dataset Quality Report' in content
-                assert '## Overview' in content
+                assert "# Dataset Quality Report" in content
+                assert "## Overview" in content
         finally:
             if os.path.exists(output_file):
                 os.remove(output_file)
@@ -165,27 +151,23 @@ def test_json_report(self, sample_dataframe):
         analyzer = DatasetAnalyzer(sample_dataframe)
         summary = analyzer.analyze()
 
-        with tempfile.NamedTemporaryFile(mode='w', suffix='.json', delete=False) as f:
+        with tempfile.NamedTemporaryFile(mode="w", suffix=".json", delete=False) as f:
             output_file = f.name
 
         try:
-            report = generate_report(
-                summary,
-                format='json',
-                full=True,
-                output_file=output_file
-            )
+            report = generate_report(summary, format="json", full=True, output_file=output_file)
 
             assert report is not None
             assert os.path.exists(output_file)
 
             # Verify it's valid JSON
             import json
-            with open(output_file, 'r') as f:
+
+            with open(output_file) as f:
                 data = json.load(f)
-                assert 'metadata' in data
-                assert 'dataset_overview' in data
-                assert 'alerts' in data
+                assert "metadata" in data
+                assert "dataset_overview" in data
+                assert "alerts" in data
         finally:
             if os.path.exists(output_file):
                 os.remove(output_file)
@@ -195,26 +177,20 @@ def test_html_report_minimal(self, sample_dataframe):
         analyzer = DatasetAnalyzer(sample_dataframe, include_plots=True)
         summary = analyzer.analyze()
 
-        with tempfile.NamedTemporaryFile(mode='w', suffix='.html', delete=False) as f:
+        with tempfile.NamedTemporaryFile(mode="w", suffix=".html", delete=False) as f:
             output_file = f.name
 
         try:
-            report = generate_report(
-                summary,
-                format='html',
-                full=True,
-                output_file=output_file,
-                theme='minimal'
-            )
+            report = generate_report(summary, format="html", full=True, output_file=output_file, theme="minimal")
 
             assert report is not None
             assert os.path.exists(output_file)
 
             # Check HTML content
-            with open(output_file, 'r') as f:
+            with open(output_file) as f:
                 content = f.read()
-                assert '<html' in content
-                assert 'HashPrep' in content or 'Quality Report' in content
+                assert "<html" in content
+                assert "HashPrep" in content or "Quality Report" in content
         finally:
             if os.path.exists(output_file):
                 os.remove(output_file)
@@ -224,25 +200,19 @@ def test_html_report_neubrutalism(self, sample_dataframe):
         analyzer = DatasetAnalyzer(sample_dataframe, include_plots=True)
         summary = analyzer.analyze()
 
-        with tempfile.NamedTemporaryFile(mode='w', suffix='.html', delete=False) as f:
+        with tempfile.NamedTemporaryFile(mode="w", suffix=".html", delete=False) as f:
             output_file = f.name
 
         try:
-            report = generate_report(
-                summary,
-                format='html',
-                full=True,
-                output_file=output_file,
-                theme='neubrutalism'
-            )
+            report = generate_report(summary, format="html", full=True, output_file=output_file, theme="neubrutalism")
 
             assert report is not None
             assert os.path.exists(output_file)
 
             # Check for brutal styling
-            with open(output_file, 'r') as f:
+            with open(output_file) as f:
                 content = f.read()
-                assert 'brutal' in content.lower()
+                assert "brutal" in content.lower()
         finally:
             if os.path.exists(output_file):
                 os.remove(output_file)
@@ -252,25 +222,20 @@ def test_pdf_report(self, sample_dataframe):
         analyzer = DatasetAnalyzer(sample_dataframe)
         summary = analyzer.analyze()
 
-        with tempfile.NamedTemporaryFile(mode='w', suffix='.pdf', delete=False) as f:
+        with tempfile.NamedTemporaryFile(mode="w", suffix=".pdf", delete=False) as f:
             output_file = f.name
 
         try:
-            report = generate_report(
-                summary,
-                format='pdf',
-                full=True,
-                output_file=output_file
-            )
+            report = generate_report(summary, format="pdf", full=True, output_file=output_file)
 
             assert report is not None
             assert os.path.exists(output_file)
             assert os.path.getsize(output_file) > 0
 
             # Check PDF magic number
-            with open(output_file, 'rb') as f:
+            with open(output_file, "rb") as f:
                 header = f.read(4)
-                assert header == b'%PDF'
+                assert header == b"%PDF"
         finally:
             if os.path.exists(output_file):
                 os.remove(output_file)
@@ -281,112 +246,114 @@ class TestChecks:
 
     def test_missing_value_checks(self):
         """Test missing value detection checks."""
-        df = pd.DataFrame({
-            'mostly_missing': [None] * 90 + [1] * 10,
-            'some_missing': [None] * 20 + [1] * 80,
-            'no_missing': range(100),
-        })
-
-        analyzer = DatasetAnalyzer(
-            df,
-            selected_checks=['high_missing_values', 'dataset_missingness']
+        df = pd.DataFrame(
+            {
+                "mostly_missing": [None] * 90 + [1] * 10,
+                "some_missing": [None] * 20 + [1] * 80,
+                "no_missing": range(100),
+            }
         )
+
+        analyzer = DatasetAnalyzer(df, selected_checks=["high_missing_values", "dataset_missingness"])
         summary = analyzer.analyze()
 
         # Should detect high missing values
-        missing_issues = [i for i in summary['issues'] if 'missing' in i['category'].lower()]
+        missing_issues = [i for i in summary["issues"] if "missing" in i["category"].lower()]
         assert len(missing_issues) > 0
 
     def test_correlation_checks(self):
         """Test correlation detection."""
-        df = pd.DataFrame({
-            'x': range(100),
-            'y': [i * 2 for i in range(100)],  # Highly correlated
-            'z': [i ** 2 for i in range(100)],
-        })
-
-        analyzer = DatasetAnalyzer(
-            df,
-            selected_checks=['feature_correlation']
+        df = pd.DataFrame(
+            {
+                "x": range(100),
+                "y": [i * 2 for i in range(100)],  # Highly correlated
+                "z": [i**2 for i in range(100)],
+            }
         )
+
+        analyzer = DatasetAnalyzer(df, selected_checks=["feature_correlation"])
         summary = analyzer.analyze()
 
         # Check that correlations were computed
-        assert 'numeric_correlations' in summary['summaries']
+        assert "numeric_correlations" in summary["summaries"]
 
     def test_outlier_detection(self):
         """Test outlier detection."""
-        df = pd.DataFrame({
-            'normal': range(100),
-            'with_outliers': list(range(95)) + [1000, 2000, 3000, 4000, 5000],
-        })
+        df = pd.DataFrame(
+            {
+                "normal": range(100),
+                "with_outliers": list(range(95)) + [1000, 2000, 3000, 4000, 5000],
+            }
+        )
 
-        analyzer = DatasetAnalyzer(df, selected_checks=['outliers'])
+        analyzer = DatasetAnalyzer(df, selected_checks=["outliers"])
         summary = analyzer.analyze()
 
         # Should detect outliers
-        outlier_issues = [i for i in summary['issues'] if i['category'] == 'outliers']
+        outlier_issues = [i for i in summary["issues"] if i["category"] == "outliers"]
         assert len(outlier_issues) > 0
 
     def test_duplicate_detection(self):
         """Test duplicate row detection."""
-        df = pd.DataFrame({
-            'a': [1, 2, 3, 1, 2],
-            'b': [4, 5, 6, 4, 5],
-        })
+        df = pd.DataFrame(
+            {
+                "a": [1, 2, 3, 1, 2],
+                "b": [4, 5, 6, 4, 5],
+            }
+        )
 
-        analyzer = DatasetAnalyzer(df, selected_checks=['duplicates'])
+        analyzer = DatasetAnalyzer(df, selected_checks=["duplicates"])
         summary = analyzer.analyze()
 
         # Check duplicate info in dataset_info
-        assert summary['summaries']['dataset_info']['duplicate_rows'] == 2
+        assert summary["summaries"]["dataset_info"]["duplicate_rows"] == 2
 
     def test_cardinality_checks(self):
         """Test high cardinality detection."""
-        df = pd.DataFrame({
-            'high_card': [f'value_{i}' for i in range(1000)],  # Need more unique values
-            'low_card': ['A', 'B'] * 500,
-            'feature': range(1000),
-        })
+        df = pd.DataFrame(
+            {
+                "high_card": [f"value_{i}" for i in range(1000)],  # Need more unique values
+                "low_card": ["A", "B"] * 500,
+                "feature": range(1000),
+            }
+        )
 
-        analyzer = DatasetAnalyzer(df, selected_checks=['high_cardinality'])
+        analyzer = DatasetAnalyzer(df, selected_checks=["high_cardinality"])
         summary = analyzer.analyze()
 
         # Should detect high cardinality
-        card_issues = [i for i in summary['issues'] if i['category'] == 'high_cardinality']
+        card_issues = [i for i in summary["issues"] if i["category"] == "high_cardinality"]
         assert len(card_issues) >= 0  # May or may not detect depending on thresholds
 
     def test_constant_column_detection(self):
         """Test single value column detection."""
-        df = pd.DataFrame({
-            'constant': [42] * 100,
-            'variable': range(100),
-        })
+        df = pd.DataFrame(
+            {
+                "constant": [42] * 100,
+                "variable": range(100),
+            }
+        )
 
-        analyzer = DatasetAnalyzer(df, selected_checks=['single_value_columns'])
-        summary = analyzer.analyze()
+        analyzer = DatasetAnalyzer(df, selected_checks=["single_value_columns"])
+        analyzer.analyze()
 
-        # Should detect constant column
-        constant_issues = [i for i in summary['issues'] if i['category'] == 'single_value_columns']
         # Verify the check ran (may or may not generate issue depending on column type inference)
-        assert 'single_value_columns' in DatasetAnalyzer.ALL_CHECKS
+        assert "single_value_columns" in DatasetAnalyzer.ALL_CHECKS
 
     def test_class_imbalance_detection(self):
         """Test class imbalance detection."""
-        df = pd.DataFrame({
-            'target': [0] * 95 + [1] * 5,
-            'feature': range(100),
-        })
-
-        analyzer = DatasetAnalyzer(
-            df,
-            target_col='target',
-            selected_checks=['class_imbalance']
+        df = pd.DataFrame(
+            {
+                "target": [0] * 95 + [1] * 5,
+                "feature": range(100),
+            }
         )
+
+        analyzer = DatasetAnalyzer(df, target_col="target", selected_checks=["class_imbalance"])
         summary = analyzer.analyze()
 
         # Should detect imbalance
-        imbalance_issues = [i for i in summary['issues'] if i['category'] == 'class_imbalance']
+        imbalance_issues = [i for i in summary["issues"] if i["category"] == "class_imbalance"]
         assert len(imbalance_issues) > 0
 
 
@@ -399,18 +366,18 @@ def test_titanic_full_analysis(self, titanic_csv):
             pytest.skip("Titanic dataset not found")
 
         df = pd.read_csv(titanic_csv)
-        analyzer = DatasetAnalyzer(df, target_col='Survived', include_plots=True)
+        analyzer = DatasetAnalyzer(df, target_col="Survived", include_plots=True)
         summary = analyzer.analyze()
 
         # Verify complete analysis
         assert summary is not None
-        assert len(summary['issues']) > 0
-        assert 'plots' in summary['summaries']
+        assert len(summary["issues"]) > 0
+        assert "plots" in summary["summaries"]
 
         # Verify key issues are detected
-        categories = {issue['category'] for issue in summary['issues']}
+        categories = {issue["category"] for issue in summary["issues"]}
         # Cabin has 77% missing - check was renamed to just 'missing_values' in some versions
-        assert 'high_missing_values' in categories or any('missing' in cat for cat in categories)
+        assert "high_missing_values" in categories or any("missing" in cat for cat in categories)
 
     def test_titanic_all_report_formats(self, titanic_csv):
         """Test generating all report formats for Titanic."""
@@ -421,27 +388,16 @@ def test_titanic_all_report_formats(self, titanic_csv):
         analyzer = DatasetAnalyzer(df, include_plots=True)
         summary = analyzer.analyze()
 
-        formats = ['md', 'json', 'html', 'pdf']
+        formats = ["md", "json", "html", "pdf"]
         for fmt in formats:
-            with tempfile.NamedTemporaryFile(mode='w', suffix=f'.{fmt}', delete=False) as f:
+            with tempfile.NamedTemporaryFile(mode="w", suffix=f".{fmt}", delete=False) as f:
                 output_file = f.name
 
             try:
-                if fmt == 'html':
-                    report = generate_report(
-                        summary,
-                        format=fmt,
-                        full=True,
-                        output_file=output_file,
-                        theme='minimal'
-                    )
+                if fmt == "html":
+                    report = generate_report(summary, format=fmt, full=True, output_file=output_file, theme="minimal")
                 else:
-                    report = generate_report(
-                        summary,
-                        format=fmt,
-                        full=True,
-                        output_file=output_file
-                    )
+                    report = generate_report(summary, format=fmt, full=True, output_file=output_file)
 
                 assert report is not None
                 assert os.path.exists(output_file)
@@ -451,5 +407,5 @@ def test_titanic_all_report_formats(self, titanic_csv):
                     os.remove(output_file)
 
 
-if __name__ == '__main__':
-    pytest.main([__file__, '-v'])
+if __name__ == "__main__":
+    pytest.main([__file__, "-v"])
diff --git a/tests/test_sampling.py b/tests/test_sampling.py
index 25d5091..4a30c47 100644
--- a/tests/test_sampling.py
+++ b/tests/test_sampling.py
@@ -1,8 +1,6 @@
 """Tests for dataset sampling module."""
 
-import numpy as np
 import pandas as pd
-import pytest
 
 from hashprep.utils.sampling import DatasetSampler, SamplingConfig
 
@@ -62,12 +60,8 @@ def test_head_sampling(self):
         assert list(result["col"]) == list(range(100))
 
     def test_stratified_sampling_preserves_proportions(self):
-        df = pd.DataFrame(
-            {"feature": range(1000), "label": ["A"] * 900 + ["B"] * 100}
-        )
-        config = SamplingConfig(
-            max_rows=100, sample_method="stratified", stratify_column="label"
-        )
+        df = pd.DataFrame({"feature": range(1000), "label": ["A"] * 900 + ["B"] * 100})
+        config = SamplingConfig(max_rows=100, sample_method="stratified", stratify_column="label")
         sampler = DatasetSampler(config)
 
         result = sampler.sample(df)
@@ -112,4 +106,4 @@ def test_should_not_sample_small_dataset(self):
         config = SamplingConfig(max_rows=1000)
         sampler = DatasetSampler(config)
 
-        assert sampler.should_sample(df) == False
+        assert not sampler.should_sample(df)
diff --git a/uv.lock b/uv.lock
index 674d497..f3ff09e 100644
--- a/uv.lock
+++ b/uv.lock
@@ -494,6 +494,7 @@ dependencies = [
 [package.dev-dependencies]
 dev = [
     { name = "pytest" },
+    { name = "ruff" },
 ]
 
 [package.metadata]
@@ -514,7 +515,10 @@ requires-dist = [
 ]
 
 [package.metadata.requires-dev]
-dev = [{ name = "pytest", specifier = ">=9.0.2" }]
+dev = [
+    { name = "pytest", specifier = ">=9.0.2" },
+    { name = "ruff", specifier = ">=0.8.0" },
+]
 
 [[package]]
 name = "iniconfig"
@@ -1234,6 +1238,31 @@ wheels = [
     { url = "https://files.pythonhosted.org/packages/fa/de/02b54f42487e3d3c6efb3f89428677074ca7bf43aae402517bc7cca949f3/PyYAML-6.0.2-cp313-cp313-win_amd64.whl", hash = "sha256:8388ee1976c416731879ac16da0aff3f63b286ffdd57cdeb95f3f2e085687563", size = 156446, upload-time = "2024-08-06T20:33:04.33Z" },
 ]
 
+[[package]]
+name = "ruff"
+version = "0.15.1"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/04/dc/4e6ac71b511b141cf626357a3946679abeba4cf67bc7cc5a17920f31e10d/ruff-0.15.1.tar.gz", hash = "sha256:c590fe13fb57c97141ae975c03a1aedb3d3156030cabd740d6ff0b0d601e203f", size = 4540855, upload-time = "2026-02-12T23:09:09.998Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/23/bf/e6e4324238c17f9d9120a9d60aa99a7daaa21204c07fcd84e2ef03bb5fd1/ruff-0.15.1-py3-none-linux_armv6l.whl", hash = "sha256:b101ed7cf4615bda6ffe65bdb59f964e9f4a0d3f85cbf0e54f0ab76d7b90228a", size = 10367819, upload-time = "2026-02-12T23:09:03.598Z" },
+    { url = "https://files.pythonhosted.org/packages/b3/ea/c8f89d32e7912269d38c58f3649e453ac32c528f93bb7f4219258be2e7ed/ruff-0.15.1-py3-none-macosx_10_12_x86_64.whl", hash = "sha256:939c995e9277e63ea632cc8d3fae17aa758526f49a9a850d2e7e758bfef46602", size = 10798618, upload-time = "2026-02-12T23:09:22.928Z" },
+    { url = "https://files.pythonhosted.org/packages/5e/0f/1d0d88bc862624247d82c20c10d4c0f6bb2f346559d8af281674cf327f15/ruff-0.15.1-py3-none-macosx_11_0_arm64.whl", hash = "sha256:1d83466455fdefe60b8d9c8df81d3c1bbb2115cede53549d3b522ce2bc703899", size = 10148518, upload-time = "2026-02-12T23:08:58.339Z" },
+    { url = "https://files.pythonhosted.org/packages/f5/c8/291c49cefaa4a9248e986256df2ade7add79388fe179e0691be06fae6f37/ruff-0.15.1-py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a9457e3c3291024866222b96108ab2d8265b477e5b1534c7ddb1810904858d16", size = 10518811, upload-time = "2026-02-12T23:09:31.865Z" },
+    { url = "https://files.pythonhosted.org/packages/c3/1a/f5707440e5ae43ffa5365cac8bbb91e9665f4a883f560893829cf16a606b/ruff-0.15.1-py3-none-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:92c92b003e9d4f7fbd33b1867bb15a1b785b1735069108dfc23821ba045b29bc", size = 10196169, upload-time = "2026-02-12T23:09:17.306Z" },
+    { url = "https://files.pythonhosted.org/packages/2a/ff/26ddc8c4da04c8fd3ee65a89c9fb99eaa5c30394269d424461467be2271f/ruff-0.15.1-py3-none-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:1fe5c41ab43e3a06778844c586251eb5a510f67125427625f9eb2b9526535779", size = 10990491, upload-time = "2026-02-12T23:09:25.503Z" },
+    { url = "https://files.pythonhosted.org/packages/fc/00/50920cb385b89413f7cdb4bb9bc8fc59c1b0f30028d8bccc294189a54955/ruff-0.15.1-py3-none-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:66a6dd6df4d80dc382c6484f8ce1bcceb55c32e9f27a8b94c32f6c7331bf14fb", size = 11843280, upload-time = "2026-02-12T23:09:19.88Z" },
+    { url = "https://files.pythonhosted.org/packages/5d/6d/2f5cad8380caf5632a15460c323ae326f1e1a2b5b90a6ee7519017a017ca/ruff-0.15.1-py3-none-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:6a4a42cbb8af0bda9bcd7606b064d7c0bc311a88d141d02f78920be6acb5aa83", size = 11274336, upload-time = "2026-02-12T23:09:14.907Z" },
+    { url = "https://files.pythonhosted.org/packages/a3/1d/5f56cae1d6c40b8a318513599b35ea4b075d7dc1cd1d04449578c29d1d75/ruff-0.15.1-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4ab064052c31dddada35079901592dfba2e05f5b1e43af3954aafcbc1096a5b2", size = 11137288, upload-time = "2026-02-12T23:09:07.475Z" },
+    { url = "https://files.pythonhosted.org/packages/cd/20/6f8d7d8f768c93b0382b33b9306b3b999918816da46537d5a61635514635/ruff-0.15.1-py3-none-manylinux_2_31_riscv64.whl", hash = "sha256:5631c940fe9fe91f817a4c2ea4e81f47bee3ca4aa646134a24374f3c19ad9454", size = 11070681, upload-time = "2026-02-12T23:08:55.43Z" },
+    { url = "https://files.pythonhosted.org/packages/9a/67/d640ac76069f64cdea59dba02af2e00b1fa30e2103c7f8d049c0cff4cafd/ruff-0.15.1-py3-none-musllinux_1_2_aarch64.whl", hash = "sha256:68138a4ba184b4691ccdc39f7795c66b3c68160c586519e7e8444cf5a53e1b4c", size = 10486401, upload-time = "2026-02-12T23:09:27.927Z" },
+    { url = "https://files.pythonhosted.org/packages/65/3d/e1429f64a3ff89297497916b88c32a5cc88eeca7e9c787072d0e7f1d3e1e/ruff-0.15.1-py3-none-musllinux_1_2_armv7l.whl", hash = "sha256:518f9af03bfc33c03bdb4cb63fabc935341bb7f54af500f92ac309ecfbba6330", size = 10197452, upload-time = "2026-02-12T23:09:12.147Z" },
+    { url = "https://files.pythonhosted.org/packages/78/83/e2c3bade17dad63bf1e1c2ffaf11490603b760be149e1419b07049b36ef2/ruff-0.15.1-py3-none-musllinux_1_2_i686.whl", hash = "sha256:da79f4d6a826caaea95de0237a67e33b81e6ec2e25fc7e1993a4015dffca7c61", size = 10693900, upload-time = "2026-02-12T23:09:34.418Z" },
+    { url = "https://files.pythonhosted.org/packages/a1/27/fdc0e11a813e6338e0706e8b39bb7a1d61ea5b36873b351acee7e524a72a/ruff-0.15.1-py3-none-musllinux_1_2_x86_64.whl", hash = "sha256:3dd86dccb83cd7d4dcfac303ffc277e6048600dfc22e38158afa208e8bf94a1f", size = 11227302, upload-time = "2026-02-12T23:09:36.536Z" },
+    { url = "https://files.pythonhosted.org/packages/f6/58/ac864a75067dcbd3b95be5ab4eb2b601d7fbc3d3d736a27e391a4f92a5c1/ruff-0.15.1-py3-none-win32.whl", hash = "sha256:660975d9cb49b5d5278b12b03bb9951d554543a90b74ed5d366b20e2c57c2098", size = 10462555, upload-time = "2026-02-12T23:09:29.899Z" },
+    { url = "https://files.pythonhosted.org/packages/e0/5e/d4ccc8a27ecdb78116feac4935dfc39d1304536f4296168f91ed3ec00cd2/ruff-0.15.1-py3-none-win_amd64.whl", hash = "sha256:c820fef9dd5d4172a6570e5721704a96c6679b80cf7be41659ed439653f62336", size = 11599956, upload-time = "2026-02-12T23:09:01.157Z" },
+    { url = "https://files.pythonhosted.org/packages/2a/07/5bda6a85b220c64c65686bc85bd0bbb23b29c62b3a9f9433fa55f17cda93/ruff-0.15.1-py3-none-win_arm64.whl", hash = "sha256:5ff7d5f0f88567850f45081fac8f4ec212be8d0b963e385c3f7d0d2eb4899416", size = 10874604, upload-time = "2026-02-12T23:09:05.515Z" },
+]
+
 [[package]]
 name = "scikit-learn"
 version = "1.7.2"