# Source code for trustlens.api

"""
trustlens.api.
==============
Primary entry point for the TrustLens analysis pipeline.

Usage
-----
>>> from trustlens import analyze
>>> report = analyze(model, X_val, y_val, y_prob=y_prob)
>>> report.show()
"""

from __future__ import annotations

import logging
from typing import Any, Optional

import numpy as np

from trustlens.backends.registry import get_resolver
from trustlens.core.pipeline import _run_analysis_pipeline
from trustlens.report import TrustReport

logger = logging.getLogger(__name__)


def _load_demo_model_and_data(dataset: str) -> tuple[Any, np.ndarray, np.ndarray]:
    """
    Train a small demo classifier and return ``(model, X_val, y_val)``.

    Parameters
    ----------
    dataset : str
        ``"iris"`` (reduced to classes 0 and 1 so the demo is binary) or
        ``"breast_cancer"``.

    Returns
    -------
    tuple
        The fitted scikit-learn estimator and the held-out 30% split
        (features, labels) produced with ``random_state=42``.

    Raises
    ------
    ValueError
        If ``dataset`` is not one of the supported demo names.
    """
    if dataset not in ("iris", "breast_cancer"):
        raise ValueError("Supported demo datasets: 'iris', 'breast_cancer'")

    # scikit-learn is imported lazily so it is only required on the demo path.
    from sklearn.model_selection import train_test_split

    if dataset == "iris":
        from sklearn.datasets import load_iris
        from sklearn.ensemble import RandomForestClassifier

        data = load_iris()
        X_all, y_all = data.data, data.target
        # Make it binary for a simpler demo: drop the third class.
        keep = y_all != 2
        X_all, y_all = X_all[keep], y_all[keep]
        model = RandomForestClassifier(n_estimators=10, random_state=42)
    else:
        from sklearn.datasets import load_breast_cancer
        from sklearn.linear_model import LogisticRegression

        data = load_breast_cancer()
        X_all, y_all = data.data, data.target
        model = LogisticRegression(max_iter=1000, random_state=42)

    X_train, X_val, y_train, y_val = train_test_split(
        X_all, y_all, test_size=0.3, random_state=42
    )
    model.fit(X_train, y_train)
    return model, X_val, y_val


def quick_analyze(
    model=None, X=None, y=None, dataset="iris", framework: Optional[str] = None
) -> TrustReport:
    """
    Zero-friction entry point for TrustLens.

    If ``model``, ``X`` and ``y`` are all supplied they are analyzed as-is.
    If any of them is missing, a demo model is trained on ``dataset`` and the
    demo validation split is analyzed instead (any partially supplied inputs
    are ignored, and a warning is logged to say so).

    Parameters
    ----------
    model : Any, optional
        Trained model to analyze.
    X : np.ndarray, optional
        Validation features.
    y : np.ndarray, optional
        Ground-truth labels for ``X``.
    dataset : str
        Demo dataset name, ``"iris"`` or ``"breast_cancer"``. Only used when
        the demo path is taken.
    framework : str, optional
        Forwarded unchanged to :func:`analyze`.

    Returns
    -------
    TrustReport
        The report returned by :func:`analyze`, after ``show()`` and
        ``summary_plot()`` have been called on it.

    Raises
    ------
    ValueError
        If the demo path is taken and ``dataset`` is not supported.
    """
    use_demo = model is None or X is None or y is None

    if use_demo:
        if not (model is None and X is None and y is None):
            # Previously the caller's partial inputs were dropped silently.
            logger.warning(
                "quick_analyze needs model, X and y together; ignoring the "
                "partially supplied inputs and falling back to the %s demo.",
                dataset,
            )
        logger.info("No model/data provided. Auto-loading %s dataset for demo...", dataset)
        model, X, y = _load_demo_model_and_data(dataset)

        print(f"\nTrustLens Analysis: {dataset}")
        print(f"Status: Loading demo model and {dataset} validation data...")
    else:
        # Do not claim a demo dataset was loaded when the caller brought their own.
        print("\nTrustLens Analysis: user-supplied model")
        print("Status: Using caller-provided model and validation data...")

    report = analyze(model=model, X=X, y_true=y, framework=framework, verbose=False)

    report.show()
    report.summary_plot()
    return report



# [docs]
def analyze(
    model: Any,
    X: np.ndarray,
    y_true: np.ndarray,
    y_pred: Optional[np.ndarray] = None,
    y_prob: Optional[np.ndarray] = None,
    *,
    framework: Optional[str] = None,
    embeddings: Optional[np.ndarray] = None,
    sensitive_features: Optional[dict[str, np.ndarray]] = None,
    modules: Optional[list[str]] = None,
    plugins: Optional[list[str]] = None,
    verbose: bool = True,
) -> TrustReport:
    """
    Run a full TrustLens analysis on a trained model.

    Predictions are obtained from the backend resolver selected by
    ``get_resolver``; the resolved bundle is then handed to the core
    analysis pipeline together with the remaining arguments.

    Parameters
    ----------
    model : Any
        Trained machine learning model, passed to the resolver and the
        pipeline unchanged.
        NOTE(review): whether ``None`` is accepted when only one of
        ``y_pred`` / ``y_prob`` is given depends on the resolver - confirm.
    X : np.ndarray
        Validation feature matrix, shape (n_samples, n_features).
    y_true : np.ndarray
        Ground-truth labels, shape (n_samples,).
    y_pred : np.ndarray, optional
        Predicted class labels, shape (n_samples,). Forwarded to the
        resolver as an override.
    y_prob : np.ndarray, optional
        Predicted class probabilities, shape (n_samples, n_classes).
        Forwarded to the resolver as an override.
    framework : str, optional
        Model framework name (e.g. 'sklearn', 'xgboost') used for resolver
        lookup. Replaced by ``"manual"`` when both ``y_pred`` and ``y_prob``
        are supplied.
    embeddings : np.ndarray, optional
        Latent representations for representation analysis,
        shape (n_samples, embedding_dim).
    sensitive_features : dict, optional
        Mapping of feature name to 1-D array for bias/subgroup analysis.
    modules : list[str], optional
        Subset of analysis modules to run.
    plugins : list[str], optional
        Names of registered plugins to activate.
    verbose : bool
        Forwarded to the pipeline. Default True.

    Returns
    -------
    TrustReport
        The report produced by the core analysis pipeline.
    """
    sample_count = len(y_true)
    if sample_count < 30:
        logger.warning("Small dataset (n < 30) detected. Calibration metrics may be unreliable.")

    # --- Step 1: resolve predictions through the backend registry ---------
    # With both overrides present there is nothing to infer from the model,
    # so the lookup is pinned to the "manual" framework.
    overrides_complete = not (y_pred is None or y_prob is None)
    lookup_framework = "manual" if overrides_complete else framework

    resolve_predictions = get_resolver(model, framework=lookup_framework)
    resolved = resolve_predictions(model, X, y_pred=y_pred, y_prob=y_prob)

    # --- Step 2: hand everything to the core pipeline ---------------------
    pipeline_kwargs = {
        "model": model,
        "X": X,
        "y_true": y_true,
        "y_pred": resolved.y_pred,
        "y_prob": resolved.y_prob,
        "framework": resolved.framework,
        "backend_metadata": resolved.metadata,
        "embeddings": embeddings,
        "sensitive_features": sensitive_features,
        "modules": modules,
        "plugins": plugins,
        "verbose": verbose,
    }
    return _run_analysis_pipeline(**pipeline_kwargs)