# Source code for jacscanomaly.models

from __future__ import annotations

from dataclasses import dataclass
from typing import Any, Dict, List, Optional

import numpy as np



# [docs]
@dataclass(frozen=True)
class CandidateQuality:
    """
    Immutable record of support diagnostics for a single anomaly candidate.

    All values are derived from the per-point chi-square improvement inside
    the candidate's local evaluation window.

    Attributes
    ----------
    n_window : int
        How many data points lie inside the local chi2 evaluation window.
    n_contrib : int
        How many of those points improve by more than the configured threshold.
    n_eff : float
        Effective count of contributing points (participation-ratio style).
    peak_frac : float
        Share of the total positive improvement owned by the single
        strongest point.
    rho1 : float
        Lag-1 autocorrelation of the signed per-point improvements within
        the window.
    longest_run : int
        Length of the longest consecutive run of above-threshold
        contributing points.
    """

    # --- point counts -----------------------------------------------------
    n_window: int
    n_contrib: int

    # --- how concentrated the improvement is ------------------------------
    n_eff: float
    peak_frac: float

    # --- temporal structure of the improvement ----------------------------
    rho1: float
    longest_run: int




# [docs]
@dataclass(frozen=True)
class BestCandidate:
    """
    Immutable description of the top-ranked anomaly candidate.

    The candidate is the one selected as best among all extracted clusters.

    Attributes
    ----------
    t0 : float
        Center time of the candidate.
    teff : float
        Effective timescale of the candidate.
    dchi2 : float
        Chi-square improvement, ``chi2_null - chi2_anom``; larger values
        indicate a better anomaly fit.
    med_others : float
        Median ``dchi2`` over the bulk of the remaining candidates (the best
        one is excluded, and the upper tail is trimmed when configured).
    std_others : float
        Standard deviation of ``dchi2`` over the same bulk of remaining
        candidates (best excluded, upper tail trimmed when configured).
    score : float
        Standardized score of the best candidate, computed as
        ``(dchi2_best - med_others) / std_others``. It may be NaN/inf
        depending on the number of candidates and on ``std_others``.
    quality : CandidateQuality
        Per-point support and temporal diagnostics for this candidate.
    """

    # --- location and strength of the candidate ---------------------------
    t0: float
    teff: float
    dchi2: float

    # --- reference statistics of the other candidates and the score -------
    med_others: float
    std_others: float
    score: float

    # --- per-point diagnostics --------------------------------------------
    quality: CandidateQuality




# [docs]
@dataclass(frozen=True, eq=False)
class SeasonSummary:
    """
    Summary of the anomaly scan for a single season.

    Instances compare and hash by identity (``eq=False``). The field-wise
    ``__eq__``/``__hash__`` that ``dataclass`` would otherwise generate
    cannot handle the NumPy array fields: hashing raises ``TypeError`` and
    comparing two instances holding distinct arrays raises ``ValueError``.

    Attributes
    ----------
    season_idx : int
        0-based season index.
    t_start, t_end : float
        Time range of the season.
    n_grid : int
        Number of grid points evaluated in this season.
    clusters : np.ndarray
        Extracted clusters for this season, shape (K, 3) with rows [t0, teff, dchi2].
    grid_metrics : np.ndarray
        Raw per-grid diagnostics, shape (N, 9), columns:
        [t0, teff, dchi2, n_window, n_contrib, n_eff, peak_frac, rho1, longest_run].
    """
    season_idx: int
    t_start: float
    t_end: float
    n_grid: int
    clusters: np.ndarray  # shape (K,3): [t0, teff, dchi2]
    grid_metrics: np.ndarray  # shape (N,9): [t0,teff,dchi2,n_window,n_contrib,n_eff,peak_frac,rho1,longest_run]




# [docs]
@dataclass(frozen=True, eq=False)
class AnomalyResult:
    """
    Output of :meth:`scanomaly.finder.Finder.run`.

    This object is designed to be convenient for plotting and downstream analysis.
    Arrays are stored on CPU as NumPy arrays.

    Instances compare and hash by identity (``eq=False``). The field-wise
    ``__eq__``/``__hash__`` that ``dataclass`` would otherwise generate
    cannot handle the NumPy array fields: hashing raises ``TypeError`` and
    comparing two instances holding distinct arrays raises ``ValueError``.

    Attributes
    ----------
    time, flux, ferr : np.ndarray
        Input light curve arrays (1D).
    fit : SingleLensFitResult
        PSPL fitting result (contains params, fs, fb, chi2, model_flux, residual, etc.).
    residual : np.ndarray
        Flux residuals on CPU: flux - model_flux.
    model_flux : np.ndarray
        PSPL model flux on CPU.
    chi2_dof : float
        Reduced chi-square of the PSPL fit.
    seasons : list[SeasonSummary]
        Per-season summaries including clusters.
    clusters_all : np.ndarray
        Flattened clusters across all seasons, shape (N, 3) with rows [t0, teff, dchi2].
    grid_metrics_all : np.ndarray
        Flattened per-grid diagnostics, shape (M, 9), columns:
        [t0, teff, dchi2, n_window, n_contrib, n_eff, peak_frac, rho1, longest_run].
    best : BestCandidate | None
        Best candidate over all clusters, or None if no candidate exists.
    """
    # NOTE(review): the cross-reference in the docstring says ``scanomaly``
    # while this module's summary header says ``jacscanomaly`` -- confirm the
    # intended package path.

    # input (CPU numpy arrays for fast plotting)
    time: np.ndarray
    flux: np.ndarray
    ferr: np.ndarray

    # PSPL fit
    # NOTE(review): SingleLensFitResult is not imported in the visible part of
    # this module; the annotation only works because annotations are lazy
    # (``from __future__ import annotations``). Confirm where it is defined.
    fit: SingleLensFitResult
    residual: np.ndarray
    model_flux: np.ndarray
    chi2_dof: float

    # grid/clusters
    seasons: List[SeasonSummary]
    clusters_all: np.ndarray  # shape (N,3)
    grid_metrics_all: np.ndarray  # shape (M,9)

    # best candidate
    best: Optional[BestCandidate]

    def summary_dict(self) -> Dict[str, Any]:
        """
        Return a compact summary dictionary suitable for logging/serialization.

        Returns
        -------
        dict[str, Any]
            Always contains ``n_points``, ``n_seasons``, ``n_clusters``,
            ``n_grid_total``, ``chi2_dof`` and ``has_best``. When ``best`` is
            not None, the ``best_*`` keys for the candidate and its quality
            diagnostics are added. All values are converted to built-in
            ``int``/``float``/``bool``.
        """
        out: Dict[str, Any] = {
            "n_points": int(self.time.size),
            "n_seasons": int(len(self.seasons)),
            "n_clusters": int(self.clusters_all.shape[0]),
            "n_grid_total": int(sum(s.n_grid for s in self.seasons)),
            "chi2_dof": float(self.chi2_dof),
            "has_best": bool(self.best is not None),
        }
        if self.best is None:
            return out

        b = self.best
        q = b.quality
        out.update(
            {
                "best_t0": float(b.t0),
                "best_teff": float(b.teff),
                "best_dchi2": float(b.dchi2),
                "best_score": float(b.score),
                "best_n_window": int(q.n_window),
                "best_n_contrib": int(q.n_contrib),
                "best_n_eff": float(q.n_eff),
                "best_peak_frac": float(q.peak_frac),
                "best_rho1": float(q.rho1),
                "best_longest_run": int(q.longest_run),
            }
        )
        return out

    def summary_text(self) -> str:
        """
        Return a CLI-friendly multi-line summary.

        The text is built from :meth:`summary_dict`. When there is no best
        candidate, the summary ends with a ``best        : None`` line;
        otherwise "best candidate" and "quality" sections are appended.
        """
        d = self.summary_dict()
        lines = [
            "=== jacscanomaly summary ===",
            f"points      : {d['n_points']}",
            f"seasons     : {d['n_seasons']}",
            f"grid total  : {d['n_grid_total']}",
            f"clusters    : {d['n_clusters']}",
            f"chi2 / dof  : {d['chi2_dof']:.3f}",
        ]

        if not d["has_best"]:
            lines.append("best        : None")
            return "\n".join(lines)

        lines.extend(
            [
                "",
                "=== best candidate ===",
                f"t0          : {d['best_t0']:.6f}",
                f"teff        : {d['best_teff']:.6g}",
                f"dchi2       : {d['best_dchi2']:.6g}",
                f"score       : {d['best_score']:.3f}",
                "",
                "=== quality ===",
                f"n_window    : {d['best_n_window']}",
                f"n_contrib   : {d['best_n_contrib']}",
                f"n_eff       : {d['best_n_eff']:.3f}",
                f"peak_frac   : {d['best_peak_frac']:.3f}",
                f"rho1        : {d['best_rho1']:.3f}",
                f"longest_run : {d['best_longest_run']}",
            ]
        )
        return "\n".join(lines)

    def print_summary(self) -> None:
        """
        Print a CLI-friendly summary.
        """
        print(self.summary_text())

    def summary_table(self) -> Any:
        """
        Return a notebook-friendly single-row table.

        Returns
        -------
        pandas.DataFrame | list[dict]
            DataFrame when pandas is available, otherwise a single-item list.
        """
        row = self.summary_dict()
        # Only the optional import is guarded: a missing pandas triggers the
        # list fallback, while any error raised while building the DataFrame
        # propagates instead of being silently swallowed.
        try:
            import pandas as pd
        except ImportError:
            return [row]
        return pd.DataFrame([row])

    def __str__(self) -> str:
        """Return the same text as :meth:`summary_text`."""
        return self.summary_text()