Source code for jacscanomaly.models

from __future__ import annotations

from dataclasses import dataclass
from typing import Any, Dict, List, Optional

import numpy as np


@dataclass(frozen=True)
class CandidateQuality:
    """
    Quality diagnostics for one candidate, built from the per-point
    chi-square improvement.

    Instances are immutable (``frozen=True``).

    Attributes
    ----------
    n_window : int
        How many points fall inside the local chi2 evaluation window.
    n_contrib : int
        How many points improve by more than the configured threshold.
    n_eff : float
        Effective count of contributing points (participation-ratio style).
    peak_frac : float
        Share of the total positive improvement owed to the single
        strongest point.
    rho1 : float
        Lag-1 autocorrelation of the signed per-point improvements inside
        the window.
    longest_run : int
        Length of the longest consecutive run of above-threshold
        contributing points.
    """

    # --- support counts ---
    n_window: int
    n_contrib: int
    n_eff: float

    # --- concentration of the improvement ---
    peak_frac: float

    # --- temporal structure ---
    rho1: float
    longest_run: int
@dataclass(frozen=True)
class BestCandidate:
    """
    The top anomaly candidate chosen among all extracted clusters.

    Instances are immutable (``frozen=True``).

    Attributes
    ----------
    t0 : float
        Center time of the candidate.
    teff : float
        Effective timescale of the candidate.
    dchi2 : float
        Chi-square improvement, ``chi2_null - chi2_anom``; larger values
        are better.
    med_others : float
        Median dchi2 over the bulk of the remaining candidates (the best
        one is excluded, and the upper tail is trimmed when configured).
    std_others : float
        Standard deviation of dchi2 over the bulk of the remaining
        candidates (the best one is excluded, and the upper tail is
        trimmed when configured).
    score : float
        Standardized score of the best candidate, computed as
        ``(dchi2_best - med_others) / std_others``. It may be NaN/inf
        depending on the number of candidates / ``std_others``.
    quality : CandidateQuality
        Per-point support and temporal diagnostics of this candidate.
    """

    # --- candidate location and strength ---
    t0: float
    teff: float
    dchi2: float

    # --- statistics of the other candidates, and the resulting score ---
    med_others: float
    std_others: float
    score: float

    # --- per-point diagnostics ---
    quality: CandidateQuality
@dataclass(frozen=True)
class SeasonSummary:
    """
    Anomaly-scan summary for one season.

    The dataclass is frozen, so attributes cannot be rebound after
    construction; freezing does not make the stored arrays themselves
    read-only.

    Attributes
    ----------
    season_idx : int
        Season index, counted from 0.
    t_start, t_end : float
        Start and end of the season's time range.
    n_grid : int
        How many grid points were evaluated in this season.
    clusters : np.ndarray
        Clusters extracted for this season, shape (K, 3); each row is
        [t0, teff, dchi2].
    grid_metrics : np.ndarray
        Raw per-grid diagnostics, shape (N, 9), with columns
        [t0, teff, dchi2, n_window, n_contrib, n_eff, peak_frac, rho1,
        longest_run].
    """

    # --- season identity and extent ---
    season_idx: int
    t_start: float
    t_end: float
    n_grid: int

    # --- scan products ---
    # (K, 3) rows: [t0, teff, dchi2]
    clusters: np.ndarray
    # (N, 9) columns:
    # [t0, teff, dchi2, n_window, n_contrib, n_eff, peak_frac, rho1, longest_run]
    grid_metrics: np.ndarray
@dataclass(frozen=True)
class AnomalyResult:
    """
    Output of :meth:`scanomaly.finder.Finder.run`.

    The object is meant to be handy for plotting and for downstream
    analysis. Arrays are kept on CPU as NumPy arrays.

    Attributes
    ----------
    time, flux, ferr : np.ndarray
        The input light curve arrays (1D).
    fit : SingleLensFitResult
        Result of the PSPL fit (holds params, fs, fb, chi2, model_flux,
        residual, etc.).
    residual : np.ndarray
        CPU flux residuals: flux - model_flux.
    model_flux : np.ndarray
        CPU PSPL model flux.
    chi2_dof : float
        Reduced chi-square of the PSPL fit.
    seasons : list[SeasonSummary]
        One summary per season, clusters included.
    clusters_all : np.ndarray
        Clusters of all seasons flattened together, shape (N, 3); each row
        is [t0, teff, dchi2].
    grid_metrics_all : np.ndarray
        Per-grid diagnostics of all seasons flattened together, shape
        (M, 9), with columns [t0, teff, dchi2, n_window, n_contrib, n_eff,
        peak_frac, rho1, longest_run].
    best : BestCandidate | None
        Best candidate across all clusters; None when there is no
        candidate.
    """

    # NOTE(review): this module is jacscanomaly.models while the docstring
    # cross-reference above points at ``scanomaly.finder`` - confirm the target.

    # --- input light curve (CPU numpy arrays for fast plotting) ---
    time: np.ndarray
    flux: np.ndarray
    ferr: np.ndarray

    # --- PSPL fit ---
    # NOTE(review): SingleLensFitResult is not imported in the visible part of
    # this module; the annotation is never evaluated because of
    # ``from __future__ import annotations`` - confirm where it is defined.
    fit: SingleLensFitResult
    residual: np.ndarray
    model_flux: np.ndarray
    chi2_dof: float

    # --- grid / clusters ---
    seasons: List[SeasonSummary]
    clusters_all: np.ndarray  # shape (N,3)
    grid_metrics_all: np.ndarray  # shape (M,9)

    # --- best candidate ---
    best: Optional[BestCandidate]

    def summary_dict(self) -> Dict[str, Any]:
        """
        Build a compact dictionary of headline numbers, suitable for
        logging/serialization.

        Returns
        -------
        dict[str, Any]
            Always holds ``n_points``, ``n_seasons``, ``n_clusters``,
            ``n_grid_total``, ``chi2_dof`` and ``has_best``. When ``best``
            is not None, the ``best_*`` entries (t0, teff, dchi2, score and
            the quality diagnostics) are appended. Every value is cast to a
            builtin ``int``/``float``/``bool``.
        """
        summary: Dict[str, Any] = dict(
            n_points=int(self.time.size),
            n_seasons=len(self.seasons),
            n_clusters=int(self.clusters_all.shape[0]),
            n_grid_total=int(sum(season.n_grid for season in self.seasons)),
            chi2_dof=float(self.chi2_dof),
            has_best=self.best is not None,
        )

        candidate = self.best
        if candidate is not None:
            quality = candidate.quality
            summary.update(
                best_t0=float(candidate.t0),
                best_teff=float(candidate.teff),
                best_dchi2=float(candidate.dchi2),
                best_score=float(candidate.score),
                best_n_window=int(quality.n_window),
                best_n_contrib=int(quality.n_contrib),
                best_n_eff=float(quality.n_eff),
                best_peak_frac=float(quality.peak_frac),
                best_rho1=float(quality.rho1),
                best_longest_run=int(quality.longest_run),
            )
        return summary

    def summary_text(self) -> str:
        """
        Render the summary as CLI-friendly multi-line text.

        Returns
        -------
        str
            Newline-joined report. The header section is always present;
            it is followed either by a ``best : None`` line or by the
            best-candidate and quality sections.
        """
        info = self.summary_dict()

        report = [
            "=== jacscanomaly summary ===",
            f"points : {info['n_points']}",
            f"seasons : {info['n_seasons']}",
            f"grid total : {info['n_grid_total']}",
            f"clusters : {info['n_clusters']}",
            f"chi2 / dof : {info['chi2_dof']:.3f}",
        ]

        if info["has_best"]:
            report += [
                "",
                "=== best candidate ===",
                f"t0 : {info['best_t0']:.6f}",
                f"teff : {info['best_teff']:.6g}",
                f"dchi2 : {info['best_dchi2']:.6g}",
                f"score : {info['best_score']:.3f}",
                "",
                "=== quality ===",
                f"n_window : {info['best_n_window']}",
                f"n_contrib : {info['best_n_contrib']}",
                f"n_eff : {info['best_n_eff']:.3f}",
                f"peak_frac : {info['best_peak_frac']:.3f}",
                f"rho1 : {info['best_rho1']:.3f}",
                f"longest_run : {info['best_longest_run']}",
            ]
        else:
            report.append("best : None")

        return "\n".join(report)

    def print_summary(self) -> None:
        """
        Write the CLI-friendly summary to standard output.
        """
        text = self.summary_text()
        print(text)

    def summary_table(self):
        """
        Produce a notebook-friendly single-row table.

        Returns
        -------
        pandas.DataFrame | list[dict]
            A one-row DataFrame when pandas is available, otherwise a
            single-item list holding the summary dictionary.
        """
        record = self.summary_dict()
        try:
            import pandas as pd

            table = pd.DataFrame([record])
        except Exception:
            # Any failure while importing pandas or building the frame
            # falls back to the plain list form.
            return [record]
        return table

    def __str__(self) -> str:
        # str(result) yields the same text as summary_text().
        return self.summary_text()