Source code for jacscanomaly.config

from __future__ import annotations

from dataclasses import dataclass
from typing import Optional, Literal

from .criteria import CandidateCriteria

@dataclass(frozen=True)
class FinderConfig:
    """
    Configuration object for :class:`jacscanomaly.finder.Finder`.

    This dataclass collects **all hyperparameters controlling the
    anomaly-search pipeline**, excluding any numerical or model-dependent
    quantities. It is intentionally:

    - *Dependency-free* (no NumPy/JAX imports)
    - *Frozen* (immutable) for reproducibility
    - *Explicitly structured* according to pipeline stages

    The parameters are grouped according to the internal workflow of
    :class:`jacscanomaly.finder.Finder`:

    1. Season splitting
    2. Grid construction in (t0, teff)
    3. Grid scan and local evaluation
    4. Cluster extraction and selection

    Notes
    -----
    Parameters related to the **single-lens fitting model**
    (e.g. PSPL vs FSPL, parallax options, sky coordinates) are also
    placed here, so that a single configuration object fully defines
    the behavior of :class:`Finder`.
    """

    # ==================================================
    # 0) Single-lens fitter selection
    # ==================================================
    fitter_kind: Literal[
        "pspl",
        "fspl",
        "pspl_parallax",
        "fspl_parallax",
    ] = "pspl"
    """
    Choice of single-lens model used for the initial fit.

    Options
    -------
    - ``"pspl"`` : Point-Source Point-Lens (standard Paczyński curve).
    - ``"fspl"`` : Finite-Source Point-Lens (log-rho parameterization).
    - ``"pspl_parallax"`` : PSPL with annual parallax.
    - ``"fspl_parallax"`` : FSPL with annual parallax.
    """

    ra_deg: Optional[float] = None
    """Right ascension of the source (degrees). Required for parallax models."""

    dec_deg: Optional[float] = None
    """Declination of the source (degrees). Required for parallax models."""

    tref: Optional[float] = None
    """
    Reference time for annual parallax.

    If ``None``, the median observation time is used.
    """

    # ==================================================
    # 0b) Automatic single-lens initialization
    # ==================================================
    auto_init_teff_min: float = 1.0
    """Smallest teff used when estimating single-lens initial values."""

    auto_init_teff_max: float = 1000.0
    """Largest teff used when estimating single-lens initial values."""

    auto_init_teff_grid_n: int = 25
    """Number of teff grid points used for automatic initialization."""

    auto_init_dt0_coeff: float = 0.25
    """t0 grid spacing coefficient used for automatic initialization."""

    auto_init_max_clusters: int = 1
    """Maximum number of scan clusters used as t0/teff seeds."""

    auto_init_min_n_eff: float = 2.0
    """
    Minimum effective number of contributing points required for automatic
    single-lens initial grid clusters.

    This suppresses initial guesses driven by one unrealistically
    high-weight data point.
    """

    auto_init_u0_min: float = 0.01
    """Smallest allowed u0 seed after converting from teff/tE."""

    auto_init_u0_max: float = 1.0
    """Largest allowed u0 seed after converting from teff/tE."""

    auto_init_tE_min: float = 1.0
    """Smallest tE seed used in the log grid."""

    auto_init_tE_max: float = 1000.0
    """Largest tE seed used in the log grid."""

    auto_init_tE_grid_n: int = 4
    """Number of tE seeds used in the log grid."""

    auto_init_logrho: float = -7.0
    """Initial logrho used for FSPL models when x0 is omitted."""

    pspl_fit_u0_min: float = 0.01
    """Smallest allowed absolute u0 for the C++ PSPL fitter."""

    pspl_fit_min_t0_support_points: int = 3
    """Minimum number of data points required near the fitted t0."""

    pspl_fit_t0_support_tE_coeff: float = 3.0
    """Require t0 support points within +/- coeff * tE for C++ PSPL fits."""

    # ==================================================
    # 1) Season splitting
    # ==================================================
    gap: float = 100.0
    """
    Time gap threshold for season splitting.

    A new observing season is started whenever the time difference
    between consecutive data points exceeds this value.
    """

    # ==================================================
    # 2) Grid construction (t0, teff)
    # ==================================================
    teff_init: float = 0.03
    """
    Smallest effective timescale used in the grid.

    This is the first element of the geometric series defining the
    teff grid.
    """

    common_ratio: float = 4.0 / 3.0
    """
    Common ratio of the geometric progression used to generate teff values.
    """

    teff_grid_n: int = 24
    """
    Number of teff values in the grid.
    """

    dt0_coeff: float = 0.17
    """
    Grid spacing coefficient for the event time t0.

    The spacing is defined as::

        dt0 = dt0_coeff * teff
    """

    # ==================================================
    # 3) Grid scan (local evaluation window)
    # ==================================================
    sigma: float = 3.0
    """
    Threshold parameter used in per-point chi-square improvement tests.

    This threshold is used to count strongly contributing points in the
    per-candidate quality diagnostics.
    """

    teff_coeff: float = 3.0
    """
    Half-width of the local evaluation window in units of teff.

    For a grid point (t0, teff), the evaluation window is::

        [t0 - teff_coeff * teff, t0 + teff_coeff * teff]
    """

    min_pts_in_window: int = 4
    """
    Minimum number of data points required inside the local window
    to evaluate a grid point.
    """

    # ==================================================
    # 4) Cluster extraction
    # ==================================================
    overlap_sigma: float = 3.0
    """
    Overlap threshold used to group nearby grid points into clusters.

    Two grid points i and j are considered overlapping if::

        |t0_i - t0_j| < overlap_sigma * (teff_i + teff_j)
    """

    min_cluster_points: int = 3
    """
    Stop extracting clusters once the number of remaining grid points
    falls below this value.
    """

    best_score_trim_percentile: float = 95.0
    """
    Upper percentile used when estimating the background spread of
    ``dchi2`` values for the best-candidate score.

    When selecting the best candidate, the score is computed from the
    other cluster peaks after trimming values above this percentile.
    This makes the score less sensitive to a few strong secondary peaks
    that would otherwise inflate the background standard deviation.

    Set to ``100`` to disable trimming.
    """

    candidate_criteria: Optional[CandidateCriteria] = None
    """
    Optional criteria to reject anomaly candidates before best-candidate
    selection.

    If ``None``, no additional filtering is applied.
    """

    # ==================================================
    # 5) Grid execution mode
    # ==================================================
    grid_backend: Literal["jax", "cpp"] = "cpp"
    """
    Grid evaluation backend.

    - ``"cpp"`` uses the C++ for-loop backend for low-memory survey scans.
    - ``"jax"`` uses the JAX vectorized/chunked implementation.
    """

    single_fit_backend: Literal["jax", "cpp"] = "cpp"
    """
    Single-lens fit backend.

    ``"cpp"`` is implemented for ``fitter_kind="pspl"``. Other single-lens
    models continue to use the JAX fitters.
    """

    grid_chunked: bool = False
    """
    Force chunked execution of the grid scan.

    Instead of evaluating the entire (t0, teff) grid in a single ``vmap``,
    the grid is split into smaller chunks and processed sequentially.
    This reduces JAX compilation size and peak memory usage at the cost
    of a small runtime overhead.
    """

    grid_chunk_auto: bool = False
    """
    Automatically switch to chunked execution for large grids.

    If enabled, the runner uses chunked evaluation only when the total
    number of grid points exceeds ``grid_chunk_threshold``. Smaller grids
    continue to use the standard fully-vectorized execution.
    """

    grid_chunk_size: int = 4096
    """
    Number of grid points evaluated in each chunk when chunked execution
    is enabled.

    Larger values improve runtime performance but increase compilation
    size and memory usage.
    """

    grid_chunk_threshold: int = 100_000
    """
    Minimum number of grid points required to activate automatic chunking
    when ``grid_chunk_auto`` is enabled.
    """