# Source code for teehr.metrics.models.bootstrap

"""Classes for bootstrapping sampling methods."""
from typing import Callable, Union
from pathlib import Path

from numpy.random import RandomState
from pydantic import Field

import teehr.metrics.bootstrap_funcs as bootstrap_funcs
from teehr.metrics.models.base import BootstrapBasemodel


class Gumboot(BootstrapBasemodel):
    """Water-year block bootstrap modeled on the Gumboot R package.

    Only part of the Gumboot R package is implemented here. The method is
    a non-overlapping block bootstrap in which each block is one water
    year. Water years are drawn at random, with replacement, from the
    input timeseries and assembled into a synthetic timeseries. The
    requested performance metric is evaluated on each synthetic
    timeseries, once per bootstrap replication (rep), and the quantiles
    of those metric values are computed and returned.

    When no quantiles are given (or they are set to None), the raw array
    of metric values is returned instead (dimensions: [reps, 1]). When
    quantiles are given, the corresponding quantiles of the metric values
    are returned as a dictionary.

    See Also: Clark et al. (2021), "The abuse of popular performance
      metrics in hydrologic modeling", Water Resources Research,
      <doi:10.1029/2020WR029001>

      https://cran.r-project.org/web/packages/gumboot/gumboot.pdf

    Parameters
    ----------
    boot_year_file : Union[str, Path, None]
        Path to the boot year csv file. Defaults to None.
    water_year_month : int
        Month in which the water year begins. Defaults to 10.
    """

    # User-configurable settings.
    boot_year_file: Union[str, Path, None] = None
    water_year_month: int = 10
    name: str = Field(default="Gumboot")

    # Fixed wiring for this bootstrapper; declared frozen.
    include_value_time: bool = Field(default=True, frozen=True)
    func: Callable = Field(
        default=bootstrap_funcs.create_gumboot_func,
        frozen=True,
    )


class CircularBlock(BootstrapBasemodel):
    """Circular block bootstrap, backed by the arch python package.

    Parameters
    ----------
    random_state : RandomState, optional
        Random state used by the random number generator.
    block_size : int or None, optional
        Block size passed to the CircularBlockBootstrap.
        When left as ``None`` (the default), TEEHR estimates an optimal
        block size by running ``arch.bootstrap.optimal_block_length`` on
        the primary metric input series and takes the ``b_cb`` estimate.
    """

    # User-configurable settings.
    random_state: Union[RandomState, None] = None
    block_size: Union[int, None] = None
    name: str = Field(default="CircularBlock")

    # Fixed wiring for this bootstrapper; declared frozen.
    include_value_time: bool = Field(default=False, frozen=True)
    func: Callable = Field(
        default=bootstrap_funcs.create_circularblock_func,
        frozen=True,
    )


class Stationary(BootstrapBasemodel):
    """Stationary bootstrap, backed by the arch python package.

    Parameters
    ----------
    random_state : RandomState, optional
        Random state used by the random number generator.
    block_size : int or None, optional
        Block size passed to the StationaryBootstrap.
        When left as ``None`` (the default), TEEHR estimates an optimal
        block size by running ``arch.bootstrap.optimal_block_length`` on
        the primary metric input series and takes the ``b_sb`` estimate.
    """

    # User-configurable settings.
    random_state: Union[RandomState, None] = None
    block_size: Union[int, None] = None
    name: str = Field(default="Stationary")

    # Fixed wiring for this bootstrapper; declared frozen.
    include_value_time: bool = Field(default=False, frozen=True)
    func: Callable = Field(
        default=bootstrap_funcs.create_stationary_func,
        frozen=True,
    )


class Bootstrappers:
    """Container class for bootstrap sampling classes.

    Notes
    -----
    Bootstrapping is a resampling method used to estimate uncertainty
    in metric results. The bootstrapping methods available in TEEHR
    include:

    - Gumboot
    - CircularBlock
    - Stationary
    """

    # Expose the module-level bootstrap model classes as attributes so
    # they can be reached as ``Bootstrappers.<Name>``.
    Gumboot = Gumboot
    CircularBlock = CircularBlock
    Stationary = Stationary