Source code for teehr.metrics.bootstrap_funcs

"""Contains functions for bootstrap calculations for use in Spark queries."""
from typing import Dict, Callable
import logging

import pandas as pd
import numpy as np

from teehr.models.metrics.basemodels import MetricsBasemodel

logger = logging.getLogger(__name__)


def _calculate_quantiles(
    output_field_name: str,
    results: np.ndarray,
    quantiles: list
) -> Dict:
    """Calculate quantile values of the bootstrap results."""
    values = np.quantile(results, quantiles)
    quantiles = [f"{output_field_name}_{str(i)}" for i in quantiles]
    d = dict(zip(quantiles, values))
    return d


[docs] def create_circularblock_func(model: MetricsBasemodel) -> Callable: """Create the CircularBlock bootstrap function.""" logger.debug("Building the Circular Block bootstrap func.") # lazy import to improve performance from arch.bootstrap import CircularBlockBootstrap def bootstrap_func(p: pd.Series, s: pd.Series) -> Dict: """Bootstrap function.""" bs = CircularBlockBootstrap( model.bootstrap.block_size, p, s, seed=model.bootstrap.seed, random_state=model.bootstrap.random_state ) results = bs.apply( model.func(model), model.bootstrap.reps ) if model.bootstrap.quantiles is not None: return _calculate_quantiles( model.output_field_name, results, model.bootstrap.quantiles, ) else: return results.ravel() def signature_bootstrap_func(p: pd.Series) -> Dict: """Bootstrap function.""" bs = CircularBlockBootstrap( model.bootstrap.block_size, p, seed=model.bootstrap.seed, random_state=model.bootstrap.random_state ) results = bs.apply( model.func(model), model.bootstrap.reps ) if model.bootstrap.quantiles is not None: return _calculate_quantiles( model.output_field_name, results, model.bootstrap.quantiles, ) else: return results.ravel() if len(model.input_field_names) == 1: return signature_bootstrap_func return bootstrap_func
[docs] def create_gumboot_func(model: MetricsBasemodel) -> Callable: """Create the Gumboot bootstrap function.""" logger.debug("Building the Gumboot bootstrap func.") # lazy import to improve performance from teehr.metrics.gumboot_bootstrap import GumbootBootstrap def bootstrap_func(p: pd.Series, s: pd.Series, vt: pd.Series) -> Dict: """Bootstrap function.""" bs = GumbootBootstrap( p, s, value_time=vt, seed=model.bootstrap.seed, water_year_month=model.bootstrap.water_year_month, boot_year_file=model.bootstrap.boot_year_file ) results = bs.apply( model.func(model), model.bootstrap.reps ) if model.bootstrap.quantiles is not None: return _calculate_quantiles( model.output_field_name, results, model.bootstrap.quantiles, ) else: return results.ravel() def signature_bootstrap_func(p: pd.Series, vt: pd.Series) -> Dict: """Bootstrap function.""" bs = GumbootBootstrap( p, value_time=vt, seed=model.bootstrap.seed, water_year_month=model.bootstrap.water_year_month, boot_year_file=model.bootstrap.boot_year_file ) results = bs.apply( model.func(model), model.bootstrap.reps ) if model.bootstrap.quantiles is not None: return _calculate_quantiles( model.output_field_name, results, model.bootstrap.quantiles, ) else: return results.ravel() if len(model.input_field_names) == 1: return signature_bootstrap_func return bootstrap_func
[docs] def create_stationary_func(model: MetricsBasemodel) -> Callable: """Create the Stationary bootstrap function.""" logger.debug("Building the Stationary bootstrap func.") # lazy import to improve performance from arch.bootstrap import StationaryBootstrap def bootstrap_func(p: pd.Series, s: pd.Series) -> Dict: """Bootstrap function.""" bs = StationaryBootstrap( model.bootstrap.block_size, p, s, seed=model.bootstrap.seed, random_state=model.bootstrap.random_state ) results = bs.apply( model.func(model), model.bootstrap.reps ) if model.bootstrap.quantiles is not None: return _calculate_quantiles( model.output_field_name, results, model.bootstrap.quantiles, ) else: return results.ravel() def signature_bootstrap_func(p: pd.Series) -> Dict: """Bootstrap function.""" bs = StationaryBootstrap( model.bootstrap.block_size, p, seed=model.bootstrap.seed, random_state=model.bootstrap.random_state ) results = bs.apply( model.func(model), model.bootstrap.reps ) if model.bootstrap.quantiles is not None: return _calculate_quantiles( model.output_field_name, results, model.bootstrap.quantiles, ) else: return results.ravel() if len(model.input_field_names) == 1: return signature_bootstrap_func return bootstrap_func