diff --git a/aeon/transformations/acf.py b/aeon/transformations/acf.py index 4c109ed9c6..66992f1a74 100644 --- a/aeon/transformations/acf.py +++ b/aeon/transformations/acf.py @@ -18,7 +18,7 @@ @deprecated( version="0.9.0", reason="AutoCorrelationTransformer statsmodels wrapper will be removed in version " - "0.10. Please use the BaseSeriesTransformer version in the " + "0.10.0. Please use the BaseSeriesTransformer version in the " "transformations.series module called StatsModelsACF for a wrapper version.", category=FutureWarning, ) @@ -156,8 +156,8 @@ def get_test_params(cls, parameter_set="default"): @deprecated( version="0.9.0", reason="PartialAutoCorrelationTransformer statsmodels wrapper will be removed" - "in version 0.10. Please use the BaseSeriesTransformer version in the " - "transformations.series module called StatsModelsACF.", + "in version 0.10.0. Please use the BaseSeriesTransformer version in the " + "transformations.series module called StatsModelsPACF.", category=FutureWarning, ) class PartialAutoCorrelationTransformer(BaseTransformer): diff --git a/aeon/transformations/augmenter.py b/aeon/transformations/augmenter.py index 59fe847789..fa98c5ed8c 100644 --- a/aeon/transformations/augmenter.py +++ b/aeon/transformations/augmenter.py @@ -37,7 +37,7 @@ class _AugmenterTags: # TODO: remove in v0.10.0 @deprecated( version="0.9.0", - reason="WhiteNoiseAugmenter will be removed in version 0.10.", + reason="WhiteNoiseAugmenter will be removed in version 0.10.0.", category=FutureWarning, ) class WhiteNoiseAugmenter(_AugmenterTags, BaseTransformer): @@ -89,7 +89,7 @@ def _transform(self, X, y=None): # TODO: remove in v0.10.0 @deprecated( version="0.9.0", - reason="ReverseAugmenter will be removed in version 0.10.", + reason="ReverseAugmenter will be removed in version 0.10.0.", category=FutureWarning, ) class ReverseAugmenter(_AugmenterTags, BaseTransformer): @@ -129,7 +129,7 @@ def _transform(self, X, y=None): # TODO: remove in v0.10.0 @deprecated( 
version="0.9.0", - reason="InvertAugmenter will be removed in version 0.10.", + reason="InvertAugmenter will be removed in version 0.10.0.", category=FutureWarning, ) class InvertAugmenter(_AugmenterTags, BaseTransformer): @@ -162,7 +162,7 @@ def _transform(self, X, y=None): # TODO: remove in v0.10.0 @deprecated( version="0.9.0", - reason="RandomSamplesAugmenter will be removed in version 0.10.", + reason="RandomSamplesAugmenter will be removed in version 0.10.0.", category=FutureWarning, ) class RandomSamplesAugmenter(_AugmenterTags, BaseTransformer): diff --git a/aeon/transformations/binning.py b/aeon/transformations/binning.py index 4c130e1d7c..056b5c855e 100644 --- a/aeon/transformations/binning.py +++ b/aeon/transformations/binning.py @@ -14,7 +14,7 @@ # TODO: remove in v0.10.0 @deprecated( version="0.9.0", - reason="TimeBinAggregate will be removed in version 0.10.", + reason="TimeBinAggregate will be removed in version 0.10.0.", category=FutureWarning, ) class TimeBinAggregate(BaseTransformer): diff --git a/aeon/transformations/boxcox.py b/aeon/transformations/boxcox.py index 9d7666cf4f..5a89842354 100644 --- a/aeon/transformations/boxcox.py +++ b/aeon/transformations/boxcox.py @@ -44,7 +44,7 @@ def _calc_uniform_order_statistic_medians(n): # TODO: remove in v0.10.0 @deprecated( version="0.9.0", - reason="BoxCoxTransformer will be removed in version 0.10 and replaced with a " + reason="BoxCoxTransformer will be removed in version 0.10.0 and replaced with a " "BaseSeriesTransformer version in the transformations.series module.", category=FutureWarning, ) @@ -217,7 +217,7 @@ def _inverse_transform(self, X, y=None): # TODO: remove in v0.10.0 @deprecated( version="0.9.0", - reason="LogTransformer will be removed in version 0.10 and replaced with a " + reason="LogTransformer will be removed in version 0.10.0 and replaced with a " "BaseSeriesTransformer version in the transformations.series module.", category=FutureWarning, ) diff --git 
a/aeon/transformations/clear_sky.py b/aeon/transformations/clear_sky.py index 5cc9cdbf2c..344fbb933e 100644 --- a/aeon/transformations/clear_sky.py +++ b/aeon/transformations/clear_sky.py @@ -4,12 +4,20 @@ import numpy as np import pandas as pd +from deprecated.sphinx import deprecated from joblib import Parallel, delayed from scipy.stats import vonmises from aeon.transformations.base import BaseTransformer +# TODO: remove in v0.10.0 +@deprecated( + version="0.9.0", + reason="ClearSky will be removed in version 0.10.0 and replaced with a " + "BaseSeriesTransformer version in the transformations.series module.", + category=FutureWarning, +) class ClearSky(BaseTransformer): """Clear sky transformer for solar data. diff --git a/aeon/transformations/cos.py b/aeon/transformations/cos.py index 9a539097c1..cb6040243e 100644 --- a/aeon/transformations/cos.py +++ b/aeon/transformations/cos.py @@ -12,8 +12,7 @@ # TODO: remove in v0.10.0 @deprecated( version="0.9.0", - reason="CosineTransformer scikit wrapper will be removed in version 0.10. 
Please " - "just use scikit learn.", + reason="CosineTransformer will be removed in version 0.10.", category=FutureWarning, ) class CosineTransformer(BaseTransformer): diff --git a/aeon/transformations/difference.py b/aeon/transformations/difference.py index 073a73f7d1..81b6cfcb45 100644 --- a/aeon/transformations/difference.py +++ b/aeon/transformations/difference.py @@ -172,7 +172,7 @@ def _inverse_diff(X, lags, X_diff_seq=None): # TODO: remove v0.10.0 @deprecated( version="0.9.0", - reason="The Differencer class will be removed in C0.10.0, see issue #1534", + reason="The Differencer class will be removed in v0.10.0, see issue #1534", category=FutureWarning, ) class Differencer(BaseTransformer): diff --git a/aeon/transformations/dobin.py b/aeon/transformations/dobin.py index 4ac9f6ab49..69adbef755 100644 --- a/aeon/transformations/dobin.py +++ b/aeon/transformations/dobin.py @@ -20,7 +20,7 @@ # TODO: remove in v0.10.0 @deprecated( version="0.9.0", - reason="DOBIN will be removed in version 0.10 and replaced with a " + reason="DOBIN will be removed in version 0.10.0 and replaced with a " "BaseSeriesTransformer version in the transformations.series module.", category=FutureWarning, ) diff --git a/aeon/transformations/exponent.py b/aeon/transformations/exponent.py index 3357130326..b85f912e91 100644 --- a/aeon/transformations/exponent.py +++ b/aeon/transformations/exponent.py @@ -189,7 +189,7 @@ def get_test_params(cls, parameter_set="default"): # TODO: remove in v0.10.0 @deprecated( version="0.9.0", - reason="SqrtTransformer will be removed in version 0.10.", + reason="SqrtTransformer will be removed in version 0.10.0.", category=FutureWarning, ) class SqrtTransformer(ExponentTransformer): diff --git a/aeon/transformations/series/__init__.py b/aeon/transformations/series/__init__.py index 43d0891e93..24baf8bed1 100644 --- a/aeon/transformations/series/__init__.py +++ b/aeon/transformations/series/__init__.py @@ -3,6 +3,7 @@ __all__ = [ 
"""Clear sky transformer for solar time-series."""

__maintainer__ = []

import numpy as np
import pandas as pd
from joblib import Parallel, delayed
from scipy.stats import vonmises

from aeon.transformations.series.base import BaseSeriesTransformer

# Conversion constants used to express a time of day as fractional hours.
_SECONDS_PER_MINUTE = 60
_SECONDS_PER_HOUR = 3600


class ClearSkyTransformer(BaseSeriesTransformer):
    """Clear sky transformer for solar data.

    This is a transformation which converts a time series from its original
    domain into a percentage domain. The numerator at each time step in the
    transformation is the input values, the denominator is a weighted
    quantile of the time series for that particular time step. In the example
    of solar power transformations, the denominator is an approximation of the
    clear sky power, and the output of the transformation is the clearness index.

    The clear sky power, i.e. the denominator, is calculated on a grid containing
    each unique combination of time-of-day and day-of-year. The spacing of the
    grid depends on the frequency of the input data.

    The weights are defined using von-mises kernels with bandwidths chosen by the
    user.

    This transformation can be inaccurate at low values, in the solar example
    during early morning and late evening. Therefore, clear sky values below a
    threshold can be fixed to zero in the transformed domain. Denominator values
    of zero are set to zero in the transformed domain by default.

    This transformer is based on the work detailed in [1]_.

    Parameters
    ----------
    quantile_prob : float, default=0.95
        The probability level used to calculate the weighted quantile
    bw_diurnal : float, default=100
        The bandwidth of the diurnal kernel. This is the kappa value of the
        von mises kernel for time of day.
    bw_annual : float, default=10
        The bandwidth of the annual kernel. This is the kappa value of the
        von mises kernel for day of year.
    min_thresh : float, default=0
        The threshold of the clear sky power below which values are
        set to zero in the transformed domain.
    n_jobs : int or None, default=None
        Number of jobs to run in parallel.
        None means 1 unless in a joblib.parallel_backend context.
        -1 means using all processors.
    backend : str, default="loky"
        Specify the parallelisation backend implementation in joblib, where
        "loky" is used by default.

    Attributes
    ----------
    freq : str or pandas offset
        Frequency of the series seen in ``fit``, as returned by ``_check_index``.
    clearskypower : pd.Series
        Estimated clear sky power, indexed by a (day-of-year, time-of-day)
        ``pd.MultiIndex`` with levels named ``"yday"`` and ``"tod"``. Set in
        ``fit``.

    References
    ----------
    .. [1] https://doi.org/10.1016/j.solener.2009.05.016

    Examples
    --------
    >>> from aeon.transformations.series import ClearSkyTransformer  # doctest: +SKIP
    >>> from aeon.datasets import load_solar  # doctest: +SKIP
    >>> y = load_solar()  # doctest: +SKIP
    >>> transformer = ClearSkyTransformer()  # doctest: +SKIP
    >>> # takes ~1min
    >>> y_trafo = transformer.fit_transform(y)  # doctest: +SKIP
    """

    _tags = {
        "X_inner_type": "pd.Series",
        "enforce_index_type": [pd.DatetimeIndex, pd.PeriodIndex],
        "transform-returns-same-time-index": True,
        "skip-inverse-transform": False,
        "capability:inverse_transform": True,
        "capability:multivariate": False,
        "capability:missing_values": True,
        "capability:missing_values:removes": True,
        "python_dependencies": "statsmodels",
    }

    def __init__(
        self,
        quantile_prob=0.95,
        bw_diurnal=100,
        bw_annual=10,
        min_thresh=0,
        n_jobs=None,
        backend="loky",
    ):
        self.quantile_prob = quantile_prob
        self.bw_diurnal = bw_diurnal
        self.bw_annual = bw_annual
        self.min_thresh = min_thresh
        self.n_jobs = n_jobs
        self.backend = backend

        super().__init__(axis=0)

    def _fit(self, X, y=None):
        """Fit transformer to X and y.

        Private _fit containing the core logic, called from fit. Estimates the
        clear sky power on a grid of every day-of-year (1 to 366) crossed with
        every time-of-day step implied by the frequency of ``X``.

        Parameters
        ----------
        X : pd.Series
            Data used to estimate the clear sky power.
        y : Ignored argument for interface compatibility.

        Returns
        -------
        self : reference to self

        Raises
        ------
        ValueError
            If the index of ``X`` fails the checks in ``_check_index``.
        """
        # check that the data is formatted correctly and remember its frequency
        self.freq = _check_index(X)

        # seasonal coordinates (day-of-year, time-of-day) of every observation
        df = pd.DataFrame(index=X.index)
        df["yday"] = df.index.dayofyear
        df["tod"] = _time_of_day_hours(df.index)

        # smoothing grid: one time-of-day point per step of the input frequency.
        # The final point (exactly one day) duplicates time zero and is dropped.
        tod_steps = pd.timedelta_range(
            start=pd.Timedelta(0), end=pd.Timedelta(days=1), freq=self.freq
        )[:-1]
        tod = [
            step.total_seconds() / _SECONDS_PER_HOUR
            for step in tod_steps.to_pytimedelta()
        ]
        yday = pd.RangeIndex(start=1, stop=367)
        indx = pd.MultiIndex.from_product([yday, tod], names=["yday", "tod"])

        # one weighted-quantile estimate per grid point, run through joblib
        parallel = Parallel(n_jobs=self.n_jobs, backend=self.backend)
        csp = parallel(
            delayed(_clearskypower)(
                y=X,
                q=self.quantile_prob,
                tod_i=tod_i,
                doy_i=doy_i,
                tod_vec=df["tod"],
                doy_vec=df["yday"],
                bw_tod=self.bw_diurnal,
                bw_doy=self.bw_annual,
            )
            for doy_i, tod_i in indx
        )
        csp = pd.Series(csp, index=indx, dtype="float64")
        self.clearskypower = csp.sort_index()

        return self

    def _transform(self, X, y=None):
        """Transform X and return a transformed version.

        Private _transform containing core logic, called from transform.
        Divides ``X`` by the fitted clear sky power for the matching
        day-of-year and time-of-day.

        Parameters
        ----------
        X : pd.Series
            Data to be transformed.
        y : Ignored argument for interface compatibility.

        Returns
        -------
        X_trafo : transformed version of X

        Raises
        ------
        ValueError
            If the frequency of ``X`` differs from the one seen in ``fit``, or
            the index of ``X`` fails the checks in ``_check_index``.
        """
        csp = self._seasonal_clearskypower(X)
        X_trafo = X / csp

        # threshold for small morning/evening values; missing inputs stay missing
        X_trafo[(csp <= self.min_thresh) & (X.notnull())] = 0

        return X_trafo

    def _inverse_transform(self, X, y=None):
        """Inverse transform, inverse operation to transform.

        Private _inverse_transform containing core logic, called from
        inverse_transform. Multiplies ``X`` by the fitted clear sky power for
        the matching day-of-year and time-of-day.

        Parameters
        ----------
        X : pd.Series
            Data to be inverse transformed.
        y : Ignored argument for interface compatibility.

        Returns
        -------
        X_trafo : inverse transformed version of X

        Raises
        ------
        ValueError
            If the frequency of ``X`` differs from the one seen in ``fit``, or
            the index of ``X`` fails the checks in ``_check_index``.
        """
        csp = self._seasonal_clearskypower(X)
        X_trafo = X * csp

        return X_trafo

    def _seasonal_clearskypower(self, X):
        """Return the fitted clear sky power aligned to the index of X.

        Parameters
        ----------
        X : pd.Series
            Series whose index determines which grid values are looked up.

        Returns
        -------
        csp : pd.Series
            Copy of the fitted clear sky power values, re-indexed with
            ``X.index``.

        Raises
        ------
        ValueError
            If the frequency of ``X`` differs from the one seen in ``fit``.
        """
        _freq_ind = _check_index(X)
        if self.freq != _freq_ind:
            raise ValueError(
                """
                Change in frequency detected from original input. Make sure
                X is the same frequency as used in .fit().
                """
            )
        # get required seasonal index
        yday = X.index.dayofyear
        tod = _time_of_day_hours(X.index)
        indx_seasonal = pd.MultiIndex.from_arrays([yday, tod], names=["yday", "tod"])

        # look up values and overwrite index
        csp = self.clearskypower[indx_seasonal].copy()
        csp.index = X.index
        return csp

    def _get_fitted_params(self):
        """Get fitted parameters.

        Returns
        -------
        fitted_params : dict
            Dictionary with the single key ``"clearskypower"``.
        """
        params = {"clearskypower": self.clearskypower}

        return params

    @classmethod
    def get_test_params(cls, parameter_set="default"):
        """Return testing parameter settings for the estimator.

        Parameters
        ----------
        parameter_set : str, default="default"
            Name of the set of test parameters to return, for use in tests. If no
            special parameters are defined for a value, will return `"default"` set.
            There are currently no reserved values for transformers.

        Returns
        -------
        params : dict or list of dict, default = {}
            Parameters to create testing instances of the class
            Each dict are parameters to construct an "interesting" test instance, i.e.,
            `MyClass(**params)` or `MyClass(**params[i])` creates a valid test instance.
            `create_test_instance` uses the first (or only) dictionary in `params`
        """
        params = {
            "quantile_prob": 0.95,
            "bw_diurnal": 100,
            "bw_annual": 10,
            "min_thresh": None,
        }

        return params


def _time_of_day_hours(index):
    """Return the time of day of each index entry as fractional hours.

    The value is built from whole seconds and divided once, so it is bit-for-bit
    equal to ``total_seconds() / 3600`` of the matching offset from midnight,
    which is how the seasonal grid in ``ClearSkyTransformer._fit`` is built.

    Parameters
    ----------
    index : pd.DatetimeIndex or pd.PeriodIndex
        Index exposing ``hour``, ``minute`` and ``second`` fields.

    Returns
    -------
    tod : pd.Index
        Time of day in hours.
    """
    seconds = (
        index.hour * _SECONDS_PER_HOUR
        + index.minute * _SECONDS_PER_MINUTE
        + index.second
    )
    return seconds / _SECONDS_PER_HOUR


def _clearskypower(y, q, tod_i, doy_i, tod_vec, doy_vec, bw_tod, bw_doy):
    """Estimate the clear sky power for a given day-of-year and hour-of-day.

    Parameters
    ----------
    y : Series of measurements
    q : Probability level used for the quantile
    tod_i : time-of-day of interest in hours
    doy_i : day-of-year of interest in days
    tod_vec : Series of time-of-day corresponding to the index of y
    doy_vec: Series of day-of-year corresponding to the index of y
    bw_tod : Kappa value used for defining weights for time-of-day
    bw_doy : Kappa value used for defining weights for day-of-year

    Returns
    -------
    csp : float
        The clear sky power at tod_i and doy_i
    """
    # statsmodels is a soft dependency, so it is imported only when needed
    from statsmodels.stats.weightstats import DescrStatsW

    # map both seasonal coordinates onto the circle before applying the kernels
    wts_tod = vonmises.pdf(
        x=tod_i * 2 * np.pi / 24, kappa=bw_tod, loc=tod_vec * 2 * np.pi / 24
    )
    wts_doy = vonmises.pdf(
        x=doy_i * 2 * np.pi / 365.25, kappa=bw_doy, loc=doy_vec * 2 * np.pi / 365.25
    )

    # combined weight of every observation, normalised to sum to one
    wts = wts_doy * wts_tod
    wts = wts / wts.sum()

    csp = DescrStatsW(y, weights=wts).quantile(probs=q).values[0]

    return csp


def _check_index(X):
    """Check input value frequency is set and we have the correct index.

    Parameters
    ----------
    X : pd.Series or pd.DataFrame
        Data whose index is validated.

    Raises
    ------
    ValueError : Input index must be class pd.DatetimeIndex or pd.PeriodIndex.
    ValueError : Input index frequency cannot be inferred and is not set.
    ValueError : Frequency of data not suitable for transformer as is.

    Returns
    -------
    freq_ind : str or pandas offset
        Frequency of the data: the ``freq`` attribute of the index when it is
        set, otherwise the result of ``pd.infer_freq``.
    """
    if not isinstance(X.index, (pd.DatetimeIndex, pd.PeriodIndex)):
        raise ValueError(
            "Input index must be class pd.DatetimeIndex or pd.PeriodIndex."
        )
    # check that it has a frequency, if not infer
    freq_ind = X.index.freq
    if freq_ind is None:
        freq_ind = pd.infer_freq(X.index)
    if freq_ind is None:
        raise ValueError("Input index frequency cannot be inferred and is not set.")

    # explicit Timedelta bounds avoid the deprecated "T" minute alias
    tod = pd.timedelta_range(
        start=pd.Timedelta(0), end=pd.Timedelta(days=1), freq=freq_ind
    )
    # check frequency of tod: the step must lie between one second and one day
    if tod.freq > pd.offsets.Day(1) or tod.freq < pd.offsets.Second(1):
        raise ValueError(
            "Transformer intended to be used with data sampled at an interval of "
            "at most one day and at least one second. Contributions welcome on "
            "adapting for these use cases."
        )
    return freq_ind
@@ -30,7 +30,7 @@ def test_clearsky_trafo_vals(): y_missing = y.copy() y_missing.iloc[6:12] = np.nan - cs_model_missing = ClearSky() + cs_model_missing = ClearSkyTransformer() y_trafo_missing = cs_model_missing.fit_transform(y_missing) msg = "ClearSky transformer not returning correct number of values." @@ -39,7 +39,7 @@ def test_clearsky_trafo_vals(): y_period = y.copy() y_period.index = y_period.index.to_period() - cs_model_period = ClearSky() + cs_model_period = ClearSkyTransformer() y_trafo_period = cs_model_period.fit_transform(y_period) msg = "PeriodIndex and DatetimeIndex returning different values" @@ -56,7 +56,7 @@ def test_clearsky_trafo_range_exception(): # range index should not work y = y.reset_index(drop=True) - cs_model = ClearSky() + cs_model = ClearSkyTransformer() with pytest.raises(ValueError): cs_model.fit_transform(y) @@ -71,7 +71,7 @@ def test_clearsky_trafo_nofreq_exception(): # no set or inferrable frequency should not work y = y.drop(pd.to_datetime("2021-05-01 00:30:00", utc=True)) - cs_model = ClearSky() + cs_model = ClearSkyTransformer() with pytest.raises(ValueError): cs_model.fit_transform(y) @@ -86,6 +86,6 @@ def test_clearsky_trafo_grdaily_exception(): # gr daily frequency should not work y = y.asfreq("2D") - cs_model = ClearSky() + cs_model = ClearSkyTransformer() with pytest.raises(ValueError): cs_model.fit_transform(y) diff --git a/aeon/transformations/subset.py b/aeon/transformations/subset.py index 1d49ce60cc..397f9ced29 100644 --- a/aeon/transformations/subset.py +++ b/aeon/transformations/subset.py @@ -12,7 +12,7 @@ # TODO: remove in v0.10.0 @deprecated( version="0.9.0", - reason="IndexSubset will be removed in version 0.10.", + reason="IndexSubset will be removed in version 0.10.0.", category=FutureWarning, ) class IndexSubset(BaseTransformer): @@ -118,7 +118,7 @@ def get_test_params(cls, parameter_set="default"): # TODO: remove in v0.10.0 @deprecated( version="0.9.0", - reason="ColumnSelect will be removed in version 0.10.", 
+ reason="ColumnSelect will be removed in version 0.10.0.", category=FutureWarning, ) class ColumnSelect(BaseTransformer):