diff --git a/MANIFEST.in b/MANIFEST.in
index 46a874d6f7..3969afe5d8 100644
--- a/MANIFEST.in
+++ b/MANIFEST.in
@@ -1,6 +1,6 @@
 recursive-include aeon *.py
-recursive-include aeon/benchmarking/example_results *.csv
 recursive-include aeon/datasets *.csv *.arff *.txt *.ts *.tsv *.tsf
+recursive-include aeon/testing/example_results_files *.csv
 include aeon/registry/README.md
 include .coveragerc
 include conftest.py
diff --git a/aeon/benchmarking/tests/test_results_loaders.py b/aeon/benchmarking/tests/test_results_loaders.py
index dcc271df09..b6deaf1d82 100644
--- a/aeon/benchmarking/tests/test_results_loaders.py
+++ b/aeon/benchmarking/tests/test_results_loaders.py
@@ -7,6 +7,7 @@
 import pytest
 from pytest import raises

+import aeon
 from aeon.benchmarking.results_loaders import (
     CONNECTION_ERRORS,
     NAME_ALIASES,
@@ -72,8 +73,10 @@ def test_get_available_estimators():
 cls = ["HIVECOTEV2", "FreshPRINCE", "InceptionTime"]
 data = ["Chinatown", "ItalyPowerDemand", "Tools"]

-test_path = os.path.dirname(__file__)
-data_path = os.path.join(test_path, "../example_results/")
+data_path = os.path.join(
+    os.path.dirname(aeon.__file__),
+    "testing/example_results_files/",
+)


 @pytest.mark.skipif(
diff --git a/aeon/classification/compose/tests/test_pipeline.py b/aeon/classification/compose/tests/test_pipeline.py
index 84d02308f6..9dea384eb5 100644
--- a/aeon/classification/compose/tests/test_pipeline.py
+++ b/aeon/classification/compose/tests/test_pipeline.py
@@ -4,6 +4,7 @@

 import numpy as np
 import pytest
+from numpy.testing import assert_array_almost_equal
 from sklearn.ensemble import RandomForestClassifier
 from sklearn.preprocessing import StandardScaler

@@ -16,7 +17,6 @@
     make_example_3d_numpy_list,
 )
 from aeon.testing.mock_estimators import MockCollectionTransformer
-from aeon.testing.utils.estimator_checks import _assert_array_almost_equal
 from aeon.transformations.collection import (
     AutocorrelationFunctionTransformer,
     HOG1DTransformer,
@@ -61,7 +61,7 @@ def test_classifier_pipeline(transformers):
         X_test = t.transform(X_test)
     c.fit(X_train, y_train)

-    _assert_array_almost_equal(y_pred, c.predict(X_test))
+    assert_array_almost_equal(y_pred, c.predict(X_test))


 @pytest.mark.parametrize(
@@ -99,7 +99,7 @@ def test_sklearn_classifier_pipeline(transformers):
         X_test = t.transform(X_test)
     c.fit(X_train, y_train)

-    _assert_array_almost_equal(y_pred, c.predict(X_test))
+    assert_array_almost_equal(y_pred, c.predict(X_test))


 def test_unequal_tag_inference():
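Editor's note: throughout these test modules the private `_assert_array_almost_equal` wrapper is replaced with `numpy.testing.assert_array_almost_equal`, which it merely delegated to. A minimal sketch of the public call (the array values here are illustrative only):

    import numpy as np
    from numpy.testing import assert_array_almost_equal

    # Two prediction vectors that agree to the default 6 decimal places.
    y_a = np.array([0.0, 1.0, 1.0000001])
    y_b = np.array([0.0, 1.0, 1.0])
    assert_array_almost_equal(y_a, y_b)

    # decimal loosens the tolerance; err_msg is prepended to any failure
    # report, which is how the estimator checks label the failing method.
    assert_array_almost_equal(y_a, y_b, decimal=2, err_msg="predict output differs")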
diff --git a/aeon/clustering/compose/tests/test_pipeline.py b/aeon/clustering/compose/tests/test_pipeline.py
index 0c2d26bdc0..73f751944b 100644
--- a/aeon/clustering/compose/tests/test_pipeline.py
+++ b/aeon/clustering/compose/tests/test_pipeline.py
@@ -4,6 +4,7 @@

 import numpy as np
 import pytest
+from numpy.testing import assert_array_almost_equal
 from sklearn.cluster import KMeans
 from sklearn.preprocessing import StandardScaler

@@ -14,7 +15,6 @@
     make_example_3d_numpy_list,
 )
 from aeon.testing.mock_estimators import MockCollectionTransformer
-from aeon.testing.utils.estimator_checks import _assert_array_almost_equal
 from aeon.transformations.collection import (
     AutocorrelationFunctionTransformer,
     HOG1DTransformer,
@@ -60,7 +60,7 @@ def test_clusterer_pipeline(transformers):
         X_test = t.transform(X_test)
     c.fit(X_train, y_train)

-    _assert_array_almost_equal(y_pred, c.predict(X_test))
+    assert_array_almost_equal(y_pred, c.predict(X_test))


 @pytest.mark.parametrize(
@@ -98,7 +98,7 @@ def test_sklearn_clusterer_pipeline(transformers):
         X_test = t.transform(X_test)
     c.fit(X_train, y_train)

-    _assert_array_almost_equal(y_pred, c.predict(X_test))
+    assert_array_almost_equal(y_pred, c.predict(X_test))


 def test_unequal_tag_inference():
diff --git a/aeon/performance_metrics/tests/test_stats.py b/aeon/performance_metrics/tests/test_stats.py
index 44560c7691..012cbdbeb8 100644
--- a/aeon/performance_metrics/tests/test_stats.py
+++ b/aeon/performance_metrics/tests/test_stats.py
@@ -13,7 +13,7 @@

 data_path = os.path.join(
     os.path.dirname(aeon.__file__),
-    "benchmarking/example_results/",
+    "testing/example_results_files/",
 )
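Editor's note: both test modules now resolve the example result files relative to the installed `aeon` package rather than relative to the test module, so the same lookup works from any working directory. A small sketch of the pattern, assuming `aeon` is importable:

    import os

    import aeon

    # Locate a data directory shipped inside the package, independent of
    # the current working directory or the calling module's location.
    data_path = os.path.join(
        os.path.dirname(aeon.__file__),
        "testing/example_results_files/",
    )
    print(os.path.isdir(data_path))  # True for an installed or editable aeon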
diff --git a/aeon/regression/compose/tests/test_pipeline.py b/aeon/regression/compose/tests/test_pipeline.py
index 6b93be0976..edafa9eecc 100644
--- a/aeon/regression/compose/tests/test_pipeline.py
+++ b/aeon/regression/compose/tests/test_pipeline.py
@@ -4,6 +4,7 @@

 import numpy as np
 import pytest
+from numpy.testing import assert_array_almost_equal
 from sklearn.ensemble import RandomForestRegressor
 from sklearn.preprocessing import StandardScaler

@@ -16,7 +17,6 @@
     make_example_3d_numpy_list,
 )
 from aeon.testing.mock_estimators import MockCollectionTransformer
-from aeon.testing.utils.estimator_checks import _assert_array_almost_equal
 from aeon.transformations.collection import (
     AutocorrelationFunctionTransformer,
     HOG1DTransformer,
@@ -61,7 +61,7 @@ def test_regressor_pipeline(transformers):
         X_test = t.transform(X_test)
     r.fit(X_train, y_train)

-    _assert_array_almost_equal(y_pred, r.predict(X_test))
+    assert_array_almost_equal(y_pred, r.predict(X_test))


 @pytest.mark.parametrize(
@@ -99,7 +99,7 @@ def test_sklearn_regressor_pipeline(transformers):
         X_test = t.transform(X_test)
     r.fit(X_train, y_train)

-    _assert_array_almost_equal(y_pred, r.predict(X_test))
+    assert_array_almost_equal(y_pred, r.predict(X_test))


 def test_unequal_tag_inference():
diff --git a/aeon/segmentation/_eagglo.py b/aeon/segmentation/_eagglo.py
index d482c47d1b..67605b7761 100644
--- a/aeon/segmentation/_eagglo.py
+++ b/aeon/segmentation/_eagglo.py
@@ -72,13 +72,11 @@ class EAggloSegmenter(BaseSegmenter):

     Examples
     --------
-    >>> from aeon.testing.data_generation import piecewise_normal_multivariate
+    >>> from aeon.testing.data_generation import make_example_dataframe_series
     >>> from aeon.segmentation import EAggloSegmenter
-    >>> X = piecewise_normal_multivariate(means=[[1, 3], [4, 5]], lengths=[3, 4],
-    ...     random_state = 10)
+    >>> X = make_example_dataframe_series(n_channels=2, random_state=10)
     >>> model = EAggloSegmenter()
-    >>> model.fit_predict(X, axis=0)
-    array([0, 0, 0, 1, 1, 1, 1])
+    >>> y = model.fit_predict(X, axis=0)
     """

     _tags = {
diff --git a/aeon/segmentation/_ggs.py b/aeon/segmentation/_ggs.py
index 6fef577346..d8bdd21d71 100644
--- a/aeon/segmentation/_ggs.py
+++ b/aeon/segmentation/_ggs.py
@@ -435,12 +435,10 @@ class GreedyGaussianSegmenter(BaseSegmenter):

     Examples
     --------
-    >>> from aeon.testing.data_generation import piecewise_normal_multivariate
+    >>> from aeon.testing.data_generation import make_example_dataframe_series
     >>> from sklearn.preprocessing import MinMaxScaler
     >>> from aeon.segmentation import GreedyGaussianSegmenter
-    >>> X = piecewise_normal_multivariate(lengths=[10, 10, 10, 10],
-    ...     means=[[0.0, 1.0], [11.0, 10.0], [5.0, 3.0], [2.0, 2.0]],
-    ...     variances=0.5)
+    >>> X = make_example_dataframe_series(n_channels=2, random_state=10)
     >>> X_scaled = MinMaxScaler(feature_range=(0, 1)).fit_transform(X)
     >>> ggs = GreedyGaussianSegmenter(k_max=3, max_shuffles=5)
     >>> y = ggs.fit_predict(X_scaled, axis=0)
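Editor's note: the segmenter docstrings above now build their example input with `make_example_dataframe_series`. A runnable sketch of the updated pattern, mirroring the new doctests:

    from sklearn.preprocessing import MinMaxScaler

    from aeon.segmentation import GreedyGaussianSegmenter
    from aeon.testing.data_generation import make_example_dataframe_series

    # A reproducible two-channel example series as a pandas DataFrame.
    X = make_example_dataframe_series(n_channels=2, random_state=10)

    # Scale to [0, 1] and segment, exactly as in the updated doctest.
    X_scaled = MinMaxScaler(feature_range=(0, 1)).fit_transform(X)
    ggs = GreedyGaussianSegmenter(k_max=3, max_shuffles=5)
    y = ggs.fit_predict(X_scaled, axis=0)  # one segment label per time point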
diff --git a/aeon/segmentation/_igts.py b/aeon/segmentation/_igts.py
index 20d632435c..b59bbcfbbf 100644
--- a/aeon/segmentation/_igts.py
+++ b/aeon/segmentation/_igts.py
@@ -152,19 +152,6 @@ class _IGTS:
     "Information gain-based metric for recognizing transitions in human activities.",
     Pervasive and Mobile Computing, 38, 92-109, (2017).
     https://www.sciencedirect.com/science/article/abs/pii/S1574119217300081
-
-    Examples
-    --------
-    >>> from aeon.testing.data_generation import piecewise_normal_multivariate
-    >>> from sklearn.preprocessing import MinMaxScaler
-    >>> from aeon.segmentation import InformationGainSegmenter
-    >>> X = piecewise_normal_multivariate(lengths=[10, 10, 10, 10],
-    ...     means=[[0.0, 1.0], [11.0, 10.0], [5.0, 3.0], [2.0, 2.0]],
-    ...     variances=0.5)
-    >>> X_scaled = MinMaxScaler(feature_range=(0, 1)).fit_transform(X)
-    >>> igts = InformationGainSegmenter(k_max=3, step=2)
-    >>> y = igts.fit_predict(X_scaled, axis=0)
-
     """

     # init attributes
@@ -256,7 +243,8 @@ def find_change_points(self, X: npt.ArrayLike) -> list[int]:
         current_change_points = self.identity(X)

         for k in range(self.k_max):
-            ig_max = 0
+            best_candidate = -1
+            ig_max = -1
             # find a point which maximizes score
             for candidate in self.get_candidates(n_samples, current_change_points):
                 try_change_points = {candidate}
@@ -335,12 +323,10 @@ class InformationGainSegmenter(BaseSegmenter):

     Examples
     --------
-    >>> from aeon.testing.data_generation import piecewise_normal_multivariate
+    >>> from aeon.testing.data_generation import make_example_dataframe_series
     >>> from sklearn.preprocessing import MinMaxScaler
     >>> from aeon.segmentation import InformationGainSegmenter
-    >>> X = piecewise_normal_multivariate(lengths=[10, 10, 10, 10],
-    ...     means=[[0.0, 1.0], [11.0, 10.0], [5.0, 3.0], [2.0, 2.0]],
-    ...     variances=0.5)
+    >>> X = make_example_dataframe_series(n_channels=2, random_state=10)
     >>> X_scaled = MinMaxScaler(feature_range=(0, 1)).fit_transform(X)
     >>> igts = InformationGainSegmenter(k_max=3, step=2)
     >>> y = igts.fit_predict(X_scaled, axis=0)
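Editor's note: the `find_change_points` hunk above also fixes a selection bug: with `ig_max = 0`, a candidate whose information gain was not strictly positive could never be selected. Initialising both the best candidate and the best gain to `-1` sentinels avoids that. A minimal sketch of the pattern with hypothetical names (not the aeon implementation):

    def _argmax_with_sentinel(candidates, score):
        """Pick the best candidate even when every score is <= 0.

        Starting best_score at 0 (the old behaviour) silently rejects
        candidates with non-positive scores; a -1 sentinel does not,
        assuming scores (like information gain) are never below -1.
        """
        best_candidate = -1
        best_score = -1.0
        for candidate in candidates:
            s = score(candidate)
            if s > best_score:
                best_candidate, best_score = candidate, s
        return best_candidate, best_score


    # Every gain is 0.0: the old initialisation would select no candidate.
    print(_argmax_with_sentinel([3, 7, 11], lambda c: 0.0))  # (3, 0.0)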
diff --git a/aeon/testing/utils/_cicd_numba_caching.py b/aeon/testing/_cicd_numba_caching.py
similarity index 100%
rename from aeon/testing/utils/_cicd_numba_caching.py
rename to aeon/testing/_cicd_numba_caching.py
diff --git a/aeon/testing/data_generation/__init__.py b/aeon/testing/data_generation/__init__.py
index 67f91f5359..4cd1824071 100644
--- a/aeon/testing/data_generation/__init__.py
+++ b/aeon/testing/data_generation/__init__.py
@@ -14,15 +14,6 @@
     "make_example_2d_numpy_series",
     "make_example_pandas_series",
     "make_example_dataframe_series",
-    # other
-    "piecewise_normal_multivariate",
-    "piecewise_normal",
-    "piecewise_multinomial",
-    "piecewise_poisson",
-    "labels_with_repeats",
-    "label_piecewise_normal",
-    "_make_hierarchical",
-    "_bottom_hier_datagen",
 ]


@@ -41,15 +32,3 @@
     make_example_dataframe_series,
     make_example_pandas_series,
 )
-from aeon.testing.data_generation.hierarchical import (
-    _bottom_hier_datagen,
-    _make_hierarchical,
-)
-from aeon.testing.data_generation.segmentation import (
-    label_piecewise_normal,
-    labels_with_repeats,
-    piecewise_multinomial,
-    piecewise_normal,
-    piecewise_normal_multivariate,
-    piecewise_poisson,
-)
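Editor's note: with the piecewise generators removed from `aeon.testing.data_generation`, code that still needs piecewise-normal test data can reproduce it in a few lines of plain numpy. A hedged stand-in, not an aeon API:

    import numpy as np


    def piecewise_normal(means, lengths, std_dev=1.0, random_state=None):
        """Concatenate normal segments; a stand-in for the removed generator."""
        rng = np.random.default_rng(random_state)
        return np.concatenate(
            [rng.normal(loc=m, scale=std_dev, size=n) for m, n in zip(means, lengths)]
        )


    X = piecewise_normal([1.0, 5.0, 2.0], lengths=[20, 30, 10], random_state=0)
    print(X.shape)  # (60,)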
diff --git a/aeon/testing/data_generation/hierarchical.py b/aeon/testing/data_generation/hierarchical.py
deleted file mode 100644
index e889cf7634..0000000000
--- a/aeon/testing/data_generation/hierarchical.py
+++ /dev/null
@@ -1,246 +0,0 @@
-"""Hierarchical Data Generators."""
-
-from typing import Optional
-
-__maintainer__ = []
-
-from itertools import product
-from typing import Union
-
-import numpy as np
-import pandas as pd
-from sklearn.utils import check_random_state
-
-from aeon.datasets import load_airline
-
-
-def _make_index(n_timepoints, index_type=None):
-    """Make indices for unit testing."""
-    if index_type == "period":
-        start = "2000-01"
-        freq = "M"
-        return pd.period_range(start=start, periods=n_timepoints, freq=freq)
-
-    elif index_type == "datetime" or index_type is None:
-        start = "2000-01-01"
-        freq = "D"
-        return pd.date_range(start=start, periods=n_timepoints, freq=freq)
-
-    elif index_type == "range":
-        start = 3  # check non-zero based indices
-        return pd.RangeIndex(start=start, stop=start + n_timepoints)
-
-    elif index_type == "int":
-        start = 3
-        return pd.Index(np.arange(start, start + n_timepoints), dtype=int)
-
-    else:
-        raise ValueError(f"index_class: {index_type} is not supported")
-
-
-def _make_hierarchical(
-    hierarchy_levels: tuple = (2, 4),
-    max_timepoints: int = 12,
-    min_timepoints: int = 12,
-    same_cutoff: bool = True,
-    n_columns: int = 1,
-    all_positive: bool = True,
-    index_type: Optional[str] = None,
-    random_state: Optional[Union[int, np.random.RandomState]] = None,
-    add_nan: bool = False,
-) -> pd.DataFrame:
-    """Generate hierarchical multiindex type for testing.
-
-    Parameters
-    ----------
-    hierarchy_levels : Tuple, optional
-        the number of groups at each hierarchy level, by default (2, 4)
-    max_timepoints : int, optional
-        maximum time points a series can have, by default 12
-    min_timepoints : int, optional
-        minimum time points a series can have, by default 12
-    same_cutoff : bool, optional
-        If it's True all series will end at the same date, by default True
-    n_columns : int, optional
-        number of columns in the output dataframe, by default 1
-    all_positive : bool, optional
-        If True the time series will be positive, by default True
-    index_type : str, optional
-        type of index, by default None
-        Supported types are "period", "datetime", "range" or "int".
-        If it's not provided, "datetime" is selected.
-    random_state : int, np.random.RandomState or None
-        Controls the randomness of the estimator, by default None
-    add_nan : bool, optional
-        If it's true the series will contain NaNs, by default False
-
-    Returns
-    -------
-    pd.DataFrame
-        hierarchical dataframe
-    """
-    levels = [
-        [f"h{i}_{j}" for j in range(hierarchy_levels[i])]
-        for i in range(len(hierarchy_levels))
-    ]
-    level_names = [f"h{i}" for i in range(len(hierarchy_levels))]
-    rng = check_random_state(random_state)
-    if min_timepoints == max_timepoints:
-        time_index = _make_index(max_timepoints, index_type)
-        index = pd.MultiIndex.from_product(
-            levels + [time_index], names=level_names + ["time"]
-        )
-    else:
-        df_list = []
-        for levels_tuple in product(*levels):
-            n_timepoints = rng.randint(low=min_timepoints, high=max_timepoints)
-            if same_cutoff:
-                time_index = _make_index(max_timepoints, index_type)[-n_timepoints:]
-            else:
-                time_index = _make_index(n_timepoints, index_type)
-            d = dict(zip(level_names, levels_tuple))
-            d["time"] = time_index
-            df_list.append(pd.DataFrame(d))
-        index = pd.MultiIndex.from_frame(
-            pd.concat(df_list), names=level_names + ["time"]
-        )
-
-    total_time_points = len(index)
-    data = rng.normal(size=(total_time_points, n_columns))
-    if add_nan:
-        # add some nan values
-        data[int(len(data) / 2)] = np.nan
-        data[0] = np.nan
-        data[-1] = np.nan
-    if all_positive:
-        data -= np.min(data, axis=0) - 1
-    df = pd.DataFrame(
-        data=data, index=index, columns=[f"c{i}" for i in range(n_columns)]
-    )
-
-    return df
-
-
-def _bottom_hier_datagen(
-    no_levels=3,
-    no_bottom_nodes=6,
-    intercept_max=20,
-    coef_1_max=20,
-    coef_2_max=0.1,
-    random_seed=None,
-):
-    """Hierarchical data generator using the flights dataset.
-
-    This function generates bottom level, i.e. not aggregated, time-series
-    from the flights dataset.
-
-    Each series is generated from the flights dataset using a linear model,
-    y = c0 + c1x + c2x^(c3), where the coefficients, intercept, and exponent
-    are randomly sampled for each series. The coefficients and intercept are
-    sampled between np.arange(0, *_max, 0.01) to keep the values positive. The
-    exponent is sampled from [0.5, 1, 1.5, 2].
-
-    Parameters
-    ----------
-    no_levels : int, optional
-        The number of levels not considering the time-index, by default 3
-    no_bottom_nodes : int, optional
-        Number of time series, i.e. bottom nodes, to generate, by default 6.
-    *_max : int, optional
-        Maximum possible value of the coefficient or intercept value.
-    random_seed : int, optional
-        Random seed for reproducibility.
-
-    Returns
-    -------
-    pd.DataFrame with multiindex
-    """
-    if no_levels > no_bottom_nodes:
-        raise ValueError("no_levels should be less than no_bottom_nodes")
-
-    rng = np.random.default_rng(random_seed)
-
-    base_ts = load_airline(return_array=False)
-    df = pd.DataFrame(base_ts, index=base_ts.index)
-    df.index.rename(None, inplace=True)
-
-    if no_levels == 0:
-        df.columns = ["passengers"]
-        df.index.rename("timepoints", inplace=True)
-        return df
-    else:
-        df.columns = ["l1_node01"]
-
-        intercept = np.arange(0, intercept_max, 0.01)
-        coef_1 = np.arange(0, coef_1_max, 0.01)
-        coef_2 = np.arange(0, coef_2_max, 0.01)
-        power_2 = [0.5, 1, 1.5, 2]
-
-        # create structure of hierarchy
-        node_lookup = pd.DataFrame(
-            ["l1_node" + f"{x:02d}" for x in range(1, no_bottom_nodes + 1)]
-        )
-        node_lookup.columns = ["l1_agg"]
-
-        if no_levels >= 2:
-            # create index from bottom up, sampling node names
-            for i in range(2, no_levels + 1):
-                name = f"l{i}_agg"
-                name_groupby = f"l{i - 1}_agg"
-                node_lookup[name] = node_lookup.groupby([name_groupby])[
-                    "l1_agg"
-                ].transform(
-                    lambda x: "l"
-                    + str(i)  # noqa: B023
-                    + "_node"
-                    + "{:02d}".format(
-                        _sample_node(node_lookup.index, i, rng)  # noqa: B023
-                    )
-                )
-
-        node_lookup = node_lookup.set_index("l1_agg", drop=True)
-
-        # now define the series for each level by sampling coefficients etc.
-        for i in range(2, no_bottom_nodes + 1):
-            df["l1_node" + f"{i:02d}"] = (
-                rng.choice(intercept, size=1)
-                + rng.choice(coef_1, size=1) * df["l1_node01"]
-                + (
-                    rng.choice(coef_2, size=1)
-                    * (df["l1_node01"] ** rng.choice(power_2, size=1))
-                )
-            )
-
-        df = (
-            df.melt(ignore_index=False)
-            .reset_index(drop=False)
-            .rename(
-                columns={
-                    "variable": "l1_agg",
-                    "index": "timepoints",
-                    "value": "passengers",
-                }
-            )
-        )
-
-        df = pd.merge(left=df, right=node_lookup.reset_index(), on="l1_agg")
-        df = df[df.columns.sort_values(ascending=True)]
-
-        df_newindex = ["l" + str(x) + "_agg" for x in range(1, no_levels + 1)][::-1]
-        df_newindex.append("timepoints")
-
-        df = df.set_index(df_newindex)
-        df.sort_index(inplace=True)
-
-        return df
-
-
-def _sample_node(index_table, level, sampler):
-    """Sample a number of nodes depending on the size of hierarchy and level."""
-    nodes = np.arange(1, np.floor(len(index_table) / level) + 1, 1)
-    # return a single sample of them
-    sample_nodes = int(sampler.choice(nodes, size=1))
-
-    return sample_nodes
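Editor's note: the deleted `_make_hierarchical` reduced to building a `pd.MultiIndex` over the hierarchy levels plus a time index and filling it with normal draws. A compact sketch of that core, should a local replacement ever be needed:

    import numpy as np
    import pandas as pd

    rng = np.random.default_rng(0)
    levels = [["h0_0", "h0_1"], ["h1_0", "h1_1", "h1_2", "h1_3"]]
    time_index = pd.date_range("2000-01-01", periods=12, freq="D")

    # Cartesian product of hierarchy levels and time points, as in the old helper.
    index = pd.MultiIndex.from_product(levels + [time_index], names=["h0", "h1", "time"])
    df = pd.DataFrame(rng.normal(size=(len(index), 1)), index=index, columns=["c0"])
    print(df.shape)  # (96, 1): 2 * 4 * 12 rows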
diff --git a/aeon/testing/data_generation/segmentation.py b/aeon/testing/data_generation/segmentation.py
deleted file mode 100644
index b8ccd22aff..0000000000
--- a/aeon/testing/data_generation/segmentation.py
+++ /dev/null
@@ -1,388 +0,0 @@
-"""Synthetic data generating functions."""
-
-from typing import Optional, Union
-
-import numpy as np
-import numpy.typing as npt
-from sklearn.utils.validation import check_random_state
-
-
-def piecewise_normal_multivariate(
-    means: npt.ArrayLike,
-    lengths: npt.ArrayLike,
-    variances: Union[npt.ArrayLike, float] = 1.0,
-    covariances: Optional[npt.ArrayLike] = None,
-    random_state: Optional[Union[int, np.random.RandomState]] = None,
-) -> npt.ArrayLike:
-    """
-    Generate multivariate series from segments.
-
-    Each segment has length specified in ``lengths`` and data sampled from a
-    multivariate normal distribution with a mean from ``means`` and covariance
-    from ``covariances`` (either specified or built from ``variances`` when
-    unspecified).
-
-    Parameters
-    ----------
-    lengths : array_like
-        Lengths of the segments to be generated of shape (n_segments,)
-    means : array_like
-        Means of the segments to be generated, as an array of shape
-        (n_segments, n_series)
-    variances : float or array_like (default=1.0)
-        Variance of the segments to be generated
-    covariances : array_like (default=None)
-        Covariances of segments to be generated of shape
-        (n_segments, n_series, n_series)
-        If None, this will be constructed from variances by assuming independence
-        of random variables, i.e. variance as diagonal elements of covariance matrix
-    random_state : int or np.random.RandomState
-        Either a random seed or ``RandomState`` instance
-
-    Returns
-    -------
-    data : array_like
-        Multivariate time series as ``np.array`` of shape (sum(lengths), n_series)
-
-    Examples
-    --------
-    >>> from aeon.testing.data_generation import piecewise_normal_multivariate
-    >>> piecewise_normal_multivariate(means=[[1, 1], [2, 2], [3, 3]],\
-        lengths=[2, 3, 1], random_state=2)
-    array([[ 0.58324215,  0.94373317],
-           [-1.1361961 ,  2.64027081],
-           [ 0.20656441,  1.15825263],
-           [ 2.50288142,  0.75471191],
-           [ 0.94204778,  1.09099239],
-           [ 3.55145404,  5.29220801]])
-
-    >>> from aeon.testing.data_generation import piecewise_normal_multivariate
-    >>> piecewise_normal_multivariate(means=[[1, 1], [2, 2], [3, 3]],\
-        lengths=[2, 3, 1], variances=[[1.0, 1.0], [1.0, 1.0], [1.0, 1.0]],\
-        random_state=2)
-    array([[ 0.58324215,  0.94373317],
-           [-1.1361961 ,  2.64027081],
-           [ 0.20656441,  1.15825263],
-           [ 2.50288142,  0.75471191],
-           [ 0.94204778,  1.09099239],
-           [ 3.55145404,  5.29220801]])
-
-    >>> from aeon.testing.data_generation import piecewise_normal_multivariate
-    >>> piecewise_normal_multivariate(means=[[1, 1], [2, 2], [3, 3]],\
-        lengths=[2, 3, 1], covariances=[[[1.0, 0], [0, 1.0]], [[1.0, 0],\
-        [0, 1.0]], [[1.0, 0], [0, 1.0]]], random_state=2)
-    array([[ 0.58324215,  0.94373317],
-           [-1.1361961 ,  2.64027081],
-           [ 0.20656441,  1.15825263],
-           [ 2.50288142,  0.75471191],
-           [ 0.94204778,  1.09099239],
-           [ 3.55145404,  5.29220801]])
-
-    >>> from aeon.testing.data_generation import piecewise_normal_multivariate
-    >>> piecewise_normal_multivariate(means=[[1, 3], [4, 5]], lengths=[3, 3],\
-        covariances=[[[0.5, 0.3], [0.3, 1.0]], [[1.0, 0.3], [0.3, 0.7]]],\
-        random_state=2)
-    array([[ 0.78066776,  2.61125356],
-           [ 0.92296736,  0.51689669],
-           [-0.2694238 ,  1.47959507],
-           [ 4.00389069,  3.95225998],
-           [ 5.32264874,  5.05088075],
-           [ 2.62479901,  6.08308546]])
-
-    """
-
-    def get_covariances(var):
-        """Fill 1D variance array of length N to 2D covariance array of size (N,N)."""
-        cov = np.zeros((N, N), float)
-        np.fill_diagonal(cov, var)
-        return cov
-
-    L, N = np.array(means).shape
-
-    rng = check_random_state(random_state)
-    assert len(lengths) == L
-
-    # if no covariance is specified, build it from variance
-    # assuming independent random variables
-    if covariances is None:
-        assert variances is not None
-
-        # variances can be specified as a float, make 1D array, repeat L times
-        if isinstance(variances, (float, int)):
-            variances = np.repeat(variances, N)
-            variances = np.tile(variances, (L, 1))
-
-        assert np.array(variances).shape == (L, N)
-
-        # get covariance matrices from variance arrays
-        covariances = [get_covariances(var) for var in variances]
-
-    else:
-        assert all(np.allclose(np.array(cov), np.array(cov).T) for cov in covariances)
-        assert all(np.all(np.linalg.eigvals(cov) >= 0) for cov in covariances)
-
-        assert np.array(covariances).shape[0] == L
-        assert np.array(covariances).shape[1] == N
-
-    return np.concatenate(
-        [
-            rng.multivariate_normal(mean=mean, cov=cov, size=length)
-            for mean, cov, length in zip(means, covariances, lengths)
-        ]
-    )
-
-
-def piecewise_normal(
-    means: npt.ArrayLike,
-    lengths: npt.ArrayLike,
-    std_dev: Union[npt.ArrayLike, float] = 1.0,
-    random_state: Optional[Union[int, np.random.RandomState]] = None,
-) -> npt.ArrayLike:
-    """
-    Generate series from segments.
-
-    Each segment has length specified in ``lengths`` and data sampled from a normal
-    distribution with a mean from ``means`` and standard deviation from ``std_dev``.
-
-    Parameters
-    ----------
-    means : array_like
-        Means of the segments to be generated
-    lengths : array_like
-        Lengths of the segments to be generated
-    std_dev : float or array_like
-        Standard deviations of the segments to be generated
-    random_state : int or np.random.RandomState
-        Either a random seed or RandomState instance
-
-    Returns
-    -------
-    data : np.array
-        univariate time series as np.array
-
-    Examples
-    --------
-    >>> from aeon.testing.data_generation import piecewise_normal
-    >>> piecewise_normal([1, 2, 3], lengths=[2, 4, 8], random_state=42) # doctest: +SKIP
-    array([1.49671415, 0.8617357 , 2.64768854, 3.52302986, 1.76584663,
-           1.76586304, 4.57921282, 3.76743473, 2.53052561, 3.54256004,
-           2.53658231, 2.53427025, 3.24196227, 1.08671976])
-
-    >>> from aeon.testing.data_generation import piecewise_normal
-    >>> piecewise_normal([1, 2, 3], lengths=[2, 4, 8], std_dev=0) # doctest: +SKIP
-    array([1., 1., 2., 2., 2., 2., 3., 3., 3., 3., 3., 3., 3., 3.])
-
-    >>> from aeon.testing.data_generation import piecewise_normal
-    >>> piecewise_normal([1, 2, 3], lengths=[2, 4, 8], std_dev=[0, 0.5, 1.0])\
-        # doctest: +SKIP
-    array([1.        , 1.        , 2.32384427, 2.76151493, 1.88292331,
-           1.88293152, 4.57921282, 3.76743473, 2.53052561, 3.54256004,
-           2.53658231, 2.53427025, 3.24196227, 1.08671976])
-
-    """
-    rng = check_random_state(random_state)
-    assert len(means) == len(lengths)
-
-    if isinstance(std_dev, (float, int)):
-        std_dev = np.repeat(std_dev, len(means))
-
-    assert len(std_dev) == len(means)
-
-    segments_data = [
-        rng.normal(loc=mean, scale=sd, size=[length])
-        for mean, length, sd in zip(means, lengths, std_dev)
-    ]
-    return np.concatenate(tuple(segments_data))
-
-
-def piecewise_multinomial(
-    n_trials: int,
-    lengths: npt.ArrayLike,
-    p_vals: npt.ArrayLike,
-    random_state: Optional[Union[int, np.random.RandomState]] = None,
-) -> npt.ArrayLike:
-    """
-    Generate series from segments.
-
-    Each segment has length specified in ``lengths`` and data sampled from a
-    multinomial distribution with a total number of experiments for each trial set
-    from ``n_trials`` and the probability for each outcome stored inside a list
-    contained in ``p_vals``.
-
-    Parameters
-    ----------
-    n_trials : int
-        Number of experiments to run during each trial
-    lengths : array_like
-        Lengths of the segments to be generated
-    p_vals : array_like
-        Set of probabilities for each outcome for each distribution
-        Each set of probabilities must be equal length
-    random_state : int or np.random.RandomState
-        Either a random seed or RandomState instance
-
-    Returns
-    -------
-    data : np.array
-        univariate or multivariate time series as np.array
-        that has dimensions sum(lengths) x n_outcomes
-        where n_outcomes = # of outcomes for each item in ``p_vals``
-
-    Examples
-    --------
-    >>> from aeon.testing.data_generation import piecewise_multinomial
-    >>> piecewise_multinomial(20, lengths=[3, 2], p_vals=[[1/4, 3/4], \
-        [3/4, 1/4]], random_state=42) # doctest: +SKIP
-    array([[ 4, 16],
-           [ 8, 12],
-           [ 6, 14],
-           [15,  5],
-           [17,  3]])
-
-    >>> from aeon.testing.data_generation import piecewise_multinomial
-    >>> piecewise_multinomial(10, lengths=[2, 4, 8], \
-        p_vals=[[1, 0], [0, 1], [1, 0]]) # doctest: +SKIP
-    array([[10,  0],
-           [10,  0],
-           [ 0, 10],
-           [ 0, 10],
-           [ 0, 10],
-           [ 0, 10],
-           [10,  0],
-           [10,  0],
-           [10,  0],
-           [10,  0],
-           [10,  0],
-           [10,  0],
-           [10,  0],
-           [10,  0]])
-    """
-    rng = check_random_state(random_state)
-
-    # error handling for inputs
-    if len(lengths) != len(p_vals):
-        raise ValueError("lengths and p_vals arguments must be same length")
-    elif any(sum(p_val) != 1 for p_val in p_vals):
-        raise ValueError("each set of probabilities in p_val must sum to 1")
-    elif not (np.array([len(p_val) for p_val in p_vals]) == len(p_vals[0])).all():
-        raise ValueError("each set of probabilities in p_val must be equal length")
-
-    segments_data = [
-        rng.multinomial(n=n_trials, pvals=p_val, size=[length])
-        for p_val, length, in zip(p_vals, lengths)
-    ]
-    return np.concatenate(tuple(segments_data))
-
-
-def piecewise_poisson(
-    lambdas: npt.ArrayLike,
-    lengths: npt.ArrayLike,
-    random_state: Optional[Union[int, np.random.RandomState]] = None,
-) -> npt.ArrayLike:
-    """
-    Generate series using Poisson distribution.
-
-    Each segment has length specified in ``lengths`` and data sampled from a Poisson
-    distribution with expected lambda from ``lambdas``.
-
-    Parameters
-    ----------
-    lambdas : array_like
-        Expected number and variance of events within a specified time interval
-    lengths : array_like
-        Lengths of the segments to be generated
-    random_state : int or np.random.RandomState
-        Either a random seed or RandomState instance
-
-    Returns
-    -------
-    data : np.array
-        univariate time series as np.array
-
-    Examples
-    --------
-    >>> from aeon.testing.data_generation import piecewise_poisson
-    >>> piecewise_poisson(lambdas=[1,2,3],lengths=[2,4,8],random_state=42)#doctest:+SKIP
-    array([1, 2, 1, 3, 3, 1, 3, 1, 3, 2, 2, 4, 2, 1])
-
-    >>> from aeon.testing.data_generation import piecewise_poisson
-    >>> piecewise_poisson(lambdas=[1,3,6],lengths=[2,4,8],random_state=42)#doctest:+SKIP
-    array([1, 2, 1, 3, 3, 2, 5, 5, 6, 4, 4, 9, 3, 5])
-
-    """
-    rng = check_random_state(random_state)
-
-    assert len(lambdas) == len(lengths)
-
-    try:
-        segments_data = [
-            rng.poisson(lam=lams, size=[length])
-            for lams, length in zip(lambdas, lengths)
-        ]
-    except ValueError as e:
-        raise Exception("Size mismatch") from e
-
-    return np.concatenate(tuple(segments_data))
-
-
-def labels_with_repeats(means: npt.ArrayLike, std_dev: npt.ArrayLike) -> npt.ArrayLike:
-    """Generate labels for unique combinations of means and std_dev."""
-    data = [means, std_dev]
-    unique, indices = np.unique(data, axis=1, return_inverse=True)
-    labels = np.arange(unique.shape[1])
-    return labels[indices]
-
-
-def label_piecewise_normal(
-    means: npt.ArrayLike,
-    lengths: npt.ArrayLike,
-    std_dev: Union[npt.ArrayLike, float] = 1.0,
-    repeated_labels: bool = True,
-) -> npt.ArrayLike:
-    """
-    Generate labels for a series composed of segments.
-
-    Parameters
-    ----------
-    means : array_like
-        Means of the segments to be generated
-    lengths : array_like
-        Lengths of the segments to be generated
-    std_dev : float or array_like
-        Standard deviations of the segments to be generated
-    repeated_labels : bool
-        Flag to indicate whether segment labels should be repeated for similar
-        segments. If ``True`` same label will be assigned for segments with same
-        mean and std_dev, independently of length. If ``False`` each consecutive
-        segment will have a unique label.
-
-    Returns
-    -------
-    labels : np.array
-        integer encoded array of labels, same length as data
-    """
-    if isinstance(std_dev, (float, int)):
-        std_dev = np.repeat(std_dev, len(means))
-    if repeated_labels:
-        unique_labels = labels_with_repeats(means, std_dev)
-    else:
-        unique_labels = range(len(lengths))
-    return np.repeat(unique_labels, lengths)
-
-
-class GenBasicGauss:
-    """Data generator base class in order to allow composition."""
-
-    def __init__(self, means, lengths, std_dev=1.0, random_state=None):
-        self.means = means
-        self.lengths = lengths
-        self.std_dev = std_dev
-        self.random_state = random_state
-
-    def sample(self):
-        """Generate univariate mean shift random data sample."""
-        return piecewise_normal(
-            means=self.means,
-            lengths=self.lengths,
-            std_dev=self.std_dev,
-            random_state=self.random_state,
-        )
diff --git a/aeon/testing/data_generation/tests/test_hierarchical.py b/aeon/testing/data_generation/tests/test_hierarchical.py
deleted file mode 100644
index 4a1fe1ef31..0000000000
--- a/aeon/testing/data_generation/tests/test_hierarchical.py
+++ /dev/null
@@ -1,60 +0,0 @@
-"""Test hierarchical generators."""
-
-import pandas as pd
-
-from aeon.testing.data_generation.hierarchical import _make_hierarchical
-
-
-def test_make_hierarchical_basic():
-    """Test make hierarchy."""
-    df = _make_hierarchical()
-    assert isinstance(df, pd.DataFrame), "Output is not a pandas DataFrame"
-    assert df.shape[1] == 1, "DataFrame does not have the expected number of columns"
-    assert not df.isnull().values.any(), "DataFrame contains unexpected NaN values"
-
-
-def test_make_hierarchical_custom_levels():
-    """Test make hierarchy."""
-    # Test custom hierarchy levels
-    hierarchy_levels = (3, 2)
-    df = _make_hierarchical(hierarchy_levels=hierarchy_levels)
-    expected_levels = len(hierarchy_levels) + 1  # +1 for the time index
-    assert df.index.nlevels == expected_levels, "Incorrect number of index levels"
-
-
-def test_make_hierarchical_timepoints_range():
-    """Test make hierarchy."""
-    # Test varying timepoints
-    min_timepoints, max_timepoints = 5, 10
-    df = _make_hierarchical(
-        min_timepoints=min_timepoints, max_timepoints=max_timepoints, same_cutoff=False
-    )
-    # Verifying that series lengths vary within the specified range
-    lengths = df.groupby(level=list(range(len(df.index.levels) - 1))).size()
-    assert (
-        lengths.min() >= min_timepoints and lengths.max() <= max_timepoints
-    ), "Time points do not fall within the specified range"
-
-
-def test_make_hierarchical_nan_values():
-    """Test make hierarchy."""
-    # Test NaN values inclusion
-    df = _make_hierarchical(add_nan=True)
-    assert df.isnull().values.any(), "DataFrame does not contain NaN values as expected"
-
-
-def test_make_hierarchical_positive_values():
-    """Test make hierarchy."""
-    # Test all positive values
-    df = _make_hierarchical(all_positive=True)
-    assert (df >= 0).all().all(), "DataFrame contains non-positive values"
-
-
-def test_make_hierarchical_index_type():
-    """Test make hierarchy."""
-    # Test for specific index types
-    index_type = "datetime"
-    df = _make_hierarchical(index_type=index_type)
-    assert isinstance(
-        df.index.get_level_values(-1)[0], pd.Timestamp
-    ), "Index type does not match 'datetime'"
diff --git a/aeon/testing/data_generation/tests/test_segmentation.py b/aeon/testing/data_generation/tests/test_segmentation.py
deleted file mode 100644
index 2d2a62c1d4..0000000000
--- a/aeon/testing/data_generation/tests/test_segmentation.py
+++ /dev/null
@@ -1,184 +0,0 @@
-"""Test segmentation data generation."""
-
-import numpy as np
-import pytest
-from numpy import array_equal
-
-from aeon.testing.data_generation.segmentation import (
-    GenBasicGauss,
-    label_piecewise_normal,
-    labels_with_repeats,
-    piecewise_multinomial,
-    piecewise_normal,
-    piecewise_normal_multivariate,
-    piecewise_poisson,
-)
-
-
-def test_segmentation_generation():
-    """Test the piecewise generation functions."""
-    X = piecewise_normal_multivariate(
-        means=[[1, 1], [2, 2], [3, 3]], lengths=[2, 3, 1], random_state=2
-    )
-    assert isinstance(X, np.ndarray)
-    exp = np.array(
-        [
-            [0.58324215, 0.94373317],
-            [-1.1361961, 2.64027081],
-            [0.20656441, 1.15825263],
-            [2.50288142, 0.75471191],
-            [0.94204778, 1.09099239],
-            [3.55145404, 5.29220801],
-        ]
-    )
-    assert np.allclose(X, exp)
-    X = piecewise_normal([1, 2, 3], lengths=[2, 4, 8], random_state=42)
-    exp = np.array(
-        [1.49671415, 0.8617357, 2.64768854, 3.52302986, 1.76584663, 1.76586304,
-         4.57921282, 3.76743473, 2.53052561, 3.54256004, 2.53658231, 2.53427025,
-         3.24196227, 1.08671976]
-    )
-    assert np.allclose(X, exp)
-
-    X = piecewise_normal(
-        [1, 2, 3], lengths=[2, 4, 8], std_dev=[0, 0.5, 1.0], random_state=42
-    )
-    exp = np.array(
-        [1.0, 1.0, 2.32384427, 2.76151493, 1.88292331, 1.88293152, 4.57921282,
-         3.76743473, 2.53052561, 3.54256004, 2.53658231, 2.53427025, 3.24196227,
-         1.08671976]
-    )
-    assert np.allclose(X, exp)
-    X = piecewise_multinomial(
-        20, lengths=[3, 2], p_vals=[[1 / 4, 3 / 4], [3 / 4, 1 / 4]], random_state=42
-    )
-    exp = np.array([[4, 16], [8, 12], [6, 14], [15, 5], [17, 3]])
-    assert np.allclose(X, exp)
-    X = piecewise_multinomial(10, lengths=[2, 4, 8], p_vals=[[1, 0], [0, 1], [1, 0]])
-    exp = np.array(
-        [[10, 0], [10, 0], [0, 10], [0, 10], [0, 10], [0, 10], [10, 0], [10, 0],
-         [10, 0], [10, 0], [10, 0], [10, 0], [10, 0], [10, 0]]
-    )
-    assert np.allclose(X, exp)
-    X = piecewise_poisson(lambdas=[1, 2, 3], lengths=[2, 4, 8], random_state=42)
-    exp = np.array([1, 2, 1, 3, 3, 1, 3, 1, 3, 2, 2, 4, 2, 1])
-    assert np.allclose(X, exp)
-    X = piecewise_poisson(lambdas=[1, 3, 6], lengths=[2, 4, 8], random_state=42)
-    exp = np.array([1, 2, 1, 3, 3, 2, 5, 5, 6, 4, 4, 9, 3, 5])
-    assert np.allclose(X, exp)
-
-
-def test_label_generation():
-    """Test label generation."""
-    y = labels_with_repeats(means=[1.0, 2.0, 3.0], std_dev=[0.5, 1.0, 2.0])
-    exp = np.array([0, 1, 2])
-    assert np.allclose(y, exp)
-    y = label_piecewise_normal([1, 2, 3], lengths=[10, 10, 10], std_dev=[0.5, 1.0, 2.0])
-    exp = np.array(
-        [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
-         2, 2, 2, 2, 2, 2, 2, 2, 2, 2]
-    )
-    assert np.allclose(y, exp)
-    gen = GenBasicGauss([1, 2, 3], lengths=[2, 4, 8], random_state=42)
-    X = gen.sample()
-    exp = np.array(
-        [1.49671415, 0.8617357, 2.64768854, 3.52302986, 1.76584663, 1.76586304,
-         4.57921282, 3.76743473, 2.53052561, 3.54256004, 2.53658231, 2.53427025,
-         3.24196227, 1.08671976]
-    )
-    assert np.allclose(X, exp)
-
-
-@pytest.mark.parametrize(
-    "lambdas, lengths, random_state, output",
-    [
-        ([1, 2, 3], [2, 4, 8], 42, [1, 2, 1, 3, 3, 1, 3, 1, 3, 2, 2, 4, 2, 1]),
-        ([1, 3, 6], [2, 4, 8], 42, [1, 2, 1, 3, 3, 2, 5, 5, 6, 4, 4, 9, 3, 5]),
-    ],
-)
-def test_piecewise_poisson(lambdas, lengths, random_state, output):
-    """Test piecewise_poisson function returns the expected Poisson distributed array."""
-    assert array_equal(piecewise_poisson(lambdas, lengths, random_state), output)
diff --git a/aeon/testing/estimator_checking/_yield_classification_checks.py b/aeon/testing/estimator_checking/_yield_classification_checks.py
index 2d49236c8c..271284b94d 100644
--- a/aeon/testing/estimator_checking/_yield_classification_checks.py
+++ b/aeon/testing/estimator_checking/_yield_classification_checks.py
@@ -8,6 +8,7 @@
 from sys import platform

 import numpy as np
+from numpy.testing import assert_array_almost_equal
 from sklearn.utils._testing import set_random_state

 from aeon.base._base import _clone_estimator
@@ -18,7 +19,7 @@
     unit_test_proba,
 )
 from aeon.testing.testing_data import FULL_TEST_DATA_DICT
-from aeon.testing.utils.estimator_checks import _assert_array_almost_equal, _get_tag
+from aeon.testing.utils.estimator_checks import _get_tag
 from aeon.utils.validation import get_n_cases


@@ -124,7 +125,7 @@ def check_classifier_against_expected_results(estimator_class):
         y_proba = estimator_instance.predict_proba(X_test[indices])

         # assert probabilities are the same
-        _assert_array_almost_equal(
+        assert_array_almost_equal(
             y_proba,
             expected_probas,
             decimal=2,
diff --git a/aeon/testing/estimator_checking/_yield_early_classification_checks.py b/aeon/testing/estimator_checking/_yield_early_classification_checks.py
index 9459b39442..e19b8bd0f0 100644
--- a/aeon/testing/estimator_checking/_yield_early_classification_checks.py
+++ b/aeon/testing/estimator_checking/_yield_early_classification_checks.py
@@ -4,6 +4,7 @@
 from sys import platform

 import numpy as np
+from numpy.testing import assert_array_almost_equal
 from sklearn.utils._testing import set_random_state

 from aeon.base._base import _clone_estimator
@@ -13,7 +14,6 @@
     unit_test_proba,
 )
 from aeon.testing.testing_data import FULL_TEST_DATA_DICT
-from aeon.testing.utils.estimator_checks import _assert_array_almost_equal
 from aeon.utils.validation import get_n_cases


@@ -74,7 +74,7 @@ def check_early_classifier_against_expected_results(estimator_class):
         y_proba, _ = estimator_instance.predict_proba(X_test[indices])

         # assert probabilities are the same
-        _assert_array_almost_equal(
+        assert_array_almost_equal(
             y_proba,
             expected_probas,
             decimal=2,
diff --git a/aeon/testing/estimator_checking/_yield_estimator_checks.py b/aeon/testing/estimator_checking/_yield_estimator_checks.py
index 20664bea73..112fc96aee 100644
--- a/aeon/testing/estimator_checking/_yield_estimator_checks.py
+++ b/aeon/testing/estimator_checking/_yield_estimator_checks.py
@@ -1,5 +1,6 @@
 """Tests for all estimators."""

+import inspect
 import numbers
 import pickle
 import types
@@ -10,6 +11,7 @@
 import joblib
 import numpy as np
 import pytest
+from numpy.testing import assert_array_almost_equal
 from sklearn.exceptions import NotFittedError
 from sklearn.utils.estimator_checks import check_get_params_invariance

@@ -64,13 +66,7 @@
 )
 from aeon.testing.testing_data import FULL_TEST_DATA_DICT, _get_datatypes_for_estimator
 from aeon.testing.utils.deep_equals import deep_equals
-from aeon.testing.utils.estimator_checks import (
-    _assert_array_almost_equal,
-    _get_args,
-    _get_tag,
-    _list_required_methods,
-    _run_estimator_method,
-)
+from aeon.testing.utils.estimator_checks import _get_tag, _run_estimator_method
 from aeon.transformations.base import BaseTransformer
 from aeon.transformations.collection import BaseCollectionTransformer
 from aeon.transformations.series import BaseSeriesTransformer
@@ -286,21 +282,22 @@ def check_inheritance(estimator_class):

 def check_has_common_interface(estimator_class):
     """Check estimator implements the common interface."""
-    # Check class for type of attribute
-    if isinstance(estimator_class, BaseAeonEstimator):
-        assert isinstance(estimator_class.is_fitted, property)
-
-    required_methods = _list_required_methods(estimator_class)
-
-    for attr in required_methods:
-        assert hasattr(
-            estimator_class, attr
-        ), f"Estimator: {estimator_class.__name__} does not implement attribute: {attr}"
-
-    if hasattr(estimator_class, "inverse_transform"):
-        assert hasattr(estimator_class, "transform")
-    if hasattr(estimator_class, "predict_proba"):
-        assert hasattr(estimator_class, "predict")
+    assert issubclass(estimator_class, BaseAeonEstimator)
+    assert hasattr(estimator_class, "fit") and callable(estimator_class.fit)
+    assert hasattr(estimator_class, "reset") and callable(estimator_class.reset)
+    assert hasattr(estimator_class, "clone") and callable(estimator_class.clone)
+    assert hasattr(estimator_class, "get_class_tags") and callable(
+        estimator_class.get_class_tags
+    )
+    assert hasattr(estimator_class, "get_class_tag") and callable(
+        estimator_class.get_class_tag
+    )
+    assert hasattr(estimator_class, "get_tags") and callable(estimator_class.get_tags)
+    assert hasattr(estimator_class, "get_tag") and callable(estimator_class.get_tag)
+    assert hasattr(estimator_class, "set_tags") and callable(estimator_class.set_tags)
+    assert hasattr(estimator_class, "get_fitted_params") and callable(
+        estimator_class.get_fitted_params
+    )


 def check_set_params_sklearn(estimator_class):
@@ -322,9 +319,10 @@ def check_set_params_sklearn(estimator_class):
     params_full = estimator.get_params(deep=False)
     params_full.update(params)

-    msg = f"set_params of {estimator_class.__name__} does not return self"
     est_after_set = estimator.set_params(**params_full)
-    assert est_after_set is estimator, msg
+    assert (
+        est_after_set is estimator
+    ), f"set_params of {estimator_class.__name__} does not return self"

     is_equal, equals_msg = deep_equals(
         estimator.get_params(deep=False), params_full, return_msg=True
@@ -361,7 +359,7 @@ def check_constructor(estimator_class):
     assert isinstance(estimator, estimator_class)

     # Ensure that each parameter is set in init
-    init_params = _get_args(type(estimator).__init__)
+    init_params = inspect.signature(estimator_class.__init__).parameters
     invalid_attr = set(init_params) - set(vars(estimator)) - {"self"}
     assert not invalid_attr, (
         "Estimator %s should store all parameters"
@@ -461,8 +459,9 @@ def check_set_params(estimator):
     estimator = _clone_estimator(estimator)
     params = estimator.get_params()

-    msg = f"set_params of {type(estimator).__name__} does not return self"
-    assert estimator.set_params(**params) is estimator, msg
+    assert (
+        estimator.set_params(**params) is estimator
+    ), f"set_params of {type(estimator).__name__} does not return self"

     is_equal, equals_msg = deep_equals(estimator.get_params(), params, return_msg=True)
     msg = (
@@ -559,8 +558,7 @@ def check_non_state_changing_method(estimator, datatype):
         ), f"Estimator: {type(estimator)} has side effects on arguments of {method}"

     # dict_after = dictionary of estimator after predict and fit
-    dict_after = estimator.__dict__
-    is_equal, msg = deep_equals(dict_after, dict_before, return_msg=True)
+    is_equal, msg = deep_equals(estimator.__dict__, dict_before, return_msg=True)
     assert is_equal, (
         f"Estimator: {type(estimator).__name__} changes __dict__ "
         f"during {method}, "
@@ -667,7 +665,7 @@ def check_persistence_via_pickle(estimator, datatype):
         if hasattr(estimator, method) and callable(getattr(estimator, method)):
             output = _run_estimator_method(estimator, method, datatype, "test")

-            _assert_array_almost_equal(
+            assert_array_almost_equal(
                 output,
                 results[i],
                 err_msg=f"Running {method} after fit twice with test "
@@ -699,7 +697,7 @@ def check_fit_deterministic(estimator, datatype):
         if hasattr(estimator, method) and callable(getattr(estimator, method)):
             output = _run_estimator_method(estimator, method, datatype, "test")

-            _assert_array_almost_equal(
+            assert_array_almost_equal(
                 output,
                 results[i],
                 err_msg=f"Running {method} after fit twice with test "
diff --git a/aeon/testing/estimator_checking/_yield_regression_checks.py b/aeon/testing/estimator_checking/_yield_regression_checks.py
index af498a520d..ce0ae00462 100644
--- a/aeon/testing/estimator_checking/_yield_regression_checks.py
+++ b/aeon/testing/estimator_checking/_yield_regression_checks.py
@@ -7,6 +7,7 @@
 from sys import platform

 import numpy as np
+from numpy.testing import assert_array_almost_equal
 from sklearn.utils._testing import set_random_state

 from aeon.base._base import _clone_estimator
@@ -17,7 +18,6 @@
     covid_3month_preds,
 )
 from aeon.testing.testing_data import FULL_TEST_DATA_DICT
-from aeon.testing.utils.estimator_checks import _assert_array_almost_equal


 def _yield_regression_checks(estimator_class, estimator_instances, datatypes):
@@ -92,7 +92,7 @@ def check_regressor_against_expected_results(estimator_class):
         y_pred = estimator_instance.predict(X_test[indices_test])

         # assert predictions are the same
-        _assert_array_almost_equal(
+        assert_array_almost_equal(
             y_pred,
             expected_preds,
             decimal=2,
diff --git a/aeon/testing/estimator_checking/_yield_transformation_checks.py b/aeon/testing/estimator_checking/_yield_transformation_checks.py
index 6383c8797b..88936cd719 100644
--- a/aeon/testing/estimator_checking/_yield_transformation_checks.py
+++ b/aeon/testing/estimator_checking/_yield_transformation_checks.py
@@ -5,6 +5,7 @@

 import numpy as np
 import pandas as pd
+from numpy.testing import assert_array_almost_equal
 from sklearn.utils._testing import set_random_state

 from aeon.base._base import _clone_estimator
@@ -14,10 +15,7 @@
     unit_test_result,
 )
 from aeon.testing.testing_data import FULL_TEST_DATA_DICT
-from aeon.testing.utils.estimator_checks import (
-    _assert_array_almost_equal,
-    _run_estimator_method,
-)
+from aeon.testing.utils.estimator_checks import _run_estimator_method


 def _yield_transformation_checks(estimator_class, estimator_instances, datatypes):
@@ -81,7 +79,7 @@ def check_transformer_against_expected_results(estimator_class):
         )

         # assert results are the same
-        _assert_array_almost_equal(
+        assert_array_almost_equal(
             results,
             expected_results,
             decimal=2,
@@ -105,6 +103,6 @@ def check_transform_inverse_transform_equivalent(estimator, datatype):
     Xit = estimator.inverse_transform(Xt)

     if isinstance(X, pd.DataFrame):
-        _assert_array_almost_equal(X.loc[Xit.index], Xit)
+        assert_array_almost_equal(X.loc[Xit.index], Xit)
     else:
-        _assert_array_almost_equal(X, Xit)
+        assert_array_almost_equal(X, Xit)
diff --git a/aeon/benchmarking/example_results/classification/accuracy/FreshPRINCE_accuracy.csv b/aeon/testing/example_results_files/classification/accuracy/FreshPRINCE_accuracy.csv
similarity index 100%
rename from aeon/benchmarking/example_results/classification/accuracy/FreshPRINCE_accuracy.csv
rename to aeon/testing/example_results_files/classification/accuracy/FreshPRINCE_accuracy.csv
diff --git a/aeon/benchmarking/example_results/classification/accuracy/HC2_accuracy.csv b/aeon/testing/example_results_files/classification/accuracy/HC2_accuracy.csv
similarity index 100%
rename from aeon/benchmarking/example_results/classification/accuracy/HC2_accuracy.csv
rename to aeon/testing/example_results_files/classification/accuracy/HC2_accuracy.csv
diff --git a/aeon/benchmarking/example_results/classification/accuracy/InceptionTime_accuracy.csv b/aeon/testing/example_results_files/classification/accuracy/InceptionTime_accuracy.csv
similarity index 100%
rename from aeon/benchmarking/example_results/classification/accuracy/InceptionTime_accuracy.csv
rename to aeon/testing/example_results_files/classification/accuracy/InceptionTime_accuracy.csv
diff --git a/aeon/benchmarking/example_results/classification/accuracy/WEASEL-2.0_accuracy.csv b/aeon/testing/example_results_files/classification/accuracy/WEASEL-2.0_accuracy.csv
similarity index 100%
rename from aeon/benchmarking/example_results/classification/accuracy/WEASEL-2.0_accuracy.csv
rename to aeon/testing/example_results_files/classification/accuracy/WEASEL-2.0_accuracy.csv
diff --git a/aeon/testing/testing_config.py b/aeon/testing/testing_config.py
index fde6662879..260bf48b74 100644
--- a/aeon/testing/testing_config.py
+++ b/aeon/testing/testing_config.py
@@ -3,7 +3,7 @@
 __maintainer__ = ["MatthewMiddlehurst"]
 __all__ = ["PR_TESTING", "EXCLUDE_ESTIMATORS", "EXCLUDED_TESTS"]

-import aeon.testing.utils._cicd_numba_caching  # noqa: F401
+import aeon.testing._cicd_numba_caching  # noqa: F401

 # whether to use smaller parameter matrices for test generation and subsample estimators
 # per os/version default is False, can be set to True by pytest --prtesting True flag
diff --git a/aeon/testing/tests/test_all_estimators.py b/aeon/testing/tests/test_all_estimators.py
index f2e747045d..2716021bba 100644
--- a/aeon/testing/tests/test_all_estimators.py
+++ b/aeon/testing/tests/test_all_estimators.py
@@ -3,10 +3,12 @@
 import platform
 import sys

+import numpy as np
+from sklearn.utils import check_random_state
+
 from aeon.testing.estimator_checking import parametrize_with_checks
 from aeon.testing.testing_config import PR_TESTING
 from aeon.utils.discovery import all_estimators
-from aeon.utils.sampling import random_partition

 ALL_TEST_ESTIMATORS = all_estimators(return_names=False, include_sklearn=False)

@@ -15,28 +17,29 @@
 # but all are tested on every OS at least once, and on every python version once
 if PR_TESTING:
     # only use 3 Python versions in PR
-    ix = sys.version_info.minor
-    if ix == 9:
-        ix = 0
-    elif ix == 11:
-        ix = 1
-    elif ix == 12:
-        ix = 2
+    i = sys.version_info.minor
+    if i == 9:
+        i = 0
+    elif i == 11:
+        i = 1
+    elif i == 12:
+        i = 2

     os_str = platform.system()
     if os_str == "Windows":
-        ix = ix
+        i = i
     elif os_str == "Linux":
-        ix = ix + 1
+        i = i + 1
     elif os_str == "Darwin":
-        ix = ix + 2
+        i = i + 2
+
+    i = i % 3

-    ix = ix % 3
+    rng = check_random_state(42)
+    idx = np.arange(len(ALL_TEST_ESTIMATORS))
+    rng.shuffle(idx)

-    ALL_TEST_ESTIMATORS = [
-        ALL_TEST_ESTIMATORS[i]
-        for i in random_partition(len(ALL_TEST_ESTIMATORS), 3)[ix]
-    ]
+    ALL_TEST_ESTIMATORS = [ALL_TEST_ESTIMATORS[n] for n in idx[i::3]]


 @parametrize_with_checks(ALL_TEST_ESTIMATORS)
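Editor's note: the PR-testing subsample above replaces `random_partition` with a seeded shuffle plus strided slicing. Because `idx[0::3]`, `idx[1::3]` and `idx[2::3]` are disjoint slices of one permutation, every estimator lands in exactly one bucket, and the assignment is identical across CI jobs since the seed is fixed. A small sketch:

    import numpy as np
    from sklearn.utils import check_random_state

    items = list("abcdefghij")

    rng = check_random_state(42)
    idx = np.arange(len(items))
    rng.shuffle(idx)

    buckets = [[items[n] for n in idx[i::3]] for i in range(3)]
    # Strided slices of one permutation are disjoint and cover every index.
    assert sorted(sum(buckets, [])) == sorted(items)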
+ Indication of what is the reason for not being equal """ + eq, msg = _deep_equals(x, y, 0) + return eq if not return_msg else (eq, msg) - def ret(is_equal, msg): - if return_msg: - if is_equal: - msg = "" - return is_equal, msg - else: - return is_equal +def _deep_equals(x, y, depth): + if x is y: + return True, "" if type(x) is not type(y): - return ret(False, f".type, x.type = {type(x)} != y.type = {type(y)}") + return False, f"x.type ({type(x)}) != y.type ({type(y)}), depth={depth}" - # compute delayed objects (dask) - if hasattr(x, "compute"): - x = x.compute() - if hasattr(y, "compute"): - y = y.compute() - - # we now know all types are the same - # so now we compare values if isinstance(x, pd.Series): - if x.dtype != y.dtype: - return ret(False, f".dtype, x.dtype= {x.dtype} != y.dtype = {y.dtype}") - # if columns are object, recurse over entries and index - if x.dtype == "object": - index_equal = x.index.equals(y.index) - values_equal, values_msg = deep_equals( - list(x.values), list(y.values), return_msg=True - ) - if not values_equal: - msg = ".values" + values_msg - elif not index_equal: - msg = f".index, x.index: {x.index}, y.index: {y.index}" - else: - msg = "" - return ret(index_equal and values_equal, msg) - else: - return ret(x.equals(y), f".series_equals, x = {x} != y = {y}") + return _series_equals(x, y, depth) elif isinstance(x, pd.DataFrame): - if not x.columns.equals(y.columns): - return ret( - False, f".columns, x.columns = {x.columns} != y.columns = {y.columns}" - ) - # if columns are equal and at least one is object, recurse over Series - if sum(x.dtypes == "object") > 0: - for c in x.columns: - is_equal, msg = deep_equals(x[c], y[c], return_msg=True) - if not is_equal: - return ret(False, f'["{c}"]' + msg) - return ret(True, "") - else: - return ret(x.equals(y), f".df_equals, x = {x} != y = {y}") - elif isinstance(x, pd.Index): - return ret(x.equals(y), f".index_equals, x = {x} != y = {y}") + return _dataframe_equals(x, y, depth) elif isinstance(x, np.ndarray): - if x.dtype != y.dtype: - return ret(False, f".dtype, x.dtype = {x.dtype} != y.dtype = {y.dtype}") - return ret(np.array_equal(x, y, equal_nan=True), ".values") - # recursion through lists, tuples and dicts + return _numpy_equals(x, y, depth) elif isinstance(x, (list, tuple)): - return ret(*_tuple_equals(x, y, return_msg=True)) + return _list_equals(x, y, depth) elif isinstance(x, dict): - return ret(*_dict_equals(x, y, return_msg=True)) - elif _is_np_nan(x): - return ret(_is_np_nan(y), f"type(x)={type(x)} != type(y)={type(y)}") + return _dict_equals(x, y, depth) + elif isinstance(x, csr_matrix): + return _csrmatrix_equals(x, y, depth) + # non-iterable types elif isclass(x): - return ret(x == y, f".class, x={x.__name__} != y={y.__name__}") - elif type(x).__name__ == "ForecastingHorizon": - return ret(*_fh_equals(x, y, return_msg=True)) - elif isinstance(x != y, bool) and x != y: - return ret(False, f" !=, {x} != {y}") - # csr-matrix must not be compared using np.any(x!=y) - elif type(x).__name__ == "csr_matrix": # isinstance(x, csr_matrix): - if not np.allclose(x.toarray(), y.toarray()): - return ret(False, f" !=, {x} != {y}") - elif np.any(x != y): - return ret(False, f" !=, {x} != {y}") - return ret(True, "") + eq = x == y + msg = "" if eq else f"x ({x.__name__}) != y ({y.__name__}), depth={depth}" + return eq, msg + elif np.isnan(x): + eq = np.isnan(y) + msg = "" if eq else f"x ({x}) != y ({y}), depth={depth}" + return eq, msg + elif isinstance(x == y, bool): + eq = x == y + msg = "" if eq else f"x ({x}) 
!= y ({y}), depth={depth}" + return eq, msg + # unknown type + else: + raise ValueError(f"Unknown type: {type(x)}, depth={depth}") + + +def _series_equals(x, y, depth): + if x.dtype != y.dtype: + return False, f"x.dtype ({x.dtype}) != y.dtype ({y.dtype}), depth={depth}" + + # if columns are object, recurse over entries and index + if x.dtype == "object": + index_equal = x.index.equals(y.index) + values_equal, values_msg = _deep_equals(list(x.values), list(y.values), depth) + + if not values_equal: + msg = values_msg + elif not index_equal: + msg = f".index, x.index: {x.index}, y.index: {y.index}, depth={depth}" + else: + msg = "" + return index_equal and values_equal, msg + else: + eq = x.equals(y) + msg = "" if eq else f"x ({x}) != y ({y}), depth={depth}" + return eq, msg -def _is_np_nan(x): - return isinstance(x, float) and np.isnan(x) +def _dataframe_equals(x, y, depth): + if not x.columns.equals(y.columns): + return False, f"x.columns ({x.columns}) != y.columns ({y.columns})" -def _tuple_equals(x, y, return_msg=False): - """Test two tuples or lists for equality. + # if columns are equal and at least one is object, recurse over Series + if sum(x.dtypes == "object") > 0: + for i, c in enumerate(x.columns): + eq, msg = _deep_equals(x[c], y[c], depth + 1) - Correct if tuples/lists contain the following valid types: - types compatible with != comparison - pd.Series, pd.DataFrame, np.ndarray - lists, tuples, or dicts of a valid type (recursive) + if not eq: + return False, msg + f", idx={i}" + return True, "" + else: + eq = x.equals(y) + msg = "" if eq else f"x ({x}) != y ({y}), depth={depth}" + return eq, msg - Parameters - ---------- - x: tuple or list - y: tuple or list - return_msg : bool, optional, default=False - whether to return informative message about what is not equal - Returns - ------- - is_equal: bool - True if x and y are equal in value - x and y do not need to be equal in reference - msg : str, only returned if return_msg = True - indication of what is the reason for not being equal - concatenation of the following elements: - .len - length is not equal - [i] - i-th element not equal - """ - - def ret(is_equal, msg): - if return_msg: - if is_equal: - msg = "" - return is_equal, msg - else: - return is_equal - - n = len(x) - - if n != len(y): - return ret(False, f".len, x.len = {n} != y.len = {len(y)}") +def _numpy_equals(x, y, depth): + if x.dtype != y.dtype: + return False, f"x.dtype ({x.dtype}) != y.dtype ({y.dtype})" - # we now know dicts are same length - for i in range(n): - xi = x[i] - yi = y[i] + eq = np.array_equal(x, y, equal_nan=True) + msg = "" if eq else f"x ({x}) != y ({y}), depth={depth}" + return eq, msg - # recurse through xi/yi - is_equal, msg = deep_equals(xi, yi, return_msg=True) - if not is_equal: - return ret(False, f"[{i}]" + msg) - return ret(True, "") +def _csrmatrix_equals(x, y, depth): + if not np.allclose(x.toarray(), y.toarray()): + return False, f"x ({x}) != y ({y}), depth={depth}" + return True, "" -def _dict_equals(x, y, return_msg=False): - """Test two dicts for equality. 
+def _list_equals(x, y, depth):
+    if len(x) != len(y):
+        return False, f"x.len ({len(x)}) != y.len ({len(y)}), depth={depth}"

-    Correct if dicts contain the following valid types:
-        types compatible with != comparison
-        pd.Series, pd.DataFrame, np.ndarray
-        lists, tuples, or dicts of a valid type (recursive)
+    for i in range(len(x)):
+        eq, msg = _deep_equals(x[i], y[i], depth + 1)

-    Parameters
-    ----------
-    x: dict
-    y: dict
-    return_msg : bool, optional, default=False
-        whether to return informative message about what is not equal
-
-    Returns
-    -------
-    is_equal: bool
-        True if x and y are equal in value
-        x and y do not need to be equal in reference
-    msg : str, only returned if return_msg = True
-        indication of what is the reason for not being equal
-        concatenation of the following strings:
-        .keys - keys are not equal
-        [key] - values at key is not equal
-    """
+        if not eq:
+            return False, msg + f", idx={i}"
+    return True, ""

-    def ret(is_equal, msg):
-        if return_msg:
-            if is_equal:
-                msg = ""
-            return is_equal, msg
-        else:
-            return is_equal

+def _dict_equals(x, y, depth):
     xkeys = set(x.keys())
     ykeys = set(y.keys())
-
     if xkeys != ykeys:
         xmy = xkeys.difference(ykeys)
         ymx = ykeys.difference(xkeys)
-        diffmsg = ".keys,"
+
+        msg = "x.keys != y.keys"
         if len(xmy) > 0:
-            diffmsg += f" x.keys-y.keys = {xmy}."
+            msg += f", x.keys-y.keys = {xmy}"
         if len(ymx) > 0:
-            diffmsg += f" y.keys-x.keys = {ymx}."
-        return ret(False, diffmsg)
-
-    # we now know that xkeys == ykeys
-    for key in xkeys:
-        xi = x[key]
-        yi = y[key]
+            msg += f", y.keys-x.keys = {ymx}"

-        # recurse through xi/yi
-        is_equal, msg = deep_equals(xi, yi, return_msg=True)
-        if not is_equal:
-            return ret(False, f"[{key}]" + msg)
+        return False, msg + f", depth={depth}"

-    return ret(True, "")
-
-
-def _fh_equals(x, y, return_msg=False):
-    """Test two forecasting horizons for equality.
-
-    Correct if both x and y are ForecastingHorizon
-
-    Parameters
-    ----------
-    x: ForcastingHorizon
-    y: ForcastingHorizon
-    return_msg : bool, optional, default=False
-        whether to return informative message about what is not equal
-
-    Returns
-    -------
-    is_equal: bool
-        True if x and y are equal in value
-        x and y do not need to be equal in reference
-    msg : str, only returned if return_msg = True
-        indication of what is the reason for not being equal
-        concatenation of the following strings:
-        .is_relative - x is absolute and y is relative, or vice versa
-        .values - values of x and y are not equal
-    """
-
-    def ret(is_equal, msg):
-        if return_msg:
-            if is_equal:
-                msg = ""
-            return is_equal, msg
-        else:
-            return is_equal
-
-    if x.is_relative != y.is_relative:
-        return ret(False, ".is_relative")
-
-    # recurse through values of x, y
-    is_equal, msg = deep_equals(x._values, y._values, return_msg=True)
-    if not is_equal:
-        return ret(False, ".values" + msg)
+    # we now know that xkeys == ykeys
+    for i, key in enumerate(xkeys):
+        eq, msg = _deep_equals(x[key], y[key], depth + 1)
-    return ret(True, "")
+        if not eq:
+            return False, msg + f", idx={i}"
+    return True, ""
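# Sketch of the message a key mismatch now produces (illustrative; the dicts
# are made-up inputs, not part of the patch):
#
#     eq, msg = deep_equals({"a": 1}, {"b": 1}, return_msg=True)
#     # msg: "x.keys != y.keys, x.keys-y.keys = {'a'}, y.keys-x.keys = {'b'}, depth=0"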
diff --git a/aeon/testing/utils/estimator_checks.py b/aeon/testing/utils/estimator_checks.py
index 18227002ba..f5db28964b 100644
--- a/aeon/testing/utils/estimator_checks.py
+++ b/aeon/testing/utils/estimator_checks.py
@@ -1,18 +1,12 @@
 """Utility function for estimator testing."""

-__maintainer__ = []
+__maintainer__ = ["MatthewMiddlehurst"]

 import inspect
-from inspect import isclass, signature
+from inspect import isclass

-import numpy as np
-
-from aeon.base import BaseAeonEstimator
-from aeon.clustering.base import BaseClusterer
-from aeon.regression.base import BaseRegressor
 from aeon.similarity_search.base import BaseSimilaritySearch
 from aeon.testing.testing_data import FULL_TEST_DATA_DICT
-from aeon.transformations.base import BaseTransformer


 def _run_estimator_method(estimator, method_name, datatype, split):
@@ -64,68 +58,3 @@ def _get_tag(estimator, tag_name, default=None, raise_error=False):
     return estimator.get_tag(
         tag_name=tag_name, raise_error=raise_error, tag_value_default=default
     )
-
-
-def _list_required_methods(estimator):
-    """Return list of required method names (beyond BaseAeonEstimator ones)."""
-    # all BaseAeonEstimator children must implement these
-    MUST_HAVE_FOR_OBJECTS = ["set_params", "get_params"]
-
-    # all BaseAeonEstimator children must implement these
-    MUST_HAVE_FOR_ESTIMATORS = [
-        "fit",
-        "check_is_fitted",
-        "is_fitted",  # read-only property
-    ]
-    # prediction/forecasting base classes that must have predict
-    BASE_CLASSES_THAT_MUST_HAVE_PREDICT = (
-        BaseClusterer,
-        BaseRegressor,
-    )
-    # transformation base classes that must have transform
-    BASE_CLASSES_THAT_MUST_HAVE_TRANSFORM = (BaseTransformer,)
-
-    required_methods = []
-
-    if isinstance(estimator, BaseAeonEstimator):
-        required_methods += MUST_HAVE_FOR_OBJECTS
-
-    if isinstance(estimator, BaseAeonEstimator):
-        required_methods += MUST_HAVE_FOR_ESTIMATORS
-
-    if isinstance(estimator, BASE_CLASSES_THAT_MUST_HAVE_PREDICT):
-        required_methods += ["predict"]
-
-    if isinstance(estimator, BASE_CLASSES_THAT_MUST_HAVE_TRANSFORM):
-        required_methods += ["transform"]
-
-    return required_methods
-
-
-def _assert_array_almost_equal(x, y, decimal=6, err_msg=""):
-    np.testing.assert_array_almost_equal(x, y, decimal=decimal, err_msg=err_msg)
-
-
-def _get_args(function, varargs=False):
-    """Get function arguments."""
-    try:
-        params = signature(function).parameters
-    except ValueError:
-        # Error on builtin C function
-        return []
-    args = [
-        key
-        for key, param in params.items()
-        if param.kind not in (param.VAR_POSITIONAL, param.VAR_KEYWORD)
-    ]
-    if varargs:
-        varargs = [
-            param.name
-            for param in params.values()
-            if param.kind == param.VAR_POSITIONAL
-        ]
-        if len(varargs) == 0:
-            varargs = None
-        return args, varargs
-    else:
-        return args
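# The deleted _assert_array_almost_equal was a thin pass-through, so call
# sites elsewhere in this patch switch to numpy's public helper directly
# (sketch; y_pred and clf stand in for the fitted objects in those tests):
#
#     from numpy.testing import assert_array_almost_equal
#
#     assert_array_almost_equal(y_pred, clf.predict(X_test))  # decimal=6 default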
diff --git a/aeon/testing/utils/tests/test_deep_equals.py b/aeon/testing/utils/tests/test_deep_equals.py
index 63cdb6c0c0..f2d143d236 100644
--- a/aeon/testing/utils/tests/test_deep_equals.py
+++ b/aeon/testing/utils/tests/test_deep_equals.py
@@ -5,11 +5,12 @@
 import numpy as np
 import pandas as pd
 import pytest
+from scipy.sparse import csr_matrix

 from aeon.testing.utils.deep_equals import deep_equals

 # examples used for comparison below
-EXAMPLES = [
+DEEPEQUALS_ITEMS = [
     42,
     [],
     (()),
@@ -19,41 +20,52 @@
     3.5,
     4.2,
     np.nan,
+    pd.Series([1, 2], ["a", "b"]),
     pd.DataFrame({"a": [4, 2]}),
     pd.DataFrame({"a": [4, 3]}),
     (np.array([1, 2, 4]), [pd.DataFrame({"a": [4, 2]})]),
     {"foo": [42], "bar": pd.Series([1, 2])},
-    {"bar": [42], "foo": pd.Series([1, 2])},
+    {"bar": [12], "foo": pd.Series([1, 2])},
+    csr_matrix([1, 2, 3]),
+]
+DEEPEQUALS_PAIRS = [
+    (DEEPEQUALS_ITEMS[i], DEEPEQUALS_ITEMS[j])
+    for i in range(len(DEEPEQUALS_ITEMS))
+    for j in range(len(DEEPEQUALS_ITEMS))
+    if i != j
 ]


-@pytest.mark.parametrize("fixture", EXAMPLES)
-def test_deep_equals_positive(fixture):
+@pytest.mark.parametrize("item", DEEPEQUALS_ITEMS)
+def test_deep_equals_positive(item):
     """Tests that deep_equals correctly identifies equal objects as equal."""
-    x = deepcopy(fixture)
-    y = deepcopy(fixture)
+    x = deepcopy(item)
+    y = deepcopy(item)
+    eq, msg = deep_equals(x, y, return_msg=True)
     msg = (
-        f"deep_copy incorrectly returned False for two identical copies of "
-        f"the following object: {x}"
+        f"deep_equals incorrectly returned False for two identical copies of "
+        f"the following object: {x}. msg = {msg}"
     )
-    assert deep_equals(x, y), msg
-
-
-n = len(EXAMPLES)
-DIFFERENT_PAIRS = [
-    (EXAMPLES[i], EXAMPLES[j]) for i in range(n) for j in range(n) if i != j
-]
+    assert eq, msg


-@pytest.mark.parametrize("fixture1,fixture2", DIFFERENT_PAIRS)
-def test_deep_equals_negative(fixture1, fixture2):
+@pytest.mark.parametrize("item1, item2", DEEPEQUALS_PAIRS)
+def test_deep_equals_negative(item1, item2):
     """Tests that deep_equals correctly identifies unequal objects as unequal."""
-    x = deepcopy(fixture1)
-    y = deepcopy(fixture2)
+    x = deepcopy(item1)
+    y = deepcopy(item2)
+    eq = deep_equals(x, y)
     msg = (
-        f"deep_copy incorrectly returned True when comparing "
-        f"the following, different objects: x={x}, y={y}"
+        f"deep_equals incorrectly returned True when comparing "
+        f"the following, different objects: x={x}, y={y}."
     )
-    assert not deep_equals(x, y), msg
+    assert not eq, msg
+
+
+def test_deep_equals_same():
+    """Tests that deep_equals correctly identifies the same object as equal."""
+    x = [1, 2, 3]
+    eq = deep_equals(x, x)
+    assert eq
diff --git a/aeon/transformations/collection/compose/tests/test_pipeline.py b/aeon/transformations/collection/compose/tests/test_pipeline.py
index 14fb6eb700..a3a8ca2d1e 100644
--- a/aeon/transformations/collection/compose/tests/test_pipeline.py
+++ b/aeon/transformations/collection/compose/tests/test_pipeline.py
@@ -3,6 +3,7 @@
 __maintainer__ = ["MatthewMiddlehurst"]

 import pytest
+from numpy.testing import assert_array_almost_equal
 from sklearn.preprocessing import StandardScaler

 from aeon.testing.data_generation import (
@@ -10,7 +11,6 @@
     make_example_3d_numpy_list,
 )
 from aeon.testing.mock_estimators import MockCollectionTransformer
-from aeon.testing.utils.estimator_checks import _assert_array_almost_equal
 from aeon.transformations.collection import (
     AutocorrelationFunctionTransformer,
     HOG1DTransformer,
@@ -50,7 +50,7 @@ def test_collection_transform_pipeline(transformers):
     for t in transformers:
         X = t.fit_transform(X, y)

-    _assert_array_almost_equal(Xt, X)
+    assert_array_almost_equal(Xt, X)


 def test_unequal_tag_inference():
diff --git a/aeon/utils/sampling.py b/aeon/utils/sampling.py
deleted file mode 100644
index 2860c82b8d..0000000000
--- a/aeon/utils/sampling.py
+++ /dev/null
@@ -1,40 +0,0 @@
-"""Functions to sample aeon datasets.
-
-Used in experiments to get deterministic resamples.
-"""
-
-import random
-
-
-def random_partition(n, k=2, seed=42):
-    """Construct a uniformly random partition, iloc reference.
-
-    Parameters
-    ----------
-    n : int
-        size of set to partition
-    k : int, optional, default=2
-        number of sets to partition into
-    seed : int
-        random seed, used in random.shuffle
-
-    Returns
-    -------
-    parts : list of list of int
-        elements of `parts` are lists of iloc int indices between 0 and n-1
-        elements of `parts` are of length floor(n / k) or ceil(n / k)
-        elements of `parts`, as sets, are disjoint partition of [0, ..., n-1]
-        elements of elements of `parts` are in no particular order
-        `parts` is sampled uniformly at random, subject to the above properties
-    """
-    rng = random.Random(seed)
-    idx = list(range(n))
-    rng.shuffle(idx)
-
-    parts = []
-    for i in range(k):
-        d = round(len(idx) / (k - i))
-        parts += [idx[:d]]
-        idx = idx[d:]
-
-    return parts
diff --git a/aeon/utils/tests/test_sampling.py b/aeon/utils/tests/test_sampling.py
deleted file mode 100644
index 68a4f84f98..0000000000
--- a/aeon/utils/tests/test_sampling.py
+++ /dev/null
@@ -1,43 +0,0 @@
-"""Testing sampling utilities."""
-
-import pytest
-
-from aeon.testing.utils.deep_equals import deep_equals
-from aeon.utils.sampling import random_partition
-
-NK_FIXTURES = [(10, 3), (15, 5), (19, 6), (3, 1), (1, 2)]
-SEED_FIXTURES = [42, 0, 100, -5]
-
-
-@pytest.mark.parametrize("n, k", NK_FIXTURES)
-def test_partition(n, k):
-    """Test that random_partition returns a disjoint partition."""
-    part = random_partition(n, k)
-
-    assert isinstance(part, list)
-    assert all(isinstance(x, list) for x in part)
-    assert all(isinstance(x, int) for y in part for x in y)
-
-    low_size = n // k
-    hi_size = low_size + 1
-    assert all(len(x) == low_size or len(x) == hi_size for x in part)
-
-    part_union = set()
-    for x in part:
-        part_union = part_union.union(x)
-    assert set(range(n)) == part_union
-
-    for i, x in enumerate(part):
-        for j, y in enumerate(part):
-            if i != j:
-                assert len(set(x).intersection(y)) == 0
-
-
-@pytest.mark.parametrize("seed", SEED_FIXTURES)
-@pytest.mark.parametrize("n, k", NK_FIXTURES)
-def test_seed(n, k, seed):
-    """Test that seed is deterministic."""
-    part = random_partition(n, k, seed)
-    part2 = random_partition(n, k, seed)
-
-    assert deep_equals(part, part2)
diff --git a/aeon/utils/validation/tests/test_series.py b/aeon/utils/validation/tests/test_series.py
index be07ddf3c7..f7f6ed4621 100644
--- a/aeon/utils/validation/tests/test_series.py
+++ b/aeon/utils/validation/tests/test_series.py
@@ -2,12 +2,13 @@

 __maintainer__ = ["TonyBagnall"]

+from typing import Optional, Union
+
 import numpy as np
 import pandas as pd
 import pytest

 from aeon.testing.data_generation import (
-    _make_hierarchical,
     make_example_1d_numpy,
     make_example_2d_numpy_series,
     make_example_3d_numpy,
@@ -64,3 +65,113 @@ def test_check_series():
     with pytest.raises(ValueError, match="Input type of y should be one "):
         check_series(None)  # check
+
+
+def _make_hierarchical(
+    hierarchy_levels: tuple = (2, 4),
+    max_timepoints: int = 12,
+    min_timepoints: int = 12,
+    same_cutoff: bool = True,
+    n_columns: int = 1,
+    all_positive: bool = True,
+    index_type: Optional[str] = None,
+    random_state: Optional[Union[int, np.random.RandomState]] = None,
+    add_nan: bool = False,
+) -> pd.DataFrame:
+    """Generate hierarchical multiindex data for testing.
+
+    Parameters
+    ----------
+    hierarchy_levels : tuple, optional
+        the number of groups at each hierarchy level, by default (2, 4)
+    max_timepoints : int, optional
+        maximum time points a series can have, by default 12
+    min_timepoints : int, optional
+        minimum time points a series can have, by default 12
+    same_cutoff : bool, optional
+        If True, all series end at the same date, by default True
+    n_columns : int, optional
+        number of columns in the output dataframe, by default 1
+    all_positive : bool, optional
+        If True, all values in the series are positive, by default True
+    index_type : str, optional
+        type of index, by default None
+        Supported types are "period", "datetime", "range" or "int".
+        If it's not provided, "datetime" is selected.
+    random_state : int, np.random.RandomState or None
+        Controls the randomness of the generator, by default None
+    add_nan : bool, optional
+        If True, the series will contain NaNs, by default False
+
+    Returns
+    -------
+    pd.DataFrame
+        hierarchical dataframe
+    """
+    from itertools import product
+
+    from sklearn.utils import check_random_state
+
+    def _make_index(n_timepoints, index_type=None):
+        """Make indices for unit testing."""
+        if index_type == "period":
+            start = "2000-01"
+            freq = "M"
+            return pd.period_range(start=start, periods=n_timepoints, freq=freq)
+
+        elif index_type == "datetime" or index_type is None:
+            start = "2000-01-01"
+            freq = "D"
+            return pd.date_range(start=start, periods=n_timepoints, freq=freq)
+
+        elif index_type == "range":
+            start = 3  # check non-zero based indices
+            return pd.RangeIndex(start=start, stop=start + n_timepoints)
+
+        elif index_type == "int":
+            start = 3
+            return pd.Index(np.arange(start, start + n_timepoints), dtype=int)
+
+        else:
+            raise ValueError(f"index_class: {index_type} is not supported")
+
+    levels = [
+        [f"h{i}_{j}" for j in range(hierarchy_levels[i])]
+        for i in range(len(hierarchy_levels))
+    ]
+    level_names = [f"h{i}" for i in range(len(hierarchy_levels))]
+    rng = check_random_state(random_state)
+    if min_timepoints == max_timepoints:
+        time_index = _make_index(max_timepoints, index_type)
+        index = pd.MultiIndex.from_product(
+            levels + [time_index], names=level_names + ["time"]
+        )
+    else:
+        df_list = []
+        for levels_tuple in product(*levels):
+            # randint's high bound is exclusive: lengths fall in [min, max)
+            n_timepoints = rng.randint(low=min_timepoints, high=max_timepoints)
+            if same_cutoff:
+                time_index = _make_index(max_timepoints, index_type)[-n_timepoints:]
+            else:
+                time_index = _make_index(n_timepoints, index_type)
+            d = dict(zip(level_names, levels_tuple))
+            d["time"] = time_index
+            df_list.append(pd.DataFrame(d))
+        index = pd.MultiIndex.from_frame(
+            pd.concat(df_list), names=level_names + ["time"]
+        )
+
+    total_time_points = len(index)
+    data = rng.normal(size=(total_time_points, n_columns))
+    if add_nan:
+        # add some nan values
+        data[int(len(data) / 2)] = np.nan
+        data[0] = np.nan
+        data[-1] = np.nan
+    if all_positive:
+        data -= np.min(data, axis=0) - 1
+    df = pd.DataFrame(
+        data=data, index=index, columns=[f"c{i}" for i in range(n_columns)]
+    )
+
+    return df
diff --git a/aeon/visualisation/results/tests/test_boxplot.py b/aeon/visualisation/results/tests/test_boxplot.py
index cd35f423d2..81b27cbfa0 100644
--- a/aeon/visualisation/results/tests/test_boxplot.py
+++ b/aeon/visualisation/results/tests/test_boxplot.py
@@ -12,7 +12,7 @@

 data_path = os.path.join(
     os.path.dirname(aeon.__file__),
-    "benchmarking/example_results/",
+    "testing/example_results_files/",
 )
diff --git a/aeon/visualisation/results/tests/test_critical_difference.py b/aeon/visualisation/results/tests/test_critical_difference.py
index bcd6645417..2ed87bc96a 100644
--- a/aeon/visualisation/results/tests/test_critical_difference.py
+++ b/aeon/visualisation/results/tests/test_critical_difference.py
@@ -16,7 +16,7 @@

 data_path = os.path.join(
     os.path.dirname(aeon.__file__),
-    "benchmarking/example_results/",
+    "testing/example_results_files/",
 )

 test_clique1 = np.array(
diff --git a/aeon/visualisation/results/tests/test_scatter.py b/aeon/visualisation/results/tests/test_scatter.py
index 7d7a61616e..0c3f4d5bf8 100644
--- a/aeon/visualisation/results/tests/test_scatter.py
+++ b/aeon/visualisation/results/tests/test_scatter.py
@@ -18,7 +18,7 @@

 data_path = os.path.join(
     os.path.dirname(aeon.__file__),
"benchmarking/example_results/", + "testing/example_results_files/", ) diff --git a/aeon/visualisation/results/tests/test_significance.py b/aeon/visualisation/results/tests/test_significance.py index 71b4a456c3..4568d13c92 100644 --- a/aeon/visualisation/results/tests/test_significance.py +++ b/aeon/visualisation/results/tests/test_significance.py @@ -13,7 +13,7 @@ data_path = os.path.join( os.path.dirname(aeon.__file__), - "benchmarking/example_results/", + "testing/example_results_files/", )