diff --git a/pyproject.toml b/pyproject.toml
index 67cb462e..0840fa92 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -23,7 +23,7 @@ exclude = '''
 
 [tool.poetry]
 name = 'climate_indices'
-version = '2.0.0'
+version = '2.0.1'
 description = 'Reference implementations of various climate indices typically used for drought monitoring'
 authors = ['James Adams ']
 readme = 'README.md'
@@ -44,19 +44,21 @@ classifiers = [
 packages = [{include = 'climate_indices', from = 'src'}]
 
 [tool.poetry.dependencies]
-cftime = '>=1.6.2'
-dask = '>=2022.2.0'
-h5netcdf = '>=1.1.0'
+# only Python and scipy are required for the base library code
 python = '>=3.8,<3.12'
-scipy = '>=1.10.0'
+scipy = '1.10.1'
+# remaining dependencies are required for the CLI (console) scripts
+cftime = '>=1.6.4'
+dask = '>=2023.5.0'
+h5netcdf = '>=1.1.0'
 xarray = '>=2023.1.0'
 
 [tool.poetry.dev-dependencies]
-pytest = '*'
+pytest = '8.3.3'
 toml = '>=0.10.2'
 
 [tool.poetry.group.dev.dependencies]
-sphinx-autodoc-typehints = "^1.23.3"
+sphinx-autodoc-typehints = '2.0.1'
 
 [tool.poetry.scripts]
 process_climate_indices = 'climate_indices.__main__:main'
@@ -66,4 +68,3 @@ spi = 'climate_indices.__spi__:main'
 filterwarnings = [
     'ignore::FutureWarning',
 ]
-
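
The dependency comment above notes that only Python and scipy are needed for the base library code, with the remaining packages serving the CLI scripts. As a minimal illustration (not part of this patch) of calling the core API directly on a plain NumPy array -- the argument order follows the indices.spi() calls in the tests/test_indices.py hunks below, and the synthetic data and years here are hypothetical:

import numpy as np

from climate_indices import compute, indices

# 40 years of synthetic monthly precipitation, in millimeters
precip_mm = np.random.default_rng(0).gamma(shape=2.0, scale=30.0, size=40 * 12)

# 3-month SPI fitted with a gamma distribution, calibrated on the full record
spi_3_month = indices.spi(
    precip_mm,
    3,                            # scale, in months
    indices.Distribution.gamma,   # distribution fitted to the sliding sums
    1980,                         # first year of the data
    1980,                         # calibration period start year
    2019,                         # calibration period end year
    compute.Periodicity.monthly,
)
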
diff --git a/src/climate_indices/__main__.py b/src/climate_indices/__main__.py
index 5f1e5a17..4e0eceb4 100644
--- a/src/climate_indices/__main__.py
+++ b/src/climate_indices/__main__.py
@@ -7,7 +7,7 @@
 import logging
 import multiprocessing
 import os
-from typing import Any, Dict, List
+from typing import Any, Dict, List, Tuple
 
 import numpy as np
 import scipy.constants
@@ -563,7 +563,7 @@ def _drop_data_into_shared_arrays_grid(
     var_names: list,
     periodicity: compute.Periodicity,
     data_start_year: int,
-):
+) -> Tuple[int, ...]:
     output_shape = None
 
     # get the data arrays we'll use later in the index computations
@@ -626,9 +626,16 @@ def _drop_data_into_shared_arrays_divisions(
-    dataset,
+    dataset: xr.Dataset,
     var_names: List[str],
-):
+) -> Tuple[int, ...]:
+    """
+    Drop data into shared arrays for use in the index computations.
+
+    :param dataset: dataset containing the variables to be loaded into shared arrays
+    :param var_names: names of the variables to load
+    :return: the shape of the output arrays
+    """
     output_shape = None
 
     # get the data arrays we'll use later in the index computations
@@ -874,7 +881,7 @@ def _compute_write_index(keyword_arguments):
 
     # TODO once we support daily Palmers then we'll need to convert values
     # from a 366-day calendar back into a normal/Gregorian calendar
-    # get the computedPDSI data as an array of float32 values
+    # get the computed PDSI data as an array of float32 values
     array = _global_shared_arrays[_KEY_RESULT_PDSI][_KEY_ARRAY]
     shape = _global_shared_arrays[_KEY_RESULT_PDSI][_KEY_SHAPE]
     pdsi = np.frombuffer(array.get_obj()).reshape(shape).astype(float)
@@ -1579,6 +1586,29 @@ def main():  # type: () -> None
         arguments = parser.parse_args()
 
+        process_climate_indices(arguments=arguments)
+
+        # report the elapsed time
+        end_datetime = datetime.now()
+        _logger.info("End time: %s", end_datetime)
+        elapsed = end_datetime - start_datetime
+        _logger.info("Elapsed time: %s", elapsed)
+
+    except Exception:
+        _logger.exception("Failed to complete", exc_info=True)
+        raise
+
+
+def process_climate_indices(
+    arguments: argparse.Namespace,
+) -> None:
+    """
+    Process climate indices based on the provided arguments.
+
+    :param arguments: an argparse.Namespace containing the command-line arguments
+    :return: None
+    """
+    try:
         # validate the arguments and determine the input type
         input_type = _validate_args(arguments)
@@ -1740,16 +1783,12 @@
             if netcdf_awc != arguments.netcdf_awc:
                 os.remove(netcdf_awc)
 
-        # report on the elapsed time
-        end_datetime = datetime.now()
-        _logger.info("End time: %s", end_datetime)
-        elapsed = end_datetime - start_datetime
-        _logger.info("Elapsed time: %s", elapsed)
-
     except Exception:
         _logger.exception("Failed to complete", exc_info=True)
         raise
 
+    return None
+
 
 if __name__ == "__main__":
     # (please do not remove -- useful for running as a script when debugging)
@@ -1777,4 +1816,57 @@ def main():  # type: () -> None
     # --calibration_start_year 1951 --calibration_end_year 2010
     # --multiprocessing all --periodicity monthly
 
+    """
+    SYNOPSIS:
+
+    The main program in this module is designed to process climate indices on
+    NetCDF gridded datasets in parallel, leveraging Python's multiprocessing
+    module. The process can be broken down into several key steps, which
+    together implement a quasi "map-reduce" model for parallel computation.
+    Here's an overview of how it works:
+
+    Step 1: Initialization and Argument Parsing
+    The program starts by parsing command-line arguments that specify the
+    details of the computation, such as the index to compute (e.g., SPI, SPEI),
+    the input NetCDF files, and various parameters relevant to the computation.
+    It then validates these arguments to ensure they form a coherent set of
+    instructions for the computation.
+
+    Step 2: Setting Up Multiprocessing
+    Based on the command-line arguments, the program determines the number of
+    worker processes to use. It can use all available CPUs minus one, a single
+    process, or all CPUs, depending on the user's choice. Global shared arrays
+    are prepared for use by worker processes. These arrays hold the input data
+    (e.g., precipitation, temperature) and the results of the computations.
+
+    Step 3: Data Preparation
+    The input data from NetCDF files is loaded into shared memory arrays. This
+    step involves reading the data, possibly converting units, and then
+    distributing it across shared arrays that worker processes can access. The
+    program checks the dimensions and shapes of the input data to ensure they
+    match expected patterns, adjusting as necessary to fit the computation
+    requirements.
+
+    Step 4: Parallel Computation ("Map")
+    The program splits the computation into chunks that can be processed
+    independently; this is the "map" part of the "map-reduce" model. Worker
+    processes are spawned, each taking a portion of the data from the shared
+    arrays to compute the climate index (e.g., SPI, SPEI) over that subset.
+    Each worker applies the computation function along the specified axis of
+    the data chunk it has been given. This could involve complex calculations
+    like the Thornthwaite method for PET or statistical analysis for SPI.
+
+    Step 5: Aggregating Results ("Reduce")
+    Once all worker processes complete their computations, the results are
+    aggregated back into a single dataset; this is the "reduce" part of the
+    "map-reduce" model. The program collects the computed indices from the
+    shared arrays and assembles them into a coherent output dataset,
+    maintaining the correct dimensions and metadata.
+
+    Step 6: Writing Output
+    The final step involves writing the computed indices back to NetCDF files.
+    Each index computed (e.g., SPI, SPEI, PET) is saved in its own file. The
+    program ensures that the output files contain all necessary metadata and
+    are structured correctly to be used in further analysis or visualization.
+    """
     main()
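
The SYNOPSIS above describes the shared-array "map-reduce" scheme in prose. Below is a condensed, self-contained sketch of that pattern -- not the module's actual code (names, shapes, and the stand-in computation are hypothetical), but the np.frombuffer(...).reshape(...) idiom matches the PDSI handling in _compute_write_index() above:

import multiprocessing

import numpy as np


def _worker(shared_array, shape, row_range):
    # view the shared buffer as a 2-D float64 array -- no copy is made
    data = np.frombuffer(shared_array.get_obj()).reshape(shape)
    start, end = row_range
    # stand-in "index" computation applied along the time axis of this chunk
    data[start:end] = np.apply_along_axis(lambda ts: ts - np.nanmean(ts), 1, data[start:end])


if __name__ == "__main__":
    shape = (100, 240)  # e.g. (grid cells, months)
    shared = multiprocessing.Array("d", shape[0] * shape[1])
    np.frombuffer(shared.get_obj()).reshape(shape)[:] = np.random.default_rng(0).random(shape)

    # "map": each worker process handles its own block of rows
    row_ranges = [(0, 50), (50, 100)]
    workers = [multiprocessing.Process(target=_worker, args=(shared, shape, r)) for r in row_ranges]
    for process in workers:
        process.start()
    for process in workers:
        process.join()

    # "reduce": the parent reads the combined in-place results back out
    results = np.frombuffer(shared.get_obj()).reshape(shape)
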
diff --git a/src/climate_indices/compute.py b/src/climate_indices/compute.py
index fa61fa61..df766f96 100644
--- a/src/climate_indices/compute.py
+++ b/src/climate_indices/compute.py
@@ -118,23 +118,23 @@ def sum_to_scale(
 ) -> np.ndarray:
     """
     Compute a sliding sums array using 1-D convolution. The initial
-    (scale - 1) elements of the result array will be padded with np.NaN values.
-    Missing values are not ignored, i.e. if a np.NaN
+    (scale - 1) elements of the result array will be padded with np.nan values.
+    Missing values are not ignored, i.e. if a np.nan
     (missing) value is part of the group of values to be summed then the sum
-    will be np.NaN
+    will be np.nan
 
     For example if the first array is [3, 4, 6, 2, 1, 3, 5, 8, 5] and the
     number of values to sum is 3 then the resulting array
-    will be [np.NaN, np.NaN, 13, 12, 9, 6, 9, 16, 18].
+    will be [np.nan, np.nan, 13, 12, 9, 6, 9, 16, 18].
 
     More generally:
 
     Y = f(X, n)
 
-    Y[i] == np.NaN, where i < n
+    Y[i] == np.nan, where i < n - 1
     Y[i] == sum(X[i - n + 1:i + 1]), where i >= n - 1 and
         X[i - n + 1:i + 1] contains no NaN values
-    Y[i] == np.NaN, where i >= n - 1 and X[i - n + 1:i + 1] contains
+    Y[i] == np.nan, where i >= n - 1 and X[i - n + 1:i + 1] contains
         one or more NaN values
 
     :param values: the array of values over which we'll compute sliding sums
@@ -158,7 +158,7 @@ def sum_to_scale(
     # BELOW FOR dask/xarray DataArray integration
     #
     # pad the values array with (scale - 1) NaNs
-    # values = pad(values, pad_width=(scale - 1, 0), mode='constant', constant_values=np.NaN)
+    # values = pad(values, pad_width=(scale - 1, 0), mode='constant', constant_values=np.nan)
     #
     # start = 1
     # end = -(scale - 2)
diff --git a/src/climate_indices/indices.py b/src/climate_indices/indices.py
index a7f304c9..fc7118a0 100644
--- a/src/climate_indices/indices.py
+++ b/src/climate_indices/indices.py
@@ -424,7 +424,7 @@ def percentage_of_normal(
 
     # get an array containing a sliding sum on the specified time step
     # scale -- i.e. if the scale is 3 then the first two elements will be
-    # np.NaN, since we need 3 elements to get a sum, and then from the third
+    # np.nan, since we need 3 elements to get a sum, and then from the third
     # element to the end the values will equal the sum of the corresponding
     # time step plus the values of the two previous time steps
     scale_sums = compute.sum_to_scale(values, scale)
diff --git a/tests/test_compute.py b/tests/test_compute.py
index 7c5a8f2a..245b970c 100644
--- a/tests/test_compute.py
+++ b/tests/test_compute.py
@@ -43,7 +43,7 @@ def test_transform_fitted_gamma(
     """
 
     # confirm that an input array of all NaNs results in the same array returned
-    all_nans = np.full(precips_mm_monthly.shape, np.NaN)
+    all_nans = np.full(precips_mm_monthly.shape, np.nan)
     computed_values = compute.transform_fitted_gamma(
         all_nans,
         data_year_start_monthly,
@@ -172,9 +172,9 @@ def test_gamma_parameters(
     """
 
     # confirm that an input array of all NaNs results in the same array returned
-    all_nans = np.full(precips_mm_monthly.shape, np.NaN)
-    nan_alphas = np.full(shape=(12,), fill_value=np.NaN)
-    nan_betas = np.full(shape=(12,), fill_value=np.NaN)
+    all_nans = np.full(precips_mm_monthly.shape, np.nan)
+    nan_alphas = np.full(shape=(12,), fill_value=np.nan)
+    nan_betas = np.full(shape=(12,), fill_value=np.nan)
     alphas, betas = compute.gamma_parameters(
         all_nans,
         data_year_start_monthly,
@@ -247,7 +247,7 @@ def test_transform_fitted_pearson(
     """
 
     # confirm that an input array of all NaNs results in the same array returned
-    all_nans = np.full(precips_mm_monthly.shape, np.NaN)
+    all_nans = np.full(precips_mm_monthly.shape, np.nan)
     computed_values = compute.transform_fitted_pearson(
         all_nans,
         data_year_start_monthly,
@@ -280,7 +280,7 @@
     )
 
     # confirm that an input array of all NaNs will return the same array
-    all_nans = np.full(precips_mm_monthly.shape, np.NaN)
+    all_nans = np.full(precips_mm_monthly.shape, np.nan)
     computed_values = compute.transform_fitted_pearson(
         all_nans,
         data_year_start_monthly,
@@ -524,14 +524,14 @@ def test_sum_to_scale():
     # test an input array with no missing values
     values = np.array([3.0, 4, 6, 2, 1, 3, 5, 8, 5])
     computed_values = compute.sum_to_scale(values, 3)
-    expected_values = np.array([np.NaN, np.NaN, 13, 12, 9, 6, 9, 16, 18])
+    expected_values = np.array([np.nan, np.nan, 13, 12, 9, 6, 9, 16, 18])
     np.testing.assert_allclose(
         computed_values,
         expected_values,
         err_msg=UNEXPECTED_SLIDING_SUMS_MESSAGE,
     )
     computed_values = compute.sum_to_scale(values, 4)
-    expected_values = np.array([np.NaN, np.NaN, np.NaN, 15, 13, 12, 11, 17, 21])
+    expected_values = np.array([np.nan, np.nan, np.nan, 15, 13, 12, 11, 17, 21])
     np.testing.assert_allclose(
         computed_values,
         expected_values,
@@ -539,9 +539,9 @@
     )
 
     # test an input array with missing values on the end
-    values = np.array([3, 4, 6, 2, 1, 3, 5, 8, 5, np.NaN, np.NaN, np.NaN])
+    values = np.array([3, 4, 6, 2, 1, 3, 5, 8, 5, np.nan, np.nan, np.nan])
     computed_values = compute.sum_to_scale(values, 3)
-    expected_values = np.array([np.NaN, np.NaN, 13, 12, 9, 6, 9, 16, 18, np.NaN, np.NaN, np.NaN])
+    expected_values = np.array([np.nan, np.nan, 13, 12, 9, 6, 9, 16, 18, np.nan, np.nan, np.nan])
     np.testing.assert_allclose(
         computed_values,
         expected_values,
@@ -549,9 +549,9 @@
     )
 
     # test an input array with missing values within the array
-    values = np.array([3, 4, 6, 2, 1, 3, 5, np.NaN, 8, 5, 6])
+    values = np.array([3, 4, 6, 2, 1, 3, 5, np.nan, 8, 5, 6])
     computed_values = compute.sum_to_scale(values, 3)
-    expected_values = np.array([np.NaN, np.NaN, 13, 12, 9, 6, 9, np.NaN, np.NaN, np.NaN, 19])
+    expected_values = np.array([np.nan, np.nan, 13, 12, 9, 6, 9, np.nan, np.nan, np.nan, 19])
    np.testing.assert_allclose(
         computed_values,
         expected_values,
@@ -559,9 +559,9 @@
     )
 
     test_values = np.array([1.0, 5, 7, 2, 3, 4, 9, 6, 3, 8])
-    sum_by2 = np.array([np.NaN, 6, 12, 9, 5, 7, 13, 15, 9, 11])
-    sum_by4 = np.array([np.NaN, np.NaN, np.NaN, 15, 17, 16, 18, 22, 22, 26])
-    sum_by6 = np.array([np.NaN, np.NaN, np.NaN, np.NaN, np.NaN, 22, 30, 31, 27, 33])
+    sum_by2 = np.array([np.nan, 6, 12, 9, 5, 7, 13, 15, 9, 11])
+    sum_by4 = np.array([np.nan, np.nan, np.nan, 15, 17, 16, 18, 22, 22, 26])
+    sum_by6 = np.array([np.nan, np.nan, np.nan, np.nan, np.nan, 22, 30, 31, 27, 33])
     np.testing.assert_equal(
         compute.sum_to_scale(test_values, 2),
         sum_by2,
diff --git a/tests/test_eto.py b/tests/test_eto.py
index b708d733..aa87a3e2 100644
--- a/tests/test_eto.py
+++ b/tests/test_eto.py
@@ -73,19 +73,19 @@ def test_eto_thornthwaite(temps_celsius, latitude_degrees, data_year_start_month
     pytest.raises(TypeError, eto.eto_thornthwaite, temps_celsius, None, data_year_start_monthly)
 
     # make sure that an invalid latitude value (NaN) raises an error
-    pytest.raises(ValueError, eto.eto_thornthwaite, temps_celsius, np.NaN, data_year_start_monthly)
+    pytest.raises(ValueError, eto.eto_thornthwaite, temps_celsius, np.nan, data_year_start_monthly)
 
 
 # ------------------------------------------------------------------------------
 def test_sunset_hour_angle():
     # make sure that an invalid latitude value raises an error
     pytest.raises(ValueError, eto._sunset_hour_angle, np.deg2rad(-100.0), np.deg2rad(0.0))
-    pytest.raises(ValueError, eto._sunset_hour_angle, np.NaN, np.deg2rad(0.0))
+    pytest.raises(ValueError, eto._sunset_hour_angle, np.nan, np.deg2rad(0.0))
 
     # make sure that an invalid solar declination angle raises an error
     pytest.raises(ValueError, eto._sunset_hour_angle, np.deg2rad(0.0), np.deg2rad(-75.0))
     pytest.raises(ValueError, eto._sunset_hour_angle, np.deg2rad(0.0), np.deg2rad(85.0))
-    pytest.raises(ValueError, eto._sunset_hour_angle, np.deg2rad(0.0), np.NaN)
+    pytest.raises(ValueError, eto._sunset_hour_angle, np.deg2rad(0.0), np.nan)
 
     expected_value = math.pi / 2
     computed_value = eto._sunset_hour_angle(0.0, np.deg2rad(0.0))
@@ -113,7 +113,7 @@ def test_solar_declination():
     pytest.raises(ValueError, eto._solar_declination, -1)
     pytest.raises(ValueError, eto._solar_declination, 367)
     pytest.raises(ValueError, eto._solar_declination, 5000)
-    pytest.raises(ValueError, eto._solar_declination, np.NaN)
+    pytest.raises(ValueError, eto._solar_declination, np.nan)
 
     expected_value = -0.313551072399921
     computed_value = eto._solar_declination(30)
@@ -130,7 +130,7 @@ def test_daylight_hours():
     # make sure invalid arguments raise an error
     pytest.raises(ValueError, eto._daylight_hours, math.pi + 1)
     pytest.raises(ValueError, eto._daylight_hours, -1.0)
-    pytest.raises(ValueError, eto._daylight_hours, np.NaN)
+    pytest.raises(ValueError, eto._daylight_hours, np.nan)
 
     expected_value = 7.999999999999999
     computed_value = eto._daylight_hours(math.pi / 3)
diff --git a/tests/test_indices.py b/tests/test_indices.py
index 7d48e2e9..a18c7c46 100644
--- a/tests/test_indices.py
+++ b/tests/test_indices.py
@@ -23,7 +23,7 @@ def test_pet(
 ):
     # confirm that an input temperature array of only NaNs
     # results in the same all NaNs array being returned
-    all_nan_temps = np.full(temps_celsius.shape, np.NaN)
+    all_nan_temps = np.full(temps_celsius.shape, np.nan)
     computed_pet = indices.pet(all_nan_temps, latitude_degrees, data_year_start_monthly)
     np.testing.assert_equal(
         computed_pet,
@@ -45,7 +45,7 @@ def test_pet(
     np.testing.assert_raises(ValueError, indices.pet, temps_celsius, None, data_year_start_monthly)
 
     # confirm that a missing/None latitude value raises an error
-    np.testing.assert_raises(ValueError, indices.pet, temps_celsius, np.NaN, data_year_start_monthly)
+    np.testing.assert_raises(ValueError, indices.pet, temps_celsius, np.nan, data_year_start_monthly)
 
     # confirm that an invalid latitude value raises an error
     pytest.raises(
@@ -100,7 +100,7 @@ def test_pnp(
 ):
     # confirm that an input precipitation array containing
     # only NaNs results in the same array returned
-    all_nan_precips = np.full(precips_mm_monthly.shape, np.NaN)
+    all_nan_precips = np.full(precips_mm_monthly.shape, np.nan)
     computed_pnp = indices.percentage_of_normal(
         all_nan_precips,
         1,
@@ -210,7 +210,7 @@ def test_spi(
 ) -> None:
     # confirm that an input array of all NaNs for
     # precipitation results in the same array returned
-    all_nans = np.full(precips_mm_monthly.shape, np.NaN)
+    all_nans = np.full(precips_mm_monthly.shape, np.nan)
     computed_spi = indices.spi(
         all_nans,
         1,
@@ -386,7 +386,7 @@ def test_spei(
 ) -> None:
     # confirm that an input precipitation array containing
     # only NaNs results in the same array being returned
-    all_nans = np.full(precips_mm_monthly.shape, np.NaN)
+    all_nans = np.full(precips_mm_monthly.shape, np.nan)
     computed_spei = indices.spei(
         all_nans,
         all_nans,
@@ -491,7 +491,7 @@ def test_pci(
 ):
     # confirm that an input rainfall array of only NaNs
     # results in the same all NaNs array being returned
-    all_nan_rainfall = np.full(rain_mm.shape, np.NaN)
+    all_nan_rainfall = np.full(rain_mm.shape, np.nan)
     computed_pci = indices.pci(all_nan_rainfall)
     np.testing.assert_equal(
         computed_pci,
diff --git a/tests/test_utils.py b/tests/test_utils.py
index 6d1c444f..802b1a94 100644
--- a/tests/test_utils.py
+++ b/tests/test_utils.py
@@ -69,7 +69,7 @@ def test_count_zeros_and_non_missings():
     non_missing_count_error = "Failed to correctly count non-missing values"
 
     # vanilla use case
-    values_list = [3, 4, 0, 2, 3.1, 5, np.NaN, 8, 5, 6, 0.0, np.NaN, 5.6, 2]
+    values_list = [3, 4, 0, 2, 3.1, 5, np.nan, 8, 5, 6, 0.0, np.nan, 5.6, 2]
     values = np.array(values_list)
     zeros, non_missings = utils.count_zeros_and_non_missings(values)
     if zeros != 2:
@@ -84,7 +84,7 @@ def test_count_zeros_and_non_missings():
         raise AssertionError(zero_count_error)
     if non_missings != 12:
         raise AssertionError(non_missing_count_error)
-    values = [[3, 4, 0, 2, 3.1, 5, np.NaN], [8, 5, 6, 0.0, np.NaN, 5.6, 2]]
+    values = [[3, 4, 0, 2, 3.1, 5, np.nan], [8, 5, 6, 0.0, np.nan, 5.6, 2]]
     zeros, non_missings = utils.count_zeros_and_non_missings(values)
     if zeros != 2:
         raise AssertionError(zero_count_error)
@@ -102,7 +102,7 @@ def test_is_data_valid():
 
     # Test for the utils.is_data_valid() function
     valid_array = np.full((12,), 1.0)
-    invalid_array = np.full((12,), np.NaN)
+    invalid_array = np.full((12,), np.nan)
     if not utils.is_data_valid(valid_array):
         raise AssertionError()
     if utils.is_data_valid(invalid_array):
@@ -201,7 +201,7 @@ def test_reshape_to_2d():
         [
             [3, 4, 6, 2, 1, 3, 5, 8, 5, 6, 3, 4],
             [6, 2, 1, 3, 5, 8, 5, 6, 3, 4, 6, 2],
-            [1, 3, 5, 8, 5, 6, np.NaN, np.NaN, np.NaN, np.NaN, np.NaN, np.NaN],
+            [1, 3, 5, 8, 5, 6, np.nan, np.nan, np.nan, np.nan, np.nan, np.nan],
         ]
     )
@@ -221,7 +221,7 @@ def test_reshape_to_2d():
             [3, 4, 6, 2, 1, 3, 5, 8],
             [5, 6, 3, 4, 6, 2, 1, 3],
             [5, 8, 5, 6, 3, 4, 6, 2],
-            [1, 3, 5, 8, 5, 6, np.NaN, np.NaN],
+            [1, 3, 5, 8, 5, 6, np.nan, np.nan],
         ]
     )
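
To make the sum_to_scale() contract concrete, a quick spot-check (not part of this patch) whose input and expected sums are taken directly from the compute.py docstring and test_sum_to_scale() above:

import numpy as np

from climate_indices import compute

values = np.array([3.0, 4, 6, 2, 1, 3, 5, 8, 5])
print(compute.sum_to_scale(values, 3))
# [nan nan 13. 12.  9.  6.  9. 16. 18.]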