Dependency version cleanups etc. for release of version 2.0.1 #560

Merged: 4 commits, Sep 19, 2024
17 changes: 9 additions & 8 deletions pyproject.toml
@@ -23,7 +23,7 @@ exclude = '''

[tool.poetry]
name = 'climate_indices'
version = '2.0.0'
version = '2.0.1'
description = 'Reference implementations of various climate indices typically used for drought monitoring'
authors = ['James Adams <[email protected]>']
readme = 'README.md'
@@ -44,19 +44,21 @@ classifiers = [
packages = [{include = 'climate_indices', from = 'src'}]

[tool.poetry.dependencies]
cftime = '>=1.6.2'
dask = '>=2022.2.0'
h5netcdf = '>=1.1.0'
# only Python and scipy are required for the base library code
python = '>=3.8,<3.12'
scipy = '>=1.10.0'
scipy = '1.10.1'
# remaining dependencies are required for the CLI (console) scripts
cftime = '>=1.6.4'
dask = '>=2023.5.0'
h5netcdf = '>=1.1.0'
xarray = '>=2023.1.0'

[tool.poetry.dev-dependencies]
pytest = '*'
pytest = '8.3.3'
toml = '>=0.10.2'

[tool.poetry.group.dev.dependencies]
sphinx-autodoc-typehints = "^1.23.3"
sphinx-autodoc-typehints = '2.0.1'

[tool.poetry.scripts]
process_climate_indices = 'climate_indices.__main__:main'
@@ -66,4 +68,3 @@ spi = 'climate_indices.__spi__:main'
filterwarnings = [
'ignore::FutureWarning',
]

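As a quick sanity check after editing the pins, the dependency table can be read back with the toml package that is already listed as a dev dependency (a minimal sketch, not part of this PR's changes):

import toml  # already a dev dependency of this project

deps = toml.load('pyproject.toml')['tool']['poetry']['dependencies']
assert deps['scipy'] == '1.10.1'    # the exact pin introduced here
assert deps['cftime'] == '>=1.6.4'  # floor raised from 1.6.2
print('pins look as expected')
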
109 changes: 98 additions & 11 deletions src/climate_indices/__main__.py
@@ -7,7 +7,7 @@
import logging
import multiprocessing
import os
from typing import Any, Dict, List
from typing import Any, Dict, List, Tuple

import numpy as np
import scipy.constants
@@ -563,7 +563,7 @@ def _drop_data_into_shared_arrays_grid(
var_names: list,
periodicity: compute.Periodicity,
data_start_year: int,
):
) -> Tuple[int, ...]:
output_shape = None

# get the data arrays we'll use later in the index computations
@@ -626,9 +626,16 @@ def _drop_data_into_shared_arrays_grid(


def _drop_data_into_shared_arrays_divisions(
dataset,
dataset: xr.Dataset,
var_names: List[str],
):
) -> Tuple[int, ...]:
"""
Drop data into shared arrays for use in the index computations.

:param dataset:
:param var_names:
:return:
"""
output_shape = None

# get the data arrays we'll use later in the index computations
@@ -874,7 +881,7 @@ def _compute_write_index(keyword_arguments):
# TODO once we support daily Palmers then we'll need to convert values
# from a 366-day calendar back into a normal/Gregorian calendar

# get the computedPDSI data as an array of float32 values
# get the computed PDSI data as an array of float32 values
array = _global_shared_arrays[_KEY_RESULT_PDSI][_KEY_ARRAY]
shape = _global_shared_arrays[_KEY_RESULT_PDSI][_KEY_SHAPE]
pdsi = np.frombuffer(array.get_obj()).reshape(shape).astype(float)
@@ -1579,6 +1586,44 @@ def main(): # type: () -> None

arguments = parser.parse_args()

process_climate_indices(arguments=arguments)

# report the elapsed time
end_datetime = datetime.now()
_logger.info("End time: %s", end_datetime)
elapsed = end_datetime - start_datetime
_logger.info("Elapsed time: %s", elapsed)

except Exception:
_logger.exception("Failed to complete", exc_info=True)
raise


def process_climate_indices(

issue (complexity): Consider refactoring the main function to improve code structure and reduce complexity.

The introduction of process_climate_indices() increases complexity without clear benefits. To improve code structure and readability, consider fully refactoring the processing logic into this function:

  1. Move all climate indices processing into process_climate_indices().
  2. Remove global state dependencies.
  3. Return the processing results instead of relying on side effects.

Here's a simplified example of how to restructure the code:

def main():
    start_datetime = datetime.now()
    _logger.info("Start time:    %s", start_datetime)

    try:
        arguments = parse_arguments()
        results = process_climate_indices(arguments)
        write_results(results, arguments)
    except Exception:
        _logger.exception("Failed to complete", exc_info=True)
        raise
    finally:
        end_datetime = datetime.now()
        _logger.info("End time:      %s", end_datetime)
        _logger.info("Elapsed time:  %s", end_datetime - start_datetime)

def process_climate_indices(args: argparse.Namespace) -> Dict[str, Any]:
    input_type = _validate_args(args)
    _setup_multiprocessing(args)

    data = _load_data(args)
    indices = _compute_indices(data, args)

    return indices

def _setup_multiprocessing(args: argparse.Namespace) -> None:
    global _NUMBER_OF_WORKER_PROCESSES
    # ... (existing multiprocessing setup logic)

def _load_data(args: argparse.Namespace) -> Dict[str, np.ndarray]:
    # ... (existing data loading logic)

def _compute_indices(data: Dict[str, np.ndarray], args: argparse.Namespace) -> Dict[str, np.ndarray]:
    # ... (existing index computation logic)

def write_results(results: Dict[str, np.ndarray], args: argparse.Namespace) -> None:
    # ... (existing result writing logic)

This structure separates concerns more clearly, reduces global state usage, and makes the flow of data and control more explicit. It's easier to understand, test, and maintain.

arguments: argparse.Namespace,
) -> None:
"""
Process climate indices based on the provided arguments.

:param arguments: A dictionary or argparse.Namespace containing the arguments
:return: The results of the climate indices processing
"""
# Extract arguments
# index = args['index']
# periodicity = args['periodicity']
# scales = args['scales']
# calibration_start_year = args['calibration_start_year']
# calibration_end_year = args['calibration_end_year']
# netcdf_precip = args['netcdf_precip']
# var_name_precip = args['var_name_precip']
Comment on lines +1611 to +1618

suggestion: Consider removing or updating commented-out code

Commented-out code can become outdated and confusing. If this is meant as a guide for future implementation, consider adding a TODO comment explaining its purpose. Otherwise, it might be better to remove it.

Suggested change
# Extract arguments
# index = args['index']
# periodicity = args['periodicity']
# scales = args['scales']
# calibration_start_year = args['calibration_start_year']
# calibration_end_year = args['calibration_end_year']
# netcdf_precip = args['netcdf_precip']
# var_name_precip = args['var_name_precip']
# TODO: Implement argument extraction
# Expected arguments: index, periodicity, scales, calibration_start_year,
# calibration_end_year, netcdf_precip, var_name_precip

# output_file_base = args['output_file_base']

# Add your existing processing logic here
# ...

try:
start_datetime = datetime.now()

# validate the arguments and determine the input type
input_type = _validate_args(arguments)

@@ -1740,16 +1785,12 @@ def main(): # type: () -> None
if netcdf_awc != arguments.netcdf_awc:
os.remove(netcdf_awc)

# report on the elapsed time
end_datetime = datetime.now()
_logger.info("End time: %s", end_datetime)
elapsed = end_datetime - start_datetime
_logger.info("Elapsed time: %s", elapsed)

except Exception:
_logger.exception("Failed to complete", exc_info=True)
raise

return None

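For illustration, the new entry point could in principle be driven programmatically by constructing the namespace by hand. This is a hedged, untested sketch: the attribute names are inferred from the commented-out block above and the example invocations below, the values are hypothetical, and the CLI remains the supported interface:

import argparse
from climate_indices.__main__ import process_climate_indices

args = argparse.Namespace(
    index='spi',
    periodicity='monthly',
    scales=[3, 6],
    calibration_start_year=1951,
    calibration_end_year=2010,
    netcdf_precip='precip.nc',      # hypothetical input file
    var_name_precip='prcp',
    output_file_base='/tmp/indices',
    multiprocessing='all',
)
process_climate_indices(arguments=args)

Note that _validate_args expects the full set of parser-defined attributes, so a real call would need every option the parser declares.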

if __name__ == "__main__":
# (please do not remove -- useful for running as a script when debugging)
@@ -1777,4 +1818,50 @@ def main(): # type: () -> None
# --calibration_start_year 1951 --calibration_end_year 2010
# --multiprocessing all --periodicity monthly

"""
SYNOPSIS:

The main program in the provided code excerpt is designed to process climate indices on NetCDF
gridded datasets in parallel, leveraging Python's multiprocessing module. The process can be
broken down into several key steps, which together implement a quasi "map-reduce" model for parallel
computation. Here's an overview of how it works:

Step 1: Initialization and Argument Parsing
The program starts by parsing command-line arguments that specify the details of the computation,
such as the index to compute (e.g., SPI, SPEI), the input NetCDF files, and various parameters
relevant to the computation. It then validates these arguments to ensure they form a coherent set
of instructions for the computation.

Step 2: Setting Up Multiprocessing
Based on the command-line arguments, the program determines the number of worker processes to use.
It can use all available CPUs minus one, a single process, or all CPUs, depending on the user's choice.
Global shared arrays are prepared for use by worker processes. These arrays hold the input data
(e.g., precipitation, temperature) and the results of the computations.

Step 3: Data Preparation
The input data from NetCDF files is loaded into shared memory arrays. This step involves reading the data,
possibly converting units, and then distributing it across shared arrays that worker processes can access.
The program checks the dimensions and shapes of the input data to ensure they match expected patterns,
adjusting as necessary to fit the computation requirements.

Step 4: Parallel Computation ("Map")
The program splits the computation into chunks that can be processed independently.
This is the "map" part of the "map-reduce" model.
Worker processes are spawned, each taking a portion of the data from the shared arrays
to compute the climate index (e.g., SPI, SPEI) over that subset.
Each worker applies the computation function along the specified axis of the data chunk it has been given.
This could involve complex calculations like the Thornthwaite method for PET or statistical analysis for SPI.

Step 5: Aggregating Results ("Reduce")
Once all worker processes complete their computations, the results are aggregated back into a single dataset.
This is the "reduce" part of the "map-reduce" model.
The program collects the computed indices from the shared arrays and assembles them into a coherent
output dataset, maintaining the correct dimensions and metadata.

Step 6: Writing Output
The final step involves writing the computed indices back to NetCDF files.
Each index computed (e.g., SPI, SPEI, PET) is saved in its own file.
The program ensures that the output files contain all necessary metadata and are structured
correctly to be used in further analysis or visualization.
"""
main()
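
The shared-array mechanics described in the synopsis reduce to a small pattern. The following is a self-contained sketch of that pattern, not the module's actual code; the names and the trivial doubling "computation" are illustrative:

import multiprocessing

import numpy as np

def _worker(shared, shape, row_start, row_end):
    # every worker views the same shared buffer and computes its chunk in place
    data = np.frombuffer(shared.get_obj()).reshape(shape)
    data[row_start:row_end] *= 2.0  # stand-in for an index computation

if __name__ == '__main__':
    shape = (8, 12)
    shared = multiprocessing.Array('d', int(np.prod(shape)))  # float64 buffer with a lock
    np.frombuffer(shared.get_obj()).reshape(shape)[:] = 1.0

    # "map": split the rows into independent chunks, one process per chunk
    workers = [
        multiprocessing.Process(target=_worker, args=(shared, shape, lo, hi))
        for lo, hi in [(0, 4), (4, 8)]
    ]
    for w in workers:
        w.start()
    for w in workers:
        w.join()

    # "reduce": reassemble the result from the shared buffer
    result = np.frombuffer(shared.get_obj()).reshape(shape)
    assert (result == 2.0).all()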
14 changes: 7 additions & 7 deletions src/climate_indices/compute.py
@@ -118,23 +118,23 @@ def sum_to_scale(
) -> np.ndarray:
"""
Compute a sliding sums array using 1-D convolution. The initial
(scale - 1) elements of the result array will be padded with np.NaN values.
Missing values are not ignored, i.e. if a np.NaN
(scale - 1) elements of the result array will be padded with np.nan values.
Missing values are not ignored, i.e. if a np.nan
(missing) value is part of the group of values to be summed then the sum
will be np.NaN
will be np.nan

For example if the first array is [3, 4, 6, 2, 1, 3, 5, 8, 5] and
the number of values to sum is 3 then the resulting array
will be [np.NaN, np.NaN, 13, 12, 9, 6, 9, 16, 18].
will be [np.nan, np.nan, 13, 12, 9, 6, 9, 16, 18].

More generally:

Y = f(X, n)

Y[i] == np.NaN, where i < n
Y[i] == np.nan, where i < n
Y[i] == sum(X[i - n + 1:i + 1]), where i >= n - 1 and X[i - n + 1:i + 1]
contains no NaN values
Y[i] == np.NaN, where i >= n - 1 and X[i - n + 1:i + 1] contains
Y[i] == np.nan, where i >= n - 1 and X[i - n + 1:i + 1] contains
one or more NaN values

:param values: the array of values over which we'll compute sliding sums
@@ -158,7 +158,7 @@

# BELOW FOR dask/xarray DataArray integration
# # pad the values array with (scale - 1) NaNs
# values = pad(values, pad_width=(scale - 1, 0), mode='constant', constant_values=np.NaN)
# values = pad(values, pad_width=(scale - 1, 0), mode='constant', constant_values=np.nan)
#
# start = 1
# end = -(scale - 2)
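The behavior documented in sum_to_scale can be reproduced with plain NumPy in a few lines. A sketch of the same idea, assuming the documented NaN-padding semantics (the library's actual implementation may differ in details):

import numpy as np

def sliding_sums(values: np.ndarray, scale: int) -> np.ndarray:
    # convolving with a window of ones yields sliding sums; a NaN anywhere
    # inside a window propagates, matching the documented missing-value rule
    sums = np.convolve(values, np.ones(scale), mode='valid')
    # pad the first (scale - 1) positions with NaN, as the docstring specifies
    return np.concatenate([np.full(scale - 1, np.nan), sums])

values = np.array([3.0, 4, 6, 2, 1, 3, 5, 8, 5])
print(sliding_sums(values, 3))  # [nan nan 13. 12.  9.  6.  9. 16. 18.]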
2 changes: 1 addition & 1 deletion src/climate_indices/indices.py
@@ -424,7 +424,7 @@ def percentage_of_normal(

# get an array containing a sliding sum on the specified time step
# scale -- i.e. if the scale is 3 then the first two elements will be
# np.NaN, since we need 3 elements to get a sum, and then from the third
# np.nan, since we need 3 elements to get a sum, and then from the third
# element to the end the values will equal the sum of the corresponding
# time step plus the values of the two previous time steps
scale_sums = compute.sum_to_scale(values, scale)
30 changes: 15 additions & 15 deletions tests/test_compute.py
@@ -43,7 +43,7 @@ def test_transform_fitted_gamma(
"""

# confirm that an input array of all NaNs results in the same array returned
all_nans = np.full(precips_mm_monthly.shape, np.NaN)
all_nans = np.full(precips_mm_monthly.shape, np.nan)

suggestion (testing): Consistent use of np.nan instead of np.NaN

This change improves consistency with NumPy's preferred lowercase 'nan'. Consider updating all occurrences throughout the test suite for uniformity.

Suggested change
all_nans = np.full(precips_mm_monthly.shape, np.nan)
"""
# confirm that an input array of all NaNs results in the same array returned
all_nans = np.full(precips_mm_monthly.shape, np.nan)
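
Worth noting alongside this suggestion: the capitalized alias was removed in NumPy 2.0, so the lowercase spelling is required there, not merely preferred:

import numpy as np

x = np.nan         # canonical spelling on all NumPy versions
assert np.isnan(x)
# np.NaN           # removed in NumPy 2.0; accessing it raises AttributeError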

computed_values = compute.transform_fitted_gamma(
all_nans,
data_year_start_monthly,
@@ -172,9 +172,9 @@ def test_gamma_parameters(
"""

# confirm that an input array of all NaNs results in the same array returned
all_nans = np.full(precips_mm_monthly.shape, np.NaN)
nan_alphas = np.full(shape=(12,), fill_value=np.NaN)
nan_betas = np.full(shape=(12,), fill_value=np.NaN)
all_nans = np.full(precips_mm_monthly.shape, np.nan)
nan_alphas = np.full(shape=(12,), fill_value=np.nan)
nan_betas = np.full(shape=(12,), fill_value=np.nan)
alphas, betas = compute.gamma_parameters(
all_nans,
data_year_start_monthly,
@@ -247,7 +247,7 @@ def test_transform_fitted_pearson(
"""

# confirm that an input array of all NaNs results in the same array returned
all_nans = np.full(precips_mm_monthly.shape, np.NaN)
all_nans = np.full(precips_mm_monthly.shape, np.nan)
computed_values = compute.transform_fitted_pearson(
all_nans,
data_year_start_monthly,
@@ -280,7 +280,7 @@ def test_transform_fitted_pearson(
)

# confirm that an input array of all NaNs will return the same array
all_nans = np.full(precips_mm_monthly.shape, np.NaN)
all_nans = np.full(precips_mm_monthly.shape, np.nan)
computed_values = compute.transform_fitted_pearson(
all_nans,
data_year_start_monthly,
@@ -524,44 +524,44 @@ def test_sum_to_scale():
# test an input array with no missing values
values = np.array([3.0, 4, 6, 2, 1, 3, 5, 8, 5])
computed_values = compute.sum_to_scale(values, 3)
expected_values = np.array([np.NaN, np.NaN, 13, 12, 9, 6, 9, 16, 18])
expected_values = np.array([np.nan, np.nan, 13, 12, 9, 6, 9, 16, 18])
np.testing.assert_allclose(
computed_values,
expected_values,
err_msg=UNEXPECTED_SLIDING_SUMS_MESSAGE,
)
computed_values = compute.sum_to_scale(values, 4)
expected_values = np.array([np.NaN, np.NaN, np.NaN, 15, 13, 12, 11, 17, 21])
expected_values = np.array([np.nan, np.nan, np.nan, 15, 13, 12, 11, 17, 21])
np.testing.assert_allclose(
computed_values,
expected_values,
err_msg=UNEXPECTED_SLIDING_SUMS_MESSAGE,
)

# test an input array with missing values on the end
values = np.array([3, 4, 6, 2, 1, 3, 5, 8, 5, np.NaN, np.NaN, np.NaN])
values = np.array([3, 4, 6, 2, 1, 3, 5, 8, 5, np.nan, np.nan, np.nan])
computed_values = compute.sum_to_scale(values, 3)
expected_values = np.array([np.NaN, np.NaN, 13, 12, 9, 6, 9, 16, 18, np.NaN, np.NaN, np.NaN])
expected_values = np.array([np.nan, np.nan, 13, 12, 9, 6, 9, 16, 18, np.nan, np.nan, np.nan])
np.testing.assert_allclose(
computed_values,
expected_values,
err_msg="Sliding sums not computed as expected when missing values appended to end of input array",
)

# test an input array with missing values within the array
values = np.array([3, 4, 6, 2, 1, 3, 5, np.NaN, 8, 5, 6])
values = np.array([3, 4, 6, 2, 1, 3, 5, np.nan, 8, 5, 6])
computed_values = compute.sum_to_scale(values, 3)
expected_values = np.array([np.NaN, np.NaN, 13, 12, 9, 6, 9, np.NaN, np.NaN, np.NaN, 19])
expected_values = np.array([np.nan, np.nan, 13, 12, 9, 6, 9, np.nan, np.nan, np.nan, 19])
np.testing.assert_allclose(
computed_values,
expected_values,
err_msg="Sliding sums not computed as expected when missing values appended to end of input array",
)

test_values = np.array([1.0, 5, 7, 2, 3, 4, 9, 6, 3, 8])
sum_by2 = np.array([np.NaN, 6, 12, 9, 5, 7, 13, 15, 9, 11])
sum_by4 = np.array([np.NaN, np.NaN, np.NaN, 15, 17, 16, 18, 22, 22, 26])
sum_by6 = np.array([np.NaN, np.NaN, np.NaN, np.NaN, np.NaN, 22, 30, 31, 27, 33])
sum_by2 = np.array([np.nan, 6, 12, 9, 5, 7, 13, 15, 9, 11])
sum_by4 = np.array([np.nan, np.nan, np.nan, 15, 17, 16, 18, 22, 22, 26])
sum_by6 = np.array([np.nan, np.nan, np.nan, np.nan, np.nan, 22, 30, 31, 27, 33])
np.testing.assert_equal(
compute.sum_to_scale(test_values, 2),
sum_by2,
10 changes: 5 additions & 5 deletions tests/test_eto.py
@@ -73,19 +73,19 @@ def test_eto_thornthwaite(temps_celsius, latitude_degrees, data_year_start_month
pytest.raises(TypeError, eto.eto_thornthwaite, temps_celsius, None, data_year_start_monthly)

# make sure that an invalid latitude value (NaN) raises an error
pytest.raises(ValueError, eto.eto_thornthwaite, temps_celsius, np.NaN, data_year_start_monthly)
pytest.raises(ValueError, eto.eto_thornthwaite, temps_celsius, np.nan, data_year_start_monthly)


# ------------------------------------------------------------------------------
def test_sunset_hour_angle():
# make sure that an invalid latitude value raises an error
pytest.raises(ValueError, eto._sunset_hour_angle, np.deg2rad(-100.0), np.deg2rad(0.0))
pytest.raises(ValueError, eto._sunset_hour_angle, np.NaN, np.deg2rad(0.0))
pytest.raises(ValueError, eto._sunset_hour_angle, np.nan, np.deg2rad(0.0))

suggestion (testing): Consistent use of np.nan in eto tests

This change improves consistency. Consider adding a test case to ensure the function handles np.nan inputs correctly, if not already present.

    pytest.raises(TypeError, eto.eto_thornthwaite, temps_celsius, None, data_year_start_monthly)

    # make sure that an invalid latitude value (NaN) raises an error
    pytest.raises(ValueError, eto.eto_thornthwaite, temps_celsius, np.nan, data_year_start_monthly)


def test_sunset_hour_angle():
    # make sure that an invalid latitude value raises an error
    pytest.raises(ValueError, eto._sunset_hour_angle, np.deg2rad(-100.0), np.deg2rad(0.0))
    pytest.raises(ValueError, eto._sunset_hour_angle, np.nan, np.deg2rad(0.0))
    pytest.raises(ValueError, eto._sunset_hour_angle, np.deg2rad(0.0), np.nan)


# make sure that an invalid solar declination angle raises an error
pytest.raises(ValueError, eto._sunset_hour_angle, np.deg2rad(0.0), np.deg2rad(-75.0))
pytest.raises(ValueError, eto._sunset_hour_angle, np.deg2rad(0.0), np.deg2rad(85.0))
pytest.raises(ValueError, eto._sunset_hour_angle, np.deg2rad(0.0), np.NaN)
pytest.raises(ValueError, eto._sunset_hour_angle, np.deg2rad(0.0), np.nan)

expected_value = math.pi / 2
computed_value = eto._sunset_hour_angle(0.0, np.deg2rad(0.0))
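
For context, the expected value follows from the standard sunset-hour-angle relation (FAO-56): omega_s = arccos(-tan(phi) * tan(delta)), so at the equator with zero declination (phi = 0, delta = 0) it reduces to arccos(0) = pi / 2, matching the math.pi / 2 above.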
@@ -113,7 +113,7 @@ def test_solar_declination():
pytest.raises(ValueError, eto._solar_declination, -1)
pytest.raises(ValueError, eto._solar_declination, 367)
pytest.raises(ValueError, eto._solar_declination, 5000)
pytest.raises(ValueError, eto._solar_declination, np.NaN)
pytest.raises(ValueError, eto._solar_declination, np.nan)

expected_value = -0.313551072399921
computed_value = eto._solar_declination(30)
@@ -130,7 +130,7 @@ def test_daylight_hours():
# make sure invalid arguments raise an error
pytest.raises(ValueError, eto._daylight_hours, math.pi + 1)
pytest.raises(ValueError, eto._daylight_hours, -1.0)
pytest.raises(ValueError, eto._daylight_hours, np.NaN)
pytest.raises(ValueError, eto._daylight_hours, np.nan)

expected_value = 7.999999999999999
computed_value = eto._daylight_hours(math.pi / 3)