diff --git a/conda/environment.yml b/.conda/env_build.yml similarity index 59% rename from conda/environment.yml rename to .conda/env_build.yml index 87cf5b4..a5dcc92 100644 --- a/conda/environment.yml +++ b/.conda/env_build.yml @@ -1,16 +1,11 @@ -name: um2nc - channels: - - conda-forge - accessnri + - conda-forge - coecms - - defaults + - nodefaults dependencies: - anaconda-client - conda-build - conda-verify - - versioneer - - pylint - - pytest - - pytest-cov + - versioneer \ No newline at end of file diff --git a/.conda/env_dev.yml b/.conda/env_dev.yml new file mode 100644 index 0000000..e4d6871 --- /dev/null +++ b/.conda/env_dev.yml @@ -0,0 +1,25 @@ +name: um2nc-dev + +channels: + - accessnri + - conda-forge + - coecms + - nodefaults + + +dependencies: + - python >=3.10,<=3.12 + - cf-units + - cftime + - f90nml + - ipykernel + - mule + - netcdf4 + - numpy <2 + - pip + - pytest + - pytest-cov + - pylint #TODO: remove this after setting up linting with ruff + - ruff + - scitools-iris + - versioneer \ No newline at end of file diff --git a/.conda/meta.yaml b/.conda/meta.yaml new file mode 100644 index 0000000..bf69b8e --- /dev/null +++ b/.conda/meta.yaml @@ -0,0 +1,45 @@ +{% set version = load_setup_py_data(setup_file='../setup.py', from_recipe_dir=True).get('version') %} +{% set project = load_file_data('../pyproject.toml', from_recipe_dir=True).get('project') %} + +package: + name: {{ project.get('name') }} + version: "{{ version }}" + +build: + noarch: python + number: 0 + script: "python3 -m pip install . -vv" + entry_points: + {% for name, script in project.get('scripts').items() %} + - {{ name }} = {{ script }} + {% endfor %} + +source: + path: ../ + +requirements: + host: + - python + - pip + - setuptools >=61.0.0 + - versioneer + run: + {% for dep in project.get('dependencies') %} + - {{ dep }} + {% endfor %} + +test: + imports: + - umpost + commands: + {% for name, script in project.get('scripts').items() %} + - {{ name }} --help + {% endfor %} + +about: + home: {{ project.get('urls').get('Repository') }} + license: Apache Software + license_file: LICENSE + summary: {{ project.get('description') }} + license_family: Apache + diff --git a/.github/workflows/CD.yml b/.github/workflows/CD.yml index ea89ab4..16a784e 100644 --- a/.github/workflows/CD.yml +++ b/.github/workflows/CD.yml @@ -6,7 +6,7 @@ on: - '*' env: - PY_VERSION: "3.10" + PY_VERSION: "3.11" jobs: @@ -25,7 +25,7 @@ jobs: with: miniconda-version: "latest" python-version: ${{ env.PY_VERSION }} - environment-file: conda/environment.yml + environment-file: conda/env_build.yml auto-update-conda: false auto-activate-base: false show-channel-urls: true diff --git a/.github/workflows/CI.yml b/.github/workflows/CI.yml index 068d3e9..27ce022 100644 --- a/.github/workflows/CI.yml +++ b/.github/workflows/CI.yml @@ -5,85 +5,95 @@ name: CI on: # Triggers the workflow on push or pull request events but only for the master branch push: - branches: [ main, develop ] + branches: main pull_request: - branches: [ main, develop ] - + branches: main # Allows you to run this workflow manually from the Actions tab workflow_dispatch: - env: - PY_VERSION: "3.10" + PY_VERSION: "3.11" -# A workflow run is made up of one or more jobs that can run sequentially or in parallel jobs: - conda-build: - name: Conda Build + # JOB to run change in the build files + changes: runs-on: ubuntu-latest - env: - NAME: test-${{ github.event.repository.name }} + # Required permissions + permissions: + pull-requests: read + # Set job outputs to values from filter step 
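+    # The `files` output set below is read by the `verify-conda-build` job's
+    # `if:` condition, so the packaging steps only run when one of the
+    # filtered build files actually changes.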
outputs: - artifact-name: ${{ env.NAME }} + files: ${{ steps.filter.outputs.files }} + steps: + - name: Checkout code + uses: actions/checkout@v4.1.7 + + - name: Filter files + uses: dorny/paths-filter@de90cc6fb38fc0963ad72b210f1f284cd68cea36 #v3.0.2 + id: filter + with: + filters: | + files: + - 'setup.py' + - 'pyproject.toml' + - '.conda/env_build.yml' + - '.conda/meta.yml' + + verify-conda-build: + name: Conda Build + runs-on: ubuntu-latest + needs: changes + # Only run if there are changes in the build files + if: ${{ needs.changes.outputs.files == 'true' }} steps: - uses: actions/checkout@v4 - - name: Setup conda environment - uses: conda-incubator/setup-miniconda@11b562958363ec5770fef326fe8ef0366f8cbf8a # v3.0.1 + - name: Setup conda build environment + uses: conda-incubator/setup-miniconda@a4260408e20b96e80095f42ff7f1a15b27dd94ca # v3.0.4 with: miniconda-version: "latest" - python-version: ${{ env.PY_VERSION }} - environment-file: conda/environment.yml - auto-update-conda: false + python-version: ${{ env.PY_VERSION }} + environment-file: .conda/env_build.yml auto-activate-base: false + auto-update-conda: false show-channel-urls: true - - name: Run conda build + - name: Verify conda recipe shell: bash -el {0} - run: | - conda build . --no-anaconda-upload --output-folder=./build + run: conda-verify .conda --ignore C2105,C2122 - - uses: actions/upload-artifact@v4 - with: - name: ${{ env.NAME }} - if-no-files-found: error - path: ./build + - name: Run conda build + shell: bash -el {0} + run: conda build . --no-anaconda-upload --output-folder=./build -c conda-forge -c accessnri -c coecms + - name: Verify conda package + shell: bash -el {0} + run: conda-verify ./build/noarch/*.tar.bz2 --ignore C1105,C1115,C1141 + tests: name: Tests runs-on: ubuntu-latest - needs: conda-build - - # Run the job for different versions of python strategy: + fail-fast: true matrix: python-version: ["3.10", "3.11", "3.12"] - - env: - ARTIFACT_LOCATION: ${{ github.workspace }}/conda-local - - # Steps represent a sequence of tasks that will be executed as part of the job steps: - - name: Checkout code uses: actions/checkout@v4.1.7 - - uses: actions/download-artifact@v4 - with: - name: ${{ needs.conda-build.outputs.artifact-name }} - path: ${{ env.ARTIFACT_LOCATION }} - - name: Setup conda environment - uses: conda-incubator/setup-miniconda@v3 + uses: conda-incubator/setup-miniconda@a4260408e20b96e80095f42ff7f1a15b27dd94ca # v3.0.4 with: miniconda-version: "latest" python-version: ${{ matrix.python-version }} - environment-file: conda/environment.yml - activate-environment: um2nc + environment-file: .conda/env_dev.yml + activate-environment: um2nc-dev + auto-update-conda: false + auto-activate-base: false + show-channel-urls: true - - name: Install conda package + - name: Install source shell: bash -l {0} - run: | - conda install -c file://${{ env.ARTIFACT_LOCATION }} -c conda-forge -c accessnri -c coecms um2nc + run: python3 -m pip install --no-deps --no-build-isolation -e . 
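+      # The editable install above skips dependency resolution and build
+      # isolation because the runtime dependencies are already provided by the
+      # conda environment created from .conda/env_dev.yml earlier in the job.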
- name: List installed packages shell: bash -l {0} @@ -103,12 +113,12 @@ jobs: - name: Run tests shell: bash -l {0} - run: python -m pytest --cov=umpost --cov-report=xml -s test - - - name: Upload code coverage - uses: codecov/codecov-action@v4 - # Only upload once for the installed version - if: matrix.python-version == ${{ env.PY_VERSION }} - with: - token: ${{ secrets.codecov_token }} - files: ./coverage.xml + run: python -m pytest --cov=umpost --cov-report=html -s test + + # - name: Upload code coverage + # # Only upload once for the installed version + # if: ${{ matrix.python-version == env.PY_VERSION }} + # uses: codecov/codecov-action@b9fd7d16f6d7d1b5d2bec1a2887e65ceed900238 #v4.6.0 + # with: + # token: ${{ secrets.codecov_token }} + # files: ./coverage.xml diff --git a/.gitignore b/.gitignore index 644c4ad..e6f70f0 100644 --- a/.gitignore +++ b/.gitignore @@ -4,3 +4,6 @@ .idea .coverage *~ +*.egg-info +coverage_html +*.code-workspace \ No newline at end of file diff --git a/README.md b/README.md index 55559b6..226431f 100644 --- a/README.md +++ b/README.md @@ -1,37 +1,72 @@ -# Unified Model Post-processing: um2nc-standalone +# Unified Model to NetCDF Post-processing: um2nc ## About -`um2nc-standalone` is an [ACCESS-NRI](https://www.access-nri.org.au/) project to merge multiple versions of unified model conversion tools to a single, canonical project for the ESM1.5 model. +`um2nc` is a `Python3` utility to convert [Unified Model data files](https://code.metoffice.gov.uk/doc/um/latest/papers/umdp_F03.pdf) to NetCDF format. + +The `um2nc-standalone` project is an [ACCESS-NRI](https://www.access-nri.org.au/) initiative to merge multiple versions of Unified Model NetCDF conversion tool to a single, canonical project. ## Installation -TODO +### Gadi -* `virtualenv` instructions -* `conda`/`miniconda`/`micromamba?` instructions +On Gadi, `um2nc` is available within the `vk83` `payu` environment. +To access it, run: +``` +module use /g/data/vk83/modules +module load payu +``` +> [!IMPORTANT] +> You need to be a member of the vk83 project on NCI to access the module. For more information check how to [Join relevant NCI projects](https://access-hive.org.au/getting_started/set_up_nci_account/#join-relevant-nci-projects) -## User documentation +### Local installation +`um2nc` is available as a `conda` package in the [access-nri conda channel](https://anaconda.org/accessnri/um2nc). +To install it run: +``` +conda install accessnri::um2nc +``` -TODO +## Development/Testing instructions +For development/testing, it is recommended to install `um2nc` as a development package within a `micromamba`/`conda` testing environment. -### Running the tests +### Clone um2nc-standalone GitHub repo +``` +git clone git@github.com:ACCESS-NRI/um2nc-standalone.git +``` -This project uses `pytest`. To run the tests: +### Create a micromamba/conda testing environment +> [!TIP] +> In the following instructions `micromamba` can be replaced with `conda`. -```Bash -$ cd -$ pytest +``` +cd um2nc-standalone +micromamba env create -n um2nc_dev --file .conda/env_dev.yml +micromamba activate um2nc_dev +``` + +### Install um2nc as a development package +``` +pip install --no-deps --no-build-isolation -e . ``` -A minimal code coverage setup has been included, to run & generate an HTML coverage report: +### Running the tests + +The `um2nc-standalone` project uses `pytest` and `pytest-cov`.
+To run the tests and generate print a coverage report (with missing lines) run: ``` -$ cd -$ pytest --cov-report=html --cov=umpost +python3 -m pytest --cov-report=term-missing --cov=umpost ``` +> [!TIP] +> To generate an HTML coverage report substitute `term-missing` with `html`. + +## User documentation + +TODO: this needs to cover: -Then load the `index.html` from the project root/coverage_html dir. +1. Running `um2netcdf` standalone +2. Using the workflow run script +3. Using `um2netcdf` as an API ## Further information diff --git a/conda/meta.yaml b/conda/meta.yaml deleted file mode 100644 index a32554a..0000000 --- a/conda/meta.yaml +++ /dev/null @@ -1,41 +0,0 @@ -{% set data = load_setup_py_data(setup_file='../setup.py', from_recipe_dir=True) %} -{% set version = data.get('version') %} -{% set pyproj = load_file_data('../pyproject.toml', from_recipe_dir=True) %} -{% set project = pyproj.get('project') %} -{% set name = project.get('name') %} - -package: - name: {{ name }} - version: "{{ version }}" - -build: - noarch: python - number: 0 - script: "python3 -m pip install . -vv" - entry_points: - {% for name, script in project.get('scripts', {}).items() %} - - {{ name }} = {{ script }} - {% endfor %} - -source: - path: ../ - -requirements: - host: - - python - - pip - - setuptools >=61.0.0 - - versioneer - run: - - python >=3.10 - {% for dep in project.get('dependencies', []) %} - - {{ dep }} - {% endfor %} - -about: - home: https://github.com/access-nri/um2nc-standalone - license: Apache Software - license_file: LICENSE - license_family: Apache - summary: "Tool to convert fieldsfiles into netCDF format. Used for post-processing UM climate model output" - diff --git a/pyproject.toml b/pyproject.toml index 9f66ef7..b159ecb 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -2,6 +2,7 @@ name = "um2nc" authors = [ {name = "Ben Davies", email="ben.davies@anu.edu.au"}, + {name = "Davide Marchegiani", email="davide.marchegiani@anu.edu.au"}, {name = "Martin Dix", email="martin.dix@anu.edu.au"}, {name = "Spencer Wong", email="spencer.wong@anu.edu.au"}, ] @@ -14,27 +15,24 @@ readme = "README.md" keywords = ["netCDF", "UM", "postprocessing"] dynamic = ["version"] dependencies = [ - "numpy", - "mule", - "cftime", - "netCDF4", - "scitools-iris", - "cf-units", - "PyYAML", - "f90nml", + "python >=3.10,<=3.12", + "cf-units", + "cftime", + "f90nml", + "mule", + "netcdf4", + "numpy <2", + "scitools-iris", + "versioneer", ] + [project.scripts] esm1p5_convert_nc = "umpost.conversion_driver_esm1p5:main" um2nc = "umpost.um2netcdf:main" -[tool.versioneer] -VCS = "git" -style = "pep440" -versionfile_source = "umpost/_version.py" -versionfile_build = "umpost/_version.py" -tag_prefix = "" -parentdir_prefix = "umpost-" +[project.urls] +Repository = "https://github.com/ACCESS-NRI/um2nc-standalone" [build-system] build-backend = "setuptools.build_meta" @@ -44,5 +42,13 @@ requires = [ ] [tool.setuptools.packages.find] -include = ["umpost*"] +include = ["umpost"] namespaces = false + +[tool.versioneer] +VCS = "git" +style = "pep440" +versionfile_source = "umpost/_version.py" +versionfile_build = "umpost/_version.py" +tag_prefix = "" +parentdir_prefix = "umpost-" diff --git a/test/test_conversion_driver_esm1p5.py b/test/test_conversion_driver_esm1p5.py index cc1af8d..14f3e5f 100644 --- a/test/test_conversion_driver_esm1p5.py +++ b/test/test_conversion_driver_esm1p5.py @@ -80,11 +80,12 @@ def test_get_nc_write_path_unrecognized_unit(): nc_write_dir = Path("netCDF") expected_nc_write_path = nc_write_dir / 
f"aiihca.p{unknown_key}-005007.nc" - nc_write_path = esm1p5_convert.get_nc_write_path( - Path(ff_name), - nc_write_dir, - (ff_year, ff_month, 1) - ) + with pytest.warns(RuntimeWarning): + nc_write_path = esm1p5_convert.get_nc_write_path( + Path(ff_name), + nc_write_dir, + (ff_year, ff_month, 1) + ) assert nc_write_path == expected_nc_write_path @@ -220,6 +221,48 @@ def test_convert_esm1p5_output_dir_error(): ) +@pytest.mark.parametrize( + "input_output_pairs, expected_pairs", + [( # input_output_pairs + [(Path("/output000/atmosphere/aiihca.pea1120"), + Path("/output000/atmosphere/netCDF/aiihca.pe-010101_dai.nc")), + (Path("/output000/atmosphere/aiihca.pea1130"), + Path("/output000/atmosphere/netCDF/aiihca.pe-010101_dai.nc")), + (Path("/output000/atmosphere/aiihca.pea1140"), + Path("/output000/atmosphere/netCDF/aiihca.pe-010101_dai.nc")), + (Path("/output000/atmosphere/aiihca.pea1150"), + Path("/output000/atmosphere/netCDF/aiihca.pe-010101_dai.nc")), + (Path("/output000/atmosphere/aiihca.aiihca.paa1jan"), + Path("/output000/atmosphere/netCDF/aiihca.pa-010101_mon.nc")), + (Path("/output000/atmosphere/aiihca.aiihca.paa1feb"), + Path("/output000/atmosphere/netCDF/aiihca.pa-010102_mon.nc"))], + # Expected pairs + [(Path("/output000/atmosphere/aiihca.aiihca.paa1jan"), + Path("/output000/atmosphere/netCDF/aiihca.pa-010101_mon.nc")), + (Path("/output000/atmosphere/aiihca.aiihca.paa1feb"), + Path("/output000/atmosphere/netCDF/aiihca.pa-010102_mon.nc"))] + ), + ( # input_output_pairs + [(Path("/output000/atmosphere/aiihca.pea1120"), + Path("/dir_1/dir_2/../aiihca.pe-010101_dai.nc")), + (Path("/output000/atmosphere/aiihca.pea1130"), + Path("/dir_1/aiihca.pe-010101_dai.nc"))], + # Expected pairs + [] + )] +) +def test_filter_naming_collisions(input_output_pairs, expected_pairs): + """ + Test that inputs with overlapping output paths are removed. + """ + with pytest.warns(match="Multiple inputs have same output path"): + filtered_paths = list( + esm1p5_convert.filter_name_collisions(input_output_pairs) + ) + + assert filtered_paths == expected_pairs + + def test_format_successes(): succeeded_inputs = [ Path("dir_1/fake_file_1"), diff --git a/umpost/__init__.py b/umpost/__init__.py index e69de29..4a558f3 100644 --- a/umpost/__init__.py +++ b/umpost/__init__.py @@ -0,0 +1,3 @@ +from umpost import _version + +__version__ = _version.get_versions()["version"] diff --git a/umpost/conversion_driver_esm1p5.py b/umpost/conversion_driver_esm1p5.py index 2a96f9c..bb7b51d 100755 --- a/umpost/conversion_driver_esm1p5.py +++ b/umpost/conversion_driver_esm1p5.py @@ -153,7 +153,7 @@ def get_nc_filename(fields_file_name, date=None): warnings.warn( f"Unit code '{unit}' from filename f{fields_file_name} " "not recognized. Frequency information will not be added " - "to the netCDF filename." + "to the netCDF filename.", RuntimeWarning ) suffix = "" @@ -268,6 +268,45 @@ def format_failures(failed, quiet): yield failure_report +def _resolve_path(path): + """ + Resolve path for use in comparison. Ensure that symlinks, relative paths, + and home directories are expanded. + """ + return os.path.realpath(os.path.expanduser(path)) + + +def filter_name_collisions(input_output_pairs): + """ + Remove input/output pairs which have overlapping output paths. + + Parameters + ---------- + input_ouptut_pairs: iterator of tuples (input_path, output_path). + + Yields + ------- + filtered_pairs: (input_path, output_path) tuples with unique + output_path values. + """ + # Convert to list to allow repeated traversal. 
+ input_output_pairs = list(input_output_pairs) + + output_paths = [_resolve_path(output) for _, output in input_output_pairs] + output_counts = collections.Counter(output_paths) + + for input_path, output_path in input_output_pairs: + if output_counts[_resolve_path(output_path)] != 1: + msg = ( + f"Multiple inputs have same output path {output_path}.\n" + f"{input_path} will not be converted." + ) + warnings.warn(msg) + continue + + yield input_path, output_path + + def convert_esm1p5_output_dir(esm1p5_output_dir): """ Driver function for converting ESM1.5 atmospheric outputs during a simulation. @@ -321,6 +360,7 @@ def convert_esm1p5_output_dir(esm1p5_output_dir): output_paths = [get_nc_write_path(path, nc_write_dir, get_ff_date(path)) for path in atm_dir_fields_files] input_output_pairs = zip(atm_dir_fields_files, output_paths) + input_output_pairs = filter_name_collisions(input_output_pairs) succeeded, failed = convert_fields_file_list(input_output_pairs) diff --git a/umpost/um2netcdf.py b/umpost/um2netcdf.py index 7817487..30f3b97 100644 --- a/umpost/um2netcdf.py +++ b/umpost/um2netcdf.py @@ -16,6 +16,7 @@ import warnings import collections +import umpost from umpost import stashvar_cmip6 as stashvar import mule @@ -34,8 +35,8 @@ STASH = "STASH" ITEM_CODE = "item_code" -GRID_END_GAME = 'EG' -GRID_NEW_DYNAMICS = 'ND' +GRID_END_GAME = "EG" +GRID_NEW_DYNAMICS = "ND" # TODO: what is this limit & does it still exist? XCONV_LONG_NAME_LIMIT = 110 @@ -44,10 +45,7 @@ LATITUDE = "latitude" # Bounds for global single cells -GLOBAL_COORD_BOUNDS = { - LONGITUDE: np.array([[0., 360.]]), - LATITUDE: np.array([[-90., 90.]]) -} +GLOBAL_COORD_BOUNDS = {LONGITUDE: np.array([[0.0, 360.0]]), LATITUDE: np.array([[-90.0, 90.0]])} NUM_LAT_RIVER_GRID_POINTS = 180 NUM_LON_RIVER_GRID_POINTS = 360 @@ -59,12 +57,7 @@ VAR_NAME_LAT_STANDARD = "lat" VAR_NAME_LON_STANDARD = "lon" -NC_FORMATS = { - 1: 'NETCDF3_CLASSIC', - 2: 'NETCDF3_64BIT', - 3: 'NETCDF4', - 4: 'NETCDF4_CLASSIC' -} +NC_FORMATS = {1: "NETCDF3_CLASSIC", 2: "NETCDF3_64BIT", 3: "NETCDF4", 4: "NETCDF4_CLASSIC"} MODEL_LEVEL_NUM = "model_level_number" MODEL_RHO_LEVEL = "model_rho_level_number" @@ -83,11 +76,12 @@ FORECAST_PERIOD = "forecast_period" TIME = "time" -DEFAULT_FILL_VAL_FLOAT = 1.e20 +DEFAULT_FILL_VAL_FLOAT = 1.0e20 class PostProcessingError(Exception): """Generic class for um2nc specific errors.""" + pass @@ -96,6 +90,7 @@ class UnsupportedTimeSeriesError(PostProcessingError): Error to be raised when latitude and longitude coordinates are missing. """ + pass @@ -133,10 +128,7 @@ def fix_lat_coord_name(lat_coordinate, grid_type, dlat): """ if lat_coordinate.name() != LATITUDE: - raise ValueError( - f"Wrong coordinate {lat_coordinate.name()} supplied. " - f"Expected {LATITUDE}." - ) + raise ValueError(f"Wrong coordinate {lat_coordinate.name()} supplied. " f"Expected {LATITUDE}.") if is_lat_river_grid(lat_coordinate.points): lat_coordinate.var_name = VAR_NAME_LAT_RIVER @@ -162,10 +154,7 @@ def fix_lon_coord_name(lon_coordinate, grid_type, dlon): """ if lon_coordinate.name() != LONGITUDE: - raise ValueError( - f"Wrong coordinate {lon_coordinate.name()} supplied. " - f"Expected {LATITUDE}." - ) + raise ValueError(f"Wrong coordinate {lon_coordinate.name()} supplied. " f"Expected {LATITUDE}.") if is_lon_river_grid(lon_coordinate.points): lon_coordinate.var_name = VAR_NAME_LON_RIVER @@ -209,7 +198,7 @@ def is_lat_v_grid(latitude_points, grid_type, dlat): dlat: (float) meridional spacing between latitude grid points. 
""" min_latitude = latitude_points[0] - min_lat_v_nd_grid = -90.+0.5*dlat + min_lat_v_nd_grid = -90.0 + 0.5 * dlat min_lat_v_eg_grid = -90 if grid_type == GRID_END_GAME: @@ -231,7 +220,7 @@ def is_lon_u_grid(longitude_points, grid_type, dlon): dlon: (float) zonal spacing between longitude grid points. """ min_longitude = longitude_points[0] - min_lon_u_nd_grid = 0.5*dlon + min_lon_u_nd_grid = 0.5 * dlon min_lon_u_eg_grid = 0 if grid_type == GRID_END_GAME: @@ -253,10 +242,7 @@ def add_latlon_coord_bounds(cube_coordinate): """ coordinate_name = cube_coordinate.name() if coordinate_name not in [LONGITUDE, LATITUDE]: - raise ValueError( - f"Wrong coordinate {coordinate_name} supplied. " - f"Expected one of {LONGITUDE}, {LATITUDE}." - ) + raise ValueError(f"Wrong coordinate {coordinate_name} supplied. " f"Expected one of {LONGITUDE}, {LATITUDE}.") # Only add bounds if not already present. if not cube_coordinate.has_bounds(): @@ -289,10 +275,7 @@ def fix_latlon_coords(cube, grid_type, dlat, dlon): latitude_coordinate = cube.coord(LATITUDE) longitude_coordinate = cube.coord(LONGITUDE) except iris.exceptions.CoordinateNotFoundError as coord_error: - msg = ( - "Missing latitude or longitude coordinate for variable (possible timeseries?): \n" - f"{cube}\n" - ) + msg = "Missing latitude or longitude coordinate for variable (possible timeseries?): \n" f"{cube}\n" raise UnsupportedTimeSeriesError(msg) from coord_error # Force to double for consistency with CMOR @@ -318,13 +301,11 @@ def get_default_fill_value(cube): ------- fill_value: numpy scalar with type matching cube.data """ - if cube.data.dtype.kind == 'f': + if cube.data.dtype.kind == "f": fill_value = DEFAULT_FILL_VAL_FLOAT else: - fill_value = netCDF4.default_fillvals[ - f"{cube.data.dtype.kind:s}{cube.data.dtype.itemsize:1d}" - ] + fill_value = netCDF4.default_fillvals[f"{cube.data.dtype.kind:s}{cube.data.dtype.itemsize:1d}"] # NB: the `_FillValue` attribute appears to be converted to match the # cube data's type externally (likely in the netCDF4 library). It's not @@ -365,8 +346,7 @@ def fix_fill_value(cube, custom_fill_value=None): # TODO: Is placing the fill value in an array still necessary, # given the type conversion in get_default_fill_value() - cube.attributes['missing_value'] = np.array([fill_value], - cube.data.dtype) + cube.attributes["missing_value"] = np.array([fill_value], cube.data.dtype) return fill_value @@ -387,7 +367,7 @@ def fix_forecast_reference_time(cube): refdate = reftime.units.num2date(reftime.points[0]) # TODO: replace with `if` check to prevent assert vanishing in optimised Python mode - assert time.units.origin == 'hours since 1970-01-01 00:00:00' + assert time.units.origin == "hours since 1970-01-01 00:00:00" # TODO: add reference year as a configurable arg? # TODO: determine if the start year should be changed from 1600 @@ -401,10 +381,10 @@ def fix_forecast_reference_time(cube): new_calendar = time.units.calendar time.units = cf_units.Unit("days since 1970-01-01 00:00", calendar=new_calendar) - time.points = time.points / 24. + time.points = time.points / 24.0 if time.bounds is not None: - time.bounds = time.bounds / 24. 
+ time.bounds = time.bounds / 24.0 # TODO: remove_coord() calls the coord() lookup, which raises # CoordinateNotFoundError if the forecast period is missing, this @@ -424,8 +404,7 @@ def fix_forecast_reference_time(cube): # https://github.com/ACCESS-NRI/um2nc-standalone/pull/118/files#r1794321677 def convert_proleptic(time): # Convert units from hours to days and shift origin from 1970 to 0001 - newunits = cf_units.Unit("days since 0001-01-01 00:00", - calendar=cf_units.CALENDAR_PROLEPTIC_GREGORIAN) + newunits = cf_units.Unit("days since 0001-01-01 00:00", calendar=cf_units.CALENDAR_PROLEPTIC_GREGORIAN) tvals = np.array(time.points) # Need a copy because can't assign to time.points[i] tbnds = np.array(time.bounds) if time.bounds is not None else None @@ -435,15 +414,17 @@ def convert_proleptic(time): # https://github.com/ACCESS-NRI/um2nc-standalone/pull/118/files#r1794315128 for i in range(len(time.points)): date = time.units.num2date(tvals[i]) - newdate = cftime.DatetimeProlepticGregorian(date.year, date.month, date.day, - date.hour, date.minute, date.second) + newdate = cftime.DatetimeProlepticGregorian( + date.year, date.month, date.day, date.hour, date.minute, date.second + ) tvals[i] = newunits.date2num(newdate) if tbnds is not None: # Fields with instantaneous data don't have bounds for j in range(2): date = time.units.num2date(tbnds[i][j]) - newdate = cftime.DatetimeProlepticGregorian(date.year, date.month, date.day, - date.hour, date.minute, date.second) + newdate = cftime.DatetimeProlepticGregorian( + date.year, date.month, date.day, date.hour, date.minute, date.second + ) tbnds[i][j] = newunits.date2num(newdate) time.points = tvals @@ -468,13 +449,12 @@ def fix_cell_methods(cell_methods): ------- A tuple of cell methods, with "hour" removed from interval names """ - return tuple(CellMethod(m.method, m.coord_names, _remove_hour_interval(m), m.comments) - for m in cell_methods) + return tuple(CellMethod(m.method, m.coord_names, _remove_hour_interval(m), m.comments) for m in cell_methods) def _remove_hour_interval(cell_method): """Helper retains all non 'hour' intervals.""" - return (i for i in cell_method.intervals if i.find('hour') == -1) + return (i for i in cell_method.intervals if i.find("hour") == -1) def apply_mask(c, heaviside, hcrit): @@ -484,32 +464,31 @@ def apply_mask(c, heaviside, hcrit): # If the shapes match it's simple # Temporarily turn off warnings from 0/0 # TODO: refactor to use np.where() - with np.errstate(divide='ignore', invalid='ignore'): - c.data = np.ma.masked_array(c.data/heaviside.data, heaviside.data <= hcrit).astype(np.float32) + with np.errstate(divide="ignore", invalid="ignore"): + c.data = np.ma.masked_array(c.data / heaviside.data, heaviside.data <= hcrit).astype(np.float32) else: # Are the levels of c a subset of the levels of the heaviside variable? 
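+        # If so, extract just the matching pressure levels from the heaviside
+        # cube and apply the same divide-and-mask step to that subset; if the
+        # levels cannot be matched, masking is impossible and an error is raised.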
- c_p = c.coord('pressure') - h_p = heaviside.coord('pressure') + c_p = c.coord("pressure") + h_p = heaviside.coord("pressure") # print('Levels for masking', c_p.points, h_p.points) if set(c_p.points).issubset(h_p.points): # Match is possible constraint = iris.Constraint(pressure=c_p.points) h_tmp = heaviside.extract(constraint) # Double check they're actually the same after extraction - if not np.all(c_p.points == h_tmp.coord('pressure').points): - raise Exception('Unexpected mismatch in levels of extracted heaviside function') - with np.errstate(divide='ignore', invalid='ignore'): - c.data = np.ma.masked_array(c.data/h_tmp.data, h_tmp.data <= hcrit).astype(np.float32) + if not np.all(c_p.points == h_tmp.coord("pressure").points): + raise Exception("Unexpected mismatch in levels of extracted heaviside function") + with np.errstate(divide="ignore", invalid="ignore"): + c.data = np.ma.masked_array(c.data / h_tmp.data, h_tmp.data <= hcrit).astype(np.float32) else: - raise Exception('Unable to match levels of heaviside function to variable %s' % c.name()) + raise Exception("Unable to match levels of heaviside function to variable %s" % c.name()) def process(infile, outfile, args): with warnings.catch_warnings(): # NB: Information from STASHmaster file is not required by `process`. # Hence supress missing STASHmaster warnings. - warnings.filterwarnings(action="ignore", category=UserWarning, - message=r"\sUnable to load STASHmaster") + warnings.filterwarnings(action="ignore", category=UserWarning, message=r"\sUnable to load STASHmaster") ff = mule.load_umfile(str(infile)) mv = process_mule_vars(ff) @@ -615,7 +594,7 @@ def process_mule_vars(fields_file: mule.ff.FieldsFile): A MuleVars data structure. """ if isinstance(fields_file, mule.ancil.AncilFile): - raise NotImplementedError('Ancillary files are currently not supported') + raise NotImplementedError("Ancillary files are currently not supported") if mule.__version__ == "2020.01.1": msg = "mule 2020.01.1 doesn't handle pathlib Paths properly" @@ -793,20 +772,17 @@ def non_masking_cubes(cubes, heaviside_uv, heaviside_t, verbose: bool): heaviside_t : heaviside_t cube or None if it's missing verbose : True to emit warnings to indicate a cube has been removed """ - msg = ("{} field needed for masking pressure level data is missing. " - "Excluding cube '{}' as it cannot be masked") + msg = "{} field needed for masking pressure level data is missing. 
" "Excluding cube '{}' as it cannot be masked" for c in cubes: if require_heaviside_uv(c.item_code) and heaviside_uv is None: if verbose: - warnings.warn(msg.format("heaviside_uv", c.name()), - category=RuntimeWarning) + warnings.warn(msg.format("heaviside_uv", c.name()), category=RuntimeWarning) continue elif require_heaviside_t(c.item_code) and heaviside_t is None: if verbose: - warnings.warn(msg.format("heaviside_t", c.name()), - category=RuntimeWarning) + warnings.warn(msg.format("heaviside_t", c.name()), category=RuntimeWarning) continue yield c @@ -843,13 +819,11 @@ def filtered_cubes(cubes, include=None, exclude=None): def add_global_history(infile, iris_out): - version = -1 # FIXME: determine version + version = umpost.__version__ t = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S") um2nc_path = os.path.abspath(__file__) history = f"File {infile} converted with {um2nc_path} {version} at {t}" - - iris_out.update_global_attributes({'history': history}) - warnings.warn("um2nc version number not specified!") + iris_out.update_global_attributes({"history": history}) # TODO: refactor func sig to take exclusive simple OR unique name field? @@ -871,9 +845,9 @@ def fix_var_name(cube, um_unique_name, simple: bool): # Could there be cases with both max and min? if cube.var_name: - if any([m.method == 'maximum' for m in cube.cell_methods]): + if any([m.method == "maximum" for m in cube.cell_methods]): cube.var_name += "_max" - if any([m.method == 'minimum' for m in cube.cell_methods]): + if any([m.method == "minimum" for m in cube.cell_methods]): cube.var_name += "_min" @@ -891,18 +865,20 @@ def fix_standard_name(cube, um_standard_name, verbose: bool): # The iris name mapping seems wrong for these - perhaps assuming rotated grids? if cube.standard_name: - if cube.standard_name == 'x_wind': - cube.standard_name = 'eastward_wind' - if cube.standard_name == 'y_wind': - cube.standard_name = 'northward_wind' + if cube.standard_name == "x_wind": + cube.standard_name = "eastward_wind" + if cube.standard_name == "y_wind": + cube.standard_name = "northward_wind" if um_standard_name and cube.standard_name != um_standard_name: # TODO: remove verbose arg & always warn? Control warning visibility at cmd line? if verbose: # TODO: show combined stash code instead? 
- msg = (f"Standard name mismatch section={stash_code.section}" - f" item={stash_code.item} standard_name={cube.standard_name}" - f" UM var name={um_standard_name}") + msg = ( + f"Standard name mismatch section={stash_code.section}" + f" item={stash_code.item} standard_name={cube.standard_name}" + f" UM var name={um_standard_name}" + ) warnings.warn(msg) cube.standard_name = um_standard_name @@ -964,17 +940,17 @@ def fix_level_coord(cube, z_rho, z_theta, tol=1e-6): return if c_lev: - d_rho = abs(c_height.points[0]-z_rho) + d_rho = abs(c_height.points[0] - z_rho) if d_rho.min() < tol: - c_lev.var_name = 'model_rho_level_number' - c_height.var_name = 'rho_level_height' - c_sigma.var_name = 'sigma_rho' + c_lev.var_name = "model_rho_level_number" + c_height.var_name = "rho_level_height" + c_sigma.var_name = "sigma_rho" else: - d_theta = abs(c_height.points[0]-z_theta) + d_theta = abs(c_height.points[0] - z_theta) if d_theta.min() < tol: - c_lev.var_name = 'model_theta_level_number' - c_height.var_name = 'theta_level_height' - c_sigma.var_name = 'sigma_theta' + c_lev.var_name = "model_theta_level_number" + c_height.var_name = "theta_level_height" + c_sigma.var_name = "sigma_theta" def fix_pressure_levels(cube, decimals=5): @@ -995,13 +971,13 @@ def fix_pressure_levels(cube, decimals=5): cube if the pressure levels are reversed. """ try: - pressure = cube.coord('pressure') + pressure = cube.coord("pressure") except iris.exceptions.CoordinateNotFoundError: return # update existing cube metadata in place - pressure.attributes['positive'] = 'down' - pressure.convert_units('Pa') + pressure.attributes["positive"] = "down" + pressure.convert_units("Pa") # Round small fractions otherwise coordinates are off by 1e-10 in ncdump output pressure.points = np.round(pressure.points, decimals) @@ -1010,7 +986,7 @@ def fix_pressure_levels(cube, decimals=5): # Flip to get pressure decreasing as per CMIP6 standard # NOTE: returns a new cube! # TODO: add an iris.util.monotonic() check here? - return iris.util.reverse(cube, 'pressure') + return iris.util.reverse(cube, "pressure") MAX_NP_INT32 = np.iinfo(np.int32).max @@ -1029,14 +1005,16 @@ def convert_32_bit(cube): ----- RuntimeWarning : if the cube has data over 32-bit limits, causing an overflow. """ - if cube.data.dtype == 'float64': + if cube.data.dtype == "float64": cube.data = cube.data.astype(np.float32) - elif cube.data.dtype == 'int64': + elif cube.data.dtype == "int64": _max = np.max(cube.data) _min = np.min(cube.data) - msg = (f"32 bit under/overflow converting {cube.var_name}! Output data " - f"likely invalid. Use '--64' option to retain data integrity.") + msg = ( + f"32 bit under/overflow converting {cube.var_name}! Output data " + f"likely invalid. Use '--64' option to retain data integrity." + ) if _max > MAX_NP_INT32: warnings.warn(msg, category=RuntimeWarning) @@ -1068,7 +1046,7 @@ def fix_time_coord(cube, verbose): try: # If time is a dimension but not a coordinate dimension, coord_dims('time') # returns empty tuple - if tdim := cube.coord_dims('time'): + if tdim := cube.coord_dims("time"): # For fields with a pseudo-level, time may not be the first dimension if tdim != (0,): tdim = tdim[0] @@ -1084,9 +1062,9 @@ def fix_time_coord(cube, verbose): cube.transpose(neworder) else: # TODO: does this return a new copy or modified cube? 
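+            # (Per the iris documentation, new_axis returns a new cube rather
+            # than modifying its input, hence the reassignment below.)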
- cube = iris.util.new_axis(cube, cube.coord('time')) + cube = iris.util.new_axis(cube, cube.coord("time")) - unlimited_dimensions = ['time'] + unlimited_dimensions = ["time"] except iris.exceptions.CoordinateNotFoundError: # No time dimension (probably ancillary file) @@ -1097,39 +1075,61 @@ def fix_time_coord(cube, verbose): def parse_args(): parser = argparse.ArgumentParser(description="Convert UM fieldsfile to netcdf") - parser.add_argument('-k', dest='nckind', required=False, type=int, - default=3, - help=('specify netCDF output format: 1 classic, 2 64-bit' - ' offset, 3 netCDF-4, 4 netCDF-4 classic model.' - ' Default 3'), - choices=[1, 2, 3, 4]) - parser.add_argument('-c', dest='compression', required=False, type=int, - default=4, help='compression level (0=none, 9=max). Default 4') - parser.add_argument('--64', dest='use64bit', action='store_true', - default=False, help='Use 64 bit netcdf for 64 bit input') - parser.add_argument('-v', '--verbose', dest='verbose', - action='count', default=0, - help='verbose output (-vv for extra verbose)') + parser.add_argument( + "-k", + dest="nckind", + required=False, + type=int, + default=3, + help=( + "specify netCDF output format: 1 classic, 2 64-bit" + " offset, 3 netCDF-4, 4 netCDF-4 classic model." + " Default 3" + ), + choices=[1, 2, 3, 4], + ) + parser.add_argument( + "-c", + dest="compression", + required=False, + type=int, + default=4, + help="compression level (0=none, 9=max). Default 4", + ) + parser.add_argument( + "--64", dest="use64bit", action="store_true", default=False, help="Use 64 bit netcdf for 64 bit input" + ) + parser.add_argument( + "-v", "--verbose", dest="verbose", action="count", default=0, help="verbose output (-vv for extra verbose)" + ) group = parser.add_mutually_exclusive_group() - group.add_argument('--include', dest='include_list', type=int, - nargs='+', help='List of stash codes to include') - group.add_argument('--exclude', dest='exclude_list', type=int, - nargs='+', help='List of stash codes to exclude') - - parser.add_argument('--nomask', dest='nomask', action='store_true', - default=False, - help="Don't apply heaviside function mask to pressure level fields") - parser.add_argument('--nohist', dest='nohist', action='store_true', - default=False, help="Don't update history attribute") - parser.add_argument('--simple', dest='simple', action='store_true', - default=False, help="Use a simple names of form fld_s01i123.") - parser.add_argument('--hcrit', dest='hcrit', type=float, default=0.5, - help=("Critical value of heavyside fn for pressure level" - " masking (default=0.5)")) - - parser.add_argument('infile', help='Input file') - parser.add_argument('outfile', help='Output file') + group.add_argument("--include", dest="include_list", type=int, nargs="+", help="List of stash codes to include") + group.add_argument("--exclude", dest="exclude_list", type=int, nargs="+", help="List of stash codes to exclude") + + parser.add_argument( + "--nomask", + dest="nomask", + action="store_true", + default=False, + help="Don't apply heaviside function mask to pressure level fields", + ) + parser.add_argument( + "--nohist", dest="nohist", action="store_true", default=False, help="Don't update history attribute" + ) + parser.add_argument( + "--simple", dest="simple", action="store_true", default=False, help="Use a simple names of form fld_s01i123." 
+    )
+    parser.add_argument(
+        "--hcrit",
+        dest="hcrit",
+        type=float,
+        default=0.5,
+        help=("Critical value of heaviside fn for pressure level" " masking (default=0.5)"),
+    )
+
+    parser.add_argument("infile", help="Input file")
+    parser.add_argument("outfile", help="Output file")
 
     return parser.parse_args()
 
@@ -1139,5 +1139,5 @@ def main():
     process(args.infile, args.outfile, args)
 
 
-if __name__ == '__main__':
+if __name__ == "__main__":
     main()
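The reformatted `parse_args()` above defines every option that `main()` forwards to `process(args.infile, args.outfile, args)`, which also suggests how the converter could be driven from Python once the "`um2netcdf` as an API" usage flagged in the README TODO is documented. A minimal sketch, assuming only what is visible in this diff: the package is installed (for example with the editable install used in CI), the `argparse.Namespace` fields mirror the command-line defaults, and the file names are placeholders:

```python
# Minimal sketch: calling the converter programmatically instead of via the
# `um2nc` entry point. The Namespace mirrors the options defined in parse_args();
# the input/output file names are placeholders.
from argparse import Namespace

from umpost import um2netcdf

args = Namespace(
    nckind=3,           # netCDF-4 output format (parse_args default)
    compression=4,      # deflate level 0-9
    use64bit=False,     # keep 32-bit output variables
    verbose=0,
    include_list=None,  # or a list of STASH item codes to include
    exclude_list=None,  # mutually exclusive with include_list
    nomask=False,       # apply heaviside masking to pressure-level fields
    nohist=False,       # still update the global history attribute
    simple=False,       # set True for simple names of form fld_s01i123
    hcrit=0.5,          # heaviside critical value used by apply_mask()
)

um2netcdf.process("aiihca.paa1jan", "aiihca.pa-010101_mon.nc", args)
```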