From 5c0568303f95cc2e7f41a89c85d886dcffced4a4 Mon Sep 17 00:00:00 2001 From: Yohan Chatelain Date: Thu, 21 Dec 2023 12:03:17 -0500 Subject: [PATCH] Release v0.3.0 - Fix #31. Failed when axis!=0 - Change API. Only expose: * significant_digits * contributing_bits * probability_estimation_bernoulli * minimum_number_of_trials * Method, Error - Add tests for compute_z - Add docs with pdoc - Generate github-pages with github/actions Escape python-version in github/actions --- .github/workflows/docs.yml | 43 +++ .github/workflows/python-app.yml | 8 +- README.md | 92 +++-- pdoc/template/math.html.jinja2 | 34 ++ pyproject.toml | 3 +- significantdigits/__init__.py | 32 +- significantdigits/_significantdigits.py | 468 +++++++++++++---------- significantdigits/args.py | 36 +- significantdigits/test/test_compute_z.py | 39 +- 9 files changed, 484 insertions(+), 271 deletions(-) create mode 100644 .github/workflows/docs.yml create mode 100644 pdoc/template/math.html.jinja2 diff --git a/.github/workflows/docs.yml b/.github/workflows/docs.yml new file mode 100644 index 0000000..5c6e53c --- /dev/null +++ b/.github/workflows/docs.yml @@ -0,0 +1,43 @@ +name: website + +# build the documentation whenever there are new commits on main +on: + push: + branches: + - main + +# security: restrict permissions for CI jobs. +permissions: + contents: read + +jobs: + # Build the documentation and upload the static HTML files as an artifact. + build: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + - uses: actions/setup-python@v4 + with: + python-version: '3.12' + + - run: pip install . + - run: pdoc -d numpy -t pdoc/template/ --math -o docs significantdigits/ + + - uses: actions/upload-pages-artifact@v2 + with: + path: docs/ + + # Deploy the artifact to GitHub pages. + # This is a separate job so that only actions/deploy-pages has the necessary permissions. + deploy: + needs: build + runs-on: ubuntu-latest + permissions: + pages: write + id-token: write + environment: + name: github-pages + url: ${{ steps.deployment.outputs.page_url }} + steps: + - id: deployment + uses: actions/deploy-pages@v2 \ No newline at end of file diff --git a/.github/workflows/python-app.yml b/.github/workflows/python-app.yml index b867452..bd3b2fb 100644 --- a/.github/workflows/python-app.yml +++ b/.github/workflows/python-app.yml @@ -17,12 +17,16 @@ jobs: runs-on: ubuntu-latest + strategy: + matrix: + python-version: ["3.8", "3.9", "3.10", "3.11", "3.12"] + steps: - uses: actions/checkout@v3 - - name: Set up Python 3.10 + - name: Set up Python ${{ matrix.python-version }} uses: actions/setup-python@v3 with: - python-version: "3.10" + python-version: ${{ matrix.python-version }} - name: Install dependencies run: | python -m pip install --upgrade pip diff --git a/README.md b/README.md index 99b9fb5..29594fc 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,6 @@ -# significantdigits package - v0.2.0 +# significantdigits package - v0.3.0 -Compute the number of significant digits basis on the paper [Confidence Intervals for Stochastic Arithmetic](https://arxiv.org/abs/1807.09655). +Compute the number of significant digits based on the paper [Confidence Intervals for Stochastic Arithmetic](https://arxiv.org/abs/1807.09655). This package is also inspired by the [Jupyter Notebook](https://github.com/interflop/stochastic-confidence-intervals/blob/master/Intervals.ipynb) included with the publication. ## Getting started @@ -9,14 +9,14 @@ This synthetic example illustrates how to compute significant digits of a results sample with a given known reference: ```python ->>> import significantdigits as sig +>>> import significantdigits as sd >>> import numpy as np >>> from numpy.random import uniform as U >>> np.random.seed(0) >>> eps = 2**-52 >>> # simulates results with epsilon differences >>> X = [1+U(-1,1)*eps for _ in range(10)] ->>> sig.significant_digits(X, reference=1) +>>> sd.significant_digits(X, reference=1) >>> 51.02329058847853 ``` @@ -30,21 +30,21 @@ If the reference is unknown, one can use the sample average: ```python ... ->>> sig.significant_digits(X, reference=np.mean(X)) +>>> sd.significant_digits(X, reference=np.mean(X)) >>> 51.02329058847853 ``` ## Installation ```bash - python3 -m pip install -U significantdigits +python3 -m pip install -U significantdigits ``` or if you want the latest version of the code, you can install it **from** the repository directly ```bash - python3 -m pip install -U git+https://github.com/verificarlo/significantdigits.git - # or if you don't have 'git' installed - python3 -m pip install -U https://github.com/verificarlo/significantdigits/zipball/master +python3 -m pip install -U git+https://github.com/verificarlo/significantdigits.git +# or if you don't have 'git' installed +python3 -m pip install -U https://github.com/verificarlo/significantdigits/zipball/master ``` ## Advanced Usage @@ -53,13 +53,13 @@ or if you want the latest version of the code, you can install it **from** the r Functions accept the following types of inputs: ```python - InputType: np.ndarray | tuple | list + InputType: ArrayLike ``` -Those types are accessible with the `get_input_type` function. +Those types are accessible with the `numpy.typing.ArrayLike` type. ### Z computation Metrics are computed using Z, the distance between the samples and the reference. -They are four possible cases depending on the distance and the nature of the reference that is summarized in this table: +There are four possible cases depending on the distance and the nature of the reference that are summarized in this table: | | constant reference (x) | random variable reference (Y) | | ------------------ | ---------------------- | ----------------------------- | @@ -68,7 +68,11 @@ They are four possible cases depending on the distance and the nature of the ref ```python -compute_z(array: ~InputType, reference: Optional[~ReferenceType], error: significantdigits._significantdigits.Error | str, axis: int, shuffle_samples: bool = False) -> ~InputType +_compute_z(array: InternalArrayType, + reference: InternalArrayType | None, + error: Error | str, + axis: int, + shuffle_samples: bool = False) -> InternalArrayType Compute Z, the distance between the random variable and the reference Compute Z, the distance between the random variable and the reference @@ -88,11 +92,11 @@ compute_z(array: ~InputType, reference: Optional[~ReferenceType], error: signifi Parameters ---------- - array : InputType + array : InternalArrayType The random variable - reference : Optional[ReferenceType] + reference : Optional[InternalArrayType] The reference to compare against - error : Method.Error | str + error : Error | str The error function to compute Z axis : int, default=0 The axis or axes along which compute Z @@ -101,7 +105,7 @@ compute_z(array: ~InputType, reference: Optional[~ReferenceType], error: signifi Returns ------- - array : numpy.ndarray + array : InternalArrayType The result of Z following the error method choose ``` @@ -129,10 +133,18 @@ class Method(AutoName): ```python -significant_digits(array: ~InputType, - reference: Optional[~ReferenceType] = None, - axis: int = 0, basis: int = 2, - error: str | significantdigits._significantdi +significant_digits(array: InputType, + reference: ReferenceType | None = None, + axis: int = 0, + basis: int = 2, + error: Error | str, + method: Method | str, + probability: float = 0.95, + confidence: float = 0.95, + shuffle_samples: bool = False, + dtype: DTypeLike | None = None + ) -> ArrayLike + Compute significant digits This function computes with a certain probability @@ -142,7 +154,7 @@ significant_digits(array: ~InputType, ---------- array: InputType Element to compute - reference: Optional[ReferenceType], optional=None + reference: ReferenceType | None, optional=None Reference for comparing the array axis: int, optional=0 Axis or axes along which the significant digits are computed @@ -160,7 +172,7 @@ significant_digits(array: ~InputType, If reference is None, the array is split in two and \ comparison is done between both pieces. \ If shuffle_samples is True, it shuffles pieces. - dtype : np.dtype, default=None + dtype : dtype_like | None, default=None Numerical type used for computing significant digits Widest format between array and reference is taken if no supplied. @@ -174,10 +186,20 @@ significant_digits(array: ~InputType, ### Contributing digits ```python -contributing_digits(array: ~InputType, reference: Optional[~ReferenceType] = None, axis: int = 0, basis: int = 2, error: str | significantdigits._significantdigits.Error = ArrayLike + Compute contributing digits - This function computes with a certain probability the number of bits of the mantissa that will round the result towards the correct reference value[1]_ @@ -186,7 +208,7 @@ contributing_digits(array: ~InputType, reference: Optional[~ReferenceType] = Non ---------- array: InputArray Element to compute - reference: Optional[ReferenceArray], default=None + reference: ReferenceArray | None, default=None Reference for comparing the array axis: int, default=0 Axis or axes along which the contributing digits are computed @@ -205,7 +227,7 @@ contributing_digits(array: ~InputType, reference: Optional[~ReferenceType] = Non If reference is None, the array is split in two and comparison is done between both pieces. If shuffle_samples is True, it shuffles pieces. - dtype : np.dtype, default=None + dtype : dtype_like | None, default=None Numerical type used for computing contributing digits Widest format between array and reference is taken if no supplied. @@ -219,12 +241,10 @@ contributing_digits(array: ~InputType, reference: Optional[~ReferenceType] = Non These are utility functions for the general case. -`probability_estimation_general` -allows having an estimation -on the lower bound probability given the sample size. +#### `probability_estimation_general` + +Estimates the lower bound probability given the sample size. -`minimum_number_of_trials` gives the minimal sample size -required to reach the requested `probability` and `confidence`. ```python probability_estimation_general(success: int, trials: int, confidence: float) -> float @@ -250,8 +270,12 @@ probability_estimation_general(success: int, trials: int, confidence: float) -> The lower bound probability with `confidence` level to have `success` successes for `trials` trials ``` -```python +#### `minimum_number_of_trials` + +Returns the minimal sample size required to reach the requested `probability` and `confidence`. + +```python minimum_number_of_trials(probability: float, confidence: float) -> int Computes the minimum number of trials to have probability and confidence @@ -273,7 +297,6 @@ minimum_number_of_trials(probability: float, confidence: float) -> int ------- int Minimal sample size to have given probability and confidence - ``` ### License @@ -285,3 +308,4 @@ See https://llvm.org/LICENSE.txt for license information. Copyright (c) 2020-2023 Verificarlo Contributors +--- \ No newline at end of file diff --git a/pdoc/template/math.html.jinja2 b/pdoc/template/math.html.jinja2 new file mode 100644 index 0000000..ee0b833 --- /dev/null +++ b/pdoc/template/math.html.jinja2 @@ -0,0 +1,34 @@ +{# This template is included in math mode and loads MathJax for formula rendering. #} + + + + + \ No newline at end of file diff --git a/pyproject.toml b/pyproject.toml index b361736..115bd04 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "hatchling.build" [project] name = "significantdigits" -version = "0.2.0" +version = "0.3.0" description = "Solid stochastic statistic analysis of Stochastic Arithmetic" authors = [ { name = "Verificarlo contributors", email = "verificarlo@googlegroups.com" }, @@ -31,6 +31,7 @@ dependencies = [ "scipy>=1.7.3", "toml>=0.10.2", "icecream>=2.1.3", + "pdoc>=14.2.0" ] [tool.hatch.build] diff --git a/significantdigits/__init__.py b/significantdigits/__init__.py index 6f3ab69..7d76b56 100644 --- a/significantdigits/__init__.py +++ b/significantdigits/__init__.py @@ -1 +1,31 @@ -from ._significantdigits import * +""" +.. include:: ../README.md +""" + +from ._significantdigits import ( + InputType, + ReferenceType, + Method, + Metric, + Error, + SignificantDigitsException, + significant_digits, + contributing_digits, + change_basis, + probability_estimation_bernoulli, + minimum_number_of_trials, +) + +__all__ = [ + "InputType", + "ReferenceType", + "Method", + "Metric", + "Error", + "SignificantDigitsException", + "significant_digits", + "contributing_digits", + "change_basis", + "probability_estimation_bernoulli", + "minimum_number_of_trials", +] diff --git a/significantdigits/_significantdigits.py b/significantdigits/_significantdigits.py index 51f2a3e..43cedd3 100644 --- a/significantdigits/_significantdigits.py +++ b/significantdigits/_significantdigits.py @@ -1,25 +1,52 @@ +from __future__ import annotations + +import typing import warnings from enum import Enum, auto -from typing import Optional, Tuple, TypeVar, Union +from typing import Optional, Tuple, Union import numpy as np +import numpy.typing as npt import scipy import scipy.stats +__all__ = [ + "significant_digits", + "contributing_digits", + "change_basis", + "probability_estimation_bernoulli", + "minimum_number_of_trials", + "InputType", + "ReferenceType", + "Metric", + "Method", + "Error", + "SignificantDigitsException", +] + class SignificantDigitsException(Exception): pass class AutoName(Enum): + """@private""" + + names: list[str] + """@private""" + map: dict[str, AutoName] + """@private""" + def _generate_next_value_(name, start, count, last_values): return name class Metric(AutoName): """ - Significant: Compute the number of significant digits - Contributing: Compute the number of contributing digits + Different metrics to compute + + - Significant: Compute the number of significant digits + - Contributing: Compute the number of contributing digits """ Significant = auto() @@ -27,6 +54,7 @@ class Metric(AutoName): @classmethod def is_significant(cls, error): + """@private""" if isinstance(error, cls): return error == cls.Significant if isinstance(error, str): @@ -34,6 +62,7 @@ def is_significant(cls, error): @classmethod def is_contributing(cls, error): + """@private""" if isinstance(error, cls): return error == cls.Contributing if isinstance(error, str): @@ -42,10 +71,12 @@ def is_contributing(cls, error): class Method(AutoName): """ - CNH: Centered Normality Hypothesis - X follows a Gaussian law centered around the reference or - Z follows a Gaussian law centered around 0 - General: No assumption about the distribution of X or Z + Methods for underlying distribution hypothesis + + - CNH: Centered Normality Hypothesis + - X follows a Gaussian law centered around the reference or + - Z follows a Gaussian law centered around 0 + - General: No assumption about the distribution of X or Z """ CNH = auto() @@ -53,6 +84,7 @@ class Method(AutoName): @classmethod def is_cnh(cls, error): + """@private""" if isinstance(error, cls): return error == cls.CNH if isinstance(error, str): @@ -60,6 +92,7 @@ def is_cnh(cls, error): @classmethod def is_general(cls, error): + """@private""" if isinstance(error, cls): return error == cls.General if isinstance(error, str): @@ -67,13 +100,20 @@ def is_general(cls, error): class Error(AutoName): - """ - ----------+-------------+------------- - | Reference x | Reference Y - ----------+-------------+------------- - Absolute | Z = X - x | Z = X - Y - Relative | Z = X/x - 1 | Z = X/Y - 1 - ----------+-------------+------------- + r""" + Errors between random variable and reference + + Reference is either a random variable ($Y$) or a constant ($x$) + + .. math:: + \begin{array}{|c|c|c|} + \hline + & \text{Reference } x & \text{Reference } Y \newline + \hline + \text{Absolute} & Z = X - x & Z = X - Y \newline + \text{Relative} & Z = X/x - 1 & Z = X/Y - 1 \newline + \hline + \end{array} """ Absolute = auto() @@ -81,6 +121,7 @@ class Error(AutoName): @classmethod def is_absolute(cls, error): + """@private""" if isinstance(error, cls): return error == cls.Absolute if isinstance(error, str): @@ -88,12 +129,23 @@ def is_absolute(cls, error): @classmethod def is_relative(cls, error): + """@private""" if isinstance(error, cls): return error == cls.Relative if isinstance(error, str): return error.lower() == cls.Relative.name +@typing.overload +def _lower_map(x: list[str]) -> list[str]: + ... + + +@typing.overload +def _lower_map(x: dict[str, AutoName]) -> dict[str, AutoName]: + ... + + def _lower_map(x): if isinstance(x, list): return list(map(str.lower, x)) @@ -110,38 +162,30 @@ def _lower_map(x): Error.names = _lower_map(vars(Error)["_member_names_"]) Error.map = _lower_map(vars(Error)["_value2member_map_"]) -internal_dtype = np.dtype(np.float64) -default_probability = {Metric.Significant: 0.95, Metric.Contributing: 0.51} -default_confidence = {Metric.Significant: 0.95, Metric.Contributing: 0.95} +_internal_dtype = np.dtype(np.float64) +_default_probability = {Metric.Significant: 0.95, Metric.Contributing: 0.51} +_default_confidence = {Metric.Significant: 0.95, Metric.Contributing: 0.95} -InputType = TypeVar("InputType", np.ndarray, tuple, list) +InputType = npt.ArrayLike r"""Valid random variable inputs type (np.ndarray, tuple, list) Types allowing for `array` in significant_digits and contributing_digits functions """ -ReferenceType = TypeVar( - "ReferenceType", np.ndarray, tuple, list, float, int, np.floating -) + +ReferenceType = Union[npt.ArrayLike, np.number] r"""Valid reference inputs type (np.ndarray, tuple, list, float, int) Types allowing for `reference` in significant_digits and contributing_digits functions """ +InternalArrayType = npt.NDArray[np.number] +r"""Internal array type used for computation""" -def get_input_type(): - r"""Returns InputType subtypes""" - return InputType.__constraints__ - -def get_reference_type(): - r"""Returns ReferenceType subtypes""" - return ReferenceType.__constraints__ - - -def assert_is_valid_metric(metric: Union[Metric, str]) -> None: +def _assert_is_valid_metric(metric: Union[Metric, str]) -> None: if Metric.is_significant(metric) or Metric.is_contributing(metric): return @@ -150,7 +194,7 @@ def assert_is_valid_metric(metric: Union[Metric, str]) -> None: ) -def assert_is_valid_method(method: Union[Method, str]) -> None: +def _assert_is_valid_method(method: Union[Method, str]) -> None: if Method.is_cnh(method) or Method.is_general(method): return @@ -159,23 +203,32 @@ def assert_is_valid_method(method: Union[Method, str]) -> None: ) -def assert_is_valid_error(error: Union[Error, str]) -> None: +def _assert_is_valid_error(error: Union[Error, str]) -> None: if Error.is_absolute(error) or Error.is_relative(error): return raise TypeError(f"provided invalid error {error}: " f"must be one of {list(Error)}") -def assert_is_probability(probability: float) -> None: - if probability < 0 or probability > 1: +def _assert_is_probability(probability: float) -> None: + if not (0 <= probability <= 1): raise TypeError("probability must be between 0 and 1") -def assert_is_confidence(confidence: float) -> None: - if confidence < 0 or confidence > 1: +def _assert_is_confidence(confidence: float) -> None: + if not (0 <= confidence <= 1): raise TypeError("confidence must be between 0 and 1") +def _preprocess_inputs( + array: InputType, + reference: Optional[ReferenceType], +) -> Tuple[InternalArrayType, Optional[InternalArrayType]]: + preprocessed_array = np.asanyarray(array) + preprocessed_reference = np.asanyarray(reference) if reference is not None else None + return (preprocessed_array, preprocessed_reference) + + def change_basis(array: InputType, basis: int) -> InputType: """Changes basis from binary to `basis` representation @@ -191,62 +244,34 @@ def change_basis(array: InputType, basis: int) -> InputType: np.ndarray Array convert to basis `basis` """ + (preprocessed_array, _) = _preprocess_inputs(array, None) pow2 = np.power(2, array, dtype=np.float64) - array_masked = np.ma.array(pow2, mask=array <= 0) + array_masked = np.ma.array(pow2, mask=(preprocessed_array <= 0)) return np.emath.logn(basis, array_masked) -def preprocess_inputs( - array: Union[np.ndarray, tuple, list], reference: Union[np.ndarray, tuple, list] -) -> Tuple[np.ndarray, np.ndarray]: - if scipy.sparse.issparse(array[0]): - array = np.asanyarray([i.toarray() for i in array]) - - if not isinstance(array, get_input_type()): - raise TypeError( - f"Input array must be " f"one of the following types " f"{get_input_type()}" - ) - - if not isinstance(array, np.ndarray): - array = np.array(array) - - if reference is not None: - if scipy.sparse.issparse(reference): - reference = reference.toarray() - if not isinstance(reference, get_reference_type()): - raise TypeError( - f"Reference must be " - f"one of the following types " - f"{get_reference_type()}" - ) - - if not isinstance(reference, np.ndarray): - reference = np.array(reference) - - return (array, reference) - - -def divide_along_axis(x, y, axis): +def _operator_along_axis(operator, x, y, axis): shape = list(y.shape) shape.insert(axis, 1) y_reshaped = np.reshape(y, shape) - return np.divide(x, y_reshaped) + return operator(x, y_reshaped) -def substract_along_axis(x, y, axis): - shape = list(y.shape) - shape.insert(axis, 1) - y_reshaped = np.reshape(y, shape) - return np.subtract(x, y_reshaped) +def _divide_along_axis(x, y, axis): + return _operator_along_axis(np.divide, x, y, axis) -def compute_z( - array: np.ndarray, - reference: Optional[ReferenceType], +def _substract_along_axis(x, y, axis): + return _operator_along_axis(np.subtract, x, y, axis) + + +def _compute_z( + array: InternalArrayType, + reference: Optional[InternalArrayType], error: Union[Error, str], axis: int, shuffle_samples: bool = False, -) -> InputType: +) -> InternalArrayType: r"""Compute Z, the distance between the random variable and the reference Compute Z, the distance between the random variable and the reference @@ -254,23 +279,24 @@ def compute_z( X = array Y = reference + Three cases: - - Y is none - The case when X = Y - We split X in two and set one group to X and the other to Y - - X.ndim == Y.ndim - X and Y have the same dimension - It it the case when Y is a random variable - - X.ndim - 1 == Y.ndim or Y.ndim == 0 - Y is a scalar value + - Y is none + - The case when X = Y + - We split X in two and set one group to X and the other to Y + - X.ndim == Y.ndim + X and Y have the same dimension + It it the case when Y is a random variable + - X.ndim - 1 == Y.ndim or Y.ndim == 0 + Y is a scalar value Parameters ---------- - array : np.ndarray + array : InternalArrayType The random variable - reference : Optional[ReferenceType] + reference : InternalArrayType | None The reference to compare against - error : Method.Error | str + error : Error | str The error function to use to compute error between array and reference. axis : int, default=0 The axis or axes along which compute Z @@ -279,13 +305,12 @@ def compute_z( Returns ------- - array : numpy.ndarray + array : InternalArrayType The result of Z following the error method choose See Also -------- - significantdigits.InputType : Type for random variable - significantdigits.ReferenceType : Type for reference + significantdigits.InternalArrayType : Type used for internal computations """ @@ -314,38 +339,38 @@ def compute_z( else: raise TypeError("No comparison found for X and reference:") - x = np.array(x) - y = np.array(y) + x = np.asanyarray(x) + y = np.asanyarray(y) if Error.is_absolute(error): - z = substract_along_axis(x, y, axis=axis) + z = _substract_along_axis(x, y, axis=axis) elif Error.is_relative(error): if np.any(y[y == 0]): warn_msg = "error is set to relative and the reference has 0 leading to NaN" warnings.warn(warn_msg) - z = divide_along_axis(x, y, axis=axis) - 1 + z = _divide_along_axis(x, y, axis=axis) - 1 else: raise SignificantDigitsException(f"Unknown error {error}") return z def _significant_digits_cnh( - array: InputType, - reference: Optional[ReferenceType], + array: InternalArrayType, + reference: Optional[InternalArrayType], axis: int, error: Union[Error, str], probability: float, confidence: float, shuffle_samples: bool = False, - dtype: Optional[np.dtype] = None, -) -> InputType: + dtype: Optional[npt.DTypeLike] = None, +) -> InternalArrayType: r"""Compute significant digits for Centered Normality Hypothesis (CNH) Parameters ---------- - array: InputType + array: InternalArrayType Element to compute - reference: Optional[ReferenceType] + reference: InternalArrayType | None Reference for comparing the array axis: int Axis or axes along which the significant digits are computed @@ -359,7 +384,7 @@ def _significant_digits_cnh( If reference is None, the array is split in two and comparison is done between both pieces. If shuffle_samples is True, it shuffles pieces. - dtype : np.dtype, default=None + dtype : dtype_like | None, default=None Numerical type used for computing significant digits Widest format between array and reference is taken if not supplied. @@ -368,13 +393,6 @@ def _significant_digits_cnh( ndarray array_like containing significant digits - See Also - -------- - significantdigits.contributing_digits_general : Computes the contributing digits in general case - significantdigits.compute_z : Computes the error between random variable and reference - significantdigits.get_input_type : get InputType types - significantdigits.get_output_type : get ReferenceType types - Notes ----- .. [1] Sohier, D., Castro, P. D. O., Févotte, F., @@ -383,11 +401,12 @@ def _significant_digits_cnh( ACM Transactions on Mathematical Software (TOMS), 47(2), 1-33. .. math:: - s >= -log_2(std) - [\frac{1}{2} log_2( \frac{n-1}{ Chi^2_{1-\frac{\alpha}{2}} }) ) + log_2(F^{-1}(\frac{p+1}{2})] + + s >= -log_2(std) - [\frac{1}{2} log_2( \frac{n-1}{ Chi^2_{1-\frac{\alpha}{2}} }) ) + log_2(F^{-1}(\frac{p+1}{2})] """ - z = compute_z(array, reference, error, axis=axis, shuffle_samples=shuffle_samples) + z = _compute_z(array, reference, error, axis=axis, shuffle_samples=shuffle_samples) nb_samples = z.shape[axis] - std = np.std(z, axis=axis, dtype=internal_dtype) + std = z.std(axis=axis, dtype=_internal_dtype) std0 = np.ma.array(std, mask=std == 0) chi2 = scipy.stats.chi2.interval(confidence, nb_samples - 1)[0] inorm = scipy.stats.norm.ppf((probability + 1) / 2) @@ -401,13 +420,13 @@ def _significant_digits_cnh( def _significant_digits_general( - array: InputType, - reference: Optional[ReferenceType], + array: InternalArrayType, + reference: Optional[InternalArrayType], axis: int, error: Union[Error, str], shuffle_samples: bool = False, - dtype: Optional[np.dtype] = None, -) -> InputType: + dtype: Optional[npt.DTypeLike] = None, +) -> InternalArrayType: r"""Compute significant digits for unknown underlying distribution For the general case, the probability is not parametrizable but @@ -417,9 +436,9 @@ def _significant_digits_general( Parameters ---------- - array: InputType + array: InternalArrayType Element to compute - reference: Optional[ReferenceType] + reference: InternalArrayType | None Reference for comparing the array axis: int Axis or axes along which the significant digits are computed @@ -429,7 +448,7 @@ def _significant_digits_general( If reference is None, the array is split in two and comparison is done between both pieces. If shuffle_samples is True, it shuffles pieces. - dtype : np.dtype, default=None + dtype : dtype_like | None, default=None Numerical type used for computing significant digits Widest format between array and reference is taken if not supplied. @@ -438,13 +457,6 @@ def _significant_digits_general( out : ndarray array_like containing significant digits - See Also - -------- - significantdigits.significant_digits_cnh : Computes the significant digits under CNH - significantdigits.compute_z : Computes the error between random variable and reference - significantdigits.get_input_type : get InputType types - significantdigits.get_output_type : get ReferenceType types - Notes ----- .. [1] Sohier, D., Castro, P. D. O., Févotte, F., @@ -455,7 +467,7 @@ def _significant_digits_general( .. math:: s = max{k \in [1,mant], st \forall i \in [1,n], |Z_i| <= 2^{-k}} """ - z = compute_z(array, reference, error, axis=axis, shuffle_samples=shuffle_samples) + z = _compute_z(array, reference, error, axis=axis, shuffle_samples=shuffle_samples) sample_shape = tuple(dim for i, dim in enumerate(z.shape) if i != axis) max_bits = np.finfo(dtype if dtype else z.dtype).nmant significant = np.ma.MaskedArray( @@ -486,18 +498,42 @@ def significant_digits( basis: int = 2, error: Union[str, Error] = Error.Relative, method: Union[str, Method] = Method.CNH, - probability: float = default_probability[Metric.Significant], - confidence: float = default_confidence[Metric.Significant], + probability: float = _default_probability[Metric.Significant], + confidence: float = _default_confidence[Metric.Significant], shuffle_samples: bool = False, - dtype: Optional[np.dtype] = None, -) -> InputType: + dtype: Optional[npt.DTypeLike] = None, +) -> npt.ArrayLike: r"""Compute significant digits + This function calculates with a certain probability the number of + significant bits with, in comparison to a correct reference value[1]_. + + .. math:: + \begin{array}{ll} + & \text{Significant digits formulae for both cases} \newline + \text{CNH:} & \hat{s}_{CNH} = -\log_2(\hat{\sigma}_Z) - \left[ \dfrac{1}{2} \log_2\left( \dfrac{n-1}{ \chi^2_{1-\alpha/2 } }\right) + \log_2 \left( F^{-1}\left( \dfrac{p+1}{2} \right) \right) \right] \newline + \text{General:} & \hat{s}_B = \max \left\\{ k \in [0,53] \text{ s.t. } \forall i \in [1, n], |Z_i| \leq 2^{-k} \right\\} + \end{array} + + - X = array + - Y = reference + + Three cases: + - Y is None + - Divide X equally between variables 'X' and 'Y' + - The case when X = Y + - X.ndim == Y.ndim + - X and Y have the same dimension + - The case when Y is a random variable + - X.ndim - 1 == Y.ndim or Y.ndim == 0 + - Y is a scalar value + + Parameters ---------- array: InputType Element to compute - reference: Optional[ReferenceType], optional=None + reference: ReferenceType | None, optional=None Reference for comparing the array axis: int, optional=0 Axis or axes along which the significant digits are computed @@ -512,10 +548,10 @@ def significant_digits( confidence : float, default=0.95 Confidence level for the significant digits result shuffle_samples : bool, optional=False - If reference is None, the array is split in two and \ - comparison is done between both pieces. \ + If reference is None, the array is split in two and + comparison is done between both pieces. If shuffle_samples is True, it shuffles pieces. - dtype : np.dtype, default=None + dtype : dtype_like | None, default=None Numerical type used for computing significant digits Widest format between array and reference is taken if no supplied. @@ -527,31 +563,33 @@ def significant_digits( See Also -------- significantdigits.contributing_digits : Computes the contributing digits - significantdigits.compute_z : Computes the error between random variable and reference - significantdigits.get_input_type : get InputType types - significantdigits.get_output_type : get ReferenceType types + significantdigits.Error : Errors between random variable and reference + significantdigits.Method : Methods for underlying distribution hypothesis + significantdigits.InputType : get InputType types + significantdigits.ReferenceType : get ReferenceType types Notes ----- .. [1] Sohier, D., Castro, P. D. O., Févotte, F., - Lathuilière, B., Petit, E., & Jamond, O. (2021). - Confidence intervals for stochastic arithmetic. - ACM Transactions on Mathematical Software (TOMS), 47(2), 1-33. + Lathuilière, B., Petit, E., & Jamond, O. (2021). + Confidence intervals for stochastic arithmetic. + ACM Transactions on Mathematical Software (TOMS), 47(2), 1-33. + """ - assert_is_probability(probability) - assert_is_confidence(confidence) - assert_is_valid_method(method) - assert_is_valid_error(error) + _assert_is_probability(probability) + _assert_is_confidence(confidence) + _assert_is_valid_method(method) + _assert_is_valid_error(error) significant = None - array, reference = preprocess_inputs(array, reference) + preproc_array, preproc_reference = _preprocess_inputs(array, reference) if method == Method.CNH: significant = _significant_digits_cnh( - array=array, - reference=reference, + array=preproc_array, + reference=preproc_reference, error=error, probability=probability, confidence=confidence, @@ -562,13 +600,15 @@ def significant_digits( elif method == Method.General: significant = _significant_digits_general( - array=array, - reference=reference, + array=preproc_array, + reference=preproc_reference, error=error, axis=axis, shuffle_samples=shuffle_samples, dtype=dtype, ) + else: + raise SignificantDigitsException(f"Unknown method {method}") if basis != 2: significant = change_basis(significant, basis) @@ -577,22 +617,22 @@ def significant_digits( def _contributing_digits_cnh( - array: InputType, - reference: Optional[ReferenceType], + array: InternalArrayType, + reference: Optional[InternalArrayType], axis: int, error: Union[Error, str], probability: float, confidence: float, shuffle_samples: bool = False, - dtype: Optional[np.dtype] = None, -) -> InputType: + dtype: Optional[npt.DTypeLike] = None, +) -> InternalArrayType: r"""Compute contributing digits for Centered Hypothesis Normality Parameters ---------- - array: InputType + array: InternalArrayType Element to compute - reference: Optional[ReferenceType] + reference: InternalArrayType | None Reference for comparing the array axis: int Axis or axes along which the contributing digits are computed @@ -606,7 +646,7 @@ def _contributing_digits_cnh( If reference is None, the array is split in two and comparison is done between both pieces. If shuffle_samples is True, it shuffles pieces. - dtype : np.dtype, default=None + dtype : dtype_like | None, default=None Numerical type used for computing contributing digits Widest format between array and reference is taken if no supplied. @@ -615,13 +655,6 @@ def _contributing_digits_cnh( ndarray array_like containing contributing digits - See Also - -------- - significantdigits.significant_general : Computes the significant digits for general case - significantdigits.compute_z : Computes the error between random variable and reference - significantdigits.get_input_type : get InputType types - significantdigits.get_output_type : get ReferenceType types - Notes ----- .. [1] Sohier, D., Castro, P. D. O., Févotte, F., @@ -632,9 +665,9 @@ def _contributing_digits_cnh( .. math:: c >= -log_2(std) - [\frac{1}{2} log_2( \frac{n-1} / \frac{ Chi^2_{1-\frac{alpha}{2}} }) ) + log_2(p+\frac{1}{2}) + log_2(2\sqrt{2\pi})] """ - z = compute_z(array, reference, error, axis=axis, shuffle_samples=shuffle_samples) + z = _compute_z(array, reference, error, axis=axis, shuffle_samples=shuffle_samples) nb_samples = z.shape[axis] - std = np.std(z, axis=axis, dtype=internal_dtype) + std = z.std(axis=axis, dtype=_internal_dtype) std0 = np.ma.masked_array(std, mask=std == 0) chi2 = scipy.stats.chi2.interval(confidence, nb_samples - 1)[0] delta_chn = ( @@ -643,20 +676,19 @@ def _contributing_digits_cnh( + np.log2(2 * np.sqrt(2 * np.pi)) ) contributing = -np.ma.log2(std0) - delta_chn - max_bits = np.finfo(dtype if dtype else z.dtype).nmant + max_bits: int = np.finfo(dtype if dtype else z.dtype).nmant contributing = contributing.filled(fill_value=max_bits - delta_chn) return contributing def _contributing_digits_general( - array: InputType, - reference: Optional[ReferenceType], + array: InternalArrayType, + reference: Optional[InternalArrayType], axis: int, error: Union[Error, str], - probability: float, shuffle_samples: bool = False, - dtype: Optional[np.dtype] = None, -) -> InputType: + dtype: Optional[npt.DTypeLike] = None, +) -> InternalArrayType: r"""Computes contributing digits for unknown underlying distribution This function computes with a certain probability the number of bits @@ -665,21 +697,19 @@ def _contributing_digits_general( Parameters ---------- - array: InputType + array: InternalArrayType Element to compute - reference: Optional[ReferenceType] + reference: InternalArrayType | None Reference for comparing the array axis: int Axis or axes along which the contributing digits are computed error : Error | str The error function to use to compute error between array and reference. - probability : float - Probability for the contributing digits result shuffle_samples : bool, default=False If reference is None, the array is split in two and comparison is done between both pieces. If shuffle_samples is True, it shuffles pieces. - dtype : np.dtype, default=None + dtype : dtype_like | None, default=None Numerical type used for computing contributing digits Widest format between array and reference is taken if no supplied. Returns @@ -706,7 +736,7 @@ def _contributing_digits_general( c = (\frac{#success}{#trials} > p) """ - z = compute_z(array, reference, error, axis=axis, shuffle_samples=shuffle_samples) + z = _compute_z(array, reference, error, axis=axis, shuffle_samples=shuffle_samples) sample_shape = tuple(dim for i, dim in enumerate(z.shape) if i != axis) max_bits = np.finfo(dtype if dtype else z.dtype).nmant contributing = np.ma.MaskedArray( @@ -739,23 +769,42 @@ def contributing_digits( basis: int = 2, error: Union[str, Error] = Error.Relative, method: Union[str, Method] = Method.CNH, - probability: float = default_probability[Metric.Contributing], - confidence: float = default_confidence[Metric.Contributing], + probability: float = _default_probability[Metric.Contributing], + confidence: float = _default_confidence[Metric.Contributing], shuffle_samples: bool = False, - dtype: Optional[np.dtype] = None, -) -> InputType: + dtype: Optional[npt.DTypeLike] = None, +) -> npt.ArrayLike: r"""Compute contributing digits - This function computes with a certain probability the number of bits - of the mantissa that will round the result towards the correct reference - value[1]_ + that will round the result towards the correct reference + value [1]_ + + - X = array + - Y = reference + + Three cases: + - Y is None + - Divide X equally between variables 'X' and 'Y' + - The case when X = Y + - X.ndim == Y.ndim + - X and Y have the same dimension + - The case when Y is a random variable + - X.ndim - 1 == Y.ndim or Y.ndim == 0 + - Y is a scalar value + + .. math:: + \begin{array}{ll} + & \text{Contributing digits formulae for both cases} \newline + \text{CNH} & \hat{c}_{cnh} = -\log_2(\hat{\sigma}_Z) - \left[ \dfrac{1}{2} \log_2\left( \dfrac{n-1}{ \chi^2_{1-\alpha/2}} \right) + \log_2 \left (p-\dfrac{1}{2} \right) + \log_2( 2\sqrt{2\pi}) \right] \newline + \text{General:} & \hat{c}_B = \max \left\\{ k \in [0,53] \text{ s.t. } \forall i \in [1, n], \lfloor 2^k|Z_i| \rfloor \text{ is even } \right\\} + \end{array} Parameters ---------- array: InputArray Element to compute - reference: Optional[ReferenceArray], default=None + reference: ReferenceArray | None, default=None Reference for comparing the array axis: int, default=0 Axis or axes along which the contributing digits are computed @@ -774,7 +823,7 @@ def contributing_digits( If reference is None, the array is split in two and comparison is done between both pieces. If shuffle_samples is True, it shuffles pieces. - dtype : np.dtype, default=None + dtype : dtype_like | None, default=None Numerical type used for computing contributing digits Widest format between array and reference is taken if no supplied. Returns @@ -785,27 +834,28 @@ def contributing_digits( See Also -------- significantdigits.significant_digits : Computes the significant digits - significantdigits.compute_z : Computes the error between random variable and reference - significantdigits.get_input_type : get InputType types - significantdigits.get_output_type : get ReferenceType types + significantdigits.Error : Errors between random variable and reference + significantdigits.Method : Methods for underlying distribution hypothesis + significantdigits.InputType : get InputType types + significantdigits.ReferenceType : get ReferenceType types Notes ----- .. [1] Sohier, D., Castro, P. D. O., Févotte, F., - Lathuilière, B., Petit, E., & Jamond, O. (2021). - Confidence intervals for stochastic arithmetic. - ACM Transactions on Mathematical Software (TOMS), 47(2), 1-33. + Lathuilière, B., Petit, E., & Jamond, O. (2021). + Confidence intervals for stochastic arithmetic. + ACM Transactions on Mathematical Software (TOMS), 47(2), 1-33. """ - assert_is_probability(probability) - assert_is_confidence(confidence) - assert_is_valid_method(method) - assert_is_valid_error(error) + _assert_is_probability(probability) + _assert_is_confidence(confidence) + _assert_is_valid_method(method) + _assert_is_valid_error(error) contributing = None - array, reference = preprocess_inputs(array, reference) + array, reference = _preprocess_inputs(array, reference) if method == Method.CNH: contributing = _contributing_digits_cnh( @@ -825,10 +875,11 @@ def contributing_digits( reference=reference, error=error, axis=axis, - probability=probability, shuffle_samples=shuffle_samples, dtype=dtype, ) + else: + raise SignificantDigitsException(f"Unknown method {method}") if basis != 2: contributing = change_basis(contributing, basis) @@ -870,9 +921,15 @@ def probability_estimation_bernoulli( ACM Transactions on Mathematical Software (TOMS), 47(2), 1-33. .. math:: - p = 1 + \frac{\log{ 1 - \alpha }}{n} if success=sample\_size - p = \frac{s+2}{s+4} - F^{-1}(\frac{p+1}{2}) \sqrt{ \frac{(s+2)(n-s+2)}{n+4}^3 } else + p = + \begin{cases} + 1 + \frac{\log{ 1 - \alpha }}{n} & \text{if s=n} \newline + \frac{s+2}{s+4} - F^{-1}(\frac{p+1}{2}) \sqrt{ \frac{(s+2)(n-s+2)}{n+4}^3 } & \text{else} + \end{cases} + """ + _assert_is_confidence(confidence) + s = success n = trials coef = scipy.stats.norm.ppf(confidence) @@ -918,9 +975,12 @@ def minimum_number_of_trials(probability: float, confidence: float) -> int: ACM Transactions on Mathematical Software (TOMS), 47(2), 1-33. .. math:: - n = \lceil \frac{\ln{\alpha}}{\ln{p}} + n = \left \lceil \frac{\ln{\alpha}}{\ln{p}} \right \rceil """ + _assert_is_probability(probability) + _assert_is_confidence(confidence) + alpha = 1 - confidence n = np.log(alpha) / np.log(probability) return int(np.ceil(n)) diff --git a/significantdigits/args.py b/significantdigits/args.py index f2be4dc..f5cb8ad 100644 --- a/significantdigits/args.py +++ b/significantdigits/args.py @@ -2,7 +2,13 @@ import sys import ast -import significantdigits +import significantdigits as sd +from significantdigits._significantdigits import ( + _assert_is_confidence, + _assert_is_probability, + _default_confidence, + _default_probability, +) from significantdigits.export import input_formats, input_types, output_formats @@ -16,9 +22,9 @@ def safe_eval(s): def process_args(args): - args.metric = significantdigits.Metric.map[args.metric] - args.method = significantdigits.Method.map[args.method] - args.error = significantdigits.Error.map[args.error] + args.metric = sd.Metric.map[args.metric] + args.method = sd.Method.map[args.method] + args.error = sd.Error.map[args.error] args.input_type = input_types[args.input_type] if args.input_format == "stdin": @@ -27,20 +33,20 @@ def process_args(args): args.reference = safe_eval(args.reference) if args.probability is not None: - significantdigits.assert_is_probability(args.probability) + _assert_is_probability(args.probability) else: - args.probability = significantdigits.default_probability[args.metric] + args.probability = _default_probability[args.metric] if args.confidence is not None: - significantdigits.assert_is_confidence(args.probability) + _assert_is_confidence(args.probability) else: - args.confidence = significantdigits.default_confidence[args.metric] + args.confidence = _default_confidence[args.metric] if args.probability: - significantdigits.assert_is_probability(args.probability) + _assert_is_probability(args.probability) if args.confidence: - significantdigits.assert_is_confidence(args.confidence) + _assert_is_confidence(args.confidence) def create_parser(): @@ -51,21 +57,21 @@ def create_parser(): "--metric", required=True, type=str.lower, - choices=significantdigits.Metric.names, + choices=sd.Metric.names, help="Metric to compute", ) parser.add_argument( "--method", type=str.lower, - default=significantdigits.Method.CNH.name, - choices=significantdigits.Method.names, + default=sd.Method.CNH.name, + choices=sd.Method.names, help="Method to use", ) parser.add_argument( "--error", type=str.lower, - default=significantdigits.Error.Relative.name, - choices=significantdigits.Error.names, + default=sd.Error.Relative.name, + choices=sd.Error.names, help="Error to use", ) parser.add_argument( diff --git a/significantdigits/test/test_compute_z.py b/significantdigits/test/test_compute_z.py index 91e018c..b9723e1 100644 --- a/significantdigits/test/test_compute_z.py +++ b/significantdigits/test/test_compute_z.py @@ -1,6 +1,7 @@ import pytest import significantdigits as sd +from significantdigits._significantdigits import _compute_z import numpy as np # Test for compute z function @@ -18,7 +19,7 @@ def test_compute_z_none_none(error): Test compute_z when X and Y are None """ with pytest.raises(TypeError): - sd.compute_z(np.array(), None, axis=0, error=error) + _compute_z(np.array(), None, axis=0, error=error) def test_compute_z_x_rv(): @@ -30,17 +31,17 @@ def test_compute_z_x_rv(): y = None z_rel = x[:2] / x[2:] - 1 - z = sd.compute_z(x, y, axis=0, error=sd.Error.Relative) + z = _compute_z(x, y, axis=0, error=sd.Error.Relative) assert np.all(np.equal(z, z_rel)) z_abs = x[:2] - x[2:] - z = sd.compute_z(x, y, axis=0, error=sd.Error.Absolute) + z = _compute_z(x, y, axis=0, error=sd.Error.Absolute) assert np.all(np.equal(z, z_abs)) x = np.array([1, 2, 3]) # Invalid shape, must be a multiple of two with pytest.raises(sd.SignificantDigitsException): - sd.compute_z(x, y, axis=0, error=sd.Error.Absolute) + _compute_z(x, y, axis=0, error=sd.Error.Absolute) def test_compute_z_x_rv_y_rv(): @@ -51,11 +52,11 @@ def test_compute_z_x_rv_y_rv(): y = np.array([1, 2, 3, 4]) + 1e-5 z_rel = x / y - 1 - z = sd.compute_z(x, y, axis=0, error=sd.Error.Relative) + z = _compute_z(x, y, axis=0, error=sd.Error.Relative) assert np.all(np.equal(z, z_rel)) z_abs = x - y - z = sd.compute_z(x, y, axis=0, error=sd.Error.Absolute) + z = _compute_z(x, y, axis=0, error=sd.Error.Absolute) assert np.all(np.equal(z, z_abs)) @@ -67,19 +68,29 @@ def test_compute_z_x_rv_y_scalar(): y = np.array(1) z_rel = x / y - 1 - z = sd.compute_z(x, y, axis=0, error=sd.Error.Relative) + z = _compute_z(x, y, axis=0, error=sd.Error.Relative) assert np.all(np.equal(z, z_rel)) z_abs = x - y - z = sd.compute_z(x, y, axis=0, error=sd.Error.Absolute) + z = _compute_z(x, y, axis=0, error=sd.Error.Absolute) assert np.all(np.equal(z, z_abs)) -def tesT_compute_z_x_rv_y_srv(): +def test_compute_z_x_rv_y_ref(): """ Test compute_z when X is an array and Y is an array with ndim = X.ndim - 1 """ - x = np.arange(3 * 3).reshape(3, 3, 3) + x = np.arange(3**3).reshape(3, 3, 3) + y = x.sum(axis=0) + + z_rel = x / y - 1 + z_abs = x - y + + z_rel_axis = x / y[np.newaxis, :, :] - 1 + z_abs_axis = x - y[np.newaxis, :, :] + + assert np.all(np.equal(z_rel, z_rel_axis)) + assert np.all(np.equal(z_abs, z_abs_axis)) indexes = [ np.index_exp[np.newaxis, :, :], @@ -89,10 +100,10 @@ def tesT_compute_z_x_rv_y_srv(): for axis, index in enumerate(indexes): y = x.sum(axis=axis) - z_rel = x[index] / y - 1 - z = sd.compute_z(x, y, axis=axis, error=sd.Error.Relative) + z_rel = x / y[index] - 1 + z = _compute_z(x, y, axis=axis, error=sd.Error.Relative) assert np.all(np.equal(z, z_rel)) - z_abs = x[index] - y - z = sd.compute_z(x, y, axis=axis, error=sd.Error.Absolute) + z_abs = x - y[index] + z = _compute_z(x, y, axis=axis, error=sd.Error.Absolute) assert np.all(np.equal(z, z_abs))