diff --git a/.github/workflows/distr_test.yml b/.github/workflows/distr_test.yml new file mode 100644 index 0000000000..34d632770a --- /dev/null +++ b/.github/workflows/distr_test.yml @@ -0,0 +1,24 @@ +name: distr_test + +on: + pull_request: + paths: + - ".github/workflows/distr_test.yml" + - "distr_test/**" + +jobs: + distr_test: + runs-on: ubuntu-latest + defaults: + run: + working-directory: ./distr_test + steps: + - uses: actions/checkout@v4 + - uses: dtolnay/rust-toolchain@master + with: + toolchain: nightly + components: clippy, rustfmt + - name: Rustfmt + run: cargo fmt -- --check + - name: Clippy + run: cargo clippy --all-targets -- -D warnings diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index fadd73f4aa..4c4f28d380 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -69,7 +69,9 @@ jobs: - uses: actions/checkout@v4 - name: MSRV if: ${{ matrix.variant == 'MSRV' }} - run: cp Cargo.lock.msrv Cargo.lock + run: | + cp Cargo.lock.msrv Cargo.lock + cp distr_test/Cargo.lock.msrv distr_test/Cargo.lock - name: Install toolchain uses: dtolnay/rust-toolchain@master with: @@ -106,6 +108,7 @@ jobs: cargo test --target ${{ matrix.target }} --manifest-path rand_distr/Cargo.toml --features=serde cargo test --target ${{ matrix.target }} --manifest-path rand_distr/Cargo.toml --no-default-features cargo test --target ${{ matrix.target }} --manifest-path rand_distr/Cargo.toml --no-default-features --features=std,std_math + cargo test --target ${{ matrix.target }} --manifest-path distr_test/Cargo.toml --release - name: Test rand_pcg run: cargo test --target ${{ matrix.target }} --manifest-path rand_pcg/Cargo.toml --features=serde - name: Test rand_chacha diff --git a/Cargo.toml b/Cargo.toml index 1f2eccdc63..0b22e9606b 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -66,7 +66,7 @@ members = [ "rand_chacha", "rand_pcg", ] -exclude = ["benches"] +exclude = ["benches", "distr_test"] [dependencies] rand_core = { path = "rand_core", version = "=0.9.0-alpha.1", default-features = false } diff --git a/distr_test/Cargo.lock.msrv b/distr_test/Cargo.lock.msrv new file mode 100644 index 0000000000..990c9c34a3 --- /dev/null +++ b/distr_test/Cargo.lock.msrv @@ -0,0 +1,449 @@ +# This file is automatically @generated by Cargo. +# It is not intended for manual editing. +version = 3 + +[[package]] +name = "approx" +version = "0.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cab112f0a86d568ea0e627cc1d6be74a1e9cd55214684db5561995f6dad897c6" +dependencies = [ + "num-traits", +] + +[[package]] +name = "autocfg" +version = "1.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ace50bade8e6234aa140d9a2f552bbee1db4d353f69b8217bc503490fc1a9f26" + +[[package]] +name = "bytemuck" +version = "1.19.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8334215b81e418a0a7bdb8ef0849474f40bb10c8b71f1c4ed315cff49f32494d" + +[[package]] +name = "byteorder" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1fd0f2584146f6f2ef48085050886acf353beff7305ebd1ae69500e27c67f64b" + +[[package]] +name = "cauchy" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9ff11ddd2af3b5e80dd0297fee6e56ac038d9bdc549573cdb51bd6d2efe7f05e" +dependencies = [ + "num-complex", + "num-traits", + "rand 0.8.5", + "serde", +] + +[[package]] +name = "cfg-if" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" + +[[package]] +name = "distr_test" +version = "0.1.0" +dependencies = [ + "libm", + "num-traits", + "rand 0.9.0-alpha.1", + "rand_distr 0.5.0-alpha.1", + "special", + "spfunc", + "statrs", +] + +[[package]] +name = "fast_polynomial" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "62eea6ee590b08a5f8b1139f4d6caee195b646d0c07e4b1808fbd5c4dea4829a" +dependencies = [ + "num-traits", +] + +[[package]] +name = "getrandom" +version = "0.2.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c4567c8db10ae91089c99af84c68c38da3ec2f087c3f82960bcdbf3656b6f4d7" +dependencies = [ + "cfg-if", + "libc", + "wasi", +] + +[[package]] +name = "lambert_w" +version = "0.5.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dd8852c2190439a46c77861aca230080cc9db4064be7f9de8ee81816d6c72c25" +dependencies = [ + "fast_polynomial", + "libm", +] + +[[package]] +name = "libc" +version = "0.2.162" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "18d287de67fe55fd7e1581fe933d965a5a9477b38e949cfa9f8574ef01506398" + +[[package]] +name = "libm" +version = "0.2.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3bda4c6077b0b08da2c48b172195795498381a7c8988c9e6212a6c55c5b9bd70" + +[[package]] +name = "matrixmultiply" +version = "0.3.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9380b911e3e96d10c1f415da0876389aaf1b56759054eeb0de7df940c456ba1a" +dependencies = [ + "autocfg", + "rawpointer", +] + +[[package]] +name = "nalgebra" +version = "0.32.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7b5c17de023a86f59ed79891b2e5d5a94c705dbe904a5b5c9c952ea6221b03e4" +dependencies = [ + "approx", + "matrixmultiply", + "nalgebra-macros", + "num-complex", + "num-rational", + "num-traits", + "rand 0.8.5", + "rand_distr 0.4.3", + "simba", + "typenum", +] + +[[package]] +name = "nalgebra-macros" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "254a5372af8fc138e36684761d3c0cdb758a4410e938babcff1c860ce14ddbfc" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "num-complex" +version = "0.4.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "73f88a1307638156682bada9d7604135552957b7818057dcef22705b4d509495" +dependencies = [ + "num-traits", + "rand 0.8.5", + "serde", +] + +[[package]] +name = "num-integer" +version = "0.1.46" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7969661fd2958a5cb096e56c8e1ad0444ac2bbcd0061bd28660485a44879858f" +dependencies = [ + "num-traits", +] + +[[package]] +name = "num-rational" +version = "0.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f83d14da390562dca69fc84082e73e548e1ad308d24accdedd2720017cb37824" +dependencies = [ + "num-integer", + "num-traits", +] + +[[package]] +name = "num-traits" +version = "0.2.19" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "071dfc062690e90b734c0b2273ce72ad0ffa95f0c74596bc250dcfd960262841" +dependencies = [ + "autocfg", + "libm", +] + +[[package]] +name = "paste" +version = "1.0.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "57c0d7b74b563b49d38dae00a0c37d4d6de9b432382b2892f0574ddcae73fd0a" + +[[package]] +name = "ppv-lite86" +version = "0.2.20" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "77957b295656769bb8ad2b6a6b09d897d94f05c41b069aede1fcdaa675eaea04" +dependencies = [ + "zerocopy 0.7.35", +] + +[[package]] +name = "proc-macro2" +version = "1.0.89" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f139b0662de085916d1fb67d2b4169d1addddda1919e696f3252b740b629986e" +dependencies = [ + "unicode-ident", +] + +[[package]] +name = "quote" +version = "1.0.37" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b5b9d34b8991d19d98081b46eacdd8eb58c6f2b201139f7c5f643cc155a633af" +dependencies = [ + "proc-macro2", +] + +[[package]] +name = "rand" +version = "0.8.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "34af8d1a0e25924bc5b7c43c079c942339d8f0a8b57c39049bef581b46327404" +dependencies = [ + "libc", + "rand_chacha 0.3.1", + "rand_core 0.6.4", +] + +[[package]] +name = "rand" +version = "0.9.0-alpha.1" +dependencies = [ + "rand_chacha 0.9.0-alpha.1", + "rand_core 0.9.0-alpha.1", + "zerocopy 0.8.10", +] + +[[package]] +name = "rand_chacha" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e6c10a63a0fa32252be49d21e7709d4d4baf8d231c2dbce1eaa8141b9b127d88" +dependencies = [ + "ppv-lite86", + "rand_core 0.6.4", +] + +[[package]] +name = "rand_chacha" +version = "0.9.0-alpha.1" +dependencies = [ + "ppv-lite86", + "rand_core 0.9.0-alpha.1", +] + +[[package]] +name = "rand_core" +version = "0.6.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ec0be4795e2f6a28069bec0b5ff3e2ac9bafc99e6a9a7dc3547996c5c816922c" +dependencies = [ + "getrandom", +] + +[[package]] +name = "rand_core" +version = "0.9.0-alpha.1" +dependencies = [ + "getrandom", + "zerocopy 0.8.10", +] + +[[package]] +name = "rand_distr" +version = "0.4.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "32cb0b9bc82b0a0876c2dd994a7e7a2683d3e7390ca40e6886785ef0c7e3ee31" +dependencies = [ + "num-traits", + "rand 0.8.5", +] + +[[package]] +name = "rand_distr" +version = "0.5.0-alpha.1" +dependencies = [ + "num-traits", + "rand 0.9.0-alpha.1", +] + +[[package]] +name = "rawpointer" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "60a357793950651c4ed0f3f52338f53b2f809f32d83a07f72909fa13e4c6c1e3" + +[[package]] +name = "safe_arch" +version = "0.7.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c3460605018fdc9612bce72735cba0d27efbcd9904780d44c7e3a9948f96148a" +dependencies = [ + "bytemuck", +] + +[[package]] +name = "serde" +version = "1.0.215" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6513c1ad0b11a9376da888e3e0baa0077f1aed55c17f50e7b2397136129fb88f" +dependencies = [ + "serde_derive", +] + +[[package]] +name = "serde_derive" +version = "1.0.215" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ad1e866f866923f252f05c889987993144fb74e722403468a4ebd70c3cd756c0" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "simba" +version = "0.8.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "061507c94fc6ab4ba1c9a0305018408e312e17c041eb63bef8aa726fa33aceae" +dependencies = [ + "approx", + "num-complex", + "num-traits", + "paste", + "wide", +] + +[[package]] +name = "special" +version = "0.11.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "98d279079c3ddec4e7851337070c1055a18b8f606bba0b1aeb054bc059fc2e27" +dependencies = [ + "lambert_w", + "libm", +] + +[[package]] +name = "spfunc" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "88a453caefb216996e21861a3e6fea3d4318258414d8b727d00dbc40118bb13e" +dependencies = [ + "cauchy", + "num-complex", + "num-traits", +] + +[[package]] +name = "statrs" +version = "0.17.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f697a07e4606a0a25c044de247e583a330dbb1731d11bc7350b81f48ad567255" +dependencies = [ + "approx", + "nalgebra", + "num-traits", + "rand 0.8.5", +] + +[[package]] +name = "syn" +version = "2.0.87" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "25aa4ce346d03a6dcd68dd8b4010bcb74e54e62c90c573f394c46eae99aba32d" +dependencies = [ + "proc-macro2", + "quote", + "unicode-ident", +] + +[[package]] +name = "typenum" +version = "1.17.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "42ff0bf0c66b8238c6f3b578df37d0b7848e55df8577b3f74f92a69acceeb825" + +[[package]] +name = "unicode-ident" +version = "1.0.13" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e91b56cd4cadaeb79bbf1a5645f6b4f8dc5bde8834ad5894a8db35fda9efa1fe" + +[[package]] +name = "wasi" +version = "0.11.0+wasi-snapshot-preview1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9c8d87e72b64a3b4db28d11ce29237c246188f4f51057d65a7eab63b7987e423" + +[[package]] +name = "wide" +version = "0.7.28" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b828f995bf1e9622031f8009f8481a85406ce1f4d4588ff746d872043e855690" +dependencies = [ + "bytemuck", + "safe_arch", +] + +[[package]] +name = "zerocopy" +version = "0.7.35" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1b9b4fd18abc82b8136838da5d50bae7bdea537c574d8dc1a34ed098d6c166f0" +dependencies = [ + "byteorder", + "zerocopy-derive 0.7.35", +] + +[[package]] +name = "zerocopy" +version = "0.8.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a13a42ed30c63171d820889b2981318736915150575b8d2d6dbee7edd68336ca" +dependencies = [ + "zerocopy-derive 0.8.10", +] + +[[package]] +name = "zerocopy-derive" +version = "0.7.35" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fa4f8080344d4671fb4e831a13ad1e68092748387dfc4f55e356242fae12ce3e" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "zerocopy-derive" +version = "0.8.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "593e7c96176495043fcb9e87cf7659f4d18679b5bab6b92bdef359c76a7795dd" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] diff --git a/distr_test/Cargo.toml b/distr_test/Cargo.toml new file mode 100644 index 0000000000..7f37853b3c --- /dev/null +++ b/distr_test/Cargo.toml @@ -0,0 +1,15 @@ +[package] +name = "distr_test" +version = "0.1.0" +edition = "2021" +publish = false + +[dev-dependencies] +rand_distr = { path = "../rand_distr", version = "=0.5.0-alpha.1", default-features = false } +rand = { path = "..", version = "=0.9.0-alpha.1", features = ["small_rng"] } +num-traits = "0.2.19" +# Special functions for testing distributions +special = "0.11.0" +spfunc = "0.1.0" +# Cdf implementation +statrs = "0.17.1" diff --git a/distr_test/tests/cdf.rs b/distr_test/tests/cdf.rs new file mode 100644 index 0000000000..f417c630ae --- /dev/null +++ b/distr_test/tests/cdf.rs @@ -0,0 +1,454 @@ +// Copyright 2021 Developers of the Rand project. +// +// Licensed under the Apache License, Version 2.0 or the MIT license +// , at your +// option. This file may not be copied, modified, or distributed +// except according to those terms. + +use core::f64; + +use special::{Beta, Gamma, Primitive}; +use statrs::distribution::ContinuousCDF; +use statrs::distribution::DiscreteCDF; + +mod ks; +use ks::test_continuous; +use ks::test_discrete; + +#[test] +fn normal() { + let parameters = [ + (0.0, 1.0), + (0.0, 0.1), + (1.0, 10.0), + (1.0, 100.0), + (-1.0, 0.00001), + (-1.0, 0.0000001), + ]; + + for (seed, (mean, std_dev)) in parameters.into_iter().enumerate() { + test_continuous( + seed as u64, + rand_distr::Normal::new(mean, std_dev).unwrap(), + |x| { + statrs::distribution::Normal::new(mean, std_dev) + .unwrap() + .cdf(x) + }, + ); + } +} + +#[test] +fn cauchy() { + let parameters = [ + (0.0, 1.0), + (0.0, 0.1), + (1.0, 10.0), + (1.0, 100.0), + (-1.0, 0.00001), + (-1.0, 0.0000001), + ]; + + for (seed, (median, scale)) in parameters.into_iter().enumerate() { + let dist = rand_distr::Cauchy::new(median, scale).unwrap(); + test_continuous(seed as u64, dist, |x| { + statrs::distribution::Cauchy::new(median, scale) + .unwrap() + .cdf(x) + }); + } +} + +#[test] +fn uniform() { + fn cdf(x: f64, a: f64, b: f64) -> f64 { + if x < a { + 0.0 + } else if x < b { + (x - a) / (b - a) + } else { + 1.0 + } + } + + let parameters = [(0.0, 1.0), (-1.0, 1.0), (0.0, 100.0), (-100.0, 100.0)]; + + for (seed, (a, b)) in parameters.into_iter().enumerate() { + let dist = rand_distr::Uniform::new(a, b).unwrap(); + test_continuous(seed as u64, dist, |x| cdf(x, a, b)); + } +} + +#[test] +fn log_normal() { + let parameters = [ + (0.0, 1.0), + (0.0, 0.1), + (0.5, 0.7), + (1.0, 10.0), + (1.0, 100.0), + ]; + + for (seed, (mean, std_dev)) in parameters.into_iter().enumerate() { + let dist = rand_distr::LogNormal::new(mean, std_dev).unwrap(); + test_continuous(seed as u64, dist, |x| { + statrs::distribution::LogNormal::new(mean, std_dev) + .unwrap() + .cdf(x) + }); + } +} + +#[test] +fn pareto() { + let parameters = [ + (1.0, 1.0), + (1.0, 0.1), + (1.0, 10.0), + (1.0, 100.0), + (0.1, 1.0), + (10.0, 1.0), + (100.0, 1.0), + ]; + + for (seed, (scale, alpha)) in parameters.into_iter().enumerate() { + let dist = rand_distr::Pareto::new(scale, alpha).unwrap(); + test_continuous(seed as u64, dist, |x| { + statrs::distribution::Pareto::new(scale, alpha) + .unwrap() + .cdf(x) + }); + } +} + +#[test] +fn exp() { + fn cdf(x: f64, lambda: f64) -> f64 { + 1.0 - (-lambda * x).exp() + } + + let parameters = [0.5, 1.0, 7.5, 32.0, 100.0]; + + for (seed, lambda) in parameters.into_iter().enumerate() { + let dist = rand_distr::Exp::new(lambda).unwrap(); + test_continuous(seed as u64, dist, |x| cdf(x, lambda)); + } +} + +#[test] +fn weibull() { + fn cdf(x: f64, lambda: f64, k: f64) -> f64 { + if x < 0.0 { + return 0.0; + } + + 1.0 - (-(x / lambda).powf(k)).exp() + } + + let parameters = [ + (0.5, 1.0), + (1.0, 1.0), + (10.0, 0.1), + (0.1, 10.0), + (15.0, 20.0), + (1000.0, 0.01), + ]; + + for (seed, (lambda, k)) in parameters.into_iter().enumerate() { + let dist = rand_distr::Weibull::new(lambda, k).unwrap(); + test_continuous(seed as u64, dist, |x| cdf(x, lambda, k)); + } +} + +#[test] +fn gumbel() { + fn cdf(x: f64, mu: f64, beta: f64) -> f64 { + (-(-(x - mu) / beta).exp()).exp() + } + + let parameters = [ + (0.0, 1.0), + (1.0, 2.0), + (-1.0, 0.5), + (10.0, 0.1), + (100.0, 0.0001), + ]; + + for (seed, (mu, beta)) in parameters.into_iter().enumerate() { + let dist = rand_distr::Gumbel::new(mu, beta).unwrap(); + test_continuous(seed as u64, dist, |x| cdf(x, mu, beta)); + } +} + +#[test] +fn frechet() { + fn cdf(x: f64, alpha: f64, s: f64, m: f64) -> f64 { + if x < m { + return 0.0; + } + + (-((x - m) / s).powf(-alpha)).exp() + } + + let parameters = [ + (0.5, 2.0, 1.0), + (1.0, 1.0, 1.0), + (10.0, 0.1, 1.0), + (100.0, 0.0001, 1.0), + (0.9999, 2.0, 1.0), + ]; + + for (seed, (alpha, s, m)) in parameters.into_iter().enumerate() { + let dist = rand_distr::Frechet::new(m, s, alpha).unwrap(); + test_continuous(seed as u64, dist, |x| cdf(x, alpha, s, m)); + } +} + +#[test] +fn gamma() { + fn cdf(x: f64, shape: f64, scale: f64) -> f64 { + if x < 0.0 { + return 0.0; + } + + (x / scale).inc_gamma(shape) + } + + let parameters = [ + (0.5, 2.0), + (1.0, 1.0), + (10.0, 0.1), + (100.0, 0.0001), + (0.9999, 2.0), + ]; + + for (seed, (shape, scale)) in parameters.into_iter().enumerate() { + let dist = rand_distr::Gamma::new(shape, scale).unwrap(); + test_continuous(seed as u64, dist, |x| cdf(x, shape, scale)); + } +} + +#[test] +fn chi_squared() { + fn cdf(x: f64, k: f64) -> f64 { + if x < 0.0 { + return 0.0; + } + + (x / 2.0).inc_gamma(k / 2.0) + } + + let parameters = [0.1, 1.0, 2.0, 10.0, 100.0, 1000.0]; + + for (seed, k) in parameters.into_iter().enumerate() { + let dist = rand_distr::ChiSquared::new(k).unwrap(); + test_continuous(seed as u64, dist, |x| cdf(x, k)); + } +} +#[test] +fn studend_t() { + fn cdf(x: f64, df: f64) -> f64 { + let h = df / (df + x.powi(2)); + let ib = 0.5 * h.inc_beta(df / 2.0, 0.5, 0.5.ln_beta(df / 2.0)); + if x < 0.0 { + ib + } else { + 1.0 - ib + } + } + + let parameters = [1.0, 10.0, 50.0]; + + for (seed, df) in parameters.into_iter().enumerate() { + let dist = rand_distr::StudentT::new(df).unwrap(); + test_continuous(seed as u64, dist, |x| cdf(x, df)); + } +} + +#[test] +fn fisher_f() { + fn cdf(x: f64, m: f64, n: f64) -> f64 { + if (m == 1.0 && x <= 0.0) || x < 0.0 { + 0.0 + } else { + let k = m * x / (m * x + n); + let d1 = m / 2.0; + let d2 = n / 2.0; + k.inc_beta(d1, d2, d1.ln_beta(d2)) + } + } + + let parameters = [(1.0, 1.0), (1.0, 2.0), (2.0, 1.0), (50.0, 1.0)]; + + for (seed, (m, n)) in parameters.into_iter().enumerate() { + let dist = rand_distr::FisherF::new(m, n).unwrap(); + test_continuous(seed as u64, dist, |x| cdf(x, m, n)); + } +} + +#[test] +fn beta() { + fn cdf(x: f64, alpha: f64, beta: f64) -> f64 { + if x < 0.0 { + return 0.0; + } + if x > 1.0 { + return 1.0; + } + let ln_beta_ab = alpha.ln_beta(beta); + x.inc_beta(alpha, beta, ln_beta_ab) + } + + let parameters = [(0.5, 0.5), (2.0, 3.5), (10.0, 1.0), (100.0, 50.0)]; + + for (seed, (alpha, beta)) in parameters.into_iter().enumerate() { + let dist = rand_distr::Beta::new(alpha, beta).unwrap(); + test_continuous(seed as u64, dist, |x| cdf(x, alpha, beta)); + } +} + +#[test] +fn triangular() { + fn cdf(x: f64, a: f64, b: f64, c: f64) -> f64 { + if x <= a { + 0.0 + } else if a < x && x <= c { + (x - a).powi(2) / ((b - a) * (c - a)) + } else if c < x && x < b { + 1.0 - (b - x).powi(2) / ((b - a) * (b - c)) + } else { + 1.0 + } + } + + let parameters = [ + (0.0, 1.0, 0.0001), + (0.0, 1.0, 0.9999), + (0.0, 1.0, 0.5), + (0.0, 100.0, 50.0), + (-100.0, 100.0, 0.0), + ]; + + for (seed, (a, b, c)) in parameters.into_iter().enumerate() { + let dist = rand_distr::Triangular::new(a, b, c).unwrap(); + test_continuous(seed as u64, dist, |x| cdf(x, a, b, c)); + } +} + +fn binomial_cdf(k: i64, p: f64, n: u64) -> f64 { + if k < 0 { + return 0.0; + } + let k = k as u64; + if k >= n { + return 1.0; + } + + let a = (n - k) as f64; + let b = k as f64 + 1.0; + + let q = 1.0 - p; + + let ln_beta_ab = a.ln_beta(b); + + q.inc_beta(a, b, ln_beta_ab) +} + +#[test] +fn binomial() { + let parameters = [ + (0.5, 10), + (0.5, 100), + (0.1, 10), + (0.0000001, 1000000), + (0.0000001, 10), + (0.9999, 2), + ]; + + for (seed, (p, n)) in parameters.into_iter().enumerate() { + test_discrete(seed as u64, rand_distr::Binomial::new(n, p).unwrap(), |k| { + binomial_cdf(k, p, n) + }); + } +} + +#[test] +fn geometric() { + fn cdf(k: i64, p: f64) -> f64 { + if k < 0 { + 0.0 + } else { + 1.0 - (1.0 - p).powi(1 + k as i32) + } + } + + let parameters = [0.3, 0.5, 0.7, 0.0000001, 0.9999]; + + for (seed, p) in parameters.into_iter().enumerate() { + let dist = rand_distr::Geometric::new(p).unwrap(); + test_discrete(seed as u64, dist, |k| cdf(k, p)); + } +} + +#[test] +fn hypergeometric() { + fn cdf(x: i64, n: u64, k: u64, n_: u64) -> f64 { + let min = if n_ + k > n { n_ + k - n } else { 0 }; + let max = k.min(n_); + if x < min as i64 { + return 0.0; + } else if x >= max as i64 { + return 1.0; + } + + (min..x as u64 + 1).fold(0.0, |acc, k_| { + acc + (ln_binomial(k, k_) + ln_binomial(n - k, n_ - k_) - ln_binomial(n, n_)).exp() + }) + } + + let parameters = [ + (15, 13, 10), + (25, 15, 5), + (60, 10, 7), + (70, 20, 50), + (100, 50, 10), + (100, 50, 49), + ]; + + for (seed, (n, k, n_)) in parameters.into_iter().enumerate() { + let dist = rand_distr::Hypergeometric::new(n, k, n_).unwrap(); + test_discrete(seed as u64, dist, |x| cdf(x, n, k, n_)); + } +} + +#[test] +fn poisson() { + use rand_distr::Poisson; + let parameters = [ + 0.1, 1.0, 7.5, + 45.0, // 1e9, passed case but too slow + // 1.844E+19, // fail case + ]; + + for (seed, lambda) in parameters.into_iter().enumerate() { + let dist = Poisson::new(lambda).unwrap(); + let analytic = statrs::distribution::Poisson::new(lambda).unwrap(); + test_discrete::, _>(seed as u64, dist, |k| { + if k < 0 { + 0.0 + } else { + analytic.cdf(k as u64) + } + }); + } +} + +fn ln_factorial(n: u64) -> f64 { + (n as f64 + 1.0).lgamma().0 +} + +fn ln_binomial(n: u64, k: u64) -> f64 { + ln_factorial(n) - ln_factorial(k) - ln_factorial(n - k) +} diff --git a/distr_test/tests/ks/mod.rs b/distr_test/tests/ks/mod.rs new file mode 100644 index 0000000000..ab94db6e1f --- /dev/null +++ b/distr_test/tests/ks/mod.rs @@ -0,0 +1,137 @@ +// Copyright 2021 Developers of the Rand project. +// +// Licensed under the Apache License, Version 2.0 or the MIT license +// , at your +// option. This file may not be copied, modified, or distributed +// except according to those terms. + +// [1] Nonparametric Goodness-of-Fit Tests for Discrete Null Distributions +// by Taylor B. Arnold and John W. Emerson +// http://www.stat.yale.edu/~jay/EmersonMaterials/DiscreteGOF.pdf + +#![allow(dead_code)] + +use num_traits::AsPrimitive; +use rand::SeedableRng; +use rand_distr::Distribution; + +/// Empirical Cumulative Distribution Function (ECDF) +struct Ecdf { + sorted_samples: Vec, +} + +impl Ecdf { + fn new(mut samples: Vec) -> Self { + samples.sort_by(|a, b| a.partial_cmp(b).unwrap()); + Self { + sorted_samples: samples, + } + } + + /// Returns the step points of the ECDF + /// The ECDF is a step function that increases by 1/n at each sample point + /// The function is continuous from the right, so we give the bigger value at the step points + /// First point is (-inf, 0.0), last point is (max(samples), 1.0) + fn step_points(&self) -> Vec<(f64, f64)> { + let mut points = Vec::with_capacity(self.sorted_samples.len() + 1); + let mut last = f64::NEG_INFINITY; + let mut count = 0; + let n = self.sorted_samples.len() as f64; + for &x in &self.sorted_samples { + if x != last { + points.push((last, count as f64 / n)); + last = x; + } + count += 1; + } + points.push((last, count as f64 / n)); + points + } +} + +fn kolmogorov_smirnov_statistic_continuous(ecdf: Ecdf, cdf: impl Fn(f64) -> f64) -> f64 { + // We implement equation (3) from [1] + + let mut max_diff: f64 = 0.; + + let step_points = ecdf.step_points(); // x_i in the paper + for i in 1..step_points.len() { + let (x_i, f_i) = step_points[i]; + let (_, f_i_1) = step_points[i - 1]; + let cdf_i = cdf(x_i); + let max_1 = (cdf_i - f_i).abs(); + let max_2 = (cdf_i - f_i_1).abs(); + + max_diff = max_diff.max(max_1).max(max_2); + } + max_diff +} + +fn kolmogorov_smirnov_statistic_discrete(ecdf: Ecdf, cdf: impl Fn(i64) -> f64) -> f64 { + // We implement equation (4) from [1] + + let mut max_diff: f64 = 0.; + + let step_points = ecdf.step_points(); // x_i in the paper + for i in 1..step_points.len() { + let (x_i, f_i) = step_points[i]; + let (_, f_i_1) = step_points[i - 1]; + let max_1 = (cdf(x_i as i64) - f_i).abs(); + let max_2 = (cdf(x_i as i64 - 1) - f_i_1).abs(); // -1 is the same as -epsilon, because we have integer support + + max_diff = max_diff.max(max_1).max(max_2); + } + max_diff +} + +const SAMPLE_SIZE: u64 = 1_000_000; + +fn critical_value() -> f64 { + // If the sampler is correct, we expect less than 0.001 false positives (alpha = 0.001). + // Passing this does not prove that the sampler is correct but is a good indication. + 1.95 / (SAMPLE_SIZE as f64).sqrt() +} + +fn sample_ecdf(seed: u64, dist: impl Distribution) -> Ecdf +where + T: AsPrimitive, +{ + let mut rng = rand::rngs::SmallRng::seed_from_u64(seed); + let samples = (0..SAMPLE_SIZE) + .map(|_| dist.sample(&mut rng).as_()) + .collect(); + Ecdf::new(samples) +} + +/// Tests a distribution against an analytical CDF. +/// The CDF has to be continuous. +pub fn test_continuous(seed: u64, dist: impl Distribution, cdf: impl Fn(f64) -> f64) { + let ecdf = sample_ecdf(seed, dist); + let ks_statistic = kolmogorov_smirnov_statistic_continuous(ecdf, cdf); + + let critical_value = critical_value(); + + println!("KS statistic: {}", ks_statistic); + println!("Critical value: {}", critical_value); + assert!(ks_statistic < critical_value); +} + +/// Tests a distribution over integers against an analytical CDF. +/// The analytical CDF must not have jump points which are not integers. +pub fn test_discrete(seed: u64, dist: D, cdf: F) +where + I: AsPrimitive, + D: Distribution, + F: Fn(i64) -> f64, +{ + let ecdf = sample_ecdf(seed, dist); + let ks_statistic = kolmogorov_smirnov_statistic_discrete(ecdf, cdf); + + // This critical value is bigger than it could be for discrete distributions, but because of large sample sizes this should not matter too much + let critical_value = critical_value(); + + println!("KS statistic: {}", ks_statistic); + println!("Critical value: {}", critical_value); + assert!(ks_statistic < critical_value); +} diff --git a/distr_test/tests/skew_normal.rs b/distr_test/tests/skew_normal.rs new file mode 100644 index 0000000000..0e6b7b3a02 --- /dev/null +++ b/distr_test/tests/skew_normal.rs @@ -0,0 +1,266 @@ +// Copyright 2021 Developers of the Rand project. +// +// Licensed under the Apache License, Version 2.0 or the MIT license +// , at your +// option. This file may not be copied, modified, or distributed +// except according to those terms. + +mod ks; +use ks::test_continuous; +use special::Primitive; + +#[test] +fn skew_normal() { + fn cdf(x: f64, location: f64, scale: f64, shape: f64) -> f64 { + let norm = (x - location) / scale; + phi(norm) - 2.0 * owen_t(norm, shape) + } + + let parameters = [(0.0, 1.0, 5.0), (1.0, 10.0, -5.0), (-1.0, 0.00001, 0.0)]; + + for (seed, (location, scale, shape)) in parameters.into_iter().enumerate() { + let dist = rand_distr::SkewNormal::new(location, scale, shape).unwrap(); + test_continuous(seed as u64, dist, |x| cdf(x, location, scale, shape)); + } +} + +/// [1] Patefield, M. (2000). Fast and Accurate Calculation of Owen’s T Function. +/// Journal of Statistical Software, 5(5), 1–25. +/// https://doi.org/10.18637/jss.v005.i05 +/// +/// This function is ported to Rust from the Fortran code provided in the paper +fn owen_t(h: f64, a: f64) -> f64 { + let absh = h.abs(); + let absa = a.abs(); + let ah = absa * absh; + + let mut t; + if absa <= 1.0 { + t = tf(absh, absa, ah); + } else if absh <= 0.67 { + t = 0.25 - znorm1(absh) * znorm1(ah) - tf(ah, 1.0 / absa, absh); + } else { + let normh = znorm2(absh); + let normah = znorm2(ah); + t = 0.5 * (normh + normah) - normh * normah - tf(ah, 1.0 / absa, absh); + } + + if a < 0.0 { + t = -t; + } + + fn tf(h: f64, a: f64, ah: f64) -> f64 { + let rtwopi = 0.159_154_943_091_895_35; + let rrtpi = 0.398_942_280_401_432_7; + + let c2 = [ + 0.999_999_999_999_999_9, + -0.999_999_999_999_888, + 0.999_999_999_982_907_5, + -0.999_999_998_962_825, + 0.999_999_966_604_593_7, + -0.999_999_339_862_724_7, + 0.999_991_256_111_369_6, + -0.999_917_776_244_633_8, + 0.999_428_355_558_701_4, + -0.996_973_117_207_23, + 0.987_514_480_372_753, + -0.959_158_579_805_728_8, + 0.892_463_055_110_067_1, + -0.768_934_259_904_64, + 0.588_935_284_684_846_9, + -0.383_803_451_604_402_55, + 0.203_176_017_010_453, + -8.281_363_160_700_499e-2, + 2.416_798_473_575_957_8e-2, + -4.467_656_666_397_183e-3, + 3.914_116_940_237_383_6e-4, + ]; + + let pts = [ + 3.508_203_967_645_171_6e-3, + 3.127_904_233_803_075_6e-2, + 8.526_682_628_321_945e-2, + 0.162_450_717_308_122_77, + 0.258_511_960_491_254_36, + 0.368_075_538_406_975_3, + 0.485_010_929_056_047, + 0.602_775_141_526_185_7, + 0.714_778_842_177_532_3, + 0.814_755_109_887_601, + 0.897_110_297_559_489_7, + 0.957_238_080_859_442_6, + 0.991_788_329_746_297, + ]; + + let wts = [ + 1.883_143_811_532_350_3e-2, + 1.856_708_624_397_765e-2, + 1.804_209_346_122_338_5e-2, + 1.726_382_960_639_875_2e-2, + 1.624_321_997_598_985_8e-2, + 1.499_459_203_411_670_5e-2, + 1.353_547_446_966_209e-2, + 1.188_635_160_582_016_5e-2, + 1.007_037_724_277_743_2e-2, + 8.113_054_574_229_958e-3, + 6.041_900_952_847_024e-3, + 3.886_221_701_074_205_7e-3, + 1.679_303_108_454_609e-3, + ]; + + let hrange = [ + 0.02, 0.06, 0.09, 0.125, 0.26, 0.4, 0.6, 1.6, 1.7, 2.33, 2.4, 3.36, 3.4, 4.8, + ]; + let arange = [0.025, 0.09, 0.15, 0.36, 0.5, 0.9, 0.99999]; + + let select = [ + [1, 1, 2, 13, 13, 13, 13, 13, 13, 13, 13, 16, 16, 16, 9], + [1, 2, 2, 3, 3, 5, 5, 14, 14, 15, 15, 16, 16, 16, 9], + [2, 2, 3, 3, 3, 5, 5, 15, 15, 15, 15, 16, 16, 16, 10], + [2, 2, 3, 5, 5, 5, 5, 7, 7, 16, 16, 16, 16, 16, 10], + [2, 3, 3, 5, 5, 6, 6, 8, 8, 17, 17, 17, 12, 12, 11], + [2, 3, 5, 5, 5, 6, 6, 8, 8, 17, 17, 17, 12, 12, 12], + [2, 3, 4, 4, 6, 6, 8, 8, 17, 17, 17, 17, 17, 12, 12], + [2, 3, 4, 4, 6, 6, 18, 18, 18, 18, 17, 17, 17, 12, 12], + ]; + + let ihint = hrange.iter().position(|&r| h < r).unwrap_or(14); + + let iaint = arange.iter().position(|&r| a < r).unwrap_or(7); + + let icode = select[iaint][ihint]; + let m = [ + 2, 3, 4, 5, 7, 10, 12, 18, 10, 20, 30, 20, 4, 7, 8, 20, 13, 0, + ][icode - 1]; + let method = [1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 3, 4, 4, 4, 4, 5, 6][icode - 1]; + + match method { + 1 => { + let hs = -0.5 * h * h; + let dhs = hs.exp(); + let as_ = a * a; + let mut j = 1; + let mut jj = 1; + let mut aj = rtwopi * a; + let mut tf = rtwopi * a.atan(); + let mut dj = dhs - 1.0; + let mut gj = hs * dhs; + loop { + tf += dj * aj / (jj as f64); + if j >= m { + return tf; + } + j += 1; + jj += 2; + aj *= as_; + dj = gj - dj; + gj *= hs / (j as f64); + } + } + 2 => { + let maxii = m + m + 1; + let mut ii = 1; + let mut tf = 0.0; + let hs = h * h; + let as_ = -a * a; + let mut vi = rrtpi * a * (-0.5 * ah * ah).exp(); + let mut z = znorm1(ah) / h; + let y = 1.0 / hs; + loop { + tf += z; + if ii >= maxii { + tf *= rrtpi * (-0.5 * hs).exp(); + return tf; + } + z = y * (vi - (ii as f64) * z); + vi *= as_; + ii += 2; + } + } + 3 => { + let mut i = 1; + let mut ii = 1; + let mut tf = 0.0; + let hs = h * h; + let as_ = a * a; + let mut vi = rrtpi * a * (-0.5 * ah * ah).exp(); + let mut zi = znorm1(ah) / h; + let y = 1.0 / hs; + loop { + tf += zi * c2[i - 1]; + if i > m { + tf *= rrtpi * (-0.5 * hs).exp(); + return tf; + } + zi = y * ((ii as f64) * zi - vi); + vi *= as_; + i += 1; + ii += 2; + } + } + 4 => { + let maxii = m + m + 1; + let mut ii = 1; + let mut tf = 0.0; + let hs = h * h; + let as_ = -a * a; + let mut ai = rtwopi * a * (-0.5 * hs * (1.0 - as_)).exp(); + let mut yi = 1.0; + loop { + tf += ai * yi; + if ii >= maxii { + return tf; + } + ii += 2; + yi = (1.0 - hs * yi) / (ii as f64); + ai *= as_; + } + } + 5 => { + let mut tf = 0.0; + let as_ = a * a; + let hs = -0.5 * h * h; + for i in 0..m { + let r = 1.0 + as_ * pts[i]; + tf += wts[i] * (hs * r).exp() / r; + } + tf *= a; + tf + } + 6 => { + let normh = znorm2(h); + let mut tf = 0.5 * normh * (1.0 - normh); + let y = 1.0 - a; + let r = (y / (1.0 + a)).atan(); + if r != 0.0 { + tf -= rtwopi * r * (-0.5 * y * h * h / r).exp(); + } + tf + } + _ => 0.0, + } + } + + // P(0 ≤ Z ≤ x) + fn znorm1(x: f64) -> f64 { + phi(x) - 0.5 + } + + // P(x ≤ Z < ∞) + fn znorm2(x: f64) -> f64 { + 1.0 - phi(x) + } + + t +} + +fn normal_cdf(x: f64, mean: f64, std_dev: f64) -> f64 { + 0.5 * ((mean - x) / (std_dev * core::f64::consts::SQRT_2)).erfc() +} + +/// standard normal cdf +fn phi(x: f64) -> f64 { + normal_cdf(x, 0.0, 1.0) +} diff --git a/distr_test/tests/zeta.rs b/distr_test/tests/zeta.rs new file mode 100644 index 0000000000..6e5ab1f594 --- /dev/null +++ b/distr_test/tests/zeta.rs @@ -0,0 +1,56 @@ +// Copyright 2021 Developers of the Rand project. +// +// Licensed under the Apache License, Version 2.0 or the MIT license +// , at your +// option. This file may not be copied, modified, or distributed +// except according to those terms. + +mod ks; +use ks::test_discrete; + +#[test] +fn zeta() { + fn cdf(k: i64, s: f64) -> f64 { + use spfunc::zeta::zeta as zeta_func; + if k < 1 { + return 0.0; + } + + gen_harmonic(k as u64, s) / zeta_func(s) + } + + let parameters = [2.0, 3.7, 5.0, 100.0]; + + for (seed, s) in parameters.into_iter().enumerate() { + let dist = rand_distr::Zeta::new(s).unwrap(); + test_discrete(seed as u64, dist, |k| cdf(k, s)); + } +} + +#[test] +fn zipf() { + fn cdf(k: i64, n: u64, s: f64) -> f64 { + if k < 1 { + return 0.0; + } + if k > n as i64 { + return 1.0; + } + gen_harmonic(k as u64, s) / gen_harmonic(n, s) + } + + let parameters = [(1000, 1.0), (500, 2.0), (1000, 0.5)]; + + for (seed, (n, x)) in parameters.into_iter().enumerate() { + let dist = rand_distr::Zipf::new(n as f64, x).unwrap(); + test_discrete(seed as u64, dist, |k| cdf(k, n, x)); + } +} + +fn gen_harmonic(n: u64, m: f64) -> f64 { + match n { + 0 => 1.0, + _ => (0..n).fold(0.0, |acc, x| acc + (x as f64 + 1.0).powf(-m)), + } +} diff --git a/rand_distr/tests/cdf.rs b/rand_distr/tests/cdf.rs deleted file mode 100644 index 62286860db..0000000000 --- a/rand_distr/tests/cdf.rs +++ /dev/null @@ -1,883 +0,0 @@ -// Copyright 2021 Developers of the Rand project. -// -// Licensed under the Apache License, Version 2.0 or the MIT license -// , at your -// option. This file may not be copied, modified, or distributed -// except according to those terms. - -mod common; -use core::f64; - -use num_traits::AsPrimitive; -use rand::SeedableRng; -use rand_distr::{Distribution, Normal}; -use special::{Beta, Gamma, Primitive}; - -// [1] Nonparametric Goodness-of-Fit Tests for Discrete Null Distributions -// by Taylor B. Arnold and John W. Emerson -// http://www.stat.yale.edu/~jay/EmersonMaterials/DiscreteGOF.pdf - -/// Empirical Cumulative Distribution Function (ECDF) -struct Ecdf { - sorted_samples: Vec, -} - -impl Ecdf { - fn new(mut samples: Vec) -> Self { - samples.sort_by(|a, b| a.partial_cmp(b).unwrap()); - Self { - sorted_samples: samples, - } - } - - /// Returns the step points of the ECDF - /// The ECDF is a step function that increases by 1/n at each sample point - /// The function is continuous from the right, so we give the bigger value at the step points - /// First point is (-inf, 0.0), last point is (max(samples), 1.0) - fn step_points(&self) -> Vec<(f64, f64)> { - let mut points = Vec::with_capacity(self.sorted_samples.len() + 1); - let mut last = f64::NEG_INFINITY; - let mut count = 0; - let n = self.sorted_samples.len() as f64; - for &x in &self.sorted_samples { - if x != last { - points.push((last, count as f64 / n)); - last = x; - } - count += 1; - } - points.push((last, count as f64 / n)); - points - } -} - -fn kolmogorov_smirnov_statistic_continuous(ecdf: Ecdf, cdf: impl Fn(f64) -> f64) -> f64 { - // We implement equation (3) from [1] - - let mut max_diff: f64 = 0.; - - let step_points = ecdf.step_points(); // x_i in the paper - for i in 1..step_points.len() { - let (x_i, f_i) = step_points[i]; - let (_, f_i_1) = step_points[i - 1]; - let cdf_i = cdf(x_i); - let max_1 = (cdf_i - f_i).abs(); - let max_2 = (cdf_i - f_i_1).abs(); - - max_diff = max_diff.max(max_1).max(max_2); - } - max_diff -} - -fn kolmogorov_smirnov_statistic_discrete(ecdf: Ecdf, cdf: impl Fn(i64) -> f64) -> f64 { - // We implement equation (4) from [1] - - let mut max_diff: f64 = 0.; - - let step_points = ecdf.step_points(); // x_i in the paper - for i in 1..step_points.len() { - let (x_i, f_i) = step_points[i]; - let (_, f_i_1) = step_points[i - 1]; - let max_1 = (cdf(x_i as i64) - f_i).abs(); - let max_2 = (cdf(x_i as i64 - 1) - f_i_1).abs(); // -1 is the same as -epsilon, because we have integer support - - max_diff = max_diff.max(max_1).max(max_2); - } - max_diff -} - -const SAMPLE_SIZE: u64 = 1_000_000; - -fn critical_value() -> f64 { - // If the sampler is correct, we expect less than 0.001 false positives (alpha = 0.001). - // Passing this does not prove that the sampler is correct but is a good indication. - 1.95 / (SAMPLE_SIZE as f64).sqrt() -} - -fn sample_ecdf(seed: u64, dist: impl Distribution) -> Ecdf -where - T: AsPrimitive, -{ - let mut rng = rand::rngs::SmallRng::seed_from_u64(seed); - let samples = (0..SAMPLE_SIZE) - .map(|_| dist.sample(&mut rng).as_()) - .collect(); - Ecdf::new(samples) -} - -/// Tests a distribution against an analytical CDF. -/// The CDF has to be continuous. -pub fn test_continuous(seed: u64, dist: impl Distribution, cdf: impl Fn(f64) -> f64) { - let ecdf = sample_ecdf(seed, dist); - let ks_statistic = kolmogorov_smirnov_statistic_continuous(ecdf, cdf); - - let critical_value = critical_value(); - - println!("KS statistic: {}", ks_statistic); - println!("Critical value: {}", critical_value); - assert!(ks_statistic < critical_value); -} - -/// Tests a distribution over integers against an analytical CDF. -/// The analytical CDF must not have jump points which are not integers. -pub fn test_discrete(seed: u64, dist: D, cdf: F) -where - I: AsPrimitive, - D: Distribution, - F: Fn(i64) -> f64, -{ - let ecdf = sample_ecdf(seed, dist); - let ks_statistic = kolmogorov_smirnov_statistic_discrete(ecdf, cdf); - - // This critical value is bigger than it could be for discrete distributions, but because of large sample sizes this should not matter too much - let critical_value = critical_value(); - - println!("KS statistic: {}", ks_statistic); - println!("Critical value: {}", critical_value); - assert!(ks_statistic < critical_value); -} - -fn normal_cdf(x: f64, mean: f64, std_dev: f64) -> f64 { - 0.5 * ((mean - x) / (std_dev * f64::consts::SQRT_2)).erfc() -} - -#[test] -fn normal() { - let parameters = [ - (0.0, 1.0), - (0.0, 0.1), - (1.0, 10.0), - (1.0, 100.0), - (-1.0, 0.00001), - (-1.0, 0.0000001), - ]; - - for (seed, (mean, std_dev)) in parameters.into_iter().enumerate() { - test_continuous(seed as u64, Normal::new(mean, std_dev).unwrap(), |x| { - normal_cdf(x, mean, std_dev) - }); - } -} - -#[test] -fn skew_normal() { - fn cdf(x: f64, location: f64, scale: f64, shape: f64) -> f64 { - let norm = (x - location) / scale; - phi(norm) - 2.0 * owen_t(norm, shape) - } - - let parameters = [(0.0, 1.0, 5.0), (1.0, 10.0, -5.0), (-1.0, 0.00001, 0.0)]; - - for (seed, (location, scale, shape)) in parameters.into_iter().enumerate() { - let dist = rand_distr::SkewNormal::new(location, scale, shape).unwrap(); - test_continuous(seed as u64, dist, |x| cdf(x, location, scale, shape)); - } -} - -#[test] -fn cauchy() { - fn cdf(x: f64, median: f64, scale: f64) -> f64 { - (1.0 / f64::consts::PI) * ((x - median) / scale).atan() + 0.5 - } - - let parameters = [ - (0.0, 1.0), - (0.0, 0.1), - (1.0, 10.0), - (1.0, 100.0), - (-1.0, 0.00001), - (-1.0, 0.0000001), - ]; - - for (seed, (median, scale)) in parameters.into_iter().enumerate() { - let dist = rand_distr::Cauchy::new(median, scale).unwrap(); - test_continuous(seed as u64, dist, |x| cdf(x, median, scale)); - } -} - -#[test] -fn uniform() { - fn cdf(x: f64, a: f64, b: f64) -> f64 { - if x < a { - 0.0 - } else if x < b { - (x - a) / (b - a) - } else { - 1.0 - } - } - - let parameters = [(0.0, 1.0), (-1.0, 1.0), (0.0, 100.0), (-100.0, 100.0)]; - - for (seed, (a, b)) in parameters.into_iter().enumerate() { - let dist = rand_distr::Uniform::new(a, b).unwrap(); - test_continuous(seed as u64, dist, |x| cdf(x, a, b)); - } -} - -#[test] -fn log_normal() { - fn cdf(x: f64, mean: f64, std_dev: f64) -> f64 { - if x <= 0.0 { - 0.0 - } else if x.is_infinite() { - 1.0 - } else { - 0.5 * ((mean - x.ln()) / (std_dev * f64::consts::SQRT_2)).erfc() - } - } - - let parameters = [ - (0.0, 1.0), - (0.0, 0.1), - (0.5, 0.7), - (1.0, 10.0), - (1.0, 100.0), - ]; - - for (seed, (mean, std_dev)) in parameters.into_iter().enumerate() { - let dist = rand_distr::LogNormal::new(mean, std_dev).unwrap(); - test_continuous(seed as u64, dist, |x| cdf(x, mean, std_dev)); - } -} - -#[test] -fn pareto() { - fn cdf(x: f64, scale: f64, alpha: f64) -> f64 { - if x <= scale { - 0.0 - } else { - 1.0 - (scale / x).powf(alpha) - } - } - - let parameters = [ - (1.0, 1.0), - (1.0, 0.1), - (1.0, 10.0), - (1.0, 100.0), - (0.1, 1.0), - (10.0, 1.0), - (100.0, 1.0), - ]; - - for (seed, (scale, alpha)) in parameters.into_iter().enumerate() { - let dist = rand_distr::Pareto::new(scale, alpha).unwrap(); - test_continuous(seed as u64, dist, |x| cdf(x, scale, alpha)); - } -} - -#[test] -fn exp() { - fn cdf(x: f64, lambda: f64) -> f64 { - 1.0 - (-lambda * x).exp() - } - - let parameters = [0.5, 1.0, 7.5, 32.0, 100.0]; - - for (seed, lambda) in parameters.into_iter().enumerate() { - let dist = rand_distr::Exp::new(lambda).unwrap(); - test_continuous(seed as u64, dist, |x| cdf(x, lambda)); - } -} - -#[test] -fn weibull() { - fn cdf(x: f64, lambda: f64, k: f64) -> f64 { - if x < 0.0 { - return 0.0; - } - - 1.0 - (-(x / lambda).powf(k)).exp() - } - - let parameters = [ - (0.5, 1.0), - (1.0, 1.0), - (10.0, 0.1), - (0.1, 10.0), - (15.0, 20.0), - (1000.0, 0.01), - ]; - - for (seed, (lambda, k)) in parameters.into_iter().enumerate() { - let dist = rand_distr::Weibull::new(lambda, k).unwrap(); - test_continuous(seed as u64, dist, |x| cdf(x, lambda, k)); - } -} - -#[test] -fn gumbel() { - fn cdf(x: f64, mu: f64, beta: f64) -> f64 { - (-(-(x - mu) / beta).exp()).exp() - } - - let parameters = [ - (0.0, 1.0), - (1.0, 2.0), - (-1.0, 0.5), - (10.0, 0.1), - (100.0, 0.0001), - ]; - - for (seed, (mu, beta)) in parameters.into_iter().enumerate() { - let dist = rand_distr::Gumbel::new(mu, beta).unwrap(); - test_continuous(seed as u64, dist, |x| cdf(x, mu, beta)); - } -} - -#[test] -fn frechet() { - fn cdf(x: f64, alpha: f64, s: f64, m: f64) -> f64 { - if x < m { - return 0.0; - } - - (-((x - m) / s).powf(-alpha)).exp() - } - - let parameters = [ - (0.5, 2.0, 1.0), - (1.0, 1.0, 1.0), - (10.0, 0.1, 1.0), - (100.0, 0.0001, 1.0), - (0.9999, 2.0, 1.0), - ]; - - for (seed, (alpha, s, m)) in parameters.into_iter().enumerate() { - let dist = rand_distr::Frechet::new(m, s, alpha).unwrap(); - test_continuous(seed as u64, dist, |x| cdf(x, alpha, s, m)); - } -} - -#[test] -fn zeta() { - fn cdf(k: i64, s: f64) -> f64 { - use common::spec_func::zeta_func; - if k < 1 { - return 0.0; - } - - gen_harmonic(k as u64, s) / zeta_func(s) - } - - let parameters = [2.0, 3.7, 5.0, 100.0]; - - for (seed, s) in parameters.into_iter().enumerate() { - let dist = rand_distr::Zeta::new(s).unwrap(); - test_discrete(seed as u64, dist, |k| cdf(k, s)); - } -} - -#[test] -fn zipf() { - fn cdf(k: i64, n: u64, s: f64) -> f64 { - if k < 1 { - return 0.0; - } - if k > n as i64 { - return 1.0; - } - gen_harmonic(k as u64, s) / gen_harmonic(n, s) - } - - let parameters = [(1000, 1.0), (500, 2.0), (1000, 0.5)]; - - for (seed, (n, x)) in parameters.into_iter().enumerate() { - let dist = rand_distr::Zipf::new(n as f64, x).unwrap(); - test_discrete(seed as u64, dist, |k| cdf(k, n, x)); - } -} - -#[test] -fn gamma() { - fn cdf(x: f64, shape: f64, scale: f64) -> f64 { - if x < 0.0 { - return 0.0; - } - - (x / scale).inc_gamma(shape) - } - - let parameters = [ - (0.5, 2.0), - (1.0, 1.0), - (10.0, 0.1), - (100.0, 0.0001), - (0.9999, 2.0), - ]; - - for (seed, (shape, scale)) in parameters.into_iter().enumerate() { - let dist = rand_distr::Gamma::new(shape, scale).unwrap(); - test_continuous(seed as u64, dist, |x| cdf(x, shape, scale)); - } -} - -#[test] -fn chi_squared() { - fn cdf(x: f64, k: f64) -> f64 { - if x < 0.0 { - return 0.0; - } - - (x / 2.0).inc_gamma(k / 2.0) - } - - let parameters = [0.1, 1.0, 2.0, 10.0, 100.0, 1000.0]; - - for (seed, k) in parameters.into_iter().enumerate() { - let dist = rand_distr::ChiSquared::new(k).unwrap(); - test_continuous(seed as u64, dist, |x| cdf(x, k)); - } -} -#[test] -fn studend_t() { - fn cdf(x: f64, df: f64) -> f64 { - let h = df / (df + x.powi(2)); - let ib = 0.5 * h.inc_beta(df / 2.0, 0.5, 0.5.ln_beta(df / 2.0)); - if x < 0.0 { - ib - } else { - 1.0 - ib - } - } - - let parameters = [1.0, 10.0, 50.0]; - - for (seed, df) in parameters.into_iter().enumerate() { - let dist = rand_distr::StudentT::new(df).unwrap(); - test_continuous(seed as u64, dist, |x| cdf(x, df)); - } -} - -#[test] -fn fisher_f() { - fn cdf(x: f64, m: f64, n: f64) -> f64 { - if (m == 1.0 && x <= 0.0) || x < 0.0 { - 0.0 - } else { - let k = m * x / (m * x + n); - let d1 = m / 2.0; - let d2 = n / 2.0; - k.inc_beta(d1, d2, d1.ln_beta(d2)) - } - } - - let parameters = [(1.0, 1.0), (1.0, 2.0), (2.0, 1.0), (50.0, 1.0)]; - - for (seed, (m, n)) in parameters.into_iter().enumerate() { - let dist = rand_distr::FisherF::new(m, n).unwrap(); - test_continuous(seed as u64, dist, |x| cdf(x, m, n)); - } -} - -#[test] -fn beta() { - fn cdf(x: f64, alpha: f64, beta: f64) -> f64 { - if x < 0.0 { - return 0.0; - } - if x > 1.0 { - return 1.0; - } - let ln_beta_ab = alpha.ln_beta(beta); - x.inc_beta(alpha, beta, ln_beta_ab) - } - - let parameters = [(0.5, 0.5), (2.0, 3.5), (10.0, 1.0), (100.0, 50.0)]; - - for (seed, (alpha, beta)) in parameters.into_iter().enumerate() { - let dist = rand_distr::Beta::new(alpha, beta).unwrap(); - test_continuous(seed as u64, dist, |x| cdf(x, alpha, beta)); - } -} - -#[test] -fn triangular() { - fn cdf(x: f64, a: f64, b: f64, c: f64) -> f64 { - if x <= a { - 0.0 - } else if a < x && x <= c { - (x - a).powi(2) / ((b - a) * (c - a)) - } else if c < x && x < b { - 1.0 - (b - x).powi(2) / ((b - a) * (b - c)) - } else { - 1.0 - } - } - - let parameters = [ - (0.0, 1.0, 0.0001), - (0.0, 1.0, 0.9999), - (0.0, 1.0, 0.5), - (0.0, 100.0, 50.0), - (-100.0, 100.0, 0.0), - ]; - - for (seed, (a, b, c)) in parameters.into_iter().enumerate() { - let dist = rand_distr::Triangular::new(a, b, c).unwrap(); - test_continuous(seed as u64, dist, |x| cdf(x, a, b, c)); - } -} - -fn binomial_cdf(k: i64, p: f64, n: u64) -> f64 { - if k < 0 { - return 0.0; - } - let k = k as u64; - if k >= n { - return 1.0; - } - - let a = (n - k) as f64; - let b = k as f64 + 1.0; - - let q = 1.0 - p; - - let ln_beta_ab = a.ln_beta(b); - - q.inc_beta(a, b, ln_beta_ab) -} - -#[test] -fn binomial() { - let parameters = [ - (0.5, 10), - (0.5, 100), - (0.1, 10), - (0.0000001, 1000000), - (0.0000001, 10), - (0.9999, 2), - ]; - - for (seed, (p, n)) in parameters.into_iter().enumerate() { - test_discrete(seed as u64, rand_distr::Binomial::new(n, p).unwrap(), |k| { - binomial_cdf(k, p, n) - }); - } -} - -#[test] -fn geometric() { - fn cdf(k: i64, p: f64) -> f64 { - if k < 0 { - 0.0 - } else { - 1.0 - (1.0 - p).powi(1 + k as i32) - } - } - - let parameters = [0.3, 0.5, 0.7, 0.0000001, 0.9999]; - - for (seed, p) in parameters.into_iter().enumerate() { - let dist = rand_distr::Geometric::new(p).unwrap(); - test_discrete(seed as u64, dist, |k| cdf(k, p)); - } -} - -#[test] -fn hypergeometric() { - fn cdf(x: i64, n: u64, k: u64, n_: u64) -> f64 { - let min = if n_ + k > n { n_ + k - n } else { 0 }; - let max = k.min(n_); - if x < min as i64 { - return 0.0; - } else if x >= max as i64 { - return 1.0; - } - - (min..x as u64 + 1).fold(0.0, |acc, k_| { - acc + (ln_binomial(k, k_) + ln_binomial(n - k, n_ - k_) - ln_binomial(n, n_)).exp() - }) - } - - let parameters = [ - (15, 13, 10), - (25, 15, 5), - (60, 10, 7), - (70, 20, 50), - (100, 50, 10), - (100, 50, 49), - ]; - - for (seed, (n, k, n_)) in parameters.into_iter().enumerate() { - let dist = rand_distr::Hypergeometric::new(n, k, n_).unwrap(); - test_discrete(seed as u64, dist, |x| cdf(x, n, k, n_)); - } -} - -#[test] -fn poisson() { - use rand_distr::Poisson; - fn cdf(k: i64, lambda: f64) -> f64 { - use common::spec_func::gamma_lr; - - if k < 0 || lambda <= 0.0 { - return 0.0; - } - - 1.0 - gamma_lr(k as f64 + 1.0, lambda) - } - let parameters = [ - 0.1, 1.0, 7.5, - // 1e9, passed case but too slow - // 1.844E+19, // fail case - ]; - - for (seed, lambda) in parameters.into_iter().enumerate() { - let dist = Poisson::new(lambda).unwrap(); - test_discrete::, _>(seed as u64, dist, |k| cdf(k, lambda)); - } -} - -fn ln_factorial(n: u64) -> f64 { - (n as f64 + 1.0).lgamma().0 -} - -fn ln_binomial(n: u64, k: u64) -> f64 { - ln_factorial(n) - ln_factorial(k) - ln_factorial(n - k) -} - -fn gen_harmonic(n: u64, m: f64) -> f64 { - match n { - 0 => 1.0, - _ => (0..n).fold(0.0, |acc, x| acc + (x as f64 + 1.0).powf(-m)), - } -} - -/// [1] Patefield, M. (2000). Fast and Accurate Calculation of Owen’s T Function. -/// Journal of Statistical Software, 5(5), 1–25. -/// https://doi.org/10.18637/jss.v005.i05 -/// -/// This function is ported to Rust from the Fortran code provided in the paper -fn owen_t(h: f64, a: f64) -> f64 { - let absh = h.abs(); - let absa = a.abs(); - let ah = absa * absh; - - let mut t; - if absa <= 1.0 { - t = tf(absh, absa, ah); - } else if absh <= 0.67 { - t = 0.25 - znorm1(absh) * znorm1(ah) - tf(ah, 1.0 / absa, absh); - } else { - let normh = znorm2(absh); - let normah = znorm2(ah); - t = 0.5 * (normh + normah) - normh * normah - tf(ah, 1.0 / absa, absh); - } - - if a < 0.0 { - t = -t; - } - - fn tf(h: f64, a: f64, ah: f64) -> f64 { - let rtwopi = 0.159_154_943_091_895_35; - let rrtpi = 0.398_942_280_401_432_7; - - let c2 = [ - 0.999_999_999_999_999_9, - -0.999_999_999_999_888, - 0.999_999_999_982_907_5, - -0.999_999_998_962_825, - 0.999_999_966_604_593_7, - -0.999_999_339_862_724_7, - 0.999_991_256_111_369_6, - -0.999_917_776_244_633_8, - 0.999_428_355_558_701_4, - -0.996_973_117_207_23, - 0.987_514_480_372_753, - -0.959_158_579_805_728_8, - 0.892_463_055_110_067_1, - -0.768_934_259_904_64, - 0.588_935_284_684_846_9, - -0.383_803_451_604_402_55, - 0.203_176_017_010_453, - -8.281_363_160_700_499e-2, - 2.416_798_473_575_957_8e-2, - -4.467_656_666_397_183e-3, - 3.914_116_940_237_383_6e-4, - ]; - - let pts = [ - 3.508_203_967_645_171_6e-3, - 3.127_904_233_803_075_6e-2, - 8.526_682_628_321_945e-2, - 0.162_450_717_308_122_77, - 0.258_511_960_491_254_36, - 0.368_075_538_406_975_3, - 0.485_010_929_056_047, - 0.602_775_141_526_185_7, - 0.714_778_842_177_532_3, - 0.814_755_109_887_601, - 0.897_110_297_559_489_7, - 0.957_238_080_859_442_6, - 0.991_788_329_746_297, - ]; - - let wts = [ - 1.883_143_811_532_350_3e-2, - 1.856_708_624_397_765e-2, - 1.804_209_346_122_338_5e-2, - 1.726_382_960_639_875_2e-2, - 1.624_321_997_598_985_8e-2, - 1.499_459_203_411_670_5e-2, - 1.353_547_446_966_209e-2, - 1.188_635_160_582_016_5e-2, - 1.007_037_724_277_743_2e-2, - 8.113_054_574_229_958e-3, - 6.041_900_952_847_024e-3, - 3.886_221_701_074_205_7e-3, - 1.679_303_108_454_609e-3, - ]; - - let hrange = [ - 0.02, 0.06, 0.09, 0.125, 0.26, 0.4, 0.6, 1.6, 1.7, 2.33, 2.4, 3.36, 3.4, 4.8, - ]; - let arange = [0.025, 0.09, 0.15, 0.36, 0.5, 0.9, 0.99999]; - - let select = [ - [1, 1, 2, 13, 13, 13, 13, 13, 13, 13, 13, 16, 16, 16, 9], - [1, 2, 2, 3, 3, 5, 5, 14, 14, 15, 15, 16, 16, 16, 9], - [2, 2, 3, 3, 3, 5, 5, 15, 15, 15, 15, 16, 16, 16, 10], - [2, 2, 3, 5, 5, 5, 5, 7, 7, 16, 16, 16, 16, 16, 10], - [2, 3, 3, 5, 5, 6, 6, 8, 8, 17, 17, 17, 12, 12, 11], - [2, 3, 5, 5, 5, 6, 6, 8, 8, 17, 17, 17, 12, 12, 12], - [2, 3, 4, 4, 6, 6, 8, 8, 17, 17, 17, 17, 17, 12, 12], - [2, 3, 4, 4, 6, 6, 18, 18, 18, 18, 17, 17, 17, 12, 12], - ]; - - let ihint = hrange.iter().position(|&r| h < r).unwrap_or(14); - - let iaint = arange.iter().position(|&r| a < r).unwrap_or(7); - - let icode = select[iaint][ihint]; - let m = [ - 2, 3, 4, 5, 7, 10, 12, 18, 10, 20, 30, 20, 4, 7, 8, 20, 13, 0, - ][icode - 1]; - let method = [1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 3, 4, 4, 4, 4, 5, 6][icode - 1]; - - match method { - 1 => { - let hs = -0.5 * h * h; - let dhs = hs.exp(); - let as_ = a * a; - let mut j = 1; - let mut jj = 1; - let mut aj = rtwopi * a; - let mut tf = rtwopi * a.atan(); - let mut dj = dhs - 1.0; - let mut gj = hs * dhs; - loop { - tf += dj * aj / (jj as f64); - if j >= m { - return tf; - } - j += 1; - jj += 2; - aj *= as_; - dj = gj - dj; - gj *= hs / (j as f64); - } - } - 2 => { - let maxii = m + m + 1; - let mut ii = 1; - let mut tf = 0.0; - let hs = h * h; - let as_ = -a * a; - let mut vi = rrtpi * a * (-0.5 * ah * ah).exp(); - let mut z = znorm1(ah) / h; - let y = 1.0 / hs; - loop { - tf += z; - if ii >= maxii { - tf *= rrtpi * (-0.5 * hs).exp(); - return tf; - } - z = y * (vi - (ii as f64) * z); - vi *= as_; - ii += 2; - } - } - 3 => { - let mut i = 1; - let mut ii = 1; - let mut tf = 0.0; - let hs = h * h; - let as_ = a * a; - let mut vi = rrtpi * a * (-0.5 * ah * ah).exp(); - let mut zi = znorm1(ah) / h; - let y = 1.0 / hs; - loop { - tf += zi * c2[i - 1]; - if i > m { - tf *= rrtpi * (-0.5 * hs).exp(); - return tf; - } - zi = y * ((ii as f64) * zi - vi); - vi *= as_; - i += 1; - ii += 2; - } - } - 4 => { - let maxii = m + m + 1; - let mut ii = 1; - let mut tf = 0.0; - let hs = h * h; - let as_ = -a * a; - let mut ai = rtwopi * a * (-0.5 * hs * (1.0 - as_)).exp(); - let mut yi = 1.0; - loop { - tf += ai * yi; - if ii >= maxii { - return tf; - } - ii += 2; - yi = (1.0 - hs * yi) / (ii as f64); - ai *= as_; - } - } - 5 => { - let mut tf = 0.0; - let as_ = a * a; - let hs = -0.5 * h * h; - for i in 0..m { - let r = 1.0 + as_ * pts[i]; - tf += wts[i] * (hs * r).exp() / r; - } - tf *= a; - tf - } - 6 => { - let normh = znorm2(h); - let mut tf = 0.5 * normh * (1.0 - normh); - let y = 1.0 - a; - let r = (y / (1.0 + a)).atan(); - if r != 0.0 { - tf -= rtwopi * r * (-0.5 * y * h * h / r).exp(); - } - tf - } - _ => 0.0, - } - } - - // P(0 ≤ Z ≤ x) - fn znorm1(x: f64) -> f64 { - phi(x) - 0.5 - } - - // P(x ≤ Z < ∞) - fn znorm2(x: f64) -> f64 { - 1.0 - phi(x) - } - - t -} - -/// standard normal cdf -fn phi(x: f64) -> f64 { - normal_cdf(x, 0.0, 1.0) -} diff --git a/rand_distr/tests/common/mod.rs b/rand_distr/tests/common/mod.rs deleted file mode 100644 index 4fa8db7de6..0000000000 --- a/rand_distr/tests/common/mod.rs +++ /dev/null @@ -1 +0,0 @@ -pub mod spec_func; diff --git a/rand_distr/tests/common/spec_func.rs b/rand_distr/tests/common/spec_func.rs deleted file mode 100644 index bd39a79fb4..0000000000 --- a/rand_distr/tests/common/spec_func.rs +++ /dev/null @@ -1,155 +0,0 @@ -// MIT License - -// Copyright (c) 2016 Michael Ma - -// Permission is hereby granted, free of charge, to any person obtaining a copy -// of this software and associated documentation files (the "Software"), to deal -// in the Software without restriction, including without limitation the rights -// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -// copies of the Software, and to permit persons to whom the Software is -// furnished to do so, subject to the following conditions: - -// The above copyright notice and this permission notice shall be included in all -// copies or substantial portions of the Software. - -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -// SOFTWARE. - -/// https://docs.rs/statrs/latest/src/statrs/function/gamma.rs.html#260-347 -use special::Primitive; - -pub fn gamma_lr(a: f64, x: f64) -> f64 { - if a.is_nan() || x.is_nan() { - return f64::NAN; - } - - if a <= 0.0 || a == f64::INFINITY { - panic!("a must be positive and finite"); - } - if x <= 0.0 || x == f64::INFINITY { - panic!("x must be positive and finite"); - } - - const ACC: f64 = 0.0000000000000011102230246251565; - - if a.abs() < ACC { - return 1.0; - } - - if x.abs() < ACC { - return 0.0; - } - - let eps = 0.000000000000001; - let big = 4503599627370496.0; - let big_inv = 2.220_446_049_250_313e-16; - - let ax = a * x.ln() - x - a.lgamma().0; - if ax < -709.782_712_893_384 { - if a < x { - return 1.0; - } - return 0.0; - } - if x <= 1.0 || x <= a { - let mut r2 = a; - let mut c2 = 1.0; - let mut ans2 = 1.0; - loop { - r2 += 1.0; - c2 *= x / r2; - ans2 += c2; - - if c2 / ans2 <= eps { - break; - } - } - return ax.exp() * ans2 / a; - } - - let mut y = 1.0 - a; - let mut z = x + y + 1.0; - let mut c = 0; - - let mut p3 = 1.0; - let mut q3 = x; - let mut p2 = x + 1.0; - let mut q2 = z * x; - let mut ans = p2 / q2; - - loop { - y += 1.0; - z += 2.0; - c += 1; - let yc = y * f64::from(c); - - let p = p2 * z - p3 * yc; - let q = q2 * z - q3 * yc; - - p3 = p2; - p2 = p; - q3 = q2; - q2 = q; - - if p.abs() > big { - p3 *= big_inv; - p2 *= big_inv; - q3 *= big_inv; - q2 *= big_inv; - } - - if q != 0.0 { - let nextans = p / q; - let error = ((ans - nextans) / nextans).abs(); - ans = nextans; - - if error <= eps { - break; - } - } - } - 1.0 - ax.exp() * ans -} - -/// https://docs.rs/spfunc/latest/src/spfunc/zeta.rs.html#20-43 -pub fn zeta_func(s: f64) -> f64 { - fn update_akn(akn: &mut Vec, s: f64) { - let n = akn.len() - 1; - - let n1 = n as f64 + 1.0; - - akn.iter_mut().enumerate().for_each(|(k, a)| { - let num = n1; - let den = n + 1 - k; - *a *= num / den as f64; - }); - let p1 = -1.0 / n1; - let p2 = (n1 / (n1 + 1.0)).powf(s); - - akn.push(p1 * p2 * akn[n]); - } - - if s == 1.0 { - return f64::INFINITY; - } - - let mut akn = vec![1.0]; - let mut two_pow = 0.5; - let head = 1.0 / (1.0 - 2.0.powf(1.0 - s)); - let mut tail_prev = 0.0; - let mut tail = two_pow * akn[0]; - - while (tail - tail_prev).abs() >= f64::EPSILON { - update_akn(&mut akn, s); - two_pow /= 2.0; - tail_prev = tail; - tail += two_pow * akn.iter().sum::(); - } - - head * tail -} diff --git a/rand_distr/tests/pdf.rs b/rand_distr/tests/pdf.rs deleted file mode 100644 index 1bbbd32d55..0000000000 --- a/rand_distr/tests/pdf.rs +++ /dev/null @@ -1,179 +0,0 @@ -// Copyright 2021 Developers of the Rand project. -// -// Licensed under the Apache License, Version 2.0 or the MIT license -// , at your -// option. This file may not be copied, modified, or distributed -// except according to those terms. - -#![allow(clippy::float_cmp)] - -use average::Histogram; -use rand::{Rng, SeedableRng}; -use rand_distr::{Normal, SkewNormal}; - -const HIST_LEN: usize = 100; -average::define_histogram!(hist, crate::HIST_LEN); -use hist::Histogram as Histogram100; - -mod sparkline; - -#[test] -fn normal() { - const N_SAMPLES: u64 = 1_000_000; - const MEAN: f64 = 2.; - const STD_DEV: f64 = 0.5; - const MIN_X: f64 = -1.; - const MAX_X: f64 = 5.; - - let dist = Normal::new(MEAN, STD_DEV).unwrap(); - let mut hist = Histogram100::with_const_width(MIN_X, MAX_X); - let mut rng = rand::rngs::SmallRng::seed_from_u64(1); - - for _ in 0..N_SAMPLES { - let _ = hist.add(rng.sample(dist)); // Ignore out-of-range values - } - - println!( - "Sampled normal distribution:\n{}", - sparkline::render_u64_as_string(hist.bins()) - ); - - fn pdf(x: f64) -> f64 { - (-0.5 * ((x - MEAN) / STD_DEV).powi(2)).exp() - / (STD_DEV * (2. * core::f64::consts::PI).sqrt()) - } - - let mut bin_centers = hist.centers(); - let mut expected = [0.; HIST_LEN]; - for e in &mut expected[..] { - *e = pdf(bin_centers.next().unwrap()); - } - - println!( - "Expected normal distribution:\n{}", - sparkline::render_u64_as_string(hist.bins()) - ); - - let mut diff = [0.; HIST_LEN]; - for (i, n) in hist.normalized_bins().enumerate() { - let bin = n / (N_SAMPLES as f64); - diff[i] = (bin - expected[i]).abs(); - } - - println!( - "Difference:\n{}", - sparkline::render_f64_as_string(&diff[..]) - ); - println!( - "max diff: {:?}", - diff.iter().fold(f64::NEG_INFINITY, |a, &b| a.max(b)) - ); - - // Check that the differences are significantly smaller than the expected error. - let mut expected_error = [0.; HIST_LEN]; - // Calculate error from histogram - for (err, var) in expected_error.iter_mut().zip(hist.variances()) { - *err = var.sqrt() / (N_SAMPLES as f64); - } - // Normalize error by bin width - for (err, width) in expected_error.iter_mut().zip(hist.widths()) { - *err /= width; - } - // TODO: Calculate error from distribution cutoff / normalization - - println!( - "max expected_error: {:?}", - expected_error - .iter() - .fold(f64::NEG_INFINITY, |a, &b| a.max(b)) - ); - for (&d, &e) in diff.iter().zip(expected_error.iter()) { - // Difference larger than 4 standard deviations or cutoff - let tol = (4. * e).max(1e-4); - assert!(d <= tol, "Difference = {} * tol", d / tol); - } -} - -#[test] -fn skew_normal() { - const N_SAMPLES: u64 = 1_000_000; - const LOCATION: f64 = 2.; - const SCALE: f64 = 0.5; - const SHAPE: f64 = -3.0; - const MIN_X: f64 = -1.; - const MAX_X: f64 = 4.; - - let dist = SkewNormal::new(LOCATION, SCALE, SHAPE).unwrap(); - let mut hist = Histogram100::with_const_width(MIN_X, MAX_X); - let mut rng = rand::rngs::SmallRng::seed_from_u64(1); - - for _ in 0..N_SAMPLES { - let _ = hist.add(rng.sample(dist)); // Ignore out-of-range values - } - - println!( - "Sampled skew normal distribution:\n{}", - sparkline::render_u64_as_string(hist.bins()) - ); - - use special::Error; - fn pdf(x: f64) -> f64 { - let x_normalized = (x - LOCATION) / SCALE; - let normal_density_x = - (-0.5 * (x_normalized).powi(2)).exp() / (2. * core::f64::consts::PI).sqrt(); - let normal_distribution_x = - 0.5 * (1.0 + (SHAPE * x_normalized / core::f64::consts::SQRT_2).error()); - 2.0 / SCALE * normal_density_x * normal_distribution_x - } - - let mut bin_centers = hist.centers(); - let mut expected = [0.; HIST_LEN]; - for e in &mut expected[..] { - *e = pdf(bin_centers.next().unwrap()); - } - - println!( - "Expected skew normal distribution:\n{}", - sparkline::render_u64_as_string(hist.bins()) - ); - - let mut diff = [0.; HIST_LEN]; - for (i, n) in hist.normalized_bins().enumerate() { - let bin = n / (N_SAMPLES as f64); - diff[i] = (bin - expected[i]).abs(); - } - - println!( - "Difference:\n{}", - sparkline::render_f64_as_string(&diff[..]) - ); - println!( - "max diff: {:?}", - diff.iter().fold(f64::NEG_INFINITY, |a, &b| a.max(b)) - ); - - // Check that the differences are significantly smaller than the expected error. - let mut expected_error = [0.; HIST_LEN]; - // Calculate error from histogram - for (err, var) in expected_error.iter_mut().zip(hist.variances()) { - *err = var.sqrt() / (N_SAMPLES as f64); - } - // Normalize error by bin width - for (err, width) in expected_error.iter_mut().zip(hist.widths()) { - *err /= width; - } - // TODO: Calculate error from distribution cutoff / normalization - - println!( - "max expected_error: {:?}", - expected_error - .iter() - .fold(f64::NEG_INFINITY, |a, &b| a.max(b)) - ); - for (&d, &e) in diff.iter().zip(expected_error.iter()) { - // Difference larger than 4 standard deviations or cutoff - let tol = (4. * e).max(1e-4); - assert!(d <= tol, "Difference = {} * tol", d / tol); - } -} diff --git a/rand_distr/tests/sparkline.rs b/rand_distr/tests/sparkline.rs deleted file mode 100644 index ec0ee98de9..0000000000 --- a/rand_distr/tests/sparkline.rs +++ /dev/null @@ -1,126 +0,0 @@ -// Copyright 2021 Developers of the Rand project. -// -// Licensed under the Apache License, Version 2.0 or the MIT license -// , at your -// option. This file may not be copied, modified, or distributed -// except according to those terms. - -/// Number of ticks. -const N: usize = 8; -/// Ticks used for the sparkline. -static TICKS: [char; N] = ['▁', '▂', '▃', '▄', '▅', '▆', '▇', '█']; - -/// Render a sparkline of `data` into `buffer`. -pub fn render_u64(data: &[u64], buffer: &mut String) { - match data.len() { - 0 => { - return; - } - 1 => { - if data[0] == 0 { - buffer.push(TICKS[0]); - } else { - buffer.push(TICKS[N - 1]); - } - return; - } - _ => {} - } - let max = data.iter().max().unwrap(); - let min = data.iter().min().unwrap(); - let scale = ((N - 1) as f64) / ((max - min) as f64); - for i in data { - let tick = (((i - min) as f64) * scale) as usize; - buffer.push(TICKS[tick]); - } -} - -/// Calculate the required capacity for the sparkline, given the length of the -/// input data. -pub fn required_capacity(len: usize) -> usize { - len * TICKS[0].len_utf8() -} - -/// Render a sparkline of `data` into a newly allocated string. -pub fn render_u64_as_string(data: &[u64]) -> String { - let cap = required_capacity(data.len()); - let mut s = String::with_capacity(cap); - render_u64(data, &mut s); - debug_assert_eq!(s.capacity(), cap); - s -} - -/// Render a sparkline of `data` into `buffer`. -pub fn render_f64(data: &[f64], buffer: &mut String) { - match data.len() { - 0 => { - return; - } - 1 => { - if data[0] == 0. { - buffer.push(TICKS[0]); - } else { - buffer.push(TICKS[N - 1]); - } - return; - } - _ => {} - } - for x in data { - assert!(x.is_finite(), "can only render finite values"); - } - let max = data.iter().fold(f64::NEG_INFINITY, |a, &b| a.max(b)); - let min = data.iter().fold(f64::INFINITY, |a, &b| a.min(b)); - let scale = ((N - 1) as f64) / (max - min); - for x in data { - let tick = ((x - min) * scale) as usize; - buffer.push(TICKS[tick]); - } -} - -/// Render a sparkline of `data` into a newly allocated string. -pub fn render_f64_as_string(data: &[f64]) -> String { - let cap = required_capacity(data.len()); - let mut s = String::with_capacity(cap); - render_f64(data, &mut s); - debug_assert_eq!(s.capacity(), cap); - s -} - -#[cfg(test)] -mod tests { - #[test] - fn render_u64() { - let data = [2, 250, 670, 890, 2, 430, 11, 908, 123, 57]; - let mut s = String::with_capacity(super::required_capacity(data.len())); - super::render_u64(&data, &mut s); - println!("{}", s); - assert_eq!("▁▂▆▇▁▄▁█▁▁", &s); - } - - #[test] - fn render_u64_as_string() { - let data = [2, 250, 670, 890, 2, 430, 11, 908, 123, 57]; - let s = super::render_u64_as_string(&data); - println!("{}", s); - assert_eq!("▁▂▆▇▁▄▁█▁▁", &s); - } - - #[test] - fn render_f64() { - let data = [2., 250., 670., 890., 2., 430., 11., 908., 123., 57.]; - let mut s = String::with_capacity(super::required_capacity(data.len())); - super::render_f64(&data, &mut s); - println!("{}", s); - assert_eq!("▁▂▆▇▁▄▁█▁▁", &s); - } - - #[test] - fn render_f64_as_string() { - let data = [2., 250., 670., 890., 2., 430., 11., 908., 123., 57.]; - let s = super::render_f64_as_string(&data); - println!("{}", s); - assert_eq!("▁▂▆▇▁▄▁█▁▁", &s); - } -} diff --git a/rand_distr/tests/uniformity.rs b/rand_distr/tests/uniformity.rs deleted file mode 100644 index 4818eac990..0000000000 --- a/rand_distr/tests/uniformity.rs +++ /dev/null @@ -1,65 +0,0 @@ -// Copyright 2018 Developers of the Rand project. -// -// Licensed under the Apache License, Version 2.0 or the MIT license -// , at your -// option. This file may not be copied, modified, or distributed -// except according to those terms. - -#![allow(clippy::float_cmp)] - -use average::Histogram; -use rand::prelude::*; - -const N_BINS: usize = 100; -const N_SAMPLES: u32 = 1_000_000; -const TOL: f64 = 1e-3; -average::define_histogram!(hist, 100); -use hist::Histogram as Histogram100; - -#[test] -fn unit_sphere() { - const N_DIM: usize = 3; - let h = Histogram100::with_const_width(-1., 1.); - let mut histograms = [h.clone(), h.clone(), h]; - let dist = rand_distr::UnitSphere; - let mut rng = rand_pcg::Pcg32::from_os_rng(); - for _ in 0..N_SAMPLES { - let v: [f64; 3] = dist.sample(&mut rng); - for i in 0..N_DIM { - histograms[i] - .add(v[i]) - .map_err(|e| { - println!("v: {}", v[i]); - e - }) - .unwrap(); - } - } - for h in &histograms { - let sum: u64 = h.bins().iter().sum(); - println!("{:?}", h); - for &b in h.bins() { - let p = (b as f64) / (sum as f64); - assert!((p - 1.0 / (N_BINS as f64)).abs() < TOL, "{}", p); - } - } -} - -#[test] -fn unit_circle() { - use core::f64::consts::PI; - let mut h = Histogram100::with_const_width(-PI, PI); - let dist = rand_distr::UnitCircle; - let mut rng = rand_pcg::Pcg32::from_os_rng(); - for _ in 0..N_SAMPLES { - let v: [f64; 2] = dist.sample(&mut rng); - h.add(v[0].atan2(v[1])).unwrap(); - } - let sum: u64 = h.bins().iter().sum(); - println!("{:?}", h); - for &b in h.bins() { - let p = (b as f64) / (sum as f64); - assert!((p - 1.0 / (N_BINS as f64)).abs() < TOL, "{}", p); - } -}