diff --git a/Orange/distance/distance.py b/Orange/distance/distance.py index c49045dcf1c..40bb510eced 100644 --- a/Orange/distance/distance.py +++ b/Orange/distance/distance.py @@ -507,6 +507,9 @@ def _corrcoef2(a, b, axis=0): numpy.corrcoef """ a, b = np.atleast_2d(a, b) + if not (axis == 0 or axis == 1): + raise ValueError("Invalid axis {} (only 0 or 1 accepted)".format(axis)) + mean_a = np.mean(a, axis=axis, keepdims=True) mean_b = np.mean(b, axis=axis, keepdims=True) assert a.shape[axis] == b.shape[axis] @@ -523,8 +526,6 @@ def _corrcoef2(a, b, axis=0): elif axis == 1: C = a.dot(b.T) assert C.shape == (n, m) - else: - raise ValueError() ss_a = np.sum(a ** 2, axis=axis, keepdims=True) ss_b = np.sum(b ** 2, axis=axis, keepdims=True) diff --git a/Orange/tests/test_distances.py b/Orange/tests/test_distances.py index 46693b2aafc..3864a188586 100644 --- a/Orange/tests/test_distances.py +++ b/Orange/tests/test_distances.py @@ -6,6 +6,8 @@ import numpy as np import scipy +import scipy.spatial +import scipy.stats from scipy.sparse import csr_matrix from Orange.data import (Table, Domain, ContinuousVariable, @@ -13,6 +15,7 @@ from Orange.distance import (Euclidean, SpearmanR, SpearmanRAbsolute, PearsonR, PearsonRAbsolute, Manhattan, Cosine, Jaccard, _preprocess, MahalanobisDistance) +from Orange.distance.distance import _spearmanr2, _corrcoef2 from Orange.misc import DistMatrix from Orange.tests import named_file, test_filename from Orange.util import OrangeDeprecationWarning @@ -598,6 +601,30 @@ def test_spearmanr_distance_numpy(self): [0.3833333], [0.]])) + def test_spearmanr2(self): + # Test that _spearmanr2 returns the same result that stats.spearmanr + # would + n, m = tuple(np.random.randint(2, 5, size=2)) + mean = np.random.uniform(-1, 1, size=m) + cov = np.random.uniform(0, 1./m, size=(m, m)) + cov = (cov + cov.T) / 2 + cov.flat[::m + 1] = 1.0 + X1 = np.random.multivariate_normal(mean, cov, size=n) + X2 = np.random.multivariate_normal(mean, cov, size=n) + 
expected = scipy.stats.spearmanr(X1, X2, axis=1)[0][:n, n:] + np.testing.assert_almost_equal( + _spearmanr2(X1, X2, axis=1), + expected, + decimal=9 + ) + + expected = scipy.stats.spearmanr(X1, X2, axis=0)[0][:m, m:] + np.testing.assert_almost_equal( + _spearmanr2(X1, X2, axis=0), + expected, + decimal=9, + ) + # noinspection PyTypeChecker class TestSpearmanRAbsolute(TestCase): @@ -752,6 +779,32 @@ def test_pearsonr_distance_numpy(self): [0.32783865], [0.]])) + def test_corrcoef2(self): + # Test that _corrcoef2 returns the same result that np.corrcoef would + n, m = tuple(np.random.randint(2, 5, size=2)) + mean = np.random.uniform(-1, 1, size=m) + cov = np.random.uniform(0, 1./m, size=(m, m)) + cov = (cov + cov.T) / 2 + cov.flat[::m + 1] = 1.0 + X1 = np.random.multivariate_normal(mean, cov, size=n) + X2 = np.random.multivariate_normal(mean, cov, size=n) + expected = np.corrcoef(X1, X2, rowvar=True)[:n, n:] + np.testing.assert_almost_equal( + _corrcoef2(X1, X2, axis=1), + expected, + decimal=9 + ) + + expected = np.corrcoef(X1, X2, rowvar=False)[:m, m:] + np.testing.assert_almost_equal( + _corrcoef2(X1, X2, axis=0), + expected, + decimal=9, + ) + + with self.assertRaises(ValueError): + _corrcoef2(X1, X2, axis=10) + # noinspection PyTypeChecker class TestPearsonRAbsolute(TestCase):