From ff2a5b4cbe32cd208df7b484adf9ba295c778009 Mon Sep 17 00:00:00 2001 From: nikicc Date: Fri, 8 Jul 2016 14:09:49 +0200 Subject: [PATCH] statistics.util: Fix stats for sparse when last column missing The array returned by `np.bincount` has a length of the maximal element plus one and can hence be shorter than the number of columns (e.g. when the last columns contain only zeros). Passing `minlength=X.shape[1]` forces it to count non-zero elements for all columns. --- Orange/statistics/util.py | 2 +- Orange/tests/test_statistics.py | 8 ++++++++ 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/Orange/statistics/util.py b/Orange/statistics/util.py index 80bdba22fc7..200664b532a 100644 --- a/Orange/statistics/util.py +++ b/Orange/statistics/util.py @@ -178,7 +178,7 @@ def stats(X, weights=None, compute_variance=False): if compute_variance: raise NotImplementedError - non_zero = np.bincount(X.nonzero()[1]) + non_zero = np.bincount(X.nonzero()[1], minlength=X.shape[1]) X = X.tocsc() return np.column_stack(( X.min(axis=0).toarray().ravel(), diff --git a/Orange/tests/test_statistics.py b/Orange/tests/test_statistics.py index 03da9f81c56..82451fb4a42 100644 --- a/Orange/tests/test_statistics.py +++ b/Orange/tests/test_statistics.py @@ -41,3 +41,11 @@ def test_stats_sparse(self): [0, 1, .2, 0, 4, 1], [0, 1, .2, 0, 4, 1], [0, 1, .2, 0, 4, 1]]) + + # assure last two columns have just zero elements + X = X[:3] + np.testing.assert_equal(stats(X), [[0, 1, 1/3, 0, 4, 1], + [0, 1, 1/3, 0, 4, 1], + [0, 1, 1/3, 0, 4, 1], + [0, 0, 0, 0, 5, 0], + [0, 0, 0, 0, 5, 0]])