Review note: the line structure of this patch was restored from a
whitespace-collapsed paste; indentation and blank context lines are
reconstructed, so verify them against the target tree before applying.
Two post-image amendments were made during review: domain.py no longer
shadows the builtin `vars`, and owboxplot.py checks for a zero pooled
variance before computing df. The `index` post-image hashes of those two
files are therefore stale.

diff --git a/Orange/data/domain.py b/Orange/data/domain.py
index 2869fc489f5..54adb259085 100644
--- a/Orange/data/domain.py
+++ b/Orange/data/domain.py
@@ -283,25 +283,25 @@ def index(self, var):
         except KeyError:
             raise ValueError("'%s' is not in domain" % var)
 
-    def has_discrete_attributes(self, include_class=False):
+    def has_discrete_attributes(self, include_class=False, include_metas=False):
         """
-        Return `True` if domain has any discrete attributes. If `include_class`
-        is set, the check includes the class attribute(s).
+        Return `True` if domain has any discrete attributes. If
+        `include_class` is set, the check includes the class attribute(s). If
+        `include_metas` is set, the check includes the meta attributes.
         """
-        if not include_class:
-            return any(var.is_discrete for var in self.attributes)
-        else:
-            return any(var.is_discrete for var in self.variables)
+        candidates = self.variables if include_class else self.attributes
+        candidates = candidates + (self.metas if include_metas else ())
+        return any(var.is_discrete for var in candidates)
 
-    def has_continuous_attributes(self, include_class=False):
+    def has_continuous_attributes(self, include_class=False, include_metas=False):
         """
         Return `True` if domain has any continuous attributes. If
-        `include_class` is set, the check includes the class attribute(s).
+        `include_class` is set, the check includes the class attribute(s). If
+        `include_metas` is set, the check includes the meta attributes.
         """
-        if not include_class:
-            return any(var.is_continuous for var in self.attributes)
-        else:
-            return any(var.is_continuous for var in self.variables)
+        candidates = self.variables if include_class else self.attributes
+        candidates = candidates + (self.metas if include_metas else ())
+        return any(var.is_continuous for var in candidates)
 
     @property
     def has_continuous_class(self):
diff --git a/Orange/tests/test_domain.py b/Orange/tests/test_domain.py
index 9b34552a624..8dad8bf9719 100644
--- a/Orange/tests/test_domain.py
+++ b/Orange/tests/test_domain.py
@@ -310,6 +310,15 @@ def test_has_discrete(self):
         self.assertTrue(Domain([race], age).has_discrete_attributes(True))
         self.assertTrue(Domain([], [race, age]).has_discrete_attributes(True))
 
+        d = Domain([], None, [gender])
+        self.assertTrue(d.has_discrete_attributes(False, True))
+        d = Domain([], None, [age])
+        self.assertFalse(d.has_discrete_attributes(False, True))
+        d = Domain([], [age], [gender])
+        self.assertTrue(d.has_discrete_attributes(True, True))
+        d = Domain([], [incomeA], [age])
+        self.assertFalse(d.has_discrete_attributes(True, True))
+
     def test_has_continuous(self):
         self.assertFalse(Domain([]).has_continuous_attributes())
         self.assertFalse(Domain([], [age]).has_continuous_attributes())
@@ -328,6 +337,15 @@ def test_has_continuous(self):
         self.assertTrue(Domain([race], age).has_continuous_attributes(True))
         self.assertTrue(Domain([], [race, age]).has_continuous_attributes(True))
 
+        d = Domain([], None, [age])
+        self.assertTrue(d.has_continuous_attributes(False, True))
+        d = Domain([], None, [gender])
+        self.assertFalse(d.has_continuous_attributes(False, True))
+        d = Domain([], [gender], [age])
+        self.assertTrue(d.has_continuous_attributes(True, True))
+        d = Domain([], [race], [gender])
+        self.assertFalse(d.has_continuous_attributes(True, True))
+
     def test_get_conversion(self):
         compute_value = lambda: 42
         new_income = income.copy(compute_value=compute_value)
diff --git a/Orange/widgets/visualize/owboxplot.py b/Orange/widgets/visualize/owboxplot.py
index f7d7b0c2587..174f28cfbdb 100644
--- a/Orange/widgets/visualize/owboxplot.py
+++ b/Orange/widgets/visualize/owboxplot.py
@@ -289,7 +289,7 @@ def compute_score(attr):
                 return 3
             if attr.is_continuous:
                 # One-way ANOVA
-                col = data.get_column_view(attr)[0]
+                col = data.get_column_view(attr)[0].astype(float)
                 groups = (col[group_col == i] for i in range(n_groups))
                 groups = (col[~np.isnan(col)] for col in groups)
                 groups = [group for group in groups if len(group)]
@@ -297,6 +297,8 @@ def compute_score(attr):
             else:
                 # Chi-square with the given distribution into groups
                 # (see degrees of freedom in computation of the p-value)
+                if not attr.values or not group_var.values:
+                    return 2
                 observed = np.array(
                     contingency.get_contingency(data, group_var, attr))
                 observed = observed[observed.sum(axis=1) != 0, :]
@@ -320,9 +322,9 @@ def compute_score(attr):
         group_var = self.group_var
         if self.order_by_importance and group_var is not None:
             n_groups = len(group_var.values)
-            group_col = data.get_column_view(group_var)[0] \
-                if domain.has_continuous_attributes(include_class=True) \
-                else None
+            group_col = data.get_column_view(group_var)[0] if \
+                domain.has_continuous_attributes(
+                    include_class=True, include_metas=True) else None
             self.attrs.sort(key=compute_score)
         else:
             self.attrs[:] = chain(
@@ -368,10 +370,11 @@ def compute_box_data(self):
         if not attr:
             return
         dataset = self.dataset
-        if dataset is None:
+        self.is_continuous = attr.is_continuous
+        if dataset is None or not self.is_continuous and not attr.values or \
+                self.group_var and not self.group_var.values:
             self.stats = self.dist = self.conts = []
             return
-        self.is_continuous = attr.is_continuous
         if self.group_var:
             self.dist = []
             self.conts = contingency.get_contingency(
@@ -554,6 +557,11 @@ def stat_ttest():
+            # Zero pooled variance makes the statistic undefined (the t
+            # formula below divides by its square root), so return NaNs
+            # before evaluating df or t.
+            if pooled_var == 0:
+                return np.nan, np.nan
             df = pooled_var ** 2 / \
                 ((d1.var / d1.n) ** 2 / (d1.n - 1) +
                  (d2.var / d2.n) ** 2 / (d2.n - 1))
             t = abs(d1.mean - d2.mean) / math.sqrt(pooled_var)
             p = 2 * (1 - scipy.special.stdtr(df, t))
             return t, p
diff --git a/Orange/widgets/visualize/tests/test_owboxplot.py b/Orange/widgets/visualize/tests/test_owboxplot.py
index fc1bdb9a2d2..c3d90e89659 100644
--- a/Orange/widgets/visualize/tests/test_owboxplot.py
+++ b/Orange/widgets/visualize/tests/test_owboxplot.py
@@ -3,7 +3,7 @@
 
 import numpy as np
 
-from Orange.data import Table, ContinuousVariable
+from Orange.data import Table, ContinuousVariable, StringVariable, Domain
 from Orange.widgets.visualize.owboxplot import OWBoxPlot, FilterGraphicsRectItem
 from Orange.widgets.tests.base import WidgetTest, WidgetOutputsTestMixin
 
@@ -56,15 +56,37 @@ def test_input_data_missings_cont_no_group_var(self):
     def test_input_data_missings_disc_group_var(self):
         """Check widget with discrete data with missing values and group variable"""
         data = self.zoo
-        data.X[:, 0] = np.nan
+        data.X[:, 1] = np.nan
+        data.domain.attributes[1].values = []
         self.send_signal("Data", data)
+        self.widget.controls.order_by_importance.setChecked(True)
+        self._select_list_items(self.widget.controls.attribute)
+        self._select_list_items(self.widget.controls.group_var)
 
     def test_input_data_missings_disc_no_group_var(self):
         """Check widget discrete data with missing values and no group variable"""
         data = self.zoo
         data.domain.class_var = ContinuousVariable("cls")
-        data.X[:, 0] = np.nan
+        data.X[:, 1] = np.nan
+        data.domain.attributes[1].values = []
+        self.send_signal("Data", data)
+        self.widget.controls.order_by_importance.setChecked(True)
+        self._select_list_items(self.widget.controls.attribute)
+        self._select_list_items(self.widget.controls.group_var)
+
+    def test_attribute_combinations(self):
+        data = Table("anneal")
         self.send_signal("Data", data)
+        group_list = self.widget.controls.group_var
+        m = group_list.selectionModel()
+        for i in range(len(group_list.model())):
+            m.setCurrentIndex(group_list.model().index(i), m.ClearAndSelect)
+            self._select_list_items(self.widget.controls.attribute)
+
+    def _select_list_items(self, _list):
+        model = _list.selectionModel()
+        for i in range(len(_list.model())):
+            model.setCurrentIndex(_list.model().index(i), model.ClearAndSelect)
 
     def test_apply_sorting(self):
         controls = self.widget.controls
@@ -122,3 +144,11 @@ def _select_data(self):
         return [100, 103, 104, 108, 110, 111, 112, 115, 116,
                 120, 123, 124, 126, 128, 132, 133, 136, 137,
                 139, 140, 141, 143, 144, 145, 146, 147, 148]
+
+    def test_continuous_metas(self):
+        domain = self.iris.domain
+        metas = domain.attributes[:-1] + (StringVariable("str"),)
+        domain = Domain([], domain.class_var, metas)
+        data = Table.from_table(domain, self.iris)
+        self.send_signal("Data", data)
+        self.widget.controls.order_by_importance.setChecked(True)