Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[FIX] PCA - Output instance of table subclass when instance of table subclass on input #6536

Merged
merged 2 commits into from
Aug 18, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
37 changes: 16 additions & 21 deletions Orange/widgets/unsupervised/owpca.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,13 +5,15 @@
from AnyQt.QtCore import Qt

from orangewidget.report import bool_str
from orangewidget.settings import Setting

from Orange.data import Table, Domain, StringVariable, ContinuousVariable
from Orange.data.util import get_unique_names
from Orange.data.sql.table import SqlTable, AUTO_DL_LIMIT
from Orange.preprocess import preprocess
from Orange.projection import PCA
from Orange.widgets import widget, gui, settings
from Orange.widgets import widget, gui
from Orange.widgets.utils.annotated_data import add_columns
from Orange.widgets.utils.concurrent import ConcurrentWidgetMixin
from Orange.widgets.utils.slidergraph import SliderGraph
from Orange.widgets.utils.widgetpreview import WidgetPreview
Expand All @@ -38,12 +40,12 @@ class Outputs:
components = Output("Components", Table)
pca = Output("PCA", PCA, dynamic=False)

ncomponents = settings.Setting(2)
variance_covered = settings.Setting(100)
auto_commit = settings.Setting(True)
normalize = settings.Setting(True)
maxp = settings.Setting(20)
axis_labels = settings.Setting(10)
ncomponents = Setting(2)
variance_covered = Setting(100)
auto_commit = Setting(True)
normalize = Setting(True)
maxp = Setting(20)
axis_labels = Setting(10)

graph_name = "plot.plotItem" # QGraphicsView (pg.PlotWidget -> SliderGraph)

Expand Down Expand Up @@ -222,8 +224,7 @@ def _setup_plot(self):
self._update_axis()

def _on_cut_changed(self, components):
if components == self.ncomponents \
or self.ncomponents == 0:
if self.ncomponents in (components, 0):
return

self.ncomponents = components
Expand Down Expand Up @@ -333,9 +334,9 @@ def commit(self):
proposed = [a.name for a in self._pca.orig_domain.attributes]
meta_name = get_unique_names(proposed, 'components')
meta_vars = [StringVariable(name=meta_name)]
metas = numpy.array([['PC{}'.format(i + 1)
for i in range(self.ncomponents)]],
dtype=object).T
metas = numpy.array(
[[f"PC{i + 1}"for i in range(self.ncomponents)]], dtype=object
).T
if self._variance_ratio is not None:
variance_name = get_unique_names(proposed, "variance")
meta_vars.append(ContinuousVariable(variance_name))
Expand All @@ -351,14 +352,8 @@ def commit(self):
metas=metas)
components.name = 'components'

data_dom = Domain(
self.data.domain.attributes,
self.data.domain.class_vars,
self.data.domain.metas + domain.attributes)
data = Table.from_numpy(
data_dom, self.data.X, self.data.Y,
numpy.hstack((self.data.metas, transformed.X)),
Copy link
Member

@markotoplak markotoplak Aug 17, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

How about replacing all of the code that builds this output with the snippet below? Admittedly, the data is then transformed twice, but I think this transformation is fast enough that we do not need to worry.

data = self.data.transform(add_columns(self.data.domain,
                                                   metas=domain.attributes))

This is what @noahnovsak did on the dask branch.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Forgot about this option. It is done now.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Now you do not need the next part (under with:)

ids=self.data.ids)
data_dom = add_columns(self.data.domain, metas=domain.attributes)
data = self.data.transform(data_dom)

self.Outputs.transformed_data.send(transformed)
self.Outputs.components.send(components)
Expand All @@ -371,7 +366,7 @@ def send_report(self):
self.report_items((
("Normalize data", bool_str(self.normalize)),
("Selected components", self.ncomponents),
("Explained variance", "{:.3f} %".format(self.variance_covered))
("Explained variance", f"{self.variance_covered:.3f} %")
))
self.report_plot()

Expand Down
27 changes: 21 additions & 6 deletions Orange/widgets/unsupervised/tests/test_owpca.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,15 +4,15 @@
from unittest.mock import patch, Mock

import numpy as np
from sklearn.utils import check_random_state
from sklearn.utils.extmath import svd_flip

from Orange.data import Table, Domain, ContinuousVariable, TimeVariable
from Orange.preprocess import preprocess
from Orange.widgets.tests.base import WidgetTest
from Orange.widgets.tests.utils import table_dense_sparse, possible_duplicate_table
from Orange.widgets.unsupervised.owpca import OWPCA
from Orange.tests import test_filename
from sklearn.utils import check_random_state
from sklearn.utils.extmath import svd_flip


class TestOWPCA(WidgetTest):
Expand Down Expand Up @@ -63,19 +63,19 @@ def test_limit_components(self):
self.widget._setup_plot() # pylint: disable=protected-access

def test_migrate_settings_limits_components(self):
settings = dict(ncomponents=10)
settings = {"ncomponents": 10}
OWPCA.migrate_settings(settings, 0)
self.assertEqual(settings['ncomponents'], 10)
settings = dict(ncomponents=101)
settings = {"ncomponents": 101}
OWPCA.migrate_settings(settings, 0)
self.assertEqual(settings['ncomponents'], 100)

def test_migrate_settings_changes_variance_covered_to_int(self):
settings = dict(variance_covered=17.5)
settings = {"variance_covered": 17.5}
OWPCA.migrate_settings(settings, 0)
self.assertEqual(settings["variance_covered"], 17)

settings = dict(variance_covered=float('nan'))
settings = {"variance_covered": float('nan')}
OWPCA.migrate_settings(settings, 0)
self.assertEqual(settings["variance_covered"], 100)

Expand Down Expand Up @@ -277,6 +277,21 @@ def test_output_data(self):
output = self.get_output(widget.Outputs.data)
self.assertIsNone(output)

def test_table_subclass(self):
"""
When the input table is an instance of a Table subclass (e.g. Corpus), the
resulting tables should also be instances of that subclass
"""
class TableSub(Table): # pylint: disable=abstract-method
pass

table_subclass = TableSub(self.iris)
self.send_signal(self.widget.Inputs.data, table_subclass)
data_out = self.get_output(self.widget.Outputs.data)
trans_data_out = self.get_output(self.widget.Outputs.transformed_data)
self.assertIsInstance(data_out, TableSub)
self.assertIsInstance(trans_data_out, TableSub)


if __name__ == "__main__":
unittest.main()
Loading