From 6194dbcdffb9a81da820210682c9efc2bec13546 Mon Sep 17 00:00:00 2001 From: Ales Erjavec Date: Mon, 2 Dec 2019 16:33:48 +0100 Subject: [PATCH 1/3] owcsvimport: Use round trip parser --- Orange/widgets/data/owcsvimport.py | 1 + 1 file changed, 1 insertion(+) diff --git a/Orange/widgets/data/owcsvimport.py b/Orange/widgets/data/owcsvimport.py index 4b99ede35c9..9220a77159b 100644 --- a/Orange/widgets/data/owcsvimport.py +++ b/Orange/widgets/data/owcsvimport.py @@ -1246,6 +1246,7 @@ def expand(ranges): header=header, skiprows=skiprows, dtype=dtypes, parse_dates=parse_dates, prefix=prefix, na_values=na_values, keep_default_na=False, + float_precision="round_trip", **numbers_format_kwds ) if columns_ignored: From 3c05722eaa996f18d74cae97e2e959c5ad6f4e99 Mon Sep 17 00:00:00 2001 From: Ales Erjavec Date: Fri, 13 Dec 2019 13:23:20 +0100 Subject: [PATCH 2/3] oweditdomain: Add column type 'reinterpret' transform --- .coveragerc | 2 + Orange/widgets/data/oweditdomain.py | 1012 +++++++++++++++-- .../widgets/data/tests/test_oweditdomain.py | 321 +++++- 3 files changed, 1233 insertions(+), 102 deletions(-) diff --git a/.coveragerc b/.coveragerc index 81474b67f88..05c224efa30 100644 --- a/.coveragerc +++ b/.coveragerc @@ -17,3 +17,5 @@ exclude_lines = raise NotImplementedError if __name__ == .__main__.: except MemoryError + assert False + raise AssertionError diff --git a/Orange/widgets/data/oweditdomain.py b/Orange/widgets/data/oweditdomain.py index cb07d9c09ba..015437d081e 100644 --- a/Orange/widgets/data/oweditdomain.py +++ b/Orange/widgets/data/oweditdomain.py @@ -7,41 +7,51 @@ """ import warnings from xml.sax.saxutils import escape -from itertools import zip_longest +from itertools import zip_longest, repeat, chain from contextlib import contextmanager from collections import namedtuple, Counter -from functools import singledispatch +from functools import singledispatch, partial from typing import ( Tuple, List, Any, Optional, Union, Dict, Sequence, Iterable, NamedTuple, - FrozenSet, + FrozenSet, Type, Callable, TypeVar, Mapping, Hashable ) - from AnyQt.QtWidgets import ( QWidget, QListView, QTreeView, QVBoxLayout, QHBoxLayout, QFormLayout, - QToolButton, QLineEdit, QAction, QActionGroup, QStackedWidget, QGroupBox, + QToolButton, QLineEdit, QAction, QActionGroup, QGroupBox, QStyledItemDelegate, QStyleOptionViewItem, QStyle, QSizePolicy, QToolTip, - QDialogButtonBox, QPushButton, QCheckBox, QComboBox, QShortcut + QDialogButtonBox, QPushButton, QCheckBox, QComboBox, QShortcut, + QStackedLayout ) from AnyQt.QtGui import QStandardItemModel, QStandardItem, QKeySequence, QIcon from AnyQt.QtCore import ( Qt, QEvent, QSize, QModelIndex, QAbstractItemModel, QPersistentModelIndex, - QRect + QRect, ) from AnyQt.QtCore import pyqtSignal as Signal, pyqtSlot as Slot import numpy as np +import pandas as pd import Orange.data -from Orange.preprocess.transformation import Identity, Lookup +from Orange.preprocess.transformation import Transformation, Identity, Lookup from Orange.widgets import widget, gui, settings from Orange.widgets.utils import itemmodels from Orange.widgets.utils.widgetpreview import WidgetPreview from Orange.widgets.widget import Input, Output +ndarray = np.ndarray # pylint: disable=invalid-name +MArray = np.ma.MaskedArray +DType = Union[np.dtype, type] + +A = TypeVar("A") # pylint: disable=invalid-name +B = TypeVar("B") # pylint: disable=invalid-name +V = TypeVar("V", bound=Orange.data.Variable) # pylint: disable=invalid-name +H = TypeVar("H", bound=Hashable) # pylint: disable=invalid-name + -def unique(sequence): +def unique(sequence: Iterable[H]) -> Iterable[H]: """ Return unique elements in `sequence`, preserving their (first seen) order. """ @@ -49,6 +59,20 @@ def unique(sequence): return iter(dict.fromkeys(sequence)) +class _DataType: + def __eq__(self, other): + """Equal if `other` has the same type and all elements compare equal.""" + if type(self) is not type(other): + return False + return super().__eq__(other) + + def __ne__(self, other): + return not self == other + + def __hash__(self): + return hash((type(self), super().__hash__())) + + #: An ordered sequence of key, value pairs (variable annotations) AnnotationsType = Tuple[Tuple[str, str], ...] @@ -56,7 +80,7 @@ def unique(sequence): # Define abstract representation of the variable types edited class Categorical( - NamedTuple("Categorical", [ + _DataType, NamedTuple("Categorical", [ ("name", str), ("categories", Tuple[str, ...]), ("annotations", AnnotationsType), @@ -68,7 +92,7 @@ class Ordered(Categorical): class Real( - NamedTuple("Real", [ + _DataType, NamedTuple("Real", [ ("name", str), # a precision (int, and a format specifier('f', 'g', or '') ("format", Tuple[int, str]), @@ -77,14 +101,14 @@ class Real( class String( - NamedTuple("String", [ + _DataType, NamedTuple("String", [ ("name", str), ("annotations", AnnotationsType), ])): pass class Time( - NamedTuple("Time", [ + _DataType, NamedTuple("Time", [ ("name", str), ("annotations", AnnotationsType), ])): pass @@ -96,7 +120,7 @@ class Time( # Define variable transformations. -class Rename(namedtuple("Rename", ["name"])): +class Rename(_DataType, namedtuple("Rename", ["name"])): """ Rename a variable. @@ -122,7 +146,7 @@ def __call__(self, var): CategoriesMappingType = List[Tuple[Optional[str], Optional[str]]] -class CategoriesMapping(namedtuple("CategoriesMapping", ["mapping"])): +class CategoriesMapping(_DataType, namedtuple("CategoriesMapping", ["mapping"])): """ Change categories of a categorical variable. @@ -132,11 +156,11 @@ class CategoriesMapping(namedtuple("CategoriesMapping", ["mapping"])): """ def __call__(self, var): # type: (Categorical) -> Categorical - cat = list(unique(cj for _, cj in self.mapping if cj is not None)) + cat = tuple(unique(cj for _, cj in self.mapping if cj is not None)) return var._replace(categories=cat) -class Annotate(namedtuple("Annotate", ["annotations"])): +class Annotate(_DataType, namedtuple("Annotate", ["annotations"])): """ Replace variable annotations. """ @@ -144,7 +168,7 @@ def __call__(self, var): return var._replace(annotations=self.annotations) -class ChangeOrdered(NamedTuple("ChangeOrdered", [("ordered", bool)])): +class ChangeOrdered(_DataType, NamedTuple("ChangeOrdered", [("ordered", bool)])): """ Change Categorical <-> Ordered """ @@ -153,6 +177,149 @@ class ChangeOrdered(NamedTuple("ChangeOrdered", [("ordered", bool)])): Transform = Union[Rename, CategoriesMapping, Annotate, ChangeOrdered] TransformTypes = (Rename, CategoriesMapping, Annotate, ChangeOrdered) +CategoricalTransformTypes = (CategoriesMapping, ChangeOrdered) + + +# Reinterpret vector transformations. +class CategoricalVector( + _DataType, NamedTuple("CategoricalVector", [ + ("vtype", Categorical), + ("data", Callable[[], MArray]), + ])): ... + + +class RealVector( + _DataType, NamedTuple("RealVector", [ + ("vtype", Real), + ("data", Callable[[], MArray]), + ])): ... + + +class StringVector( + _DataType, NamedTuple("StringVector", [ + ("vtype", String), + ("data", Callable[[], MArray]), + ])): ... + + +class TimeVector( + _DataType, NamedTuple("TimeVector", [ + ("vtype", Time), + ("data", Callable[[], MArray]), + ])): ... + + +DataVector = Union[CategoricalVector, RealVector, StringVector, TimeVector] +DataVectorTypes = (CategoricalVector, RealVector, StringVector, TimeVector) + + +class AsString(_DataType, NamedTuple("AsString", [])): + """Reinterpret a data vector as a string.""" + def __call__(self, vector: DataVector) -> StringVector: + var, _ = vector + if isinstance(var, String): + return vector + return StringVector( + String(var.name, var.annotations), + lambda: as_string(vector.data()), + ) + + +class AsContinuous(_DataType, NamedTuple("AsContinuous", [])): + """ + Reinterpret as a continuous variable (values that do not parse as + float are NaN). + """ + def __call__(self, vector: DataVector) -> RealVector: + var, _ = vector + if isinstance(var, Real): + return vector + elif isinstance(var, Categorical): + def data() -> MArray: + d = vector.data() + a = categorical_to_string_vector(d, var.values) + return MArray(as_float_or_nan(a, where=a.mask), mask=a.mask) + return RealVector( + Real(var.name, (6, 'g'), var.annotations), data + ) + elif isinstance(var, Time): + return RealVector( + Real(var.name, (6, 'g'), var.annotations), + lambda: vector.data().astype(float) + ) + elif isinstance(var, String): + def data(): + s = vector.data() + return MArray(as_float_or_nan(s, where=s.mask), mask=s.mask) + return RealVector( + Real(var.name, (6, "g"), var.annotations), data + ) + raise AssertionError + + +class AsCategorical(_DataType, namedtuple("AsCategorical", [])): + """Reinterpret as a categorical variable""" + def __call__(self, vector: DataVector) -> CategoricalVector: + # this is the main complication in type transformation since we need + # the data and not just the variable description + var, _ = vector + if isinstance(var, Categorical): + return vector + if isinstance(var, Real): + data, values = categorical_from_vector(vector.data()) + return CategoricalVector( + Categorical(var.name, values, var.annotations), + lambda: data + ) + elif isinstance(var, Time): + data, values = categorical_from_vector(vector.data()) + return CategoricalVector( + Categorical(var.name, values, var.annotations), + lambda: data + ) + elif isinstance(var, String): + data, values = categorical_from_vector(vector.data()) + return CategoricalVector( + Categorical(var.name, values, var.annotations), + lambda: data + ) + raise AssertionError + + +class AsTime(_DataType, namedtuple("AsTime", [])): + """Reinterpret as a datetime vector""" + def __call__(self, vector: DataVector) -> TimeVector: + var, _ = vector + if isinstance(var, Time): + return vector + elif isinstance(var, Real): + return TimeVector( + Time(var.name, var.annotations), + lambda: vector.data().astype("M8[us]") + ) + elif isinstance(var, Categorical): + def data(): + d = vector.data() + s = categorical_to_string_vector(d, var.values) + dt = pd.to_datetime(s, errors="coerce").values.astype("M8[us]") + return MArray(dt, mask=d.mask) + return TimeVector( + Time(var.name, var.annotations), data + ) + elif isinstance(var, String): + def data(): + s = vector.data() + dt = pd.to_datetime(s, errors="coerce").values.astype("M8[us]") + return MArray(dt, mask=s.mask) + return TimeVector( + Time(var.name, var.annotations), data + ) + raise AssertionError + + +ReinterpretTransform = Union[AsCategorical, AsContinuous, AsTime, AsString] +ReinterpretTransformTypes = (AsCategorical, AsContinuous, AsTime, AsString) + def deconstruct(obj): # type: (tuple) -> Tuple[str, Tuple[Any, ...]] @@ -194,6 +361,78 @@ def reconstruct(tname, args): return constructor(*args) +def formatter_for_dtype(dtype: np.dtype) -> Callable[[Any], str]: + if dtype.metadata is None: + return str + else: + return dtype.metadata.get("__formatter", str) # metadata abuse + + +def masked_unique(data: MArray) -> Tuple[MArray, ndarray]: + if not np.any(data.mask): + return np.ma.unique(data, return_inverse=True) + elif data.dtype.kind == "O": + # np.ma.unique does not work for object arrays + # (no ma.minimum_fill_value for object arrays) + # maybe sorted(set(data.data[...])) + unq = np.unique(data.data[~data.mask]) + mapper = make_dict_mapper( + DictMissingConst(len(unq), ((v, i) for i, v in enumerate(unq))) + ) + index = mapper(data.data) + unq = np.array(unq.tolist() + [data.fill_value], dtype=data.dtype) + unq_mask = [False] * unq.size + unq_mask[-1] = True + unq = MArray(unq, mask=unq_mask) + return unq, index + else: + unq, index = np.ma.unique(data, return_inverse=True) + assert not np.any(unq.mask[:-1]), \ + "masked value if present must be in last position" + return unq, index + + +def categorical_from_vector(data: MArray) -> Tuple[MArray, Tuple[str, ...]]: + formatter = formatter_for_dtype(data.dtype) + unq, index = categorize_unique(data) + if formatter is not str: + # str(np.array([0], "M8[s]")[0]) is different then + # str(np.array([0], "M8[s]").astype(object)[0]) which is what + # as_string is doing + names = tuple(map(formatter, unq.astype(object))) + else: + names = tuple(as_string(unq)) + data = MArray( + index, mask=data.mask, + dtype=np.dtype(int, metadata={ + "__formater": lambda i: names[i] if 0 <= i < unq.size else "?" + }) + ) + return data, names + + +def categorize_unique(data: MArray) -> Tuple[ndarray, MArray]: + unq, index = masked_unique(data) + if np.any(unq.mask): + unq = unq[:-1] + assert not np.any(unq.mask), "masked value if present must be last" + unq = unq.data + index[data.mask] = -1 + index = MArray(index, mask=data.mask) + return unq, index + + +def categorical_to_string_vector(data: MArray, values: Tuple[str, ...]) -> MArray: + lookup = np.asarray(values, object) + out = np.full(data.shape, "", dtype=object) + mask_ = ~data.mask + out[mask_] = lookup[data.data[mask_]] + return MArray(out, mask=data.mask, fill_value="") + + +# Item models + + class DictItemsModel(QStandardItemModel): """A Qt Item Model class displaying the contents of a python dictionary. @@ -230,6 +469,7 @@ def get_dict(self): class FixedSizeButton(QToolButton): + def __init__(self, *args, defaultAction=None, **kwargs): super().__init__(*args, **kwargs) sh = self.sizePolicy() @@ -554,7 +794,7 @@ class CountedStateModel(CountedListModel): """ Count by EditRole (name) and EditStateRole (ItemEditState) """ - # The purpouse is to count the items with target name only for + # The purpose is to count the items with target name only for # ItemEditState.NoRole, i.e. excluding added/dropped values. # def key(self, index): # type: (QModelIndex) -> Tuple[Any, Any] @@ -621,6 +861,7 @@ def __init__(self, *args, **kwargs): self.values_edit = QListView( editTriggers=QListView.DoubleClicked | QListView.EditKeyPressed, selectionMode=QListView.ExtendedSelection, + uniformItemSizes=True, ) self.values_edit.setItemDelegate(CategoriesEditDelegate(self)) self.values_edit.setModel(self.values_model) @@ -805,7 +1046,7 @@ def __categories_mapping(self): model = self.values_model source = self.var.categories - res = [] + res = [] # type: CategoriesMappingType for i in range(model.rowCount()): midx = model.index(i, 0) category = midx.data(Qt.EditRole) @@ -830,6 +1071,7 @@ def get_data(self): if var is None: return var, tr mapping = self.__categories_mapping() + assert len(mapping) >= len(var.categories), f'{mapping}, {var}' if any(_1 != _2 or _2 != _3 for (_1, _2), _3 in zip_longest(mapping, var.categories)): tr.append(CategoriesMapping(mapping)) @@ -1015,29 +1257,41 @@ class TimeVariableEditor(VariableEditor): def variable_icon(var): - # type: (Variable) -> QIcon - if isinstance(var, (Categorical, Ordered)): + # type: (Union[Variable, Type[Variable], ReinterpretTransform]) -> QIcon + if not isinstance(var, type): + var = type(var) + + if issubclass(var, (Categorical, Ordered, AsCategorical)): return gui.attributeIconDict[1] - elif isinstance(var, Real): + elif issubclass(var, (Real, AsContinuous)): return gui.attributeIconDict[2] - elif isinstance(var, String): + elif issubclass(var, (String, AsString)): return gui.attributeIconDict[3] - elif isinstance(var, Time): + elif issubclass(var, (Time, AsTime)): return gui.attributeIconDict[4] else: return gui.attributeIconDict[-1] -#: ItemDataRole storing the variable transform (`List[Transform]`) +#: ItemDataRole storing the data vector transform +#: (`List[Union[ReinterpretTransform, Transform]]`) TransformRole = Qt.UserRole + 42 class VariableEditDelegate(QStyledItemDelegate): + ReinterpretNames = { + AsCategorical: "categorical", AsContinuous: "numeric", + AsString: "string", AsTime: "time" + } + def initStyleOption(self, option, index): # type: (QStyleOptionViewItem, QModelIndex) -> None super().initStyleOption(option, index) item = index.data(Qt.EditRole) var = tr = None + if isinstance(item, DataVectorTypes): + var = item.vtype + option.icon = variable_icon(var) if isinstance(item, VariableTypes): var = item option.icon = variable_icon(item) @@ -1045,21 +1299,28 @@ def initStyleOption(self, option, index): var = item option.icon = gui.attributeIconDict[var] - if not option.icon.isNull(): - option.features |= QStyleOptionViewItem.HasDecoration - transform = index.data(TransformRole) if not isinstance(transform, list): transform = [] + if transform and isinstance(transform[0], ReinterpretTransformTypes): + option.icon = variable_icon(transform[0]) + + if not option.icon.isNull(): + option.features |= QStyleOptionViewItem.HasDecoration + if var is not None: text = var.name for tr in transform: if isinstance(tr, Rename): text = ("{} \N{RIGHTWARDS ARROW} {}" .format(var.name, tr.name)) + for tr in transform: + if isinstance(tr, ReinterpretTransformTypes): + text += f" (reinterpreted as " \ + f"{self.ReinterpretNames[type(tr)]})" option.text = text - if tr: + if transform: # mark as changed (maybe also change color, add text, ...) option.font.setItalic(True) @@ -1077,9 +1338,181 @@ def data(self, index, role=Qt.DisplayRole): item = self[row] if isinstance(item, VariableTypes): return item.name + if isinstance(item, DataVectorTypes): + return item.vtype.name return super().data(index, role) +class ReinterpretVariableEditor(VariableEditor): + """ + A 'compound' variable editor capable of variable type reinterpretations. + """ + _editors = { + Categorical: 0, Ordered: 0, + Real: 1, + String: 2, + Time: 3, + type(None): -1, + } + + def __init__(self, parent=None, **kwargs): + # Explicitly skip VariableEditor's __init__, this is ugly but we have + # a completely different layout/logic as a compound editor (should + # really not subclass VariableEditor). + super(VariableEditor, self).__init__(parent, **kwargs) # pylint: disable=bad-super-call + self.var = None # type: Optional[Variable] + self.__transform = None # type: Optional[ReinterpretTransform] + self.__data = None # type: Optional[DataVector] + #: Stored transform state indexed by variable. Used to preserve state + #: between type switches. + self.__history = {} # type: Dict[Variable, List[Transform]] + + self.setLayout(QStackedLayout()) + + def decorate(editor: VariableEditor) -> VariableEditor: + """insert an type combo box into a `editor`'s layout.""" + form = editor.layout().itemAt(0) + assert isinstance(form, QFormLayout) + typecb = QComboBox(objectName="type-combo") + typecb.addItem(variable_icon(Categorical), "Categorical", Categorical) + typecb.addItem(variable_icon(Real), "Numeric", Real) + typecb.addItem(variable_icon(String), "Text", String) + typecb.addItem(variable_icon(Time), "Time", Time) + typecb.activated[int].connect(self.__reinterpret_activated) + form.insertRow(1, "Type:", typecb) + # Insert the typecb after name edit in the focus chain + nameedit = editor.findChild(QLineEdit, ) + if nameedit is not None: + QWidget.setTabOrder(nameedit, typecb) + return editor + # This is ugly. Create an editor for each type and insert a type + # selection combo box into its layout. Switch between widgets + # on type change. + dedit = decorate(DiscreteVariableEditor()) + cedit = decorate(ContinuousVariableEditor()) + tedit = decorate(TimeVariableEditor()) + sedit = decorate(VariableEditor()) + + for ed in [dedit, cedit, tedit, sedit]: + ed.variable_changed.connect(self.variable_changed) + + self.layout().addWidget(dedit) + self.layout().addWidget(cedit) + self.layout().addWidget(sedit) + self.layout().addWidget(tedit) + + def set_data(self, data, transform=()): # pylint: disable=arguments-differ + # type: (Optional[DataVector], Sequence[Transform]) -> None + """ + Set the editor data. + + Note + ---- + This must be a `DataVector` as the vector's values are needed for type + reinterpretation/casts. + + If the `transform` sequence contains ReinterpretTransform then it + must be in the first position. + """ + type_transform = None # type: Optional[ReinterpretTransform] + if transform: + _tr = transform[0] + if isinstance(_tr, ReinterpretTransformTypes): + type_transform = _tr + transform = transform[1:] + assert not any(isinstance(t, ReinterpretTransformTypes) + for t in transform) + self.__transform = type_transform + self.__data = data + self.var = data.vtype if data is not None else None + + if type_transform is not None and data is not None: + data = type_transform(data) + if data is not None: + var = data.vtype + else: + var = None + index = self._editors.get(type(var), -1) + self.layout().setCurrentIndex(index) + if index != -1: + w = self.layout().currentWidget() + assert isinstance(w, VariableEditor) + w.set_data(var, transform) + self.__history[var] = tuple(transform) + cb = w.findChild(QComboBox, "type-combo") + cb.setCurrentIndex(index) + + def get_data(self): + # type: () -> Tuple[Variable, Sequence[Transform]] + editor = self.layout().currentWidget() # type: VariableEditor + var, tr = editor.get_data() + if type(var) != type(self.var): # pylint: disable=unidiomatic-typecheck + assert self.__transform is not None + var = self.var + tr = [self.__transform, *tr] + return var, tr + + def __reinterpret_activated(self, index): + layout = self.layout() + assert isinstance(layout, QStackedLayout) + if index == layout.currentIndex(): + return + current = layout.currentWidget() + assert isinstance(current, VariableEditor) + Specific = { + Categorical: CategoricalTransformTypes + } + _var, _tr = current.get_data() + if _var is not None: + self.__history[_var] = _tr + + var = self.var + transform = self.__transform + # take/preserve the general transforms that apply to all types + specific = Specific.get(type(var), ()) + _tr = [t for t in _tr if not isinstance(t, specific)] + + layout.setCurrentIndex(index) + w = layout.currentWidget() + cb = w.findChild(QComboBox, "type-combo") + cb.setCurrentIndex(index) + cb.setFocus() + target = cb.itemData(index, Qt.UserRole) + assert issubclass(target, VariableTypes) + if not isinstance(var, target): + if target == Real: + transform = AsContinuous() + elif target == Categorical: + transform = AsCategorical() + elif target == Time: + transform = AsTime() + elif target == String: + transform = AsString() + else: + transform = None + var = self.var + + self.__transform = transform + if transform is not None and self.__data is not None: + data = transform(self.__data) + var = data.vtype + + if var in self.__history: + tr = self.__history[var] + else: + tr = [] + # type specific transform + specific = Specific.get(type(var), ()) + # merge tr and _tr + tr = _tr + [t for t in tr if isinstance(t, specific)] + with disconnected( + w.variable_changed, self.variable_changed, + Qt.UniqueConnection + ): + w.set_data(var, tr) + self.variable_changed.emit() + + class OWEditDomain(widget.OWWidget): name = "Edit Domain" description = "Rename variables, edit categories and variable annotations." @@ -1110,6 +1543,7 @@ def __init__(self): #: The current selected variable index self.selected_index = -1 self._invalidated = False + self.typeindex = 0 mainlayout = self.mainArea.layout() assert isinstance(mainlayout, QVBoxLayout) @@ -1135,14 +1569,9 @@ def __init__(self): box.setLayout(QVBoxLayout(margin=4)) layout.addWidget(box) - self.editor_stack = QStackedWidget() - - self.editor_stack.addWidget(DiscreteVariableEditor()) - self.editor_stack.addWidget(ContinuousVariableEditor()) - self.editor_stack.addWidget(TimeVariableEditor()) - self.editor_stack.addWidget(VariableEditor()) + self._editor = ReinterpretVariableEditor() - box.layout().addWidget(self.editor_stack) + box.layout().addWidget(self._editor) bbox = QDialogButtonBox() bbox.setStyleSheet( @@ -1185,7 +1614,7 @@ def set_data(self, data): self.data = data if self.data is not None: - self.set_domain(data.domain) + self.setup_model(data) self.openContext(self.data) self._restore() @@ -1211,7 +1640,7 @@ def reset_selected(self): tr = midx.data(TransformRole) if not tr: return # nothing to reset - editor = self.editor_stack.currentWidget() + editor = self._editor with disconnected(editor.variable_changed, self._on_variable_changed): model.setData(midx, [], TransformRole) @@ -1237,10 +1666,26 @@ def selected_var_index(self): assert len(rows) <= 1 return rows[0].row() if rows else -1 - def set_domain(self, domain): - # type: (Orange.data.Domain) -> None - self.variables_model[:] = [abstract(v) - for v in domain.variables + domain.metas] + def setup_model(self, data: Orange.data.Table): + model = self.variables_model + vars_ = [] + columns = [] + for i, _, var, coldata in enumerate_columns(data): + var = abstract(var) + vars_.append(var) + if isinstance(var, Categorical): + data = CategoricalVector(var, coldata) + elif isinstance(var, Real): + data = RealVector(var, coldata) + elif isinstance(var, Time): + data = TimeVector(var, coldata) + elif isinstance(var, String): + data = StringVector(var, coldata) + columns.append(data) + + model[:] = vars_ + for i, d in enumerate(columns): + model.setData(model.index(i), d, Qt.EditRole) def _restore(self, ): """ @@ -1249,16 +1694,16 @@ def _restore(self, ): model = self.variables_model for i in range(model.rowCount()): midx = model.index(i, 0) - var = model.data(midx, Qt.EditRole) - tr = self._restore_transform(var) + coldesc = model.data(midx, Qt.EditRole) # type: DataVector + tr = self._restore_transform(coldesc.vtype) if tr: model.setData(midx, tr, TransformRole) # Restore the current variable selection i = -1 if self._selected_item is not None: - for i, var in enumerate(model): - if var.name == self._selected_item: + for i, vec in enumerate(model): + if vec.vtype.name == self._selected_item: break if i == -1 and model.rowCount(): i = 0 @@ -1269,7 +1714,7 @@ def _restore(self, ): def _on_selection_changed(self): self.selected_index = self.selected_var_index() if self.selected_index != -1: - self._selected_item = self.variables_model[self.selected_index].name + self._selected_item = self.variables_model[self.selected_index].vtype.name else: self._selected_item = None self.open_editor(self.selected_index) @@ -1281,28 +1726,18 @@ def open_editor(self, index): if not 0 <= index < model.rowCount(): return idx = model.index(index, 0) - var = model.data(idx, Qt.EditRole) + vector = model.data(idx, Qt.EditRole) tr = model.data(idx, TransformRole) if tr is None: tr = [] - - editors = { - Categorical: 0, Ordered: 0, - Real: 1, - Time: 2, - String: 3 - } - - editor_index = editors.get(type(var), 3) - editor = self.editor_stack.widget(editor_index) - self.editor_stack.setCurrentWidget(editor) - editor.set_data(var, tr) + editor = self._editor + editor.set_data(vector, tr) editor.variable_changed.connect( self._on_variable_changed, Qt.UniqueConnection ) def clear_editor(self): - current = self.editor_stack.currentWidget() + current = self._editor try: current.variable_changed.disconnect(self._on_variable_changed) except TypeError: @@ -1313,7 +1748,7 @@ def clear_editor(self): def _on_variable_changed(self): """User edited the current variable in editor.""" assert 0 <= self.selected_index <= len(self.variables_model) - editor = self.editor_stack.currentWidget() + editor = self._editor var, transform = editor.get_data() model = self.variables_model midx = model.index(self.selected_index, 0) @@ -1365,7 +1800,7 @@ def commit(self): model = self.variables_model def state(i): - # type: (int) -> Tuple[Variable, List[Transform]] + # type: (int) -> Tuple[DataVector, List[Transform]] midx = self.variables_model.index(i, 0) return (model.data(midx, Qt.EditRole), model.data(midx, TransformRole)) @@ -1377,11 +1812,11 @@ def state(i): output_vars = [] input_vars = data.domain.variables + data.domain.metas - assert all(v_.name == v.name + assert all(v_.vtype.name == v.name for v, (v_, _) in zip(input_vars, state)) for (_, tr), v in zip(state, input_vars): if tr: - var = apply_transform(v, tr) + var = apply_transform(v, data, tr) else: var = v output_vars.append(var) @@ -1394,11 +1829,16 @@ def state(i): domain = data.domain nx = len(domain.attributes) ny = len(domain.class_vars) - domain = Orange.data.Domain( - output_vars[:nx], output_vars[nx: nx + ny], output_vars[nx + ny:] - ) + + Xs = output_vars[:nx] + Ys = output_vars[nx: nx + ny] + Ms = output_vars[nx + ny:] + # Move non primitive Xs, Ys to metas (if they were changed) + Ms += [v for v in Xs + Ys if not v.is_primitive()] + Xs = [v for v in Xs if v.is_primitive()] + Ys = [v for v in Ys if v.is_primitive()] + domain = Orange.data.Domain(Xs, Ys, Ms) new_data = data.transform(domain) - # print(new_data) self.Outputs.data.send(new_data) def sizeHint(self): @@ -1414,9 +1854,9 @@ def send_report(self): for i in range(model.rowCount()) for midx in [model.index(i)]) parts = [] - for var, trs in state: + for vector, trs in state: if trs: - parts.append(report_transform(var, trs)) + parts.append(report_transform(vector.vtype, trs)) if parts: html = ("