From 98fe508f962372c8b05bfe52be84507f83a17193 Mon Sep 17 00:00:00 2001 From: janezd Date: Fri, 24 Jun 2016 11:59:10 +0200 Subject: [PATCH 1/9] Sieve: Refactor drawing --- Orange/widgets/visualize/owsieve.py | 298 +++++++++++++--------------- 1 file changed, 135 insertions(+), 163 deletions(-) diff --git a/Orange/widgets/visualize/owsieve.py b/Orange/widgets/visualize/owsieve.py index e5189e3af37..edc7c8a7f21 100644 --- a/Orange/widgets/visualize/owsieve.py +++ b/Orange/widgets/visualize/owsieve.py @@ -1,8 +1,12 @@ from itertools import chain from math import sqrt, floor, ceil +import numpy as np +from scipy import stats + from PyQt4.QtCore import Qt, QSize -from PyQt4.QtGui import (QGraphicsScene, QColor, QPen, QBrush, +from PyQt4.QtGui import (QGraphicsScene, QColor, QPen, QBrush, QTableView, + QStandardItemModel, QDialog, QApplication, QSizePolicy, QGraphicsLineItem) from Orange.data import Table, filter @@ -12,7 +16,7 @@ from Orange.statistics.contingency import get_contingency from Orange.widgets import gui from Orange.widgets.settings import DomainContextHandler, ContextSetting -from Orange.widgets.utils import getHtmlCompatibleString +from Orange.widgets.utils import getHtmlCompatibleString as to_html from Orange.widgets.utils.itemmodels import VariableListModel from Orange.widgets.visualize.owmosaic import ( CanvasText, CanvasRectangle, ViewWithPress, get_conditional_distribution) @@ -186,192 +190,160 @@ def update_selection(self): selection = self.data[sel_idx] self.send("Selection", selection) - # ----------------------------------------------------------------------- - # Everything from here on is ancient and has been changed only according - # to what has been changed above. 
Some clean-up may be in order some day - # + class ChiSqStats: + def __init__(self, data, attr1, attr2): + data = data[:, [attr1, attr2]] + self.observed = get_contingency(data, attr1, attr2) + self.n = np.sum(self.observed) + self.probs_x = self.observed.sum(axis=0) / self.n + self.probs_y = self.observed.sum(axis=1) / self.n + print(self.observed) + print(self.probs_x) + print(self.probs_y) + self.expected = np.outer(self.probs_y, self.probs_x) * self.n + self.residuals = \ + (self.observed - self.expected) / np.sqrt(self.expected) + self.chisqs = self.residuals ** 2 + self.chisq = np.sum(self.chisqs) + self.p = stats.distributions.chi2.sf( + self.chisq, (len(self.probs_x) - 1) * (len(self.probs_y) - 1)) + def updateGraph(self, *args): + def text(txt, *args, **kwargs): + return CanvasText(self.canvas, "", html_text=to_html(txt), + *args, **kwargs) + + def width(txt): + return text(txt, 0, 0, show=False).boundingRect().width() + for item in self.canvas.items(): self.canvas.removeItem(item) if self.data is None or len(self.data) == 0 or \ self.attrX is None or self.attrY is None: return - data = self.discrete_data[:, [self.attrX, self.attrY]] - valsX = [] - valsY = [] - contX = get_contingency(data, self.attrX, self.attrX) - contY = get_contingency(data, self.attrY, self.attrY) - # compute contingency of x and y attributes - for entry in contX: - sum_ = 0 - try: - for val in entry: sum_ += val - except: pass - valsX.append(sum_) - - for entry in contY: - sum_ = 0 - try: - for val in entry: sum_ += val - except: pass - valsY.append(sum_) - - contXY, _ = get_conditional_distribution( - data, [data.domain[self.attrX], data.domain[self.attrY]]) - # compute probabilities - probs = {} - for i in range(len(valsX)): - valx = valsX[i] - for j in range(len(valsY)): - valy = valsY[j] - try: - actualProb = contXY['%s-%s' %(data.domain[self.attrX].values[i], data.domain[self.attrY].values[j])] - # for val in contXY['%s-%s' %(i, j)]: actualProb += val - except: - actualProb = 0 
- probs['%s-%s' %(data.domain[self.attrX].values[i], data.domain[self.attrY].values[j])] = ((data.domain[self.attrX].values[i], valx), (data.domain[self.attrY].values[j], valy), actualProb, len(data)) - - #get text width of Y labels - max_ylabel_w = 0 - for j in range(len(valsY)): - xl = CanvasText(self.canvas, "", 0, 0, html_text= getHtmlCompatibleString(data.domain[self.attrY].values[j]), show=False) - max_ylabel_w = max(int(xl.boundingRect().width()), max_ylabel_w) - max_ylabel_w = min(max_ylabel_w, 200) #upper limit for label widths - # get text width of Y attribute name - text = CanvasText(self.canvas, data.domain[self.attrY].name, x = 0, y = 0, bold = 1, show = 0, vertical=True) - xOff = int(text.boundingRect().height() + max_ylabel_w) - yOff = 55 - sqareSize = min(self.canvasView.width() - xOff - 35, self.canvasView.height() - yOff - 50) - sqareSize = max(sqareSize, 10) - self.canvasView.setSceneRect(0, 0, self.canvasView.width(), self.canvasView.height()) - - # print graph name - name = "P(%s, %s) ≠ P(%s)×P(%s)" %(self.attrX, self.attrY, self.attrX, self.attrY) - CanvasText(self.canvas, "", xOff + sqareSize / 2, 20, Qt.AlignCenter, html_text= name) - CanvasText(self.canvas, "N = " + str(len(data)), xOff + sqareSize / 2, 38, Qt.AlignCenter, bold = 0) - - ###################### - # compute chi-square - chisquare = 0.0 - for i in range(len(valsX)): - for j in range(len(valsY)): - ((xAttr, xVal), (yAttr, yVal), actual, sum_) = probs['%s-%s' %(data.domain[self.attrX].values[i], data.domain[self.attrY].values[j])] - expected = float(xVal*yVal)/float(sum_) - if expected == 0: continue - pearson2 = (actual - expected)*(actual - expected) / expected - chisquare += pearson2 - - ###################### - # draw rectangles - currX = xOff + + ddomain = self.discrete_data.domain + attr_x, attr_y = self.attrX, self.attrY + disc_x, disc_y = ddomain[attr_x], ddomain[attr_y] + view = self.canvasView + + chi = self.ChiSqStats(self.discrete_data, attr_x, attr_y) + n = chi.n + 
max_ylabel_w = max((width(val) for val in disc_y.values), default=0) + max_ylabel_w = min(max_ylabel_w, 200) + x_off = width(attr_x) + max_ylabel_w + y_off = 15 + square_size = min(view.width() - x_off - 35, view.height() - y_off - 50) + square_size = max(square_size, 10) + self.canvasView.setSceneRect(0, 0, view.width(), view.height()) + + curr_x = x_off max_xlabel_h = 0 - normX, normY = sum(valsX), sum(valsY) self.areas = [] - for i in range(len(valsX)): - if valsX[i] == 0: continue - currY = yOff - width = int(float(sqareSize * valsX[i])/float(normX)) + for x, (px, xval_name) in enumerate(zip(chi.probs_x, disc_x.values)): + if px == 0: + continue + width = square_size * px + curr_y = y_off + for y in range(len(chi.probs_y) - 1, -1, -1): # bottom-up order + py = chi.probs_y[y] + yval_name = disc_y.values[y] + if py == 0: + continue + height = square_size * py - for j in range(len(valsY)-1, -1, -1): # this way we sort y values correctly - ((xAttr, xVal), (yAttr, yVal), actual, sum_) = probs['%s-%s' %(data.domain[self.attrX].values[i], data.domain[self.attrY].values[j])] - if valsY[j] == 0: continue - height = int(float(sqareSize * valsY[j])/float(normY)) - - # create rectangle selected = len(self.areas) in self.selection rect = CanvasRectangle( - self.canvas, currX+2, currY+2, width-4, height-4, z = -10, - onclick=self.select_area) - rect.value_pair = i, j + self.canvas, curr_x + 2, curr_y + 2, width - 4, height - 4, + z=-10, onclick=self.select_area) + rect.value_pair = x, y self.areas.append(rect) - self.addRectIndependencePearson(rect, currX+2, currY+2, width-4, height-4, (xAttr, xVal), (yAttr, yVal), actual, sum_, - width=1 + 3 * selected, # Ugly! This is needed since - # resize redraws the graph! When this is handled by resizing - # just the viewer, update_selection will take care of this - ) - - expected = float(xVal*yVal)/float(sum_) - pearson = (actual - expected) / sqrt(expected) - tooltipText = """X Attribute: %s
Value: %s
Number of instances (p(x)): %d (%.2f%%)
- Y Attribute: %s
Value: %s
Number of instances (p(y)): %d (%.2f%%)
- Number Of Instances (Probabilities):
Expected (p(x)p(y)): %.1f (%.2f%%)
Actual (p(x,y)): %d (%.2f%%) -
Statistics:
Chi-square: %.2f
Standardized Pearson residual: %.2f""" %(self.attrX, getHtmlCompatibleString(xAttr), xVal, 100.0*float(xVal)/float(sum_), self.attrY, getHtmlCompatibleString(yAttr), yVal, 100.0*float(yVal)/float(sum_), expected, 100.0*float(xVal*yVal)/float(sum_*sum_), actual, 100.0*float(actual)/float(sum_), chisquare, pearson ) - rect.setToolTip(tooltipText) - - currY += height - if currX == xOff: - CanvasText(self.canvas, "", xOff, currY - height / 2, Qt.AlignRight | Qt.AlignVCenter, html_text= getHtmlCompatibleString(data.domain[self.attrY].values[j])) - - xl = CanvasText(self.canvas, "", currX + width / 2, yOff + sqareSize, Qt.AlignHCenter | Qt.AlignTop, html_text= getHtmlCompatibleString(data.domain[self.attrX].values[i])) + self.show_pearson(rect, chi.residuals[y, x], 3 * selected) + + def _addeq(attr_name, txt): + if self.data.domain[attr_name] is ddomain[attr_name]: + return "=" + return " " if txt[0] in "<≥" else " in " + + tooltip_text = """ + {attrX}{xeq}{xval_name}: {obs_x}/{n} ({prob_x:.0f} %) +
+ {attrY}{yeq}{yval_name}: {obs_y}/{n} ({prob_y:.0f} %) +
+ combination of values:
+    expected {exp:.2f} ({p_exp:.0f} %)
+    observed {obs:.2f} ({p_obs:.0f} %) + """.format( + n=int(n), + attrX=to_html(attr_x), + xeq=_addeq(attr_x, xval_name), + xval_name=to_html(xval_name), + obs_x=int(chi.probs_x[x] * n), + prob_x=100 * chi.probs_x[x], + attrY=to_html(attr_y), + yeq=_addeq(attr_y, yval_name), + yval_name=to_html(yval_name), + obs_y=int(chi.probs_y[y] * n), + prob_y=100 * chi.probs_y[y], + exp=chi.expected[y, x], + p_exp=100 * chi.expected[y, x] / n, + obs=int(chi.observed[y, x]), + p_obs=100 * chi.observed[y, x] / n) + rect.setToolTip(tooltip_text) + + if not x: + text(yval_name, x_off, curr_y + height / 2, + Qt.AlignRight | Qt.AlignVCenter) + curr_y += height + + xl = text(xval_name, curr_x + width / 2, y_off + square_size, + Qt.AlignHCenter | Qt.AlignTop) max_xlabel_h = max(int(xl.boundingRect().height()), max_xlabel_h) + curr_x += width - currX += width - - # show attribute names - CanvasText(self.canvas, self.attrY, 0, yOff + sqareSize / 2, Qt.AlignLeft | Qt.AlignVCenter, bold = 1, vertical=True) - CanvasText(self.canvas, self.attrX, xOff + sqareSize / 2, yOff + sqareSize + max_xlabel_h, Qt.AlignHCenter | Qt.AlignTop, bold = 1) - + text(attr_y, 0, y_off + square_size / 2, Qt.AlignLeft | Qt.AlignVCenter, + bold=True, vertical=True) + text(attr_x, x_off + square_size / 2, + y_off + square_size + max_xlabel_h, Qt.AlignHCenter | Qt.AlignTop, + bold=True) - ###################################################################### - ## show deviations from attribute independence with standardized pearson residuals - def addRectIndependencePearson(self, rect, x, y, w, h, xAttr_xVal, yAttr_yVal, actual, sum, width): - xAttr, xVal = xAttr_xVal - yAttr, yVal = yAttr_yVal - expected = float(xVal*yVal)/float(sum) - pearson = (actual - expected) / sqrt(expected) + def show_pearson(self, rect, pearson, pen_width): + r = rect.rect() + x, y, w, h = r.x(), r.y(), r.width(), r.height() + if w == 0 or h == 0: + return - if pearson > 0: # if there are more examples that we would expect under the 
null hypothesis - intPearson = floor(pearson) - pen = QPen(QColor(0,0,255), width); rect.setPen(pen) - b = 255 - r = g = 255 - intPearson*20 - r = g = max(r, 55) # + r = g = b = 255 + if pearson > 0: + r = g = max(255 - 20 * pearson, 55) elif pearson < 0: - intPearson = ceil(pearson) - pen = QPen(QColor(255,0,0), width) - rect.setPen(pen) - r = 255 - b = g = 255 + intPearson*20 - b = g = max(b, 55) - else: - pen = QPen(QColor(255,255,255), width) - r = g = b = 255 # white - color = QColor(r,g,b) - brush = QBrush(color) - rect.setBrush(brush) - + b = g = max(255 + 20 * pearson, 55) + rect.setBrush(QBrush(QColor(r, g, b))) + pen = QPen(QColor(255 * (r == 255), 255 * (g == 255), 255 * (b == 255)), + pen_width) + rect.setPen(pen) if pearson > 0: pearson = min(pearson, 10) - kvoc = 1 - 0.08 * pearson # if pearson in [0..10] --> kvoc in [1..0.2] + dist = 20 - 1.6 * pearson else: pearson = max(pearson, -10) - kvoc = 1 - 0.4*pearson - + dist = 20 - 8 * pearson pen.setWidth(1) - self.addLines(x,y,w,h, kvoc, pen) - - ################################################## - # add lines - def addLines(self, x, y, w, h, diff, pen): - if w == 0 or h == 0: - return - - dist = 20 * diff # original distance between two lines in pixels - temp = dist - canvas = self.canvas - while temp < w: - r = QGraphicsLineItem(temp + x, y, temp + x, y + h, None) - canvas.addItem(r) + def _offseted_line(ax, ay): + r = QGraphicsLineItem(x + ax, y + ay, x + (ax or w), y + (ay or h)) + self.canvas.addItem(r) r.setPen(pen) - temp += dist - temp = dist - while temp < h: - r = QGraphicsLineItem(x, y + temp, x + w, y + temp, None) - canvas.addItem(r) - r.setPen(pen) - temp += dist + ax = dist + while ax < w: + _offseted_line(ax, 0) + ax += dist + + ay = dist + while ay < h: + _offseted_line(0, ay) + ay += dist def closeEvent(self, ce): QDialog.closeEvent(self, ce) From 94e77ad0da72be8967522ec3328f6f0f689e7cae Mon Sep 17 00:00:00 2001 From: janezd Date: Fri, 24 Jun 2016 18:31:35 +0200 Subject: [PATCH 2/9] 
Sieve: Add VizRank --- Orange/widgets/visualize/owsieve.py | 136 +++++++++++++++++++++++++--- 1 file changed, 124 insertions(+), 12 deletions(-) diff --git a/Orange/widgets/visualize/owsieve.py b/Orange/widgets/visualize/owsieve.py index edc7c8a7f21..c59a08080f6 100644 --- a/Orange/widgets/visualize/owsieve.py +++ b/Orange/widgets/visualize/owsieve.py @@ -1,12 +1,12 @@ from itertools import chain -from math import sqrt, floor, ceil +from bisect import bisect_left import numpy as np from scipy import stats from PyQt4.QtCore import Qt, QSize from PyQt4.QtGui import (QGraphicsScene, QColor, QPen, QBrush, QTableView, - QStandardItemModel, + QStandardItemModel, QStandardItem, QDialog, QApplication, QSizePolicy, QGraphicsLineItem) from Orange.data import Table, filter @@ -19,7 +19,7 @@ from Orange.widgets.utils import getHtmlCompatibleString as to_html from Orange.widgets.utils.itemmodels import VariableListModel from Orange.widgets.visualize.owmosaic import ( - CanvasText, CanvasRectangle, ViewWithPress, get_conditional_distribution) + CanvasText, CanvasRectangle, ViewWithPress) from Orange.widgets.widget import OWWidget, Default, AttributeList @@ -65,10 +65,17 @@ def __init__(self): self.attr_box, self, value="attrY", contentsLength=12, callback=self.change_attr, sendSelectedValue=True, valueType=str) self.attrYCombo.setModel(model) + self.vizrank = self.VizRank(self) + self.vizrank_button = gui.button( + self.attr_box, self, "Score Combinations", + callback=self.vizrank.reshow, + tooltip="Find projections with good class separation", + sizePolicy=QSizePolicy(QSizePolicy.Fixed, QSizePolicy.Fixed)) + self.vizrank_button.setEnabled(False) self.canvas = QGraphicsScene() self.canvasView = ViewWithPress(self.canvas, self.mainArea, - handler=self.reset_selection) + handler=self.reset_selection) self.mainArea.layout().addWidget(self.canvasView) self.canvasView.setVerticalScrollBarPolicy(Qt.ScrollBarAlwaysOff) self.canvasView.setHorizontalScrollBarPolicy(Qt.ScrollBarAlwaysOff) 
@@ -108,8 +115,15 @@ def set_data(self, data): self.openContext(self.data) self.resolve_shown_attributes() self.update_selection() - - def change_attr(self): + self.vizrank._initialize() + self.vizrank_button.setEnabled( + self.data is not None and + len(self.data) > 1 and + len(self.data.domain.attributes) > 1) + + def change_attr(self, attributes=None): + if attributes is not None: + self.attrX, self.attrY = attributes self.selection = set() self.updateGraph() self.update_selection() @@ -192,14 +206,10 @@ def update_selection(self): class ChiSqStats: def __init__(self, data, attr1, attr2): - data = data[:, [attr1, attr2]] self.observed = get_contingency(data, attr1, attr2) self.n = np.sum(self.observed) self.probs_x = self.observed.sum(axis=0) / self.n self.probs_y = self.observed.sum(axis=1) / self.n - print(self.observed) - print(self.probs_x) - print(self.probs_y) self.expected = np.outer(self.probs_y, self.probs_x) * self.n self.residuals = \ (self.observed - self.expected) / np.sqrt(self.expected) @@ -355,12 +365,114 @@ def get_widget_name_extension(self): def send_report(self): self.report_plot() + class VizRank(OWWidget): + name = "Rank projections (Sieve)" + want_control_area = False + + def __init__(self, parent_widget): + super().__init__() + self.parent_widget = parent_widget + self.running = False + self.progress = None + self.i = self.j = 0 + + self.projectionTable = QTableView() + self.mainArea.layout().addWidget(self.projectionTable) + self.projectionTable.setSelectionBehavior(QTableView.SelectRows) + self.projectionTable.setSelectionMode(QTableView.SingleSelection) + self.projectionTable.setSortingEnabled(True) + self.projectionTableModel = QStandardItemModel(self) + self.projectionTable.setModel(self.projectionTableModel) + self.projectionTable.selectionModel().selectionChanged.connect( + self.on_selection_changed) + self.projectionTable.horizontalHeader().hide() + + self.button = gui.button(self.mainArea, self, "Start evaluation", + 
callback=self.toggle, default=True) + self.resize(320, 512) + self._initialize() + + def _initialize(self): + self.running = False + self.projectionTableModel.clear() + self.projectionTable.setColumnWidth(0, 120) + self.projectionTable.setColumnWidth(1, 120) + self.button.setText("Start evaluation") + self.button.setEnabled(False) + self.pause = False + self.scores = [] + self.i = self.j = 0 + if self.progress: + self.progress.finish() + self.progress = None + + self.information(0) + if self.parent_widget.data: + if not self.parent_widget.data.domain.class_var: + self.information( + 0, "Data with a class variable is required.") + return + if len(self.parent_widget.data.domain.attributes) < 2: + self.information( + 0, 'At least 2 features are needed.') + return + if len(self.parent_widget.data) < 2: + self.information( + 0, 'At least 2 instances are needed.') + return + self.button.setEnabled(True) + + def on_selection_changed(self, selected, deselected): + """Called when the ranks view selection changes.""" + a1 = selected.indexes()[0].data() + a2 = selected.indexes()[1].data() + self.parent_widget.change_attr(attributes=(a1, a2)) + + def toggle(self): + self.running ^= 1 + if self.running: + self.button.setText("Pause") + self.run() + else: + self.button.setText("Continue") + self.button.setEnabled(False) + + def stop(self, i, j): + self.i, self.j = i, j + if not self.projectionTable.selectedIndexes(): + self.projectionTable.selectRow(0) + self.button.setEnabled(True) + + def run(self): + widget = self.parent_widget + attrs = widget.attrs + if not self.progress: + self.progress = gui.ProgressBar(self, len(attrs)) + for i in range(self.i, len(attrs)): + for j in range(self.j, i): + if not self.running: + self.stop(i, j) + return + score = widget.ChiSqStats(widget.discrete_data, i, j).p + pos = bisect_left(self.scores, score) + self.projectionTableModel.insertRow( + len(self.scores) - pos, + [QStandardItem(widget.attrs[i].name), + 
QStandardItem(widget.attrs[j].name)]) + self.scores.insert(pos, score) + self.progress.advance() + self.progress.finish() + if not self.projectionTable.selectedIndexes(): + self.projectionTable.selectRow(0) + self.button.setText("Finished") + self.button.setEnabled(False) + # test widget appearance if __name__ == "__main__": import sys - a=QApplication(sys.argv) - ow=OWSieveDiagram() + a = QApplication(sys.argv) + ow = OWSieveDiagram() ow.show() data = Table(r"zoo.tab") ow.set_data(data) From 5359f3adddccd4c527f49ad0b129ae59dc82896d Mon Sep 17 00:00:00 2001 From: janezd Date: Fri, 24 Jun 2016 22:05:33 +0200 Subject: [PATCH 3/9] gui.py: Allow specifying size policy as tuple --- Orange/widgets/gui.py | 21 +++++++++++++-------- 1 file changed, 13 insertions(+), 8 deletions(-) diff --git a/Orange/widgets/gui.py b/Orange/widgets/gui.py index 6b992c8dd02..f89a5f3d1f6 100644 --- a/Orange/widgets/gui.py +++ b/Orange/widgets/gui.py @@ -14,7 +14,7 @@ from PyQt4 import QtGui, QtCore from PyQt4.QtCore import Qt, pyqtSignal as Signal from PyQt4.QtGui import QCursor, QApplication, QTableView, QHeaderView, \ - QStyledItemDelegate + QStyledItemDelegate, QSizePolicy # Some Orange widgets might expect this here from Orange.widgets.webview import WebView as WebviewWidget # pylint: disable=unused-import @@ -234,7 +234,10 @@ def miscellanea(control, box, parent, :type sizePolicy: PyQt4.QtQui.QSizePolicy """ for prop, val in kwargs.items(): - getattr(control, "set" + prop[0].upper() + prop[1:])(val) + if prop == "sizePolicy": + control.setSizePolicy(QSizePolicy(*val)) + else: + getattr(control, "set" + prop[0].upper() + prop[1:])(val) if disabled: # if disabled==False, do nothing; it can be already disabled control.setDisabled(disabled) @@ -249,6 +252,8 @@ def miscellanea(control, box, parent, box.layout().indexOf(control) == -1: box.layout().addWidget(control) if sizePolicy is not None: + if isinstance(sizePolicy, tuple): + sizePolicy = QSizePolicy(*sizePolicy) (box or 
control).setSizePolicy(sizePolicy) if addToLayout and parent and parent.layout() is not None: parent.layout().addWidget(box or control, stretch) @@ -1350,6 +1355,8 @@ def appendRadioButton(group, label, insertInto=None, if tooltip is not None: w.setToolTip(tooltip) if sizePolicy: + if isinstance(sizePolicy, tuple): + sizePolicy = QSizePolicy(*sizePolicy) w.setSizePolicy(sizePolicy) if addToLayout: dest = insertInto or group @@ -2218,17 +2225,16 @@ def do_commit(): orientation = Qt.Vertical if checkbox_label else Qt.Horizontal b = widgetBox(widget, box=box, orientation=orientation, addToLayout=False) - b.setSizePolicy(QtGui.QSizePolicy.Preferred, QtGui.QSizePolicy.Maximum) + b.setSizePolicy(QSizePolicy.Preferred, QSizePolicy.Maximum) b.checkbox = cb = checkBox(b, master, value, checkbox_label, callback=checkbox_toggled, tooltip=auto_label) if _is_horizontal(orientation): b.layout().addSpacing(10) - cb.setSizePolicy(QtGui.QSizePolicy.Preferred, QtGui.QSizePolicy.Preferred) + cb.setSizePolicy(QSizePolicy.Preferred, QSizePolicy.Preferred) b.button = btn = button(b, master, label, callback=lambda: do_commit()) if not checkbox_label: - btn.setSizePolicy(QtGui.QSizePolicy.Expanding, - QtGui.QSizePolicy.Preferred) + btn.setSizePolicy(QSizePolicy.Expanding, QSizePolicy.Preferred) checkbox_toggled() setattr(master, commit_name, unconditional_commit) misc['addToLayout'] = misc.get('addToLayout', True) and \ @@ -3093,8 +3099,7 @@ def get_bar_brush(self, _, index): class VerticalLabel(QtGui.QLabel): def __init__(self, text, parent=None): super().__init__(text, parent) - self.setSizePolicy(QtGui.QSizePolicy.Preferred, - QtGui.QSizePolicy.MinimumExpanding) + self.setSizePolicy(QSizePolicy.Preferred, QSizePolicy.MinimumExpanding) self.setMaximumWidth(self.sizeHint().width() + 2) self.setMargin(4) From 584a6eef875b59fe4db8eff49ad2d9faa629c652 Mon Sep 17 00:00:00 2001 From: janezd Date: Fri, 24 Jun 2016 22:10:19 +0200 Subject: [PATCH 4/9] Sieve: Refactoring and fixes --- 
Orange/widgets/visualize/owsieve.py | 349 +++++++++++++++++----------- 1 file changed, 207 insertions(+), 142 deletions(-) diff --git a/Orange/widgets/visualize/owsieve.py b/Orange/widgets/visualize/owsieve.py index c59a08080f6..5e93066fc5a 100644 --- a/Orange/widgets/visualize/owsieve.py +++ b/Orange/widgets/visualize/owsieve.py @@ -5,9 +5,9 @@ from scipy import stats from PyQt4.QtCore import Qt, QSize -from PyQt4.QtGui import (QGraphicsScene, QColor, QPen, QBrush, QTableView, - QStandardItemModel, QStandardItem, - QDialog, QApplication, QSizePolicy, QGraphicsLineItem) +from PyQt4.QtGui import ( + QGraphicsScene, QColor, QPen, QBrush, QTableView, QStandardItemModel, + QStandardItem, QDialog, QSizePolicy, QGraphicsLineItem) from Orange.data import Table, filter from Orange.data.sql.table import SqlTable, LARGE_TABLE, DEFAULT_SAMPLE_TIME @@ -24,10 +24,13 @@ class OWSieveDiagram(OWWidget): + """ + A two-way contingency table providing information on the relation + between the observed and expected frequencies of a combination of feature + values + """ + name = "Sieve Diagram" - description = "A two-way contingency table providing information on the " \ - "relation between the observed and expected frequencies " \ - "of a combination of feature values under the assumption of independence." 
icon = "icons/SieveDiagram.svg" priority = 4200 @@ -45,6 +48,7 @@ class OWSieveDiagram(OWWidget): selection = ContextSetting(set()) def __init__(self): + # pylint: disable=missing-docstring super().__init__() self.data = self.discrete_data = None @@ -55,27 +59,22 @@ def __init__(self): self.attr_box = gui.hBox(self.mainArea) model = VariableListModel() model.wrap(self.attrs) - self.attrXCombo = gui.comboBox( - self.attr_box, self, value="attrX", contentsLength=12, - callback=self.change_attr, sendSelectedValue=True, valueType=str) - self.attrXCombo.setModel(model) - gui.widgetLabel(self.attr_box, "\u2715").\ - setSizePolicy(QSizePolicy.Fixed, QSizePolicy.Fixed) - self.attrYCombo = gui.comboBox( - self.attr_box, self, value="attrY", contentsLength=12, - callback=self.change_attr, sendSelectedValue=True, valueType=str) - self.attrYCombo.setModel(model) + combo_args = dict( + widget=self.attr_box, master=self, contentsLength=12, + callback=self.change_attr, sendSelectedValue=True, valueType=str, + model=model) + fixed_size = (QSizePolicy.Fixed, QSizePolicy.Fixed) + self.attrXCombo = gui.comboBox(value="attrX", **combo_args) + gui.widgetLabel(self.attr_box, "\u2715", sizePolicy=fixed_size) + self.attrYCombo = gui.comboBox(value="attrY", **combo_args) self.vizrank = self.VizRank(self) self.vizrank_button = gui.button( - self.attr_box, self, "Score Combinations", - callback=self.vizrank.reshow, - tooltip="Find projections with good class separation", - sizePolicy=QSizePolicy(QSizePolicy.Fixed, QSizePolicy.Fixed)) - self.vizrank_button.setEnabled(False) + self.attr_box, self, "Score Combinations", sizePolicy=fixed_size, + callback=self.vizrank.reshow, enabled=False) self.canvas = QGraphicsScene() - self.canvasView = ViewWithPress(self.canvas, self.mainArea, - handler=self.reset_selection) + self.canvasView = ViewWithPress( + self.canvas, self.mainArea, handler=self.reset_selection) self.mainArea.layout().addWidget(self.canvasView) 
self.canvasView.setVerticalScrollBarPolicy(Qt.ScrollBarAlwaysOff) self.canvasView.setHorizontalScrollBarPolicy(Qt.ScrollBarAlwaysOff) @@ -85,15 +84,28 @@ def __init__(self): box.layout().addWidget(self.report_button) def sizeHint(self): + # pylint: disable=missing-docstring return QSize(450, 550) def set_data(self, data): - if type(data) == SqlTable and data.approx_len() > LARGE_TABLE: + """ + Discretize continuous attributes, and put all attributes and discrete + metas into self.attrs, which is used as a model for combos. + + Select the first two attributes unless context overrides this. + Method `resolve_shown_attributes` is called to use the attributes from + the input, if it exists and matches the attributes in the data. + + Remove selection; again let the context override this. + Initialize the vizrank dialog, but don't show it. + """ + if isinstance(data, SqlTable) and data.approx_len() > LARGE_TABLE: data = data.sample_time(DEFAULT_SAMPLE_TIME) self.closeContext() self.data = data self.areas = [] + self.selection = [] if self.data is None: self.attrs[:] = [] else: @@ -114,76 +126,94 @@ def set_data(self, data): self.areas = self.selection = None self.openContext(self.data) self.resolve_shown_attributes() + self.update_graph() self.update_selection() - self.vizrank._initialize() + + self.vizrank.initialize() self.vizrank_button.setEnabled( - self.data is not None and - len(self.data) > 1 and + self.data is not None and len(self.data) > 1 and len(self.data.domain.attributes) > 1) def change_attr(self, attributes=None): + """Reset the selection, update graph. 
Set the attributes, if given.""" if attributes is not None: self.attrX, self.attrY = attributes self.selection = set() - self.updateGraph() + self.update_graph() self.update_selection() - def set_input_features(self, attrList): - self.input_features = attrList + def set_input_features(self, attr_list): + """Store the attributes from the input and call + `resolve_shown_attributes`""" + self.input_features = attr_list self.resolve_shown_attributes() self.update_selection() def resolve_shown_attributes(self): + """Use the attributes from the input signal if the signal is present + and at least two attributes appear in the domain. If there are + multiple, use the first two. Combos are disabled if inputs are used.""" self.warning(1) self.attr_box.setEnabled(True) - if self.input_features: # non-None and non-empty! - features = [f for f in self.input_features if f in self.attrs] - if not features: - self.warning(1, "Features from the input signal " - "are not present in the data") - else: - old_attrs = self.attrX, self.attrY - self.attrX, self.attrY = [f.name for f in (features * 2)[:2]] - self.attr_box.setEnabled(False) - if (self.attrX, self.attrY) != old_attrs: - self.selection = set() - # else: do nothing; keep current features, even if input with the - # features just changed to None - self.updateGraph() - - def resizeEvent(self, e): - OWWidget.resizeEvent(self,e) - self.updateGraph() - - def showEvent(self, ev): - OWWidget.showEvent(self, ev) - self.updateGraph() + if not self.input_features: # None or empty + return + features = [f for f in self.input_features if f in self.attrs] + if not features: + self.warning(1, "Features from the input signal " + "are not present in the data") + return + old_attrs = self.attrX, self.attrY + self.attrX, self.attrY = [f.name for f in (features * 2)[:2]] + self.attr_box.setEnabled(False) + if (self.attrX, self.attrY) != old_attrs: + self.selection = set() + self.update_graph() + + def resizeEvent(self, event): + 
super().resizeEvent(event) + self.update_graph() + + def showEvent(self, event): + super().showEvent(event) + self.update_graph() + + def closeEvent(self, event): + self.vizrank.close() + super().closeEvent(event) + + def hideEvent(self, event): + self.vizrank.hide() + super().hideEvent(event) def reset_selection(self): self.selection = set() self.update_selection() - def select_area(self, area, ev): - if ev.button() != Qt.LeftButton: + def select_area(self, area, event): + """Add or remove the clicked area from the selection""" + if event.button() != Qt.LeftButton: return index = self.areas.index(area) - if ev.modifiers() & Qt.ControlModifier: + if event.modifiers() & Qt.ControlModifier: self.selection ^= {index} else: self.selection = {index} self.update_selection() def update_selection(self): + """Update the graph (pen width) to show the current selection. + Filter and output the data. + """ if self.areas is None or not self.selection: self.send("Selection", None) return - filters = [] + filts = [] for i, area in enumerate(self.areas): if i in self.selection: width = 4 val_x, val_y = area.value_pair - filters.append( + filts.append( filter.Values([ filter.FilterDiscrete(self.attrX, [val_x]), filter.FilterDiscrete(self.attrY, [val_y]) @@ -193,11 +223,11 @@ def update_selection(self): pen = area.pen() pen.setWidth(width) area.setPen(pen) - if len(filters) == 1: - filters = filters[0] + if len(filts) == 1: + filts = filts[0] else: - filters = filter.Values(filters, conjunction=False) - selection = filters(self.discrete_data) + filts = filter.Values(filts, conjunction=False) + selection = filts(self.discrete_data) if self.discrete_data is not self.data: idset = set(selection.ids) sel_idx = [i for i, id in enumerate(self.data.ids) if id in idset] @@ -205,6 +235,8 @@ def update_selection(self): self.send("Selection", selection) class ChiSqStats: + """Compute and store statistics needed to show a plot for the given + pair of attributes. 
The class is also used for ranking.""" def __init__(self, data, attr1, attr2): self.observed = get_contingency(data, attr1, attr2) self.n = np.sum(self.observed) @@ -218,7 +250,12 @@ def __init__(self, data, attr1, attr2): self.p = stats.distributions.chi2.sf( self.chisq, (len(self.probs_x) - 1) * (len(self.probs_y) - 1)) - def updateGraph(self, *args): + def update_graph(self): + # Function uses weird names like r, g, b, but it does it with utmost + # caution, hence + # pylint: disable=invalid-name + """Update the graph.""" + def text(txt, *args, **kwargs): return CanvasText(self.canvas, "", html_text=to_html(txt), *args, **kwargs) @@ -226,6 +263,88 @@ def text(txt, *args, **kwargs): def width(txt): return text(txt, 0, 0, show=False).boundingRect().width() + def show_pearson(rect, pearson, pen_width): + """Color the given rectangle according to its corresponding + standardized Pearson residual""" + r = rect.rect() + x, y, w, h = r.x(), r.y(), r.width(), r.height() + if w == 0 or h == 0: + return + + r = b = 255 + if pearson > 0: + r = g = max(255 - 20 * pearson, 55) + elif pearson < 0: + b = g = max(255 + 20 * pearson, 55) + else: + r = g = b = 224 + rect.setBrush(QBrush(QColor(r, g, b))) + pen_color = QColor(255 * (r == 255), 255 * (g == 255), + 255 * (b == 255)) + pen = QPen(pen_color, pen_width) + rect.setPen(pen) + if pearson > 0: + pearson = min(pearson, 10) + dist = 20 - 1.6 * pearson + else: + pearson = max(pearson, -10) + dist = 20 - 8 * pearson + pen.setWidth(1) + + def _offseted_line(ax, ay): + r = QGraphicsLineItem(x + ax, y + ay, x + (ax or w), + y + (ay or h)) + self.canvas.addItem(r) + r.setPen(pen) + + ax = dist + while ax < w: + _offseted_line(ax, 0) + ax += dist + + ay = dist + while ay < h: + _offseted_line(0, ay) + ay += dist + + def make_tooltip(): + """Create the tooltip. 
The function uses local variables from + the enclosing scope.""" + # pylint: disable=undefined-loop-variable + def _oper(attr_name, txt): + if self.data.domain[attr_name] is ddomain[attr_name]: + return "=" + return " " if txt[0] in "<≥" else " in " + + def _fmt(val): + return str(int(val)) if val % 1 == 0 \ + else "{:.2f}".format(val) + + return ( + "{attrX}{xeq}{xval_name}: {obs_x}/{n} ({p_x:.0f} %)". + format(attrX=to_html(attr_x), + xeq=_oper(attr_x, xval_name), + xval_name=to_html(xval_name), + obs_x=_fmt(chi.probs_x[x] * n), + n=int(n), + p_x=100 * chi.probs_x[x]) + + "
" + + "{attrY}{yeq}{yval_name}: {obs_y}/{n} ({p_y:.0f} %)". + format(attrY=to_html(attr_y), + yeq=_oper(attr_y, yval_name), + yval_name=to_html(yval_name), + obs_y=_fmt(chi.probs_y[y] * n), + n=int(n), + p_y=100 * chi.probs_y[y]) + + "
" + + """combination of values:
+    expected {exp} ({p_exp:.0f} %)
+    observed {obs} ({p_obs:.0f} %)""". + format(exp=_fmt(chi.expected[y, x]), + p_exp=100 * chi.expected[y, x] / n, + obs=_fmt(chi.observed[y, x]), + p_obs=100 * chi.observed[y, x] / n)) + for item in self.canvas.items(): self.canvas.removeItem(item) if self.data is None or len(self.data) == 0 or \ @@ -254,6 +373,7 @@ def width(txt): if px == 0: continue width = square_size * px + curr_y = y_off for y in range(len(chi.probs_y) - 1, -1, -1): # bottom-up order py = chi.probs_y[y] @@ -268,40 +388,10 @@ def width(txt): z=-10, onclick=self.select_area) rect.value_pair = x, y self.areas.append(rect) - self.show_pearson(rect, chi.residuals[y, x], 3 * selected) - - def _addeq(attr_name, txt): - if self.data.domain[attr_name] is ddomain[attr_name]: - return "=" - return " " if txt[0] in "<≥" else " in " - - tooltip_text = """ - {attrX}{xeq}{xval_name}: {obs_x}/{n} ({prob_x:.0f} %) -
- {attrY}{yeq}{yval_name}: {obs_y}/{n} ({prob_y:.0f} %) -
- combination of values:
-    expected {exp:.2f} ({p_exp:.0f} %)
-    observed {obs:.2f} ({p_obs:.0f} %) - """.format( - n=int(n), - attrX=to_html(attr_x), - xeq=_addeq(attr_x, xval_name), - xval_name=to_html(xval_name), - obs_x=int(chi.probs_x[x] * n), - prob_x=100 * chi.probs_x[x], - attrY=to_html(attr_y), - yeq=_addeq(attr_y, yval_name), - yval_name=to_html(yval_name), - obs_y=int(chi.probs_y[y] * n), - prob_y=100 * chi.probs_y[y], - exp=chi.expected[y, x], - p_exp=100 * chi.expected[y, x] / n, - obs=int(chi.observed[y, x]), - p_obs=100 * chi.observed[y, x] / n) - rect.setToolTip(tooltip_text) - - if not x: + show_pearson(rect, chi.residuals[y, x], 3 * selected) + rect.setToolTip(make_tooltip()) + + if x == 0: text(yval_name, x_off, curr_y + height / 2, Qt.AlignRight | Qt.AlignVCenter) curr_y += height @@ -317,47 +407,6 @@ def _addeq(attr_name, txt): y_off + square_size + max_xlabel_h, Qt.AlignHCenter | Qt.AlignTop, bold=True) - def show_pearson(self, rect, pearson, pen_width): - r = rect.rect() - x, y, w, h = r.x(), r.y(), r.width(), r.height() - if w == 0 or h == 0: - return - - r = g = b = 255 - if pearson > 0: - r = g = max(255 - 20 * pearson, 55) - elif pearson < 0: - b = g = max(255 + 20 * pearson, 55) - rect.setBrush(QBrush(QColor(r, g, b))) - pen = QPen(QColor(255 * (r == 255), 255 * (g == 255), 255 * (b == 255)), - pen_width) - rect.setPen(pen) - if pearson > 0: - pearson = min(pearson, 10) - dist = 20 - 1.6 * pearson - else: - pearson = max(pearson, -10) - dist = 20 - 8 * pearson - pen.setWidth(1) - - def _offseted_line(ax, ay): - r = QGraphicsLineItem(x + ax, y + ay, x + (ax or w), y + (ay or h)) - self.canvas.addItem(r) - r.setPen(pen) - - ax = dist - while ax < w: - _offseted_line(ax, 0) - ax += dist - - ay = dist - while ay < h: - _offseted_line(0, ay) - ay += dist - - def closeEvent(self, ce): - QDialog.closeEvent(self, ce) - def get_widget_name_extension(self): if self.data is not None: return "{} vs {}".format(self.attrX, self.attrY) @@ -366,15 +415,19 @@ def send_report(self): self.report_plot() class 
VizRank(OWWidget): + """VizRank dialog""" name = "Rank projections (Sieve)" want_control_area = False def __init__(self, parent_widget): + # pylint: disable=missing-docstring super().__init__() self.parent_widget = parent_widget self.running = False self.progress = None self.i = self.j = 0 + self.pause = False + self.scores = [] self.projectionTable = QTableView() self.mainArea.layout().addWidget(self.projectionTable) @@ -390,9 +443,14 @@ def __init__(self, parent_widget): self.button = gui.button(self.mainArea, self, "Start evaluation", callback=self.toggle, default=True) self.resize(320, 512) - self._initialize() + self.initialize() + + def initialize(self): + """Reset the dialog - def _initialize(self): + The class peeks into the widget's data and does some checks. + This needs to be fixes ... some day. VizRank dialogues need to be + unified - pulled out from individual classes.""" self.running = False self.projectionTableModel.clear() self.projectionTable.setColumnWidth(0, 120) @@ -429,6 +487,7 @@ def on_selection_changed(self, selected, deselected): self.parent_widget.change_attr(attributes=(a1, a2)) def toggle(self): + """Start or pause the computation""" self.running ^= 1 if self.running: self.button.setText("Pause") @@ -438,12 +497,14 @@ def toggle(self): self.button.setEnabled(False) def stop(self, i, j): + """Stop (pause) the computation""" self.i, self.j = i, j if not self.projectionTable.selectedIndexes(): self.projectionTable.selectRow(0) self.button.setEnabled(True) def run(self): + """Compute and show scores""" widget = self.parent_widget attrs = widget.attrs if not self.progress: @@ -453,7 +514,7 @@ def run(self): if not self.running: self.stop(i, j) return - score = widget.ChiSqStats(widget.discrete_data, i, j).p + score = -widget.ChiSqStats(widget.discrete_data, i, j).p pos = bisect_left(self.scores, score) self.projectionTableModel.insertRow( len(self.scores) - pos, @@ -468,9 +529,10 @@ def run(self): self.button.setEnabled(False) -# test widget 
appearance -if __name__ == "__main__": +def main(): + # pylint: disable=missing-docstring import sys + from PyQt4.QtGui import QApplication a = QApplication(sys.argv) ow = OWSieveDiagram() ow.show() @@ -478,3 +540,6 @@ def run(self): ow.set_data(data) a.exec_() ow.saveSettings() + +if __name__ == "__main__": + main() From c1a379f4afe13e16703a2b26d7d4e11cbdff365b Mon Sep 17 00:00:00 2001 From: janezd Date: Fri, 24 Jun 2016 22:57:14 +0200 Subject: [PATCH 5/9] preprocess.Discretize: Add flags for discretization of classes and metas --- Orange/preprocess/preprocess.py | 17 ++++++++++---- Orange/tests/test_discretize.py | 41 +++++++++++++++++++++++++++++++++ 2 files changed, 54 insertions(+), 4 deletions(-) diff --git a/Orange/preprocess/preprocess.py b/Orange/preprocess/preprocess.py index 0474c1e1e2e..0cca3bb4025 100644 --- a/Orange/preprocess/preprocess.py +++ b/Orange/preprocess/preprocess.py @@ -80,9 +80,12 @@ class Discretize(Preprocess): during discretization. """ - def __init__(self, method=None, remove_const=True): + def __init__(self, method=None, remove_const=True, + discretize_classes=False, discretize_metas=False): self.method = method self.remove_const = remove_const + self.discretize_classes = discretize_classes + self.discretize_metas = discretize_metas def __call__(self, data): """ @@ -106,11 +109,17 @@ def transform(var): else: return var + def discretized(vars, do_discretize): + if do_discretize: + vars = (transform(var) for var in vars) + vars = [var for var in vars if var is not None] + return vars + method = self.method or discretize.EqualFreq() - attributes = [transform(var) for var in data.domain.attributes] - attributes = [var for var in attributes if var is not None] domain = Orange.data.Domain( - attributes, data.domain.class_vars, data.domain.metas) + discretized(data.domain.attributes, True), + discretized(data.domain.class_vars, self.discretize_classes), + discretized(data.domain.metas, self.discretize_metas)) return data.from_table(domain, 
data) diff --git a/Orange/tests/test_discretize.py b/Orange/tests/test_discretize.py index 6573e67c040..7e27fdabc91 100644 --- a/Orange/tests/test_discretize.py +++ b/Orange/tests/test_discretize.py @@ -174,6 +174,47 @@ def test_keep_constant(self): self.assertEqual(len(table.domain.attributes), len(new_table.domain.attributes)) + def test_discretize_class(self): + table = data.Table('iris') + domain = table.domain + regr_domain = data.Domain(domain.attributes[:3], + [domain.attributes[3], domain.class_var]) + table = data.Table.from_table(regr_domain, table) + + discretize = Discretize(remove_const=False) + new_table = discretize(table) + self.assertIs(new_table.domain.class_vars[0], + new_table.domain.class_vars[0]) + self.assertIs(new_table.domain.class_vars[1], + new_table.domain.class_vars[1]) + + discretize = Discretize(remove_const=False, discretize_classes=True) + new_table = discretize(table) + self.assertIsInstance(new_table.domain.class_vars[0], DiscreteVariable) + self.assertIs(new_table.domain.class_vars[1], + new_table.domain.class_vars[1]) + + def test_discretize_metas(self): + table = data.Table('iris') + domain = table.domain + regr_domain = data.Domain(domain.attributes[:3], + [], + [domain.attributes[3], domain.class_var]) + table = data.Table.from_table(regr_domain, table) + + discretize = Discretize(remove_const=False) + new_table = discretize(table) + self.assertIs(new_table.domain.metas[0], + new_table.domain.metas[0]) + self.assertIs(new_table.domain.metas[1], + new_table.domain.metas[1]) + + discretize = Discretize(remove_const=False, discretize_metas=True) + new_table = discretize(table) + self.assertIsInstance(new_table.domain.metas[0], DiscreteVariable) + self.assertIs(new_table.domain.metas[1], + new_table.domain.metas[1]) + # noinspection PyPep8Naming class TestDiscretizeTable(TestCase): From 424b946e1f2a01231b1240adc23b0b9996a9ceff Mon Sep 17 00:00:00 2001 From: janezd Date: Fri, 24 Jun 2016 22:59:02 +0200 Subject: [PATCH 6/9] Sieve: 
Fix discretization of classes and metas --- Orange/widgets/visualize/owsieve.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/Orange/widgets/visualize/owsieve.py b/Orange/widgets/visualize/owsieve.py index 5e93066fc5a..d1c103d2bd0 100644 --- a/Orange/widgets/visualize/owsieve.py +++ b/Orange/widgets/visualize/owsieve.py @@ -7,7 +7,7 @@ from PyQt4.QtCore import Qt, QSize from PyQt4.QtGui import ( QGraphicsScene, QColor, QPen, QBrush, QTableView, QStandardItemModel, - QStandardItem, QDialog, QSizePolicy, QGraphicsLineItem) + QStandardItem, QSizePolicy, QGraphicsLineItem) from Orange.data import Table, filter from Orange.data.sql.table import SqlTable, LARGE_TABLE, DEFAULT_SAMPLE_TIME @@ -110,7 +110,10 @@ def set_data(self, data): self.attrs[:] = [] else: if any(attr.is_continuous for attr in data.domain): - self.discrete_data = Discretize(method=EqualFreq(n=4))(data) + discretizer = Discretize( + method=EqualFreq(n=4), + discretize_classes=True, discretize_metas=True) + self.discrete_data = discretizer(data) else: self.discrete_data = self.data self.attrs[:] = [ From e60b140a1dc2dd440ab5b74eb05558e756908a83 Mon Sep 17 00:00:00 2001 From: janezd Date: Sat, 25 Jun 2016 10:09:35 +0200 Subject: [PATCH 7/9] Sieve: Improve VizRank layout --- Orange/widgets/visualize/owsieve.py | 72 ++++++++++++++++------------- 1 file changed, 39 insertions(+), 33 deletions(-) diff --git a/Orange/widgets/visualize/owsieve.py b/Orange/widgets/visualize/owsieve.py index d1c103d2bd0..30ca6f341db 100644 --- a/Orange/widgets/visualize/owsieve.py +++ b/Orange/widgets/visualize/owsieve.py @@ -17,6 +17,7 @@ from Orange.widgets import gui from Orange.widgets.settings import DomainContextHandler, ContextSetting from Orange.widgets.utils import getHtmlCompatibleString as to_html +from Orange.widgets.data.owcolor import HorizontalGridDelegate from Orange.widgets.utils.itemmodels import VariableListModel from Orange.widgets.visualize.owmosaic import ( CanvasText, 
CanvasRectangle, ViewWithPress) @@ -249,7 +250,7 @@ def __init__(self, data, attr1, attr2): self.residuals = \ (self.observed - self.expected) / np.sqrt(self.expected) self.chisqs = self.residuals ** 2 - self.chisq = np.sum(self.chisqs) + self.chisq = float(np.sum(self.chisqs)) self.p = stats.distributions.chi2.sf( self.chisq, (len(self.probs_x) - 1) * (len(self.probs_y) - 1)) @@ -266,6 +267,9 @@ def text(txt, *args, **kwargs): def width(txt): return text(txt, 0, 0, show=False).boundingRect().width() + def fmt(val): + return str(int(val)) if val % 1 == 0 else "{:.2f}".format(val) + def show_pearson(rect, pearson, pen_width): """Color the given rectangle according to its corresponding standardized Pearson residual""" @@ -319,16 +323,12 @@ def _oper(attr_name, txt): return "=" return " " if txt[0] in "<≥" else " in " - def _fmt(val): - return str(int(val)) if val % 1 == 0 \ - else "{:.2f}".format(val) - return ( "{attrX}{xeq}{xval_name}: {obs_x}/{n} ({p_x:.0f} %)". format(attrX=to_html(attr_x), xeq=_oper(attr_x, xval_name), xval_name=to_html(xval_name), - obs_x=_fmt(chi.probs_x[x] * n), + obs_x=fmt(chi.probs_x[x] * n), n=int(n), p_x=100 * chi.probs_x[x]) + "
" + @@ -336,16 +336,16 @@ def _fmt(val): format(attrY=to_html(attr_y), yeq=_oper(attr_y, yval_name), yval_name=to_html(yval_name), - obs_y=_fmt(chi.probs_y[y] * n), + obs_y=fmt(chi.probs_y[y] * n), n=int(n), p_y=100 * chi.probs_y[y]) + "
" + """combination of values:
   expected {exp} ({p_exp:.0f} %)
   observed {obs} ({p_obs:.0f} %)""". - format(exp=_fmt(chi.expected[y, x]), + format(exp=fmt(chi.expected[y, x]), p_exp=100 * chi.expected[y, x] / n, - obs=_fmt(chi.observed[y, x]), + obs=fmt(chi.observed[y, x]), p_obs=100 * chi.observed[y, x] / n)) for item in self.canvas.items(): @@ -404,11 +404,15 @@ def _fmt(val): max_xlabel_h = max(int(xl.boundingRect().height()), max_xlabel_h) curr_x += width - text(attr_y, 0, y_off + square_size / 2, Qt.AlignLeft | Qt.AlignVCenter, - bold=True, vertical=True) - text(attr_x, x_off + square_size / 2, - y_off + square_size + max_xlabel_h, Qt.AlignHCenter | Qt.AlignTop, - bold=True) + bottom = y_off + square_size + max_xlabel_h + text(attr_y, 0, y_off + square_size / 2, + Qt.AlignLeft | Qt.AlignVCenter, bold=True, vertical=True) + text(attr_x, x_off + square_size / 2, bottom, + Qt.AlignHCenter | Qt.AlignTop, bold=True) + xl = text("χ²={:.2f}, p={:.3f}".format(chi.chisq, chi.p), + 0, bottom) + # Assume similar height for both lines + text("N = " + fmt(chi.n), 0, bottom -xl.boundingRect().height()) def get_widget_name_extension(self): if self.data is not None: @@ -432,16 +436,18 @@ def __init__(self, parent_widget): self.pause = False self.scores = [] - self.projectionTable = QTableView() - self.mainArea.layout().addWidget(self.projectionTable) - self.projectionTable.setSelectionBehavior(QTableView.SelectRows) - self.projectionTable.setSelectionMode(QTableView.SingleSelection) - self.projectionTable.setSortingEnabled(True) - self.projectionTableModel = QStandardItemModel(self) - self.projectionTable.setModel(self.projectionTableModel) - self.projectionTable.selectionModel().selectionChanged.connect( + self.rank_model = QStandardItemModel(self) + self.rank_table = view = QTableView( + selectionBehavior=QTableView.SelectRows, + selectionMode=QTableView.SingleSelection, + showGrid=False) + view.setItemDelegate(HorizontalGridDelegate()) + view.setModel(self.rank_model) + view.selectionModel().selectionChanged.connect( 
self.on_selection_changed) - self.projectionTable.horizontalHeader().hide() + view.horizontalHeader().setStretchLastSection(True) + view.horizontalHeader().hide() + self.mainArea.layout().addWidget(view) self.button = gui.button(self.mainArea, self, "Start evaluation", callback=self.toggle, default=True) @@ -455,9 +461,9 @@ def initialize(self): This needs to be fixes ... some day. VizRank dialogues need to be unified - pulled out from individual classes.""" self.running = False - self.projectionTableModel.clear() - self.projectionTable.setColumnWidth(0, 120) - self.projectionTable.setColumnWidth(1, 120) + self.rank_model.clear() + self.rank_table.setColumnWidth(0, 120) + self.rank_table.setColumnWidth(1, 120) self.button.setText("Start evaluation") self.button.setEnabled(False) self.pause = False @@ -502,8 +508,8 @@ def toggle(self): def stop(self, i, j): """Stop (pause) the computation""" self.i, self.j = i, j - if not self.projectionTable.selectedIndexes(): - self.projectionTable.selectRow(0) + if not self.rank_table.selectedIndexes(): + self.rank_table.selectRow(0) self.button.setEnabled(True) def run(self): @@ -517,17 +523,17 @@ def run(self): if not self.running: self.stop(i, j) return - score = -widget.ChiSqStats(widget.discrete_data, i, j).p + score = widget.ChiSqStats(widget.discrete_data, i, j).p pos = bisect_left(self.scores, score) - self.projectionTableModel.insertRow( - len(self.scores) - pos, + self.rank_model.insertRow( + pos, [QStandardItem(widget.attrs[i].name), QStandardItem(widget.attrs[j].name)]) self.scores.insert(pos, score) self.progress.advance() self.progress.finish() - if not self.projectionTable.selectedIndexes(): - self.projectionTable.selectRow(0) + if not self.rank_table.selectedIndexes(): + self.rank_table.selectRow(0) self.button.setText("Finished") self.button.setEnabled(False) From 20a860c684f510203d8f2dcd915e359f25b804ae Mon Sep 17 00:00:00 2001 From: janezd Date: Sat, 25 Jun 2016 20:22:30 +0200 Subject: [PATCH 8/9] Refactoring 
of Sieve, Mosaic and VizRank. Extract OWWidget's methods for progress bars to the mix-in class `Orange.widgets.utils.progressbar.ProgressBarMixin`. This simplifies `OWWidget` and also allows other dialogs (e.g. `VizRankDialog`) to use progress bars without being derived from `OWWidget`. Move `HorizontalGridDelegate` from `OWColor` to `gui` - it was also used in the `OWFile`'s domain editor and now in VizRank. Move `CanvasText`, `CanvasRectangle` and `ViewWithPress` from `OWMosaic` to `Orange.widgets.visualize.utils` since they were used by Sieve and Mosaic (and may also be used elsewhere). Move the common functionality of Scatter Plot's and Sieve's VizRank to `VizRankDialog` and `VizRankDialogAttrPair` (module `Orange.widgets.visualize.utils`). The classes are general enough to also support ranking of other visualizations in the future. Derive `VizRankDialog` from `QDialog` (with the progress bar mix-in) instead of `OWWidget` with all its ballast. Visually improve VizRankDialog. Remove the uninformative score. Remove the redundant dictionary `ScaleData.attribute_name_index` since it duplicates the functionality of `ScaleData.data_domain.index`. Fix the type of argument `buttonType` and the return type in docstring of `gui.button`. Rename `Orange.widgets.utils.getHtmlCompatibleString` to `to_html`.
--- Orange/widgets/data/owcolor.py | 23 +- Orange/widgets/gui.py | 15 +- Orange/widgets/utils/__init__.py | 8 +- Orange/widgets/utils/domaineditor.py | 2 +- Orange/widgets/utils/progressbar.py | 159 ++++++++ Orange/widgets/utils/scaling.py | 17 +- Orange/widgets/visualize/owmosaic.py | 106 +----- Orange/widgets/visualize/owparallelgraph.py | 3 +- Orange/widgets/visualize/owscatterplot.py | 226 ++++------- .../widgets/visualize/owscatterplotgraph.py | 10 +- Orange/widgets/visualize/owsieve.py | 282 +++++--------- Orange/widgets/visualize/utils.py | 354 ++++++++++++++++++ Orange/widgets/widget.py | 168 +-------- 13 files changed, 737 insertions(+), 636 deletions(-) create mode 100644 Orange/widgets/utils/progressbar.py create mode 100644 Orange/widgets/visualize/utils.py diff --git a/Orange/widgets/data/owcolor.py b/Orange/widgets/data/owcolor.py index 1a569636496..eb588491451 100644 --- a/Orange/widgets/data/owcolor.py +++ b/Orange/widgets/data/owcolor.py @@ -1,31 +1,18 @@ -""" -Widget for assigning colors to variables -""" - -from PyQt4.QtCore import Qt, QAbstractTableModel, QSize -from PyQt4.QtGui import QStyledItemDelegate, QColor, QHeaderView, QFont, \ - QColorDialog, QTableView, qRgb, QImage, QBrush, QApplication import numpy as np +from PyQt4.QtCore import Qt, QAbstractTableModel, QSize +from PyQt4.QtGui import ( + QColor, QHeaderView, QFont, QColorDialog, QTableView, qRgb, QImage, + QBrush) import Orange from Orange.widgets import widget, settings, gui +from Orange.widgets.gui import HorizontalGridDelegate from Orange.widgets.utils.colorpalette import \ ContinuousPaletteGenerator, ColorPaletteDlg ColorRole = next(gui.OrangeUserRole) -class HorizontalGridDelegate(QStyledItemDelegate): - """Delegate that draws a horizontal grid.""" - def paint(self, painter, option, index): - # pylint: disable=missing-docstring - painter.save() - painter.setPen(QColor(212, 212, 212)) - painter.drawLine(option.rect.bottomLeft(), option.rect.bottomRight()) - painter.restore() - 
QStyledItemDelegate.paint(self, painter, option, index) - - # noinspection PyMethodOverriding class ColorTableModel(QAbstractTableModel): """Base color model for discrete and continuous attributes. The model diff --git a/Orange/widgets/gui.py b/Orange/widgets/gui.py index f89a5f3d1f6..c269a6d01e3 100644 --- a/Orange/widgets/gui.py +++ b/Orange/widgets/gui.py @@ -14,7 +14,7 @@ from PyQt4 import QtGui, QtCore from PyQt4.QtCore import Qt, pyqtSignal as Signal from PyQt4.QtGui import QCursor, QApplication, QTableView, QHeaderView, \ - QStyledItemDelegate, QSizePolicy + QStyledItemDelegate, QSizePolicy, QColor # Some Orange widgets might expect this here from Orange.widgets.webview import WebView as WebviewWidget # pylint: disable=unused-import @@ -1074,8 +1074,8 @@ def button(widget, master, label, callback=None, width=None, height=None, activated on pressing Return. :type autoDefault: bool :param buttonType: the button type (default: `QPushButton`) - :type buttonType: PyQt4.QtGui.QAbstractButton - :rtype: PyQt4.QtGui.QAbstractButton + :type buttonType: PyQt4.QtGui.QPushButton + :rtype: PyQt4.QtGui.QPushButton """ button = buttonType(widget) if label: @@ -3096,6 +3096,15 @@ def get_bar_brush(self, _, index): return QtGui.QBrush(bar_brush) +class HorizontalGridDelegate(QStyledItemDelegate): + def paint(self, painter, option, index): + painter.save() + painter.setPen(QColor(212, 212, 212)) + painter.drawLine(option.rect.bottomLeft(), option.rect.bottomRight()) + painter.restore() + QStyledItemDelegate.paint(self, painter, option, index) + + class VerticalLabel(QtGui.QLabel): def __init__(self, text, parent=None): super().__init__(text, parent) diff --git a/Orange/widgets/utils/__init__.py b/Orange/widgets/utils/__init__.py index 30c36da2fae..26d6f682778 100644 --- a/Orange/widgets/utils/__init__.py +++ b/Orange/widgets/utils/__init__.py @@ -31,5 +31,9 @@ def getdeepattr(obj, attr, *arg, **kwarg): return kwarg["default"] raise -def getHtmlCompatibleString(strVal): - 
return strVal.replace("<=", "≤").replace(">=","≥").replace("<", "<").replace(">",">").replace("=\\=", "≠") + +def to_html(str): + return str.replace("<=", "≤").replace(">=", "≥").\ + replace("<", "<").replace(">", ">").replace("=\\=", "≠") + +getHtmlCompatibleString = to_html diff --git a/Orange/widgets/utils/domaineditor.py b/Orange/widgets/utils/domaineditor.py index b9f51e4dbff..5023b41e815 100644 --- a/Orange/widgets/utils/domaineditor.py +++ b/Orange/widgets/utils/domaineditor.py @@ -4,7 +4,7 @@ from Orange.data import DiscreteVariable, ContinuousVariable, StringVariable, \ TimeVariable from Orange.widgets import gui -from Orange.widgets.data.owcolor import HorizontalGridDelegate +from Orange.widgets.gui import HorizontalGridDelegate from Orange.widgets.utils.itemmodels import TableModel diff --git a/Orange/widgets/utils/progressbar.py b/Orange/widgets/utils/progressbar.py new file mode 100644 index 00000000000..bafe2fd636d --- /dev/null +++ b/Orange/widgets/utils/progressbar.py @@ -0,0 +1,159 @@ +import contextlib +import time +import warnings + +from PyQt4.QtCore import pyqtSignal as Signal, pyqtProperty, QEventLoop +from PyQt4.QtGui import qApp + +from Orange.widgets import gui + +class ProgressBarMixin: + # Set these here so we avoid having to call `__init__` fromm classes + # that use this mix-in + __progressBarValue = -1 + __progressState = 0 + startTime = time.time() # used in progressbar + + def progressBarInit(self, processEvents=QEventLoop.AllEvents): + """ + Initialize the widget's progress (i.e show and set progress to 0%). + + .. note:: + This method will by default call `QApplication.processEvents` + with `processEvents`. To suppress this behavior pass + ``processEvents=None``. 
+ + :param processEvents: Process events flag + :type processEvents: `QEventLoop.ProcessEventsFlags` or `None` + """ + self.startTime = time.time() + self.setWindowTitle(self.captionTitle + " (0% complete)") + + if self.__progressState != 1: + self.__progressState = 1 + self.processingStateChanged.emit(1) + + self.progressBarSet(0, processEvents) + + def progressBarSet(self, value, processEvents=QEventLoop.AllEvents): + """ + Set the current progress bar to `value`. + + .. note:: + This method will by default call `QApplication.processEvents` + with `processEvents`. To suppress this behavior pass + ``processEvents=None``. + + :param float value: Progress value + :param processEvents: Process events flag + :type processEvents: `QEventLoop.ProcessEventsFlags` or `None` + """ + old = self.__progressBarValue + self.__progressBarValue = value + + if value > 0: + if self.__progressState != 1: + warnings.warn("progressBarSet() called without a " + "preceding progressBarInit()", + stacklevel=2) + self.__progressState = 1 + self.processingStateChanged.emit(1) + + usedTime = max(1, time.time() - self.startTime) + totalTime = 100.0 * usedTime / value + remainingTime = max(0, int(totalTime - usedTime)) + hrs = remainingTime // 3600 + mins = (remainingTime % 3600) // 60 + secs = remainingTime % 60 + if hrs > 0: + text = "{}:{:02}:{:02}".format(hrs, mins, secs) + else: + text = "{}:{}:{:02}".format(hrs, mins, secs) + self.setWindowTitle("{} ({:d}%, ETA: {})" + .format(self.captionTitle, value, text)) + else: + self.setWindowTitle(self.captionTitle + " (0% complete)") + + if old != value: + self.progressBarValueChanged.emit(value) + + if processEvents is not None and processEvents is not False: + qApp.processEvents(processEvents) + + def progressBarValue(self): + """Return the state of the progress bar + """ + return self.__progressBarValue + + progressBarValue = pyqtProperty( + float, fset=progressBarSet, fget=progressBarValue) + processingState = pyqtProperty(int, fget=lambda 
self: self.__progressState) + + def progressBarAdvance(self, value, processEvents=QEventLoop.AllEvents): + """ + Advance the progress bar. + + .. note:: + This method will by default call `QApplication.processEvents` + with `processEvents`. To suppress this behavior pass + ``processEvents=None``. + + Args: + value (int): progress value + processEvents (`QEventLoop.ProcessEventsFlags` or `None`): + process events flag + """ + self.progressBarSet(self.progressBarValue + value, processEvents) + + def progressBarFinished(self, processEvents=QEventLoop.AllEvents): + """ + Stop the widget's progress (i.e hide the progress bar). + + .. note:: + This method will by default call `QApplication.processEvents` + with `processEvents`. To suppress this behavior pass + ``processEvents=None``. + + :param processEvents: Process events flag + :type processEvents: `QEventLoop.ProcessEventsFlags` or `None` + """ + self.setWindowTitle(self.captionTitle) + if self.__progressState != 0: + self.__progressState = 0 + self.processingStateChanged.emit(0) + + if processEvents is not None and processEvents is not False: + qApp.processEvents(processEvents) + + @contextlib.contextmanager + def progressBar(self, iterations=0): + """ + Context manager for progress bar. + + Using it ensures that the progress bar is removed at the end without + needing the `finally` blocks. + + Usage: + + with self.progressBar(20) as progress: + ... + progress.advance() + + or + + with self.progressBar() as progress: + ... + progress.advance(0.15) + + or + + with self.progressBar(): + ... 
+ self.progressBarSet(50) + + :param iterations: the number of iterations (optional) + :type iterations: int + """ + progress_bar = gui.ProgressBar(self, iterations) + yield progress_bar + progress_bar.finish() # Let us not rely on garbage collector diff --git a/Orange/widgets/utils/scaling.py b/Orange/widgets/utils/scaling.py index 7be88769a95..90e77f94e5a 100644 --- a/Orange/widgets/utils/scaling.py +++ b/Orange/widgets/utils/scaling.py @@ -55,7 +55,6 @@ class ScaleData: def __init__(self): self.raw_data = None # input data self.attribute_names = [] # list of attribute names from self.raw_data - self.attribute_name_index = {} # dict with indices to attributes self.attribute_flip_info = {} # dictionary with attrName: 0/1 attribute is flipped or not self.data_has_class = False @@ -111,8 +110,6 @@ def set_data(self, data, **args): len_data = data and len(data) or 0 self.attribute_names = [attr.name for attr in full_data.domain] - self.attribute_name_index = dict([(full_data.domain[i].name, i) - for i in range(len(full_data.domain))]) self.attribute_flip_info = {} self.data_domain = full_data.domain @@ -122,7 +119,7 @@ def set_data(self, data, **args): self.data_class_name = self.data_has_class and full_data.domain.class_var.name if self.data_has_class: - self.data_class_index = self.attribute_name_index[self.data_class_name] + self.data_class_index = self.data_domain.index(self.data_class_name) self.have_data = bool(self.raw_data and len(self.raw_data) > 0) self.domain_data_stat = getCached(full_data, @@ -244,7 +241,7 @@ def flip_attribute(self, attr_name): if self.data_domain[attr_name].is_discrete: return 0 - index = self.attribute_name_index[attr_name] + index = self.data_domain.index(attr_name) self.attribute_flip_info[attr_name] = 1 - self.attribute_flip_info.get(attr_name, 0) if self.data_domain[attr_name].is_continuous: self.attr_values[attr_name] = [-self.attr_values[attr_name][1], -self.attr_values[attr_name][0]] @@ -307,8 +304,8 @@ def 
get_xy_data_positions(self, xattr, yattr, filter_valid=False, Create x-y projection of attributes in attrlist. """ - xattr_index = self.attribute_name_index[xattr] - yattr_index = self.attribute_name_index[yattr] + xattr_index = self.data_domain.index(xattr) + yattr_index = self.data_domain.index(yattr) if filter_valid is True: filter_valid = self.get_valid_list([xattr_index, yattr_index]) if isinstance(filter_valid, np.ndarray): @@ -494,9 +491,9 @@ def get_optimal_clusters(self, attribute_name_order, add_result_funct): for i in range(len(attribute_name_order)): for j in range(i): try: - index = self.attribute_name_index - attr1 = index[attribute_name_order[j]] - attr2 = index[attribute_name_order[i]] + index = self.data_domain.index + attr1 = index(attribute_name_order[j]) + attr2 = index(attribute_name_order[i]) test_index += 1 if self.clusterOptimization.isOptimizationCanceled(): secs = time.time() - start_time diff --git a/Orange/widgets/visualize/owmosaic.py b/Orange/widgets/visualize/owmosaic.py index 604cfdc7a9f..a7dffd678b5 100644 --- a/Orange/widgets/visualize/owmosaic.py +++ b/Orange/widgets/visualize/owmosaic.py @@ -6,19 +6,22 @@ from PyQt4.QtCore import Qt, QSize from PyQt4.QtGui import ( - QGraphicsRectItem, QGraphicsView, QColor, QGraphicsScene, QPainter, QPen, - QGraphicsTextItem, QBrush, QGraphicsLineItem) + QColor, QGraphicsScene, QPainter, QPen, + QGraphicsLineItem) + from Orange.data import Table, filter from Orange.data.sql.table import SqlTable, LARGE_TABLE, DEFAULT_SAMPLE_TIME +from Orange.preprocess import Discretize +from Orange.preprocess.discretize import EqualFreq from Orange.statistics.distribution import get_distribution from Orange.widgets import gui from Orange.widgets.settings import ( Setting, DomainContextHandler, ContextSetting) -from Orange.widgets.utils import getHtmlCompatibleString +from Orange.widgets.utils import to_html from Orange.widgets.utils.scaling import get_variable_values_sorted +from Orange.widgets.visualize.utils 
import ( + CanvasText, CanvasRectangle, ViewWithPress) from Orange.widgets.widget import OWWidget, Default -from Orange.preprocess import Discretize -from Orange.preprocess.discretize import EqualFreq class OWMosaicDisplay(OWWidget): @@ -306,7 +309,7 @@ def draw_data(attr_list, x0_x1, y0_y1, side, condition, start = i * edge + whole * float(sum(counts[:i]) / total) end = i * edge + whole * float(sum(counts[:i + 1]) / total) val = values[i] - htmlval = getHtmlCompatibleString(val) + htmlval = to_html(val) if attr_vals != "": newattrvals = attr_vals + "-" + val else: @@ -740,97 +743,6 @@ def get_conditional_distribution(data, attrs): return cond_dist, dist -class CanvasText(QGraphicsTextItem): - def __init__(self, canvas, text="", x=0, y=0, - alignment=Qt.AlignLeft | Qt.AlignTop, bold=0, font=None, z=0, - html_text=None, tooltip=None, show=1, vertical=False): - QGraphicsTextItem.__init__(self, text, None) - - if font: - self.setFont(font) - if bold: - font = self.font() - font.setBold(bold) - self.setFont(font) - if html_text: - self.setHtml(html_text) - - self.alignment = alignment - self.vertical = vertical - if vertical: - self.setRotation(-90) - - self.setPos(x, y) - self.x, self.y = x, y - self.setZValue(z) - if tooltip: - self.setToolTip(tooltip) - if show: - self.show() - else: - self.hide() - - if canvas is not None: - canvas.addItem(self) - - def setPos(self, x, y): - self.x, self.y = x, y - rect = QGraphicsTextItem.boundingRect(self) - if self.vertical: - h, w = rect.height(), rect.width() - rect.setWidth(h) - rect.setHeight(-w) - if int(self.alignment & Qt.AlignRight): - x -= rect.width() - elif int(self.alignment & Qt.AlignHCenter): - x -= rect.width() / 2. - if int(self.alignment & Qt.AlignBottom): - y -= rect.height() - elif int(self.alignment & Qt.AlignVCenter): - y -= rect.height() / 2. 
- QGraphicsTextItem.setPos(self, x, y) - - -class CanvasRectangle(QGraphicsRectItem): - def __init__(self, canvas, x=0, y=0, width=0, height=0, - pen_color=QColor(128, 128, 128), brush_color=None, pen_width=1, - z=0, pen_style=Qt.SolidLine, pen=None, tooltip=None, show=1, - onclick=None): - super().__init__(x, y, width, height, None) - self.onclick = onclick - if brush_color: - self.setBrush(QBrush(brush_color)) - if pen: - self.setPen(pen) - else: - self.setPen(QPen(QBrush(pen_color), pen_width, pen_style)) - self.setZValue(z) - if tooltip: - self.setToolTip(tooltip) - if show: - self.show() - else: - self.hide() - - if canvas is not None: - canvas.addItem(self) - - def mousePressEvent(self, ev): - if self.onclick: - self.onclick(self, ev) - - -class ViewWithPress(QGraphicsView): - def __init__(self, *args, **kwargs): - self.handler = kwargs.pop("handler") - super().__init__(*args) - - def mousePressEvent(self, ev): - super().mousePressEvent(ev) - if not ev.isAccepted(): - self.handler() - - # test widget appearance if __name__ == "__main__": import sys diff --git a/Orange/widgets/visualize/owparallelgraph.py b/Orange/widgets/visualize/owparallelgraph.py index d8e9ea3faa5..9d84bc22795 100644 --- a/Orange/widgets/visualize/owparallelgraph.py +++ b/Orange/widgets/visualize/owparallelgraph.py @@ -100,7 +100,8 @@ def update_data(self, attributes, mid_labels=None): self.alpha_value_2 = TRANSPARENT self.attributes = attributes - self.attribute_indices = [self.attribute_name_index[name] for name in self.attributes] + self.attribute_indices = [self.data_domain.index(name) + for name in self.attributes] self.valid_data = self.get_valid_list(self.attribute_indices) self.visualized_mid_labels = mid_labels diff --git a/Orange/widgets/visualize/owscatterplot.py b/Orange/widgets/visualize/owscatterplot.py index 4afe7237994..985bf88d314 100644 --- a/Orange/widgets/visualize/owscatterplot.py +++ b/Orange/widgets/visualize/owscatterplot.py @@ -1,11 +1,7 @@ -from bisect import 
bisect_left -import sys - import numpy as np from PyQt4.QtCore import Qt, QTimer from PyQt4 import QtGui -from PyQt4.QtGui import QApplication, QTableView, QStandardItemModel, \ - QStandardItem +from PyQt4.QtGui import QApplication from sklearn.neighbors import NearestNeighbors from sklearn.metrics import r2_score @@ -19,6 +15,7 @@ from Orange.widgets.settings import \ DomainContextHandler, Setting, ContextSetting, SettingProvider from Orange.widgets.visualize.owscatterplotgraph import OWScatterPlotGraph +from Orange.widgets.visualize.utils import VizRankDialogAttrPair from Orange.widgets.widget import OWWidget, Default, AttributeList @@ -36,6 +33,57 @@ def font_resize(font, factor, minsize=None, maxsize=None): return font +class ScatterPlotVizRank(VizRankDialogAttrPair): + captionTitle = "Score plots" + K = 10 + + def check_preconditions(self): + if not super().check_preconditions(): + return False + if not self.master.data.domain.class_var: + self.information(33, "Data with a class variable is required.") + return False + self.master.information(33) + return True + + def iterate_states(self, initial_state): + # If we put initialization of `self.attrs` to `initialize`, + # `score_heuristic` would be run on every call to `set_data`. 
+ if initial_state is None: # on the first call, compute order + self.attrs = self.score_heuristic() + yield from super().iterate_states(initial_state) + + def compute_score(self, state): + graph = self.master.graph + ind12 = [graph.data_domain.index(self.attrs[x]) for x in state] + valid = graph.get_valid_list(ind12) + X = graph.scaled_data[ind12, :][:, valid].T + Y = self.master.data.Y[valid] + if X.shape[0] < self.K: + return + n_neighbors = min(self.K, len(X) - 1) + knn = NearestNeighbors(n_neighbors=n_neighbors).fit(X) + ind = knn.kneighbors(return_distance=False) + if self.master.data.domain.has_discrete_class: + return -np.sum(Y[ind] == Y.reshape(-1, 1)) + else: + return -r2_score(Y, np.mean(Y[ind], axis=1)) * \ + (len(Y) / len(self.master.data)) + + def score_heuristic(self): + X = self.master.graph.scaled_data.T + Y = self.master.data.Y + mdomain = self.master.data.domain + dom = Domain([ContinuousVariable(str(i)) for i in range(X.shape[1])], + mdomain.class_vars) + data = Table(dom, X, Y) + relief = ReliefF if isinstance(dom.class_var, DiscreteVariable) \ + else RReliefF + weights = relief(n_iterations=100, k_nearest=self.K)(data) + attrs = sorted(zip(weights, mdomain.attributes), reverse=True) + return [a for _, a in attrs] + + class OWScatterPlot(OWWidget): name = 'Scatter Plot' description = 'Scatterplot visualization with explorative analysis and intelligent data visualization enhancements.' 
@@ -98,13 +146,14 @@ def __init__(self): callback=self.update_attr, **common_options) - self.vizrank = self.VizRank(self) + self.vizrank = ScatterPlotVizRank(self) vizrank_box = gui.hBox(box) gui.separator(vizrank_box, width=common_options["labelWidth"]) self.vizrank_button = gui.button( vizrank_box, self, "Score Plots", callback=self.vizrank.reshow, - tooltip="Find plots with good class separation") - self.vizrank_button.setEnabled(False) + tooltip="Find informative projections", enabled=False) + self.vizrank.pairSelected.connect(self.set_attr) + gui.separator(box) gui.valueSlider( @@ -249,7 +298,7 @@ def set_data(self, data): if not same_domain: self.init_attr_values() - self.vizrank._initialize() + self.vizrank.initialize() self.vizrank_button.setEnabled( self.data is not None and self.data.domain.class_var is not None and len(self.data.domain.attributes) > 1 and len(self.data) > 1) @@ -295,7 +344,7 @@ def set_subset_data(self, subset_data): def handleNewSignals(self): self.graph.new_data(self.data_metas_X, self.subset_data) if self.attribute_selection_list and \ - all(attr.name in self.graph.attribute_name_index + all(attr in self.graph.data_domain for attr in self.attribute_selection_list): self.attr_x = self.attribute_selection_list[0].name self.attr_y = self.attribute_selection_list[1].name @@ -366,8 +415,12 @@ def init_attr_values(self): self.graph.attr_size = "" self.graph.attr_label = "" - def update_attr(self, attributes=None): - self.update_graph(attributes=attributes) + def set_attr(self, attr_x, attr_y): + self.attr_x, self.attr_y = attr_x.name, attr_y.name + self.update_attr() + + def update_attr(self): + self.update_graph() self.cb_class_density.setEnabled(self.graph.can_draw_density()) self.send_features() @@ -378,10 +431,8 @@ def update_colors(self): def update_density(self): self.update_graph(reset_view=False) - def update_graph(self, attributes=None, reset_view=True, **_): + def update_graph(self, reset_view=True, **_): self.graph.zoomStack = [] 
- if attributes and len(attributes) == 2: - self.attr_x, self.attr_y = attributes if not self.graph.have_data: return self.graph.update_data(self.attr_x, self.attr_y, reset_view) @@ -451,151 +502,8 @@ def onDeleteWidget(self): self.graph.plot_widget.clear() - class VizRank(OWWidget): - name = "Rank projections (Scatter Plot)" - - want_control_area = False - - def __init__(self, parent_widget): - super().__init__() - self.parent_widget = parent_widget - self.running = False - self.progress = None - self.k = 10 - - self.projectionTable = QTableView() - self.mainArea.layout().addWidget(self.projectionTable) - self.projectionTable.setSelectionBehavior(QTableView.SelectRows) - self.projectionTable.setSelectionMode(QTableView.SingleSelection) - self.projectionTable.setSortingEnabled(True) - self.projectionTableModel = QStandardItemModel(self) - self.projectionTable.setModel(self.projectionTableModel) - self.projectionTable.selectionModel().selectionChanged.connect( - self.on_selection_changed) - - self.button = gui.button(self.mainArea, self, "Start evaluation", - callback=self.toggle, default=True) - self.resize(380, 512) - self._initialize() - - def _initialize(self): - self.running = False - self.projectionTableModel.clear() - self.projectionTableModel.setHorizontalHeaderLabels( - ["Score", "Feature 1", "Feature 2"]) - self.projectionTable.setColumnWidth(0, 60) - self.projectionTable.setColumnWidth(1, 120) - self.projectionTable.setColumnWidth(2, 120) - self.button.setText("Start evaluation") - self.button.setEnabled(False) - self.pause = False - self.data = None - self.attrs = [] - self.scores = [] - self.i, self.j = 0, 0 - if self.progress: - self.progress.finish() - self.progress = None - - - self.information(0) - if self.parent_widget.data: - if not self.parent_widget.data.domain.class_var: - self.information( - 0, "Data with a class variable is required.") - return - if len(self.parent_widget.data.domain.attributes) < 2: - self.information( - 0, 'At least 2 
unique features are needed.') - return - if len(self.parent_widget.data) < 2: - self.information( - 0, 'At least 2 instances are needed.') - return - self.button.setEnabled(True) - - def on_selection_changed(self, selected, deselected): - """Called when the ranks view selection changes.""" - a1 = selected.indexes()[1].data() - a2 = selected.indexes()[2].data() - self.parent_widget.update_attr(attributes=(a1, a2)) - - def toggle(self): - self.running ^= 1 - if self.running: - self.button.setText("Pause") - self.run() - else: - self.button.setText("Continue") - self.button.setEnabled(False) - - def run(self): - graph = self.parent_widget.graph - y_full = self.parent_widget.data.Y - if not self.attrs: - self.attrs = self.score_heuristic() - if not self.progress: - self.progress = gui.ProgressBar( - self, len(self.attrs) * (len(self.attrs) - 1) / 2) - for i in range(self.i, len(self.attrs)): - ind1 = graph.attribute_name_index[self.attrs[i]] - for j in range(self.j, i): - if not self.running: - self.i, self.j = i, j - if not self.projectionTable.selectedIndexes(): - self.projectionTable.selectRow(0) - self.button.setEnabled(True) - return - ind2 = graph.attribute_name_index[self.attrs[j]] - X = graph.scaled_data[[ind1, ind2], :] - valid = graph.get_valid_list([ind1, ind2]) - X = X[:, valid].T - if X.shape[0] < self.k: - self.progress.advance() - continue - y = y_full[valid] - n_neighbors = min(self.k, len(X) - 1) - knn = NearestNeighbors(n_neighbors=n_neighbors).fit(X) - ind = knn.kneighbors(return_distance=False) - if self.parent_widget.data.domain.has_discrete_class: - score = np.sum(y[ind] == y.reshape(-1, 1)) / ( - len(y_full) * n_neighbors) - else: - score = r2_score(y, np.mean(y[ind], axis=1)) * ( - len(y) / len(y_full)) - pos = bisect_left(self.scores, score) - self.projectionTableModel.insertRow( - len(self.scores) - pos, - [QStandardItem("{:.4f}".format(score)), - QStandardItem(self.attrs[j]), - QStandardItem(self.attrs[i])]) - self.scores.insert(pos, score) - 
self.progress.advance() - self.j = 0 - self.progress.finish() - if not self.projectionTable.selectedIndexes(): - self.projectionTable.selectRow(0) - self.button.setText("Finished") - self.button.setEnabled(False) - - def score_heuristic(self): - X = self.parent_widget.graph.scaled_data.T - Y = self.parent_widget.data.Y - dom = Domain([ContinuousVariable(str(i)) - for i in range(X.shape[1])], - self.parent_widget.data.domain.class_vars) - data = Table(dom, X, Y) - relief = ReliefF if isinstance(dom.class_var, - DiscreteVariable) else RReliefF - weights = relief(n_iterations=100, k_nearest=self.k)(data) - attrs = sorted(zip(weights, - (x.name for x in - self.parent_widget.data.domain.attributes)), - reverse=True) - return [a for _, a in attrs] - - def test_main(argv=None): + import sys if argv is None: argv = sys.argv argv = list(argv) diff --git a/Orange/widgets/visualize/owscatterplotgraph.py b/Orange/widgets/visualize/owscatterplotgraph.py index 443c7902f6f..cca3f811877 100644 --- a/Orange/widgets/visualize/owscatterplotgraph.py +++ b/Orange/widgets/visualize/owscatterplotgraph.py @@ -567,8 +567,8 @@ def update_data(self, attr_x, attr_y, reset_view=True): if self.scaled_data is None or not len(self.scaled_data): self.valid_data = None else: - index_x = self.attribute_name_index[attr_x] - index_y = self.attribute_name_index[attr_y] + index_x = self.data_domain.index(attr_x) + index_y = self.data_domain.index(attr_y) self.valid_data = self.get_valid_list([index_x, index_y], also_class_if_exists=False) if not np.any(self.valid_data): @@ -676,7 +676,7 @@ def get_size_index(self): size_index = -1 attr_size = self.attr_size if attr_size != "" and attr_size != "(Same size)": - size_index = self.attribute_name_index[attr_size] + size_index = self.data_domain.index(attr_size) return size_index def compute_sizes(self): @@ -710,7 +710,7 @@ def get_color_index(self): color_index = -1 attr_color = self.attr_color if attr_color != "" and attr_color != "(Same color)": - 
color_index = self.attribute_name_index[attr_color] + color_index = self.data_domain.index(attr_color) color_var = self.data_domain[attr_color] colors = color_var.colors if color_var.is_discrete: @@ -868,7 +868,7 @@ def get_shape_index(self): if attr_shape and attr_shape != "(Same shape)" and \ len(self.data_domain[attr_shape].values) <= \ len(self.CurveSymbols): - shape_index = self.attribute_name_index[attr_shape] + shape_index = self.data_domain.index(attr_shape) return shape_index def compute_symbols(self): diff --git a/Orange/widgets/visualize/owsieve.py b/Orange/widgets/visualize/owsieve.py index 30ca6f341db..cc787a07961 100644 --- a/Orange/widgets/visualize/owsieve.py +++ b/Orange/widgets/visualize/owsieve.py @@ -1,13 +1,11 @@ from itertools import chain -from bisect import bisect_left import numpy as np -from scipy import stats +from scipy.stats.distributions import chi2 from PyQt4.QtCore import Qt, QSize from PyQt4.QtGui import ( - QGraphicsScene, QColor, QPen, QBrush, QTableView, QStandardItemModel, - QStandardItem, QSizePolicy, QGraphicsLineItem) + QGraphicsScene, QColor, QPen, QBrush, QSizePolicy, QGraphicsLineItem) from Orange.data import Table, filter from Orange.data.sql.table import SqlTable, LARGE_TABLE, DEFAULT_SAMPLE_TIME @@ -16,21 +14,48 @@ from Orange.statistics.contingency import get_contingency from Orange.widgets import gui from Orange.widgets.settings import DomainContextHandler, ContextSetting -from Orange.widgets.utils import getHtmlCompatibleString as to_html -from Orange.widgets.data.owcolor import HorizontalGridDelegate +from Orange.widgets.utils import to_html as to_html from Orange.widgets.utils.itemmodels import VariableListModel -from Orange.widgets.visualize.owmosaic import ( - CanvasText, CanvasRectangle, ViewWithPress) +from Orange.widgets.visualize.utils import ( + CanvasText, CanvasRectangle, ViewWithPress, VizRankDialogAttrPair) from Orange.widgets.widget import OWWidget, Default, AttributeList +class ChiSqStats: + """ + 
Compute and store statistics needed to show a plot for the given + pair of attributes. The class is also used for ranking. + """ + def __init__(self, data, attr1, attr2): + self.observed = get_contingency(data, attr1, attr2) + self.n = np.sum(self.observed) + self.probs_x = self.observed.sum(axis=0) / self.n + self.probs_y = self.observed.sum(axis=1) / self.n + self.expected = np.outer(self.probs_y, self.probs_x) * self.n + self.residuals = \ + (self.observed - self.expected) / np.sqrt(self.expected) + self.chisqs = self.residuals ** 2 + self.chisq = float(np.sum(self.chisqs)) + self.p = chi2.sf( + self.chisq, (len(self.probs_x) - 1) * (len(self.probs_y) - 1)) + + +class SieveRank(VizRankDialogAttrPair): + captionTitle = "Sieve Rank" + + def initialize(self): + super().initialize() + self.attrs = self.master.attrs + + def compute_score(self, state): + return ChiSqStats(self.master.discrete_data, *state).p + + class OWSieveDiagram(OWWidget): """ A two-way contingency table providing information on the relation - between the observed and expected frequencies of a combination of feature - values + between the observed and expected frequencies of a combination of values """ - name = "Sieve Diagram" icon = "icons/SieveDiagram.svg" priority = 4200 @@ -53,25 +78,27 @@ def __init__(self): super().__init__() self.data = self.discrete_data = None - self.areas = None - self.input_features = None self.attrs = [] + self.input_features = None + self.areas = [] + self.selection = set() self.attr_box = gui.hBox(self.mainArea) model = VariableListModel() model.wrap(self.attrs) combo_args = dict( widget=self.attr_box, master=self, contentsLength=12, - callback=self.change_attr, sendSelectedValue=True, valueType=str, + callback=self.update_attr, sendSelectedValue=True, valueType=str, model=model) fixed_size = (QSizePolicy.Fixed, QSizePolicy.Fixed) self.attrXCombo = gui.comboBox(value="attrX", **combo_args) gui.widgetLabel(self.attr_box, "\u2715", sizePolicy=fixed_size) 
self.attrYCombo = gui.comboBox(value="attrY", **combo_args) - self.vizrank = self.VizRank(self) + self.vizrank = SieveRank(self) self.vizrank_button = gui.button( self.attr_box, self, "Score Combinations", sizePolicy=fixed_size, callback=self.vizrank.reshow, enabled=False) + self.vizrank.pairSelected.connect(self.set_attr) self.canvas = QGraphicsScene() self.canvasView = ViewWithPress( @@ -85,9 +112,24 @@ def __init__(self): box.layout().addWidget(self.report_button) def sizeHint(self): - # pylint: disable=missing-docstring return QSize(450, 550) + def resizeEvent(self, event): + super().resizeEvent(event) + self.update_graph() + + def showEvent(self, event): + super().showEvent(event) + self.update_graph() + + def closeEvent(self, event): + self.vizrank.close() + super().closeEvent(event) + + def hideEvent(self, event): + self.vizrank.hide() + super().hideEvent(event) + def set_data(self, data): """ Discretize continuous attributes, and put all attributes and discrete @@ -99,6 +141,9 @@ def set_data(self, data): Remove selection; again let the context override this. Initialize the vizrank dialog, but don't show it. + + Args: + data (Table): input data """ if isinstance(data, SqlTable) and data.approx_len() > LARGE_TABLE: data = data.sample_time(DEFAULT_SAMPLE_TIME) @@ -106,7 +151,7 @@ def set_data(self, data): self.closeContext() self.data = data self.areas = [] - self.selection = [] + self.selection = set() if self.data is None: self.attrs[:] = [] else: @@ -127,7 +172,8 @@ def set_data(self, data): self.attrY = self.attrs[len(self.attrs) > 1].name else: self.attrX = self.attrY = None - self.areas = self.selection = None + self.areas = [] + self.selection = set() self.openContext(self.data) self.resolve_shown_attributes() self.update_graph() @@ -138,25 +184,35 @@ def set_data(self, data): self.data is not None and len(self.data) > 1 and len(self.data.domain.attributes) > 1) - def change_attr(self, attributes=None): - """Reset the selection, update graph. 
Set the attributes, if given.""" - if attributes is not None: - self.attrX, self.attrY = attributes + def set_attr(self, attr_x, attr_y): + self.attrX, self.attrY = attr_x.name, attr_y.name + self.update_attr() + + def update_attr(self): + """Update the graph and selection.""" self.selection = set() self.update_graph() self.update_selection() def set_input_features(self, attr_list): - """Store the attributes from the input and call - `resolve_shown_attributes`""" + """ + Handler for the Features signal. + + The method stores the attributes and calls `resolve_shown_attributes` + + Args: + attr_list (AttributeList): data from the signal + """ self.input_features = attr_list self.resolve_shown_attributes() self.update_selection() def resolve_shown_attributes(self): - """Use the attributes from the input signal if the signal is present + """ + Use the attributes from the input signal if the signal is present and at least two attributes appear in the domain. If there are - multiple, use the first two. Combos are disabled if inputs are used.""" + multiple, use the first two. Combos are disabled if inputs are used. 
+ """ self.warning(1) self.attr_box.setEnabled(True) if not self.input_features: # None or empty @@ -173,28 +229,18 @@ def resolve_shown_attributes(self): self.selection = set() self.update_graph() - def resizeEvent(self, event): - super().resizeEvent(event) - self.update_graph() - - def showEvent(self, event): - super().showEvent(event) - self.update_graph() - - def closeEvent(self, event): - self.vizrank.close() - super().closeEvent(event) - - def hideEvent(self, event): - self.vizrank.hide() - super().hideEvent(event) - def reset_selection(self): self.selection = set() self.update_selection() def select_area(self, area, event): - """Add or remove the clicked area from the selection""" + """ + Add or remove the clicked area from the selection + + Args: + area (QRect): the area that is clicked + event (QEvent): event description + """ if event.button() != Qt.LeftButton: return index = self.areas.index(area) @@ -205,7 +251,8 @@ def select_area(self, area, event): self.update_selection() def update_selection(self): - """Update the graph (pen width) to show the current selection. + """ + Update the graph (pen width) to show the current selection. Filter and output the data. """ if self.areas is None or not self.selection: @@ -238,22 +285,6 @@ def update_selection(self): selection = self.data[sel_idx] self.send("Selection", selection) - class ChiSqStats: - """Compute and store statistics needed to show a plot for the given - pair of attributes. 
The class is also used for ranking.""" - def __init__(self, data, attr1, attr2): - self.observed = get_contingency(data, attr1, attr2) - self.n = np.sum(self.observed) - self.probs_x = self.observed.sum(axis=0) / self.n - self.probs_y = self.observed.sum(axis=1) / self.n - self.expected = np.outer(self.probs_y, self.probs_x) * self.n - self.residuals = \ - (self.observed - self.expected) / np.sqrt(self.expected) - self.chisqs = self.residuals ** 2 - self.chisq = float(np.sum(self.chisqs)) - self.p = stats.distributions.chi2.sf( - self.chisq, (len(self.probs_x) - 1) * (len(self.probs_y) - 1)) - def update_graph(self): # Function uses weird names like r, g, b, but it does it with utmost # caution, hence @@ -271,8 +302,15 @@ def fmt(val): return str(int(val)) if val % 1 == 0 else "{:.2f}".format(val) def show_pearson(rect, pearson, pen_width): - """Color the given rectangle according to its corresponding - standardized Pearson residual""" + """ + Color the given rectangle according to its corresponding + standardized Pearson residual. 
+ + Args: + rect (QRect): the rectangle being drawn + pearson (float): signed standardized pearson residual + pen_width (int): pen width (bolder pen is used for selection) + """ r = rect.rect() x, y, w, h = r.x(), r.y(), r.width(), r.height() if w == 0 or h == 0: @@ -359,7 +397,7 @@ def _oper(attr_name, txt): disc_x, disc_y = ddomain[attr_x], ddomain[attr_y] view = self.canvasView - chi = self.ChiSqStats(self.discrete_data, attr_x, attr_y) + chi = ChiSqStats(self.discrete_data, attr_x, attr_y) n = chi.n max_ylabel_w = max((width(val) for val in disc_y.values), default=0) max_ylabel_w = min(max_ylabel_w, 200) @@ -412,7 +450,7 @@ def _oper(attr_name, txt): xl = text("χ²={:.2f}, p={:.3f}".format(chi.chisq, chi.p), 0, bottom) # Assume similar height for both lines - text("N = " + fmt(chi.n), 0, bottom -xl.boundingRect().height()) + text("N = " + fmt(chi.n), 0, bottom - xl.boundingRect().height()) def get_widget_name_extension(self): if self.data is not None: @@ -421,122 +459,6 @@ def get_widget_name_extension(self): def send_report(self): self.report_plot() - class VizRank(OWWidget): - """VizRank dialog""" - name = "Rank projections (Sieve)" - want_control_area = False - - def __init__(self, parent_widget): - # pylint: disable=missing-docstring - super().__init__() - self.parent_widget = parent_widget - self.running = False - self.progress = None - self.i = self.j = 0 - self.pause = False - self.scores = [] - - self.rank_model = QStandardItemModel(self) - self.rank_table = view = QTableView( - selectionBehavior=QTableView.SelectRows, - selectionMode=QTableView.SingleSelection, - showGrid=False) - view.setItemDelegate(HorizontalGridDelegate()) - view.setModel(self.rank_model) - view.selectionModel().selectionChanged.connect( - self.on_selection_changed) - view.horizontalHeader().setStretchLastSection(True) - view.horizontalHeader().hide() - self.mainArea.layout().addWidget(view) - - self.button = gui.button(self.mainArea, self, "Start evaluation", - 
callback=self.toggle, default=True) - self.resize(320, 512) - self.initialize() - - def initialize(self): - """Reset the dialog - - The class peeks into the widget's data and does some checks. - This needs to be fixes ... some day. VizRank dialogues need to be - unified - pulled out from individual classes.""" - self.running = False - self.rank_model.clear() - self.rank_table.setColumnWidth(0, 120) - self.rank_table.setColumnWidth(1, 120) - self.button.setText("Start evaluation") - self.button.setEnabled(False) - self.pause = False - self.scores = [] - self.i = self.j = 0 - if self.progress: - self.progress.finish() - self.progress = None - - self.information(0) - if self.parent_widget.data: - if not self.parent_widget.data.domain.class_var: - self.information( - 0, "Data with a class variable is required.") - return - if len(self.parent_widget.data.domain.attributes) < 2: - self.information( - 0, 'At least 2 features are needed.') - return - if len(self.parent_widget.data) < 2: - self.information( - 0, 'At least 2 instances are needed.') - return - self.button.setEnabled(True) - - def on_selection_changed(self, selected, deselected): - """Called when the ranks view selection changes.""" - a1 = selected.indexes()[0].data() - a2 = selected.indexes()[1].data() - self.parent_widget.change_attr(attributes=(a1, a2)) - - def toggle(self): - """Start or pause the computation""" - self.running ^= 1 - if self.running: - self.button.setText("Pause") - self.run() - else: - self.button.setText("Continue") - self.button.setEnabled(False) - - def stop(self, i, j): - """Stop (pause) the computation""" - self.i, self.j = i, j - if not self.rank_table.selectedIndexes(): - self.rank_table.selectRow(0) - self.button.setEnabled(True) - - def run(self): - """Compute and show scores""" - widget = self.parent_widget - attrs = widget.attrs - if not self.progress: - self.progress = gui.ProgressBar(self, len(attrs)) - for i in range(self.i, len(attrs)): - for j in range(self.j, i): - if not 
self.running: - self.stop(i, j) - return - score = widget.ChiSqStats(widget.discrete_data, i, j).p - pos = bisect_left(self.scores, score) - self.rank_model.insertRow( - pos, - [QStandardItem(widget.attrs[i].name), - QStandardItem(widget.attrs[j].name)]) - self.scores.insert(pos, score) - self.progress.advance() - self.progress.finish() - if not self.rank_table.selectedIndexes(): - self.rank_table.selectRow(0) - self.button.setText("Finished") - self.button.setEnabled(False) - def main(): # pylint: disable=missing-docstring diff --git a/Orange/widgets/visualize/utils.py b/Orange/widgets/visualize/utils.py new file mode 100644 index 00000000000..f3fe92cbcda --- /dev/null +++ b/Orange/widgets/visualize/utils.py @@ -0,0 +1,354 @@ +from bisect import bisect_left + +from PyQt4.QtCore import Qt, pyqtSignal as Signal, QSize +from PyQt4.QtGui import ( + QStandardItemModel, QStandardItem, QTableView, QGraphicsTextItem, + QGraphicsRectItem, QColor, QBrush, QPen, QGraphicsView, QDialog, QVBoxLayout +) + +from Orange.data import Variable +from Orange.widgets import gui +from Orange.widgets.gui import HorizontalGridDelegate +from Orange.widgets.utils.progressbar import ProgressBarMixin + + +class VizRankDialog(QDialog, ProgressBarMixin): + """ + Base class for VizRank dialogs, providing a GUI with a table and a button, + and the skeleton for managing the evaluation of visualizations. + + Derived classes need to provide generators of combinations (e.g. pairs + of attributes) and the scoring function. The widget stores the current + state upon pause, and restores it upon continuation. + + The class provides a table and a button. 
A widget constructs a single + instance of this dialog in its `__init__`, like (in Sieve): + + self.vizrank = SieveRank(self) + self.vizrank_button = gui.button( + box, self, "Score Combinations", callback=self.vizrank.reshow) + + The widget (the argument `self`) above is stored in `VizRankDialog`'s + attribute `master` since derived classes will need to interact with it. + + When the widget receives new data, it must call the VizRankDialog's + method :obj:`VizRankDialog.initialize()` to clear the GUI and reset the + state. + + Clicking the Start button calls method `run` (and renames the button to + Pause). Run sets up a progress bar by getting the number of combinations + from :obj:`VizRankDialog.state_count()`. It restores the paused state + (if any) and calls generator :obj:`VizRankDialog.iterate_states()`. For + each generated state, it calls :obj:`VizRankDialog.compute_score(state)`, which + must return the score (lower is better) for this state. If the returned + score is not `None`, the data returned by `row_for_state` is inserted at + the appropriate place in the table. + + Args: + master (Orange.widget.OWWidget): widget to which the dialog belongs + + Attributes: + master (Orange.widget.OWWidget): widget to which the dialog belongs + captionTitle (str): the caption for the dialog. This can be a class + attribute. `captionTitle` is used by the `ProgressBarMixin`. 
+ """ + + captionTitle = "" + + processingStateChanged = Signal(int) + progressBarValueChanged = Signal(float) + + def __init__(self, master): + """Initialize the attributes and set up the interface""" + super().__init__(windowTitle=self.captionTitle) + self.master = master + + self.keep_running = False + self.saved_state = None + self.saved_progress = 0 + self.scores = [] + + self.setLayout(QVBoxLayout()) + self.rank_model = QStandardItemModel(self) + self.rank_table = view = QTableView( + selectionBehavior=QTableView.SelectRows, + selectionMode=QTableView.SingleSelection, + showGrid=False) + view.setItemDelegate(HorizontalGridDelegate()) + view.setModel(self.rank_model) + view.selectionModel().selectionChanged.connect( + self.on_selection_changed) + view.horizontalHeader().setStretchLastSection(True) + view.horizontalHeader().hide() + self.layout().addWidget(view) + + self.button = gui.button( + self, self, "Start", callback=self.toggle, default=True) + + def reshow(self): + """Put the widget on top of all windows + """ + self.show() + self.raise_() + self.activateWindow() + + def initialize(self): + """ + Clear and initialize the dialog. + + This method must be called by the widget when the data is reset, + e.g. from `set_data` handler. + """ + self.keep_running = False + self.saved_state = None + self.saved_progress = 0 + self.scores = [] + self.rank_model.clear() + self.button.setText("Start") + self.button.setEnabled(self.check_preconditions()) + + def check_preconditions(self): + """Check whether there is sufficient data for ranking.""" + return True + + def on_selection_changed(self, selected, deselected): + """ + Set the new visualization in the widget when the user selects a + row in the table. + + If a derived class does not reimplement this, the table gives the + information but the user can't click it to select the visualization. 
+ + Args: + selected: the index of the selected item + deselected: the index of the previously selected item + """ + pass + + def iterate_states(self, initial_state): + """ + Generate all possible states (e.g. attribute combinations) for the + given data. The content of the generated states is specific to the + visualization. + + This method must be defined in the derived classes. + + Args: + initial_state: initial state; None if this is the first call + """ + raise NotImplementedError + + def state_count(self): + """ + Return the number of states for the progress bar. + + Derived classes should implement this to ensure the proper behaviour of + the progress bar""" + return 0 + + def compute_score(self, state): + """ + Abstract method for computing the score for the given state. Smaller + scores are better. + + Args: + state: the state, e.g. the combination of attributes as generated + by :obj:`state_count`. + """ + raise NotImplementedError + + def row_for_state(self, state, score): + """ + Abstract method that return the items that are inserted into the table. + + Args: + state: the state, e.g. 
combination of attributes + score: score, computed by :obj:`compute_score` + """ + raise NotImplementedError + + def _select_first_if_none(self): + if not self.rank_table.selectedIndexes(): + self.rank_table.selectRow(0) + + def run(self): + """Compute and show scores""" + with self.progressBar(self.state_count()) as progress: + progress.advance(self.saved_progress) + for state in self.iterate_states(self.saved_state): + if not self.keep_running: + self.saved_state = state + self.saved_progress = progress.count + self._select_first_if_none() + return + score = self.compute_score(state) + if score is not None: + pos = bisect_left(self.scores, score) + self.rank_model.insertRow( + pos, self.row_for_state(score, state)) + self.scores.insert(pos, score) + progress.advance() + self._select_first_if_none() + self.button.setText("Finished") + self.button.setEnabled(False) + + def toggle(self): + """Start or pause the computation.""" + self.keep_running = not self.keep_running + if self.keep_running: + self.button.setText("Pause") + self.run() + else: + self.button.setText("Continue") + + +class VizRankDialogAttrPair(VizRankDialog): + """ + VizRank dialog for pairs of attributes. The class provides most of the + needed methods, except for `initialize` which is expected to store a + list of `Variable` instances to `self.attrs`, and method + `compute_score(state)` for scoring the combinations. + + The state is a pair of indices into `self.attrs`. + + When the user selects a pair, the dialog emits signal + `pairSelected(Variable, Variable)`. 
+ """ + + pairSelected = Signal(Variable, Variable) + _AttrRole = next(gui.OrangeUserRole) + + def __init__(self, master): + super().__init__(master) + self.attrs = [] + + def sizeHint(self): + """Assuming two columns in the table, return `QSize(320, 512)` as + a reasonable default size.""" + return QSize(320, 512) + + def check_preconditions(self): + """Refuse ranking if there are less than two feature or instances.""" + if self.master.data is None or \ + len(self.master.data.domain.attributes) < 2 or \ + len(self.master.data) < 2: + self.master.information(33, "There is nothing to rank.") + return False + self.master.information(33) + return True + + def on_selection_changed(self, selected, deselected): + attrs = [selected.indexes()[i].data(self._AttrRole) for i in (0, 1)] + self.pairSelected.emit(*attrs) + + def state_count(self): + n_attrs = len(self.attrs) + return n_attrs * (n_attrs - 1) / 2 + + def iterate_states(self, initial_state): + si, sj = initial_state or (0, 0) + for i in range(si, len(self.attrs)): + for j in range(sj, i): + yield i, j + + def row_for_state(self, score, state): + items = [] + for x in state: + attr = self.attrs[x] + item = QStandardItem(attr.name) + item.setData(attr, self._AttrRole) + items.append(item) + return items + + +class CanvasText(QGraphicsTextItem): + def __init__(self, canvas, text="", x=0, y=0, + alignment=Qt.AlignLeft | Qt.AlignTop, bold=0, font=None, z=0, + html_text=None, tooltip=None, show=1, vertical=False): + QGraphicsTextItem.__init__(self, text, None) + + if font: + self.setFont(font) + if bold: + font = self.font() + font.setBold(bold) + self.setFont(font) + if html_text: + self.setHtml(html_text) + + self.alignment = alignment + self.vertical = vertical + if vertical: + self.setRotation(-90) + + self.setPos(x, y) + self.x, self.y = x, y + self.setZValue(z) + if tooltip: + self.setToolTip(tooltip) + if show: + self.show() + else: + self.hide() + + if canvas is not None: + canvas.addItem(self) + + def 
setPos(self, x, y): + self.x, self.y = x, y + rect = QGraphicsTextItem.boundingRect(self) + if self.vertical: + h, w = rect.height(), rect.width() + rect.setWidth(h) + rect.setHeight(-w) + if int(self.alignment & Qt.AlignRight): + x -= rect.width() + elif int(self.alignment & Qt.AlignHCenter): + x -= rect.width() / 2. + if int(self.alignment & Qt.AlignBottom): + y -= rect.height() + elif int(self.alignment & Qt.AlignVCenter): + y -= rect.height() / 2. + QGraphicsTextItem.setPos(self, x, y) + + +class CanvasRectangle(QGraphicsRectItem): + def __init__(self, canvas, x=0, y=0, width=0, height=0, + pen_color=QColor(128, 128, 128), brush_color=None, pen_width=1, + z=0, pen_style=Qt.SolidLine, pen=None, tooltip=None, show=1, + onclick=None): + super().__init__(x, y, width, height, None) + self.onclick = onclick + if brush_color: + self.setBrush(QBrush(brush_color)) + if pen: + self.setPen(pen) + else: + self.setPen(QPen(QBrush(pen_color), pen_width, pen_style)) + self.setZValue(z) + if tooltip: + self.setToolTip(tooltip) + if show: + self.show() + else: + self.hide() + + if canvas is not None: + canvas.addItem(self) + + def mousePressEvent(self, ev): + if self.onclick: + self.onclick(self, ev) + + +class ViewWithPress(QGraphicsView): + def __init__(self, *args, **kwargs): + self.handler = kwargs.pop("handler") + super().__init__(*args) + + def mousePressEvent(self, ev): + super().mousePressEvent(ev) + if not ev.isAccepted(): + self.handler() + + diff --git a/Orange/widgets/widget.py b/Orange/widgets/widget.py index 18ee34eab6d..43311298ce9 100644 --- a/Orange/widgets/widget.py +++ b/Orange/widgets/widget.py @@ -1,13 +1,9 @@ -import contextlib import sys -import time import os -import warnings import types from functools import reduce -from PyQt4.QtCore import QByteArray, Qt, pyqtSignal as Signal, pyqtProperty,\ - QEventLoop, QSettings, QUrl +from PyQt4.QtCore import QByteArray, Qt, pyqtSignal as Signal, QSettings, QUrl from PyQt4.QtGui import QDialog, QPixmap, 
QVBoxLayout, QSizePolicy, \ qApp, QStyle, QIcon, QApplication, \ QShortcut, QKeySequence, QDesktopServices, QSplitter, QSplitterHandle, \ @@ -21,6 +17,7 @@ from Orange.widgets.io import ClipboardFormat from Orange.widgets.settings import SettingsHandler from Orange.widgets.utils import saveplot, getdeepattr +from Orange.widgets.utils.progressbar import ProgressBarMixin from .utils.overlay import MessageOverlayWidget @@ -66,7 +63,7 @@ def __new__(mcs, name, bases, kwargs): return cls -class OWWidget(QDialog, Report, metaclass=WidgetMetaClass): +class OWWidget(QDialog, Report, ProgressBarMixin, metaclass=WidgetMetaClass): """Base widget class""" # Global widget count @@ -132,9 +129,12 @@ class OWWidget(QDialog, Report, metaclass=WidgetMetaClass): widgetStateChanged = Signal(str, int, str) blockingStateChanged = Signal(bool) - progressBarValueChanged = Signal(float) processingStateChanged = Signal(int) + # For reasons I don't understand, the signal has to be defined here and + # not in the mix-in class, otherwise PyQt can't connect to it. 
+ progressBarValueChanged = Signal(float) + settingsHandler = None """:type: SettingsHandler""" @@ -173,8 +173,6 @@ def __new__(cls, *args, **kwargs): self.setFocusPolicy(Qt.StrongFocus) - self.startTime = time.time() # used in progressbar - self.widgetState = {"Info": {}, "Warning": {}, "Error": {}} self.__blocking = False @@ -182,8 +180,6 @@ def __new__(cls, *args, **kwargs): # flag indicating if the widget's position was already restored self.__was_restored = False - self.__progressBarValue = -1 - self.__progressState = 0 self.__statusMessage = "" self.__msgwidget = None @@ -428,7 +424,7 @@ def wheelEvent(self, event): event.accept() def setCaption(self, caption): - # we have to save caption title in case progressbar will change it + # save caption title in case progressbar will change it self.captionTitle = str(caption) self.setWindowTitle(caption) @@ -556,154 +552,6 @@ def handleNewSignals(self): """ pass - # ############################################ - # PROGRESS BAR FUNCTIONS - - def progressBarInit(self, processEvents=QEventLoop.AllEvents): - """ - Initialize the widget's progress (i.e show and set progress to 0%). - - .. note:: - This method will by default call `QApplication.processEvents` - with `processEvents`. To suppress this behavior pass - ``processEvents=None``. - - :param processEvents: Process events flag - :type processEvents: `QEventLoop.ProcessEventsFlags` or `None` - """ - self.startTime = time.time() - self.setWindowTitle(self.captionTitle + " (0% complete)") - - if self.__progressState != 1: - self.__progressState = 1 - self.processingStateChanged.emit(1) - - self.progressBarSet(0, processEvents) - - def progressBarSet(self, value, processEvents=QEventLoop.AllEvents): - """ - Set the current progress bar to `value`. - - .. note:: - This method will by default call `QApplication.processEvents` - with `processEvents`. To suppress this behavior pass - ``processEvents=None``. 
- - :param float value: Progress value - :param processEvents: Process events flag - :type processEvents: `QEventLoop.ProcessEventsFlags` or `None` - """ - old = self.__progressBarValue - self.__progressBarValue = value - - if value > 0: - if self.__progressState != 1: - warnings.warn("progressBarSet() called without a " - "preceding progressBarInit()", - stacklevel=2) - self.__progressState = 1 - self.processingStateChanged.emit(1) - - usedTime = max(1, time.time() - self.startTime) - totalTime = 100.0 * usedTime / value - remainingTime = max(0, int(totalTime - usedTime)) - hrs = remainingTime // 3600 - mins = (remainingTime % 3600) // 60 - secs = remainingTime % 60 - if hrs > 0: - text = "{}:{:02}:{:02}".format(hrs, mins, secs) - else: - text = "{}:{}:{:02}".format(hrs, mins, secs) - self.setWindowTitle("{} ({:d}%, ETA: {})" - .format(self.captionTitle, value, text)) - else: - self.setWindowTitle(self.captionTitle + " (0% complete)") - - if old != value: - self.progressBarValueChanged.emit(value) - - if processEvents is not None and processEvents is not False: - qApp.processEvents(processEvents) - - def progressBarValue(self): - """Return the state of the progress bar - """ - return self.__progressBarValue - - progressBarValue = pyqtProperty(float, fset=progressBarSet, - fget=progressBarValue) - - processingState = pyqtProperty(int, fget=lambda self: self.__progressState) - - def progressBarAdvance(self, value, processEvents=QEventLoop.AllEvents): - """ - Advance the progress bar. - - .. note:: - This method will by default call `QApplication.processEvents` - with `processEvents`. To suppress this behavior pass - ``processEvents=None``. - - Args: - value (int): progress value - processEvents (`QEventLoop.ProcessEventsFlags` or `None`): - process events flag - """ - self.progressBarSet(self.progressBarValue + value, processEvents) - - def progressBarFinished(self, processEvents=QEventLoop.AllEvents): - """ - Stop the widget's progress (i.e hide the progress bar). 
- - .. note:: - This method will by default call `QApplication.processEvents` - with `processEvents`. To suppress this behavior pass - ``processEvents=None``. - - :param processEvents: Process events flag - :type processEvents: `QEventLoop.ProcessEventsFlags` or `None` - """ - self.setWindowTitle(self.captionTitle) - if self.__progressState != 0: - self.__progressState = 0 - self.processingStateChanged.emit(0) - - if processEvents is not None and processEvents is not False: - qApp.processEvents(processEvents) - - @contextlib.contextmanager - def progressBar(self, iterations=0): - """ - Context manager for progress bar. - - Using it ensures that the progress bar is removed at the end without - needing the `finally` blocks. - - Usage: - - with self.progressBar(20) as progress: - ... - progress.advance() - - or - - with self.progressBar() as progress: - ... - progress.advance(0.15) - - or - - with self.progressBar(): - ... - self.progressBarSet(50) - - :param iterations: the number of iterations (optional) - :type iterations: int - """ - progress_bar = gui.ProgressBar(self, iterations) - yield progress_bar - progress_bar.finish() # Let us not rely on garbage collector - #: Widget's status message has changed. 
statusMessageChanged = Signal(str) From d29cf14167238302c1117ab91534baec29705d47 Mon Sep 17 00:00:00 2001 From: janezd Date: Fri, 1 Jul 2016 16:48:05 +0200 Subject: [PATCH 9/9] VizRank: fix `iterate_states`, other small fixes --- Orange/widgets/visualize/utils.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/Orange/widgets/visualize/utils.py b/Orange/widgets/visualize/utils.py index f3fe92cbcda..96bcc6b0a0d 100644 --- a/Orange/widgets/visualize/utils.py +++ b/Orange/widgets/visualize/utils.py @@ -1,4 +1,5 @@ from bisect import bisect_left +from operator import attrgetter from PyQt4.QtCore import Qt, pyqtSignal as Signal, QSize from PyQt4.QtGui import ( @@ -188,7 +189,7 @@ def run(self): pos, self.row_for_state(score, state)) self.scores.insert(pos, score) progress.advance() - self._select_first_if_none() + self._select_first_if_none() self.button.setText("Finished") self.button.setEnabled(False) @@ -199,6 +200,7 @@ def toggle(self): self.button.setText("Pause") self.run() else: + self._select_first_if_none() self.button.setText("Continue") @@ -250,11 +252,12 @@ def iterate_states(self, initial_state): for i in range(si, len(self.attrs)): for j in range(sj, i): yield i, j + sj = 0 def row_for_state(self, score, state): items = [] - for x in state: - attr = self.attrs[x] + attrs = sorted((self.attrs[x] for x in state), key=attrgetter("name")) + for attr in attrs: item = QStandardItem(attr.name) item.setData(attr, self._AttrRole) items.append(item)