Skip to content

Commit

Permalink
DBSCAN moved from Prototypes
Browse files Browse the repository at this point in the history
  • Loading branch information
PrimozGodec committed Jul 1, 2019
1 parent 37e7b02 commit 29f048a
Show file tree
Hide file tree
Showing 3 changed files with 325 additions and 0 deletions.
53 changes: 53 additions & 0 deletions Orange/widgets/unsupervised/icons/DBSCAN.svg
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
153 changes: 153 additions & 0 deletions Orange/widgets/unsupervised/owdbscan.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,153 @@
import numpy as np

from AnyQt.QtWidgets import QLayout
from AnyQt.QtCore import Qt

from Orange.widgets import widget, gui
from Orange.widgets.settings import Setting
from Orange.data import Table, Domain, DiscreteVariable
from Orange.clustering import DBSCAN
from Orange import distance
from Orange.widgets.utils.annotated_data import ANNOTATED_DATA_SIGNAL_NAME
from Orange.widgets.utils.signals import Input, Output
from Orange.widgets.widget import Msg


class OWDBSCAN(widget.OWWidget):
    """Widget that clusters the input table with DBSCAN.

    The input table is forwarded with two extra meta columns:

    * ``Cluster`` -- the cluster assigned to each instance; instances with a
      negative label (DBSCAN's marker for noise) get a missing value.
    * ``DBSCAN Core`` -- ``1`` for instances listed among the model's core
      samples, ``0`` otherwise.
    """

    name = "DBSCAN"
    description = "Density-based spatial clustering."
    icon = "icons/DBSCAN.svg"
    priority = 2150

    class Inputs:
        data = Input("Data", Table)

    class Outputs:
        annotated_data = Output(ANNOTATED_DATA_SIGNAL_NAME, Table)

    class Error(widget.OWWidget.Error):
        not_enough_instances = Msg("Not enough unique data instances. "
                                   "At least two are required.")

    # Pairs of (label shown in the combo box, metric handed to DBSCAN).
    METRICS = [
        ("Euclidean", "euclidean"),
        ("Manhattan", "manhattan"),
        ("Cosine", distance.Cosine),
        ("Jaccard", distance.Jaccard),
        # Currently disabled metrics:
        # ("Spearman", distance.SpearmanR),
        # ("Spearman absolute", distance.SpearmanRAbsolute),
        # ("Pearson", distance.PearsonR),
        # ("Pearson absolute", distance.PearsonRAbsolute),
    ]

    # Persisted widget settings.
    min_samples = Setting(5)
    eps = Setting(0.5)
    metric_idx = Setting(0)  # index into METRICS
    auto_commit = Setting(True)

    want_main_area = False

    def __init__(self):
        """Build the control area: parameters, metric selector, apply box."""
        super().__init__()

        self.data = None
        self.db = None
        self.model = None

        box = gui.widgetBox(self.controlArea, "Parameters")
        gui.spin(box, self, "min_samples", 1, 100, 1,
                 callback=self._invalidate,
                 label="Core point neighbors")
        gui.doubleSpin(box, self, "eps", 0.1, 10, 0.01,
                       callback=self._invalidate,
                       label="Neighborhood distance")

        box = gui.widgetBox(self.controlArea, self.tr("Distance Metric"))
        gui.comboBox(box, self, "metric_idx",
                     items=tuple(label for label, _ in self.METRICS),
                     callback=self._invalidate)

        gui.auto_commit(self.controlArea, self, "auto_commit", "Apply",
                        orientation=Qt.Horizontal)
        gui.rubber(self.controlArea)

        # Pin the window to the size required by the controls.
        self.controlArea.setMinimumWidth(self.controlArea.sizeHint().width())
        self.layout().setSizeConstraint(QLayout.SetFixedSize)

    def adjustSize(self):
        """Resize the widget to the size hint of its control area."""
        self.ensurePolished()
        self.resize(self.controlArea.sizeHint())

    def check_data_size(self):
        """Return True if there is data with at least two instances.

        Shows ``Error.not_enough_instances`` when data is present but has
        fewer than two rows.  Missing data (``None``) is not an error; it
        just means there is nothing to cluster.
        """
        if self.data is None:
            # A parameter can be changed before any data arrives; without
            # this guard ``len(None)`` would raise a TypeError.
            return False
        if len(self.data) < 2:
            self.Error.not_enough_instances()
            return False
        return True

    def commit(self):
        """Recompute the clustering and update the output."""
        self.cluster()

    def cluster(self):
        """Fit DBSCAN with the current settings and send the annotated data.

        When the data is missing or too small, ``None`` is sent instead so
        that a table computed from earlier input does not linger on the
        output.
        """
        if not self.check_data_size():
            self.Outputs.annotated_data.send(None)
            return
        self.model = DBSCAN(
            eps=self.eps,
            min_samples=self.min_samples,
            metric=self.METRICS[self.metric_idx][1]
        ).get_model(self.data)
        self.send_data()

    def send_data(self):
        """Append cluster and core-sample meta columns and send the table."""
        model = self.model
        n_rows = len(self.data)

        labels = np.asarray(model.labels)
        is_noise = labels < 0
        # Number of distinct (non-noise) clusters.
        k = len(np.unique(labels[~is_noise]))
        # Noise instances get a missing value instead of a cluster index.
        clusters = np.where(is_noise, np.nan, labels).reshape(n_rows, 1)

        in_core = np.zeros((n_rows, 1), dtype=int)
        core_indices = np.asarray(model.projector.core_sample_indices_,
                                  dtype=int)
        in_core[core_indices] = 1

        clust_var = DiscreteVariable(
            "Cluster", values=["C%d" % (x + 1) for x in range(k)])
        in_core_var = DiscreteVariable("DBSCAN Core", values=["0", "1"])

        domain = self.data.domain
        meta_attrs = domain.metas + (clust_var, in_core_var)
        metas = np.hstack((self.data.metas, clusters, in_core))

        new_domain = Domain(domain.attributes, domain.class_vars, meta_attrs)
        new_table = Table(new_domain, self.data.X, self.data.Y, metas,
                          self.data.W)

        self.Outputs.annotated_data.send(new_table)

    @Inputs.data
    def set_data(self, data):
        """Store the new input, reset errors and recompute the output."""
        self.data = data
        self.Error.clear()
        if self.data is None:
            self.Outputs.annotated_data.send(None)
            return
        self.unconditional_commit()

    def _invalidate(self):
        """Callback for parameter changes; requests a new commit.

        NOTE(review): ``commit`` is presumably wrapped by ``gui.auto_commit``
        so that it honors the ``auto_commit`` setting -- confirm.
        """
        self.commit()


if __name__ == "__main__":
    # Manual preview: show the widget with the iris data set loaded.
    import sys
    from AnyQt.QtWidgets import QApplication

    app = QApplication(sys.argv)
    dbscan_widget = OWDBSCAN()
    dbscan_widget.set_data(Table("iris.tab"))
    dbscan_widget.show()
    app.exec()
    # Persist the settings once the event loop has finished.
    dbscan_widget.saveSettings()
119 changes: 119 additions & 0 deletions Orange/widgets/unsupervised/tests/test_owdbscan.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,119 @@
import numpy as np
from scipy.sparse import csr_matrix

from Orange.data import Table
from Orange.widgets.tests.base import WidgetTest
from Orange.widgets.tests.utils import simulate
from Orange.widgets.unsupervised.owdbscan import OWDBSCAN


class TestOWCSVFileImport(WidgetTest):
    """Tests for the OWDBSCAN widget.

    NOTE(review): the class name looks like a leftover from another test
    module; consider renaming it to match the widget under test.
    """

    def setUp(self):
        self.widget = self.create_widget(OWDBSCAN)
        self.iris = Table("iris")

    def tearDown(self):
        self.widgets.remove(self.widget)
        self.widget.onDeleteWidget()
        self.widget = None

    def _assert_annotated(self, source, output):
        """Check that `output` is `source` plus the two DBSCAN meta columns."""
        self.assertIsNotNone(output)
        self.assertEqual(len(source), len(output))
        self.assertTupleEqual(source.X.shape, output.X.shape)
        self.assertTupleEqual(source.Y.shape, output.Y.shape)
        self.assertEqual(2, output.metas.shape[1])

        first_meta, second_meta = output.domain.metas[:2]
        self.assertEqual("Cluster", str(first_meta))
        self.assertEqual("DBSCAN Core", str(second_meta))

    def _cluster_column_sum(self):
        """Return the nan-ignoring sum of the current output's first meta."""
        output = self.get_output(self.widget.Outputs.annotated_data)
        return np.nansum(output.metas[:, 0])

    def test_cluster(self):
        w = self.widget
        self.send_signal(w.Inputs.data, self.iris)
        self._assert_annotated(
            self.iris, self.get_output(w.Outputs.annotated_data))

    def test_bad_input(self):
        w = self.widget
        error = w.Error.not_enough_instances

        # a single instance is not enough
        self.send_signal(w.Inputs.data, self.iris[:1])
        self.assertTrue(error.is_shown())

        # two instances or the whole table are fine
        for data in (self.iris[:2], self.iris):
            self.send_signal(w.Inputs.data, data)
            self.assertFalse(error.is_shown())

    def test_data_none(self):
        w = self.widget

        self.send_signal(w.Inputs.data, self.iris[:5])
        self.send_signal(w.Inputs.data, None)

        self.assertIsNone(self.get_output(w.Outputs.annotated_data))

    def test_change_eps(self):
        w = self.widget
        self.send_signal(w.Inputs.data, self.iris)

        # change parameters
        eps_control = w.controls.eps
        eps_control.valueChanged.emit(0.5)
        sum_small_eps = self._cluster_column_sum()
        eps_control.valueChanged.emit(1)
        sum_large_eps = self._cluster_column_sum()

        # on this data higher eps has greater sum of clusters - less nan
        # values
        self.assertGreater(sum_large_eps, sum_small_eps)

    def test_change_min_samples(self):
        w = self.widget
        self.send_signal(w.Inputs.data, self.iris)

        # change parameters
        samples_control = w.controls.min_samples
        samples_control.valueChanged.emit(5)
        sum_many_samples = self._cluster_column_sum()
        samples_control.valueChanged.emit(1)
        sum_few_samples = self._cluster_column_sum()

        # on this data lower min_samples has greater sum of clusters - less
        # nan values
        self.assertGreater(sum_few_samples, sum_many_samples)

    def test_change_metric_idx(self):
        w = self.widget
        self.send_signal(w.Inputs.data, self.iris)

        # change parameters
        cbox = w.controls.metric_idx
        simulate.combobox_activate_index(cbox, 0)  # Euclidean
        sum_euclidean = self._cluster_column_sum()
        simulate.combobox_activate_index(cbox, 1)  # Manhattan
        sum_manhattan = self._cluster_column_sum()

        # Manhattan has more nan clusters
        self.assertGreater(sum_euclidean, sum_manhattan)

    def test_sparse_data(self):
        self.iris.X = csr_matrix(self.iris.X)

        w = self.widget
        self.send_signal(w.Inputs.data, self.iris)
        self._assert_annotated(
            self.iris, self.get_output(w.Outputs.annotated_data))

0 comments on commit 29f048a

Please sign in to comment.