-
-
Notifications
You must be signed in to change notification settings - Fork 1k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
1 parent
37e7b02
commit 29f048a
Showing
3 changed files
with
325 additions
and
0 deletions.
There are no files selected for viewing
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,153 @@ | ||
import numpy as np | ||
|
||
from AnyQt.QtWidgets import QLayout | ||
from AnyQt.QtCore import Qt | ||
|
||
from Orange.widgets import widget, gui | ||
from Orange.widgets.settings import Setting | ||
from Orange.data import Table, Domain, DiscreteVariable | ||
from Orange.clustering import DBSCAN | ||
from Orange import distance | ||
from Orange.widgets.utils.annotated_data import ANNOTATED_DATA_SIGNAL_NAME | ||
from Orange.widgets.utils.signals import Input, Output | ||
from Orange.widgets.widget import Msg | ||
|
||
|
||
class OWDBSCAN(widget.OWWidget):
    """Widget that clusters the input table with DBSCAN and annotates it.

    The input table is handed to ``Orange.clustering.DBSCAN``.  The output
    is the same table with two meta columns appended: "Cluster" (the cluster
    of each row, missing for rows with a negative label) and "DBSCAN Core"
    (1 for rows listed among the model's core samples, 0 otherwise).
    """
    name = "DBSCAN"
    description = "Density-based spatial clustering."
    icon = "icons/DBSCAN.svg"
    priority = 2150

    class Inputs:
        data = Input("Data", Table)

    class Outputs:
        annotated_data = Output(ANNOTATED_DATA_SIGNAL_NAME, Table)

    class Error(widget.OWWidget.Error):
        not_enough_instances = Msg("Not enough unique data instances. "
                                   "At least two are required.")

    # (label shown in the combo box, value passed to DBSCAN as ``metric``)
    METRICS = [
        ("Euclidean", "euclidean"),
        ("Manhattan", "manhattan"),
        ("Cosine", distance.Cosine),
        ("Jaccard", distance.Jaccard),
        # NOTE(review): the metrics below were left disabled in the original;
        # the reason is not visible here.
        # ("Spearman", distance.SpearmanR),
        # ("Spearman absolute", distance.SpearmanRAbsolute),
        # ("Pearson", distance.PearsonR),
        # ("Pearson absolute", distance.PearsonRAbsolute),
    ]

    # Persisted widget settings.
    min_samples = Setting(5)
    eps = Setting(0.5)
    metric_idx = Setting(0)      # index into METRICS
    auto_commit = Setting(True)

    want_main_area = False

    def __init__(self):
        """Build the control area: parameters, metric selector, apply box."""
        super().__init__()

        self.data = None
        self.db = None
        self.model = None

        box = gui.widgetBox(self.controlArea, "Parameters")
        gui.spin(box, self, "min_samples", 1, 100, 1,
                 callback=self._invalidate,
                 label="Core point neighbors")
        gui.doubleSpin(box, self, "eps", 0.1, 10, 0.01,
                       callback=self._invalidate,
                       label="Neighborhood distance")

        box = gui.widgetBox(self.controlArea, self.tr("Distance Metric"))
        gui.comboBox(box, self, "metric_idx",
                     items=tuple(label for label, _ in self.METRICS),
                     callback=self._invalidate)

        gui.auto_commit(self.controlArea, self, "auto_commit", "Apply",
                        orientation=Qt.Horizontal)
        gui.rubber(self.controlArea)

        self.controlArea.setMinimumWidth(self.controlArea.sizeHint().width())
        self.layout().setSizeConstraint(QLayout.SetFixedSize)

    def adjustSize(self):
        """Resize the widget to the size hint of its control area."""
        self.ensurePolished()
        self.resize(self.controlArea.sizeHint())

    def check_data_size(self):
        """Return True when data is present and has at least two rows.

        Shows ``Error.not_enough_instances`` when data is present but too
        small.  Missing data is not an error; it just yields False, so that
        changing a parameter before any data arrives does not raise.
        """
        if self.data is None:
            return False
        if len(self.data) < 2:
            self.Error.not_enough_instances()
            return False
        return True

    def commit(self):
        """Recompute the clustering and send the result."""
        self.cluster()

    def cluster(self):
        """Fit DBSCAN with the current settings and send the annotated data.

        When there is no usable data the output is cleared, so a result
        computed from earlier data does not linger next to the error.
        """
        if not self.check_data_size():
            self.Outputs.annotated_data.send(None)
            return
        self.model = DBSCAN(
            eps=self.eps,
            min_samples=self.min_samples,
            metric=self.METRICS[self.metric_idx][1]
        ).get_model(self.data)
        self.send_data()

    def send_data(self):
        """Append the cluster and core-sample meta columns and send the table.

        Reads ``self.model`` and ``self.data``; both must be set.
        """
        model = self.model
        n_rows = len(self.data)

        # Negative labels mark rows without a cluster; they become missing
        # values.  ``astype`` copies, so the model's labels stay untouched.
        labels = np.asarray(model.labels)
        unclustered = labels < 0
        k = int(np.unique(labels[~unclustered]).size)
        clusters = labels.astype(float)
        clusters[unclustered] = np.nan
        clusters = clusters.reshape(n_rows, 1)

        # 1 for rows listed in the model's core sample indices, else 0.
        in_core = np.zeros((n_rows, 1))
        in_core[model.projector.core_sample_indices_, 0] = 1

        clust_var = DiscreteVariable(
            "Cluster", values=["C%d" % (x + 1) for x in range(k)])
        in_core_var = DiscreteVariable("DBSCAN Core", values=["0", "1"])

        domain = self.data.domain
        meta_attrs = domain.metas + (clust_var, in_core_var)
        metas = np.hstack((self.data.metas, clusters, in_core))

        new_domain = Domain(domain.attributes, domain.class_vars, meta_attrs)
        new_table = Table(
            new_domain, self.data.X, self.data.Y, metas, self.data.W)

        self.Outputs.annotated_data.send(new_table)

    @Inputs.data
    def set_data(self, data):
        """Store the new input, clear errors, and recompute if data is given.

        ``None`` clears the output.
        """
        self.data = data
        self.Error.clear()
        if data is None:
            self.Outputs.annotated_data.send(None)
            return
        # Not defined in this class; presumably installed by
        # gui.auto_commit in __init__ -- confirm.
        self.unconditional_commit()

    def _invalidate(self):
        """Callback for the parameter controls: trigger a commit."""
        self.commit()
|
||
|
||
if __name__ == "__main__":
    # Manual preview: show the widget with the iris data set loaded, and
    # save its settings once the event loop exits.
    import sys
    from AnyQt.QtWidgets import QApplication

    app = QApplication(sys.argv)
    preview = OWDBSCAN()
    preview.set_data(Table("iris.tab"))
    preview.show()
    app.exec()
    preview.saveSettings()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,119 @@ | ||
import numpy as np | ||
from scipy.sparse import csr_matrix | ||
|
||
from Orange.data import Table | ||
from Orange.widgets.tests.base import WidgetTest | ||
from Orange.widgets.tests.utils import simulate | ||
from Orange.widgets.unsupervised.owdbscan import OWDBSCAN | ||
|
||
|
||
class TestOWCSVFileImport(WidgetTest):
    """Tests for the OWDBSCAN widget, driven through its signals."""
    # NOTE(review): the class name looks copied from the CSV import tests;
    # consider renaming it to TestOWDBSCAN.

    def setUp(self):
        self.widget = self.create_widget(OWDBSCAN)
        self.iris = Table("iris")

    def tearDown(self):
        self.widgets.remove(self.widget)
        self.widget.onDeleteWidget()
        self.widget = None

    def _annotated(self):
        """Return what the widget currently has on its annotated output."""
        return self.get_output(self.widget.Outputs.annotated_data)

    def _cluster_column_sum(self):
        """Return the NaN-ignoring sum of the first meta column."""
        return np.nansum(self._annotated().metas[:, 0])

    def _assert_iris_with_two_metas(self, output):
        """Check that output is iris-shaped with the two added meta columns."""
        self.assertIsNotNone(output)
        self.assertEqual(len(self.iris), len(output))
        self.assertTupleEqual(self.iris.X.shape, output.X.shape)
        self.assertTupleEqual(self.iris.Y.shape, output.Y.shape)
        self.assertEqual(2, output.metas.shape[1])

        first_meta, second_meta = output.domain.metas[:2]
        self.assertEqual("Cluster", str(first_meta))
        self.assertEqual("DBSCAN Core", str(second_meta))

    def test_cluster(self):
        self.send_signal(self.widget.Inputs.data, self.iris)
        self._assert_iris_with_two_metas(self._annotated())

    def test_bad_input(self):
        widget = self.widget
        error = widget.Error.not_enough_instances

        self.send_signal(widget.Inputs.data, self.iris[:1])
        self.assertTrue(error.is_shown())

        self.send_signal(widget.Inputs.data, self.iris[:2])
        self.assertFalse(error.is_shown())

        self.send_signal(widget.Inputs.data, self.iris)
        self.assertFalse(error.is_shown())

    def test_data_none(self):
        widget = self.widget

        self.send_signal(widget.Inputs.data, self.iris[:5])
        self.send_signal(widget.Inputs.data, None)

        self.assertIsNone(self._annotated())

    def test_change_eps(self):
        self.send_signal(self.widget.Inputs.data, self.iris)

        # change parameters
        eps_control = self.widget.controls.eps
        eps_control.valueChanged.emit(0.5)
        sum_small_eps = self._cluster_column_sum()
        eps_control.valueChanged.emit(1)
        sum_large_eps = self._cluster_column_sum()

        # on this data higher eps has greater sum of clusters - less nan
        # values
        self.assertGreater(sum_large_eps, sum_small_eps)

    def test_change_min_samples(self):
        self.send_signal(self.widget.Inputs.data, self.iris)

        # change parameters
        min_samples_control = self.widget.controls.min_samples
        min_samples_control.valueChanged.emit(5)
        sum_five = self._cluster_column_sum()
        min_samples_control.valueChanged.emit(1)
        sum_one = self._cluster_column_sum()

        # on this data lower min_samples has greater sum of clusters - less
        # nan values
        self.assertGreater(sum_one, sum_five)

    def test_change_metric_idx(self):
        self.send_signal(self.widget.Inputs.data, self.iris)

        # change parameters
        metric_combo = self.widget.controls.metric_idx
        simulate.combobox_activate_index(metric_combo, 0)  # Euclidean
        sum_euclidean = self._cluster_column_sum()
        simulate.combobox_activate_index(metric_combo, 1)  # Manhattan
        sum_manhattan = self._cluster_column_sum()

        # Manhattan has more nan clusters
        self.assertGreater(sum_euclidean, sum_manhattan)

    def test_sparse_data(self):
        # Same checks as test_cluster, with the feature matrix in CSR form.
        self.iris.X = csr_matrix(self.iris.X)

        self.send_signal(self.widget.Inputs.data, self.iris)
        self._assert_iris_with_two_metas(self._annotated())