Skip to content

Commit

Permalink
Merge pull request #4338 from VesnaT/outlier_detection
Browse files Browse the repository at this point in the history
[RFC][ENH] Outliers: Widget upgrade
  • Loading branch information
BlazZupan authored Jan 24, 2020
2 parents c6c8300 + 40f5521 commit 4256b16
Show file tree
Hide file tree
Showing 7 changed files with 437 additions and 185 deletions.
2 changes: 1 addition & 1 deletion Orange/classification/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
from .tree import *
from .simple_tree import *
from .simple_random_forest import *
from .elliptic_envelope import *
from .outlier_detection import *
from .rules import *
from .sgd import *
from .neural_network import *
Expand Down
41 changes: 0 additions & 41 deletions Orange/classification/elliptic_envelope.py

This file was deleted.

73 changes: 73 additions & 0 deletions Orange/classification/outlier_detection.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,73 @@
# pylint: disable=unused-argument
from sklearn.covariance import EllipticEnvelope
from sklearn.ensemble import IsolationForest
from sklearn.neighbors import LocalOutlierFactor
from Orange.base import SklLearner, SklModel
from Orange.data import Table, Domain

# Public names of this module: the three outlier-detection learners.
# These are what a star-import of this module exposes.
__all__ = ["LocalOutlierFactorLearner", "IsolationForestLearner",
           "EllipticEnvelopeLearner"]


class _OutlierDetector(SklLearner):
    """Common base for the outlier-detection learners in this module.

    Calling the learner hands the parent learner a copy of the data whose
    domain is built from the input's attributes only.
    """

    def __call__(self, data: Table):
        """Invoke the parent learner on ``data`` reduced to its attributes.

        Parameters
        ----------
        data : Table
            Input data; only ``data.domain.attributes`` is kept.

        Returns
        -------
        Whatever the parent class's ``__call__`` returns for the reduced
        table.
        """
        attributes_only = Domain(data.domain.attributes)
        reduced = data.transform(attributes_only)
        return super().__call__(reduced)


class LocalOutlierFactorLearner(_OutlierDetector):
    """Learner that wraps scikit-learn's ``LocalOutlierFactor``.

    The constructor arguments mirror the wrapped estimator's parameters,
    with ``novelty`` defaulting to ``True`` here.
    NOTE(review): presumably ``novelty=True`` is chosen so the fitted model
    can be applied to data passed in later -- confirm against the
    scikit-learn documentation.
    """

    name = "Local Outlier Factor"
    __wraps__ = LocalOutlierFactor

    def __init__(
            self,
            n_neighbors=20,
            algorithm="auto",
            leaf_size=30,
            metric="minkowski",
            p=2,
            metric_params=None,
            contamination="auto",
            novelty=True,
            n_jobs=None,
            preprocessors=None,
    ):
        super().__init__(preprocessors=preprocessors)
        # Snapshot of the local namespace, i.e. the constructor arguments.
        # Do not define any other local before this line: it would end up
        # in the snapshot as well.
        # NOTE(review): presumably the base class filters this mapping down
        # to the arguments the wrapped estimator accepts -- confirm.
        self.params = locals()


class IsolationForestLearner(_OutlierDetector):
    """Learner that wraps scikit-learn's ``IsolationForest``.

    The constructor arguments are collected into ``self.params``.
    """

    name = "Isolation Forest"
    __wraps__ = IsolationForest

    def __init__(
            self,
            n_estimators=100,
            max_samples='auto',
            contamination='auto',
            max_features=1.0,
            bootstrap=False,
            n_jobs=None,
            behaviour='deprecated',
            random_state=None,
            verbose=0,
            warm_start=False,
            preprocessors=None,
    ):
        super().__init__(preprocessors=preprocessors)
        # Snapshot of the local namespace, i.e. the constructor arguments.
        # Do not define any other local before this line: it would end up
        # in the snapshot as well.
        # NOTE(review): presumably the base class filters this mapping down
        # to the arguments the wrapped estimator accepts -- confirm.
        self.params = locals()


class EllipticEnvelopeClassifier(SklModel):
    """Model for the elliptic-envelope learner, exposing Mahalanobis distances."""

    def mahalanobis(self, observations):
        """Compute squared Mahalanobis distances of the given observations.

        Parameters
        ----------
        observations : ndarray (n_samples, n_features) or Orange Table
            For a ``Table``, its ``X`` array is used.

        Returns
        -------
        distances : ndarray (n_samples,)
            Squared Mahalanobis distances of the given observations, as
            computed by the wrapped scikit-learn model.
        """
        points = (observations.X if isinstance(observations, Table)
                  else observations)
        return self.skl_model.mahalanobis(points)


class EllipticEnvelopeLearner(_OutlierDetector):
    """Learner that wraps scikit-learn's ``EllipticEnvelope``.

    Models produced by this learner are ``EllipticEnvelopeClassifier``
    instances (see ``__returns__``), which additionally expose
    ``mahalanobis``.

    Reducing the data to its attributes is inherited from
    ``_OutlierDetector.__call__``. This class deliberately does not
    override ``__call__``: an identical override would only chain into the
    base implementation and apply the same domain transformation twice.
    """

    __wraps__ = EllipticEnvelope
    __returns__ = EllipticEnvelopeClassifier
    name = "Covariance Estimator"

    def __init__(self, store_precision=True, assume_centered=False,
                 support_fraction=None, contamination=0.1,
                 random_state=None, preprocessors=None):
        super().__init__(preprocessors=preprocessors)
        # Snapshot of the local namespace, i.e. the constructor arguments.
        # Do not define any other local before this line: it would end up
        # in the snapshot as well.
        self.params = vars()
1 change: 1 addition & 0 deletions Orange/classification/svm.py
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,7 @@ def __init__(self, nu=0.5, kernel='rbf', degree=3, gamma="auto", coef0=0.0,


class OneClassSVMLearner(SklLearnerBase):
name = "One class SVM"
__wraps__ = skl_svm.OneClassSVM
preprocessors = svm_pps

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,8 @@

import numpy as np
from Orange.data import Table, Domain, ContinuousVariable
from Orange.classification import EllipticEnvelopeLearner
from Orange.classification import EllipticEnvelopeLearner, \
IsolationForestLearner, LocalOutlierFactorLearner


class TestEllipticEnvelopeLearner(unittest.TestCase):
Expand Down Expand Up @@ -44,7 +45,7 @@ def test_mahalanobis(self):

def test_EllipticEnvelope_ignores_y(self):
domain = Domain((ContinuousVariable("x1"), ContinuousVariable("x2")),
class_vars=(ContinuousVariable("y1"), ContinuousVariable("y2")))
(ContinuousVariable("y1"), ContinuousVariable("y2")))
X = np.random.random((40, 2))
Y = np.random.random((40, 2))
table = Table(domain, X, Y)
Expand All @@ -60,3 +61,25 @@ def test_EllipticEnvelope_ignores_y(self):
np.testing.assert_array_equal(pred1, pred2)
np.testing.assert_array_equal(pred2, pred3)
np.testing.assert_array_equal(pred3, pred4)


class TestOutlierDetection(unittest.TestCase):
    """Checks how many iris instances each detector labels with ``-1``."""

    @classmethod
    def setUpClass(cls):
        cls.iris = Table("iris")

    def _count_flagged(self, learner):
        """Apply ``learner`` to iris and count predictions equal to ``-1``."""
        model = learner(self.iris)
        predictions = model(self.iris)
        return np.count_nonzero(predictions == -1)

    def test_LocalOutlierFactorDetector(self):
        learner = LocalOutlierFactorLearner(contamination=0.1)
        self.assertEqual(self._count_flagged(learner), 14)

    def test_IsolationForestDetector(self):
        learner = IsolationForestLearner(contamination=0.1)
        self.assertEqual(self._count_flagged(learner), 15)


# Run the tests in this module when the file is executed as a script.
if __name__ == "__main__":
    unittest.main()
Loading

0 comments on commit 4256b16

Please sign in to comment.