Skip to content

Commit

Permalink
Merge pull request #1495 from ajdapretnar/gini-formula
Browse files Browse the repository at this point in the history
[FIX] Gini impurity: formula and docstring fixed.
  • Loading branch information
lanzagar authored Sep 16, 2016
2 parents abbf69f + c839687 commit 3f3ebd4
Show file tree
Hide file tree
Showing 3 changed files with 8 additions and 7 deletions.
11 changes: 6 additions & 5 deletions Orange/preprocess/score.py
Original file line number Diff line number Diff line change
Expand Up @@ -205,9 +205,9 @@ def _entropy(D):

def _gini(D):
"""Gini index of class-distribution matrix"""
- P = D / np.sum(D, axis=0)
- return sum((np.ones(1 if len(D.shape) == 1 else D.shape[1]) - np.sum(np.square(P), axis=0))
- * 0.5 * np.sum(D, axis=0) / np.sum(D))
+ P = np.asarray(D / np.sum(D, axis=0))
+ return np.sum((1 - np.sum(P ** 2, axis=0)) *
+ np.sum(D, axis=0) / np.sum(D))


def _symmetrical_uncertainty(X, Y):
Expand Down Expand Up @@ -287,8 +287,9 @@ def from_contingency(self, cont, nan_adjustment):

class Gini(ClassificationScorer):
"""
- Gini index is the probability that two randomly chosen instances will have different
- classes. See `Wikipedia entry on gini index <http://en.wikipedia.org/wiki/Gini_coefficient>`_.
+ Gini impurity is the probability that two randomly chosen instances will have different
+ classes. See `Wikipedia entry on Gini impurity
+ <https://en.wikipedia.org/wiki/Decision_tree_learning#Gini_impurity>`_.
"""
def from_contingency(self, cont, nan_adjustment):
return (_gini(np.sum(cont, axis=1)) - _gini(cont)) * nan_adjustment
Expand Down
2 changes: 1 addition & 1 deletion Orange/tests/test_score_feature.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ def test_gain_ratio(self):

def test_gini(self):
scorer = Gini()
- correct = [0.11893, 0.10427, 0.13117, 0.14650, 0.05973]
+ correct = [0.23786, 0.20855, 0.26235, 0.29300, 0.11946]
np.testing.assert_almost_equal([scorer(self.zoo, a) for a in range(5)],
correct, decimal=5)

Expand Down
2 changes: 1 addition & 1 deletion Orange/widgets/data/owrank.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ def table(shape, fill=None):
SCORES = [
score_meta("Information Gain", "Inf. gain", score.InfoGain),
score_meta("Gain Ratio", "Gain Ratio", score.GainRatio),
- score_meta("Gini Gain", "Gini", score.Gini),
+ score_meta("Gini Decrease", "Gini", score.Gini),
score_meta("ANOVA", "ANOVA", score.ANOVA),
score_meta("Chi2", "Chi2", score.Chi2),
score_meta("Univariate Linear Regression", "Univar. Lin. Reg.", score.UnivariateLinearRegression),
Expand Down

0 comments on commit 3f3ebd4

Please sign in to comment.