-
Notifications
You must be signed in to change notification settings - Fork 93
/
huber_loss.py
65 lines (57 loc) · 2.7 KB
/
huber_loss.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
"""Huber Loss for Regression or Binary Classification. Robust loss, combination of quadratic loss and linear loss."""
import typing
import numpy as np
from h2oaicore.metrics import CustomScorer
from sklearn.preprocessing import LabelEncoder
# temp. references:
# Wikipedia: https://en.wikipedia.org/wiki/Huber_loss
# https://stackoverflow.com/questions/45006341/xgboost-how-to-use-mae-as-objective-function
# Tukey loss: https://web.as.uky.edu/statistics/users/pbreheny/764-F11/notes/12-1.pdf
class MyHuberLossScorer(CustomScorer):
    """Huber loss scorer for regression and binary classification.

    Huber loss is a loss function used in robust regression that is less
    sensitive to outliers in data than the squared error loss. This custom
    scorer supports both regression and binary classification problems
    using different formulas (see below).
    For more details see: https://en.wikipedia.org/wiki/Huber_loss

    Regression (``labels`` is None), with ``e = |actual - predicted|``::

        loss = 0.5 * e ** 2                 if e < delta
        loss = delta * (e - 0.5 * delta)    otherwise

    Binary classification (``labels`` given), with ``y`` in {-1, +1} and
    ``f = 2 * predicted - 1``::

        loss = max(0, 1 - y * f) ** 2       if y * f >= -1
        loss = -4 * y * f                   otherwise

    In both cases the reported score is the sample-weighted mean of the
    per-row losses.

    Attributes
    ----------
    _delta_regression : numeric
        Threshold between the quadratic and the linear part of the
        regression loss (1.345).
    _delta_binary : numeric
        Value advertised in ``_description`` for binary classification (0.1).
        NOTE: ``score`` does not read it; the binary formula above has no
        delta term.
    """
    _delta_regression = 1.345
    _delta_binary = 0.1
    _description = "My Huber Loss for Regression or Binary Classification [delta=%f or %f]." % (
        _delta_regression, _delta_binary)
    _binary = True
    _regression = True
    _maximize = False
    _perfect_score = 0
    _display_name = "Huber"

    def score(self,
              actual: np.array,
              predicted: np.array,
              sample_weight: typing.Optional[np.array] = None,
              labels: typing.Optional[np.array] = None,
              **kwargs) -> float:
        """Return the sample-weighted mean Huber loss.

        Parameters
        ----------
        actual : array-like
            True target values (regression) or true class labels (binary).
        predicted : array-like
            Predicted values. In the binary case they are rescaled with
            ``2 * predicted - 1`` (presumably probabilities of the class
            that ``LabelEncoder`` encodes as 1 -- confirm against caller).
        sample_weight : array-like, optional
            Per-row weights; every row gets weight 1 when omitted.
        labels : array-like, optional
            Class labels. ``None`` selects the regression formula, anything
            else selects the binary classification formula.
        **kwargs
            Accepted and ignored.

        Returns
        -------
        float
            Weighted mean loss, or ``0.0`` when ``actual`` has no rows.
        """
        # Accept plain sequences as well as arrays; `.shape` is needed below.
        actual = np.asarray(actual)
        predicted = np.asarray(predicted)

        n_rows = actual.shape[0]
        if n_rows == 0:
            # Nothing to score: return early instead of computing 0 / 0
            # (NaN plus a RuntimeWarning) and then discarding the result.
            return 0.0

        if sample_weight is None:
            sample_weight = np.ones(n_rows)

        if labels is None:
            # Regression: quadratic near zero, linear in the tails.
            delta = MyHuberLossScorer._delta_regression
            abs_error = np.abs(np.subtract(actual, predicted))
            loss = np.where(abs_error < delta,
                            0.5 * abs_error ** 2,
                            delta * (abs_error - 0.5 * delta))
        else:
            # Binary classification: encode the classes as 0/1, then move
            # both target and prediction onto the [-1, +1] scale.
            encoder = LabelEncoder()
            encoder.fit(labels)
            encoded_actual = encoder.transform(actual)
            signed_actual = np.where(encoded_actual == 0, -1, 1)
            signed_predicted = np.subtract(np.multiply(predicted, 2), 1)
            margin = np.multiply(signed_actual, signed_predicted)
            # Squared hinge while margin >= -1, linear penalty below that.
            loss = np.where(margin >= -1,
                            np.square(np.maximum(np.zeros(n_rows), np.subtract(1, margin))),
                            -4 * margin)

        weighted_mean = np.sum(np.multiply(sample_weight, loss)) / np.sum(sample_weight)
        return float(weighted_mean)