From 1b53a3408b71220dc6a41f95acdc21ca7d1a59c1 Mon Sep 17 00:00:00 2001 From: Evan Purkhiser Date: Thu, 24 Oct 2024 17:40:53 -0400 Subject: [PATCH] ref(crons): Acocunt for math edge cases in _evaluate_tick_decision (#79717) --- src/sentry/monitors/clock_dispatch.py | 9 ++- tests/sentry/monitors/test_clock_dispatch.py | 73 ++++++++++++++++++++ 2 files changed, 79 insertions(+), 3 deletions(-) diff --git a/src/sentry/monitors/clock_dispatch.py b/src/sentry/monitors/clock_dispatch.py index 765fa8fe21db1f..1639dce673cd8b 100644 --- a/src/sentry/monitors/clock_dispatch.py +++ b/src/sentry/monitors/clock_dispatch.py @@ -142,8 +142,8 @@ def _evaluate_tick_decision(tick: datetime): if past_minute_volume is None: return - # Can't make any decisions if we don't have historic data - if len(historic_volume) == 0: + # We need AT LEAST two data points to calculate standard deviation + if len(historic_volume) < 2: return # Record some statistics about the past_minute_volume volume in comparison @@ -157,7 +157,10 @@ def _evaluate_tick_decision(tick: datetime): # Calculate the z-score of our past minutes volume in comparison to the # historic volume data. The z-score is measured in terms of standard # deviations from the mean - z_score = (past_minute_volume - historic_mean) / historic_stdev + if historic_stdev != 0.0: + z_score = (past_minute_volume - historic_mean) / historic_stdev + else: + z_score = 0.0 # Percentage deviation from the mean for our past minutes volume pct_deviation = (abs(past_minute_volume - historic_mean) / historic_mean) * 100 diff --git a/tests/sentry/monitors/test_clock_dispatch.py b/tests/sentry/monitors/test_clock_dispatch.py index 0f4b7bc5a5ad30..f53855de55b8d5 100644 --- a/tests/sentry/monitors/test_clock_dispatch.py +++ b/tests/sentry/monitors/test_clock_dispatch.py @@ -323,3 +323,76 @@ def test_evaluate_tick_decision_volume_drop(metrics, logger): 52.0, sample_rate=1.0, ) + + +@mock.patch("sentry.monitors.clock_dispatch.logger") +@mock.patch("sentry.monitors.clock_dispatch.metrics") +@override_options({"crons.tick_volume_anomaly_detection": True}) +def test_evaluate_tick_decision_low_history(metrics, logger): + tick = timezone.now().replace(second=0, microsecond=0) + + # This is the timestamp we're looking at just after the tick + past_ts = tick - timedelta(minutes=1) + + # Only add one historic value (and the current value being evaluated) + update_check_in_volume([past_ts - MONITOR_VOLUME_DECISION_STEP] * 900) + update_check_in_volume([past_ts] * 900) + + _evaluate_tick_decision(tick) + + # We should do nothing because there was not enough daata to make any + # calculation + assert not logger.info.called + assert not metrics.gauge.called + + +@mock.patch("sentry.monitors.clock_dispatch.logger") +@mock.patch("sentry.monitors.clock_dispatch.metrics") +@override_options({"crons.tick_volume_anomaly_detection": True}) +def test_evaluate_tick_decision_uniform(metrics, logger): + tick = timezone.now().replace(second=0, microsecond=0) + + # This is the timestamp we're looking at just after the tick + past_ts = tick - timedelta(minutes=1) + + # Fill with a uniform history (all values the same). This will give us a + # standard deviation of 0. In this case the z-value needs to be computed + # differently. + fill_historic_volume( + start=past_ts - MONITOR_VOLUME_DECISION_STEP, + length=MONITOR_VOLUME_RETENTION, + step=MONITOR_VOLUME_DECISION_STEP, + counts=[1000], + ) + update_check_in_volume([past_ts] * 1000) + + _evaluate_tick_decision(tick) + + logger.info.assert_called_with( + "monitors.clock_dispatch.volume_history", + extra={ + "reference_datetime": str(tick), + "evaluation_minute": past_ts.strftime("%H:%M"), + "history_count": 30, + "z_score": 0.0, + "pct_deviation": 0.0, + "historic_mean": 1000, + "historic_stdev": 0.0, + }, + ) + + metrics.gauge.assert_any_call( + "monitors.task.volume_history.count", + 30, + sample_rate=1.0, + ) + metrics.gauge.assert_any_call( + "monitors.task.volume_history.z_score", + 0.0, + sample_rate=1.0, + ) + metrics.gauge.assert_any_call( + "monitors.task.volume_history.pct_deviation", + 0.0, + sample_rate=1.0, + )