Skip to content

Commit

Permalink
Merge pull request #5120 from grafana/dev
Browse files Browse the repository at this point in the history
v1.10.1
  • Loading branch information
matiasb authored Oct 3, 2024
2 parents 44e7d99 + 4d9846e commit 4f837bf
Show file tree
Hide file tree
Showing 24 changed files with 18 additions and 1,138 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -12,13 +12,11 @@
from apps.alerts.models.escalation_policy import EscalationPolicy
from apps.alerts.tasks import (
custom_webhook_result,
declare_incident,
notify_all_task,
notify_group_task,
notify_user_task,
resolve_by_last_step_task,
)
from apps.alerts.utils import is_declare_incident_step_enabled
from apps.schedules.ical_utils import list_users_to_notify_from_ical
from apps.user_management.models import User

Expand Down Expand Up @@ -138,7 +136,6 @@ def execute(self, alert_group: "AlertGroup", reason) -> StepExecutionResultData:
EscalationPolicy.STEP_NOTIFY_IF_NUM_ALERTS_IN_TIME_WINDOW: self._escalation_step_notify_if_num_alerts_in_time_window,
EscalationPolicy.STEP_NOTIFY_MULTIPLE_USERS: self._escalation_step_notify_multiple_users,
EscalationPolicy.STEP_NOTIFY_MULTIPLE_USERS_IMPORTANT: self._escalation_step_notify_multiple_users,
EscalationPolicy.STEP_DECLARE_INCIDENT: self._escalation_step_declare_incident,
None: self._escalation_step_not_configured,
}
result = action_map[self.step](alert_group, reason)
Expand Down Expand Up @@ -413,32 +410,6 @@ def _escalation_step_notify_team_members(self, alert_group: "AlertGroup", reason

self._execute_tasks(tasks)

def _escalation_step_declare_incident(self, alert_group: "AlertGroup", _reason: str) -> None:
    """Run the "Declare Incident" escalation step for ``alert_group``.

    When ``is_declare_incident_step_enabled`` reports the step as enabled for
    the organization owning the alert group's channel, a single immutable
    ``declare_incident`` task signature is built and handed to
    ``self._execute_tasks``. Otherwise an escalation-failed log record is
    saved and no task is scheduled.

    Args:
        alert_group: Alert group the escalation step is executed for.
        _reason: Unused by this step.
    """
    organization = alert_group.channel.organization
    if is_declare_incident_step_enabled(organization=organization):
        # immutable=True is passed so the signature keeps exactly these arguments.
        incident_signature = declare_incident.signature(
            args=(alert_group.pk,),
            kwargs={
                "escalation_policy_pk": self.id,
                "severity": self.severity,
            },
            immutable=True,
        )
        self._execute_tasks([incident_signature])
        return

    # Step is disabled: leave a trace in the alert group log instead of escalating.
    failure_record = AlertGroupLogRecord(
        type=AlertGroupLogRecord.TYPE_ESCALATION_FAILED,
        alert_group=alert_group,
        reason="Declare Incident step is not enabled",
        escalation_policy=self.escalation_policy,
        escalation_error_code=AlertGroupLogRecord.ERROR_ESCALATION_DECLARE_INCIDENT_STEP_IS_NOT_ENABLED,
        escalation_policy_step=self.step,
    )
    failure_record.save()

def _escalation_step_notify_if_time(self, alert_group: "AlertGroup", _reason: str) -> StepExecutionResultData:
eta = None

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -22,20 +22,4 @@ class Migration(migrations.Migration):
name='step',
field=models.IntegerField(choices=[(0, 'Wait'), (1, 'Notify User'), (2, 'Notify Whole Channel'), (3, 'Repeat Escalation (5 times max)'), (4, 'Resolve'), (5, 'Notify Group'), (6, 'Notify Schedule'), (7, 'Notify User (Important)'), (8, 'Notify Group (Important)'), (9, 'Notify Schedule (Important)'), (10, 'Trigger Outgoing Webhook'), (11, 'Notify User (next each time)'), (12, 'Continue escalation only if time is from'), (13, 'Notify multiple Users'), (14, 'Notify multiple Users (Important)'), (15, 'Continue escalation if >X alerts per Y minutes'), (16, 'Trigger Webhook'), (17, 'Notify all users in a Team'), (18, 'Notify all users in a Team (Important)'), (19, 'Declare Incident')], default=None, null=True),
),
migrations.CreateModel(
name='DeclaredIncident',
fields=[
('id', models.BigAutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')),
('incident_id', models.CharField(db_index=True, max_length=50)),
('created_at', models.DateTimeField(auto_now_add=True)),
('is_active', models.BooleanField(default=True)),
('channel_filter', models.ForeignKey(null=True, on_delete=django.db.models.deletion.SET_NULL, related_name='declared_incidents', to='alerts.channelfilter')),
('organization', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, related_name='declared_incidents', to='user_management.organization')),
],
),
migrations.AddField(
model_name='alertgroup',
name='declared_incident',
field=models.ForeignKey(default=None, null=True, on_delete=django.db.models.deletion.SET_NULL, related_name='attached_alert_groups', to='alerts.declaredincident'),
),
]
1 change: 0 additions & 1 deletion engine/apps/alerts/models/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,6 @@
from .alert_receive_channel_connection import AlertReceiveChannelConnection # noqa: F401
from .channel_filter import ChannelFilter # noqa: F401
from .custom_button import CustomButton # noqa: F401
from .declared_incident import DeclaredIncident # noqa: F401
from .escalation_chain import EscalationChain # noqa: F401
from .escalation_policy import EscalationPolicy # noqa: F401
from .grafana_alerting_contact_point import GrafanaAlertingContactPoint # noqa: F401
Expand Down
11 changes: 0 additions & 11 deletions engine/apps/alerts/models/alert_group.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,6 @@
AlertGroupLogRecord,
AlertReceiveChannel,
BundledNotification,
DeclaredIncident,
ResolutionNote,
ResolutionNoteSlackMessage,
)
Expand Down Expand Up @@ -207,7 +206,6 @@ class AlertGroup(AlertGroupSlackRenderingMixin, EscalationSnapshotMixin, models.
slack_messages: "RelatedManager['SlackMessage']"
users: "RelatedManager['User']"
labels: "RelatedManager['AlertGroupAssociatedLabel']"
declared_incident: typing.Optional["DeclaredIncident"]

objects: models.Manager["AlertGroup"] = AlertGroupQuerySet.as_manager()

Expand Down Expand Up @@ -423,17 +421,8 @@ def status(self) -> int:
# https://code.djangoproject.com/ticket/28545
is_open_for_grouping = models.BooleanField(default=None, null=True, blank=True)

# todo: rework using this field to use DeclaredIncident model field instead
grafana_incident_id = models.CharField(max_length=100, null=True, default=None)

declared_incident = models.ForeignKey(
"alerts.DeclaredIncident",
on_delete=models.SET_NULL,
null=True,
default=None,
related_name="attached_alert_groups",
)

@staticmethod
def get_silenced_state_filter():
"""
Expand Down
113 changes: 9 additions & 104 deletions engine/apps/alerts/models/alert_group_log_record.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,24 +11,18 @@

from apps.alerts import tasks
from apps.alerts.constants import ActionSource
from apps.alerts.incident_appearance.renderers.constants import DEFAULT_BACKUP_TITLE
from apps.alerts.utils import render_relative_timeline
from apps.slack.slack_formatter import SlackFormatter
from common.utils import clean_markup

if typing.TYPE_CHECKING:
from apps.alerts.models import AlertGroup, CustomButton, EscalationPolicy, Invitation
from apps.user_management.models import Organization, User
from apps.user_management.models import User

logger = logging.getLogger(__name__)
logger.setLevel(logging.DEBUG)


class RelatedIncidentData(typing.TypedDict):
incident_link: typing.Optional[str]
incident_title: str


class AlertGroupLogRecord(models.Model):
alert_group: "AlertGroup"
author: typing.Optional["User"]
Expand Down Expand Up @@ -167,9 +161,7 @@ class AlertGroupLogRecord(models.Model):
ERROR_ESCALATION_TRIGGER_CUSTOM_WEBHOOK_ERROR,
ERROR_ESCALATION_NOTIFY_TEAM_MEMBERS_STEP_IS_NOT_CONFIGURED,
ERROR_ESCALATION_TRIGGER_WEBHOOK_IS_DISABLED,
ERROR_ESCALATION_DECLARE_INCIDENT_STEP_IS_NOT_ENABLED,
ERROR_ESCALATION_INCIDENT_COULD_NOT_BE_DECLARED,
) = range(22)
) = range(20)

type = models.IntegerField(choices=TYPE_CHOICES)

Expand Down Expand Up @@ -233,60 +225,16 @@ class AlertGroupLogRecord(models.Model):
escalation_policy_step = models.IntegerField(null=True, default=None)
step_specific_info = JSONField(null=True, default=None)

STEP_SPECIFIC_INFO_KEYS = [
"schedule_name",
"custom_button_name",
"usergroup_handle",
"source_integration_name",
"incident_link",
"incident_title",
]

def _make_log_line_link(self, url, title, html=False, for_slack=False, substitute_with_tag=False):
if html and url:
return f"<a href='{url}'>{title}</a>"
elif for_slack and url:
return f"<{url}|{title}>"
elif substitute_with_tag:
return f"{{{{{substitute_with_tag}}}}}"
else:
return title
STEP_SPECIFIC_INFO_KEYS = ["schedule_name", "custom_button_name", "usergroup_handle", "source_integration_name"]

def render_log_line_json(self):
time = humanize.naturaldelta(self.alert_group.started_at - self.created_at)
created_at = DateTimeField().to_representation(self.created_at)
organization = self.alert_group.channel.organization
author = self.author.short(organization) if self.author is not None else None
escalation_chain = self.alert_group.channel_filter.escalation_chain if self.alert_group.channel_filter else None
step_info = self.get_step_specific_info()
related_incident = self.render_incident_data_from_step_info(organization, step_info)
escalation_chain_data = (
{
"pk": escalation_chain.public_primary_key,
"title": escalation_chain.name,
}
if escalation_chain
else None
)
schedule = (
{
"pk": self.escalation_policy.notify_schedule.public_primary_key,
"title": self.escalation_policy.notify_schedule.name,
}
if self.escalation_policy and self.escalation_policy.notify_schedule
else None
)
webhook = (
{
"pk": step_info["webhook_id"],
"title": step_info.get("webhook_name", "webhook"),
}
if step_info and "webhook_id" in step_info
else None
)

sf = SlackFormatter(organization)
action = sf.format(self.rendered_log_line_action(substitute_with_tag=True))
action = sf.format(self.rendered_log_line_action(substitute_author_with_tag=True))
action = clean_markup(action)

result = {
Expand All @@ -296,10 +244,6 @@ def render_log_line_json(self):
"type": self.type,
"created_at": created_at,
"author": author,
"incident": related_incident,
"escalation_chain": escalation_chain_data,
"schedule": schedule,
"webhook": webhook,
}
return result

Expand All @@ -314,7 +258,7 @@ def rendered_incident_log_line(self, for_slack=False, html=False):
result += self.rendered_log_line_action(for_slack=for_slack, html=html)
return result

def rendered_log_line_action(self, for_slack=False, html=False, substitute_with_tag=False):
def rendered_log_line_action(self, for_slack=False, html=False, substitute_author_with_tag=False):
from apps.alerts.models import EscalationPolicy

result = ""
Expand All @@ -332,7 +276,7 @@ def rendered_log_line_action(self, for_slack=False, html=False, substitute_with_
elif self.action_source == ActionSource.BACKSYNC:
author_name = "source integration " + step_specific_info.get("source_integration_name", "")
elif self.author:
if substitute_with_tag:
if substitute_author_with_tag:
author_name = "{{author}}"
elif for_slack:
author_name = self.author.get_username_with_slack_verbal()
Expand All @@ -359,9 +303,7 @@ def rendered_log_line_action(self, for_slack=False, html=False, substitute_with_
result += f'alert group assigned to route "{channel_filter.str_for_clients}"'

if escalation_chain is not None:
tag = "escalation_chain" if substitute_with_tag else False
escalation_chain_text = self._make_log_line_link(None, escalation_chain.name, html, for_slack, tag)
result += f' with escalation chain "{escalation_chain_text}"'
result += f' with escalation chain "{escalation_chain.name}"'
else:
result += " with no escalation chain, skipping escalation"
else:
Expand Down Expand Up @@ -437,19 +379,9 @@ def rendered_log_line_action(self, for_slack=False, html=False, substitute_with_
important_text = ""
if escalation_policy_step == EscalationPolicy.STEP_NOTIFY_SCHEDULE_IMPORTANT:
important_text = " (Important)"
tag = "schedule" if substitute_with_tag else False
schedule_text = self._make_log_line_link(None, schedule_name, html, for_slack, tag)
result += f'triggered step "Notify on-call from Schedule {schedule_text}{important_text}"'
result += f'triggered step "Notify on-call from Schedule {schedule_name}{important_text}"'
elif escalation_policy_step == EscalationPolicy.STEP_REPEAT_ESCALATION_N_TIMES:
result += "escalation started from the beginning"
elif escalation_policy_step == EscalationPolicy.STEP_DECLARE_INCIDENT:
organization = self.alert_group.channel.organization
incident_data = self.render_incident_data_from_step_info(organization, step_specific_info)
incident_link = incident_data["incident_link"]
incident_title = incident_data["incident_title"]
tag = "related_incident" if substitute_with_tag else False
incident_text = self._make_log_line_link(incident_link, incident_title, html, for_slack, tag)
result += self.reason + f": {incident_text}"
else:
result += f'triggered step "{EscalationPolicy.get_step_display_name(escalation_policy_step)}"'
elif self.type == AlertGroupLogRecord.TYPE_SILENCE:
Expand Down Expand Up @@ -553,10 +485,7 @@ def rendered_log_line_action(self, for_slack=False, html=False, substitute_with_
trigger = f"{author_name}"
else:
trigger = trigger or "escalation chain"
tag = "webhook" if substitute_with_tag else False
webhook_text = self._make_log_line_link(None, webhook_name, html, for_slack, tag)
result += f"outgoing webhook `{webhook_text}` triggered by {trigger}"

result += f"outgoing webhook `{webhook_name}` triggered by {trigger}"
elif self.type == AlertGroupLogRecord.TYPE_FAILED_ATTACHMENT:
if self.alert_group.slack_message is not None:
result += (
Expand Down Expand Up @@ -665,32 +594,8 @@ def rendered_log_line_action(self, for_slack=False, html=False, substitute_with_
result += f"failed to notify User Group{usergroup_handle_text} in Slack"
elif self.escalation_error_code == AlertGroupLogRecord.ERROR_ESCALATION_TRIGGER_WEBHOOK_IS_DISABLED:
result += 'skipped escalation step "Trigger Outgoing Webhook" because it is disabled'
elif (
self.escalation_error_code == AlertGroupLogRecord.ERROR_ESCALATION_DECLARE_INCIDENT_STEP_IS_NOT_ENABLED
):
result += 'skipped escalation step "Declare Incident": step is not enabled'
elif self.escalation_error_code == AlertGroupLogRecord.ERROR_ESCALATION_INCIDENT_COULD_NOT_BE_DECLARED:
result += "failed to declare an Incident"
if self.reason:
result += f": {self.reason}"
return result

def render_incident_data_from_step_info(
    self, organization: "Organization", step_specific_info: dict
) -> RelatedIncidentData | None:
    """Build incident link/title data from a log record's step-specific info.

    Args:
        organization: Organization passed to ``get_incident_url`` when building the link.
        step_specific_info: Step info mapping; must contain both the
            ``incident_title`` and ``incident_id`` keys to produce a result.

    Returns:
        ``None`` when ``step_specific_info`` is empty or lacks either key.
        Otherwise a dict with ``incident_link`` (``None`` if the stored
        incident id is falsy) and ``incident_title`` (falling back to
        ``DEFAULT_BACKUP_TITLE`` when the stored title is falsy).
    """
    # Imported inside the method, as in the original implementation.
    from apps.alerts.models.declared_incident import get_incident_url

    if not step_specific_info:
        return None
    if "incident_title" not in step_specific_info or "incident_id" not in step_specific_info:
        return None

    if step_specific_info["incident_id"]:
        link = get_incident_url(organization, step_specific_info["incident_id"])
    else:
        link = None
    title = step_specific_info["incident_title"] or DEFAULT_BACKUP_TITLE
    return {"incident_link": link, "incident_title": title}

def get_step_specific_info(self):
step_specific_info = None
# in some cases step_specific_info was saved with using json.dumps
Expand Down
38 changes: 0 additions & 38 deletions engine/apps/alerts/models/declared_incident.py

This file was deleted.

3 changes: 0 additions & 3 deletions engine/apps/alerts/models/escalation_policy.py
Original file line number Diff line number Diff line change
Expand Up @@ -92,7 +92,6 @@ class EscalationPolicy(OrderedModel):
STEP_NOTIFY_IF_TIME,
STEP_NOTIFY_IF_NUM_ALERTS_IN_TIME_WINDOW,
STEP_REPEAT_ESCALATION_N_TIMES,
STEP_DECLARE_INCIDENT,
]
# Steps can be stored in db while interacting with internal api
# Includes important versions of default steps
Expand Down Expand Up @@ -219,7 +218,6 @@ class EscalationPolicy(OrderedModel):
STEP_NOTIFY_IF_TIME,
STEP_NOTIFY_IF_NUM_ALERTS_IN_TIME_WINDOW,
STEP_REPEAT_ESCALATION_N_TIMES,
STEP_DECLARE_INCIDENT,
]

PUBLIC_STEP_CHOICES_MAP = {
Expand All @@ -241,7 +239,6 @@ class EscalationPolicy(OrderedModel):
STEP_NOTIFY_IF_TIME: "notify_if_time_from_to",
STEP_NOTIFY_IF_NUM_ALERTS_IN_TIME_WINDOW: "notify_if_num_alerts_in_window",
STEP_REPEAT_ESCALATION_N_TIMES: "repeat_escalation",
STEP_DECLARE_INCIDENT: "declare_incident",
}

public_primary_key = models.CharField(
Expand Down
1 change: 0 additions & 1 deletion engine/apps/alerts/tasks/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,6 @@
)
from .check_escalation_finished import check_escalation_finished_task # noqa: F401
from .custom_webhook_result import custom_webhook_result # noqa: F401
from .declare_incident import declare_incident # noqa: F401
from .delete_alert_group import delete_alert_group # noqa: F401
from .delete_alert_group import finish_delete_alert_group # noqa: F401
from .delete_alert_group import send_alert_group_signal_for_delete # noqa: F401
Expand Down
Loading

0 comments on commit 4f837bf

Please sign in to comment.