From 1484bfd0b5efb4a96980f16b3adbf0e7539f7f39 Mon Sep 17 00:00:00 2001
From: Katie Byers
Date: Mon, 11 Nov 2024 22:59:40 -0800
Subject: [PATCH] add `hashing_metadata` field to table

---
Review notes (text between the `---` line and the first `diff --git` is not
part of the commit message):

* Adds a nullable `hashing_metadata` field (`GzippedDictField(null=True)`) to
  the `grouphashmetadata` model, with matching migration 0787 (`AddField`),
  which depends on `0786_drop_broadcasts_cta_column`.
* Bumps the `sentry` entry in `migrations_lockfile.txt` from 0786 to 0787.
* The migration sets `is_post_deployment = False`.
* On the model, the field is annotated as
  `models.Field[HashingMetadata | None, HashingMetadata | None]`.
* NOTE(review): line breaks and indentation in this copy were restored from a
  whitespace-collapsed paste. The last hunk declares 6 old lines; any context
  after `# SEER` is not visible here - confirm against the original commit
  before applying.

 migrations_lockfile.txt                       |  2 +-
 ..._hashing_metadata_to_grouphash_metadata.py | 34 +++++++++++++++++++
 src/sentry/models/grouphashmetadata.py        |  8 +++++
 3 files changed, 43 insertions(+), 1 deletion(-)
 create mode 100644 src/sentry/migrations/0787_add_hashing_metadata_to_grouphash_metadata.py

diff --git a/migrations_lockfile.txt b/migrations_lockfile.txt
index 7caf1b4c4acede..0744da4ab5be92 100644
--- a/migrations_lockfile.txt
+++ b/migrations_lockfile.txt
@@ -10,7 +10,7 @@ hybridcloud: 0016_add_control_cacheversion
 nodestore: 0002_nodestore_no_dictfield
 remote_subscriptions: 0003_drop_remote_subscription
 replays: 0004_index_together
-sentry: 0786_drop_broadcasts_cta_column
+sentry: 0787_add_hashing_metadata_to_grouphash_metadata
 social_auth: 0002_default_auto_field
 uptime: 0017_unique_on_timeout
 workflow_engine: 0011_action_updates
diff --git a/src/sentry/migrations/0787_add_hashing_metadata_to_grouphash_metadata.py b/src/sentry/migrations/0787_add_hashing_metadata_to_grouphash_metadata.py
new file mode 100644
index 00000000000000..8fd61112a211e9
--- /dev/null
+++ b/src/sentry/migrations/0787_add_hashing_metadata_to_grouphash_metadata.py
@@ -0,0 +1,34 @@
+# Generated by Django 5.1.1 on 2024-11-12 06:09
+
+from django.db import migrations
+
+import sentry.db.models.fields.gzippeddict
+from sentry.new_migrations.migrations import CheckedMigration
+
+
+class Migration(CheckedMigration):
+    # This flag is used to mark that a migration shouldn't be automatically run in production.
+    # This should only be used for operations where it's safe to run the migration after your
+    # code has deployed. So this should not be used for most operations that alter the schema
+    # of a table.
+    # Here are some things that make sense to mark as post deployment:
+    # - Large data migrations. Typically we want these to be run manually so that they can be
+    #   monitored and not block the deploy for a long period of time while they run.
+    # - Adding indexes to large tables. Since this can take a long time, we'd generally prefer to
+    #   run this outside deployments so that we don't block them. Note that while adding an index
+    #   is a schema change, it's completely safe to run the operation after the code has deployed.
+    # Once deployed, run these manually via: https://develop.sentry.dev/database-migrations/#migration-deployment
+
+    is_post_deployment = False
+
+    dependencies = [
+        ("sentry", "0786_drop_broadcasts_cta_column"),
+    ]
+
+    operations = [
+        migrations.AddField(
+            model_name="grouphashmetadata",
+            name="hashing_metadata",
+            field=sentry.db.models.fields.gzippeddict.GzippedDictField(null=True),
+        ),
+    ]
diff --git a/src/sentry/models/grouphashmetadata.py b/src/sentry/models/grouphashmetadata.py
index 0f6d3bfb90d0fb..7978811deca716 100644
--- a/src/sentry/models/grouphashmetadata.py
+++ b/src/sentry/models/grouphashmetadata.py
@@ -5,6 +5,8 @@
 from sentry.db.models import Model, region_silo_model
 from sentry.db.models.base import sane_repr
 from sentry.db.models.fields.foreignkey import FlexibleForeignKey
+from sentry.db.models.fields.gzippeddict import GzippedDictField
+from sentry.types.grouphash_metadata import HashingMetadata
 
 
 # The overall grouping method used
@@ -56,6 +58,12 @@ class GroupHashMetadata(Model):
     latest_grouping_config = models.CharField(null=True)
     # The primary grouping method (message, stacktrace, fingerprint, etc.)
     hash_basis = models.CharField(choices=HashBasis, null=True)
+    # Metadata about the inputs to the hashing process and the hashing process itself (what
+    # fingerprinting rules were matched? did we parameterize the message? etc.). For the specific
+    # data stored, see the class definitions of the `HashingMetadata` subtypes.
+    hashing_metadata: models.Field[HashingMetadata | None, HashingMetadata | None] = (
+        GzippedDictField(null=True)
+    )
 
     # SEER