From 4d5b15b33913983cce05427dfd9cd7efde6a6026 Mon Sep 17 00:00:00 2001 From: Katie Byers Date: Fri, 28 Feb 2025 14:23:29 -0800 Subject: [PATCH] fix(grouping): Fix new secondary hash save prevention (#86057) An error which occurred while we were adding metadata to a newly-created secondary grouphash got me wondering how we'd even landed there in the first place, as we're not supposed to be saving new secondary grouphashes. As it turns out, our test for whether or not to bail before saving was only catching cases in which _all_ of the secondary grouphashes were new. This fixes that by instead filtering the list of grouphashes, so that a) existing secondary grouphashes can have metadata added to them, and b) new secondary grouphashes are all prevented from being stored, regardless of what other hashes have been calculated. Note that since `test_secondary_grouphashes_not_saved_when_creating_new_group` still passes, we know that the current behavior with all secondary hashes are new is preserved, and since `test_filters_new_secondary_hashes_when_creating_grouphashes` is no longer xfailed, we know the problem behavior has been fixed. --- src/sentry/grouping/ingest/hashing.py | 19 ++++++++++++------- tests/sentry/grouping/ingest/test_hashing.py | 3 --- 2 files changed, 12 insertions(+), 10 deletions(-) diff --git a/src/sentry/grouping/ingest/hashing.py b/src/sentry/grouping/ingest/hashing.py index 60690116e09ac2..7e532e15bd5cae 100644 --- a/src/sentry/grouping/ingest/hashing.py +++ b/src/sentry/grouping/ingest/hashing.py @@ -2,7 +2,7 @@ import copy import logging -from collections.abc import Sequence +from collections.abc import Iterable, Sequence from typing import TYPE_CHECKING import sentry_sdk @@ -213,17 +213,22 @@ def get_or_create_grouphashes( event: Event, project: Project, variants: dict[str, BaseVariant], - hashes: Sequence[str], + hashes: Iterable[str], grouping_config: str, ) -> list[GroupHash]: is_secondary = grouping_config == project.get_option("sentry:secondary_grouping_config") grouphashes: list[GroupHash] = [] - # The only utility of secondary hashes is to link new primary hashes to an existing group. - # Secondary hashes which are also new are therefore of no value, so there's no need to store or - # annotate them and we can bail now. - if is_secondary and not GroupHash.objects.filter(project=project, hash__in=hashes).exists(): - return grouphashes + if is_secondary: + # The only utility of secondary hashes is to link new primary hashes to an existing group + # via an existing grouphash. Secondary hashes which are new are therefore of no value, so + # filter them out before creating grouphash records. + existing_hashes = set( + GroupHash.objects.filter(project=project, hash__in=hashes).values_list( + "hash", flat=True + ) + ) + hashes = filter(lambda hash_value: hash_value in existing_hashes, hashes) for hash_value in hashes: grouphash, created = GroupHash.objects.get_or_create(project=project, hash=hash_value) diff --git a/tests/sentry/grouping/ingest/test_hashing.py b/tests/sentry/grouping/ingest/test_hashing.py index b1d04db6912947..3e1ff50ea8a225 100644 --- a/tests/sentry/grouping/ingest/test_hashing.py +++ b/tests/sentry/grouping/ingest/test_hashing.py @@ -3,8 +3,6 @@ from time import time from unittest.mock import MagicMock, patch -import pytest - from sentry.grouping.ingest.hashing import ( _calculate_event_grouping, _calculate_secondary_hashes, @@ -156,7 +154,6 @@ def test_secondary_grouphashes_not_saved_when_creating_new_group( hash=hashes_by_config[LEGACY_GROUPING_CONFIG] ).exists() - @pytest.mark.xfail(reason="new secondary hashes not filtered if not all are new") def test_filters_new_secondary_hashes_when_creating_grouphashes(self): project = self.project project.update_option("sentry:grouping_config", LEGACY_GROUPING_CONFIG)