Skip to content

Commit

Permalink
fix(grouping): Fix new secondary hash save prevention (#86057)
Browse files Browse the repository at this point in the history
An error which occurred while we were adding metadata to a newly-created secondary grouphash got me wondering how we'd even landed there in the first place, as we're not supposed to be saving new secondary grouphashes. As it turns out, our test for whether or not to bail before saving was only catching cases in which _all_ of the secondary grouphashes were new. This fixes that by instead filtering the list of grouphashes, so that a) existing secondary grouphashes can have metadata added to them, and b) new secondary grouphashes are all prevented from being stored, regardless of what other hashes have been calculated.

Note that since `test_secondary_grouphashes_not_saved_when_creating_new_group` still passes, we know that the current behavior with all secondary hashes are new is preserved, and since `test_filters_new_secondary_hashes_when_creating_grouphashes` is no longer xfailed, we know the problem behavior has been fixed.
  • Loading branch information
lobsterkatie authored Feb 28, 2025
1 parent a578f2e commit 4d5b15b
Show file tree
Hide file tree
Showing 2 changed files with 12 additions and 10 deletions.
19 changes: 12 additions & 7 deletions src/sentry/grouping/ingest/hashing.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

import copy
import logging
from collections.abc import Sequence
from collections.abc import Iterable, Sequence
from typing import TYPE_CHECKING

import sentry_sdk
Expand Down Expand Up @@ -213,17 +213,22 @@ def get_or_create_grouphashes(
event: Event,
project: Project,
variants: dict[str, BaseVariant],
hashes: Sequence[str],
hashes: Iterable[str],
grouping_config: str,
) -> list[GroupHash]:
is_secondary = grouping_config == project.get_option("sentry:secondary_grouping_config")
grouphashes: list[GroupHash] = []

# The only utility of secondary hashes is to link new primary hashes to an existing group.
# Secondary hashes which are also new are therefore of no value, so there's no need to store or
# annotate them and we can bail now.
if is_secondary and not GroupHash.objects.filter(project=project, hash__in=hashes).exists():
return grouphashes
if is_secondary:
# The only utility of secondary hashes is to link new primary hashes to an existing group
# via an existing grouphash. Secondary hashes which are new are therefore of no value, so
# filter them out before creating grouphash records.
existing_hashes = set(
GroupHash.objects.filter(project=project, hash__in=hashes).values_list(
"hash", flat=True
)
)
hashes = filter(lambda hash_value: hash_value in existing_hashes, hashes)

for hash_value in hashes:
grouphash, created = GroupHash.objects.get_or_create(project=project, hash=hash_value)
Expand Down
3 changes: 0 additions & 3 deletions tests/sentry/grouping/ingest/test_hashing.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,6 @@
from time import time
from unittest.mock import MagicMock, patch

import pytest

from sentry.grouping.ingest.hashing import (
_calculate_event_grouping,
_calculate_secondary_hashes,
Expand Down Expand Up @@ -156,7 +154,6 @@ def test_secondary_grouphashes_not_saved_when_creating_new_group(
hash=hashes_by_config[LEGACY_GROUPING_CONFIG]
).exists()

@pytest.mark.xfail(reason="new secondary hashes not filtered if not all are new")
def test_filters_new_secondary_hashes_when_creating_grouphashes(self):
project = self.project
project.update_option("sentry:grouping_config", LEGACY_GROUPING_CONFIG)
Expand Down

0 comments on commit 4d5b15b

Please sign in to comment.