diff --git a/src/sentry/grouping/ingest/seer.py b/src/sentry/grouping/ingest/seer.py index 90379a2e9f63a9..5ec190e215201c 100644 --- a/src/sentry/grouping/ingest/seer.py +++ b/src/sentry/grouping/ingest/seer.py @@ -4,6 +4,7 @@ import sentry_sdk from django.conf import settings +from django.utils import timezone from sentry import features, options from sentry import ratelimits as ratelimiter @@ -419,13 +420,20 @@ def maybe_check_seer_for_matching_grouphash( ) return seer_matched_grouphash + timestamp = timezone.now() + gh_metadata.update( # Technically the time of the metadata record creation and the time of the Seer # request will be some milliseconds apart, but a) the difference isn't meaningful # for us, and b) forcing them to be the same (rather than just close) lets us use # their equality as a signal that the Seer call happened during ingest rather than # during a backfill, without having to store that information separately. - seer_date_sent=gh_metadata.date_added, + # + # In rare race condition cases, `date_added` will be None (if different events win + # the race to create the relevant `GroupHash` and `GroupHashMetadata` records), so + # we set that if necessary here as well. + date_added=gh_metadata.date_added or timestamp, + seer_date_sent=gh_metadata.date_added or timestamp, seer_event_sent=event.event_id, seer_model=seer_response_data["similarity_model_version"], seer_matched_grouphash=seer_matched_grouphash, diff --git a/tests/sentry/event_manager/grouping/test_seer_grouping.py b/tests/sentry/event_manager/grouping/test_seer_grouping.py index 2b9520ea4a7cde..73833e405579d4 100644 --- a/tests/sentry/event_manager/grouping/test_seer_grouping.py +++ b/tests/sentry/event_manager/grouping/test_seer_grouping.py @@ -5,6 +5,7 @@ from unittest.mock import MagicMock, patch from sentry.conf.server import SEER_SIMILARITY_MODEL_VERSION +from sentry.grouping.ingest.grouphash_metadata import create_or_update_grouphash_metadata_if_needed from sentry.grouping.ingest.seer import get_seer_similar_issues, should_call_seer_for_grouping from sentry.models.grouphash import GroupHash from sentry.seer.similarity.types import SeerSimilarIssueData @@ -277,3 +278,43 @@ def test_event_not_sent_to_seer(self): assert event_grouphash and event_grouphash.metadata self.assert_correct_seer_metadata(event_grouphash, None, None, None, None, None) + + @with_feature("organizations:grouphash-metadata-creation") + @patch("sentry.grouping.ingest.seer.should_call_seer_for_grouping", return_value=True) + def test_fills_in_missing_date_added(self, _): + + # Mimic the effects of the race condition wherein two events with the same new hash race to + # create `GroupHash` and `GroupHashMetadata` records, and each event wins one of the races, + # which results in the metadata not having a `date_added` value + def race_condition_create_or_update_grouphash_metadata( + event, project, grouphash, created, grouping_config, variants + ): + create_or_update_grouphash_metadata_if_needed( + event, project, grouphash, created, grouping_config, variants + ) + assert grouphash.metadata + grouphash.metadata.update(date_added=None) + assert not grouphash.metadata.date_added + + with ( + patch( + "sentry.grouping.ingest.seer.get_similarity_data_from_seer", + return_value=[], + ), + patch( + "sentry.grouping.ingest.hashing.create_or_update_grouphash_metadata_if_needed", + wraps=race_condition_create_or_update_grouphash_metadata, + ) as mock_create_or_update_grouphash_metadata, + ): + event = save_new_event(get_event_data(), self.project) + event_grouphash = GroupHash.objects.filter( + hash=event.get_primary_hash(), project_id=self.project.id + ).first() + assert event_grouphash and event_grouphash.metadata + + # Our mock was called, so we know that going into the Seer call, the grouphash had no + # `date_added` in its metadata, because that's asserted in the mock + mock_create_or_update_grouphash_metadata.assert_called() + # Now, however, it does, and it's the same as the Seer timestamp + assert event_grouphash.metadata.date_added + assert event_grouphash.metadata.seer_date_sent == event_grouphash.metadata.date_added