diff --git a/src/sentry/options/defaults.py b/src/sentry/options/defaults.py
index f5c11b368d8968..7b1721b274ac28 100644
--- a/src/sentry/options/defaults.py
+++ b/src/sentry/options/defaults.py
@@ -511,6 +511,17 @@
     default=False,
     flags=FLAG_AUTOMATOR_MODIFIABLE,
 )
+# Trace sampling rates for replay summary endpoint.
+register(
+    "replay.endpoints.project_replay_summary.trace_sample_rate_post",
+    default=0.0,
+    flags=FLAG_AUTOMATOR_MODIFIABLE,
+)
+register(
+    "replay.endpoints.project_replay_summary.trace_sample_rate_get",
+    default=0.0,
+    flags=FLAG_AUTOMATOR_MODIFIABLE,
+)
 
 # User Feedback Options
 register(
diff --git a/src/sentry/replays/endpoints/project_replay_summary.py b/src/sentry/replays/endpoints/project_replay_summary.py
index 41ab2221eb96d2..add6dfd185f860 100644
--- a/src/sentry/replays/endpoints/project_replay_summary.py
+++ b/src/sentry/replays/endpoints/project_replay_summary.py
@@ -1,12 +1,13 @@
 import logging
 from typing import Any
 
+import sentry_sdk
 from django.conf import settings
 from drf_spectacular.utils import extend_schema
 from rest_framework.request import Request
 from rest_framework.response import Response
 
-from sentry import features
+from sentry import features, options
 from sentry.api.api_owners import ApiOwner
 from sentry.api.api_publish_status import ApiPublishStatus
 from sentry.api.base import region_silo_endpoint
@@ -24,7 +25,7 @@
 )
 from sentry.seer.seer_setup import has_seer_access
 from sentry.seer.signed_seer_api import make_signed_seer_api_request
-from sentry.utils import json
+from sentry.utils import json, metrics
 
 logger = logging.getLogger(__name__)
 
@@ -62,9 +63,15 @@ class ProjectReplaySummaryEndpoint(ProjectEndpoint):
     }
     permission_classes = (ReplaySummaryPermission,)
 
-    def __init__(self, **options) -> None:
+    def __init__(self, **kw) -> None:
         storage.initialize_client()
-        super().__init__(**options)
+        self.sample_rate_post = options.get(
+            "replay.endpoints.project_replay_summary.trace_sample_rate_post"
+        )
+        self.sample_rate_get = options.get(
+            "replay.endpoints.project_replay_summary.trace_sample_rate_get"
+        )
+        super().__init__(**kw)
 
     def make_seer_request(self, path: str, post_body: dict[str, Any]) -> Response:
         """Make a POST request to a Seer endpoint. Raises HTTPError and logs non-200 status codes."""
@@ -133,91 +140,128 @@ def has_replay_summary_access(self, project: Project, request: Request) -> bool:
 
     def get(self, request: Request, project: Project, replay_id: str) -> Response:
         """Poll for the status of a replay summary task in Seer."""
-        if not self.has_replay_summary_access(project, request):
-            return self.respond(
-                {"detail": "Replay summaries are not available for this organization."}, status=403
-            )
-        # We skip checking Seer permissions here for performance, and because summaries can't be created without them anyway.
+        with sentry_sdk.start_transaction(
+            name="replays.endpoints.project_replay_summary.get",
+            op="replays.endpoints.project_replay_summary.get",
+            custom_sampling_context={"sample_rate": self.sample_rate_get},
+        ):
 
-        # Request Seer for the state of the summary task.
-        return self.make_seer_request(
-            SEER_POLL_STATE_ENDPOINT_PATH,
-            {
-                "replay_id": replay_id,
-            },
-        )
+            if not self.has_replay_summary_access(project, request):
+                return self.respond(
+                    {"detail": "Replay summaries are not available for this organization."},
+                    status=403,
+                )
+
+            # We skip checking Seer permissions here for performance, and because summaries can't be created without them anyway.
+
+            # Ask Seer for the state of the summary task.
+            return self.make_seer_request(
+                SEER_POLL_STATE_ENDPOINT_PATH,
+                {
+                    "replay_id": replay_id,
+                },
+            )
 
     def post(self, request: Request, project: Project, replay_id: str) -> Response:
         """Download replay segment data and parse it into logs. Then post to Seer to start a summary task."""
-        if not self.has_replay_summary_access(project, request):
-            return self.respond(
-                {"detail": "Replay summaries are not available for this organization."}, status=403
+
+        with sentry_sdk.start_transaction(
+            name="replays.endpoints.project_replay_summary.post",
+            op="replays.endpoints.project_replay_summary.post",
+            custom_sampling_context={"sample_rate": self.sample_rate_post},
+        ):
+
+            if not self.has_replay_summary_access(project, request):
+                return self.respond(
+                    {"detail": "Replay summaries are not available for this organization."},
+                    status=403,
+                )
+
+            filter_params = self.get_filter_params(request, project)
+            num_segments = request.data.get("num_segments", 0)
+            temperature = request.data.get("temperature", None)
+
+            # Limit data with the frontend's segment count, to keep summaries consistent with the video displayed in the UI.
+            # While the replay is live, the FE and BE may have different counts.
+            if num_segments > MAX_SEGMENTS_TO_SUMMARIZE:
+                logger.warning(
+                    "Replay Summary: hit max segment limit.",
+                    extra={
+                        "replay_id": replay_id,
+                        "project_id": project.id,
+                        "organization_id": project.organization.id,
+                        "segment_limit": MAX_SEGMENTS_TO_SUMMARIZE,
+                    },
+                )
+                num_segments = MAX_SEGMENTS_TO_SUMMARIZE
+
+            # Fetch the replay's error and trace IDs from the replay_id.
+            snuba_response = query_replay_instance(
+                project_id=project.id,
+                replay_id=replay_id,
+                start=filter_params["start"],
+                end=filter_params["end"],
+                organization=project.organization,
+                request_user_id=request.user.id,
+            )
+            processed_response = process_raw_response(
+                snuba_response,
+                fields=request.query_params.getlist("field"),
             )
+            error_ids = processed_response[0].get("error_ids", []) if processed_response else []
+            trace_ids = processed_response[0].get("trace_ids", []) if processed_response else []
 
-        filter_params = self.get_filter_params(request, project)
-        num_segments = request.data.get("num_segments", 0)
-        temperature = request.data.get("temperature", None)
+            # Fetch same-trace errors.
+            trace_connected_errors = fetch_trace_connected_errors(
+                project=project,
+                trace_ids=trace_ids,
+                start=filter_params["start"],
+                end=filter_params["end"],
+                limit=100,
+            )
+            trace_connected_error_ids = {x["id"] for x in trace_connected_errors}
 
-        # Limit data with the frontend's segment count, to keep summaries consistent with the video displayed in the UI.
-        # While the replay is live, the FE and BE may have different counts.
-        if num_segments > MAX_SEGMENTS_TO_SUMMARIZE:
-            logger.warning(
-                "Replay Summary: hit max segment limit.",
-                extra={
+            # Fetch directly linked errors, if they weren't returned by the trace query.
+            replay_errors = fetch_error_details(
+                project_id=project.id,
+                error_ids=[x for x in error_ids if x not in trace_connected_error_ids],
+            )
+
+            error_events = replay_errors + trace_connected_errors
+
+            metrics.distribution(
+                "replays.endpoints.project_replay_summary.direct_errors",
+                value=len(replay_errors),
+            )
+            metrics.distribution(
+                "replays.endpoints.project_replay_summary.trace_connected_errors",
+                value=len(trace_connected_errors),
+            )
+            metrics.distribution(
+                "replays.endpoints.project_replay_summary.num_trace_ids",
+                value=len(trace_ids),
+            )
+
+            # Download segment data.
+            # XXX: For now this is capped to 100 and blocking. DD shows no replays with >25 segments, but we should still stress test and figure out how to deal with large replays.
+            segment_md = fetch_segments_metadata(project.id, replay_id, 0, num_segments)
+            segment_data = iter_segment_data(segment_md)
+
+            # Combine replay and error data and parse into logs.
+            logs = get_summary_logs(segment_data, error_events, project.id)
+
+            # Post to Seer to start a summary task.
+            # XXX: The request isn't streaming, a limitation of Seer authentication. It would be much faster if we
+            # could stream the request data, since the GCS download will (likely) dominate latency.
+            return self.make_seer_request(
+                SEER_START_TASK_ENDPOINT_PATH,
+                {
+                    "logs": logs,
+                    "num_segments": num_segments,
                     "replay_id": replay_id,
-                    "project_id": project.id,
                     "organization_id": project.organization.id,
-                    "segment_limit": MAX_SEGMENTS_TO_SUMMARIZE,
+                    "project_id": project.id,
+                    "temperature": temperature,
                 },
             )
-            num_segments = MAX_SEGMENTS_TO_SUMMARIZE
-
-        # Fetch the replay's error and trace IDs from the replay_id.
-        snuba_response = query_replay_instance(
-            project_id=project.id,
-            replay_id=replay_id,
-            start=filter_params["start"],
-            end=filter_params["end"],
-            organization=project.organization,
-            request_user_id=request.user.id,
-        )
-        processed_response = process_raw_response(
-            snuba_response,
-            fields=request.query_params.getlist("field"),
-        )
-        error_ids = processed_response[0].get("error_ids", []) if processed_response else []
-        trace_ids = processed_response[0].get("trace_ids", []) if processed_response else []
-
-        # Fetch error details.
-        replay_errors = fetch_error_details(project_id=project.id, error_ids=error_ids)
-        trace_connected_errors = fetch_trace_connected_errors(
-            project=project,
-            trace_ids=trace_ids,
-            start=filter_params["start"],
-            end=filter_params["end"],
-        )
-        error_events = replay_errors + trace_connected_errors
-
-        # Download segment data.
-        # XXX: For now this is capped to 100 and blocking. DD shows no replays with >25 segments, but we should still stress test and figure out how to deal with large replays.
-        segment_md = fetch_segments_metadata(project.id, replay_id, 0, num_segments)
-        segment_data = iter_segment_data(segment_md)
-
-        # Combine replay and error data and parse into logs.
-        logs = get_summary_logs(segment_data, error_events, project.id)
-
-        # Post to Seer to start a summary task.
-        # XXX: Request isn't streaming. Limitation of Seer authentication. Would be much faster if we
-        # could stream the request data since the GCS download will (likely) dominate latency.
-        return self.make_seer_request(
-            SEER_START_TASK_ENDPOINT_PATH,
-            {
-                "logs": logs,
-                "num_segments": num_segments,
-                "replay_id": replay_id,
-                "organization_id": project.organization.id,
-                "project_id": project.id,
-                "temperature": temperature,
-            },
-        )
diff --git a/src/sentry/replays/query.py b/src/sentry/replays/query.py
index a646fc2592efcf..4f03ac09e9c8e5 100644
--- a/src/sentry/replays/query.py
+++ b/src/sentry/replays/query.py
@@ -2,7 +2,7 @@
 from collections.abc import Generator, Sequence
 from datetime import datetime
-from typing import Any
+from typing import Any, Literal
 
 from snuba_sdk import (
     Column,
@@ -34,6 +34,8 @@
     make_full_aggregation_query,
     query_using_optimized_search,
 )
+from sentry.search.events.types import SnubaParams
+from sentry.snuba.utils import get_dataset
 from sentry.utils.snuba import raw_snql_query
 
 MAX_PAGE_SIZE = 100
@@ -902,3 +904,55 @@ def compute_has_viewed(viewed_by_id: int | None) -> Function:
         ],
         alias="has_viewed",
     )
+
+
+def query_trace_connected_events(
+    dataset_label: Literal["errors", "issuePlatform", "discover"],
+    selected_columns: list[str],
+    query: str | None,
+    snuba_params: SnubaParams,
+    equations: list[str] | None = None,
+    orderby: list[str] | None = None,
+    offset: int = 0,
+    limit: int = 10,
+    referrer: str = "api.replay.details-page",
+) -> dict[str, Any]:
+    """
+    Query for trace-connected events, with a reusable query configuration for replays.
+
+    Args:
+        dataset_label: Label of the Snuba dataset to query against
+        selected_columns: List of columns to select
+        query: Optional query string
+        snuba_params: Snuba parameters including project IDs, time range, etc.
+        equations: Optional list of equations
+        orderby: Optional ordering specification
+        offset: Pagination offset
+        limit: Pagination limit
+        referrer: Referrer string for tracking
+
+    Returns:
+        Query result from the dataset
+    """
+    query_details = {
+        "selected_columns": selected_columns,
+        "query": query,
+        "snuba_params": snuba_params,
+        "equations": equations,
+        "orderby": orderby,
+        "offset": offset,
+        "limit": limit,
+        "referrer": referrer,
+        "auto_fields": True,
+        "auto_aggregations": True,
+        "use_aggregate_conditions": True,
+        "allow_metric_aggregates": False,
+        "transform_alias_to_input_format": True,
+    }
+
+    dataset = get_dataset(dataset_label)
+
+    if dataset is None:
+        raise ValueError(f"Unknown dataset: {dataset_label}")
+
+    return dataset.query(**query_details)
diff --git a/src/sentry/replays/usecases/summarize.py b/src/sentry/replays/usecases/summarize.py
index 02ab91c6615364..6c1de34da514ec 100644
--- a/src/sentry/replays/usecases/summarize.py
+++ b/src/sentry/replays/usecases/summarize.py
@@ -8,19 +8,18 @@
 
 from sentry import nodestore
 from sentry.constants import ObjectStatus
+from sentry.issues.grouptype import FeedbackGroup
 from sentry.models.project import Project
+from sentry.replays.query import query_trace_connected_events
 from sentry.replays.usecases.ingest.event_parser import EventType
 from sentry.replays.usecases.ingest.event_parser import (
     get_timestamp_ms as get_replay_event_timestamp_ms,
 )
 from sentry.replays.usecases.ingest.event_parser import parse_network_content_lengths, which
-from sentry.search.events.builder.discover import DiscoverQueryBuilder
-from sentry.search.events.types import QueryBuilderConfig, SnubaParams
+from sentry.search.events.types import SnubaParams
 from sentry.services.eventstore.models import Event
-from sentry.snuba.dataset import Dataset
 from sentry.snuba.referrer import Referrer
 from sentry.utils import json
-from sentry.utils.snuba import bulk_snuba_queries
 
 logger = logging.getLogger(__name__)
 
@@ -79,6 +78,7 @@ def fetch_trace_connected_errors(
     trace_ids: list[str],
     start: datetime | None,
     end: datetime | None,
+    limit: int,
 ) -> list[EventDict]:
     """Fetch error details given trace IDs and return a list of EventDict objects."""
     try:
@@ -90,65 +90,95 @@
         Project.objects.filter(organization=project.organization, status=ObjectStatus.ACTIVE)
     )
 
-    queries = []
-    for trace_id in trace_ids:
-        snuba_params = SnubaParams(
-            projects=org_projects,
-            start=start,
-            end=end,
-            organization=project.organization,
-        )
-
-        # Generate a query for each trace ID. This will be executed in bulk.
-        error_query = DiscoverQueryBuilder(
-            Dataset.Events,
-            params={},
-            snuba_params=snuba_params,
-            query=f"trace:{trace_id}",
-            selected_columns=[
-                "id",
-                "timestamp_ms",
-                "timestamp",
-                "title",
-                "message",
-            ],
-            orderby=["id"],
-            limit=100,
-            config=QueryBuilderConfig(
-                auto_fields=False,
-            ),
-        )
-        queries.append(error_query)
+    snuba_params = SnubaParams(
+        projects=org_projects,
+        start=start,
+        end=end,
+        organization=project.organization,
+    )
 
-    if not queries:
-        return []
+    trace_ids_query = " OR ".join([f"trace:{trace_id}" for trace_id in trace_ids])
+
+    # Query the errors dataset.
+    error_query_results = query_trace_connected_events(
+        dataset_label="errors",
+        selected_columns=[
+            "id",
+            "timestamp_ms",
+            "timestamp",
+            "title",
+            "message",
+        ],
+        query=trace_ids_query,
+        snuba_params=snuba_params,
+        orderby=["-timestamp"],
+        limit=limit,
+        referrer=Referrer.API_REPLAY_SUMMARIZE_BREADCRUMBS.value,
+    )
 
-    # Execute all queries
-    results = bulk_snuba_queries(
-        [query.get_snql_query() for query in queries],
+    # Query the issuePlatform dataset.
+    issue_query_results = query_trace_connected_events(
+        dataset_label="issuePlatform",
+        selected_columns=[
+            "event_id",
+            "title",
+            "subtitle",
+            "timestamp",
+            "occurrence_type_id",
+        ],
+        query=trace_ids_query,
+        snuba_params=snuba_params,
+        orderby=["-timestamp"],
+        limit=limit,
         referrer=Referrer.API_REPLAY_SUMMARIZE_BREADCRUMBS.value,
     )
 
     # Process results and convert to EventDict objects
     error_events = []
-    for result, query in zip(results, queries):
-        error_data = query.process_results(result)["data"]
-
-        for event in error_data:
-            timestamp = _parse_iso_timestamp_to_ms(
-                event.get("timestamp_ms")
-            ) or _parse_iso_timestamp_to_ms(event.get("timestamp"))
-
-            if timestamp:
-                error_events.append(
-                    EventDict(
-                        category="error",
-                        id=event["id"],
-                        title=event.get("title", ""),
-                        timestamp=timestamp,  # this should be in milliseconds
-                        message=event.get("message", ""),
-                    )
+
+    # Process error query results
+    for event in error_query_results["data"]:
+        timestamp = _parse_iso_timestamp_to_ms(
+            event.get("timestamp_ms")
+        ) or _parse_iso_timestamp_to_ms(event.get("timestamp"))
+        message = event.get("message", "")
+
+        if timestamp:
+            error_events.append(
+                EventDict(
+                    category="error",
+                    id=event.get("id"),
+                    title=event.get("title", ""),
+                    timestamp=timestamp,
+                    message=message,
                 )
+            )
+
+    # Process issuePlatform query results
+    for event in issue_query_results["data"]:
+        timestamp = _parse_iso_timestamp_to_ms(event.get("timestamp"))
+        message = event.get("subtitle", "") or event.get("message", "")
+
+        if event.get("occurrence_type_id") == FeedbackGroup.type_id:
+            category = "feedback"
+        else:
+            category = "error"
+
+        # NOTE: The issuePlatform dataset query can return feedback.
+        # We also fetch feedback from nodestore in fetch_feedback_details
+        # for feedback breadcrumbs.
+        # We avoid creating duplicate feedback logs
+        # by filtering for unique feedback IDs during log generation.
+        if timestamp:
+            error_events.append(
+                EventDict(
+                    category=category,
+                    id=event.get("event_id"),
+                    title=event.get("title", ""),
+                    timestamp=timestamp,
+                    message=message,
+                )
+            )
 
     return error_events
 
@@ -207,7 +237,7 @@ def get_summary_logs(
     error_events: list[EventDict],
     project_id: int,
 ) -> list[str]:
-    # Sort error events by timestamp
+    # Sort error events by timestamp. This list still includes all feedback events.
     error_events.sort(key=lambda x: x["timestamp"])
     return list(generate_summary_logs(segment_data, error_events, project_id))
 
@@ -217,8 +247,12 @@ def generate_summary_logs(
     error_events: list[EventDict],
     project_id,
 ) -> Generator[str]:
-    """Generate log messages from events and errors in chronological order."""
+    """
+    Generate log messages from events and errors in chronological order.
+    Avoids processing duplicate feedback events.
+    """
     error_idx = 0
+    seen_feedback_ids = {error["id"] for error in error_events if error["category"] == "feedback"}
 
     # Process segments
     for _, segment in segment_data:
@@ -232,15 +266,23 @@
                 error_idx < len(error_events) and error_events[error_idx]["timestamp"] < timestamp
             ):
                 error = error_events[error_idx]
-                yield generate_error_log_message(error)
+
+                if error["category"] == "error":
+                    yield generate_error_log_message(error)
+                elif error["category"] == "feedback":
+                    yield generate_feedback_log_message(error)
+
                 error_idx += 1
 
             # Yield the current event's log message
             if event_type == EventType.FEEDBACK:
                 feedback_id = event["data"]["payload"].get("data", {}).get("feedbackId")
-                feedback = fetch_feedback_details(feedback_id, project_id)
-                if feedback:
-                    yield generate_feedback_log_message(feedback)
+                # Filter out duplicate feedback events.
+                if feedback_id not in seen_feedback_ids:
+                    feedback = fetch_feedback_details(feedback_id, project_id)
+
+                    if feedback:
+                        yield generate_feedback_log_message(feedback)
 
             elif message := as_log_message(event):
                 yield message
@@ -248,7 +290,12 @@
     # Yield any remaining error messages
     while error_idx < len(error_events):
         error = error_events[error_idx]
-        yield generate_error_log_message(error)
+
+        if error["category"] == "error":
+            yield generate_error_log_message(error)
+        elif error["category"] == "feedback":
+            yield generate_feedback_log_message(error)
+
         error_idx += 1
diff --git a/tests/sentry/replays/endpoints/test_project_replay_summary.py b/tests/sentry/replays/endpoints/test_project_replay_summary.py
index 29bb7ac5f24e45..b6a1943586e0e4 100644
--- a/tests/sentry/replays/endpoints/test_project_replay_summary.py
+++ b/tests/sentry/replays/endpoints/test_project_replay_summary.py
@@ -1,19 +1,23 @@
 import uuid
 import zlib
-from datetime import UTC, datetime
+from datetime import UTC, datetime, timedelta
+from typing import Any
 from unittest.mock import Mock, patch
 
 import requests
 from django.conf import settings
 from django.urls import reverse
 
+from sentry.feedback.lib.utils import FeedbackCreationSource
+from sentry.feedback.usecases.ingest.create_feedback import create_feedback_issue
 from sentry.replays.endpoints.project_replay_summary import (
     SEER_POLL_STATE_ENDPOINT_PATH,
     SEER_START_TASK_ENDPOINT_PATH,
 )
 from sentry.replays.lib.storage import FilestoreBlob, RecordingSegmentStorageMeta
 from sentry.replays.testutils import mock_replay
-from sentry.testutils.cases import TransactionTestCase
+from sentry.replays.usecases.summarize import EventDict
+from sentry.testutils.cases import SnubaTestCase, TransactionTestCase
 from sentry.testutils.skips import requires_snuba
 from sentry.utils import json
 
@@ -32,6 +36,7 @@ def json(self):
 @requires_snuba
 class ProjectReplaySummaryTestCase(
     TransactionTestCase,
+    SnubaTestCase,
 ):
     endpoint = "sentry-api-0-project-replay-summary"
 
@@ -173,14 +178,15 @@ def test_post_simple(self, mock_make_seer_api_request: Mock) -> None:
     def test_post_with_both_direct_and_trace_connected_errors(
         self, mock_make_seer_api_request: Mock
     ) -> None:
-        """Test handling of breadcrumbs with both direct and trace connected errors"""
+        """Test handling of breadcrumbs with both direct and trace-connected errors. Error logs should not be duplicated."""
         mock_response = MockSeerResponse(200, json_data={"hello": "world"})
         mock_make_seer_api_request.return_value = mock_response
         now = datetime.now(UTC)
         trace_id = uuid.uuid4().hex
+        span_id = "1" + uuid.uuid4().hex[:15]
 
-        # Create a direct error event
+        # Create a direct error event that is also trace-connected.
         direct_event_id = uuid.uuid4().hex
         direct_error_timestamp = now.timestamp() - 2
         self.store_event(
             data={
@@ -195,14 +201,20 @@
                     }
                 ]
             },
-            "contexts": {"replay": {"replay_id": self.replay_id}},
+            "contexts": {
+                "replay": {"replay_id": self.replay_id},
+                "trace": {
+                    "type": "trace",
+                    "trace_id": trace_id,
+                    "span_id": span_id,
+                },
+            },
             },
             project_id=self.project.id,
         )
 
         # Create a trace connected error event
         connected_event_id = uuid.uuid4().hex
-        span_id = "1" + uuid.uuid4().hex[:15]
         connected_error_timestamp = now.timestamp() - 1
         project_2 = self.create_project()
         self.store_event(
@@ -256,17 +268,21 @@
         mock_make_seer_api_request.assert_called_once()
         request_body = json.loads(mock_make_seer_api_request.call_args[1]["body"].decode())
         logs = request_body["logs"]
+        assert len(logs) == 3
         assert any("ZeroDivisionError" in log for log in logs)
         assert any("division by zero" in log for log in logs)
         assert any("ConnectionError" in log for log in logs)
         assert any("Failed to connect to database" in log for log in logs)
 
     @patch("sentry.replays.endpoints.project_replay_summary.make_signed_seer_api_request")
-    def test_post_with_feedback(self, mock_make_seer_api_request: Mock) -> None:
-        """Test handling of breadcrumbs with user feedback"""
+    def test_post_with_feedback_breadcrumb(self, mock_make_seer_api_request: Mock) -> None:
+        """Test handling of a feedback breadcrumb when the feedback
+        is in nodestore but hasn't reached Snuba yet.
+        If the feedback is in Snuba (guaranteed for SDK v8.0.0+),
+        it should be de-duped like in the duplicate_feedback test below."""
         mock_response = MockSeerResponse(
             200,
-            json_data={"feedback": "Feedback was submitted"},
+            json_data={"hello": "world"},
         )
         mock_make_seer_api_request.return_value = mock_response
@@ -275,6 +291,7 @@ def test_post_with_feedback(self, mock_make_seer_api_request: Mock) -> None:
 
         self.store_event(
             data={
+                "type": "feedback",
                 "event_id": feedback_event_id,
                 "timestamp": now.timestamp(),
                 "contexts": {
@@ -295,14 +312,208 @@ def test_post_with_feedback(self, mock_make_seer_api_request: Mock) -> None:
             {
                 "type": 5,
                 "timestamp": float(now.timestamp()),
+                "data": {
+                    "tag": "breadcrumb",
+                    "payload": {
+                        "category": "sentry.feedback",
+                        "data": {"feedbackId": feedback_event_id},
+                    },
+                },
+            },
+        ]
+        self.save_recording_segment(0, json.dumps(data).encode())
+
+        with self.feature(self.features):
+            response = self.client.post(
+                self.url, data={"num_segments": 1}, content_type="application/json"
+            )
+
+        assert response.status_code == 200
+        assert response.json() == {"hello": "world"}
+
+        mock_make_seer_api_request.assert_called_once()
+        request_body = json.loads(mock_make_seer_api_request.call_args[1]["body"].decode())
+        logs = request_body["logs"]
+        assert "User submitted feedback: 'Great website!'" in logs[0]
+
+    @patch("sentry.replays.endpoints.project_replay_summary.make_signed_seer_api_request")
+    def test_post_with_trace_errors_both_datasets(self, mock_make_seer_api_request):
+        """Test that the trace-connected error Snuba query works correctly with both datasets."""
+        mock_response = MockSeerResponse(200, {"hello": "world"})
+        mock_make_seer_api_request.return_value = mock_response
+
+        now = datetime.now(UTC)
+        project_1 = self.create_project()
+        project_2 = self.create_project()
+
+        # Create a regular error event - errors dataset
+        event_id_1 = uuid.uuid4().hex
+        trace_id_1 = uuid.uuid4().hex
+        timestamp_1 = (now - timedelta(minutes=2)).timestamp()
+        self.store_event(
+            data={
+                "event_id": event_id_1,
+                "timestamp": timestamp_1,
+                "exception": {
+                    "values": [
+                        {
+                            "type": "ValueError",
+                            "value": "Invalid input",
+                        }
+                    ]
+                },
+                "contexts": {
+                    "trace": {
+                        "type": "trace",
+                        "trace_id": trace_id_1,
+                        "span_id": "1" + uuid.uuid4().hex[:15],
+                    }
+                },
+            },
+            project_id=project_1.id,
+        )
+
+        # Create a feedback event - issuePlatform dataset
+        event_id_2 = uuid.uuid4().hex
+        trace_id_2 = uuid.uuid4().hex
+        timestamp_2 = (now - timedelta(minutes=5)).timestamp()
+
+        feedback_data = {
+            "type": "feedback",
+            "event_id": event_id_2,
+            "timestamp": timestamp_2,
+            "contexts": {
+                "feedback": {
+                    "contact_email": "test@example.com",
+                    "name": "Test User",
+                    "message": "Great website",
+                    "replay_id": self.replay_id,
+                    "url": "https://example.com",
+                },
+                "trace": {
+                    "type": "trace",
+                    "trace_id": trace_id_2,
+                    "span_id": "2" + uuid.uuid4().hex[:15],
+                },
+            },
+        }
+
+        create_feedback_issue(
+            feedback_data, project_2, FeedbackCreationSource.NEW_FEEDBACK_ENVELOPE
+        )
+
+        # Store the replay with all trace IDs
+        self.store_replay(trace_ids=[trace_id_1, trace_id_2])
+
+        data = [
+            {
+                "type": 5,
+                "timestamp": 0.0,
                 "data": {
                     "tag": "breadcrumb",
                     "payload": {"category": "console", "message": "hello"},
                 },
             },
+        ]
+        self.save_recording_segment(0, json.dumps(data).encode())
+
+        with self.feature(self.features):
+            response = self.client.post(
+                self.url, data={"num_segments": 1}, content_type="application/json"
+            )
+
+        assert response.status_code == 200
+        assert response.get("Content-Type") == "application/json"
+        assert response.json() == {"hello": "world"}
+
+        mock_make_seer_api_request.assert_called_once()
+        call_args = mock_make_seer_api_request.call_args
+        request_body = json.loads(call_args[1]["body"].decode())
+        logs = request_body["logs"]
+        assert len(logs) == 3
+
+        # Verify that the feedback event is included
+        assert "Great website" in logs[1]
+        assert "User submitted feedback" in logs[1]
+
+        # Verify that the regular error event is included
+        assert "ValueError" in logs[2]
+        assert "Invalid input" in logs[2]
+        assert "User experienced an error" in logs[2]
+
+    @patch("sentry.replays.usecases.summarize.fetch_feedback_details")
+    @patch("sentry.replays.endpoints.project_replay_summary.make_signed_seer_api_request")
+    def test_post_with_trace_errors_duplicate_feedback(
+        self, mock_make_seer_api_request, mock_fetch_feedback_details
+    ):
+        """Test that duplicate feedback events are filtered.
+        Duplicates may happen when the replay has a feedback breadcrumb,
+        and the feedback is also returned from the Snuba query for trace-connected errors."""
+        mock_response = MockSeerResponse(200, {"hello": "world"})
+        mock_make_seer_api_request.return_value = mock_response
+
+        now = datetime.now(UTC)
+        feedback_event_id = uuid.uuid4().hex
+        feedback_event_id_2 = uuid.uuid4().hex
+        trace_id = uuid.uuid4().hex
+        trace_id_2 = uuid.uuid4().hex
+
+        # Create feedback event - issuePlatform dataset
+        feedback_data: dict[str, Any] = {
+            "type": "feedback",
+            "event_id": feedback_event_id,
+            "timestamp": (now - timedelta(minutes=3)).timestamp(),
+            "contexts": {
+                "feedback": {
+                    "contact_email": "test@example.com",
+                    "name": "Test User",
+                    "message": "Great website",
+                    "replay_id": self.replay_id,
+                    "url": "https://example.com",
+                },
+                "trace": {
+                    "type": "trace",
+                    "trace_id": trace_id,
+                    "span_id": "1" + uuid.uuid4().hex[:15],
+                },
+            },
+        }
+
+        # Create another feedback event - issuePlatform dataset
+        feedback_data_2: dict[str, Any] = {
+            "type": "feedback",
+            "event_id": feedback_event_id_2,
+            "timestamp": (now - timedelta(minutes=2)).timestamp(),
+            "contexts": {
+                "feedback": {
+                    "contact_email": "test2@example.com",
+                    "name": "Test User 2",
+                    "message": "Broken website",
+                    "replay_id": self.replay_id,
+                    "url": "https://example.com",
+                },
+                "trace": {
+                    "type": "trace",
+                    "trace_id": trace_id_2,
+                    "span_id": "1" + uuid.uuid4().hex[:15],
+                },
+            },
+        }
+
+        create_feedback_issue(
+            feedback_data, self.project, FeedbackCreationSource.NEW_FEEDBACK_ENVELOPE
+        )
+        create_feedback_issue(
+            feedback_data_2, self.project, FeedbackCreationSource.NEW_FEEDBACK_ENVELOPE
+        )
+
+        self.store_replay(trace_ids=[trace_id, trace_id_2])
+
+        # Mock an SDK feedback event with the same event_id as the first feedback event
+        data = [
             {
                 "type": 5,
-                "timestamp": float(now.timestamp()),
+                "timestamp": float((now - timedelta(minutes=3)).timestamp()),
                 "data": {
                     "tag": "breadcrumb",
                     "payload": {
@@ -314,19 +525,36 @@
             ]
         self.save_recording_segment(0, json.dumps(data).encode())
 
+        # Mock fetch_feedback_details to return a dup of the first feedback event.
+        # In prod this is from nodestore. We had difficulties writing to nodestore in tests.
+        mock_fetch_feedback_details.return_value = EventDict(
+            id=feedback_event_id,
+            title="User Feedback",
+            message=feedback_data["contexts"]["feedback"]["message"],
+            timestamp=float(feedback_data["timestamp"]),
+            category="feedback",
+        )
+
         with self.feature(self.features):
             response = self.client.post(
                 self.url, data={"num_segments": 1}, content_type="application/json"
            )
 
         assert response.status_code == 200
-        assert response.json() == {"feedback": "Feedback was submitted"}
+        assert response.get("Content-Type") == "application/json"
+        assert response.json() == {"hello": "world"}
 
         mock_make_seer_api_request.assert_called_once()
-        request_body = json.loads(mock_make_seer_api_request.call_args[1]["body"].decode())
+        call_args = mock_make_seer_api_request.call_args
+        request_body = json.loads(call_args[1]["body"].decode())
         logs = request_body["logs"]
-        assert any("Great website!" in log for log in logs)
-        assert any("User submitted feedback" in log for log in logs)
+
+        # Verify that only the unique feedback logs are included
+        assert len(logs) == 2
+        assert "User submitted feedback" in logs[0]
+        assert "Great website" in logs[0]
+        assert "User submitted feedback" in logs[1]
+        assert "Broken website" in logs[1]
 
     @patch("sentry.replays.endpoints.project_replay_summary.MAX_SEGMENTS_TO_SUMMARIZE", 1)
     @patch("sentry.replays.endpoints.project_replay_summary.make_signed_seer_api_request")
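
Note on the sampling pattern in project_replay_summary.py: custom_sampling_context only influences sampling when the SDK is initialized with a traces_sampler that reads it. Below is a minimal sketch of that wiring, assuming sentry-sdk 1.x behavior; the sampler function and DSN are illustrative and not part of this patch.

    import sentry_sdk

    def traces_sampler(sampling_context: dict) -> float:
        # Keys passed to start_transaction(custom_sampling_context=...) are
        # merged into the sampling_context handed to this function.
        rate = sampling_context.get("sample_rate")
        if rate is not None:
            return float(rate)  # 0.0 by default; raised at runtime via the registered options
        return 0.0  # drop transactions that carry no explicit rate

    sentry_sdk.init(
        dsn="https://examplePublicKey@o0.ingest.sentry.io/0",  # placeholder DSN
        traces_sampler=traces_sampler,
    )

    # Mirrors the endpoint code: with the options defaulting to 0.0, these
    # transactions are dropped until an operator raises the rate.
    with sentry_sdk.start_transaction(
        name="replays.endpoints.project_replay_summary.post",
        op="replays.endpoints.project_replay_summary.post",
        custom_sampling_context={"sample_rate": 0.25},
    ):
        pass  # endpoint body runs here

This is also why the endpoint reads the option values once in __init__: the per-request cost is a dictionary lookup on the instance rather than an options-store fetch, at the price of needing a deploy (or worker restart) for new values to take effect.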