Skip to content

Commit 198921a

Browse files
committed
fix(#2009): emit truncation on VAD interrupt
1 parent 553c1bf commit 198921a

File tree

2 files changed

+40
-5
lines changed

2 files changed

+40
-5
lines changed

src/agents/realtime/openai_realtime.py

Lines changed: 28 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -621,10 +621,38 @@ async def _handle_ws_event(self, event: dict[str, Any]):
621621
last_audio = self._audio_state_tracker.get_last_audio_item()
622622
if last_audio is not None:
623623
item_id, content_index = last_audio
624+
playback_state = self._get_playback_state()
625+
playback_item_id = playback_state.get("current_item_id")
626+
playback_content_index = playback_state.get("current_item_content_index") or 0
627+
playback_elapsed_ms = playback_state.get("elapsed_ms")
624628
await self._emit_event(
625629
RealtimeModelAudioInterruptedEvent(item_id=item_id, content_index=content_index)
626630
)
627631

632+
elapsed_override = parsed.audio_end_ms
633+
if (
634+
elapsed_override is None
635+
or elapsed_override <= 0
636+
or (
637+
self._playback_tracker
638+
and playback_elapsed_ms is not None
639+
and float(elapsed_override) > playback_elapsed_ms
640+
)
641+
):
642+
effective_elapsed_ms = playback_elapsed_ms
643+
else:
644+
effective_elapsed_ms = float(elapsed_override)
645+
646+
if playback_item_id and effective_elapsed_ms is not None:
647+
truncated_ms = max(int(round(effective_elapsed_ms)), 0)
648+
await self._send_raw_message(
649+
_ConversionHelper.convert_interrupt(
650+
playback_item_id,
651+
playback_content_index,
652+
truncated_ms,
653+
)
654+
)
655+
628656
# Reset trackers so subsequent playback state queries don't
629657
# reference audio that has been interrupted client‑side.
630658
self._audio_state_tracker.on_interrupted()
@@ -643,9 +671,6 @@ async def _handle_ws_event(self, event: dict[str, Any]):
643671
)
644672
if not automatic_response_cancellation_enabled:
645673
await self._cancel_response()
646-
# Avoid sending conversation.item.truncate here. When the session's
647-
# turn_detection.interrupt_response is enabled (GA default), the server emits
648-
# conversation.item.truncated after the VAD start and takes care of history updates.
649674
elif parsed.type == "response.created":
650675
self._ongoing_response = True
651676
await self._emit_event(RealtimeModelTurnStartedEvent())

tests/realtime/test_openai_realtime.py

Lines changed: 12 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -5,7 +5,6 @@
55

66
import pytest
77
import websockets
8-
98
from agents import Agent
109
from agents.exceptions import UserError
1110
from agents.handoffs import handoff
@@ -443,7 +442,7 @@ async def test_transcription_related_and_timeouts_and_speech_started(self, model
443442

444443
# Prepare tracker state to simulate ongoing audio
445444
model._audio_state_tracker.set_audio_format("pcm16")
446-
model._audio_state_tracker.on_audio_delta("i1", 0, b"aaaa")
445+
model._audio_state_tracker.on_audio_delta("i1", 0, b"a" * 48)
447446
model._ongoing_response = True
448447

449448
# Patch sending to avoid websocket dependency
@@ -464,6 +463,17 @@ async def test_transcription_related_and_timeouts_and_speech_started(self, model
464463
}
465464
)
466465

466+
truncate_events = [
467+
call.args[0]
468+
for call in model._send_raw_message.await_args_list
469+
if getattr(call.args[0], "type", None) == "conversation.item.truncate"
470+
]
471+
assert truncate_events
472+
truncate_event = truncate_events[0]
473+
assert truncate_event.item_id == "i1"
474+
assert truncate_event.content_index == 0
475+
assert truncate_event.audio_end_ms == 1
476+
467477
# Output transcript delta
468478
await model._handle_ws_event(
469479
{

0 commit comments

Comments
 (0)