diff --git a/specs/signalwire-rest/calling-api/calls/models/examples.tsp b/specs/signalwire-rest/calling-api/calls/models/examples.tsp index b85051f25..1fe1a6f72 100644 --- a/specs/signalwire-rest/calling-api/calls/models/examples.tsp +++ b/specs/signalwire-rest/calling-api/calls/models/examples.tsp @@ -65,3 +65,115 @@ const aiMessageExample = #{ }, }, }; + +// Live Transcribe examples +const liveTranscribeStartExample = #{ + request: #{ + command: "calling.live_transcribe", + id: callId, + params: #{ + action: #{ + start: #{ + lang: "en-US", + direction: #["local-caller", "remote-caller"], + webhook: "https://example.com/transcription-events", + live_events: true, + ai_summary: true, + ai_summary_prompt: "Summarize the key points of this conversation.", + ai_model: "gpt-4.1-nano", + speech_engine: "deepgram", + }, + }, + }, + }, +}; + +const liveTranscribeStopExample = #{ + request: #{ + command: "calling.live_transcribe", + id: callId, + params: #{ + action: "stop", + }, + }, +}; + +const liveTranscribeSummarizeExample = #{ + request: #{ + command: "calling.live_transcribe", + id: callId, + params: #{ + action: #{ + summarize: #{ + webhook: "https://example.com/summary", + prompt: "Provide a bullet-point summary of the main topics discussed.", + }, + }, + }, + }, +}; + +// Live Translate examples +const liveTranslateStartExample = #{ + request: #{ + command: "calling.live_translate", + id: callId, + params: #{ + action: #{ + start: #{ + from_lang: "en-US", + to_lang: "es-ES", + direction: #["local-caller", "remote-caller"], + from_voice: "elevenlabs.josh", + to_voice: "elevenlabs.josh", + filter_from: "professional", + webhook: "https://example.com/translation-events", + live_events: true, + ai_summary: true, + ai_model: "gpt-4.1-nano", + speech_engine: "deepgram", + }, + }, + }, + }, +}; + +const liveTranslateStopExample = #{ + request: #{ + command: "calling.live_translate", + id: callId, + params: #{ + action: "stop", + }, + }, +}; + +const liveTranslateSummarizeExample = #{ + request: #{ + command: "calling.live_translate", + id: callId, + params: #{ + action: #{ + summarize: #{ + webhook: "https://example.com/summary", + prompt: "Summarize the key agreements reached in both languages.", + }, + }, + }, + }, +}; + +const liveTranslateInjectExample = #{ + request: #{ + command: "calling.live_translate", + id: callId, + params: #{ + action: #{ + inject: #{ + message: "Please hold while I transfer you to a specialist.", + direction: "remote-caller", + }, + }, + }, + }, +}; diff --git a/specs/signalwire-rest/calling-api/calls/models/requests.tsp b/specs/signalwire-rest/calling-api/calls/models/requests.tsp index 635b26860..8258dcd43 100644 --- a/specs/signalwire-rest/calling-api/calls/models/requests.tsp +++ b/specs/signalwire-rest/calling-api/calls/models/requests.tsp @@ -115,6 +115,255 @@ model CallAIMessageRequest { }; } +// ============================================ +// Live Transcribe Commands +// ============================================ + +alias TranscribeDirection = "local-caller" | "remote-caller"; +alias SpeechEngine = "deepgram" | "google"; +alias SupportedAIModels = "gpt-4o-mini" | "gpt-4.1-mini" | "gpt-4.1-nano"; + +@summary("Start") +model LiveTranscribeStartAction { + @doc("Starts live transcription of the call.") + start: { + @doc("The language to transcribe (e.g., 'en-US', 'es-ES').") + @example("en-US") + lang: string; + + @doc("The direction(s) of the call to transcribe.") + @example(#["local-caller", "remote-caller"]) + direction: TranscribeDirection[]; + + 
@doc("The webhook URL to receive transcription events.") + @example("https://example.com/webhook") + webhook?: string; + + @doc("Whether to send real-time utterance events as speech is recognized.") + @example(true) + live_events?: boolean; + + @doc("Whether to generate an AI summary when transcription ends.") + @example(true) + ai_summary?: boolean; + + @doc("The AI prompt that instructs how to summarize the conversation when `ai_summary` is enabled.") + @example("Summarize the key points of this conversation.") + ai_summary_prompt?: string; + + @doc("The speech recognition engine to use.") + @example("deepgram") + speech_engine?: SpeechEngine = "deepgram"; + + @doc("Speech timeout in milliseconds.") + @example(60000) + speech_timeout?: int32 = 60000; + + @doc("Voice activity detection silence time in milliseconds. Default depends on speech engine: `300` for Deepgram, `500` for Google.") + @example(300) + vad_silence_ms?: int32; + + @doc("Voice activity detection threshold (0-1800).") + @example(400) + vad_thresh?: int32 = 400; + + @doc("Debug level for logging (0-2).") + @example(0) + debug_level?: int32 = 0; + }; +} + +@summary("Summarize") +model LiveTranscribeSummarizeAction { + @doc("Request an on-demand AI summary of the conversation.") + summarize: { + @doc("The webhook URL to receive the summary.") + @example("https://example.com/webhook") + webhook?: string; + + @doc("The AI prompt that instructs how to summarize the conversation.") + @example("Provide a bullet-point summary of the main topics discussed.") + prompt?: string; + }; +} + +@summary("Stop") +@doc("Stops the live transcription session.") +enum LiveTranscribeStopAction { + stop, +} + +alias LiveTranscribeAction = LiveTranscribeStartAction | LiveTranscribeSummarizeAction | LiveTranscribeStopAction; + +@summary("Live Transcribe") +model CallLiveTranscribeRequest { + @doc(uuidDescription) + @example(CallIdExample) + id: uuid; + + @doc("The `calling.live_transcribe` command is used to control live transcription on an active call.") + @example("calling.live_transcribe") + command: "calling.live_transcribe"; + + @doc(paramsDescription) + params: { + @doc("The transcription action to perform: start, stop, or summarize.") + action: LiveTranscribeAction; + }; +} + +// ============================================ +// Live Translate Commands +// ============================================ + +@summary("Filter Presets") +@doc(""" +Preset translation filter values that adjust the tone or style of translated speech. + +- `polite` - Translates to a polite version, removing anything insulting while maintaining sentiment +- `rude` - Translates to a rude and insulting version while maintaining sentiment +- `professional` - Translates to sound professional, removing slang or lingo +- `shakespeare` - Translates to sound like Shakespeare, speaking in iambic pentameter +- `gen-z` - Translates to use Gen-Z slang and expressions +""") +enum TranslationFilterPreset { + polite, + rude, + professional, + shakespeare, + `gen-z`, +} + +@summary("Custom Filter") +@doc("Custom translation filter with a prompt prefix. 
Use `prompt:` followed by your custom instructions (e.g., `prompt:Use formal business language`).") +@pattern("^prompt:.+$") +scalar CustomTranslationFilter extends string; + +alias TranslationFilter = TranslationFilterPreset | CustomTranslationFilter; + +@summary("Start") +model LiveTranslateStartAction { + @doc("Starts live translation of the call.") + start: { + @doc("The language to translate from (e.g., 'en-US').") + @example("en-US") + from_lang: string; + + @doc("The language to translate to (e.g., 'es-ES').") + @example("es-ES") + to_lang: string; + + @doc("The direction(s) of the call to translate.") + @example(#["local-caller", "remote-caller"]) + direction: TranscribeDirection[]; + + @doc("The TTS voice for the source language.") + @example("elevenlabs.josh") + from_voice?: string; + + @doc("The TTS voice for the target language.") + @example("elevenlabs.josh") + to_voice?: string; + + @doc("Translation filter for the source language direction.") + @example("professional") + filter_from?: TranslationFilter; + + @doc("Translation filter for the target language direction.") + @example("professional") + filter_to?: TranslationFilter; + + @doc("The webhook URL to receive translation events.") + @example("https://example.com/webhook") + webhook?: string; + + @doc("Whether to send real-time translation events.") + @example(true) + live_events?: boolean; + + @doc("Whether to generate AI summaries in both languages when translation ends.") + @example(true) + ai_summary?: boolean; + + @doc("The AI prompt that instructs how to summarize the conversation when `ai_summary` is enabled.") + @example("Summarize this translated conversation.") + ai_summary_prompt?: string; + + @doc("The speech recognition engine to use.") + @example("deepgram") + speech_engine?: SpeechEngine = "deepgram"; + + @doc("Speech timeout in milliseconds.") + @example(60000) + speech_timeout?: int32 = 60000; + + @doc("Voice activity detection silence time in milliseconds. 
Default depends on speech engine: `300` for Deepgram, `500` for Google.") + @example(300) + vad_silence_ms?: int32; + + @doc("Voice activity detection threshold (0-1800).") + @example(400) + vad_thresh?: int32 = 400; + + @doc("Debug level for logging (0-2).") + @example(0) + debug_level?: int32 = 0; + }; +} + +@summary("Summarize") +model LiveTranslateSummarizeAction { + @doc("Request an on-demand AI summary of the translated conversation.") + summarize: { + @doc("The webhook URL to receive the summary.") + @example("https://example.com/webhook") + webhook?: string; + + @doc("The AI prompt that instructs how to summarize the conversation.") + @example("Summarize the key agreements reached in both languages.") + prompt?: string; + }; +} + +@summary("Inject") +model LiveTranslateInjectAction { + @doc("Inject a message into the conversation to be translated and spoken.") + inject: { + @doc("The text message to inject and translate.") + @example("Please hold while I transfer you to a specialist.") + message: string; + + @doc("The direction to send the translated message.") + @example("remote-caller") + direction: TranscribeDirection; + }; +} + +@summary("Stop") +@doc("Stops the live translation session.") +enum LiveTranslateStopAction { + stop, +} + +alias LiveTranslateAction = LiveTranslateStartAction | LiveTranslateSummarizeAction | LiveTranslateInjectAction | LiveTranslateStopAction; + +@summary("Live Translate") +model CallLiveTranslateRequest { + @doc(uuidDescription) + @example(CallIdExample) + id: uuid; + + @doc("The `calling.live_translate` command is used to control live translation on an active call.") + @example("calling.live_translate") + command: "calling.live_translate"; + + @doc(paramsDescription) + params: { + @doc("The translation action to perform: start, stop, summarize, or inject.") + action: LiveTranslateAction; + }; +} + @summary("Create call") model CallCreateRequest { @doc("The `dial` command is used to create a new call.") @@ -234,6 +483,10 @@ Call request union for JSON-RPC style method dispatch. Use the `command` field t - **`calling.ai_unhold`** - Resume an AI call from hold state. Reactivates the AI agent and continues the conversation from where it was paused. - **`calling.ai_message`** - Inject a message into an active AI conversation. Allows you to dynamically add context, instructions, or system messages to guide the AI agent's behavior during the call. + +- **`calling.live_transcribe`** - Control live transcription on an active call. Start real-time speech-to-text transcription, stop transcription, or request an on-demand AI summary of the conversation. + +- **`calling.live_translate`** - Control live translation on an active call. Start real-time language translation between call participants, stop translation, request summaries, or inject messages to be translated and spoken. 
""") union CallRequest { dial: CallCreateRequest, @@ -242,4 +495,6 @@ union CallRequest { `calling.ai_hold`: CallHoldRequest, `calling.ai_unhold`: CallUnholdRequest, `calling.ai_message`: CallAIMessageRequest, + `calling.live_transcribe`: CallLiveTranscribeRequest, + `calling.live_translate`: CallLiveTranslateRequest, } diff --git a/specs/signalwire-rest/calling-api/tsp-output/@typespec/openapi3/openapi.yaml b/specs/signalwire-rest/calling-api/tsp-output/@typespec/openapi3/openapi.yaml index 569890787..6cba4a9c5 100644 --- a/specs/signalwire-rest/calling-api/tsp-output/@typespec/openapi3/openapi.yaml +++ b/specs/signalwire-rest/calling-api/tsp-output/@typespec/openapi3/openapi.yaml @@ -369,6 +369,69 @@ components: default: Tell the user you are putting them on hold. description: An object of parameters that will be utilized by the active command. title: Hold call + CallLiveTranscribeRequest: + type: object + required: + - id + - command + - params + properties: + id: + allOf: + - $ref: '#/components/schemas/uuid' + description: The unique identifying ID of a existing call. + example: 3fa85f64-5717-4562-b3fc-2c963f66afa6 + command: + type: string + enum: + - calling.live_transcribe + description: The `calling.live_transcribe` command is used to control live transcription on an active call. + example: calling.live_transcribe + params: + type: object + properties: + action: + anyOf: + - $ref: '#/components/schemas/LiveTranscribeStartAction' + - $ref: '#/components/schemas/LiveTranscribeSummarizeAction' + - $ref: '#/components/schemas/LiveTranscribeStopAction' + description: 'The transcription action to perform: start, stop, or summarize.' + required: + - action + description: An object of parameters that will be utilized by the active command. + title: Live Transcribe + CallLiveTranslateRequest: + type: object + required: + - id + - command + - params + properties: + id: + allOf: + - $ref: '#/components/schemas/uuid' + description: The unique identifying ID of a existing call. + example: 3fa85f64-5717-4562-b3fc-2c963f66afa6 + command: + type: string + enum: + - calling.live_translate + description: The `calling.live_translate` command is used to control live translation on an active call. + example: calling.live_translate + params: + type: object + properties: + action: + anyOf: + - $ref: '#/components/schemas/LiveTranslateStartAction' + - $ref: '#/components/schemas/LiveTranslateSummarizeAction' + - $ref: '#/components/schemas/LiveTranslateInjectAction' + - $ref: '#/components/schemas/LiveTranslateStopAction' + description: 'The translation action to perform: start, stop, summarize, or inject.' + required: + - action + description: An object of parameters that will be utilized by the active command. 
+ title: Live Translate CallRequest: type: object oneOf: @@ -378,6 +441,8 @@ components: - $ref: '#/components/schemas/CallHoldRequest' - $ref: '#/components/schemas/CallUnholdRequest' - $ref: '#/components/schemas/CallAIMessageRequest' + - $ref: '#/components/schemas/CallLiveTranscribeRequest' + - $ref: '#/components/schemas/CallLiveTranslateRequest' discriminator: propertyName: command mapping: @@ -387,6 +452,8 @@ components: calling.ai_hold: '#/components/schemas/CallHoldRequest' calling.ai_unhold: '#/components/schemas/CallUnholdRequest' calling.ai_message: '#/components/schemas/CallAIMessageRequest' + calling.live_transcribe: '#/components/schemas/CallLiveTranscribeRequest' + calling.live_translate: '#/components/schemas/CallLiveTranslateRequest' description: |- Call request union for JSON-RPC style method dispatch. Use the `command` field to specify which call method to invoke: @@ -401,6 +468,10 @@ components: - **`calling.ai_unhold`** - Resume an AI call from hold state. Reactivates the AI agent and continues the conversation from where it was paused. - **`calling.ai_message`** - Inject a message into an active AI conversation. Allows you to dynamically add context, instructions, or system messages to guide the AI agent's behavior during the call. + + - **`calling.live_transcribe`** - Control live transcription on an active call. Start real-time speech-to-text transcription, stop transcription, or request an on-demand AI summary of the conversation. + + - **`calling.live_translate`** - Control live translation on an active call. Start real-time language translation between call participants, stop translation, request summaries, or inject messages to be translated and spoken. CallResponse: type: object required: @@ -623,6 +694,275 @@ components: type: number description: Charged amount. example: 0.121176 + CustomTranslationFilter: + type: string + pattern: ^prompt:.+$ + description: Custom translation filter with a prompt prefix. Use `prompt:` followed by your custom instructions (e.g., `prompt:Use formal business language`). + title: Custom Filter + LiveTranscribeStartAction: + type: object + required: + - start + properties: + start: + type: object + properties: + lang: + type: string + description: The language to transcribe (e.g., 'en-US', 'es-ES'). + example: en-US + direction: + type: array + items: + type: string + enum: + - local-caller + - remote-caller + description: The direction(s) of the call to transcribe. + example: + - local-caller + - remote-caller + webhook: + type: string + description: The webhook URL to receive transcription events. + example: https://example.com/webhook + live_events: + type: boolean + description: Whether to send real-time utterance events as speech is recognized. + example: true + ai_summary: + type: boolean + description: Whether to generate an AI summary when transcription ends. + example: true + ai_summary_prompt: + type: string + description: The AI prompt that instructs how to summarize the conversation when `ai_summary` is enabled. + example: Summarize the key points of this conversation. + speech_engine: + type: string + enum: + - deepgram + - google + description: The speech recognition engine to use. + example: deepgram + default: deepgram + speech_timeout: + type: integer + format: int32 + description: Speech timeout in milliseconds. + example: 60000 + default: 60000 + vad_silence_ms: + type: integer + format: int32 + description: 'Voice activity detection silence time in milliseconds. 
Default depends on speech engine: `300` for Deepgram, `500` for Google.' + example: 300 + vad_thresh: + type: integer + format: int32 + description: Voice activity detection threshold (0-1800). + example: 400 + default: 400 + debug_level: + type: integer + format: int32 + description: Debug level for logging (0-2). + example: 0 + default: 0 + required: + - lang + - direction + description: Starts live transcription of the call. + title: Start + LiveTranscribeStopAction: + type: string + enum: + - stop + description: Stops the live transcription session. + title: Stop + LiveTranscribeSummarizeAction: + type: object + required: + - summarize + properties: + summarize: + type: object + properties: + webhook: + type: string + description: The webhook URL to receive the summary. + example: https://example.com/webhook + prompt: + type: string + description: The AI prompt that instructs how to summarize the conversation. + example: Provide a bullet-point summary of the main topics discussed. + description: Request an on-demand AI summary of the conversation. + title: Summarize + LiveTranslateInjectAction: + type: object + required: + - inject + properties: + inject: + type: object + properties: + message: + type: string + description: The text message to inject and translate. + example: Please hold while I transfer you to a specialist. + direction: + type: string + enum: + - local-caller + - remote-caller + description: The direction to send the translated message. + example: remote-caller + required: + - message + - direction + description: Inject a message into the conversation to be translated and spoken. + title: Inject + LiveTranslateStartAction: + type: object + required: + - start + properties: + start: + type: object + properties: + from_lang: + type: string + description: The language to translate from (e.g., 'en-US'). + example: en-US + to_lang: + type: string + description: The language to translate to (e.g., 'es-ES'). + example: es-ES + direction: + type: array + items: + type: string + enum: + - local-caller + - remote-caller + description: The direction(s) of the call to translate. + example: + - local-caller + - remote-caller + from_voice: + type: string + description: The TTS voice for the source language. + example: elevenlabs.josh + to_voice: + type: string + description: The TTS voice for the target language. + example: elevenlabs.josh + filter_from: + anyOf: + - $ref: '#/components/schemas/TranslationFilterPreset' + - $ref: '#/components/schemas/CustomTranslationFilter' + description: Translation filter for the source language direction. + example: professional + filter_to: + anyOf: + - $ref: '#/components/schemas/TranslationFilterPreset' + - $ref: '#/components/schemas/CustomTranslationFilter' + description: Translation filter for the target language direction. + example: professional + webhook: + type: string + description: The webhook URL to receive translation events. + example: https://example.com/webhook + live_events: + type: boolean + description: Whether to send real-time translation events. + example: true + ai_summary: + type: boolean + description: Whether to generate AI summaries in both languages when translation ends. + example: true + ai_summary_prompt: + type: string + description: The AI prompt that instructs how to summarize the conversation when `ai_summary` is enabled. + example: Summarize this translated conversation. + speech_engine: + type: string + enum: + - deepgram + - google + description: The speech recognition engine to use. 
+ example: deepgram + default: deepgram + speech_timeout: + type: integer + format: int32 + description: Speech timeout in milliseconds. + example: 60000 + default: 60000 + vad_silence_ms: + type: integer + format: int32 + description: 'Voice activity detection silence time in milliseconds. Default depends on speech engine: `300` for Deepgram, `500` for Google.' + example: 300 + vad_thresh: + type: integer + format: int32 + description: Voice activity detection threshold (0-1800). + example: 400 + default: 400 + debug_level: + type: integer + format: int32 + description: Debug level for logging (0-2). + example: 0 + default: 0 + required: + - from_lang + - to_lang + - direction + description: Starts live translation of the call. + title: Start + LiveTranslateStopAction: + type: string + enum: + - stop + description: Stops the live translation session. + title: Stop + LiveTranslateSummarizeAction: + type: object + required: + - summarize + properties: + summarize: + type: object + properties: + webhook: + type: string + description: The webhook URL to receive the summary. + example: https://example.com/webhook + prompt: + type: string + description: The AI prompt that instructs how to summarize the conversation. + example: Summarize the key agreements reached in both languages. + description: Request an on-demand AI summary of the translated conversation. + title: Summarize + TranslationFilterPreset: + type: string + enum: + - polite + - rude + - professional + - shakespeare + - gen-z + description: |- + Preset translation filter values that adjust the tone or style of translated speech. + + - `polite` - Translates to a polite version, removing anything insulting while maintaining sentiment + - `rude` - Translates to a rude and insulting version while maintaining sentiment + - `professional` - Translates to sound professional, removing slang or lingo + - `shakespeare` - Translates to sound like Shakespeare, speaking in iambic pentameter + - `gen-z` - Translates to use Gen-Z slang and expressions + title: Filter Presets Types.StatusCodes.StatusCode401: type: object required: diff --git a/specs/swml/Methods/ai/ai_params.tsp b/specs/swml/Methods/ai/ai_params.tsp index 6c56bd816..677c46608 100644 --- a/specs/swml/Methods/ai/ai_params.tsp +++ b/specs/swml/Methods/ai/ai_params.tsp @@ -1,9 +1,8 @@ import "@typespec/json-schema"; +import "../../Shared/Types/main.tsp"; using TypeSpec.JsonSchema; -alias SupportedAIModels = "gpt-4o-mini" | "gpt-4.1-mini" | "gpt-4.1-nano" | string; - @minValue(10000) @maxValue(600000) scalar AttentionTimeout extends integer; diff --git a/specs/swml/Methods/live_transcribe/main.tsp b/specs/swml/Methods/live_transcribe/main.tsp index b1af7996c..77290fb41 100644 --- a/specs/swml/Methods/live_transcribe/main.tsp +++ b/specs/swml/Methods/live_transcribe/main.tsp @@ -11,15 +11,6 @@ enum TranscribeDirection { `local-caller`, } -@summary("SpeechEngine enum") -enum SpeechEngine { - @doc("Use Deepgram for speech recognition.") - deepgram, - - @doc("Use Google for speech recognition.") - google, -} - @summary("TranscribeStartAction object") model TranscribeStartAction { @doc("Starts live transcription of the call. The transcription will be sent to the specified URL.") @@ -44,27 +35,28 @@ model TranscribeStartAction { @example(30000) speech_timeout?: integer | SWMLVar = 60000; - @doc("Voice activity detection silence time in milliseconds.") + @doc("Voice activity detection silence time in milliseconds. 
Default depends on speech engine: `300` for Deepgram, `500` for Google.") @example(500) vad_silence_ms?: integer | SWMLVar = 300; - @doc("Voice activity detection threshold.") + @doc("Voice activity detection threshold (0-1800).") @example(400) vad_thresh?: integer | SWMLVar = 400; - @doc("Debug level for logging.") + @doc("Debug level for logging (0-2).") @example(0) debug_level?: integer | SWMLVar = 0; @doc("The direction of the call that should be transcribed.") direction: TranscribeDirection[]; - @doc("The speech engine to use for transcription.") + @doc("The speech engine to use for speech recognition.") + @example(SpeechEngine.google) speech_engine?: SpeechEngine = SpeechEngine.deepgram; - @doc("The prompt for summarization.") + @doc("The AI prompt that instructs how to summarize the conversation when `ai_summary` is enabled.") @example("Summarize the key points of this conversation.") - summary_prompt?: string; + ai_summary_prompt?: string; }; } diff --git a/specs/swml/Methods/live_translate/main.tsp b/specs/swml/Methods/live_translate/main.tsp index fa75f6368..1e08ad8c0 100644 --- a/specs/swml/Methods/live_translate/main.tsp +++ b/specs/swml/Methods/live_translate/main.tsp @@ -11,15 +11,31 @@ enum TranslateDirection { `local-caller`, } -@summary("SpeechEngine enum") -enum SpeechEngine { - @doc("Use Deepgram for speech recognition.") - deepgram, - - @doc("Use Google for speech recognition.") - google, +@summary("Filter Presets") +@doc(""" +Preset translation filter values that adjust the tone or style of translated speech. + +- `polite` - Translates to a polite version, removing anything insulting while maintaining sentiment +- `rude` - Translates to a rude and insulting version while maintaining sentiment +- `professional` - Translates to sound professional, removing slang or lingo +- `shakespeare` - Translates to sound like Shakespeare, speaking in iambic pentameter +- `gen-z` - Translates to use Gen-Z slang and expressions +""") +enum TranslationFilterPreset { + polite, + rude, + professional, + shakespeare, + `gen-z`, } +@summary("Custom Filter") +@doc("Custom translation filter with a prompt prefix. Use `prompt:` followed by your custom instructions (e.g., `prompt:Use formal business language`).") +@pattern("^prompt:.+$") +scalar CustomTranslationFilter extends string; + +alias TranslationFilter = TranslationFilterPreset | CustomTranslationFilter; + @summary("StartAction object") model StartAction { @doc("Starts live translation of the call. The translation will be sent to the specified URL.") @@ -44,6 +60,12 @@ model StartAction { @example("Polly.Lucia") to_voice?: string; + @doc("Translation filter for the source language direction.") + filter_from?: TranslationFilter; + + @doc("Translation filter for the target language direction.") + filter_to?: TranslationFilter; + @doc("Whether to enable live events.") @example(true) live_events?: boolean | SWMLVar; @@ -56,27 +78,29 @@ model StartAction { @example(30000) speech_timeout?: integer | SWMLVar = 60000; - @doc("Voice activity detection silence time in milliseconds.") + @doc("Voice activity detection silence time in milliseconds. 
Default depends on speech engine: `300` for Deepgram, `500` for Google.") @example(500) vad_silence_ms?: integer | SWMLVar = 300; - @doc("Voice activity detection threshold.") + @doc("Voice activity detection threshold (0-1800).") @example(400) vad_thresh?: integer | SWMLVar = 400; - @doc("Debug level for logging.") + @doc("Debug level for logging (0-2).") @example(0) debug_level?: integer | SWMLVar = 0; @doc("The direction of the call that should be translated.") direction: TranslateDirection[]; - @doc("The speech engine to use for transcription.") + @doc("The speech engine to use for speech recognition.") + @example(SpeechEngine.google) speech_engine?: SpeechEngine = SpeechEngine.deepgram; - @doc("The prompt for summarization.") + @doc("The AI prompt that instructs how to summarize the conversation when `ai_summary` is enabled.") @example("Summarize the key points of this bilingual conversation.") - summary_prompt?: string; + ai_summary_prompt?: string; + }; } @@ -97,7 +121,7 @@ model SummarizeAction { @example("https://example.com/summary-webhook") webhook?: string; - @doc("The prompt for summarization.") + @doc("The AI prompt that instructs how to summarize the conversation.") @example("Provide a brief summary of the translated conversation.") prompt?: string; }; @@ -105,14 +129,14 @@ model SummarizeAction { @summary("InjectAction object") model InjectAction { - @doc("Injects a message into the conversation.") + @doc("Injects a message into the conversation to be translated and spoken to the specified party.") inject: { @doc("The message to be injected") @example("Please hold while I transfer you to a specialist.") message: string; - @doc("The direction of the message that should be injected.") - direction: TranslateDirection[]; + @doc("The direction of the message.") + direction: TranslateDirection; }; } diff --git a/specs/swml/Shared/Types/main.tsp b/specs/swml/Shared/Types/main.tsp index 1412108b6..17a6ee33e 100644 --- a/specs/swml/Shared/Types/main.tsp +++ b/specs/swml/Shared/Types/main.tsp @@ -3,3 +3,14 @@ import "../../../_shared/types/main.tsp"; @doc("A SWML variable reference using \${varname} or %{varname} syntax for dynamic value substitution at runtime.") @pattern("^[\\$%]\\{.*\\}$") scalar SWMLVar extends string; + +alias SupportedAIModels = "gpt-4o-mini" | "gpt-4.1-mini" | "gpt-4.1-nano" | string; + +@doc("Speech recognition engine options.") +enum SpeechEngine { + @doc("Use Deepgram for speech recognition.") + deepgram, + + @doc("Use Google for speech recognition.") + google, +} diff --git a/specs/swml/tsp-output/@typespec/json-schema/SWMLObject.json b/specs/swml/tsp-output/@typespec/json-schema/SWMLObject.json index 00df89b53..85add9fa0 100644 --- a/specs/swml/tsp-output/@typespec/json-schema/SWMLObject.json +++ b/specs/swml/tsp-output/@typespec/json-schema/SWMLObject.json @@ -178,34 +178,34 @@ 3600 ], "minimum": 7, - "description": "Maximum time in seconds to wait for an answer. Cannot be less than `7` seconds. Defaults to 14400 seconds." + "description": "Maximum duration in seconds for the call. Cannot be less than 7 seconds. Defaults to 14400 seconds (4 hours)." }, "codecs": { "type": "string", "examples": [ "PCMU,PCMA,OPUS" ], - "description": "Comma-seperated string of codecs to offer.\n Valid codecs are: [`PCMU,PCMA,G722,G729,AMR-WB,OPUS,VP8,H264`]" + "description": "Comma-separated string of codecs to offer. Valid codecs are: PCMU, PCMA, G722, G729, AMR-WB, OPUS, VP8, H264." 
}, "sip_auth_username": { "type": "string", "examples": [ "user123" ], - "description": "SIP authentication username" + "description": "Username to use for SIP authentication." }, "sip_auth_password": { "type": "string", "examples": [ "securepassword" ], - "description": "SIP authentication password" + "description": "Password to use for SIP authentication." } }, "unevaluatedProperties": { "not": {} }, - "description": "Answers an incoming call.", + "description": "Answer incoming call and set an optional maximum duration.", "title": "answer" } }, @@ -222,7 +222,7 @@ "properties": { "ai": { "$ref": "#/$defs/AIObject", - "description": "Creates a new AI agent.", + "description": "Creates an AI agent that conducts voice conversations using automatic speech recognition (ASR),\nlarge language models (LLMs), and text-to-speech (TTS) synthesis.\nThe agent processes caller speech in real-time, generates contextually appropriate responses,\nand can execute custom functions to interact with external systems through SignalWire AI Gateway (SWAIG).", "title": "ai" } }, @@ -326,7 +326,7 @@ "properties": { "enter_queue": { "$ref": "#/$defs/EnterQueueObject", - "description": "Place the call in a queue.", + "description": "Place the current call in a named queue where it will wait to be connected to an available agent or resource.\nWhile waiting, callers will hear music or custom audio.\nWhen an agent connects to the queue (using the connect method), the caller and agent are bridged together.\nAfter the bridge completes, execution continues with the SWML script specified in transfer_after_bridge.", "title": "enter_queue" } }, @@ -396,7 +396,7 @@ "title": "cond" } ], - "description": "`switch` on `return_value` if result is an object (`{}`), or use as a cond if result is an array (`[]`)" + "description": "Action to take based on the result of the call. This will run once the peer leg of the call has ended.\nWill use the switch method when the return_value is an object, and will use the cond method when the return_value is an array." } }, "required": [ @@ -405,7 +405,7 @@ "unevaluatedProperties": { "not": {} }, - "description": "Execute a section or URL as a subroutine and return back to current document." + "description": "Execute a specified section or URL as a subroutine, and upon completion, return to the current document.\nUse the return statement to pass any return values or objects back to the current document." } }, "required": [ @@ -427,14 +427,14 @@ "examples": [ "greeting" ], - "description": "The name of the label in the current section to jump to." + "description": "Mark any point of the SWML section with a label so that goto can jump to it." }, "when": { "type": "string", "examples": [ "vars.retry_count < 3" ], - "description": "The condition that triggers the `goto` jump." + "description": "JavaScript expression to evaluate. When the condition evaluates to true, the goto jump is triggered." }, "max": { "anyOf": [ @@ -471,7 +471,7 @@ "unevaluatedProperties": { "not": {} }, - "description": "Jump to a specified label in the current SWML document." + "description": "Jump to a label within the current section, optionally based on a condition.\nThe goto method will only navigate to a label within the same section." } }, "required": [ @@ -490,7 +490,7 @@ "examples": [ "greeting" ], - "description": "The name of the label in the current section to jump to." + "description": "Mark any point of the SWML section with a label so that goto can jump to it." 
} }, "required": [ @@ -518,7 +518,7 @@ "unevaluatedProperties": { "not": {} }, - "description": "Starts live transcription of the call. The transcription will be sent to the specified URL.", + "description": "Start live transcription of the call. The transcription will be sent to the specified webhook URL.", "title": "live_transcribe" } }, @@ -547,7 +547,7 @@ "unevaluatedProperties": { "not": {} }, - "description": "Starts a live translation session of the call. The translation will be sent to the specified URL.", + "description": "Start live translation of the call. The translation will be sent to the specified webhook URL.", "title": "live_translate" } }, @@ -589,7 +589,7 @@ "unevaluatedProperties": { "not": {} }, - "description": "Hangup the call.", + "description": "End the call with an optional reason.", "title": "hangup" } }, @@ -612,7 +612,7 @@ "examples": [ "my-video-room" ], - "description": "The name of the room to join." + "description": "Name of the room to join. Allowed characters: A-Z, a-z, 0-9, underscore, and hyphen." } }, "required": [ @@ -621,7 +621,7 @@ "unevaluatedProperties": { "not": {} }, - "description": "Joins a RELAY room.", + "description": "Join a RELAY room. If the room doesn't exist, it creates a new room.", "title": "join_room" } }, @@ -638,7 +638,7 @@ "properties": { "join_conference": { "$ref": "#/$defs/JoinConferenceObject", - "description": "Join an ad-hoc audio conference with RELAY and CXML calls.", + "description": "Join an ad-hoc audio conference started on either the SignalWire or Compatibility API.\nThis method allows you to connect the current call to a named conference where multiple participants can communicate simultaneously.", "title": "join_conference" } }, @@ -842,7 +842,7 @@ "billing" ] ], - "description": "Words or phrases to help the speech recognition." + "description": "Expected words or phrases to help the speech recognition." }, "speech_engine": { "type": "array", @@ -871,7 +871,7 @@ "unevaluatedProperties": { "not": {} }, - "description": "Play a prompt and wait for digit or speech input.\nSpeech detection is not enabled unless at least one speech parameter is set.\nIf only speech parameters are set (and no digit parameters), digit detection is not enabled.\nTo enable both digit and speech detection, set at least one parameter for each.", + "description": "Play a prompt and wait for input. The input can be received either as digits from the keypad,\nor from speech, or both depending on what parameters are set.\nBy default, only digit input is enabled. To enable speech input, set at least one speech parameter.\nTo enable both digit and speech input, set at least one parameter for each.", "title": "prompt" } }, @@ -998,7 +998,7 @@ "examples": [ 44 ], - "description": "Input sensitivity for the recording.\nDefault is `44.0`." + "description": "How sensitive the recording voice activity detector is to background noise.\nA larger value is more sensitive. Allowed values from 0.0 to 100.0.\nDefault is 44.0." }, "initial_timeout": { "anyOf": [ @@ -1053,7 +1053,7 @@ "unevaluatedProperties": { "not": {} }, - "description": "Record the call audio in the foreground. 
Use this, for example, to record voicemails.", + "description": "Record the call audio in the foreground, pausing further SWML execution until recording ends.\nUse this, for example, to record voicemails.\nTo record calls in the background in a non-blocking fashion, use the record_call method.", "title": "record" } }, @@ -1161,7 +1161,7 @@ "examples": [ 44 ], - "description": "Input sensitivity for the recording.\nDefault is `44.0`." + "description": "How sensitive the recording voice activity detector is to background noise.\nA larger value is more sensitive. Allowed values from 0.0 to 100.0.\nDefault is 44.0." }, "initial_timeout": { "anyOf": [ @@ -1217,7 +1217,7 @@ "unevaluatedProperties": { "not": {} }, - "description": "Record call in the background.\nUnlike the `record` method, the `record_call` method will start the recording and continue executing the SWML script without allowing the recording to happen in the background.\nTo stop call recordings started with `record_call`, use the `stop_call_record` method.", + "description": "Record call in the background.\nUnlike the record method, the record_call method will start the recording and continue executing\nthe SWML script while allowing the recording to happen in the background.\nTo stop call recordings started with record_call, use the stop_record_call method.", "title": "record_call" } }, @@ -1240,7 +1240,7 @@ "examples": [ "https://api.example.com/webhook" ], - "description": "The URL to which the request is to be sent." + "description": "URL to send the HTTPS request to. Authentication can also be set in the URL in the format of username:password@url." }, "method": { "anyOf": [ @@ -1276,7 +1276,7 @@ "Authorization": "Bearer token123" } ], - "description": "An object representing the headers to be included in the request." + "description": "Object containing HTTP headers to set. Valid header values are Accept, Authorization, Content-Type, Range, and custom X- headers." }, "body": { "anyOf": [ @@ -1295,7 +1295,7 @@ "message": "Call completed" } ], - "description": "The body of the request. Can be a `string` or an `object`." + "description": "Request body. Content-Type header should be explicitly set, but if not set, the most likely type\nwill be set based on the first non-whitespace character." }, "timeout": { "anyOf": [ @@ -1383,7 +1383,7 @@ "result": "completed" } ], - "description": "Return from `execute` or exit script. Takes user-defined key to be used to store the value.", + "description": "Return a value from an execute call or exit the script. The value can be any type.", "title": "return" } }, @@ -1406,7 +1406,7 @@ "examples": [ "1234#" ], - "description": "The DTMF digits to send." + "description": "The digits to send. Valid values are 0123456789*#ABCDWw. Character W is a 1 second delay, and w is a 500ms delay." 
} }, "required": [ @@ -1415,7 +1415,7 @@ "unevaluatedProperties": { "not": {} }, - "description": "Send DTMF digits.", + "description": "Send digit presses as DTMF tones.", "title": "send_digits" } }, @@ -1494,7 +1494,7 @@ "$ref": "#/$defs/SMSWithMedia" } ], - "description": "Send a text message to a PSTN phone number.", + "description": "Send an outbound SMS or MMS message to a PSTN phone number.", "title": "send_sms" } }, @@ -1520,7 +1520,7 @@ "is_valid": true } ], - "description": "Set script variables to the specified values.\nNo specific parameters.\nAccepts an object mapping variable names to values.", + "description": "Set script variables to the specified values.\nAccepts an object mapping variable names to values.\nVariables set using set can be removed using unset.", "title": "set" } }, @@ -1548,7 +1548,7 @@ 5000 ], "minimum": -1, - "description": "The amount of time to sleep in milliseconds. Must be a `positive integer`. Can also be set to a `-1` integer for the sleep to never end.", + "description": "The amount of time to sleep in milliseconds before continuing to the next action.\nMust be a positive integer. Can also be set to `-1` for the sleep to never end.", "title": "sleep" } }, @@ -1586,14 +1586,14 @@ "examples": [ "sipuser" ], - "description": "SIP authentication username" + "description": "Username to use for SIP authentication." }, "sip_auth_password": { "type": "string", "examples": [ "sippassword" ], - "description": "SIP authentication password" + "description": "Password to use for SIP authentication." } }, "required": [ @@ -1626,7 +1626,7 @@ "examples": [ {} ], - "description": "Stops the current denoise session.", + "description": "Stop noise reduction that was started with denoise.", "title": "stop_denoise" } }, @@ -1707,7 +1707,7 @@ "examples": [ "prompt_result" ], - "description": "Name of the variable whose value needs to be compared" + "description": "Name of the variable whose value needs to be compared." }, "case": { "type": "object", @@ -1718,14 +1718,14 @@ "$ref": "#/$defs/SWMLMethod" } }, - "description": "Object of values mapped to array of instructions to execute" + "description": "Object of key-mapped values to array of SWML methods to execute." }, "default": { "type": "array", "items": { "$ref": "#/$defs/SWMLMethod" }, - "description": "Array of instructions to execute if no cases match" + "description": "Array of SWML methods to execute if no cases match." } }, "required": [ @@ -1735,7 +1735,7 @@ "unevaluatedProperties": { "not": {} }, - "description": "Execute a sequence of instructions depending on which value matches a variable.", + "description": "Execute different instructions based on a variable's value.", "title": "switch" } }, @@ -1758,7 +1758,7 @@ "examples": [ "wss://example.com/tap-stream" ], - "description": "Destination of the tap media stream." + "description": "Destination of the tap media stream: rtp://IP:port, ws://example.com, or wss://example.com." }, "control_id": { "type": "string", @@ -1855,7 +1855,7 @@ "examples": [ "https://example.com/transfer-handler" ], - "description": "Specifies where to transfer the call. The value can be one of:\n- `` - section in the SWML document to jump to\n- A URL (http or https) that returns a SWML document - Sends HTTP POST\n- An inline SWML document (as a JSON string)" + "description": "Specifies where to transfer to. The value can be one of:\n- - section in the SWML document to jump to\n- A URL (http or https) - URL to fetch next document from. 
Sends HTTP POST.\n Authentication can also be set in the URL in the format of username:password@url.\n- An inline SWML document (as a JSON string)" }, "params": { "type": "object", @@ -1888,7 +1888,7 @@ "unevaluatedProperties": { "not": {} }, - "description": "Transfer the execution of the script to a new URL or SWML Section.", + "description": "Transfer the execution of the script to a different SWML section, URL, or Relay application.\nOnce the transfer is complete, the script will continue executing SWML from the new location.", "title": "transfer" } }, @@ -1933,7 +1933,7 @@ "unevaluatedProperties": { "not": {} }, - "description": "Un set a variable that as been `set`.", + "description": "Unset specified variables. The variables may have been set using the set method\nor as a byproduct of other statements or methods.", "title": "unset" } }, @@ -1993,7 +1993,7 @@ "examples": [ "en-US" ], - "description": "The language of the payment prompt. Default is `en-US`.\n\nSupported languages can be found here: https://developer.signalwire.com/swml/methods/pay/language" + "description": "Language to use for prompts being played to the caller by the `pay` method. Default is `en-US`." }, "max_attempts": { "anyOf": [ @@ -2139,7 +2139,7 @@ "unevaluatedProperties": { "not": {} }, - "description": "The `pay` object. Enables secure payment processing during a call.\n When implemented, it handles the complete payment flow including data collection, validation, and processing through your configured\n payment gateway." + "description": "Enables secure payment processing during voice calls. When implemented, it manages the entire payment flow\nincluding data collection, validation, and processing through your configured payment gateway." } }, "required": [ @@ -2234,7 +2234,7 @@ 1.25 ], "minimum": 0, - "description": "How much voice in ms to decide MACHINE. Default 1.25." + "description": "The number of seconds of ongoing voice activity required to classify as MACHINE. Default 1.25." }, "machine_words_threshold": { "anyOf": [ @@ -2249,7 +2249,7 @@ 6 ], "minimum": 0, - "description": "How many words to count to decide MACHINE. Default 6." + "description": "The minimum number of words that must be detected in a single utterance before classifying the call as MACHINE. Default 6." }, "status_url": { "type": "string", @@ -2301,13 +2301,13 @@ "examples": [ true ], - "description": "If `false`, the detector will run asynchronously and `status_url` must be set. Default `true`." + "description": "If false, the detector will run asynchronously and status_url must be set.\nIf true, the detector will wait for detection to complete before moving to the next SWML instruction.\nDefault is true." } }, "unevaluatedProperties": { "not": {} }, - "description": "A detection method that is a combination of AMD (Answer Machine Detection) and Fax detection.\nDetect if the user on the other end of the call is a machine (fax, voicemail, etc.) or a `human`.\nThe detection result(s) will be sent to the declared `status_url` as a POST request." + "description": "A detection method that combines AMD (Answering Machine Detection) and fax detection.\nDetect whether the user on the other end of the call is a machine (fax, voicemail, etc.) or a human.\nThe detection result(s) will be sent to the specified status_url as a POST request\nand will also be saved in the detect_result variable." 
} }, "required": [ @@ -2372,7 +2372,7 @@ "support_hours": "9am-5pm EST" } ], - "description": "A powerful and flexible environmental variable which can accept arbitrary data that is set initially in the SWML script\nor from the SWML `set_global_data` action. This data can be referenced `globally`.\nAll contained information can be accessed and expanded within the prompt - for example, by using a template string." + "description": "A key-value object for storing data that persists throughout the AI session.\nCan be set initially in the SWML script or modified during the conversation using the set_global_data action.\nThe global_data object is accessible everywhere in the AI session: prompts, AI parameters,\nand SWML returned from SWAIG functions. Access properties using template strings (e.g. ${global_data.property_name})." }, "hints": { "type": "array", @@ -2426,7 +2426,7 @@ }, "prompt": { "$ref": "#/$defs/AIPrompt", - "description": "Establishes the initial set of instructions and settings to configure the agent." + "description": "Defines the AI agent's personality, goals, behaviors, and instructions for handling conversations.\nThe prompt establishes how the agent should interact with callers, what information it should gather,\nand how it should respond to various scenarios. It is recommended to write prompts using markdown formatting." } }, "required": [ @@ -2574,7 +2574,7 @@ "title": "cond" } ], - "description": "`switch` on `return_value` when object `{}` or `cond` when array `[]`" + "description": "Action to take based on the result of the call. This will run once the peer leg of the call has ended.\nWill use the switch method when the return_value is an object, and will use the cond method when the return_value is an array." }, "timeout": { "anyOf": [ @@ -2654,14 +2654,14 @@ "examples": [ "sipuser" ], - "description": "Username for authentication." + "description": "SIP username to use for authentication when dialing a SIP URI. Has no effect on calls to phone numbers." }, "password": { "type": "string", "examples": [ "sippassword" ], - "description": "Password for authentication." + "description": "SIP password to use for authentication when dialing a SIP URI. Has no effect on calls to phone numbers." }, "encryption": { "anyOf": [ @@ -2810,7 +2810,7 @@ "title": "cond" } ], - "description": "`switch` on `return_value` when object `{}` or `cond` when array `[]`" + "description": "Action to take based on the result of the call. This will run once the peer leg of the call has ended.\nWill use the switch method when the return_value is an object, and will use the cond method when the return_value is an array." }, "timeout": { "anyOf": [ @@ -2890,14 +2890,14 @@ "examples": [ "sipuser" ], - "description": "Username for authentication." + "description": "SIP username to use for authentication when dialing a SIP URI. Has no effect on calls to phone numbers." }, "password": { "type": "string", "examples": [ "sippassword" ], - "description": "Password for authentication." + "description": "SIP password to use for authentication when dialing a SIP URI. Has no effect on calls to phone numbers." }, "encryption": { "anyOf": [ @@ -3045,7 +3045,7 @@ "title": "cond" } ], - "description": "`switch` on `return_value` when object `{}` or `cond` when array `[]`" + "description": "Action to take based on the result of the call. 
This will run once the peer leg of the call has ended.\nWill use the switch method when the return_value is an object, and will use the cond method when the return_value is an array." }, "timeout": { "anyOf": [ @@ -3125,14 +3125,14 @@ "examples": [ "sipuser" ], - "description": "Username for authentication." + "description": "SIP username to use for authentication when dialing a SIP URI. Has no effect on calls to phone numbers." }, "password": { "type": "string", "examples": [ "sippassword" ], - "description": "Password for authentication." + "description": "SIP password to use for authentication when dialing a SIP URI. Has no effect on calls to phone numbers." }, "encryption": { "anyOf": [ @@ -3281,7 +3281,7 @@ "title": "cond" } ], - "description": "`switch` on `return_value` when object `{}` or `cond` when array `[]`" + "description": "Action to take based on the result of the call. This will run once the peer leg of the call has ended.\nWill use the switch method when the return_value is an object, and will use the cond method when the return_value is an array." }, "timeout": { "anyOf": [ @@ -3361,14 +3361,14 @@ "examples": [ "sipuser" ], - "description": "Username for authentication." + "description": "SIP username to use for authentication when dialing a SIP URI. Has no effect on calls to phone numbers." }, "password": { "type": "string", "examples": [ "sippassword" ], - "description": "Password for authentication." + "description": "SIP password to use for authentication when dialing a SIP URI. Has no effect on calls to phone numbers." }, "encryption": { "anyOf": [ @@ -3449,7 +3449,7 @@ "examples": [ "support-queue" ], - "description": "Name of the queue to enter." + "description": "Name of the queue to enter. If a queue with this name does not exist, it will be automatically created." }, "transfer_after_bridge": { "anyOf": [ @@ -3860,7 +3860,7 @@ "examples": [ "prompt_result" ], - "description": "Name of the variable whose value needs to be compared" + "description": "Name of the variable whose value needs to be compared." }, "case": { "type": "object", @@ -3871,14 +3871,14 @@ "$ref": "#/$defs/SWMLMethod" } }, - "description": "Object of values mapped to array of instructions to execute" + "description": "Object of key-mapped values to array of SWML methods to execute." }, "default": { "type": "array", "items": { "$ref": "#/$defs/SWMLMethod" }, - "description": "Array of instructions to execute if no cases match" + "description": "Array of SWML methods to execute if no cases match." } }, "required": [ @@ -3888,7 +3888,7 @@ "unevaluatedProperties": { "not": {} }, - "description": "Execute a sequence of instructions depending on which value matches a variable.", + "description": "Execute different instructions based on a variable's value.", "title": "switch" }, { @@ -4104,21 +4104,21 @@ "examples": [ "+15559876543" ], - "description": "The phone number to send the SMS to." + "description": "Phone number to send SMS message to in E.164 format." }, "from_number": { "type": "string", "examples": [ "+15551234567" ], - "description": "The phone number to send the SMS from.\nDefault is the calling party's caller ID number." + "description": "Phone number the SMS message will be sent from.\nDefault is the calling party's caller ID number." }, "region": { "type": "string", "examples": [ "us" ], - "description": "The region to use for the SMS." + "description": "Region of the world to originate the message from. Chosen based on account preferences or device location if not specified." 
}, "tags": { "type": "array", @@ -4159,21 +4159,21 @@ "examples": [ "+15559876543" ], - "description": "The phone number to send the SMS to." + "description": "Phone number to send SMS message to in E.164 format." }, "from_number": { "type": "string", "examples": [ "+15551234567" ], - "description": "The phone number to send the SMS from.\nDefault is the calling party's caller ID number." + "description": "Phone number the SMS message will be sent from.\nDefault is the calling party's caller ID number." }, "region": { "type": "string", "examples": [ "us" ], - "description": "The region to use for the SMS." + "description": "Region of the world to originate the message from. Chosen based on account preferences or device location if not specified." }, "tags": { "type": "array", @@ -5023,7 +5023,7 @@ "examples": [ "Analyze the conversation and provide insights to help the agent respond better." ], - "description": "The system prompt that guides the inner dialog AI's behavior. Only used when `enable_inner_dialog` is `true`." + "description": "The system prompt that guides the inner dialog AI's behavior. This prompt shapes how the background AI\nanalyzes the conversation and what kind of insights it provides to the main agent.\nOnly used when `enable_inner_dialog` is `true`." }, "inner_dialog_synced": { "anyOf": [ @@ -5038,7 +5038,7 @@ "examples": [ true ], - "description": "When enabled, synchronizes the inner dialog with the main conversation flow, waiting for user input before injection. Only used when `enable_inner_dialog` is `true`." + "description": "When enabled, synchronizes the inner dialog with the main conversation flow.\nThis ensures the inner dialog AI waits for the main conversation turn to complete\nbefore providing its analysis, rather than running fully asynchronously.\nOnly used when `enable_inner_dialog` is `true`." }, "initial_sleep_ms": { "anyOf": [ @@ -5155,17 +5155,17 @@ "anyOf": [ { "type": "string", - "const": "nova-2" + "const": "deepgram:nova-2" }, { "type": "string", - "const": "nova-3" + "const": "deepgram:nova-3" } ], "examples": [ - "nova-3" + "deepgram:nova-3" ], - "description": "The OpenAI ASR (Automatic Speech Recognition) engine to use. Allowed values are `nova-2` and `nova-3`. Defaults to `nova-3`." + "description": "The ASR (Automatic Speech Recognition) engine to use. Common values include `deepgram:nova-2`, `deepgram:nova-3`, and other supported ASR engines." }, "outbound_attention_timeout": { "anyOf": [ @@ -5290,7 +5290,7 @@ "examples": [ true ], - "description": "When enabled, the AI agent remains silent until directly addressed by name (using `ai_name`).\nThe user must say the agent's name to activate it for each interaction.\nCannot be used together with `enable_pause`." + "description": "When enabled, the AI agent remains silent until directly addressed by name (using `ai_name`).\nThis creates a \"push-to-talk\" style interaction where the agent only responds when explicitly\ncalled upon, useful for scenarios where the agent should listen but not interrupt.\nCannot be used together with `enable_pause`." }, "start_paused": { "anyOf": [ @@ -5305,7 +5305,7 @@ "examples": [ true ], - "description": "When enabled, the AI agent starts in a paused state and will not respond until the user\nspeaks the agent's name (set via `ai_name`). Automatically enables `enable_pause`." + "description": "When enabled, the AI agent starts in a paused state and will not respond until the user\nspeaks the agent's name (set via `ai_name`). 
Automatically enables `enable_pause`.\nThis is useful for scenarios where you want the agent to wait for explicit activation." }, "static_greeting": { "type": "string", @@ -5814,7 +5814,7 @@ "items": { "$ref": "#/$defs/SWAIGIncludes" }, - "description": "An array of objects to include remote function signatures.\nThe object fields are url to specify where the remote functions are defined and functions which is an array of the function names as strings." + "description": "An array of objects to include remote function signatures.\nThis allows you to include functions that are defined in a remote location.\nThe object fields are `url` to specify where the remote functions are defined and `functions` which is an array of the function names as strings." } }, "unevaluatedProperties": { @@ -5829,21 +5829,21 @@ "examples": [ "vars.digit == '1'" ], - "description": "The condition to be evaluated and act on." + "description": "The JavaScript condition to evaluate and act on." }, "then": { "type": "array", "items": { "$ref": "#/$defs/SWMLMethod" }, - "description": "The list of SWML instructions to be executed if the condition is `true`" + "description": "Sequence of SWML methods to execute when the condition evaluates to true." }, "else": { "type": "array", "items": { "$ref": "#/$defs/SWMLMethod" }, - "description": "The list of SWML instructions to be executed if the condition is `false`." + "description": "Sequence of SWML methods to execute when none of the other conditions evaluate to true." } }, "required": [ @@ -5863,7 +5863,7 @@ "items": { "$ref": "#/$defs/SWMLMethod" }, - "description": "The list of SWML instructions to be executed if the condition is `false`." + "description": "Sequence of SWML methods to execute when none of the other conditions evaluate to true." } }, "required": [ @@ -6053,10 +6053,11 @@ "$ref": "#/$defs/SWMLVar" } ], + "default": 60000, "examples": [ 30 ], - "description": "The timeout for speech recognition." + "description": "Speech timeout in milliseconds." }, "vad_silence_ms": { "anyOf": [ @@ -6070,7 +6071,7 @@ "examples": [ 500 ], - "description": "Voice activity detection silence time in milliseconds." + "description": "Voice activity detection silence time in milliseconds. Default depends on speech engine: `300` for Deepgram, `500` for Google." }, "vad_thresh": { "anyOf": [ @@ -6081,10 +6082,11 @@ "$ref": "#/$defs/SWMLVar" } ], + "default": 400, "examples": [ 3 ], - "description": "Voice activity detection threshold." + "description": "Voice activity detection threshold (0-1800)." }, "debug_level": { "anyOf": [ @@ -6095,10 +6097,11 @@ "$ref": "#/$defs/SWMLVar" } ], + "default": 0, "examples": [ 0 ], - "description": "Debug level for logging" + "description": "Debug level for logging (0-2)." }, "direction": { "type": "array", @@ -6108,24 +6111,24 @@ "description": "The direction of the call that should be transcribed." }, "speech_engine": { - "type": "string", + "$ref": "#/$defs/SpeechEngine", + "default": "deepgram", "examples": [ "google" ], - "description": "The speech engine to be used." + "description": "The speech recognition engine to use." }, - "summary_prompt": { + "ai_summary_prompt": { "type": "string", "examples": [ "Summarize the key points of this conversation." ], - "description": "The prompt for summarizaton." + "description": "The AI prompt that instructs how to summarize the conversation when `ai_summary` is enabled."
} }, "required": [ "lang", - "direction", - "speech_engine" + "direction" ], "unevaluatedProperties": { "not": {} @@ -6195,6 +6198,28 @@ ], "description": "The TTS voice you want to use for the target language." }, + "filter_from": { + "anyOf": [ + { + "$ref": "#/$defs/TranslationFilterPreset" + }, + { + "$ref": "#/$defs/CustomTranslationFilter" + } + ], + "description": "Translation filter for the source language direction." + }, + "filter_to": { + "anyOf": [ + { + "$ref": "#/$defs/TranslationFilterPreset" + }, + { + "$ref": "#/$defs/CustomTranslationFilter" + } + ], + "description": "Translation filter for the target language direction." + }, "live_events": { "anyOf": [ { @@ -6232,10 +6257,11 @@ "$ref": "#/$defs/SWMLVar" } ], + "default": 60000, "examples": [ 30 ], - "description": "The timeout for speech recognition." + "description": "Speech timeout in milliseconds." }, "vad_silence_ms": { "anyOf": [ @@ -6249,7 +6275,7 @@ "examples": [ 500 ], - "description": "Voice activity detection silence time in milliseconds." + "description": "Voice activity detection silence time in milliseconds. Default depends on speech engine: `300` for Deepgram, `500` for Google." }, "vad_thresh": { "anyOf": [ @@ -6260,10 +6286,11 @@ "$ref": "#/$defs/SWMLVar" } ], + "default": 400, "examples": [ 3 ], - "description": "Voice activity detection threshold." + "description": "Voice activity detection threshold (0-1800)." }, "debug_level": { "anyOf": [ @@ -6274,10 +6301,11 @@ "$ref": "#/$defs/SWMLVar" } ], + "default": 0, "examples": [ 0 ], - "description": "Debug level for logging" + "description": "Debug level for logging (0-2)." }, "direction": { "type": "array", @@ -6287,25 +6315,25 @@ "description": "The direction of the call that should be translated." }, "speech_engine": { - "type": "string", + "$ref": "#/$defs/SpeechEngine", + "default": "deepgram", "examples": [ "google" ], - "description": "The speech engine to be used." + "description": "The speech engine to use for speech recognition." }, - "summary_prompt": { + "ai_summary_prompt": { "type": "string", "examples": [ "Summarize the key points of this bilingual conversation." ], - "description": "The prompt for summarizaton." + "description": "The AI prompt that instructs how to summarize the conversation when `ai_summary` is enabled." } }, "required": [ "from_lang", "to_lang", - "direction", - "speech_engine" + "direction" ], "unevaluatedProperties": { "not": {} @@ -6348,11 +6376,8 @@ "description": "The message to be injected" }, "direction": { - "type": "array", - "items": { - "$ref": "#/$defs/TranslateDirection" - }, - "description": "The direction of the message that should be injected." + "$ref": "#/$defs/TranslateDirection", + "description": "The direction of the message." } }, "required": [ @@ -6362,7 +6387,7 @@ "unevaluatedProperties": { "not": {} }, - "description": "Injects a message into the conversation." + "description": "Injects a message into the conversation to be translated and spoken to the specified party." } }, "required": [ @@ -6391,21 +6416,21 @@ "examples": [ "French" ], - "description": "Any arbirary name for the language object." + "description": "Name of the language (e.g., 'French', 'English'). This value is used in the system prompt to instruct the LLM what language is being spoken." }, "code": { "type": "string", "examples": [ "fr-FR" ], - "description": "The language code for the ai to hear. For example, 'fr-FR'." + "description": "The language code for ASR (Automatic Speech Recognition) purposes. 
By default, SignalWire uses Deepgram's\nNova-3 STT engine, so this value should match a code from Deepgram's Nova-3 language codes.\nIf a different STT model was selected using the `openai_asr_engine` parameter, you must select a code supported by that engine." }, "voice": { "type": "string", "examples": [ - "fr-FR-Neural2-B" + "gcloud.fr-FR-Neural2-B" ], - "description": "Voice to use for the language. For example, 'fr-FR-Neural2-B'." + "description": "Voice to use for the language. String format: `<engine>.<voice_id>`.\nSelect engine from `gcloud`, `polly`, `elevenlabs`, `cartesia`, or `deepgram`.\nFor example, `gcloud.fr-FR-Neural2-B`." }, "_model": { "type": "string", @@ -6471,21 +6496,21 @@ "examples": [ "French" ], - "description": "Any arbirary name for the language object." + "description": "Name of the language (e.g., 'French', 'English'). This value is used in the system prompt to instruct the LLM what language is being spoken." }, "code": { "type": "string", "examples": [ "fr-FR" ], - "description": "The language code for the ai to hear. For example, 'fr-FR'." + "description": "The language code for ASR (Automatic Speech Recognition) purposes. By default, SignalWire uses Deepgram's\nNova-3 STT engine, so this value should match a code from Deepgram's Nova-3 language codes.\nIf a different STT model was selected using the `openai_asr_engine` parameter, you must select a code supported by that engine." }, "voice": { "type": "string", "examples": [ - "fr-FR-Neural2-B" + "gcloud.fr-FR-Neural2-B" ], - "description": "Voice to use for the language. For example, 'fr-FR-Neural2-B'." + "description": "Voice to use for the language. String format: `<engine>.<voice_id>`.\nSelect engine from `gcloud`, `polly`, `elevenlabs`, `cartesia`, or `deepgram`.\nFor example, `gcloud.fr-FR-Neural2-B`." }, "_model": { "type": "string", @@ -6542,7 +6567,7 @@ "examples": [ [ "hmm" ] ], - "description": "An array of strings to be used as fillers in the conversation. This helps the AI break silence between responses." + "description": "An array of strings to be used as fillers in the conversation. This helps the AI break silence between responses.\nNote: `speech_fillers` are used between every 'turn' taken by the LLM, including at the beginning of the call.\nFor more targeted fillers, consider using `function_fillers`." } }, "required": [ @@ -7635,6 +7660,14 @@ ], "title": "TranscribeDirection enum" }, + "SpeechEngine": { + "type": "string", + "enum": [ + "deepgram", + "google" + ], + "description": "Speech recognition engine options." + }, "TranscribeSummarizeAction": { "type": "object", "properties": { @@ -7670,6 +7703,24 @@ }, "title": "TranscribeSummarizeAction object" }, + "TranslationFilterPreset": { + "type": "string", + "enum": [ + "polite", + "rude", + "professional", + "shakespeare", + "gen-z" + ], + "description": "Preset translation filter values that adjust the tone or style of translated speech.\n\n- `polite` - Translates to a polite version, removing anything insulting while maintaining sentiment\n- `rude` - Translates to a rude and insulting version while maintaining sentiment\n- `professional` - Translates to sound professional, removing slang or lingo\n- `shakespeare` - Translates to sound like Shakespeare, speaking in iambic pentameter\n- `gen-z` - Translates to use Gen-Z slang and expressions", + "title": "Filter Presets" + }, + "CustomTranslationFilter": { + "type": "string", + "pattern": "^prompt:.+$", + "description": "Custom translation filter with a prompt prefix. 
Use `prompt:` followed by your custom instructions (e.g., `prompt:Use formal business language`).", "title": "Custom Filter" }, "TranslateDirection": { "type": "string", "enum": [ @@ -7696,7 +7747,7 @@ "examples": [ "Provide a brief summary of the translated conversation." ], - "description": "The prompt for summarizaton." + "description": "The AI prompt that instructs how to summarize the conversation." } }, "unevaluatedProperties": { @@ -7860,7 +7911,7 @@ "examples": [ "get_weather" ], - "description": "The unique name for the function." + "description": "A unique name for the function. This can be any user-defined string or can reference a reserved function. Reserved functions are SignalWire functions that will be executed at certain points in the conversation." } }, "required": [ @@ -7934,7 +7985,7 @@ "function": { "type": "string", "const": "startup_hook", - "description": "The unique name for the function. For the start_hook function, the function name is 'start_hook'." + "description": "A unique name for the function. This can be any user-defined string or can reference a reserved function. Reserved functions are SignalWire functions that will be executed at certain points in the conversation. For the startup_hook function, the function name is 'startup_hook'." } }, "required": [ @@ -8008,7 +8059,7 @@ "function": { "type": "string", "const": "hangup_hook", - "description": "The unique name for the function. For the stop_hook function, the function name is 'stop_hook'." + "description": "A unique name for the function. This can be any user-defined string or can reference a reserved function. Reserved functions are SignalWire functions that will be executed at certain points in the conversation. For the hangup_hook function, the function name is 'hangup_hook'." } }, "required": [ @@ -8082,7 +8133,7 @@ "function": { "type": "string", "const": "summarize_conversation", - "description": "The unique name for the function.. For the summarize_conversation function, the function name is 'summarize_conversation'." + "description": "A unique name for the function. This can be any user-defined string or can reference a reserved function. Reserved functions are SignalWire functions that will be executed at certain points in the conversation. For the summarize_conversation function, the function name is 'summarize_conversation'." } }, "required": [ @@ -8310,7 +8361,7 @@ "items": { "$ref": "#/$defs/Expression" }, - "description": "An array of objects that define patterns and corresponding actions." + "description": "An array of objects that define pattern-matching logic for processing the user's input data. Multiple expressions can be defined to match against the input." }, "webhooks": { "type": "array", @@ -8333,7 +8384,7 @@ "items": { "$ref": "#/$defs/ContextSteps" }, - "description": "An array of objects that define the steps in the context. These steps are used to define the flow of the conversation.", + "description": "An array of step objects that define the conversation flow for this context. Steps execute sequentially unless otherwise specified.", "title": "steps" }, "isolated": { "type": "boolean", "examples": [ true ], - "description": "A boolean value, if set to `true`, the conversation history will be reset and the agent will only have the context of the original system prompt." + "description": "When `true`, resets conversation history to only the system prompt when entering this context. Useful for focused tasks that shouldn't be influenced by previous conversation."
}, "enter_fillers": { "type": "array", "items": { "$ref": "#/$defs/FunctionFillers" }, - "description": "An array of objects that define the enter fillers for the context. Enter fillers are used when entering the context." + "description": "Language-specific filler phrases played when transitioning into this context. Helps provide smooth context switches." }, "exit_fillers": { "type": "array", "items": { "$ref": "#/$defs/FunctionFillers" }, - "description": "An array of objects that define the exit fillers for the context. Exit fillers are used when exiting the context." + "description": "Language-specific filler phrases played when leaving this context. Ensures natural transitions out of specialized modes." }, "pom": { "type": "array", @@ -8382,7 +8433,7 @@ "items": { "$ref": "#/$defs/ContextSteps" }, - "description": "An array of objects that define the steps in the context. These steps are used to define the flow of the conversation.", + "description": "An array of step objects that define the conversation flow for this context. Steps execute sequentially unless otherwise specified.", "title": "steps" }, "isolated": { @@ -8390,21 +8441,21 @@ "examples": [ true ], - "description": "A boolean value, if set to `true`, the conversation history will be reset and the agent will only have the context of the original system prompt." + "description": "When `true`, resets conversation history to only the system prompt when entering this context. Useful for focused tasks that shouldn't be influenced by previous conversation." }, "enter_fillers": { "type": "array", "items": { "$ref": "#/$defs/FunctionFillers" }, - "description": "An array of objects that define the enter fillers for the context. Enter fillers are used when entering the context." + "description": "Language-specific filler phrases played when transitioning into this context. Helps provide smooth context switches." }, "exit_fillers": { "type": "array", "items": { "$ref": "#/$defs/FunctionFillers" }, - "description": "An array of objects that define the exit fillers for the context. Exit fillers are used when exiting the context." + "description": "Language-specific filler phrases played when leaving this context. Ensures natural transitions out of specialized modes." }, "text": { "type": "string", @@ -10593,7 +10644,7 @@ "examples": [ "Customer wants to order Pizza" ], - "description": "The conditions that must be met for the conversation to proceed to the next step.\nIf a condition is not met, the conversation will not proceed to the next step.\nIt's **highly** recommended you create a custom criteria for the step to get the intended behavior." + "description": "The criteria that must be met for the AI to proceed to the next step.\nThe criteria is an instruction given to the AI.\nIt's **highly** recommended you create a custom criteria for the step to get the intended behavior." }, "functions": { "type": "array", @@ -10620,7 +10671,7 @@ "Confirm Order" ] ], - "description": "An array of valid contexts that the conversation can transition to from this step." + "description": "An array of context names that the AI can transition to from this step. This must be a valid `contexts.name` that is present in your `contexts` object." }, "skip_user_turn": { "anyOf": [ @@ -10641,7 +10692,7 @@ "examples": [ true ], - "description": "A boolean value, if set to `true`, will end the contexts conversation and transition to a normal interaction." + "description": "A boolean value that determines if the step is the last in the context. 
If `true`, the context ends after this step. Cannot be used along with the `valid_steps` parameter." }, "valid_steps": { "type": "array", @@ -10689,7 +10740,7 @@ "examples": [ "Customer wants to order Pizza" ], - "description": "The conditions that must be met for the conversation to proceed to the next step.\nIf a condition is not met, the conversation will not proceed to the next step.\nIt's **highly** recommended you create a custom criteria for the step to get the intended behavior." + "description": "The criteria that must be met for the AI to proceed to the next step.\nThe criteria act as instructions given to the AI.\nIt's **highly** recommended you create custom criteria for the step to get the intended behavior." }, "functions": { "type": "array", @@ -10716,7 +10767,7 @@ "Confirm Order" ] ], - "description": "An array of valid contexts that the conversation can transition to from this step." + "description": "An array of context names that the AI can transition to from this step. This must be a valid `contexts.name` that is present in your `contexts` object." }, "skip_user_turn": { "anyOf": [ @@ -10737,7 +10788,7 @@ "examples": [ true ], - "description": "A boolean value, if set to `true`, will end the contexts conversation and transition to a normal interaction." + "description": "A boolean value that determines if the step is the last in the context. If `true`, the context ends after this step. Cannot be used along with the `valid_steps` parameter." }, "valid_steps": { "type": "array", diff --git a/website/docs/main/swml/reference/methods/live_transcribe/action/start.mdx b/website/docs/main/swml/reference/methods/live_transcribe/action/start.mdx index 808dcd05b..3d89cae3d 100644 --- a/website/docs/main/swml/reference/methods/live_transcribe/action/start.mdx +++ b/website/docs/main/swml/reference/methods/live_transcribe/action/start.mdx @@ -50,7 +50,8 @@ Start a live translation session. type="boolean" default="false" > - Whether to enable AI summarization. + Whether to enable automatic AI summarization. + When enabled, an AI-generated summary of the conversation will be sent to your webhook when the transcription session ends. - Voice activity detection silence time in milliseconds.
**Possible Values:** [`Minimum value: 1`, `Maximum Value: None`] + Voice activity detection silence time in milliseconds. + Default depends on the speech engine: `300` for Deepgram, `500` for Google.
**Possible Values:** [`Minimum value: 1`, `Maximum Value: None`]
- The speech engine to use for transcription.
**Possible Values:** [`deepgram`, `google`] + The speech recognition engine to use.
**Possible Values:** [`deepgram`, `google`]
- The prompt for summarization. + The AI prompt that instructs how to summarize the conversation when `ai_summary` is enabled. + This prompt is sent to an AI model to guide how it generates the summary. + + Example: "Summarize the key points and action items from this conversation." ## **Example** @@ -117,14 +122,14 @@ live_transcribe: webhook: 'https://example.com/webhook' lang: en live_events: true - ai_summary: false - speech_timeout: 30 + ai_summary: true + ai_summary_prompt: Summarize this conversation + speech_timeout: 60000 vad_silence_ms: 500 - vad_thresh: 0.6 - debug_level: 2 + vad_thresh: 400 + debug_level: 0 direction: - remote-caller - local-caller - speech_engine: default - summary_prompt: Summarize this conversation + speech_engine: deepgram ``` diff --git a/website/docs/main/swml/reference/methods/live_transcribe/action/stop.mdx b/website/docs/main/swml/reference/methods/live_transcribe/action/stop.mdx index 19f9f8026..12c98db1d 100644 --- a/website/docs/main/swml/reference/methods/live_transcribe/action/stop.mdx +++ b/website/docs/main/swml/reference/methods/live_transcribe/action/stop.mdx @@ -10,7 +10,11 @@ import APIField from "@site/src/components/APIField"; # action.stop -Stop a live translation session. +Stop a live transcription session. + +This action is designed for use on active calls that have an existing transcription session running. +You can send this action via the [Call Commands REST API](/rest/signalwire-rest/endpoints/calling/call-commands), +or include it in a SWML section executed via [`transfer`](/swml/methods/transfer) or [`execute`](/swml/methods/execute) during a call. - The prompt for summarization. + The AI prompt that instructs the AI model how to summarize the conversation. + This guides the style and content of the generated summary. + + Example: "Provide a bullet-point summary of the main topics discussed." diff --git a/website/docs/main/swml/reference/methods/live_translate/action/inject.mdx b/website/docs/main/swml/reference/methods/live_translate/action/inject.mdx index 1f3cfda29..8fe65efe9 100644 --- a/website/docs/main/swml/reference/methods/live_translate/action/inject.mdx +++ b/website/docs/main/swml/reference/methods/live_translate/action/inject.mdx @@ -10,7 +10,11 @@ import APIField from "@site/src/components/APIField"; # action.inject -Inject a message into the conversation. +Inject a message into the conversation to be translated and spoken to the specified party. + +This action is designed for use on active calls that have an existing translation session running. +You can send this action via the [Call Commands REST API](/rest/signalwire-rest/endpoints/calling/call-commands), +or include it in a SWML section executed via [`transfer`](/swml/methods/transfer) or [`execute`](/swml/methods/execute) during a call. Learn more about our supported Voices & Languages [here](/voice/getting-started/voice-and-languages). + + Translation filter to apply to the source language direction. + Adjusts the tone or style of translated speech. 
+ + **Preset Values:** + - `polite` - Translates to a polite version, removing anything insulting while maintaining sentiment + - `rude` - Translates to a rude and insulting version while maintaining sentiment + - `professional` - Translates to sound professional, removing slang or lingo + - `shakespeare` - Translates to sound like Shakespeare, speaking in iambic pentameter + - `gen-z` - Translates to use Gen-Z slang and expressions + + **Custom:** Use `prompt:` prefix for custom instructions (e.g., `prompt:Use formal business language`). + + + + Translation filter to apply to the target language direction. + Adjusts the tone or style of translated speech. + + **Preset Values:** + - `polite` - Translates to a polite version, removing anything insulting while maintaining sentiment + - `rude` - Translates to a rude and insulting version while maintaining sentiment + - `professional` - Translates to sound professional, removing slang or lingo + - `shakespeare` - Translates to sound like Shakespeare, speaking in iambic pentameter + - `gen-z` - Translates to use Gen-Z slang and expressions + + **Custom:** Use `prompt:` prefix for custom instructions. + + - Whether to enable AI summarization. + Whether to enable automatic AI summarization. + When enabled, AI-generated summaries in both languages will be sent to your webhook when the translation session ends. - The timeout for speech recognition.
**Possible Values:** [`Minimum value: 1500`, `Maximum Value: None`] + The timeout for speech recognition. + + **Possible Values:** [`Minimum value: 1500`, `Maximum Value: None`]
- Voice activity detection silence time in milliseconds.
**Possible Values:** [`Minimum value: 1`, `Maximum Value: None`] + Voice activity detection silence time in milliseconds. + Default depends on the speech engine: `300` for Deepgram, `500` for Google. + + **Possible Values:** [`Minimum value: 1`, `Maximum Value: None`]
- Voice activity detection threshold.
**Possible Values:** [`Minimum value: 0`, `Maximum Value: 1800`] + Voice activity detection threshold. + + **Possible Values:** [`Minimum value: 0`, `Maximum Value: 1800`]
- The direction of the call that should be translated.
**Possible Values:** [`remote-caller`, `local-caller`] + The direction of the call that should be translated. + + **Possible Values:** [`remote-caller`, `local-caller`]
- The speech engine to use for transcription.
**Possible Values:** [`deepgram`, `google`] + The speech recognition engine to use. + + **Possible Values:** [`deepgram`, `google`]
- The prompt for summarization. + The AI prompt that instructs how to summarize the conversation when `ai_summary` is enabled. + This prompt is sent to an AI model to guide how it generates the summary. ## **Example** @@ -139,19 +186,20 @@ live_translate: action: start: webhook: 'https://example.com/webhook' - from_lang: en - to_lang: es - from_voice: en-US - to_voice: es-ES + from_lang: en-US + to_lang: es-ES + from_voice: elevenlabs.josh + to_voice: elevenlabs.josh + filter_from: professional live_events: true - ai_summary: false - speech_timeout: 30 + ai_summary: true + ai_summary_prompt: Summarize this conversation + speech_timeout: 60000 vad_silence_ms: 500 - vad_thresh: 0.6 - debug_level: 2 + vad_thresh: 400 + debug_level: 0 direction: - remote-caller - local-caller - speech_engine: default - summary_prompt: Summarize this conversation + speech_engine: deepgram ``` diff --git a/website/docs/main/swml/reference/methods/live_translate/action/stop.mdx b/website/docs/main/swml/reference/methods/live_translate/action/stop.mdx index 5a59889e3..ecb9a7335 100644 --- a/website/docs/main/swml/reference/methods/live_translate/action/stop.mdx +++ b/website/docs/main/swml/reference/methods/live_translate/action/stop.mdx @@ -12,6 +12,10 @@ import APIField from "@site/src/components/APIField"; Stop a live translation session. +This action is designed for use on active calls that have an existing translation session running. +You can send this action via the [Call Commands REST API](/rest/signalwire-rest/endpoints/calling/call-commands), +or include it in a SWML section executed via [`transfer`](/swml/methods/transfer) or [`execute`](/swml/methods/execute) during a call. + - The prompt for summarization. + The AI prompt that instructs the AI model how to summarize the conversation. + This guides the style and content of the generated summary. diff --git a/website/docs/main/swml/reference/methods/live_translate/index.mdx b/website/docs/main/swml/reference/methods/live_translate/index.mdx index 1b7861470..3c30737f3 100644 --- a/website/docs/main/swml/reference/methods/live_translate/index.mdx +++ b/website/docs/main/swml/reference/methods/live_translate/index.mdx @@ -34,6 +34,25 @@ Start live translation of the call. The translation will be sent to the specifie The action to be performed.
An object that contains **one** of the [`live_translate actions`](./action/index.mdx)
**Possible Values:** [[`stop`](/swml/methods/live_translate/action/stop), [`start`](/swml/methods/live_translate/action/start), [`summarize`](/swml/methods/live_translate/action/summarize), [`inject`](/swml/methods/live_translate/action/inject)] +## Action Usage Context + +| Action | Call Start | Live Call | +|--------|-----------|-----------| +| `start` | ✅ Primary use | ✅ Can start mid-call | +| `stop` | ❌ No session to stop | ✅ Designed for this | +| `summarize` | ❌ No content to summarize | ✅ Designed for this | +| `inject` | ❌ No session exists | ✅ Designed for this | + +**Call Start:** The initial SWML document returned when a call first arrives. + +**Live Call:** Actions sent to active calls via: +- The [Call Commands REST API](/rest/signalwire-rest/endpoints/calling/call-commands) +- SWML sections executed via [`transfer`](/swml/methods/transfer) or [`execute`](/swml/methods/execute) during a call + +:::info ai_summary vs summarize action +- **`ai_summary: true`** (in `start`): Automatically generates summary when session **ends** +- **`summarize` action**: On-demand summary **during** an active session +::: ### **Example**
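Below is a minimal sketch of the full lifecycle. The webhook URL and the `stop_translation` section name are illustrative: the `main` section starts live translation when the call is answered, and the stop section would be run later on the live call via the [Call Commands REST API](/rest/signalwire-rest/endpoints/calling/call-commands) or [`execute`](/swml/methods/execute).

```yaml
version: 1.0.0
sections:
  main:
    - answer
    - live_translate:
        action:
          start:
            webhook: 'https://example.com/translation-events'
            from_lang: en-US
            to_lang: es-ES
            from_voice: elevenlabs.josh
            to_voice: elevenlabs.josh
            filter_from: professional
            live_events: true
            direction:
              - remote-caller
              - local-caller

  # Illustrative section name; run mid-call via `execute` or the
  # Call Commands REST API to end the translation session.
  stop_translation:
    - live_translate:
        action: stop
```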