Skip to content

Commit c5f3a68

Browse files
authored
[Firebase AI] Add Live API transcripts (#1351)
* [Firebase AI] Add Live API transcripts * Clean up config setup
1 parent b49b8f3 commit c5f3a68

File tree

4 files changed

+88
-7
lines changed

4 files changed

+88
-7
lines changed

docs/readme.md

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -109,6 +109,10 @@ Support
109109

110110
Release Notes
111111
-------------
112+
### Upcoming
113+
- Changes
114+
- Firebase AI: Add support for receiving Live API Transcripts.
115+
112116
### 13.4.0
113117
- Changes
114118
- General: Update to Firebase C++ SDK version 13.2.0.

firebaseai/src/LiveGenerationConfig.cs

Lines changed: 30 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -53,16 +53,32 @@ internal Dictionary<string, object> ToJson()
5353
if (!string.IsNullOrWhiteSpace(voice))
5454
{
5555
dict["voiceConfig"] = new Dictionary<string, object>() {
56-
{ "prebuiltVoiceConfig" , new Dictionary<string, object>() {
57-
{ "voiceName", voice }
58-
} }
59-
};
56+
{ "prebuiltVoiceConfig" , new Dictionary<string, object>() {
57+
{ "voiceName", voice }
58+
} }
59+
};
6060
}
6161

6262
return dict;
6363
}
6464
}
6565

66+
/// <summary>
67+
/// A struct used to configure speech transcription settings.
68+
/// </summary>
69+
public readonly struct AudioTranscriptionConfig
70+
{
71+
/// <summary>
72+
/// Intended for internal use only.
73+
/// This method is used for serializing the object to JSON for the API request.
74+
/// </summary>
75+
internal Dictionary<string, object> ToJson()
76+
{
77+
Dictionary<string, object> dict = new();
78+
return dict;
79+
}
80+
}
81+
6682
/// <summary>
6783
/// A struct defining model parameters to be used when generating live session content.
6884
/// </summary>
@@ -76,6 +92,11 @@ public readonly struct LiveGenerationConfig
7692
private readonly int? _maxOutputTokens;
7793
private readonly float? _presencePenalty;
7894
private readonly float? _frequencyPenalty;
95+
private readonly AudioTranscriptionConfig? _inputAudioTranscription;
96+
private readonly AudioTranscriptionConfig? _outputAudioTranscription;
97+
98+
internal readonly AudioTranscriptionConfig? InputAudioTranscription => _inputAudioTranscription;
99+
internal readonly AudioTranscriptionConfig? OutputAudioTranscription => _outputAudioTranscription;
79100

80101
/// <summary>
81102
/// Creates a new `LiveGenerationConfig` value.
@@ -168,7 +189,9 @@ public LiveGenerationConfig(
168189
float? topK = null,
169190
int? maxOutputTokens = null,
170191
float? presencePenalty = null,
171-
float? frequencyPenalty = null)
192+
float? frequencyPenalty = null,
193+
AudioTranscriptionConfig? inputAudioTranscription = null,
194+
AudioTranscriptionConfig? outputAudioTranscription = null)
172195
{
173196
_speechConfig = speechConfig;
174197
_responseModalities = responseModalities != null ?
@@ -179,6 +202,8 @@ public LiveGenerationConfig(
179202
_maxOutputTokens = maxOutputTokens;
180203
_presencePenalty = presencePenalty;
181204
_frequencyPenalty = frequencyPenalty;
205+
_inputAudioTranscription = inputAudioTranscription;
206+
_outputAudioTranscription = outputAudioTranscription;
182207
}
183208

184209
/// <summary>

firebaseai/src/LiveGenerativeModel.cs

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -156,6 +156,16 @@ public async Task<LiveSession> ConnectAsync(CancellationToken cancellationToken
156156
if (_liveConfig != null)
157157
{
158158
setupDict["generationConfig"] = _liveConfig?.ToJson();
159+
160+
// Input/output transcription settings are defined on the config, but are written to setupDict as their own entries.
161+
if (_liveConfig?.InputAudioTranscription.HasValue ?? false)
162+
{
163+
setupDict["inputAudioTranscription"] = _liveConfig?.InputAudioTranscription?.ToJson();
164+
}
165+
if (_liveConfig?.OutputAudioTranscription.HasValue ?? false)
166+
{
167+
setupDict["outputAudioTranscription"] = _liveConfig?.OutputAudioTranscription?.ToJson();
168+
}
159169
}
160170
if (_systemInstruction.HasValue)
161171
{

firebaseai/src/LiveSessionResponse.cs

Lines changed: 44 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -179,11 +179,26 @@ public interface ILiveSessionMessage { }
179179
/// </summary>
180180
public readonly bool Interrupted { get; }
181181

182-
private LiveSessionContent(ModelContent? content, bool turnComplete, bool interrupted)
182+
/// <summary>
183+
/// The input transcription. Note that the transcription is independent of
184+
/// the Content, and doesn't imply any ordering between them.
185+
/// </summary>
186+
public readonly Transcription? InputTranscription { get; }
187+
188+
/// <summary>
189+
/// The output transcription. Note that the transcription is independent of
190+
/// the Content, and doesn't imply any ordering between them.
191+
/// </summary>
192+
public readonly Transcription? OutputTranscription { get; }
193+
194+
private LiveSessionContent(ModelContent? content, bool turnComplete, bool interrupted,
195+
Transcription? input, Transcription? output)
183196
{
184197
Content = content;
185198
TurnComplete = turnComplete;
186199
Interrupted = interrupted;
200+
InputTranscription = input;
201+
OutputTranscription = output;
187202
}
188203

189204
/// <summary>
@@ -195,7 +210,9 @@ internal static LiveSessionContent FromJson(Dictionary<string, object> jsonDict)
195210
return new LiveSessionContent(
196211
jsonDict.ParseNullableObject("modelTurn", ModelContent.FromJson),
197212
jsonDict.ParseValue<bool>("turnComplete"),
198-
jsonDict.ParseValue<bool>("interrupted")
213+
jsonDict.ParseValue<bool>("interrupted"),
214+
jsonDict.ParseNullableObject("inputTranscription", Transcription.FromJson),
215+
jsonDict.ParseNullableObject("outputTranscription", Transcription.FromJson)
199216
);
200217
}
201218
}
@@ -271,4 +288,29 @@ internal static LiveSessionToolCallCancellation FromJson(Dictionary<string, obje
271288
}
272289
}
273290

291+
/// <summary>
292+
/// A transcription of the audio sent in a live session.
293+
/// </summary>
294+
public readonly struct Transcription
295+
{
296+
/// <summary>
297+
/// The transcribed text.
298+
/// </summary>
299+
public readonly string Text { get; }
300+
301+
private Transcription(string text)
302+
{
303+
Text = text;
304+
}
305+
306+
/// <summary>
307+
/// Intended for internal use only.
308+
/// This method is used for deserializing JSON responses and should not be called directly.
309+
/// </summary>
310+
internal static Transcription FromJson(Dictionary<string, object> jsonDict)
311+
{
312+
return new Transcription(jsonDict.ParseValue<string>("text"));
313+
}
314+
}
315+
274316
}

0 commit comments

Comments
 (0)