diff --git a/akka-javasdk-testkit/src/main/java/akka/javasdk/testkit/TestModelProvider.java b/akka-javasdk-testkit/src/main/java/akka/javasdk/testkit/TestModelProvider.java index 624dc43af..217739dea 100644 --- a/akka-javasdk-testkit/src/main/java/akka/javasdk/testkit/TestModelProvider.java +++ b/akka-javasdk-testkit/src/main/java/akka/javasdk/testkit/TestModelProvider.java @@ -5,9 +5,12 @@ package akka.javasdk.testkit; import akka.japi.Pair; +import akka.javasdk.agent.MessageContent; import akka.javasdk.agent.ModelProvider; import dev.langchain4j.agent.tool.ToolExecutionRequest; import dev.langchain4j.data.message.AiMessage; +import dev.langchain4j.data.message.ImageContent; +import dev.langchain4j.data.message.TextContent; import dev.langchain4j.data.message.ToolExecutionResultMessage; import dev.langchain4j.model.chat.ChatModel; import dev.langchain4j.model.chat.StreamingChatModel; @@ -15,6 +18,7 @@ import dev.langchain4j.model.chat.response.ChatResponse; import dev.langchain4j.model.chat.response.StreamingChatResponseHandler; import dev.langchain4j.model.output.FinishReason; +import java.net.MalformedURLException; import java.util.ArrayList; import java.util.List; import java.util.Optional; @@ -67,7 +71,30 @@ public sealed interface InputMessage { } /** Represents a user message. 
*/ - public record UserMessage(String content) implements InputMessage {} + public record UserMessage(List contents) implements InputMessage { + + public UserMessage(String content) { + this(List.of(MessageContent.TextMessageContent.from(content))); + } + + @Override + public String content() { + if (isTextOnly()) { + return text(); + } else { + throw new IllegalStateException("This is not text only user message"); + } + } + + public boolean isTextOnly() { + return contents.size() == 1 + && contents.getFirst() instanceof MessageContent.TextMessageContent; + } + + public String text() { + return ((MessageContent.TextMessageContent) contents.getFirst()).text(); + } + } /** Represents a tool result. This is used to simulate a response from a tool invocation. */ public record ToolResult(String name, String content) implements InputMessage {} @@ -203,7 +230,37 @@ private InputMessage getLastInputMessage(ChatRequest chatRequest) { .map( chatMessage -> { if (chatMessage instanceof dev.langchain4j.data.message.UserMessage userMessage) { - return new UserMessage(userMessage.singleText()); + List contents = + userMessage.contents().stream() + .map( + content -> + switch (content) { + case TextContent textContent -> + MessageContent.TextMessageContent.from(textContent.text()); + case ImageContent imageContent -> { + if (imageContent.image().url() != null) { + try { + yield MessageContent.ImageMessageContent.fromUrl( + imageContent.image().url().toURL(), + toDetailLevel(imageContent.detailLevel())); + } catch (MalformedURLException e) { + throw new RuntimeException( + "Can't transform " + + imageContent.image().url() + + " to URL", + e); + } + } else { + throw new IllegalStateException( + "Not supported image content without url."); + } + } + default -> + throw new IllegalStateException( + "Not supported content type: " + content); + }) + .toList(); + return new UserMessage(contents); } else { ToolExecutionResultMessage result = (ToolExecutionResultMessage) chatMessage; return 
new ToolResult(result.toolName(), result.text()); @@ -212,6 +269,15 @@ private InputMessage getLastInputMessage(ChatRequest chatRequest) { .orElseThrow(() -> new RuntimeException("No input message found")); } + private MessageContent.ImageMessageContent.DetailLevel toDetailLevel( + ImageContent.DetailLevel detailLevel) { + return switch (detailLevel) { + case LOW -> MessageContent.ImageMessageContent.DetailLevel.LOW; + case HIGH -> MessageContent.ImageMessageContent.DetailLevel.HIGH; + case AUTO -> MessageContent.ImageMessageContent.DetailLevel.AUTO; + }; + } + /** Retrieves the AI response for a given input message based on the defined predicates. */ private AiResponse getResponse(InputMessage inputMessage) { return responsePredicates.stream() diff --git a/akka-javasdk-tests/src/test/java/akkajavasdk/components/agent/AgentIntegrationTest.java b/akka-javasdk-tests/src/test/java/akkajavasdk/components/agent/AgentIntegrationTest.java index 64451d962..7e55c67cc 100644 --- a/akka-javasdk-tests/src/test/java/akkajavasdk/components/agent/AgentIntegrationTest.java +++ b/akka-javasdk-tests/src/test/java/akkajavasdk/components/agent/AgentIntegrationTest.java @@ -11,6 +11,7 @@ import akka.javasdk.CommandException; import akka.javasdk.DependencyProvider; import akka.javasdk.agent.AgentRegistry; +import akka.javasdk.agent.MessageContent; import akka.javasdk.testkit.TestKit; import akka.javasdk.testkit.TestKitSupport; import akka.javasdk.testkit.TestModelProvider; @@ -59,6 +60,7 @@ public T getDependency(Class clazz) { .withModelProvider(SomeStructureResponseSchemaAgent.class, testModelProvider) .withModelProvider(SomeStreamingAgent.class, testModelProvider) .withModelProvider(SomeAgentWithBadlyConfiguredTool.class, testModelProvider) + .withModelProvider(SomeMultiModalUserMessageAgent.class, testModelProvider) .withDependencyProvider(depsProvider); } @@ -88,6 +90,34 @@ public void shouldMapStringResponse() { assertThat(result.response()).isEqualTo("123456"); } + @Test + public 
void shouldTestMultiModalUserMessage() { + // given + String response = "multi modal user message"; + testModelProvider + .whenUserMessage( + userMessage -> + ((MessageContent.TextMessageContent) userMessage.contents().get(0)) + .text() + .equals("testing") + && ((MessageContent.ImageUrlMessageContent) userMessage.contents().get(1)) + .url() + .toString() + .equals("https://example.com")) + .reply(response); + + // when + String result = + componentClient + .forAgent() + .inSession(newSessionId()) + .method(SomeMultiModalUserMessageAgent::ask) + .invoke(); + + // then + assertThat(result).isEqualTo(response); + } + @Test public void shouldMapStructuredResponse() { // given diff --git a/akka-javasdk-tests/src/test/java/akkajavasdk/components/agent/SessionMemoryEntityTest.java b/akka-javasdk-tests/src/test/java/akkajavasdk/components/agent/SessionMemoryEntityTest.java index 331357aa7..bd286d68f 100644 --- a/akka-javasdk-tests/src/test/java/akkajavasdk/components/agent/SessionMemoryEntityTest.java +++ b/akka-javasdk-tests/src/test/java/akkajavasdk/components/agent/SessionMemoryEntityTest.java @@ -10,11 +10,16 @@ import akka.Done; import akka.javasdk.agent.AgentRegistry; import akka.javasdk.agent.MemoryFilter; +import akka.javasdk.agent.MessageContent; import akka.javasdk.agent.SessionHistory; import akka.javasdk.agent.SessionMemoryEntity; import akka.javasdk.agent.SessionMemoryEntity.AddInteractionCmd; +import akka.javasdk.agent.SessionMemoryEntity.AddMultimodalInteractionCmd; import akka.javasdk.agent.SessionMessage; import akka.javasdk.agent.SessionMessage.AiMessage; +import akka.javasdk.agent.SessionMessage.MessageContent.ImageUriMessageContent; +import akka.javasdk.agent.SessionMessage.MessageContent.TextMessageContent; +import akka.javasdk.agent.SessionMessage.MultimodalUserMessage; import akka.javasdk.agent.SessionMessage.UserMessage; import akka.javasdk.impl.agent.AgentRegistryImpl; import akka.javasdk.testkit.EventSourcedResult; @@ -83,6 +88,58 @@ public void 
shouldAddMessageToHistory() { assertThat(historyResult.getReply().messages()).containsExactly(userMessage, aiMessage); } + @Test + public void shouldAddMultiModalMessageToHistory() { + // given + var testKit = + EventSourcedTestKit.of( + (context) -> new SessionMemoryEntity(config, context, agentRegistryEmpty)); + var timestamp = Instant.now(); + String aiMsg = "I'm fine, thanks for asking!"; + SessionMessage.MessageContent text = new TextMessageContent("Hello, how are you?"); + SessionMessage.MessageContent image = + new ImageUriMessageContent("uri", MessageContent.ImageMessageContent.DetailLevel.AUTO); + var contents = List.of(text, image); + MultimodalUserMessage userMessage = + new MultimodalUserMessage(timestamp, contents, COMPONENT_ID); + var aiMessage = new AiMessage(timestamp, aiMsg, COMPONENT_ID); + + // when + EventSourcedResult result = + testKit + .method(SessionMemoryEntity::addMultimodalInteraction) + .invoke(new AddMultimodalInteractionCmd(userMessage, List.of(aiMessage))); + + // then + assertThat(result.getReply()).isEqualTo(done()); + + // Check events - ignoring timestamp comparison + var events = result.getAllEvents(); + assertThat(events).hasSize(2); + assertThat(events.getFirst()) + .isInstanceOf(SessionMemoryEntity.Event.MultimodalUserMessageAdded.class); + var userEvent = (SessionMemoryEntity.Event.MultimodalUserMessageAdded) events.getFirst(); + assertThat(userEvent.componentId()).isEqualTo(COMPONENT_ID); + assertThat(userEvent.contents()).isEqualTo(contents); + assertThat(userEvent.sizeInBytes()).isEqualTo(userMessage.size()); + assertThat(userEvent.timestamp()).isEqualTo(timestamp); + + assertThat(events.get(1)).isInstanceOf(SessionMemoryEntity.Event.AiMessageAdded.class); + var aiEvent = (SessionMemoryEntity.Event.AiMessageAdded) events.get(1); + assertThat(aiEvent.componentId()).isEqualTo(COMPONENT_ID); + assertThat(aiEvent.message()).isEqualTo(aiMsg); + assertThat(aiEvent.sizeInBytes()).isEqualTo(aiMessage.size()); + 
assertThat(aiEvent.historySizeInBytes()).isEqualTo(userMessage.size() + aiMessage.size()); + assertThat(aiEvent.timestamp()).isEqualTo(timestamp); + + // when retrieving history + EventSourcedResult historyResult = + testKit.method(SessionMemoryEntity::getHistory).invoke(emptyGetHistory); + + // then + assertThat(historyResult.getReply().messages()).containsExactly(userMessage, aiMessage); + } + @Test public void shouldAddMultipleMessagesToHistory() { // given @@ -143,11 +200,22 @@ public void shouldBeCompactable() { .method(SessionMemoryEntity::addInteraction) .invoke(new AddInteractionCmd(userMessage1, aiMessage1)); + SessionMessage.MessageContent text = new TextMessageContent("Hello, how are you?"); + SessionMessage.MessageContent image = + new ImageUriMessageContent("uri", MessageContent.ImageMessageContent.DetailLevel.AUTO); + var contents = List.of(text, image); + MultimodalUserMessage userMessage = + new MultimodalUserMessage(timestamp, contents, COMPONENT_ID); + + testKit + .method(SessionMemoryEntity::addMultimodalInteraction) + .invoke(new AddMultimodalInteractionCmd(userMessage, List.of(aiMessage1))); + // when EventSourcedResult historyResult = testKit.method(SessionMemoryEntity::getHistory).invoke(emptyGetHistory); var sequenceNumber = historyResult.getReply().sequenceNumber(); - assertThat(sequenceNumber).isEqualTo(2L); + assertThat(sequenceNumber).isEqualTo(4L); var userMessage2 = new UserMessage(timestamp, "Hey", COMPONENT_ID); var aiMessage2 = new AiMessage(timestamp, "Hi!", COMPONENT_ID); var cmd = new SessionMemoryEntity.CompactionCmd(userMessage2, aiMessage2, sequenceNumber); @@ -226,9 +294,14 @@ public void shouldHandleConcurrentUpdatesWhenCompacting() { testKit.method(SessionMemoryEntity::getHistory).invoke(emptyGetHistory); // then - assertThat(historyResult2.getReply().messages().stream().map(SessionMessage::text).toList()) - .containsExactly( - userMessage2.text(), aiMessage2.text(), userMessage3.text(), aiMessage3.text()); + UserMessage m1 
= (UserMessage) historyResult2.getReply().messages().get(0); + AiMessage m2 = (AiMessage) historyResult2.getReply().messages().get(1); + UserMessage m3 = (UserMessage) historyResult2.getReply().messages().get(2); + AiMessage m4 = (AiMessage) historyResult2.getReply().messages().get(3); + assertThat(m1.text()).isEqualTo(userMessage2.text()); + assertThat(m2.text()).isEqualTo(aiMessage2.text()); + assertThat(m3.text()).isEqualTo(userMessage3.text()); + assertThat(m4.text()).isEqualTo(aiMessage3.text()); } @Test @@ -459,8 +532,8 @@ public void shouldReturnOnlyLastNMessages() { var timestamp = Instant.now(); // Add several interactions - String[] userMsgs = {"U1", "U2", "U3", "U4"}; - String[] aiMsgs = {"A1", "A2", "A3", "A4"}; + String[] userMsgs = {"U1", "U2"}; + String[] aiMsgs = {"A1", "A2"}; for (int i = 0; i < userMsgs.length; i++) { testKit .method(SessionMemoryEntity::addInteraction) @@ -469,6 +542,18 @@ public void shouldReturnOnlyLastNMessages() { new UserMessage(timestamp, userMsgs[i], COMPONENT_ID), new AiMessage(timestamp, aiMsgs[i], COMPONENT_ID))); } + SessionMessage.MessageContent[] textContents = { + new TextMessageContent("U3"), new TextMessageContent("U4") + }; + String[] aiMsgs2 = {"A3", "A4"}; + for (int i = 0; i < textContents.length; i++) { + testKit + .method(SessionMemoryEntity::addMultimodalInteraction) + .invoke( + new AddMultimodalInteractionCmd( + new MultimodalUserMessage(timestamp, List.of(textContents[i]), COMPONENT_ID), + List.of(new AiMessage(timestamp, aiMsgs2[i], COMPONENT_ID)))); + } // Request only the last 4 messages (should be: U3, A3, U4, A4) var lastN = 4; @@ -480,9 +565,9 @@ public void shouldReturnOnlyLastNMessages() { // The expected last 4 messages var expected = List.of( - new UserMessage(timestamp, "U3", COMPONENT_ID), + new MultimodalUserMessage(timestamp, List.of(textContents[0]), COMPONENT_ID), new AiMessage(timestamp, "A3", COMPONENT_ID), - new UserMessage(timestamp, "U4", COMPONENT_ID), + new 
MultimodalUserMessage(timestamp, List.of(textContents[1]), COMPONENT_ID), new AiMessage(timestamp, "A4", COMPONENT_ID)); assertThat(result.getReply().messages()).containsExactlyElementsOf(expected); diff --git a/akka-javasdk-tests/src/test/java/akkajavasdk/components/agent/SomeMultiModalUserMessageAgent.java b/akka-javasdk-tests/src/test/java/akkajavasdk/components/agent/SomeMultiModalUserMessageAgent.java new file mode 100644 index 000000000..0e7e31323 --- /dev/null +++ b/akka-javasdk-tests/src/test/java/akkajavasdk/components/agent/SomeMultiModalUserMessageAgent.java @@ -0,0 +1,25 @@ +/* + * Copyright (C) 2021-2025 Lightbend Inc. + */ + +package akkajavasdk.components.agent; + +import akka.javasdk.agent.Agent; +import akka.javasdk.agent.MessageContent; +import akka.javasdk.agent.UserMessage; +import akka.javasdk.annotations.Component; + +@Component(id = "some-multi-modal-user-message-agent") +public class SomeMultiModalUserMessageAgent extends Agent { + public record SomeResponse(String response) {} + + public Effect ask() { + return effects() + .systemMessage("You are a helpful...") + .userMessage( + UserMessage.from( + MessageContent.TextMessageContent.from("testing"), + MessageContent.ImageMessageContent.fromUrl("https://example.com"))) + .thenReply(); + } +} diff --git a/akka-javasdk-tests/src/test/resources/META-INF/akka-javasdk-components.conf b/akka-javasdk-tests/src/test/resources/META-INF/akka-javasdk-components.conf index ba6b4eaa4..bed320ec9 100644 --- a/akka-javasdk-tests/src/test/resources/META-INF/akka-javasdk-components.conf +++ b/akka-javasdk-tests/src/test/resources/META-INF/akka-javasdk-components.conf @@ -2,6 +2,7 @@ akka.javasdk { components { agent = [ "akkajavasdk.components.agent.SomeAgent", + "akkajavasdk.components.agent.SomeMultiModalUserMessageAgent", "akkajavasdk.components.agent.SomeAgentAcceptingInt", "akkajavasdk.components.agent.SomeAgentReturningErrors", "akkajavasdk.components.agent.SomeAgentWithTool", diff --git 
a/akka-javasdk/src/main/java/akka/javasdk/agent/Agent.java b/akka-javasdk/src/main/java/akka/javasdk/agent/Agent.java index eedcf2b62..3965eb786 100644 --- a/akka-javasdk/src/main/java/akka/javasdk/agent/Agent.java +++ b/akka-javasdk/src/main/java/akka/javasdk/agent/Agent.java @@ -228,6 +228,26 @@ interface Builder { */ OnSuccessBuilder userMessage(String message); + /** + * The user message to the AI model, supporting multimodal content. + * + *

<p>This overload accepts a {@link UserMessage} which can contain multiple content types, + * currently text and images. + * + *

<p>Example with text and image: + * + *

<pre>{@code
+       * UserMessage message = UserMessage.from(
+       *     MessageContent.TextMessageContent.from("What's in this image?"),
+       *     MessageContent.ImageMessageContent.fromUrl("https://example.com/image.jpg")
+       * );
+       * }</pre>
+ * + * @param message The user message containing multimodal content (text, images, etc.) + * @return The next builder stage for configuring the effect + */ + OnSuccessBuilder userMessage(UserMessage message); + /** * Create a message reply without calling the model. * diff --git a/akka-javasdk/src/main/java/akka/javasdk/agent/MessageContent.java b/akka-javasdk/src/main/java/akka/javasdk/agent/MessageContent.java new file mode 100644 index 000000000..491772e31 --- /dev/null +++ b/akka-javasdk/src/main/java/akka/javasdk/agent/MessageContent.java @@ -0,0 +1,100 @@ +/* + * Copyright (C) 2021-2025 Lightbend Inc. + */ + +package akka.javasdk.agent; + +import java.net.MalformedURLException; +import java.net.URI; +import java.net.URL; + +/** + * Represents a piece of content within a multimodal message to an AI model. + * + *

Message content can be text or images, allowing agents to send multimodal inputs. + * + * @see UserMessage + */ +public sealed interface MessageContent { + + /** + * Text content within a user message. + * + * @param text The text content + */ + record TextMessageContent(String text) implements MessageContent { + + /** + * Creates text content from a string. + * + * @param text The text content + * @return A new TextMessageContent instance + */ + public static TextMessageContent from(String text) { + return new TextMessageContent(text); + } + } + + /** + * Image content within a user message, referenced by URL. + * + * @param url The URL pointing to the image + * @param detailLevel The level of detail for image processing + */ + record ImageUrlMessageContent(URL url, ImageMessageContent.DetailLevel detailLevel) + implements MessageContent {} + + /** Factory methods for creating image message content. */ + record ImageMessageContent() { + + /** + * Creates image content from a URL string with automatic detail level. + * + * @param url The URL string pointing to the image + * @return A new ImageUrlMessageContent instance with AUTO detail level + */ + public static ImageUrlMessageContent fromUrl(String url) { + try { + return ImageMessageContent.fromUrl(URI.create(url).toURL()); + } catch (MalformedURLException e) { + throw new RuntimeException("Can't transform " + url + " to URL", e); + } + } + + /** + * Creates image content from a URL with automatic detail level. + * + * @param url The URL pointing to the image + * @return A new ImageUrlMessageContent instance with AUTO detail level + */ + public static ImageUrlMessageContent fromUrl(URL url) { + return new ImageUrlMessageContent(url, DetailLevel.AUTO); + } + + /** + * Creates image content from a URL with a specific detail level. 
+ * + * @param url The URL pointing to the image + * @param detailLevel The level of detail for image processing + * @return A new ImageUrlMessageContent instance + */ + public static ImageUrlMessageContent fromUrl(URL url, DetailLevel detailLevel) { + return new ImageUrlMessageContent(url, detailLevel); + } + + /** + * Controls the level of detail used when processing images. + * + *

The detail level affects both the quality of image analysis and the number of tokens + * consumed by the AI model. + */ + public enum DetailLevel { + /** Lower resolution processing, faster and uses fewer tokens */ + LOW, + /** Higher resolution processing, more detailed analysis but uses more tokens */ + HIGH, + /** Let the model automatically choose the appropriate detail level */ + AUTO; + } + } +} diff --git a/akka-javasdk/src/main/java/akka/javasdk/agent/SessionMemory.java b/akka-javasdk/src/main/java/akka/javasdk/agent/SessionMemory.java index 0f4f35824..a8a7a0eda 100644 --- a/akka-javasdk/src/main/java/akka/javasdk/agent/SessionMemory.java +++ b/akka-javasdk/src/main/java/akka/javasdk/agent/SessionMemory.java @@ -42,6 +42,23 @@ public interface SessionMemory { void addInteraction( String sessionId, SessionMessage.UserMessage userMessage, List messages); + /** + * Adds an interaction between a user and an AI model to the session history for the specified + * session, supporting multimodal content. + * + *

This overload accepts a {@link SessionMessage.MultimodalUserMessage} which can contain + * multiple content types including text and images, enabling multimodal interactions. + * + * @param sessionId The unique identifier for the contextual session + * @param userMessage The user message containing multimodal content (text, images, etc.) + * @param messages All other messages generated during this interaction, typically AiMessage but + * also Tool Call responses. + */ + void addInteraction( + String sessionId, + SessionMessage.MultimodalUserMessage userMessage, + List messages); + /** * Retrieves the complete session history for the specified session. For very long sessions, this * might return a compacted version of the history. diff --git a/akka-javasdk/src/main/java/akka/javasdk/agent/SessionMemoryEntity.java b/akka-javasdk/src/main/java/akka/javasdk/agent/SessionMemoryEntity.java index 2a21a9a29..2f2862202 100644 --- a/akka-javasdk/src/main/java/akka/javasdk/agent/SessionMemoryEntity.java +++ b/akka-javasdk/src/main/java/akka/javasdk/agent/SessionMemoryEntity.java @@ -10,6 +10,7 @@ import akka.javasdk.agent.SessionMemoryEntity.Event; import akka.javasdk.agent.SessionMemoryEntity.State; import akka.javasdk.agent.SessionMessage.AiMessage; +import akka.javasdk.agent.SessionMessage.MultimodalUserMessage; import akka.javasdk.agent.SessionMessage.ToolCallResponse; import akka.javasdk.agent.SessionMessage.UserMessage; import akka.javasdk.annotations.Component; @@ -129,9 +130,11 @@ private static long enforceMaxCapacity( maxSize); } - // remove all messages that are not UserMessage since those were driven by the deleted - // UserMessage - while (!messages.isEmpty() && !(messages.getFirst() instanceof UserMessage)) { + // remove all messages that are not UserMessage or MultimodalUserMessage since those were + // driven by the deleted UserMessage + while (!messages.isEmpty() + && !(messages.getFirst() instanceof UserMessage + || messages.getFirst() instanceof 
MultimodalUserMessage)) { freedSpace += messages.removeFirst().size(); logger.debug( "Removed orphan message for sessionId [{}]. Remaining size [{}], maxSizeInBytes [{}]", @@ -160,6 +163,14 @@ record LimitedWindowSet(Instant timestamp, int maxSizeInBytes) implements Event record UserMessageAdded(Instant timestamp, String componentId, String message, int sizeInBytes) implements Event {} + @TypeName("akka-memory-multimodal-user-message-added") + record MultimodalUserMessageAdded( + Instant timestamp, + String componentId, + List contents, + int sizeInBytes) + implements Event {} + @TypeName("akka-memory-ai-message-added") record AiMessageAdded( Instant timestamp, @@ -212,17 +223,41 @@ public AddInteractionCmd(UserMessage userMessage, AiMessage aiMessage) { } } + public record AddMultimodalInteractionCmd( + MultimodalUserMessage userMessage, List messages) {} + + public Effect addMultimodalInteraction(AddMultimodalInteractionCmd cmd) { + var userMessageEvent = + new Event.MultimodalUserMessageAdded( + cmd.userMessage.timestamp(), + cmd.userMessage.componentId(), + cmd.userMessage.contents(), + cmd.userMessage.size()); + return addInteraction(cmd.messages, cmd.userMessage.componentId(), userMessageEvent); + } + public Effect addInteraction(AddInteractionCmd cmd) { - if (cmd.messages.stream() + var userMessageEvent = + new Event.UserMessageAdded( + cmd.userMessage.timestamp(), + cmd.userMessage.componentId(), + cmd.userMessage.text(), + cmd.userMessage.size()); + return addInteraction(cmd.messages, cmd.userMessage.componentId(), userMessageEvent); + } + + private Effect addInteraction( + List messages, String componentId, Event userMessageEvent) { + if (messages.stream() .filter(msg -> msg instanceof AiMessage) .map(msg -> ((AiMessage) msg).componentId()) - .anyMatch(aiComponentId -> !cmd.userMessage.componentId().equals(aiComponentId))) { + .anyMatch(aiComponentId -> !componentId.equals(aiComponentId))) { return effects().error("componentId in userMessage must be the 
same as in all aiMessages"); } var modelAndToolEvents = - cmd.messages.stream() + messages.stream() .map( msg -> { return (Event) @@ -236,7 +271,7 @@ public Effect addInteraction(AddInteractionCmd cmd) { 0L, // filled in later aiMessage.toolCallRequests()); - case SessionMessage.ToolCallResponse toolCallResponse -> + case ToolCallResponse toolCallResponse -> new Event.ToolResponseMessageAdded( toolCallResponse.timestamp(), toolCallResponse.componentId(), @@ -251,13 +286,6 @@ public Effect addInteraction(AddInteractionCmd cmd) { }) .toList(); - var userMessageEvent = - new Event.UserMessageAdded( - cmd.userMessage.timestamp(), - cmd.userMessage.componentId(), - cmd.userMessage.text(), - cmd.userMessage.size()); - List allEvents = new ArrayList<>(); allEvents.add(userMessageEvent); allEvents.addAll(modelAndToolEvents); @@ -334,6 +362,7 @@ public ReadOnlyEffect getHistory(GetHistoryCmd cmd) { } } + // keeping UserMessage instead of MultimodalUserMessage for compaction public record CompactionCmd(UserMessage userMessage, AiMessage aiMessage, long sequenceNumber) {} public Effect compactHistory(CompactionCmd cmd) { @@ -398,13 +427,21 @@ public Effect compactHistory(CompactionCmd cmd) { 0L, // filled in later aiMessage.toolCallRequests())); } + case MultimodalUserMessage multimodalUserMessage -> { + events.add( + new Event.MultimodalUserMessageAdded( + multimodalUserMessage.timestamp(), + multimodalUserMessage.componentId(), + multimodalUserMessage.contents(), + multimodalUserMessage.size())); + } } }); } var eventsWithSize = updateHistorySize(events); - return effects().persistAll(eventsWithSize).thenReply(__ -> Done.done()); + return effects().persistAll(eventsWithSize).thenReply(__ -> done()); } private List updateHistorySize(List events) { @@ -420,6 +457,10 @@ private List updateHistorySize(List events) { size += evt.sizeInBytes(); result.add(evt); } + case Event.MultimodalUserMessageAdded evt -> { + size += evt.sizeInBytes(); + result.add(evt); + } case 
Event.ToolResponseMessageAdded evt -> { size += evt.sizeInBytes(); result.add(evt); @@ -454,9 +495,16 @@ public State applyEvent(Event event) { currentState().withMaxSize(limitedWindowSet.maxSizeInBytes); case Event.UserMessageAdded userMsg -> + currentState() + .addMessage(new UserMessage(userMsg.timestamp, userMsg.message, userMsg.componentId)); + + case Event.MultimodalUserMessageAdded multimodalUserMsg -> currentState() .addMessage( - new UserMessage(userMsg.timestamp(), userMsg.message(), userMsg.componentId)); + new MultimodalUserMessage( + multimodalUserMsg.timestamp, + multimodalUserMsg.contents, + multimodalUserMsg.componentId)); case Event.AiMessageAdded aiMsg -> currentState() @@ -468,11 +516,11 @@ public State applyEvent(Event event) { currentState() .addMessage( new ToolCallResponse( - toolMsg.timestamp(), + toolMsg.timestamp, toolMsg.componentId, - toolMsg.id(), + toolMsg.id, toolMsg.name, - toolMsg.content())); + toolMsg.content)); case Event.HistoryCleared __ -> currentState().clear(); diff --git a/akka-javasdk/src/main/java/akka/javasdk/agent/SessionMessage.java b/akka-javasdk/src/main/java/akka/javasdk/agent/SessionMessage.java index 603c7a5a0..1fe510954 100644 --- a/akka-javasdk/src/main/java/akka/javasdk/agent/SessionMessage.java +++ b/akka-javasdk/src/main/java/akka/javasdk/agent/SessionMessage.java @@ -5,19 +5,22 @@ package akka.javasdk.agent; import akka.javasdk.agent.SessionMessage.AiMessage; +import akka.javasdk.agent.SessionMessage.MultimodalUserMessage; import akka.javasdk.agent.SessionMessage.ToolCallResponse; import akka.javasdk.agent.SessionMessage.UserMessage; import com.fasterxml.jackson.annotation.JsonSubTypes; import com.fasterxml.jackson.annotation.JsonTypeInfo; import java.time.Instant; import java.util.List; +import java.util.Optional; /** Interface for message representation used inside the SessionMemoryEntity state. 
*/ @JsonTypeInfo(use = JsonTypeInfo.Id.NAME, include = JsonTypeInfo.As.PROPERTY, property = "@type") @JsonSubTypes({ @JsonSubTypes.Type(value = UserMessage.class, name = "UM"), @JsonSubTypes.Type(value = AiMessage.class, name = "AIM"), - @JsonSubTypes.Type(value = ToolCallResponse.class, name = "TCR") + @JsonSubTypes.Type(value = ToolCallResponse.class, name = "TCR"), + @JsonSubTypes.Type(value = MultimodalUserMessage.class, name = "MUM") }) public sealed interface SessionMessage { static int sizeInBytes(String text) { @@ -26,10 +29,49 @@ static int sizeInBytes(String text) { int size(); - String text(); - String componentId(); + @JsonTypeInfo(use = JsonTypeInfo.Id.NAME, property = "type") + @JsonSubTypes({ + @JsonSubTypes.Type(value = MessageContent.TextMessageContent.class, name = "T"), + @JsonSubTypes.Type(value = MessageContent.ImageUriMessageContent.class, name = "IU") + }) + sealed interface MessageContent { + + record TextMessageContent(String text) implements MessageContent {} + + record ImageUriMessageContent( + String uri, akka.javasdk.agent.MessageContent.ImageMessageContent.DetailLevel detailLevel) + implements MessageContent {} + } + + // need to introduce new message to keep backward compatibility + record MultimodalUserMessage(Instant timestamp, List contents, String componentId) + implements SessionMessage { + + /** returns text from the first MessageContent.TextMessageContent */ + public Optional text() { + return contents.stream() + .filter(c -> c instanceof MessageContent.TextMessageContent) + .map(c -> ((MessageContent.TextMessageContent) c).text()) + .findFirst(); + } + + @Override + public int size() { + return contents.stream() + .map( + content -> + switch (content) { + case MessageContent.TextMessageContent text -> sizeInBytes(text.text()); + case MessageContent.ImageUriMessageContent image -> + sizeInBytes(image.uri()) + sizeInBytes(image.detailLevel().toString()); + }) + .mapToInt(Integer::intValue) + .sum(); + } + } + record 
UserMessage(Instant timestamp, String text, String componentId) implements SessionMessage { public UserMessage(Instant now, String text) { diff --git a/akka-javasdk/src/main/java/akka/javasdk/agent/UserMessage.java b/akka-javasdk/src/main/java/akka/javasdk/agent/UserMessage.java new file mode 100644 index 000000000..1ff442971 --- /dev/null +++ b/akka-javasdk/src/main/java/akka/javasdk/agent/UserMessage.java @@ -0,0 +1,46 @@ +/* + * Copyright (C) 2021-2025 Lightbend Inc. + */ + +package akka.javasdk.agent; + +import akka.javasdk.agent.MessageContent.TextMessageContent; +import java.util.List; + +/** + * Represents a user message that can contain multimodal content for interaction with AI models. + * + *

<p>A {@code UserMessage} encapsulates one or more {@link MessageContent} elements, allowing + * agents to send text, images, or both. + * + *

<p>Multimodal message with text and image: + * + *

<pre>{@code
+ * UserMessage message = UserMessage.from(
+ *     MessageContent.TextMessageContent.from("What's in this image?"),
+ *     MessageContent.ImageMessageContent.fromUrl("https://example.com/photo.jpg")
+ * );
+ * }</pre>
+ * + * @param contents The list of message content elements (text, images, etc.) + * @see MessageContent + * @see Agent.Effect.Builder#userMessage(UserMessage) + */ +public record UserMessage(List contents) { + + public boolean isTextOnly() { + return contents.size() == 1 && contents.get(0) instanceof TextMessageContent; + } + + public String text() { + return ((TextMessageContent) contents.get(0)).text(); + } + + public static UserMessage from(String text) { + return new UserMessage(List.of(TextMessageContent.from(text))); + } + + public static UserMessage from(MessageContent... messageContent) { + return new UserMessage(List.of(messageContent)); + } +} diff --git a/akka-javasdk/src/main/java/akka/javasdk/impl/agent/SessionMemoryClient.java b/akka-javasdk/src/main/java/akka/javasdk/impl/agent/SessionMemoryClient.java index c29901e8f..4044ac479 100644 --- a/akka-javasdk/src/main/java/akka/javasdk/impl/agent/SessionMemoryClient.java +++ b/akka-javasdk/src/main/java/akka/javasdk/impl/agent/SessionMemoryClient.java @@ -51,6 +51,23 @@ public SessionMemoryClient(ComponentClient componentClient, MemorySettings memor this.memorySettings = memorySettings; } + @Override + public void addInteraction( + String sessionId, + SessionMessage.MultimodalUserMessage userMessage, + List messages) { + if (memorySettings.write()) { + logger.debug("Adding interaction to sessionId [{}]", sessionId); + componentClient + .forEventSourcedEntity(sessionId) + .method(SessionMemoryEntity::addMultimodalInteraction) + .invoke(new SessionMemoryEntity.AddMultimodalInteractionCmd(userMessage, messages)); + } else { + logger.debug( + "Memory writing is disabled, interaction not added to sessionId [{}]", sessionId); + } + } + @Override public void addInteraction( String sessionId, SessionMessage.UserMessage userMessage, List messages) { diff --git a/akka-javasdk/src/main/scala/akka/javasdk/impl/agent/AgentEffectImpl.scala b/akka-javasdk/src/main/scala/akka/javasdk/impl/agent/AgentEffectImpl.scala 
index 6bdfc8db7..bbc9ce752 100644 --- a/akka-javasdk/src/main/scala/akka/javasdk/impl/agent/AgentEffectImpl.scala +++ b/akka-javasdk/src/main/scala/akka/javasdk/impl/agent/AgentEffectImpl.scala @@ -13,6 +13,7 @@ import scala.jdk.FunctionConverters.enrichAsScalaFromFunction import akka.annotation.InternalApi import akka.javasdk.CommandException import akka.javasdk.Metadata +import akka.javasdk.agent import akka.javasdk.agent.Agent.Effect import akka.javasdk.agent.Agent.Effect.Builder import akka.javasdk.agent.Agent.Effect.FailureBuilder @@ -22,6 +23,7 @@ import akka.javasdk.agent.Agent.Effect.OnSuccessBuilder import akka.javasdk.agent.MemoryProvider import akka.javasdk.agent.ModelProvider import akka.javasdk.agent.RemoteMcpTools +import akka.javasdk.agent.UserMessage import akka.javasdk.impl.agent.BaseAgentEffectBuilder.PrimaryEffectImpl import akka.javasdk.impl.agent.BaseAgentEffectBuilder.RequestModel import akka.javasdk.impl.effect.ErrorReplyImpl @@ -41,7 +43,7 @@ private[javasdk] object BaseAgentEffectBuilder { RequestModel( modelProvider = ModelProvider.fromConfig(), systemMessage = ConstantSystemMessage(""), - userMessage = "", + userMessage = UserMessage.from(""), responseType = classOf[String], includeJsonSchema = false, responseMapping = None, @@ -59,7 +61,7 @@ private[javasdk] object BaseAgentEffectBuilder { final case class RequestModel( modelProvider: ModelProvider, systemMessage: SystemMessage, - userMessage: String, + userMessage: UserMessage, responseType: Class[_], includeJsonSchema: Boolean, responseMapping: Option[Function1[Any, Any]], @@ -168,7 +170,13 @@ private[javasdk] final class BaseAgentEffectBuilder[Reply] this } - override def userMessage(message: String): OnSuccessBuilder = { + override def userMessage(text: String): OnSuccessBuilder = { + updateRequestModel(_.copy(userMessage = UserMessage.from(text))) + this + } + + override def userMessage(message: agent.UserMessage): OnSuccessBuilder = { + updateRequestModel(_.copy(userMessage = 
message)) this } @@ -226,7 +234,6 @@ private[javasdk] final class BaseAgentEffectBuilder[Reply] updateRequestModel(_.addTools(toolInstancesOrClasses.asScala.toSeq)) this } - } /** diff --git a/akka-javasdk/src/main/scala/akka/javasdk/impl/agent/AgentImpl.scala b/akka-javasdk/src/main/scala/akka/javasdk/impl/agent/AgentImpl.scala index 8ef180997..578eb90ba 100644 --- a/akka-javasdk/src/main/scala/akka/javasdk/impl/agent/AgentImpl.scala +++ b/akka-javasdk/src/main/scala/akka/javasdk/impl/agent/AgentImpl.scala @@ -4,6 +4,7 @@ package akka.javasdk.impl.agent +import java.net.URI import java.time.Instant import java.util.concurrent.TimeUnit @@ -22,28 +23,15 @@ import akka.javasdk.CommandException import akka.javasdk.DependencyProvider import akka.javasdk.Metadata import akka.javasdk.Tracing -import akka.javasdk.agent.Agent -import akka.javasdk.agent.AgentContext -import akka.javasdk.agent.Guardrail -import akka.javasdk.agent.InternalServerException -import akka.javasdk.agent.JsonParsingException -import akka.javasdk.agent.McpToolCallExecutionException -import akka.javasdk.agent.MemoryProvider -import akka.javasdk.agent.ModelException -import akka.javasdk.agent.ModelProvider -import akka.javasdk.agent.ModelTimeoutException -import akka.javasdk.agent.RateLimitException -import akka.javasdk.agent.RemoteMcpTools -import akka.javasdk.agent.SessionHistory -import akka.javasdk.agent.SessionMemory -import akka.javasdk.agent.SessionMessage +import akka.javasdk.agent +import akka.javasdk.agent.MessageContent.ImageMessageContent +import akka.javasdk.agent.MessageContent.ImageUrlMessageContent import akka.javasdk.agent.SessionMessage.AiMessage +import akka.javasdk.agent.SessionMessage.MultimodalUserMessage import akka.javasdk.agent.SessionMessage.ToolCallRequest import akka.javasdk.agent.SessionMessage.ToolCallResponse import akka.javasdk.agent.SessionMessage.UserMessage -import akka.javasdk.agent.ToolCallExecutionException -import akka.javasdk.agent.ToolCallLimitReachedException 
-import akka.javasdk.agent.UnsupportedFeatureException +import akka.javasdk.agent._ import akka.javasdk.client.ComponentClient import akka.javasdk.impl.AbstractContext import akka.javasdk.impl.ComponentDescriptor @@ -271,7 +259,7 @@ private[impl] final class AgentImpl[A <: Agent]( new SpiAgent.RequestModelEffect( modelProvider = spiModelProvider, systemMessage = systemMessage, - userMessage = new SpiAgent.UserMessage(req.userMessage), + userMessage = toSpiUserMessage(req.userMessage), additionalContext = additionalContext, toolDescriptors = toolDescriptors, callToolFunction = request => Future(toolExecutor.execute(request))(sdkExecutionContext), @@ -311,6 +299,28 @@ private[impl] final class AgentImpl[A <: Agent]( }(sdkExecutionContext) + private def toSpiUserMessage(userMessage: agent.UserMessage): SpiAgent.UserMessage = { + val contents = userMessage.contents().asScala.map(asd => toSpiMessageContent(asd)) + new SpiAgent.UserMessage(contents.toSeq) + } + + private def toSpiMessageContent(messageContent: MessageContent): SpiAgent.MessageContent = { + messageContent match { + case content: ImageUrlMessageContent => + new SpiAgent.ImageUriMessageContent(content.url().toURI, toSpiDetailLevel(content.detailLevel())) + case content: MessageContent.TextMessageContent => + new SpiAgent.TextMessageContent(content.text()) + } + } + + private def toSpiDetailLevel(level: ImageMessageContent.DetailLevel): SpiAgent.ImageMessageContent.DetailLevel = { + level match { + case ImageMessageContent.DetailLevel.LOW => SpiAgent.ImageMessageContent.Low + case ImageMessageContent.DetailLevel.HIGH => SpiAgent.ImageMessageContent.High + case ImageMessageContent.DetailLevel.AUTO => SpiAgent.ImageMessageContent.Auto + } + } + private def toSpiMcpEndpoints(remoteMcpTools: Seq[RemoteMcpTools]): Seq[SpiAgent.McpToolEndpointDescriptor] = remoteMcpTools.map { case remoteMcp: RemoteMcpToolsImpl => @@ -376,7 +386,7 @@ private[impl] final class AgentImpl[A <: Agent]( private def onSuccess( 
sessionMemoryClient: SessionMemory, - userMessage: String, + userMessage: agent.UserMessage, userMessageAt: Instant, agentRole: Option[String], responses: Seq[SpiAgent.Response]): Unit = { @@ -394,10 +404,31 @@ private[impl] final class AgentImpl[A <: Agent]( new ToolCallResponse(res.timestamp, componentId, res.id, res.name, res.content) } - sessionMemoryClient.addInteraction( - sessionId, - new UserMessage(userMessageAt, userMessage, componentId), - responseMessages.asJava) + if (userMessage.isTextOnly) { + sessionMemoryClient.addInteraction( + sessionId, + new UserMessage(userMessageAt, userMessage.text(), componentId), + responseMessages.asJava) + } else { + val contents = userMessage + .contents() + .asScala + .map(s => toSessionMemoryContent(s)) + .asJava + sessionMemoryClient.addInteraction( + sessionId, + new MultimodalUserMessage(userMessageAt, contents, componentId), + responseMessages.asJava) + } + } + + private def toSessionMemoryContent(messageContent: MessageContent): SessionMessage.MessageContent = { + messageContent match { + case content: MessageContent.TextMessageContent => + new SessionMessage.MessageContent.TextMessageContent(content.text) + case content: ImageUrlMessageContent => + new SessionMessage.MessageContent.ImageUriMessageContent(content.url().toString, content.detailLevel()) + } } private def toSpiContextMessages(sessionHistory: SessionHistory): Vector[SpiAgent.ContextMessage] = { @@ -418,6 +449,19 @@ private[impl] final class AgentImpl[A <: Agent]( new SpiAgent.ContextMessage.AiMessage(m.text(), toolRequests) case m: UserMessage => new SpiAgent.ContextMessage.UserMessage(m.text()) + + case m: MultimodalUserMessage => + val contents = m + .contents() + .asScala + .map { + case content: SessionMessage.MessageContent.TextMessageContent => + new SpiAgent.TextMessageContent(content.text()) + case content: SessionMessage.MessageContent.ImageUriMessageContent => + new SpiAgent.ImageUriMessageContent(URI.create(content.uri()), 
toSpiDetailLevel(content.detailLevel())) + } + .toSeq + new SpiAgent.ContextMessage.UserMessage(contents) case m: ToolCallResponse => new ContextMessage.ToolCallResponseMessage(m.id(), m.name(), m.text()) case m => diff --git a/akka-javasdk/src/main/scala/akka/javasdk/impl/agent/AgentStreamEffectImpl.scala b/akka-javasdk/src/main/scala/akka/javasdk/impl/agent/AgentStreamEffectImpl.scala index 25595360f..56dd858b4 100644 --- a/akka-javasdk/src/main/scala/akka/javasdk/impl/agent/AgentStreamEffectImpl.scala +++ b/akka-javasdk/src/main/scala/akka/javasdk/impl/agent/AgentStreamEffectImpl.scala @@ -13,6 +13,7 @@ import akka.javasdk.agent.Agent.StreamEffect import akka.javasdk.agent.MemoryProvider import akka.javasdk.agent.ModelProvider import akka.javasdk.agent.RemoteMcpTools +import akka.javasdk.agent.UserMessage import akka.javasdk.impl.effect.ErrorReplyImpl import akka.javasdk.impl.effect.MessageReplyImpl import akka.javasdk.impl.effect.NoSecondaryEffectImpl @@ -87,7 +88,7 @@ private[javasdk] final class AgentStreamEffectImpl } override def userMessage(message: String): StreamEffect.OnSuccessBuilder = { - updateRequestModel(_.copy(userMessage = message)) + updateRequestModel(_.copy(userMessage = UserMessage.from(message))) this } diff --git a/akka-javasdk/src/test/java/akka/javasdk/agent/DummyAgent.java b/akka-javasdk/src/test/java/akka/javasdk/agent/DummyAgent.java index ef5519814..0c6cd7807 100644 --- a/akka-javasdk/src/test/java/akka/javasdk/agent/DummyAgent.java +++ b/akka-javasdk/src/test/java/akka/javasdk/agent/DummyAgent.java @@ -41,6 +41,12 @@ public void addInteraction( SessionMessage.UserMessage userMessage, List messages) {} + @Override + public void addInteraction( + String sessionId, + SessionMessage.MultimodalUserMessage userMessage, + List messages) {} + @Override public SessionHistory getHistory(String sessionId) { return null; diff --git a/docs/src/modules/reference/nav.adoc b/docs/src/modules/reference/nav.adoc index 462a62afb..942927e6a 100644 --- 
a/docs/src/modules/reference/nav.adoc +++ b/docs/src/modules/reference/nav.adoc @@ -65,7 +65,8 @@ *** xref:cli/akka-cli/akka_auth_tokens.adoc[] *** xref:cli/akka-cli/akka_auth_use-token.adoc[] *** xref:cli/akka-cli/akka_auth.adoc[] -*** xref:cli/akka-cli/akka_code_ai-assistance-update.adoc[] +*** xref:cli/akka-cli/akka_code_check.adoc[] +*** xref:cli/akka-cli/akka_code_context-update.adoc[] *** xref:cli/akka-cli/akka_code_init.adoc[] *** xref:cli/akka-cli/akka_code_token.adoc[] *** xref:cli/akka-cli/akka_code.adoc[] diff --git a/docs/src/modules/sdk/pages/agents/prompt.adoc b/docs/src/modules/sdk/pages/agents/prompt.adoc index a5695c401..cfa910869 100644 --- a/docs/src/modules/sdk/pages/agents/prompt.adoc +++ b/docs/src/modules/sdk/pages/agents/prompt.adoc @@ -20,6 +20,23 @@ include::example$doc-snippets/src/main/java/com/example/application/ActivityAgen Keep in mind that some models have preferences in how you wrap or label user input within the system prompt and you'll need to take that into account when defining your system message. +== Multimodal user message + +Multimodal AI models can process not only text but also images, enabling agents to analyze visual content, extract information from diagrams, or answer questions about images. + +To send images along with text to an AI model, use the `UserMessage` class, which supports multimodal content: + +[source,java,indent=0] +.{sample-base-url}/doc-snippets/src/main/java/com/example/application/ImageProcessingAgent.java[ImageProcessingAgent.java] +---- +include::example$doc-snippets/src/main/java/com/example/application/ImageProcessingAgent.java[tag=multimodal-user-message] +---- +<1> Create a `UserMessage` with multiple content elements +<2> Add text content using `TextMessageContent.from()` +<3> Add image content using `ImageMessageContent.fromUrl()` + +NOTE: Not all AI models support vision capabilities. Ensure your configured model provider supports image inputs before using multimodal messages. 
+ == Using dynamic prompts with templates As an alternative to hard-coded prompts, there is a built-in prompt template entity. The advantage of using the prompt template entity is that you can change the prompts at runtime without restarting or redeploying the service. Because the prompt template is managed as an entity, you retain full change history. diff --git a/docs/styles/config/vocabularies/Akka/accept.txt b/docs/styles/config/vocabularies/Akka/accept.txt index 4b354c534..a3d9fc867 100644 --- a/docs/styles/config/vocabularies/Akka/accept.txt +++ b/docs/styles/config/vocabularies/Akka/accept.txt @@ -111,6 +111,8 @@ maxInstances migratable minInstances mountPath +multimodal +Multimodal mutator mutators namespace diff --git a/samples/doc-snippets/src/main/java/com/example/application/CompactionAgent.java b/samples/doc-snippets/src/main/java/com/example/application/CompactionAgent.java index b21715edb..6582ff97c 100644 --- a/samples/doc-snippets/src/main/java/com/example/application/CompactionAgent.java +++ b/samples/doc-snippets/src/main/java/com/example/application/CompactionAgent.java @@ -5,6 +5,7 @@ import akka.javasdk.agent.ModelProvider; import akka.javasdk.agent.SessionHistory; import akka.javasdk.agent.SessionMessage; +import akka.javasdk.agent.SessionMessage.MessageContent; import akka.javasdk.annotations.Component; import akka.javasdk.client.ComponentClient; import java.util.stream.Collectors; @@ -53,6 +54,19 @@ public Effect summarizeSessionHistory(SessionHistory history) { // <2> .map(msg -> { return switch (msg) { case SessionMessage.UserMessage userMsg -> "\n\nUSER:\n" + userMsg.text(); // <3> + // end::compaction[] + case SessionMessage.MultimodalUserMessage multimodalUserMsg -> multimodalUserMsg + .contents() + .stream() + .map(content -> + switch (content) { + case MessageContent.ImageUriMessageContent image -> "image from " + + image.uri(); + case MessageContent.TextMessageContent text -> text.text(); + }) + // One USER header per message; every content part is kept, joined by newlines. + .collect(Collectors.joining("\n", "\n\nUSER:\n", ""));
+ // tag::compaction[] case SessionMessage.AiMessage aiMessage -> { var aiText = "\n\nAI:\n" + aiMessage.text(); yield aiMessage diff --git a/samples/doc-snippets/src/main/java/com/example/application/ImageProcessingAgent.java b/samples/doc-snippets/src/main/java/com/example/application/ImageProcessingAgent.java new file mode 100644 index 000000000..b8ec713f4 --- /dev/null +++ b/samples/doc-snippets/src/main/java/com/example/application/ImageProcessingAgent.java @@ -0,0 +1,26 @@ +package com.example.application; + +import akka.javasdk.agent.Agent; +import akka.javasdk.agent.MessageContent.ImageMessageContent; +import akka.javasdk.agent.MessageContent.TextMessageContent; +import akka.javasdk.agent.UserMessage; +import akka.javasdk.annotations.Component; + +/** Sample agent that sends a text question and an image URL to the model in a single user message. */ +@Component(id = "image-processing-agent") +public class ImageProcessingAgent extends Agent { + + // tag::multimodal-user-message[] + public Effect ask() { + return effects() + .systemMessage("You are an image analysis tool") + .userMessage( + UserMessage.from( // <1> + TextMessageContent.from("What do you see?"), // <2> + ImageMessageContent.fromUrl("https://example/image.png") // <3> + ) + ) + .thenReply(); + } + // end::multimodal-user-message[] +}