Skip to content
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -5,16 +5,20 @@
package akka.javasdk.testkit;

import akka.japi.Pair;
import akka.javasdk.agent.MessageContent;
import akka.javasdk.agent.ModelProvider;
import dev.langchain4j.agent.tool.ToolExecutionRequest;
import dev.langchain4j.data.message.AiMessage;
import dev.langchain4j.data.message.ImageContent;
import dev.langchain4j.data.message.TextContent;
import dev.langchain4j.data.message.ToolExecutionResultMessage;
import dev.langchain4j.model.chat.ChatModel;
import dev.langchain4j.model.chat.StreamingChatModel;
import dev.langchain4j.model.chat.request.ChatRequest;
import dev.langchain4j.model.chat.response.ChatResponse;
import dev.langchain4j.model.chat.response.StreamingChatResponseHandler;
import dev.langchain4j.model.output.FinishReason;
import java.net.MalformedURLException;
import java.util.ArrayList;
import java.util.List;
import java.util.Optional;
Expand Down Expand Up @@ -67,7 +71,30 @@ public sealed interface InputMessage {
}

/** Represents a user message. */
public record UserMessage(String content) implements InputMessage {}
/**
 * A user message seen by the test model, made of one or more content parts.
 *
 * <p>The content list is copied on construction, so later changes to the list passed in by the
 * caller are not reflected in this record.
 *
 * @param contents the content parts of the message, in order; must not be null or contain nulls
 */
public record UserMessage(List<MessageContent> contents) implements InputMessage {

  /** Takes an unmodifiable snapshot of the supplied contents. */
  public UserMessage {
    contents = List.copyOf(contents);
  }

  /**
   * Creates a text-only user message.
   *
   * @param content the text of the message
   */
  public UserMessage(String content) {
    this(List.of(MessageContent.TextMessageContent.from(content)));
  }

  /**
   * Returns the text of a text-only message.
   *
   * @throws IllegalStateException if the message is not made of exactly one text part
   */
  @Override
  public String content() {
    if (!isTextOnly()) {
      throw new IllegalStateException("This is not text only user message");
    }
    return text();
  }

  /** Returns true when the message consists of exactly one part and that part is text. */
  public boolean isTextOnly() {
    return contents.size() == 1
        && contents.getFirst() instanceof MessageContent.TextMessageContent;
  }

  /**
   * Returns the text of the first content part.
   *
   * @throws IllegalStateException if the message is empty or its first part is not text
   */
  public String text() {
    // Checked explicitly so callers get a descriptive failure rather than a bare
    // ClassCastException or NoSuchElementException.
    if (!contents.isEmpty()
        && contents.getFirst() instanceof MessageContent.TextMessageContent textContent) {
      return textContent.text();
    }
    throw new IllegalStateException("The first content of this user message is not text");
  }
}

/** Represents a tool result. This is used to simulate a response from a tool invocation. */
public record ToolResult(String name, String content) implements InputMessage {}
Expand Down Expand Up @@ -203,7 +230,37 @@ private InputMessage getLastInputMessage(ChatRequest chatRequest) {
.map(
chatMessage -> {
if (chatMessage instanceof dev.langchain4j.data.message.UserMessage userMessage) {
return new UserMessage(userMessage.singleText());
List<MessageContent> contents =
userMessage.contents().stream()
.<MessageContent>map(
content ->
switch (content) {
case TextContent textContent ->
MessageContent.TextMessageContent.from(textContent.text());
case ImageContent imageContent -> {
if (imageContent.image().url() != null) {
try {
yield MessageContent.ImageMessageContent.fromUrl(
imageContent.image().url().toURL(),
toDetailLevel(imageContent.detailLevel()));
} catch (MalformedURLException e) {
throw new RuntimeException(
"Can't transform "
+ imageContent.image().url()
+ " to URL",
e);
}
} else {
throw new IllegalStateException(
"Not supported image content without url.");
}
}
default ->
throw new IllegalStateException(
"Not supported content type: " + content);
})
.toList();
return new UserMessage(contents);
} else {
ToolExecutionResultMessage result = (ToolExecutionResultMessage) chatMessage;
return new ToolResult(result.toolName(), result.text());
Expand All @@ -212,6 +269,15 @@ private InputMessage getLastInputMessage(ChatRequest chatRequest) {
.orElseThrow(() -> new RuntimeException("No input message found"));
}

/**
 * Maps a langchain4j image detail level to the constant of the same name in {@link
 * MessageContent.ImageMessageContent.DetailLevel}.
 */
private MessageContent.ImageMessageContent.DetailLevel toDetailLevel(
    ImageContent.DetailLevel detailLevel) {
  final MessageContent.ImageMessageContent.DetailLevel mapped =
      switch (detailLevel) {
        case AUTO -> MessageContent.ImageMessageContent.DetailLevel.AUTO;
        case HIGH -> MessageContent.ImageMessageContent.DetailLevel.HIGH;
        case LOW -> MessageContent.ImageMessageContent.DetailLevel.LOW;
      };
  return mapped;
}

/** Retrieves the AI response for a given input message based on the defined predicates. */
private AiResponse getResponse(InputMessage inputMessage) {
return responsePredicates.stream()
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
import akka.javasdk.CommandException;
import akka.javasdk.DependencyProvider;
import akka.javasdk.agent.AgentRegistry;
import akka.javasdk.agent.MessageContent;
import akka.javasdk.testkit.TestKit;
import akka.javasdk.testkit.TestKitSupport;
import akka.javasdk.testkit.TestModelProvider;
Expand Down Expand Up @@ -59,6 +60,7 @@ public <T> T getDependency(Class<T> clazz) {
.withModelProvider(SomeStructureResponseSchemaAgent.class, testModelProvider)
.withModelProvider(SomeStreamingAgent.class, testModelProvider)
.withModelProvider(SomeAgentWithBadlyConfiguredTool.class, testModelProvider)
.withModelProvider(SomeMultiModalUserMessageAgent.class, testModelProvider)
.withDependencyProvider(depsProvider);
}

Expand Down Expand Up @@ -88,6 +90,34 @@ public void shouldMapStringResponse() {
assertThat(result.response()).isEqualTo("123456");
}

@Test
public void shouldTestMultiModalUserMessage() {
  // given: the test model replies only to a user message whose first part is the text
  // "testing" and whose second part is an image at https://example.com
  String expectedReply = "multi modal user message";
  testModelProvider
      .whenUserMessage(
          userMessage -> {
            MessageContent.TextMessageContent textPart =
                (MessageContent.TextMessageContent) userMessage.contents().get(0);
            if (!textPart.text().equals("testing")) {
              return false;
            }
            MessageContent.ImageUrlMessageContent imagePart =
                (MessageContent.ImageUrlMessageContent) userMessage.contents().get(1);
            return imagePart.url().toString().equals("https://example.com");
          })
      .reply(expectedReply);

  // when: the agent is invoked in a fresh session
  String actualReply =
      componentClient
          .forAgent()
          .inSession(newSessionId())
          .method(SomeMultiModalUserMessageAgent::ask)
          .invoke();

  // then: the canned reply comes back, so the predicate matched the multimodal message
  assertThat(actualReply).isEqualTo(expectedReply);
}

@Test
public void shouldMapStructuredResponse() {
// given
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,6 @@
import akka.javasdk.agent.SessionHistory;
import akka.javasdk.agent.SessionMemoryEntity;
import akka.javasdk.agent.SessionMemoryEntity.AddInteractionCmd;
import akka.javasdk.agent.SessionMessage;
import akka.javasdk.agent.SessionMessage.AiMessage;
import akka.javasdk.agent.SessionMessage.UserMessage;
import akka.javasdk.impl.agent.AgentRegistryImpl;
Expand Down Expand Up @@ -226,9 +225,14 @@ public void shouldHandleConcurrentUpdatesWhenCompacting() {
testKit.method(SessionMemoryEntity::getHistory).invoke(emptyGetHistory);

// then
assertThat(historyResult2.getReply().messages().stream().map(SessionMessage::text).toList())
.containsExactly(
userMessage2.text(), aiMessage2.text(), userMessage3.text(), aiMessage3.text());
UserMessage m1 = (UserMessage) historyResult2.getReply().messages().get(0);
AiMessage m2 = (AiMessage) historyResult2.getReply().messages().get(1);
UserMessage m3 = (UserMessage) historyResult2.getReply().messages().get(2);
AiMessage m4 = (AiMessage) historyResult2.getReply().messages().get(3);
assertThat(m1.text()).isEqualTo(userMessage2.text());
assertThat(m2.text()).isEqualTo(aiMessage2.text());
assertThat(m3.text()).isEqualTo(userMessage3.text());
assertThat(m4.text()).isEqualTo(aiMessage3.text());
}

@Test
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
/*
* Copyright (C) 2021-2025 Lightbend Inc. <https://www.lightbend.com>
*/

package akkajavasdk.components.agent;

import akka.javasdk.agent.Agent;
import akka.javasdk.agent.MessageContent;
import akka.javasdk.agent.UserMessage;
import akka.javasdk.annotations.Component;

/**
 * Test agent whose only command sends a fixed user message made of a text part followed by an
 * image URL part.
 */
@Component(id = "some-multi-modal-user-message-agent")
public class SomeMultiModalUserMessageAgent extends Agent {
  public record SomeResponse(String response) {}

  /** Asks the model with the text "testing" plus an image at https://example.com. */
  public Effect<String> ask() {
    UserMessage prompt =
        UserMessage.from(
            MessageContent.TextMessageContent.from("testing"),
            MessageContent.ImageMessageContent.fromUrl("https://example.com"));
    return effects().systemMessage("You are a helpful...").userMessage(prompt).thenReply();
  }
}
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ akka.javasdk {
components {
agent = [
"akkajavasdk.components.agent.SomeAgent",
"akkajavasdk.components.agent.SomeMultiModalUserMessageAgent",
"akkajavasdk.components.agent.SomeAgentAcceptingInt",
"akkajavasdk.components.agent.SomeAgentReturningErrors",
"akkajavasdk.components.agent.SomeAgentWithTool",
Expand Down
20 changes: 20 additions & 0 deletions akka-javasdk/src/main/java/akka/javasdk/agent/Agent.java
Original file line number Diff line number Diff line change
Expand Up @@ -228,6 +228,26 @@ interface Builder {
*/
OnSuccessBuilder userMessage(String message);

/**
 * The user message to the AI model, supporting multimodal content.
 *
 * <p>This overload accepts a {@link UserMessage} which can contain multiple content types,
 * currently text and images.
 *
 * <p>Example with text and image:
 *
 * <pre>{@code
 * UserMessage message = UserMessage.from(
 *     MessageContent.TextMessageContent.from("What's in this image?"),
 *     MessageContent.ImageMessageContent.fromUrl("https://example.com/image.jpg")
 * );
 * }</pre>
 *
 * @param message The user message containing multimodal content (text, images, etc.)
 * @return The next builder stage for configuring the effect
 */
OnSuccessBuilder userMessage(UserMessage message);

/**
* Create a message reply without calling the model.
*
Expand Down
100 changes: 100 additions & 0 deletions akka-javasdk/src/main/java/akka/javasdk/agent/MessageContent.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,100 @@
/*
* Copyright (C) 2021-2025 Lightbend Inc. <https://www.lightbend.com>
*/

package akka.javasdk.agent;

import java.net.MalformedURLException;
import java.net.URI;
import java.net.URL;

/**
 * Represents a piece of content within a multimodal message to an AI model.
 *
 * <p>Message content can be text or images, allowing agents to send multimodal inputs.
 *
 * @see UserMessage
 */
public sealed interface MessageContent {

  /**
   * Text content within a user message.
   *
   * @param text The text content
   */
  record TextMessageContent(String text) implements MessageContent {

    /**
     * Creates text content from a string.
     *
     * @param text The text content
     * @return A new TextMessageContent instance
     */
    public static TextMessageContent from(String text) {
      return new TextMessageContent(text);
    }
  }

  /**
   * Image content within a user message, referenced by URL.
   *
   * @param url The URL pointing to the image
   * @param detailLevel The level of detail for image processing
   */
  record ImageUrlMessageContent(URL url, ImageMessageContent.DetailLevel detailLevel)
      implements MessageContent {}

  /**
   * Factory methods for creating image message content.
   *
   * <p>This type is not itself a {@link MessageContent}; it only groups the {@code fromUrl}
   * factories and the {@link DetailLevel} enum.
   */
  record ImageMessageContent() {

    /**
     * Creates image content from a URL string with automatic detail level.
     *
     * @param url The URL string pointing to the image; must be an absolute URL with a supported
     *     protocol
     * @return A new ImageUrlMessageContent instance with AUTO detail level
     * @throws IllegalArgumentException if the string is not a syntactically valid, absolute URL or
     *     its protocol is not supported; the underlying failure is kept as the cause
     */
    public static ImageUrlMessageContent fromUrl(String url) {
      try {
        return ImageMessageContent.fromUrl(URI.create(url).toURL());
      } catch (MalformedURLException | IllegalArgumentException e) {
        // URI.create/toURL report bad syntax and relative URIs as IllegalArgumentException but
        // unknown protocols as MalformedURLException; surface all of them uniformly.
        throw new IllegalArgumentException("Can't transform " + url + " to URL", e);
      }
    }

    /**
     * Creates image content from a URL with automatic detail level.
     *
     * @param url The URL pointing to the image
     * @return A new ImageUrlMessageContent instance with AUTO detail level
     */
    public static ImageUrlMessageContent fromUrl(URL url) {
      return new ImageUrlMessageContent(url, DetailLevel.AUTO);
    }

    /**
     * Creates image content from a URL with a specific detail level.
     *
     * @param url The URL pointing to the image
     * @param detailLevel The level of detail for image processing
     * @return A new ImageUrlMessageContent instance
     */
    public static ImageUrlMessageContent fromUrl(URL url, DetailLevel detailLevel) {
      return new ImageUrlMessageContent(url, detailLevel);
    }

    /**
     * Controls the level of detail used when processing images.
     *
     * <p>The detail level affects both the quality of image analysis and the number of tokens
     * consumed by the AI model.
     */
    public enum DetailLevel {
      /** Lower resolution processing, faster and uses fewer tokens */
      LOW,
      /** Higher resolution processing, more detailed analysis but uses more tokens */
      HIGH,
      /** Let the model automatically choose the appropriate detail level */
      AUTO;
    }
  }
}
17 changes: 17 additions & 0 deletions akka-javasdk/src/main/java/akka/javasdk/agent/SessionMemory.java
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,23 @@ public interface SessionMemory {
void addInteraction(
String sessionId, SessionMessage.UserMessage userMessage, List<SessionMessage> messages);

/**
* Adds an interaction between a user and an AI model to the session history for the specified
* session, supporting multimodal content.
*
* <p>This overload accepts a {@link SessionMessage.CompoundUserMessage} which can contain
* multiple content types including text and images, enabling multimodal interactions.
*
* @param sessionId The unique identifier for the contextual session
* @param userMessage The compound user message containing multimodal content (text, images, etc.)
* @param messages All other messages generated during this interaction, typically AiMessage but
* also Tool Call responses.
*/
void addInteraction(
String sessionId,
SessionMessage.CompoundUserMessage userMessage,
Copy link
Contributor Author

@aludwiko aludwiko Nov 5, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Initially, I used CompoundUserMessage for all interactions, both text-only and multimodal, but I think the user message will be text-based most of the time, so it's worth optimizing for that case and supporting both options, especially since we need to be backward compatible with events anyway.

List<SessionMessage> messages);

/**
* Retrieves the complete session history for the specified session. For very long sessions, this
* might return a compacted version of the history.
Expand Down
Loading
Loading