Skip to content

Commit 94543a1

Browse files
authored
Add Valyu Extractor and Fast mode (run-llama#19915)
1 parent 2c853f8 commit 94543a1

File tree

8 files changed

+1130
-179
lines changed

8 files changed

+1130
-179
lines changed
Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,38 @@
11
# CHANGELOG
22

3+
## [0.4.0] - 2025-09-22
4+
5+
### Added
6+
7+
- **Fast Mode Parameter**: Added `fast_mode` parameter to enable faster searches that return shorter results
8+
9+
- Can be set during `ValyuToolSpec` initialization for default behavior
10+
- Can be overridden in individual search calls
11+
- User-configurable but not exposed to LLMs for direct control
12+
- Good for general-purpose queries where speed is prioritized over comprehensiveness
13+
14+
- **Contents API Integration**: Added comprehensive URL content extraction functionality
15+
- New `get_contents()` method in `ValyuToolSpec` for extracting content from URLs
16+
- New `ValyuRetriever` class for integrating URL content extraction with LlamaIndex retrieval pipelines
17+
- Model can only specify URLs - all extraction parameters controlled by user
18+
- Supports AI-powered summarization and structured data extraction
19+
- Configurable extraction effort, response length, and cost limits
20+
- Automatic URL parsing from natural language queries
21+
22+
### Updated
23+
24+
- Updated Valyu SDK to version 2.2.1 for fast mode and contents API support
25+
- Enhanced test coverage with fast mode parameter testing and comprehensive contents API tests
26+
- Updated examples to demonstrate fast mode usage and contents API functionality
27+
- Added retriever examples and comprehensive API demonstrations
28+
29+
## [0.3.1] - Previous Release
30+
31+
### Features
32+
33+
- Enhanced search parameters support
34+
- Updated SDK integration
35+
336
## [0.1.0] - 2025-02-11
437

538
- Initial release

llama-index-integrations/tools/llama-index-tools-valyu/examples/context.py

Lines changed: 26 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -2,29 +2,35 @@
22
import os
33

44
# Set up OpenAI
5-
from llama_index.core.agent.workflow import FunctionAgent
6-
from llama_index.llms.openai import OpenAI
5+
from llama_index.agent.openai import OpenAIAgent
76
from llama_index.tools.valyu import ValyuToolSpec
87

98

10-
async def main():
11-
valyu_tool = ValyuToolSpec(
12-
api_key=os.environ["VALYU_API_KEY"],
13-
max_price=100, # default is 100
14-
)
9+
valyu_tool = ValyuToolSpec(
10+
api_key=os.environ["VALYU_API_KEY"],
11+
max_price=100, # default is 100
12+
fast_mode=True, # Enable fast mode for faster but shorter results
13+
# Contents API configuration
14+
contents_summary=True, # Enable AI summarization for content extraction
15+
contents_extract_effort="normal", # Extraction thoroughness
16+
contents_response_length="medium", # Content length per URL
17+
)
1518

16-
agent = FunctionAgent(
17-
tools=valyu_tool.to_tool_list(),
18-
llm=OpenAI(model="gpt-4.1"),
19-
)
19+
agent = OpenAIAgent.from_tools(
20+
valyu_tool.to_tool_list(),
21+
verbose=True,
22+
)
2023

21-
print(
22-
await agent.run(
23-
"What are the key considerations and empirical evidence for implementing statistical arbitrage strategies using cointegrated pairs trading, specifically focusing on the optimal lookback period for calculating correlation coefficients and the impact of transaction costs on strategy profitability in high-frequency trading environments?"
24-
)
25-
)
24+
# Example 1: Search query
25+
print("=== Search Example ===")
26+
search_response = agent.chat(
27+
"What are the key considerations and empirical evidence for implementing statistical arbitrage strategies using cointegrated pairs trading, specifically focusing on the optimal lookback period for calculating correlation coefficients and the impact of transaction costs on strategy profitability in high-frequency trading environments?"
28+
)
29+
print(search_response)
2630

27-
if __name__ == "__main__":
28-
import asyncio
29-
30-
asyncio.run(main())
31+
# Example 2: URL content extraction
32+
print("\n=== URL Content Extraction Example ===")
33+
content_response = agent.chat(
34+
"Please extract and summarize the content from these URLs: https://arxiv.org/abs/1706.03762 and https://en.wikipedia.org/wiki/Transformer_(machine_learning_model)"
35+
)
36+
print(content_response)
Lines changed: 113 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,113 @@
1+
"""
2+
Example demonstrating the ValyuRetriever for URL content extraction.
3+
4+
This example shows how to use the ValyuRetriever to extract content from URLs
5+
and integrate it with LlamaIndex retrieval pipelines.
6+
"""
7+
8+
import os
9+
from llama_index.tools.valyu import ValyuRetriever
10+
from llama_index.core import QueryBundle
11+
12+
13+
def main():
14+
"""Demonstrate ValyuRetriever usage."""
15+
16+
# Initialize the Valyu retriever
17+
valyu_retriever = ValyuRetriever(
18+
api_key=os.environ.get("VALYU_API_KEY", "your-api-key-here"),
19+
verbose=True,
20+
# Configure contents extraction (user-controlled settings)
21+
contents_summary=True, # Enable AI summarization
22+
contents_extract_effort="normal", # Extraction thoroughness
23+
contents_response_length="medium", # Content length per URL
24+
# Note: contents_max_price is set by developer/user, not exposed to models
25+
)
26+
27+
# Example 1: Single URL retrieval
28+
print("=== Single URL Retrieval ===")
29+
query_bundle = QueryBundle(
30+
query_str="https://en.wikipedia.org/wiki/Transformer_(machine_learning_model)"
31+
)
32+
33+
try:
34+
nodes = valyu_retriever.retrieve(query_bundle)
35+
print(f"Retrieved {len(nodes)} documents:")
36+
37+
for i, node in enumerate(nodes):
38+
print(f"\nDocument {i+1}:")
39+
print(f"URL: {node.node.metadata.get('url', 'N/A')}")
40+
print(f"Title: {node.node.metadata.get('title', 'N/A')}")
41+
print(f"Content length: {len(node.node.text)} characters")
42+
print(f"Score: {node.score}")
43+
# Show content preview
44+
preview = (
45+
node.node.text[:200] + "..."
46+
if len(node.node.text) > 200
47+
else node.node.text
48+
)
49+
print(f"Content preview: {preview}")
50+
51+
except Exception as e:
52+
print(f"Error: {e}")
53+
print("Note: This example requires a valid VALYU_API_KEY environment variable")
54+
55+
# Example 2: Multiple URLs
56+
print("\n=== Multiple URLs Retrieval ===")
57+
multi_url_query = QueryBundle(
58+
query_str="https://arxiv.org/abs/1706.03762 https://en.wikipedia.org/wiki/Attention_(machine_learning)"
59+
)
60+
61+
try:
62+
nodes = valyu_retriever.retrieve(multi_url_query)
63+
print(f"Retrieved {len(nodes)} documents from multiple URLs")
64+
65+
for i, node in enumerate(nodes):
66+
print(
67+
f"Document {i+1}: {node.node.metadata.get('title', 'Unknown')} - {len(node.node.text)} chars"
68+
)
69+
70+
except Exception as e:
71+
print(f"Error: {e}")
72+
73+
# Example 3: Natural language query with URLs
74+
print("\n=== Natural Language Query with URLs ===")
75+
natural_query = QueryBundle(
76+
query_str="Please extract content from these research papers: https://arxiv.org/abs/1706.03762 and also from https://en.wikipedia.org/wiki/Large_language_model"
77+
)
78+
79+
try:
80+
nodes = valyu_retriever.retrieve(natural_query)
81+
print(
82+
f"Extracted content from {len(nodes)} URLs found in natural language query"
83+
)
84+
85+
except Exception as e:
86+
print(f"Error: {e}")
87+
88+
89+
def demonstrate_url_parsing():
90+
"""Demonstrate URL parsing capabilities."""
91+
print("\n=== URL Parsing Examples ===")
92+
93+
retriever = ValyuRetriever(
94+
api_key="test-key"
95+
) # API key not needed for parsing demo
96+
97+
test_cases = [
98+
"https://example.com",
99+
"https://site1.com, https://site2.com",
100+
"Please extract content from https://news.com and https://blog.com",
101+
"Check out these links: https://paper1.org https://paper2.org",
102+
"No URLs in this text",
103+
]
104+
105+
for i, test_case in enumerate(test_cases, 1):
106+
urls = retriever._parse_urls_from_query(test_case)
107+
print(f"Test {i}: '{test_case}'")
108+
print(f" Extracted URLs: {urls}")
109+
110+
111+
if __name__ == "__main__":
112+
main()
113+
demonstrate_url_parsing()
Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
11
from llama_index.tools.valyu.base import ValyuToolSpec
2+
from llama_index.tools.valyu.retriever import ValyuRetriever
23

3-
__all__ = ["ValyuToolSpec"]
4+
__all__ = ["ValyuToolSpec", "ValyuRetriever"]

0 commit comments

Comments
 (0)