Skip to content

Commit f28e78c

Browse files
refactor: unify rag storage with instance-specific client support (#3455)
- ignore line length errors globally
- migrate knowledge/memory and crew query_knowledge to `SearchResult`
- remove legacy chromadb utils; fix empty metadata handling
- restore openai as default embedding provider; support instance-specific clients
- update and fix tests for `SearchResult` migration and rag changes
1 parent 81bd81e commit f28e78c

30 files changed

+1941
-961
lines changed

pyproject.toml

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -131,13 +131,14 @@ select = [
131131
"I001", # sort imports
132132
"I002", # remove unused imports
133133
]
134-
ignore = ["E501"] # ignore line too long
134+
ignore = ["E501"] # ignore line too long globally
135135

136136
[tool.ruff.lint.per-file-ignores]
137-
"tests/**/*.py" = ["S101"] # Allow assert statements in tests
137+
"tests/**/*.py" = ["S101", "RET504"] # Allow assert statements and unnecessary assignments before return in tests
138138

139139
[tool.mypy]
140-
exclude = ["src/crewai/cli/templates", "tests"]
140+
exclude = ["src/crewai/cli/templates", "tests/"]
141+
141142

142143
[tool.bandit]
143144
exclude_dirs = ["src/crewai/cli/templates"]

src/crewai/crew.py

Lines changed: 233 additions & 194 deletions
Large diffs are not rendered by default.

src/crewai/knowledge/knowledge.py

Lines changed: 15 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,11 @@
11
import os
2-
from typing import Any, Dict, List, Optional
2+
from typing import Any
33

44
from pydantic import BaseModel, ConfigDict, Field
55

66
from crewai.knowledge.source.base_knowledge_source import BaseKnowledgeSource
77
from crewai.knowledge.storage.knowledge_storage import KnowledgeStorage
8+
from crewai.rag.types import SearchResult
89

910
os.environ["TOKENIZERS_PARALLELISM"] = "false" # removes logging from fastembed
1011

@@ -13,23 +14,23 @@ class Knowledge(BaseModel):
1314
"""
1415
Knowledge is a collection of sources and setup for the vector store to save and query relevant context.
1516
Args:
16-
sources: List[BaseKnowledgeSource] = Field(default_factory=list)
17-
storage: Optional[KnowledgeStorage] = Field(default=None)
18-
embedder: Optional[Dict[str, Any]] = None
17+
sources: list[BaseKnowledgeSource] = Field(default_factory=list)
18+
storage: KnowledgeStorage | None = Field(default=None)
19+
embedder: dict[str, Any] | None = None
1920
"""
2021

21-
sources: List[BaseKnowledgeSource] = Field(default_factory=list)
22+
sources: list[BaseKnowledgeSource] = Field(default_factory=list)
2223
model_config = ConfigDict(arbitrary_types_allowed=True)
23-
storage: Optional[KnowledgeStorage] = Field(default=None)
24-
embedder: Optional[Dict[str, Any]] = None
25-
collection_name: Optional[str] = None
24+
storage: KnowledgeStorage | None = Field(default=None)
25+
embedder: dict[str, Any] | None = None
26+
collection_name: str | None = None
2627

2728
def __init__(
2829
self,
2930
collection_name: str,
30-
sources: List[BaseKnowledgeSource],
31-
embedder: Optional[Dict[str, Any]] = None,
32-
storage: Optional[KnowledgeStorage] = None,
31+
sources: list[BaseKnowledgeSource],
32+
embedder: dict[str, Any] | None = None,
33+
storage: KnowledgeStorage | None = None,
3334
**data,
3435
):
3536
super().__init__(**data)
@@ -40,11 +41,10 @@ def __init__(
4041
embedder=embedder, collection_name=collection_name
4142
)
4243
self.sources = sources
43-
self.storage.initialize_knowledge_storage()
4444

4545
def query(
46-
self, query: List[str], results_limit: int = 3, score_threshold: float = 0.35
47-
) -> List[Dict[str, Any]]:
46+
self, query: list[str], results_limit: int = 3, score_threshold: float = 0.35
47+
) -> list[SearchResult]:
4848
"""
4949
Query across all knowledge sources to find the most relevant information.
5050
Returns the top_k most relevant chunks.
@@ -55,12 +55,11 @@ def query(
5555
if self.storage is None:
5656
raise ValueError("Storage is not initialized.")
5757

58-
results = self.storage.search(
58+
return self.storage.search(
5959
query,
6060
limit=results_limit,
6161
score_threshold=score_threshold,
6262
)
63-
return results
6463

6564
def add_sources(self):
6665
try:
Lines changed: 7 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,7 @@
11
from abc import ABC, abstractmethod
2-
from typing import Any, Dict, List, Optional
2+
from typing import Any
3+
4+
from crewai.rag.types import SearchResult
35

46

57
class BaseKnowledgeStorage(ABC):
@@ -8,22 +10,17 @@ class BaseKnowledgeStorage(ABC):
810
@abstractmethod
911
def search(
1012
self,
11-
query: List[str],
13+
query: list[str],
1214
limit: int = 3,
13-
filter: Optional[dict] = None,
15+
metadata_filter: dict[str, Any] | None = None,
1416
score_threshold: float = 0.35,
15-
) -> List[Dict[str, Any]]:
17+
) -> list[SearchResult]:
1618
"""Search for documents in the knowledge base."""
17-
pass
1819

1920
@abstractmethod
20-
def save(
21-
self, documents: List[str], metadata: Dict[str, Any] | List[Dict[str, Any]]
22-
) -> None:
21+
def save(self, documents: list[str]) -> None:
2322
"""Save documents to the knowledge base."""
24-
pass
2523

2624
@abstractmethod
2725
def reset(self) -> None:
2826
"""Reset the knowledge base."""
29-
pass

0 commit comments

Comments
 (0)