diff --git a/docs/snippets/basic_usage.mdx b/docs/snippets/basic_usage.mdx index a62a519..e33df7f 100644 --- a/docs/snippets/basic_usage.mdx +++ b/docs/snippets/basic_usage.mdx @@ -26,7 +26,7 @@ export const PyBasicVectorSearch = "query_vector = [0.03, 0.85, 0.61, 0.90]\nres export const PyBasicVectorSearchQ1 = "# Who are the characters similar to \"wizard\"?\nquery_vector_1 = [0.03, 0.85, 0.61, 0.90]\nr1 = (\n table.search(query_vector_1)\n .limit(5)\n .select([\"name\", \"role\", \"description\"])\n .to_polars()\n)\nprint(r1)\n"; -export const PyBasicVectorSearchQ2 = "# Who are the characters similar to \"wizard\" with high magic stats?\nquery_vector_2 = [0.03, 0.85, 0.61, 0.90]\nr2 = (\n table.search()\n .where(\"stats.magic > 3\")\n .select([\"name\", \"role\", \"description\"])\n .limit(5)\n .to_polars()\n)\nprint(r2)\n"; +export const PyBasicVectorSearchQ2 = "# Who are the characters similar to \"wizard\" with high magic stats?\nquery_vector_2 = [0.03, 0.85, 0.61, 0.90]\nr2 = (\n table.search(query_vector_2)\n .where(\"stats.magic > 3\")\n .select([\"name\", \"role\", \"description\"])\n .limit(5)\n .to_polars()\n)\nprint(r2)\n"; export const PyBasicVectorSearchQ3 = "# Who are the strongest characters?\nr3 = (\n table.search()\n .where(\"stats.strength > 3\")\n .select([\"name\", \"role\", \"description\"])\n .limit(5)\n .to_polars()\n)\nprint(r3)\n"; diff --git a/docs/snippets/quickstart.mdx b/docs/snippets/quickstart.mdx index faf453e..570b8da 100644 --- a/docs/snippets/quickstart.mdx +++ b/docs/snippets/quickstart.mdx @@ -14,8 +14,6 @@ export const PyQuickstartVectorSearch1 = "# Let's search for vectors similar to export const PyQuickstartVectorSearch2 = "# Let's search for vectors similar to \"wizard\"\nquery_vector = [0.7, 0.3, 0.5]\n\nresults = table.search(query_vector).limit(2).to_polars()\nprint(results)\n"; -export const TsQuickstartOutputPandas = "result = await table.search(queryVector).limit(2).toArray();\n"; - export const TsQuickstartAddData = "const moreData = [\n 
{ id: \"7\", text: \"mage\", vector: [0.6, 0.3, 0.4] },\n { id: \"8\", text: \"bard\", vector: [0.3, 0.8, 0.4] },\n];\n\n// Add data to table\nawait table.add(moreData);\n"; export const TsQuickstartCreateTable = "const data = [\n { id: \"1\", text: \"knight\", vector: [0.9, 0.4, 0.8] },\n { id: \"2\", text: \"ranger\", vector: [0.8, 0.4, 0.7] },\n { id: \"9\", text: \"priest\", vector: [0.6, 0.2, 0.6] },\n { id: \"4\", text: \"rogue\", vector: [0.7, 0.4, 0.7] },\n];\nlet table = await db.createTable(\"adventurers\", data, { mode: \"overwrite\" });\n"; @@ -26,6 +24,8 @@ export const TsQuickstartOpenTable = "table = await db.openTable(\"adventurers\" export const TsQuickstartOutputArray = "result = await table.search(queryVector).limit(2).toArray();\nconsole.table(result);\n"; +export const TsQuickstartOutputPandas = "result = await table.search(queryVector).limit(2).toArray();\n"; + export const TsQuickstartVectorSearch1 = "// Let's search for vectors similar to \"warrior\"\nlet queryVector = [0.8, 0.3, 0.8];\n\nlet result = await table.search(queryVector).limit(2).toArray();\nconsole.table(result);\n"; export const TsQuickstartVectorSearch2 = "// Let's search for vectors similar to \"wizard\"\nqueryVector = [0.7, 0.3, 0.5];\n\nconst results = await table.search(queryVector).limit(2).toArray();\nconsole.table(results);\n"; diff --git a/docs/snippets/search.mdx b/docs/snippets/search.mdx index ffb0890..e227b11 100644 --- a/docs/snippets/search.mdx +++ b/docs/snippets/search.mdx @@ -8,10 +8,10 @@ export const PyBasicHybridSearch = "data = [\n {\"text\": \"rebel spaceships export const PyBasicHybridSearchAsync = "uri = \"data/sample-lancedb\"\nasync_db = await lancedb.connect_async(uri)\ndata = [\n {\"text\": \"rebel spaceships striking from a hidden base\"},\n {\"text\": \"have won their first victory against the evil Galactic Empire\"},\n {\"text\": \"during the battle rebel spies managed to steal secret plans\"},\n {\"text\": \"to the Empire's ultimate weapon the 
Death Star\"},\n]\nasync_tbl = await async_db.create_table(\"documents_async\", schema=Documents)\n# ingest docs with auto-vectorization\nawait async_tbl.add(data)\n# Create a fts index before the hybrid search\nawait async_tbl.create_index(\"text\", config=FTS())\ntext_query = \"flower moon\"\n# hybrid search with default re-ranker\nawait (await async_tbl.search(\"flower moon\", query_type=\"hybrid\")).to_pandas()\n"; -export const PyClassDocuments = "class Documents(LanceModel):\n vector: Vector(embeddings.ndims()) = embeddings.VectorField()\n text: str = embeddings.SourceField()\n"; - export const PyClassDefinition = "class Metadata(BaseModel):\n source: str\n timestamp: datetime\n\n\nclass Document(BaseModel):\n content: str\n meta: Metadata\n\n\nclass LanceSchema(LanceModel):\n id: str\n vector: Vector(1536)\n payload: Document\n"; +export const PyClassDocuments = "class Documents(LanceModel):\n vector: Vector(embeddings.ndims()) = embeddings.VectorField()\n text: str = embeddings.SourceField()\n"; + export const PyCreateTableAsyncWithNestedSchema = "# Let's add 100 sample rows to our dataset\ndata = [\n LanceSchema(\n id=f\"id{i}\",\n vector=np.random.randn(1536),\n payload=Document(\n content=f\"document{i}\",\n meta=Metadata(source=f\"source{i % 10}\", timestamp=datetime.now()),\n ),\n )\n for i in range(100)\n]\n\nasync_tbl = await async_db.create_table(\"documents_async\", data=data)\n"; export const PyCreateTableWithNestedSchema = "# Let's add 100 sample rows to our dataset\ndata = [\n LanceSchema(\n id=f\"id{i}\",\n vector=np.random.randn(1536),\n payload=Document(\n content=f\"document{i}\",\n meta=Metadata(source=f\"source{i % 10}\", timestamp=datetime.now()),\n ),\n )\n for i in range(100)\n]\n\n# Synchronous client\ntbl = db.create_table(\"documents\", data=data)\n"; diff --git a/docs/tables/index.mdx b/docs/tables/index.mdx index 8292e52..6e3f8d2 100644 --- a/docs/tables/index.mdx +++ b/docs/tables/index.mdx @@ -380,9 +380,9 @@ the desired 
columns). We have Merlin, The Lady of the Lake, and Morgan le Fay in the top results, who all have magical abilities. -Next, let's try to answer a more complex question that involves filtering on a -nested struct field. Filtering is done using the `where` method, where you can -pass in SQL-like expressions. +Next, let's try to answer a different question that combines vector search with +filtering on a nested struct field. Filtering is done using the `where` method, +into which you can pass SQL-like expressions. > Q2: _Who are the characters similar to "wizard" with high magic stats?_ diff --git a/tests/py/test_basic_usage.py b/tests/py/test_basic_usage.py index 44c6f5d..cd27e80 100644 --- a/tests/py/test_basic_usage.py +++ b/tests/py/test_basic_usage.py @@ -124,10 +124,10 @@ def test_basic_usage(db_path_factory): # --8<-- [end:basic_vector_search_q1] # --8<-- [start:basic_vector_search_q2] - # Who are the characters similar to "wizard" with high magic stats? + # Who are the characters similar to "wizard" with high magic stats? query_vector_2 = [0.03, 0.85, 0.61, 0.90] r2 = ( - table.search() + table.search(query_vector_2) .where("stats.magic > 3") .select(["name", "role", "description"]) .limit(5)