Skip to content

Commit 304fe37

Browse files
authored
chore: fix lance dependency and remove the implicit feather fallback (#70)
* Fix lance dependency and remove the implicit feather fallback
* Fix lint
1 parent 6f23140 commit 304fe37

File tree

4 files changed: +433 -50 lines changed

python/pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@ dependencies = [
88
"uvicorn>=0.24.0",
99
"pydantic>=2.0.0",
1010
"openai>=1.52.0",
11-
"lance>=0.17.0",
11+
"pylance",
1212
]
1313
description = "Python bindings for the lance-graph Cypher engine"
1414
authors = [{ name = "Lance Devs", email = "[email protected]" }]

python/python/knowledge_graph/store.py

Lines changed: 20 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
"""Persistent storage helpers built on Lance datasets (with Feather fallback)."""
1+
"""Persistent storage helpers built on Lance datasets."""
22

33
from __future__ import annotations
44

@@ -46,42 +46,35 @@ def list_datasets(self) -> Dict[str, "Path"]:
4646
datasets: Dict[str, Path] = {}
4747
if not self._root.exists():
4848
return datasets
49-
valid_suffixes = {".lance", ".arrow"}
5049
for child in self._root.iterdir():
5150
if child.is_dir() and child.suffix == ".lance":
5251
datasets[child.stem] = child
53-
elif child.is_file() and child.suffix in valid_suffixes:
54-
datasets[child.stem] = child
5552
return datasets
5653

5754
def _dataset_path(self, name: str) -> "Path":
5855
"""Create the canonical path for a dataset."""
5956
safe_name = name.replace("/", "_")
60-
suffix = ".lance" if self._get_lance() else ".arrow"
61-
return self._root / f"{safe_name}{suffix}"
57+
return self._root / f"{safe_name}.lance"
6258

63-
def _get_lance(self) -> Optional[ModuleType]:
59+
def _get_lance(self) -> ModuleType:
6460
if not self._lance_attempted:
6561
self._lance_attempted = True
6662
try:
6763
module = import_module("lance")
68-
except ImportError:
69-
module = None
70-
else:
71-
has_writer = hasattr(module, "write_dataset")
72-
has_loader = hasattr(module, "dataset")
73-
if not (has_writer and has_loader):
74-
LOGGER.warning(
75-
"Installed `lance` package missing dataset APIs; "
76-
"falling back to Feather storage."
77-
)
78-
module = None
79-
self._lance = module
80-
if module is None:
81-
LOGGER.debug(
82-
"Lance storage unavailable; using Feather files under %s.",
83-
self._root,
64+
except ImportError as e:
65+
raise ImportError(
66+
"Lance module is required but not installed. "
67+
"Install it with: pip install pylance"
68+
) from e
69+
70+
has_loader = hasattr(module, "dataset")
71+
if not (has_loader):
72+
raise ImportError(
73+
"Installed `lance` package is missing required dataset APIs."
8474
)
75+
self._lance = module
76+
if self._lance is None:
77+
raise ImportError("Lance module failed to load")
8578
return self._lance
8679

8780
def load_tables(
@@ -90,7 +83,6 @@ def load_tables(
9083
) -> Mapping[str, "pa.Table"]:
9184
"""Load Lance datasets as PyArrow tables."""
9285
lance = self._get_lance()
93-
use_lance = lance is not None
9486

9587
self.ensure_layout()
9688
available = self.list_datasets()
@@ -101,13 +93,8 @@ def load_tables(
10193
path = available.get(name, self._dataset_path(name))
10294
if not path.exists():
10395
raise FileNotFoundError(f"Dataset '{name}' not found at {path}")
104-
if path.suffix == ".lance" and use_lance:
105-
dataset = lance.dataset(str(path)) # type: ignore[union-attr]
106-
table = dataset.scanner().to_table()
107-
else:
108-
import pyarrow.feather as feather
109-
110-
table = feather.read_table(str(path))
96+
dataset = lance.dataset(str(path))
97+
table = dataset.scanner().to_table()
11198
tables[name] = table
11299
return tables
113100

@@ -123,10 +110,5 @@ def write_tables(self, tables: Mapping[str, "pa.Table"]) -> None:
123110
f"Dataset '{name}' must be a pyarrow.Table (got {type(table)!r})"
124111
)
125112
path = self._dataset_path(name)
126-
if path.suffix == ".lance" and lance is not None:
127-
mode = "overwrite" if path.exists() else "create"
128-
lance.write_dataset(table, str(path), mode=mode) # type: ignore[union-attr]
129-
else:
130-
import pyarrow.feather as feather
131-
132-
feather.write_feather(table, str(path))
113+
mode = "overwrite" if path.exists() else "create"
114+
lance.write_dataset(table, str(path), mode=mode)

0 commit comments

Comments
 (0)