1- """Persistent storage helpers built on Lance datasets (with Feather fallback) ."""
1+ """Persistent storage helpers built on Lance datasets."""
22
33from __future__ import annotations
44
@@ -46,42 +46,35 @@ def list_datasets(self) -> Dict[str, "Path"]:
4646 datasets : Dict [str , Path ] = {}
4747 if not self ._root .exists ():
4848 return datasets
49- valid_suffixes = {".lance" , ".arrow" }
5049 for child in self ._root .iterdir ():
5150 if child .is_dir () and child .suffix == ".lance" :
5251 datasets [child .stem ] = child
53- elif child .is_file () and child .suffix in valid_suffixes :
54- datasets [child .stem ] = child
5552 return datasets
5653
def _dataset_path(self, name: str) -> "Path":
    """Create the canonical path for a dataset.

    Forward slashes in ``name`` are replaced with underscores so the
    dataset always maps to a single entry directly under the storage root.
    The suffix is always ``.lance``; it no longer depends on whether the
    ``lance`` module could be imported.

    Args:
        name: Logical dataset name; may contain ``/`` separators.

    Returns:
        ``self._root / "<sanitised name>.lance"``.
    """
    safe_name = name.replace("/", "_")
    return self._root / f"{safe_name}.lance"
6258
def _get_lance(self) -> ModuleType:
    """Return the imported ``lance`` module, importing it on first use.

    A successfully validated module is cached on ``self._lance`` and
    returned directly on later calls. A failed attempt is not cached as a
    result: every call that cannot produce a usable module raises the same
    descriptive error instead of a generic follow-up message.

    Returns:
        The ``lance`` module, guaranteed to expose both ``dataset`` (used
        by ``load_tables``) and ``write_dataset`` (used by
        ``write_tables``).

    Raises:
        ImportError: If ``lance`` is not installed, or the installed
            package lacks one of the required dataset APIs.
    """
    if self._lance is not None:
        return self._lance

    # Kept for compatibility with code that inspects this flag.
    self._lance_attempted = True
    try:
        module = import_module("lance")
    except ImportError as exc:
        raise ImportError(
            "Lance module is required but not installed. "
            "Install it with: pip install pylance"
        ) from exc

    # Validate both entry points up front so an incomplete install fails
    # here rather than with an AttributeError in the middle of a write.
    missing = [
        api for api in ("dataset", "write_dataset") if not hasattr(module, api)
    ]
    if missing:
        raise ImportError(
            "Installed `lance` package is missing required dataset APIs: "
            + ", ".join(missing)
        )

    self._lance = module
    return module
8679
8780 def load_tables (
@@ -90,7 +83,6 @@ def load_tables(
9083 ) -> Mapping [str , "pa.Table" ]:
9184 """Load Lance datasets as PyArrow tables."""
9285 lance = self ._get_lance ()
93- use_lance = lance is not None
9486
9587 self .ensure_layout ()
9688 available = self .list_datasets ()
@@ -101,13 +93,8 @@ def load_tables(
10193 path = available .get (name , self ._dataset_path (name ))
10294 if not path .exists ():
10395 raise FileNotFoundError (f"Dataset '{ name } ' not found at { path } " )
104- if path .suffix == ".lance" and use_lance :
105- dataset = lance .dataset (str (path )) # type: ignore[union-attr]
106- table = dataset .scanner ().to_table ()
107- else :
108- import pyarrow .feather as feather
109-
110- table = feather .read_table (str (path ))
96+ dataset = lance .dataset (str (path ))
97+ table = dataset .scanner ().to_table ()
11198 tables [name ] = table
11299 return tables
113100
@@ -123,10 +110,5 @@ def write_tables(self, tables: Mapping[str, "pa.Table"]) -> None:
123110 f"Dataset '{ name } ' must be a pyarrow.Table (got { type (table )!r} )"
124111 )
125112 path = self ._dataset_path (name )
126- if path .suffix == ".lance" and lance is not None :
127- mode = "overwrite" if path .exists () else "create"
128- lance .write_dataset (table , str (path ), mode = mode ) # type: ignore[union-attr]
129- else :
130- import pyarrow .feather as feather
131-
132- feather .write_feather (table , str (path ))
113+ mode = "overwrite" if path .exists () else "create"
114+ lance .write_dataset (table , str (path ), mode = mode )
0 commit comments