Skip to content

Commit 5961ec0

Browse files
Facets are now read from the esmvalcore intake configuration file instead of being passed to the to_iris call by the user.
1 parent da7658e commit 5961ec0

File tree

7 files changed

+171
-7
lines changed

7 files changed

+171
-7
lines changed

intake_esm/_search.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -45,7 +45,10 @@ def search(
4545
column_has_iterables = column in columns_with_iterables
4646
for value in values:
4747
if column_has_iterables:
48-
mask = df[column].str.contains(value, regex=False)
48+
try:
49+
mask = df[column].str.contains(value, regex=False)
50+
except AttributeError:
51+
mask = df[column].apply(tuple).str.contains(value, regex=False)
4952
elif column_is_stringtype and is_pattern(value):
5053
mask = df[column].str.contains(value, regex=True, case=True, flags=0)
5154
elif pd.isna(value):

intake_esm/cat.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -109,6 +109,7 @@ class ESMCatalogModel(pydantic.BaseModel):
109109
id: str = ''
110110
catalog_dict: list[dict] | None = None
111111
catalog_file: pydantic.StrictStr | None = None
112+
fhandle: pydantic.StrictStr | None = None
112113
description: pydantic.StrictStr | None = None
113114
title: pydantic.StrictStr | None = None
114115
last_updated: datetime.datetime | datetime.date | None = None
@@ -262,6 +263,7 @@ def load(
262263
df=pl.DataFrame(cat.catalog_dict).to_pandas(),
263264
)
264265

266+
cat.fhandle = json_file
265267
cat._cast_agg_columns_with_iterables()
266268
return cat
267269

intake_esm/core.py

Lines changed: 60 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,6 @@
88
if typing.TYPE_CHECKING:
99
import esmvalcore
1010
import esmvalcore.dataset
11-
from esmvalcore.typing import FacetValue
1211

1312
import dask
1413
import packaging.version
@@ -847,7 +846,7 @@ def to_dask(self, **kwargs) -> xr.Dataset:
847846

848847
def to_iris(
849848
self,
850-
facet_map: dict['FacetValue', str],
849+
search: dict[str, str],
851850
cmorizer: typing.Any | None = None,
852851
**kwargs,
853852
) -> 'esmvalcore.dataset.Dataset':
@@ -889,10 +888,22 @@ def to_iris(
889888
raise ValueError(
890889
f'Expected exactly one dataset. Received {len(self)} datasets. Please refine your search.'
891890
)
892-
else:
893-
from esmvalcore.dataset import Dataset
894891

895-
ds = Dataset(**facet_map)
892+
# Use esmvalcore to load the intake configuration & work out how we
893+
# need to map our facets
894+
895+
from esmvalcore.config._intake import load_intake_config
896+
from esmvalcore.dataset import Dataset
897+
898+
facet_map, project = _read_facets(load_intake_config(), self.esmcat.fhandle)
899+
900+
facets = {k: search.get(v) for k, v in facet_map.items()}
901+
facets = {k: v for k, v in facets.items() if v is not None}
902+
903+
facets.pop('version', None) # If there's a version, chuck it
904+
facets['project'] = project
905+
906+
ds = Dataset(**facets)
896907

897908
ds.files = self.unique().path
898909
ds.augment_facets()
@@ -926,3 +937,47 @@ def _get_threaded(threaded: bool | None) -> bool:
926937
) from e
927938

928939
return threaded
940+
941+
942+
def _read_facets(
943+
cfg: dict,
944+
fhandle: str | None,
945+
project: str | None = None,
946+
) -> tuple[dict[str, typing.Any], str]:
947+
"""
948+
Extract facet mapping from ESMValCore configuration for a given catalog file handle.
949+
950+
Recursively traverses the ESMValCore configuration structure to find the
951+
facet mapping that corresponds to the specified file handle.
952+
953+
Parameters
954+
----------
955+
cfg : dict
956+
The ESMValCore intake configuration dictionary.
957+
fhandle : str
958+
The file handle/path of the intake-esm catalog to match.
959+
project : str, optional
960+
The current project name in the configuration hierarchy.
961+
962+
Returns
963+
-------
964+
tuple
965+
A tuple containing:
966+
- dict: Facet mapping between ESMValCore facets and catalog columns
967+
- str: The project name associated with the catalog file
968+
"""
969+
if fhandle is None:
970+
raise ValueError('Unable to ascertain facets without valid file handle.')
971+
972+
for _project, val in cfg.items():
973+
if not (isinstance(val, list)):
974+
return _read_facets(val, fhandle, project or _project)
975+
for facet_info in val:
976+
file, facets = facet_info.get('file'), facet_info.get('facets')
977+
if file == fhandle:
978+
return facets, project # type: ignore[return-value]
979+
else:
980+
raise ValueError(
981+
f'No facets found for {fhandle} in the config file. '
982+
'Please check the config file and ensure it is valid.'
983+
)
1.21 KB
Binary file not shown.
Lines changed: 81 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,81 @@
1+
{
2+
"esmcat_version": "0.0.1",
3+
"attributes": [
4+
{
5+
"column_name": "filename",
6+
"vocabulary": ""
7+
},
8+
{
9+
"column_name": "file_id",
10+
"vocabulary": ""
11+
},
12+
{
13+
"column_name": "path",
14+
"vocabulary": ""
15+
},
16+
{
17+
"column_name": "filename_timestamp",
18+
"vocabulary": ""
19+
},
20+
{
21+
"column_name": "frequency",
22+
"vocabulary": ""
23+
},
24+
{
25+
"column_name": "start_date",
26+
"vocabulary": ""
27+
},
28+
{
29+
"column_name": "end_date",
30+
"vocabulary": ""
31+
},
32+
{
33+
"column_name": "variable",
34+
"vocabulary": ""
35+
},
36+
{
37+
"column_name": "variable_long_name",
38+
"vocabulary": ""
39+
},
40+
{
41+
"column_name": "variable_standard_name",
42+
"vocabulary": ""
43+
},
44+
{
45+
"column_name": "variable_cell_methods",
46+
"vocabulary": ""
47+
},
48+
{
49+
"column_name": "variable_units",
50+
"vocabulary": ""
51+
},
52+
{
53+
"column_name": "realm",
54+
"vocabulary": ""
55+
}
56+
],
57+
"assets": {
58+
"column_name": "path",
59+
"format": "netcdf",
60+
"format_column_name": null
61+
},
62+
"aggregation_control": {
63+
"variable_column_name": "variable",
64+
"groupby_attrs": ["file_id", "frequency"],
65+
"aggregations": [
66+
{
67+
"type": "join_existing",
68+
"attribute_name": "start_date",
69+
"options": {
70+
"dim": "time",
71+
"combine": "by_coords"
72+
}
73+
}
74+
]
75+
},
76+
"id": "01deg_jra55v13_ryf9091",
77+
"description": "0.1 degree ACCESS-OM2 global model configuration with JRA55-do v1.3 RYF9091 repeat year forcing (May 1990 to Apr 1991)",
78+
"title": null,
79+
"last_updated": "2025-03-04T01:25:35Z",
80+
"catalog_file": "access-columns-with-iterables.csv.gz"
81+
}

tests/test_core.py

Lines changed: 21 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@
1919
import intake_esm
2020

2121
from .utils import (
22+
access_columns_with_iterables_cat,
2223
catalog_dict_records,
2324
cdf_cat_sample_cesmle,
2425
cdf_cat_sample_cmip5,
@@ -201,6 +202,18 @@ def test_catalog_search(path, query, expected_size):
201202
assert len(new_cat) == expected_size
202203

203204

205+
@pytest.mark.parametrize(
    'path, columns_with_iterables, query, expected_size',
    [
        (access_columns_with_iterables_cat, ['variable'], {'variable': ['aice_m']}, 1),
    ],
)
def test_catalog_search_columns_with_iterables(path, columns_with_iterables, query, expected_size):
    """Searching on a column declared as iterable returns the expected number of entries."""
    datastore = intake.open_esm_datastore(
        path,
        columns_with_iterables=columns_with_iterables,
    )
    matches = datastore.search(**query)
    assert len(matches) == expected_size
215+
216+
204217
def test_catalog_with_registry_search():
205218
cat = intake.open_esm_datastore(zarr_cat_aws_cesm, registry=registry)
206219
new_cat = cat.search(variable='FOO')
@@ -648,7 +661,14 @@ def test_to_iris_unavailable():
648661
)
649662
with pytest.raises(ImportError, match=r'`to_iris\(\)` requires the esmvalcore package'):
650663
_ = cat_sub.to_iris(
651-
facet_map={},
664+
search=dict(
665+
variable_id=['pr'],
666+
experiment_id='ssp370',
667+
activity_id='AerChemMIP',
668+
source_id='BCC-ESM1',
669+
table_id='Amon',
670+
grid_label='gn',
671+
),
652672
xarray_open_kwargs={
653673
'consolidated': True,
654674
'backend_kwargs': {'storage_options': {'token': 'anon'}},

tests/utils.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,9 @@
1616
'https://raw.githubusercontent.com/NCAR/cesm-lens-aws/master/intake-catalogs/aws-cesm1-le.json'
1717
)
1818
mixed_cat_sample_cmip6 = os.path.join(here, 'sample-catalogs/cmip6-bcc-mixed-formats.json')
19+
access_columns_with_iterables_cat = os.path.join(
20+
here, 'sample-catalogs/access-columns-with-iterables.json'
21+
)
1922

2023

2124
sample_df = pd.DataFrame(

0 commit comments

Comments
 (0)