Skip to content

Commit 6bce595

Browse files
committed
stats: add loan stats endpoint and extend loans index
* the endpoint returns a histogram for loans where requested metrics are grouped by and aggregated * extend the loans with a new stats object * the stats object contains `waiting_time`, `loan_duration` and `available_items_during_request`
1 parent 715e8ac commit 6bce595

File tree

15 files changed

+961
-9
lines changed

15 files changed

+961
-9
lines changed

invenio_app_ils/circulation/indexer.py

Lines changed: 66 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
from invenio_circulation.proxies import current_circulation
1616
from invenio_indexer.api import RecordIndexer
1717
from invenio_pidstore.errors import PIDDeletedError
18+
from invenio_search import current_search_client
1819

1920
from invenio_app_ils.circulation.utils import resolve_item_from_loan
2021
from invenio_app_ils.documents.api import DOCUMENT_PID_TYPE
@@ -97,3 +98,68 @@ def index_extra_fields_for_loan(loan_dict):
9798

9899
can_circulate_items_count = document["circulation"]["can_circulate_items_count"]
99100
loan_dict["can_circulate_items_count"] = can_circulate_items_count
101+
102+
103+
def index_stats_fields_for_loan(loan_dict):
104+
"""Indexer hook to modify the loan record dict before indexing"""
105+
106+
creation_date = datetime.fromisoformat(loan_dict["_created"]).date()
107+
start_date = (
108+
datetime.fromisoformat(loan_dict["start_date"]).date()
109+
if loan_dict.get("start_date")
110+
else None
111+
)
112+
end_date = (
113+
datetime.fromisoformat(loan_dict["end_date"]).date()
114+
if loan_dict.get("end_date")
115+
else None
116+
)
117+
118+
# Collect extra information relevant for stats
119+
stats = {}
120+
121+
# Time ranges in days
122+
if start_date and end_date:
123+
loan_duration = (end_date - start_date).days
124+
stats["loan_duration"] = loan_duration
125+
126+
if creation_date and start_date:
127+
waiting_time = (start_date - creation_date).days
128+
stats["waiting_time"] = waiting_time if waiting_time >= 0 else None
129+
130+
# Document availability during loan request
131+
stat_events_index_name = "events-stats-loan-transitions"
132+
if current_search_client.indices.exists(index=stat_events_index_name):
133+
search_body = {}
134+
135+
loan_pid = loan_dict["pid"]
136+
search_body = {
137+
"query": {
138+
"bool": {
139+
"must": [
140+
{"term": {"field": "available_items_during_request_count"}},
141+
{"term": {"pid_value": loan_pid}},
142+
],
143+
}
144+
},
145+
}
146+
147+
search_result = current_search_client.search(
148+
index=stat_events_index_name, body=search_body
149+
)
150+
hits = search_result["hits"]["hits"]
151+
if len(hits) == 1:
152+
request_transition_event = hits[0]["_source"]
153+
available_items_during_request_count = request_transition_event["value"]
154+
stats["available_items_during_request"] = (
155+
available_items_during_request_count > 0
156+
)
157+
elif len(hits) > 1:
158+
raise ValueError(
159+
f"Multiple request transition events for loan {loan_pid}."
160+
"Expected zero or one."
161+
)
162+
163+
if not "extensions" in loan_dict:
164+
loan_dict["extensions"] = {}
165+
loan_dict["extensions"]["stats"] = stats

invenio_app_ils/circulation/stats/api.py

Lines changed: 97 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,18 @@
11
# -*- coding: utf-8 -*-
22
#
3-
# Copyright (C) 2019 CERN.
3+
# Copyright (C) 2019-2025 CERN.
44
#
55
# invenio-app-ils is free software; you can redistribute it and/or modify it
66
# under the terms of the MIT License; see LICENSE file for more details.
77

88
"""APIs for ILS circulation statistics."""
99

10+
from invenio_search.engine import dsl
11+
1012
from invenio_app_ils.circulation.search import get_most_loaned_documents
13+
from invenio_app_ils.circulation.stats.schemas import (
14+
_OS_NATIVE_AGGREGATE_FUNCTION_TYPES,
15+
)
1116
from invenio_app_ils.proxies import current_app_ils
1217

1318

@@ -49,3 +54,94 @@ def fetch_most_loaned_documents(from_date, to_date, bucket_size):
4954
)
5055

5156
return res
57+
58+
59+
def _generate_metric_agg_field_name(metric):
60+
"""Return the aggregation name used for a metric.
61+
62+
:param metric: Must include 'field' and 'aggregation' keys.
63+
:returns: The aggregation field name in the form '<aggregation>_<field>'.
64+
"""
65+
66+
return f"{metric['aggregation']}__{metric['field']}"
67+
68+
69+
def get_loan_statistics(date_fields, search, group_by, metrics):
70+
"""Aggregate loan statistics for requested metrics.
71+
72+
:param search: The base search object to apply aggregations on
73+
:param group_by: List of group dictionaries with 'field' and optional 'interval' keys.
74+
Example: [{"field": "start_date", "interval": "monthly"}, {"field": "state"}]
75+
:param metrics: List of metric dictionaries with 'field' and 'aggregation' keys.
76+
Example: [{"field": "loan_duration", "aggregation": "avg"}]
77+
:returns: OpenSearch aggregation results with multi-terms histogram and optional metrics
78+
"""
79+
80+
# Build composite aggregation
81+
sources = []
82+
for grouping in group_by:
83+
grouping_field = grouping["field"]
84+
85+
if grouping_field in date_fields:
86+
sources.append(
87+
{
88+
grouping_field: {
89+
"date_histogram": {
90+
"field": grouping_field,
91+
"calendar_interval": grouping["interval"],
92+
"format": "yyyy-MM-dd",
93+
}
94+
}
95+
}
96+
)
97+
else:
98+
sources.append({grouping_field: {"terms": {"field": grouping_field}}})
99+
100+
composite_agg = dsl.A("composite", sources=sources, size=1000)
101+
102+
for metric in metrics:
103+
agg_name = _generate_metric_agg_field_name(metric)
104+
105+
grouping_field = metric["field"]
106+
agg_type = metric["aggregation"]
107+
field_config = {"field": grouping_field}
108+
if agg_type in _OS_NATIVE_AGGREGATE_FUNCTION_TYPES:
109+
composite_agg = composite_agg.metric(
110+
agg_name, dsl.A(agg_type, **field_config)
111+
)
112+
elif agg_type == "median":
113+
composite_agg = composite_agg.metric(
114+
agg_name, dsl.A("percentiles", percents=[50], **field_config)
115+
)
116+
117+
search.aggs.bucket("loan_aggregations", composite_agg)
118+
119+
# Only retrieve aggregation results
120+
search = search[:0]
121+
result = search.execute()
122+
123+
# Parse aggregation results
124+
buckets = []
125+
if hasattr(result.aggregations, "loan_aggregations"):
126+
for bucket in result.aggregations.loan_aggregations.buckets:
127+
bucket_data = {
128+
"key": bucket.key.to_dict(),
129+
"doc_count": bucket.doc_count,
130+
}
131+
132+
for metric in metrics:
133+
agg_name = _generate_metric_agg_field_name(metric)
134+
135+
if hasattr(bucket, agg_name):
136+
agg_result = getattr(bucket, agg_name)
137+
agg_type = metric["aggregation"]
138+
139+
if agg_type in _OS_NATIVE_AGGREGATE_FUNCTION_TYPES:
140+
bucket_data[agg_name] = agg_result.value
141+
elif agg_type == "median":
142+
median_value = agg_result.values.get("50.0")
143+
bucket_data[agg_name] = median_value
144+
145+
buckets.append(bucket_data)
146+
147+
return buckets
Lines changed: 106 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,106 @@
1+
# -*- coding: utf-8 -*-
2+
#
3+
# Copyright (C) 2025 CERN.
4+
#
5+
# invenio-app-ils is free software; you can redistribute it and/or modify it
6+
# under the terms of the MIT License; see LICENSE file for more details.
7+
8+
"""Marshmallow schemas for loan statistics validation."""
9+
10+
import json
11+
import re
12+
13+
from marshmallow import (
14+
Schema,
15+
ValidationError,
16+
fields,
17+
pre_load,
18+
validate,
19+
validates_schema,
20+
)
21+
22+
from invenio_app_ils.errors import InvalidParameterError
23+
24+
_OS_VALID_FIELD_NAME_PATTERN = re.compile(r"^[A-Za-z0-9_.]+$")
25+
_OS_NATIVE_AGGREGATE_FUNCTION_TYPES = {"avg", "sum", "min", "max"}
26+
_VALID_AGGREGATE_FUNCTION_TYPES = _OS_NATIVE_AGGREGATE_FUNCTION_TYPES.union({"median"})
27+
_VALID_DATE_INTERVALS = {"1d", "1w", "1M", "1q", "1y"}
28+
29+
30+
def validate_field_name(field_name):
31+
"""Validate a field name for search to prevent injection attacks.
32+
33+
:param field_name: The field name to validate
34+
:raises InvalidParameterError: If field name is invalid or potentially malicious
35+
"""
36+
if not _OS_VALID_FIELD_NAME_PATTERN.match(field_name):
37+
raise InvalidParameterError(
38+
description=(
39+
f"Invalid field name '{field_name}'. "
40+
"Field names may contain only alphanumeric characters, underscores, "
41+
"and dots."
42+
)
43+
)
44+
45+
46+
class SecureFieldNameField(fields.String):
47+
"""Marshmallow field that validates field names to prevent injection attacks."""
48+
49+
def _deserialize(self, value, attr, data, **kwargs):
50+
"""Deserialize and validate field name."""
51+
52+
field_name = super()._deserialize(value, attr, data, **kwargs)
53+
validate_field_name(field_name)
54+
return field_name
55+
56+
57+
class GroupByItemSchema(Schema):
58+
field = SecureFieldNameField(required=True)
59+
interval = fields.String(validate=validate.OneOf(_VALID_DATE_INTERVALS))
60+
61+
@validates_schema
62+
def validate_date_fields(self, data, **kwargs):
63+
"""Validate that date fields have an interval and non-date fields do not."""
64+
65+
date_fields = self.context["date_fields"]
66+
field = data.get("field")
67+
interval = data.get("interval")
68+
if field in date_fields and not interval:
69+
raise ValidationError(
70+
{"interval": ["Interval is required for date fields."]}
71+
)
72+
if field not in date_fields and interval is not None:
73+
raise ValidationError(
74+
{"interval": ["Interval must not be provided for non-date fields."]}
75+
)
76+
77+
78+
class MetricItemSchema(Schema):
79+
"""Schema for validating a single metric item."""
80+
81+
field = SecureFieldNameField(required=True)
82+
aggregation = fields.String(
83+
required=True, validate=validate.OneOf(_VALID_AGGREGATE_FUNCTION_TYPES)
84+
)
85+
86+
87+
class HistogramParamsSchema(Schema):
88+
"""Schema for validating the query string parameters for the histogram endpoint"""
89+
90+
metrics = fields.List(fields.Nested(MetricItemSchema))
91+
group_by = fields.List(
92+
fields.Nested(GroupByItemSchema), required=True, validate=validate.Length(min=1)
93+
)
94+
q = fields.String()
95+
96+
def __init__(self, date_fields, *args, **kwargs):
97+
super().__init__(*args, **kwargs)
98+
self.context = {"date_fields": set(date_fields)}
99+
100+
@pre_load
101+
def parse_query_string(self, data, **kwargs):
102+
"""Parse the metrics and group_by parameters from JSON strings."""
103+
104+
for key in ("metrics", "group_by"):
105+
data[key] = json.loads(data.get(key))
106+
return data

0 commit comments

Comments
 (0)