Skip to content

Commit 923b5e7

Browse files
authored
chore: use enum as date_trunc granularity (#18390)
## Which issue does this PR close? <!-- We generally require a GitHub issue to be filed for all bug fixes and enhancements and this helps us generate change logs for our releases. You can link an issue to this PR using the GitHub syntax. For example `Closes #123` indicates that this PR will close issue #123. --> - Closes #. ## Rationale for this change Found while testing #18356: ``` > select date_trunc('YY', now()); Execution error: Unsupported date_trunc granularity: yy ``` This error is confusing; it should list the supported values. <!-- Why are you proposing this change? If this is already explained clearly in the issue then this section is not needed. Explaining clearly why changes are proposed helps reviewers understand your changes and offer better suggestions for fixes. --> ## What changes are included in this PR? <!-- There is no need to duplicate the description in the issue here but it is sometimes worth providing a summary of the individual changes in this PR. --> ## Are these changes tested? <!-- We typically require tests for all PRs in order to: 1. Prevent the code from being accidentally broken by subsequent changes 2. Serve as another way to document the expected behavior of the code If tests are not included in your PR, please explain why (for example, are they covered by existing tests)? --> ## Are there any user-facing changes? <!-- If there are user-facing changes then we may require documentation to be updated before approving the PR. --> <!-- If there are any breaking changes to public APIs, please add the `api change` label. -->
1 parent e65dafe commit 923b5e7

File tree

2 files changed

+156
-74
lines changed

2 files changed

+156
-74
lines changed

datafusion/functions/src/datetime/date_trunc.rs

Lines changed: 148 additions & 74 deletions
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,77 @@ use chrono::{
4747
DateTime, Datelike, Duration, LocalResult, NaiveDateTime, Offset, TimeDelta, Timelike,
4848
};
4949

50+
/// Unit down to which `date_trunc` truncates a timestamp.
///
/// Variants are declared from the finest unit to the coarsest. Each one is
/// produced by `DateTruncGranularity::from_str` from the lower-case name
/// quoted in its doc comment.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
enum DateTruncGranularity {
    /// Parsed from `"microsecond"`.
    Microsecond,
    /// Parsed from `"millisecond"`.
    Millisecond,
    /// Parsed from `"second"`.
    Second,
    /// Parsed from `"minute"`.
    Minute,
    /// Parsed from `"hour"`.
    Hour,
    /// Parsed from `"day"`.
    Day,
    /// Parsed from `"week"`.
    Week,
    /// Parsed from `"month"`.
    Month,
    /// Parsed from `"quarter"`.
    Quarter,
    /// Parsed from `"year"`.
    Year,
}
64+
65+
impl DateTruncGranularity {
66+
/// List of all supported granularity values
67+
/// Cannot use HashMap here as it would require lazy_static or once_cell,
68+
/// Rust does not support const HashMap yet.
69+
const SUPPORTED_GRANULARITIES: &[&str] = &[
70+
"microsecond",
71+
"millisecond",
72+
"second",
73+
"minute",
74+
"hour",
75+
"day",
76+
"week",
77+
"month",
78+
"quarter",
79+
"year",
80+
];
81+
82+
/// Parse a granularity string into a DateTruncGranularity enum
83+
fn from_str(s: &str) -> Result<Self> {
84+
// Using match for O(1) lookup - compiler optimizes this into a jump table or perfect hash
85+
match s.to_lowercase().as_str() {
86+
"microsecond" => Ok(Self::Microsecond),
87+
"millisecond" => Ok(Self::Millisecond),
88+
"second" => Ok(Self::Second),
89+
"minute" => Ok(Self::Minute),
90+
"hour" => Ok(Self::Hour),
91+
"day" => Ok(Self::Day),
92+
"week" => Ok(Self::Week),
93+
"month" => Ok(Self::Month),
94+
"quarter" => Ok(Self::Quarter),
95+
"year" => Ok(Self::Year),
96+
_ => {
97+
let supported = Self::SUPPORTED_GRANULARITIES.join(", ");
98+
exec_err!(
99+
"Unsupported date_trunc granularity: '{s}'. Supported values are: {supported}"
100+
)
101+
}
102+
}
103+
}
104+
105+
/// Returns true if this granularity can be handled with simple arithmetic
106+
/// (fine granularity: second, minute, millisecond, microsecond)
107+
fn is_fine_granularity(&self) -> bool {
108+
matches!(
109+
self,
110+
Self::Second | Self::Minute | Self::Millisecond | Self::Microsecond
111+
)
112+
}
113+
114+
/// Returns true if this granularity can be handled with simple arithmetic in UTC
115+
/// (hour and day in addition to fine granularities)
116+
fn is_fine_granularity_utc(&self) -> bool {
117+
self.is_fine_granularity() || matches!(self, Self::Hour | Self::Day)
118+
}
119+
}
120+
50121
#[user_doc(
51122
doc_section(label = "Time and Date Functions"),
52123
description = "Truncates a timestamp value to a specified precision.",
@@ -172,7 +243,7 @@ impl ScalarUDFImpl for DateTruncFunc {
172243
let args = args.args;
173244
let (granularity, array) = (&args[0], &args[1]);
174245

175-
let granularity = if let ColumnarValue::Scalar(ScalarValue::Utf8(Some(v))) =
246+
let granularity_str = if let ColumnarValue::Scalar(ScalarValue::Utf8(Some(v))) =
176247
granularity
177248
{
178249
v.to_lowercase()
@@ -183,54 +254,46 @@ impl ScalarUDFImpl for DateTruncFunc {
183254
return exec_err!("Granularity of `date_trunc` must be non-null scalar Utf8");
184255
};
185256

257+
let granularity = DateTruncGranularity::from_str(&granularity_str)?;
258+
186259
fn process_array<T: ArrowTimestampType>(
187260
array: &dyn Array,
188-
granularity: String,
261+
granularity: DateTruncGranularity,
189262
tz_opt: &Option<Arc<str>>,
190263
) -> Result<ColumnarValue> {
191264
let parsed_tz = parse_tz(tz_opt)?;
192265
let array = as_primitive_array::<T>(array)?;
193266

194-
// fast path for fine granularities
195-
if matches!(
196-
granularity.as_str(),
197-
// For modern timezones, it's correct to truncate "minute" in this way.
198-
// Both datafusion and arrow are ignoring historical timezone's non-minute granularity
199-
// bias (e.g., Asia/Kathmandu before 1919 is UTC+05:41:16).
200-
"second" | "minute" | "millisecond" | "microsecond"
201-
) ||
267+
// fast path for fine granularity
268+
// For modern timezones, it's correct to truncate "minute" in this way.
269+
// Both datafusion and arrow are ignoring historical timezone's non-minute granularity
270+
// bias (e.g., Asia/Kathmandu before 1919 is UTC+05:41:16).
202271
// In UTC, "hour" and "day" have uniform durations and can be truncated with simple arithmetic
203-
(parsed_tz.is_none() && matches!(granularity.as_str(), "hour" | "day"))
272+
if granularity.is_fine_granularity()
273+
|| (parsed_tz.is_none() && granularity.is_fine_granularity_utc())
204274
{
205275
let result = general_date_trunc_array_fine_granularity(
206276
T::UNIT,
207277
array,
208-
granularity.as_str(),
278+
granularity,
209279
)?;
210280
return Ok(ColumnarValue::Array(result));
211281
}
212282

213283
let array: PrimitiveArray<T> = array
214-
.try_unary(|x| {
215-
general_date_trunc(T::UNIT, x, parsed_tz, granularity.as_str())
216-
})?
284+
.try_unary(|x| general_date_trunc(T::UNIT, x, parsed_tz, granularity))?
217285
.with_timezone_opt(tz_opt.clone());
218286
Ok(ColumnarValue::Array(Arc::new(array)))
219287
}
220288

221289
fn process_scalar<T: ArrowTimestampType>(
222290
v: &Option<i64>,
223-
granularity: String,
291+
granularity: DateTruncGranularity,
224292
tz_opt: &Option<Arc<str>>,
225293
) -> Result<ColumnarValue> {
226294
let parsed_tz = parse_tz(tz_opt)?;
227295
let value = if let Some(v) = v {
228-
Some(general_date_trunc(
229-
T::UNIT,
230-
*v,
231-
parsed_tz,
232-
granularity.as_str(),
233-
)?)
296+
Some(general_date_trunc(T::UNIT, *v, parsed_tz, granularity)?)
234297
} else {
235298
None
236299
};
@@ -308,57 +371,57 @@ impl ScalarUDFImpl for DateTruncFunc {
308371
}
309372
}
310373

311-
fn _date_trunc_coarse<T>(granularity: &str, value: Option<T>) -> Result<Option<T>>
374+
fn _date_trunc_coarse<T>(
375+
granularity: DateTruncGranularity,
376+
value: Option<T>,
377+
) -> Result<Option<T>>
312378
where
313379
T: Datelike + Timelike + Sub<Duration, Output = T> + Copy,
314380
{
315381
let value = match granularity {
316-
"millisecond" => value,
317-
"microsecond" => value,
318-
"second" => value.and_then(|d| d.with_nanosecond(0)),
319-
"minute" => value
382+
DateTruncGranularity::Millisecond => value,
383+
DateTruncGranularity::Microsecond => value,
384+
DateTruncGranularity::Second => value.and_then(|d| d.with_nanosecond(0)),
385+
DateTruncGranularity::Minute => value
320386
.and_then(|d| d.with_nanosecond(0))
321387
.and_then(|d| d.with_second(0)),
322-
"hour" => value
388+
DateTruncGranularity::Hour => value
323389
.and_then(|d| d.with_nanosecond(0))
324390
.and_then(|d| d.with_second(0))
325391
.and_then(|d| d.with_minute(0)),
326-
"day" => value
392+
DateTruncGranularity::Day => value
327393
.and_then(|d| d.with_nanosecond(0))
328394
.and_then(|d| d.with_second(0))
329395
.and_then(|d| d.with_minute(0))
330396
.and_then(|d| d.with_hour(0)),
331-
"week" => value
397+
DateTruncGranularity::Week => value
332398
.and_then(|d| d.with_nanosecond(0))
333399
.and_then(|d| d.with_second(0))
334400
.and_then(|d| d.with_minute(0))
335401
.and_then(|d| d.with_hour(0))
336402
.map(|d| {
337403
d - TimeDelta::try_seconds(60 * 60 * 24 * d.weekday() as i64).unwrap()
338404
}),
339-
"month" => value
405+
DateTruncGranularity::Month => value
340406
.and_then(|d| d.with_nanosecond(0))
341407
.and_then(|d| d.with_second(0))
342408
.and_then(|d| d.with_minute(0))
343409
.and_then(|d| d.with_hour(0))
344410
.and_then(|d| d.with_day0(0)),
345-
"quarter" => value
411+
DateTruncGranularity::Quarter => value
346412
.and_then(|d| d.with_nanosecond(0))
347413
.and_then(|d| d.with_second(0))
348414
.and_then(|d| d.with_minute(0))
349415
.and_then(|d| d.with_hour(0))
350416
.and_then(|d| d.with_day0(0))
351417
.and_then(|d| d.with_month(quarter_month(&d))),
352-
"year" => value
418+
DateTruncGranularity::Year => value
353419
.and_then(|d| d.with_nanosecond(0))
354420
.and_then(|d| d.with_second(0))
355421
.and_then(|d| d.with_minute(0))
356422
.and_then(|d| d.with_hour(0))
357423
.and_then(|d| d.with_day0(0))
358424
.and_then(|d| d.with_month0(0)),
359-
unsupported => {
360-
return exec_err!("Unsupported date_trunc granularity: {unsupported}");
361-
}
362425
};
363426
Ok(value)
364427
}
@@ -371,7 +434,7 @@ where
371434
}
372435

373436
fn _date_trunc_coarse_with_tz(
374-
granularity: &str,
437+
granularity: DateTruncGranularity,
375438
value: Option<DateTime<Tz>>,
376439
) -> Result<Option<i64>> {
377440
if let Some(value) = value {
@@ -413,7 +476,7 @@ fn _date_trunc_coarse_with_tz(
413476
}
414477

415478
fn _date_trunc_coarse_without_tz(
416-
granularity: &str,
479+
granularity: DateTruncGranularity,
417480
value: Option<NaiveDateTime>,
418481
) -> Result<Option<i64>> {
419482
let value = _date_trunc_coarse::<NaiveDateTime>(granularity, value)?;
@@ -424,7 +487,11 @@ fn _date_trunc_coarse_without_tz(
424487
/// epoch, for granularities greater than 1 second, in taking into
425488
/// account that some granularities are not uniform durations of time
426489
/// (e.g. months are not always the same lengths, leap seconds, etc)
427-
fn date_trunc_coarse(granularity: &str, value: i64, tz: Option<Tz>) -> Result<i64> {
490+
fn date_trunc_coarse(
491+
granularity: DateTruncGranularity,
492+
value: i64,
493+
tz: Option<Tz>,
494+
) -> Result<i64> {
428495
let value = match tz {
429496
Some(tz) => {
430497
// Use chrono DateTime<Tz> to clear the various fields because need to clear per timezone,
@@ -454,30 +521,30 @@ fn date_trunc_coarse(granularity: &str, value: i64, tz: Option<Tz>) -> Result<i6
454521
fn general_date_trunc_array_fine_granularity<T: ArrowTimestampType>(
455522
tu: TimeUnit,
456523
array: &PrimitiveArray<T>,
457-
granularity: &str,
524+
granularity: DateTruncGranularity,
458525
) -> Result<ArrayRef> {
459526
let unit = match (tu, granularity) {
460-
(Second, "minute") => NonZeroI64::new(60),
461-
(Second, "hour") => NonZeroI64::new(3600),
462-
(Second, "day") => NonZeroI64::new(86400),
463-
464-
(Millisecond, "second") => NonZeroI64::new(1_000),
465-
(Millisecond, "minute") => NonZeroI64::new(60_000),
466-
(Millisecond, "hour") => NonZeroI64::new(3_600_000),
467-
(Millisecond, "day") => NonZeroI64::new(86_400_000),
468-
469-
(Microsecond, "millisecond") => NonZeroI64::new(1_000),
470-
(Microsecond, "second") => NonZeroI64::new(1_000_000),
471-
(Microsecond, "minute") => NonZeroI64::new(60_000_000),
472-
(Microsecond, "hour") => NonZeroI64::new(3_600_000_000),
473-
(Microsecond, "day") => NonZeroI64::new(86_400_000_000),
474-
475-
(Nanosecond, "microsecond") => NonZeroI64::new(1_000),
476-
(Nanosecond, "millisecond") => NonZeroI64::new(1_000_000),
477-
(Nanosecond, "second") => NonZeroI64::new(1_000_000_000),
478-
(Nanosecond, "minute") => NonZeroI64::new(60_000_000_000),
479-
(Nanosecond, "hour") => NonZeroI64::new(3_600_000_000_000),
480-
(Nanosecond, "day") => NonZeroI64::new(86_400_000_000_000),
527+
(Second, DateTruncGranularity::Minute) => NonZeroI64::new(60),
528+
(Second, DateTruncGranularity::Hour) => NonZeroI64::new(3600),
529+
(Second, DateTruncGranularity::Day) => NonZeroI64::new(86400),
530+
531+
(Millisecond, DateTruncGranularity::Second) => NonZeroI64::new(1_000),
532+
(Millisecond, DateTruncGranularity::Minute) => NonZeroI64::new(60_000),
533+
(Millisecond, DateTruncGranularity::Hour) => NonZeroI64::new(3_600_000),
534+
(Millisecond, DateTruncGranularity::Day) => NonZeroI64::new(86_400_000),
535+
536+
(Microsecond, DateTruncGranularity::Millisecond) => NonZeroI64::new(1_000),
537+
(Microsecond, DateTruncGranularity::Second) => NonZeroI64::new(1_000_000),
538+
(Microsecond, DateTruncGranularity::Minute) => NonZeroI64::new(60_000_000),
539+
(Microsecond, DateTruncGranularity::Hour) => NonZeroI64::new(3_600_000_000),
540+
(Microsecond, DateTruncGranularity::Day) => NonZeroI64::new(86_400_000_000),
541+
542+
(Nanosecond, DateTruncGranularity::Microsecond) => NonZeroI64::new(1_000),
543+
(Nanosecond, DateTruncGranularity::Millisecond) => NonZeroI64::new(1_000_000),
544+
(Nanosecond, DateTruncGranularity::Second) => NonZeroI64::new(1_000_000_000),
545+
(Nanosecond, DateTruncGranularity::Minute) => NonZeroI64::new(60_000_000_000),
546+
(Nanosecond, DateTruncGranularity::Hour) => NonZeroI64::new(3_600_000_000_000),
547+
(Nanosecond, DateTruncGranularity::Day) => NonZeroI64::new(86_400_000_000_000),
481548
_ => None,
482549
};
483550

@@ -502,7 +569,7 @@ fn general_date_trunc(
502569
tu: TimeUnit,
503570
value: i64,
504571
tz: Option<Tz>,
505-
granularity: &str,
572+
granularity: DateTruncGranularity,
506573
) -> Result<i64, DataFusionError> {
507574
let scale = match tu {
508575
Second => 1_000_000_000,
@@ -516,25 +583,29 @@ fn general_date_trunc(
516583

517584
let result = match tu {
518585
Second => match granularity {
519-
"minute" => nano / 1_000_000_000 / 60 * 60,
586+
DateTruncGranularity::Minute => nano / 1_000_000_000 / 60 * 60,
520587
_ => nano / 1_000_000_000,
521588
},
522589
Millisecond => match granularity {
523-
"minute" => nano / 1_000_000 / 1_000 / 60 * 1_000 * 60,
524-
"second" => nano / 1_000_000 / 1_000 * 1_000,
590+
DateTruncGranularity::Minute => nano / 1_000_000 / 1_000 / 60 * 1_000 * 60,
591+
DateTruncGranularity::Second => nano / 1_000_000 / 1_000 * 1_000,
525592
_ => nano / 1_000_000,
526593
},
527594
Microsecond => match granularity {
528-
"minute" => nano / 1_000 / 1_000_000 / 60 * 60 * 1_000_000,
529-
"second" => nano / 1_000 / 1_000_000 * 1_000_000,
530-
"millisecond" => nano / 1_000 / 1_000 * 1_000,
595+
DateTruncGranularity::Minute => {
596+
nano / 1_000 / 1_000_000 / 60 * 60 * 1_000_000
597+
}
598+
DateTruncGranularity::Second => nano / 1_000 / 1_000_000 * 1_000_000,
599+
DateTruncGranularity::Millisecond => nano / 1_000 / 1_000 * 1_000,
531600
_ => nano / 1_000,
532601
},
533602
_ => match granularity {
534-
"minute" => nano / 1_000_000_000 / 60 * 1_000_000_000 * 60,
535-
"second" => nano / 1_000_000_000 * 1_000_000_000,
536-
"millisecond" => nano / 1_000_000 * 1_000_000,
537-
"microsecond" => nano / 1_000 * 1_000,
603+
DateTruncGranularity::Minute => {
604+
nano / 1_000_000_000 / 60 * 1_000_000_000 * 60
605+
}
606+
DateTruncGranularity::Second => nano / 1_000_000_000 * 1_000_000_000,
607+
DateTruncGranularity::Millisecond => nano / 1_000_000 * 1_000_000,
608+
DateTruncGranularity::Microsecond => nano / 1_000 * 1_000,
538609
_ => nano,
539610
},
540611
};
@@ -554,7 +625,9 @@ fn parse_tz(tz: &Option<Arc<str>>) -> Result<Option<Tz>> {
554625
mod tests {
555626
use std::sync::Arc;
556627

557-
use crate::datetime::date_trunc::{date_trunc_coarse, DateTruncFunc};
628+
use crate::datetime::date_trunc::{
629+
date_trunc_coarse, DateTruncFunc, DateTruncGranularity,
630+
};
558631

559632
use arrow::array::cast::as_primitive_array;
560633
use arrow::array::types::TimestampNanosecondType;
@@ -655,7 +728,8 @@ mod tests {
655728
cases.iter().for_each(|(original, granularity, expected)| {
656729
let left = string_to_timestamp_nanos(original).unwrap();
657730
let right = string_to_timestamp_nanos(expected).unwrap();
658-
let result = date_trunc_coarse(granularity, left, None).unwrap();
731+
let granularity_enum = DateTruncGranularity::from_str(granularity).unwrap();
732+
let result = date_trunc_coarse(granularity_enum, left, None).unwrap();
659733
assert_eq!(result, right, "{original} = {expected}");
660734
});
661735
}

datafusion/sqllogictest/test_files/dates.slt

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -316,6 +316,14 @@ select to_date('2022-01-23', '%Y-%m-%d');
316316
----
317317
2022-01-23
318318

319+
# invalid date_trunc granularity: empty string
320+
query error DataFusion error: Execution error: Unsupported date_trunc granularity: ''. Supported values are: microsecond, millisecond, second, minute, hour, day, week, month, quarter, year
321+
SELECT date_trunc('', to_date('2022-02-23', '%Y-%m-%d'))
322+
323+
# invalid date_trunc granularity: unknown value
324+
query error DataFusion error: Execution error: Unsupported date_trunc granularity: 'invalid'. Supported values are: microsecond, millisecond, second, minute, hour, day, week, month, quarter, year
325+
SELECT date_trunc('invalid', to_date('2022-02-23', '%Y-%m-%d'))
326+
319327
query PPPP
320328
select
321329
date_trunc('YEAR', to_date('2022-02-23', '%Y-%m-%d')),

0 commit comments

Comments
 (0)