@@ -47,6 +47,77 @@ use chrono::{
4747 DateTime , Datelike , Duration , LocalResult , NaiveDateTime , Offset , TimeDelta , Timelike ,
4848} ;
4949
50+ /// Precision that `date_trunc` truncates a timestamp to; variants are listed from finest (`Microsecond`) to coarsest (`Year`).
51+ #[ derive( Debug , Clone , Copy , PartialEq , Eq ) ]
52+ enum DateTruncGranularity {
53+ Microsecond ,
54+ Millisecond ,
55+ Second ,
56+ Minute ,
57+ Hour ,
58+ Day ,
59+ Week ,
60+ Month ,
61+ Quarter ,
62+ Year ,
63+ }
64+
65+ impl DateTruncGranularity {
66+ /// Lowercase names of every supported granularity; `from_str` joins them into its error message.
67+ /// Kept as a plain `const` slice rather than a `HashMap`, which would need lazy initialization.
68+ /// This list is separate from the `match` arms in `from_str` and must be kept in sync with them by hand.
69+ const SUPPORTED_GRANULARITIES : & [ & str ] = & [
70+ "microsecond" ,
71+ "millisecond" ,
72+ "second" ,
73+ "minute" ,
74+ "hour" ,
75+ "day" ,
76+ "week" ,
77+ "month" ,
78+ "quarter" ,
79+ "year" ,
80+ ] ;
81+
82+ /// Parses a granularity name, ignoring case (the input is lowercased first); an unrecognized name yields an `exec_err!` error that lists the supported values.
83+ fn from_str ( s : & str ) -> Result < Self > {
84+ // Match on a lowercased copy of `s`; note that `to_lowercase` allocates a new `String` on every call.
85+ match s. to_lowercase ( ) . as_str ( ) {
86+ "microsecond" => Ok ( Self :: Microsecond ) ,
87+ "millisecond" => Ok ( Self :: Millisecond ) ,
88+ "second" => Ok ( Self :: Second ) ,
89+ "minute" => Ok ( Self :: Minute ) ,
90+ "hour" => Ok ( Self :: Hour ) ,
91+ "day" => Ok ( Self :: Day ) ,
92+ "week" => Ok ( Self :: Week ) ,
93+ "month" => Ok ( Self :: Month ) ,
94+ "quarter" => Ok ( Self :: Quarter ) ,
95+ "year" => Ok ( Self :: Year ) ,
96+ _ => {
97+ let supported = Self :: SUPPORTED_GRANULARITIES . join ( ", " ) ;
98+ exec_err ! (
99+ "Unsupported date_trunc granularity: '{s}'. Supported values are: {supported}"
100+ )
101+ }
102+ }
103+ }
104+
105+ /// Returns true for `Second`, `Minute`, `Millisecond` and `Microsecond`: the "fine" granularities
106+ /// that the documented intent is to truncate with simple arithmetic, whatever the time zone.
107+ fn is_fine_granularity ( & self ) -> bool {
108+ matches ! (
109+ self ,
110+ Self :: Second | Self :: Minute | Self :: Millisecond | Self :: Microsecond
111+ )
112+ }
113+
114+ /// Returns true for every fine granularity plus `Hour` and `Day`, which can be handled
115+ /// with simple arithmetic only in UTC (uniform durations there).
116+ fn is_fine_granularity_utc ( & self ) -> bool {
117+ self . is_fine_granularity ( ) || matches ! ( self , Self :: Hour | Self :: Day )
118+ }
119+ }
120+
50121#[ user_doc(
51122 doc_section( label = "Time and Date Functions" ) ,
52123 description = "Truncates a timestamp value to a specified precision." ,
@@ -172,7 +243,7 @@ impl ScalarUDFImpl for DateTruncFunc {
172243 let args = args. args ;
173244 let ( granularity, array) = ( & args[ 0 ] , & args[ 1 ] ) ;
174245
175- let granularity = if let ColumnarValue :: Scalar ( ScalarValue :: Utf8 ( Some ( v) ) ) =
246+ let granularity_str = if let ColumnarValue :: Scalar ( ScalarValue :: Utf8 ( Some ( v) ) ) =
176247 granularity
177248 {
178249 v. to_lowercase ( )
@@ -183,54 +254,46 @@ impl ScalarUDFImpl for DateTruncFunc {
183254 return exec_err ! ( "Granularity of `date_trunc` must be non-null scalar Utf8" ) ;
184255 } ;
185256
257+ let granularity = DateTruncGranularity :: from_str ( & granularity_str) ?;
258+
186259 fn process_array < T : ArrowTimestampType > (
187260 array : & dyn Array ,
188- granularity : String ,
261+ granularity : DateTruncGranularity ,
189262 tz_opt : & Option < Arc < str > > ,
190263 ) -> Result < ColumnarValue > {
191264 let parsed_tz = parse_tz ( tz_opt) ?;
192265 let array = as_primitive_array :: < T > ( array) ?;
193266
194- // fast path for fine granularities
195- if matches ! (
196- granularity. as_str( ) ,
197- // For modern timezones, it's correct to truncate "minute" in this way.
198- // Both datafusion and arrow are ignoring historical timezone's non-minute granularity
199- // bias (e.g., Asia/Kathmandu before 1919 is UTC+05:41:16).
200- "second" | "minute" | "millisecond" | "microsecond"
201- ) ||
267+ // Fast path: fine granularities are truncated over the whole array by the dedicated array helper.
268+ // For modern timezones, it's correct to truncate "minute" in this way.
269+ // Both datafusion and arrow are ignoring historical timezone's non-minute granularity
270+ // bias (e.g., Asia/Kathmandu before 1919 is UTC+05:41:16).
202271 // In UTC, "hour" and "day" have uniform durations and can be truncated with simple arithmetic
203- ( parsed_tz. is_none ( ) && matches ! ( granularity. as_str( ) , "hour" | "day" ) )
272+ if granularity. is_fine_granularity ( )
273+ || ( parsed_tz. is_none ( ) && granularity. is_fine_granularity_utc ( ) ) // in effect adds `Hour` and `Day`: the fine granularities are already matched by the first operand
204274 {
205275 let result = general_date_trunc_array_fine_granularity (
206276 T :: UNIT ,
207277 array,
208- granularity. as_str ( ) ,
278+ granularity,
209279 ) ?;
210280 return Ok ( ColumnarValue :: Array ( result) ) ;
211281 }
212282
213283 let array: PrimitiveArray < T > = array
214- . try_unary ( |x| {
215- general_date_trunc ( T :: UNIT , x, parsed_tz, granularity. as_str ( ) )
216- } ) ?
284+ . try_unary ( |x| general_date_trunc ( T :: UNIT , x, parsed_tz, granularity) ) ? // slow path: truncate each value individually, passing the parsed time zone along
217285 . with_timezone_opt ( tz_opt. clone ( ) ) ;
218286 Ok ( ColumnarValue :: Array ( Arc :: new ( array) ) )
219287 }
220288
221289 fn process_scalar < T : ArrowTimestampType > (
222290 v : & Option < i64 > ,
223- granularity : String ,
291+ granularity : DateTruncGranularity ,
224292 tz_opt : & Option < Arc < str > > ,
225293 ) -> Result < ColumnarValue > {
226294 let parsed_tz = parse_tz ( tz_opt) ?;
227295 let value = if let Some ( v) = v {
228- Some ( general_date_trunc (
229- T :: UNIT ,
230- * v,
231- parsed_tz,
232- granularity. as_str ( ) ,
233- ) ?)
296+ Some ( general_date_trunc ( T :: UNIT , * v, parsed_tz, granularity) ?)
234297 } else {
235298 None
236299 } ;
@@ -308,57 +371,57 @@ impl ScalarUDFImpl for DateTruncFunc {
308371 }
309372}
310373
311- fn _date_trunc_coarse < T > ( granularity : & str , value : Option < T > ) -> Result < Option < T > >
374+ fn _date_trunc_coarse < T > ( // resets the date/time fields below `granularity`; a `None` input or a failed `with_*` step ends up as `Ok(None)`
375+ granularity : DateTruncGranularity ,
376+ value : Option < T > ,
377+ ) -> Result < Option < T > >
312378where
313379 T : Datelike + Timelike + Sub < Duration , Output = T > + Copy ,
314380{
315381 let value = match granularity {
316- "millisecond" => value,
317- "microsecond" => value,
318- "second" => value. and_then ( |d| d. with_nanosecond ( 0 ) ) ,
319- "minute" => value
382+ DateTruncGranularity :: Millisecond => value, // millisecond and microsecond pass the value through untouched in this helper
383+ DateTruncGranularity :: Microsecond => value,
384+ DateTruncGranularity :: Second => value. and_then ( |d| d. with_nanosecond ( 0 ) ) ,
385+ DateTruncGranularity :: Minute => value
320386 . and_then ( |d| d. with_nanosecond ( 0 ) )
321387 . and_then ( |d| d. with_second ( 0 ) ) ,
322- "hour" => value
388+ DateTruncGranularity :: Hour => value
323389 . and_then ( |d| d. with_nanosecond ( 0 ) )
324390 . and_then ( |d| d. with_second ( 0 ) )
325391 . and_then ( |d| d. with_minute ( 0 ) ) ,
326- "day" => value
392+ DateTruncGranularity :: Day => value
327393 . and_then ( |d| d. with_nanosecond ( 0 ) )
328394 . and_then ( |d| d. with_second ( 0 ) )
329395 . and_then ( |d| d. with_minute ( 0 ) )
330396 . and_then ( |d| d. with_hour ( 0 ) ) ,
331- "week" => value
397+ DateTruncGranularity :: Week => value
332398 . and_then ( |d| d. with_nanosecond ( 0 ) )
333399 . and_then ( |d| d. with_second ( 0 ) )
334400 . and_then ( |d| d. with_minute ( 0 ) )
335401 . and_then ( |d| d. with_hour ( 0 ) )
336402 . map ( |d| {
337403 d - TimeDelta :: try_seconds ( 60 * 60 * 24 * d. weekday ( ) as i64 ) . unwrap ( ) // steps back `weekday() as i64` whole days; NOTE(review): presumably Monday casts to 0 so weeks start on Monday - confirm against chrono
338404 } ) ,
339- "month" => value
405+ DateTruncGranularity :: Month => value
340406 . and_then ( |d| d. with_nanosecond ( 0 ) )
341407 . and_then ( |d| d. with_second ( 0 ) )
342408 . and_then ( |d| d. with_minute ( 0 ) )
343409 . and_then ( |d| d. with_hour ( 0 ) )
344410 . and_then ( |d| d. with_day0 ( 0 ) ) ,
345- "quarter" => value
411+ DateTruncGranularity :: Quarter => value
346412 . and_then ( |d| d. with_nanosecond ( 0 ) )
347413 . and_then ( |d| d. with_second ( 0 ) )
348414 . and_then ( |d| d. with_minute ( 0 ) )
349415 . and_then ( |d| d. with_hour ( 0 ) )
350416 . and_then ( |d| d. with_day0 ( 0 ) )
351417 . and_then ( |d| d. with_month ( quarter_month ( & d) ) ) , // `quarter_month` is defined elsewhere; presumably the first month of d's quarter - confirm
352- "year" => value
418+ DateTruncGranularity :: Year => value
353419 . and_then ( |d| d. with_nanosecond ( 0 ) )
354420 . and_then ( |d| d. with_second ( 0 ) )
355421 . and_then ( |d| d. with_minute ( 0 ) )
356422 . and_then ( |d| d. with_hour ( 0 ) )
357423 . and_then ( |d| d. with_day0 ( 0 ) )
358424 . and_then ( |d| d. with_month0 ( 0 ) ) ,
359- unsupported => {
360- return exec_err ! ( "Unsupported date_trunc granularity: {unsupported}" ) ;
361- }
362425 } ;
363426 Ok ( value) // no catch-all arm is needed: the enum match above names every variant
364427}
@@ -371,7 +434,7 @@ where
371434}
372435
373436fn _date_trunc_coarse_with_tz (
374- granularity : & str ,
437+ granularity : DateTruncGranularity ,
375438 value : Option < DateTime < Tz > > ,
376439) -> Result < Option < i64 > > {
377440 if let Some ( value) = value {
@@ -413,7 +476,7 @@ fn _date_trunc_coarse_with_tz(
413476}
414477
415478fn _date_trunc_coarse_without_tz (
416- granularity : & str ,
479+ granularity : DateTruncGranularity ,
417480 value : Option < NaiveDateTime > ,
418481) -> Result < Option < i64 > > {
419482 let value = _date_trunc_coarse :: < NaiveDateTime > ( granularity, value) ?;
@@ -424,7 +487,11 @@ fn _date_trunc_coarse_without_tz(
424487/// epoch, for granularities greater than 1 second, in taking into
425488/// account that some granularities are not uniform durations of time
426489/// (e.g. months are not always the same lengths, leap seconds, etc)
427- fn date_trunc_coarse ( granularity : & str , value : i64 , tz : Option < Tz > ) -> Result < i64 > {
490+ fn date_trunc_coarse (
491+ granularity : DateTruncGranularity ,
492+ value : i64 ,
493+ tz : Option < Tz > ,
494+ ) -> Result < i64 > {
428495 let value = match tz {
429496 Some ( tz) => {
430497 // Use chrono DateTime<Tz> to clear the various fields because need to clear per timezone,
@@ -454,30 +521,30 @@ fn date_trunc_coarse(granularity: &str, value: i64, tz: Option<Tz>) -> Result<i6
454521fn general_date_trunc_array_fine_granularity < T : ArrowTimestampType > (
455522 tu : TimeUnit ,
456523 array : & PrimitiveArray < T > ,
457- granularity : & str ,
524+ granularity : DateTruncGranularity ,
458525) -> Result < ArrayRef > {
459526 let unit = match ( tu, granularity) {
460- ( Second , "minute" ) => NonZeroI64 :: new ( 60 ) ,
461- ( Second , "hour" ) => NonZeroI64 :: new ( 3600 ) ,
462- ( Second , "day" ) => NonZeroI64 :: new ( 86400 ) ,
463-
464- ( Millisecond , "second" ) => NonZeroI64 :: new ( 1_000 ) ,
465- ( Millisecond , "minute" ) => NonZeroI64 :: new ( 60_000 ) ,
466- ( Millisecond , "hour" ) => NonZeroI64 :: new ( 3_600_000 ) ,
467- ( Millisecond , "day" ) => NonZeroI64 :: new ( 86_400_000 ) ,
468-
469- ( Microsecond , "millisecond" ) => NonZeroI64 :: new ( 1_000 ) ,
470- ( Microsecond , "second" ) => NonZeroI64 :: new ( 1_000_000 ) ,
471- ( Microsecond , "minute" ) => NonZeroI64 :: new ( 60_000_000 ) ,
472- ( Microsecond , "hour" ) => NonZeroI64 :: new ( 3_600_000_000 ) ,
473- ( Microsecond , "day" ) => NonZeroI64 :: new ( 86_400_000_000 ) ,
474-
475- ( Nanosecond , "microsecond" ) => NonZeroI64 :: new ( 1_000 ) ,
476- ( Nanosecond , "millisecond" ) => NonZeroI64 :: new ( 1_000_000 ) ,
477- ( Nanosecond , "second" ) => NonZeroI64 :: new ( 1_000_000_000 ) ,
478- ( Nanosecond , "minute" ) => NonZeroI64 :: new ( 60_000_000_000 ) ,
479- ( Nanosecond , "hour" ) => NonZeroI64 :: new ( 3_600_000_000_000 ) ,
480- ( Nanosecond , "day" ) => NonZeroI64 :: new ( 86_400_000_000_000 ) ,
527+ ( Second , DateTruncGranularity :: Minute ) => NonZeroI64 :: new ( 60 ) ,
528+ ( Second , DateTruncGranularity :: Hour ) => NonZeroI64 :: new ( 3600 ) ,
529+ ( Second , DateTruncGranularity :: Day ) => NonZeroI64 :: new ( 86400 ) ,
530+
531+ ( Millisecond , DateTruncGranularity :: Second ) => NonZeroI64 :: new ( 1_000 ) ,
532+ ( Millisecond , DateTruncGranularity :: Minute ) => NonZeroI64 :: new ( 60_000 ) ,
533+ ( Millisecond , DateTruncGranularity :: Hour ) => NonZeroI64 :: new ( 3_600_000 ) ,
534+ ( Millisecond , DateTruncGranularity :: Day ) => NonZeroI64 :: new ( 86_400_000 ) ,
535+
536+ ( Microsecond , DateTruncGranularity :: Millisecond ) => NonZeroI64 :: new ( 1_000 ) ,
537+ ( Microsecond , DateTruncGranularity :: Second ) => NonZeroI64 :: new ( 1_000_000 ) ,
538+ ( Microsecond , DateTruncGranularity :: Minute ) => NonZeroI64 :: new ( 60_000_000 ) ,
539+ ( Microsecond , DateTruncGranularity :: Hour ) => NonZeroI64 :: new ( 3_600_000_000 ) ,
540+ ( Microsecond , DateTruncGranularity :: Day ) => NonZeroI64 :: new ( 86_400_000_000 ) ,
541+
542+ ( Nanosecond , DateTruncGranularity :: Microsecond ) => NonZeroI64 :: new ( 1_000 ) ,
543+ ( Nanosecond , DateTruncGranularity :: Millisecond ) => NonZeroI64 :: new ( 1_000_000 ) ,
544+ ( Nanosecond , DateTruncGranularity :: Second ) => NonZeroI64 :: new ( 1_000_000_000 ) ,
545+ ( Nanosecond , DateTruncGranularity :: Minute ) => NonZeroI64 :: new ( 60_000_000_000 ) ,
546+ ( Nanosecond , DateTruncGranularity :: Hour ) => NonZeroI64 :: new ( 3_600_000_000_000 ) ,
547+ ( Nanosecond , DateTruncGranularity :: Day ) => NonZeroI64 :: new ( 86_400_000_000_000 ) ,
481548 _ => None ,
482549 } ;
483550
@@ -502,7 +569,7 @@ fn general_date_trunc(
502569 tu : TimeUnit ,
503570 value : i64 ,
504571 tz : Option < Tz > ,
505- granularity : & str ,
572+ granularity : DateTruncGranularity ,
506573) -> Result < i64 , DataFusionError > {
507574 let scale = match tu {
508575 Second => 1_000_000_000 ,
@@ -516,25 +583,29 @@ fn general_date_trunc(
516583
517584 let result = match tu {
518585 Second => match granularity {
519- "minute" => nano / 1_000_000_000 / 60 * 60 ,
586+ DateTruncGranularity :: Minute => nano / 1_000_000_000 / 60 * 60 ,
520587 _ => nano / 1_000_000_000 ,
521588 } ,
522589 Millisecond => match granularity {
523- "minute" => nano / 1_000_000 / 1_000 / 60 * 1_000 * 60 ,
524- "second" => nano / 1_000_000 / 1_000 * 1_000 ,
590+ DateTruncGranularity :: Minute => nano / 1_000_000 / 1_000 / 60 * 1_000 * 60 ,
591+ DateTruncGranularity :: Second => nano / 1_000_000 / 1_000 * 1_000 ,
525592 _ => nano / 1_000_000 ,
526593 } ,
527594 Microsecond => match granularity {
528- "minute" => nano / 1_000 / 1_000_000 / 60 * 60 * 1_000_000 ,
529- "second" => nano / 1_000 / 1_000_000 * 1_000_000 ,
530- "millisecond" => nano / 1_000 / 1_000 * 1_000 ,
595+ DateTruncGranularity :: Minute => {
596+ nano / 1_000 / 1_000_000 / 60 * 60 * 1_000_000
597+ }
598+ DateTruncGranularity :: Second => nano / 1_000 / 1_000_000 * 1_000_000 ,
599+ DateTruncGranularity :: Millisecond => nano / 1_000 / 1_000 * 1_000 ,
531600 _ => nano / 1_000 ,
532601 } ,
533602 _ => match granularity {
534- "minute" => nano / 1_000_000_000 / 60 * 1_000_000_000 * 60 ,
535- "second" => nano / 1_000_000_000 * 1_000_000_000 ,
536- "millisecond" => nano / 1_000_000 * 1_000_000 ,
537- "microsecond" => nano / 1_000 * 1_000 ,
603+ DateTruncGranularity :: Minute => {
604+ nano / 1_000_000_000 / 60 * 1_000_000_000 * 60
605+ }
606+ DateTruncGranularity :: Second => nano / 1_000_000_000 * 1_000_000_000 ,
607+ DateTruncGranularity :: Millisecond => nano / 1_000_000 * 1_000_000 ,
608+ DateTruncGranularity :: Microsecond => nano / 1_000 * 1_000 ,
538609 _ => nano,
539610 } ,
540611 } ;
@@ -554,7 +625,9 @@ fn parse_tz(tz: &Option<Arc<str>>) -> Result<Option<Tz>> {
554625mod tests {
555626 use std:: sync:: Arc ;
556627
557- use crate :: datetime:: date_trunc:: { date_trunc_coarse, DateTruncFunc } ;
628+ use crate :: datetime:: date_trunc:: {
629+ date_trunc_coarse, DateTruncFunc , DateTruncGranularity ,
630+ } ;
558631
559632 use arrow:: array:: cast:: as_primitive_array;
560633 use arrow:: array:: types:: TimestampNanosecondType ;
@@ -655,7 +728,8 @@ mod tests {
655728 cases. iter ( ) . for_each ( |( original, granularity, expected) | {
656729 let left = string_to_timestamp_nanos ( original) . unwrap ( ) ;
657730 let right = string_to_timestamp_nanos ( expected) . unwrap ( ) ;
658- let result = date_trunc_coarse ( granularity, left, None ) . unwrap ( ) ;
731+ let granularity_enum = DateTruncGranularity :: from_str ( granularity) . unwrap ( ) ;
732+ let result = date_trunc_coarse ( granularity_enum, left, None ) . unwrap ( ) ;
659733 assert_eq ! ( result, right, "{original} = {expected}" ) ;
660734 } ) ;
661735 }
0 commit comments