Skip to content

Commit 6e3e069

Browse files
committed
fix: correct date_trunc for times before the epoch
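The array-based implementation of date_trunc can produce incorrect results for negative timestamps (i.e. times before 1970-01-01T00:00:00Z). It truncates by dividing the value by the unit and multiplying it back, and because integer division rounds toward zero, a negative value that is not an exact multiple of the unit ends up rounded to a later time rather than an earlier one. Check for any such incorrect values and compensate by subtracting one unit from them.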
1 parent 6cc73fa commit 6e3e069

File tree

2 files changed

+40
-22
lines changed

2 files changed

+40
-22
lines changed

datafusion/functions/src/datetime/date_trunc.rs

Lines changed: 13 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -482,9 +482,20 @@ fn general_date_trunc_array_fine_granularity<T: ArrowTimestampType>(
482482

483483
if let Some(unit) = unit {
484484
let original_type = array.data_type();
485-
let array = arrow::compute::cast(array, &DataType::Int64)?;
486-
let array = arrow::compute::kernels::numeric::div(&array, &unit)?;
485+
let input = arrow::compute::cast(array, &DataType::Int64)?;
486+
let array = arrow::compute::kernels::numeric::div(&input, &unit)?;
487487
let array = arrow::compute::kernels::numeric::mul(&array, &unit)?;
488+
// For timestamps before 1970-01-01T00:00:00Z (negative values)
489+
// it is possible that the truncated value is actually later
490+
// than the original value. Correct any such cases by
491+
// subtracting `unit`.
492+
let too_late = arrow::compute::kernels::cmp::gt(&array, &input)?;
493+
let array = if too_late.true_count() > 0 {
494+
let earlier = arrow::compute::kernels::numeric::sub(&array, &unit)?;
495+
arrow::compute::kernels::zip::zip(&too_late, &earlier, &array)?
496+
} else {
497+
array
498+
};
488499
let array = arrow::compute::cast(&array, original_type)?;
489500
Ok(array)
490501
} else {

datafusion/sqllogictest/test_files/timestamps.slt

Lines changed: 27 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -1687,6 +1687,13 @@ SELECT DATE_TRUNC('second', '2022-08-03 14:38:50Z');
16871687
----
16881688
2022-08-03T14:38:50
16891689

1690+
# DATE_TRUNC handling of times before the Unix epoch, for both a column value and a literal (issue 18334)
1691+
query PPP
1692+
SELECT d, DATE_TRUNC('hour', d), DATE_TRUNC('hour', TIMESTAMP '1900-06-15 07:09:00')
1693+
FROM (VALUES (TIMESTAMP '1900-06-15 07:09:00')) AS t(d);
1694+
----
1695+
1900-06-15T07:09:00 1900-06-15T07:00:00 1900-06-15T07:00:00
1696+
16901697
# Test that interval can add a timestamp
16911698
query P
16921699
SELECT timestamp '2013-07-01 12:00:00' + INTERVAL '8' DAY;
@@ -2244,7 +2251,7 @@ SET TIME ZONE = '+05:00'
22442251

22452252
statement ok
22462253
CREATE TABLE foo (time TIMESTAMPTZ) AS VALUES
2247-
('2020-01-01T00:00:00+05:00'),
2254+
('2020-01-01T00:00:00+05:00'),
22482255
('2020-01-01T01:00:00+05:00'),
22492256
('2020-01-01T02:00:00+05:00'),
22502257
('2020-01-01T03:00:00+05:00')
@@ -2329,17 +2336,17 @@ NULL 1970-01-01T00:00:00 2031-01-19T23:33:25 1970-01-01T00:00:01 1969-12-31T23:5
23292336
# verify timestamp syntax styles are consistent
23302337
query BBBBBBBBBBBBB
23312338
SELECT to_timestamp(null) is null as c1,
2332-
null::timestamp is null as c2,
2333-
cast(null as timestamp) is null as c3,
2334-
to_timestamp(0) = 0::timestamp as c4,
2335-
to_timestamp(1926632005) = 1926632005::timestamp as c5,
2336-
to_timestamp(1) = 1::timestamp as c6,
2337-
to_timestamp(-1) = -1::timestamp as c7,
2339+
null::timestamp is null as c2,
2340+
cast(null as timestamp) is null as c3,
2341+
to_timestamp(0) = 0::timestamp as c4,
2342+
to_timestamp(1926632005) = 1926632005::timestamp as c5,
2343+
to_timestamp(1) = 1::timestamp as c6,
2344+
to_timestamp(-1) = -1::timestamp as c7,
23382345
to_timestamp(0-1) = (0-1)::timestamp as c8,
2339-
to_timestamp(0) = cast(0 as timestamp) as c9,
2340-
to_timestamp(1926632005) = cast(1926632005 as timestamp) as c10,
2341-
to_timestamp(1) = cast(1 as timestamp) as c11,
2342-
to_timestamp(-1) = cast(-1 as timestamp) as c12,
2346+
to_timestamp(0) = cast(0 as timestamp) as c9,
2347+
to_timestamp(1926632005) = cast(1926632005 as timestamp) as c10,
2348+
to_timestamp(1) = cast(1 as timestamp) as c11,
2349+
to_timestamp(-1) = cast(-1 as timestamp) as c12,
23432350
to_timestamp(0-1) = cast(0-1 as timestamp) as c13
23442351
----
23452352
true true true true true true true true true true true true true
@@ -2352,10 +2359,10 @@ Timestamp(ns) Timestamp(ns) Timestamp(ns)
23522359

23532360
# verify timestamp output types using timestamp literal syntax
23542361
query BBBBBB
2355-
SELECT arrow_typeof(to_timestamp(1)) = arrow_typeof(1::timestamp) as c1,
2362+
SELECT arrow_typeof(to_timestamp(1)) = arrow_typeof(1::timestamp) as c1,
23562363
arrow_typeof(to_timestamp(null)) = arrow_typeof(null::timestamp) as c2,
23572364
arrow_typeof(to_timestamp('2023-01-10 12:34:56.000')) = arrow_typeof('2023-01-10 12:34:56.000'::timestamp) as c3,
2358-
arrow_typeof(to_timestamp(1)) = arrow_typeof(cast(1 as timestamp)) as c4,
2365+
arrow_typeof(to_timestamp(1)) = arrow_typeof(cast(1 as timestamp)) as c4,
23592366
arrow_typeof(to_timestamp(null)) = arrow_typeof(cast(null as timestamp)) as c5,
23602367
arrow_typeof(to_timestamp('2023-01-10 12:34:56.000')) = arrow_typeof(cast('2023-01-10 12:34:56.000' as timestamp)) as c6
23612368
----
@@ -2599,13 +2606,13 @@ drop table table_a
25992606
##########
26002607

26012608
statement ok
2602-
create table table_a (ts timestamp) as values
2603-
('2020-09-08T11:42:29Z'::timestamp),
2609+
create table table_a (ts timestamp) as values
2610+
('2020-09-08T11:42:29Z'::timestamp),
26042611
('2020-09-08T12:42:29Z'::timestamp),
26052612
('2020-09-08T13:42:29Z'::timestamp)
26062613

26072614
statement ok
2608-
create table table_b (ts timestamp) as values
2615+
create table table_b (ts timestamp) as values
26092616
('2020-09-08T11:42:29.190Z'::timestamp),
26102617
('2020-09-08T13:42:29.190Z'::timestamp),
26112618
('2020-09-08T12:42:29.190Z'::timestamp)
@@ -2727,8 +2734,8 @@ statement ok
27272734
drop table t1
27282735

27292736
statement ok
2730-
create table table_a (val int, ts1 timestamp, ts2 timestamp) as values
2731-
(1, '2018-07-01T06:00:00'::timestamp, '2018-07-01T07:00:00'::timestamp),
2737+
create table table_a (val int, ts1 timestamp, ts2 timestamp) as values
2738+
(1, '2018-07-01T06:00:00'::timestamp, '2018-07-01T07:00:00'::timestamp),
27322739
(2, '2018-07-01T07:00:00'::timestamp, '2018-07-01T08:00:00'::timestamp)
27332740

27342741
query I?
@@ -3032,7 +3039,7 @@ NULL
30323039

30333040
query T
30343041
SELECT to_char(date_column, '%Y-%m-%d')
3035-
FROM (VALUES
3042+
FROM (VALUES
30363043
(DATE '2020-09-01'),
30373044
(NULL)
30383045
) AS t(date_column);
@@ -3042,7 +3049,7 @@ NULL
30423049

30433050
query T
30443051
SELECT to_char(date_column, '%Y-%m-%d')
3045-
FROM (VALUES
3052+
FROM (VALUES
30463053
(NULL),
30473054
(DATE '2020-09-01')
30483055
) AS t(date_column);

0 commit comments

Comments
 (0)