diff --git a/Cargo.lock b/Cargo.lock index 1cacc75fb9..a47796c74c 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3675,6 +3675,7 @@ version = "0.7.0" dependencies = [ "anyhow", "async-trait", + "chrono", "datafusion", "expect-test", "futures", diff --git a/crates/iceberg/src/spec/values.rs b/crates/iceberg/src/spec/values.rs index d06e754dcc..ba436adf29 100644 --- a/crates/iceberg/src/spec/values.rs +++ b/crates/iceberg/src/spec/values.rs @@ -60,6 +60,10 @@ const INT_MIN: i32 = -2147483648; const LONG_MAX: i64 = 9223372036854775807; const LONG_MIN: i64 = -9223372036854775808; +const MICROS_PER_DAY: i64 = 24 * 60 * 60 * 1_000_000; +const NANOS_PER_MICRO: i64 = 1000; +const NANOS_PER_DAY: i64 = NANOS_PER_MICRO * MICROS_PER_DAY; + /// Values present in iceberg type #[derive(Clone, Debug, PartialOrd, PartialEq, Hash, Eq)] pub enum PrimitiveLiteral { @@ -1189,17 +1193,11 @@ impl Datum { match target_type { Type::Primitive(target_primitive_type) => { match (&self.literal, &self.r#type, target_primitive_type) { - (PrimitiveLiteral::Int(val), _, PrimitiveType::Int) => Ok(Datum::int(*val)), - (PrimitiveLiteral::Int(val), _, PrimitiveType::Date) => Ok(Datum::date(*val)), - (PrimitiveLiteral::Int(val), _, PrimitiveType::Long) => Ok(Datum::long(*val)), - (PrimitiveLiteral::Long(val), _, PrimitiveType::Int) => { - Ok(Datum::i64_to_i32(*val)) - } - (PrimitiveLiteral::Long(val), _, PrimitiveType::Timestamp) => { - Ok(Datum::timestamp_micros(*val)) + (PrimitiveLiteral::Int(val), source_type, target_type) => { + convert_int(*val, source_type, target_type) } - (PrimitiveLiteral::Long(val), _, PrimitiveType::Timestamptz) => { - Ok(Datum::timestamptz_micros(*val)) + (PrimitiveLiteral::Long(val), source_type, target_type) => { + convert_long(*val, source_type, target_type) } // Let's wait with nano's until this clears up: https://github.com/apache/iceberg/pull/11775 (PrimitiveLiteral::Int128(val), _, PrimitiveType::Long) => { @@ -1275,6 +1273,105 @@ impl Datum { } } +/// Converts an int literal between two [PrimitiveType]s. +fn convert_int( + val: i32, + source_type: &PrimitiveType, + target_type: &PrimitiveType, +) -> Result { + let datum = match (source_type, target_type) { + (_, PrimitiveType::Int) => Datum::int(val), + (_, PrimitiveType::Date) => Datum::date(val), + (_, PrimitiveType::Long) => Datum::long(val), + (PrimitiveType::Date, PrimitiveType::Timestamp) => Datum::timestamp_micros( + (val as i64) + .checked_mul(MICROS_PER_DAY) + .ok_or_else(overflow)?, + ), + (PrimitiveType::Date, PrimitiveType::Timestamptz) => Datum::timestamptz_micros( + (val as i64) + .checked_mul(MICROS_PER_DAY) + .ok_or_else(overflow)?, + ), + (PrimitiveType::Date, PrimitiveType::TimestampNs) => Datum::timestamp_nanos( + (val as i64) + .checked_mul(NANOS_PER_DAY) + .ok_or_else(overflow)?, + ), + (PrimitiveType::Date, PrimitiveType::TimestamptzNs) => Datum::timestamptz_nanos( + (val as i64) + .checked_mul(NANOS_PER_DAY) + .ok_or_else(overflow)?, + ), + _ => { + return Err(Error::new( + ErrorKind::DataInvalid, + format!("Can't convert datum from {source_type} type to {target_type} type.",), + )); + } + }; + + Ok(datum) +} + +/// Converts a long literal between two [PrimitiveType]s. +fn convert_long( + val: i64, + source_type: &PrimitiveType, + target_type: &PrimitiveType, +) -> Result { + let datum = match (source_type, target_type) { + (_, PrimitiveType::Int) => Datum::i64_to_i32(val), + (_, PrimitiveType::Long) => Datum::long(val), + ( + PrimitiveType::Long | PrimitiveType::Timestamp | PrimitiveType::Timestamptz, + PrimitiveType::Timestamp, + ) => Datum::timestamp_micros(val), + ( + PrimitiveType::Long | PrimitiveType::Timestamp | PrimitiveType::Timestamptz, + PrimitiveType::Timestamptz, + ) => Datum::timestamptz_micros(val), + ( + PrimitiveType::Long | PrimitiveType::TimestampNs | PrimitiveType::TimestamptzNs, + PrimitiveType::TimestampNs, + ) => Datum::timestamp_nanos(val), + ( + PrimitiveType::Long | PrimitiveType::TimestampNs | PrimitiveType::TimestamptzNs, + PrimitiveType::TimestamptzNs, + ) => Datum::timestamptz_nanos(val), + (PrimitiveType::TimestampNs | PrimitiveType::TimestamptzNs, PrimitiveType::Timestamp) => { + Datum::timestamp_micros(val / NANOS_PER_MICRO) + } + (PrimitiveType::TimestampNs | PrimitiveType::TimestamptzNs, PrimitiveType::Timestamptz) => { + Datum::timestamptz_micros(val / NANOS_PER_MICRO) + } + (PrimitiveType::Timestamp | PrimitiveType::Timestamptz, PrimitiveType::TimestampNs) => { + Datum::timestamp_nanos(val.checked_mul(NANOS_PER_MICRO).ok_or_else(overflow)?) + } + (PrimitiveType::Timestamp | PrimitiveType::Timestamptz, PrimitiveType::TimestamptzNs) => { + Datum::timestamptz_nanos(val.checked_mul(NANOS_PER_MICRO).ok_or_else(overflow)?) + } + (PrimitiveType::Timestamp | PrimitiveType::Timestamptz, PrimitiveType::Date) => { + Datum::date((val / MICROS_PER_DAY) as i32) + } + (PrimitiveType::TimestampNs | PrimitiveType::TimestamptzNs, PrimitiveType::Date) => { + Datum::date((val / (NANOS_PER_DAY)) as i32) + } + _ => { + return Err(Error::new( + ErrorKind::DataInvalid, + format!("Can't convert datum from {source_type} type to {target_type} type."), + )); + } + }; + + Ok(datum) +} + +fn overflow() -> Error { + Error::new(ErrorKind::DataInvalid, "integer overflow") +} + /// Map is a collection of key-value pairs with a key type and a value type. /// It used in Literal::Map, to make it hashable, the order of key-value pairs is stored in a separate vector /// so that we can hash the map in a deterministic way. But it also means that the order of key-value pairs is matter @@ -3711,169 +3808,213 @@ mod tests { test_fn(datum); } - #[test] - fn test_datum_date_convert_to_int() { - let datum_date = Datum::date(12345); - - let result = datum_date.to(&Primitive(PrimitiveType::Int)).unwrap(); - - let expected = Datum::int(12345); - - assert_eq!(result, expected); - } - - #[test] - fn test_datum_int_convert_to_date() { - let datum_int = Datum::int(12345); - - let result = datum_int.to(&Primitive(PrimitiveType::Date)).unwrap(); - - let expected = Datum::date(12345); - - assert_eq!(result, expected); - } - - #[test] - fn test_datum_long_convert_to_int() { - let datum = Datum::long(12345); - - let result = datum.to(&Primitive(PrimitiveType::Int)).unwrap(); - - let expected = Datum::int(12345); - - assert_eq!(result, expected); - } - - #[test] - fn test_datum_long_convert_to_int_above_max() { - let datum = Datum::long(INT_MAX as i64 + 1); - - let result = datum.to(&Primitive(PrimitiveType::Int)).unwrap(); - - let expected = Datum::new(PrimitiveType::Int, PrimitiveLiteral::AboveMax); - - assert_eq!(result, expected); - } - - #[test] - fn test_datum_long_convert_to_int_below_min() { - let datum = Datum::long(INT_MIN as i64 - 1); - - let result = datum.to(&Primitive(PrimitiveType::Int)).unwrap(); - - let expected = Datum::new(PrimitiveType::Int, PrimitiveLiteral::BelowMin); - - assert_eq!(result, expected); - } - - #[test] - fn test_datum_long_convert_to_timestamp() { - let datum = Datum::long(12345); - - let result = datum.to(&Primitive(PrimitiveType::Timestamp)).unwrap(); - - let expected = Datum::timestamp_micros(12345); - - assert_eq!(result, expected); - } - - #[test] - fn test_datum_long_convert_to_timestamptz() { - let datum = Datum::long(12345); - - let result = datum.to(&Primitive(PrimitiveType::Timestamptz)).unwrap(); - - let expected = Datum::timestamptz_micros(12345); - - assert_eq!(result, expected); - } - - #[test] - fn test_datum_decimal_convert_to_long() { - let datum = Datum::decimal(12345).unwrap(); - - let result = datum.to(&Primitive(PrimitiveType::Long)).unwrap(); - - let expected = Datum::long(12345); - - assert_eq!(result, expected); - } - - #[test] - fn test_datum_decimal_convert_to_long_above_max() { - let datum = Datum::decimal(LONG_MAX as i128 + 1).unwrap(); - - let result = datum.to(&Primitive(PrimitiveType::Long)).unwrap(); - - let expected = Datum::new(PrimitiveType::Long, PrimitiveLiteral::AboveMax); - - assert_eq!(result, expected); - } - - #[test] - fn test_datum_decimal_convert_to_long_below_min() { - let datum = Datum::decimal(LONG_MIN as i128 - 1).unwrap(); - - let result = datum.to(&Primitive(PrimitiveType::Long)).unwrap(); - - let expected = Datum::new(PrimitiveType::Long, PrimitiveLiteral::BelowMin); - - assert_eq!(result, expected); - } - - #[test] - fn test_datum_string_convert_to_boolean() { - let datum = Datum::string("true"); - - let result = datum.to(&Primitive(PrimitiveType::Boolean)).unwrap(); - - let expected = Datum::bool(true); - - assert_eq!(result, expected); - } - - #[test] - fn test_datum_string_convert_to_int() { - let datum = Datum::string("12345"); - - let result = datum.to(&Primitive(PrimitiveType::Int)).unwrap(); - - let expected = Datum::int(12345); - - assert_eq!(result, expected); - } + macro_rules! datum_convert_tests { + ($($name:ident: ($source:expr => $target_type:ident, $expected:expr),)*) => { + $( + #[test] + fn $name() { + let source = $source; + let target_type = PrimitiveType::$target_type; + let expected = $expected; - #[test] - fn test_datum_string_convert_to_long() { - let datum = Datum::string("12345"); - - let result = datum.to(&Primitive(PrimitiveType::Long)).unwrap(); - - let expected = Datum::long(12345); - - assert_eq!(result, expected); - } - - #[test] - fn test_datum_string_convert_to_timestamp() { - let datum = Datum::string("1925-05-20T19:25:00.000"); - - let result = datum.to(&Primitive(PrimitiveType::Timestamp)).unwrap(); - - let expected = Datum::timestamp_micros(-1407990900000000); - - assert_eq!(result, expected); + let result = source.to(&Primitive(target_type)).unwrap(); + assert_eq!(result, expected); + } + )* + } } - #[test] - fn test_datum_string_convert_to_timestamptz() { - let datum = Datum::string("1925-05-20T19:25:00.000 UTC"); - - let result = datum.to(&Primitive(PrimitiveType::Timestamptz)).unwrap(); - - let expected = Datum::timestamptz_micros(-1407990900000000); - - assert_eq!(result, expected); + datum_convert_tests! { + // Date conversions + convert_date_to_int: ( + Datum::date(12345) => Int, + Datum::int(12345) + ), + convert_int_to_date: ( + Datum::int(12345) => Date, + Datum::date(12345) + ), + convert_date_to_timestamp: ( + Datum::date(1) => Timestamp, + Datum::timestamp_from_datetime( + DateTime::parse_from_rfc3339("1970-01-02T00:00:00Z") + .unwrap() + .naive_utc() + ) + ), + convert_date_to_timestamptz: ( + Datum::date(1) => Timestamptz, + Datum::timestamptz_from_str("1970-01-02T00:00:00Z").unwrap() + ), + convert_date_to_timestamp_nanos: ( + Datum::date(1) => TimestampNs, + Datum::timestamp_from_datetime( + DateTime::parse_from_rfc3339("1970-01-02T00:00:00Z") + .unwrap() + .naive_utc() + ) + .to(&Primitive(PrimitiveType::TimestampNs)) + .unwrap() + ), + convert_date_to_timestamptz_nanos: ( + Datum::date(1) => TimestamptzNs, + Datum::timestamptz_from_datetime( + DateTime::parse_from_rfc3339("1970-01-02T00:00:00Z").unwrap() + ) + .to(&Primitive(PrimitiveType::TimestamptzNs)) + .unwrap() + ), + convert_date_negative_to_timestamp: ( + Datum::date(-1) => Timestamp, + Datum::timestamp_from_datetime( + DateTime::parse_from_rfc3339("1969-12-31T00:00:00Z") + .unwrap() + .naive_utc() + ) + ), + convert_date_to_timestamp_year_9999: ( + Datum::date(2932896) => Timestamp, + Datum::timestamp_from_datetime( + DateTime::parse_from_rfc3339("9999-12-31T00:00:00Z") + .unwrap() + .naive_utc() + ) + ), + // Long conversions + convert_long_to_int: ( + Datum::long(12345) => Int, + Datum::int(12345) + ), + convert_long_to_int_above_max: ( + Datum::long(INT_MAX as i64 + 1) => Int, + Datum::new(PrimitiveType::Int, PrimitiveLiteral::AboveMax) + ), + convert_long_to_int_below_min: ( + Datum::long(INT_MIN as i64 - 1) => Int, + Datum::new(PrimitiveType::Int, PrimitiveLiteral::BelowMin) + ), + convert_long_to_timestamp: ( + Datum::long(12345) => Timestamp, + Datum::timestamp_micros(12345) + ), + convert_long_to_timestamptz: ( + Datum::long(12345) => Timestamptz, + Datum::timestamptz_micros(12345) + ), + convert_long_to_timestamp_nanos: ( + Datum::long(12345) => TimestampNs, + Datum::timestamp_nanos(12345) + ), + convert_long_to_timestamptz_nanos: ( + Datum::long(12345) => TimestamptzNs, + Datum::timestamptz_nanos(12345) + ), + // Decimal conversions + convert_decimal_to_long: ( + Datum::decimal(12345).unwrap() => Long, + Datum::long(12345) + ), + convert_decimal_to_long_above_max: ( + Datum::decimal(LONG_MAX as i128 + 1).unwrap() => Long, + Datum::new(PrimitiveType::Long, PrimitiveLiteral::AboveMax) + ), + convert_decimal_to_long_below_min: ( + Datum::decimal(LONG_MIN as i128 - 1).unwrap() => Long, + Datum::new(PrimitiveType::Long, PrimitiveLiteral::BelowMin) + ), + // String conversions + convert_string_to_boolean: ( + Datum::string("true") => Boolean, + Datum::bool(true) + ), + convert_string_to_int: ( + Datum::string("12345") => Int, + Datum::int(12345) + ), + convert_string_to_long: ( + Datum::string("12345") => Long, + Datum::long(12345) + ), + convert_string_to_timestamp: ( + Datum::string("1925-05-20T19:25:00.000") => Timestamp, + Datum::timestamp_micros(-1407990900000000) + ), + convert_string_to_timestamptz: ( + Datum::string("1925-05-20T19:25:00.000 UTC") => Timestamptz, + Datum::timestamptz_micros(-1407990900000000) + ), + // Timestamp conversions (micros to nanos) + convert_timestamp_micros_to_timestamp_nanos: ( + Datum::timestamp_micros(12345) => TimestampNs, + Datum::timestamp_nanos(12345000) + ), + convert_timestamp_micros_to_timestamptz_nanos: ( + Datum::timestamp_micros(12345) => TimestamptzNs, + Datum::timestamptz_nanos(12345000) + ), + convert_timestamptz_micros_to_timestamp_nanos: ( + Datum::timestamptz_micros(12345) => TimestampNs, + Datum::timestamp_nanos(12345000) + ), + convert_timestamptz_micros_to_timestamptz_nanos: ( + Datum::timestamptz_micros(12345) => TimestamptzNs, + Datum::timestamptz_nanos(12345000) + ), + // Timestamp conversions (nanos to micros) + convert_timestamp_nanos_to_timestamp_micros: ( + Datum::timestamp_nanos(12345000) => Timestamp, + Datum::timestamp_micros(12345) + ), + test_datum_timestamp_micros_to_nanos: ( + Datum::timestamp_nanos(12345678) => Timestamp, + Datum::timestamp_micros(12345) + ), + convert_timestamp_nanos_to_timestamptz_micros: ( + Datum::timestamp_nanos(12345000) => Timestamptz, + Datum::timestamptz_micros(12345) + ), + convert_timestamptz_nanos_to_timestamp_micros: ( + Datum::timestamptz_nanos(12345000) => Timestamp, + Datum::timestamp_micros(12345) + ), + convert_timestamptz_nanos_to_timestamptz_micros: ( + Datum::timestamptz_nanos(12345000) => Timestamptz, + Datum::timestamptz_micros(12345) + ), + // Timestamp conversions (nanos to nanos) + convert_timestamp_nanos_to_timestamp_nanos: ( + Datum::timestamp_nanos(12345) => TimestampNs, + Datum::timestamp_nanos(12345) + ), + convert_timestamp_nanos_to_timestamptz_nanos: ( + Datum::timestamp_nanos(12345) => TimestamptzNs, + Datum::timestamptz_nanos(12345) + ), + convert_timestamptz_nanos_to_timestamp_nanos: ( + Datum::timestamptz_nanos(12345) => TimestampNs, + Datum::timestamp_nanos(12345) + ), + convert_timestamptz_nanos_to_timestamptz_nanos: ( + Datum::timestamptz_nanos(12345) => TimestamptzNs, + Datum::timestamptz_nanos(12345) + ), + // Timestamp to date conversions + convert_timestamp_to_date: ( + Datum::timestamp_micros(24 * 60 * 60 * 1_000_000) => Date, + Datum::date(1) + ), + convert_timestamptz_to_date: ( + Datum::timestamptz_micros(24 * 60 * 60 * 1_000_000) => Date, + Datum::date(1) + ), + convert_timestamp_nanos_to_date: ( + Datum::timestamp_nanos(24 * 60 * 60 * 1_000_000_000) => Date, + Datum::date(1) + ), + convert_timestamptz_nanos_to_date: ( + Datum::timestamptz_nanos(24 * 60 * 60 * 1_000_000_000) => Date, + Datum::date(1) + ), } #[test] diff --git a/crates/integrations/datafusion/Cargo.toml b/crates/integrations/datafusion/Cargo.toml index 0ee1738b4f..d5260f7770 100644 --- a/crates/integrations/datafusion/Cargo.toml +++ b/crates/integrations/datafusion/Cargo.toml @@ -31,6 +31,7 @@ repository = { workspace = true } [dependencies] anyhow = { workspace = true } async-trait = { workspace = true } +chrono = { workspace = true } datafusion = { workspace = true } futures = { workspace = true } iceberg = { workspace = true } diff --git a/crates/integrations/datafusion/src/physical_plan/expr_to_predicate.rs b/crates/integrations/datafusion/src/physical_plan/expr_to_predicate.rs index 2974dd2642..8eec27a8da 100644 --- a/crates/integrations/datafusion/src/physical_plan/expr_to_predicate.rs +++ b/crates/integrations/datafusion/src/physical_plan/expr_to_predicate.rs @@ -17,6 +17,7 @@ use std::vec; +use chrono::{FixedOffset, TimeZone as _}; use datafusion::arrow::datatypes::DataType; use datafusion::logical_expr::{Expr, Operator}; use datafusion::scalar::ScalarValue; @@ -201,6 +202,9 @@ fn reverse_predicate_operator(op: PredicateOperator) -> PredicateOperator { } const MILLIS_PER_DAY: i64 = 24 * 60 * 60 * 1000; +const MICROS_PER_SECOND: i64 = 1_000_000; +const MICROS_PER_MILLISECOND: i64 = 1_000; + /// Convert a scalar value to an iceberg datum. fn scalar_value_to_datum(value: &ScalarValue) -> Option { match value { @@ -214,18 +218,46 @@ fn scalar_value_to_datum(value: &ScalarValue) -> Option { ScalarValue::LargeUtf8(Some(v)) => Some(Datum::string(v.clone())), ScalarValue::Date32(Some(v)) => Some(Datum::date(*v)), ScalarValue::Date64(Some(v)) => Some(Datum::date((*v / MILLIS_PER_DAY) as i32)), + ScalarValue::TimestampSecond(Some(v), tz) => { + interpret_timestamptz_micros(v.checked_mul(MICROS_PER_SECOND)?, tz.as_deref()) + } + ScalarValue::TimestampMillisecond(Some(v), tz) => { + interpret_timestamptz_micros(v.checked_mul(MICROS_PER_MILLISECOND)?, tz.as_deref()) + } + ScalarValue::TimestampMicrosecond(Some(v), tz) => { + interpret_timestamptz_micros(*v, tz.as_deref()) + } + ScalarValue::TimestampNanosecond(Some(v), Some(_)) => Some(Datum::timestamptz_nanos(*v)), + ScalarValue::TimestampNanosecond(Some(v), None) => Some(Datum::timestamp_nanos(*v)), _ => None, } } +fn interpret_timestamptz_micros(micros: i64, tz: Option>) -> Option { + let offset = tz + .as_ref() + .and_then(|s| s.as_ref().parse::().ok()); + + match offset { + Some(off) => off + .timestamp_micros(micros) + .single() + .map(Datum::timestamptz_from_datetime), + None => Some(Datum::timestamp_micros(micros)), + } +} + #[cfg(test)] mod tests { use std::collections::HashMap; + use chrono::DateTime; use datafusion::arrow::datatypes::{DataType, Field, Schema, TimeUnit}; use datafusion::common::DFSchema; use datafusion::logical_expr::utils::split_conjunction; + use datafusion::logical_expr::{Operator, binary_expr, col, lit}; use datafusion::prelude::{Expr, SessionContext}; + use datafusion::scalar::ScalarValue; use iceberg::expr::{Predicate, Reference}; use iceberg::spec::Datum; use parquet::arrow::PARQUET_FIELD_ID_META_KEY; @@ -245,6 +277,42 @@ mod tests { Field::new("ts", DataType::Timestamp(TimeUnit::Second, None), true).with_metadata( HashMap::from([(PARQUET_FIELD_ID_META_KEY.to_string(), "3".to_string())]), ), + Field::new( + "ts_ms", + DataType::Timestamp(TimeUnit::Millisecond, None), + true, + ) + .with_metadata(HashMap::from([( + PARQUET_FIELD_ID_META_KEY.to_string(), + "4".to_string(), + )])), + Field::new( + "ts_us", + DataType::Timestamp(TimeUnit::Microsecond, None), + true, + ) + .with_metadata(HashMap::from([( + PARQUET_FIELD_ID_META_KEY.to_string(), + "5".to_string(), + )])), + Field::new( + "ts_ns", + DataType::Timestamp(TimeUnit::Nanosecond, None), + true, + ) + .with_metadata(HashMap::from([( + PARQUET_FIELD_ID_META_KEY.to_string(), + "6".to_string(), + )])), + Field::new( + "ts_tz", + DataType::Timestamp(TimeUnit::Microsecond, Some("UTC".into())), + true, + ) + .with_metadata(HashMap::from([( + PARQUET_FIELD_ID_META_KEY.to_string(), + "7".to_string(), + )])), ]); DFSchema::try_from_qualified_schema("my_table", &arrow_schema).unwrap() } @@ -429,4 +497,76 @@ mod tests { let predicate = convert_to_iceberg_predicate(sql); assert_eq!(predicate, None); } + + #[test] + fn test_predicate_conversion_with_timestamp() { + // 2023-01-01 12:00:00 UTC + let timestamp_scalar = ScalarValue::TimestampSecond(Some(1672574400), None); + let dt = DateTime::parse_from_rfc3339("2023-01-01T12:00:00+00:00").unwrap(); + + let expr = binary_expr(col("ts"), Operator::Gt, lit(timestamp_scalar)); + let exprs: Vec = split_conjunction(&expr).into_iter().cloned().collect(); + let predicate = convert_filters_to_predicate(&exprs[..]).unwrap(); + let expected_predicate = + Reference::new("ts").greater_than(Datum::timestamp_from_datetime(dt.naive_utc())); + assert_eq!(predicate, expected_predicate); + } + + #[test] + fn test_predicate_conversion_with_timestamp_milliseconds() { + // 2023-01-01 12:00:00 UTC + let timestamp_scalar = ScalarValue::TimestampMillisecond(Some(1672574400000), None); + let dt = DateTime::parse_from_rfc3339("2023-01-01T12:00:00+00:00").unwrap(); + + let expr = binary_expr(col("ts_ms"), Operator::LtEq, lit(timestamp_scalar)); + let exprs: Vec = split_conjunction(&expr).into_iter().cloned().collect(); + let predicate = convert_filters_to_predicate(&exprs[..]).unwrap(); + let expected_predicate = Reference::new("ts_ms") + .less_than_or_equal_to(Datum::timestamp_from_datetime(dt.naive_utc())); + assert_eq!(predicate, expected_predicate); + } + + #[test] + fn test_predicate_conversion_with_timestamp_nanoseconds() { + // 2023-01-01 12:00:00 UTC + let timestamp_scalar = ScalarValue::TimestampNanosecond(Some(1672574400000000000), None); + let dt = DateTime::parse_from_rfc3339("2023-01-01T12:00:00+00:00").unwrap(); + + let expr = binary_expr(col("ts_ns"), Operator::NotEq, lit(timestamp_scalar)); + let exprs: Vec = split_conjunction(&expr).into_iter().cloned().collect(); + let predicate = convert_filters_to_predicate(&exprs[..]).unwrap(); + let expected_predicate = Reference::new("ts_ns") + .not_equal_to(Datum::timestamp_nanos(dt.timestamp_nanos_opt().unwrap())); + assert_eq!(predicate, expected_predicate); + } + + #[test] + fn test_predicate_conversion_with_timestamp_with_timezone() { + // 2023-01-01 13:00:00 +01:00 + let timestamp_scalar = + ScalarValue::TimestampSecond(Some(1672574400), Some("+01:00".into())); + let dt = DateTime::parse_from_rfc3339("2023-01-01T13:00:00+01:00").unwrap(); + + let expr = binary_expr(col("ts_tz"), Operator::GtEq, lit(timestamp_scalar)); + let exprs: Vec = split_conjunction(&expr).into_iter().cloned().collect(); + let predicate = convert_filters_to_predicate(&exprs[..]).unwrap(); + let expected_predicate = + Reference::new("ts_tz").greater_than_or_equal_to(Datum::timestamptz_from_datetime(dt)); + assert_eq!(predicate, expected_predicate); + } + + #[test] + fn test_predicate_conversion_with_timestamp_nanoseconds_with_timezone() { + // 2023-01-01 13:00:00 +01:00 + let timestamp_scalar = + ScalarValue::TimestampNanosecond(Some(1672574400000000000), Some("+01:00".into())); + let dt = DateTime::parse_from_rfc3339("2023-01-01T13:00:00+01:00").unwrap(); + + let expr = binary_expr(col("ts_tz"), Operator::GtEq, lit(timestamp_scalar)); + let exprs: Vec = split_conjunction(&expr).into_iter().cloned().collect(); + let predicate = convert_filters_to_predicate(&exprs[..]).unwrap(); + let expected_predicate = Reference::new("ts_tz") + .greater_than_or_equal_to(Datum::timestamptz_nanos(dt.timestamp_nanos_opt().unwrap())); + assert_eq!(predicate, expected_predicate); + } }