diff --git a/parquet-variant-compute/src/variant_get.rs b/parquet-variant-compute/src/variant_get.rs index 7d37a8b64511..b3a3d9e41f13 100644 --- a/parquet-variant-compute/src/variant_get.rs +++ b/parquet-variant-compute/src/variant_get.rs @@ -22,7 +22,7 @@ use arrow::{ error::Result, }; use arrow_schema::{ArrowError, Field}; -use parquet_variant::path::VariantPath; +use parquet_variant::VariantPath; use crate::{VariantArray, VariantArrayBuilder}; @@ -41,8 +41,7 @@ pub fn variant_get(input: &ArrayRef, options: GetOptions) -> Result { if let Some(as_type) = options.as_type { return Err(ArrowError::NotYetImplemented(format!( - "getting a {} from a VariantArray is not implemented yet", - as_type + "getting a {as_type} from a VariantArray is not implemented yet", ))); } @@ -91,7 +90,7 @@ mod test { use std::sync::Arc; use arrow::array::{Array, ArrayRef, StringArray}; - use parquet_variant::path::{VariantPath, VariantPathElement}; + use parquet_variant::VariantPath; use crate::batch_json_string_to_variant; use crate::VariantArray; @@ -133,29 +132,21 @@ mod test { fn get_primitive_variant_field() { single_variant_get_test( r#"{"some_field": 1234}"#, - vec![VariantPathElement::field("some_field".into())].into(), + VariantPath::from("some_field"), "1234", ); } #[test] fn get_primitive_variant_list_index() { - single_variant_get_test( - "[1234, 5678]", - vec![VariantPathElement::index(0)].into(), - "1234", - ); + single_variant_get_test("[1234, 5678]", VariantPath::from(0), "1234"); } #[test] fn get_primitive_variant_inside_object_of_object() { single_variant_get_test( r#"{"top_level_field": {"inner_field": 1234}}"#, - vec![ - VariantPathElement::field("top_level_field".into()), - VariantPathElement::field("inner_field".into()), - ] - .into(), + VariantPath::from("top_level_field").join("inner_field"), "1234", ); } @@ -164,11 +155,7 @@ mod test { fn get_primitive_variant_inside_list_of_object() { single_variant_get_test( r#"[{"some_field": 1234}]"#, - vec![ - VariantPathElement::index(0), - VariantPathElement::field("some_field".into()), - ] - .into(), + VariantPath::from(0).join("some_field"), "1234", ); } @@ -177,11 +164,7 @@ mod test { fn get_primitive_variant_inside_object_of_list() { single_variant_get_test( r#"{"some_field": [1234]}"#, - vec![ - VariantPathElement::field("some_field".into()), - VariantPathElement::index(0), - ] - .into(), + VariantPath::from("some_field").join(0), "1234", ); } @@ -190,7 +173,7 @@ mod test { fn get_complex_variant() { single_variant_get_test( r#"{"top_level_field": {"inner_field": 1234}}"#, - vec![VariantPathElement::field("top_level_field".into())].into(), + VariantPath::from("top_level_field"), r#"{"inner_field": 1234}"#, ); } diff --git a/parquet-variant/src/lib.rs b/parquet-variant/src/lib.rs index d04c59605fc4..a57b4709799d 100644 --- a/parquet-variant/src/lib.rs +++ b/parquet-variant/src/lib.rs @@ -20,6 +20,10 @@ //! [Variant Binary Encoding]: https://github.com/apache/parquet-format/blob/master/VariantEncoding.md //! [Apache Parquet]: https://parquet.apache.org/ //! +//! ## Main APIs +//! - [`Variant`]: Represents a variant value, which can be an object, list, or primitive. +//! - [`VariantBuilder`]: For building `Variant` values. +//! //! ## 🚧 Work In Progress //! //! This crate is under active development and is not yet ready for production use. @@ -29,9 +33,10 @@ mod builder; mod decoder; -pub mod path; +mod path; mod utils; mod variant; pub use builder::*; +pub use path::{VariantPath, VariantPathElement}; pub use variant::*; diff --git a/parquet-variant/src/path.rs b/parquet-variant/src/path.rs index 1643d9c87c5f..42dbdb3abc2d 100644 --- a/parquet-variant/src/path.rs +++ b/parquet-variant/src/path.rs @@ -16,18 +16,77 @@ // under the License. use std::{borrow::Cow, ops::Deref}; -/// Represents a qualified path to a potential subfield or index of a variant value. -#[derive(Debug, Clone)] +/// Represents a qualified path to a potential subfield or index of a variant +/// value. +/// +/// Can be used with [`Variant::get_path`] to retrieve a specific subfield of +/// a variant value. +/// +/// [`Variant::get_path`]: crate::Variant::get_path +/// +/// Create a [`VariantPath`] from a vector of [`VariantPathElement`], or +/// from a single field name or index. +/// +/// # Example: Simple paths +/// ```rust +/// # use parquet_variant::{VariantPath, VariantPathElement}; +/// // access the field "foo" in a variant object value +/// let path = VariantPath::from("foo"); +/// // access the first element in a variant list vale +/// let path = VariantPath::from(0); +/// ``` +/// +/// # Example: Compound paths +/// ``` +/// # use parquet_variant::{VariantPath, VariantPathElement}; +/// /// You can also create a path by joining elements together: +/// // access the field "foo" and then the first element in a variant list value +/// let path = VariantPath::from("foo").join(0); +/// // this is the same as the previous one +/// let path2 = VariantPath::new(vec!["foo".into(), 0.into()]); +/// assert_eq!(path, path2); +/// // you can also create a path from a vector of `VariantPathElement` directly +/// let path3 = VariantPath::new(vec![ +/// VariantPathElement::field("foo"), +/// VariantPathElement::index(0) +/// ]); +/// assert_eq!(path, path3); +/// ``` +/// +/// # Example: Accessing Compound paths +/// ``` +/// # use parquet_variant::{VariantPath, VariantPathElement}; +/// /// You can access the paths using slices +/// // access the field "foo" and then the first element in a variant list value +/// let path = VariantPath::from("foo") +/// .join("bar") +/// .join("baz"); +/// assert_eq!(path[1], VariantPathElement::field("bar")); +/// ``` +#[derive(Debug, Clone, PartialEq)] pub struct VariantPath<'a>(Vec>); impl<'a> VariantPath<'a> { + /// Create a new `VariantPath` from a vector of `VariantPathElement`. pub fn new(path: Vec>) -> Self { Self(path) } + /// Return the inner path elements. pub fn path(&self) -> &Vec { &self.0 } + + /// Return a new `VariantPath` with element appended + pub fn join(mut self, element: impl Into>) -> Self { + self.push(element); + self + } + + /// Append a new element to the path + pub fn push(&mut self, element: impl Into>) { + self.0.push(element.into()); + } } impl<'a> From>> for VariantPath<'a> { @@ -36,6 +95,20 @@ impl<'a> From>> for VariantPath<'a> { } } +/// Create from &str +impl<'a> From<&'a str> for VariantPath<'a> { + fn from(path: &'a str) -> Self { + VariantPath::new(vec![path.into()]) + } +} + +/// Create from usize +impl<'a> From for VariantPath<'a> { + fn from(index: usize) -> Self { + VariantPath::new(vec![VariantPathElement::index(index)]) + } +} + impl<'a> Deref for VariantPath<'a> { type Target = [VariantPathElement<'a>]; @@ -44,8 +117,10 @@ impl<'a> Deref for VariantPath<'a> { } } -/// Element of a path -#[derive(Debug, Clone)] +/// Element of a [`VariantPath`] that can be a field name or an index. +/// +/// See [`VariantPath`] for more details and examples. +#[derive(Debug, Clone, PartialEq)] pub enum VariantPathElement<'a> { /// Access field with name `name` Field { name: Cow<'a, str> }, @@ -54,7 +129,8 @@ pub enum VariantPathElement<'a> { } impl<'a> VariantPathElement<'a> { - pub fn field(name: Cow<'a, str>) -> VariantPathElement<'a> { + pub fn field(name: impl Into>) -> VariantPathElement<'a> { + let name = name.into(); VariantPathElement::Field { name } } @@ -62,3 +138,34 @@ impl<'a> VariantPathElement<'a> { VariantPathElement::Index { index } } } + +// Conversion utilities for `VariantPathElement` from string types +impl<'a> From> for VariantPathElement<'a> { + fn from(name: Cow<'a, str>) -> Self { + VariantPathElement::field(name) + } +} + +impl<'a> From<&'a str> for VariantPathElement<'a> { + fn from(name: &'a str) -> Self { + VariantPathElement::field(Cow::Borrowed(name)) + } +} + +impl<'a> From for VariantPathElement<'a> { + fn from(name: String) -> Self { + VariantPathElement::field(Cow::Owned(name)) + } +} + +impl<'a> From<&'a String> for VariantPathElement<'a> { + fn from(name: &'a String) -> Self { + VariantPathElement::field(Cow::Borrowed(name.as_str())) + } +} + +impl<'a> From for VariantPathElement<'a> { + fn from(index: usize) -> Self { + VariantPathElement::index(index) + } +} diff --git a/parquet-variant/src/variant.rs b/parquet-variant/src/variant.rs index 29b191970837..7792d9bdb52f 100644 --- a/parquet-variant/src/variant.rs +++ b/parquet-variant/src/variant.rs @@ -942,6 +942,8 @@ impl<'m, 'v> Variant<'m, 'v> { /// Returns `Some(&VariantObject)` for object variants, /// `None` for non-object variants. /// + /// See [`Self::get_path`] to dynamically traverse objects + /// /// # Examples /// ``` /// # use parquet_variant::{Variant, VariantBuilder, VariantObject}; @@ -999,6 +1001,8 @@ impl<'m, 'v> Variant<'m, 'v> { /// Returns `Some(&VariantList)` for list variants, /// `None` for non-list variants. /// + /// See [`Self::get_path`] to dynamically traverse lists + /// /// # Examples /// ``` /// # use parquet_variant::{Variant, VariantBuilder, VariantList}; @@ -1068,6 +1072,35 @@ impl<'m, 'v> Variant<'m, 'v> { /// Return a new Variant with the path followed. /// /// If the path is not found, `None` is returned. + /// + /// # Example + /// ``` + /// # use parquet_variant::{Variant, VariantBuilder, VariantObject, VariantPath}; + /// # let mut builder = VariantBuilder::new(); + /// # let mut obj = builder.new_object(); + /// # let mut list = obj.new_list("foo"); + /// # list.append_value("bar"); + /// # list.append_value("baz"); + /// # list.finish(); + /// # obj.finish().unwrap(); + /// # let (metadata, value) = builder.finish(); + /// // given a variant like `{"foo": ["bar", "baz"]}` + /// let variant = Variant::new(&metadata, &value); + /// // Accessing a non existent path returns None + /// assert_eq!(variant.get_path(&VariantPath::from("non_existent")), None); + /// // Access obj["foo"] + /// let path = VariantPath::from("foo"); + /// let foo = variant.get_path(&path).expect("field `foo` should exist"); + /// assert!(foo.as_list().is_some(), "field `foo` should be a list"); + /// // Access foo[0] + /// let path = VariantPath::from(0); + /// let bar = foo.get_path(&path).expect("element 0 should exist"); + /// // bar is a string + /// assert_eq!(bar.as_string(), Some("bar")); + /// // You can also access nested paths + /// let path = VariantPath::from("foo").join(0); + /// assert_eq!(variant.get_path(&path).unwrap(), bar); + /// ``` pub fn get_path(&self, path: &VariantPath) -> Option { path.iter() .try_fold(self.clone(), |output, element| match element {