Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
35 changes: 9 additions & 26 deletions parquet-variant-compute/src/variant_get.rs
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ use arrow::{
error::Result,
};
use arrow_schema::{ArrowError, Field};
use parquet_variant::path::VariantPath;
use parquet_variant::VariantPath;
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I re-exported the paths at the same top level


use crate::{VariantArray, VariantArrayBuilder};

Expand All @@ -41,8 +41,7 @@ pub fn variant_get(input: &ArrayRef, options: GetOptions) -> Result<ArrayRef> {

if let Some(as_type) = options.as_type {
return Err(ArrowError::NotYetImplemented(format!(
"getting a {} from a VariantArray is not implemented yet",
as_type
"getting a {as_type} from a VariantArray is not implemented yet",
)));
}

Expand Down Expand Up @@ -91,7 +90,7 @@ mod test {
use std::sync::Arc;

use arrow::array::{Array, ArrayRef, StringArray};
use parquet_variant::path::{VariantPath, VariantPathElement};
use parquet_variant::VariantPath;

use crate::batch_json_string_to_variant;
use crate::VariantArray;
Expand Down Expand Up @@ -133,29 +132,21 @@ mod test {
fn get_primitive_variant_field() {
single_variant_get_test(
r#"{"some_field": 1234}"#,
vec![VariantPathElement::field("some_field".into())].into(),
VariantPath::from("some_field"),
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This gives a good idea of what the new API looks like. I think it is much clearer personally

"1234",
);
}

#[test]
fn get_primitive_variant_list_index() {
single_variant_get_test(
"[1234, 5678]",
vec![VariantPathElement::index(0)].into(),
"1234",
);
single_variant_get_test("[1234, 5678]", VariantPath::from(0), "1234");
}

#[test]
fn get_primitive_variant_inside_object_of_object() {
single_variant_get_test(
r#"{"top_level_field": {"inner_field": 1234}}"#,
vec![
VariantPathElement::field("top_level_field".into()),
VariantPathElement::field("inner_field".into()),
]
.into(),
VariantPath::from("top_level_field").join("inner_field"),
"1234",
);
}
Expand All @@ -164,11 +155,7 @@ mod test {
fn get_primitive_variant_inside_list_of_object() {
single_variant_get_test(
r#"[{"some_field": 1234}]"#,
vec![
VariantPathElement::index(0),
VariantPathElement::field("some_field".into()),
]
.into(),
VariantPath::from(0).join("some_field"),
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is a much better API!

"1234",
);
}
Expand All @@ -177,11 +164,7 @@ mod test {
fn get_primitive_variant_inside_object_of_list() {
single_variant_get_test(
r#"{"some_field": [1234]}"#,
vec![
VariantPathElement::field("some_field".into()),
VariantPathElement::index(0),
]
.into(),
VariantPath::from("some_field").join(0),
"1234",
);
}
Expand All @@ -190,7 +173,7 @@ mod test {
fn get_complex_variant() {
single_variant_get_test(
r#"{"top_level_field": {"inner_field": 1234}}"#,
vec![VariantPathElement::field("top_level_field".into())].into(),
VariantPath::from("top_level_field"),
r#"{"inner_field": 1234}"#,
);
}
Expand Down
7 changes: 6 additions & 1 deletion parquet-variant/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,10 @@
//! [Variant Binary Encoding]: https://github.com/apache/parquet-format/blob/master/VariantEncoding.md
//! [Apache Parquet]: https://parquet.apache.org/
//!
//! ## Main APIs
//! - [`Variant`]: Represents a variant value, which can be an object, list, or primitive.
//! - [`VariantBuilder`]: For building `Variant` values.
//!
//! ## 🚧 Work In Progress
//!
//! This crate is under active development and is not yet ready for production use.
Expand All @@ -29,9 +33,10 @@

mod builder;
mod decoder;
pub mod path;
mod path;
mod utils;
mod variant;

pub use builder::*;
pub use path::{VariantPath, VariantPathElement};
pub use variant::*;
117 changes: 112 additions & 5 deletions parquet-variant/src/path.rs
Original file line number Diff line number Diff line change
Expand Up @@ -16,18 +16,77 @@
// under the License.
use std::{borrow::Cow, ops::Deref};

/// Represents a qualified path to a potential subfield or index of a variant value.
#[derive(Debug, Clone)]
/// Represents a qualified path to a potential subfield or index of a variant
/// value.
///
/// Can be used with [`Variant::get_path`] to retrieve a specific subfield of
/// a variant value.
///
/// [`Variant::get_path`]: crate::Variant::get_path
///
/// Create a [`VariantPath`] from a vector of [`VariantPathElement`], or
/// from a single field name or index.
///
/// # Example: Simple paths
/// ```rust
/// # use parquet_variant::{VariantPath, VariantPathElement};
/// // access the field "foo" in a variant object value
/// let path = VariantPath::from("foo");
/// // access the first element in a variant list vale
/// let path = VariantPath::from(0);
/// ```
///
/// # Example: Compound paths
/// ```
/// # use parquet_variant::{VariantPath, VariantPathElement};
/// /// You can also create a path by joining elements together:
/// // access the field "foo" and then the first element in a variant list value
/// let path = VariantPath::from("foo").join(0);
/// // this is the same as the previous one
/// let path2 = VariantPath::new(vec!["foo".into(), 0.into()]);
/// assert_eq!(path, path2);
/// // you can also create a path from a vector of `VariantPathElement` directly
/// let path3 = VariantPath::new(vec![
/// VariantPathElement::field("foo"),
/// VariantPathElement::index(0)
/// ]);
Comment on lines +49 to +52
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Should it also impl FromIterator so people can do:

VariantPath::from_iter(["foo".into(), 0.into()])

or

["foo".into(), 0.into()].into_iter().collect::<VariantPath>()

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

/// assert_eq!(path, path3);
/// ```
///
/// # Example: Accessing Compound paths
/// ```
/// # use parquet_variant::{VariantPath, VariantPathElement};
/// /// You can access the paths using slices
/// // access the field "foo" and then the first element in a variant list value
/// let path = VariantPath::from("foo")
/// .join("bar")
/// .join("baz");
/// assert_eq!(path[1], VariantPathElement::field("bar"));
/// ```
#[derive(Debug, Clone, PartialEq)]
pub struct VariantPath<'a>(Vec<VariantPathElement<'a>>);

impl<'a> VariantPath<'a> {
/// Create a new `VariantPath` from a vector of `VariantPathElement`.
pub fn new(path: Vec<VariantPathElement<'a>>) -> Self {
Self(path)
}

/// Return the inner path elements.
pub fn path(&self) -> &Vec<VariantPathElement> {
&self.0
}

/// Return a new `VariantPath` with element appended
pub fn join(mut self, element: impl Into<VariantPathElement<'a>>) -> Self {
self.push(element);
self
}

/// Append a new element to the path
pub fn push(&mut self, element: impl Into<VariantPathElement<'a>>) {
self.0.push(element.into());
}
}

impl<'a> From<Vec<VariantPathElement<'a>>> for VariantPath<'a> {
Expand All @@ -36,6 +95,20 @@ impl<'a> From<Vec<VariantPathElement<'a>>> for VariantPath<'a> {
}
}

/// Create from &str
impl<'a> From<&'a str> for VariantPath<'a> {
fn from(path: &'a str) -> Self {
VariantPath::new(vec![path.into()])
}
}

/// Create from usize
impl<'a> From<usize> for VariantPath<'a> {
fn from(index: usize) -> Self {
VariantPath::new(vec![VariantPathElement::index(index)])
}
}

impl<'a> Deref for VariantPath<'a> {
type Target = [VariantPathElement<'a>];

Expand All @@ -44,8 +117,10 @@ impl<'a> Deref for VariantPath<'a> {
}
}

/// Element of a path
#[derive(Debug, Clone)]
/// Element of a [`VariantPath`] that can be a field name or an index.
///
/// See [`VariantPath`] for more details and examples.
#[derive(Debug, Clone, PartialEq)]
pub enum VariantPathElement<'a> {
/// Access field with name `name`
Field { name: Cow<'a, str> },
Expand All @@ -54,11 +129,43 @@ pub enum VariantPathElement<'a> {
}

impl<'a> VariantPathElement<'a> {
pub fn field(name: Cow<'a, str>) -> VariantPathElement<'a> {
pub fn field(name: impl Into<Cow<'a, str>>) -> VariantPathElement<'a> {
let name = name.into();
VariantPathElement::Field { name }
}

pub fn index(index: usize) -> VariantPathElement<'a> {
VariantPathElement::Index { index }
}
}

// Conversion utilities for `VariantPathElement` from string types
impl<'a> From<Cow<'a, str>> for VariantPathElement<'a> {
fn from(name: Cow<'a, str>) -> Self {
VariantPathElement::field(name)
}
}

impl<'a> From<&'a str> for VariantPathElement<'a> {
fn from(name: &'a str) -> Self {
VariantPathElement::field(Cow::Borrowed(name))
}
}

impl<'a> From<String> for VariantPathElement<'a> {
fn from(name: String) -> Self {
VariantPathElement::field(Cow::Owned(name))
}
}

impl<'a> From<&'a String> for VariantPathElement<'a> {
fn from(name: &'a String) -> Self {
VariantPathElement::field(Cow::Borrowed(name.as_str()))
}
}

impl<'a> From<usize> for VariantPathElement<'a> {
fn from(index: usize) -> Self {
VariantPathElement::index(index)
}
}
33 changes: 33 additions & 0 deletions parquet-variant/src/variant.rs
Original file line number Diff line number Diff line change
Expand Up @@ -942,6 +942,8 @@ impl<'m, 'v> Variant<'m, 'v> {
/// Returns `Some(&VariantObject)` for object variants,
/// `None` for non-object variants.
///
/// See [`Self::get_path`] to dynamically traverse objects
///
/// # Examples
/// ```
/// # use parquet_variant::{Variant, VariantBuilder, VariantObject};
Expand Down Expand Up @@ -999,6 +1001,8 @@ impl<'m, 'v> Variant<'m, 'v> {
/// Returns `Some(&VariantList)` for list variants,
/// `None` for non-list variants.
///
/// See [`Self::get_path`] to dynamically traverse lists
///
/// # Examples
/// ```
/// # use parquet_variant::{Variant, VariantBuilder, VariantList};
Expand Down Expand Up @@ -1068,6 +1072,35 @@ impl<'m, 'v> Variant<'m, 'v> {
/// Return a new Variant with the path followed.
///
/// If the path is not found, `None` is returned.
///
/// # Example
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I really wanted to add these tests / examples which lead to the other API changes

/// ```
/// # use parquet_variant::{Variant, VariantBuilder, VariantObject, VariantPath};
/// # let mut builder = VariantBuilder::new();
/// # let mut obj = builder.new_object();
/// # let mut list = obj.new_list("foo");
/// # list.append_value("bar");
/// # list.append_value("baz");
/// # list.finish();
/// # obj.finish().unwrap();
/// # let (metadata, value) = builder.finish();
/// // given a variant like `{"foo": ["bar", "baz"]}`
/// let variant = Variant::new(&metadata, &value);
/// // Accessing a non existent path returns None
/// assert_eq!(variant.get_path(&VariantPath::from("non_existent")), None);
/// // Access obj["foo"]
/// let path = VariantPath::from("foo");
/// let foo = variant.get_path(&path).expect("field `foo` should exist");
/// assert!(foo.as_list().is_some(), "field `foo` should be a list");
/// // Access foo[0]
/// let path = VariantPath::from(0);
/// let bar = foo.get_path(&path).expect("element 0 should exist");
/// // bar is a string
/// assert_eq!(bar.as_string(), Some("bar"));
/// // You can also access nested paths
/// let path = VariantPath::from("foo").join(0);
/// assert_eq!(variant.get_path(&path).unwrap(), bar);
/// ```
pub fn get_path(&self, path: &VariantPath) -> Option<Variant> {
path.iter()
.try_fold(self.clone(), |output, element| match element {
Expand Down
Loading