Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 4 additions & 1 deletion java/lance-jni/src/schema.rs
Original file line number Diff line number Diff line change
Expand Up @@ -39,9 +39,11 @@ pub fn convert_to_java_field<'local>(
let name = env.new_string(&lance_field.name)?;
let children = convert_children_fields(env, lance_field)?;
let metadata = to_java_map(env, &lance_field.metadata)?;
let logical_type = env.new_string(lance_field.logical_type.to_string())?;
let arrow_type = convert_arrow_type(env, &lance_field.data_type())?;
let ctor_sig = "(IILjava/lang/String;".to_owned()
+ "ZLorg/apache/arrow/vector/types/pojo/ArrowType;"
+ "ZLjava/lang/String;"
+ "Lorg/apache/arrow/vector/types/pojo/ArrowType;"
+ "Lorg/apache/arrow/vector/types/pojo/DictionaryEncoding;"
+ "Ljava/util/Map;"
+ "Ljava/util/List;Z)V";
Expand All @@ -53,6 +55,7 @@ pub fn convert_to_java_field<'local>(
JValue::Int(lance_field.parent_id as jint),
JValue::Object(&JObject::from(name)),
JValue::Bool(lance_field.nullable as jboolean),
JValue::Object(&JObject::from(logical_type)),
JValue::Object(&arrow_type),
JValue::Object(&JObject::null()),
JValue::Object(&metadata),
Expand Down
130 changes: 130 additions & 0 deletions java/src/main/java/org/lance/schema/LanceField.java
Original file line number Diff line number Diff line change
Expand Up @@ -14,11 +14,17 @@
package org.lance.schema;

import com.google.common.base.MoreObjects;
import com.google.common.collect.ImmutableMap;
import org.apache.arrow.vector.types.DateUnit;
import org.apache.arrow.vector.types.FloatingPointPrecision;
import org.apache.arrow.vector.types.TimeUnit;
import org.apache.arrow.vector.types.pojo.ArrowType;
import org.apache.arrow.vector.types.pojo.DictionaryEncoding;
import org.apache.arrow.vector.types.pojo.Field;
import org.apache.arrow.vector.types.pojo.FieldType;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.List;
import java.util.Map;
import java.util.Optional;
Expand All @@ -29,6 +35,7 @@ public class LanceField {
private final int parentId;
private final String name;
private final boolean nullable;
private final String logicalType;
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

#4207 (comment)

Just a little context

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

For Lance vector data types, only the logicalType carries the FixedSizeList's element type, e.g. fixed_size_list:float:3.

So, when a LanceField is converted to an Arrow Field, the FixedSizeList's children must be constructed from the logicalType.

private final ArrowType type;
private final DictionaryEncoding dictionaryEncoding;
private final Map<String, String> metadata;
Expand All @@ -40,6 +47,7 @@ public class LanceField {
int parentId,
String name,
boolean nullable,
String logicalType,
ArrowType type,
DictionaryEncoding dictionaryEncoding,
Map<String, String> metadata,
Expand All @@ -49,6 +57,7 @@ public class LanceField {
this.parentId = parentId;
this.name = name;
this.nullable = nullable;
this.logicalType = logicalType;
this.type = type;
this.dictionaryEncoding = dictionaryEncoding;
this.metadata = metadata;
Expand All @@ -72,6 +81,10 @@ public boolean isNullable() {
return nullable;
}

/**
 * Returns the logical type string this field was constructed with.
 *
 * @return the stored logical type; may be {@code null} if the constructor received {@code null}
 */
public String getLogicalType() {
  return this.logicalType;
}

/**
 * Returns the Arrow type this field was constructed with.
 *
 * @return the stored {@link ArrowType}
 */
public ArrowType getType() {
  return this.type;
}
Expand All @@ -95,17 +108,134 @@ public boolean isUnenforcedPrimaryKey() {
/**
 * Converts this Lance field, together with its children, into an Arrow {@link Field}.
 *
 * <p>Every Lance child is converted recursively. If this field's type is an {@link
 * ArrowType.FixedSizeList} and no Lance children were converted, the "item" child is synthesized
 * from {@code logicalType} (see {@code childrenForFixedSizeList()}).
 *
 * @return an Arrow field carrying this field's name, nullability, type, dictionary encoding,
 *     metadata and children
 * @throws IllegalArgumentException if the element type of a fixed-size list has to be derived
 *     from {@code logicalType} and that logical type is malformed or unsupported
 */
public Field asArrowField() {
  // Collect into an explicit ArrayList: Collectors.toList() gives no guarantee that the returned
  // list is mutable, and the list may be appended to below.
  List<Field> arrowChildren =
      children.stream()
          .map(LanceField::asArrowField)
          .collect(Collectors.toCollection(ArrayList::new));

  // Only synthesize the element child when Lance supplied none; appending to existing children
  // would hand a fixed-size list more than one child.
  if (type instanceof ArrowType.FixedSizeList && arrowChildren.isEmpty()) {
    arrowChildren.addAll(childrenForFixedSizeList());
  }

  return new Field(
      name, new FieldType(nullable, type, dictionaryEncoding, metadata), arrowChildren);
}

/**
 * Derives the single "item" child of a fixed-size list from this field's logical type.
 *
 * <p>The logical type is expected to look like {@code fixed_size_list:<element>:<last>}, for
 * example {@code fixed_size_list:float:3}. The element part may itself contain colons; everything
 * between the leading {@code fixed_size_list} segment and the final segment is treated as the
 * element's logical type.
 *
 * @return a one-element list holding the nullable "item" field, or an empty list when the logical
 *     type is null or empty, the Arrow type is not a fixed-size list, or the logical type does not
 *     start with {@code fixed_size_list:}
 * @throws IllegalArgumentException if the logical type has fewer than three colon-separated
 *     segments or names an unsupported element type
 */
private List<Field> childrenForFixedSizeList() {
  boolean describesFixedSizeList =
      logicalType != null
          && !logicalType.isEmpty()
          && type instanceof ArrowType.FixedSizeList
          && logicalType.startsWith("fixed_size_list:");
  if (!describesFixedSizeList) {
    return Collections.emptyList();
  }

  String[] segments = logicalType.split(":");
  if (segments.length < 3) {
    throw new IllegalArgumentException("Unsupported logical type: " + logicalType);
  }

  // Drop the first and last segments and glue the remainder back together with ':'.
  String elementLogicalType =
      String.join(":", Arrays.asList(segments).subList(1, segments.length - 1));

  FieldType elementFieldType;
  if ("lance.bfloat16".equals(elementLogicalType)) {
    // bfloat16 is modelled as a 2-byte fixed-size binary tagged with extension metadata.
    elementFieldType =
        new FieldType(
            true,
            new ArrowType.FixedSizeBinary(2),
            null,
            ImmutableMap.of(
                "ARROW:extension:name", "lance.bfloat16",
                "ARROW:extension:metadata", ""));
  } else {
    elementFieldType =
        new FieldType(
            true, arrowTypeFromLogicalType(elementLogicalType), null, Collections.emptyMap());
  }

  return Collections.singletonList(new Field("item", elementFieldType, Collections.emptyList()));
}

/**
 * Translates a logical type name into the matching {@link ArrowType}.
 *
 * @param logicalType the logical type name to translate, e.g. {@code "int32"} or {@code
 *     "date32:day"}
 * @return the Arrow type associated with {@code logicalType}
 * @throws IllegalArgumentException if {@code logicalType} is not one of the recognised names
 */
private ArrowType arrowTypeFromLogicalType(String logicalType) {
  final ArrowType resolved;
  switch (logicalType) {
      // Null and boolean
    case "null":
      resolved = ArrowType.Null.INSTANCE;
      break;
    case "bool":
      resolved = ArrowType.Bool.INSTANCE;
      break;

      // Signed and unsigned integers
    case "int8":
      resolved = new ArrowType.Int(8, true);
      break;
    case "uint8":
      resolved = new ArrowType.Int(8, false);
      break;
    case "int16":
      resolved = new ArrowType.Int(16, true);
      break;
    case "uint16":
      resolved = new ArrowType.Int(16, false);
      break;
    case "int32":
      resolved = new ArrowType.Int(32, true);
      break;
    case "uint32":
      resolved = new ArrowType.Int(32, false);
      break;
    case "int64":
      resolved = new ArrowType.Int(64, true);
      break;
    case "uint64":
      resolved = new ArrowType.Int(64, false);
      break;

      // Floating point
    case "halffloat":
      resolved = new ArrowType.FloatingPoint(FloatingPointPrecision.HALF);
      break;
    case "float":
      resolved = new ArrowType.FloatingPoint(FloatingPointPrecision.SINGLE);
      break;
    case "double":
      resolved = new ArrowType.FloatingPoint(FloatingPointPrecision.DOUBLE);
      break;

      // Strings and binary
    case "string":
      resolved = ArrowType.Utf8.INSTANCE;
      break;
    case "binary":
      resolved = ArrowType.Binary.INSTANCE;
      break;
    case "large_string":
      resolved = ArrowType.LargeUtf8.INSTANCE;
      break;
    case "large_binary":
    case "blob":
    case "json":
      // All three names map to large binary.
      resolved = ArrowType.LargeBinary.INSTANCE;
      break;

      // Dates
    case "date32:day":
      resolved = new ArrowType.Date(DateUnit.DAY);
      break;
    case "date64:ms":
      resolved = new ArrowType.Date(DateUnit.MILLISECOND);
      break;

      // Times of day
    case "time32:s":
      resolved = new ArrowType.Time(TimeUnit.SECOND, 32);
      break;
    case "time32:ms":
      resolved = new ArrowType.Time(TimeUnit.MILLISECOND, 32);
      break;
    case "time64:us":
      resolved = new ArrowType.Time(TimeUnit.MICROSECOND, 64);
      break;
    case "time64:ns":
      resolved = new ArrowType.Time(TimeUnit.NANOSECOND, 64);
      break;

      // Durations
    case "duration:s":
      resolved = new ArrowType.Duration(TimeUnit.SECOND);
      break;
    case "duration:ms":
      resolved = new ArrowType.Duration(TimeUnit.MILLISECOND);
      break;
    case "duration:us":
      resolved = new ArrowType.Duration(TimeUnit.MICROSECOND);
      break;
    case "duration:ns":
      resolved = new ArrowType.Duration(TimeUnit.NANOSECOND);
      break;

    default:
      throw new IllegalArgumentException("Unsupported logical type: " + logicalType);
  }
  return resolved;
}

@Override
public String toString() {
return MoreObjects.toStringHelper(this)
.add("id", id)
.add("parentId", parentId)
.add("name", name)
.add("nullable", nullable)
.add("logicalType", logicalType)
.add("type", type)
.add("dictionaryEncoding", dictionaryEncoding)
.add("children", children)
Expand Down
27 changes: 26 additions & 1 deletion java/src/test/java/org/lance/TestUtils.java
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
import org.lance.fragment.FragmentUpdateResult;

import com.google.common.base.Preconditions;
import com.google.common.collect.ImmutableMap;
import com.google.common.collect.Lists;
import com.google.common.collect.Maps;
import org.apache.arrow.c.ArrowArrayStream;
Expand Down Expand Up @@ -357,7 +358,31 @@ public static class ComplexTestDataset extends TestDataset {
FieldType.nullable(new ArrowType.Struct()),
Arrays.asList(
Field.nullable("field1", ArrowType.Utf8.INSTANCE),
Field.nullable("field2", new ArrowType.Int(16, true))))));
Field.nullable("field2", new ArrowType.Int(16, true)))),

// fixed size list type
new Field(
"fixed_size_list_col",
FieldType.nullable(new ArrowType.FixedSizeList(3)),
Collections.singletonList(Field.nullable("item", new ArrowType.Int(32, true)))),

// fixed bfloat16 list type
new Field(
"bfloat16_fixed_size_list_col",
FieldType.nullable(new ArrowType.FixedSizeList(3)),
Collections.singletonList(
new Field(
"item",
new FieldType(
true,
new ArrowType.FixedSizeBinary(2),
null,
ImmutableMap.of(
"ARROW:extension:name",
"lance.bfloat16",
"ARROW:extension:metadata",
"")),
Collections.emptyList())))));

public ComplexTestDataset(BufferAllocator allocator, String datasetPath) {
super(allocator, datasetPath);
Expand Down
Loading