From ddc0f458af021ea39797382d92cc53e4d19ee66d Mon Sep 17 00:00:00 2001 From: absurdfarce Date: Wed, 26 Jun 2024 14:53:18 -0500 Subject: [PATCH 1/4] Harmonize String and JSON vector code implementations --- .../json/JsonNodeConvertingCodecProvider.java | 16 ++++++---------- 1 file changed, 6 insertions(+), 10 deletions(-) diff --git a/codecs/text/src/main/java/com/datastax/oss/dsbulk/codecs/text/json/JsonNodeConvertingCodecProvider.java b/codecs/text/src/main/java/com/datastax/oss/dsbulk/codecs/text/json/JsonNodeConvertingCodecProvider.java index 139440f3f..977b49f62 100644 --- a/codecs/text/src/main/java/com/datastax/oss/dsbulk/codecs/text/json/JsonNodeConvertingCodecProvider.java +++ b/codecs/text/src/main/java/com/datastax/oss/dsbulk/codecs/text/json/JsonNodeConvertingCodecProvider.java @@ -396,18 +396,14 @@ public class JsonNodeConvertingCodecProvider implements ConvertingCodecProvider return new JsonNodeToDateRangeCodec(nullStrings); case DefaultVectorType.VECTOR_CLASS_NAME: VectorType vectorType = (VectorType) cqlType; - // Step 1: create a JSON codec which will take the input JSON nodes and generate - // something matching the expected data type - ConvertingCodec jsonCodec = + // Parser for JSON leaf nodes, each of which represents a value of the vector subtype + ConvertingCodec leafCodec = createJsonNodeConvertingCodec(vectorType.getElementType(), codecFactory, false); - // Step 2: create a conventional codec which will take instances of the Java type - // generated by the JSON codec above and perform standard serde on them. - ConvertingCodec standardCodec = - codecFactory.createConvertingCodec( - vectorType.getElementType(), jsonCodec.getInternalJavaType(), false); return new JsonNodeToVectorCodec( - new VectorCodec(vectorType, standardCodec), - jsonCodec, + new VectorCodec( + vectorType, + codecFactory.getCodecRegistry().codecFor(vectorType.getElementType())), + leafCodec, context.getAttribute(OBJECT_MAPPER), nullStrings); } From e0c84b4176bda3fc5893d8d66fae4ff3499294a2 Mon Sep 17 00:00:00 2001 From: absurdfarce Date: Wed, 26 Jun 2024 15:39:23 -0500 Subject: [PATCH 2/4] String-to-vector codecs are now using the dsbulk codecs for initial conversion.. still need to sort out the JSON node version --- .../string/StringConvertingCodecProvider.java | 1 + .../text/string/StringToVectorCodec.java | 28 +++++++++++- .../text/string/StringToVectorCodecTest.java | 43 ++++++++++++++++++- 3 files changed, 68 insertions(+), 4 deletions(-) diff --git a/codecs/text/src/main/java/com/datastax/oss/dsbulk/codecs/text/string/StringConvertingCodecProvider.java b/codecs/text/src/main/java/com/datastax/oss/dsbulk/codecs/text/string/StringConvertingCodecProvider.java index 740df2d20..3499ab38d 100644 --- a/codecs/text/src/main/java/com/datastax/oss/dsbulk/codecs/text/string/StringConvertingCodecProvider.java +++ b/codecs/text/src/main/java/com/datastax/oss/dsbulk/codecs/text/string/StringConvertingCodecProvider.java @@ -339,6 +339,7 @@ public class StringConvertingCodecProvider implements ConvertingCodecProvider { new VectorCodec( vectorType, codecFactory.getCodecRegistry().codecFor(vectorType.getElementType())), + createStringConvertingCodec(vectorType.getElementType(), codecFactory, false), nullStrings); } } diff --git a/codecs/text/src/main/java/com/datastax/oss/dsbulk/codecs/text/string/StringToVectorCodec.java b/codecs/text/src/main/java/com/datastax/oss/dsbulk/codecs/text/string/StringToVectorCodec.java index fd70f1ea2..a0005c6cf 100644 --- a/codecs/text/src/main/java/com/datastax/oss/dsbulk/codecs/text/string/StringToVectorCodec.java +++ b/codecs/text/src/main/java/com/datastax/oss/dsbulk/codecs/text/string/StringToVectorCodec.java @@ -17,18 +17,42 @@ import com.datastax.oss.driver.api.core.data.CqlVector; import com.datastax.oss.driver.internal.core.type.codec.VectorCodec; +import com.datastax.oss.driver.shaded.guava.common.base.Splitter; +import com.datastax.oss.driver.shaded.guava.common.collect.Streams; +import com.datastax.oss.dsbulk.codecs.api.ConvertingCodec; +import java.util.ArrayList; import java.util.List; +import java.util.stream.Collectors; public class StringToVectorCodec extends StringConvertingCodec> { - public StringToVectorCodec(VectorCodec targetCodec, List nullStrings) { + private final ConvertingCodec stringCodec; + + public StringToVectorCodec( + VectorCodec targetCodec, + ConvertingCodec stringCodec, + List nullStrings) { super(targetCodec, nullStrings); + this.stringCodec = stringCodec; } @Override public CqlVector externalToInternal(String s) { - return this.internalCodec.parse(s); + + // Logic below adapted from VectorCodec.parse() and CqlVector.from() but here we use the dsbulk + // codecs for the subtype in order to enforce any additional behaviours + // + // Logically this probably makes more sense anyway. It's the responsibilty of dsbulk to define + // what sorts of formats it wants to support for vectors. It can certainly re-use a + // representation + // known to work with vectors but it certainly isn't obligated to do so. + if (s == null || s.isEmpty() || s.equalsIgnoreCase("NULL")) return null; + ArrayList vals = + Streams.stream(Splitter.on(", ").split(s.substring(1, s.length() - 1))) + .map(this.stringCodec::externalToInternal) + .collect(Collectors.toCollection(ArrayList::new)); + return CqlVector.newInstance(vals); } @Override diff --git a/codecs/text/src/test/java/com/datastax/oss/dsbulk/codecs/text/string/StringToVectorCodecTest.java b/codecs/text/src/test/java/com/datastax/oss/dsbulk/codecs/text/string/StringToVectorCodecTest.java index d13112c85..1c5debfd1 100644 --- a/codecs/text/src/test/java/com/datastax/oss/dsbulk/codecs/text/string/StringToVectorCodecTest.java +++ b/codecs/text/src/test/java/com/datastax/oss/dsbulk/codecs/text/string/StringToVectorCodecTest.java @@ -15,6 +15,15 @@ */ package com.datastax.oss.dsbulk.codecs.text.string; +import static com.datastax.oss.dsbulk.codecs.api.CommonConversionContext.BOOLEAN_INPUT_WORDS; +import static com.datastax.oss.dsbulk.codecs.api.CommonConversionContext.BOOLEAN_NUMBERS; +import static com.datastax.oss.dsbulk.codecs.api.CommonConversionContext.EPOCH; +import static com.datastax.oss.dsbulk.codecs.api.CommonConversionContext.NUMBER_FORMAT; +import static com.datastax.oss.dsbulk.codecs.api.CommonConversionContext.OVERFLOW_STRATEGY; +import static com.datastax.oss.dsbulk.codecs.api.CommonConversionContext.ROUNDING_MODE; +import static com.datastax.oss.dsbulk.codecs.api.CommonConversionContext.TIMESTAMP_FORMAT; +import static com.datastax.oss.dsbulk.codecs.api.CommonConversionContext.TIME_UNIT; +import static com.datastax.oss.dsbulk.codecs.api.CommonConversionContext.TIME_ZONE; import static com.datastax.oss.dsbulk.tests.assertions.TestAssertions.assertThat; import static org.assertj.core.api.Assertions.assertThatThrownBy; @@ -24,8 +33,12 @@ import com.datastax.oss.driver.api.core.type.codec.TypeCodecs; import com.datastax.oss.driver.internal.core.type.DefaultVectorType; import com.datastax.oss.driver.internal.core.type.codec.VectorCodec; +import com.datastax.oss.driver.shaded.guava.common.collect.ImmutableList; import com.datastax.oss.driver.shaded.guava.common.collect.Lists; +import com.datastax.oss.dsbulk.codecs.api.CommonConversionContext; +import com.datastax.oss.dsbulk.codecs.api.ConversionContext; import java.util.ArrayList; +import java.util.List; import org.junit.jupiter.api.Test; public class StringToVectorCodecTest { @@ -35,8 +48,26 @@ public class StringToVectorCodecTest { private final VectorCodec vectorCodec = new VectorCodec(new DefaultVectorType(DataTypes.FLOAT, 5), TypeCodecs.FLOAT); - private final StringToVectorCodec dsbulkCodec = - new StringToVectorCodec(vectorCodec, Lists.newArrayList("NULL")); + private final StringToVectorCodec dsbulkCodec; + + public StringToVectorCodecTest() { + + ConversionContext context = new CommonConversionContext(); + List nullStrings = ImmutableList.of(); + StringToFloatCodec stringCodec = + new StringToFloatCodec( + context.getAttribute(NUMBER_FORMAT), + context.getAttribute(OVERFLOW_STRATEGY), + context.getAttribute(ROUNDING_MODE), + context.getAttribute(TIMESTAMP_FORMAT), + context.getAttribute(TIME_ZONE), + context.getAttribute(TIME_UNIT), + context.getAttribute(EPOCH), + context.getAttribute(BOOLEAN_INPUT_WORDS), + context.getAttribute(BOOLEAN_NUMBERS), + nullStrings); + dsbulkCodec = new StringToVectorCodec(vectorCodec, stringCodec, nullStrings); + } @Test void should_convert_from_valid_external() { @@ -88,4 +119,12 @@ void should_encode_too_many_but_not_too_few() { assertThatThrownBy(() -> dsbulkCodec.encode(tooFewString, ProtocolVersion.DEFAULT)) .isInstanceOf(IllegalArgumentException.class); } + + /* Issue 484: now that we're using the dsbulk string-to-subtype converters we should get + * enforcement of existing dsbulk policies. For our purposes that means the failure on + * arithmetic overflow */ + @Test + void should_not_convert_too_much_precision() { + assertThat(dsbulkCodec).cannotConvertFromInternal("6.646329843"); + } } From d4366933b26ffb5bd0b30df6666eb31e3a41951f Mon Sep 17 00:00:00 2001 From: absurdfarce Date: Wed, 26 Jun 2024 15:59:46 -0500 Subject: [PATCH 3/4] Added explicit test cases for the overflow case --- .../codecs/text/json/JsonNodeToVectorCodec.java | 10 +++++----- .../codecs/text/json/JsonNodeToVectorCodecTest.java | 12 ++++++++++++ .../codecs/text/string/StringToVectorCodecTest.java | 5 ++++- 3 files changed, 21 insertions(+), 6 deletions(-) diff --git a/codecs/text/src/main/java/com/datastax/oss/dsbulk/codecs/text/json/JsonNodeToVectorCodec.java b/codecs/text/src/main/java/com/datastax/oss/dsbulk/codecs/text/json/JsonNodeToVectorCodec.java index 15ffd6d08..299927568 100644 --- a/codecs/text/src/main/java/com/datastax/oss/dsbulk/codecs/text/json/JsonNodeToVectorCodec.java +++ b/codecs/text/src/main/java/com/datastax/oss/dsbulk/codecs/text/json/JsonNodeToVectorCodec.java @@ -29,16 +29,16 @@ public class JsonNodeToVectorCodec extends JsonNodeConvertingCodec> { - private final ConvertingCodec subtypeCodec; + private final ConvertingCodec leafCodec; private final ObjectMapper objectMapper; public JsonNodeToVectorCodec( VectorCodec targetCodec, - ConvertingCodec subtypeCodec, + ConvertingCodec leafCodec, ObjectMapper objectMapper, List nullStrings) { super(targetCodec, nullStrings); - this.subtypeCodec = subtypeCodec; + this.leafCodec = leafCodec; this.objectMapper = objectMapper; } @@ -47,7 +47,7 @@ public CqlVector externalToInternal(JsonNode jsonNode) { if (jsonNode == null || !jsonNode.isArray()) return null; List elems = Streams.stream(jsonNode.elements()) - .map(e -> subtypeCodec.externalToInternal(e)) + .map(e -> leafCodec.externalToInternal(e)) .collect(Collectors.toCollection(ArrayList::new)); return CqlVector.newInstance(elems); } @@ -57,7 +57,7 @@ public JsonNode internalToExternal(CqlVector value) { if (value == null) return null; ArrayNode root = objectMapper.createArrayNode(); for (SubtypeT element : value) { - root.add(subtypeCodec.internalToExternal(element)); + root.add(leafCodec.internalToExternal(element)); } return root; } diff --git a/codecs/text/src/test/java/com/datastax/oss/dsbulk/codecs/text/json/JsonNodeToVectorCodecTest.java b/codecs/text/src/test/java/com/datastax/oss/dsbulk/codecs/text/json/JsonNodeToVectorCodecTest.java index 4913b6cac..a991c1ef6 100644 --- a/codecs/text/src/test/java/com/datastax/oss/dsbulk/codecs/text/json/JsonNodeToVectorCodecTest.java +++ b/codecs/text/src/test/java/com/datastax/oss/dsbulk/codecs/text/json/JsonNodeToVectorCodecTest.java @@ -104,4 +104,16 @@ void should_encode_too_many_but_not_too_few() { assertThatThrownBy(() -> dsbulkCodec.encode(tooFewNode, ProtocolVersion.DEFAULT)) .isInstanceOf(IllegalArgumentException.class); } + + /* Issue 484: now that we're using the dsbulk string-to-subtype converters we should get + * enforcement of existing dsbulk policies. For our purposes that means the failure on + * arithmetic overflow */ + @Test + void should_not_convert_too_much_precision() { + ArrayNode tooPreciseNode = JSON_NODE_FACTORY.arrayNode(); + tooPreciseNode.add(JSON_NODE_FACTORY.numberNode(6.646329843)); + assertThat(dsbulkCodec).cannotConvertFromInternal(tooPreciseNode); + assertThatThrownBy(() -> dsbulkCodec.encode(tooPreciseNode, ProtocolVersion.DEFAULT)) + .isInstanceOf(ArithmeticException.class); + } } diff --git a/codecs/text/src/test/java/com/datastax/oss/dsbulk/codecs/text/string/StringToVectorCodecTest.java b/codecs/text/src/test/java/com/datastax/oss/dsbulk/codecs/text/string/StringToVectorCodecTest.java index 1c5debfd1..e3dc2d407 100644 --- a/codecs/text/src/test/java/com/datastax/oss/dsbulk/codecs/text/string/StringToVectorCodecTest.java +++ b/codecs/text/src/test/java/com/datastax/oss/dsbulk/codecs/text/string/StringToVectorCodecTest.java @@ -125,6 +125,9 @@ void should_encode_too_many_but_not_too_few() { * arithmetic overflow */ @Test void should_not_convert_too_much_precision() { - assertThat(dsbulkCodec).cannotConvertFromInternal("6.646329843"); + String tooPreciseVal = "6.646329843"; + assertThat(dsbulkCodec).cannotConvertFromInternal(tooPreciseVal); + assertThatThrownBy(() -> dsbulkCodec.encode(tooPreciseVal, ProtocolVersion.DEFAULT)) + .isInstanceOf(ArithmeticException.class); } } From 7e1d16d1646fdabe77c9a7029d6198c69deb0bcb Mon Sep 17 00:00:00 2001 From: Bret McGuire Date: Wed, 10 Jul 2024 17:15:02 -0500 Subject: [PATCH 4/4] Refactor StringToVectorCodec (#498) Co-authored-by: Alexandre Dutra --- .../string/StringConvertingCodecProvider.java | 13 ++- .../text/string/StringToVectorCodec.java | 53 ++++++----- .../text/string/StringToVectorCodecTest.java | 94 ++++++++----------- 3 files changed, 79 insertions(+), 81 deletions(-) diff --git a/codecs/text/src/main/java/com/datastax/oss/dsbulk/codecs/text/string/StringConvertingCodecProvider.java b/codecs/text/src/main/java/com/datastax/oss/dsbulk/codecs/text/string/StringConvertingCodecProvider.java index 3499ab38d..c9e3cf1ea 100644 --- a/codecs/text/src/main/java/com/datastax/oss/dsbulk/codecs/text/string/StringConvertingCodecProvider.java +++ b/codecs/text/src/main/java/com/datastax/oss/dsbulk/codecs/text/string/StringConvertingCodecProvider.java @@ -335,12 +335,15 @@ public class StringConvertingCodecProvider implements ConvertingCodecProvider { return new StringToDateRangeCodec(nullStrings); case DefaultVectorType.VECTOR_CLASS_NAME: VectorType vectorType = (VectorType) cqlType; - return new StringToVectorCodec( - new VectorCodec( + VectorCodec vectorCodec = + new VectorCodec<>( vectorType, - codecFactory.getCodecRegistry().codecFor(vectorType.getElementType())), - createStringConvertingCodec(vectorType.getElementType(), codecFactory, false), - nullStrings); + codecFactory.getCodecRegistry().codecFor(vectorType.getElementType())); + ConvertingCodec> jsonCodec = + codecFactory.createConvertingCodec( + DataTypes.listOf(vectorType.getElementType()), JSON_NODE_TYPE, false); + return new StringToVectorCodec<>( + vectorCodec, jsonCodec, context.getAttribute(OBJECT_MAPPER), nullStrings); } } // fall through diff --git a/codecs/text/src/main/java/com/datastax/oss/dsbulk/codecs/text/string/StringToVectorCodec.java b/codecs/text/src/main/java/com/datastax/oss/dsbulk/codecs/text/string/StringToVectorCodec.java index a0005c6cf..a65e9a820 100644 --- a/codecs/text/src/main/java/com/datastax/oss/dsbulk/codecs/text/string/StringToVectorCodec.java +++ b/codecs/text/src/main/java/com/datastax/oss/dsbulk/codecs/text/string/StringToVectorCodec.java @@ -17,46 +17,57 @@ import com.datastax.oss.driver.api.core.data.CqlVector; import com.datastax.oss.driver.internal.core.type.codec.VectorCodec; -import com.datastax.oss.driver.shaded.guava.common.base.Splitter; -import com.datastax.oss.driver.shaded.guava.common.collect.Streams; import com.datastax.oss.dsbulk.codecs.api.ConvertingCodec; -import java.util.ArrayList; +import com.datastax.oss.dsbulk.codecs.text.utils.StringUtils; +import com.fasterxml.jackson.core.JsonProcessingException; +import com.fasterxml.jackson.databind.JsonNode; +import com.fasterxml.jackson.databind.ObjectMapper; +import java.io.IOException; import java.util.List; import java.util.stream.Collectors; public class StringToVectorCodec extends StringConvertingCodec> { - private final ConvertingCodec stringCodec; + private final ConvertingCodec> jsonCodec; + private final ObjectMapper objectMapper; public StringToVectorCodec( VectorCodec targetCodec, - ConvertingCodec stringCodec, + ConvertingCodec> jsonCodec, + ObjectMapper objectMapper, List nullStrings) { super(targetCodec, nullStrings); - this.stringCodec = stringCodec; + this.jsonCodec = jsonCodec; + this.objectMapper = objectMapper; } @Override public CqlVector externalToInternal(String s) { - - // Logic below adapted from VectorCodec.parse() and CqlVector.from() but here we use the dsbulk - // codecs for the subtype in order to enforce any additional behaviours - // - // Logically this probably makes more sense anyway. It's the responsibilty of dsbulk to define - // what sorts of formats it wants to support for vectors. It can certainly re-use a - // representation - // known to work with vectors but it certainly isn't obligated to do so. - if (s == null || s.isEmpty() || s.equalsIgnoreCase("NULL")) return null; - ArrayList vals = - Streams.stream(Splitter.on(", ").split(s.substring(1, s.length() - 1))) - .map(this.stringCodec::externalToInternal) - .collect(Collectors.toCollection(ArrayList::new)); - return CqlVector.newInstance(vals); + if (isNullOrEmpty(s)) { + return null; + } + try { + JsonNode node = objectMapper.readTree(StringUtils.ensureBrackets(s)); + List vals = jsonCodec.externalToInternal(node); + return CqlVector.newInstance(vals); + } catch (IOException e) { + throw new IllegalArgumentException(String.format("Could not parse '%s' as Json", s), e); + } } @Override public String internalToExternal(CqlVector cqlVector) { - return this.internalCodec.format(cqlVector); + if (cqlVector == null) { + return nullString(); + } + try { + List vals = cqlVector.stream().collect(Collectors.toList()); + JsonNode node = jsonCodec.internalToExternal(vals); + return objectMapper.writeValueAsString(node); + } catch (JsonProcessingException e) { + throw new IllegalArgumentException( + String.format("Could not format '%s' to Json", cqlVector), e); + } } } diff --git a/codecs/text/src/test/java/com/datastax/oss/dsbulk/codecs/text/string/StringToVectorCodecTest.java b/codecs/text/src/test/java/com/datastax/oss/dsbulk/codecs/text/string/StringToVectorCodecTest.java index e3dc2d407..7de54316a 100644 --- a/codecs/text/src/test/java/com/datastax/oss/dsbulk/codecs/text/string/StringToVectorCodecTest.java +++ b/codecs/text/src/test/java/com/datastax/oss/dsbulk/codecs/text/string/StringToVectorCodecTest.java @@ -15,15 +15,6 @@ */ package com.datastax.oss.dsbulk.codecs.text.string; -import static com.datastax.oss.dsbulk.codecs.api.CommonConversionContext.BOOLEAN_INPUT_WORDS; -import static com.datastax.oss.dsbulk.codecs.api.CommonConversionContext.BOOLEAN_NUMBERS; -import static com.datastax.oss.dsbulk.codecs.api.CommonConversionContext.EPOCH; -import static com.datastax.oss.dsbulk.codecs.api.CommonConversionContext.NUMBER_FORMAT; -import static com.datastax.oss.dsbulk.codecs.api.CommonConversionContext.OVERFLOW_STRATEGY; -import static com.datastax.oss.dsbulk.codecs.api.CommonConversionContext.ROUNDING_MODE; -import static com.datastax.oss.dsbulk.codecs.api.CommonConversionContext.TIMESTAMP_FORMAT; -import static com.datastax.oss.dsbulk.codecs.api.CommonConversionContext.TIME_UNIT; -import static com.datastax.oss.dsbulk.codecs.api.CommonConversionContext.TIME_ZONE; import static com.datastax.oss.dsbulk.tests.assertions.TestAssertions.assertThat; import static org.assertj.core.api.Assertions.assertThatThrownBy; @@ -31,48 +22,45 @@ import com.datastax.oss.driver.api.core.data.CqlVector; import com.datastax.oss.driver.api.core.type.DataTypes; import com.datastax.oss.driver.api.core.type.codec.TypeCodecs; +import com.datastax.oss.driver.api.core.type.reflect.GenericType; import com.datastax.oss.driver.internal.core.type.DefaultVectorType; import com.datastax.oss.driver.internal.core.type.codec.VectorCodec; -import com.datastax.oss.driver.shaded.guava.common.collect.ImmutableList; import com.datastax.oss.driver.shaded.guava.common.collect.Lists; -import com.datastax.oss.dsbulk.codecs.api.CommonConversionContext; import com.datastax.oss.dsbulk.codecs.api.ConversionContext; +import com.datastax.oss.dsbulk.codecs.api.ConvertingCodecFactory; +import com.datastax.oss.dsbulk.codecs.text.TextConversionContext; import java.util.ArrayList; -import java.util.List; +import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.Test; public class StringToVectorCodecTest { private final ArrayList values = Lists.newArrayList(1.1f, 2.2f, 3.3f, 4.4f, 5.5f); - private final CqlVector vector = CqlVector.newInstance(values); - private final VectorCodec vectorCodec = - new VectorCodec(new DefaultVectorType(DataTypes.FLOAT, 5), TypeCodecs.FLOAT); + private final CqlVector vector = CqlVector.newInstance(values); + private final VectorCodec vectorCodec = + new VectorCodec<>(new DefaultVectorType(DataTypes.FLOAT, 5), TypeCodecs.FLOAT); - private final StringToVectorCodec dsbulkCodec; + private StringToVectorCodec codec; - public StringToVectorCodecTest() { - - ConversionContext context = new CommonConversionContext(); - List nullStrings = ImmutableList.of(); - StringToFloatCodec stringCodec = - new StringToFloatCodec( - context.getAttribute(NUMBER_FORMAT), - context.getAttribute(OVERFLOW_STRATEGY), - context.getAttribute(ROUNDING_MODE), - context.getAttribute(TIMESTAMP_FORMAT), - context.getAttribute(TIME_ZONE), - context.getAttribute(TIME_UNIT), - context.getAttribute(EPOCH), - context.getAttribute(BOOLEAN_INPUT_WORDS), - context.getAttribute(BOOLEAN_NUMBERS), - nullStrings); - dsbulkCodec = new StringToVectorCodec(vectorCodec, stringCodec, nullStrings); + @BeforeEach + void setUp() { + ConversionContext context = new TextConversionContext().setNullStrings("NULL"); + ConvertingCodecFactory codecFactory = new ConvertingCodecFactory(context); + codec = + (StringToVectorCodec) + codecFactory.>createConvertingCodec( + DataTypes.vectorOf(DataTypes.FLOAT, 5), GenericType.STRING, true); } @Test void should_convert_from_valid_external() { - assertThat(dsbulkCodec) - .convertsFromExternal(vectorCodec.format(vector)) // standard pattern + assertThat(codec) + .convertsFromExternal( + vectorCodec.format(vector)) // CQL representation is parsable as a json array + .toInternal(vector) + .convertsFromExternal("[1.1,2.2,3.3,4.4,5.5]") + .toInternal(vector) + .convertsFromExternal("[1.1000,2.2000,3.3000,4.4000,5.5000]") .toInternal(vector) .convertsFromExternal("") .toInternal(null) @@ -84,50 +72,46 @@ void should_convert_from_valid_external() { @Test void should_convert_from_valid_internal() { - assertThat(dsbulkCodec) + assertThat(codec) .convertsFromInternal(vector) - .toExternal(vectorCodec.format(vector)) + .toExternal( + "[1.1,2.2,3.3,4.4,5.5]") // this is NOT 100% identical to vector CQL representation .convertsFromInternal(null) .toExternal("NULL"); - - // We should encode } @Test - void should_not_convert_from_invalid_internal() { - assertThat(dsbulkCodec).cannotConvertFromInternal("not a valid vector"); + void should_not_convert_from_invalid_external() { + assertThat(codec).cannotConvertFromExternal("[6.646329843]"); } // To keep usage consistent with VectorCodec we confirm that we support encoding when too many - // elements are - // available but not when too few are. Note that it's actually VectorCodec that enforces this - // constraint so we - // have to go through encode() rather than the internal/external methods. + // elements are available but not when too few are. Note that it's actually VectorCodec that + // enforces this constraint so we have to go through encode() rather than the internal/external + // methods. @Test void should_encode_too_many_but_not_too_few() { ArrayList tooMany = Lists.newArrayList(values); tooMany.add(6.6f); CqlVector tooManyVector = CqlVector.newInstance(tooMany); - String tooManyString = dsbulkCodec.internalToExternal(tooManyVector); + String tooManyString = codec.internalToExternal(tooManyVector); ArrayList tooFew = Lists.newArrayList(values); tooFew.remove(0); CqlVector tooFewVector = CqlVector.newInstance(tooFew); - String tooFewString = dsbulkCodec.internalToExternal(tooFewVector); + String tooFewString = codec.internalToExternal(tooFewVector); - assertThat(dsbulkCodec.encode(tooManyString, ProtocolVersion.DEFAULT)).isNotNull(); - assertThatThrownBy(() -> dsbulkCodec.encode(tooFewString, ProtocolVersion.DEFAULT)) + assertThat(codec.encode(tooManyString, ProtocolVersion.DEFAULT)).isNotNull(); + assertThatThrownBy(() -> codec.encode(tooFewString, ProtocolVersion.DEFAULT)) .isInstanceOf(IllegalArgumentException.class); } - /* Issue 484: now that we're using the dsbulk string-to-subtype converters we should get - * enforcement of existing dsbulk policies. For our purposes that means the failure on - * arithmetic overflow */ + // Issue 484: now that we're using the dsbulk string-to-subtype converters we should get + // enforcement of existing dsbulk policies. For our purposes that means the failure on + // arithmetic overflow. @Test void should_not_convert_too_much_precision() { - String tooPreciseVal = "6.646329843"; - assertThat(dsbulkCodec).cannotConvertFromInternal(tooPreciseVal); - assertThatThrownBy(() -> dsbulkCodec.encode(tooPreciseVal, ProtocolVersion.DEFAULT)) + assertThatThrownBy(() -> codec.encode("6.646329843", ProtocolVersion.DEFAULT)) .isInstanceOf(ArithmeticException.class); } }