diff --git a/codecs/text/src/main/java/com/datastax/oss/dsbulk/codecs/text/json/JsonNodeConvertingCodecProvider.java b/codecs/text/src/main/java/com/datastax/oss/dsbulk/codecs/text/json/JsonNodeConvertingCodecProvider.java
index 139440f3f..977b49f62 100644
--- a/codecs/text/src/main/java/com/datastax/oss/dsbulk/codecs/text/json/JsonNodeConvertingCodecProvider.java
+++ b/codecs/text/src/main/java/com/datastax/oss/dsbulk/codecs/text/json/JsonNodeConvertingCodecProvider.java
@@ -396,18 +396,14 @@ public class JsonNodeConvertingCodecProvider implements ConvertingCodecProvider
         return new JsonNodeToDateRangeCodec(nullStrings);
       case DefaultVectorType.VECTOR_CLASS_NAME:
         VectorType vectorType = (VectorType) cqlType;
-        // Step 1: create a JSON codec which will take the input JSON nodes and generate
-        // something matching the expected data type
-        ConvertingCodec<JsonNode, ?> jsonCodec =
+        // Parser for JSON leaf nodes, each of which represents a value of the vector subtype
+        ConvertingCodec<JsonNode, ?> leafCodec =
             createJsonNodeConvertingCodec(vectorType.getElementType(), codecFactory, false);
-        // Step 2: create a conventional codec which will take instances of the Java type
-        // generated by the JSON codec above and perform standard serde on them.
-        ConvertingCodec<?, ?> standardCodec =
-            codecFactory.createConvertingCodec(
-                vectorType.getElementType(), jsonCodec.getInternalJavaType(), false);
         return new JsonNodeToVectorCodec(
-            new VectorCodec(vectorType, standardCodec),
-            jsonCodec,
+            new VectorCodec(
+                vectorType,
+                codecFactory.getCodecRegistry().codecFor(vectorType.getElementType())),
+            leafCodec,
             context.getAttribute(OBJECT_MAPPER),
             nullStrings);
       }
diff --git a/codecs/text/src/main/java/com/datastax/oss/dsbulk/codecs/text/json/JsonNodeToVectorCodec.java b/codecs/text/src/main/java/com/datastax/oss/dsbulk/codecs/text/json/JsonNodeToVectorCodec.java
index 15ffd6d08..299927568 100644
--- a/codecs/text/src/main/java/com/datastax/oss/dsbulk/codecs/text/json/JsonNodeToVectorCodec.java
+++ b/codecs/text/src/main/java/com/datastax/oss/dsbulk/codecs/text/json/JsonNodeToVectorCodec.java
@@ -29,16 +29,16 @@ public class JsonNodeToVectorCodec<SubtypeT> extends JsonNodeConvertingCodec<CqlVector<SubtypeT>> {
-  private final ConvertingCodec<JsonNode, SubtypeT> subtypeCodec;
+  private final ConvertingCodec<JsonNode, SubtypeT> leafCodec;
   private final ObjectMapper objectMapper;

   public JsonNodeToVectorCodec(
       VectorCodec<SubtypeT> targetCodec,
-      ConvertingCodec<JsonNode, SubtypeT> subtypeCodec,
+      ConvertingCodec<JsonNode, SubtypeT> leafCodec,
       ObjectMapper objectMapper,
       List<String> nullStrings) {
     super(targetCodec, nullStrings);
-    this.subtypeCodec = subtypeCodec;
+    this.leafCodec = leafCodec;
     this.objectMapper = objectMapper;
   }
@@ -47,7 +47,7 @@ public CqlVector<SubtypeT> externalToInternal(JsonNode jsonNode) {
     if (jsonNode == null || !jsonNode.isArray()) return null;
     List<SubtypeT> elems =
         Streams.stream(jsonNode.elements())
-            .map(e -> subtypeCodec.externalToInternal(e))
+            .map(e -> leafCodec.externalToInternal(e))
             .collect(Collectors.toCollection(ArrayList::new));
     return CqlVector.newInstance(elems);
   }
@@ -57,7 +57,7 @@ public JsonNode internalToExternal(CqlVector<SubtypeT> value) {
     if (value == null) return null;
     ArrayNode root = objectMapper.createArrayNode();
     for (SubtypeT element : value) {
-      root.add(subtypeCodec.internalToExternal(element));
+      root.add(leafCodec.internalToExternal(element));
     }
     return root;
   }
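For illustration, the JSON-side conversion above amounts to walking the array node and converting each leaf element individually. Below is a minimal sketch of that flow, assuming a float subtype; the class name is hypothetical and the plain floatValue() call stands in for leafCodec.externalToInternal(), which in the real codec also applies dsbulk's numeric conversion policies.

import com.datastax.oss.driver.api.core.data.CqlVector;
import com.fasterxml.jackson.databind.JsonNode;
import com.fasterxml.jackson.databind.ObjectMapper;
import java.util.ArrayList;
import java.util.List;

public class LeafConversionSketch {
  public static void main(String[] args) throws Exception {
    ObjectMapper mapper = new ObjectMapper();
    JsonNode node = mapper.readTree("[1.1, 2.2, 3.3]");
    if (!node.isArray()) {
      throw new IllegalArgumentException("expected a JSON array");
    }
    // Convert each leaf node; the real codec delegates this step to the
    // per-element leaf codec instead of calling floatValue() directly.
    List<Float> elems = new ArrayList<>();
    for (JsonNode leaf : node) {
      elems.add(leaf.floatValue());
    }
    CqlVector<Float> vector = CqlVector.newInstance(elems);
    System.out.println(vector); // three-element vector; exact toString depends on driver version
  }
}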
diff --git a/codecs/text/src/main/java/com/datastax/oss/dsbulk/codecs/text/string/StringConvertingCodecProvider.java b/codecs/text/src/main/java/com/datastax/oss/dsbulk/codecs/text/string/StringConvertingCodecProvider.java
index 740df2d20..c9e3cf1ea 100644
--- a/codecs/text/src/main/java/com/datastax/oss/dsbulk/codecs/text/string/StringConvertingCodecProvider.java
+++ b/codecs/text/src/main/java/com/datastax/oss/dsbulk/codecs/text/string/StringConvertingCodecProvider.java
@@ -335,11 +335,15 @@ public class StringConvertingCodecProvider implements ConvertingCodecProvider {
         return new StringToDateRangeCodec(nullStrings);
       case DefaultVectorType.VECTOR_CLASS_NAME:
         VectorType vectorType = (VectorType) cqlType;
-        return new StringToVectorCodec(
-            new VectorCodec(
+        VectorCodec<?> vectorCodec =
+            new VectorCodec<>(
                 vectorType,
-                codecFactory.getCodecRegistry().codecFor(vectorType.getElementType())),
-            nullStrings);
+                codecFactory.getCodecRegistry().codecFor(vectorType.getElementType()));
+        ConvertingCodec<JsonNode, List<?>> jsonCodec =
+            codecFactory.createConvertingCodec(
+                DataTypes.listOf(vectorType.getElementType()), JSON_NODE_TYPE, false);
+        return new StringToVectorCodec<>(
+            vectorCodec, jsonCodec, context.getAttribute(OBJECT_MAPPER), nullStrings);
       }
     }
     // fall through
diff --git a/codecs/text/src/main/java/com/datastax/oss/dsbulk/codecs/text/string/StringToVectorCodec.java b/codecs/text/src/main/java/com/datastax/oss/dsbulk/codecs/text/string/StringToVectorCodec.java
index fd70f1ea2..a65e9a820 100644
--- a/codecs/text/src/main/java/com/datastax/oss/dsbulk/codecs/text/string/StringToVectorCodec.java
+++ b/codecs/text/src/main/java/com/datastax/oss/dsbulk/codecs/text/string/StringToVectorCodec.java
@@ -17,22 +17,57 @@
 import com.datastax.oss.driver.api.core.data.CqlVector;
 import com.datastax.oss.driver.internal.core.type.codec.VectorCodec;
+import com.datastax.oss.dsbulk.codecs.api.ConvertingCodec;
+import com.datastax.oss.dsbulk.codecs.text.utils.StringUtils;
+import com.fasterxml.jackson.core.JsonProcessingException;
+import com.fasterxml.jackson.databind.JsonNode;
+import com.fasterxml.jackson.databind.ObjectMapper;
+import java.io.IOException;
 import java.util.List;
+import java.util.stream.Collectors;

 public class StringToVectorCodec<SubtypeT> extends StringConvertingCodec<CqlVector<SubtypeT>> {

-  public StringToVectorCodec(VectorCodec<SubtypeT> targetCodec, List<String> nullStrings) {
+  private final ConvertingCodec<JsonNode, List<SubtypeT>> jsonCodec;
+  private final ObjectMapper objectMapper;
+
+  public StringToVectorCodec(
+      VectorCodec<SubtypeT> targetCodec,
+      ConvertingCodec<JsonNode, List<SubtypeT>> jsonCodec,
+      ObjectMapper objectMapper,
+      List<String> nullStrings) {
     super(targetCodec, nullStrings);
+    this.jsonCodec = jsonCodec;
+    this.objectMapper = objectMapper;
   }

   @Override
   public CqlVector<SubtypeT> externalToInternal(String s) {
-    return this.internalCodec.parse(s);
+    if (isNullOrEmpty(s)) {
+      return null;
+    }
+    try {
+      JsonNode node = objectMapper.readTree(StringUtils.ensureBrackets(s));
+      List<SubtypeT> vals = jsonCodec.externalToInternal(node);
+      return CqlVector.newInstance(vals);
+    } catch (IOException e) {
+      throw new IllegalArgumentException(String.format("Could not parse '%s' as Json", s), e);
+    }
   }

   @Override
   public String internalToExternal(CqlVector<SubtypeT> cqlVector) {
-    return this.internalCodec.format(cqlVector);
+    if (cqlVector == null) {
+      return nullString();
+    }
+    try {
+      List<SubtypeT> vals = cqlVector.stream().collect(Collectors.toList());
+      JsonNode node = jsonCodec.internalToExternal(vals);
+      return objectMapper.writeValueAsString(node);
+    } catch (JsonProcessingException e) {
+      throw new IllegalArgumentException(
+          String.format("Could not format '%s' to Json", cqlVector), e);
+    }
   }
 }
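The string codec now reuses the JSON machinery: externalToInternal() brackets the raw input if needed, parses it with the shared ObjectMapper, and hands the array node to the list codec. A small sketch of the bracketing-plus-parsing step follows, with a simplified stand-in for dsbulk's StringUtils.ensureBrackets() (the real helper may handle more edge cases):

import com.fasterxml.jackson.databind.JsonNode;
import com.fasterxml.jackson.databind.ObjectMapper;

public class EnsureBracketsSketch {
  // Simplified stand-in for StringUtils.ensureBrackets(): wrap a bare
  // comma-separated literal so Jackson sees a JSON array.
  static String ensureBrackets(String s) {
    String trimmed = s.trim();
    return trimmed.startsWith("[") ? trimmed : "[" + trimmed + "]";
  }

  public static void main(String[] args) throws Exception {
    ObjectMapper mapper = new ObjectMapper();
    // A bracketed literal and a bare one should parse to the same array.
    for (String input : new String[] {"[1.1, 2.2, 3.3]", "1.1, 2.2, 3.3"}) {
      JsonNode node = mapper.readTree(ensureBrackets(input));
      System.out.println(node.isArray() + " -> " + node); // true -> [1.1,2.2,3.3]
    }
  }
}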
diff --git a/codecs/text/src/test/java/com/datastax/oss/dsbulk/codecs/text/json/JsonNodeToVectorCodecTest.java b/codecs/text/src/test/java/com/datastax/oss/dsbulk/codecs/text/json/JsonNodeToVectorCodecTest.java
index 4913b6cac..a991c1ef6 100644
--- a/codecs/text/src/test/java/com/datastax/oss/dsbulk/codecs/text/json/JsonNodeToVectorCodecTest.java
+++ b/codecs/text/src/test/java/com/datastax/oss/dsbulk/codecs/text/json/JsonNodeToVectorCodecTest.java
@@ -104,4 +104,16 @@ void should_encode_too_many_but_not_too_few() {
     assertThatThrownBy(() -> dsbulkCodec.encode(tooFewNode, ProtocolVersion.DEFAULT))
         .isInstanceOf(IllegalArgumentException.class);
   }
+
+  /* Issue 484: now that we're using the dsbulk subtype converters we should get
+   * enforcement of existing dsbulk policies. For our purposes that means failing on
+   * arithmetic overflow. */
+  @Test
+  void should_not_convert_too_much_precision() {
+    ArrayNode tooPreciseNode = JSON_NODE_FACTORY.arrayNode();
+    tooPreciseNode.add(JSON_NODE_FACTORY.numberNode(6.646329843));
+    assertThat(dsbulkCodec).cannotConvertFromExternal(tooPreciseNode);
+    assertThatThrownBy(() -> dsbulkCodec.encode(tooPreciseNode, ProtocolVersion.DEFAULT))
+        .isInstanceOf(ArithmeticException.class);
+  }
 }
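The ArithmeticException these new tests expect comes from dsbulk's overflow policy rather than from the vector machinery itself: 6.646329843 cannot be narrowed to a float without losing precision. Here is a sketch of that kind of exactness check, in the spirit of dsbulk's numeric conversions but not the actual CodecUtils implementation:

import java.math.BigDecimal;

public class ExactNarrowingSketch {
  // Narrow to float only if the value round-trips exactly; otherwise fail,
  // mirroring the policy the tests above rely on.
  static float toFloatExact(BigDecimal value) {
    float narrowed = value.floatValue();
    if (new BigDecimal(Float.toString(narrowed)).compareTo(value) != 0) {
      throw new ArithmeticException("Cannot convert " + value + " to float exactly");
    }
    return narrowed;
  }

  public static void main(String[] args) {
    System.out.println(toFloatExact(new BigDecimal("6.5"))); // 6.5 is exactly representable
    toFloatExact(new BigDecimal("6.646329843")); // throws ArithmeticException
  }
}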
diff --git a/codecs/text/src/test/java/com/datastax/oss/dsbulk/codecs/text/string/StringToVectorCodecTest.java b/codecs/text/src/test/java/com/datastax/oss/dsbulk/codecs/text/string/StringToVectorCodecTest.java
index d13112c85..7de54316a 100644
--- a/codecs/text/src/test/java/com/datastax/oss/dsbulk/codecs/text/string/StringToVectorCodecTest.java
+++ b/codecs/text/src/test/java/com/datastax/oss/dsbulk/codecs/text/string/StringToVectorCodecTest.java
@@ -22,26 +22,45 @@
 import com.datastax.oss.driver.api.core.data.CqlVector;
 import com.datastax.oss.driver.api.core.type.DataTypes;
 import com.datastax.oss.driver.api.core.type.codec.TypeCodecs;
+import com.datastax.oss.driver.api.core.type.reflect.GenericType;
 import com.datastax.oss.driver.internal.core.type.DefaultVectorType;
 import com.datastax.oss.driver.internal.core.type.codec.VectorCodec;
 import com.datastax.oss.driver.shaded.guava.common.collect.Lists;
+import com.datastax.oss.dsbulk.codecs.api.ConversionContext;
+import com.datastax.oss.dsbulk.codecs.api.ConvertingCodecFactory;
+import com.datastax.oss.dsbulk.codecs.text.TextConversionContext;
 import java.util.ArrayList;
+import org.junit.jupiter.api.BeforeEach;
 import org.junit.jupiter.api.Test;

 public class StringToVectorCodecTest {

   private final ArrayList<Float> values = Lists.newArrayList(1.1f, 2.2f, 3.3f, 4.4f, 5.5f);
-  private final CqlVector vector = CqlVector.newInstance(values);
-  private final VectorCodec vectorCodec =
-      new VectorCodec(new DefaultVectorType(DataTypes.FLOAT, 5), TypeCodecs.FLOAT);
+  private final CqlVector<Float> vector = CqlVector.newInstance(values);
+  private final VectorCodec<Float> vectorCodec =
+      new VectorCodec<>(new DefaultVectorType(DataTypes.FLOAT, 5), TypeCodecs.FLOAT);

-  private final StringToVectorCodec dsbulkCodec =
-      new StringToVectorCodec(vectorCodec, Lists.newArrayList("NULL"));
+  private StringToVectorCodec<Float> codec;
+
+  @BeforeEach
+  void setUp() {
+    ConversionContext context = new TextConversionContext().setNullStrings("NULL");
+    ConvertingCodecFactory codecFactory = new ConvertingCodecFactory(context);
+    codec =
+        (StringToVectorCodec<Float>)
+            codecFactory.<String, CqlVector<Float>>createConvertingCodec(
+                DataTypes.vectorOf(DataTypes.FLOAT, 5), GenericType.STRING, true);
+  }

   @Test
   void should_convert_from_valid_external() {
-    assertThat(dsbulkCodec)
-        .convertsFromExternal(vectorCodec.format(vector)) // standard pattern
+    assertThat(codec)
+        .convertsFromExternal(
+            vectorCodec.format(vector)) // CQL representation is parsable as a json array
+        .toInternal(vector)
+        .convertsFromExternal("[1.1,2.2,3.3,4.4,5.5]")
+        .toInternal(vector)
+        .convertsFromExternal("[1.1000,2.2000,3.3000,4.4000,5.5000]")
         .toInternal(vector)
         .convertsFromExternal("")
         .toInternal(null)
@@ -53,39 +72,46 @@
   @Test
   void should_convert_from_valid_internal() {
-    assertThat(dsbulkCodec)
+    assertThat(codec)
         .convertsFromInternal(vector)
-        .toExternal(vectorCodec.format(vector))
+        .toExternal(
+            "[1.1,2.2,3.3,4.4,5.5]") // this is NOT 100% identical to the vector CQL representation
         .convertsFromInternal(null)
         .toExternal("NULL");
-
-    // We should encode
   }

   @Test
-  void should_not_convert_from_invalid_internal() {
-    assertThat(dsbulkCodec).cannotConvertFromInternal("not a valid vector");
+  void should_not_convert_from_invalid_external() {
+    assertThat(codec).cannotConvertFromExternal("[6.646329843]");
   }

   // To keep usage consistent with VectorCodec we confirm that we support encoding when too many
-  // elements are
-  // available but not when too few are. Note that it's actually VectorCodec that enforces this
-  // constraint so we
-  // have to go through encode() rather than the internal/external methods.
+  // elements are available but not when too few are. Note that it's actually VectorCodec that
+  // enforces this constraint so we have to go through encode() rather than the internal/external
+  // methods.
   @Test
   void should_encode_too_many_but_not_too_few() {
     ArrayList<Float> tooMany = Lists.newArrayList(values);
     tooMany.add(6.6f);
     CqlVector<Float> tooManyVector = CqlVector.newInstance(tooMany);
-    String tooManyString = dsbulkCodec.internalToExternal(tooManyVector);
+    String tooManyString = codec.internalToExternal(tooManyVector);
     ArrayList<Float> tooFew = Lists.newArrayList(values);
     tooFew.remove(0);
     CqlVector<Float> tooFewVector = CqlVector.newInstance(tooFew);
-    String tooFewString = dsbulkCodec.internalToExternal(tooFewVector);
+    String tooFewString = codec.internalToExternal(tooFewVector);

-    assertThat(dsbulkCodec.encode(tooManyString, ProtocolVersion.DEFAULT)).isNotNull();
-    assertThatThrownBy(() -> dsbulkCodec.encode(tooFewString, ProtocolVersion.DEFAULT))
+    assertThat(codec.encode(tooManyString, ProtocolVersion.DEFAULT)).isNotNull();
+    assertThatThrownBy(() -> codec.encode(tooFewString, ProtocolVersion.DEFAULT))
         .isInstanceOf(IllegalArgumentException.class);
   }
+
+  // Issue 484: now that we're using the dsbulk string-to-subtype converters we should get
+  // enforcement of existing dsbulk policies. For our purposes that means failing on
+  // arithmetic overflow.
+  @Test
+  void should_not_convert_too_much_precision() {
+    assertThatThrownBy(() -> codec.encode("6.646329843", ProtocolVersion.DEFAULT))
+        .isInstanceOf(ArithmeticException.class);
+  }
 }
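One behavioral consequence visible in the test expectations above: internalToExternal() now emits Jackson's compact array form, which can differ in whitespace from the driver's CQL literal. A quick sketch of the compact output; the spaced CQL form mentioned in the comment is an assumption inferred from the test's "NOT 100% identical" note, not verified driver output.

import com.fasterxml.jackson.databind.ObjectMapper;
import java.util.Arrays;

public class CompactFormattingSketch {
  public static void main(String[] args) throws Exception {
    ObjectMapper mapper = new ObjectMapper();
    // Jackson's default writer emits no whitespace between elements.
    String json = mapper.writeValueAsString(Arrays.asList(1.1f, 2.2f, 3.3f, 4.4f, 5.5f));
    System.out.println(json); // [1.1,2.2,3.3,4.4,5.5]
    // A CQL vector literal is assumed to render with spaces, e.g.
    // "[1.1, 2.2, 3.3, 4.4, 5.5]", hence the round-trip asymmetry the test notes.
  }
}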