18 | 18 |
19 | 19 | package org.apache.xtable.service; |
20 | 20 |
21 | | -import static org.apache.xtable.model.storage.TableFormat.DELTA; |
22 | | -import static org.apache.xtable.model.storage.TableFormat.HUDI; |
23 | | -import static org.apache.xtable.model.storage.TableFormat.ICEBERG; |
| 21 | +import static org.apache.xtable.conversion.ConversionUtils.convertToSourceTable; |
24 | 22 |
25 | 23 | import java.util.ArrayList; |
26 | 24 | import java.util.List; |
27 | 25 |
28 | | -import org.apache.commons.lang3.tuple.Pair; |
| 26 | +import lombok.extern.log4j.Log4j2; |
| 27 | + |
| 28 | +import org.apache.hadoop.conf.Configuration; |
| 29 | + |
| 30 | +import org.apache.iceberg.SchemaParser; |
29 | 31 |
30 | 32 | import com.google.common.annotations.VisibleForTesting; |
31 | 33 |
| 34 | +import org.apache.xtable.avro.AvroSchemaConverter; |
32 | 35 | import org.apache.xtable.conversion.ConversionConfig; |
33 | 36 | import org.apache.xtable.conversion.ConversionController; |
34 | | -import org.apache.xtable.conversion.ConversionSourceProvider; |
| 37 | +import org.apache.xtable.conversion.ConversionUtils; |
35 | 38 | import org.apache.xtable.conversion.SourceTable; |
36 | 39 | import org.apache.xtable.conversion.TargetTable; |
| 40 | +import org.apache.xtable.iceberg.IcebergSchemaExtractor; |
| 41 | +import org.apache.xtable.model.InternalTable; |
| 42 | +import org.apache.xtable.model.storage.TableFormat; |
| 43 | +import org.apache.xtable.schema.SparkSchemaExtractor; |
37 | 44 | import org.apache.xtable.service.models.ConvertTableRequest; |
38 | 45 | import org.apache.xtable.service.models.ConvertTableResponse; |
39 | 46 | import org.apache.xtable.service.models.ConvertedTable; |
40 | | -import org.apache.xtable.service.spark.SparkHolder; |
41 | | -import org.apache.xtable.service.utils.ConversionServiceUtil; |
42 | 47 |
43 | 48 | import jakarta.enterprise.context.ApplicationScoped; |
44 | 49 | import jakarta.inject.Inject; |
50 | 55 | * a source table, generating target tables, and then executing the conversion via a designated |
51 | 56 | * conversion controller. |
52 | 57 | */ |
| 58 | +@Log4j2 |
53 | 59 | @ApplicationScoped |
54 | 60 | public class ConversionService { |
55 | | - private final SparkHolder sparkHolder; |
56 | 61 | private final ConversionController conversionController; |
57 | | - private final ConversionServiceUtil conversionServiceUtil; |
| 62 | + private final ConversionServiceConfig serviceConfig; |
| 63 | + private final Configuration hadoopConf; |
58 | 64 |
59 | 65 | /** |
60 | 66 | * Constructs a ConversionService instance with required dependencies. |
61 | 67 | * |
62 | | - * @param sparkHolder the Spark holder instance containing the Spark context and configuration |
63 | | - * @param conversionServiceUtil utility for handling metadata operations |
| 68 | + * @param serviceConfig the conversion service configuration |
64 | 69 | */ |
65 | 70 | @Inject |
66 | | - public ConversionService(SparkHolder sparkHolder, ConversionServiceUtil conversionServiceUtil) { |
67 | | - this.sparkHolder = sparkHolder; |
68 | | - this.conversionServiceUtil = conversionServiceUtil; |
69 | | - this.conversionController = new ConversionController(sparkHolder.jsc().hadoopConfiguration()); |
| 71 | + public ConversionService(ConversionServiceConfig serviceConfig) { |
| 72 | + this.serviceConfig = serviceConfig; |
| 73 | + this.hadoopConf = getHadoopConf(); |
| 74 | + this.conversionController = new ConversionController(hadoopConf); |
| 75 | + } |
| 76 | + |
| 77 | + private Configuration getHadoopConf() { |
| 78 | + Configuration conf = new Configuration(); |
| 79 | + String hadoopConfigPath = serviceConfig.getHadoopConfigPath(); |
| 80 | + try { |
| 81 | + // Load configuration from the specified XML file |
| 82 | + conf.addResource(hadoopConfigPath); |
| 83 | + |
| 84 | + // If the resource wasn’t found, log a warning |
| 85 | + if (conf.size() == 0) { |
| 86 | + log.warn( |
| 87 | + "Could not load Hadoop configuration from: {}. Using default Hadoop configuration.", |
| 88 | + hadoopConfigPath); |
| 89 | + } |
| 90 | + } catch (Exception e) { |
| 91 | + log.error( |
| 92 | + "Error loading Hadoop configuration from: {}. Exception: {}", |
| 93 | + hadoopConfigPath, |
| 94 | + e.getMessage(), |
| 95 | + e); |
| 96 | + } |
| 97 | + return conf; |
70 | 98 | } |
71 | 99 |
72 | 100 | /** |
73 | 101 | * Constructs a ConversionService instance using dependency injection for testing. |
74 | 102 | * |
75 | | - * @param sparkHolder the Spark holder instance |
76 | 103 | * @param conversionController a preconfigured conversion controller |
77 | | - * @param conversionServiceUtil utility for handling metadata operations |
78 | 104 | */ |
79 | 105 | @VisibleForTesting |
80 | 106 | public ConversionService( |
81 | | - SparkHolder sparkHolder, |
82 | | - ConversionServiceUtil conversionServiceUtil, |
83 | | - ConversionController conversionController) { |
84 | | - this.sparkHolder = sparkHolder; |
| 107 | + ConversionServiceConfig serviceConfig, |
| 108 | + ConversionController conversionController, |
| 109 | + Configuration hadoopConf) { |
| 110 | + this.serviceConfig = serviceConfig; |
85 | 111 | this.conversionController = conversionController; |
86 | | - this.conversionServiceUtil = conversionServiceUtil; |
| 112 | + this.hadoopConf = hadoopConf; |
87 | 113 | } |
88 | 114 |
89 | 115 | /** |
@@ -120,37 +146,47 @@ public ConvertTableResponse convertTable(ConvertTableRequest convertTableRequest |
120 | 146 | ConversionConfig conversionConfig = |
121 | 147 | ConversionConfig.builder().sourceTable(sourceTable).targetTables(targetTables).build(); |
122 | 148 |
123 | | - ConversionSourceProvider<?> conversionSourceProvider = |
124 | | - conversionServiceUtil.getConversionSourceProvider( |
125 | | - convertTableRequest.getSourceFormat(), sparkHolder.jsc().hadoopConfiguration()); |
| 149 | + conversionController.sync( |
| 150 | + conversionConfig, |
| 151 | + ConversionUtils.getConversionSourceProvider( |
| 152 | + convertTableRequest.getSourceFormat(), hadoopConf)); |
126 | 153 |
127 | | - conversionController.sync(conversionConfig, conversionSourceProvider); |
128 | | - |
129 | | - List<ConvertedTable> restTargetTables = new ArrayList<>(); |
130 | | - for (String targetFormat : convertTableRequest.getTargetFormats()) { |
131 | | - if (targetFormat.equals(ICEBERG)) { |
132 | | - Pair<String, String> responseFields = |
133 | | - conversionServiceUtil.getIcebergSchemaAndMetadataPath( |
134 | | - convertTableRequest.getSourceTablePath(), sparkHolder.jsc().hadoopConfiguration()); |
135 | | - ConvertedTable icebergTable = |
136 | | - new ConvertedTable(ICEBERG, responseFields.getLeft(), responseFields.getRight()); |
137 | | - restTargetTables.add(icebergTable); |
138 | | - } else if (targetFormat.equals(HUDI)) { |
139 | | - Pair<String, String> responseFields = |
140 | | - conversionServiceUtil.getHudiSchemaAndMetadataPath( |
141 | | - convertTableRequest.getSourceTablePath(), sparkHolder.jsc().hadoopConfiguration()); |
142 | | - ConvertedTable hudiTable = |
143 | | - new ConvertedTable(HUDI, responseFields.getLeft(), responseFields.getRight()); |
144 | | - restTargetTables.add(hudiTable); |
145 | | - } else if (targetFormat.equals(DELTA)) { |
146 | | - Pair<String, String> responseFields = |
147 | | - conversionServiceUtil.getDeltaSchemaAndMetadataPath( |
148 | | - convertTableRequest.getSourceTablePath(), sparkHolder.spark()); |
149 | | - ConvertedTable deltaTable = |
150 | | - new ConvertedTable(DELTA, responseFields.getLeft(), responseFields.getRight()); |
151 | | - restTargetTables.add(deltaTable); |
| 154 | + List<ConvertedTable> convertedTables = new ArrayList<>(); |
| 155 | + for (TargetTable targetTable : targetTables) { |
| 156 | + InternalTable internalTable = |
| 157 | + ConversionUtils.getConversionSourceProvider(targetTable.getFormatName(), hadoopConf) |
| 158 | + .getConversionSourceInstance(convertToSourceTable(targetTable)) |
| 159 | + .getCurrentTable(); |
| 160 | + String schemaString; |
| 161 | + switch (targetTable.getFormatName()) { |
| 162 | + case TableFormat.HUDI: |
| 163 | + schemaString = |
| 164 | + AvroSchemaConverter.getInstance() |
| 165 | + .fromInternalSchema(internalTable.getReadSchema()) |
| 166 | + .toString(); |
| 167 | + break; |
| 168 | + case TableFormat.ICEBERG: |
| 169 | + org.apache.iceberg.Schema iceSchema = |
| 170 | + IcebergSchemaExtractor.getInstance().toIceberg(internalTable.getReadSchema()); |
| 171 | + schemaString = SchemaParser.toJson(iceSchema); |
| 172 | + break; |
| 173 | + case TableFormat.DELTA: |
| 174 | + schemaString = |
| 175 | + SparkSchemaExtractor.getInstance() |
| 176 | + .fromInternalSchema(internalTable.getReadSchema()) |
| 177 | + .json(); |
| 178 | + break; |
| 179 | + default: |
| 180 | + throw new UnsupportedOperationException( |
| 181 | + "Unsupported table format: " + targetTable.getFormatName()); |
152 | 182 | } |
| 183 | + convertedTables.add( |
| 184 | + ConvertedTable.builder() |
| 185 | + .targetFormat(internalTable.getName()) |
| 186 | + .targetSchema(schemaString) |
| 187 | + .targetMetadataPath(internalTable.getLatestMetdataPath()) |
| 188 | + .build()); |
153 | 189 | } |
154 | | - return new ConvertTableResponse(restTargetTables); |
| 190 | + return new ConvertTableResponse(convertedTables); |
155 | 191 | } |
156 | 192 | } |
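
For context, a minimal usage sketch of the reworked service (not part of this change). Only the @VisibleForTesting constructor, the ConversionController(Configuration) constructor, the TableFormat constants, and the convertTable signature are taken from the diff above; the ConvertTableRequest builder and its field names are assumptions made for illustration.

package org.apache.xtable.service;

import java.util.Arrays;

import org.apache.hadoop.conf.Configuration;

import org.apache.xtable.conversion.ConversionController;
import org.apache.xtable.model.storage.TableFormat;
import org.apache.xtable.service.models.ConvertTableRequest;
import org.apache.xtable.service.models.ConvertTableResponse;

public class ConversionServiceUsageSketch {

  // Wires the service directly through the @VisibleForTesting constructor,
  // bypassing CDI; in the running application serviceConfig is injected by the container.
  static ConvertTableResponse run(ConversionServiceConfig serviceConfig) {
    Configuration hadoopConf = new Configuration();
    ConversionService service =
        new ConversionService(serviceConfig, new ConversionController(hadoopConf), hadoopConf);

    // Hypothetical request construction: the builder and field names below are
    // illustrative and are not defined in this diff.
    ConvertTableRequest request =
        ConvertTableRequest.builder()
            .sourceFormat(TableFormat.DELTA)
            .sourceTablePath("file:///tmp/example_delta_table")
            .targetFormats(Arrays.asList(TableFormat.ICEBERG, TableFormat.HUDI))
            .build();

    return service.convertTable(request);
  }
}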