Skip to content

Commit 2e2de6a

Browse files
committed
Group reader/writer into format adapters
1 parent 0787de2 commit 2e2de6a

File tree

11 files changed

+236
-120
lines changed

11 files changed

+236
-120
lines changed
Lines changed: 58 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,58 @@
1+
package com.widen.tabitha.formats;
2+
3+
import com.widen.tabitha.reader.ReaderOptions;
4+
import com.widen.tabitha.reader.RowReader;
5+
import com.widen.tabitha.writer.RowWriter;
6+
7+
import java.io.IOException;
8+
import java.io.InputStream;
9+
import java.io.OutputStream;
10+
import java.nio.file.Files;
11+
import java.nio.file.Path;
12+
13+
/**
14+
* Provides factory methods for creating readers and writers of a particular format.
15+
*/
16+
public interface FormatAdapter {
17+
/**
18+
* Create a row reader for a file at the given path.
19+
*
20+
* @param path The path of the file to read.
21+
* @param options Options to pass to the reader.
22+
* @return A new row reader.
23+
* @throws IOException if an I/O error occurs.
24+
*/
25+
default RowReader createReader(Path path, ReaderOptions options) throws IOException {
26+
return createReader(Files.newInputStream(path), options);
27+
}
28+
29+
/**
30+
* Create a row reader for an input stream.
31+
*
32+
* @param inputStream The input stream to read.
33+
* @param options Options to pass to the reader.
34+
* @return A new row reader.
35+
* @throws IOException if an I/O error occurs.
36+
*/
37+
RowReader createReader(InputStream inputStream, ReaderOptions options) throws IOException;
38+
39+
/**
40+
* Create a row writer that writes to the given path.
41+
*
42+
* @param path The path to write to.
43+
* @return A new row writer.
44+
* @throws IOException if an I/O error occurs.
45+
*/
46+
default RowWriter createWriter(Path path) throws IOException {
47+
return createWriter(Files.newOutputStream(path));
48+
}
49+
50+
/**
51+
* Create a row writer that writes to the given output stream.
52+
*
53+
* @param outputStream The output stream to write to.
54+
* @return A new row writer.
55+
* @throws IOException if an I/O error occurs.
56+
*/
57+
RowWriter createWriter(OutputStream outputStream) throws IOException;
58+
}
Lines changed: 110 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,110 @@
1+
package com.widen.tabitha.formats;
2+
3+
import com.widen.tabitha.formats.delimited.DelimitedFormat;
4+
import com.widen.tabitha.formats.delimited.DelimitedRowReader;
5+
import com.widen.tabitha.formats.delimited.DelimitedRowWriter;
6+
import com.widen.tabitha.formats.excel.WorkbookRowWriter;
7+
import com.widen.tabitha.formats.excel.XLSRowReader;
8+
import com.widen.tabitha.formats.excel.XLSXRowReader;
9+
import com.widen.tabitha.reader.InlineHeaderReader;
10+
import com.widen.tabitha.reader.ReaderOptions;
11+
import com.widen.tabitha.reader.RowReader;
12+
import com.widen.tabitha.writer.RowWriter;
13+
import io.reactivex.Maybe;
14+
15+
import java.io.IOException;
16+
import java.io.InputStream;
17+
import java.io.OutputStream;
18+
import java.nio.file.Path;
19+
20+
/**
21+
* Manages the adapters for the file formats supported by Tabitha.
22+
* <p>
23+
* You probably want to use {@link com.widen.tabitha.reader.RowReaders} or {@link com.widen.tabitha.writer.RowWriter}
24+
* instead.
25+
*/
26+
public class FormatRegistry {
27+
/**
28+
* Get a format factory for handling the given MIME type.
29+
*
30+
* @param mimeType The format MIME type.
31+
* @return A format adapter, if one could be found.
32+
*/
33+
public static Maybe<FormatAdapter> forMimeType(String mimeType) {
34+
switch (mimeType) {
35+
case "text/csv":
36+
case "text/plain":
37+
return Maybe.just(new FormatAdapter() {
38+
@Override
39+
public RowReader createReader(InputStream inputStream, ReaderOptions options) {
40+
return decorateReader(new DelimitedRowReader(inputStream, DelimitedFormat.CSV), options);
41+
}
42+
43+
@Override
44+
public RowWriter createWriter(OutputStream outputStream) {
45+
return new DelimitedRowWriter(outputStream, DelimitedFormat.CSV);
46+
}
47+
});
48+
49+
case "text/tab-separated-values":
50+
return Maybe.just(new FormatAdapter() {
51+
@Override
52+
public RowReader createReader(InputStream inputStream, ReaderOptions options) {
53+
return decorateReader(new DelimitedRowReader(inputStream, DelimitedFormat.TSV), options);
54+
}
55+
56+
@Override
57+
public RowWriter createWriter(OutputStream outputStream) {
58+
return new DelimitedRowWriter(outputStream, DelimitedFormat.TSV);
59+
}
60+
});
61+
62+
case "application/vnd.ms-excel":
63+
return Maybe.just(new FormatAdapter() {
64+
@Override
65+
public RowReader createReader(Path path, ReaderOptions options) throws IOException {
66+
return decorateReader(XLSRowReader.open(path, options), options);
67+
}
68+
69+
@Override
70+
public RowReader createReader(InputStream inputStream, ReaderOptions options) throws IOException {
71+
return decorateReader(XLSRowReader.open(inputStream, options), options);
72+
}
73+
74+
@Override
75+
public RowWriter createWriter(OutputStream outputStream) {
76+
return WorkbookRowWriter.xls(outputStream);
77+
}
78+
});
79+
80+
case "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet":
81+
case "application/x-tika-ooxml":
82+
return Maybe.just(new FormatAdapter() {
83+
@Override
84+
public RowReader createReader(Path path, ReaderOptions options) throws IOException {
85+
return decorateReader(XLSXRowReader.open(path, options), options);
86+
}
87+
88+
@Override
89+
public RowReader createReader(InputStream inputStream, ReaderOptions options) throws IOException {
90+
return decorateReader(XLSXRowReader.open(inputStream, options), options);
91+
}
92+
93+
@Override
94+
public RowWriter createWriter(OutputStream outputStream) {
95+
return WorkbookRowWriter.xlsx(outputStream);
96+
}
97+
});
98+
99+
default:
100+
return Maybe.empty();
101+
}
102+
}
103+
104+
private static RowReader decorateReader(RowReader reader, ReaderOptions options) {
105+
if (options.isInlineHeaders()) {
106+
reader = new InlineHeaderReader(reader);
107+
}
108+
return reader;
109+
}
110+
}

src/main/java/com/widen/tabitha/reader/Header.java

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,12 @@
11
package com.widen.tabitha.reader;
22

3-
import java.util.*;
3+
import java.util.ArrayList;
4+
import java.util.Arrays;
5+
import java.util.HashMap;
6+
import java.util.Iterator;
7+
import java.util.List;
8+
import java.util.Map;
9+
import java.util.Optional;
410

511
/**
612
* Defines an ordered list of named columns.
@@ -182,5 +188,7 @@ public DuplicateColumnException(String column) {
182188
}
183189

184190
@Override
185-
public String toString() { return columnsByIndex.toString(); }
191+
public String toString() {
192+
return columnsByIndex.toString();
193+
}
186194
}

src/main/java/com/widen/tabitha/reader/InlineHeaderReader.java

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -8,12 +8,12 @@
88
/**
99
* Decorates another reader, interpreting the first row of each page of data as the header for subsequent rows.
1010
*/
11-
class InlineHeaderReader implements RowReader {
11+
public class InlineHeaderReader implements RowReader {
1212
private final RowReader inner;
1313
private Header currentHeader;
1414
private long currentPage = -1;
1515

16-
InlineHeaderReader(RowReader inner) {
16+
public InlineHeaderReader(RowReader inner) {
1717
this.inner = inner;
1818
}
1919

src/main/java/com/widen/tabitha/reader/RowReaders.java

Lines changed: 18 additions & 71 deletions
Original file line numberDiff line numberDiff line change
@@ -1,18 +1,13 @@
11
package com.widen.tabitha.reader;
22

3-
import com.widen.tabitha.formats.delimited.DelimitedFormat;
4-
import com.widen.tabitha.formats.delimited.DelimitedRowReader;
5-
import com.widen.tabitha.formats.excel.XLSRowReader;
6-
import com.widen.tabitha.formats.excel.XLSXRowReader;
3+
import com.widen.tabitha.formats.FormatRegistry;
4+
import io.reactivex.Maybe;
75
import org.apache.tika.Tika;
86

97
import java.io.BufferedInputStream;
10-
import java.io.IOException;
118
import java.io.InputStream;
12-
import java.nio.file.Files;
139
import java.nio.file.Path;
1410
import java.nio.file.Paths;
15-
import java.util.Optional;
1611

1712
/**
1813
* Helper factory methods for creating row readers.
@@ -24,7 +19,7 @@ public class RowReaders {
2419
* @param path The file path of the file to open.
2520
* @return A row reader if the file is in a supported format.
2621
*/
27-
public static Optional<RowReader> open(String path) throws Exception {
22+
public static Maybe<RowReader> open(String path) {
2823
return open(Paths.get(path), null);
2924
}
3025

@@ -34,7 +29,7 @@ public static Optional<RowReader> open(String path) throws Exception {
3429
* @param path The file path of the file to open.
3530
* @return A row reader if the file is in a supported format.
3631
*/
37-
public static Optional<RowReader> open(Path path) throws Exception {
32+
public static Maybe<RowReader> open(Path path) {
3833
return open(path, null);
3934
}
4035

@@ -45,30 +40,11 @@ public static Optional<RowReader> open(Path path) throws Exception {
4540
* @param options Options to pass to the reader.
4641
* @return A row reader if the file is in a supported format.
4742
*/
48-
public static Optional<RowReader> open(Path path, ReaderOptions options) throws Exception {
49-
if (options == null) {
50-
options = new ReaderOptions();
51-
}
52-
53-
String mimeType = tika.detect(path);
54-
55-
switch (mimeType) {
56-
case "text/csv":
57-
case "text/plain":
58-
return Optional.of(decorate(new DelimitedRowReader(Files.newInputStream(path), DelimitedFormat.CSV), options));
59-
60-
case "text/tab-separated-values":
61-
return Optional.of(decorate(new DelimitedRowReader(Files.newInputStream(path), DelimitedFormat.TSV), options));
62-
63-
case "application/vnd.ms-excel":
64-
return Optional.of(decorate(XLSRowReader.open(path, options), options));
65-
66-
case "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet":
67-
case "application/x-tika-ooxml":
68-
return Optional.of(decorate(XLSXRowReader.open(path, options), options));
69-
}
70-
71-
return Optional.empty();
43+
public static Maybe<RowReader> open(Path path, ReaderOptions options) {
44+
return Maybe
45+
.fromCallable(() -> tika.detect(path))
46+
.flatMap(FormatRegistry::forMimeType)
47+
.map(formatAdapter -> formatAdapter.createReader(path, options != null ? options : new ReaderOptions()));
7248
}
7349

7450
/**
@@ -77,7 +53,7 @@ public static Optional<RowReader> open(Path path, ReaderOptions options) throws
7753
* @param inputStream The input stream to read.
7854
* @return A row reader if the stream is in a supported format.
7955
*/
80-
public static Optional<RowReader> open(InputStream inputStream) throws IOException {
56+
public static Maybe<RowReader> open(InputStream inputStream) {
8157
return open(inputStream, null, null);
8258
}
8359

@@ -88,7 +64,7 @@ public static Optional<RowReader> open(InputStream inputStream) throws IOExcepti
8864
* @param filename The filename associated with the stream, if known.
8965
* @return A row reader if the stream is in a supported format.
9066
*/
91-
public static Optional<RowReader> open(InputStream inputStream, String filename) throws IOException {
67+
public static Maybe<RowReader> open(InputStream inputStream, String filename) {
9268
return open(inputStream, filename, null);
9369
}
9470

@@ -99,7 +75,7 @@ public static Optional<RowReader> open(InputStream inputStream, String filename)
9975
* @param options Options to pass to the reader.
10076
* @return A row reader if the stream is in a supported format.
10177
*/
102-
public static Optional<RowReader> open(InputStream inputStream, ReaderOptions options) throws IOException {
78+
public static Maybe<RowReader> open(InputStream inputStream, ReaderOptions options) {
10379
return open(inputStream, null, options);
10480
}
10581

@@ -111,44 +87,15 @@ public static Optional<RowReader> open(InputStream inputStream, ReaderOptions op
11187
* @param options Options to pass to the reader.
11288
* @return A row reader if the stream is in a supported format.
11389
*/
114-
public static Optional<RowReader> open(
115-
InputStream inputStream,
116-
String filename,
117-
ReaderOptions options
118-
) throws IOException {
119-
if (options == null) {
120-
options = new ReaderOptions();
121-
}
122-
90+
public static Maybe<RowReader> open(InputStream inputStream, String filename, ReaderOptions options) {
12391
// If our input stream supports marks, Tika will rewind the stream back to the start for us after detecting the
12492
// format, so ensure our input stream supports it.
125-
inputStream = createRewindableInputStream(inputStream);
126-
String mimeType = tika.detect(inputStream, filename);
127-
128-
switch (mimeType) {
129-
case "text/csv":
130-
case "text/plain":
131-
return Optional.of(decorate(new DelimitedRowReader(inputStream, DelimitedFormat.CSV), options));
132-
133-
case "text/tab-separated-values":
134-
return Optional.of(decorate(new DelimitedRowReader(inputStream, DelimitedFormat.TSV), options));
135-
136-
case "application/vnd.ms-excel":
137-
return Optional.of(decorate(XLSRowReader.open(inputStream, options), options));
138-
139-
case "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet":
140-
case "application/x-tika-ooxml":
141-
return Optional.of(decorate(XLSXRowReader.open(inputStream, options), options));
142-
}
143-
144-
return Optional.empty();
145-
}
93+
InputStream rewindableStream = createRewindableInputStream(inputStream);
14694

147-
private static RowReader decorate(RowReader reader, ReaderOptions options) {
148-
if (options.isInlineHeaders()) {
149-
reader = new InlineHeaderReader(reader);
150-
}
151-
return reader;
95+
return Maybe
96+
.fromCallable(() -> tika.detect(rewindableStream, filename))
97+
.flatMap(FormatRegistry::forMimeType)
98+
.map(formatAdapter -> formatAdapter.createReader(rewindableStream, options != null ? options : new ReaderOptions()));
15299
}
153100

154101
private static InputStream createRewindableInputStream(InputStream inputStream) {

0 commit comments

Comments
 (0)