11package com .widen .tabitha .reader ;
22
3- import com .widen .tabitha .formats .delimited .DelimitedFormat ;
4- import com .widen .tabitha .formats .delimited .DelimitedRowReader ;
5- import com .widen .tabitha .formats .excel .XLSRowReader ;
6- import com .widen .tabitha .formats .excel .XLSXRowReader ;
3+ import com .widen .tabitha .formats .FormatRegistry ;
4+ import io .reactivex .Maybe ;
75import org .apache .tika .Tika ;
86
97import java .io .BufferedInputStream ;
10- import java .io .IOException ;
118import java .io .InputStream ;
12- import java .nio .file .Files ;
139import java .nio .file .Path ;
1410import java .nio .file .Paths ;
15- import java .util .Optional ;
1611
1712/**
1813 * Helper factory methods for creating row readers.
@@ -24,7 +19,7 @@ public class RowReaders {
2419 * @param path The file path of the file to open.
2520 * @return A row reader if the file is in a supported format.
2621 */
27- public static Optional <RowReader > open (String path ) throws Exception {
22+ public static Maybe <RowReader > open (String path ) {
2823 return open (Paths .get (path ), null );
2924 }
3025
@@ -34,7 +29,7 @@ public static Optional<RowReader> open(String path) throws Exception {
3429 * @param path The file path of the file to open.
3530 * @return A row reader if the file is in a supported format.
3631 */
37- public static Optional <RowReader > open (Path path ) throws Exception {
32+ public static Maybe <RowReader > open (Path path ) {
3833 return open (path , null );
3934 }
4035
@@ -45,30 +40,11 @@ public static Optional<RowReader> open(Path path) throws Exception {
4540 * @param options Options to pass to the reader.
4641 * @return A row reader if the file is in a supported format.
4742 */
48- public static Optional <RowReader > open (Path path , ReaderOptions options ) throws Exception {
49- if (options == null ) {
50- options = new ReaderOptions ();
51- }
52-
53- String mimeType = tika .detect (path );
54-
55- switch (mimeType ) {
56- case "text/csv" :
57- case "text/plain" :
58- return Optional .of (decorate (new DelimitedRowReader (Files .newInputStream (path ), DelimitedFormat .CSV ), options ));
59-
60- case "text/tab-separated-values" :
61- return Optional .of (decorate (new DelimitedRowReader (Files .newInputStream (path ), DelimitedFormat .TSV ), options ));
62-
63- case "application/vnd.ms-excel" :
64- return Optional .of (decorate (XLSRowReader .open (path , options ), options ));
65-
66- case "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet" :
67- case "application/x-tika-ooxml" :
68- return Optional .of (decorate (XLSXRowReader .open (path , options ), options ));
69- }
70-
71- return Optional .empty ();
43+ public static Maybe <RowReader > open (Path path , ReaderOptions options ) {
44+ return Maybe
45+ .fromCallable (() -> tika .detect (path ))
46+ .flatMap (FormatRegistry ::forMimeType )
47+ .map (formatAdapter -> formatAdapter .createReader (path , options != null ? options : new ReaderOptions ()));
7248 }
7349
7450 /**
@@ -77,7 +53,7 @@ public static Optional<RowReader> open(Path path, ReaderOptions options) throws
7753 * @param inputStream The input stream to read.
7854 * @return A row reader if the stream is in a supported format.
7955 */
80- public static Optional <RowReader > open (InputStream inputStream ) throws IOException {
56+ public static Maybe <RowReader > open (InputStream inputStream ) {
8157 return open (inputStream , null , null );
8258 }
8359
@@ -88,7 +64,7 @@ public static Optional<RowReader> open(InputStream inputStream) throws IOExcepti
8864 * @param filename The filename associated with the stream, if known.
8965 * @return A row reader if the stream is in a supported format.
9066 */
91- public static Optional <RowReader > open (InputStream inputStream , String filename ) throws IOException {
67+ public static Maybe <RowReader > open (InputStream inputStream , String filename ) {
9268 return open (inputStream , filename , null );
9369 }
9470
@@ -99,7 +75,7 @@ public static Optional<RowReader> open(InputStream inputStream, String filename)
9975 * @param options Options to pass to the reader.
10076 * @return A row reader if the stream is in a supported format.
10177 */
102- public static Optional <RowReader > open (InputStream inputStream , ReaderOptions options ) throws IOException {
78+ public static Maybe <RowReader > open (InputStream inputStream , ReaderOptions options ) {
10379 return open (inputStream , null , options );
10480 }
10581
@@ -111,44 +87,15 @@ public static Optional<RowReader> open(InputStream inputStream, ReaderOptions op
11187 * @param options Options to pass to the reader.
11288 * @return A row reader if the stream is in a supported format.
11389 */
114- public static Optional <RowReader > open (
115- InputStream inputStream ,
116- String filename ,
117- ReaderOptions options
118- ) throws IOException {
119- if (options == null ) {
120- options = new ReaderOptions ();
121- }
122-
90+ public static Maybe <RowReader > open (InputStream inputStream , String filename , ReaderOptions options ) {
12391 // If our input stream supports marks, Tika will rewind the stream back to the start for us after detecting the
12492 // format, so ensure our input stream supports it.
125- inputStream = createRewindableInputStream (inputStream );
126- String mimeType = tika .detect (inputStream , filename );
127-
128- switch (mimeType ) {
129- case "text/csv" :
130- case "text/plain" :
131- return Optional .of (decorate (new DelimitedRowReader (inputStream , DelimitedFormat .CSV ), options ));
132-
133- case "text/tab-separated-values" :
134- return Optional .of (decorate (new DelimitedRowReader (inputStream , DelimitedFormat .TSV ), options ));
135-
136- case "application/vnd.ms-excel" :
137- return Optional .of (decorate (XLSRowReader .open (inputStream , options ), options ));
138-
139- case "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet" :
140- case "application/x-tika-ooxml" :
141- return Optional .of (decorate (XLSXRowReader .open (inputStream , options ), options ));
142- }
143-
144- return Optional .empty ();
145- }
93+ InputStream rewindableStream = createRewindableInputStream (inputStream );
14694
147- private static RowReader decorate (RowReader reader , ReaderOptions options ) {
148- if (options .isInlineHeaders ()) {
149- reader = new InlineHeaderReader (reader );
150- }
151- return reader ;
95+ return Maybe
96+ .fromCallable (() -> tika .detect (rewindableStream , filename ))
97+ .flatMap (FormatRegistry ::forMimeType )
98+ .map (formatAdapter -> formatAdapter .createReader (rewindableStream , options != null ? options : new ReaderOptions ()));
15299 }
153100
154101 private static InputStream createRewindableInputStream (InputStream inputStream ) {
0 commit comments