Skip to content

Commit 71cee95

Browse files
committed
Polishes, dependency updates
1 parent 75318ba commit 71cee95

File tree

3 files changed

+21
-20
lines changed

3 files changed

+21
-20
lines changed

pom.xml

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
<modelVersion>4.0.0</modelVersion>
33
<groupId>fun.seabird</groupId>
44
<artifactId>ebird-csv-parser</artifactId>
5-
<version>0.0.1</version>
5+
<version>0.0.2</version>
66
<name>ebird-csv-parser</name>
77

88
<build>
@@ -48,7 +48,7 @@
4848
<dependency>
4949
<groupId>org.apache.commons</groupId>
5050
<artifactId>commons-lang3</artifactId>
51-
<version>3.12.0</version>
51+
<version>3.13.0</version>
5252
</dependency>
5353

5454
<!-- https://mvnrepository.com/artifact/org.slf4j/slf4j-api -->
@@ -62,7 +62,7 @@
6262
<dependency>
6363
<groupId>io.projectreactor</groupId>
6464
<artifactId>reactor-core</artifactId>
65-
<version>3.5.6</version>
65+
<version>3.5.9</version>
6666
</dependency>
6767

6868
<dependency>
@@ -74,7 +74,7 @@
7474
<dependency>
7575
<groupId>org.projectlombok</groupId>
7676
<artifactId>lombok</artifactId>
77-
<version>1.18.26</version>
77+
<version>1.18.28</version>
7878
<scope>provided</scope>
7979
</dependency>
8080

src/main/java/fun/seabird/EbirdCsvParser.java

Lines changed: 16 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -19,16 +19,14 @@
1919
import org.apache.commons.csv.CSVParser;
2020
import org.apache.commons.csv.CSVRecord;
2121
import org.apache.commons.lang3.time.StopWatch;
22-
import org.slf4j.Logger;
23-
import org.slf4j.LoggerFactory;
2422

23+
import lombok.extern.slf4j.Slf4j;
2524
import reactor.core.publisher.Flux;
2625
import reactor.core.scheduler.Schedulers;
2726

27+
@Slf4j
2828
public abstract class EbirdCsvParser
2929
{
30-
private static final Logger logger = LoggerFactory.getLogger(EbirdCsvParser.class);
31-
3230
public enum ParseMode {SINGLE_THREAD,MULTI_THREAD}
3331

3432
public enum PreSort {NONE,DATE}
@@ -37,14 +35,16 @@ public enum PreSort {NONE,DATE}
3735

3836
private static final AtomicInteger linesProcessed = new AtomicInteger(0);
3937

38+
private static final int ROW_PREFETCH = 25000;
39+
4040
/**
4141
* Parses the date and time fields from a CSV record and returns a LocalDateTime object representing the combined datetime value.
4242
* If the time is not defined, assumes midnight.
4343
*
4444
* @param record The CSVRecord representing a single row of data in the CSV file.
4545
* @return A LocalDateTime object representing the combined date and time parsed from the CSV record.
4646
*/
47-
static final LocalDateTime parseSubDate(CSVRecord record)
47+
private static LocalDateTime parseSubDate(CSVRecord record)
4848
{
4949
if (record.getRecordNumber() == 1l)
5050
return LocalDateTime.MIN;
@@ -64,7 +64,7 @@ static final LocalDateTime parseSubDate(CSVRecord record)
6464
* @param record The CSVRecord representing a single row of data in the CSV file.
6565
* @return An EbirdCsvRow object constructed from the CSV record.
6666
*/
67-
private static final EbirdCsvRow parseCsvLine(CSVRecord record)
67+
private static EbirdCsvRow parseCsvLine(CSVRecord record)
6868
{
6969
if (record.getRecordNumber() == 1l)
7070
return null; // skip the header
@@ -136,7 +136,7 @@ private static final EbirdCsvRow parseCsvLine(CSVRecord record)
136136
*/
137137
public static final void parseCsv(Path csvFile,Consumer<EbirdCsvRow> rowProcessor,ParseMode mode,PreSort preSort) throws IOException
138138
{
139-
logger.info("Parsing " + csvFile + "...");
139+
log.info("Parsing {} ...", csvFile);
140140

141141
linesProcessed.set(0);
142142

@@ -146,19 +146,19 @@ public static final void parseCsv(Path csvFile,Consumer<EbirdCsvRow> rowProcesso
146146

147147
StopWatch stopwatch = StopWatch.createStarted();
148148

149-
Iterable<CSVRecord> records;
149+
Flux<CSVRecord> recordsFlux;
150150
if (PreSort.DATE == preSort)
151151
{
152152
// Read all lines and sort by date and time columns
153153
List<CSVRecord> recordsList = csvParser.getRecords();
154154
recordsList.sort(Comparator.comparing(EbirdCsvParser::parseSubDate));
155-
logger.debug("Read and sorted " + (recordsList.size()-1) + " eBird observations in " + stopwatch.getTime(TimeUnit.SECONDS) + " seconds");
156-
records = recordsList;
155+
log.debug("Read and sorted {} eBird observations in {} seconds",recordsList.size()-1,stopwatch.getTime(TimeUnit.SECONDS));
156+
recordsFlux = Flux.fromIterable(recordsList);
157157
}
158158
else
159-
records = csvParser;
159+
recordsFlux = Flux.fromIterable(csvParser);
160160

161-
Consumer<CSVRecord> csvRecordConsumer = new Consumer<CSVRecord>() {
161+
final Consumer<CSVRecord> csvRecordConsumer = new Consumer<CSVRecord>() {
162162
@Override
163163
public void accept(CSVRecord record)
164164
{
@@ -174,15 +174,16 @@ public void accept(CSVRecord record)
174174
switch (mode)
175175
{
176176
case MULTI_THREAD:
177-
Flux.fromIterable(records).parallel().runOn(Schedulers.parallel()).sequential(25000).doOnNext(csvRecordConsumer).then().block();
177+
recordsFlux.parallel().runOn(Schedulers.parallel()).sequential(ROW_PREFETCH).doOnNext(csvRecordConsumer).then().block();
178178
break;
179179
case SINGLE_THREAD:
180-
Flux.fromIterable(records).doOnNext(csvRecordConsumer).then().block();
180+
recordsFlux.doOnNext(csvRecordConsumer).then().block();
181181
break;
182182
}
183183

184184
stopwatch.stop();
185-
logger.info("Processed " + linesProcessed.get() + " eBird observations in " + stopwatch.getTime(TimeUnit.SECONDS) + " seconds");
185+
186+
log.info("Processed {} eBird observations in {} seconds",linesProcessed.get(),stopwatch.getTime(TimeUnit.SECONDS));
186187
}
187188
}
188189

src/main/java/fun/seabird/EbirdCsvRow.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -35,7 +35,7 @@ public class EbirdCsvRow
3535
public LocalDateTime dateTime()
3636
{
3737
if (time == null)
38-
return date.atTime(LocalTime.MIDNIGHT);
38+
return date.atStartOfDay();
3939

4040
return date.atTime(time);
4141
}

0 commit comments

Comments
 (0)