Skip to content

Commit 6ea5968

Browse files
fix(MergeLineContext): Partially remove unused service_ids.
1 parent 2a30ee5 commit 6ea5968

File tree

7 files changed

+123
-54
lines changed

7 files changed

+123
-54
lines changed

src/main/java/com/conveyal/datatools/manager/jobs/MergeFeedsJob.java

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -86,7 +86,7 @@ public class MergeFeedsJob extends FeedSourceJob {
8686
@JsonIgnore @BsonIgnore
8787
public Set<String> serviceIdsToCloneRenameAndExtend = new HashSet<>();
8888
@JsonIgnore @BsonIgnore
89-
public Set<String> serviceIdsToTerminateEarly = new HashSet<>();
89+
public Set<String> serviceIdsFromActiveFeedToTerminateEarly = new HashSet<>();
9090

9191
private List<TripAndCalendars> sharedConsistentTripAndCalendarIds = new ArrayList<>();
9292

@@ -437,7 +437,7 @@ private void determineMergeStrategy() {
437437

438438
// Build the set of calendars to be shortened to the day before the future feed start date
439439
// from trips in the active feed but not in the future feed.
440-
serviceIdsToTerminateEarly.addAll(
440+
serviceIdsFromActiveFeedToTerminateEarly.addAll(
441441
getActiveServiceIds(feedMergeContext.getActiveTripIdsNotInFutureFeed())
442442
);
443443

src/main/java/com/conveyal/datatools/manager/jobs/feedmerge/CalendarMergeLineContext.java

Lines changed: 50 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -38,19 +38,20 @@ public void afterRowWrite() throws IOException {
3838

3939
private boolean checkCalendarIds(Set<NewGTFSError> idErrors, FieldContext fieldContext) throws IOException {
4040
boolean shouldSkipRecord = false;
41+
String key = getTableScopedValue(table, getIdScope(), keyValue);
42+
4143
if (isHandlingActiveFeed()) {
4244
LocalDate startDate = getCsvDate("start_date");
43-
// If a service_id from the active calendar has both the
44-
// start_date and end_date in the future, the service will be
45-
// excluded from the merged file. Records in trips,
46-
// calendar_dates, and calendar_attributes referencing this
47-
// service_id shall also be removed/ignored. Stop_time records
48-
// for the ignored trips shall also be removed.
4945
if (!startDate.isBefore(feedMergeContext.future.getFeedFirstDate())) {
46+
// If a service_id from the active calendar has both the
47+
// start_date and end_date in the future, the service will be
48+
// excluded from the merged file. Records in trips,
49+
// calendar_dates, and calendar_attributes referencing this
50+
// service_id shall also be removed/ignored. Stop_time records
51+
// for the ignored trips shall also be removed.
5052
LOG.warn(
51-
"Skipping calendar entry {} because it operates fully within the time span of future feed.",
53+
"Skipping active calendar entry {} because it operates fully within the time span of future feed.",
5254
keyValue);
53-
String key = getTableScopedValue(table, getIdScope(), keyValue);
5455
mergeFeedsResult.skippedIds.add(key);
5556
shouldSkipRecord = true;
5657
} else {
@@ -68,7 +69,7 @@ private boolean checkCalendarIds(Set<NewGTFSError> idErrors, FieldContext fieldC
6869
boolean activeAndFutureTripIdsAreDisjoint = job.sharedTripIdsWithConsistentSignature.isEmpty();
6970
if (activeAndFutureTripIdsAreDisjoint) {
7071
futureStartDate = feedMergeContext.futureFirstCalendarStartDate;
71-
} else if (job.serviceIdsToTerminateEarly.contains(keyValue)) {
72+
} else if (job.serviceIdsFromActiveFeedToTerminateEarly.contains(keyValue)) {
7273
futureStartDate = feedMergeContext.future.getFeedFirstDate();
7374
}
7475
// In other cases not covered above, new calendar entry is already flagged for insertion
@@ -85,10 +86,20 @@ private boolean checkCalendarIds(Set<NewGTFSError> idErrors, FieldContext fieldC
8586
.format(GTFS_DATE_FORMATTER));
8687
}
8788
}
89+
/* } else {
90+
// If handling the future feed, the MTC revised feed merge logic is as follows:
91+
// - Calendar entries from the future feed will be inserted as is in the merged feed.
92+
// so no additional processing needed here, unless the calendar entry is no longer used.
93+
if (job.serviceIdsFromFutureFeedToRemove.contains(keyValue)) {
94+
LOG.warn(
95+
"Skipping future calendar entry {} because it will become unused in the merged feed.",
96+
keyValue);
97+
mergeFeedsResult.skippedIds.add(key);
98+
shouldSkipRecord = true;
99+
}
100+
101+
*/
88102
}
89-
// If handling the future feed, the MTC revised feed merge logic is as follows:
90-
// - Calendar entries from the future feed will be inserted as is in the merged feed.
91-
// so no additional processing needed here.
92103

93104

94105
// If any service_id in the active feed matches with the future
@@ -98,8 +109,7 @@ private boolean checkCalendarIds(Set<NewGTFSError> idErrors, FieldContext fieldC
98109
// duplicates? I think we would need to consider the
99110
// service_id:exception_type:date as the unique key and include any
100111
// all entries as long as they are unique on this key.
101-
102-
if (hasDuplicateError(idErrors)) {
112+
if (isHandlingActiveFeed() && hasDuplicateError(idErrors)) {
103113
// Modify service_id and ensure that referencing trips
104114
// have service_id updated.
105115
updateAndRemapOutput(fieldContext);
@@ -108,7 +118,11 @@ private boolean checkCalendarIds(Set<NewGTFSError> idErrors, FieldContext fieldC
108118
// Track service ID because we want to avoid removing trips that may reference this
109119
// service_id when the service_id is used by calendar_dates that operate in the valid
110120
// date range, i.e., before the future feed's first date.
111-
if (!shouldSkipRecord && fieldContext.nameEquals(SERVICE_ID)) mergeFeedsResult.serviceIds.add(fieldContext.getValueToWrite());
121+
//
122+
// If service is going to be cloned, add to the output service ids.
123+
if (!shouldSkipRecord && fieldContext.nameEquals(SERVICE_ID)) {
124+
mergeFeedsResult.serviceIds.add(fieldContext.getValueToWrite());
125+
}
112126

113127
return !shouldSkipRecord;
114128
}
@@ -119,25 +133,27 @@ private boolean checkCalendarIds(Set<NewGTFSError> idErrors, FieldContext fieldC
119133
* @throws IOException
120134
*/
121135
public void addClonedServiceId() throws IOException {
122-
String originalServiceId = getRowValues()[keyFieldIndex];
123-
if (job.serviceIdsToCloneRenameAndExtend.contains(originalServiceId)) {
124-
// FIXME: Do we need to worry about calendar_dates?
125-
String[] clonedValues = getRowValues().clone();
126-
String newServiceId = clonedValues[keyFieldIndex] = String.join(":", getIdScope(), originalServiceId);
127-
// Modify start date only (preserve the end date on the future calendar entry).
128-
int startDateIndex = Table.CALENDAR.getFieldIndex("start_date");
129-
clonedValues[startDateIndex] = feedMergeContext.active.feed.calendars.get(originalServiceId).start_date
130-
.format(GTFS_DATE_FORMATTER);
131-
referenceTracker.checkReferencesAndUniqueness(
132-
keyValue,
133-
getLineNumber(),
134-
table.fields[0],
135-
newServiceId,
136-
table,
137-
keyField,
138-
table.getOrderFieldName()
139-
);
140-
writeValuesToTable(clonedValues, true);
136+
if (isHandlingFutureFeed()) {
137+
String originalServiceId = keyValue;
138+
if (job.serviceIdsToCloneRenameAndExtend.contains(originalServiceId)) {
139+
// FIXME: Do we need to worry about calendar_dates?
140+
String[] clonedValues = getRowValues().clone();
141+
String newServiceId = clonedValues[keyFieldIndex] = String.join(":", getIdScope(), originalServiceId);
142+
// Modify start date only (preserve the end date from the future calendar entry).
143+
int startDateIndex = Table.CALENDAR.getFieldIndex("start_date");
144+
clonedValues[startDateIndex] = feedMergeContext.active.feed.calendars.get(originalServiceId).start_date
145+
.format(GTFS_DATE_FORMATTER);
146+
referenceTracker.checkReferencesAndUniqueness(
147+
keyValue,
148+
getLineNumber(),
149+
table.fields[0],
150+
newServiceId,
151+
table,
152+
keyField,
153+
table.getOrderFieldName()
154+
);
155+
writeValuesToTable(clonedValues, true);
156+
}
141157
}
142158
}
143159
}

src/main/java/com/conveyal/datatools/manager/jobs/feedmerge/FeedContext.java

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -38,8 +38,6 @@ public FeedContext(FeedToMerge givenFeedToMerge) throws IOException {
3838

3939
public LocalDate getFeedFirstDate() { return feedFirstDate; }
4040

41-
public void setFeedFirstDate(LocalDate firstDate) { feedFirstDate = firstDate; }
42-
4341
public String getNewAgencyId() {
4442
return newAgencyId;
4543
}

src/main/java/com/conveyal/datatools/manager/jobs/feedmerge/FeedMergeContext.java

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,7 @@ public FeedMergeContext(Set<FeedVersion> feedVersions, Auth0UserProfile owner) t
3232
future = new FeedContext(futureFeedToMerge);
3333

3434
// Determine whether service and trip IDs are exact matches.
35-
serviceIdsMatch = activeFeedToMerge.serviceIds.equals(futureFeedToMerge.serviceIds);
35+
serviceIdsMatch = activeFeedToMerge.serviceIdsInUse.equals(futureFeedToMerge.serviceIdsInUse);
3636
tripIdsMatch = active.tripIds.equals(future.tripIds);
3737
sharedTripIds = Sets.intersection(active.tripIds, future.tripIds);
3838

@@ -63,9 +63,16 @@ public boolean areActiveAndFutureTripIdsDisjoint() {
6363
}
6464

6565
/**
66-
* Obtains the active trip ids found in the active feed, but not in the future feed.
66+
* Obtains the trip ids found in the active feed, but not in the future feed.
6767
*/
6868
public Sets.SetView<String> getActiveTripIdsNotInFutureFeed() {
6969
return Sets.difference(active.tripIds, future.tripIds);
7070
}
71+
72+
/**
73+
* Obtains the trip ids found in the future feed, but not in the active feed.
74+
*/
75+
public Sets.SetView<String> getFutureTripIdsNotInActiveFeed() {
76+
return Sets.difference(future.tripIds, active.tripIds);
77+
}
7178
}

src/main/java/com/conveyal/datatools/manager/jobs/feedmerge/FeedToMerge.java

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010
import java.io.IOException;
1111
import java.util.HashSet;
1212
import java.util.Set;
13+
import java.util.stream.Collectors;
1314
import java.util.zip.ZipFile;
1415

1516
import static com.conveyal.datatools.manager.utils.MergeFeedUtils.getIdsForTable;
@@ -23,6 +24,7 @@ public class FeedToMerge implements Closeable {
2324
public ZipFile zipFile;
2425
public SetMultimap<Table, String> idsForTable = HashMultimap.create();
2526
public Set<String> serviceIds = new HashSet<>();
27+
public Set<String> serviceIdsInUse;
2628
private static final Set<Table> tablesToCheck = Sets.newHashSet(Table.TRIPS, Table.CALENDAR, Table.CALENDAR_DATES);
2729

2830
public FeedToMerge(FeedVersion version) throws IOException {
@@ -37,6 +39,18 @@ public void collectTripAndServiceIds() throws IOException {
3739
}
3840
serviceIds.addAll(idsForTable.get(Table.CALENDAR));
3941
serviceIds.addAll(idsForTable.get(Table.CALENDAR_DATES));
42+
43+
serviceIdsInUse = getServiceIdsInUse(idsForTable.get(Table.TRIPS));
44+
}
45+
46+
/**
47+
* Obtains the service ids corresponding to the provided trip ids.
48+
* FIXME: Duplicate of MergeFeedsJob.
49+
*/
50+
private Set<String> getServiceIdsInUse(Set<String> tripIds) {
51+
return tripIds.stream()
52+
.map(tripId -> version.retrieveFeed().trips.get(tripId).service_id)
53+
.collect(Collectors.toSet());
4054
}
4155

4256
public void close() throws IOException {

src/main/java/com/conveyal/datatools/manager/jobs/feedmerge/MergeLineContext.java

Lines changed: 28 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -144,6 +144,14 @@ public void startNewFeed(int feedIndex) throws IOException {
144144
// If there is no agency_id for agency table, create one and ensure that
145145
// route#agency_id gets set.
146146
}
147+
148+
if (handlingFutureFeed) {
149+
mergeFeedsResult.serviceIds.addAll(
150+
job.serviceIdsToCloneRenameAndExtend.stream().map(
151+
id -> String.join(":", idScope, id)
152+
).collect(Collectors.toSet())
153+
);
154+
}
147155
}
148156

149157
public boolean shouldSkipFile() {
@@ -395,6 +403,24 @@ public boolean updateAgencyIdIfNeeded(FieldContext fieldContext) {
395403
return true;
396404
}
397405

406+
public boolean updateServiceIdsIfNeeded(FieldContext fieldContext) {
407+
String fieldValue = fieldContext.getValue();
408+
if (table.name.equals(Table.TRIPS.name) &&
409+
fieldContext.nameEquals(SERVICE_ID) &&
410+
job.serviceIdsToCloneRenameAndExtend.contains(fieldValue) &&
411+
job.mergeType.equals(SERVICE_PERIOD)
412+
) {
413+
// Future trip ids not in the active feed will not get the service id remapped,
414+
// they will use the service id as defined in the future feed instead.
415+
if (!(handlingFutureFeed && feedMergeContext.getFutureTripIdsNotInActiveFeed().contains(keyValue))) {
416+
String newServiceId = String.join(":", idScope, fieldValue);
417+
LOG.info("Updating {}#service_id to (auto-generated) {} for ID {}", table.name, newServiceId, keyValue);
418+
fieldContext.setValueToWrite(newServiceId);
419+
}
420+
}
421+
return true;
422+
}
423+
398424
public boolean storeRowAndStopValues() {
399425
String newLine = String.join(",", rowValues);
400426
switch (table.name) {
@@ -527,9 +553,10 @@ public boolean constructRowValues() throws IOException {
527553
updateAndRemapOutput(fieldContext);
528554
}
529555

556+
updateServiceIdsIfNeeded(fieldContext);
557+
530558
// Store values for key fields that have been encountered and update any key values that need modification due
531559
// to conflicts.
532-
// This method can change skipRecord.
533560
if (!checkFieldsForMergeConflicts(getIdErrors(fieldContext), fieldContext)) {
534561
skipRecord = true;
535562
break;

src/test/java/com/conveyal/datatools/manager/jobs/MergeFeedsJobTest.java

Lines changed: 20 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -387,16 +387,31 @@ void mergeMTCShouldHandleMatchingTripIdsWithSameSignature() throws SQLException
387387

388388
// - calendar table
389389
// expect a total of 4 records in calendar table:
390-
// - 1 from the active feed (common_id start date is changed to one day before first start_date in future feed)
391-
// (the other one is unused and is discarded)
392-
// - 2 from the future feed
393-
// - 1 cloned for the matching trip id present in both active and future feeds
394-
// (from MergeFeedsJob#serviceIdsToCloneAndRename).
390+
// - common_id from the active feed (but start date is changed to one day before first start_date in future feed),
391+
// - common_id from the future feed (because of one future trip not in the active feed),
392+
// - common_id cloned and extended for the matching trip id present in both active and future feeds
393+
// (from MergeFeedsJob#serviceIdsToCloneAndRename),
394+
// - only_calendar_id used in the future feed.
395395
assertThatSqlCountQueryYieldsExpectedCount(
396396
String.format("SELECT count(*) FROM %s.calendar", mergedNamespace),
397397
4
398398
);
399399

400+
// Out of all trips from the input datasets, expect 4 trips in merged output.
401+
// 1 trip from active feed that is not in the future feed,
402+
// 1 trip in both the active and future feeds, with the same signature (same stop times),
403+
// 2 trips from the future feed not in the active feed.
404+
assertThatSqlCountQueryYieldsExpectedCount(
405+
String.format("SELECT count(*) FROM %s.trips", mergedNamespace),
406+
4
407+
);
408+
409+
// There should be no unused service ids.
410+
assertThatSqlCountQueryYieldsExpectedCount(
411+
String.format("SELECT count(*) FROM %s.errors where error_type = 'SERVICE_UNUSED'", mergedNamespace),
412+
0
413+
);
414+
400415
// expect that 2 calendars (1 common_id extended from future and 1 Fake_Transit1:common_id from active) have
401416
// start_date pinned to start date of active feed.
402417
assertThatSqlCountQueryYieldsExpectedCount(
@@ -415,14 +430,6 @@ void mergeMTCShouldHandleMatchingTripIdsWithSameSignature() throws SQLException
415430
String.format("SELECT count(*) FROM %s.calendar where start_date = '20170918' and end_date='20170919'", mergedNamespace),
416431
1
417432
);
418-
// Out of all trips from the input datasets, expect 4 trips in merged output.
419-
// 1 trip from active feed that is not in the future feed,
420-
// 1 trip in both the active and future feeds, with the same signature (same stop times),
421-
// 2 trips from the future feed not in the active feed.
422-
assertThatSqlCountQueryYieldsExpectedCount(
423-
String.format("SELECT count(*) FROM %s.trips", mergedNamespace),
424-
4
425-
);
426433
}
427434

428435
/**

0 commit comments

Comments
 (0)