@@ -158,6 +158,7 @@ public Map<String, String> checkForUpdatedFeeds() {
158
158
String keyName = objSummary .getKey ();
159
159
LOG .debug ("{} etag = {}" , keyName , eTag );
160
160
161
+ // Don't add object if it is a dir
161
162
if (keyName .equals (bucketFolder )) continue ;
162
163
String filename = keyName .split ("/" )[1 ];
163
164
String feedId = filename .replace (".zip" , "" );
@@ -166,16 +167,20 @@ public Map<String, String> checkForUpdatedFeeds() {
166
167
LOG .error ("No feed source found for feed ID {}" , feedId );
167
168
continue ;
168
169
}
170
+ // Skip object if the filename is null
171
+ if ("null" .equals (feedId )) continue ;
169
172
170
- if (shouldMarkFeedAsProcessed (eTag , feedSource )) {
171
- // Don't add object if it is a dir
172
- // Skip object if the filename is null
173
- if ("null" .equals (feedId )) continue ;
173
+ FeedVersion latestVersionSentForPublishing = getLatestVersionSentForPublishing (feedId , feedSource );
174
+ if (shouldMarkFeedAsProcessed (eTag , latestVersionSentForPublishing )) {
174
175
try {
175
- LOG .info ("New version found for {} at s3://{}/{}. ETag = {}." , feedId , feedBucket , keyName , eTag );
176
- updatePublishedFeedVersion (feedId , feedSource );
177
- // TODO: Explore if MD5 checksum can be used to find matching feed version.
178
- // findMatchingFeedVersion(md5, feedId, feedSource);
176
+ // Don't mark a feed version as published if previous published version is before sentToExternalPublisher.
177
+ if (!objSummary .getLastModified ().before (latestVersionSentForPublishing .sentToExternalPublisher )) {
178
+ LOG .info ("New version found for {} at s3://{}/{}. ETag = {}." , feedId , feedBucket , keyName , eTag );
179
+ updatePublishedFeedVersion (feedId , latestVersionSentForPublishing );
180
+ // TODO: Explore if MD5 checksum can be used to find matching feed version.
181
+ // findMatchingFeedVersion(md5, feedId, feedSource);
182
+ }
183
+
179
184
} catch (Exception e ) {
180
185
LOG .warn ("Could not load feed " + keyName , e );
181
186
} finally {
@@ -200,7 +205,7 @@ private FeedSource getFeedSource(String feedId) {
200
205
and (eq ("value" , feedId ), eq ("name" , AGENCY_ID_FIELDNAME ))
201
206
);
202
207
if (properties .size () > 1 ) {
203
- LOG .warn ("Found multiple feed sources for {}: {}" ,
208
+ LOG .warn ("Found multiple feed sources for {}: {}. The published status on some feed versions will be incorrect. " ,
204
209
feedId ,
205
210
properties .stream ().map (p -> p .feedSourceId ).collect (Collectors .joining ("," )));
206
211
}
@@ -216,22 +221,20 @@ private FeedSource getFeedSource(String feedId) {
216
221
/**
217
222
* @return true if the feed with the corresponding etag should be mark as processed, false otherwise.
218
223
*/
219
- private boolean shouldMarkFeedAsProcessed (String eTag , FeedSource feedSource ) {
224
+ private boolean shouldMarkFeedAsProcessed (String eTag , FeedVersion publishedVersion ) {
220
225
if (eTagForFeed .containsValue (eTag )) return false ;
221
-
222
- FeedVersion publishedVersion = getLatestPublishedVersion (feedSource );
223
226
if (publishedVersion == null ) return false ;
227
+
224
228
return versionsToMarkAsProcessed .contains (publishedVersion .id );
225
229
}
226
230
227
231
/**
228
232
* Update the published feed version for the feed source.
229
233
* @param feedId the unique ID used by MTC to identify a feed source
230
- * @param feedSource the feed source for which a newly published version should be registered
234
+ * @param publishedVersion the feed version to be registered
231
235
*/
232
- private void updatePublishedFeedVersion (String feedId , FeedSource feedSource ) {
236
+ private void updatePublishedFeedVersion (String feedId , FeedVersion publishedVersion ) {
233
237
try {
234
- FeedVersion publishedVersion = getLatestPublishedVersion (feedSource );
235
238
if (publishedVersion != null ) {
236
239
if (publishedVersion .sentToExternalPublisher == null ) {
237
240
LOG .warn ("Not updating published version for {} (version was never sent to external publisher)" , feedId );
@@ -240,13 +243,18 @@ private void updatePublishedFeedVersion(String feedId, FeedSource feedSource) {
240
243
// Set published namespace to the feed version and set the processedByExternalPublisher timestamp.
241
244
LOG .info ("Latest published version (sent at {}) for {} is {}" , publishedVersion .sentToExternalPublisher , feedId , publishedVersion .id );
242
245
Persistence .feedVersions .updateField (publishedVersion .id , PROCESSED_BY_EXTERNAL_PUBLISHER_FIELD , new Date ());
243
- Persistence .feedSources .updateField (feedSource . id , "publishedVersionId" , publishedVersion .namespace );
246
+ Persistence .feedSources .updateField (publishedVersion . feedSourceId , "publishedVersionId" , publishedVersion .namespace );
244
247
} else {
245
- LOG .error ("No published versions found for {} ({} id={})" , feedId , feedSource .name , feedSource .id );
248
+ LOG .error (
249
+ "No published versions found for {} ({} id={})" ,
250
+ feedId ,
251
+ publishedVersion .parentFeedSource ().name ,
252
+ publishedVersion .feedSourceId
253
+ );
246
254
}
247
255
} catch (Exception e ) {
248
256
e .printStackTrace ();
249
- LOG .error ("Error encountered while checking for latest published version for {}" , feedId );
257
+ LOG .error ("Error encountered while updating the latest published version for {}" , feedId );
250
258
}
251
259
}
252
260
@@ -256,13 +264,19 @@ private void updatePublishedFeedVersion(String feedId, FeedSource feedSource) {
256
264
* could be that more than one versions were recently "published" and the latest published version was a bad
257
265
* feed that failed processing by RTD.
258
266
*/
259
- private static FeedVersion getLatestPublishedVersion (FeedSource feedSource ) {
260
- // Collect the feed versions for the feed source.
261
- Collection <FeedVersion > versions = feedSource .retrieveFeedVersions ();
262
- Optional <FeedVersion > lastPublishedVersionCandidate = versions
263
- .stream ()
264
- .min (Comparator .comparing (v -> v .sentToExternalPublisher , Comparator .nullsLast (Comparator .reverseOrder ())));
265
- return lastPublishedVersionCandidate .orElse (null );
267
+ private static FeedVersion getLatestVersionSentForPublishing (String feedId , FeedSource feedSource ) {
268
+ try {
269
+ // Collect the feed versions for the feed source.
270
+ Collection <FeedVersion > versions = feedSource .retrieveFeedVersions ();
271
+ Optional <FeedVersion > lastPublishedVersionCandidate = versions
272
+ .stream ()
273
+ .min (Comparator .comparing (v -> v .sentToExternalPublisher , Comparator .nullsLast (Comparator .reverseOrder ())));
274
+ return lastPublishedVersionCandidate .orElse (null );
275
+ } catch (Exception e ) {
276
+ e .printStackTrace ();
277
+ LOG .error ("Error encountered while checking for latest published version for {}" , feedId );
278
+ return null ;
279
+ }
266
280
}
267
281
268
282
/**
0 commit comments