From c417eee00fbff4f73e81144c8a8a9328e5f5301c Mon Sep 17 00:00:00 2001 From: Andrew Byrd Date: Mon, 30 Sep 2024 18:33:28 +0200 Subject: [PATCH 1/4] store and retrieve user-specified network config --- .../controllers/BundleController.java | 47 +++++++++++++++---- .../com/conveyal/analysis/models/Bundle.java | 6 +++ 2 files changed, 44 insertions(+), 9 deletions(-) diff --git a/src/main/java/com/conveyal/analysis/controllers/BundleController.java b/src/main/java/com/conveyal/analysis/controllers/BundleController.java index b7fc71cc5..dc3034583 100644 --- a/src/main/java/com/conveyal/analysis/controllers/BundleController.java +++ b/src/main/java/com/conveyal/analysis/controllers/BundleController.java @@ -13,7 +13,6 @@ import com.conveyal.file.FileUtils; import com.conveyal.gtfs.GTFSCache; import com.conveyal.gtfs.GTFSFeed; -import com.conveyal.gtfs.error.GTFSError; import com.conveyal.gtfs.error.GeneralError; import com.conveyal.gtfs.model.Stop; import com.conveyal.gtfs.validator.PostLoadValidator; @@ -81,6 +80,7 @@ public BundleController (BackendComponents components) { public void registerEndpoints (Service sparkService) { sparkService.path("/api/bundle", () -> { sparkService.get("", this::getBundles, toJson); + sparkService.get("/:_id/config", this::getBundleConfig, toJson); sparkService.get("/:_id", this::getBundle, toJson); sparkService.post("", this::create, toJson); sparkService.put("/:_id", this::update, toJson); @@ -110,7 +110,6 @@ private Bundle create (Request req, Response res) { try { bundle.name = files.get("bundleName").get(0).getString("UTF-8"); bundle.regionId = files.get("regionId").get(0).getString("UTF-8"); - if (files.get("osmId") != null) { bundle.osmId = files.get("osmId").get(0).getString("UTF-8"); Bundle bundleWithOsm = Persistence.bundles.find(QueryBuilder.start("osmId").is(bundle.osmId).get()).next(); @@ -118,7 +117,6 @@ private Bundle create (Request req, Response res) { throw AnalysisServerException.badRequest("Selected OSM does not exist."); } } - if (files.get("feedGroupId") != null) { bundle.feedGroupId = files.get("feedGroupId").get(0).getString("UTF-8"); Bundle bundleWithFeed = Persistence.bundles.find(QueryBuilder.start("feedGroupId").is(bundle.feedGroupId).get()).next(); @@ -135,6 +133,12 @@ private Bundle create (Request req, Response res) { bundle.feedsComplete = bundleWithFeed.feedsComplete; bundle.totalFeeds = bundleWithFeed.totalFeeds; } + if (files.get("config") != null) { + // For validation, rather than reading as freeform JSON, deserialize into a model class instance. + // However, only the instance fields specifying things other than OSM and GTFS IDs will be retained. + String configString = files.get("config").get(0).getString(); + bundle.config = JsonUtil.objectMapper.readValue(configString, TransportNetworkConfig.class); + } UserPermissions userPermissions = UserPermissions.from(req); bundle.accessGroup = userPermissions.accessGroup; bundle.createdBy = userPermissions.email; @@ -274,15 +278,19 @@ private Bundle create (Request req, Response res) { return bundle; } + /** SIDE EFFECTS: This method will change the field bundle.config before writing it. */ private void writeNetworkConfigToCache (Bundle bundle) throws IOException { - TransportNetworkConfig networkConfig = new TransportNetworkConfig(); - networkConfig.osmId = bundle.osmId; - networkConfig.gtfsIds = bundle.feeds.stream().map(f -> f.bundleScopedFeedId).collect(Collectors.toList()); - + // If the user specified additional network configuration options, they should already be in bundle.config. + // If no custom options were specified, we start with a fresh, empty instance. + if (bundle.config == null) { + bundle.config = new TransportNetworkConfig(); + } + // This will overwrite and override any inconsistent osm and gtfs IDs that were mistakenly supplied by the user. + bundle.config.osmId = bundle.osmId; + bundle.config.gtfsIds = bundle.feeds.stream().map(f -> f.bundleScopedFeedId).collect(Collectors.toList()); String configFileName = bundle._id + ".json"; File configFile = FileUtils.createScratchFile("json"); - JsonUtil.objectMapper.writeValue(configFile, networkConfig); - + JsonUtil.objectMapper.writeValue(configFile, bundle.config); FileStorageKey key = new FileStorageKey(BUNDLES, configFileName); fileStorage.moveIntoStorage(key, configFile); } @@ -312,6 +320,27 @@ private Bundle getBundle (Request req, Response res) { return bundle; } + /** + * There are two copies of the Bundle/Network config: one in the Bundle entry in the database and one in a JSON + * file (obtainable by the workers). This method always reads the one in the file, which has been around longer + * and is considered the definitive source of truth. The entry in the database is a newer addition and has only + * been around since September 2024. + */ + private TransportNetworkConfig getBundleConfig (Request request, Response res) { + // Unfortunately this mimics logic in TransportNetworkCache. Deduplicate in a static utility method? + String id = GTFSCache.cleanId(request.params("_id")); + FileStorageKey key = new FileStorageKey(BUNDLES, id, "json"); + File networkConfigFile = fileStorage.getFile(key); + // Unlike in the worker, we expect the backend to have a model field for every known network/bundle option. + // Threfore, use the default objectMapper that does not tolerate unknown fields. + try { + return JsonUtil.objectMapper.readValue(networkConfigFile, TransportNetworkConfig.class); + } catch (Exception exception) { + LOG.error("Exception deserializing stored network config", exception); + return null; + } + } + private Collection getBundles (Request req, Response res) { return Persistence.bundles.findPermittedForQuery(req); } diff --git a/src/main/java/com/conveyal/analysis/models/Bundle.java b/src/main/java/com/conveyal/analysis/models/Bundle.java index 912f066b4..44cea922b 100644 --- a/src/main/java/com/conveyal/analysis/models/Bundle.java +++ b/src/main/java/com/conveyal/analysis/models/Bundle.java @@ -5,6 +5,7 @@ import com.conveyal.gtfs.error.GTFSError; import com.conveyal.gtfs.model.FeedInfo; import com.conveyal.gtfs.validator.model.Priority; +import com.conveyal.r5.analyst.cluster.TransportNetworkConfig; import com.fasterxml.jackson.annotation.JsonIgnore; import java.time.LocalDate; @@ -47,6 +48,11 @@ public class Bundle extends Model implements Cloneable { public int feedsComplete; public int totalFeeds; + // The definitive TransportNetworkConfig is a JSON file stored alonside the feeds in file storage. It is + // duplicated here to record any additional user-specified options that were supplied when the bundle was created. + // It may contain redundant copies of information stored in the outer level Bundle such as OSM and GTFS feed IDs. + public TransportNetworkConfig config; + public static String bundleScopeFeedId (String feedId, String feedGroupId) { return String.format("%s_%s", feedId, feedGroupId); } From e3094f8a32aa5ef8be8a816ef6fe42bc5ea8513c Mon Sep 17 00:00:00 2001 From: Andrew Byrd Date: Wed, 2 Oct 2024 12:17:21 -0400 Subject: [PATCH 2/4] strict objectmapper to detect misspelled fields --- .../com/conveyal/analysis/controllers/BundleController.java | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/main/java/com/conveyal/analysis/controllers/BundleController.java b/src/main/java/com/conveyal/analysis/controllers/BundleController.java index dc3034583..4f26d0aed 100644 --- a/src/main/java/com/conveyal/analysis/controllers/BundleController.java +++ b/src/main/java/com/conveyal/analysis/controllers/BundleController.java @@ -21,6 +21,7 @@ import com.conveyal.r5.analyst.progress.ProgressInputStream; import com.conveyal.r5.analyst.cluster.TransportNetworkConfig; import com.conveyal.r5.analyst.progress.Task; +import com.conveyal.r5.common.JsonUtilities; import com.conveyal.r5.streets.OSMCache; import com.conveyal.r5.util.ExceptionUtils; import com.mongodb.QueryBuilder; @@ -136,8 +137,9 @@ private Bundle create (Request req, Response res) { if (files.get("config") != null) { // For validation, rather than reading as freeform JSON, deserialize into a model class instance. // However, only the instance fields specifying things other than OSM and GTFS IDs will be retained. + // Use strict object mapper (from the strict/lenient pair) to fail on misspelled field names. String configString = files.get("config").get(0).getString(); - bundle.config = JsonUtil.objectMapper.readValue(configString, TransportNetworkConfig.class); + bundle.config = JsonUtilities.objectMapper.readValue(configString, TransportNetworkConfig.class); } UserPermissions userPermissions = UserPermissions.from(req); bundle.accessGroup = userPermissions.accessGroup; From ecc8002021ce86927b19c6a0240a468680513af4 Mon Sep 17 00:00:00 2001 From: Andrew Byrd Date: Thu, 3 Oct 2024 12:16:08 -0400 Subject: [PATCH 3/4] Revert "strict objectmapper to detect misspelled fields" This reverts commit 3094f8a32aa5ef8be8a816ef6fe42bc5ea8513c. The backend needs to allow sending config to workers with new features it doesn't know about. --- .../com/conveyal/analysis/controllers/BundleController.java | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/src/main/java/com/conveyal/analysis/controllers/BundleController.java b/src/main/java/com/conveyal/analysis/controllers/BundleController.java index 4f26d0aed..dc3034583 100644 --- a/src/main/java/com/conveyal/analysis/controllers/BundleController.java +++ b/src/main/java/com/conveyal/analysis/controllers/BundleController.java @@ -21,7 +21,6 @@ import com.conveyal.r5.analyst.progress.ProgressInputStream; import com.conveyal.r5.analyst.cluster.TransportNetworkConfig; import com.conveyal.r5.analyst.progress.Task; -import com.conveyal.r5.common.JsonUtilities; import com.conveyal.r5.streets.OSMCache; import com.conveyal.r5.util.ExceptionUtils; import com.mongodb.QueryBuilder; @@ -137,9 +136,8 @@ private Bundle create (Request req, Response res) { if (files.get("config") != null) { // For validation, rather than reading as freeform JSON, deserialize into a model class instance. // However, only the instance fields specifying things other than OSM and GTFS IDs will be retained. - // Use strict object mapper (from the strict/lenient pair) to fail on misspelled field names. String configString = files.get("config").get(0).getString(); - bundle.config = JsonUtilities.objectMapper.readValue(configString, TransportNetworkConfig.class); + bundle.config = JsonUtil.objectMapper.readValue(configString, TransportNetworkConfig.class); } UserPermissions userPermissions = UserPermissions.from(req); bundle.accessGroup = userPermissions.accessGroup; From e4614bb6ac54927158a883daef49060e29ba28dd Mon Sep 17 00:00:00 2001 From: Andrew Byrd Date: Thu, 3 Oct 2024 12:27:29 -0400 Subject: [PATCH 4/4] clarify why unknown field names are tolerated --- .../com/conveyal/analysis/controllers/BundleController.java | 5 +++-- src/main/java/com/conveyal/analysis/models/Bundle.java | 2 +- .../java/com/conveyal/r5/transit/TransportNetworkCache.java | 2 ++ 3 files changed, 6 insertions(+), 3 deletions(-) diff --git a/src/main/java/com/conveyal/analysis/controllers/BundleController.java b/src/main/java/com/conveyal/analysis/controllers/BundleController.java index dc3034583..decd74a2c 100644 --- a/src/main/java/com/conveyal/analysis/controllers/BundleController.java +++ b/src/main/java/com/conveyal/analysis/controllers/BundleController.java @@ -134,8 +134,9 @@ private Bundle create (Request req, Response res) { bundle.totalFeeds = bundleWithFeed.totalFeeds; } if (files.get("config") != null) { - // For validation, rather than reading as freeform JSON, deserialize into a model class instance. - // However, only the instance fields specifying things other than OSM and GTFS IDs will be retained. + // Validation by deserializing into a model class instance. Unknown fields are ignored to + // allow sending config to custom or experimental workers with features unknown to the backend. + // The fields specifying OSM and GTFS IDs are not expected here. They will be ignored and overwritten. String configString = files.get("config").get(0).getString(); bundle.config = JsonUtil.objectMapper.readValue(configString, TransportNetworkConfig.class); } diff --git a/src/main/java/com/conveyal/analysis/models/Bundle.java b/src/main/java/com/conveyal/analysis/models/Bundle.java index 44cea922b..4d5bef3e6 100644 --- a/src/main/java/com/conveyal/analysis/models/Bundle.java +++ b/src/main/java/com/conveyal/analysis/models/Bundle.java @@ -48,7 +48,7 @@ public class Bundle extends Model implements Cloneable { public int feedsComplete; public int totalFeeds; - // The definitive TransportNetworkConfig is a JSON file stored alonside the feeds in file storage. It is + // The definitive TransportNetworkConfig is a JSON file stored alongside the feeds in file storage. It is // duplicated here to record any additional user-specified options that were supplied when the bundle was created. // It may contain redundant copies of information stored in the outer level Bundle such as OSM and GTFS feed IDs. public TransportNetworkConfig config; diff --git a/src/main/java/com/conveyal/r5/transit/TransportNetworkCache.java b/src/main/java/com/conveyal/r5/transit/TransportNetworkCache.java index b7f1f1d8f..6a0c291ab 100644 --- a/src/main/java/com/conveyal/r5/transit/TransportNetworkCache.java +++ b/src/main/java/com/conveyal/r5/transit/TransportNetworkCache.java @@ -168,6 +168,8 @@ private TransportNetworkConfig loadNetworkConfig (String networkId) { File configFile = fileStorage.getFile(configFileKey); try { // Use lenient mapper to mimic behavior in objectFromRequestBody. + // A single network configuration file might be used across several worker versions. Unknown field names + // may be present for other worker versions unknown to this one. So we can't strictly validate field names. return JsonUtilities.lenientObjectMapper.readValue(configFile, TransportNetworkConfig.class); } catch (IOException e) { throw new RuntimeException("Error reading TransportNetworkConfig. Does it contain new unrecognized fields?", e);