apollographql · bnjjj · Dec 19, 2022 · Dec 14, 2022 · Dec 15, 2022 · Dec 15, 2022
diff --git a/NEXT_CHANGELOG.md b/NEXT_CHANGELOG.md
@@ -26,6 +26,7 @@ By [@USERNAME](https://github.com/USERNAME) in https://github.com/apollographql/
 
 # [x.x.x] (unreleased) - 2022-mm-dd
 
+## ❗ BREAKING ❗
 ## 🚀 Features
 
 ### Apollo uplink: Configurable schema poll timeout ([PR #2271](https://github.com/apollographql/router/pull/2271))
@@ -43,6 +44,26 @@ By [@o0Ignition0o](https://github.com/o0Ignition0o) in https://github.com/apollo
 
 ## 🐛 Fixes
 
+### Return an error on duplicate keys in configuration ([Issue #1428](https://github.com/apollographql/router/issues/1428))
+
+If you have duplicated keys in your yaml configuration like this:
+
+```yaml
+telemetry:
+  tracing:
+    propagation:
+      jaeger: true
+  tracing:
+    propagation:
+      jaeger: false
+```
+
+It will now throw an error on router startup:
+
+`ERROR duplicated keys detected in your yaml configuration: 'telemetry.tracing'`
+
+By [@bnjjj](https://github.com/bnjjj) in https://github.com/apollographql/router/pull/2270
+
 ### Return root `__typename` in first chunk of defer response when first response is empty ([Issue #1922](https://github.com/apollographql/router/issues/1922))
 
 With this query:

@@ -105,125 +105,113 @@ pub(crate) fn validate_yaml_configuration(
     }
     log_used_experimental_conf(&yaml);
     let expanded_yaml = expand_env_variables(&yaml, &expansion)?;
-
+    let parsed_yaml = super::yaml::parse(raw_yaml)?;
     if let Err(errors) = schema.validate(&expanded_yaml) {
         // Validation failed, translate the errors into something nice for the user
         // We have to reparse the yaml to get the line number information for each error.
-        match super::yaml::parse(raw_yaml) {
-            Ok(yaml) => {
-                let yaml_split_by_lines = raw_yaml.split('\n').collect::<Vec<_>>();
-
-                let errors = errors
-                    .enumerate()
-                    .filter_map(|(idx, mut e)| {
-                        if let Some(element) = yaml.get_element(&e.instance_path) {
-                            const NUMBER_OF_PREVIOUS_LINES_TO_DISPLAY: usize = 5;
-                            match element {
-                                yaml::Value::String(value, marker) => {
-                                    let lines = yaml_split_by_lines[0.max(
-                                        marker
-                                            .line()
-                                            .saturating_sub(NUMBER_OF_PREVIOUS_LINES_TO_DISPLAY),
-                                    )
-                                        ..marker.line()]
-                                        .iter()
-                                        .join("\n");
-
-                                    // Replace the value in the error message with the one from the raw config.
-                                    // This guarantees that if the env variable contained a secret it won't be leaked.
-                                    e.instance = Cow::Owned(coerce(value));
-
-                                    Some(format!(
-                                        "{}. {}\n\n{}\n{}^----- {}",
-                                        idx + 1,
-                                        e.instance_path,
-                                        lines,
-                                        " ".repeat(0.max(marker.col())),
-                                        e
-                                    ))
-                                }
-                                seq_element @ yaml::Value::Sequence(_, m) => {
-                                    let (start_marker, end_marker) = (m, seq_element.end_marker());
-
-                                    let offset = 0.max(
-                                        start_marker
-                                            .line()
-                                            .saturating_sub(NUMBER_OF_PREVIOUS_LINES_TO_DISPLAY),
-                                    );
-                                    let lines = yaml_split_by_lines[offset..end_marker.line()]
-                                        .iter()
-                                        .enumerate()
-                                        .map(|(idx, line)| {
-                                            let real_line = idx + offset;
-                                            match real_line.cmp(&start_marker.line()) {
-                                                Ordering::Equal => format!("┌ {line}"),
-                                                Ordering::Greater => format!("| {line}"),
-                                                Ordering::Less => line.to_string(),
-                                            }
-                                        })
-                                        .join("\n");
-
-                                    Some(format!(
-                                        "{}. {}\n\n{}\n└-----> {}",
-                                        idx + 1,
-                                        e.instance_path,
-                                        lines,
-                                        e
-                                    ))
-                                }
-                                map_value
-                                @ yaml::Value::Mapping(current_label, _value, _marker) => {
-                                    let (start_marker, end_marker) = (
-                                        current_label.as_ref()?.marker.as_ref()?,
-                                        map_value.end_marker(),
-                                    );
-                                    let offset = 0.max(
-                                        start_marker
-                                            .line()
-                                            .saturating_sub(NUMBER_OF_PREVIOUS_LINES_TO_DISPLAY),
-                                    );
-                                    let lines = yaml_split_by_lines[offset..end_marker.line()]
-                                        .iter()
-                                        .enumerate()
-                                        .map(|(idx, line)| {
-                                            let real_line = idx + offset;
-                                            match real_line.cmp(&start_marker.line()) {
-                                                Ordering::Equal => format!("┌ {line}"),
-                                                Ordering::Greater => format!("| {line}"),
-                                                Ordering::Less => line.to_string(),
-                                            }
-                                        })
-                                        .join("\n");
-
-                                    Some(format!(
-                                        "{}. {}\n\n{}\n└-----> {}",
-                                        idx + 1,
-                                        e.instance_path,
-                                        lines,
-                                        e
-                                    ))
-                                }
-                            }
-                        } else {
-                            None
+        let yaml_split_by_lines = raw_yaml.split('\n').collect::<Vec<_>>();
+
+        let errors = errors
+            .enumerate()
+            .filter_map(|(idx, mut e)| {
+                if let Some(element) = parsed_yaml.get_element(&e.instance_path) {
+                    const NUMBER_OF_PREVIOUS_LINES_TO_DISPLAY: usize = 5;
+                    match element {
+                        yaml::Value::String(value, marker) => {
+                            let lines = yaml_split_by_lines[0.max(
+                                marker
+                                    .line()
+                                    .saturating_sub(NUMBER_OF_PREVIOUS_LINES_TO_DISPLAY),
+                            )
+                                ..marker.line()]
+                                .iter()
+                                .join("\n");
+
+                            // Replace the value in the error message with the one from the raw config.
+                            // This guarantees that if the env variable contained a secret it won't be leaked.
+                            e.instance = Cow::Owned(coerce(value));
+
+                            Some(format!(
+                                "{}. {}\n\n{}\n{}^----- {}",
+                                idx + 1,
+                                e.instance_path,
+                                lines,
+                                " ".repeat(0.max(marker.col())),
+                                e
+                            ))
                         }
-                    })
-                    .join("\n\n");
-
-                if !errors.is_empty() {
-                    return Err(ConfigurationError::InvalidConfiguration {
-                        message: "configuration had errors",
-                        error: format!("\n{}", errors),
-                    });
+                        seq_element @ yaml::Value::Sequence(_, m) => {
+                            let (start_marker, end_marker) = (m, seq_element.end_marker());
+
+                            let offset = 0.max(
+                                start_marker
+                                    .line()
+                                    .saturating_sub(NUMBER_OF_PREVIOUS_LINES_TO_DISPLAY),
+                            );
+                            let lines = yaml_split_by_lines[offset..end_marker.line()]
+                                .iter()
+                                .enumerate()
+                                .map(|(idx, line)| {
+                                    let real_line = idx + offset;
+                                    match real_line.cmp(&start_marker.line()) {
+                                        Ordering::Equal => format!("┌ {line}"),
+                                        Ordering::Greater => format!("| {line}"),
+                                        Ordering::Less => line.to_string(),
+                                    }
+                                })
+                                .join("\n");
+
+                            Some(format!(
+                                "{}. {}\n\n{}\n└-----> {}",
+                                idx + 1,
+                                e.instance_path,
+                                lines,
+                                e
+                            ))
+                        }
+                        map_value @ yaml::Value::Mapping(current_label, _value, _marker) => {
+                            let (start_marker, end_marker) = (
+                                current_label.as_ref()?.marker.as_ref()?,
+                                map_value.end_marker(),
+                            );
+                            let offset = 0.max(
+                                start_marker
+                                    .line()
+                                    .saturating_sub(NUMBER_OF_PREVIOUS_LINES_TO_DISPLAY),
+                            );
+                            let lines = yaml_split_by_lines[offset..end_marker.line()]
+                                .iter()
+                                .enumerate()
+                                .map(|(idx, line)| {
+                                    let real_line = idx + offset;
+                                    match real_line.cmp(&start_marker.line()) {
+                                        Ordering::Equal => format!("┌ {line}"),
+                                        Ordering::Greater => format!("| {line}"),
+                                        Ordering::Less => line.to_string(),
+                                    }
+                                })
+                                .join("\n");
+
+                            Some(format!(
+                                "{}. {}\n\n{}\n└-----> {}",
+                                idx + 1,
+                                e.instance_path,
+                                lines,
+                                e
+                            ))
+                        }
+                    }
+                } else {
+                    None
                 }
-            }
-            Err(e) => {
-                // the yaml failed to parse. Just let serde do it's thing.
-                tracing::warn!(
-                    "failed to parse yaml using marked parser: {}. Falling back to serde validation",
-                    e
-                );
-            }
+            })
+            .join("\n\n");
+
+        if !errors.is_empty() {
+            return Err(ConfigurationError::InvalidConfiguration {
+                message: "configuration had errors",
+                error: format!("\n{}", errors),
+            });
         }
     }
 

@@ -0,0 +1,8 @@
+---
+source: apollo-router/src/configuration/yaml.rs
+expression: "format!(\"{:#?}\", parse(yaml).unwrap_err())"
+---
+InvalidConfiguration {
+    message: "duplicated keys detected in your yaml configuration",
+    error: "'test.a', 'c.dup', 'test'",
+}
@@ -1,4 +1,5 @@
 use std::collections::HashMap;
+use std::collections::HashSet;
 
 use derivative::Derivative;
 use indexmap::IndexMap;
@@ -64,6 +65,7 @@ pub(crate) struct MarkedYaml {
     current_label: Option<Label>,
     object_stack: Vec<(Option<Label>, Value, usize)>,
     root: Option<Value>,
+    duplicated_fields: HashSet<(Option<Label>, Label)>,
 }
 
 impl MarkedYaml {
@@ -89,8 +91,11 @@ impl MarkedYaml {
         let (label, v, id) = self.object_stack.pop().expect("imbalanced parse events");
         self.anchors.insert(id, v.clone());
         match (label, self.object_stack.last_mut()) {
-            (Some(label), Some((_, Value::Mapping(_current_label, mapping, _), _))) => {
-                mapping.insert(label, v);
+            (Some(label), Some((_, Value::Mapping(current_label, mapping, _), _))) => {
+                if let Some(_previous) = mapping.insert(label.clone(), v) {
+                    self.duplicated_fields
+                        .insert((current_label.clone(), label));
+                }
                 None
             }
             (None, Some((_, Value::Sequence(sequence, _), _))) => {
@@ -103,10 +108,13 @@ impl MarkedYaml {
 
     fn add_value(&mut self, marker: Marker, v: String, id: usize) {
         match (self.current_label.take(), self.object_stack.last_mut()) {
-            (Some(label), Some((_, Value::Mapping(_current_label, mapping, _), _))) => {
+            (Some(label), Some((_, Value::Mapping(current_label, mapping, _), _))) => {
                 let v = Value::String(v, marker);
                 self.anchors.insert(id, v.clone());
-                mapping.insert(label, v);
+                if let Some(_previous) = mapping.insert(label.clone(), v) {
+                    self.duplicated_fields
+                        .insert((current_label.clone(), label));
+                }
             }
             (None, Some((_, Value::Sequence(sequence, _), _))) => {
                 let v = Value::String(v, marker);
@@ -149,6 +157,26 @@ pub(crate) fn parse(source: &str) -> Result<MarkedYaml, ConfigurationError> {
             error: e.to_string(),
         })?;
 
+    // Detect duplicated keys in configuration file
+    if !loader.duplicated_fields.is_empty() {
+        let error = loader
+            .duplicated_fields
+            .iter()
+            .map(|(parent_label, dup_label)| {
+                let prefix = parent_label
+                    .as_ref()
+                    .map(|l| format!("{}.", l.name))
+                    .unwrap_or_default();
+                format!("'{prefix}{}'", dup_label.name)
+            })
+            .collect::<Vec<String>>()
+            .join(", ");
+        return Err(ConfigurationError::InvalidConfiguration {
+            message: "duplicated keys detected in your yaml configuration",
+            error,
+        });
+    }
+
     Ok(loader)
 }
 
@@ -218,4 +246,36 @@ mod test {
         let root = parsed.root().unwrap();
         assert_snapshot!(format!("{:#?}", root));
     }
+
+    #[test]
+    fn test_duplicate_keys() {
+        // DON'T reformat this. It'll change the test results
+        let yaml = r#"test:
+  a: 4
+  b: 3
+  a: 5
+  c:
+    dup: 5
+    other: 3
+    dup: 8
+test:
+  foo: bar
+"#;
+        let err = parse(yaml).unwrap_err();
+        match err {
+            crate::configuration::ConfigurationError::InvalidConfiguration { message, error } => {
+                assert_eq!(
+                    message,
+                    "duplicated keys detected in your yaml configuration"
+                );
+                // Can't do an assert on error because under the hood it uses an hashset then the order is not guaranteed
+                let error_splitted: Vec<&str> = error.split(", ").collect();
+                assert_eq!(error_splitted.len(), 3);
+                assert!(error_splitted.contains(&"'test.a'"));
+                assert!(error_splitted.contains(&"'test'"));
+                assert!(error_splitted.contains(&"'c.dup'"));
+            }
+            _ => panic!("this error must be InvalidConfiguration variant"),
+        }
+    }
 }