Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix(wsl): Properly assemble multipart data #5538

Merged
merged 8 commits into from
Aug 16, 2024
186 changes: 121 additions & 65 deletions cloudinit/sources/DataSourceWSL.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,12 +9,13 @@
import os
import typing
from pathlib import PurePath
from typing import Any, List, Optional, Tuple, Union, cast
from typing import List, Optional, Tuple

import yaml

from cloudinit import sources, subp, util
from cloudinit.distros import Distro
from cloudinit.handlers import type_from_starts_with
from cloudinit.helpers import Paths

LOG = logging.getLogger(__name__)
Expand Down Expand Up @@ -143,22 +144,21 @@ def candidate_user_data_file_names(instance_name) -> List[str]:
]


def load_yaml_or_bin(data_path: str) -> Optional[Union[dict, bytes]]:
"""
Tries to load a YAML file as a dict, otherwise returns the file's raw
binary contents as `bytes`. Returns `None` if no file is found.
"""
try:
bin_data = util.load_binary_file(data_path)
dict_data = util.load_yaml(bin_data)
if dict_data is None:
return bin_data
class ConfigData:
blackboxsw marked this conversation as resolved.
Show resolved Hide resolved
"""Models a piece of configuration data as a dict if possible, while
retaining its raw representation alongside its file path"""

return dict_data
except FileNotFoundError:
LOG.debug("No data found at %s, ignoring.", data_path)
def __init__(self, path: PurePath):
self.raw: str = util.load_text_file(path)
self.path: PurePath = path

self.config_dict: Optional[dict] = None

return None
if "text/cloud-config" == type_from_starts_with(self.raw):
self.config_dict = util.load_yaml(self.raw)

def is_cloud_config(self) -> bool:
return self.config_dict is not None


def load_instance_metadata(
Expand All @@ -176,7 +176,7 @@ def load_instance_metadata(
)

try:
metadata = util.load_yaml(util.load_binary_file(metadata_path))
metadata = util.load_yaml(util.load_text_file(metadata_path))
except FileNotFoundError:
LOG.debug(
"No instance metadata found at %s. Using default instance-id.",
Expand All @@ -196,7 +196,7 @@ def load_instance_metadata(

def load_ubuntu_pro_data(
user_home: PurePath,
) -> Tuple[Union[dict, bytes, None], Union[dict, bytes, None]]:
) -> Tuple[Optional[ConfigData], Optional[ConfigData]]:
"""
Read .ubuntupro user-data if present and return a tuple of agent and
landscape user-data.
Expand All @@ -205,13 +205,110 @@ def load_ubuntu_pro_data(
if not os.path.isdir(pro_dir):
return None, None

landscape_data = load_yaml_or_bin(
landscape_path = PurePath(
os.path.join(pro_dir, LANDSCAPE_DATA_FILE % instance_name())
)
agent_data = load_yaml_or_bin(os.path.join(pro_dir, AGENT_DATA_FILE))
landscape_data = None
if os.path.isfile(landscape_path):
LOG.debug(
"Landscape configuration found: %s. Organization policy "
"ignores any local user-data in %s.",
landscape_path,
cloud_init_data_dir(user_home),
)
landscape_data = ConfigData(landscape_path)

agent_path = PurePath(os.path.join(pro_dir, AGENT_DATA_FILE))
agent_data = None
if os.path.isfile(agent_path):
agent_data = ConfigData(agent_path)

return agent_data, landscape_data


def merge_agent_landscape_data(
    agent_data: Optional[ConfigData], user_data: Optional[ConfigData]
) -> Optional[str]:
    """Merge agent.yaml and <instance>.user-data provided by WSL and Landscape.

    When merging is not possible, provide #include directive to allow
    cloud-init to merge separate parts.

    :param agent_data: Configuration loaded from agent.yaml, or None.
    :param user_data: Configuration loaded from the user-data file, or None.
    :return: None when both inputs are missing or empty; the raw text of the
        only non-empty input when just one is usable; an ``#include``
        document listing both file paths (agent first) when either input
        is not cloud-config; otherwise a ``#cloud-config`` document holding
        the merged top-level keys.
    """
    # Ignore agent_data if None or empty
    if agent_data is None or not agent_data.raw:
        if user_data is None or not user_data.raw:
            return None
        return user_data.raw

    # Ignore user_data if None or empty. agent_data is known to be non-empty
    # at this point, so it can be returned as is.
    if user_data is None or not user_data.raw:
        return agent_data.raw

    # If both are found but we cannot reliably model both data files as
    # cloud-config dicts, then we cannot merge them ourselves, so we should
    # pass the data as if the user had written an include file
    # for cloud-init to handle internally. We explicitly prioritize the
    # agent data, to ensure cloud-init would handle it even in the presence
    # of syntax errors in user data (agent data is autogenerated).
    # It's possible that the effects caused by the user data would override
    # the agent data, but that's ultimately the user's responsibility.
    # The alternative of writing the user data first would make it possible
    # for the agent data to be skipped in the presence of syntax errors in
    # user data.
    if not (agent_data.is_cloud_config() and user_data.is_cloud_config()):
        # Lazy %-style arguments: the paths are actually interpolated (the
        # previous "{...}" placeholders were logged verbatim).
        LOG.debug(
            "Unable to merge %s and %s. "
            "Providing as separate user-data #include.",
            agent_data.path,
            user_data.path,
        )
        return "#include\n%s\n%s\n" % (
            agent_data.path.as_posix(),
            user_data.path.as_posix(),
        )

    # We only care about overriding top-level config keys entirely, so we
    # can just iterate over the top level keys and write over them if the
    # agent provides them instead.
    # That's the reason for not using util.mergemanydict().
    merged: dict = {}
    user_tags: str = ""
    overridden_keys: List[str] = []
    # The isinstance checks narrow Optional[dict] to dict before use.
    if isinstance(user_data.config_dict, dict):
        # NOTE: merged aliases user_data.config_dict, which is therefore
        # updated in place by the loop below.
        merged = user_data.config_dict
        # Remember the user-provided tags before the agent's "landscape"
        # key possibly replaces the whole section.
        user_tags = (
            merged.get("landscape", {}).get("client", {}).get("tags", "")
        )
    if isinstance(agent_data.config_dict, dict):
        LOG.debug("Merging both user_data and agent.yaml configs.")
        agent = agent_data.config_dict
        for key in agent:
            if key in merged:
                overridden_keys.append(key)
            merged[key] = agent[key]
        if overridden_keys:
            # The key list is a log argument; previously the prefix literal
            # was silently fused into the join separator.
            LOG.debug(
                " agent.yaml overrides config keys: %s",
                ", ".join(overridden_keys),
            )
        # Restore the user's tags only if a landscape client section
        # survived the merge.
        if user_tags and merged.get("landscape", {}).get("client"):
            LOG.debug(
                "Landscape client conf updated with user-data"
                " landscape.client.tags: %s",
                user_tags,
            )
            merged["landscape"]["client"]["tags"] = user_tags

    return (
        "#cloud-config\n# WSL datasouce Merged agent.yaml and user_data\n%s"
        % yaml.dump(merged).strip()
    )


class DataSourceWSL(sources.DataSource):
dsname = "WSL"

Expand Down Expand Up @@ -284,8 +381,8 @@ def _get_data(self) -> bool:
return False

seed_dir = cloud_init_data_dir(user_home)
agent_data = None
user_data: Optional[Union[dict, bytes]] = None
agent_data: Optional[ConfigData] = None
user_data: Optional[ConfigData] = None

# Load any metadata
try:
Expand All @@ -303,8 +400,8 @@ def _get_data(self) -> bool:
# Load regular user configs
try:
if user_data is None and seed_dir is not None:
file = self.find_user_data_file(seed_dir)
user_data = load_yaml_or_bin(file.as_posix())
user_data = ConfigData(self.find_user_data_file(seed_dir))

except (ValueError, IOError) as err:
LOG.error(
"Unable to load any user-data file in %s: %s",
Expand All @@ -316,48 +413,7 @@ def _get_data(self) -> bool:
if not any([user_data, agent_data]):
return False

# If we cannot reliably model data files as dicts, then we cannot merge
# ourselves, so we can pass the data in ascending order as a list for
# cloud-init to handle internally
if isinstance(agent_data, bytes) or isinstance(user_data, bytes):
self.userdata_raw = cast(Any, [user_data, agent_data])
return True

# We only care about overriding modules entirely, so we can just
# iterate over the top level keys and write over them if the agent
# provides them instead.
# That's the reason for not using util.mergemanydict().
merged: dict = {}
user_tags: str = ""
overridden_keys: typing.List[str] = []
if user_data:
merged = user_data
user_tags = (
merged.get("landscape", {}).get("client", {}).get("tags", "")
)
if agent_data:
if user_data:
LOG.debug("Merging both user_data and agent.yaml configs.")
for key in agent_data:
if key in merged:
overridden_keys.append(key)
merged[key] = agent_data[key]
if overridden_keys:
LOG.debug(
(
" agent.yaml overrides config keys: "
", ".join(overridden_keys)
)
)
if user_tags and merged.get("landscape", {}).get("client"):
LOG.debug(
"Landscape client conf updated with user-data"
" landscape.client.tags: %s",
user_tags,
)
merged["landscape"]["client"]["tags"] = user_tags

self.userdata_raw = "#cloud-config\n%s" % yaml.dump(merged)
self.userdata_raw = merge_agent_landscape_data(agent_data, user_data)
return True


Expand Down
Loading
Loading