Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Extend prost-build to generate message builders #901

Open
wants to merge 6 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
49 changes: 49 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -287,6 +287,55 @@ pub mod foo {

[^3]: Annotations have been elided for clarity. See below for a full example.

### Builders

[Builders] can be optionally generated for message types. A builder pattern API
allows constructing messages with code that does not need to be changed when
new fields are added to the message definition. For example,
if a matching builder name `Builder` is configured for this proto message:

```protobuf,ignore
message Person {
string name = 1;
repeated PhoneNumber phones = 2;
}
```

the following will be generated in addition to the message struct:

```rust,ignore
impl Person {
pub fn builder() -> person::Builder { ... }
}

pub mod person {
#[derive(Default)]
pub struct Builder {
// ...
}

impl Builder {
pub fn name(mut self, value: impl Into<String>) -> Self { ... }

pub fn phones(
mut self,
value: impl IntoIterator<Item = impl Into<PhoneNumber>>,
) -> Self {
...
}

pub fn build(self) -> super::Person { ... }
}
}
```

This can be combined with the `#[non_exhaustive]` attribute on the message struct
to ensure that field additions, which are backward-compatible accordingly to
the Protobuf semver convention, do not force semver breaks in a Rust crate that
provides the generated struct.

[Builders]: https://rust-lang.github.io/api-guidelines/type-safety.html#c-builder

### Services

`prost-build` allows a custom code-generator to be used for processing `service`
Expand Down
249 changes: 248 additions & 1 deletion prost-build/src/code_generator.rs
Original file line number Diff line number Diff line change
Expand Up @@ -269,7 +269,17 @@ impl<'a> CodeGenerator<'a> {
self.push_indent();
self.buf.push_str("}\n");

if !message.enum_type.is_empty() || !nested_types.is_empty() || !oneof_fields.is_empty() {
let builder = self.builder_name(&fq_message_name);

if let Some(builder_name) = &builder {
self.append_associated_builder_fn(&message_name, builder_name);
}

if builder.is_some()
|| !message.enum_type.is_empty()
|| !nested_types.is_empty()
|| !oneof_fields.is_empty()
{
self.push_mod(&message_name);
self.path.push(3);
for (nested_type, idx) in nested_types {
Expand All @@ -291,6 +301,16 @@ impl<'a> CodeGenerator<'a> {
self.append_oneof(&fq_message_name, oneof);
}

if let Some(builder_name) = &builder {
self.append_builder(
builder_name,
&message_name,
&fq_message_name,
&fields,
&oneof_fields,
);
}

self.pop_mod();
}

Expand Down Expand Up @@ -676,6 +696,233 @@ impl<'a> CodeGenerator<'a> {
self.buf.push_str("}\n");
}

fn builder_name(&self, fq_message_name: &str) -> Option<String> {
self.config
.builders
.get_first(fq_message_name)
.map(|name| name.to_owned())
}

fn append_associated_builder_fn(&mut self, message_name: &str, builder_name: &str) {
let mod_name = to_snake(message_name);
let struct_name = to_upper_camel(message_name);
let builder_fn_name = to_snake(builder_name);
let mq_builder_name = format!("{mod_name}::{builder_name}");

self.push_indent();
self.buf.push_str("impl ");
self.buf.push_str(&struct_name);
self.buf.push_str(" {\n");
self.depth += 1;

self.push_indent();
self.buf.push_str("/// Starts a builder for `");
self.buf.push_str(&struct_name);
self.buf.push_str("`.\n");
self.push_indent();
self.buf
.push_str("/// All fields are initialized with default values.\n");
self.push_indent();
self.buf.push_str("#[inline]\n");
self.push_indent();
self.buf.push_str("pub fn ");
self.buf.push_str(&builder_fn_name);
self.buf.push_str("() -> ");
self.buf.push_str(&mq_builder_name);
self.buf.push_str(" {\n");
self.depth += 1;
self.push_indent();
self.buf.push_str("Default::default()\n");
self.depth -= 1;
self.push_indent();
self.buf.push_str("}\n");

self.depth -= 1;
self.push_indent();
self.buf.push_str("}\n");
}

fn append_builder(
&mut self,
builder_name: &str,
message_name: &str,
fq_message_name: &str,
fields: &[Field],
oneof_fields: &[OneofField],
) {
debug!(" builder: {:?}", message_name);

let struct_name = to_upper_camel(message_name);

// Generate builder struct
self.push_indent();
self.buf.push_str(&format!(
"/// Builder for [`{struct_name}`](super::{struct_name})\n"
));
self.push_indent();
self.buf.push_str("#[derive(Clone, Default)]\n");
self.push_indent();
self.buf.push_str("pub struct ");
self.buf.push_str(builder_name);
self.buf.push_str(" {\n");
self.depth += 1;

self.push_indent();
self.buf.push_str("inner: super::");
self.buf.push_str(&struct_name);
self.buf.push_str(",\n");

self.depth -= 1;
self.push_indent();
self.buf.push_str("}\n");

// Generate impl item with setter methods and the build method
self.push_indent();
self.buf.push_str("impl ");
self.buf.push_str(builder_name);
self.buf.push_str(" {\n");
self.depth += 1;

for field in fields {
self.append_builder_field_setter(fq_message_name, field);
}

for oneof in oneof_fields {
self.append_builder_oneof_field_setter(oneof);
}

self.push_indent();
self.buf.push_str("#[inline]\n");
self.push_indent();
self.buf.push_str("pub fn build(self) -> super::");
self.buf.push_str(&struct_name);
self.buf.push_str(" {\n");
self.depth += 1;
self.push_indent();
self.buf.push_str("self.inner\n");
self.depth -= 1;
self.push_indent();
self.buf.push_str("}\n");

self.depth -= 1;
self.push_indent();
self.buf.push_str("}\n");

self.push_indent();
self.buf.push_str("impl From<");
self.buf.push_str(builder_name);
self.buf.push_str("> for super::");
self.buf.push_str(&struct_name);
self.buf.push_str(" {\n");
self.depth += 1;
self.push_indent();
self.buf.push_str("#[inline]\n");
self.push_indent();
self.buf.push_str("fn from(builder: ");
self.buf.push_str(builder_name);
self.buf.push_str(") -> Self {\n");
self.depth += 1;
self.push_indent();
self.buf.push_str("builder.inner\n");
self.depth -= 1;
self.push_indent();
self.buf.push_str("}\n");
self.depth -= 1;
self.push_indent();
self.buf.push_str("}\n");
}

fn append_builder_field_setter(&mut self, fq_message_name: &str, field: &Field) {
let rust_name = field.rust_name();
let repeated = field.descriptor.label == Some(Label::Repeated as i32);
let deprecated = self.deprecated(&field.descriptor);
let optional = self.optional(&field.descriptor);
let boxed = self.boxed(&field.descriptor, fq_message_name, None);
let ty = self.resolve_type(&field.descriptor, fq_message_name);

let prost_path = prost_path(self.config);
let arg_type = if field.descriptor.r#type() == Type::Enum {
// Enums are special: the field type is i32, but for the setter
// we want the accompanying enumeration type.
self.resolve_ident(field.descriptor.type_name())
} else {
let arg_bound = if boxed {
format!("Into<{}::alloc::boxed::Box<{}>>", prost_path, ty)
} else {
format!("Into<{}>", ty)
};
if repeated {
format!("impl IntoIterator<Item = impl {}>", arg_bound)
} else {
format!("impl {}", arg_bound)
}
};

debug!(" field setter: {:?}, arg: {:?}", rust_name, arg_type);

if deprecated {
self.push_indent();
self.buf.push_str("#[deprecated]\n");
}
self.push_indent();
self.buf.push_str("#[inline]\n");
self.push_indent();
self.buf.push_str("pub fn ");
self.buf.push_str(&rust_name);
self.buf.push_str("(mut self, value: ");
self.buf.push_str(&arg_type);
self.buf.push_str(") -> Self {\n");
self.depth += 1;

self.push_indent();
self.buf.push_str("self.inner.");
self.buf.push_str(&rust_name);
self.buf.push_str(" = ");
if repeated {
self.buf
.push_str("value.into_iter().map(Into::into).collect();\n");
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

A repeated type is a Vec, but this setter doesn't allow the direct setting of a Vec. It will reallocate. I think the setter should just use Vec as its input type.

Copy link
Contributor Author

@mzabaluev mzabaluev Nov 10, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I consider the setter a convenience method. The field is public, so there is always the option of assigning to it directly rather than going through FromIterator.

When specialization is stabilized in future Rust, the setter can be specialized for Vec without breaking the API.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

An impl Into<Vec<T>> argument looks like a reasonable compromise: apart from Vec<T> with zero copying, it accepts &[T], &[T; N], and [T; N], so it should cover the most common cases without extra conversions. To set from an iterator, the caller would have to call .collect::<Vec<_>>()

} else if optional {
self.buf.push_str("Some(value.into());\n")
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The setter for optional fields should accept a Option. For "normal" fields I think it is good to forgo the Option.

Copy link
Contributor Author

@mzabaluev mzabaluev Nov 10, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I applied the same rationale as for repeated fieds: the setter is complementary and allows shorthand code, while there is the option of assigning the field directly.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thinking of the most common use case here, where the application has a valid value to set. It should be rare to need to explicitly set a field to None, that is not addressed by Default impls on the struct and the Option field type.

Copy link
Contributor Author

@mzabaluev mzabaluev Nov 10, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

On the other hand, maybe the case of an explicitly optional field (as opposed to a "normal" field which must have a default for the uninitialized case) is special enough that the developers meant it to be used this way. I will change to it to Option.

} else {
self.buf.push_str("value.into();\n")
}

self.push_indent();
self.buf.push_str("self\n");
self.depth -= 1;
self.push_indent();
self.buf.push_str("}\n");
}

fn append_builder_oneof_field_setter(&mut self, oneof: &OneofField) {
let rust_name = oneof.rust_name();
let ty = to_upper_camel(oneof.descriptor.name());

debug!(" oneof field setter: {:?}, arg: {:?}", rust_name, ty);

self.push_indent();
self.buf.push_str("#[inline]\n");
self.push_indent();
self.buf.push_str("pub fn ");
self.buf.push_str(&rust_name);
self.buf.push_str("(mut self, value: ");
self.buf.push_str(&ty);
self.buf.push_str(") -> Self {\n");
self.depth += 1;

self.push_indent();
self.buf.push_str("self.inner.");
self.buf.push_str(&rust_name);
self.buf.push_str(" = Some(value);\n");

self.push_indent();
self.buf.push_str("self\n");

self.depth -= 1;
self.push_indent();
self.buf.push_str("}\n");
}

fn location(&self) -> Option<&Location> {
let source_info = self.source_info.as_ref()?;
let idx = source_info
Expand Down
38 changes: 38 additions & 0 deletions prost-build/src/config.rs
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,7 @@ pub struct Config {
pub(crate) skip_protoc_run: bool,
pub(crate) include_file: Option<PathBuf>,
pub(crate) prost_path: Option<String>,
pub(crate) builders: PathMap<String>,
#[cfg(feature = "format")]
pub(crate) fmt: bool,
}
Expand Down Expand Up @@ -608,6 +609,41 @@ impl Config {
self
}

/// Generate the [builder pattern] API for matched message types.
///
/// [builder pattern]: https://rust-lang.github.io/api-guidelines/type-safety.html#c-builder
///
/// # Arguments
///
/// **`path`** - a path matching any number of types. It works the same way as in
/// [`btree_map`](#method.btree_map), just with the field name omitted.
///
/// **`builder_name`** - A name for the builder type. The struct with this name
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The path can match multiple messages. I think the builder_name should be a suffix so that it doesn't generate multiple builders with the same name.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The builder is generated inside the submodule named after each message, so a collision could only occur with the message's nested types and enums. This should be rare enough that it can be solved by a specific matcher for messages where it occurs.

Stylistically, I prefer my_message::Builder to my_message::MyMessageBuilder or self::MyMessageBuilder.

/// will be generated in the same module as message's nested types.
/// The same name, converted to snake case, will be used for the associated
/// function to the message type that creates the builder.
///
/// # Example
///
/// Builder API can be used to enable forward compatibility in the face of
/// new fields added to the Protobuf message definition, when combined with
/// the `#[non_exhaustive]` attribute on the message structs.
///
/// ```rust
/// # let mut config = prost_build::Config::new();
/// config.builder(".my_messages", "Builder");
/// config.message_attribute(".my_messages", "#[non_exhaustive]");
/// ```
pub fn builder<P, B>(&mut self, path: P, builder_name: B) -> &mut Self
where
P: AsRef<str>,
B: AsRef<str>,
{
self.builders
.insert(path.as_ref().to_owned(), builder_name.as_ref().to_owned());
self
}

/// Configures the output directory where generated Rust files will be written.
///
/// If unset, defaults to the `OUT_DIR` environment variable. `OUT_DIR` is set by Cargo when
Expand Down Expand Up @@ -1097,6 +1133,7 @@ impl default::Default for Config {
skip_protoc_run: false,
include_file: None,
prost_path: None,
builders: PathMap::default(),
#[cfg(feature = "format")]
fmt: true,
}
Expand All @@ -1123,6 +1160,7 @@ impl fmt::Debug for Config {
.field("disable_comments", &self.disable_comments)
.field("skip_debug", &self.skip_debug)
.field("prost_path", &self.prost_path)
.field("builders", &self.builders)
.finish()
}
}
Expand Down
1 change: 1 addition & 0 deletions tests-2015/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ edition = "2015"
build = "../tests/src/build.rs"

[lib]
name = "tests"
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why is this needed? It is not related to builders.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is needed so that the doctests I have added to the tests crate also work in these shim crates.

doctest = false
Copy link
Contributor Author

@mzabaluev mzabaluev Nov 10, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

There is doctest = false in both crates (also in tests-2018 which is not included in the workspace for some reason), but doctests are compiled by cargo test even without the --doc option.

path = "../tests/src/lib.rs"

Expand Down
Loading
Loading