Skip to content

Commit

Permalink
Vendor Generated Protobuf Code (apache#3947) (apache#3950)
Browse files Browse the repository at this point in the history
* Vendor generated protobuf code (apache#3947)

* RAT

* Fix build without json

* Review feedback

* Doc tweak

* Fix Arch install instructions
  • Loading branch information
tustvold authored and Dandandan committed Nov 5, 2022
1 parent c3b7508 commit bcb2315
Show file tree
Hide file tree
Showing 10 changed files with 14,793 additions and 54 deletions.
2 changes: 2 additions & 0 deletions .gitattributes
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
datafusion/proto/src/generated/prost.rs linguist-generated
datafusion/proto/src/generated/pbjson.rs linguist-generated
13 changes: 2 additions & 11 deletions .github/workflows/rust.yml
Original file line number Diff line number Diff line change
Expand Up @@ -81,16 +81,6 @@ jobs:
- uses: actions/checkout@v3
with:
submodules: true
- name: Install protobuf compiler
shell: bash
run: |
mkdir -p $HOME/d/protoc
cd $HOME/d/protoc
export PROTO_ZIP="protoc-21.4-linux-x86_64.zip"
curl -LO https://github.com/protocolbuffers/protobuf/releases/download/v21.4/$PROTO_ZIP
unzip $PROTO_ZIP
export PATH=$PATH:$HOME/d/protoc/bin
protoc --version
- name: Cache Cargo
uses: actions/cache@v3
with:
Expand Down Expand Up @@ -124,7 +114,8 @@ jobs:
cargo run --example rewrite_expr
cargo run --example simple_udf
cargo run --example simple_udaf
- name: Verify Working Directory Clean
run: git diff --exit-code

integration-test:
name: "Compare to postgres"
Expand Down
8 changes: 4 additions & 4 deletions datafusion/proto/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -28,16 +28,16 @@ keywords = ["arrow", "query", "sql"]
edition = "2021"
rust-version = "1.62"

[package.metadata.docs.rs]
rustc-args = ["--cfg", "docsrs"]
# Exclude proto files so crates.io consumers don't need protoc
exclude = ["*.proto"]

[lib]
name = "datafusion_proto"
path = "src/lib.rs"

[features]
default = []
json = ["pbjson", "pbjson-build", "serde", "serde_json"]
json = ["pbjson", "serde", "serde_json"]

[dependencies]
arrow = "25.0.0"
Expand All @@ -55,5 +55,5 @@ doc-comment = "0.3"
tokio = "1.18"

[build-dependencies]
pbjson-build = { version = "0.5", optional = true }
pbjson-build = { version = "0.5" }
prost-build = { version = "0.11.1" }
44 changes: 15 additions & 29 deletions datafusion/proto/build.rs
Original file line number Diff line number Diff line change
Expand Up @@ -15,25 +15,29 @@
// specific language governing permissions and limitations
// under the License.

use std::path::{Path, PathBuf};

type Error = Box<dyn std::error::Error>;
type Result<T, E = Error> = std::result::Result<T, E>;

fn main() -> Result<(), String> {
// Allow forcing a rebuild via the FORCE_REBUILD env var, e.g. in docker builds where file-change detection is unreliable
println!("cargo:rerun-if-env-changed=FORCE_REBUILD");
println!("cargo:rerun-if-changed=proto/datafusion.proto");

build()?;
// The proto files are not included in releases, so that downstream
// users do not need to have protoc installed
if Path::new("proto/datafusion.proto").exists() {
println!("cargo:rerun-if-changed=proto/datafusion.proto");
build()?
}

Ok(())
}

fn build() -> Result<(), String> {
use std::io::Write;

let out = std::path::PathBuf::from(
std::env::var("OUT_DIR").expect("Cannot find OUT_DIR environment variable"),
);
let out: PathBuf = std::env::var("OUT_DIR")
.expect("Cannot find OUT_DIR environment variable")
.into();
let descriptor_path = out.join("proto_descriptor.bin");

prost_build::Config::new()
Expand All @@ -43,11 +47,9 @@ fn build() -> Result<(), String> {
.compile_protos(&["proto/datafusion.proto"], &["proto"])
.map_err(|e| format!("protobuf compilation failed: {}", e))?;

#[cfg(feature = "json")]
let descriptor_set = std::fs::read(&descriptor_path)
.expect(&*format!("Cannot read {:?}", &descriptor_path));

#[cfg(feature = "json")]
pbjson_build::Builder::new()
.register_descriptors(&descriptor_set)
.expect(&*format!(
Expand All @@ -57,27 +59,11 @@ fn build() -> Result<(), String> {
.build(&[".datafusion"])
.map_err(|e| format!("pbjson compilation failed: {}", e))?;

// .serde.rs is not a valid package name, so append to datafusion.rs so we can treat it normally
let proto = std::fs::read_to_string(out.join("datafusion.rs")).unwrap();

#[cfg(feature = "json")]
let json = std::fs::read_to_string(out.join("datafusion.serde.rs")).unwrap();

#[cfg(feature = "docsrs")]
let path = out.join("datafusion.rs");
#[cfg(not(feature = "docsrs"))]
let path = "src/generated/datafusion.rs";

let mut file = std::fs::OpenOptions::new()
.write(true)
.truncate(true)
.create(true)
.open(path)
.unwrap();
file.write_all(proto.as_str().as_ref()).unwrap();
let prost = out.join("datafusion.rs");
let pbjson = out.join("datafusion.serde.rs");

#[cfg(feature = "json")]
file.write_all(json.as_str().as_ref()).unwrap();
std::fs::copy(prost, "src/generated/prost.rs").unwrap();
std::fs::copy(pbjson, "src/generated/pbjson.rs").unwrap();

Ok(())
}
4 changes: 0 additions & 4 deletions datafusion/proto/src/generated/.gitignore

This file was deleted.

8 changes: 2 additions & 6 deletions datafusion/proto/src/generated/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -18,13 +18,9 @@
#[allow(clippy::all)]
#[rustfmt::skip]
#[cfg(not(docsrs))]
pub mod datafusion;

#[cfg(docsrs)]
#[allow(clippy::all)]
pub mod datafusion {
include!(concat!(env!("OUT_DIR"), "/datafusion.rs"));
include!("prost.rs");

#[cfg(feature = "json")]
include!(concat!(env!("OUT_DIR"), "/datafusion.serde.rs"));
include!("pbjson.rs");
}
Loading

0 comments on commit bcb2315

Please sign in to comment.