Skip to content

Commit

Permalink
Using the cached-path crate for downloading and unzipping (#1744)
Browse files Browse the repository at this point in the history
  • Loading branch information
robertbastian authored Mar 29, 2022
1 parent cfb9792 commit 1fdaa35
Show file tree
Hide file tree
Showing 10 changed files with 293 additions and 423 deletions.
294 changes: 200 additions & 94 deletions Cargo.lock

Large diffs are not rendered by default.

24 changes: 10 additions & 14 deletions provider/datagen/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -31,12 +31,12 @@ skip_optional_dependencies = true
all-features = true

[dependencies]
cached-path = { version = "0.5", optional = true }
clap = "2.33"
dhat = "0.3.0"
displaydoc = { version = "0.2.3", default-features = false }
elsa = "1.7"
eyre = "0.6"
futures = "0.3"
icu_calendar = { version = "0.5", path = "../../components/calendar", features = ["datagen"] }
icu_casemapping = { version = "0.1", path = "../../experimental/casemapping", features = ["provider_serde", "provider_transform_internals"], optional = true }
icu_codepointtrie = { version = "0.3.3", path = "../../utils/codepointtrie", features = ["serialize"] }
Expand Down Expand Up @@ -67,16 +67,14 @@ simple_logger = "1.12"
smallstr = { version = "0.2", features = ["serde"] }
smallvec = "1.6"
tinystr = { path = "../../utils/tinystr", version = "0.5.0", features = ["alloc", "serde", "zerovec"], default-features = false }
tokio = { version = "1.5", features = ["rt-multi-thread", "macros", "fs"] }
toml = { version = "0.5" }
writeable = { version = "0.3", path = "../../utils/writeable" }
zerovec = { version = "0.6", path = "../../utils/zerovec", features = ["serde", "yoke"] }

# Dependencies for the download feature
urlencoding = { version = "1.1", optional = true }
reqwest = { version = "0.11", features = ["json", "stream", "gzip", "blocking"], optional = true }
unzip = { version = "0.1", optional = true }
dirs = { version = "3.0", optional = true }
# Dependencies for the "download-testdata" feature
reqwest = { version = "0.11", features = ["stream"], optional = true }
tokio = { version = "1.5", features = ["rt-multi-thread", "macros", "fs"], optional = true }
futures = { version = "0.3", optional = true }

[dev-dependencies]
mktemp = "0.4"
Expand All @@ -86,21 +84,19 @@ writeable = { version = "0.3", path = "../../utils/writeable" }
[features]
experimental = []
casemapping = ["icu_casemapping"]
# Automatically download CLDR data from a host
download = ["urlencoding", "reqwest", "unzip", "dirs"]
# TODO(#1739): Disable the "download" feature by default
default = ["casemapping", "download"]
# Automatically download CLDR and uprops data
download = ["cached-path"]
download-testdata = ["reqwest", "icu_testdata/metadata", "tokio", "futures"]
default = ["casemapping"]

[[bin]]
name = "icu4x-datagen"
path = "src/bin/datagen.rs"
# TODO(#1739): Make it such that icu4x-datagen does not require the download feature
required-features = ["download"]

[[bin]]
name = "icu4x-testdata-download"
path = "src/bin/testdata-download.rs"
required-features = ["download"]
required-features = ["download-testdata"]

[[bin]]
name = "icu4x-verify-zero-copy"
Expand Down
135 changes: 80 additions & 55 deletions provider/datagen/src/bin/datagen.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,6 @@
use clap::{App, Arg, ArgGroup, ArgMatches};
use eyre::WrapErr;
use icu_datagen::cldr;
use icu_datagen::cldr::download::CldrAllInOneDownloader;
use icu_datagen::cldr::CldrPathsAllInOne;
use icu_datagen::get_all_keys;
use icu_datagen::segmenter;
Expand Down Expand Up @@ -102,18 +101,30 @@ fn main() -> eyre::Result<()> {
)
.takes_value(true),
)
.arg(
Arg::with_name("UPROPS_TAG")
.long("uprops-tag")
.value_name("TAG")
.takes_value(true),
)
.arg(
Arg::with_name("UPROPS_ROOT")
.long("uprops-root")
.value_name("PATH")
.help(
"Path to the icuexportdata uprops directory. Download a \
icuexportdata_uprops_*.zip file and point to either the \
'small' or the 'fast' subdirectory.\n\
https://github.com/unicode-org/icu/releases",
"Path to a local icuexportdata_uprops_full directory. Ignored if \
'--uprops-tag' is present.\n\
https://github.com/unicode-org/icu/releases",
)
.takes_value(true),
)
.arg(
Arg::with_name("UPROPS_MODE")
.takes_value(true)
.possible_values(&["small", "fast"])
.help("Whether to optimize Unicode property data structures for size (\"small\") or speed (\"fast\")")
.default_value("small"),
)
.arg(
Arg::with_name("INPUT_FROM_TESTDATA")
.long("input-from-testdata")
Expand Down Expand Up @@ -288,62 +299,76 @@ fn main() -> eyre::Result<()> {
filtered
};

let mut provider: Box<dyn IterableDynProvider<SerializeMarker> + Sync> =
if matches.is_present("HELLO_WORLD") {
Box::new(HelloWorldProvider::new_with_placeholder_data())
} else {
let cldr_paths = if let Some(tag) = matches.value_of("CLDR_TAG") {
Box::new(
CldrAllInOneDownloader::try_new_from_github(
tag,
matches.value_of("CLDR_LOCALE_SUBSET").unwrap_or("full"),
let mut provider: Box<dyn IterableDynProvider<SerializeMarker> + Sync> = if matches
.is_present("HELLO_WORLD")
{
Box::new(HelloWorldProvider::new_with_placeholder_data())
} else {
let cldr_paths = CldrPathsAllInOne {
cldr_json_root: if let Some(_tag) = matches.value_of("CLDR_TAG") {
#[cfg(not(feature = "download"))]
eyre::bail!("--cldr-tag requires the download feature");
#[cfg(feature = "download")]
cached_path::CacheBuilder::new().freshness_lifetime(u64::MAX).build()?
.cached_path_with_options(
&format!(
"https://github.com/unicode-org/cldr-json/releases/download/{}/cldr-{}-json-{}.zip",
_tag, _tag, matches.value_of("CLDR_LOCALE_SUBSET").unwrap_or("full")),
&cached_path::Options::default().extract(),
)?
.download()?,
)
} else {
let cldr_json_root = if let Some(path) = matches.value_of("CLDR_ROOT") {
PathBuf::from(path)
} else if matches.is_present("INPUT_FROM_TESTDATA") {
icu_testdata::paths::cldr_json_root()
} else {
eyre::bail!(
"Either --cldr-tag or --cldr-root or --input-from-testdata must be specified",
)
};
Box::new(CldrPathsAllInOne {
cldr_json_root,
locale_subset: matches
.value_of("CLDR_LOCALE_SUBSET")
.unwrap_or("full")
.to_string(),
})
};

let uprops_root = if let Some(path) = matches.value_of("UPROPS_ROOT") {
} else if let Some(path) = matches.value_of("CLDR_ROOT") {
PathBuf::from(path)
} else if matches.is_present("INPUT_FROM_TESTDATA") {
icu_testdata::paths::uprops_toml_root()
icu_testdata::paths::cldr_json_root()
} else {
eyre::bail!("Value for --uprops-root must be specified",)
};

let segmenter_data_root = icu_datagen::segmenter::segmenter_data_root();

Box::new(MultiForkByKeyProvider {
providers: vec![
Box::new(cldr::create_exportable_provider(
cldr_paths.as_ref(),
uprops_root.clone(),
)?),
Box::new(uprops::create_exportable_provider(&uprops_root)?),
Box::new(segmenter::create_exportable_provider(
&segmenter_data_root,
&uprops_root,
)?),
],
})
eyre::bail!(
"Either --cldr-tag or --cldr-root or --input-from-testdata must be specified",
)
},
locale_subset: matches
.value_of("CLDR_LOCALE_SUBSET")
.unwrap_or("full")
.to_string(),
};

let uprops_root = if let Some(_tag) = matches.value_of("UPROPS_TAG") {
#[cfg(not(feature = "download"))]
eyre::bail!("--uprops-tag requires the download feature");
#[cfg(feature = "download")]
cached_path::CacheBuilder::new().freshness_lifetime(u64::MAX).build()?
.cached_path_with_options(
&format!("https://github.com/unicode-org/icu/releases/download/{}/icuexportdata_uprops_full.zip", _tag),
&cached_path::Options::default().extract()
)?
.join("icuexportdata_uprops_full")
.join(matches.value_of("UPROPS_MODE").unwrap())
} else if let Some(path) = matches.value_of("UPROPS_ROOT") {
PathBuf::from(path)
} else if matches.is_present("INPUT_FROM_TESTDATA") {
icu_testdata::paths::uprops_toml_root()
} else {
eyre::bail!(
"Either --uprops-tag or --uprops-root or --input-from-testdata must be specified",
)
};

let segmenter_data_root = icu_datagen::segmenter::segmenter_data_root();

Box::new(MultiForkByKeyProvider {
providers: vec![
Box::new(cldr::create_exportable_provider(
&cldr_paths,
uprops_root.clone(),
)?),
Box::new(uprops::create_exportable_provider(&uprops_root)?),
Box::new(segmenter::create_exportable_provider(
&segmenter_data_root,
&uprops_root,
)?),
],
})
};

if let Some(locales) = selected_locales.as_ref() {
provider = Box::new(
provider
Expand Down
75 changes: 0 additions & 75 deletions provider/datagen/src/cldr/download/cldr_allinone.rs

This file was deleted.

35 changes: 0 additions & 35 deletions provider/datagen/src/cldr/download/error.rs

This file was deleted.

Loading

0 comments on commit 1fdaa35

Please sign in to comment.