Skip to content

Commit

Permalink
Less ZeroMap mutation in datagen (#3098)
Browse files (browse the repository at this point in the history)
  • Loading branch information
robertbastian authored Feb 9, 2023
1 parent 1c29d57 commit c6c67a1
Show file tree
Hide file tree
Showing 10 changed files with 133 additions and 164 deletions.
13 changes: 5 additions & 8 deletions provider/datagen/src/transform/cldr/calendar/japanese.rs
Original file line number Diff line number Diff line change
Expand Up @@ -12,8 +12,6 @@ use std::env;
use std::str::FromStr;
use tinystr::tinystr;
use tinystr::TinyStr16;
use zerovec::ule::AsULE;
use zerovec::ZeroVec;

const JAPANESE_FILE: &str = include_str!("./snapshot-japanese@1.json");

Expand Down Expand Up @@ -53,9 +51,7 @@ impl crate::DatagenProvider {
.abbr;
let era_dates_map = &era_dates.supplemental.calendar_data.japanese.eras;

let mut ret = JapaneseErasV1 {
dates_to_eras: ZeroVec::new(),
};
let mut dates_to_eras = BTreeMap::new();

for (era_id, era_name) in era_name_map.iter() {
// These don't exist but may in the future
Expand All @@ -78,12 +74,13 @@ impl crate::DatagenProvider {
let code = era_to_code(era_name, start_date.year)
.map_err(|e| DataError::custom("Era codes").with_display_context(&e))?;
if start_date.year >= 1868 || japanext {
ret.dates_to_eras
.with_mut(|v| v.push((start_date, code).to_unaligned()));
dates_to_eras.insert(start_date, code);
}
}

ret.dates_to_eras.to_mut_slice().sort_unstable();
let ret = JapaneseErasV1 {
dates_to_eras: dates_to_eras.into_iter().collect(),
};

// Integrity check
//
Expand Down
7 changes: 3 additions & 4 deletions provider/datagen/src/transform/cldr/datetime/symbols.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,6 @@ use icu_datetime::provider::calendar::*;
use std::borrow::Cow;
use std::collections::BTreeMap;
use tinystr::{tinystr, TinyStr16, TinyStr4};
use zerovec::ZeroMap;

pub fn convert_dates(other: &cldr_serde::ca::Dates, calendar: &str) -> DateSymbolsV1<'static> {
DateSymbolsV1 {
Expand Down Expand Up @@ -222,7 +221,7 @@ impl cldr_serde::ca::months::Symbols {
}
months::SymbolsV1::SolarTwelve(arr)
} else {
let mut map: ZeroMap<MonthCode, str> = ZeroMap::default();
let mut map = BTreeMap::new();
for (k, v) in self.0.iter() {
let index: usize = k
.parse()
Expand All @@ -234,9 +233,9 @@ impl cldr_serde::ca::months::Symbols {
.get(index - 1)
.expect("Found out of bounds month index for calendar");

map.insert(&MonthCode(*code), v);
map.insert(MonthCode(*code), v.as_ref());
}
months::SymbolsV1::Other(map)
months::SymbolsV1::Other(map.into_iter().collect())
}
}
}
Expand Down
18 changes: 9 additions & 9 deletions provider/datagen/src/transform/cldr/displaynames/language.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,8 @@ use core::convert::TryFrom;
use icu_displaynames::provider::*;
use icu_provider::datagen::IterableDataProvider;
use icu_provider::prelude::*;
use std::collections::BTreeMap;
use zerovec::ule::UnvalidatedStr;
use zerovec::ZeroMap;

impl DataProvider<LanguageDisplayNamesV1Marker> for crate::DatagenProvider {
fn load(
Expand Down Expand Up @@ -57,10 +57,10 @@ const ALT_MENU_SUBSTRING: &str = "-alt-menu";

impl From<&cldr_serde::language_displaynames::Resource> for LanguageDisplayNamesV1<'static> {
fn from(other: &cldr_serde::language_displaynames::Resource) -> Self {
let mut names = ZeroMap::new();
let mut short_names = ZeroMap::new();
let mut long_names = ZeroMap::new();
let mut menu_names = ZeroMap::new();
let mut names = BTreeMap::new();
let mut short_names = BTreeMap::new();
let mut long_names = BTreeMap::new();
let mut menu_names = BTreeMap::new();
for lang_data_entry in other.main.0.iter() {
for entry in lang_data_entry.1.localedisplaynames.languages.iter() {
if let Some(region) = entry.0.strip_suffix(ALT_SHORT_SUBSTRING) {
Expand All @@ -79,10 +79,10 @@ impl From<&cldr_serde::language_displaynames::Resource> for LanguageDisplayNames
}
}
Self {
names,
short_names,
long_names,
menu_names,
names: names.into_iter().collect(),
short_names: short_names.into_iter().collect(),
long_names: long_names.into_iter().collect(),
menu_names: menu_names.into_iter().collect(),
}
}
}
Expand Down
41 changes: 14 additions & 27 deletions provider/datagen/src/transform/cldr/displaynames/region.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,9 +7,9 @@ use core::convert::TryFrom;
use icu_displaynames::provider::*;
use icu_provider::datagen::IterableDataProvider;
use icu_provider::prelude::*;
use std::collections::BTreeMap;
use tinystr::TinyAsciiStr;
use tinystr::TinyStrError;
use zerovec::ZeroMap;

impl DataProvider<RegionDisplayNamesV1Marker> for crate::DatagenProvider {
fn load(
Expand Down Expand Up @@ -50,39 +50,26 @@ impl IterableDataProvider<RegionDisplayNamesV1Marker> for crate::DatagenProvider
/// Substring used to denote alternative region names data variants for a given region. For example: "BA-alt-short", "TL-alt-variant".
const ALT_SUBSTRING: &str = "-alt-";
/// Substring used to denote short region display names data variants for a given region. For example: "BA-alt-short".
const SHORT_SUBSTRING: &str = "-short";
const SHORT_SUBSTRING: &str = "-alt-short";

impl TryFrom<&cldr_serde::region_displaynames::Resource> for RegionDisplayNamesV1<'static> {
type Error = TinyStrError;
fn try_from(other: &cldr_serde::region_displaynames::Resource) -> Result<Self, Self::Error> {
let mut names = ZeroMap::new();
let mut short_names = ZeroMap::new();
for lang_data_entry in other.main.0.iter() {
for entry in lang_data_entry.1.localedisplaynames.regions.iter() {
let mut region = String::from(entry.0);
if !region.contains(ALT_SUBSTRING) {
match <TinyAsciiStr<3>>::from_str(&region) {
Ok(key) => {
names.insert(&key, entry.1.as_ref());
}
Err(err) => {
return Err(err);
}
}
} else if region.contains(SHORT_SUBSTRING) {
region.truncate(region.find('-').unwrap());
match <TinyAsciiStr<3>>::from_str(&region) {
Ok(key) => {
short_names.insert(&key, entry.1.as_ref());
}
Err(err) => {
return Err(err);
}
}
let mut names = BTreeMap::new();
let mut short_names = BTreeMap::new();
for (_, lang_display_names) in other.main.0.iter() {
for (region, value) in lang_display_names.localedisplaynames.regions.iter() {
if let Some(region) = region.strip_suffix(SHORT_SUBSTRING) {
short_names.insert(TinyAsciiStr::from_str(region)?, value.as_ref());
} else if !region.contains(ALT_SUBSTRING) {
names.insert(TinyAsciiStr::from_str(region)?, value.as_ref());
}
}
}
Ok(Self { names, short_names })
Ok(Self {
names: names.into_iter().collect(),
short_names: short_names.into_iter().collect(),
})
}
}

Expand Down
32 changes: 19 additions & 13 deletions provider/datagen/src/transform/cldr/fallback/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -13,9 +13,10 @@ use icu_locid::{
use icu_provider::datagen::IterableDataProvider;
use icu_provider::prelude::*;
use icu_provider_adapters::fallback::provider::*;

use std::collections::BTreeMap;
use tinystr::TinyAsciiStr;
use writeable::Writeable;
use zerovec::{maps::ZeroMap2d, ule::UnvalidatedStr, ZeroMap};
use zerovec::{maps::ZeroMap2d, ule::UnvalidatedStr};

impl DataProvider<LocaleFallbackLikelySubtagsV1Marker> for crate::DatagenProvider {
fn load(
Expand Down Expand Up @@ -119,9 +120,9 @@ impl IterableDataProvider<CollationFallbackSupplementV1Marker> for crate::Datage

impl From<&cldr_serde::likely_subtags::Resource> for LocaleFallbackLikelySubtagsV1<'static> {
fn from(source_data: &cldr_serde::likely_subtags::Resource) -> Self {
let mut l2s = ZeroMap::new();
let mut l2s = BTreeMap::<TinyAsciiStr<3>, _>::new();
let mut lr2s = ZeroMap2d::new();
let mut l2r = ZeroMap::new();
let mut l2r = BTreeMap::<TinyAsciiStr<3>, _>::new();
let mut ls2r = ZeroMap2d::new();

// First collect the l2s and l2r maps
Expand All @@ -138,10 +139,10 @@ impl From<&cldr_serde::likely_subtags::Resource> for LocaleFallbackLikelySubtags
let script = maximized.script.expect("maximized");
let region = maximized.region.expect("maximized");
if script != DEFAULT_SCRIPT {
l2s.insert(&language.into(), &script);
l2s.insert(language.into(), script);
}
if region != DEFAULT_REGION {
l2r.insert(&language.into(), &region);
l2r.insert(language.into(), region);
}
}

Expand All @@ -160,14 +161,14 @@ impl From<&cldr_serde::likely_subtags::Resource> for LocaleFallbackLikelySubtags
let region = maximized.region.expect("maximized");
if minimized.script.is_some() {
assert!(minimized.region.is_none(), "{minimized:?}");
let region_for_lang = l2r.get_copied(&language.into()).unwrap_or(DEFAULT_REGION);
let region_for_lang = l2r.get(&language.into()).copied().unwrap_or(DEFAULT_REGION);
if region != region_for_lang {
ls2r.insert(&language.into(), &script.into(), &region);
}
continue;
}
if minimized.region.is_some() {
let script_for_lang = l2s.get_copied(&language.into()).unwrap_or(DEFAULT_SCRIPT);
let script_for_lang = l2s.get(&language.into()).copied().unwrap_or(DEFAULT_SCRIPT);
if script != script_for_lang {
lr2s.insert(&language.into(), &region.into(), &script);
}
Expand All @@ -177,17 +178,17 @@ impl From<&cldr_serde::likely_subtags::Resource> for LocaleFallbackLikelySubtags
}

LocaleFallbackLikelySubtagsV1 {
l2s,
l2s: l2s.into_iter().collect(),
lr2s,
l2r,
l2r: l2r.into_iter().collect(),
ls2r,
}
}
}

impl From<&cldr_serde::parent_locales::Resource> for LocaleFallbackParentsV1<'static> {
fn from(source_data: &cldr_serde::parent_locales::Resource) -> Self {
let mut parents = ZeroMap::new();
let mut parents = BTreeMap::<_, (Language, Option<Script>, Option<Region>)>::new();

for (source, target) in source_data.supplemental.parent_locales.parent_locale.iter() {
assert!(!source.language.is_empty());
Expand All @@ -198,10 +199,15 @@ impl From<&cldr_serde::parent_locales::Resource> for LocaleFallbackParentsV1<'st
// We always fall back from language-script to und
continue;
}
parents.insert((&*source.write_to_string()).into(), &target.into());
parents.insert(source.write_to_string(), target.into());
}

LocaleFallbackParentsV1 { parents }
LocaleFallbackParentsV1 {
parents: parents
.iter()
.map(|(k, v)| (<&UnvalidatedStr>::from(k.as_ref()), v))
.collect(),
}
}
}

Expand Down
Loading

0 comments on commit c6c67a1

Please sign in to comment.