Skip to content

Commit

Permalink
fix: do not use 'and' translations from Crowdin for ingredient analys…
Browse files Browse the repository at this point in the history
…is (#8809)

* fix: do not use 'and' translations from Crowdin for ingredient analysis #8794

* fix lc

* move translations for _and_ from .po files
  • Loading branch information
stephanegigandet authored Aug 10, 2023
1 parent 4d1d65f commit 7c4cec4
Show file tree
Hide file tree
Showing 5 changed files with 34 additions and 16 deletions.
1 change: 1 addition & 0 deletions cgi/test_ingredients_analysis.pl
Original file line number Diff line number Diff line change
Expand Up @@ -80,6 +80,7 @@
$template_data_ref->{html_details} = $html_details;
$template_data_ref->{display_ingredients_analysis} = display_ingredients_analysis($product_ref);
$template_data_ref->{product_ref} = $product_ref;
$template_data_ref->{preparsed_ingredients_text} = preparse_ingredients_text($lc, $ingredients_text);

my $json = JSON::PP->new->pretty->encode($product_ref->{ingredients});
$template_data_ref->{json} = $json;
Expand Down
38 changes: 31 additions & 7 deletions lib/ProductOpener/Ingredients.pm
Original file line number Diff line number Diff line change
Expand Up @@ -385,25 +385,44 @@ my %from = (

# Word(s) meaning "and" for each language, keyed by 2-letter language code.
#
# The values are not plain strings: they are interpolated as-is into the
# regular expressions used to split enumerations (e.g. split(/...|$and/i, ...)),
# so:
# - the surrounding spaces are significant (they keep the word from matching
#   inside another word),
# - regex metacharacters are live (see the "br" entry, which is an alternation),
# - matching is case-insensitive because the split patterns use /i.
#
# Callers look a language up with `$and{$lc} || " will not match "`, so a
# missing entry or an empty string (see "vi") both select the fallback
# placeholder instead of injecting an empty alternative into the pattern.
#
# These entries are maintained here rather than taken from the Crowdin
# translations (the former "_and_" .po entry), so that ingredient analysis
# does not depend on translator-supplied strings.
my %and = (
en => " and ",
br => " ha | hag ",    # regex alternation: matches either " ha " or " hag "
ca => " i ",
cs => " a ",
da => " og ",
de => " und ",
el => " και ",
es => " y ", # Spanish "e" before "i" and "hi" is handled by preparse_text()
et => " ja ",
fi => " ja ",
fr => " et ",
gl => " e ",
hr => " i ",
hu => " és ",
id => " dan ",
is => " og ",
it => " e ",
lt => " ir ",
lv => " un ",
mg => " sy ",
ms => " dan ",
nl => " en ",
nb => " og ",
nn => " og ",
oc => " e ",
pl => " i ",
pt => " e ",
ro => " și ",
ru => " и ",
sk => " a ",
sl => " in ",
sq => " dhe ",
sv => " och ",
tl => " at ",
tr => " ve ",
# NOTE(review): Ukrainian "and" is normally written with Cyrillic "і" (U+0456);
# confirm this value is not a look-alike Latin "i", which would never match.
uk => " i ",
uz => " va ",
vi => "",    # empty (false) value: callers use their " will not match " fallback
yo => " ati ",
);

my %and_of = (
Expand Down Expand Up @@ -3391,8 +3410,8 @@ sub normalize_enumeration ($lc, $type, $enumeration) {
$trailing_space = " ";
}

my $and = $Lang{_and_}{$lc};
#my $enumeration_separators = $obrackets . '|' . $cbrackets . '|\/| \/ | ' . $dashes . ' |' . $commas . ' |' . $commas. '|' . $Lang{_and_}{$lc};
# do not match anything if we don't have a translation for "and"
my $and = $and{$lc} || " will not match ";

my @list = split(/$obrackets|$cbrackets|\/| \/ | $dashes |$commas |$commas|$and/i, $enumeration);

Expand All @@ -3409,7 +3428,8 @@ sub normalize_additives_enumeration ($lc, $enumeration) {

$log->debug("normalize_additives_enumeration", {enumeration => $enumeration}) if $log->is_debug();

my $and = $Lang{_and_}{$lc};
# do not match anything if we don't have a translation for "and"
my $and = $and{$lc} || " will not match ";

my @list = split(/$obrackets|$cbrackets|\/| \/ | $dashes |$commas |$commas|$and/i, $enumeration);

Expand Down Expand Up @@ -3440,7 +3460,8 @@ sub normalize_vitamin ($lc, $a) {

sub normalize_vitamins_enumeration ($lc, $vitamins_list) {

my $and = $Lang{_and_}{$lc};
# do not match anything if we don't have a translation for "and"
my $and = $and{$lc} || " will not match ";

# The ?: makes the group non-capturing, so that the split does not create an extra item for the group
my @vitamins = split(/(?:\(|\)|\/| \/ | - |, |,|$and)+/i, $vitamins_list);
Expand Down Expand Up @@ -3502,7 +3523,8 @@ sub normalize_allergens_enumeration ($type, $lc, $before, $allergens_list, $afte
$log->debug("splitting allergens", {input => $allergens_list, before => $before, after => $after})
if $log->is_debug();

my $and = $Lang{_and_}{$lc};
# do not match anything if we don't have a translation for "and"
my $and = $and{$lc} || " will not match ";

$log->debug("splitting allergens", {input => $allergens_list}) if $log->is_debug();

Expand Down Expand Up @@ -5123,7 +5145,8 @@ sub extract_ingredients_classes_from_text ($product_ref) {
not defined $product_ref->{ingredients_text} and return;

my $text = preparse_ingredients_text($product_ref->{lc}, $product_ref->{ingredients_text});
my $and = $Lang{_and_}{$product_ref->{lc}};
# do not match anything if we don't have a translation for "and"
my $and = $and{$product_ref->{lc}} || " will not match ";
$and =~ s/ /-/g;

# remove % / percent (to avoid identifying 100% as E100 in some cases)
Expand Down Expand Up @@ -5997,7 +6020,8 @@ sub detect_allergens_from_text ($product_ref) {
my $text = $product_ref->{"ingredients_text_" . $language};
next if not defined $text;

my $and = $Lang{_and_}{$language};
# do not match anything if we don't have a translation for "and"
my $and = $and{$language} || " will not match ";
my $of = ' - ';
if (defined $of{$language}) {
$of = $of{$language};
Expand Down
5 changes: 0 additions & 5 deletions po/common/common.pot
Original file line number Diff line number Diff line change
Expand Up @@ -16,11 +16,6 @@ msgctxt "1_product"
msgid "1 product"
msgstr ""

# leave a space before and after, unless there are no spaces between "A and B" in the target language
msgctxt "_and_"
msgid " and "
msgstr ""

msgctxt "about"
msgid "About me"
msgstr ""
Expand Down
4 changes: 0 additions & 4 deletions po/common/en.po
Original file line number Diff line number Diff line change
Expand Up @@ -19,10 +19,6 @@ msgctxt "1_product"
msgid "1 product"
msgstr "1 product"

msgctxt "_and_"
msgid " and "
msgstr " and "

msgctxt "about"
msgid "About me"
msgstr "About me"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,8 @@
</form>

[% IF action == 'process' %]
<h4>Preparsed ingredients text</h4>
<p>[% preparsed_ingredients_text %]</p>
[% display_ingredients_analysis %]
<h4>Ingredients analysis</h4>
[% html_details %]
Expand Down

0 comments on commit 7c4cec4

Please sign in to comment.