From 7c4cec4f09de99c6c43708e3f216836d33f243e8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?St=C3=A9phane=20Gigandet?= Date: Thu, 10 Aug 2023 12:19:00 +0200 Subject: [PATCH] fix: do not use 'and' translations from Crowdin for ingredient analysis (#8809) * fix: do not use 'and' translations from Crowdin for ingredient analysis #8794 * fix lc * move translations for _and_ from .po files --- cgi/test_ingredients_analysis.pl | 1 + lib/ProductOpener/Ingredients.pm | 38 +++++++++++++++---- po/common/common.pot | 5 --- po/common/en.po | 4 -- .../test_ingredients_analysis.tt.html | 2 + 5 files changed, 34 insertions(+), 16 deletions(-) diff --git a/cgi/test_ingredients_analysis.pl b/cgi/test_ingredients_analysis.pl index 4b5abc123457a..a66730987ab1e 100755 --- a/cgi/test_ingredients_analysis.pl +++ b/cgi/test_ingredients_analysis.pl @@ -80,6 +80,7 @@ $template_data_ref->{html_details} = $html_details; $template_data_ref->{display_ingredients_analysis} = display_ingredients_analysis($product_ref); $template_data_ref->{product_ref} = $product_ref; + $template_data_ref->{preparsed_ingredients_text} = preparse_ingredients_text($lc, $ingredients_text); my $json = JSON::PP->new->pretty->encode($product_ref->{ingredients}); $template_data_ref->{json} = $json; diff --git a/lib/ProductOpener/Ingredients.pm b/lib/ProductOpener/Ingredients.pm index 1d284e5f39acd..29879f6a83e79 100644 --- a/lib/ProductOpener/Ingredients.pm +++ b/lib/ProductOpener/Ingredients.pm @@ -385,25 +385,44 @@ my %from = ( my %and = ( en => " and ", + br => " ha | hag ", ca => " i ", + cs => " a ", da => " og ", de => " und ", + el => " και ", es => " y ", # Spanish "e" before "i" and "hi" is handled by preparse_text() et => " ja ", fi => " ja ", fr => " et ", + gl => " e ", hr => " i ", + hu => " és ", + id => " dan ", is => " og ", it => " e ", lt => " ir ", lv => " un ", + mg => " sy ", + ms => " dan ", nl => " en ", nb => " og ", + nn => " og ", + oc => " e ", pl => " i ", pt => " e ", ro => " și ", ru => " и ", + sk => " a ", + sl => " in ", + sq => " dhe ", sv => " och ", + tl => " at ", + tr => " ve ", + uk => " i ", + uz => " va ", + vi => " và ", + yo => " ati ", ); my %and_of = ( @@ -3391,8 +3410,8 @@ sub normalize_enumeration ($lc, $type, $enumeration) { $trailing_space = " "; } - my $and = $Lang{_and_}{$lc}; - #my $enumeration_separators = $obrackets . '|' . $cbrackets . '|\/| \/ | ' . $dashes . ' |' . $commas . ' |' . $commas. '|' . $Lang{_and_}{$lc}; + # do not match anything if we don't have a translation for "and" + my $and = $and{$lc} || " will not match "; my @list = split(/$obrackets|$cbrackets|\/| \/ | $dashes |$commas |$commas|$and/i, $enumeration); @@ -3409,7 +3428,8 @@ sub normalize_additives_enumeration ($lc, $enumeration) { $log->debug("normalize_additives_enumeration", {enumeration => $enumeration}) if $log->is_debug(); - my $and = $Lang{_and_}{$lc}; + # do not match anything if we don't have a translation for "and" + my $and = $and{$lc} || " will not match "; my @list = split(/$obrackets|$cbrackets|\/| \/ | $dashes |$commas |$commas|$and/i, $enumeration); @@ -3440,7 +3460,8 @@ sub normalize_vitamin ($lc, $a) { sub normalize_vitamins_enumeration ($lc, $vitamins_list) { - my $and = $Lang{_and_}{$lc}; + # do not match anything if we don't have a translation for "and" + my $and = $and{$lc} || " will not match "; # The ?: makes the group non-capturing, so that the split does not create an extra item for the group my @vitamins = split(/(?:\(|\)|\/| \/ | - |, |,|$and)+/i, $vitamins_list); @@ -3502,7 +3523,8 @@ sub normalize_allergens_enumeration ($type, $lc, $before, $allergens_list, $afte $log->debug("splitting allergens", {input => $allergens_list, before => $before, after => $after}) if $log->is_debug(); - my $and = $Lang{_and_}{$lc}; + # do not match anything if we don't have a translation for "and" + my $and = $and{$lc} || " will not match "; $log->debug("splitting allergens", {input => $allergens_list}) if $log->is_debug(); @@ -5123,7 +5145,8 @@ sub extract_ingredients_classes_from_text ($product_ref) { not defined $product_ref->{ingredients_text} and return; my $text = preparse_ingredients_text($product_ref->{lc}, $product_ref->{ingredients_text}); - my $and = $Lang{_and_}{$product_ref->{lc}}; + # do not match anything if we don't have a translation for "and" + my $and = $and{$product_ref->{lc}} || " will not match "; $and =~ s/ /-/g; # remove % / percent (to avoid identifying 100% as E100 in some cases) @@ -5997,7 +6020,8 @@ sub detect_allergens_from_text ($product_ref) { my $text = $product_ref->{"ingredients_text_" . $language}; next if not defined $text; - my $and = $Lang{_and_}{$language}; + # do not match anything if we don't have a translation for "and" + my $and = $and{$language} || " will not match "; my $of = ' - '; if (defined $of{$language}) { $of = $of{$language}; diff --git a/po/common/common.pot b/po/common/common.pot index 0dd9a1e8105cc..4517c015d4e27 100644 --- a/po/common/common.pot +++ b/po/common/common.pot @@ -16,11 +16,6 @@ msgctxt "1_product" msgid "1 product" msgstr "" -# leave a space before and after, unless there are no spaces between "A and B" in the target language -msgctxt "_and_" -msgid " and " -msgstr "" - msgctxt "about" msgid "About me" msgstr "" diff --git a/po/common/en.po b/po/common/en.po index 76c30c0dadfb9..75d510b93aa81 100644 --- a/po/common/en.po +++ b/po/common/en.po @@ -19,10 +19,6 @@ msgctxt "1_product" msgid "1 product" msgstr "1 product" -msgctxt "_and_" -msgid " and " -msgstr " and " - msgctxt "about" msgid "About me" msgstr "About me" diff --git a/templates/web/pages/test_ingredients/test_ingredients_analysis.tt.html b/templates/web/pages/test_ingredients/test_ingredients_analysis.tt.html index 358aad01b388b..0936b1bc2ecc1 100644 --- a/templates/web/pages/test_ingredients/test_ingredients_analysis.tt.html +++ b/templates/web/pages/test_ingredients/test_ingredients_analysis.tt.html @@ -13,6 +13,8 @@ [% IF action == 'process' %] +

Preparsed ingredients text

+

[% preparsed_ingredients_text %]

[% display_ingredients_analysis %]

Ingredients analysis

[% html_details %]