From 7c4cec4f09de99c6c43708e3f216836d33f243e8 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?St=C3=A9phane=20Gigandet?= <stephane@openfoodfacts.org>
Date: Thu, 10 Aug 2023 12:19:00 +0200
Subject: [PATCH] fix: do not use 'and' translations from Crowdin for
 ingredient analysis (#8809)

* fix: do not use 'and' translations from Crowdin for ingredient analysis #8794

* fix lc

* move translations for _and_ out of the .po files and into Ingredients.pm
---
 cgi/test_ingredients_analysis.pl              |  1 +
 lib/ProductOpener/Ingredients.pm              | 38 +++++++++++++++----
 po/common/common.pot                          |  5 ---
 po/common/en.po                               |  4 --
 .../test_ingredients_analysis.tt.html         |  2 +
 5 files changed, 34 insertions(+), 16 deletions(-)

diff --git a/cgi/test_ingredients_analysis.pl b/cgi/test_ingredients_analysis.pl
index 4b5abc123457a..a66730987ab1e 100755
--- a/cgi/test_ingredients_analysis.pl
+++ b/cgi/test_ingredients_analysis.pl
@@ -80,6 +80,7 @@
 	$template_data_ref->{html_details} = $html_details;
 	$template_data_ref->{display_ingredients_analysis} = display_ingredients_analysis($product_ref);
 	$template_data_ref->{product_ref} = $product_ref;
+	$template_data_ref->{preparsed_ingredients_text} = preparse_ingredients_text($lc, $ingredients_text);
 
 	my $json = JSON::PP->new->pretty->encode($product_ref->{ingredients});
 	$template_data_ref->{json} = $json;
diff --git a/lib/ProductOpener/Ingredients.pm b/lib/ProductOpener/Ingredients.pm
index 1d284e5f39acd..29879f6a83e79 100644
--- a/lib/ProductOpener/Ingredients.pm
+++ b/lib/ProductOpener/Ingredients.pm
@@ -385,25 +385,44 @@ my %from = (
 
 my %and = (
 	en => " and ",
+	br => " ha | hag ",    # two forms; values are interpolated into regexes, so "|" acts as alternation
 	ca => " i ",
+	cs => " a ",
 	da => " og ",
 	de => " und ",
+	el => " και ",
 	es => " y ",    # Spanish "e" before "i" and "hi" is handled by preparse_text()
 	et => " ja ",
 	fi => " ja ",
 	fr => " et ",
+	gl => " e ",
 	hr => " i ",
+	hu => " és ",
+	id => " dan ",
 	is => " og ",
 	it => " e ",
 	lt => " ir ",
 	lv => " un ",
+	mg => " sy ",
+	ms => " dan ",
 	nl => " en ",
 	nb => " og ",
+	nn => " og ",
+	oc => " e ",
 	pl => " i ",
 	pt => " e ",
 	ro => " și ",
 	ru => " и ",
+	sk => " a ",
+	sl => " in ",
+	sq => " dhe ",
 	sv => " och ",
+	tl => " at ",
+	tr => " ve ",
+	uk => " i ",    # NOTE(review): confirm this is Cyrillic "і" (U+0456), not Latin "i" (U+0069)
+	uz => " va ",
+	vi => " và ",
+	yo => " ati ",
 );
 
 my %and_of = (
@@ -3391,8 +3410,8 @@ sub normalize_enumeration ($lc, $type, $enumeration) {
 		$trailing_space = " ";
 	}
 
-	my $and = $Lang{_and_}{$lc};
-	#my $enumeration_separators = $obrackets . '|' . $cbrackets . '|\/| \/ | ' . $dashes . ' |' . $commas . ' |' . $commas. '|'  . $Lang{_and_}{$lc};
+	# do not match anything if we don't have a translation for "and"
+	my $and = $and{$lc} || " will not match ";
 
 	my @list = split(/$obrackets|$cbrackets|\/| \/ | $dashes |$commas |$commas|$and/i, $enumeration);
 
@@ -3409,7 +3428,8 @@ sub normalize_additives_enumeration ($lc, $enumeration) {
 
 	$log->debug("normalize_additives_enumeration", {enumeration => $enumeration}) if $log->is_debug();
 
-	my $and = $Lang{_and_}{$lc};
+	# do not match anything if we don't have a translation for "and"
+	my $and = $and{$lc} || " will not match ";
 
 	my @list = split(/$obrackets|$cbrackets|\/| \/ | $dashes |$commas |$commas|$and/i, $enumeration);
 
@@ -3440,7 +3460,8 @@ sub normalize_vitamin ($lc, $a) {
 
 sub normalize_vitamins_enumeration ($lc, $vitamins_list) {
 
-	my $and = $Lang{_and_}{$lc};
+	# do not match anything if we don't have a translation for "and"
+	my $and = $and{$lc} || " will not match ";
 
 	# The ?: makes the group non-capturing, so that the split does not create an extra item for the group
 	my @vitamins = split(/(?:\(|\)|\/| \/ | - |, |,|$and)+/i, $vitamins_list);
@@ -3502,7 +3523,8 @@ sub normalize_allergens_enumeration ($type, $lc, $before, $allergens_list, $afte
 	$log->debug("splitting allergens", {input => $allergens_list, before => $before, after => $after})
 		if $log->is_debug();
 
-	my $and = $Lang{_and_}{$lc};
+	# do not match anything if we don't have a translation for "and"
+	my $and = $and{$lc} || " will not match ";
 
 	$log->debug("splitting allergens", {input => $allergens_list}) if $log->is_debug();
 
@@ -5123,7 +5145,8 @@ sub extract_ingredients_classes_from_text ($product_ref) {
 	not defined $product_ref->{ingredients_text} and return;
 
 	my $text = preparse_ingredients_text($product_ref->{lc}, $product_ref->{ingredients_text});
-	my $and = $Lang{_and_}{$product_ref->{lc}};
+	# do not match anything if we don't have a translation for "and"
+	my $and = $and{$product_ref->{lc}} || " will not match ";
 	$and =~ s/ /-/g;
 
 	#  remove % / percent (to avoid identifying 100% as E100 in some cases)
@@ -5997,7 +6020,8 @@ sub detect_allergens_from_text ($product_ref) {
 			my $text = $product_ref->{"ingredients_text_" . $language};
 			next if not defined $text;
 
-			my $and = $Lang{_and_}{$language};
+			# do not match anything if we don't have a translation for "and"
+			my $and = $and{$language} || " will not match ";
 			my $of = ' - ';
 			if (defined $of{$language}) {
 				$of = $of{$language};
diff --git a/po/common/common.pot b/po/common/common.pot
index 0dd9a1e8105cc..4517c015d4e27 100644
--- a/po/common/common.pot
+++ b/po/common/common.pot
@@ -16,11 +16,6 @@ msgctxt "1_product"
 msgid "1 product"
 msgstr ""
 
-# leave a space before and after, unless there are no spaces between "A and B" in the target language
-msgctxt "_and_"
-msgid " and "
-msgstr ""
-
 msgctxt "about"
 msgid "About me"
 msgstr ""
diff --git a/po/common/en.po b/po/common/en.po
index 76c30c0dadfb9..75d510b93aa81 100644
--- a/po/common/en.po
+++ b/po/common/en.po
@@ -19,10 +19,6 @@ msgctxt "1_product"
 msgid "1 product"
 msgstr "1 product"
 
-msgctxt "_and_"
-msgid " and "
-msgstr " and "
-
 msgctxt "about"
 msgid "About me"
 msgstr "About me"
diff --git a/templates/web/pages/test_ingredients/test_ingredients_analysis.tt.html b/templates/web/pages/test_ingredients/test_ingredients_analysis.tt.html
index 358aad01b388b..0936b1bc2ecc1 100644
--- a/templates/web/pages/test_ingredients/test_ingredients_analysis.tt.html
+++ b/templates/web/pages/test_ingredients/test_ingredients_analysis.tt.html
@@ -13,6 +13,8 @@
 </form>
 
 [% IF action == 'process' %]
+    <h4>Preparsed ingredients text</h4>[%# plain text, presumably derived from the submitted form: escape it before emitting into HTML %]
+    <p>[% preparsed_ingredients_text | html %]</p>
     [% display_ingredients_analysis %]
     <h4>Ingredients analysis</h4>
     [% html_details %]