diff --git a/scripts/check_products_in_mongodb.pl b/scripts/check_products_in_mongodb.pl
index 5478fa22fa894..9cedff23b375b 100755
--- a/scripts/check_products_in_mongodb.pl
+++ b/scripts/check_products_in_mongodb.pl
@@ -24,15 +24,16 @@
use utf8;
my $usage = <.
+
+=head1 NAME
+
+fix_non_normalized_codes - A script to fix non normalized codes
+
+=head1 DESCRIPTION
+
+Product codes need to be normalized to avoid confusion between products (the same product appearing as falsely distinct entries).
+But there may be leaks in the code, or some other tools (e.g. import scripts)
+that create non normalized entries in MongoDB or on the file system.
+
+This script tries to check and fix this.
+
+=cut
+
+use ProductOpener::PerlStandards;
+
+use ProductOpener::Config qw/:all/;
+use ProductOpener::Paths qw/%BASE_DIRS/;
+use ProductOpener::Data qw/get_products_collection remove_documents_by_ids/;
+use ProductOpener::Products qw/:all/;
+use ProductOpener::Store qw/retrieve sto_iter store/;
+use Getopt::Long;
+
+# how many operations in bulk write
+my $BULK_WRITE_SIZE = 100;
+
+sub find_non_normalized_sto ($product_path) {
+    # Find all product.sto files whose stored code, stored _id or location
+    # on disk differs from the normalized form.
+    # We take a very brute force approach: every file named product.sto
+    # found under $product_path is loaded and checked.
+    #
+    # Parameters:
+    #   $product_path - root directory to scan
+    #
+    # Returns a list of array refs, one per anomalous product:
+    #   [path, normalized path, code, normalized code, product id, normalized product id]
+    # where both paths are relative to the products directory.
+
+    # scan the directory given by the caller (the loop variable below has its
+    # own name so that it does not shadow the parameter)
+    my $iter = sto_iter($product_path, qr/product\.sto$/i);
+    my @anomalous = ();
+    while (my $sto_path = $iter->()) {
+
+        my $product_ref = retrieve($sto_path);
+        next unless defined $product_ref;
+
+        my $code = $product_ref->{code};
+        my $product_id = $product_ref->{_id};
+        my $normalized_code = normalize_code($code);
+        my $normalized_product_id = product_id_for_owner(undef, $normalized_code);
+        my $normalized_product_path = product_path_from_id($normalized_product_id);
+
+        # keep only the part of the file path between ".../products/" and "/product.sto",
+        # so that it can be compared with the normalized path
+        my $relative_path = $sto_path;
+        $relative_path =~ s/.*\/products\///;
+        $relative_path =~ s/\/product\.sto$//;
+        #print STDERR "code: $code - normalized_code: $normalized_code - product_id: $product_id - normalized_product_id: $normalized_product_id - product_path: $relative_path - normalized_product_path: $normalized_product_path\n";
+
+        if (   ($code ne $normalized_code)
+            or ($product_id ne $normalized_product_id)
+            or ($relative_path ne $normalized_product_path))
+        {
+            push(
+                @anomalous,
+                [
+                    $relative_path, $normalized_product_path, $code,
+                    $normalized_code, $product_id, $normalized_product_id
+                ]
+            );
+        }
+    }
+    return @anomalous;
+}
+
+sub fix_non_normalized_sto ($product_path, $dry_run, $out) {
+    # Report the products that have a non normalized code, id or path.
+    # One line per anomalous product is printed on STDERR, then a total count.
+    # NOTE(review): $dry_run and $out are accepted but not used by this code.
+    my @items = find_non_normalized_sto($product_path);
+    my $found = scalar(@items);
+
+    for my $entry (@items) {
+        my ($product_path, $normalized_product_path, $code, $normalized_code, $product_id, $normalized_id)
+            = @{$entry};
+
+        # 1 if something already exists at the normalized path, 0 otherwise
+        my $is_duplicate = (-e "$BASE_DIRS{PRODUCTS}/$normalized_product_path") ? 1 : 0;
+
+        # 1 if the normalized path is the literal "invalid", 0 otherwise
+        my $is_invalid = ($normalized_product_path eq "invalid") ? 1 : 0;
+
+        print STDERR "product_path: $product_path - normalized_product_path: $normalized_product_path - code: $code - normalized_code: $normalized_code - product_id: $product_id - normalized_id: $normalized_id - is_duplicate: $is_duplicate - is_invalid: $is_invalid\n";
+    }
+
+    print STDERR "Found " . $found . " non normalized codes / ids / paths\n";
+    return;
+}
+
+# MongoDB filter: products whose code is not of type string
+my $int_codes_query_ref = {code => {'$not' => {'$type' => 'string'}}};
+
+sub search_int_codes() {
+    # Search MongoDB for products matching $int_codes_query_ref
+    # and return the list of their _id.
+    # The type check is done by the query itself,
+    # as it's hard to keep "integer" as integers in perl.
+
+    # 2 mins (in ms), instead of 30s default, to not die as easily if mongodb is busy.
+    my $products_collection = get_products_collection({timeout => 2 * 60000});
+
+    my $cursor = $products_collection->query($int_codes_query_ref);
+    $cursor = $cursor->fields({_id => 1, code => 1});
+    $cursor->immortal(1);
+
+    # collect the ids one document at a time
+    my @int_ids;
+    while (my $product_ref = $cursor->next) {
+        push(@int_ids, $product_ref->{_id});
+    }
+    return @int_ids;
+}
+
+
+### script
+my $usage = < \$dry_run,)
+ or die("Error in command line arguments:\n\n$usage");
+
+# check the filesystem: report (on STDERR) products whose code, id or path is not normalized
+my $product_path = $BASE_DIRS{PRODUCTS};
+fix_non_normalized_sto($product_path, $dry_run, \*STDOUT);
+
+