diff --git a/lib/ProductOpener/Display.pm b/lib/ProductOpener/Display.pm
index 043199964c139..550590b74eb56 100644
--- a/lib/ProductOpener/Display.pm
+++ b/lib/ProductOpener/Display.pm
@@ -1017,12 +1017,12 @@ sub set_user_agent_request_ref_attributes ($request_ref) {
 	my $is_crawl_bot = 0;
 	my $is_denied_crawl_bot = 0;
 	if ($user_agent_str
-		=~ /\b(Googlebot|Googlebot-Image|Google-InspectionTool|bingbot|Applebot|Yandex|DuckDuck|DotBot|Seekport|Ahrefs|DataForSeo|Seznam|ZoomBot|Mojeek|QRbot|Qwant|facebookexternalhit|Bytespider|GPTBot|cohere-ai|anthropic-ai|PerplexityBot|ClaudeBot|Claude-Web|SEOkicks|Searchmetrics|MJ12|SurveyBot|SEOdiver|wotbox|Cliqz|Paracrawl|Scrapy|VelenPublicWebCrawler|Semrush|MegaIndex\.ru|Amazon|aiohttp|python-request)/i
+		=~ /\b(Googlebot|Googlebot-Image|Google-InspectionTool|bingbot|Applebot|Yandex|DuckDuck|DotBot|Seekport|Ahrefs|DataForSeo|Seznam|ZoomBot|Mojeek|QRbot|Qwant|facebookexternalhit|Bytespider|GPTBot|ChatGPT-User|cohere-ai|anthropic-ai|PerplexityBot|ClaudeBot|Claude-Web|SEOkicks|Searchmetrics|MJ12|SurveyBot|SEOdiver|wotbox|Cliqz|Paracrawl|Scrapy|VelenPublicWebCrawler|Semrush|MegaIndex\.ru|Amazon|aiohttp|python-request|ImagesiftBot|Diffbot)/i
 		)
 	{
 		$is_crawl_bot = 1;
 		if ($user_agent_str
-			=~ /\b(bingbot|Seekport|Ahrefs|DataForSeo|Seznam|ZoomBot|Mojeek|QRbot|Bytespider|SEOkicks|Searchmetrics|MJ12|SurveyBot|SEOdiver|wotbox|Cliqz|Paracrawl|Scrapy|VelenPublicWebCrawler|Semrush|MegaIndex\.ru|YandexMarket|Amazon|GPTBot|PerplexityBot|ClaudeBot|Claude-Web|cohere-ai|anthropic-ai)/i
+			=~ /\b(bingbot|Seekport|Ahrefs|DataForSeo|Seznam|ZoomBot|Mojeek|QRbot|Bytespider|SEOkicks|Searchmetrics|MJ12|SurveyBot|SEOdiver|wotbox|Cliqz|Paracrawl|Scrapy|VelenPublicWebCrawler|Semrush|MegaIndex\.ru|YandexMarket|Amazon|GPTBot|ChatGPT-User|PerplexityBot|ClaudeBot|Claude-Web|cohere-ai|anthropic-ai|ImagesiftBot|Diffbot)/i
 			)
 		{
 			$is_denied_crawl_bot = 1;
diff --git a/templates/web/pages/robots/robots.tt.txt b/templates/web/pages/robots/robots.tt.txt
index b69bc635a753f..fb17ae09b0099 100644
--- a/templates/web/pages/robots/robots.tt.txt
+++ b/templates/web/pages/robots/robots.tt.txt
@@ -103,4 +103,10 @@ User-agent: Claude-Web
 Disallow: /
 User-agent: PerplexityBot
 Disallow: /
+User-agent: Diffbot
+Disallow: /
+User-agent: ImagesiftBot
+Disallow: /
+User-agent: ChatGPT-User
+Disallow: /
 [% END %]
\ No newline at end of file
diff --git a/tests/integration/expected_test_results/page_crawler/get-robots-txt-ch-it.text b/tests/integration/expected_test_results/page_crawler/get-robots-txt-ch-it.text
index 747316e7e8391..b96fce2da555a 100644
--- a/tests/integration/expected_test_results/page_crawler/get-robots-txt-ch-it.text
+++ b/tests/integration/expected_test_results/page_crawler/get-robots-txt-ch-it.text
@@ -221,3 +221,9 @@ User-agent: Claude-Web
 Disallow: /
 User-agent: PerplexityBot
 Disallow: /
+User-agent: Diffbot
+Disallow: /
+User-agent: ImagesiftBot
+Disallow: /
+User-agent: ChatGPT-User
+Disallow: /
diff --git a/tests/integration/expected_test_results/page_crawler/get-robots-txt-fr-pro-platform.text b/tests/integration/expected_test_results/page_crawler/get-robots-txt-fr-pro-platform.text
index bc4c465474b58..f7b94b4602f15 100644
--- a/tests/integration/expected_test_results/page_crawler/get-robots-txt-fr-pro-platform.text
+++ b/tests/integration/expected_test_results/page_crawler/get-robots-txt-fr-pro-platform.text
@@ -310,3 +310,9 @@ User-agent: Claude-Web
 Disallow: /
 User-agent: PerplexityBot
 Disallow: /
+User-agent: Diffbot
+Disallow: /
+User-agent: ImagesiftBot
+Disallow: /
+User-agent: ChatGPT-User
+Disallow: /
diff --git a/tests/integration/expected_test_results/page_crawler/get-robots-txt-fr.text b/tests/integration/expected_test_results/page_crawler/get-robots-txt-fr.text
index bc4c465474b58..f7b94b4602f15 100644
--- a/tests/integration/expected_test_results/page_crawler/get-robots-txt-fr.text
+++ b/tests/integration/expected_test_results/page_crawler/get-robots-txt-fr.text
@@ -310,3 +310,9 @@ User-agent: Claude-Web
 Disallow: /
 User-agent: PerplexityBot
 Disallow: /
+User-agent: Diffbot
+Disallow: /
+User-agent: ImagesiftBot
+Disallow: /
+User-agent: ChatGPT-User
+Disallow: /
diff --git a/tests/integration/expected_test_results/page_crawler/get-robots-txt-world-pro-platform.text b/tests/integration/expected_test_results/page_crawler/get-robots-txt-world-pro-platform.text
index 747316e7e8391..b96fce2da555a 100644
--- a/tests/integration/expected_test_results/page_crawler/get-robots-txt-world-pro-platform.text
+++ b/tests/integration/expected_test_results/page_crawler/get-robots-txt-world-pro-platform.text
@@ -221,3 +221,9 @@ User-agent: Claude-Web
 Disallow: /
 User-agent: PerplexityBot
 Disallow: /
+User-agent: Diffbot
+Disallow: /
+User-agent: ImagesiftBot
+Disallow: /
+User-agent: ChatGPT-User
+Disallow: /
diff --git a/tests/integration/expected_test_results/page_crawler/get-robots-txt-world.text b/tests/integration/expected_test_results/page_crawler/get-robots-txt-world.text
index 747316e7e8391..b96fce2da555a 100644
--- a/tests/integration/expected_test_results/page_crawler/get-robots-txt-world.text
+++ b/tests/integration/expected_test_results/page_crawler/get-robots-txt-world.text
@@ -221,3 +221,9 @@ User-agent: Claude-Web
 Disallow: /
 User-agent: PerplexityBot
 Disallow: /
+User-agent: Diffbot
+Disallow: /
+User-agent: ImagesiftBot
+Disallow: /
+User-agent: ChatGPT-User
+Disallow: /