From bb6bb787e3d1e1edca9334af8eb6730671f2dd7d Mon Sep 17 00:00:00 2001
From: Rick Venutolo
Date: Sat, 2 Apr 2016 14:01:13 -0400
Subject: [PATCH] Added unit test to demonstrate bug when crawler user agent is non-lowercase

---
Reviewer notes (free-form text placed between the three-dash separator and
the diff; it is commentary only and not part of the committed file):

  * This patch only creates one new test class,
    RobotstxtParserNonLowercaseUserAgentTest; no parser source is modified.
  * The single test builds a two-line robots.txt body: a "User-agent:" line
    naming the mixed-case agent "testAgent", followed by
    "Disallow: /test/path/".
  * It passes that body and the same agent string to
    RobotstxtParser.parse(content, userAgent), then asserts that the returned
    HostDirectives is non-null and that allows("/test/path/") is false.
  * NOTE(review): per the subject line the test is meant to demonstrate an
    existing bug, so it presumably fails until the parser is fixed. The
    parser code is not visible in this patch; confirm before merging into a
    build that must stay green.

 ...otstxtParserNonLowercaseUserAgentTest.java | 22 +++++++++++++++++++
 1 file changed, 22 insertions(+)
 create mode 100644 src/test/java/edu/uci/ics/crawler4j/tests/RobotstxtParserNonLowercaseUserAgentTest.java

diff --git a/src/test/java/edu/uci/ics/crawler4j/tests/RobotstxtParserNonLowercaseUserAgentTest.java b/src/test/java/edu/uci/ics/crawler4j/tests/RobotstxtParserNonLowercaseUserAgentTest.java
new file mode 100644
index 000000000..3ea33a0c4
--- /dev/null
+++ b/src/test/java/edu/uci/ics/crawler4j/tests/RobotstxtParserNonLowercaseUserAgentTest.java
@@ -0,0 +1,22 @@
+package edu.uci.ics.crawler4j.tests;
+
+import edu.uci.ics.crawler4j.robotstxt.HostDirectives;
+import edu.uci.ics.crawler4j.robotstxt.RobotstxtParser;
+import org.junit.Test;
+
+import static org.junit.Assert.assertFalse;
+import static org.junit.Assert.assertNotNull;
+
+public class RobotstxtParserNonLowercaseUserAgentTest {
+
+    @Test
+    public void testParseWithNonLowercaseUserAgent() {
+        String userAgent = "testAgent";
+        String content = "User-agent: " + userAgent + "\n" +
+            "Disallow: /test/path/\n";
+        HostDirectives hostDirectives = RobotstxtParser.parse(content, userAgent);
+        assertNotNull("parsed HostDirectives is null", hostDirectives);
+        assertFalse("HostDirectives should not allow path: '/test/path/'", hostDirectives.allows("/test/path/"));
+    }
+
+}