Skip to content

Commit

Permalink
Modify robots
Browse files Browse the repository at this point in the history
  • Loading branch information
Basantloay committed May 16, 2021
1 parent 4986e7a commit c95139e
Show file tree
Hide file tree
Showing 16 changed files with 48 additions and 6,969 deletions.
Binary file modified out/production/Search_Engine/com/company/Crawler/Crawler.class
Binary file not shown.
572 changes: 0 additions & 572 deletions robots0.txt

This file was deleted.

43 changes: 0 additions & 43 deletions robots1.txt

This file was deleted.

341 changes: 0 additions & 341 deletions robots14.txt

This file was deleted.

Empty file removed robots36.txt
Empty file.
216 changes: 0 additions & 216 deletions robots52.txt

This file was deleted.

183 changes: 0 additions & 183 deletions robots53.txt

This file was deleted.

17 changes: 0 additions & 17 deletions robots55.txt

This file was deleted.

Empty file removed robots57.txt
Empty file.
Empty file removed robots63.txt
Empty file.
17 changes: 0 additions & 17 deletions robots64.txt

This file was deleted.

10 changes: 0 additions & 10 deletions robots65.txt

This file was deleted.

10 changes: 0 additions & 10 deletions robots66.txt

This file was deleted.

4 changes: 2 additions & 2 deletions robots7.txt

Large diffs are not rendered by default.

5,525 changes: 0 additions & 5,525 deletions robots8.txt

This file was deleted.

79 changes: 46 additions & 33 deletions src/com/company/Crawler/Crawler.java
Original file line number Diff line number Diff line change
Expand Up @@ -5,14 +5,11 @@
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;

import java.io.*;
import java.nio.file.FileSystems;
import java.nio.file.NoSuchFileException;
import java.nio.file.Path;
import java.util.concurrent.atomic.AtomicInteger;
import java.io.File;
import java.io.FileNotFoundException; // Import this class to handle errors
import java.io.FileOutputStream;
import java.io.IOException;
import java.net.MalformedURLException;
import java.net.URL;
import java.nio.channels.Channels;
Expand Down Expand Up @@ -43,40 +40,56 @@ public Crawler(int id, LinkedList<String> seedSet, Set<String> seedSetVisited, V
}

public boolean robots(String args, Integer num) throws IOException {

boolean cont=false;boolean find=false;
URL w = new URL(args + "/robots.txt");
//Document doc;
try {
ReadableByteChannel rbc = Channels.newChannel(w.openStream());
FileOutputStream robots = new FileOutputStream("robots" + num.toString() + ".txt");
robots.getChannel().transferFrom(rbc, 0, Long.MAX_VALUE);
int j = 0;
try {
Scanner scannedFile = new Scanner((Readable) robots);
while (scannedFile.hasNextLine()) {
String line = scannedFile.nextLine();
System.out.println("\n" + line);
if (line.contains("<!DOCTYPE html>")) {
System.out.println("\nl2naha");
robots.close();
new File("robots" + num.toString() + ".txt").delete();
return false;
try(BufferedReader in = new BufferedReader(new InputStreamReader(w.openStream()))) {
String line = null;
while((line = in.readLine()) != null) {
System.out.println(line);
if (line.contains("<!DOCTYPE html>")) {
System.out.println("\nl2naha");
return false;
}
else
{
if(line.contains("User-agent"))
{
if (line.contains("*")) {
cont = true;
}
find=true;
}
else if(find && cont)
{
if(line.contains("Disallow"))
{
if(line.contains("*"))
{

}
else{
//10 b3d disallow
disallowed.add(args + (line.substring(10)));
System.out.println(args + (line.substring(10)));
}
}
else if(line.contains("Allow"))
{
//7 b3d allow
allowed.add(args+(line.substring(7)));
System.out.println(args+(line.substring(7)));
}
}

}
return true;
} catch (ClassCastException a) {
return false;

}
}
catch (IOException x) {
return false;
}
return true;




} catch (FileNotFoundException e ) {
return false;
}catch( IOException a)
{
return false;
}
}


Expand Down

0 comments on commit c95139e

Please sign in to comment.