diff --git a/.idea/libraries/Maven__aopalliance_aopalliance_1_0.xml b/.idea/libraries/Maven__aopalliance_aopalliance_1_0.xml
new file mode 100644
index 0000000..30ff5cb
--- /dev/null
+++ b/.idea/libraries/Maven__aopalliance_aopalliance_1_0.xml
@@ -0,0 +1,13 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/.idea/libraries/Maven__org_mongodb_mongo_java_driver_2_13_3.xml b/.idea/libraries/Maven__org_mongodb_mongo_java_driver_3_12_8.xml
similarity index 59%
rename from .idea/libraries/Maven__org_mongodb_mongo_java_driver_2_13_3.xml
rename to .idea/libraries/Maven__org_mongodb_mongo_java_driver_3_12_8.xml
index d375251..b9089b5 100644
--- a/.idea/libraries/Maven__org_mongodb_mongo_java_driver_2_13_3.xml
+++ b/.idea/libraries/Maven__org_mongodb_mongo_java_driver_3_12_8.xml
@@ -1,13 +1,13 @@
-
+
-
+
-
+
-
+
\ No newline at end of file
diff --git a/.idea/libraries/Maven__org_slf4j_jcl_over_slf4j_1_7_10.xml b/.idea/libraries/Maven__org_slf4j_jcl_over_slf4j_1_7_10.xml
new file mode 100644
index 0000000..5f42bd6
--- /dev/null
+++ b/.idea/libraries/Maven__org_slf4j_jcl_over_slf4j_1_7_10.xml
@@ -0,0 +1,13 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/.idea/libraries/Maven__org_slf4j_slf4j_api_1_7_30.xml b/.idea/libraries/Maven__org_slf4j_slf4j_api_1_7_30.xml
new file mode 100644
index 0000000..02b6812
--- /dev/null
+++ b/.idea/libraries/Maven__org_slf4j_slf4j_api_1_7_30.xml
@@ -0,0 +1,13 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/.idea/libraries/Maven__org_slf4j_slf4j_simple_1_7_30.xml b/.idea/libraries/Maven__org_slf4j_slf4j_simple_1_7_30.xml
new file mode 100644
index 0000000..e5856a0
--- /dev/null
+++ b/.idea/libraries/Maven__org_slf4j_slf4j_simple_1_7_30.xml
@@ -0,0 +1,13 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/.idea/libraries/Maven__org_springframework_data_spring_data_commons_1_10_0_RELEASE.xml b/.idea/libraries/Maven__org_springframework_data_spring_data_commons_1_10_0_RELEASE.xml
new file mode 100644
index 0000000..814cd16
--- /dev/null
+++ b/.idea/libraries/Maven__org_springframework_data_spring_data_commons_1_10_0_RELEASE.xml
@@ -0,0 +1,13 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/.idea/libraries/Maven__org_springframework_data_spring_data_mongodb_1_7_0_RELEASE.xml b/.idea/libraries/Maven__org_springframework_data_spring_data_mongodb_1_7_0_RELEASE.xml
new file mode 100644
index 0000000..0f80305
--- /dev/null
+++ b/.idea/libraries/Maven__org_springframework_data_spring_data_mongodb_1_7_0_RELEASE.xml
@@ -0,0 +1,13 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/.idea/libraries/Maven__org_springframework_spring_aop_4_0_9_RELEASE.xml b/.idea/libraries/Maven__org_springframework_spring_aop_4_0_9_RELEASE.xml
new file mode 100644
index 0000000..b9f0278
--- /dev/null
+++ b/.idea/libraries/Maven__org_springframework_spring_aop_4_0_9_RELEASE.xml
@@ -0,0 +1,13 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/.idea/libraries/Maven__org_springframework_spring_beans_4_0_9_RELEASE.xml b/.idea/libraries/Maven__org_springframework_spring_beans_4_0_9_RELEASE.xml
new file mode 100644
index 0000000..f98ea8c
--- /dev/null
+++ b/.idea/libraries/Maven__org_springframework_spring_beans_4_0_9_RELEASE.xml
@@ -0,0 +1,13 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/.idea/libraries/Maven__org_springframework_spring_context_4_0_9_RELEASE.xml b/.idea/libraries/Maven__org_springframework_spring_context_4_0_9_RELEASE.xml
new file mode 100644
index 0000000..802cb82
--- /dev/null
+++ b/.idea/libraries/Maven__org_springframework_spring_context_4_0_9_RELEASE.xml
@@ -0,0 +1,13 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/.idea/libraries/Maven__org_springframework_spring_core_4_0_9_RELEASE.xml b/.idea/libraries/Maven__org_springframework_spring_core_4_0_9_RELEASE.xml
new file mode 100644
index 0000000..1019cb9
--- /dev/null
+++ b/.idea/libraries/Maven__org_springframework_spring_core_4_0_9_RELEASE.xml
@@ -0,0 +1,13 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/.idea/libraries/Maven__org_springframework_spring_expression_4_0_9_RELEASE.xml b/.idea/libraries/Maven__org_springframework_spring_expression_4_0_9_RELEASE.xml
new file mode 100644
index 0000000..20892fa
--- /dev/null
+++ b/.idea/libraries/Maven__org_springframework_spring_expression_4_0_9_RELEASE.xml
@@ -0,0 +1,13 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/.idea/libraries/Maven__org_springframework_spring_tx_4_0_9_RELEASE.xml b/.idea/libraries/Maven__org_springframework_spring_tx_4_0_9_RELEASE.xml
new file mode 100644
index 0000000..43aa1ad
--- /dev/null
+++ b/.idea/libraries/Maven__org_springframework_spring_tx_4_0_9_RELEASE.xml
@@ -0,0 +1,13 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/Search_Engine.iml b/Search_Engine.iml
index c2cd65a..e820eb3 100644
--- a/Search_Engine.iml
+++ b/Search_Engine.iml
@@ -23,7 +23,7 @@
-
+
@@ -32,5 +32,17 @@
+
+
+
+
+
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/Test.txt b/Test.txt
new file mode 100644
index 0000000..e69de29
diff --git a/pom.xml b/pom.xml
index a5d9d18..29f1cc6 100644
--- a/pom.xml
+++ b/pom.xml
@@ -19,11 +19,13 @@
mongodb-driver-sync
3.12.8
+
org.mongodb
mongo-java-driver
- 2.13.3
+ 3.12.8
+
org.jsoup
@@ -53,6 +55,27 @@
lucene-analyzers-common
4.10.4
+
+ org.springframework.data
+ spring-data-mongodb
+ 1.7.0.RELEASE
+
+
+
+ org.slf4j
+ slf4j-api
+ 1.7.30
+
+
+
+
+
+ org.slf4j
+ slf4j-simple
+ 1.7.30
+ test
+
+
diff --git a/src/com/company/Crawler/Database.java b/src/com/company/Crawler/Database.java
index f4376ed..367853d 100644
--- a/src/com/company/Crawler/Database.java
+++ b/src/com/company/Crawler/Database.java
@@ -13,6 +13,12 @@
import com.mongodb.MongoClient;
import com.mongodb.ParallelScanOptions;
import com.mongodb.ServerAddress;
+import org.jsoup.Connection;
+import org.jsoup.HttpStatusException;
+import org.jsoup.Jsoup;
+import org.jsoup.nodes.Element;
+import org.jsoup.select.Elements;
+
import javax.swing.*;
import java.net.UnknownHostException;
@@ -33,30 +39,24 @@ public class Database {
//DBCollection htmlDocuments;
//DBCollection time;
public Database() {
- try {
- this.mongoClient = new MongoClient("localhost", 27017);
- //create database
- this.crawlerDatabase = mongoClient.getDB("CrawlerDatabase");
- //create collections and fields
- websites = crawlerDatabase.getCollection("websites");
- websites.createIndex("URL");
- websites.createIndex("crawled");
- websites.createIndex("indexed");
- websites.createIndex("rank");
- //websites.createIndex("hyberlinks");
- websites.createIndex("HTMLDocuments");
- websites.createIndex("Time");
- disallowedWebsite=crawlerDatabase.getCollection("DisallowedWebsites");
- //hyberlinks = crawlerDatabase.getCollection("hyberlinks");
- //hyberlinks.createIndex("URL");
- //hyberlinks.createIndex("refTo");
- //htmlDocuments = crawlerDatabase.getCollection("HTMLDocuments");
- //time = crawlerDatabase.getCollection("time");
-
-
- } catch (UnknownHostException e) {
- e.printStackTrace();
- }
+ this.mongoClient = new MongoClient("localhost", 27017);
+ //create database
+ this.crawlerDatabase = mongoClient.getDB("CrawlerDatabase");
+ //create collections and fields
+ websites = crawlerDatabase.getCollection("websites");
+ websites.createIndex("URL");
+ websites.createIndex("crawled");
+ websites.createIndex("indexed");
+ websites.createIndex("rank");
+ //websites.createIndex("hyberlinks");
+ websites.createIndex("HTMLDocuments");
+ websites.createIndex("Time");
+ disallowedWebsite=crawlerDatabase.getCollection("DisallowedWebsites");
+ //hyberlinks = crawlerDatabase.getCollection("hyberlinks");
+ //hyberlinks.createIndex("URL");
+ //hyberlinks.createIndex("refTo");
+ //htmlDocuments = crawlerDatabase.getCollection("HTMLDocuments");
+ //time = crawlerDatabase.getCollection("time");
}
diff --git a/target/classes/com/company/Crawler/Database.class b/target/classes/com/company/Crawler/Database.class
index 5e8dee7..3f6ae0a 100644
Binary files a/target/classes/com/company/Crawler/Database.class and b/target/classes/com/company/Crawler/Database.class differ