Skip to content

Commit 2a66755

Browse files
custom keyword changed to source
1 parent d9a95c1 commit 2a66755

File tree

6 files changed

+74
-72
lines changed

6 files changed

+74
-72
lines changed

build/lib/crawlers/spiders/scrapit.py

Lines changed: 37 additions & 37 deletions
Large diffs are not rendered by default.

build/lib/databaseConnector/databaseConnector.py

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -5,6 +5,7 @@
55

66
def sendData(testObj):
77
# url = 'http://localhost:8888/scrap/wp-json/wp/v2/posts' #using for mac
8-
url = 'https://192.168.0.159/wp-json/wp/v2/posts' #using for linux(office)
8+
url = 'https://politicl.com/wp-json/wp/v2/posts' #using for linux(office)
99
r = requests.post(url, data=testObj, auth=HTTPBasicAuth(
1010
'P0l1t1clAdm1n', 'Politicl123@!@#$%^&*()'))
11+
# 'sidhanshu', 'IJnJFAP$)4oqsRXoc1zZhmb&'))

build/lib/duplicateCheck/duplicateCheck.py

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -8,8 +8,9 @@ def duplicates(title):
88
new_title = new_title1.lower()
99
# proxies = { 'http': '192.168.0.159:80','https': '192.168.0.159:80'}
1010
# url = 'http://localhost:8888/scrap/wp-json/wp/v2/posts?slug='+new_title # using for mac
11-
url = 'https://192.168.0.159/wp-json/wp/v2/posts?slug='+new_title #using for linux(office)
11+
url = 'https://politicl.com/wp-json/wp/v2/posts?slug='+new_title #using for linux(office)
1212
r = requests.get(url, auth=HTTPBasicAuth(
13+
# 'sidhanshu', 'IJnJFAP$)4oqsRXoc1zZhmb&'))
1314
'P0l1t1clAdm1n', 'Politicl123@!@#$%^&*()'))
1415
if len(r.text) >= 3:
1516
print(len(r.text))

crawlers/spiders/scrapit.py

Lines changed: 27 additions & 27 deletions
Original file line number | Diff line number | Diff line change
@@ -25,34 +25,34 @@ class ScrapSpider(scrapy.Spider):
2525
def start_requests(self):
2626
# todayFolder(self)
2727
urls = [
28-
# "http://www.dailyo.in/politics",
28+
"http://www.dailyo.in/politics",
2929
"http://www.deccanchronicle.com/opinion",
30-
# "http://www.firstpost.com/category/politics",
31-
# "http://www.forbesindia.com",
32-
# "http://www.frontline.in",
33-
# "http://www.hindustantimes.com/opinion/",
34-
# "http://www.ndtv.com/opinion",
35-
# "http://www.news18.com/blogs",
36-
# "https://www.outlookindia.com/website",
37-
# "https://www.outlookindia.com/magazine",
38-
# "http://scroll.in",
39-
# "https://blogs.economictimes.indiatimes.com",
40-
# "https://www.thehindu.com/opinion/",
41-
# "https://www.thehindubusinessline.com/opinion/",
42-
# "http://qrius.com",
43-
# "http://www.indianexpress.com/opinion",
44-
# "http://www.newindianexpress.com/Opinions",
45-
# "http://www.dailypioneer.com/columnists",
46-
# "http://blogs.timesofindia.indiatimes.com",
47-
# "https://www.tribuneindia.com/news/opinion/",
48-
# "https://thewire.in",
49-
# "https://www.telegraphindia.com/opinion",
50-
# "http://www.rediff.com/news/interviews10.html",
51-
# "http://www.rediff.com/news/columns10.html",
52-
# "https://www.huffingtonpost.in/blogs",
53-
# "https://www.dnaindia.com/analysis",
54-
# "http://www.livemint.com/opinion/",
55-
# "http://www.financialexpress.com/print/edits-columns"
30+
"http://www.firstpost.com/category/politics",
31+
"http://www.forbesindia.com",
32+
"http://www.frontline.in",
33+
"http://www.hindustantimes.com/opinion/",
34+
"http://www.ndtv.com/opinion",
35+
"http://www.news18.com/blogs",
36+
"https://www.outlookindia.com/website",
37+
"https://www.outlookindia.com/magazine",
38+
"http://scroll.in",
39+
"https://blogs.economictimes.indiatimes.com",
40+
"https://www.thehindu.com/opinion/",
41+
"https://www.thehindubusinessline.com/opinion/",
42+
"http://qrius.com",
43+
"http://www.indianexpress.com/opinion",
44+
"http://www.newindianexpress.com/Opinions",
45+
"http://www.dailypioneer.com/columnists",
46+
"http://blogs.timesofindia.indiatimes.com",
47+
"https://www.tribuneindia.com/news/opinion/",
48+
"https://thewire.in",
49+
"https://www.telegraphindia.com/opinion",
50+
"http://www.rediff.com/news/interviews10.html",
51+
"http://www.rediff.com/news/columns10.html",
52+
"https://www.huffingtonpost.in/blogs",
53+
"https://www.dnaindia.com/analysis",
54+
"http://www.livemint.com/opinion/",
55+
"http://www.financialexpress.com/print/edits-columns"
5656
]
5757
headers = {'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64; rv:48.0) Gecko/20100101 Firefox/48.0'}
5858
for url in urls:

databaseConnector/databaseConnector.py

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -5,7 +5,7 @@
55

66
def sendData(testObj):
77
# url = 'http://localhost:8888/scrap/wp-json/wp/v2/posts' #using for mac
8-
url = 'http://192.168.0.159/wp-json/wp/v2/posts' #using for linux(office)
8+
url = 'https://politicl.com/wp-json/wp/v2/posts' #using for linux(office)
99
r = requests.post(url, data=testObj, auth=HTTPBasicAuth(
10-
# 'P0l1t1clAdm1n', 'Politicl123@!@#$%^&*()'))
11-
'sidhanshu', 'IJnJFAP$)4oqsRXoc1zZhmb&'))
10+
'P0l1t1clAdm1n', 'Politicl123@!@#$%^&*()'))
11+
# 'sidhanshu', 'IJnJFAP$)4oqsRXoc1zZhmb&'))

duplicateCheck/duplicateCheck.py

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -8,10 +8,10 @@ def duplicates(title):
88
new_title = new_title1.lower()
99
# proxies = { 'http': '192.168.0.159:80','https': '192.168.0.159:80'}
1010
# url = 'http://localhost:8888/scrap/wp-json/wp/v2/posts?slug='+new_title # using for mac
11-
url = 'http://192.168.0.159/wp-json/wp/v2/posts?slug='+new_title #using for linux(office)
11+
url = 'https://politicl.com/wp-json/wp/v2/posts?slug='+new_title #using for linux(office)
1212
r = requests.get(url, auth=HTTPBasicAuth(
13-
'sidhanshu', 'IJnJFAP$)4oqsRXoc1zZhmb&'))
14-
# 'P0l1t1clAdm1n', 'Politicl123@!@#$%^&*()'))
13+
# 'sidhanshu', 'IJnJFAP$)4oqsRXoc1zZhmb&'))
14+
'P0l1t1clAdm1n', 'Politicl123@!@#$%^&*()'))
1515
if len(r.text) >= 3:
1616
print(len(r.text))
1717
return 0

0 commit comments

Comments
 (0)