file name changes

sidhanshumonga · sidhanshumonga · commit d40741f9f4ea · 2018-11-06T17:56:46.000+05:30
diff --git a/.vscode/settings.json b/.vscode/settings.json
@@ -0,0 +1,3 @@
+{
+    "python.pythonPath": "/usr/local/bin/python3"
+}
diff --git a/countersReport.py b/countersReport.py
@@ -1,12 +1,40 @@
 import datetime
 import json
 import os.path
-from scheduler import csvExporter
+from csvExportFile import csvExporter
 
 today = datetime.datetime.today().strftime('%Y-%m-%d')
 dailyZeroArray = {
     "www.deccanchronicle.com": 0,
-    "www.dailyo.in": 0
+    "www.dailyo.in": 0,
+    "www.dnaindia.com/analysis":0,
+    "www.firstpost.com/category/politics":0,
+    "www.forbesindia.com":0,
+    "www.frontline.in":0,
+    "www.hindustantimes.com/opinion":0,
+    "indiatoday.intoday.in/calendar":0,
+    "www.livemint.com/opinion":0,
+    "www.ndtv.com/opinion":0,
+    "www.news18.com/blogs":0,
+    "www.outlookindia.com/website":0,
+    "www.outlookindia.com/magazine":0,
+    "www.rediff.com/news/interviews10.html":0,
+    "www.rediff.com/news/columns10.html":0,
+    "scroll.in":0,
+    "blogs.economictimes.indiatimes.com":0,
+    "www.financialexpress.com/print/edits-columns":0,
+    "www.thehindu.com/opinion":0,
+    "www.thehindubusinessline.com/opinion":0,
+    "www.huffingtonpost.in/the-blog":0,
+    "theindianeconomist.com":0,
+    "indianexpress.com/opinion":0,
+    "www.newindianexpress.com/Opinions":0,
+    "www.dailypioneer.com/columnists":0,
+    "blogs.timesofindia.indiatimes.com":0,
+    "www.tribuneindia.com/news/opinion":0,
+    "thewire.in":0,
+    "www.telegraphindia.com/opinion":0,
+
 }
 weeklyZeroArray = {
     "www.deccanchronicle.com": 0,
diff --git a/crawlers/spiders/scrapit.py b/crawlers/spiders/scrapit.py
@@ -16,8 +16,35 @@ class ScrapSpider(scrapy.Spider):
     def start_requests(self):
         todayFolder(self)
         urls = [
-            'http://www.dailyo.in/politics',
-            'http://www.deccanchronicle.com/opinion',
+            "http://www.dailyo.in/politics",
+            "http://www.deccanchronicle.com/opinion",
+            "http://www.dnaindia.com/analysis",
+            "http://www.firstpost.com/category/politics",
+            "http://www.forbesindia.com",
+            "http://www.frontline.in",
+            "http://www.hindustantimes.com/opinion",
+            "http://indiatoday.intoday.in/calendar",
+            "http://www.livemint.com/opinion",
+            "http://www.ndtv.com/opinion",
+            "http://www.news18.com/blogs",
+            "http://www.outlookindia.com/website",
+            "http://www.outlookindia.com/magazine",
+            "http://www.rediff.com/news/interviews10.html",
+            "http://www.rediff.com/news/columns10.html",
+            "http://scroll.in",
+            "https://blogs.economictimes.indiatimes.com",
+            "http://www.financialexpress.com/print/edits-columns",
+            "http://www.thehindu.com/opinion",
+            "http://www.thehindubusinessline.com/opinion",
+            "http://www.huffingtonpost.in/the-blog",
+            "http://theindianeconomist.com",
+            "http://indianexpress.com/opinion",            
+            "http://www.newindianexpress.com/Opinions",
+            "http://www.dailypioneer.com/columnists",
+            "http://blogs.timesofindia.indiatimes.com",
+            "http://www.tribuneindia.com/news/opinion",
+            "http://thewire.in",
+            "https://www.telegraphindia.com/opinion",
         ]
         for url in urls:
             request = scrapy.Request(
@@ -28,8 +55,7 @@ def start_requests(self):
     def parse(self, response):
         today = datetime.datetime.today().strftime('%Y-%m-%d')
         domain = (response.url).split('/')[2]
-   
-   
+
         # www.deccanchronicle.com parsing
         if (domain == 'www.deccanchronicle.com'):
             deccanchroniclearray = []
@@ -38,41 +64,37 @@ def parse(self, response):
             res2 = case.css('div.opnionTopBig')
             for news in res:
                 dcobj = {"title": news.css("a > h3::text").extract_first(),
-                          "link": domain + news.css("a::attr(href)").extract_first(),
-                          "source": domain,
-                          }
+                         "link": domain + news.css("a::attr(href)").extract_first(),
+                         "source": domain,
+                         }
                 # yield deploy
                 deccanchroniclearray.append(dcobj.copy())
                 addCounter(domain)
             for news in res2:
                 dcobj = {"title": news.css("a > h3::text").extract_first(),
-                          "link": domain + news.css("a::attr(href)").extract_first(),
-                          "source": domain,
-                          }
+                         "link": domain + news.css("a::attr(href)").extract_first(),
+                         "source": domain,
+                         }
                 # yield deploy
                 deccanchroniclearray.append(dcobj.copy())
                 addCounter(domain)
-            with open('./jsons/%s/%s.json' %(today,domain), 'w') as fp:
-                    json.dump(deccanchroniclearray, fp)
-
+            with open('./jsons/%s/%s.json' % (today, domain), 'w') as fp:
+                json.dump(deccanchroniclearray, fp)
 
         # www.dailyo.in parsing
         elif (domain == 'www.dailyo.in'):
             dailyoarray = []
             case2 = response.css('div#story_container > div > div.story-list')
             for news in case2:
-                dailyoobj =  {"title": news.css("div.storybox > div.storytext > h2 > a::text").extract_first(),
-                       "link": domain + news.css("div.storybox > div.storytext > h2 > a::attr(href)").extract_first(),
-                       "source": domain,
-                       }
+                dailyoobj = {"title": news.css("div.storybox > div.storytext > h2 > a::text").extract_first(),
+                             "link": domain + news.css("div.storybox > div.storytext > h2 > a::attr(href)").extract_first(),
+                             "source": domain,
+                             }
                 # yield deploy
                 dailyoarray.append(dailyoobj.copy())
                 addCounter(domain)
-            with open('./jsons/%s/%s.json' %(today,domain), 'w') as fp:
-                    json.dump(dailyoarray, fp)
+            with open('./jsons/%s/%s.json' % (today, domain), 'w') as fp:
+                json.dump(dailyoarray, fp)
         # with open('./counters/%s_daily-counters.json' %today, 'w') as fp:
         #         self.log(getCounter('daily'))
         #         json.dump(getCounter('daily'), fp)
-
-
-
diff --git a/csvExportFile.py b/csvExportFile.py

Original file line number	Diff line number	Diff line change
`@@ -0,0 +1,3 @@`
	`1`	`+{`
	`2`	`+ "python.pythonPath": "/usr/local/bin/python3"`
	`3`	`+}`