Skip to content

Commit

Permalink
wayne_full_commission and wayne_building_authority - 457 wayne commis…
Browse files Browse the repository at this point in the history
…sion aggregate (#463)

* 457 aggregate currently ready wayne commission spiders and their tests

* 457 Move location out of spiders and into mixin

* 457 move parse method out of spiders and into mixin; add meeting_name to spiders

* 457 wayne_commission, move yearStr into _parse_start method

* 457 Update wayne_cow, wayne_audit, and wayne_ways_means spiders and their tests to use the wayne_commission mixin

* 457 wayne spiders - add wayne_full_commission spider and test

* 457 wayne_building_authority spider and test

* 457 wayne_full_commission update description from diaholliday

* 457 update wayne_commission mixin with more generic logic for status text,
remove re dependency

* 457 Remove descriptions, replace comment with different var name

* 457 wayne_building_authority simplify parse_status method.

* 457 wayne_building_authority take parse_status method from mixin instead of spider.
  • Loading branch information
novellac authored and pjsier committed Jul 26, 2018
1 parent 06e5e81 commit 09e9845
Show file tree
Hide file tree
Showing 23 changed files with 12,184 additions and 126 deletions.
14 changes: 7 additions & 7 deletions city_scrapers/mixins/wayne_commission.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@
# BUT IT WILL BE INTEGRATED INTO A REGULAR AGENCY SPIDER.

# -*- coding: utf-8 -*-
import re
from datetime import datetime
from dateutil.parser import parse as dateparse
from urllib.parse import urljoin
Expand All @@ -17,6 +16,7 @@ class Wayne_commission:
'address': '500 Griswold St, Detroit, MI 48226',
'neighborhood': '',
}
description = ''

def parse(self, response):
"""
Expand All @@ -31,7 +31,7 @@ def parse(self, response):
data = {
'_type': 'event',
'name': self.meeting_name,
'event_description': self._parse_description(item),
'event_description': self.description,
'classification': self.classification,
'start': self._parse_start(item),
'end': {'date': None, 'time': None, 'note': ''},
Expand All @@ -46,7 +46,7 @@ def parse(self, response):
yield data

def _parse_entries(self, response):
return response.xpath('//tbody/tr')
return response.xpath('//tbody/tr[child::td/text()]')

@staticmethod
def _parse_documents(item, base_url):
Expand Down Expand Up @@ -82,10 +82,10 @@ def _parse_status(self, item, data):
Postponed meetings will be considered cancelled.
"""

status_str = item.xpath('.//td[4]/text() | .//td[4]/a/text() | .//td[4]/p/a/text()').extract_first()
# If the agenda column text contains "postponed," we consider it cancelled.
if re.search(r'postpone', status_str, re.IGNORECASE):
status_str = item.xpath('.//td[4]//text()').extract_first()
# If the agenda column text contains "postpone" or "cancel" we consider it cancelled.
if ('cancel' in status_str.lower()) or ('postpone' in status_str.lower()):
return 'cancelled'
# If it's not cancelled, use the status logic from spider.py
# If it's not one of the above statuses, use the status logic from spider.py
else:
return self._generate_status(data, '')
9 changes: 0 additions & 9 deletions city_scrapers/spiders/wayne_audit.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,12 +13,3 @@ class Wayne_auditSpider(Wayne_commission, Spider):
agency_id = 'Wayne County Audit Committee'
start_urls = ['https://www.waynecounty.com/elected/commission/audit.aspx']
meeting_name = 'Wayne County Audit Committee'

@staticmethod
def _parse_description(response):
"""
Event description taken from static text at top of page.
"""
desc_xpath = '//h2[contains(text(), "Audit")]/following-sibling::div/section/p/text()'
desc = response.xpath(desc_xpath).extract_first()
return desc
47 changes: 47 additions & 0 deletions city_scrapers/spiders/wayne_building_authority.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
# -*- coding: utf-8 -*-

# THIS SPIDER USES A MIXIN FOR SHARED FUNCTIONALITY.
# MIXINS ARE STORED IN /city-scrapers/city_scrapers/mixins
# YOU CAN OVERRIDE THE MIXIN HERE BY CREATING YOUR OWN DEFINITION.

import re
from datetime import datetime
from dateutil.parser import parse as dateparse
from city_scrapers.spider import Spider
from city_scrapers.mixins.wayne_commission import Wayne_commission


class Wayne_building_authoritySpider(Wayne_commission, Spider):
    """
    Spider for the Wayne County Building Authority meetings page.

    Shared behaviour is inherited from the Wayne_commission mixin. This
    class overrides the meeting location, the selection of table rows
    and the parsing of the start date/time.
    """

    name = 'wayne_building_authority'
    agency_id = 'Wayne County Building Authority'
    start_urls = ['https://www.waynecounty.com/boards/buildingauthority/meetings.aspx']
    meeting_name = 'Wayne County Building Authority'

    # Location specific to this agency; replaces the mixin's default.
    location = {
        'name': '6th Floor, Guardian Building',
        'address': '500 Griswold St, Detroit, MI 48226',
        'neighborhood': '',
    }

    def _parse_entries(self, response):
        """
        Return the table rows to turn into events.

        Only rows that have at least one cell with text are selected, and
        only from a section whose content mentions the current year.
        """
        year = datetime.now().year
        row_xpath = '//section[contains(.,"%s")]//tbody/tr[child::td/text()]' % year
        return response.xpath(row_xpath)

    def _parse_start(self, item):
        """
        Build the start dict (date, time, note) for a single table row.

        The date is read from the second cell and the time from the third.
        """
        # A <strong> element in the date cell marks a replacement meeting
        # date, so it wins over the cell's plain text.
        replacement = item.xpath('.//td[2]/strong/text()').extract_first()
        if replacement is None:
            date_text = item.xpath('.//td[2]/text()').extract_first()
        else:
            date_text = replacement

        time_text = item.xpath('.//td[3]/text()').extract_first()
        start = dateparse('{0} {1}'.format(date_text, time_text))

        return {'date': start.date(), 'time': start.time(), 'note': ''}
9 changes: 0 additions & 9 deletions city_scrapers/spiders/wayne_cow.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,12 +13,3 @@ class Wayne_cowSpider(Wayne_commission, Spider):
agency_id = 'Detroit Committee of the Whole'
start_urls = ['https://www.waynecounty.com/elected/commission/committee-of-the-whole.aspx']
meeting_name = 'Wayne County Committee of the Whole'

@staticmethod
def _parse_description(response):
"""
Event description taken from static text at top of page.
"""
desc_xpath = '//h2[contains(text(), "Committee of the Whole")]/following-sibling::div/section/text()'
desc = response.xpath(desc_xpath).extract_first()
return desc
9 changes: 0 additions & 9 deletions city_scrapers/spiders/wayne_economic_development.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,12 +13,3 @@ class Wayne_economic_developmentSpider(Wayne_commission, Spider):
agency_id = 'Wayne County Committee on Economic Development'
start_urls = ['https://www.waynecounty.com/elected/commission/economic-development.aspx']
meeting_name = 'Wayne County Committee on Economic Development'

@staticmethod
def _parse_description(response):
"""
Event description taken from static text at top of page.
"""
desc_xpath = '//h2[contains(text(), "Economic Development")]/following-sibling::div/section/p/text()'
desc = response.xpath(desc_xpath).extract_first()
return desc
23 changes: 23 additions & 0 deletions city_scrapers/spiders/wayne_full_commission.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
# -*- coding: utf-8 -*-

# THIS SPIDER USES A MIXIN FOR SHARED FUNCTIONALITY.
# MIXINS ARE STORED IN /city-scrapers/city_scrapers/mixins
# YOU CAN OVERRIDE THE MIXIN HERE BY CREATING YOUR OWN DEFINITION.

from city_scrapers.spider import Spider
from city_scrapers.mixins.wayne_commission import Wayne_commission


class Wayne_full_commissionSpider(Wayne_commission, Spider):
    """
    Spider for the Wayne County Full Commission meetings page.

    All parsing is inherited from the Wayne_commission mixin; only the
    identifying attributes, classification and location are set here.
    """

    name = 'wayne_full_commission'
    agency_id = 'Wayne County Full Commission'
    start_urls = ['https://www.waynecounty.com/elected/commission/full-commission.aspx']
    meeting_name = 'Wayne County Full Commission'

    # Values below replace the defaults provided by the mixin.
    classification = 'Board'
    location = dict(
        name='Mezzanine level, Guardian Building',
        address='500 Griswold St, Detroit, MI 48226',
        neighborhood='',
    )
9 changes: 0 additions & 9 deletions city_scrapers/spiders/wayne_government_operations.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,12 +13,3 @@ class Wayne_government_operationsSpider(Wayne_commission, Spider):
agency_id = 'Wayne County Committe on Government Operations'
start_urls = ['https://www.waynecounty.com/elected/commission/government-operations.aspx']
meeting_name = 'Wayne County Committee on Government Operations'

@staticmethod
def _parse_description(response):
"""
Event description taken from static text at top of page.
"""
desc_xpath = '//h2[contains(text(), "Government Operations")]/following-sibling::div/section/p/text()'
desc = response.xpath(desc_xpath).extract_first()
return desc
9 changes: 0 additions & 9 deletions city_scrapers/spiders/wayne_health_human_services.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,12 +13,3 @@ class Wayne_health_human_servicesSpider(Wayne_commission, Spider):
agency_id = 'Wayne County Committee on Health and Human Services'
start_urls = ['https://www.waynecounty.com/elected/commission/health-human-services.aspx']
meeting_name = 'Wayne County Committee on Health and Human Services'

@staticmethod
def _parse_description(response):
"""
Event description taken from static text at top of page.
"""
desc_xpath = '//h2[contains(text(), "Health & Human Services")]/following-sibling::div/section/p/text()'
desc = response.xpath(desc_xpath).extract_first()
return desc
9 changes: 0 additions & 9 deletions city_scrapers/spiders/wayne_public_safety.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,12 +13,3 @@ class Wayne_public_safetySpider(Wayne_commission, Spider):
agency_id = 'Wayne County Committee on Public Safety, Judiciary, and Homeland Security'
start_urls = ['https://www.waynecounty.com/elected/commission/public-safety-judiciary.aspx']
meeting_name = "Wayne County Committee on Public Safety, Judiciary, and Homeland Security"

@staticmethod
def _parse_description(response):
"""
Event description taken from static text at top of page.
"""
desc_xpath = '//h2[contains(text(), "Public Safety, Judiciary")]/following-sibling::div/section/p/text()'
desc = response.xpath(desc_xpath).extract_first()
return desc
9 changes: 0 additions & 9 deletions city_scrapers/spiders/wayne_public_services.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,12 +13,3 @@ class Wayne_public_servicesSpider(Wayne_commission, Spider):
agency_id = 'Wayne County Committee on Public Services'
start_urls = ['https://www.waynecounty.com/elected/commission/public-services.aspx']
meeting_name = 'Wayne County Committee on Public Services'

@staticmethod
def _parse_description(response):
"""
Event description taken from static text at top of page.
"""
desc_xpath = '//h2[contains(text(), "Public Services")]/following-sibling::div/section/p/text()'
desc = response.xpath(desc_xpath).extract_first()
return desc
9 changes: 0 additions & 9 deletions city_scrapers/spiders/wayne_ways_means.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,12 +13,3 @@ class Wayne_ways_meansSpider(Wayne_commission, Spider):
agency_id = 'Wayne County Ways and Means Committee'
start_urls = ['https://www.waynecounty.com/elected/commission/ways-means.aspx']
meeting_name = 'Wayne County Ways and Means Committee'

@staticmethod
def _parse_description(response):
"""
Event description taken from static text at top of page.
"""
desc_xpath = '//h2[contains(text(), "Ways & Means")]/following-sibling::div/section/p/text()'
desc = response.xpath(desc_xpath).extract_first()
return desc
Loading

0 comments on commit 09e9845

Please sign in to comment.