Commit 292d32e
Rename documenters_aggregator to city_scrapers.
jim committed Apr 10, 2018
1 parent b0cbcab commit 292d32e
Showing 83 changed files with 104 additions and 104 deletions.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
@@ -4,7 +4,7 @@
 import time
 
 from airtable import Airtable
-from documenters_aggregator.utils import get_key
+from city_scrapers.utils import get_key
 from random import randint
 from requests.exceptions import HTTPError
 from scrapy.exceptions import DropItem
File renamed without changes.
@@ -1,7 +1,7 @@
 from scrapy.xlib.pydispatch import dispatcher
 from scrapy import signals
 from scrapy.exporters import CsvItemExporter
-from documenters_aggregator.utils import get_key
+from city_scrapers.utils import get_key
 import datetime
 from os import remove
 import subprocess
@@ -13,7 +13,7 @@ class CsvPipeline(object):
     Outputs csv files for local development to the /local_output/ folder
     """
 
-    path = subprocess.check_output(['git', 'rev-parse', '--show-toplevel']).decode("utf-8").rstrip() + '/documenters_aggregator/local_outputs/'
+    path = subprocess.check_output(['git', 'rev-parse', '--show-toplevel']).decode("utf-8").rstrip() + '/city_scrapers/local_outputs/'
 
     def __init__(self):
        dispatcher.connect(self.spider_opened, signals.spider_opened)
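For context, the `path` expression in `CsvPipeline` pins the output folder to the repository root no matter where the crawl is launched from. A minimal standalone sketch of the same trick (assuming `git` is on `PATH` and the code runs inside a clone of the repo):

```
import subprocess

# Ask git for the absolute path of the repository root, then append the
# project's local output folder to it.
repo_root = subprocess.check_output(
    ['git', 'rev-parse', '--show-toplevel']
).decode('utf-8').rstrip()

output_dir = repo_root + '/city_scrapers/local_outputs/'
print(output_dir)
```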
File renamed without changes.
26 changes: 13 additions & 13 deletions documenters_aggregator/settings.py → city_scrapers/settings.py
@@ -1,6 +1,6 @@
 # -*- coding: utf-8 -*-
 
-# Scrapy settings for documenters_aggregator project
+# Scrapy settings for city_scrapers project
 #
 # For simplicity, this file contains only settings considered important or
 # commonly used. You can find more settings consulting the documentation:
@@ -9,32 +9,32 @@
 # http://scrapy.readthedocs.org/en/latest/topics/downloader-middleware.html
 # http://scrapy.readthedocs.org/en/latest/topics/spider-middleware.html
 
-BOT_NAME = 'documenters_aggregator'
+BOT_NAME = 'city_scrapers'
 
-SPIDER_MODULES = ['documenters_aggregator.spiders']
-NEWSPIDER_MODULE = 'documenters_aggregator.spiders'
+SPIDER_MODULES = ['city_scrapers.spiders']
+NEWSPIDER_MODULE = 'city_scrapers.spiders'
 
 # Crawl responsibly by identifying yourself (and your website) on the user-agent
-USER_AGENT = 'Documenters Aggregator [development mode]. Learn more and say hello at https://city-bureau.github.io/city-scrapers/'
+USER_AGENT = 'City Scrapers [development mode]. Learn more and say hello at https://city-bureau.github.io/city-scrapers/'
 
 # Obey robots.txt rules
 ROBOTSTXT_OBEY = False
-DOCUMENTERS_AGGREGATOR_ROBOTSTXT_OBEY = True
-DOCUMENTERS_AGGREGATOR_ROBOTSTXT_LOGONLY = True
+city_scrapers_ROBOTSTXT_OBEY = True
+city_scrapers_ROBOTSTXT_LOGONLY = True
 
 # Disable cookies (enabled by default)
 COOKIES_ENABLED = False
 
 # Configure item pipelines
 #
 # One of:
-# * documenters_aggregator.pipelines.ValidationPipeline,
-# * documenters_aggregator.pipelines.AirtablePipeline
+# * city_scrapers.pipelines.ValidationPipeline,
+# * city_scrapers.pipelines.AirtablePipeline
 #
 # Or define your own.
 # See http://scrapy.readthedocs.org/en/latest/topics/item-pipeline.html
 ITEM_PIPELINES = {
-    'documenters_aggregator.pipelines.CsvPipeline': 400
+    'city_scrapers.pipelines.CsvPipeline': 400
 }
 
 # Configure maximum concurrent requests performed by Scrapy (default: 16)
@@ -60,16 +60,16 @@
 # Enable or disable spider middlewares
 # See http://scrapy.readthedocs.org/en/latest/topics/spider-middleware.html
 #SPIDER_MIDDLEWARES = {
-#    'documenters_aggregator.middlewares.DocumentersAggregatorSpiderMiddleware': 543,
+#    'city_scrapers.middlewares.DocumentersAggregatorSpiderMiddleware': 543,
 #}
 
 # Enable or disable downloader middlewares
 # See http://scrapy.readthedocs.org/en/latest/topics/downloader-middleware.html
 DOWNLOADER_MIDDLEWARES = {
-    'documenters_aggregator.middlewares.DocumentersAggregatorRobotsTxtMiddleware': 543,
+    'city_scrapers.middlewares.CityScrapersRobotsTxtMiddleware': 543,
 }
 
-COMMANDS_MODULE = 'documenters_aggregator.commands'
+COMMANDS_MODULE = 'city_scrapers.commands'
 
 # Enable or disable extensions
 # See http://scrapy.readthedocs.org/en/latest/topics/extensions.html
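The comment block above lists the pipelines that ship with the project. A hedged sketch of pointing `ITEM_PIPELINES` at the validation pipeline instead of the CSV writer (a local-settings experiment, not part of this commit):

```
# Local settings override: run items through ValidationPipeline instead of
# CsvPipeline. The integer is the pipeline's order; lower values run first.
ITEM_PIPELINES = {
    'city_scrapers.pipelines.ValidationPipeline': 400,
}
```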
File renamed without changes.
File renamed without changes.
@@ -4,7 +4,7 @@
 specification (http://docs.opencivicdata.org/en/latest/data/event.html).
 """
 from dateutil.parser import parse as dateparse
-from documenters_aggregator.spider import Spider
+from city_scrapers.spider import Spider
 
 
 class Chi_animalSpider(Spider):
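This import swap repeats across every spider in the commit: each one subclasses the project's shared `Spider` base class rather than `scrapy.Spider` directly. A hedged sketch of the pattern (the base class's actual helpers aren't visible in this diff, and `ExampleAgencySpider` is hypothetical, so this only runs inside a clone of the repo):

```
from city_scrapers.spider import Spider  # project base class, assumed to wrap scrapy.Spider


class ExampleAgencySpider(Spider):
    name = 'example_agency'
    allowed_domains = ['example.gov']
    start_urls = ['https://example.gov/meetings']

    def parse(self, response):
        # Spiders in this repo parse agency pages into meeting items
        # (assumption based on the spider classes shown below).
        return []
```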
@@ -10,7 +10,7 @@
 from datetime import datetime
 
 import scrapy
-from documenters_aggregator.spider import Spider
+from city_scrapers.spider import Spider
 
 
 class Chi_buildingsSpider(Spider):
@@ -8,7 +8,7 @@
 
 from datetime import datetime
 
-from documenters_aggregator.spider import Spider
+from city_scrapers.spider import Spider
 
 
 class Chi_cityCollegeSpider(Spider):
@@ -11,7 +11,7 @@
 from pytz import timezone
 import dateutil.parser
 
-from documenters_aggregator.spider import Spider
+from city_scrapers.spider import Spider
 
 
 class Chi_citycouncilSpider(Spider):
@@ -6,7 +6,7 @@
 import re
 import datetime as dt
 
-from documenters_aggregator.spider import Spider
+from city_scrapers.spider import Spider
 
 
 class Chi_infraSpider(Spider):
@@ -8,7 +8,7 @@
 import json
 import datetime
 
-from documenters_aggregator.spider import Spider
+from city_scrapers.spider import Spider
 
 
 class Chi_librarySpider(Spider):
@@ -9,7 +9,7 @@
 from datetime import datetime
 from legistar.events import LegistarEventsScraper
 
-from documenters_aggregator.spider import Spider
+from city_scrapers.spider import Spider
 
 
 class Chi_parksSpider(Spider):
@@ -107,7 +107,7 @@ def _parse_description(self, item):
         Parse or generate event name.
         """
         return ("The Chicago Park District Act provides that the Chicago"
-                "Park District shall be governed by a board of seven"
+                "Park District shall be governed by a board of seven"
                 "non-salaried Commissioners who are appointed by the Mayor"
                 "of the City of Chicago with the approval of the Chicago City"
                 "Council. Under the Chicago Park District Code, the Commissioners"
@@ -6,7 +6,7 @@
 import json
 from datetime import datetime
 
-from documenters_aggregator.spider import Spider
+from city_scrapers.spider import Spider
 
 
 class Chi_policeSpider(Spider):
@@ -9,7 +9,7 @@
 
 from pytz import timezone
 
-from documenters_aggregator.spider import Spider
+from city_scrapers.spider import Spider
 
 
 class Chi_policeboardSpider(Spider):
@@ -8,7 +8,7 @@
 from datetime import datetime
 from time import strptime
 
-from documenters_aggregator.spider import Spider
+from city_scrapers.spider import Spider
 
 
 class Chi_pubhealthSpider(Spider):
@@ -1,7 +1,7 @@
 # -*- coding: utf-8 -*-
 
 from datetime import datetime
-from documenters_aggregator.spider import Spider
+from city_scrapers.spider import Spider
 
 
 class ChiSchoolActionsSpider(Spider):
@@ -1,7 +1,7 @@
 # -*- coding: utf-8 -*-
 from datetime import datetime
 
-from documenters_aggregator.spider import Spider
+from city_scrapers.spider import Spider
 
 
 class Chi_schoolsSpider(Spider):
@@ -7,7 +7,7 @@
 
 from datetime import datetime
 from pytz import timezone
-from documenters_aggregator.spider import Spider
+from city_scrapers.spider import Spider
 
 
 
@@ -8,7 +8,7 @@
 from pytz import timezone
 from legistar.events import LegistarEventsScraper
 
-from documenters_aggregator.spider import Spider
+from city_scrapers.spider import Spider
 
 
 class Cook_boardSpider(Spider):
@@ -8,7 +8,7 @@
 from datetime import datetime
 from pytz import timezone
 
-from documenters_aggregator.spider import Spider
+from city_scrapers.spider import Spider
 
 
 class Cook_countySpider(Spider):
@@ -6,7 +6,7 @@
 
 from datetime import datetime
 
-from documenters_aggregator.spider import Spider
+from city_scrapers.spider import Spider
 
 
 class Cook_electoralSpider(Spider):
@@ -2,7 +2,7 @@
 
 from datetime import datetime
 
-from documenters_aggregator.spider import Spider
+from city_scrapers.spider import Spider
 
 
 class Cook_hospitalsSpider(Spider):
@@ -96,7 +96,7 @@ def _parse_description(self, subitem):
                 "dignity and respect regardless of a patient’s ability to pay; "
                 "fostering partnerships with other health providers and communities "
                 "to enhance the health of the public; and advocating for policies "
-                "that promote the physical, mental and social well being of the people of Cook County. "
+                "that promote the physical, mental and social well being of the people of Cook County. "
                 "The CCHHS Board of Directors has five standing committees.")
 
     def _parse_start(self, subitem):
@@ -8,7 +8,7 @@
 import json
 import datetime as dt
 
-from documenters_aggregator.spider import Spider
+from city_scrapers.spider import Spider
 
 
 class Cook_landbankSpider(Spider):
@@ -38,11 +38,11 @@ class Cook_landbankSpider(Spider):
         'DOWNLOAD_DELAY': 1,
         'CONCURRENT_REQUESTS_PER_UP': 1,
         'LOG_ENABLED': True,
-        'BOT_NAME': 'documenters_aggregator',
+        'BOT_NAME': 'city_scrapers',
         'COOKIES_ENABLED': False,
-        'NEWSPIDER_MODULE': 'documenters_aggregator.spiders',
+        'NEWSPIDER_MODULE': 'city_scrapers.spiders',
         'ROBOTSTXT_OBEY': True,
-        'SPIDER_MODULES': ['documenters_aggregator.spiders'],
+        'SPIDER_MODULES': ['city_scrapers.spiders'],
         'USER_AGENT': 'Documenters Aggregator (learn more and say hello at https://TKTK)'
     }
 
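A note on the `custom_settings` dict shown above: Scrapy merges it over the project-wide values from `settings.py` when this one spider runs, which is why the rename had to touch `BOT_NAME`, `NEWSPIDER_MODULE`, and `SPIDER_MODULES` here as well. A minimal sketch of the mechanism (spider name illustrative):

```
import scrapy


class PoliteSpider(scrapy.Spider):
    name = 'polite'
    # These values override settings.py for this spider only.
    custom_settings = {
        'DOWNLOAD_DELAY': 1,
        'ROBOTSTXT_OBEY': True,
    }
```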
@@ -9,7 +9,7 @@
 from datetime import datetime, timedelta
 import time as Time
 
-from documenters_aggregator.spider import Spider
+from city_scrapers.spider import Spider
 
 
 class Cook_pubhealthSpider(Spider):
@@ -7,7 +7,7 @@
 from datetime import datetime
 from pytz import timezone
 
-from documenters_aggregator.spider import Spider
+from city_scrapers.spider import Spider
 
 
 class Il_laborSpider(Spider):
@@ -2,7 +2,7 @@
 import scrapy
 from dateutil.parser import parse as dateparse
 
-from documenters_aggregator.spider import Spider
+from city_scrapers.spider import Spider
 
 
 class Il_pubhealthSpider(Spider):
@@ -2,7 +2,7 @@
 from datetime import datetime
 import re
 
-from documenters_aggregator.spider import Spider
+from city_scrapers.spider import Spider
 
 
 class Metra_boardSpider(Spider):
@@ -4,7 +4,7 @@
 import re
 from datetime import datetime
 
-from documenters_aggregator.spider import Spider
+from city_scrapers.spider import Spider
 
 
 # The RTA's Board and other meetings are are displayed on their
@@ -11,9 +11,9 @@
 
 from dateutil.rrule import rrule, MONTHLY, WEEKLY, MO, TU, WE, TH, FR, SA, SU
 
-from documenters_aggregator.spider import Spider
+from city_scrapers.spider import Spider
 
-GOOGLE_API_KEY = os.environ.get('DOCUMENTERS_AGGREGATOR_GOOGLE_API_KEY') or 'test-token'
+GOOGLE_API_KEY = os.environ.get('city_scrapers_GOOGLE_API_KEY') or 'test-token'
 SPREADSHEET_URL = 'https://sheets.googleapis.com/v4/spreadsheets/1xnt4kZI9Ruinw91wM-nnWftsFD-ZaKaozepdNXeIrpo'
 
 
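The `or 'test-token'` fallback above lets the spider import cleanly when no credentials are configured. A hedged sketch of the same pattern in isolation (variable names illustrative):

```
import os

# environ.get returns None when the variable is unset, and `or` also
# catches an empty string, so a dummy token is used in test runs.
API_KEY = os.environ.get('EXAMPLE_API_KEY') or 'test-token'
```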
File renamed without changes.
2 changes: 1 addition & 1 deletion deploy/aws_setup.py
@@ -7,7 +7,7 @@
 DEPLOY_TAG = 'latest' # datetime.now().strftime("%Y%m%d%H%M")
 ECS_URI = environ.get('ECS_REPOSITORY_URI')
 
-SPIDER_PATH = 'documenters_aggregator/spiders'
+SPIDER_PATH = 'city_scrapers/spiders'
 
 spider_names = [
     path.splitext(f)[0]
8 changes: 4 additions & 4 deletions docs/03_contribute.md
@@ -37,7 +37,7 @@ Run the `genspider` task with a spider slug, spider name, and URLs to start scra
 You should see some output like:
 
 ```
-Created /Users/eads/Code/dcity-scrapers/documenters_aggregator/spiders/chi_housing.py
+Created /Users/eads/Code/city-scrapers/city_scrapers/spiders/chi_housing.py
 Created /Users/eads/Code/city-scrapers/tests/test_chi_housing.py
 Created /Users/eads/Code/city-scrapers/tests/files/chi_housing_thecha.html
 ```
@@ -51,7 +51,7 @@ You now have a spider named `chi_housing`. To run it (admittedly, not much will
 ```
 
 If there are no error messages, congratulations! You have a barebones spider.
-Additionally, each time you run your scraper, you can see your results as a csv output in the /documenters_aggregator/local_outputs/ folder. Each `scrapy crawl` command produces a unique file with the agency name and timestamp. These files are ignored by git, but you may want to clean the folder up locally after some testing.
+Additionally, each time you run your scraper, you can see your results as a csv output in the /city_scrapers/local_outputs/ folder. Each `scrapy crawl` command produces a unique file with the agency name and timestamp. These files are ignored by git, but you may want to clean the folder up locally after some testing.
 
 ### 5. Run the automated tests
 
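The unique per-run CSV name mentioned in the changed paragraph is produced by the pipeline, not shown in this diff. A hedged sketch of how an agency-plus-timestamp filename could be assembled (the exact format `CsvPipeline` uses is an assumption):

```
import datetime


def local_output_name(spider_name):
    # One file per crawl: combine the agency/spider slug with a timestamp.
    stamp = datetime.datetime.now().strftime('%Y%m%d%H%M%S')
    return '{}_{}.csv'.format(spider_name, stamp)


print(local_output_name('chi_housing'))  # e.g. chi_housing_20180410120000.csv
```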
@@ -101,7 +101,7 @@ That's OK.
 
 *If you run into any troubles, feel free to reach out on [slack](https://citybureau.slack.com/) or open a pull request so others can take a look at your code. Pull requests don't need to contain perfect code. See [CONTRIBUTING.md](https://github.com/City-Bureau/city-scrapers/blob/master/CONTRIBUTING.md).*
 
-Open `documenters_aggregator/spiders/chi_housing.py` to work on your spider. A simple structure has been created for you to use. Let's look at the basics.
+Open `city_scrapers/spiders/chi_housing.py` to work on your spider. A simple structure has been created for you to use. Let's look at the basics.
 
 The spider should look something like this:
 
@@ -206,7 +206,7 @@ Here is the test setup and an example test from the Idph spider:
 import pytest
 
 from tests.utils import file_response
-from documenters_aggregator.spiders.idph import IdphSpider
+from city_scrapers.spiders.idph import IdphSpider
 
 test_response = file_response('files/idph.html')
 spider = IdphSpider()
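The example test itself is collapsed in this view. A hedged sketch of what a test in this style might check, continuing the setup above (the real `test_idph.py` assertions differ; `parse()` is assumed to yield event dicts built from the saved fixture):

```
def test_name():
    item = next(iter(spider.parse(test_response)))
    # Every scraped event should at least carry a non-empty name.
    assert item.get('name')
```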