diff --git a/CHANGELOG.rst b/CHANGELOG.rst
index 0c2f8f3..22de2f0 100644
--- a/CHANGELOG.rst
+++ b/CHANGELOG.rst
@@ -1,3 +1,13 @@
+v0.2.2
+======
+
+2020/11/09
+
+* allows passing matplotlib axes to urbanaccess.plot.plot_net()
+* adds flexibility to calendar/date handling (calendar_dates.txt now supported)
+* improves GTFS downloading (solves issue where requests were rejected due to missing user agent header)
+* improves text encoding support
+
 v0.2.1
 ======
diff --git a/README.rst b/README.rst
index 03af455..2391036 100644
--- a/README.rst
+++ b/README.rst
@@ -51,7 +51,7 @@ Citation and academic literature
 To cite this tool and for a complete description of the
 UrbanAccess methodology see the paper below:
 
-`Samuel D. Blanchard and Paul Waddell. 2017. "UrbanAccess: Generalized Methodology for Measuring Regional Accessibility with an Integrated Pedestrian and Transit Network." Transportation Research Record: Journal of the Transportation Research Board. No. 2653. pp. 35–44. `__
+`Samuel D. Blanchard and Paul Waddell. 2017. "UrbanAccess: Generalized Methodology for Measuring Regional Accessibility with an Integrated Pedestrian and Transit Network." Transportation Research Record: Journal of the Transportation Research Board. No. 2653. pp. 35–44. `__
 
 For other related literature see `here `__.
 
@@ -113,9 +113,8 @@ Minimum GTFS data requirements
 
 The minimum `GTFS data types `__ required to use
-UrbanAccess are: ``stop_times``, ``stops``, ``routes``, ``calendar``,
-and ``trips`` however if there is no ``calendar``, ``calendar_dates``
-can be used as a replacement.
+UrbanAccess are: ``stop_times``, ``stops``, ``routes``, and ``trips``,
+plus either ``calendar`` or ``calendar_dates``.
 
 Related UDST libraries
 ----------------------
diff --git a/docs/source/conf.py b/docs/source/conf.py
index f365eb5..a073f17 100644
--- a/docs/source/conf.py
+++ b/docs/source/conf.py
@@ -30,8 +30,8 @@
 project = u'UrbanAccess'
 author = u'UrbanSim Inc.'
 copyright = u'{}, {}'.format(datetime.now().year, author)
-version = u'0.2.1'
-release = u'0.2.1'
+version = u'0.2.2'
+release = u'0.2.2'
 language = None
 
 # List of patterns to ignore when looking for source files.
diff --git a/docs/source/index.rst b/docs/source/index.rst
index 5b6133a..3c6cec9 100644
--- a/docs/source/index.rst
+++ b/docs/source/index.rst
@@ -3,7 +3,7 @@ UrbanAccess
 
 A tool for computing GTFS transit and OSM pedestrian networks for accessibility analysis.
 
-v0.2.1, released August 28, 2020.
+v0.2.2, released November 9, 2020.
 
 Contents
 --------
diff --git a/docs/source/introduction.rst b/docs/source/introduction.rst
index 0f8e0ed..2274b91 100644
--- a/docs/source/introduction.rst
+++ b/docs/source/introduction.rst
@@ -39,7 +39,7 @@ A `demo `__ is available a
 Minimum GTFS data requirements
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 
-The minimum `GTFS data types `__ required to use UrbanAccess are: ``stop_times``, ``stops``, ``routes``, ``calendar``, and ``trips``. If you are using a feed that does not have or utilize a calendar you may use the ``calendar_dates`` file instead of ``calendar`` with the ``calendar_dates_lookup`` parameter :ref:`here `.
+The minimum `GTFS data types `__ required to use UrbanAccess are: ``stop_times``, ``stops``, ``routes``, and ``trips``, plus either ``calendar`` or ``calendar_dates``. If you are using a feed that does not have or utilize a calendar, you may use the ``calendar_dates`` file instead of ``calendar`` with the ``calendar_dates_lookup`` parameter :ref:`here `.
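For example, a minimal end-to-end sketch of this workflow for a feed that ships ``calendar_dates.txt`` but no ``calendar.txt``; the feed directory and the ``{'schedule_type': 'WD'}`` lookup are illustrative placeholders, and the lookup column must actually exist in that feed's ``calendar_dates.txt``::

    import urbanaccess.gtfs.load as gtfs_load
    import urbanaccess.gtfs.network as gtfs_network

    # placeholder path to a folder of extracted GTFS feed txt files
    loaded_feeds = gtfs_load.gtfsfeed_to_df(
        gtfsfeed_path='data/gtfsfeed_text', validation=False, verbose=True,
        bbox=None, remove_stops_outsidebbox=False, append_definitions=False)

    # select service with an exact match on a calendar_dates.txt column;
    # 'day' is still required in this release (see the TODO in network.py)
    transit_net = gtfs_network.create_transit_net(
        loaded_feeds, day='monday',
        timerange=['07:00:00', '10:00:00'],
        calendar_dates_lookup={'schedule_type': 'WD'})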
License ~~~~~~~~ @@ -51,11 +51,11 @@ Citation and academic literature To cite this tool and for a complete description of the UrbanAccess methodology see the paper below: -`Samuel D. Blanchard and Paul Waddell. 2017. "UrbanAccess: Generalized Methodology for Measuring Regional Accessibility with an Integrated Pedestrian and Transit Network." Transportation Research Record: Journal of the Transportation Research Board. No. 2653. pp. 35–44. `__ +`Samuel D. Blanchard and Paul Waddell. 2017. "UrbanAccess: Generalized Methodology for Measuring Regional Accessibility with an Integrated Pedestrian and Transit Network." Transportation Research Record: Journal of the Transportation Research Board. No. 2653. pp. 35–44. `__ For a detailed use case of the tool see the following paper: -`Samuel D. Blanchard and Paul Waddell. 2017. "Assessment of Regional Transit Accessibility in the San Francisco Bay Area of California with UrbanAccess." Transportation Research Record: Journal of the Transportation Research Board. No. 2654. pp. 45–54. `__ +`Samuel D. Blanchard and Paul Waddell. 2017. "Assessment of Regional Transit Accessibility in the San Francisco Bay Area of California with UrbanAccess." Transportation Research Record: Journal of the Transportation Research Board. No. 2654. pp. 45–54. `__ Reporting bugs ~~~~~~~~~~~~~~~~~~~~~~~~ diff --git a/requirements-dev.txt b/requirements-dev.txt index fbaf9c8..d578087 100644 --- a/requirements-dev.txt +++ b/requirements-dev.txt @@ -7,6 +7,7 @@ pycodestyle # testing demo notebook jupyter cartopy # requires conda +pyepsg # building documentation numpydoc diff --git a/setup.py b/setup.py index 1e75e96..297359e 100644 --- a/setup.py +++ b/setup.py @@ -9,7 +9,7 @@ setup( name='urbanaccess', - version='0.2.1', + version='0.2.2', license='AGPL', description=description, long_description=long_description, diff --git a/urbanaccess/__init__.py b/urbanaccess/__init__.py index c45aee2..4c015c1 100644 --- a/urbanaccess/__init__.py +++ b/urbanaccess/__init__.py @@ -9,6 +9,6 @@ from .gtfsfeeds import * from .plot import * -__version__ = "0.2.1" +__version__ = "0.2.2" version = __version__ diff --git a/urbanaccess/config.py b/urbanaccess/config.py index 421458c..cba8f25 100644 --- a/urbanaccess/config.py +++ b/urbanaccess/config.py @@ -16,11 +16,12 @@ def _format_check(settings): """ valid_keys = ['data_folder', 'logs_folder', 'log_file', - 'log_console', 'log_name', 'log_filename', 'gtfs_api'] + 'log_console', 'log_name', 'log_filename', + 'txt_encoding', 'gtfs_api'] for key in settings.keys(): if key not in valid_keys: - raise ValueError('{} not found in list of valid configuation ' + raise ValueError('{} not found in list of valid configuration ' 'keys'.format(key)) if not isinstance(key, str): raise ValueError('{} must be a string'.format(key)) @@ -42,13 +43,17 @@ class urbanaccess_config(object): logs_folder : str location to write log files log_file : bool - if true, save log output to a log file in logs_folder + if True, save log output to a log file in logs_folder log_console : bool - if true, print log output to the console + if True, print log output to the console log_name : str name of the logger log_filename : str name of the log file + txt_encoding : str + default text encoding used by the GTFS files, to be passed to + Python's open() function. Must be a valid encoding recognized by + Python codecs. 
gtfs_api : dict dictionary of the name of the GTFS API service as the key and the GTFS API server root URL as the value to pass to the GTFS loader @@ -61,6 +66,7 @@ def __init__(self, log_console=False, log_name='urbanaccess', log_filename='urbanaccess', + txt_encoding='utf-8', gtfs_api={'gtfsdataexch': ( 'http://www.gtfs-data-exchange.com/' 'api/agencies?format=csv')}): @@ -71,6 +77,7 @@ def __init__(self, self.log_console = log_console self.log_name = log_name self.log_filename = log_filename + self.txt_encoding = txt_encoding self.gtfs_api = gtfs_api @classmethod @@ -110,6 +117,7 @@ def from_yaml(cls, configdir='configs', log_name=yaml_config.get('log_name', 'urbanaccess'), log_filename=yaml_config.get('log_filename', 'urbanaccess'), + txt_encoding=yaml_config.get('txt_encoding', 'utf-8'), gtfs_api=yaml_config.get('gtfs_api', { 'gtfsdataexch': ('http://www.gtfs-data-exchange.com/' @@ -128,6 +136,7 @@ def to_dict(self): 'log_console': self.log_console, 'log_name': self.log_name, 'log_filename': self.log_filename, + 'txt_encoding': self.txt_encoding, 'gtfs_api': self.gtfs_api, } diff --git a/urbanaccess/gtfs/load.py b/urbanaccess/gtfs/load.py index 0facd16..43b88af 100644 --- a/urbanaccess/gtfs/load.py +++ b/urbanaccess/gtfs/load.py @@ -4,6 +4,7 @@ import time import pandas as pd import six +import logging as lg from urbanaccess import config from urbanaccess.utils import log @@ -20,7 +21,7 @@ def _standardize_txt(csv_rootpath=os.path.join(config.settings.data_folder, Parameters ---------- csv_rootpath : str, optional - root path where all gtfs feeds that make up a contiguous metropolitan + root path where all GTFS feeds that make up a contiguous metropolitan area are stored Returns @@ -59,6 +60,7 @@ def _txt_encoder_check(gtfsfiles_to_use, """ # UnicodeDecodeError start_time = time.time() + log('Checking GTFS text file for encoding issues...') folderlist = [foldername for foldername in os.listdir(csv_rootpath) if os.path.isdir(os.path.join(csv_rootpath, foldername))] @@ -74,14 +76,16 @@ def _txt_encoder_check(gtfsfiles_to_use, for textfile in textfilelist: if textfile in gtfsfiles_to_use: # Read from file - file_open = open(os.path.join(csv_rootpath, folder, textfile)) + file_path = os.path.join(csv_rootpath, folder, textfile) + file_open = open(file_path) raw = file_open.read() file_open.close() if raw.startswith(codecs.BOM_UTF8): + msg = 'Correcting encoding issue in: {}...' + log(msg.format(file_path)) raw = raw.replace(codecs.BOM_UTF8, '', 1) # Write to file - file_open = open( - os.path.join(csv_rootpath, folder, textfile), 'w') + file_open = open(file_path, 'w') file_open.write(raw) file_open.close() @@ -100,9 +104,9 @@ def _txt_header_whitespace_check(gtfsfiles_to_use, Parameters ---------- gtfsfiles_to_use : list - list of gtfs feed txt files to utilize + list of GTFS feed txt files to utilize csv_rootpath : str, optional - root path where all gtfs feeds that make up a contiguous metropolitan + root path where all GTFS feeds that make up a contiguous metropolitan area are stored Returns @@ -111,6 +115,11 @@ def _txt_header_whitespace_check(gtfsfiles_to_use, """ start_time = time.time() + txt_encoding = config.settings.txt_encoding + msg = ('Checking GTFS text file header whitespace... 
' 'Reading files using encoding: {} set in configuration.')
+    log(msg.format(txt_encoding))
+
     folderlist = [foldername for foldername in os.listdir(csv_rootpath)
                   if os.path.isdir(os.path.join(csv_rootpath, foldername))]
 
@@ -124,25 +133,41 @@
         for textfile in textfilelist:
             if textfile in gtfsfiles_to_use:
+                file_path = os.path.join(csv_rootpath, folder, textfile)
                 # Read from file
-                with open(os.path.join(csv_rootpath, folder, textfile)) as f:
-                    lines = f.readlines()
-                lines[0] = re.sub(r'\s+', '', lines[0]) + '\n'
-                # Write to file
                 try:
-                    with open(os.path.join(csv_rootpath, folder, textfile),
-                              'w') as f:
-                        f.writelines(lines)
-                except Exception:
-                    log('Unable to read {}. Check that file is not currently'
-                        'being read or is not already in memory as this is '
-                        'likely the cause of the error.'
-                        ''.format(os.path.join(csv_rootpath,
-                                               folder, textfile)))
-    log(
-        'GTFS text file header whitespace check completed. Took {:,'
-        '.2f} seconds'.format(
-            time.time() - start_time))
+                    if six.PY2:
+                        with open(file_path) as f:
+                            lines = f.readlines()
+                    else:
+                        # read using the txt_encoding set in the
+                        # configuration (defaults to 'utf-8')
+                        with open(
+                                file_path,
+                                encoding=txt_encoding) as f:
+                            lines = f.readlines()
+                    line_wo_whitespace = re.sub(r'\s+', '', lines[0]) + '\n'
+                    # only write the file if there are changes to be made
+                    if lines[0] != line_wo_whitespace:
+                        msg = 'Removing whitespace from header(s) in: {}...'
+                        log(msg.format(file_path))
+                        lines[0] = line_wo_whitespace
+                        # Write to file
+                        if six.PY2:
+                            with open(
+                                    file_path, 'w') as f:
+                                f.writelines(lines)
+                        else:
+                            # write using the txt_encoding set in the
+                            # configuration (defaults to 'utf-8')
+                            with open(
+                                    file_path, 'w',
+                                    encoding=txt_encoding) as f:
+                                f.writelines(lines)
+                except Exception as e:
+                    msg = 'Unable to process: {}. Exception: {}'
+                    raise Exception(log(msg.format(file_path, e),
+                                        level=lg.ERROR))
+    log('GTFS text file header whitespace check completed. '
+        'Took {:,.2f} seconds'.format(time.time() - start_time))
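Because the whitespace check now reads ``config.settings.txt_encoding`` at call time, a feed stored in a non-UTF-8 encoding can be handled by overriding that setting before loading; a minimal sketch, assuming a hypothetical Latin-1 feed (any encoding name recognized by Python's codecs is valid, and the feed path is a placeholder)::

    from urbanaccess import config
    import urbanaccess.gtfs.load as gtfs_load

    # override the default 'utf-8' before the GTFS txt files are read
    config.settings.txt_encoding = 'latin-1'
    loaded_feeds = gtfs_load.gtfsfeed_to_df(
        gtfsfeed_path='data/gtfsfeed_text', validation=False, verbose=True,
        bbox=None, remove_stops_outsidebbox=False, append_definitions=False)

Note that the Python 2 branch above opens files without an ``encoding`` argument, so the setting only takes effect on Python 3.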
 
 
 def gtfsfeed_to_df(gtfsfeed_path=None, validation=False, verbose=True,
@@ -156,7 +181,7 @@
     Parameters
     ----------
     gtfsfeed_path : str, optional
-        root path where all gtfs feeds that make up a contiguous metropolitan
+        root path where all GTFS feeds that make up a contiguous metropolitan
         area are stored
     validation : bool
         if true, the validation check on stops checking for stops outside
@@ -236,8 +261,20 @@
                         os.listdir(os.path.join(gtfsfeed_path, folder))
                         if textfilename.endswith(".txt")]
         required_gtfsfiles = ['stops.txt', 'routes.txt', 'trips.txt',
-                              'stop_times.txt', 'calendar.txt']
-        optional_gtfsfiles = ['agency.txt', 'calendar_dates.txt']
+                              'stop_times.txt']
+        optional_gtfsfiles = ['agency.txt']
+        # either calendar or calendar_dates is required
+        calendar_gtfsfiles = ['calendar.txt', 'calendar_dates.txt']
+
+        calendar_files = [i for i in calendar_gtfsfiles if i in textfilelist]
+        if len(calendar_files) == 0:
+            error_msg = (
+                'at least one of `calendar.txt` or `calendar_dates.txt` is '
+                'required to complete a GTFS dataset but neither was found in '
+                'folder {}')
+            raise ValueError(error_msg.format(os.path.join(
+                gtfsfeed_path, folder)))
+
         for required_file in required_gtfsfiles:
             if required_file not in textfilelist:
                 raise ValueError(
@@ -263,10 +300,32 @@
                 stop_times_df = utils_format._read_gtfs_stop_times(
                     textfile_path=os.path.join(gtfsfeed_path, folder),
                     textfile=textfile)
+
+        for textfile in calendar_files:
+            # use both calendar and calendar_dates if they exist, otherwise
+            # if only one of them exists use the one that exists and set the
+            # other one that does not exist to a blank df
             if textfile == 'calendar.txt':
                 calendar_df = utils_format._read_gtfs_calendar(
                     textfile_path=os.path.join(gtfsfeed_path, folder),
                     textfile=textfile)
+                # if only calendar, set calendar_dates as blank
+                # with default required columns
+                if len(calendar_files) == 1:
+                    calendar_dates_df = pd.DataFrame(
+                        columns=['service_id', 'date', 'exception_type'])
+            else:
+                calendar_dates_df = utils_format._read_gtfs_calendar_dates(
+                    textfile_path=os.path.join(gtfsfeed_path, folder),
+                    textfile=textfile)
+                # if only calendar_dates, set calendar as blank
+                # with default required columns
+                if len(calendar_files) == 1:
+                    calendar_df = pd.DataFrame(
+                        columns=['service_id', 'monday',
+                                 'tuesday', 'wednesday', 'thursday',
+                                 'friday', 'saturday', 'sunday',
+                                 'start_date', 'end_date'])
 
         for textfile in optional_gtfsfiles:
             if textfile == 'agency.txt':
@@ -276,13 +335,6 @@
                     textfile=textfile)
                 else:
                     agency_df = pd.DataFrame()
-            if textfile == 'calendar_dates.txt':
-                if textfile in textfilelist:
-                    calendar_dates_df = utils_format._read_gtfs_calendar_dates(
-                        textfile_path=os.path.join(gtfsfeed_path, folder),
-                        textfile=textfile)
-                else:
-                    calendar_dates_df = pd.DataFrame()
 
     stops_df, routes_df, trips_df, stop_times_df, calendar_df, \
         calendar_dates_df = (utils_format
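To make the loader's fallback concrete, here is a sketch of what ``gtfsfeed_to_df`` now returns for a feed folder that contains ``calendar_dates.txt`` but no ``calendar.txt`` (the path is a placeholder and ``gtfs_load`` is the import alias used in the tests below)::

    import urbanaccess.gtfs.load as gtfs_load

    loaded_feeds = gtfs_load.gtfsfeed_to_df(
        gtfsfeed_path='data/gtfsfeed_text', validation=False, verbose=True,
        bbox=None, remove_stops_outsidebbox=False, append_definitions=False)
    # calendar comes back as an empty DataFrame with the default required
    # columns, while calendar_dates carries the feed's records
    assert loaded_feeds.calendar.empty
    assert not loaded_feeds.calendar_dates.empty

If neither calendar file is present, the ``ValueError`` above is raised at load time rather than surfacing later during network creation.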
diff --git a/urbanaccess/gtfs/network.py b/urbanaccess/gtfs/network.py
index fa6592b..39b6725 100644
--- a/urbanaccess/gtfs/network.py
+++ b/urbanaccess/gtfs/network.py
@@ -58,7 +58,7 @@ def create_transit_net(gtfsfeeds_dfs, day,
         DataFrame for the same time period stored in the
         gtfsfeeds_dfs object it will be used instead of re-calculated
     save_processed_gtfs : bool, optional
-        if true, all processed gtfs DataFrames will
+        if True, all processed GTFS DataFrames will
         be stored to disk in a hdf5 file
     save_dir : str, optional
         directory to save the hdf5 file
@@ -97,10 +97,15 @@
             level=lg.WARNING)
     if gtfsfeeds_dfs is None:
         raise ValueError('gtfsfeeds_dfs is None')
-    if gtfsfeeds_dfs.trips.empty or gtfsfeeds_dfs.calendar.empty or \
-            gtfsfeeds_dfs.stop_times.empty or gtfsfeeds_dfs.stops.empty:
-        raise ValueError('one of the gtfsfeeds_dfs object trips, calendar, '
-                         'stops, or stop_times were found to be empty.')
+    error_msg = ('one of the following gtfsfeeds_dfs objects {} were '
+                 'found to be empty.')
+    if gtfsfeeds_dfs.trips.empty or gtfsfeeds_dfs.stop_times.empty or \
+            gtfsfeeds_dfs.stops.empty:
+        error_msg_case_1 = 'trips, stops, or stop_times'
+        raise ValueError(error_msg.format(error_msg_case_1))
+    if gtfsfeeds_dfs.calendar.empty and gtfsfeeds_dfs.calendar_dates.empty:
+        error_msg_case_2 = 'calendar or calendar_dates'
+        raise ValueError(error_msg.format(error_msg_case_2))
     if not isinstance(overwrite_existing_stop_times_int, bool):
         raise ValueError('overwrite_existing_stop_times_int must be bool')
     if not isinstance(use_existing_stop_times_int, bool):
@@ -117,6 +122,9 @@
     if 'direction_id' not in gtfsfeeds_dfs.trips.columns:
         columns.remove('direction_id')
 
+    # TODO: support the use case where only calendar_dates is in use: make
+    # 'day' optional (default None) and require that at least one of 'day'
+    # or 'calendar_dates_lookup' is provided
     calendar_selected_trips_df = _trip_schedule_selector(
         input_trips_df=gtfsfeeds_dfs.trips[columns],
         input_calendar_df=gtfsfeeds_dfs.calendar,
@@ -129,8 +137,7 @@
             is False:
         gtfsfeeds_dfs.stop_times_int = _interpolate_stop_times(
             stop_times_df=gtfsfeeds_dfs.stop_times,
-            calendar_selected_trips_df=calendar_selected_trips_df,
-            day=day)
+            calendar_selected_trips_df=calendar_selected_trips_df)
 
         gtfsfeeds_dfs.stop_times_int = _time_difference(
             stop_times_df=gtfsfeeds_dfs.stop_times_int)
@@ -209,7 +216,7 @@ def _trip_schedule_selector(input_trips_df, input_calendar_df,
         day in the GTFS calendar
     calendar_dates_lookup : dict, optional
         dictionary of the lookup column (key) as a string and corresponding
-        string (value) a s string or list of strings to use to subset trips
+        string (value) as string or list of strings to use to subset trips
         using the calendar_dates DataFrame. Search will be exact. If none,
         then the calendar_dates DataFrame will not be used to select trips
         that are not in the calendar DataFrame.
Note search will select all @@ -369,18 +376,19 @@ def _trip_schedule_selector(input_trips_df, input_calendar_df, subset_result = input_calendar_dates_df[ input_calendar_dates_df[col_name_key].str.match( text, case=False, na=False)] - feed_id_list = subset_result['unique_feed_id'].unique() - for index, id in enumerate(feed_id_list): - feed_id_list[index] = ' '.join(id.split('_')[:-1]) + if len(subset_result) != 0: + feed_id_list = subset_result['unique_feed_id'].unique() + for index, id in enumerate(feed_id_list): + feed_id_list[index] = ' '.join(id.split('_')[:-1]) - log('Found {:,} records that matched query: column: {} and ' - 'string: {} for GTFS feed(s): {}'.format(len( - subset_result), - col_name_key, - text, - feed_id_list)) + log('Found {:,} records that matched query: column: {} ' + 'and string: {} for GTFS feed(s): {}'.format(len( + subset_result), + col_name_key, + text, + feed_id_list)) - subset_result_df = subset_result_df.append(subset_result) + subset_result_df = subset_result_df.append(subset_result) subset_result_df.drop_duplicates(inplace=True) subset_result_df = subset_result_df[['unique_service_id']] @@ -428,7 +436,7 @@ def _trip_schedule_selector(input_trips_df, input_calendar_df, return calendar_selected_trips_df -def _interpolate_stop_times(stop_times_df, calendar_selected_trips_df, day): +def _interpolate_stop_times(stop_times_df, calendar_selected_trips_df): """ Interpolate missing stop times using a linear interpolator between known stop times @@ -439,10 +447,6 @@ def _interpolate_stop_times(stop_times_df, calendar_selected_trips_df, day): stop times DataFrame calendar_selected_trips_df : pandas.DataFrame DataFrame of trips that run on specific day - day : {'friday','monday','saturday','sunday','thursday','tuesday', - 'wednesday'} - day of the week to extract transit schedule from that corresponds - to the day in the GTFS calendar Returns ------- @@ -480,7 +484,7 @@ def _interpolate_stop_times(stop_times_df, calendar_selected_trips_df, day): 'unique_trip_id'].unique().tolist() # select trip ids that match the trips in the # calendar_selected_trips_df -- resulting df will be stop times - # only for trips that run on the service day of interest + # only for trips that run on the service day or dates of interest stop_times_df = stop_times_df[ stop_times_df['unique_trip_id'].isin(uniquetriplist)] @@ -498,10 +502,10 @@ def _interpolate_stop_times(stop_times_df, calendar_selected_trips_df, day): level=lg.WARNING) log('Starting departure stop time interpolation...') log( - 'Departure time records missing from trips following {} ' - 'schedule: {:,} ({:.2f} percent of {:,} total ' + 'Departure time records missing from trips following the ' + 'specified schedule: {:,} ({:.2f} percent of {:,} total ' 'records)'.format( - day, missing_stop_times_count, + missing_stop_times_count, (missing_stop_times_count / len(stop_times_df)) * 100, len(stop_times_df['departure_time_sec']))) @@ -510,8 +514,8 @@ def _interpolate_stop_times(stop_times_df, calendar_selected_trips_df, day): else: log('There are no departure time records missing from trips ' - 'following {} schedule. There are no records to ' - 'interpolate.'.format(day)) + 'following the specified schedule. 
There are no records to '
+            'interpolate.')
 
     # Find trips with more than one missing time
     # Note: all trip ids have at least 1 null departure time because the
diff --git a/urbanaccess/gtfs/utils_format.py b/urbanaccess/gtfs/utils_format.py
index 6ed694d..65d71ef 100644
--- a/urbanaccess/gtfs/utils_format.py
+++ b/urbanaccess/gtfs/utils_format.py
@@ -179,13 +179,12 @@ def _read_gtfs_calendar(textfile_path, textfile):
     df = pd.read_csv(os.path.join(textfile_path, textfile),
                      dtype={'service_id': object}, low_memory=False)
     if len(df) == 0:
-        error_msg = ('{} has no records. This could indicate that this feed '
-                     'is using calendar_dates.txt for service_ids. If so, '
-                     'make a dummy row in calendar.txt to proceed.')
-        raise ValueError(error_msg.format(os.path.join(textfile_path,
-                                                       textfile)))
+        warning_msg = ('{} has no records. This could indicate that this '
+                       'feed is using calendar_dates.txt for service_ids.')
+        log(warning_msg.format(os.path.join(
+            textfile_path, textfile)), level=lg.WARNING)
 
     columnlist = ['monday', 'tuesday', 'wednesday', 'thursday', 'friday',
                   'saturday', 'sunday']
@@ -217,8 +215,11 @@
     df = pd.read_csv(os.path.join(textfile_path, textfile),
                      dtype={'service_id': object}, low_memory=False)
     if len(df) == 0:
-        raise ValueError('{} has no records'.format(os.path.join(
-            textfile_path, textfile)))
+        warning_msg = ('{} has no records. This could indicate that this '
+                       'feed is using calendar.txt for service_ids.')
+        log(warning_msg.format(os.path.join(
+            textfile_path, textfile)), level=lg.WARNING)
+
     # remove any extra whitespace in column names
     df.rename(columns=lambda x: x.strip(), inplace=True)
     return df
diff --git a/urbanaccess/gtfsfeeds.py b/urbanaccess/gtfsfeeds.py
index 0eb5373..26acb02 100644
--- a/urbanaccess/gtfsfeeds.py
+++ b/urbanaccess/gtfsfeeds.py
@@ -5,7 +5,7 @@
 import os
 import logging as lg
 import time
-from six.moves.urllib.request import urlopen
+from six.moves.urllib import request
 
 from urbanaccess.utils import log
 from urbanaccess import config
@@ -78,9 +78,11 @@ def from_yaml(cls, gtfsfeeddir=os.path.join(config.settings.data_folder,
                 for value in yaml_config['gtfs_feeds'][key]:
                     if not isinstance(value, str):
                         raise ValueError('{} must be a string'.format(value))
-
-        if (pd.Series(
-                yaml_config['gtfs_feeds'].values()).value_counts() != 1).all():
+        unique_url_count = len(
+            pd.DataFrame.from_dict(yaml_config['gtfs_feeds'], orient='index')[
+                0].unique())
+        url_count = len(yaml_config['gtfs_feeds'])
+        if unique_url_count != url_count:
             raise ValueError(
                 'duplicate values were found when the passed add_dict '
                 'dictionary was added to the existing dictionary. Feed URL '
@@ -439,7 +441,7 @@
             raise ValueError('{} must be a string'.format(value))
 
         for key, value in feed_dict.items():
-            if value in feed_dict.gtfs_feeds.values():
+            if value in feeds.gtfs_feeds.values():
                 raise ValueError(
                     'duplicate values were found when the passed add_dict '
                     'dictionary was added to the existing dictionary. Feed '
@@ -458,70 +460,81 @@
     if not os.path.exists(download_folder):
         os.makedirs(download_folder)
         log('{} does not exist. 
Directory was created'.format(download_folder)) - log('{} GTFS feeds will be downloaded here: {}'.format( + log('{:,} GTFS feed(s) will be downloaded here: {}'.format( len(feeds.gtfs_feeds), download_folder)) start_time1 = time.time() + msg_no_connection_w_status = ('Unable to connect. URL at {} returned ' + 'status code {} and no data') + msg_no_connection = 'Unable to connect to: {}. Error: {}' + msg_download_succeed = ('{} GTFS feed downloaded successfully. ' + 'Took {:,.2f} seconds for {:,.1f}KB') # TODO: add file counter and print number to user for feed_name_key, feed_url_value in feeds.gtfs_feeds.items(): start_time2 = time.time() zipfile_path = ''.join([download_folder, '/', feed_name_key, '.zip']) - if 'http' in feed_url_value: - status_code = urlopen(feed_url_value).getcode() - if status_code == 200: - file = urlopen(feed_url_value) - - _zipfile_type_check(file=file, - feed_url_value=feed_url_value) + # add default user-agent header in request to avoid 403 Errors + opener = request.build_opener() + opener.addheaders = [('User-agent', '')] + request.install_opener(opener) - with open(zipfile_path, "wb") as local_file: - local_file.write(file.read()) - log( - '{} GTFS feed downloaded successfully. Took {:,' - '.2f} seconds for {:,.1f}KB'.format( - feed_name_key, time.time() - start_time2, - os.path.getsize(zipfile_path))) - elif status_code in [429, 504]: - log( - 'URL at {} returned status code {} and no data. ' - 'Re-trying request in {:.2f} seconds.'.format( - feed_url_value, status_code, error_pause_duration), - level=lg.WARNING) - time.sleep(error_pause_duration) - try: - file = urlopen(feed_url_value) + if 'http' in feed_url_value: + try: + status_code = request.urlopen(feed_url_value).getcode() + if status_code == 200: + file = request.urlopen(feed_url_value) _zipfile_type_check(file=file, feed_url_value=feed_url_value) with open(zipfile_path, "wb") as local_file: local_file.write(file.read()) - except Exception: - log('Unable to connect. URL at {} returned status code ' - '{} and no data'.format(feed_url_value, status_code), + log(msg_download_succeed.format( + feed_name_key, time.time() - start_time2, + os.path.getsize(zipfile_path))) + elif status_code in [429, 504]: + msg = ('URL at {} returned status code {} and no data. ' + 'Re-trying request in {:.2f} seconds.') + log(msg.format(feed_url_value, status_code, + error_pause_duration), + level=lg.WARNING) + time.sleep(error_pause_duration) + try: + file = request.urlopen(feed_url_value) + + _zipfile_type_check(file=file, + feed_url_value=feed_url_value) + + with open(zipfile_path, "wb") as local_file: + local_file.write(file.read()) + except Exception: + log(msg_no_connection_w_status.format( + feed_url_value, status_code), + level=lg.ERROR) + else: + log(msg_no_connection_w_status.format( + feed_url_value, status_code), level=lg.ERROR) - else: - log( - 'Unable to connect. URL at {} returned status code {} ' - 'and no data'.format( - feed_url_value, status_code), level=lg.ERROR) + except Exception: + log(msg_no_connection.format( + feed_url_value, traceback.format_exc()), + level=lg.ERROR) else: try: - file = urlopen(feed_url_value) + file = request.urlopen(feed_url_value) _zipfile_type_check(file=file, feed_url_value=feed_url_value) - with open( - ''.join([download_folder, '/', feed_name_key, '.zip']), - "wb") as local_file: + file_path = ''.join( + [download_folder, '/', feed_name_key, '.zip']) + with open(file_path, "wb") as local_file: local_file.write(file.read()) - log( - '{} GTFS feed downloaded successfully. 
Took {:,' - '.2f} seconds for {:,.1f}KB'.format( - feed_name_key, time.time() - start_time2, - os.path.getsize(zipfile_path))) + log(msg_download_succeed.format( + feed_name_key, time.time() - start_time2, + os.path.getsize(zipfile_path))) except Exception: - log('Unable to connect: {}'.format(traceback.format_exc()), + log(msg_no_connection.format( + feed_url_value, traceback.format_exc()), level=lg.ERROR) log('GTFS feed download completed. Took {:,.2f} seconds'.format( diff --git a/urbanaccess/plot.py b/urbanaccess/plot.py index 69dbae3..ab24594 100644 --- a/urbanaccess/plot.py +++ b/urbanaccess/plot.py @@ -13,7 +13,8 @@ def plot_net(nodes, edges, x_col=None, y_col=None, from_col=None, fig_height=6, margin=0.02, edge_color='#999999', edge_linewidth=1, edge_alpha=1, node_color='black', node_size=15, node_alpha=1, - node_edgecolor='none', node_zorder=3, nodes_only=False): + node_edgecolor='none', node_zorder=3, nodes_only=False, + ax=None): """ plot urbanaccess network nodes and edges @@ -59,6 +60,9 @@ def plot_net(nodes, edges, x_col=None, y_col=None, from_col=None, nodes under the edges, 3 will plot nodes on top nodes_only : bool if true only the nodes will plot + ax : matplotlib.axes._subplots.AxesSubplot, optional + matplotlib axes, as given by, for example, plt.subplot. + Use to specify the projection. Returns ------- @@ -117,8 +121,11 @@ def plot_net(nodes, edges, x_col=None, y_col=None, from_col=None, 'in a negative value or 0')) bbox_aspect_ratio = (y_max - y_min) / (x_max - x_min) - fig, ax = plt.subplots(figsize=(fig_height / bbox_aspect_ratio, - fig_height)) + if ax is None: + fig, ax = plt.subplots(figsize=(fig_height / bbox_aspect_ratio, + fig_height)) + else: + fig = ax.figure if nodes_only is False: # TODO: optimize for speed by calculating only for edges that are diff --git a/urbanaccess/tests/conftest.py b/urbanaccess/tests/conftest.py new file mode 100644 index 0000000..5ca8297 --- /dev/null +++ b/urbanaccess/tests/conftest.py @@ -0,0 +1,799 @@ +import pytest +import os +import pandas as pd +import numpy as np + + +@pytest.fixture +def agency_feed_1(): + data = { + 'agency_id': 'agency a', + 'agency_name': 'agency a city a', + 'agency_url': 'http://www.agency_a.org', + 'agency_timezone': 'America/Los_Angeles', + 'agency_phone': '(000) 000-0000' + } + index = range(1) + + df = pd.DataFrame(data, index) + return df + + +@pytest.fixture +def agency_feed_2(): + data = { + 'agency_id': ['agency b bus', 'agency b rail'], + 'agency_name': ['agency b district 1', 'agency b district 2'], + 'agency_url': ['http://www.agency_b.org', 'http://www.agency_b.org'], + 'agency_timezone': ['America/Los_Angeles', 'America/Los_Angeles'], + 'agency_phone': ['(000) 000-0000', '(000) 000-0000'] + } + index = range(2) + + df = pd.DataFrame(data, index) + return df + + +@pytest.fixture +def agency_feed_3(): + data = { + 'agency_id': '', + 'agency_name': 'agency c', + 'agency_url': 'http://www.agency_c.org', + 'agency_timezone': 'America/Los_Angeles', + 'agency_phone': '(000) 000-0000' + } + index = range(1) + + df = pd.DataFrame(data, index) + return df + + +@pytest.fixture +def agency_feed_4(): + data = { + 'agency_id': ['agency 1', 'agency 2', 'agency 3'], + 'agency_name': ['agency 1 bus', 'agency 2 rail', 'agency 3 metro'], + 'agency_url': ['http://www.agency_1.org', 'http://www.agency_2.org', + 'http://www.agency_2.org'], + 'agency_timezone': ['America/Los_Angeles', 'America/Los_Angeles', + 'America/Los_Angeles'], + 'agency_phone': ['(000) 000-0000', '(000) 000-0000', '(000) 000-0000'] + } + 
index = range(3) + + df = pd.DataFrame(data, index) + return df + + +@pytest.fixture +def routes_feed_1(): + data = { + 'agency_id': ['agency a'] * 4, + 'route_id': ['10-101', '11-101', '12-101', '13-101'], + 'route_short_name': ['10', '11', 'red', 'blue'], + 'route_long_name': ['ave a local', 'ave a express', 'red line', + 'blue line'], + 'route_type': [3, 3, 1, 1] + } + + index = range(4) + + df = pd.DataFrame(data, index) + return df + + +@pytest.fixture +def routes_feed_2(): + data = { + 'agency_id': ['agency b bus', 'agency b bus', 'agency b rail', + 'agency b rail'], + 'route_id': ['40-4', '40-4x', 'r-2', 'r-2ext'], + 'route_short_name': ['40', '40', 'red', 'red-ext'], + 'route_long_name': ['ave a local', 'ave a express', 'red line', + 'red line extension'], + 'route_type': [3, 3, 1, 1] + } + + index = range(4) + + df = pd.DataFrame(data, index) + return df + + +@pytest.fixture +def routes_feed_4(): + data = { + 'agency_id': ['agency 1', 'agency 1', 'agency 2', 'agency 2', + 'agency 3', 'agency 3'], + 'route_id': ['a1x', 'a1', 'a2x', 'a2', 'a3x', 'a3'], + 'route_short_name': ['1x', '1', '2x', '2', '3x', '3'], + 'route_long_name': ['1 express', '1 local', '2 express', + '2 local', '3 express', '3 local'], + 'route_type': [3, 3, 3, 3, 3, 3] + } + + index = range(6) + + df = pd.DataFrame(data, index) + return df + + +@pytest.fixture +def stops_feed_1(): + data = { + 'stop_id': ['1', '2', '3', '4', '5', '6', + '7', '8', '9'], + 'stop_name': ['ave a', 'ave b', 'ave c', 'ave d', 'ave e', 'ave f', + '1st st', '2nd st', '3rd st'], + 'stop_lat': [37.797484, 37.774963, 37.803664, 37.80787, 37.828415, + 37.844601, 37.664174, 37.591208, 37.905628], + 'stop_lon': [-122.265609, -122.224274, -122.271604, -122.269029, + -122.267227, -122.251793, -122.444116, -122.017867, + -122.067423], + 'location_type': [1, 1, 1, 1, 1, 1, + 2, 2, 2], + 'wheelchair_boarding': [1, 0, 0, 0, 0, 0, + 1, 1, 1] + } + + index = range(9) + + df = pd.DataFrame(data, index) + return df + + +@pytest.fixture +def stops_feed_2(): + data = { + 'stop_id': ['60', '61', '62', '63', '64', '65', + '66', '67', '68', + '600', '601', '602', '603', '604', '605', '606'], + 'stop_name': ['ave m', 'ave n', 'ave o', 'ave p', 'ave q', 'ave r', + '10th st', '11th st', '12th st', + '121th st', '122th st', '123th st', '124th st', + '125th st', '126th st', '127th st'], + 'stop_lat': [38.797484, 38.774963, 38.803664, 38.80787, 38.828415, + 38.844601, 38.664174, 38.591208, 38.905628, + 38.603664, 38.60787, 38.628415, + 38.644601, 38.660000, 38.691208, 38.605628], + 'stop_lon': [-121.265609, -121.224274, -121.271604, -121.269029, + -121.267227, -121.251793, -121.444116, -121.017867, + -121.067423, -122.271604, -122.269029, -122.267227, + -122.251793, -122.444116, -122.017867, -122.067423], + 'location_type': [1, 1, 1, 1, 1, 1, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2], + 'wheelchair_boarding': [1, 0, 0, 0, 0, 0, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1] + } + + index = range(16) + + df = pd.DataFrame(data, index) + return df + + +@pytest.fixture +def stops_feed_4(): + data = { + 'stop_id': ['70', '71', '72', '73', '74', '75', + '76', '77', '78'], + 'stop_name': ['station 1', 'station 2', 'station 3', 'station 4', + 'station 5', 'station 6', + 'station 7', 'station 8', 'station 9'], + 'stop_lat': [20.797484, 20.774963, 20.803664, 20.80787, 20.828415, + 20.844601, 20.664174, 20.591208, 20.905628], + 'stop_lon': [-100.265609, -100.224274, -100.271604, -100.269029, + -100.267227, -100.251793, -100.444116, -100.017867, + -100.067423] + } + + index = range(9) + + df = 
pd.DataFrame(data, index) + return df + + +@pytest.fixture +def trips_feed_1(): + data = { + 'route_id': ['10-101', '10-101', '10-101', '10-101', + '11-101', '11-101', + '12-101', '12-101', + '13-101', '13-101'], + 'trip_id': ['a1', 'a2', 'a3', 'a4', + 'b1', 'b2', + 'c1', 'c2', + 'd1', 'd2'], + 'service_id': ['weekday-1', 'weekday-1', 'weekday-1', 'weekday-1', + 'weekday-2', 'weekday-2', + 'weekday-3', 'weekday-3', + 'weekend-1', 'weekend-1'], + 'direction_id': [1, 0, 1, 0, + 1, 0, + 1, 0, + 1, 0], + 'wheelchair_accessible': [1, 1, 1, 1, + 0, 0, + 0, 0, + 0, 0], + 'bikes_allowed': [1, 1, 1, 1, + 0, 0, + 0, 0, + 0, 0] + } + + index = range(10) + + df = pd.DataFrame(data, index) + return df + + +@pytest.fixture +def trips_feed_2(): + data = { + + 'route_id': ['40-4', '40-4', '40-4', '40-4', + '40-4x', '40-4x', + 'r-2', 'r-2', + 'r-2ext', 'r-2ext'], + 'trip_id': ['11', '12', '13', '14', + '21', '22', + '31', '32', + '41', '42'], + 'service_id': ['weekday-1', 'weekday-1', 'weekday-1', 'weekday-1', + 'weekday-2', 'weekday-2', + 'weekday-3', 'weekday-3', + 'weekend-1', 'weekend-1'] + } + + index = range(10) + + df = pd.DataFrame(data, index) + return df + + +@pytest.fixture +def trips_feed_4(): + data = { + 'route_id': ['a1x', 'a1x', 'a1x', 'a1x', + 'a1', 'a1', + 'a2x', 'a2x', + 'a2', 'a2', + 'a3x', 'a3x', + 'a3', 'a3'], + 'trip_id': ['a131', 'a132', 'a133', 'a134', + 'a135', 'a136', + 'a237', 'a238', + 'a239', 'a240', + 'a341', 'a342', + 'a343', 'a344'], + 'service_id': ['wk-1', 'wk-1', 'wk-1', 'wk-1', + 'wk-1', 'wk-1', + 'wk-1', 'wk-1', + 'wk-1', 'wk-1', + 'wk-1', 'wk-1', + 'wk-1', 'wk-1'] + } + + index = range(14) + + df = pd.DataFrame(data, index) + return df + + +@pytest.fixture +def calendar_feed_1(): + data = { + 'service_id': ['weekday-1', + 'weekday-2', + 'weekday-3', + 'weekend-1'], + 'monday': [1, 1, 1, 0], + 'tuesday': [1, 1, 1, 0], + 'wednesday': [1, 1, 1, 0], + 'thursday': [1, 1, 1, 0], + 'friday': [1, 1, 1, 0], + 'saturday': [0, 0, 0, 1], + 'sunday': [0, 0, 0, 1], + 'start_date': [20161224] * 4, + 'end_date': [20170318] * 4} + + index = range(4) + + df = pd.DataFrame(data, index) + return df + + +@pytest.fixture +def calendar_feed_2(): + data = { + 'service_id': ['weekday-1', + 'weekday-2', + 'weekday-3', + 'weekend-1'], + 'monday': [1, 1, 1, 0], + 'tuesday': [1, 1, 1, 0], + 'wednesday': [1, 1, 1, 0], + 'thursday': [1, 1, 1, 0], + 'friday': [1, 1, 1, 0], + 'saturday': [0, 0, 0, 1], + 'sunday': [0, 0, 0, 1], + 'start_date': [20161224] * 4, + 'end_date': [20170318] * 4} + + index = range(4) + + df = pd.DataFrame(data, index) + return df + + +@pytest.fixture +def calendar_feed_4(): + data = { + 'service_id': ['wk-1'], + 'monday': [1], + 'tuesday': [1], + 'wednesday': [1], + 'thursday': [1], + 'friday': [1], + 'saturday': [0], + 'sunday': [0], + 'start_date': [20161224], + 'end_date': [20170318]} + + index = range(1) + + df = pd.DataFrame(data, index) + return df + + +@pytest.fixture +def calendar_empty(): + columns = {'service_id', + 'monday', + 'tuesday', + 'wednesday', + 'thursday', + 'friday', + 'saturday', + 'sunday', + 'start_date', + 'end_date'} + + df = pd.DataFrame(columns=columns) + return df + + +@pytest.fixture +def calendar_dates_feed_1(): + data = { + 'service_id': ['weekday-1', + 'weekday-2', + 'weekday-3', + 'weekend-1'], + 'date': [20161224, 20170318, 20160424, 20161230], + 'exception_type': [1, 2, 1, 1]} + + index = range(4) + + df = pd.DataFrame(data, index) + return df + + +@pytest.fixture +def calendar_dates_feed_2(): + data = { + 'service_id': ['weekday-1', + 
'weekday-2', + 'weekday-3', + 'weekend-1'], + 'date': [20161224, 20170318, 20160424, 20161230], + 'exception_type': [1, 2, 1, 1], + 'schedule_type': ['WD', 'WD', 'WD', 'SA'] + } + + index = range(4) + + df = pd.DataFrame(data, index) + return df + + +@pytest.fixture +def calendar_dates_feed_4(): + data = { + 'service_id': ['wk-1'], + 'date': [20161224], + 'exception_type': [1]} + + index = range(1) + + df = pd.DataFrame(data, index) + return df + + +@pytest.fixture +def stop_times_feed_1(): + data = { + 'trip_id': ['a1', 'a1', 'a1', 'a1', 'a1', 'a1', + 'a2', 'a2', 'a2', 'a2', 'a2', 'a2', + 'a3', 'a3', 'a3', 'a3', 'a3', 'a3', + 'a4', 'a4', 'a4', 'a4', 'a4', 'a4', + 'b1', 'b1', 'b1', 'b1', 'b1', 'b1', + 'b2', 'b2', 'b2', 'b2', 'b2', 'b2', + 'c1', 'c1', 'c1', 'c1', 'c1', 'c1', + 'c2', 'c2', 'c2', 'c2', 'c2', 'c2', + 'd1', 'd1', 'd1', + 'd2', 'd2', 'd2'], + 'stop_id': ['1', '2', '3', '4', '5', '6', + '6', '5', '4', '3', '2', '1', + '1', '2', '3', '4', '5', '6', + '6', '5', '4', '3', '2', '1', + '1', '2', '3', '4', '5', '6', + '6', '5', '4', '3', '2', '1', + '1', '2', '3', '4', '5', '6', + '6', '5', '4', '3', '2', '1', + '7', '8', '9', + '9', '8', '7'], + 'arrival_time': ['06:15:00', '06:20:00', np.nan, np.nan, '06:35:00', + '06:40:00', + '06:15:00', '06:20:00', '06:25:00', '06:30:00', + np.nan, '06:40:00', + '08:15:00', '08:20:00', '08:25:00', '08:30:00', + '08:35:00', '08:40:00', + '13:15:00', '13:20:00', '13:25:00', '13:30:00', + '13:35:00', '13:40:00', + '06:15:00', '06:20:00', np.nan, np.nan, '06:35:00', + '06:40:00', + '06:15:00', '06:20:00', '06:25:00', '06:30:00', + np.nan, '06:40:00', + '26:15:00', '26:20:00', np.nan, np.nan, '26:35:00', + '26:40:00', + '06:15:00', '06:20:00', '06:25:00', '06:30:00', + np.nan, '06:40:00', + '06:15:00', '06:20:00', '06:25:00', + '06:15:00', '06:20:00', '06:25:00'], + 'departure_time': ['06:15:00', '06:20:00', np.nan, np.nan, + '06:35:00', '06:40:00', + '06:15:00', '06:20:00', '06:25:00', '06:30:00', + np.nan, '06:40:00', + '08:15:00', '08:20:00', '08:25:00', '08:30:00', + '08:35:00', '08:40:00', + '13:15:00', '13:20:00', '13:25:00', '13:30:00', + '13:35:00', '13:40:00', + '06:15:00', '06:20:00', np.nan, np.nan, + '06:35:00', '06:40:00', + '06:15:00', '06:20:00', '06:25:00', '06:30:00', + np.nan, '06:40:00', + '26:15:00', '26:20:00', np.nan, np.nan, + '26:35:00', '26:40:00', + '06:15:00', '06:20:00', '06:25:00', '06:30:00', + np.nan, '06:40:00', + '06:15:00', '06:20:00', '06:25:00', + '06:15:00', '06:20:00', '06:25:00'], + 'stop_sequence': [1, 2, 3, 4, 5, 6, + 1, 2, 3, 4, 5, 6, + 1, 2, 3, 4, 5, 6, + 1, 2, 3, 4, 5, 6, + 1, 2, 3, 4, 5, 6, + 1, 2, 3, 4, 5, 6, + 1, 2, 3, 4, 5, 6, + 1, 2, 3, 4, 5, 6, + 1, 2, 3, + 1, 2, 3], + 'pickup_type': [0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 1, 1, 1, 1, 1, 1, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, + 0, 0, 0], + 'drop_off_type': [0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 1, 1, 1, 1, 1, 1, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, + 0, 0, 0] + } + index = range(54) + + df = pd.DataFrame(data, index) + return df + + +@pytest.fixture +def stop_times_feed_2(): + data = { + 'trip_id': ['11', '11', '11', '11', '11', '11', + '12', '12', '12', '12', '12', '12', + '13', '13', '13', '13', '13', '13', + '14', '14', '14', '14', '14', '14', + '21', '21', '21', '21', '21', '21', + '22', '22', '22', '22', '22', '22', + '31', '31', '31', '31', '31', '31', + '32', '32', '32', '32', '32', '32', + '41', 
'41', '41', + '42', '42', '42'], + 'stop_id': ['60', '61', '62', '63', '64', '65', + '65', '64', '63', '62', '61', '60', + '60', '61', '62', '63', '64', '65', + '65', '64', '63', '62', '61', '60', + '60', '61', '62', '63', '64', '65', + '65', '64', '63', '62', '61', '60', + '600', '601', '602', '603', '604', '605', + '606', '605', '604', '603', '602', '601', + '66', '67', '68', + '68', '67', '66'], + 'arrival_time': ['06:15:00', '06:20:00', np.nan, np.nan, '06:35:00', + '06:40:00', + '06:15:00', '06:20:00', '06:25:00', '06:30:00', + np.nan, '06:40:00', + '08:15:00', '08:20:00', '08:25:00', '08:30:00', + '08:35:00', '08:40:00', + '13:15:00', '13:20:00', '13:25:00', '13:30:00', + '13:35:00', '13:40:00', + '06:15:00', '06:20:00', np.nan, np.nan, '06:35:00', + '06:40:00', + '06:15:00', '06:20:00', '06:25:00', '06:30:00', + np.nan, '06:40:00', + '26:15:00', '26:20:00', np.nan, np.nan, '26:35:00', + '26:40:00', + '06:15:00', '06:20:00', '06:25:00', '06:30:00', + np.nan, '06:40:00', + '06:15:00', '06:20:00', '06:25:00', + '06:15:00', '06:20:00', '06:25:00'], + 'departure_time': ['06:15:00', '06:20:00', np.nan, np.nan, + '06:35:00', '06:40:00', + '06:15:00', '06:20:00', '06:25:00', '06:30:00', + np.nan, '06:40:00', + '08:15:00', '08:20:00', '08:25:00', '08:30:00', + '08:35:00', '08:40:00', + '13:15:00', '13:20:00', '13:25:00', '13:30:00', + '13:35:00', '13:40:00', + '06:15:00', '06:20:00', np.nan, np.nan, + '06:35:00', '06:40:00', + '06:15:00', '06:20:00', '06:25:00', '06:30:00', + np.nan, '06:40:00', + '26:15:00', '26:20:00', np.nan, np.nan, + '26:35:00', '26:40:00', + '06:15:00', '06:20:00', '06:25:00', '06:30:00', + np.nan, '06:40:00', + '06:15:00', '06:20:00', '06:25:00', + '06:15:00', '06:20:00', '06:25:00'], + 'stop_sequence': [1, 2, 3, 4, 5, 6, + 1, 2, 3, 4, 5, 6, + 1, 2, 3, 4, 5, 6, + 1, 2, 3, 4, 5, 6, + 1, 2, 3, 4, 5, 6, + 1, 2, 3, 4, 5, 6, + 1, 2, 3, 4, 5, 6, + 1, 2, 3, 4, 5, 6, + 1, 2, 3, + 1, 2, 3] + } + index = range(54) + + df = pd.DataFrame(data, index) + return df + + +@pytest.fixture +def stop_times_feed_4(): + data = { + 'trip_id': ['a131', 'a131', 'a131', 'a131', 'a131', 'a131', + 'a132', 'a132', 'a132', 'a132', 'a132', 'a132', + 'a133', 'a133', 'a133', 'a133', 'a133', 'a133', + 'a134', 'a134', 'a134', 'a134', 'a134', 'a134', + 'a135', 'a135', 'a135', 'a135', 'a135', 'a135', + 'a136', 'a136', 'a136', 'a136', 'a136', 'a136', + 'a237', 'a237', 'a237', 'a237', 'a237', 'a237', + 'a238', 'a238', 'a238', 'a238', 'a238', 'a238', + 'a239', 'a239', 'a239', + 'a240', 'a240', 'a240', + 'a341', 'a341', 'a341', + 'a342', 'a342', 'a342', + 'a343', 'a343', 'a343', + 'a344', 'a344', 'a344'], + 'stop_id': ['70', '71', '72', '73', '74', '75', + '75', '74', '73', '72', '71', '70', + '70', '71', '72', '73', '74', '75', + '75', '74', '73', '72', '71', '70', + '70', '71', '72', '73', '74', '75', + '75', '74', '73', '72', '71', '70', + '70', '71', '72', '73', '74', '75', + '75', '74', '73', '72', '71', '70', + '76', '77', '78', + '78', '77', '76', + '76', '77', '78', + '78', '77', '76', + '76', '77', '78', + '78', '77', '76'], + + 'arrival_time': ['06:15:00', '06:20:00', np.nan, np.nan, '06:35:00', + '06:40:00', + '06:15:00', '06:20:00', '06:25:00', '06:30:00', + np.nan, '06:40:00', + '08:15:00', '08:20:00', '08:25:00', '08:30:00', + '08:35:00', '08:40:00', + '13:15:00', '13:20:00', '13:25:00', '13:30:00', + '13:35:00', '13:40:00', + '06:15:00', '06:20:00', np.nan, np.nan, '06:35:00', + '06:40:00', + '06:15:00', '06:20:00', '06:25:00', '06:30:00', + np.nan, '06:40:00', + '26:15:00', '26:20:00', 
np.nan, np.nan, '26:35:00', + '26:40:00', + '06:15:00', '06:20:00', '06:25:00', '06:30:00', + np.nan, '06:40:00', + '06:15:00', '06:20:00', '06:25:00', + '06:15:00', '06:20:00', '06:25:00', + '06:15:00', '06:20:00', '06:25:00', + '06:15:00', '06:20:00', '06:25:00', + '06:15:00', '06:20:00', '06:25:00', + '06:15:00', '06:20:00', '06:25:00'], + 'departure_time': ['06:15:00', '06:20:00', np.nan, np.nan, + '06:35:00', '06:40:00', + '06:15:00', '06:20:00', '06:25:00', '06:30:00', + np.nan, '06:40:00', + '08:15:00', '08:20:00', '08:25:00', '08:30:00', + '08:35:00', '08:40:00', + '13:15:00', '13:20:00', '13:25:00', '13:30:00', + '13:35:00', '13:40:00', + '06:15:00', '06:20:00', np.nan, np.nan, + '06:35:00', '06:40:00', + '06:15:00', '06:20:00', '06:25:00', '06:30:00', + np.nan, '06:40:00', + '26:15:00', '26:20:00', np.nan, np.nan, + '26:35:00', '26:40:00', + '06:15:00', '06:20:00', '06:25:00', '06:30:00', + np.nan, '06:40:00', + '06:15:00', '06:20:00', '06:25:00', + '06:15:00', '06:20:00', '06:25:00', + '06:15:00', '06:20:00', '06:25:00', + '06:15:00', '06:20:00', '06:25:00', + '06:15:00', '06:20:00', '06:25:00', + '06:15:00', '06:20:00', '06:25:00'], + 'stop_sequence': [1, 2, 3, 4, 5, 6, + 1, 2, 3, 4, 5, 6, + 1, 2, 3, 4, 5, 6, + 1, 2, 3, 4, 5, 6, + 1, 2, 3, 4, 5, 6, + 1, 2, 3, 4, 5, 6, + 1, 2, 3, 4, 5, 6, + 1, 2, 3, 4, 5, 6, + 1, 2, 3, + 1, 2, 3, + 1, 2, 3, + 1, 2, 3, + 1, 2, 3, + 1, 2, 3] + } + index = range(66) + + df = pd.DataFrame(data, index) + return df + + +@pytest.fixture() +def agency_a_feed_on_disk_wo_calendar_dates( + tmpdir, + agency_feed_1, stop_times_feed_1, stops_feed_1, + routes_feed_1, trips_feed_1, calendar_feed_1): + feed_file_dict = {'agency': agency_feed_1, + 'stop_times': stop_times_feed_1, + 'stops': stops_feed_1, + 'routes': routes_feed_1, + 'trips': trips_feed_1, + 'calendar': calendar_feed_1} + feed_path = os.path.join(tmpdir.strpath, 'agency_a_wo_calendar_dates') + os.makedirs(feed_path) + print('writing test data to dir: {}'.format(feed_path)) + for feed_file, feed_df in feed_file_dict.items(): + feed_file_name = '{}.txt'.format(feed_file) + feed_df.to_csv(os.path.join(feed_path, feed_file_name), index=False) + return feed_path + + +@pytest.fixture() +def agency_a_feed_on_disk_wo_calendar( + tmpdir, + agency_feed_1, stop_times_feed_1, stops_feed_1, + routes_feed_1, trips_feed_1, calendar_dates_feed_1): + feed_file_dict = {'agency': agency_feed_1, + 'stop_times': stop_times_feed_1, + 'stops': stops_feed_1, + 'routes': routes_feed_1, + 'trips': trips_feed_1, + 'calendar_dates': calendar_dates_feed_1} + feed_path = os.path.join(tmpdir.strpath, 'agency_a_wo_calendar') + os.makedirs(feed_path) + print('writing test data to dir: {}'.format(feed_path)) + for feed_file, feed_df in feed_file_dict.items(): + feed_file_name = '{}.txt'.format(feed_file) + feed_df.to_csv(os.path.join(feed_path, feed_file_name), index=False) + return feed_path + + +@pytest.fixture() +def agency_a_feed_on_disk_w_calendar_and_calendar_dates( + tmpdir, + agency_feed_1, stop_times_feed_1, stops_feed_1, + routes_feed_1, trips_feed_1, calendar_feed_1, calendar_dates_feed_1): + feed_file_dict = {'agency': agency_feed_1, + 'stop_times': stop_times_feed_1, + 'stops': stops_feed_1, + 'routes': routes_feed_1, + 'trips': trips_feed_1, + 'calendar': calendar_feed_1, + 'calendar_dates': calendar_dates_feed_1} + feed_path = os.path.join(tmpdir.strpath, 'agency_a_w_both_calendars') + os.makedirs(feed_path) + print('writing test data to dir: {}'.format(feed_path)) + for feed_file, feed_df in feed_file_dict.items(): 
+ feed_file_name = '{}.txt'.format(feed_file) + feed_df.to_csv(os.path.join(feed_path, feed_file_name), index=False) + return feed_path + + +@pytest.fixture() +def agency_a_feed_on_disk_wo_calendar_and_calendar_dates( + tmpdir, + agency_feed_1, stop_times_feed_1, stops_feed_1, + routes_feed_1, trips_feed_1): + feed_file_dict = {'agency': agency_feed_1, + 'stop_times': stop_times_feed_1, + 'stops': stops_feed_1, + 'routes': routes_feed_1, + 'trips': trips_feed_1} + feed_path = os.path.join(tmpdir.strpath, 'agency_a_wo_both_calendar') + os.makedirs(feed_path) + print('writing test data to dir: {}'.format(feed_path)) + for feed_file, feed_df in feed_file_dict.items(): + feed_file_name = '{}.txt'.format(feed_file) + feed_df.to_csv(os.path.join(feed_path, feed_file_name), index=False) + return feed_path + + +@pytest.fixture() +def agency_a_feed_on_disk_wo_req_file( + tmpdir, + agency_feed_1, stop_times_feed_1, stops_feed_1, + routes_feed_1, calendar_feed_1): + feed_file_dict = {'agency': agency_feed_1, + 'stop_times': stop_times_feed_1, + 'stops': stops_feed_1, + 'routes': routes_feed_1, + 'calendar': calendar_feed_1} + feed_path = os.path.join(tmpdir.strpath, 'agency_a_wo_req_file') + os.makedirs(feed_path) + print('writing test data to dir: {}'.format(feed_path)) + for feed_file, feed_df in feed_file_dict.items(): + feed_file_name = '{}.txt'.format(feed_file) + feed_df.to_csv(os.path.join(feed_path, feed_file_name), index=False) + return feed_path + + +@pytest.fixture() +def agency_a_feed_on_disk_wo_agency( + tmpdir, + agency_feed_1, stop_times_feed_1, stops_feed_1, + routes_feed_1, trips_feed_1, calendar_feed_1): + feed_file_dict = {'stop_times': stop_times_feed_1, + 'stops': stops_feed_1, + 'routes': routes_feed_1, + 'trips': trips_feed_1, + 'calendar': calendar_feed_1} + feed_path = os.path.join(tmpdir.strpath, 'agency_a_wo_agency') + os.makedirs(feed_path) + print('writing test data to dir: {}'.format(feed_path)) + for feed_file, feed_df in feed_file_dict.items(): + feed_file_name = '{}.txt'.format(feed_file) + feed_df.to_csv(os.path.join(feed_path, feed_file_name), index=False) + return feed_path diff --git a/urbanaccess/tests/integration/integration_sandiego.py b/urbanaccess/tests/integration/integration_sandiego.py index e6459a1..e3fd242 100644 --- a/urbanaccess/tests/integration/integration_sandiego.py +++ b/urbanaccess/tests/integration/integration_sandiego.py @@ -1,9 +1,12 @@ import os import time -import pandas as pd import matplotlib + matplotlib.use('agg') +import matplotlib.pyplot as plt + +import cartopy.crs as ccrs import urbanaccess @@ -22,19 +25,6 @@ urbanaccess.gtfsfeeds.download(data_folder=root_path) -# create dummy calendar.txt file because -dummy_txt_file = os.path.join(root_path, - 'gtfsfeed_text', - 'MTS', - 'calendar.txt') - -data = {'service_id': -99, 'monday': 0, 'tuesday': 0, 'wednesday': 0, - 'thursday': 0, 'friday': 0, 'saturday': 0, 'sunday': 0} - -index = range(1) - -pd.DataFrame(data, index).to_csv(dummy_txt_file, index=False) - validation = True verbose = True # small bbox for testing purposes @@ -56,6 +46,13 @@ 'schedule_type': 'WD'}, timerange=['07:00:00', '10:00:00']) +# This is the standard map projection for California +teale_albers = ccrs.AlbersEqualArea( + false_northing=-4000000.0, false_easting=0, + central_longitude=-120.0, central_latitude=0, + standard_parallels=(34.0, 40.5)) +teale_albers_ax = plt.axes(projection=teale_albers) + urbanaccess.plot.plot_net(nodes=transit_net.transit_nodes, edges=transit_net.transit_edges, bbox=bbox, @@ -68,7 +65,8 
@@ node_alpha=1, node_edgecolor='none', node_zorder=3, - nodes_only=False) + nodes_only=False, + ax=teale_albers_ax) print('{} integration test completed successfully. Took {:,' '.2f} seconds'.format(name, time.time() - start_time)) diff --git a/urbanaccess/tests/test_gtfs_load.py b/urbanaccess/tests/test_gtfs_load.py new file mode 100644 index 0000000..f813f27 --- /dev/null +++ b/urbanaccess/tests/test_gtfs_load.py @@ -0,0 +1,233 @@ +# coding=utf-8 +import pytest +import pandas as pd +import os +import six +import codecs +import sys + +import urbanaccess.gtfs.load as gtfs_load +from urbanaccess.gtfs.gtfsfeeds_dataframe import urbanaccess_gtfs_df + + +@pytest.fixture +def expected_urbanaccess_gtfs_df_keys(): + expected_keys = ['stops', 'routes', 'trips', 'stop_times', + 'calendar', 'calendar_dates', 'stop_times_int', + 'headways'] + return expected_keys.sort() + + +@pytest.fixture +def test_txt_files(tmpdir): + # test file that does not need to be fixed + do_not_fix_txt = os.path.join(tmpdir.strpath, 'agency.txt') + data = ['name,text\n', ' Circulação , áéíóúüñ¿¡ \n'] + if six.PY2: + with open(do_not_fix_txt, 'w') as f: + f.writelines(data) + else: + with open(do_not_fix_txt, 'w', encoding='utf-8') as f: + f.writelines(data) + + # test file that does need to be fixed + fix_txt = os.path.join(tmpdir.strpath, 'calendar.txt') + data = [' name , text \n', ' Circulação , áéíóúüñ¿¡ \n'] + if six.PY2: + with open(fix_txt, 'w') as f: + f.writelines(data) + else: + with open(fix_txt, 'w', encoding='utf-8') as f: + f.writelines(data) + + fix_txt_wBOM = os.path.join(tmpdir.strpath, 'calendar_dates.txt') + if six.PY2: + data = [codecs.BOM_UTF8, + ' name , text \n', + ' Circulação , áéíóúüñ¿¡ \n'] + with open(fix_txt_wBOM, 'w') as f: + f.writelines(data) + else: + data = [str(codecs.BOM_UTF8), + ' name , text \n', + ' Circulação , áéíóúüñ¿¡ \n'] + with open(fix_txt_wBOM, 'w', encoding='utf-8') as f: + f.writelines(data) + + return tmpdir.strpath, do_not_fix_txt, fix_txt, fix_txt_wBOM + + +@pytest.fixture +def test_txt_files_to_use(): + gtfsfiles_to_use = ['stops.txt', 'routes.txt', 'trips.txt', + 'stop_times.txt', 'calendar.txt', + 'agency.txt', 'calendar_dates.txt'] + return gtfsfiles_to_use + + +def test_txt_standardization(test_txt_files): + root_dir, do_not_fix_txt, fix_txt, fix_txt_wBOM = test_txt_files + + gtfs_load._standardize_txt(csv_rootpath=root_dir) + + df = pd.read_csv(fix_txt) + assert list(df.columns) == list(df.columns.str.strip()) + + df = pd.read_csv(fix_txt_wBOM) + assert list(df.columns) == list(df.columns.str.strip()) + + +def test_txt_header_whitespace_check(test_txt_files, test_txt_files_to_use): + root_dir, do_not_fix_txt, fix_txt, fix_txt_wBOM = test_txt_files + + gtfs_load._txt_header_whitespace_check( + gtfsfiles_to_use=test_txt_files_to_use, + csv_rootpath=root_dir) + + # only check 'fix_txt' as 'fix_txt_wBOM' would need to be + # fixed by _txt_encoder_check first + df = pd.read_csv(fix_txt) + assert list(df.columns) == list(df.columns.str.strip()) + + +@pytest.mark.skipif( + sys.version_info >= (3, 0), reason="requires python < 3.0") +def test_txt_encoder_check(test_txt_files, test_txt_files_to_use): + root_dir, do_not_fix_txt, fix_txt, fix_txt_wBOM = test_txt_files + + gtfs_load._txt_encoder_check( + gtfsfiles_to_use=test_txt_files_to_use, + csv_rootpath=root_dir) + + with open(fix_txt_wBOM, 'r') as f: + raw = f.read() + assert raw.startswith(codecs.BOM_UTF8) is False + + +def test_loadgtfsfeed_to_df_wo_calendar( + agency_a_feed_on_disk_wo_calendar, + 
expected_urbanaccess_gtfs_df_keys): + feed_dir = agency_a_feed_on_disk_wo_calendar + loaded_feeds = gtfs_load.gtfsfeed_to_df( + gtfsfeed_path=feed_dir, + validation=False, + verbose=True, + bbox=None, + remove_stops_outsidebbox=False, + append_definitions=False) + assert isinstance(loaded_feeds, urbanaccess_gtfs_df) + urbanaccess_gtfs_df_info = vars(loaded_feeds) + expected_dfs = ['stops', 'routes', 'trips', 'stop_times', + 'calendar_dates'] + assert expected_urbanaccess_gtfs_df_keys == list( + urbanaccess_gtfs_df_info.keys()).sort() + for key, value in urbanaccess_gtfs_df_info.items(): + assert isinstance(value, pd.core.frame.DataFrame) + # check that df is not empty + if key in expected_dfs: + assert value.empty is False + + +def test_loadgtfsfeed_to_df_wo_calendar_dates( + agency_a_feed_on_disk_wo_calendar_dates, + expected_urbanaccess_gtfs_df_keys): + feed_dir = agency_a_feed_on_disk_wo_calendar_dates + loaded_feeds = gtfs_load.gtfsfeed_to_df( + gtfsfeed_path=feed_dir, + validation=False, + verbose=True, + bbox=None, + remove_stops_outsidebbox=False, + append_definitions=False) + assert isinstance(loaded_feeds, urbanaccess_gtfs_df) + urbanaccess_gtfs_df_info = vars(loaded_feeds) + expected_dfs = ['stops', 'routes', 'trips', 'stop_times', + 'calendar'] + assert expected_urbanaccess_gtfs_df_keys == list( + urbanaccess_gtfs_df_info.keys()).sort() + for key, value in urbanaccess_gtfs_df_info.items(): + assert isinstance(value, pd.core.frame.DataFrame) + # check that df is not empty + if key in expected_dfs: + assert value.empty is False + + +def test_loadgtfsfeed_to_df_w_calendar_and_calendar_dates( + agency_a_feed_on_disk_w_calendar_and_calendar_dates, + expected_urbanaccess_gtfs_df_keys): + feed_dir = agency_a_feed_on_disk_w_calendar_and_calendar_dates + loaded_feeds = gtfs_load.gtfsfeed_to_df( + gtfsfeed_path=feed_dir, + validation=False, + verbose=True, + bbox=None, + remove_stops_outsidebbox=False, + append_definitions=False) + assert isinstance(loaded_feeds, urbanaccess_gtfs_df) + urbanaccess_gtfs_df_info = vars(loaded_feeds) + expected_dfs = ['stops', 'routes', 'trips', 'stop_times', + 'calendar', 'calendar_dates'] + assert expected_urbanaccess_gtfs_df_keys == list( + urbanaccess_gtfs_df_info.keys()).sort() + for key, value in urbanaccess_gtfs_df_info.items(): + assert isinstance(value, pd.core.frame.DataFrame) + # check that df is not empty + if key in expected_dfs: + assert value.empty is False + + +def test_loadgtfsfeed_to_df_wo_calendar_and_calendar_dates( + agency_a_feed_on_disk_wo_calendar_and_calendar_dates): + feed_dir = agency_a_feed_on_disk_wo_calendar_and_calendar_dates + with pytest.raises(ValueError) as excinfo: + loaded_feeds = gtfs_load.gtfsfeed_to_df( + gtfsfeed_path=feed_dir, + validation=False, + verbose=True, + bbox=None, + remove_stops_outsidebbox=False, + append_definitions=False) + expected_error = ( + "at least one of `calendar.txt` or `calendar_dates.txt` is required " + "to complete a GTFS dataset but neither was found in folder") + assert expected_error in str(excinfo.value) + + +def test_loadgtfsfeed_to_df_wo_req_file( + agency_a_feed_on_disk_wo_req_file): + feed_dir = agency_a_feed_on_disk_wo_req_file + with pytest.raises(ValueError) as excinfo: + loaded_feeds = gtfs_load.gtfsfeed_to_df( + gtfsfeed_path=feed_dir, + validation=False, + verbose=True, + bbox=None, + remove_stops_outsidebbox=False, + append_definitions=False) + expected_error = ( + "trips.txt is a required GTFS text file and was not found in folder") + assert expected_error in 
str(excinfo.value) + + +def test_loadgtfsfeed_to_df_wo_agency( + agency_a_feed_on_disk_wo_agency, + expected_urbanaccess_gtfs_df_keys): + feed_dir = agency_a_feed_on_disk_wo_agency + loaded_feeds = gtfs_load.gtfsfeed_to_df( + gtfsfeed_path=feed_dir, + validation=False, + verbose=True, + bbox=None, + remove_stops_outsidebbox=False, + append_definitions=False) + assert isinstance(loaded_feeds, urbanaccess_gtfs_df) + urbanaccess_gtfs_df_info = vars(loaded_feeds) + expected_dfs = ['stops', 'routes', 'trips', 'stop_times', + 'calendar'] + assert expected_urbanaccess_gtfs_df_keys == list( + urbanaccess_gtfs_df_info.keys()).sort() + for key, value in urbanaccess_gtfs_df_info.items(): + assert isinstance(value, pd.core.frame.DataFrame) + # check that df is not empty + if key in expected_dfs: + assert value.empty is False diff --git a/urbanaccess/tests/test_gtfs_network.py b/urbanaccess/tests/test_gtfs_network.py index 4bb7f71..01de1c8 100644 --- a/urbanaccess/tests/test_gtfs_network.py +++ b/urbanaccess/tests/test_gtfs_network.py @@ -1,7 +1,31 @@ import pytest import pandas as pd import numpy as np -from urbanaccess.gtfs import network + +import urbanaccess.gtfs.network as gtfs_network +import urbanaccess.gtfs.load as gtfs_load +from urbanaccess.network import urbanaccess_network + + +@pytest.fixture +def expected_urbanaccess_network_keys(): + expected_keys = ['transit_nodes', 'transit_edges', 'net_connector_edges', + 'osm_nodes', 'osm_edges', 'net_nodes', 'net_edges'] + return expected_keys.sort() + + +@pytest.fixture +def gtfs_feed_wo_calendar_dates( + tmpdir, agency_a_feed_on_disk_wo_calendar_dates): + feed_dir = agency_a_feed_on_disk_wo_calendar_dates + loaded_feeds = gtfs_load.gtfsfeed_to_df( + gtfsfeed_path=feed_dir, + validation=False, + verbose=True, + bbox=None, + remove_stops_outsidebbox=False, + append_definitions=False) + return loaded_feeds @pytest.fixture @@ -81,8 +105,73 @@ def stop_times_interpolated(): return df +def test_create_transit_net_wo_calendar_dates( + tmpdir, gtfs_feed_wo_calendar_dates, + expected_urbanaccess_network_keys): + transit_net = gtfs_network.create_transit_net( + gtfs_feed_wo_calendar_dates, day='monday', + timerange=['07:00:00', '10:00:00'], + calendar_dates_lookup=None, + overwrite_existing_stop_times_int=False, + use_existing_stop_times_int=False, + save_processed_gtfs=False, + save_dir=tmpdir, + save_filename=None) + assert isinstance(transit_net, urbanaccess_network) + urbanaccess_network_info = vars(transit_net) + expected_dfs = ['transit_nodes', 'transit_edges'] + assert expected_urbanaccess_network_keys == list( + urbanaccess_network_info.keys()).sort() + for key, value in urbanaccess_network_info.items(): + assert isinstance(value, pd.core.frame.DataFrame) + # check that df is not empty + if key in expected_dfs: + assert value.empty is False + + +def test_create_transit_net_wo_req_file( + tmpdir, gtfs_feed_wo_calendar_dates): + # set trips df to blank df for test + gtfs_feed_wo_calendar_dates.trips = pd.DataFrame() + with pytest.raises(ValueError) as excinfo: + transit_net = gtfs_network.create_transit_net( + gtfs_feed_wo_calendar_dates, day='monday', + timerange=['07:00:00', '10:00:00'], + calendar_dates_lookup=None, + overwrite_existing_stop_times_int=False, + use_existing_stop_times_int=False, + save_processed_gtfs=False, + save_dir=tmpdir, + save_filename=None) + expected_error = ( + "one of the following gtfsfeeds_dfs objects trips, stops, " + "or stop_times were found to be empty.") + assert expected_error in str(excinfo.value) + + +def 
test_create_transit_net_wo_calendar_and_calendar_dates( + tmpdir, gtfs_feed_wo_calendar_dates): + # set calendar_dates and calendar dfs to blank df for test + gtfs_feed_wo_calendar_dates.calendar_dates = pd.DataFrame() + gtfs_feed_wo_calendar_dates.calendar = pd.DataFrame() + with pytest.raises(ValueError) as excinfo: + transit_net = gtfs_network.create_transit_net( + gtfs_feed_wo_calendar_dates, day='monday', + timerange=['07:00:00', '10:00:00'], + calendar_dates_lookup=None, + overwrite_existing_stop_times_int=False, + use_existing_stop_times_int=False, + save_processed_gtfs=False, + save_dir=tmpdir, + save_filename=None) + expected_error = ( + "one of the following gtfsfeeds_dfs objects calendar or " + "calendar_dates were found to be empty.") + assert expected_error in str(excinfo.value) + + def test_interpolator(stop_times, calendar): - df = network._interpolate_stop_times(stop_times, calendar, day='monday') + df = gtfs_network._interpolate_stop_times(stop_times, calendar) # unique_trip_id should be generated assert df.loc[1, 'unique_trip_id'] == 'a_citytrains' @@ -121,7 +210,7 @@ def test_skip_interpolator(stop_times, calendar): stop_times['departure_time_sec'] = series - df = network._interpolate_stop_times(stop_times, calendar, day='monday') + df = gtfs_network._interpolate_stop_times(stop_times, calendar) # everything should be the same, # with one row dropped for calendar day filter @@ -132,7 +221,7 @@ def test_skip_interpolator(stop_times, calendar): def test_edge_reformatter(stop_times_interpolated): - df = network._format_transit_net_edge(stop_times_interpolated) + df = gtfs_network._format_transit_net_edge(stop_times_interpolated) # length of edge df should be 16 assert len(df) == 16 diff --git a/urbanaccess/tests/test_gtfs_utils_format.py b/urbanaccess/tests/test_gtfs_utils_format.py index 659b872..4857c9f 100644 --- a/urbanaccess/tests/test_gtfs_utils_format.py +++ b/urbanaccess/tests/test_gtfs_utils_format.py @@ -1,689 +1,11 @@ import pytest import pandas as pd -import numpy as np import os from re import sub from urbanaccess.gtfs import utils_format -@pytest.fixture -def agency_feed_1(): - data = { - 'agency_id': 'agency a', - 'agency_name': 'agency a city a', - 'agency_url': 'http://www.agency_a.org', - 'agency_timezone': 'America/Los_Angeles', - 'agency_phone': '(000) 000-0000' - } - index = range(1) - - df = pd.DataFrame(data, index) - return df - - -@pytest.fixture -def agency_feed_2(): - data = { - 'agency_id': ['agency b bus', 'agency b rail'], - 'agency_name': ['agency b district 1', 'agency b district 2'], - 'agency_url': ['http://www.agency_b.org', 'http://www.agency_b.org'], - 'agency_timezone': ['America/Los_Angeles', 'America/Los_Angeles'], - 'agency_phone': ['(000) 000-0000', '(000) 000-0000'] - } - index = range(2) - - df = pd.DataFrame(data, index) - return df - - -@pytest.fixture -def agency_feed_3(): - data = { - 'agency_id': '', - 'agency_name': 'agency c', - 'agency_url': 'http://www.agency_c.org', - 'agency_timezone': 'America/Los_Angeles', - 'agency_phone': '(000) 000-0000' - } - index = range(1) - - df = pd.DataFrame(data, index) - return df - - -@pytest.fixture -def agency_feed_4(): - data = { - 'agency_id': ['agency 1', 'agency 2', 'agency 3'], - 'agency_name': ['agency 1 bus', 'agency 2 rail', 'agency 3 metro'], - 'agency_url': ['http://www.agency_1.org', 'http://www.agency_2.org', - 'http://www.agency_2.org'], - 'agency_timezone': ['America/Los_Angeles', 'America/Los_Angeles', - 'America/Los_Angeles'], - 'agency_phone': ['(000) 000-0000', '(000) 
000-0000', '(000) 000-0000'] - } - index = range(3) - - df = pd.DataFrame(data, index) - return df - - -@pytest.fixture -def routes_feed_1(): - data = { - 'agency_id': ['agency a'] * 4, - 'route_id': ['10-101', '11-101', '12-101', '13-101'], - 'route_short_name': ['10', '11', 'red', 'blue'], - 'route_long_name': ['ave a local', 'ave a express', 'red line', - 'blue line'], - 'route_type': [3, 3, 1, 1] - } - - index = range(4) - - df = pd.DataFrame(data, index) - return df - - -@pytest.fixture -def routes_feed_2(): - data = { - 'agency_id': ['agency b bus', 'agency b bus', 'agency b rail', - 'agency b rail'], - 'route_id': ['40-4', '40-4x', 'r-2', 'r-2ext'], - 'route_short_name': ['40', '40', 'red', 'red-ext'], - 'route_long_name': ['ave a local', 'ave a express', 'red line', - 'red line extension'], - 'route_type': [3, 3, 1, 1] - } - - index = range(4) - - df = pd.DataFrame(data, index) - return df - - -@pytest.fixture -def routes_feed_4(): - data = { - 'agency_id': ['agency 1', 'agency 1', 'agency 2', 'agency 2', - 'agency 3', 'agency 3'], - 'route_id': ['a1x', 'a1', 'a2x', 'a2', 'a3x', 'a3'], - 'route_short_name': ['1x', '1', '2x', '2', '3x', '3'], - 'route_long_name': ['1 express', '1 local', '2 express', - '2 local', '3 express', '3 local'], - 'route_type': [3, 3, 3, 3, 3, 3] - } - - index = range(6) - - df = pd.DataFrame(data, index) - return df - - -@pytest.fixture -def stops_feed_1(): - data = { - 'stop_id': ['1', '2', '3', '4', '5', '6', - '7', '8', '9'], - 'stop_name': ['ave a', 'ave b', 'ave c', 'ave d', 'ave e', 'ave f', - '1st st', '2nd st', '3rd st'], - 'stop_lat': [37.797484, 37.774963, 37.803664, 37.80787, 37.828415, - 37.844601, 37.664174, 37.591208, 37.905628], - 'stop_lon': [-122.265609, -122.224274, -122.271604, -122.269029, - -122.267227, -122.251793, -122.444116, -122.017867, - -122.067423], - 'location_type': [1, 1, 1, 1, 1, 1, - 2, 2, 2], - 'wheelchair_boarding': [1, 0, 0, 0, 0, 0, - 1, 1, 1] - } - - index = range(9) - - df = pd.DataFrame(data, index) - return df - - -@pytest.fixture -def stops_feed_2(): - data = { - 'stop_id': ['60', '61', '62', '63', '64', '65', - '66', '67', '68', - '600', '601', '602', '603', '604', '605', '606'], - 'stop_name': ['ave m', 'ave n', 'ave o', 'ave p', 'ave q', 'ave r', - '10th st', '11th st', '12th st', - '121th st', '122th st', '123th st', '124th st', - '125th st', '126th st', '127th st'], - 'stop_lat': [38.797484, 38.774963, 38.803664, 38.80787, 38.828415, - 38.844601, 38.664174, 38.591208, 38.905628, - 38.603664, 38.60787, 38.628415, - 38.644601, 38.660000, 38.691208, 38.605628], - 'stop_lon': [-121.265609, -121.224274, -121.271604, -121.269029, - -121.267227, -121.251793, -121.444116, -121.017867, - -121.067423, -122.271604, -122.269029, -122.267227, - -122.251793, -122.444116, -122.017867, -122.067423], - 'location_type': [1, 1, 1, 1, 1, 1, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2], - 'wheelchair_boarding': [1, 0, 0, 0, 0, 0, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1] - } - - index = range(16) - - df = pd.DataFrame(data, index) - return df - - -@pytest.fixture -def stops_feed_4(): - data = { - 'stop_id': ['70', '71', '72', '73', '74', '75', - '76', '77', '78'], - 'stop_name': ['station 1', 'station 2', 'station 3', 'station 4', - 'station 5', 'station 6', - 'station 7', 'station 8', 'station 9'], - 'stop_lat': [20.797484, 20.774963, 20.803664, 20.80787, 20.828415, - 20.844601, 20.664174, 20.591208, 20.905628], - 'stop_lon': [-100.265609, -100.224274, -100.271604, -100.269029, - -100.267227, -100.251793, -100.444116, -100.017867, - -100.067423] - } - 
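Putting the pieces together, the ``create_transit_net`` tests above amount to a workflow like the following sketch; the paths are illustrative and the keyword arguments mirror the test calls::

    import urbanaccess.gtfs.load as gtfs_load
    import urbanaccess.gtfs.network as gtfs_network

    loaded_feeds = gtfs_load.gtfsfeed_to_df(
        gtfsfeed_path='data/gtfsfeed_text/agency_a',  # illustrative path
        validation=False, verbose=True, bbox=None,
        remove_stops_outsidebbox=False, append_definitions=False)

    transit_net = gtfs_network.create_transit_net(
        loaded_feeds, day='monday',
        timerange=['07:00:00', '10:00:00'],
        calendar_dates_lookup=None,
        overwrite_existing_stop_times_int=False,
        use_existing_stop_times_int=False,
        save_processed_gtfs=False,
        save_dir='data',  # illustrative
        save_filename=None)
    # nodes and edges for the selected service day and time window
    print(transit_net.transit_edges.head())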
- index = range(9) - - df = pd.DataFrame(data, index) - return df - - -@pytest.fixture -def trips_feed_1(): - data = { - 'route_id': ['10-101', '10-101', '10-101', '10-101', - '11-101', '11-101', - '12-101', '12-101', - '13-101', '13-101'], - 'trip_id': ['a1', 'a2', 'a3', 'a4', - 'b1', 'b2', - 'c1', 'c2', - 'd1', 'd2'], - 'service_id': ['weekday-1', 'weekday-1', 'weekday-1', 'weekday-1', - 'weekday-2', 'weekday-2', - 'weekday-3', 'weekday-3', - 'weekend-1', 'weekend-1'], - 'direction_id': [1, 0, 1, 0, - 1, 0, - 1, 0, - 1, 0], - 'wheelchair_accessible': [1, 1, 1, 1, - 0, 0, - 0, 0, - 0, 0], - 'bikes_allowed': [1, 1, 1, 1, - 0, 0, - 0, 0, - 0, 0] - } - - index = range(10) - - df = pd.DataFrame(data, index) - return df - - -@pytest.fixture -def trips_feed_2(): - data = { - - 'route_id': ['40-4', '40-4', '40-4', '40-4', - '40-4x', '40-4x', - 'r-2', 'r-2', - 'r-2ext', 'r-2ext'], - 'trip_id': ['11', '12', '13', '14', - '21', '22', - '31', '32', - '41', '42'], - 'service_id': ['weekday-1', 'weekday-1', 'weekday-1', 'weekday-1', - 'weekday-2', 'weekday-2', - 'weekday-3', 'weekday-3', - 'weekend-1', 'weekend-1'] - } - - index = range(10) - - df = pd.DataFrame(data, index) - return df - - -@pytest.fixture -def trips_feed_4(): - data = { - 'route_id': ['a1x', 'a1x', 'a1x', 'a1x', - 'a1', 'a1', - 'a2x', 'a2x', - 'a2', 'a2', - 'a3x', 'a3x', - 'a3', 'a3'], - 'trip_id': ['a131', 'a132', 'a133', 'a134', - 'a135', 'a136', - 'a237', 'a238', - 'a239', 'a240', - 'a341', 'a342', - 'a343', 'a344'], - 'service_id': ['wk-1', 'wk-1', 'wk-1', 'wk-1', - 'wk-1', 'wk-1', - 'wk-1', 'wk-1', - 'wk-1', 'wk-1', - 'wk-1', 'wk-1', - 'wk-1', 'wk-1'] - } - - index = range(14) - - df = pd.DataFrame(data, index) - return df - - -@pytest.fixture -def calendar_feed_1(): - data = { - 'service_id': ['weekday-1', - 'weekday-2', - 'weekday-3', - 'weekend-1'], - 'monday': [1, 1, 1, 0], - 'tuesday': [1, 1, 1, 0], - 'wednesday': [1, 1, 1, 0], - 'thursday': [1, 1, 1, 0], - 'friday': [1, 1, 1, 0], - 'saturday': [0, 0, 0, 1], - 'sunday': [0, 0, 0, 1], - 'start_date': [20161224] * 4, - 'end_date': [20170318] * 4} - - index = range(4) - - df = pd.DataFrame(data, index) - return df - - -@pytest.fixture -def calendar_feed_2(): - data = { - 'service_id': ['weekday-1', - 'weekday-2', - 'weekday-3', - 'weekend-1'], - 'monday': [1, 1, 1, 0], - 'tuesday': [1, 1, 1, 0], - 'wednesday': [1, 1, 1, 0], - 'thursday': [1, 1, 1, 0], - 'friday': [1, 1, 1, 0], - 'saturday': [0, 0, 0, 1], - 'sunday': [0, 0, 0, 1], - 'start_date': [20161224] * 4, - 'end_date': [20170318] * 4} - - index = range(4) - - df = pd.DataFrame(data, index) - return df - - -@pytest.fixture -def calendar_feed_4(): - data = { - 'service_id': ['wk-1'], - 'monday': [1], - 'tuesday': [1], - 'wednesday': [1], - 'thursday': [1], - 'friday': [1], - 'saturday': [0], - 'sunday': [0], - 'start_date': [20161224], - 'end_date': [20170318]} - - index = range(1) - - df = pd.DataFrame(data, index) - return df - - -@pytest.fixture -def calendar_empty(): - columns = {'service_id', - 'monday', - 'tuesday', - 'wednesday', - 'thursday', - 'friday', - 'saturday', - 'sunday', - 'start_date', - 'end_date'} - - df = pd.DataFrame(columns=columns) - return df - - -@pytest.fixture -def calendar_dates_feed_1(): - data = { - 'service_id': ['weekday-1', - 'weekday-2', - 'weekday-3', - 'weekend-1'], - 'date': [20161224, 20170318, 20160424, 20161230], - 'exception_type': [1, 2, 1, 1]} - - index = range(4) - - df = pd.DataFrame(data, index) - return df - - -@pytest.fixture -def calendar_dates_feed_2(): - data = { - 
'service_id': ['weekday-1', - 'weekday-2', - 'weekday-3', - 'weekend-1'], - 'date': [20161224, 20170318, 20160424, 20161230], - 'exception_type': [1, 2, 1, 1], - 'schedule_type': ['WD', 'WD', 'WD', 'SA'] - } - - index = range(4) - - df = pd.DataFrame(data, index) - return df - - -@pytest.fixture -def calendar_dates_feed_4(): - data = { - 'service_id': ['wk-1'], - 'date': [20161224], - 'exception_type': [1]} - - index = range(1) - - df = pd.DataFrame(data, index) - return df - - -@pytest.fixture -def stop_times_feed_1(): - data = { - 'trip_id': ['a1', 'a1', 'a1', 'a1', 'a1', 'a1', - 'a2', 'a2', 'a2', 'a2', 'a2', 'a2', - 'a3', 'a3', 'a3', 'a3', 'a3', 'a3', - 'a4', 'a4', 'a4', 'a4', 'a4', 'a4', - 'b1', 'b1', 'b1', 'b1', 'b1', 'b1', - 'b2', 'b2', 'b2', 'b2', 'b2', 'b2', - 'c1', 'c1', 'c1', 'c1', 'c1', 'c1', - 'c2', 'c2', 'c2', 'c2', 'c2', 'c2', - 'd1', 'd1', 'd1', - 'd2', 'd2', 'd2'], - 'stop_id': ['1', '2', '3', '4', '5', '6', - '6', '5', '4', '3', '2', '1', - '1', '2', '3', '4', '5', '6', - '6', '5', '4', '3', '2', '1', - '1', '2', '3', '4', '5', '6', - '6', '5', '4', '3', '2', '1', - '1', '2', '3', '4', '5', '6', - '6', '5', '4', '3', '2', '1', - '7', '8', '9', - '9', '8', '7'], - 'arrival_time': ['06:15:00', '06:20:00', np.nan, np.nan, '06:35:00', - '06:40:00', - '06:15:00', '06:20:00', '06:25:00', '06:30:00', - np.nan, '06:40:00', - '08:15:00', '08:20:00', '08:25:00', '08:30:00', - '08:35:00', '08:40:00', - '13:15:00', '13:20:00', '13:25:00', '13:30:00', - '13:35:00', '13:40:00', - '06:15:00', '06:20:00', np.nan, np.nan, '06:35:00', - '06:40:00', - '06:15:00', '06:20:00', '06:25:00', '06:30:00', - np.nan, '06:40:00', - '26:15:00', '26:20:00', np.nan, np.nan, '26:35:00', - '26:40:00', - '06:15:00', '06:20:00', '06:25:00', '06:30:00', - np.nan, '06:40:00', - '06:15:00', '06:20:00', '06:25:00', - '06:15:00', '06:20:00', '06:25:00'], - 'departure_time': ['06:15:00', '06:20:00', np.nan, np.nan, - '06:35:00', '06:40:00', - '06:15:00', '06:20:00', '06:25:00', '06:30:00', - np.nan, '06:40:00', - '08:15:00', '08:20:00', '08:25:00', '08:30:00', - '08:35:00', '08:40:00', - '13:15:00', '13:20:00', '13:25:00', '13:30:00', - '13:35:00', '13:40:00', - '06:15:00', '06:20:00', np.nan, np.nan, - '06:35:00', '06:40:00', - '06:15:00', '06:20:00', '06:25:00', '06:30:00', - np.nan, '06:40:00', - '26:15:00', '26:20:00', np.nan, np.nan, - '26:35:00', '26:40:00', - '06:15:00', '06:20:00', '06:25:00', '06:30:00', - np.nan, '06:40:00', - '06:15:00', '06:20:00', '06:25:00', - '06:15:00', '06:20:00', '06:25:00'], - 'stop_sequence': [1, 2, 3, 4, 5, 6, - 1, 2, 3, 4, 5, 6, - 1, 2, 3, 4, 5, 6, - 1, 2, 3, 4, 5, 6, - 1, 2, 3, 4, 5, 6, - 1, 2, 3, 4, 5, 6, - 1, 2, 3, 4, 5, 6, - 1, 2, 3, 4, 5, 6, - 1, 2, 3, - 1, 2, 3], - 'pickup_type': [0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, - 1, 1, 1, 1, 1, 1, - 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, - 0, 0, 0, - 0, 0, 0], - 'drop_off_type': [0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, - 1, 1, 1, 1, 1, 1, - 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, - 0, 0, 0, - 0, 0, 0] - } - index = range(54) - - df = pd.DataFrame(data, index) - return df - - -@pytest.fixture -def stop_times_feed_2(): - data = { - 'trip_id': ['11', '11', '11', '11', '11', '11', - '12', '12', '12', '12', '12', '12', - '13', '13', '13', '13', '13', '13', - '14', '14', '14', '14', '14', '14', - '21', '21', '21', '21', '21', '21', - '22', '22', '22', '22', '22', '22', - '31', '31', '31', '31', '31', '31', - '32', '32', 
'32', '32', '32', '32', - '41', '41', '41', - '42', '42', '42'], - 'stop_id': ['60', '61', '62', '63', '64', '65', - '65', '64', '63', '62', '61', '60', - '60', '61', '62', '63', '64', '65', - '65', '64', '63', '62', '61', '60', - '60', '61', '62', '63', '64', '65', - '65', '64', '63', '62', '61', '60', - '600', '601', '602', '603', '604', '605', - '606', '605', '604', '603', '602', '601', - '66', '67', '68', - '68', '67', '66'], - 'arrival_time': ['06:15:00', '06:20:00', np.nan, np.nan, '06:35:00', - '06:40:00', - '06:15:00', '06:20:00', '06:25:00', '06:30:00', - np.nan, '06:40:00', - '08:15:00', '08:20:00', '08:25:00', '08:30:00', - '08:35:00', '08:40:00', - '13:15:00', '13:20:00', '13:25:00', '13:30:00', - '13:35:00', '13:40:00', - '06:15:00', '06:20:00', np.nan, np.nan, '06:35:00', - '06:40:00', - '06:15:00', '06:20:00', '06:25:00', '06:30:00', - np.nan, '06:40:00', - '26:15:00', '26:20:00', np.nan, np.nan, '26:35:00', - '26:40:00', - '06:15:00', '06:20:00', '06:25:00', '06:30:00', - np.nan, '06:40:00', - '06:15:00', '06:20:00', '06:25:00', - '06:15:00', '06:20:00', '06:25:00'], - 'departure_time': ['06:15:00', '06:20:00', np.nan, np.nan, - '06:35:00', '06:40:00', - '06:15:00', '06:20:00', '06:25:00', '06:30:00', - np.nan, '06:40:00', - '08:15:00', '08:20:00', '08:25:00', '08:30:00', - '08:35:00', '08:40:00', - '13:15:00', '13:20:00', '13:25:00', '13:30:00', - '13:35:00', '13:40:00', - '06:15:00', '06:20:00', np.nan, np.nan, - '06:35:00', '06:40:00', - '06:15:00', '06:20:00', '06:25:00', '06:30:00', - np.nan, '06:40:00', - '26:15:00', '26:20:00', np.nan, np.nan, - '26:35:00', '26:40:00', - '06:15:00', '06:20:00', '06:25:00', '06:30:00', - np.nan, '06:40:00', - '06:15:00', '06:20:00', '06:25:00', - '06:15:00', '06:20:00', '06:25:00'], - 'stop_sequence': [1, 2, 3, 4, 5, 6, - 1, 2, 3, 4, 5, 6, - 1, 2, 3, 4, 5, 6, - 1, 2, 3, 4, 5, 6, - 1, 2, 3, 4, 5, 6, - 1, 2, 3, 4, 5, 6, - 1, 2, 3, 4, 5, 6, - 1, 2, 3, 4, 5, 6, - 1, 2, 3, - 1, 2, 3] - } - index = range(54) - - df = pd.DataFrame(data, index) - return df - - -@pytest.fixture -def stop_times_feed_4(): - data = { - 'trip_id': ['a131', 'a131', 'a131', 'a131', 'a131', 'a131', - 'a132', 'a132', 'a132', 'a132', 'a132', 'a132', - 'a133', 'a133', 'a133', 'a133', 'a133', 'a133', - 'a134', 'a134', 'a134', 'a134', 'a134', 'a134', - 'a135', 'a135', 'a135', 'a135', 'a135', 'a135', - 'a136', 'a136', 'a136', 'a136', 'a136', 'a136', - 'a237', 'a237', 'a237', 'a237', 'a237', 'a237', - 'a238', 'a238', 'a238', 'a238', 'a238', 'a238', - 'a239', 'a239', 'a239', - 'a240', 'a240', 'a240', - 'a341', 'a341', 'a341', - 'a342', 'a342', 'a342', - 'a343', 'a343', 'a343', - 'a344', 'a344', 'a344'], - 'stop_id': ['70', '71', '72', '73', '74', '75', - '75', '74', '73', '72', '71', '70', - '70', '71', '72', '73', '74', '75', - '75', '74', '73', '72', '71', '70', - '70', '71', '72', '73', '74', '75', - '75', '74', '73', '72', '71', '70', - '70', '71', '72', '73', '74', '75', - '75', '74', '73', '72', '71', '70', - '76', '77', '78', - '78', '77', '76', - '76', '77', '78', - '78', '77', '76', - '76', '77', '78', - '78', '77', '76'], - - 'arrival_time': ['06:15:00', '06:20:00', np.nan, np.nan, '06:35:00', - '06:40:00', - '06:15:00', '06:20:00', '06:25:00', '06:30:00', - np.nan, '06:40:00', - '08:15:00', '08:20:00', '08:25:00', '08:30:00', - '08:35:00', '08:40:00', - '13:15:00', '13:20:00', '13:25:00', '13:30:00', - '13:35:00', '13:40:00', - '06:15:00', '06:20:00', np.nan, np.nan, '06:35:00', - '06:40:00', - '06:15:00', '06:20:00', '06:25:00', '06:30:00', - np.nan, '06:40:00', 
- '26:15:00', '26:20:00', np.nan, np.nan, '26:35:00', - '26:40:00', - '06:15:00', '06:20:00', '06:25:00', '06:30:00', - np.nan, '06:40:00', - '06:15:00', '06:20:00', '06:25:00', - '06:15:00', '06:20:00', '06:25:00', - '06:15:00', '06:20:00', '06:25:00', - '06:15:00', '06:20:00', '06:25:00', - '06:15:00', '06:20:00', '06:25:00', - '06:15:00', '06:20:00', '06:25:00'], - 'departure_time': ['06:15:00', '06:20:00', np.nan, np.nan, - '06:35:00', '06:40:00', - '06:15:00', '06:20:00', '06:25:00', '06:30:00', - np.nan, '06:40:00', - '08:15:00', '08:20:00', '08:25:00', '08:30:00', - '08:35:00', '08:40:00', - '13:15:00', '13:20:00', '13:25:00', '13:30:00', - '13:35:00', '13:40:00', - '06:15:00', '06:20:00', np.nan, np.nan, - '06:35:00', '06:40:00', - '06:15:00', '06:20:00', '06:25:00', '06:30:00', - np.nan, '06:40:00', - '26:15:00', '26:20:00', np.nan, np.nan, - '26:35:00', '26:40:00', - '06:15:00', '06:20:00', '06:25:00', '06:30:00', - np.nan, '06:40:00', - '06:15:00', '06:20:00', '06:25:00', - '06:15:00', '06:20:00', '06:25:00', - '06:15:00', '06:20:00', '06:25:00', - '06:15:00', '06:20:00', '06:25:00', - '06:15:00', '06:20:00', '06:25:00', - '06:15:00', '06:20:00', '06:25:00'], - 'stop_sequence': [1, 2, 3, 4, 5, 6, - 1, 2, 3, 4, 5, 6, - 1, 2, 3, 4, 5, 6, - 1, 2, 3, 4, 5, 6, - 1, 2, 3, 4, 5, 6, - 1, 2, 3, 4, 5, 6, - 1, 2, 3, 4, 5, 6, - 1, 2, 3, 4, 5, 6, - 1, 2, 3, - 1, 2, 3, - 1, 2, 3, - 1, 2, 3, - 1, 2, 3, - 1, 2, 3] - } - index = range(66) - - df = pd.DataFrame(data, index) - return df - - @pytest.fixture def folder_feed_1(): return r'/data/gtfs_feeds/agency_a' diff --git a/urbanaccess/tests/test_gtfsfeeds.py b/urbanaccess/tests/test_gtfsfeeds.py new file mode 100644 index 0000000..dacf5be --- /dev/null +++ b/urbanaccess/tests/test_gtfsfeeds.py @@ -0,0 +1,219 @@ +import pytest +import os +import pandas as pd +import yaml + +from urbanaccess import gtfsfeeds +from urbanaccess.gtfsfeeds import feeds + + +@pytest.fixture +def feed_dict1(): + return { + 'ac transit': + 'http://www.actransit.org/wp-content/uploads/GTFSJune182017B.zip'} + + +@pytest.fixture +def feed_dict2(): + return { + 'Bay Area Rapid Transit': + 'http://www.gtfs-data-exchange.com/agency/bay-area-rapid-transit' + '/latest.zip'} + + +@pytest.fixture +def feed_dict3(): + return { + 'ac transit': 'http://www.actransit.org/wp-content/uploads' + '/GTFSJune182017B.zip', + 'Bay Area Rapid Transit': + 'http://www.gtfs-data-exchange.com/agency/bay-area-rapid-transit' + '/latest.zip'} + + +@pytest.fixture +def feed_yaml(tmpdir): + yaml_dict = { + 'gtfs_feeds': { + 'ac transit': 'http://www.actransit.org/wp-content/uploads' + '/GTFSJune182017B.zip', + 'Bay Area Rapid Transit': + 'http://www.gtfs-data-exchange.com/agency/bay-area-rapid' + '-transit/latest.zip'}} + + yaml_path = os.path.join(tmpdir.strpath, 'gtfsfeeds.yaml') + with open(yaml_path, 'w') as f: + yaml.dump(yaml_dict, f, default_flow_style=False) + return tmpdir.strpath + + +def test_feed_object(): + assert isinstance(gtfsfeeds.feeds, gtfsfeeds.urbanaccess_gtfsfeeds) + assert isinstance(feeds.to_dict(), dict) + + +def test_add_feed(feed_dict1, feed_dict2): + feeds.add_feed(add_dict=feed_dict1) + assert len(feeds.gtfs_feeds.keys()) == 1 + feeds.add_feed(add_dict=feed_dict2) + assert len(feeds.gtfs_feeds.keys()) == 2 + feed_dict_replace = {'Bay Area Rapid Transit': 'test'} + feeds.add_feed(add_dict=feed_dict_replace, replace=True) + + for key, value in feeds.gtfs_feeds.items(): + if key == 'Bay Area Rapid Transit': + assert value == 'test' + assert isinstance(feeds, 
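The feed-registry tests below cover the full round trip: register a URL, serialize the registry to YAML, then download and unpack the zips. A sketch of that workflow (the URL is the one used in the tests and may no longer resolve)::

    from urbanaccess import gtfsfeeds
    from urbanaccess.gtfsfeeds import feeds

    feeds.add_feed(add_dict={
        'ac transit':
            'http://www.actransit.org/wp-content/uploads'
            '/GTFSJune182017B.zip'})
    feeds.to_yaml('data', overwrite=True)  # writes data/gtfsfeeds.yaml
    gtfsfeeds.download(data_folder='data')
    # zips land in data/gtfsfeed_zips; extracted text files in
    # data/gtfsfeed_text/<feed name>/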
gtfsfeeds.urbanaccess_gtfsfeeds) + # clear feeds from global memory + feeds.remove_feed(remove_all=True) + + +def test_remove_feed(feed_dict3): + feeds.add_feed(add_dict=feed_dict3) + feeds.remove_feed(del_key='ac transit') + assert len(feeds.gtfs_feeds.keys()) == 1 + assert 'ac transit' not in feeds.gtfs_feeds.keys() + feeds.remove_feed(remove_all=True) + assert len(feeds.gtfs_feeds.keys()) == 0 + assert isinstance(feeds, gtfsfeeds.urbanaccess_gtfsfeeds) + # clear feeds from global memory + feeds.remove_feed(remove_all=True) + + +def test_to_yaml_feed(tmpdir, feed_dict3): + feeds.add_feed(add_dict=feed_dict3) + feeds.to_yaml(tmpdir.strpath, overwrite=True) + + yaml_path = os.path.join(tmpdir.strpath, 'gtfsfeeds.yaml') + with open(yaml_path, 'r') as f: + yaml_config = yaml.load(f) + assert yaml_config['gtfs_feeds'] == feed_dict3 + # clear feeds from global memory + feeds.remove_feed(remove_all=True) + + +def test_from_yaml_feed(feed_yaml): + yaml_path = feed_yaml + feeds_from_yaml = feeds.from_yaml(yaml_path, 'gtfsfeeds.yaml') + + assert isinstance(feeds_from_yaml, gtfsfeeds.urbanaccess_gtfsfeeds) + assert len(feeds_from_yaml.gtfs_feeds.keys()) == 2 + + valid_feed = ('http://www.gtfs-data-exchange.com/' + 'agency/bay-area-rapid-transit/latest.zip') + assert feeds_from_yaml.gtfs_feeds['Bay Area Rapid Transit'] == valid_feed + + valid_feed = ('http://www.actransit.org/wp-content/' + 'uploads/GTFSJune182017B.zip') + assert feeds_from_yaml.gtfs_feeds['ac transit'] == valid_feed + # clear feeds from global memory + feeds.remove_feed(remove_all=True) + + +def test_search_contains_gtfs_data_exchange(): + search_result = gtfsfeeds.search(api='gtfsdataexch', + search_text=['ac transit', 'santa rosa'], + search_field=None, match='contains', + add_feed=False, overwrite_feed=False) + + assert isinstance(search_result, pd.DataFrame) + assert search_result.empty is False + assert len(search_result) == 2 + + col_list = ['dataexchange_url', 'dataexchange_id', 'name'] + for col in col_list: + assert col in search_result.columns + assert search_result[col].isnull().all() == False # noqa + + value_list = ['ac-transit', 'santa-rosa-citybus'] + for value in value_list: + assert value in list(search_result['dataexchange_id']) + + +def test_search_contains_add_feed_gtfs_data_exchange(): + gtfsfeeds.search(api='gtfsdataexch', + search_text='ac transit', + search_field=None, match='contains', + add_feed=True, overwrite_feed=False) + + assert len(feeds.gtfs_feeds.keys()) == 1 + assert 'AC Transit' in feeds.gtfs_feeds.keys() + + # test overwrite feed + gtfsfeeds.search(api='gtfsdataexch', + search_text='Bay Area Rapid Transit', + search_field=None, match='exact', + add_feed=True, overwrite_feed=True) + + assert len(feeds.gtfs_feeds.keys()) == 1 + assert 'Bay Area Rapid Transit' in feeds.gtfs_feeds.keys() + # clear feeds from global memory + feeds.remove_feed(remove_all=True) + + +def test_search_exact_search_field_gtfs_data_exchange(): + # test search field + search_result = gtfsfeeds.search(api='gtfsdataexch', + search_text='San Francisco Bay Area', + search_field=['area'], match='exact', + add_feed=False, overwrite_feed=False) + assert len(search_result) == 8 + + +def test_download_gtfs_feed_via_feed_object(feed_dict3, tmpdir): + feeds.add_feed(add_dict=feed_dict3) + tmp_path = tmpdir.strpath + gtfsfeeds.download(data_folder=tmp_path) + + filelist = ['ac transit.zip', 'Bay Area Rapid Transit.zip'] + txtlist = ['calendar.txt', 'routes.txt', 'stop_times.txt', + 'stops.txt', 'trips.txt'] + zip_path = 
os.path.join(tmp_path, 'gtfsfeed_zips') + txt_path = os.path.join(tmp_path, 'gtfsfeed_text') + for zipfile in filelist: + assert os.path.exists(os.path.join(zip_path, zipfile)) is True + for folder in filelist: + check_path = os.path.join(txt_path, folder.replace('.zip', '')) + assert os.path.exists(check_path) is True + for txt in txtlist: + check_path = os.path.join( + txt_path, folder.replace('.zip', ''), txt) + assert os.path.exists(check_path) is True + # clear feeds from global memory + feeds.remove_feed(remove_all=True) + + +def test_download_gtfs_feed_via_feed_name_and_dict(tmpdir): + tmp_path = tmpdir.strpath + gtfsfeeds.download( + data_folder=tmp_path, + feed_name='test_agency', + feed_url=('http://www.gtfs-data-exchange.com/' + 'agency/bay-area-rapid-transit/latest.zip'), + feed_dict=None, + error_pause_duration=5, delete_zips=False) + + gtfsfeeds.download( + data_folder=tmp_path, + feed_dict={ + 'test_agency_dict': 'http://www.gtfs-data-exchange.com/agency/' + 'ac-transit/latest.zip'}, + error_pause_duration=5, delete_zips=False) + + filelist = ['test_agency.zip', 'test_agency_dict.zip'] + txtlist = ['calendar.txt', 'routes.txt', 'stop_times.txt', + 'stops.txt', 'trips.txt'] + zip_path = os.path.join(tmp_path, 'gtfsfeed_zips') + txt_path = os.path.join(tmp_path, 'gtfsfeed_text') + for zipfile in filelist: + assert os.path.exists(os.path.join(zip_path, zipfile)) is True + for folder in filelist: + check_path = os.path.join(txt_path, folder.replace('.zip', '')) + assert os.path.exists(check_path) is True + for txt in txtlist: + check_path = os.path.join( + txt_path, folder.replace('.zip', ''), txt) + assert os.path.exists(check_path) is True + # clear feeds from global memory + feeds.remove_feed(remove_all=True) diff --git a/urbanaccess/tests/test_osm_network.py b/urbanaccess/tests/test_osm_network.py index 10ffad7..cbcf516 100644 --- a/urbanaccess/tests/test_osm_network.py +++ b/urbanaccess/tests/test_osm_network.py @@ -9,11 +9,11 @@ def bbox1(): def test_column_names(bbox1): - nodes, edges = ua_network_from_bbox(bbox=bbox1, network_type='walk', - timeout=180, memory=None, - max_query_area_size=50 * 1000 * 50 * - 1000, - remove_lcn=False) # noqa + nodes, edges = ua_network_from_bbox( + bbox=bbox1, network_type='walk', + timeout=180, memory=None, + max_query_area_size=50 * 1000 * 50 * 1000, + remove_lcn=False) col_list = ['x', 'y', 'id'] for col in col_list: assert col in nodes.columns diff --git a/urbanaccess/utils.py b/urbanaccess/utils.py index e8fbf9e..d5f0420 100644 --- a/urbanaccess/utils.py +++ b/urbanaccess/utils.py @@ -63,7 +63,7 @@ def log(message, level=None, name=None, filename=None): # convert message to ascii for proper console display in windows # terminals - message = unicodedata.normalize('NFKD', unicode(message)).encode( + message = unicodedata.normalize('NFKD', str(message)).encode( 'ascii', errors='replace').decode() print(message) sys.stdout = standard_out
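The ``utils.py`` change above swaps Python 2's ``unicode()`` for the built-in ``str()`` in the log-normalization path, so messages normalize cleanly under Python 3. Roughly, the behavior is::

    import unicodedata

    message = u'Circulação'
    ascii_safe = unicodedata.normalize('NFKD', str(message)).encode(
        'ascii', errors='replace').decode()
    print(ascii_safe)  # 'Circulac?a?o': combining marks become '?'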