Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
119 changes: 46 additions & 73 deletions activitysim/abm/models/summarize.py
Original file line number Diff line number Diff line change
Expand Up @@ -87,6 +87,7 @@ def construct_bin_labels(bins: pd.Series, label_format: str) -> pd.Series:
)

def construct_label(label_format, bounds_dict):
# parts = [part for part in ['left', 'right'] if part in label_format]
bounds_dict = {
x: bound for x, bound in bounds_dict.items() if x in label_format
}
Expand Down Expand Up @@ -203,14 +204,9 @@ def manual_breaks(
@inject.step()
def summarize(
network_los: pipeline.Pipeline,
persons: pd.DataFrame,
persons_merged: pd.DataFrame,
households: pd.DataFrame,
households_merged: pd.DataFrame,
trips: pd.DataFrame,
tours: pd.DataFrame,
tours_merged: pd.DataFrame,
land_use: pd.DataFrame,
):
"""
A standard model that uses expression files to summarize pipeline tables for visualization.
Expand All @@ -221,8 +217,8 @@ def summarize(
Columns in pipeline tables can also be sliced and aggregated prior to summarization.
This preprocessing is configured in `summarize.yaml`.

Outputs a separate csv summary file for each expression;
outputs starting with '_' are saved as temporary local variables.

Outputs a separate csv summary file for each expression.
"""
trace_label = 'summarize'
model_settings_file_name = 'summarize.yaml'
Expand All @@ -237,15 +233,9 @@ def summarize(
config.config_file_path(model_settings['SPECIFICATION']), comment='#'
)

# Load dataframes from pipeline
persons = persons.to_frame()
persons_merged = persons_merged.to_frame()
households = households.to_frame()
households_merged = households_merged.to_frame()
trips = trips.to_frame()
tours = tours_merged.to_frame()
tours_merged = tours_merged.to_frame()
land_use = land_use.to_frame()

# - trips_merged - merge trips and tours_merged
trips_merged = pd.merge(
Expand All @@ -257,70 +247,53 @@ def summarize(
how="left",
)

# Add dataframes as local variables
locals_d = {
'persons': persons,
'persons_merged': persons_merged,
'households': households,
'households_merged': households_merged,
'trips': trips,
'trips_merged': trips_merged,
'tours': tours_merged,
'tours_merged': tours_merged,
'land_use': land_use,
}
locals_d = {'trips_merged': trips_merged, 'persons_merged': persons_merged}

skims = wrap_skims(network_los, trips_merged)

# Annotate trips_merged
expressions.annotate_preprocessors(
trips_merged, locals_d, skims, model_settings, 'summarize'
)

for table_name, df in locals_d.items():
if table_name in model_settings:
meta = model_settings[table_name]
df = eval(table_name)

if 'AGGREGATE' in meta and meta['AGGREGATE']:
for agg in meta['AGGREGATE']:
assert set(('column', 'label', 'map')) <= agg.keys()
df[agg['label']] = (
df[agg['column']].map(agg['map']).fillna(df[agg['column']])
)

if 'SLICERS' in meta and meta['SLICERS']:
for slicer in meta['SLICERS']:
if slicer['type'] == 'manual_breaks':
# df[slicer['label']] = pd.cut(df[slicer['column']], slicer['bin_breaks'],
# labels=slicer['bin_labels'], include_lowest=True)
df[slicer['label']] = manual_breaks(
df[slicer['column']], slicer['bin_breaks'], slicer['bin_labels']
)

elif slicer['type'] == 'quantiles':
df[slicer['label']] = quantiles(
df[slicer['column']], slicer['bins'], slicer['label_format']
)

meta = model_settings[table_name]
df = eval(table_name)
elif slicer['type'] == 'spaced_intervals':
df[slicer['label']] = spaced_intervals(
df[slicer['column']],
slicer['lower_bound'],
slicer['interval'],
slicer['label_format'],
)

if 'AGGREGATE' in meta and meta['AGGREGATE']:
for agg in meta['AGGREGATE']:
assert set(('column', 'label', 'map')) <= agg.keys()
df[agg['label']] = (
df[agg['column']].map(agg['map']).fillna(df[agg['column']])
elif slicer['type'] == 'equal_intervals':
df[slicer['label']] = equal_intervals(
df[slicer['column']], slicer['bins'], slicer['label_format']
)

if 'BIN' in meta and meta['BIN']:
for slicer in meta['BIN']:
if slicer['type'] == 'manual_breaks':
df[slicer['label']] = manual_breaks(
df[slicer['column']], slicer['bin_breaks'], slicer['bin_labels']
)

elif slicer['type'] == 'quantiles':
df[slicer['label']] = quantiles(
df[slicer['column']], slicer['bins'], slicer['label_format']
)

elif slicer['type'] == 'spaced_intervals':
df[slicer['label']] = spaced_intervals(
df[slicer['column']],
slicer['lower_bound'],
slicer['interval'],
slicer['label_format'],
)

elif slicer['type'] == 'equal_intervals':
df[slicer['label']] = equal_intervals(
df[slicer['column']], slicer['bins'], slicer['label_format']
)

# Output pipeline tables for expression development
if model_settings['EXPORT_PIPELINE_TABLES'] is True:
pipeline_table_dir = os.path.join(output_location, 'pipeline_tables')
os.makedirs(config.output_file_path(pipeline_table_dir), exist_ok=True)
for name, df in locals_d.items():
df.to_csv(config.output_file_path(os.path.join(pipeline_table_dir, f'{name}.csv')))
locals_d.update(skims)

# Add classification functions to locals
locals_d.update(
Expand All @@ -332,19 +305,19 @@ def summarize(
}
)

# Save merged tables for expression development
# locals_d['trips_merged'].to_csv(
# config.output_file_path(os.path.join(output_location, f'trips_merged.csv'))
# )
# locals_d['persons_merged'].to_csv(
# config.output_file_path(os.path.join(output_location, f'persons_merged.csv'))
# )

for i, row in spec.iterrows():

out_file = row['Output']
expr = row['Expression']

# Save temporary variables starting with underscores in locals_d
if out_file.startswith('_'):

logger.debug(f'Temp Variable: {expr} -> {out_file}')

locals_d[out_file] = eval(expr, globals(), locals_d)
continue

logger.debug(f'Summary: {expr} -> {out_file}.csv')

resultset = eval(expr, globals(), locals_d)
Expand Down
145 changes: 34 additions & 111 deletions activitysim/examples/example_mtc/configs/summarize.csv
Original file line number Diff line number Diff line change
@@ -1,115 +1,38 @@
Description,Output,Expression
# These summaries are checked by test module
,households_count,persons_merged[['household_id']].nunique().rename('households')
,trips_by_mode_count,trips_merged.groupby('trip_mode')[['number_of_participants']].sum().T
#,total_vmt,"trips_merged[['auto_distance']].sum().rename('vmt')"
#,vmt_per_capita,"pd.Series(trips_merged[['auto_distance']].sum() / len(persons_merged), name='vmt_per_capita')"
#,vmt_per_capita,"pd.Series(trips_merged[['auto_distance']].sum() / len(persons_merged), name='vmt_per_capita')"
#,vmt_per_capita_by_home_taz,(trips_merged.groupby('home_zone_id').auto_distance.sum() / persons_merged.groupby('home_zone_id').size()).fillna(0).rename('vmt_per_capita_by_home_taz').reset_index()
#,vmt_per_capita_by_work_taz,(trips_merged.groupby('workplace_zone_id').auto_distance.sum() / persons_merged[persons_merged.workplace_zone_id > 0].groupby('workplace_zone_id').size()).fillna(0).rename('vmt_per_capita_by_work_taz').reset_index()
#,households_count,persons_merged[['household_id']].nunique().rename('households')
#,persons_count,persons_merged[['household_id']].count().rename('persons')
#,person_tours,"trips_merged[['tour_id', 'number_of_participants']].drop_duplicates()[['number_of_participants']].sum().rename('tours')"
#,person_trips,trips_merged[['number_of_participants']].sum().rename('trips')
#,tours_per_household_count,"pd.Series(trips_merged[['tour_id', 'number_of_participants']].drop_duplicates()['number_of_participants'].sum() / persons_merged['household_id'].nunique(), name='tours_per_household')"
#,trips_per_household_count,"pd.Series(trips_merged['number_of_participants'].sum() / persons_merged['household_id'].nunique(), name='trips_per_household')"
#,trip_by_income_category_major_mode,"pd.merge(trips_merged, persons_merged['income_category'], left_on='person_id', right_index=True).groupby(['income_category', 'major_trip_mode']).size().unstack(-1).reset_index()"
#,trips_by_mode_count,trips_merged.groupby('trip_mode')[['number_of_participants']].sum().T#

#,trips_by_major_mode_count,trips_merged.groupby('major_trip_mode')[['number_of_participants']].sum().T#

#,trips_by_purpose_count,trips_merged.groupby('primary_purpose_trip')[['number_of_participants']].sum().T
#,trip_purpose_by_time_of_day,"trips_merged.groupby(['depart','primary_purpose_trip'])['number_of_participants'].sum().unstack(-1).reset_index()"
#,tour_mode_to_trip_mode,"trips_merged.groupby(['tour_mode','trip_mode']).size().rename('trips').reset_index()"
#,work_tours_tod_count,"trips_merged[trips_merged['tour_type'] == 'work'][['tour_id', 'start', 'number_of_participants']].drop_duplicates().groupby('start')['number_of_participants'].sum().rename('tours').reset_index()"
#,school_tours_tod_count,"trips_merged[trips_merged['tour_type'] == 'school'][['tour_id', 'start', 'number_of_participants']].drop_duplicates().groupby('start')['number_of_participants'].sum().rename('tours').reset_index()"
#,non_mandatory_tours_tod_count,"trips_merged[trips_merged['tour_category'] == 'non_mandatory'][['tour_id', 'start', 'number_of_participants']].drop_duplicates().groupby('start')['number_of_participants'].sum().rename('tours').reset_index()"
#,work_tours_tod_count,trips_merged[trips_merged.tour_type == 'work'].groupby('tour_id').depart.min().reset_index().groupby('depart').size().sort_index().rename('tours').reset_index()
#,school_tours_tod_count,trips_merged[trips_merged.tour_type == 'school'].groupby('tour_id').depart.min().reset_index().groupby('depart').size().sort_index().rename('tours').reset_index()
#,non_mandatory_tours_tod_count,trips_merged[trips_merged.tour_category == 'non_mandatory'].groupby('tour_id').depart.min().reset_index().groupby('depart').size().sort_index().rename('tours').reset_index()#

#,low_income_share_by_taz_deciles,"quantiles(persons_merged[persons_merged['income'] < 50000].groupby('home_zone_id').size() / persons_merged.groupby('home_zone_id').size(), 10, '{rank}').rename('low_income_share_by_taz_deciles').reset_index()"#

#,persons_by_income_category,persons_merged.groupby('income_category')[['income_category']].count().T#

#,vmt_per_capita_by_home_taz_quintiles,"quantiles((trips_merged.groupby('home_zone_id').auto_distance.sum() / trips_merged.groupby('home_zone_id').number_of_participants.sum() / persons_merged.groupby('home_zone_id').size()).fillna(0), 5, '{rank}').rename('vmt_per_capita_by_home_taz_quintiles').reset_index()"#

#,non_motorized_trip_distance_bins,"spaced_intervals((trips_merged.walk_distance + trips_merged.bike_distance)[trips_merged.major_trip_mode == 'Non-Motorized'], 0, 0.25).value_counts().sort_index().rename('trips').reset_index()"#

# Calculate total VMT by summing auto_distance from tours_merged
# (auto_distance is calculated by an expression in summarize_preprocessor.csv)
# Initially save as a temporary variable so it can be reused
,_total_vmt,trips_merged[['auto_distance']].sum()
# Then save to CSV
,total_vmt,_total_vmt.rename('vmt')

# Calculate vmt per capita
# (All outputs to CSV have to be a Pandas Series or DataFrame)
,vmt_per_capita,"pd.Series(_total_vmt / len(persons_merged), name='vmt_per_capita')"

# Calculate vmt per capita by home taz
,_vmt_per_home_taz,trips_merged.groupby('home_zone_id').auto_distance.sum()
,_person_per_home_taz,persons_merged.groupby('home_zone_id').size()
,_vmt_per_capita_by_home_taz,(_vmt_per_home_taz/_person_per_home_taz).fillna(0)
,vmt_per_capita_by_home_taz,_vmt_per_capita_by_home_taz.rename('vmt_per_capita_by_home_taz').reset_index()

# Calculate vmt per capita by work taz
,_vmt_per_work_taz,trips_merged.groupby('workplace_zone_id').auto_distance.sum()
,_person_per_work_taz,persons_merged.groupby('workplace_zone_id').size()
,vmt_per_capita_by_work_taz,(_vmt_per_work_taz/_person_per_work_taz).fillna(0).rename('vmt_per_capita_by_work_taz').reset_index()

# Count persons
,persons_count,persons_merged[['household_id']].count().rename('persons')

# Count person-tours
,person_tours,"trips_merged[['tour_id', 'number_of_participants']].drop_duplicates()[['number_of_participants']].sum().rename('tours')"

# Count person-trips
,person_trips,trips_merged[['number_of_participants']].sum().rename('trips')

# Count tours
,tours_count,tours_merged.reset_index()[['tour_id']].count().rename('tours')

# Count trips
,trips_count,trips_merged.reset_index()[['trip_id']].count().rename('trips')

# Count tours per household
,_tours,"trips_merged[['tour_id', 'number_of_participants']].drop_duplicates()['number_of_participants'].sum()"
,_households,persons_merged['household_id'].nunique()
,tours_per_household_count,"pd.Series(_tours / _households, name='tours_per_household')"

# Count trips per household
,_trips,trips_merged['number_of_participants'].sum()
,trips_per_household_count,"pd.Series(_trips / _households, name='trips_per_household')"

# Count trips by major mode
#,trips_by_major_mode_count,trips_merged.groupby('major_trip_mode')[['number_of_participants']].sum().T

# Count trips by income category and major mode
,_trips_with_income,"pd.merge(trips_merged, persons_merged['income_category'], left_on='person_id', right_index=True)"
,trip_by_income_category_major_mode,"_trips_with_income.groupby(['income_category', 'major_trip_mode']).size().unstack(-1).reset_index()"

# Count trips by purpose
,trips_by_purpose_count,trips_merged.groupby('primary_purpose_trip')[['number_of_participants']].sum().T

# Count trips by purpose and departure time
,trip_purpose_by_time_of_day,"trips_merged.groupby(['depart','primary_purpose_trip'])['number_of_participants'].sum().unstack(-1).reset_index()"

# Count trips with each combination of tour mode and trip mode (for Sankey)
,tour_mode_to_trip_mode,"trips_merged.groupby(['tour_mode','trip_mode']).size().rename('trips').reset_index()"

# Count work tours by time of day
,_work_tours,trips_merged[trips_merged['tour_type'] == 'work']
,work_tours_tod_count,_work_tours.groupby('tour_id').depart.min().reset_index().groupby('depart').size().sort_index().rename('tours').reset_index()

# Count school tours by time of day
,_school_tours,trips_merged[trips_merged['tour_type'] == 'school']
,school_tours_tod_count,_school_tours.groupby('tour_id').depart.min().reset_index().groupby('depart').size().sort_index().rename('tours').reset_index()

# Count non-mandatory tours by time of day
,_non_mandatory_tours,trips_merged[trips_merged.tour_category == 'non_mandatory']
,non_mandatory_tours_tod_count,_non_mandatory_tours.groupby('tour_id').depart.min().reset_index().groupby('depart').size().sort_index().rename('tours').reset_index()

# TAZ population density quintiles
,_taz_pop_dens,land_use.TOTPOP/land_use.TOTACRE
,taz_population_density_quintiles,"quantiles(_taz_pop_dens, 5, '{rank}').rename('pop_dens_quintile').reset_index()"

# Calculate share of taz population that is low income by decile
# (Output deciles by specifying '{rank}' as the label format in the quantile function)
,_low_income_pop_by_taz,persons_merged[persons_merged.income < 50000].groupby('home_zone_id').size()
,_total_pop_by_taz,persons_merged.groupby('home_zone_id').size()
,_proportion_low_income_by_taz,"_low_income_pop_by_taz / _total_pop_by_taz"
,_proportion_low_income_deciles,"quantiles(_proportion_low_income_by_taz, 10, '{rank}')"
,low_income_share_by_taz_deciles,"_proportion_low_income_deciles.rename('low_income_share_by_taz_deciles').reset_index()"

# Count persons by income category
# (income_category is calculated by an expression in summarize_preprocessor.csv)
#,persons_by_income_category,persons_merged.groupby('income_category')[['income_category']].count().T

# Calculate vmt per capita quintiles by taz
# (Output quintiles by specifying '{rank}' as the label format in the quantile function)
,_vmt_per_capita_quintiles,"quantiles(_vmt_per_capita_by_home_taz, 5, '{rank}')"
,vmt_per_capita_by_home_taz_quintiles,"_vmt_per_capita_quintiles.rename('vmt_per_capita_by_home_taz_quintiles').reset_index()"

# Counts of non-motorized trips by 0.25-mile distance bins
,_non_motorized_distances,(trips_merged.walk_distance + trips_merged.bike_distance)
,_non_motorized_trips,trips_merged.major_trip_mode == 'Non-Motorized'
,_non_motorized_trip_distances,_non_motorized_distances[_non_motorized_trips]
,_counts_of_non_motorized_trips_by_distance_bin,"spaced_intervals(_non_motorized_trip_distances, 0, 0.25).value_counts()"
,non_motorized_trip_distance_bins,"_counts_of_non_motorized_trips_by_distance_bin.sort_index().rename('trips').reset_index()"

# Counts of trips by income and travel time category
#,trips_by_income_and_travel_time_category,"trips_merged.groupby(['trip_income_category','total_time_category']).size().rename('trips').unstack(-2).reset_index()"






,taz_count,"len(land_use)"
Loading