Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
36 commits
Select commit Hold shift + click to select a range
68a65a2
made all standard pipeline tables available for summary expressions
chesterharvey Feb 10, 2022
d9277a5
syntax bug fix
chesterharvey Feb 10, 2022
eb36fa5
updated functionality for exporting available pipeline tables for exp…
chesterharvey Feb 10, 2022
60ca801
configured export of pipeline tables as a yaml flag
chesterharvey Feb 10, 2022
a66e265
fixed code style errors
chesterharvey Feb 23, 2022
60fd31c
allow annotate_preprocessors to annotate a table without a skim wrapper
chesterharvey Feb 23, 2022
f812657
defined separate preprocessors for different pipeline tables
chesterharvey Feb 23, 2022
0458d45
enabled preprocessing for multiple tables
chesterharvey Feb 23, 2022
83a9522
added ability to create temporary variables in summary expressions file
chesterharvey Feb 24, 2022
0deec59
added the simwrapper python package as a dependency
chesterharvey Feb 24, 2022
78dd56a
fixed python style
chesterharvey Feb 24, 2022
831d58c
updated summary expressions
chesterharvey Feb 24, 2022
e53c99c
added summarize model to example_mtc
chesterharvey Feb 24, 2022
bac92a8
updated visualization docs
chesterharvey Feb 24, 2022
c711bdf
updated docs
chesterharvey Feb 24, 2022
cc9823a
reverting to earlier commit to address travis testing failures
chesterharvey Feb 24, 2022
1dc1aa0
updates to make all pipeline tables available as locals and allow tem…
chesterharvey Feb 25, 2022
38be435
updates to viz documentation
chesterharvey Feb 25, 2022
0b68119
pycodestyle fix
chesterharvey Feb 25, 2022
85c9f04
viz documentation updates
chesterharvey Feb 25, 2022
8198d5b
remove unnecessary test pipeline tables
chesterharvey Feb 25, 2022
9e69428
Add expressions for tours and trips counts
chesterharvey Feb 25, 2022
a30f160
Allow yamls to be maintained in example outputs
chesterharvey Feb 25, 2022
aec98f1
Update dashboard-1-summary.yaml
chesterharvey Feb 25, 2022
b17943b
Add summarize model to example_mtc settings
chesterharvey Feb 25, 2022
bb9ad1f
Add simwrapper as a dependency
chesterharvey Feb 25, 2022
9bb224c
added summarize config files to mtc example
chesterharvey Feb 25, 2022
a898568
Updates to docs
chesterharvey Feb 28, 2022
51728e2
rename 'slicers' as 'bins'
chesterharvey Feb 28, 2022
279f060
Allow export of pipeline tables
chesterharvey Mar 1, 2022
139aa1b
Update SLICERS to BIN
chesterharvey Mar 1, 2022
227908f
pycodestyle updates
chesterharvey Mar 1, 2022
79d88f5
removed unnecessary geojson from sample data
chesterharvey Mar 2, 2022
1b1a4c6
Added simwrapper to environment yamls temporarily with pip
chesterharvey Mar 2, 2022
3974e39
Enabled pipeline table export for expression development
chesterharvey Mar 2, 2022
d96c740
Merge branch 'mtc_tm2' into ft_vis_1
i-am-sijia Mar 3, 2022
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
119 changes: 73 additions & 46 deletions activitysim/abm/models/summarize.py
Original file line number Diff line number Diff line change
Expand Up @@ -87,7 +87,6 @@ def construct_bin_labels(bins: pd.Series, label_format: str) -> pd.Series:
)

def construct_label(label_format, bounds_dict):
# parts = [part for part in ['left', 'right'] if part in label_format]
bounds_dict = {
x: bound for x, bound in bounds_dict.items() if x in label_format
}
Expand Down Expand Up @@ -204,9 +203,14 @@ def manual_breaks(
@inject.step()
def summarize(
network_los: pipeline.Pipeline,
persons: pd.DataFrame,
persons_merged: pd.DataFrame,
households: pd.DataFrame,
households_merged: pd.DataFrame,
trips: pd.DataFrame,
tours: pd.DataFrame,
tours_merged: pd.DataFrame,
land_use: pd.DataFrame,
):
"""
    A standard model that uses expression files to summarize pipeline tables for visualization.
Expand All @@ -217,8 +221,8 @@ def summarize(
Columns in pipeline tables can also be sliced and aggregated prior to summarization.
This preprocessing is configured in `summarize.yaml`.


    Outputs a separate csv summary file for each expression.
    Outputs a separate csv summary file for each expression;
    outputs starting with '_' are saved as temporary local variables.
"""
trace_label = 'summarize'
model_settings_file_name = 'summarize.yaml'
Expand All @@ -233,9 +237,15 @@ def summarize(
config.config_file_path(model_settings['SPECIFICATION']), comment='#'
)

# Load dataframes from pipeline
persons = persons.to_frame()
persons_merged = persons_merged.to_frame()
households = households.to_frame()
households_merged = households_merged.to_frame()
trips = trips.to_frame()
tours = tours_merged.to_frame()
tours_merged = tours_merged.to_frame()
land_use = land_use.to_frame()

# - trips_merged - merge trips and tours_merged
trips_merged = pd.merge(
Expand All @@ -247,53 +257,70 @@ def summarize(
how="left",
)

locals_d = {'trips_merged': trips_merged, 'persons_merged': persons_merged}
# Add dataframes as local variables
locals_d = {
'persons': persons,
'persons_merged': persons_merged,
'households': households,
'households_merged': households_merged,
'trips': trips,
'trips_merged': trips_merged,
'tours': tours_merged,
'tours_merged': tours_merged,
'land_use': land_use,
}

skims = wrap_skims(network_los, trips_merged)

# Annotate trips_merged
expressions.annotate_preprocessors(
trips_merged, locals_d, skims, model_settings, 'summarize'
)

for table_name, df in locals_d.items():
meta = model_settings[table_name]
df = eval(table_name)

if 'AGGREGATE' in meta and meta['AGGREGATE']:
for agg in meta['AGGREGATE']:
assert set(('column', 'label', 'map')) <= agg.keys()
df[agg['label']] = (
df[agg['column']].map(agg['map']).fillna(df[agg['column']])
)

if 'SLICERS' in meta and meta['SLICERS']:
for slicer in meta['SLICERS']:
if slicer['type'] == 'manual_breaks':
# df[slicer['label']] = pd.cut(df[slicer['column']], slicer['bin_breaks'],
# labels=slicer['bin_labels'], include_lowest=True)
df[slicer['label']] = manual_breaks(
df[slicer['column']], slicer['bin_breaks'], slicer['bin_labels']
)

elif slicer['type'] == 'quantiles':
df[slicer['label']] = quantiles(
df[slicer['column']], slicer['bins'], slicer['label_format']
)
if table_name in model_settings:

elif slicer['type'] == 'spaced_intervals':
df[slicer['label']] = spaced_intervals(
df[slicer['column']],
slicer['lower_bound'],
slicer['interval'],
slicer['label_format'],
)
meta = model_settings[table_name]
df = eval(table_name)

elif slicer['type'] == 'equal_intervals':
df[slicer['label']] = equal_intervals(
df[slicer['column']], slicer['bins'], slicer['label_format']
if 'AGGREGATE' in meta and meta['AGGREGATE']:
for agg in meta['AGGREGATE']:
assert set(('column', 'label', 'map')) <= agg.keys()
df[agg['label']] = (
df[agg['column']].map(agg['map']).fillna(df[agg['column']])
)

locals_d.update(skims)
if 'BIN' in meta and meta['BIN']:
for slicer in meta['BIN']:
if slicer['type'] == 'manual_breaks':
df[slicer['label']] = manual_breaks(
df[slicer['column']], slicer['bin_breaks'], slicer['bin_labels']
)

elif slicer['type'] == 'quantiles':
df[slicer['label']] = quantiles(
df[slicer['column']], slicer['bins'], slicer['label_format']
)

elif slicer['type'] == 'spaced_intervals':
df[slicer['label']] = spaced_intervals(
df[slicer['column']],
slicer['lower_bound'],
slicer['interval'],
slicer['label_format'],
)

elif slicer['type'] == 'equal_intervals':
df[slicer['label']] = equal_intervals(
df[slicer['column']], slicer['bins'], slicer['label_format']
)

# Output pipeline tables for expression development
if model_settings['EXPORT_PIPELINE_TABLES'] is True:
pipeline_table_dir = os.path.join(output_location, 'pipeline_tables')
os.makedirs(config.output_file_path(pipeline_table_dir), exist_ok=True)
for name, df in locals_d.items():
df.to_csv(config.output_file_path(os.path.join(pipeline_table_dir, f'{name}.csv')))

# Add classification functions to locals
locals_d.update(
Expand All @@ -305,19 +332,19 @@ def summarize(
}
)

# Save merged tables for expression development
# locals_d['trips_merged'].to_csv(
# config.output_file_path(os.path.join(output_location, f'trips_merged.csv'))
# )
# locals_d['persons_merged'].to_csv(
# config.output_file_path(os.path.join(output_location, f'persons_merged.csv'))
# )

for i, row in spec.iterrows():

out_file = row['Output']
expr = row['Expression']

# Save temporary variables starting with underscores in locals_d
if out_file.startswith('_'):

logger.debug(f'Temp Variable: {expr} -> {out_file}')

locals_d[out_file] = eval(expr, globals(), locals_d)
continue

logger.debug(f'Summary: {expr} -> {out_file}.csv')

resultset = eval(expr, globals(), locals_d)
Expand Down
145 changes: 111 additions & 34 deletions activitysim/examples/example_mtc/configs/summarize.csv
Original file line number Diff line number Diff line change
@@ -1,38 +1,115 @@
Description,Output,Expression
#,total_vmt,"trips_merged[['auto_distance']].sum().rename('vmt')"
#,vmt_per_capita,"pd.Series(trips_merged[['auto_distance']].sum() / len(persons_merged), name='vmt_per_capita')"
#,vmt_per_capita,"pd.Series(trips_merged[['auto_distance']].sum() / len(persons_merged), name='vmt_per_capita')"
#,vmt_per_capita_by_home_taz,(trips_merged.groupby('home_zone_id').auto_distance.sum() / persons_merged.groupby('home_zone_id').size()).fillna(0).rename('vmt_per_capita_by_home_taz').reset_index()
#,vmt_per_capita_by_work_taz,(trips_merged.groupby('workplace_zone_id').auto_distance.sum() / persons_merged[persons_merged.workplace_zone_id > 0].groupby('workplace_zone_id').size()).fillna(0).rename('vmt_per_capita_by_work_taz').reset_index()
#,households_count,persons_merged[['household_id']].nunique().rename('households')
#,persons_count,persons_merged[['household_id']].count().rename('persons')
#,person_tours,"trips_merged[['tour_id', 'number_of_participants']].drop_duplicates()[['number_of_participants']].sum().rename('tours')"
#,person_trips,trips_merged[['number_of_participants']].sum().rename('trips')
#,tours_per_household_count,"pd.Series(trips_merged[['tour_id', 'number_of_participants']].drop_duplicates()['number_of_participants'].sum() / persons_merged['household_id'].nunique(), name='tours_per_household')"
#,trips_per_household_count,"pd.Series(trips_merged['number_of_participants'].sum() / persons_merged['household_id'].nunique(), name='trips_per_household')"
#,trip_by_income_category_major_mode,"pd.merge(trips_merged, persons_merged['income_category'], left_on='person_id', right_index=True).groupby(['income_category', 'major_trip_mode']).size().unstack(-1).reset_index()"
#,trips_by_mode_count,trips_merged.groupby('trip_mode')[['number_of_participants']].sum().T#

#,trips_by_major_mode_count,trips_merged.groupby('major_trip_mode')[['number_of_participants']].sum().T#

#,trips_by_purpose_count,trips_merged.groupby('primary_purpose_trip')[['number_of_participants']].sum().T
#,trip_purpose_by_time_of_day,"trips_merged.groupby(['depart','primary_purpose_trip'])['number_of_participants'].sum().unstack(-1).reset_index()"
#,tour_mode_to_trip_mode,"trips_merged.groupby(['tour_mode','trip_mode']).size().rename('trips').reset_index()"
#,work_tours_tod_count,"trips_merged[trips_merged['tour_type'] == 'work'][['tour_id', 'start', 'number_of_participants']].drop_duplicates().groupby('start')['number_of_participants'].sum().rename('tours').reset_index()"
#,school_tours_tod_count,"trips_merged[trips_merged['tour_type'] == 'school'][['tour_id', 'start', 'number_of_participants']].drop_duplicates().groupby('start')['number_of_participants'].sum().rename('tours').reset_index()"
#,non_mandatory_tours_tod_count,"trips_merged[trips_merged['tour_category'] == 'non_mandatory'][['tour_id', 'start', 'number_of_participants']].drop_duplicates().groupby('start')['number_of_participants'].sum().rename('tours').reset_index()"
#,work_tours_tod_count,trips_merged[trips_merged.tour_type == 'work'].groupby('tour_id').depart.min().reset_index().groupby('depart').size().sort_index().rename('tours').reset_index()
#,school_tours_tod_count,trips_merged[trips_merged.tour_type == 'school'].groupby('tour_id').depart.min().reset_index().groupby('depart').size().sort_index().rename('tours').reset_index()
#,non_mandatory_tours_tod_count,trips_merged[trips_merged.tour_category == 'non_mandatory'].groupby('tour_id').depart.min().reset_index().groupby('depart').size().sort_index().rename('tours').reset_index()#

#,low_income_share_by_taz_deciles,"quantiles(persons_merged[persons_merged['income'] < 50000].groupby('home_zone_id').size() / persons_merged.groupby('home_zone_id').size(), 10, '{rank}').rename('low_income_share_by_taz_deciles').reset_index()"#

#,persons_by_income_category,persons_merged.groupby('income_category')[['income_category']].count().T#

#,vmt_per_capita_by_home_taz_quintiles,"quantiles((trips_merged.groupby('home_zone_id').auto_distance.sum() / trips_merged.groupby('home_zone_id').number_of_participants.sum() / persons_merged.groupby('home_zone_id').size()).fillna(0), 5, '{rank}').rename('vmt_per_capita_by_home_taz_quintiles').reset_index()"#

#,non_motorized_trip_distance_bins,"spaced_intervals((trips_merged.walk_distance + trips_merged.bike_distance)[trips_merged.major_trip_mode == 'Non-Motorized'], 0, 0.25).value_counts().sort_index().rename('trips').reset_index()"#
# These summaries are checked by test module
,households_count,persons_merged[['household_id']].nunique().rename('households')
,trips_by_mode_count,trips_merged.groupby('trip_mode')[['number_of_participants']].sum().T

# Calculate total VMT by summing auto_distance from tours_merged
# (auto_distance is calculated by an expression in summarize_preprocessor.csv)
# Initially save as a temporary variable so it can be reused
,_total_vmt,trips_merged[['auto_distance']].sum()
# Then save to CSV
,total_vmt,_total_vmt.rename('vmt')

# Calculate vmt per capita
# (All outputs to CSV have to be a Pandas Series or DataFrame)
,vmt_per_capita,"pd.Series(_total_vmt / len(persons_merged), name='vmt_per_capita')"

# Calculate vmt per capita by home taz
,_vmt_per_home_taz,trips_merged.groupby('home_zone_id').auto_distance.sum()
,_person_per_home_taz,persons_merged.groupby('home_zone_id').size()
,_vmt_per_capita_by_home_taz,(_vmt_per_home_taz/_person_per_home_taz).fillna(0)
,vmt_per_capita_by_home_taz,_vmt_per_capita_by_home_taz.rename('vmt_per_capita_by_home_taz').reset_index()

# Calculate vmt per capita by work taz
,_vmt_per_work_taz,trips_merged.groupby('workplace_zone_id').auto_distance.sum()
,_person_per_work_taz,persons_merged.groupby('workplace_zone_id').size()
,vmt_per_capita_by_work_taz,(_vmt_per_work_taz/_person_per_work_taz).fillna(0).rename('vmt_per_capita_by_work_taz').reset_index()

# Count persons
,persons_count,persons_merged[['household_id']].count().rename('persons')

# Count person-tours
,person_tours,"trips_merged[['tour_id', 'number_of_participants']].drop_duplicates()[['number_of_participants']].sum().rename('tours')"

# Count person-trips
,person_trips,trips_merged[['number_of_participants']].sum().rename('trips')

# Count tours
,tours_count,tours_merged.reset_index()[['tour_id']].count().rename('tours')

# Count trips
,trips_count,trips_merged.reset_index()[['trip_id']].count().rename('trips')

# Count tours per household
,_tours,"trips_merged[['tour_id', 'number_of_participants']].drop_duplicates()['number_of_participants'].sum()"
,_households,persons_merged['household_id'].nunique()
,tours_per_household_count,"pd.Series(_tours / _households, name='tours_per_household')"

# Count trips per household
,_trips,trips_merged['number_of_participants'].sum()
,trips_per_household_count,"pd.Series(_trips / _households, name='trips_per_household')"

# Count trips by major mode
#,trips_by_major_mode_count,trips_merged.groupby('major_trip_mode')[['number_of_participants']].sum().T

# Count trips by income category and major mode
,_trips_with_income,"pd.merge(trips_merged, persons_merged['income_category'], left_on='person_id', right_index=True)"
,trip_by_income_category_major_mode,"_trips_with_income.groupby(['income_category', 'major_trip_mode']).size().unstack(-1).reset_index()"

# Count trips by purpose
,trips_by_purpose_count,trips_merged.groupby('primary_purpose_trip')[['number_of_participants']].sum().T

# Count trips by purpose and departure time
,trip_purpose_by_time_of_day,"trips_merged.groupby(['depart','primary_purpose_trip'])['number_of_participants'].sum().unstack(-1).reset_index()"

# Count trips with each combination of tour mode and trip mode (for Sankey)
,tour_mode_to_trip_mode,"trips_merged.groupby(['tour_mode','trip_mode']).size().rename('trips').reset_index()"

# Count work tours by time of day
,_work_tours,trips_merged[trips_merged['tour_type'] == 'work']
,work_tours_tod_count,_work_tours.groupby('tour_id').depart.min().reset_index().groupby('depart').size().sort_index().rename('tours').reset_index()

# Count school tours by time of day
,_school_tours,trips_merged[trips_merged['tour_type'] == 'school']
,school_tours_tod_count,_school_tours.groupby('tour_id').depart.min().reset_index().groupby('depart').size().sort_index().rename('tours').reset_index()

# Count non-mandatory tours by time of day
,_non_mandatory_tours,trips_merged[trips_merged.tour_category == 'non_mandatory']
,non_mandatory_tours_tod_count,_non_mandatory_tours.groupby('tour_id').depart.min().reset_index().groupby('depart').size().sort_index().rename('tours').reset_index()

# TAZ population density quintiles
,_taz_pop_dens,land_use.TOTPOP/land_use.TOTACRE
,taz_population_density_quintiles,"quantiles(_taz_pop_dens, 5, '{rank}').rename('pop_dens_quintile').reset_index()"

# Calculate share of taz population that is low income by decile
# (Output deciles by specifying '{rank}' as the label format in the quantile function)
,_low_income_pop_by_taz,persons_merged[persons_merged.income < 50000].groupby('home_zone_id').size()
,_total_pop_by_taz,persons_merged.groupby('home_zone_id').size()
,_proportion_low_income_by_taz,"_low_income_pop_by_taz / _total_pop_by_taz"
,_proportion_low_income_deciles,"quantiles(_proportion_low_income_by_taz, 10, '{rank}')"
,low_income_share_by_taz_deciles,"_proportion_low_income_deciles.rename('low_income_share_by_taz_deciles').reset_index()"

# Count persons by income category
# (income_category is calculated by an expression in summarize_preprocessor.csv)
#,persons_by_income_category,persons_merged.groupby('income_category')[['income_category']].count().T

# Calculate vmt per capita quintiles by taz
# (Output quintiles by specifying '{rank}' as the label format in the quantile function)
,_vmt_per_capita_quintiles,"quantiles(_vmt_per_capita_by_home_taz, 5, '{rank}')"
,vmt_per_capita_by_home_taz_quintiles,"_vmt_per_capita_quintiles.rename('vmt_per_capita_by_home_taz_quintiles').reset_index()"

# Counts of non-motorized trips by 0.25-mile distance bins
,_non_motorized_distances,(trips_merged.walk_distance + trips_merged.bike_distance)
,_non_motorized_trips,trips_merged.major_trip_mode == 'Non-Motorized'
,_non_motorized_trip_distances,_non_motorized_distances[_non_motorized_trips]
,_counts_of_non_motorized_trips_by_distance_bin,"spaced_intervals(_non_motorized_trip_distances, 0, 0.25).value_counts()"
,non_motorized_trip_distance_bins,"_counts_of_non_motorized_trips_by_distance_bin.sort_index().rename('trips').reset_index()"

# Counts of trips by income and travel time category
#,trips_by_income_and_travel_time_category,"trips_merged.groupby(['trip_income_category','total_time_category']).size().rename('trips').unstack(-2).reset_index()"

,taz_count,"len(land_use)"





Loading