Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
36 commits
Select commit Hold shift + click to select a range
68a65a2
made all standard pipeline tables available for summary expressions
chesterharvey Feb 10, 2022
d9277a5
syntax bug fix
chesterharvey Feb 10, 2022
eb36fa5
updated functionality for exporting available pipeline tables for exp…
chesterharvey Feb 10, 2022
60ca801
configured export of pipeline tables as a yaml flag
chesterharvey Feb 10, 2022
a66e265
fixed code style errors
chesterharvey Feb 23, 2022
60fd31c
allow annotate_preprocessors to annotate a table without a skim wrapper
chesterharvey Feb 23, 2022
f812657
defined separate preprocessors for different pipeline tables
chesterharvey Feb 23, 2022
0458d45
enabled preprocessing for multiple tables
chesterharvey Feb 23, 2022
83a9522
added ability to create temporary variables in summary expressions file
chesterharvey Feb 24, 2022
0deec59
added the simwrapper python package as a dependency
chesterharvey Feb 24, 2022
78dd56a
fixed python style
chesterharvey Feb 24, 2022
831d58c
updated summary expressions
chesterharvey Feb 24, 2022
e53c99c
added summarize model to example_mtc
chesterharvey Feb 24, 2022
bac92a8
updated visualization docs
chesterharvey Feb 24, 2022
c711bdf
updated docs
chesterharvey Feb 24, 2022
cc9823a
reverting to earlier commit to address travis testing failures
chesterharvey Feb 24, 2022
1dc1aa0
updates to make all pipeline tables available as locals and allow tem…
chesterharvey Feb 25, 2022
38be435
updates to viz documentation
chesterharvey Feb 25, 2022
0b68119
pycodestyle fix
chesterharvey Feb 25, 2022
85c9f04
viz documentation updates
chesterharvey Feb 25, 2022
8198d5b
remove unnecessary test pipeline tables
chesterharvey Feb 25, 2022
9e69428
Add expressions for tours and trips counts
chesterharvey Feb 25, 2022
a30f160
Allow yamls to be maintained in example outputs
chesterharvey Feb 25, 2022
aec98f1
Update dashboard-1-summary.yaml
chesterharvey Feb 25, 2022
b17943b
Add summarize model to example_mtc settings
chesterharvey Feb 25, 2022
bb9ad1f
Add simwrapper as a dependency
chesterharvey Feb 25, 2022
9bb224c
added summarize config files to mtc example
chesterharvey Feb 25, 2022
a898568
Updates to docs
chesterharvey Feb 28, 2022
51728e2
rename 'slicers' as 'bins'
chesterharvey Feb 28, 2022
279f060
Allow export of pipeline tables
chesterharvey Mar 1, 2022
139aa1b
Update SLICERS to BIN
chesterharvey Mar 1, 2022
227908f
pycodestyle updates
chesterharvey Mar 1, 2022
79d88f5
removed unnecessary geojson from sample data
chesterharvey Mar 2, 2022
1b1a4c6
Added simwrapper to environment yamls temporarily with pip
chesterharvey Mar 2, 2022
3974e39
Enabled pipeline table export for expression development
chesterharvey Mar 2, 2022
d96c740
Merge branch 'mtc_tm2' into ft_vis_1
i-am-sijia Mar 3, 2022
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
119 changes: 73 additions & 46 deletions activitysim/abm/models/summarize.py
Original file line number Diff line number Diff line change
Expand Up @@ -87,7 +87,6 @@ def construct_bin_labels(bins: pd.Series, label_format: str) -> pd.Series:
)

def construct_label(label_format, bounds_dict):
# parts = [part for part in ['left', 'right'] if part in label_format]
bounds_dict = {
x: bound for x, bound in bounds_dict.items() if x in label_format
}
Expand Down Expand Up @@ -204,9 +203,14 @@ def manual_breaks(
@inject.step()
def summarize(
network_los: pipeline.Pipeline,
persons: pd.DataFrame,
persons_merged: pd.DataFrame,
households: pd.DataFrame,
households_merged: pd.DataFrame,
trips: pd.DataFrame,
tours: pd.DataFrame,
tours_merged: pd.DataFrame,
land_use: pd.DataFrame,
):
"""
    A standard model that uses expression files to summarize pipeline tables for visualization.
Expand All @@ -217,8 +221,8 @@ def summarize(
Columns in pipeline tables can also be sliced and aggregated prior to summarization.
This preprocessing is configured in `summarize.yaml`.


    Outputs a separate csv summary file for each expression.
    Outputs a separate csv summary file for each expression;
    outputs starting with '_' are saved as temporary local variables.
"""
trace_label = 'summarize'
model_settings_file_name = 'summarize.yaml'
Expand All @@ -233,9 +237,15 @@ def summarize(
config.config_file_path(model_settings['SPECIFICATION']), comment='#'
)

# Load dataframes from pipeline
persons = persons.to_frame()
persons_merged = persons_merged.to_frame()
households = households.to_frame()
households_merged = households_merged.to_frame()
trips = trips.to_frame()
tours = tours_merged.to_frame()
tours_merged = tours_merged.to_frame()
land_use = land_use.to_frame()

# - trips_merged - merge trips and tours_merged
trips_merged = pd.merge(
Expand All @@ -247,53 +257,70 @@ def summarize(
how="left",
)

locals_d = {'trips_merged': trips_merged, 'persons_merged': persons_merged}
# Add dataframes as local variables
locals_d = {
'persons': persons,
'persons_merged': persons_merged,
'households': households,
'households_merged': households_merged,
'trips': trips,
'trips_merged': trips_merged,
'tours': tours_merged,
'tours_merged': tours_merged,
'land_use': land_use,
}

skims = wrap_skims(network_los, trips_merged)

# Annotate trips_merged
expressions.annotate_preprocessors(
trips_merged, locals_d, skims, model_settings, 'summarize'
)

for table_name, df in locals_d.items():
meta = model_settings[table_name]
df = eval(table_name)

if 'AGGREGATE' in meta and meta['AGGREGATE']:
for agg in meta['AGGREGATE']:
assert set(('column', 'label', 'map')) <= agg.keys()
df[agg['label']] = (
df[agg['column']].map(agg['map']).fillna(df[agg['column']])
)

if 'SLICERS' in meta and meta['SLICERS']:
for slicer in meta['SLICERS']:
if slicer['type'] == 'manual_breaks':
# df[slicer['label']] = pd.cut(df[slicer['column']], slicer['bin_breaks'],
# labels=slicer['bin_labels'], include_lowest=True)
df[slicer['label']] = manual_breaks(
df[slicer['column']], slicer['bin_breaks'], slicer['bin_labels']
)

elif slicer['type'] == 'quantiles':
df[slicer['label']] = quantiles(
df[slicer['column']], slicer['bins'], slicer['label_format']
)
if table_name in model_settings:

elif slicer['type'] == 'spaced_intervals':
df[slicer['label']] = spaced_intervals(
df[slicer['column']],
slicer['lower_bound'],
slicer['interval'],
slicer['label_format'],
)
meta = model_settings[table_name]
df = eval(table_name)

elif slicer['type'] == 'equal_intervals':
df[slicer['label']] = equal_intervals(
df[slicer['column']], slicer['bins'], slicer['label_format']
if 'AGGREGATE' in meta and meta['AGGREGATE']:
for agg in meta['AGGREGATE']:
assert set(('column', 'label', 'map')) <= agg.keys()
df[agg['label']] = (
df[agg['column']].map(agg['map']).fillna(df[agg['column']])
)

locals_d.update(skims)
if 'BIN' in meta and meta['BIN']:
for slicer in meta['BIN']:
if slicer['type'] == 'manual_breaks':
df[slicer['label']] = manual_breaks(
df[slicer['column']], slicer['bin_breaks'], slicer['bin_labels']
)

elif slicer['type'] == 'quantiles':
df[slicer['label']] = quantiles(
df[slicer['column']], slicer['bins'], slicer['label_format']
)

elif slicer['type'] == 'spaced_intervals':
df[slicer['label']] = spaced_intervals(
df[slicer['column']],
slicer['lower_bound'],
slicer['interval'],
slicer['label_format'],
)

elif slicer['type'] == 'equal_intervals':
df[slicer['label']] = equal_intervals(
df[slicer['column']], slicer['bins'], slicer['label_format']
)

# Output pipeline tables for expression development
if model_settings['EXPORT_PIPELINE_TABLES'] is True:
pipeline_table_dir = os.path.join(output_location, 'pipeline_tables')
os.makedirs(config.output_file_path(pipeline_table_dir), exist_ok=True)
for name, df in locals_d.items():
df.to_csv(config.output_file_path(os.path.join(pipeline_table_dir, f'{name}.csv')))

# Add classification functions to locals
locals_d.update(
Expand All @@ -305,19 +332,19 @@ def summarize(
}
)

# Save merged tables for expression development
# locals_d['trips_merged'].to_csv(
# config.output_file_path(os.path.join(output_location, f'trips_merged.csv'))
# )
# locals_d['persons_merged'].to_csv(
# config.output_file_path(os.path.join(output_location, f'persons_merged.csv'))
# )

for i, row in spec.iterrows():

out_file = row['Output']
expr = row['Expression']

# Save temporary variables starting with underscores in locals_d
if out_file.startswith('_'):

logger.debug(f'Temp Variable: {expr} -> {out_file}')

locals_d[out_file] = eval(expr, globals(), locals_d)
continue

logger.debug(f'Summary: {expr} -> {out_file}.csv')

resultset = eval(expr, globals(), locals_d)
Expand Down
145 changes: 111 additions & 34 deletions activitysim/examples/example_mtc/configs/summarize.csv
Original file line number Diff line number Diff line change
@@ -1,38 +1,115 @@
Description,Output,Expression
#,total_vmt,"trips_merged[['auto_distance']].sum().rename('vmt')"
#,vmt_per_capita,"pd.Series(trips_merged[['auto_distance']].sum() / len(persons_merged), name='vmt_per_capita')"
#,vmt_per_capita,"pd.Series(trips_merged[['auto_distance']].sum() / len(persons_merged), name='vmt_per_capita')"
#,vmt_per_capita_by_home_taz,(trips_merged.groupby('home_zone_id').auto_distance.sum() / persons_merged.groupby('home_zone_id').size()).fillna(0).rename('vmt_per_capita_by_home_taz').reset_index()
#,vmt_per_capita_by_work_taz,(trips_merged.groupby('workplace_zone_id').auto_distance.sum() / persons_merged[persons_merged.workplace_zone_id > 0].groupby('workplace_zone_id').size()).fillna(0).rename('vmt_per_capita_by_work_taz').reset_index()
#,households_count,persons_merged[['household_id']].nunique().rename('households')
#,persons_count,persons_merged[['household_id']].count().rename('persons')
#,person_tours,"trips_merged[['tour_id', 'number_of_participants']].drop_duplicates()[['number_of_participants']].sum().rename('tours')"
#,person_trips,trips_merged[['number_of_participants']].sum().rename('trips')
#,tours_per_household_count,"pd.Series(trips_merged[['tour_id', 'number_of_participants']].drop_duplicates()['number_of_participants'].sum() / persons_merged['household_id'].nunique(), name='tours_per_household')"
#,trips_per_household_count,"pd.Series(trips_merged['number_of_participants'].sum() / persons_merged['household_id'].nunique(), name='trips_per_household')"
#,trip_by_income_category_major_mode,"pd.merge(trips_merged, persons_merged['income_category'], left_on='person_id', right_index=True).groupby(['income_category', 'major_trip_mode']).size().unstack(-1).reset_index()"
#,trips_by_mode_count,trips_merged.groupby('trip_mode')[['number_of_participants']].sum().T#

#,trips_by_major_mode_count,trips_merged.groupby('major_trip_mode')[['number_of_participants']].sum().T#

#,trips_by_purpose_count,trips_merged.groupby('primary_purpose_trip')[['number_of_participants']].sum().T
#,trip_purpose_by_time_of_day,"trips_merged.groupby(['depart','primary_purpose_trip'])['number_of_participants'].sum().unstack(-1).reset_index()"
#,tour_mode_to_trip_mode,"trips_merged.groupby(['tour_mode','trip_mode']).size().rename('trips').reset_index()"
#,work_tours_tod_count,"trips_merged[trips_merged['tour_type'] == 'work'][['tour_id', 'start', 'number_of_participants']].drop_duplicates().groupby('start')['number_of_participants'].sum().rename('tours').reset_index()"
#,school_tours_tod_count,"trips_merged[trips_merged['tour_type'] == 'school'][['tour_id', 'start', 'number_of_participants']].drop_duplicates().groupby('start')['number_of_participants'].sum().rename('tours').reset_index()"
#,non_mandatory_tours_tod_count,"trips_merged[trips_merged['tour_category'] == 'non_mandatory'][['tour_id', 'start', 'number_of_participants']].drop_duplicates().groupby('start')['number_of_participants'].sum().rename('tours').reset_index()"
#,work_tours_tod_count,trips_merged[trips_merged.tour_type == 'work'].groupby('tour_id').depart.min().reset_index().groupby('depart').size().sort_index().rename('tours').reset_index()
#,school_tours_tod_count,trips_merged[trips_merged.tour_type == 'school'].groupby('tour_id').depart.min().reset_index().groupby('depart').size().sort_index().rename('tours').reset_index()
#,non_mandatory_tours_tod_count,trips_merged[trips_merged.tour_category == 'non_mandatory'].groupby('tour_id').depart.min().reset_index().groupby('depart').size().sort_index().rename('tours').reset_index()#

#,low_income_share_by_taz_deciles,"quantiles(persons_merged[persons_merged['income'] < 50000].groupby('home_zone_id').size() / persons_merged.groupby('home_zone_id').size(), 10, '{rank}').rename('low_income_share_by_taz_deciles').reset_index()"#

#,persons_by_income_category,persons_merged.groupby('income_category')[['income_category']].count().T#

#,vmt_per_capita_by_home_taz_quintiles,"quantiles((trips_merged.groupby('home_zone_id').auto_distance.sum() / trips_merged.groupby('home_zone_id').number_of_participants.sum() / persons_merged.groupby('home_zone_id').size()).fillna(0), 5, '{rank}').rename('vmt_per_capita_by_home_taz_quintiles').reset_index()"#

#,non_motorized_trip_distance_bins,"spaced_intervals((trips_merged.walk_distance + trips_merged.bike_distance)[trips_merged.major_trip_mode == 'Non-Motorized'], 0, 0.25).value_counts().sort_index().rename('trips').reset_index()"#
# These summaries are checked by test module
,households_count,persons_merged[['household_id']].nunique().rename('households')
,trips_by_mode_count,trips_merged.groupby('trip_mode')[['number_of_participants']].sum().T

# Calculate total VMT by summing auto_distance from tours_merged
# (auto_distance is calculated by an expression in summarize_preprocessor.csv)
# Initially save as a temporary variable so it can be reused
,_total_vmt,trips_merged[['auto_distance']].sum()
# Then save to CSV
,total_vmt,_total_vmt.rename('vmt')

# Calculate vmt per capita
# (All outputs to CSV have to be a Pandas Series or DataFrame)
,vmt_per_capita,"pd.Series(_total_vmt / len(persons_merged), name='vmt_per_capita')"

# Calculate vmt per capita by home taz
,_vmt_per_home_taz,trips_merged.groupby('home_zone_id').auto_distance.sum()
,_person_per_home_taz,persons_merged.groupby('home_zone_id').size()
,_vmt_per_capita_by_home_taz,(_vmt_per_home_taz/_person_per_home_taz).fillna(0)
,vmt_per_capita_by_home_taz,_vmt_per_capita_by_home_taz.rename('vmt_per_capita_by_home_taz').reset_index()

# Calculate vmt per capita by work taz
,_vmt_per_work_taz,trips_merged.groupby('workplace_zone_id').auto_distance.sum()
,_person_per_work_taz,persons_merged.groupby('workplace_zone_id').size()
,vmt_per_capita_by_work_taz,(_vmt_per_work_taz/_person_per_work_taz).fillna(0).rename('vmt_per_capita_by_work_taz').reset_index()

# Count persons
,persons_count,persons_merged[['household_id']].count().rename('persons')

# Count person-tours
,person_tours,"trips_merged[['tour_id', 'number_of_participants']].drop_duplicates()[['number_of_participants']].sum().rename('tours')"

# Count person-trips
,person_trips,trips_merged[['number_of_participants']].sum().rename('trips')

# Count tours
,tours_count,tours_merged.reset_index()[['tour_id']].count().rename('tours')

# Count trips
,trips_count,trips_merged.reset_index()[['trip_id']].count().rename('trips')

# Count tours per household
,_tours,"trips_merged[['tour_id', 'number_of_participants']].drop_duplicates()['number_of_participants'].sum()"
,_households,persons_merged['household_id'].nunique()
,tours_per_household_count,"pd.Series(_tours / _households, name='tours_per_household')"

# Count trips per household
,_trips,trips_merged['number_of_participants'].sum()
,trips_per_household_count,"pd.Series(_trips / _households, name='trips_per_household')"

# Count trips by major mode
#,trips_by_major_mode_count,trips_merged.groupby('major_trip_mode')[['number_of_participants']].sum().T

# Count trips by income category and major mode
,_trips_with_income,"pd.merge(trips_merged, persons_merged['income_category'], left_on='person_id', right_index=True)"
,trip_by_income_category_major_mode,"_trips_with_income.groupby(['income_category', 'major_trip_mode']).size().unstack(-1).reset_index()"

# Count trips by purpose
,trips_by_purpose_count,trips_merged.groupby('primary_purpose_trip')[['number_of_participants']].sum().T

# Count trips by purpose and departure time
,trip_purpose_by_time_of_day,"trips_merged.groupby(['depart','primary_purpose_trip'])['number_of_participants'].sum().unstack(-1).reset_index()"

# Count trips with each combination of tour mode and trip mode (for Sankey)
,tour_mode_to_trip_mode,"trips_merged.groupby(['tour_mode','trip_mode']).size().rename('trips').reset_index()"

# Count work tours by time of day
,_work_tours,trips_merged[trips_merged['tour_type'] == 'work']
,work_tours_tod_count,_work_tours.groupby('tour_id').depart.min().reset_index().groupby('depart').size().sort_index().rename('tours').reset_index()

# Count school tours by time of day
,_school_tours,trips_merged[trips_merged['tour_type'] == 'school']
,school_tours_tod_count,_school_tours.groupby('tour_id').depart.min().reset_index().groupby('depart').size().sort_index().rename('tours').reset_index()

# Count non-mandatory tours by time of day
,_non_mandatory_tours,trips_merged[trips_merged.tour_category == 'non_mandatory']
,non_mandatory_tours_tod_count,_non_mandatory_tours.groupby('tour_id').depart.min().reset_index().groupby('depart').size().sort_index().rename('tours').reset_index()

# TAZ population density quintiles
,_taz_pop_dens,land_use.TOTPOP/land_use.TOTACRE
,taz_population_density_quintiles,"quantiles(_taz_pop_dens, 5, '{rank}').rename('pop_dens_quintile').reset_index()"

# Calculate share of taz population that is low income by decile
# (Output deciles by specifying '{rank}' as the label format in the quantile function)
,_low_income_pop_by_taz,persons_merged[persons_merged.income < 50000].groupby('home_zone_id').size()
,_total_pop_by_taz,persons_merged.groupby('home_zone_id').size()
,_proportion_low_income_by_taz,"_low_income_pop_by_taz / _total_pop_by_taz"
,_proportion_low_income_deciles,"quantiles(_proportion_low_income_by_taz, 10, '{rank}')"
,low_income_share_by_taz_deciles,"_proportion_low_income_deciles.rename('low_income_share_by_taz_deciles').reset_index()"

# Count persons by income category
# (income_category is calculated by an expression in summarize_preprocessor.csv)
#,persons_by_income_category,persons_merged.groupby('income_category')[['income_category']].count().T

# Calculate vmt per capita quintiles by taz
# (Output quintiles by specifying '{rank}' as the label format in the quantile function)
,_vmt_per_capita_quintiles,"quantiles(_vmt_per_capita_by_home_taz, 5, '{rank}')"
,vmt_per_capita_by_home_taz_quintiles,"_vmt_per_capita_quintiles.rename('vmt_per_capita_by_home_taz_quintiles').reset_index()"

# Counts of non-motorized trips by 0.25-mile distance bins
,_non_motorized_distances,(trips_merged.walk_distance + trips_merged.bike_distance)
,_non_motorized_trips,trips_merged.major_trip_mode == 'Non-Motorized'
,_non_motorized_trip_distances,_non_motorized_distances[_non_motorized_trips]
,_counts_of_non_motorized_trips_by_distance_bin,"spaced_intervals(_non_motorized_trip_distances, 0, 0.25).value_counts()"
,non_motorized_trip_distance_bins,"_counts_of_non_motorized_trips_by_distance_bin.sort_index().rename('trips').reset_index()"

# Counts of trips by income and travel time category
#,trips_by_income_and_travel_time_category,"trips_merged.groupby(['trip_income_category','total_time_category']).size().rename('trips').unstack(-2).reset_index()"

,taz_count,"len(land_use)"





Loading