ccao-data · dfsnow · Jan 28, 2025 · Jan 14, 2025 · Jan 15, 2025 · Jan 16, 2025
@@ -363,10 +363,6 @@ model:
 
 # Parameters used in the assess stage to finalize the initial model predictions
 pv:
-  # For multi-card PINs (rare), implement a heuristic that caps the potential
-  # change in value. See assess stage code for details
-  multicard_yoy_cap: 2.2
-
   # Cap the proportion of the PIN's total value dedicated to land. This is
   # necessary since sometimes the model provides low predictions relative to the
   # land rates created by Valuations

@@ -43,6 +43,18 @@ lgbm_final_full_recipe <- readRDS(paths$output$workflow_recipe$local)
 # fair-market value for each card
 assessment_card_data_pred <- read_parquet(paths$input$assessment$local) %>%
   as_tibble() %>%
+  mutate(
+    # Multi-card PINs with 2-3 cards get a special prediction based on the
+    # combined building square footage of all cards on the PIN. See below
+    # (under assessment_card_data_mc) for more details
+    og_char_bldg_sf = char_bldg_sf,
+    char_bldg_sf = ifelse(
+      ind_pin_is_multicard & meta_pin_num_cards %in% c(2, 3),
+      sum(char_bldg_sf),
+      char_bldg_sf
+    ),
+    .by = meta_pin
+  ) %>%
   mutate(
     pred_card_initial_fmv = predict(
       lgbm_final_full_fit,
@@ -51,8 +63,10 @@ assessment_card_data_pred <- read_parquet(paths$input$assessment$local) %>%
         new_data = .,
         all_predictors()
       )
-    )$.pred
-  )
+    )$.pred,
+    char_bldg_sf = og_char_bldg_sf
+  ) %>%
+  select(-og_char_bldg_sf)
 
 
 
@@ -70,7 +84,7 @@ message("Fixing multicard PINs")
 assessment_card_data_mc <- assessment_card_data_pred %>%
   select(
     meta_year, meta_pin, meta_nbhd_code, meta_class, meta_card_num,
-    char_bldg_sf, char_land_sf,
+    meta_pin_num_cards, char_bldg_sf, char_land_sf,
     meta_tieback_key_pin, meta_tieback_proration_rate,
     meta_1yr_pri_board_tot, pred_card_initial_fmv
   ) %>%
@@ -85,21 +99,21 @@ assessment_card_data_mc <- assessment_card_data_pred %>%
       mean(pred_card_initial_fmv)
     )
   ) %>%
-  # Aggregate multi-cards to the PIN-level by summing the predictions
-  # of all cards. We use a heuristic here to limit the PIN-level total
-  # value, this is to prevent super-high-value back-buildings/ADUs from
-  # blowing up the PIN-level AV
+  # For single-card PINs, the card-level predicted value is the PIN value.
+  # For multi-card PINs with 2 or 3 cards, we aggregate the building square
+  # footage of all cards into a single card (the largest), predict, then use
+  # that prediction as the PIN value. For > 3 cards, we predict each card with
+  # its original square footage then sum the predictions to get the PIN value
   group_by(meta_pin) %>%
+  arrange(meta_pin, desc(char_bldg_sf)) %>%
   mutate(
     pred_pin_card_sum = ifelse(
-      sum(pred_card_intermediate_fmv) * meta_tieback_proration_rate <=
-        params$pv$multicard_yoy_cap * first(meta_1yr_pri_board_tot * 10) |
-        is.na(meta_1yr_pri_board_tot) |
-        n() != 2,
+      meta_pin_num_cards > 3,
       sum(pred_card_intermediate_fmv),
-      max(pred_card_intermediate_fmv)
+      first(pred_card_intermediate_fmv)
     )
   ) %>%
+  arrange(meta_pin, meta_card_num) %>%
   ungroup()
 
 

@@ -101,7 +101,6 @@ metadata <- tibble::tibble(
   cv_no_improve = params$cv$no_improve,
   cv_split_prop = params$cv$split_prop,
   cv_best_metric = params$cv$best_metric,
-  pv_multicard_yoy_cap = params$pv$multicard_yoy_cap,
   pv_land_pct_of_total_cap = params$pv$land_pct_of_total_cap,
   pv_round_break = list(params$pv$round_break),
   pv_round_to_nearest = list(params$pv$round_to_nearest),

@@ -367,3 +367,81 @@ iwalk(plots, ~ {
 ```
 
 :::
+
+## Assessed values for multi-card properties
+
+The sales data we use to measure accuracy is the most recent sale per multi-card
+PIN, provided that sale occurred on or after January 1, 2020.
+
+```{r _challenge_groups_decile_ratio_graph}
+# Build a PIN-level table of multi-card properties with a recent sale, then
+# compare each PIN's final predicted FMV to its most recent sale price.
+#
+# NOTE(review): assessment_data appears to hold one row per card rather than
+# one row per PIN -- confirm. The multi-card flag is therefore reduced to the
+# distinct (PIN, flag) pairs before joining; joining the raw rows would repeat
+# each multi-card PIN once per card and over-weight PINs with more cards in
+# the deciles, medians, and any plot built from this table
+mc_assessment_pin <- assessment_pin %>%
+  left_join(
+    assessment_data %>%
+      distinct(meta_pin, ind_pin_is_multicard),
+    by = "meta_pin"
+  ) %>%
+  # Rows where the flag is FALSE or missing are dropped here
+  filter(ind_pin_is_multicard) %>%
+  mutate(ratio = pred_pin_final_fmv / sale_recent_1_price) %>%
+  # Keep only PINs whose most recent sale is dated on or after 2020-01-01.
+  # PINs with a missing sale date fail the comparison and are dropped too
+  filter(sale_recent_1_date >= as.Date("2020-01-01")) %>%
+  # Split the remaining PINs into ten roughly equal-sized bins by sale price
+  mutate(decile = ntile(sale_recent_1_price, 10))
+
+# Median predicted-FMV-to-sale-price ratio within each sale price decile
+mc_df_deciles <- mc_assessment_pin %>%
+  group_by(decile) %>%
+  summarise(median_ratio = median(ratio, na.rm = TRUE))
+
+# Line chart of the median ratio per decile. The dashed horizontal line at 1
+# marks the point where the predicted value equals the sale price
+mc_p_deciles <- ggplot(mc_df_deciles, aes(x = decile, y = median_ratio)) +
+  geom_line() +
+  geom_point() +
+  geom_hline(yintercept = 1, color = "black", linetype = "dashed") +
+  scale_x_continuous(breaks = 1:10) +
+  labs(
+    title = "Median Ratio by Decile (Sales After 2020)",
+    x = "Decile",
+    y = "Median Ratio"
+  ) +
+  theme_minimal()
+
+mc_p_deciles
+```
+
+```{r _challenge_groups_scatterplot_pred_vs_sale}
+# Largest value observed on either axis (sale price or predicted FMV), with
+# missing values ignored. Used as the far end of the reference line below
+mc_max_val <- max(
+  c(
+    mc_assessment_pin$sale_recent_1_price,
+    mc_assessment_pin$pred_pin_final_fmv
+  ),
+  na.rm = TRUE
+)
+
+# One marker per row of mc_assessment_pin: sale price on the x-axis, final
+# predicted FMV on the y-axis. The hover label is built per row from the PIN,
+# sale price, rounded prediction, sale date, and rounded ratio
+mc_p_scatter_base <- plot_ly(
+  data = mc_assessment_pin,
+  x = ~sale_recent_1_price,
+  y = ~pred_pin_final_fmv,
+  type = "scatter",
+  mode = "markers",
+  hoverinfo = "text",
+  text = ~ paste(
+    "<b>PIN:</b>", meta_pin,
+    "<br><b>Sale Price:</b>", sale_recent_1_price,
+    "<br><b>Predicted (Base):</b>", round(pred_pin_final_fmv, 2),
+    "<br><b>Sale Date:</b>", sale_recent_1_date,
+    "<br><b>Ratio:</b>", round(ratio, 3)
+  )
+)
+
+# Add the titles and a dashed red diagonal running from the origin to
+# (mc_max_val, mc_max_val), i.e. the line where prediction equals sale price
+mc_p_scatter_base <- plotly::layout(
+  mc_p_scatter_base,
+  title = "FMV vs sale price",
+  shapes = list(
+    list(
+      type = "line",
+      x0 = 0, y0 = 0,
+      x1 = mc_max_val, y1 = mc_max_val,
+      xref = "x", yref = "y",
+      line = list(color = "red", dash = "dash")
+    )
+  ),
+  xaxis = list(title = "Sale Price"),
+  yaxis = list(title = "Predicted FMV (Base)")
+)
+
+mc_p_scatter_base
+```