PSLmodels · andersonfrailey · Jul 19, 2020 · Jul 3, 2020 · Jul 3, 2020 · Jul 3, 2020
diff --git a/cps_data/README.md b/cps_data/README.md
@@ -1,17 +1,92 @@
 About cps_data
 ==============
 
-This directory contains the following script:
+This directory contains the Python scripts used to create `cps.csv.gz`. You
+can run all of the scripts with the command `python create.py`. By default,
+you will get a CPS file composed of the 2013, 2014, and 2015 CPS March
+Supplement files. If you would like to use another combination of the 2013,
+2014, 2015, 2016, 2017, and 2018 files, there are two ways to do so.
 
-* Python script **finalprep.py**, which reads/writes:
+1. Modify `create.py` by adding the `cps_files` argument to the `create()`
+function call at the bottom of the file to specify which files you would like
+to use. For example, to use the 2016, 2017, and 2018 files, the function call
+would become:
+```python
+if __name__ == "__main__":
+    create(
+        exportcsv=False, exportpkl=True, exportraw=False, validate=False,
+        benefits=True, verbose=True, cps_files=[2016, 2017, 2018]
+    )
+```
 
-  Input files:
-    - cps_raw.csv.gz
-    - adjustment_targets.csv
-    - benefitprograms.csv
+2. Write a separate Python file that imports the `create()` function and
+calls it with the `cps_files` argument in the same way as above.
 
-  Output files:
-    - cps.csv
+## Input Files
+With the exception of the CPS March Supplements, all input files can be found
+in the `pycps/data` directory.
+
+### CPS March Supplements
+* asec2013_pubuse.dat
+* asec2014_pubuse_tax_fix_5x8_2017.dat
+* asec2015_pubuse.dat
+* asec2016_pubuse_v3.dat
+* asec2017_pubuse.dat
+* asec2018_pubuse.dat
+
+### C-TAM Benefit Imputations
+
+Note that we only have C-TAM imputations for the 2013, 2014, and 2015 files.
+For other years, we just use the benefit program information in the CPS.
+* Housing_Imputation_logreg_2013.csv
+* Housing_Imputation_logreg_2014.csv
+* Housing_Imputation_logreg_2015.csv
+* medicaid2013.csv
+* medicaid2014.csv
+* medicaid2015.csv
+* medicare2013.csv
+* medicare2014.csv
+* medicare2015.csv
+* otherbenefitprograms.csv
+* SNAP_Imputation_2013.csv
+* SNAP_Imputation_2014.csv
+* SNAP_Imputation_2015.csv
+* SS_augmentation_2013.csv
+* SS_augmentation_2014.csv
+* SS_augmentation_2015.csv
+* SSI_Imputation2013.csv
+* SSI_Imputation2014.csv
+* SSI_Imputation2015.csv
+* TANF_Imputation_2013.csv
+* TANF_Imputation_2014.csv
+* TANF_Imputation_2015.csv
+* UI_imputation_logreg_2013.csv
+* UI_imputation_logreg_2014.csv
+* UI_imputation_logreg_2015.csv
+* VB_Imputation2013.csv
+* VB_Imputation2014.csv
+* VB_Imputation2015.csv
+* WIC_imputation_children_logreg_2013.csv
+* WIC_imputation_children_logreg_2014.csv
+* WIC_imputation_children_logreg_2015.csv
+* WIC_imputation_infants_logreg_2013.csv
+* WIC_imputation_infants_logreg_2014.csv
+* WIC_imputation_infants_logreg_2015.csv
+* WIC_imputation_women_logreg_2013.csv
+* WIC_imputation_women_logreg_2014.csv
+* WIC_imputation_women_logreg_2015.csv
+
+### Imputation Parameters
+
+These parameters are used in the imputations found in `pycps/impute.py`.
+* logit_beta.csv
+* ols_betas.csv
+
+## Output Files
+
+Only `cps.csv.gz` is included in the repository; `cps_raw.csv.gz` is too large to commit.
+* cps.csv.gz
+* cps_raw.csv.gz
 
 
 Documentation

diff --git a/cps_data/pycps/benefits.py b/cps_data/pycps/benefits.py
@@ -95,26 +95,39 @@ def distribute_benefits(data, other_ben):
     other_ben["2014_cost"] *= 1e6
 
     # adjust medicare and medicaid totals
-    weighted_mcare_count = (data["mcare_count"] * data["s006"]).sum()
-    weighted_mcaid_count = (data["mcaid_count"] * data["s006"]).sum()
-    weighted_mcare = (data["mcare_ben"] * data["s006"]).sum()
-    weighted_mcaid = (data["mcaid_ben"] * data["s006"]).sum()
-    mcare_amt = weighted_mcare / weighted_mcare_count
-    mcaid_amt = weighted_mcaid / weighted_mcaid_count
-    data["mcaid_ben"] = data["mcaid_count"] * mcaid_amt
-    data["mcare_ben"] = data["mcare_count"] * mcare_amt
+    try:
+        weighted_mcare_count = (data["mcare_count"] * data["s006"]).sum()
+        weighted_mcaid_count = (data["mcaid_count"] * data["s006"]).sum()
+        weighted_mcare = (data["mcare_ben"] * data["s006"]).sum()
+        weighted_mcaid = (data["mcaid_ben"] * data["s006"]).sum()
+        mcare_amt = weighted_mcare / weighted_mcare_count
+        mcaid_amt = weighted_mcaid / weighted_mcaid_count
+        data["mcaid_ben"] = data["mcaid_count"] * mcaid_amt
+        data["mcare_ben"] = data["mcare_count"] * mcare_amt
+    except KeyError:
+        # skip over adjusting medicare and medicaid if we don't impute them
+        # set to zero to avoid errors later
+        data["mcaid_ben"] = 0.
+        data["mcare_ben"] = 0
 
     # Distribute other benefits
     data["dist_ben"] = data[["mcaid_ben", "ssi_ben", "snap_ben"]].sum(axis=1)
     data["ratio"] = (data["dist_ben"] * data["s006"] /
                      (data["dist_ben"] * data["s006"]).sum())
     # ... remove TANF and WIC from other_ben total
     tanf_total = (data["tanf_ben"] * data["s006"]).sum()
-    wic_total = (data["wic_ben"] * data["s006"]).sum()
+    try:
+        wic_total = (data["wic_ben"] * data["s006"]).sum()
+    except KeyError:
+        # Same as medicare and medicaid
+        wic_total = 0.
     other_ben_total = other_ben["2014_cost"].sum() - tanf_total - wic_total
     # ... divide by the weight to account for weighting in Tax-Calculator
     data["other_ben"] = (data["ratio"] * other_ben_total / data["s006"])
 
-    data["housing_ben"] *= 12
+    try:
+        data["housing_ben"] *= 12
+    except KeyError:
+        pass
 
     return data
diff --git a/cps_data/pycps/cps_meta.py b/cps_data/pycps/cps_meta.py
@@ -0,0 +1,39 @@
+"""
+Holds all the CPS file metadata we need. Created to keep create.py clean.
+"""
+import cpsmar2013
+import cpsmar2014
+import cpsmar2015
+import cpsmar2016
+import cpsmar2017
+import cpsmar2018
+
+
+C_TAM_YEARS = [2013, 2014, 2015]  # CPS years with C-TAM benefit imputations
+
+CPS_META_DATA = {  # year -> .dat file name and its create_cps function
+    2013: {
+        "dat_file": "asec2013_pubuse.dat",
+        "create_func": cpsmar2013.create_cps
+    },
+    2014: {
+        "dat_file": "asec2014_pubuse_tax_fix_5x8_2017.dat",
+        "create_func": cpsmar2014.create_cps
+    },
+    2015: {
+        "dat_file": "asec2015_pubuse.dat",
+        "create_func": cpsmar2015.create_cps
+    },
+    2016: {
+        "dat_file": "asec2016_pubuse_v3.dat",
+        "create_func": cpsmar2016.create_cps
+    },
+    2017: {
+        "dat_file": "asec2017_pubuse.dat",
+        "create_func": cpsmar2017.create_cps
+    },
+    2018: {
+        "dat_file": "asec2018_pubuse.dat",
+        "create_func": cpsmar2018.create_cps
+    }
+}
diff --git a/cps_data/pycps/cpsmar2013.py b/cps_data/pycps/cpsmar2013.py
@@ -805,6 +805,12 @@ def p_rec(rec, benefits, h_seq, fhseq, ffpos):
         record["tot_inc"] -= record["uc_val"]
         record["tot_inc"] += record["UI_impute"]
         record["tot_inc"] += record["ss_impute"]
+    else:
+        # calculate benefits in CPS where possible
+        record["tanf_val"] = 0.
+        if record["paw_yn"] == 1:
+            record["tanf_val"] = record["paw_val"]
+        record["housing_val"] = record["fhoussub"]
     return record
 
 
@@ -878,4 +884,6 @@ def create_cps(dat_file, year, benefits=True, exportpkl=True, exportcsv=True):
 
 
 if __name__ == "__main__":
-    create_cps(Path(CUR_PATH, "data", "asec2013_pubuse.dat"), 2013)
+    create_cps(
+        Path(CUR_PATH, "data", "asec2013_pubuse.dat"), 2013, True
+    )  # third positional argument is the benefits flag
diff --git a/cps_data/pycps/cpsmar2014.py b/cps_data/pycps/cpsmar2014.py
@@ -805,6 +805,12 @@ def p_rec(rec, benefits, h_seq, fhseq, ffpos):
         record["tot_inc"] -= record["uc_val"]
         record["tot_inc"] += record["UI_impute"]
         record["tot_inc"] += record["ss_impute"]
+    else:
+        # calculate benefits in CPS where possible
+        record["tanf_val"] = 0.
+        if record["paw_yn"] == 1:
+            record["tanf_val"] = record["paw_val"]
+        record["housing_val"] = record["fhoussub"]
     return record
 
 
@@ -878,4 +884,6 @@ def create_cps(dat_file, year, benefits=True, exportpkl=True, exportcsv=True):
 
 
 if __name__ == "__main__":
-    create_cps(Path(CUR_PATH, "data", "asec2014_pubuse_tax_fix_5x8_2017.dat"), 2014)
+    create_cps(
+        Path(CUR_PATH, "data", "asec2014_pubuse_tax_fix_5x8_2017.dat"), 2014, True
+    )  # third positional argument is the benefits flag
diff --git a/cps_data/pycps/cpsmar2015.py b/cps_data/pycps/cpsmar2015.py
@@ -802,6 +802,12 @@ def p_rec(rec, benefits, h_seq, fhseq, ffpos):
         record["tot_inc"] -= record["uc_val"]
         record["tot_inc"] += record["UI_impute"]
         record["tot_inc"] += record["ss_impute"]
+    else:
+        # calculate benefits in CPS where possible
+        record["tanf_val"] = 0.
+        if record["paw_yn"] == 1:
+            record["tanf_val"] = record["paw_val"]
+        record["housing_val"] = record["fhoussub"]
     return record
 
 
@@ -875,4 +881,6 @@ def create_cps(dat_file, year, benefits=True, exportpkl=True, exportcsv=True):
 
 
 if __name__ == "__main__":
-    create_cps(Path(CUR_PATH, "data", "asec2015_pubuse.dat"), 2015)
+    create_cps(
+        Path(CUR_PATH, "data", "asec2015_pubuse.dat"), 2015, True
+    )  # third positional argument is the benefits flag