Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
91 changes: 83 additions & 8 deletions cps_data/README.md
Original file line number Diff line number Diff line change
@@ -1,17 +1,92 @@
About cps_data
==============

This directory contains the following script:
This directory contains the python scripts used to create `cps.csv.gz`. You
can run all of the scripts with the command `python create.py`. By default,
you will get a CPS file composed of the 2013, 2014, and 2015 March CPS Supplemental
files. If you would like to use another combination of the 2013, 2014, 2015,
2016, 2017, and 2018 files, there are two ways to do so.

* Python script **finalprep.py**, which reads/writes:
1. You can modify `create.py` by adding the `cps_files` argument to the `create()`
function call at the bottom of the file to specify which files you would like to
use. For example, to use the 2016, 2017, and 2018 files, the function call would
now be:
```python
if __name__ == "__main__":
create(
exportcsv=False, exportpkl=True, exportraw=False, validate=False,
benefits=True, verbose=True, cps_files=[2016, 2017, 2018]
)
```

Input files:
- cps_raw.csv.gz
- adjustment_targets.csv
- benefitprograms.csv
2. You could write a separate python file that imports the `create()` function
and calls it in the same way as above.

Output files:
- cps.csv
## Input files:
With the exception of the CPS March Supplements, all input files can be found
in the `pycps/data` directory.

### CPS March Supplements
* asec2013_pubuse.dat
* asec2014_pubuse_tax_fix_5x8_2017.dat
* asec2015_pubuse.dat
* asec2016_pubuse_v3.dat
* asec2017_pubuse.dat
* asec2018_pubuse.dat

### C-TAM Benefit Imputations

Note that we only have C-TAM imputations for the 2013, 2014, and 2015 files.
For other years, we just use the benefit program information in the CPS.
* Housing_Imputation_logreg_2013.csv
* Housing_Imputation_logreg_2014.csv
* Housing_Imputation_logreg_2015.csv
* medicaid2013.csv
* medicaid2014.csv
* medicaid2015.csv
* medicare2013.csv
* medicare2014.csv
* medicare2015.csv
* otherbenefitprograms.csv
* SNAP_Imputation_2013.csv
* SNAP_Imputation_2014.csv
* SNAP_Imputation_2015.csv
* SS_augmentation_2013.csv
* SS_augmentation_2014.csv
* SS_augmentation_2015.csv
* SSI_Imputation2013.csv
* SSI_Imputation2014.csv
* SSI_Imputation2015.csv
* TANF_Imputation_2013.csv
* TANF_Imputation_2014.csv
* TANF_Imputation_2015.csv
* UI_imputation_logreg_2013.csv
* UI_imputation_logreg_2014.csv
* UI_imputation_logreg_2015.csv
* VB_Imputation2013.csv
* VB_Imputation2014.csv
* VB_Imputation2015.csv
* WIC_imputation_children_logreg_2013.csv
* WIC_imputation_children_logreg_2014.csv
* WIC_imputation_children_logreg_2015.csv
* WIC_imputation_infants_logreg_2013.csv
* WIC_imputation_infants_logreg_2014.csv
* WIC_imputation_infants_logreg_2015.csv
* WIC_imputation_women_logreg_2013.csv
* WIC_imputation_women_logreg_2014.csv
* WIC_imputation_women_logreg_2015.csv

### Imputation Parameters

These parameters are used in the imputations found in `pycps/impute.py`.
* logit_beta.csv
* ols_betas.csv

## Output Files

Only `cps.csv.gz` is included in the repository due to the size of `cps_raw.csv.gz`.
* cps.csv.gz
* cps_raw.csv.gz


Documentation
Expand Down
33 changes: 23 additions & 10 deletions cps_data/pycps/benefits.py
Original file line number Diff line number Diff line change
Expand Up @@ -95,26 +95,39 @@ def distribute_benefits(data, other_ben):
other_ben["2014_cost"] *= 1e6

# adjust medicare and medicaid totals
weighted_mcare_count = (data["mcare_count"] * data["s006"]).sum()
weighted_mcaid_count = (data["mcaid_count"] * data["s006"]).sum()
weighted_mcare = (data["mcare_ben"] * data["s006"]).sum()
weighted_mcaid = (data["mcaid_ben"] * data["s006"]).sum()
mcare_amt = weighted_mcare / weighted_mcare_count
mcaid_amt = weighted_mcaid / weighted_mcaid_count
data["mcaid_ben"] = data["mcaid_count"] * mcaid_amt
data["mcare_ben"] = data["mcare_count"] * mcare_amt
try:
weighted_mcare_count = (data["mcare_count"] * data["s006"]).sum()
weighted_mcaid_count = (data["mcaid_count"] * data["s006"]).sum()
weighted_mcare = (data["mcare_ben"] * data["s006"]).sum()
weighted_mcaid = (data["mcaid_ben"] * data["s006"]).sum()
mcare_amt = weighted_mcare / weighted_mcare_count
mcaid_amt = weighted_mcaid / weighted_mcaid_count
data["mcaid_ben"] = data["mcaid_count"] * mcaid_amt
data["mcare_ben"] = data["mcare_count"] * mcare_amt
except KeyError:
# skip over adjusting medicare and medicaid if we don't impute them
# set to zero to avoid errors later
data["mcaid_ben"] = 0.
data["mcare_ben"] = 0

# Distribute other benefits
data["dist_ben"] = data[["mcaid_ben", "ssi_ben", "snap_ben"]].sum(axis=1)
data["ratio"] = (data["dist_ben"] * data["s006"] /
(data["dist_ben"] * data["s006"]).sum())
# ... remove TANF and WIC from other_ben total
tanf_total = (data["tanf_ben"] * data["s006"]).sum()
wic_total = (data["wic_ben"] * data["s006"]).sum()
try:
wic_total = (data["wic_ben"] * data["s006"]).sum()
except KeyError:
# Same as medicare and medicaid
wic_total = 0.
other_ben_total = other_ben["2014_cost"].sum() - tanf_total - wic_total
# ... divide by the weight to account for weighting in Tax-Calculator
data["other_ben"] = (data["ratio"] * other_ben_total / data["s006"])

data["housing_ben"] *= 12
try:
data["housing_ben"] *= 12
except KeyError:
pass

return data
39 changes: 39 additions & 0 deletions cps_data/pycps/cps_meta.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
"""
Holds all the CPS file metadata we need. Created to keep create.py clean
"""
import cpsmar2013
import cpsmar2014
import cpsmar2015
import cpsmar2016
import cpsmar2017
import cpsmar2018


# Years for which C-TAM benefit imputations are available.
C_TAM_YEARS = [2013, 2014, 2015]

# Per-year CPS metadata, keyed by the March Supplement year. Each entry holds
# the name of that year's .dat file ("dat_file") and the create_cps function
# of the matching cpsmarYYYY module ("create_func").
CPS_META_DATA = {
    2013: {
        "dat_file": "asec2013_pubuse.dat",
        "create_func": cpsmar2013.create_cps,
    },
    2014: {
        "dat_file": "asec2014_pubuse_tax_fix_5x8_2017.dat",
        "create_func": cpsmar2014.create_cps,
    },
    2015: {
        "dat_file": "asec2015_pubuse.dat",
        "create_func": cpsmar2015.create_cps,
    },
    2016: {
        "dat_file": "asec2016_pubuse_v3.dat",
        "create_func": cpsmar2016.create_cps,
    },
    2017: {
        "dat_file": "asec2017_pubuse.dat",
        "create_func": cpsmar2017.create_cps,
    },
    2018: {
        "dat_file": "asec2018_pubuse.dat",
        "create_func": cpsmar2018.create_cps,
    },
}
10 changes: 9 additions & 1 deletion cps_data/pycps/cpsmar2013.py
Original file line number Diff line number Diff line change
Expand Up @@ -805,6 +805,12 @@ def p_rec(rec, benefits, h_seq, fhseq, ffpos):
record["tot_inc"] -= record["uc_val"]
record["tot_inc"] += record["UI_impute"]
record["tot_inc"] += record["ss_impute"]
else:
# calculate benefits in CPS where possible
record["tanf_val"] = 0.
if record["paw_yn"] == 1:
record["tanf_val"] = record["paw_val"]
record["housing_val"] = record["fhoussub"]
return record


Expand Down Expand Up @@ -878,4 +884,6 @@ def create_cps(dat_file, year, benefits=True, exportpkl=True, exportcsv=True):


if __name__ == "__main__":
create_cps(Path(CUR_PATH, "data", "asec2013_pubuse.dat"), 2013)
create_cps(
Path(CUR_PATH, "data", "asec2013_pubuse.dat"), 2013, True
)
10 changes: 9 additions & 1 deletion cps_data/pycps/cpsmar2014.py
Original file line number Diff line number Diff line change
Expand Up @@ -805,6 +805,12 @@ def p_rec(rec, benefits, h_seq, fhseq, ffpos):
record["tot_inc"] -= record["uc_val"]
record["tot_inc"] += record["UI_impute"]
record["tot_inc"] += record["ss_impute"]
else:
# calculate benefits in CPS where possible
record["tanf_val"] = 0.
if record["paw_yn"] == 1:
record["tanf_val"] = record["paw_val"]
record["housing_val"] = record["fhoussub"]
return record


Expand Down Expand Up @@ -878,4 +884,6 @@ def create_cps(dat_file, year, benefits=True, exportpkl=True, exportcsv=True):


if __name__ == "__main__":
create_cps(Path(CUR_PATH, "data", "asec2014_pubuse_tax_fix_5x8_2017.dat"), 2014)
create_cps(
Path(CUR_PATH, "data", "asec2014_pubuse_tax_fix_5x8_2017.dat"), 2014, True
)
10 changes: 9 additions & 1 deletion cps_data/pycps/cpsmar2015.py
Original file line number Diff line number Diff line change
Expand Up @@ -802,6 +802,12 @@ def p_rec(rec, benefits, h_seq, fhseq, ffpos):
record["tot_inc"] -= record["uc_val"]
record["tot_inc"] += record["UI_impute"]
record["tot_inc"] += record["ss_impute"]
else:
# calculate benefits in CPS where possible
record["tanf_val"] = 0.
if record["paw_yn"] == 1:
record["tanf_val"] = record["paw_val"]
record["housing_val"] = record["fhoussub"]
return record


Expand Down Expand Up @@ -875,4 +881,6 @@ def create_cps(dat_file, year, benefits=True, exportpkl=True, exportcsv=True):


if __name__ == "__main__":
create_cps(Path(CUR_PATH, "data", "asec2015_pubuse.dat"), 2015)
create_cps(
Path(CUR_PATH, "data", "asec2015_pubuse.dat"), 2015, True
)
Loading