Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 6 additions & 14 deletions cps_data/pycps/cps_meta.py
Original file line number Diff line number Diff line change
@@ -1,39 +1,31 @@
"""
Holds all the CPS file metadata we need. Created to keep create.py clean
"""
import cpsmar2013
import cpsmar2014
import cpsmar2015
import cpsmar2016
import cpsmar2017
import cpsmar2018


C_TAM_YEARS = [2013, 2014, 2015] # years we have C-TAM imputations for

CPS_META_DATA = {
2013: {
"dat_file": "asec2013_pubuse.dat",
"create_func": cpsmar2013.create_cps
"sas_file": "cpsmar2013.sas"
},
2014: {
"dat_file": "asec2014_pubuse_tax_fix_5x8_2017.dat",
"create_func": cpsmar2014.create_cps
"sas_file": "cpsmar2014t.sas"
},
2015: {
"dat_file": "asec2015_pubuse.dat",
"create_func": cpsmar2015.create_cps
"sas_file": "cpsmar2015.sas"
},
2016: {
"dat_file": "asec2016_pubuse_v3.dat",
"create_func": cpsmar2016.create_cps
"sas_file": "cpsmar2016.sas"
},
2017: {
"dat_file": "asec2017_pubuse.dat",
"create_func": cpsmar2017.create_cps
"sas_file": "cpsmar2017.sas"
},
2018: {
"dat_file": "asec2018_pubuse.dat",
"create_func": cpsmar2018.create_cps
"sas_file": "cpsmar2018.sas"
}
}
95 changes: 37 additions & 58 deletions cps_data/pycps/template.txt → cps_data/pycps/cpsmar.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,46 +10,16 @@
DATA_PATH = Path(CUR_PATH, "data")


def h_rec(rec):

record = OrderedDict()

{% for item in household %}
{{ item }}{% endfor %}

return record


def f_rec(rec):
"""
Process family record in CPS
"""

record = OrderedDict()

{% for item in family %}
{{ item }}{% endfor %}

return record


def p_rec(rec, benefits, h_seq, fhseq, ffpos):
def person_details(record, benefits, h_seq, fhseq, ffpos, year):
"""
Process person record in CPS
Add additional details for person records
"""
record = OrderedDict()

{% for item in person %}
{{ item }}{% endfor %}

{# This might need to be updated to year >= 2015 #}
{% if year >= 2015 %}
record["alimony"] = 0.
if record["oi_off"] == 20:
record["alimony"] = record["oi_val"]
{% else %}
record["alimony"] = record["alm_val"]
{% endif %}
if year >= 2015:
record["alimony"] = 0.
if record["oi_off"] == 20:
record["alimony"] = record["oi_val"]
else:
record["alimony"] = record["alm_val"]
# Calculate pensions and annuities
pensions_annuities = (
((record["oi_off"] == 2) * record["oi_val"]) +
Expand Down Expand Up @@ -114,16 +84,31 @@ def p_rec(rec, benefits, h_seq, fhseq, ffpos):
record["tanf_val"] = 0.
if record["paw_yn"] == 1:
record["tanf_val"] = record["paw_val"]
{# fhoussub not included after 2016 #}
{% if year >= 2016 %}
record["housing_val"] = 0.
{% else %}
record["housing_val"] = record["fhoussub"]
{% endif %}
if year >= 2016:
record["housing_val"] = 0.
else:
record["housing_val"] = record["fhoussub"]
return record


def parse(rec, parse_dict):
    """
    Parse one fixed-width line of the CPS into an ordered record.

    Parameters
    ----------
    rec: a single line (string) read from the CPS file
    parse_dict: dictionary mapping each variable name to a
        (start, end, decimals) tuple. rec[start:end] holds the variable's
        digits and decimals is the number of implied decimal places.

    Returns
    -------
    OrderedDict of variable name -> parsed value, in the iteration order of
    parse_dict. Values are ints, or floats when decimals is positive.

    Raises
    ------
    ValueError: if the slice for a variable cannot be converted to an
        integer (e.g. it is blank or the line is too short). The message
        names the variable and the column span.
    """
    record = OrderedDict()

    for var, (start, end, decimals) in parse_dict.items():
        field = rec[start:end]
        try:
            value = int(field)
        except ValueError as err:
            # Same exception type as before, but say which field failed.
            raise ValueError(
                "Cannot parse variable {!r} from columns {}:{} "
                "(found {!r})".format(var, start, end, field)
            ) from err
        if decimals > 0:
            # The field is read as a whole number, so scale it down by the
            # implied number of decimal places.
            value /= 10 ** decimals
        record[var] = value

    return record


def create_cps(dat_file, year, benefits=True, exportpkl=True, exportcsv=True):
def create_cps(dat_file, year, parsing_dict, benefits=True, exportpkl=True,
exportcsv=True):
"""
Read the .DAT CPS file and convert it to a list of dictionaries that will
later be converted to tax units. Optionally export that list as a
Expand All @@ -132,11 +117,11 @@ def create_cps(dat_file, year, benefits=True, exportpkl=True, exportcsv=True):
----------
dat_file: Path to the .DAT version of the CPS downloaded from NBER
year: year of the CPS being converted
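parsing_dict: dictionary with "household", "family", and "person" keys, each holding a dictionary that maps variable names to (start, end, decimals) tuples used to parse that record type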
benefits: Set to true to include C-TAM imputed benefits in the CPS
exportpkl: Set to true to export a pickled list of households in the CPS
exportcsv: Set to true to export a CSV version of the CPS
"""

# read in file
print("Reading DAT file")
with Path(dat_file).open("r") as f:
Expand All @@ -159,15 +144,16 @@ def create_cps(dat_file, year, benefits=True, exportpkl=True, exportcsv=True):
if household:
cps_list.append(household)
household = []
house = h_rec(record)
house = parse(record, parsing_dict["household"])
# family record
elif rec_type == "2":
family = f_rec(record)
family = parse(record, parsing_dict["family"])
# person record
elif rec_type == "3":
person = p_rec(
record, benefits, house["h_seq"], family["fh_seq"],
family["ffpos"]
person = parse(record, parsing_dict["person"])
person = person_details(
person, benefits, house["h_seq"], family["fh_seq"],
family["ffpos"], year
)
full_rec = {**house, **family, **person}
household.append(full_rec)
Expand All @@ -190,10 +176,3 @@ def create_cps(dat_file, year, benefits=True, exportpkl=True, exportcsv=True):
pickle.dump(cps_list, f)

return cps_list


if __name__ == "__main__":
create_cps(
Path(CUR_PATH, "data", "{{ file_name }}"), {{ year }}, {{ benefits }}
)

Loading