PSLmodels · andersonfrailey · Jul 28, 2020 · Jul 19, 2020 · Jul 19, 2020 · Jul 19, 2020
diff --git a/cps_data/pycps/cps_meta.py b/cps_data/pycps/cps_meta.py
@@ -1,39 +1,31 @@
 """
 Holds all the CPS file metadata we need. Created to keep create.py clean
 """
-import cpsmar2013
-import cpsmar2014
-import cpsmar2015
-import cpsmar2016
-import cpsmar2017
-import cpsmar2018
-
-
 C_TAM_YEARS = [2013, 2014, 2015]  # years we have C-TAM imputations for
 
 CPS_META_DATA = {
     2013: {
         "dat_file": "asec2013_pubuse.dat",
-        "create_func": cpsmar2013.create_cps
+        "sas_file": "cpsmar2013.sas"
     },
     2014: {
         "dat_file": "asec2014_pubuse_tax_fix_5x8_2017.dat",
-        "create_func": cpsmar2014.create_cps
+        "sas_file": "cpsmar2014t.sas"
     },
     2015: {
         "dat_file": "asec2015_pubuse.dat",
-        "create_func": cpsmar2015.create_cps
+        "sas_file": "cpsmar2015.sas"
     },
     2016: {
         "dat_file": "asec2016_pubuse_v3.dat",
-        "create_func": cpsmar2016.create_cps
+        "sas_file": "cpsmar2016.sas"
     },
     2017: {
         "dat_file": "asec2017_pubuse.dat",
-        "create_func": cpsmar2017.create_cps
+        "sas_file": "cpsmar2017.sas"
     },
     2018: {
         "dat_file": "asec2018_pubuse.dat",
-        "create_func": cpsmar2018.create_cps
+        "sas_file": "cpsmar2018.sas"
     }
 }
diff --git a/cps_data/pycps/template.txt → cps_data/pycps/cpsmar.py b/cps_data/pycps/template.txt → cps_data/pycps/cpsmar.py
@@ -10,46 +10,16 @@
 DATA_PATH = Path(CUR_PATH, "data")
 
 
-def h_rec(rec):
-
-    record = OrderedDict()
-
-{% for item in household %}
-    {{ item }}{% endfor %}
-
-    return record
-
-
-def f_rec(rec):
-    """
-    Process family record in CPS
-    """
-
-    record = OrderedDict()
-
-{% for item in family %}
-    {{ item }}{% endfor %}
-
-    return record
-
-
-def p_rec(rec, benefits, h_seq, fhseq, ffpos):
+def person_details(record, benefits, h_seq, fhseq, ffpos, year):
     """
-    Process person record in CPS
+    Add additional details for person records
     """
-    record = OrderedDict()
-
-{% for item in person %}
-    {{ item }}{% endfor %}
-
-    {# This might need to be updated to year >= 2015 #}
-    {% if year >= 2015 %}
-    record["alimony"] = 0.
-    if record["oi_off"] == 20:
-        record["alimony"] = record["oi_val"]
-    {% else %}
-    record["alimony"] = record["alm_val"]
-    {% endif %}
+    if year >= 2015:
+        record["alimony"] = 0.
+        if record["oi_off"] == 20:
+            record["alimony"] = record["oi_val"]
+    else:
+        record["alimony"] = record["alm_val"]
     # Calculate pensions and annuities
     pensions_annuities = (
         ((record["oi_off"] == 2) * record["oi_val"]) +
@@ -114,16 +84,31 @@ def p_rec(rec, benefits, h_seq, fhseq, ffpos):
         record["tanf_val"] = 0.
         if record["paw_yn"] == 1:
             record["tanf_val"] = record["paw_val"]
-        {# fhoussub not included after 2016 #}
-        {% if year >= 2016 %}
-        record["housing_val"] = 0.
-        {% else %}
-        record["housing_val"] = record["fhoussub"]
-        {% endif %}
+        if year >= 2016:
+            record["housing_val"] = 0.
+        else:
+            record["housing_val"] = record["fhoussub"]
+    return record
+
+
+def parse(rec, parse_dict):
+    """
+    Parse one line of the CPS into an OrderedDict of variable values
+    """
+    record = OrderedDict()
+
+    for var in parse_dict.keys():
+        start, end, decimals = parse_dict[var]
+        value = int(rec[start: end])
+        if decimals != 0:
+            value /= int("1" + ("0" * decimals))
+        record[var] = value
+
     return record
 
 
-def create_cps(dat_file, year, benefits=True, exportpkl=True, exportcsv=True):
+def create_cps(dat_file, year, parsing_dict, benefits=True, exportpkl=True,
+               exportcsv=True):
     """
     Read the .DAT CPS file and convert it to a list of dictionaries that
     to later be converted to tax units. Optionally export that list as a
@@ -132,11 +117,11 @@ def create_cps(dat_file, year, benefits=True, exportpkl=True, exportcsv=True):
     ----------
     dat_file: Path to the .DAT version of the CPS downloaded from NBER
     year: year of the CPS being converted
+    parsing_dict: field layouts keyed by "household", "family", "person"
     benefits: Set to true to include C-TAM imputed benefits in the CPS
     exportpkl: Set to true to export a pickled list of households in the CPS
     exportcsv: Set to true to export a CSV version of the CPS
     """
-
     # read in file
     print("Reading DAT file")
     with Path(dat_file).open("r") as f:
@@ -159,15 +144,16 @@ def create_cps(dat_file, year, benefits=True, exportpkl=True, exportcsv=True):
             if household:
                 cps_list.append(household)
                 household = []
-            house = h_rec(record)
+            house = parse(record, parsing_dict["household"])
         # family record
         elif rec_type == "2":
-            family = f_rec(record)
+            family = parse(record, parsing_dict["family"])
         # person record
         elif rec_type == "3":
-            person = p_rec(
-                record, benefits, house["h_seq"], family["fh_seq"],
-                family["ffpos"]
+            person = parse(record, parsing_dict["person"])
+            person = person_details(
+                person, benefits, house["h_seq"], family["fh_seq"],
+                family["ffpos"], year
             )
             full_rec = {**house, **family, **person}
             household.append(full_rec)
@@ -190,10 +176,3 @@ def create_cps(dat_file, year, benefits=True, exportpkl=True, exportcsv=True):
             pickle.dump(cps_list, f)
 
     return cps_list
-
-
-if __name__ == "__main__":
-    create_cps(
-        Path(CUR_PATH, "data", "{{ file_name }}"), {{ year }}, {{ benefits }}
-    )
-