From 198b59cfa2a674e3bdc88c5ec87f9e5c922d8cc7 Mon Sep 17 00:00:00 2001
From: Evan Magnusson <evanmag42@gmail.com>
Date: Mon, 20 Jul 2015 10:06:52 -0700
Subject: [PATCH] merged income_polynomial and income files

---
 .../CURRENT_DEBUGGED_VERSION/income_nopoly.py | 160 ++++++++++++++++++
 Python/dynamic/income.py                      |  63 ++++---
 2 files changed, 203 insertions(+), 20 deletions(-)
 create mode 100644 Python/CURRENT_DEBUGGED_VERSION/income_nopoly.py

diff --git a/Python/CURRENT_DEBUGGED_VERSION/income_nopoly.py b/Python/CURRENT_DEBUGGED_VERSION/income_nopoly.py
new file mode 100644
index 000000000..53867d30a
--- /dev/null
+++ b/Python/CURRENT_DEBUGGED_VERSION/income_nopoly.py
@@ -0,0 +1,160 @@
+'''
+------------------------------------------------------------------------
+Last updated 2/16/2014
+
+Functions for created the matrix of ability levels, e.
+
+This py-file calls the following other file(s):
+            data/e_vec_data/cwhs_earn_rate_age_profile.csv
+
+This py-file creates the following other file(s):
+    (make sure that an OUTPUT folder exists)
+            OUTPUT/Demographics/ability_log
+------------------------------------------------------------------------
+'''
+
+'''
+------------------------------------------------------------------------
+    Packages
+------------------------------------------------------------------------
+'''
+
+import numpy as np
+import pandas as pd
+import matplotlib
+matplotlib.use('Agg')
+import matplotlib.pyplot as plt
+from mpl_toolkits.mplot3d import Axes3D
+import numpy.polynomial.polynomial as poly
+import scipy.optimize as opt
+
+
+'''
+------------------------------------------------------------------------
+    Read Data for Ability Types
+------------------------------------------------------------------------
+The data comes from the IRS.  We can either use wage or earnings data,
+in this version we are using earnings data.  The data is for individuals
+in centile groups for each age from 20 to 70.
+------------------------------------------------------------------------
+'''
+
+earn_rate = pd.read_table(
+    "data/e_vec_data/cwhs_earn_rate_age_profile.csv", sep=',', header=0)
+del earn_rate['obs_earn']
+piv = earn_rate.pivot(index='age', columns='q_earn', values='mean_earn_rate')
+
+emat_basic = np.array(piv)
+
+'''
+------------------------------------------------------------------------
+    Generate ability type matrix
+------------------------------------------------------------------------
+Given desired starting and stopping ages, as well as the values for S
+and J, the ability matrix is created.
+------------------------------------------------------------------------
+'''
+
+
+def fit_exp_right(params, pt1):
+    a, b = params
+    x1, y1, slope = pt1
+    error1 = -a*b**(-x1)*np.log(b) - slope
+    error2 = a*b**(-x1) - y1
+    return [error1, error2]
+
+
+def exp_funct(points, a, b):
+    y = a*b**(-points)
+    return y
+
+
+def exp_fit(e_input, S, J):
+    params_guess = [20, 1]
+    e_output = np.zeros((S, J))
+    e_output[:50, :] = e_input
+    for j in xrange(J):
+        meanslope = np.mean([e_input[-1, j]-e_input[-2, j], e_input[
+            -2, j]-e_input[-3, j], e_input[-3, j]-e_input[-4, j]])
+        slope = np.min([meanslope, -.01])
+        a, b = opt.fsolve(fit_exp_right, params_guess, args=(
+            [70, e_input[-1, j], slope]))
+        e_output[50:, j] = exp_funct(np.linspace(70, 100, 30), a, b)
+    return e_output
+
+
+def graph_income(S, J, e, starting_age, ending_age, bin_weights):
+    '''
+    Graphs the log of the ability matrix.
+    '''
+    e_tograph = np.log(e)
+    domain = np.linspace(starting_age, ending_age, S)
+    Jgrid = np.zeros(J)
+    for j in xrange(J):
+        Jgrid[j:] += bin_weights[j]
+    X, Y = np.meshgrid(domain, Jgrid)
+    cmap2 = matplotlib.cm.get_cmap('summer')
+    if J == 1:
+        plt.figure()
+        plt.plot(domain, e_tograph)
+        plt.savefig('OUTPUT/Demographics/ability_log')
+    else:
+        fig10 = plt.figure()
+        ax10 = fig10.gca(projection='3d')
+        ax10.plot_surface(X, Y, e_tograph.T, rstride=1, cstride=2, cmap=cmap2)
+        ax10.set_xlabel(r'age-$s$')
+        ax10.set_ylabel(r'ability type -$j$')
+        ax10.set_zlabel(r'log ability $log(e_j(s))$')
+        plt.savefig('OUTPUT/Demographics/ability_log')
+    if J == 1:
+        plt.figure()
+        plt.plot(domain, e)
+        plt.savefig('OUTPUT/Demographics/ability')
+    else:
+        fig10 = plt.figure()
+        ax10 = fig10.gca(projection='3d')
+        ax10.plot_surface(X, Y, e.T, rstride=1, cstride=2, cmap=cmap2)
+        ax10.set_xlabel(r'age-$s$')
+        ax10.set_ylabel(r'ability type -$j$')
+        ax10.set_zlabel(r'ability $e_j(s)$')
+        plt.savefig('OUTPUT/Demographics/ability')
+
+
+def get_e(S, J, starting_age, ending_age, bin_weights, omega_SS):
+    '''
+    Parameters: S - Number of age cohorts
+                J - Number of ability levels by age
+                starting_age - age of first age cohort
+                ending_age - age of last age cohort
+                bin_weights - what fraction of each age is in each
+                              abiility type
+
+    Returns:    e - S x J matrix of ability levels for each
+                    age cohort, normalized so
+                    the mean is one
+    '''
+    emat_trunc = emat_basic[:50, :]
+    cum_bins = 100 * np.array(bin_weights)
+    for j in xrange(J-1):
+        cum_bins[j+1] += cum_bins[j]
+    emat_collapsed = np.zeros((50, J))
+    for s in xrange(50):
+        for j in xrange(J):
+            if j == 0:
+                emat_collapsed[s, j] = emat_trunc[s, :cum_bins[j]].mean()
+            else:
+                emat_collapsed[s, j] = emat_trunc[
+                    s, cum_bins[j-1]:cum_bins[j]].mean()
+    e_fitted = np.zeros((50, J))
+    for j in xrange(J):
+        func = poly.polyfit(
+            np.arange(50)+starting_age, emat_collapsed[:50, j], deg=2)
+
+        e_fitted[:, j] = poly.polyval(np.arange(50)+starting_age, func)
+    emat_extended = exp_fit(e_fitted, S, J)
+    for j in xrange(1, J):
+        emat_extended[:, j] = np.max(np.array(
+            [emat_extended[:, j], emat_extended[:, j-1]]), axis=0)
+    graph_income(S, J, emat_extended, starting_age, ending_age, bin_weights)
+    emat_normed = emat_extended/(omega_SS * emat_extended).sum()
+    return emat_normed
diff --git a/Python/dynamic/income.py b/Python/dynamic/income.py
index 3ea67772f..c3ac4e740 100644
--- a/Python/dynamic/income.py
+++ b/Python/dynamic/income.py
@@ -1,6 +1,6 @@
 '''
 ------------------------------------------------------------------------
-Last updated 6/19/2015
+Last updated 7/17/2015
 
 Functions for created the matrix of ability levels, e.  This can
     only be used for looking at the 25, 50, 70, 80, 90, 99, and 100th
@@ -65,7 +65,17 @@
 
 def graph_income(S, J, e, starting_age, ending_age, bin_weights):
     '''
-    Graphs the log of the ability matrix.
+    Graphs the ability matrix (and it's log)
+    Inputs:
+        S = number of age groups (scalar)
+        J = number of ability types (scalar)
+        e = ability matrix (SxJ array)
+        starting_age = initial age (scalar)
+        ending_age = end age (scalar)
+        bin_weights = ability weights (Jx1 array)
+    Outputs:
+        OUTPUT/Demographics/ability_log.png
+        OUTPUT/Demographics/ability.png
     '''
     import matplotlib
     import matplotlib.pyplot as plt
@@ -104,7 +114,6 @@ def graph_income(S, J, e, starting_age, ending_age, bin_weights):
         box = ax.get_position()
         ax.set_position([box.x0, box.y0, box.width * 0.8, box.height])
         ax.legend(loc='center left', bbox_to_anchor=(1, 0.5))
-        # plt.show()
         ax.set_xlabel(r'age-$s$')
         ax.set_ylabel(r'log ability $log(e_j(s))$')
         plt.savefig('OUTPUT/Demographics/ability_log_2D')
@@ -120,20 +129,28 @@ def graph_income(S, J, e, starting_age, ending_age, bin_weights):
         ax10.set_ylabel(r'ability type -$j$')
         ax10.set_zlabel(r'ability $e_j(s)$')
         plt.savefig('OUTPUT/Demographics/ability')
-        # plt.show()
 
 
 def arc_tan_func(points, a, b, c):
+    '''
+    Functional form for a generic arctan function
+    '''
     y = (-a / np.pi) * np.arctan(b*points + c) + a / 2
     return y
 
 
 def arc_tan_deriv_func(points, a, b, c):
+    '''
+    Functional form for the derivative of a generic arctan function
+    '''
     y = -a * b / (np.pi * (1+(b*points+c)**2))
     return y
 
 
 def arc_error(guesses, params):
+    '''
+    How well the arctan function fits the slope of ability matrix at age 80, the level at age 80, and the level of age 80 times a constant
+    '''
     a, b, c = guesses
     first_point, coef1, coef2, coef3, ability_depreciation = params
     error1 = first_point - arc_tan_func(80, a, b, c)
@@ -141,42 +158,48 @@ def arc_error(guesses, params):
         error2 = (3 * coef3 * 80 ** 2 + 2 * coef2 * 80 + coef1)*first_point - arc_tan_deriv_func(80, a, b, c)
     else:
         error2 = -.02 * first_point - arc_tan_deriv_func(80, a, b, c)
-    # print (3 * coef3 * 80 ** 2 + 2 * coef2 * 80 + coef1) * first_point
     error3 = ability_depreciation * first_point - arc_tan_func(100, a, b, c)
     error = [np.abs(error1)] + [np.abs(error2)] + [np.abs(error3)]
-    # print np.array(error).max()
     return error
 
 
 def arc_tan_fit(first_point, coef1, coef2, coef3, ability_depreciation, init_guesses):
+    '''
+    Fits an arctan function to the last 20 years of the ability levels
+    '''
     guesses = init_guesses
     params = [first_point, coef1, coef2, coef3, ability_depreciation]
     a, b, c = opt.fsolve(arc_error, guesses, params)
-    # print a, b, c
-    # print np.array(arc_error([a, b, c], params)).max()
     old_ages = np.linspace(81, 100, 20)
     return arc_tan_func(old_ages, a, b, c)
 
 
 def get_e(S, J, starting_age, ending_age, bin_weights, omega_SS, flag_graphs):
     '''
-    Parameters: S - Number of age cohorts
-                J - Number of ability levels by age
-                starting_age - age of first age cohort
-                ending_age - age of last age cohort
-                bin_weights - what fraction of each age is in each
-                              abiility type
-
-    Returns:    e - S x J matrix of ability levels for each
-                    age cohort, normalized so
-                    the mean is one
+    Inputs: 
+        S = Number of age cohorts (scalar)
+        J = Number of ability levels by age (scalar)
+        starting_age = age of first age cohort (scalar)
+        ending_age = age of last age cohort (scalar)
+        bin_weights = ability weights (Jx1 array)
+        omega_SS = population weights (Sx1 array)
+        flag_graphs = Graph flags or not (bool)
+
+    Output:
+        e = ability levels for each age cohort, normalized so
+            the weighted sum is one (SxJ array)
     '''
     e_short = income_profiles
     e_final = np.ones((S, J))
     e_final[:60, :] = e_short
     e_final[60:, :] = 0.0
-    # This following variable is what percentage of ability at age 80 ability falls to at age 100
+    # This following variable is what percentage of ability at age 80 ability falls to at age 100.
+    # In general, we wanted people to lose half of their ability over a 20 year period.  The first
+    # entry is .47, though, because nothing higher would converge.  The second to last is .7 because this group
+    # actually has a slightly higher ability at age 80 then the last group, so this makes it decrease more so it
+    # ends monotonic.
     ability_depreciation = np.array([.47, .5, .5, .5, .5, .7, .5])
+    # Initial guesses for the arctan.  They're pretty sensitive.
     init_guesses = np.array([[58, 0.0756438545595, -5.6940142786],
                              [27, 0.069, -5],
                              [35, .06, -5],
@@ -188,5 +211,5 @@ def get_e(S, J, starting_age, ending_age, bin_weights, omega_SS, flag_graphs):
         e_final[60:, j] = arc_tan_fit(e_final[59, j], one[j], two[j], three[j], ability_depreciation[j], init_guesses[j])
     if flag_graphs:
         graph_income(S, J, e_final, starting_age, ending_age, bin_weights)
-    e_final /= (e_final * omega_SS).sum()
+    e_final /= (e_final * omega_SS.reshape(S, 1) * bin_weights.reshape(1, J)).sum()
     return e_final