Skip to content

Commit

Permalink
merged income_polynomial and income files
Browse files Browse the repository at this point in the history
  • Loading branch information
evan-magnusson committed Jul 20, 2015
1 parent bd228dd commit 198b59c
Show file tree
Hide file tree
Showing 2 changed files with 203 additions and 20 deletions.
160 changes: 160 additions & 0 deletions Python/CURRENT_DEBUGGED_VERSION/income_nopoly.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,160 @@
'''
------------------------------------------------------------------------
Last updated 2/16/2014
Functions for created the matrix of ability levels, e.
This py-file calls the following other file(s):
data/e_vec_data/cwhs_earn_rate_age_profile.csv
This py-file creates the following other file(s):
(make sure that an OUTPUT folder exists)
OUTPUT/Demographics/ability_log
------------------------------------------------------------------------
'''

'''
------------------------------------------------------------------------
Packages
------------------------------------------------------------------------
'''

import numpy as np
import pandas as pd
import matplotlib
matplotlib.use('Agg')
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
import numpy.polynomial.polynomial as poly
import scipy.optimize as opt


'''
------------------------------------------------------------------------
Read Data for Ability Types
------------------------------------------------------------------------
The data comes from the IRS. We can either use wage or earnings data,
in this version we are using earnings data. The data is for individuals
in centile groups for each age from 20 to 70.
------------------------------------------------------------------------
'''

earn_rate = pd.read_table(
"data/e_vec_data/cwhs_earn_rate_age_profile.csv", sep=',', header=0)
del earn_rate['obs_earn']
piv = earn_rate.pivot(index='age', columns='q_earn', values='mean_earn_rate')

emat_basic = np.array(piv)

'''
------------------------------------------------------------------------
Generate ability type matrix
------------------------------------------------------------------------
Given desired starting and stopping ages, as well as the values for S
and J, the ability matrix is created.
------------------------------------------------------------------------
'''


def fit_exp_right(params, pt1):
a, b = params
x1, y1, slope = pt1
error1 = -a*b**(-x1)*np.log(b) - slope
error2 = a*b**(-x1) - y1
return [error1, error2]


def exp_funct(points, a, b):
y = a*b**(-points)
return y


def exp_fit(e_input, S, J):
params_guess = [20, 1]
e_output = np.zeros((S, J))
e_output[:50, :] = e_input
for j in xrange(J):
meanslope = np.mean([e_input[-1, j]-e_input[-2, j], e_input[
-2, j]-e_input[-3, j], e_input[-3, j]-e_input[-4, j]])
slope = np.min([meanslope, -.01])
a, b = opt.fsolve(fit_exp_right, params_guess, args=(
[70, e_input[-1, j], slope]))
e_output[50:, j] = exp_funct(np.linspace(70, 100, 30), a, b)
return e_output


def graph_income(S, J, e, starting_age, ending_age, bin_weights):
'''
Graphs the log of the ability matrix.
'''
e_tograph = np.log(e)
domain = np.linspace(starting_age, ending_age, S)
Jgrid = np.zeros(J)
for j in xrange(J):
Jgrid[j:] += bin_weights[j]
X, Y = np.meshgrid(domain, Jgrid)
cmap2 = matplotlib.cm.get_cmap('summer')
if J == 1:
plt.figure()
plt.plot(domain, e_tograph)
plt.savefig('OUTPUT/Demographics/ability_log')
else:
fig10 = plt.figure()
ax10 = fig10.gca(projection='3d')
ax10.plot_surface(X, Y, e_tograph.T, rstride=1, cstride=2, cmap=cmap2)
ax10.set_xlabel(r'age-$s$')
ax10.set_ylabel(r'ability type -$j$')
ax10.set_zlabel(r'log ability $log(e_j(s))$')
plt.savefig('OUTPUT/Demographics/ability_log')
if J == 1:
plt.figure()
plt.plot(domain, e)
plt.savefig('OUTPUT/Demographics/ability')
else:
fig10 = plt.figure()
ax10 = fig10.gca(projection='3d')
ax10.plot_surface(X, Y, e.T, rstride=1, cstride=2, cmap=cmap2)
ax10.set_xlabel(r'age-$s$')
ax10.set_ylabel(r'ability type -$j$')
ax10.set_zlabel(r'ability $e_j(s)$')
plt.savefig('OUTPUT/Demographics/ability')


def get_e(S, J, starting_age, ending_age, bin_weights, omega_SS):
'''
Parameters: S - Number of age cohorts
J - Number of ability levels by age
starting_age - age of first age cohort
ending_age - age of last age cohort
bin_weights - what fraction of each age is in each
abiility type
Returns: e - S x J matrix of ability levels for each
age cohort, normalized so
the mean is one
'''
emat_trunc = emat_basic[:50, :]
cum_bins = 100 * np.array(bin_weights)
for j in xrange(J-1):
cum_bins[j+1] += cum_bins[j]
emat_collapsed = np.zeros((50, J))
for s in xrange(50):
for j in xrange(J):
if j == 0:
emat_collapsed[s, j] = emat_trunc[s, :cum_bins[j]].mean()
else:
emat_collapsed[s, j] = emat_trunc[
s, cum_bins[j-1]:cum_bins[j]].mean()
e_fitted = np.zeros((50, J))
for j in xrange(J):
func = poly.polyfit(
np.arange(50)+starting_age, emat_collapsed[:50, j], deg=2)

e_fitted[:, j] = poly.polyval(np.arange(50)+starting_age, func)
emat_extended = exp_fit(e_fitted, S, J)
for j in xrange(1, J):
emat_extended[:, j] = np.max(np.array(
[emat_extended[:, j], emat_extended[:, j-1]]), axis=0)
graph_income(S, J, emat_extended, starting_age, ending_age, bin_weights)
emat_normed = emat_extended/(omega_SS * emat_extended).sum()
return emat_normed
63 changes: 43 additions & 20 deletions Python/dynamic/income.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
'''
------------------------------------------------------------------------
Last updated 6/19/2015
Last updated 7/17/2015
Functions for created the matrix of ability levels, e. This can
only be used for looking at the 25, 50, 70, 80, 90, 99, and 100th
Expand Down Expand Up @@ -65,7 +65,17 @@

def graph_income(S, J, e, starting_age, ending_age, bin_weights):
'''
Graphs the log of the ability matrix.
Graphs the ability matrix (and it's log)
Inputs:
S = number of age groups (scalar)
J = number of ability types (scalar)
e = ability matrix (SxJ array)
starting_age = initial age (scalar)
ending_age = end age (scalar)
bin_weights = ability weights (Jx1 array)
Outputs:
OUTPUT/Demographics/ability_log.png
OUTPUT/Demographics/ability.png
'''
import matplotlib
import matplotlib.pyplot as plt
Expand Down Expand Up @@ -104,7 +114,6 @@ def graph_income(S, J, e, starting_age, ending_age, bin_weights):
box = ax.get_position()
ax.set_position([box.x0, box.y0, box.width * 0.8, box.height])
ax.legend(loc='center left', bbox_to_anchor=(1, 0.5))
# plt.show()
ax.set_xlabel(r'age-$s$')
ax.set_ylabel(r'log ability $log(e_j(s))$')
plt.savefig('OUTPUT/Demographics/ability_log_2D')
Expand All @@ -120,63 +129,77 @@ def graph_income(S, J, e, starting_age, ending_age, bin_weights):
ax10.set_ylabel(r'ability type -$j$')
ax10.set_zlabel(r'ability $e_j(s)$')
plt.savefig('OUTPUT/Demographics/ability')
# plt.show()


def arc_tan_func(points, a, b, c):
'''
Functional form for a generic arctan function
'''
y = (-a / np.pi) * np.arctan(b*points + c) + a / 2
return y


def arc_tan_deriv_func(points, a, b, c):
'''
Functional form for the derivative of a generic arctan function
'''
y = -a * b / (np.pi * (1+(b*points+c)**2))
return y


def arc_error(guesses, params):
'''
How well the arctan function fits the slope of ability matrix at age 80, the level at age 80, and the level of age 80 times a constant
'''
a, b, c = guesses
first_point, coef1, coef2, coef3, ability_depreciation = params
error1 = first_point - arc_tan_func(80, a, b, c)
if (3 * coef3 * 80 ** 2 + 2 * coef2 * 80 + coef1) < 0:
error2 = (3 * coef3 * 80 ** 2 + 2 * coef2 * 80 + coef1)*first_point - arc_tan_deriv_func(80, a, b, c)
else:
error2 = -.02 * first_point - arc_tan_deriv_func(80, a, b, c)
# print (3 * coef3 * 80 ** 2 + 2 * coef2 * 80 + coef1) * first_point
error3 = ability_depreciation * first_point - arc_tan_func(100, a, b, c)
error = [np.abs(error1)] + [np.abs(error2)] + [np.abs(error3)]
# print np.array(error).max()
return error


def arc_tan_fit(first_point, coef1, coef2, coef3, ability_depreciation, init_guesses):
'''
Fits an arctan function to the last 20 years of the ability levels
'''
guesses = init_guesses
params = [first_point, coef1, coef2, coef3, ability_depreciation]
a, b, c = opt.fsolve(arc_error, guesses, params)
# print a, b, c
# print np.array(arc_error([a, b, c], params)).max()
old_ages = np.linspace(81, 100, 20)
return arc_tan_func(old_ages, a, b, c)


def get_e(S, J, starting_age, ending_age, bin_weights, omega_SS, flag_graphs):
'''
Parameters: S - Number of age cohorts
J - Number of ability levels by age
starting_age - age of first age cohort
ending_age - age of last age cohort
bin_weights - what fraction of each age is in each
abiility type
Returns: e - S x J matrix of ability levels for each
age cohort, normalized so
the mean is one
Inputs:
S = Number of age cohorts (scalar)
J = Number of ability levels by age (scalar)
starting_age = age of first age cohort (scalar)
ending_age = age of last age cohort (scalar)
bin_weights = ability weights (Jx1 array)
omega_SS = population weights (Sx1 array)
flag_graphs = Graph flags or not (bool)
Output:
e = ability levels for each age cohort, normalized so
the weighted sum is one (SxJ array)
'''
e_short = income_profiles
e_final = np.ones((S, J))
e_final[:60, :] = e_short
e_final[60:, :] = 0.0
# This following variable is what percentage of ability at age 80 ability falls to at age 100
# This following variable is what percentage of ability at age 80 ability falls to at age 100.
# In general, we wanted people to lose half of their ability over a 20 year period. The first
# entry is .47, though, because nothing higher would converge. The second to last is .7 because this group
# actually has a slightly higher ability at age 80 then the last group, so this makes it decrease more so it
# ends monotonic.
ability_depreciation = np.array([.47, .5, .5, .5, .5, .7, .5])
# Initial guesses for the arctan. They're pretty sensitive.
init_guesses = np.array([[58, 0.0756438545595, -5.6940142786],
[27, 0.069, -5],
[35, .06, -5],
Expand All @@ -188,5 +211,5 @@ def get_e(S, J, starting_age, ending_age, bin_weights, omega_SS, flag_graphs):
e_final[60:, j] = arc_tan_fit(e_final[59, j], one[j], two[j], three[j], ability_depreciation[j], init_guesses[j])
if flag_graphs:
graph_income(S, J, e_final, starting_age, ending_age, bin_weights)
e_final /= (e_final * omega_SS).sum()
e_final /= (e_final * omega_SS.reshape(S, 1) * bin_weights.reshape(1, J)).sum()
return e_final

0 comments on commit 198b59c

Please sign in to comment.