From 198b59cfa2a674e3bdc88c5ec87f9e5c922d8cc7 Mon Sep 17 00:00:00 2001 From: Evan Magnusson Date: Mon, 20 Jul 2015 10:06:52 -0700 Subject: [PATCH] merged income_polynomial and income files --- .../CURRENT_DEBUGGED_VERSION/income_nopoly.py | 160 ++++++++++++++++++ Python/dynamic/income.py | 63 ++++--- 2 files changed, 203 insertions(+), 20 deletions(-) create mode 100644 Python/CURRENT_DEBUGGED_VERSION/income_nopoly.py diff --git a/Python/CURRENT_DEBUGGED_VERSION/income_nopoly.py b/Python/CURRENT_DEBUGGED_VERSION/income_nopoly.py new file mode 100644 index 000000000..53867d30a --- /dev/null +++ b/Python/CURRENT_DEBUGGED_VERSION/income_nopoly.py @@ -0,0 +1,160 @@ +''' +------------------------------------------------------------------------ +Last updated 2/16/2014 + +Functions for created the matrix of ability levels, e. + +This py-file calls the following other file(s): + data/e_vec_data/cwhs_earn_rate_age_profile.csv + +This py-file creates the following other file(s): + (make sure that an OUTPUT folder exists) + OUTPUT/Demographics/ability_log +------------------------------------------------------------------------ +''' + +''' +------------------------------------------------------------------------ + Packages +------------------------------------------------------------------------ +''' + +import numpy as np +import pandas as pd +import matplotlib +matplotlib.use('Agg') +import matplotlib.pyplot as plt +from mpl_toolkits.mplot3d import Axes3D +import numpy.polynomial.polynomial as poly +import scipy.optimize as opt + + +''' +------------------------------------------------------------------------ + Read Data for Ability Types +------------------------------------------------------------------------ +The data comes from the IRS. We can either use wage or earnings data, +in this version we are using earnings data. The data is for individuals +in centile groups for each age from 20 to 70. +------------------------------------------------------------------------ +''' + +earn_rate = pd.read_table( + "data/e_vec_data/cwhs_earn_rate_age_profile.csv", sep=',', header=0) +del earn_rate['obs_earn'] +piv = earn_rate.pivot(index='age', columns='q_earn', values='mean_earn_rate') + +emat_basic = np.array(piv) + +''' +------------------------------------------------------------------------ + Generate ability type matrix +------------------------------------------------------------------------ +Given desired starting and stopping ages, as well as the values for S +and J, the ability matrix is created. +------------------------------------------------------------------------ +''' + + +def fit_exp_right(params, pt1): + a, b = params + x1, y1, slope = pt1 + error1 = -a*b**(-x1)*np.log(b) - slope + error2 = a*b**(-x1) - y1 + return [error1, error2] + + +def exp_funct(points, a, b): + y = a*b**(-points) + return y + + +def exp_fit(e_input, S, J): + params_guess = [20, 1] + e_output = np.zeros((S, J)) + e_output[:50, :] = e_input + for j in xrange(J): + meanslope = np.mean([e_input[-1, j]-e_input[-2, j], e_input[ + -2, j]-e_input[-3, j], e_input[-3, j]-e_input[-4, j]]) + slope = np.min([meanslope, -.01]) + a, b = opt.fsolve(fit_exp_right, params_guess, args=( + [70, e_input[-1, j], slope])) + e_output[50:, j] = exp_funct(np.linspace(70, 100, 30), a, b) + return e_output + + +def graph_income(S, J, e, starting_age, ending_age, bin_weights): + ''' + Graphs the log of the ability matrix. + ''' + e_tograph = np.log(e) + domain = np.linspace(starting_age, ending_age, S) + Jgrid = np.zeros(J) + for j in xrange(J): + Jgrid[j:] += bin_weights[j] + X, Y = np.meshgrid(domain, Jgrid) + cmap2 = matplotlib.cm.get_cmap('summer') + if J == 1: + plt.figure() + plt.plot(domain, e_tograph) + plt.savefig('OUTPUT/Demographics/ability_log') + else: + fig10 = plt.figure() + ax10 = fig10.gca(projection='3d') + ax10.plot_surface(X, Y, e_tograph.T, rstride=1, cstride=2, cmap=cmap2) + ax10.set_xlabel(r'age-$s$') + ax10.set_ylabel(r'ability type -$j$') + ax10.set_zlabel(r'log ability $log(e_j(s))$') + plt.savefig('OUTPUT/Demographics/ability_log') + if J == 1: + plt.figure() + plt.plot(domain, e) + plt.savefig('OUTPUT/Demographics/ability') + else: + fig10 = plt.figure() + ax10 = fig10.gca(projection='3d') + ax10.plot_surface(X, Y, e.T, rstride=1, cstride=2, cmap=cmap2) + ax10.set_xlabel(r'age-$s$') + ax10.set_ylabel(r'ability type -$j$') + ax10.set_zlabel(r'ability $e_j(s)$') + plt.savefig('OUTPUT/Demographics/ability') + + +def get_e(S, J, starting_age, ending_age, bin_weights, omega_SS): + ''' + Parameters: S - Number of age cohorts + J - Number of ability levels by age + starting_age - age of first age cohort + ending_age - age of last age cohort + bin_weights - what fraction of each age is in each + abiility type + + Returns: e - S x J matrix of ability levels for each + age cohort, normalized so + the mean is one + ''' + emat_trunc = emat_basic[:50, :] + cum_bins = 100 * np.array(bin_weights) + for j in xrange(J-1): + cum_bins[j+1] += cum_bins[j] + emat_collapsed = np.zeros((50, J)) + for s in xrange(50): + for j in xrange(J): + if j == 0: + emat_collapsed[s, j] = emat_trunc[s, :cum_bins[j]].mean() + else: + emat_collapsed[s, j] = emat_trunc[ + s, cum_bins[j-1]:cum_bins[j]].mean() + e_fitted = np.zeros((50, J)) + for j in xrange(J): + func = poly.polyfit( + np.arange(50)+starting_age, emat_collapsed[:50, j], deg=2) + + e_fitted[:, j] = poly.polyval(np.arange(50)+starting_age, func) + emat_extended = exp_fit(e_fitted, S, J) + for j in xrange(1, J): + emat_extended[:, j] = np.max(np.array( + [emat_extended[:, j], emat_extended[:, j-1]]), axis=0) + graph_income(S, J, emat_extended, starting_age, ending_age, bin_weights) + emat_normed = emat_extended/(omega_SS * emat_extended).sum() + return emat_normed diff --git a/Python/dynamic/income.py b/Python/dynamic/income.py index 3ea67772f..c3ac4e740 100644 --- a/Python/dynamic/income.py +++ b/Python/dynamic/income.py @@ -1,6 +1,6 @@ ''' ------------------------------------------------------------------------ -Last updated 6/19/2015 +Last updated 7/17/2015 Functions for created the matrix of ability levels, e. This can only be used for looking at the 25, 50, 70, 80, 90, 99, and 100th @@ -65,7 +65,17 @@ def graph_income(S, J, e, starting_age, ending_age, bin_weights): ''' - Graphs the log of the ability matrix. + Graphs the ability matrix (and it's log) + Inputs: + S = number of age groups (scalar) + J = number of ability types (scalar) + e = ability matrix (SxJ array) + starting_age = initial age (scalar) + ending_age = end age (scalar) + bin_weights = ability weights (Jx1 array) + Outputs: + OUTPUT/Demographics/ability_log.png + OUTPUT/Demographics/ability.png ''' import matplotlib import matplotlib.pyplot as plt @@ -104,7 +114,6 @@ def graph_income(S, J, e, starting_age, ending_age, bin_weights): box = ax.get_position() ax.set_position([box.x0, box.y0, box.width * 0.8, box.height]) ax.legend(loc='center left', bbox_to_anchor=(1, 0.5)) - # plt.show() ax.set_xlabel(r'age-$s$') ax.set_ylabel(r'log ability $log(e_j(s))$') plt.savefig('OUTPUT/Demographics/ability_log_2D') @@ -120,20 +129,28 @@ def graph_income(S, J, e, starting_age, ending_age, bin_weights): ax10.set_ylabel(r'ability type -$j$') ax10.set_zlabel(r'ability $e_j(s)$') plt.savefig('OUTPUT/Demographics/ability') - # plt.show() def arc_tan_func(points, a, b, c): + ''' + Functional form for a generic arctan function + ''' y = (-a / np.pi) * np.arctan(b*points + c) + a / 2 return y def arc_tan_deriv_func(points, a, b, c): + ''' + Functional form for the derivative of a generic arctan function + ''' y = -a * b / (np.pi * (1+(b*points+c)**2)) return y def arc_error(guesses, params): + ''' + How well the arctan function fits the slope of ability matrix at age 80, the level at age 80, and the level of age 80 times a constant + ''' a, b, c = guesses first_point, coef1, coef2, coef3, ability_depreciation = params error1 = first_point - arc_tan_func(80, a, b, c) @@ -141,42 +158,48 @@ def arc_error(guesses, params): error2 = (3 * coef3 * 80 ** 2 + 2 * coef2 * 80 + coef1)*first_point - arc_tan_deriv_func(80, a, b, c) else: error2 = -.02 * first_point - arc_tan_deriv_func(80, a, b, c) - # print (3 * coef3 * 80 ** 2 + 2 * coef2 * 80 + coef1) * first_point error3 = ability_depreciation * first_point - arc_tan_func(100, a, b, c) error = [np.abs(error1)] + [np.abs(error2)] + [np.abs(error3)] - # print np.array(error).max() return error def arc_tan_fit(first_point, coef1, coef2, coef3, ability_depreciation, init_guesses): + ''' + Fits an arctan function to the last 20 years of the ability levels + ''' guesses = init_guesses params = [first_point, coef1, coef2, coef3, ability_depreciation] a, b, c = opt.fsolve(arc_error, guesses, params) - # print a, b, c - # print np.array(arc_error([a, b, c], params)).max() old_ages = np.linspace(81, 100, 20) return arc_tan_func(old_ages, a, b, c) def get_e(S, J, starting_age, ending_age, bin_weights, omega_SS, flag_graphs): ''' - Parameters: S - Number of age cohorts - J - Number of ability levels by age - starting_age - age of first age cohort - ending_age - age of last age cohort - bin_weights - what fraction of each age is in each - abiility type - - Returns: e - S x J matrix of ability levels for each - age cohort, normalized so - the mean is one + Inputs: + S = Number of age cohorts (scalar) + J = Number of ability levels by age (scalar) + starting_age = age of first age cohort (scalar) + ending_age = age of last age cohort (scalar) + bin_weights = ability weights (Jx1 array) + omega_SS = population weights (Sx1 array) + flag_graphs = Graph flags or not (bool) + + Output: + e = ability levels for each age cohort, normalized so + the weighted sum is one (SxJ array) ''' e_short = income_profiles e_final = np.ones((S, J)) e_final[:60, :] = e_short e_final[60:, :] = 0.0 - # This following variable is what percentage of ability at age 80 ability falls to at age 100 + # This following variable is what percentage of ability at age 80 ability falls to at age 100. + # In general, we wanted people to lose half of their ability over a 20 year period. The first + # entry is .47, though, because nothing higher would converge. The second to last is .7 because this group + # actually has a slightly higher ability at age 80 then the last group, so this makes it decrease more so it + # ends monotonic. ability_depreciation = np.array([.47, .5, .5, .5, .5, .7, .5]) + # Initial guesses for the arctan. They're pretty sensitive. init_guesses = np.array([[58, 0.0756438545595, -5.6940142786], [27, 0.069, -5], [35, .06, -5], @@ -188,5 +211,5 @@ def get_e(S, J, starting_age, ending_age, bin_weights, omega_SS, flag_graphs): e_final[60:, j] = arc_tan_fit(e_final[59, j], one[j], two[j], three[j], ability_depreciation[j], init_guesses[j]) if flag_graphs: graph_income(S, J, e_final, starting_age, ending_age, bin_weights) - e_final /= (e_final * omega_SS).sum() + e_final /= (e_final * omega_SS.reshape(S, 1) * bin_weights.reshape(1, J)).sum() return e_final