-
Notifications
You must be signed in to change notification settings - Fork 200
/
example_correlation_explorer_with_plotly.py
110 lines (97 loc) · 3.58 KB
/
example_correlation_explorer_with_plotly.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
#######
# Super Simple Correlation Explorer
#
# Choose Two Variables, Scatter Plot Results and
# Use scipy.stats to run linear regression and plot a best fit line
# Built with Plot.ly, Dash and Flask
#
# Part of QS Ledger github.com/markwk/qs_ledger
######
import dash
import dash_core_components as dcc
import dash_html_components as html
from dash.dependencies import Input, Output
import plotly.graph_objs as go
import pandas as pd
from scipy import stats
# Load the combined personal-data export; the path is resolved relative to
# the directory the script is launched from.
df = pd.read_csv('data/combined_personal_data.csv')

# A bit of cleanup that might be unnecessary in your case: discard the row
# labelled 0 and the 'Unnamed: 0' column (presumably an index column written
# out by an earlier export -- confirm against your own CSV).
# errors='ignore' keeps start-up from failing with KeyError when the file
# does not contain that row label or that column.
df.drop([0], inplace=True, errors='ignore')
df.drop('Unnamed: 0', axis=1, inplace=True, errors='ignore')

app = dash.Dash()

# Let's use all of the columns as features for now.
features = df.columns
def _axis_dropdown(component_id, default_column):
    """Return a half-width inline container holding one column picker.

    The dropdown offers every entry of ``features`` and starts out with
    ``default_column`` selected.
    """
    choices = [{'label': column, 'value': column} for column in features]
    picker = dcc.Dropdown(id=component_id,
                          options=choices,
                          value=default_column)
    return html.Div([picker],
                    style={'width': '48%', 'display': 'inline-block'})


# Page layout: the two axis pickers side by side, then the graph, then the
# Markdown area that receives the regression statistics.
app.layout = html.Div(
    [
        _axis_dropdown('xaxis', 'ProjectTime'),
        _axis_dropdown('yaxis', 'Songs'),
        dcc.Graph(id='feature-graphic'),
        dcc.Markdown(id='correlation_stats'),
    ],
    style={'padding': 10},
)
@app.callback(Output('feature-graphic', 'figure'),
              [Input('xaxis', 'value'),
               Input('yaxis', 'value')])
def update_graph(xaxis_name, yaxis_name):
    """Build the scatter plot and best-fit line for the selected columns.

    Registered as the callback that fills the 'feature-graphic' figure from
    the current values of the 'xaxis' and 'yaxis' dropdowns.

    Args:
        xaxis_name: Column of ``df`` to plot on the x axis, or None when the
            dropdown has been cleared.
        yaxis_name: Column of ``df`` to plot on the y axis, or None when the
            dropdown has been cleared.

    Returns:
        A figure dict with a 'data' list of traces and a 'layout'.
    """
    title = 'Simple Correlation Explorer for QS Ledger'

    # A cleared dropdown reports None, which is not a column of df; show an
    # empty chart instead of failing on the column lookup.
    if xaxis_name is None or yaxis_name is None:
        return {'data': [],
                'layout': go.Layout(title=title, hovermode='closest')}

    traces = [
        go.Scatter(x=df[xaxis_name],
                   y=df[yaxis_name],
                   text=df['Date'],
                   mode='markers',
                   name="{} vs. {}".format(xaxis_name, yaxis_name),
                   marker={'size': 5,
                           'opacity': 0.5,
                           'line': {'width': 0.5, 'color': 'white'}}),
    ]

    # The fit only uses rows where both columns have a value.
    paired = df[[xaxis_name, yaxis_name]].dropna()

    # A regression line needs at least two points; with fewer, only the
    # scatter trace is drawn.
    if len(paired) >= 2:
        x = paired[xaxis_name]
        y = paired[yaxis_name]
        slope, intercept, _r_value, _p_value, _std_err = stats.linregress(x, y)
        traces.append(
            go.Scatter(x=x,
                       y=slope * x + intercept,
                       mode='lines',
                       # Plain dict instead of the deprecated go.Marker class.
                       marker={'color': 'rgb(31, 119, 180)'},
                       name='Fit')
        )

    return {
        'data': traces,
        'layout': go.Layout(
            title=title,
            xaxis={'title': xaxis_name},
            yaxis={'title': yaxis_name},
            hovermode='closest'
        ),
    }
@app.callback(
    Output('correlation_stats', 'children'),
    [Input('xaxis', 'value'),
     Input('yaxis', 'value')])
def callback_stats(xaxis_name, yaxis_name):
    """Return the regression statistics text for the selected columns.

    Registered as the callback that fills the 'correlation_stats' Markdown
    component from the current values of the 'xaxis' and 'yaxis' dropdowns.

    Args:
        xaxis_name: Column of ``df`` used as the x variable, or None when
            the dropdown has been cleared.
        yaxis_name: Column of ``df`` used as the y variable, or None when
            the dropdown has been cleared.

    Returns:
        A string listing both variable names with the p-value, r-value and
        standard error from ``stats.linregress``; an empty string when no
        regression can be computed.
    """
    # A cleared dropdown reports None, which is not a column of df.
    if xaxis_name is None or yaxis_name is None:
        return ""

    # Only rows where both columns have a value take part in the fit.
    paired = df[[xaxis_name, yaxis_name]].dropna()

    # A regression needs at least two points.
    if len(paired) < 2:
        return ""

    x = paired[xaxis_name]
    y = paired[yaxis_name]
    _slope, _intercept, r_value, p_value, std_err = stats.linregress(x, y)

    template = (
        "\n"
        "X-Variable: {}\n"
        "Y-Variable: {}\n"
        "p_value: {}\n"
        "r_value: {}\n"
        "std_err: {}\n"
    )
    return template.format(xaxis_name, yaxis_name, p_value, r_value, std_err)
# Start the app's server only when this file is executed as a script,
# not when it is imported.
if __name__ == "__main__":
    app.run_server()