Skip to content

Commit 54f5a38

Browse files
committed
added a plotting module with a scatter plot function
1 parent 98b4092 commit 54f5a38

File tree

4 files changed

+271
-0
lines changed

4 files changed

+271
-0
lines changed

playground/dev/sample_notebook.py

+63
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,63 @@
1+
2+
# coding: utf-8

# Notebook export: sets up the plotting environment, draws a matplotlib
# scatter plot of random data, wraps the data in a DataFrame and draws a
# seaborn regression joint plot.

# In[1]:

##### custom imports
try:
    import template
except Exception as e:
    # `template` is not used below, so a failed import is only reported
    # and execution continues.
    print(e)

##### standard imports
import pandas as pd
import numpy as np
import datetime as dt
import seaborn as sns
import pytz
# pyplot is the supported plotting interface; pylab is discouraged.
import matplotlib.pyplot as plt

##### globals
now = dt.datetime.now(pytz.timezone('US/Pacific'))
datestr = now.strftime('%m%d%y')
rootdir = './results/'

##### status, ipynb options
# str.format keeps the output identical on Python 2 and Python 3.
print('script executed at {}'.format(now))
pd.set_option('display.max_columns', 500)
pd.set_option('display.max_rows', 100)

# make the plots happen inline
get_ipython().run_line_magic('matplotlib', 'inline')


# ## Make a simple matplotlib plot with numpy arrays

# In[2]:

X = np.random.randn(100)
Y = np.random.randn(100)

plt.scatter(X, Y)
plt.show()


# ## Make a DataFrame

# In[12]:

df = pd.DataFrame({'X': X, 'Y': Y})
df.head()


# ## Make a pretty plot with seaborn

# In[14]:

# Keyword arguments: newer seaborn releases no longer accept x/y positionally.
sns.jointplot(x=X, y=Y, kind='reg')
58+
59+
60+
# In[ ]:
61+
62+
63+

projects/.DS_Store

0 Bytes
Binary file not shown.
+37
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,37 @@
1+
2+
# coding: utf-8
3+
4+
# Don't forget to install your package.
5+
#
6+
# ```
7+
# python setup.py install
8+
# ```
9+
# or on a mac...
10+
# ```
11+
# sudo python setup.py install
12+
# ```
13+
14+
# In[1]:
15+
16+
import template
17+
18+
19+
# Try pressing the Tab key to see which modules and methods you have available.
20+
21+
# In[2]:
22+
23+
template.simple_module
24+
25+
26+
# Try pressing Shift+Tab after () to see the docstring of the methods you built in simple_module.
27+
28+
# In[3]:
29+
30+
die = template.simple_module.die(6)
31+
die.roll()
32+
33+
34+
# >In order to make updates to the source code:
35+
# 1. The source code must be saved.
36+
# 2. The package must be re-installed with "python setup.py install"
37+
# 3. The notebook kernel must be restarted

repos/template/template/plotBox.py

+171
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,171 @@
1+
import matplotlib.pyplot as plt
2+
import numpy as np
3+
import pandas as pd
4+
5+
6+
7+
try:  # Python 2 has a common base class for str/unicode.
    _STRING_TYPES = basestring  # noqa: F821
except NameError:  # Python 3: every text string is ``str``.
    _STRING_TYPES = str


def _column_or_values(values, data):
    """Return ``(list_of_values, column_name)``; the name is None for raw input."""
    if isinstance(values, _STRING_TYPES):
        if data is None:
            raise ValueError(
                "column name %r was given but no `data` was supplied"
                % (values,))
        return list(data[values]), values
    return list(values), None


def _resolve_limits(requested, current):
    """Fill the ``None`` entries of a ``(low, high)`` pair from ``current``."""
    low = current[0] if requested[0] is None else requested[0]
    high = current[1] if requested[1] is None else requested[1]
    return low, high


def scatter_plot(x, y, data=None, hue=None, title='Scatter Plot',
                 xlabel='X-Values', ylabel='Y-Values', alpha=0.3,
                 figsize=(9.5*1.5, 6*1.5), saveAs=None,
                 vlines=None, hlines=None,
                 xlim=(None, None), ylim=(None, None), legend=True,
                 model=None,
                 plotly=False):
    """
    Draw a scatter plot with matplotlib.pyplot.scatter using a seaborn-like
    functional interface.

    A new figure is created on every call; the figure is left as the current
    pyplot figure so the caller can keep customising it. Nothing is returned.

    Parameters
    ----------
    x, y : strings or sequences
        Either column names in ``data`` or the values themselves. When a
        column name is given and the matching axis label still has its
        default value, the column name is used as the axis label.
    data : DataFrame, optional
        Object indexed by column name (``data[name]``). Required when ``x``,
        ``y`` or ``hue`` is a string.
    hue : string or sequence, optional
        Column name in ``data`` or one label per point. Points are drawn
        one group per distinct label, largest group first. Without ``hue``
        all points belong to a single group labelled ``'Data'``.
    title, xlabel, ylabel : strings
        Labels of the scatter plot.
    alpha : float
        Opacity of the scatter points.
    figsize : tuple, (width, height)
        Size passed to ``plt.figure``.
    saveAs : optional
        Filename to save the figure as; nothing is saved when None.
    vlines : scalar or list, optional
        x positions of vertical lines spanning the vertical limits.
    hlines : scalar or list, optional
        y positions of horizontal lines spanning the horizontal limits.
    xlim : tuple (xmin, xmax)
        Horizontal boundaries of the plot; a ``None`` entry keeps the
        automatically chosen bound.
    ylim : tuple (ymin, ymax)
        Vertical boundaries of the plot; a ``None`` entry keeps the
        automatically chosen bound.
    legend : boolean, optional
        Draw a legend listing the hue labels.
    model : optional
        Currently unused; kept for interface compatibility.
    plotly : boolean, optional
        Currently unused; kept for interface compatibility.

    Raises
    ------
    ValueError
        If a column name is given without ``data``, or if ``x``, ``y`` and
        ``hue`` do not all have the same length.

    Examples
    --------

    .. plot::
        >>> import plotBox
        >>> f = 1000
        >>> hue = ['one' for i in range(50*f)] + ['two' for i in range(30*f)] + ['three' for i in range(20*f)]
        >>> plotBox.scatter_plot(x = np.random.randn(100*f), y = np.random.randn(100*f), hue = hue, vlines = 0, alpha= .1, hlines = 0)

    .. todo::

        Add arguments:

        * dropna : boolean, optional
            Drop missing values from the data before plotting.

        * add regression :
            f, popt, pcov = rp.statBox.regression_model(x,y, model)
            plt.plot(np.linspace(0,max(x)+100,50), f(np.linspace(0,max(x)+100,50), *popt), 'r-', label="Fitted Curve")


    Notes
    -----
    This function can be used in 2 different ways:

    * Using the arguments to generate titles, legends, etc... and then save/display the plot

    * Incorporating the plot in a script and overriding the plotting features this way:

    >>> import matplotlib.pyplot as plt
    >>>
    >>> f = 1000
    >>> hue = ['one' for i in range(50*f)] + ['two' for i in range(30*f)] + ['three' for i in range(20*f)]
    >>> plotBox.scatter_plot(x = np.random.randn(100*f), y = np.random.randn(100*f), hue = hue, vlines = 0, alpha= .1, hlines = 0)
    >>> plt.title('My title')
    >>> plt.xlabel('X label I want')
    >>>
    >>> # To change the figure size :
    >>> fig = plt.gcf() # get the figure object
    >>> fig.set_size_inches(5,10)
    >>>
    >>> plt.show()

    """
    x, x_name = _column_or_values(x, data)
    if x_name is not None and xlabel == 'X-Values':
        xlabel = x_name

    y, y_name = _column_or_values(y, data)
    if y_name is not None and ylabel == 'Y-Values':
        ylabel = y_name

    if len(x) != len(y):
        raise ValueError(
            "x and y must have the same length (got %d and %d)"
            % (len(x), len(y)))

    if hue is None:
        hue = ['Data'] * len(x)
    else:
        hue, _ = _column_or_values(hue, data)
        if len(hue) != len(x):
            raise ValueError(
                "hue must have one label per point (got %d labels for %d "
                "points)" % (len(hue), len(x)))

    # Single pass: collect the point indices belonging to each hue label.
    groups = {}
    for position, label in enumerate(hue):
        groups.setdefault(label, []).append(position)

    # Largest group first; ties are broken by label, descending.
    ranked = sorted(
        ((len(members), label) for label, members in groups.items()),
        reverse=True)
    hue_labels = [label for _, label in ranked]

    base_palette = ['c', 'r', 'g', 'b', 'm', 'y', 'k']
    if len(hue_labels) > len(base_palette):
        # One distinct RGBA colour per group. Each colour is a 2-D array
        # with a single row so scatter treats it as one colour for every
        # point of the group rather than as per-point values.
        cmap = plt.get_cmap('gist_rainbow')
        palette = [np.reshape(rgba, (1, -1))
                   for rgba in cmap(np.linspace(0.0, 1.0, len(hue_labels)))]
    else:
        palette = base_palette

    plt.figure(figsize=figsize)
    for label, colour in zip(hue_labels, palette):
        members = groups[label]
        plt.scatter([x[i] for i in members], [y[i] for i in members],
                    c=colour, alpha=alpha)

    if legend:
        plt.legend(hue_labels)
    plt.title(title, size=16)
    plt.xlabel(xlabel, fontsize=14)
    plt.ylabel(ylabel, fontsize=14)

    # Requested bounds override the automatic ones; the resolved values are
    # reused below as the extent of the reference lines.
    y_min, y_max = _resolve_limits(ylim, plt.ylim())
    plt.ylim(y_min, y_max)

    x_min, x_max = _resolve_limits(xlim, plt.xlim())
    plt.xlim(x_min, x_max)

    if vlines is not None:
        plt.vlines(vlines, y_min, y_max)
    if hlines is not None:
        plt.hlines(hlines, x_min, x_max)

    if saveAs is not None:
        plt.savefig(saveAs)
171+

0 commit comments

Comments
 (0)