1
+ import matplotlib .pyplot as plt
2
+ import numpy as np
3
+ import pandas as pd
4
+
5
+
6
+
7
def _as_list(values, data):
    """Return ``(items, was_name)`` for a column name or an iterable.

    When ``values`` is a string it is treated as a key into ``data`` and the
    selected column is returned as a list with ``was_name`` set to True.
    Anything else is simply materialised with ``list``.
    """
    try:
        string_types = basestring  # Python 2: covers both str and unicode
    except NameError:
        string_types = str  # Python 3 has no ``basestring``

    if isinstance(values, string_types):
        if data is None:
            # Same exception type as indexing ``None``, but with a usable message.
            raise TypeError(
                "column name %r was given but `data` is None" % (values,))
        return list(data[values]), True
    return list(values), False


def scatter_plot(x, y, data=None, hue=None, title='Scatter Plot',
                 xlabel='X-Values', ylabel='Y-Values', alpha=0.3,
                 figsize=(9.5 * 1.5, 6 * 1.5), saveAs=None,
                 vlines=None, hlines=None,
                 xlim=(None, None), ylim=(None, None), legend=True,
                 model=None,
                 plotly=False):
    """
    Draw a scatter plot with matplotlib.pyplot.scatter using a seaborn-like
    functional paradigm.

    A new figure is created, one ``plt.scatter`` call is issued per distinct
    hue value (largest group first), and titles, labels, limits, reference
    lines and the legend are applied to the current axes.

    Parameters
    ----------
    x, y : strings or iterables
        Column names in ``data``, or the values themselves.  When a column
        name is given and the matching axis label was left at its default,
        the column name is used as the axis label.
    data : DataFrame, optional
        Long-form (tidy) dataframe with variables in columns and observations
        in rows.  Required when ``x``, ``y`` or ``hue`` is a column name.
    hue : string or iterable, optional
        Column name in ``data``, or one label per observation, used to split
        the points into separately coloured groups.  When omitted every point
        belongs to a single group labelled ``'Data'``.
    title, xlabel, ylabel : strings
        Labels of the scatter plot.
    alpha : float
        Opacity of the scatter points.
    figsize : tuple, (width, height)
        Size passed to ``plt.figure``.
    saveAs : optional
        Filename to save the figure as.
    vlines : scalar or list, optional
        x position(s) at which to draw vertical lines spanning the y limits.
    hlines : scalar or list, optional
        y position(s) at which to draw horizontal lines spanning the x limits.
    xlim : tuple (xmin, xmax)
        Horizontal boundaries of the plot; a ``None`` entry keeps the
        automatically chosen bound.
    ylim : tuple (ymin, ymax)
        Vertical boundaries of the plot; a ``None`` entry keeps the
        automatically chosen bound.
    legend : boolean, optional
        Draw a legend listing the hue labels.
    model : optional
        Currently unused; accepted for interface compatibility.
    plotly : boolean, optional
        Currently unused; accepted for interface compatibility.

    Raises
    ------
    TypeError
        If ``x``, ``y`` or ``hue`` is a column name and ``data`` is None.

    Examples
    --------

    .. plot::
        >>> import plotBox
        >>> f = 1000
        >>> hue = ['one' for i in range(50*f)] + ['two' for i in range(30*f)] + ['three' for i in range(20*f)]
        >>> plotBox.scatter_plot(x = np.random.randn(100*f), y = np.random.randn(100*f), hue = hue, vlines = 0, alpha= .1, hlines = 0)

    .. todo::

        Add arguments:

        * dropna : boolean, optional
            Drop missing values from the data before plotting.

        * add regression :
            f, popt, pcov = rp.statBox.regression_model(x,y, model)
            plt.plot(np.linspace(0,max(x)+100,50), f(np.linspace(0,max(x)+100,50), *popt), 'r-', label="Fitted Curve")

    Notes
    -----
    This function can be used in 2 different ways:

    * Using the arguments to generate titles, legends, etc... and then
      save/display the plot

    * Incorporating the plot in a script and overriding the plotting
      features this way:

        >>> import matplotlib.pyplot as plt
        >>>
        >>> f = 1000
        >>> hue = ['one' for i in range(50*f)] + ['two' for i in range(30*f)] + ['three' for i in range(20*f)]
        >>> plotBox.scatter_plot(x = np.random.randn(100*f), y = np.random.randn(100*f), hue = hue, vlines = 0, alpha= .1, hlines = 0)
        >>> plt.title('My title')
        >>> plt.xlabel('X label I want')
        >>>
        >>> # To change the figure size :
        >>> fig = plt.gcf()  # get the figure object
        >>> fig.set_size_inches(5,10)
        >>>
        >>> plt.show()

    """
    x_name = x
    x, x_is_name = _as_list(x, data)
    if x_is_name and xlabel == 'X-Values':
        xlabel = x_name

    y_name = y
    y, y_is_name = _as_list(y, data)
    if y_is_name and ylabel == 'Y-Values':
        ylabel = y_name

    if hue is None:
        hue = ['Data'] * len(x)
    else:
        hue, _ = _as_list(hue, data)

    # Group observation positions by hue label in a single pass.  Comparing
    # the labels themselves (rather than a NumPy array built from them) keeps
    # mixed-type labels from being coerced to strings and never matching.
    positions_by_label = {}
    for position, label in enumerate(hue):
        positions_by_label.setdefault(label, []).append(position)

    # Largest group first; ties are broken by label, descending.
    ranked = sorted(
        ((len(positions), label)
         for label, positions in positions_by_label.items()),
        reverse=True)
    hue_labels = [label for _, label in ranked]

    if len(hue_labels) > 7:
        # One random RGB colour per group.  Each colour is kept as a 2-D
        # single-row array so scatter reads it as one colour for all points
        # instead of a sequence of values to be colour-mapped.
        palette = np.random.rand(len(hue_labels), 3)
        color_list = [rgb.reshape(1, 3) for rgb in palette]
    else:
        color_list = ['c', 'r', 'g', 'b', 'm', 'y', 'k']

    plt.figure(figsize=figsize)
    for label, color in zip(hue_labels, color_list):
        positions = positions_by_label[label]
        plt.scatter([x[i] for i in positions],
                    [y[i] for i in positions],
                    c=color, alpha=alpha)

    if legend:
        plt.legend(hue_labels)
    plt.title(title, size=16)
    plt.xlabel(xlabel, fontsize=14)
    plt.ylabel(ylabel, fontsize=14)

    # A ``None`` bound falls back to whatever matplotlib chose automatically.
    auto_y = plt.ylim()
    y_min = auto_y[0] if ylim[0] is None else ylim[0]
    y_max = auto_y[1] if ylim[1] is None else ylim[1]
    plt.ylim(y_min, y_max)

    auto_x = plt.xlim()
    x_min = auto_x[0] if xlim[0] is None else xlim[0]
    x_max = auto_x[1] if xlim[1] is None else xlim[1]
    plt.xlim(x_min, x_max)

    # Reference lines span the final axis limits.
    if vlines is not None:
        plt.vlines(vlines, y_min, y_max)
    if hlines is not None:
        plt.hlines(hlines, x_min, x_max)

    if saveAs is not None:
        plt.savefig(saveAs)
171
+
0 commit comments