|
| 1 | +######################################## |
| 2 | +# A brief introduction to numpy arrays # |
| 3 | +######################################## |
| 4 | +# |
| 5 | +# Prereqs: Basic python. "import", built-in data types (numbers, lists, |
| 6 | +# strings), range |
| 7 | +# |
| 8 | +# This short tutorial is mostly about introducing numpy arrays, how they're |
| 9 | +# different from basic python lists/tuples, and the various ways you can |
| 10 | +# manipulate them. It's intended to be both a runnable python script, and |
| 11 | +# a step by step tutorial. |
| 12 | +# |
| 13 | +# This tutorial does NOT cover |
| 14 | +# 1) Installing numpy/dependencies. For that see |
| 15 | +# |
| 16 | +# http://docs.scipy.org/doc/numpy/user/install.html |
| 17 | +# |
| 18 | +# 2) Basic python. This includes getting, installing, running the python |
| 19 | +# interpreter, the basic python data types (strings, numbers, sequences), |
| 20 | +# if statements, or for loops. If you're new to python an excellent place |
| 21 | +# to start is here: |
| 22 | +# |
| 23 | +# http://docs.python.org/2/tutorial/ |
| 24 | +# |
| 25 | +# 3) Any numpy libraries in depth. It may include references to utility |
| 26 | +# functions where necessary, but this is strictly a tutorial for |
| 27 | +# beginners. More advanced documentation is available here: |
| 28 | +# |
| 29 | +# (Users guide) |
| 30 | +# http://docs.scipy.org/doc/numpy/user/index.html |
| 31 | +# (Reference documentation) |
| 32 | +# http://docs.scipy.org/doc/numpy/reference/ |
| 33 | +# |
| 34 | +# |
| 35 | +# |
| 36 | +# |
| 37 | +## Let's get started! |
| 38 | +print "Importing numpy" |
| 39 | +import numpy as np |
| 40 | + |
| 41 | +## This loads the numpy library and lets us refer to it by the shorthand "np", |
| 42 | +## which is the convention used in the numpy documentation and in many |
| 43 | +## online tutorials/examples |
| 44 | + |
| 45 | +print "Creating arrays" |
| 46 | +## Now let's make an array to play around with. You can make numpy arrays in |
| 47 | +## a number of ways, |
| 48 | +## Filled with zeros: |
| 49 | +zeroArray = np.zeros( (2,3) ) # [[ 0. 0. 0.] |
| 50 | +print zeroArray # [ 0. 0. 0.]] |
| 51 | + |
| 52 | +## Or ones: |
| 53 | +oneArray = np.ones( (2,3) ) # [[ 1. 1. 1.] |
| 54 | +print oneArray # [ 1. 1. 1.]] |
| 55 | + |
| 56 | +## Or filled with junk: |
| 57 | +emptyArray = np.empty( (2,3) ) |
| 58 | +print emptyArray |
| 59 | + |
| 60 | +## Note, emptyArray might look random, but it's just uninitialized which means |
| 61 | +## you shouldn't count on it having any particular data in it, even random |
| 62 | +## data! If you do want random data you can use random(): |
| 63 | +randomArray = np.random.random( (2,3) ) |
| 64 | +print randomArray |
| 65 | + |
| 66 | +## If you're following along and trying these commands out, note that making |
| 67 | +## randomArray does more work than making emptyArray (the difference only |
| 68 | +## becomes noticeable for large arrays). That's because np.random.random(...) |
| 69 | +## is actually using a random number generator to fill in each of the spots |
| 70 | +## in the array with a number sampled uniformly from [0, 1). |
| 71 | + |
| 72 | +## You can also create an array by hand: |
| 73 | +foo = [ [1,2,3], |
| 74 | + [4,5,6]] |
| 75 | + |
| 76 | +myArray = np.array(foo) # [[1 2 3] |
| 77 | +print myArray # [4 5 6]] |
| 78 | + |
| 79 | + |
| 80 | +print "Reshaping arrays" |
| 81 | +## Of course, if you're typing out a range for a larger matrix, it's easier to |
| 82 | +## use arange(...): |
| 83 | +rangeArray = np.arange(6,12).reshape( (2,3) ) # [[ 6 7 8] |
| 84 | +print rangeArray # [ 9 10 11]] |
| 85 | + |
| 86 | +## There are two things going on here. First, the arange(...) function returns a |
| 87 | +## 1D array similar to what you'd get from using the built-in python function |
| 88 | +## range(...) with the same arguments, except it returns a numpy array |
| 89 | +## instead of a list. |
| 90 | +print np.arange(6,12) # [ 6 7 8 9 10 11] |
| 91 | + |
| 92 | +## the reshape method takes the data in an existing array, and stuffs it into |
| 93 | +## an array with the given shape and returns it. |
| 94 | +print rangeArray.reshape( (3,2) ) # [[ 6 7] |
| 95 | + # [ 8 9] |
| 96 | + # [10 11]] |
| 97 | + |
| 98 | +#The original array doesn't change though. |
| 99 | +print rangeArray # [[ 6 7 8] |
| 100 | + # [ 9 10 11]] |
| 101 | + |
| 102 | +## When you use reshape(...) the total number of things in the array must stay |
| 103 | +## the same. So reshaping an array with 2 rows and 3 columns into one with |
| 104 | +## 3 rows and 2 columns is fine, but 3x3 or 1x5 won't work |
| 105 | +#print rangeArray.reshape( (3,3) ) #ERROR |
| 106 | +squareArray = np.arange(1,10).reshape( (3,3) ) #this is fine, 9 elements |
| 107 | + |
| 108 | + |
| 109 | +print "Accessing array elements" |
| 110 | +## Accessing an array is also pretty straightforward. You access a specific |
| 111 | +## spot in the table by referring to its row and column inside square brackets |
| 112 | +## after the array: |
| 113 | +print rangeArray[0,1] #7 |
| 114 | + |
| 115 | +## Note that row and column numbers start from 0, not 1! Numpy also lets you |
| 116 | +## refer to ranges inside an array: |
| 117 | +print rangeArray[0,0:2] #[6 7] |
| 118 | +print squareArray[0:2,0:2] #[[1 2] # the top left corner of squareArray |
| 119 | + # [4 5]] |
| 120 | + |
| 121 | +## These ranges work just like slices of python lists. n:m:t specifies a range |
| 122 | +## that starts at n, and stops before m, in steps of size t. If any of these |
| 123 | +## are left off, they default to the start, one past the end, and 1 respectively. |
| 124 | +print squareArray[:,0:3:2] #[[1 3] #skip the middle column |
| 125 | + # [4 6] |
| 126 | + # [7 9]] |
| 127 | + |
| 128 | +## Also like python lists, you can assign values to specific positions, or |
| 129 | +## ranges of values to slices |
| 130 | +squareArray[0,:] = np.array(range(1,4)) #set the first row to 1,2,3 |
| 131 | +squareArray[1,1] = 0 # set the middle spot to zero |
| 132 | +squareArray[2,:] = 1 # set the last row to ones |
| 133 | +print squareArray # [[1 2 3] |
| 134 | + # [4 0 6] |
| 135 | + # [1 1 1]] |
| 136 | + |
| 137 | +## Something new to numpy arrays is indexing using an array of indices: |
| 138 | +fibIndices = np.array( [1, 1, 2, 3] ) |
| 139 | +randomRow = np.random.random( (10,1) ) # an array of 10 random numbers |
| 140 | +print randomRow |
| 141 | +print randomRow[fibIndices] # the elements at indices 1, 1, 2 and 3 of |
| 142 | + # randomRow (indices count from 0) |
| 143 | + |
| 144 | +## You can also use an array of true/false values to index: |
| 145 | +boolIndices = np.array( [[ True, False, True], |
| 146 | + [False, True, False], |
| 147 | + [ True, False, True]] ) |
| 148 | +print squareArray[boolIndices] # a 1D array with the selected values |
| 149 | + # [1 3 0 1 1] |
| 150 | + |
| 151 | +## It gets a little more complicated with 2D (and higher) arrays. You need |
| 152 | +## two index arrays for a 2D array: |
| 153 | +rows = np.array( [[0,0],[2,2]] ) #get the corners of our square array |
| 154 | +cols = np.array( [[0,2],[0,2]] ) |
| 155 | +print squareArray[rows,cols] #[[1 3] |
| 156 | + # [1 1]] |
| 157 | +boolRows = np.array( [False, True, False] ) # just the middle row |
| 158 | +boolCols = np.array( [True, False, True] ) # Not the middle column |
| 159 | +print squareArray[boolRows,boolCols] # [4 6] |
| 160 | + |
| 161 | +print "Operations on arrays" |
| 162 | +## One useful trick is to create a boolean matrix based on some test and use |
| 163 | +## that as an index in order to get the elements of a matrix that pass the |
| 164 | +## test: |
| 165 | +sqAverage = np.average(squareArray) # average(...) returns the average of all |
| 166 | + # the elements in the given array |
| 167 | +betterThanAverage = squareArray > sqAverage |
| 168 | +print betterThanAverage #[[False False True] |
| 169 | + # [ True False True] |
| 170 | + # [False False False]] |
| 171 | +print squareArray[betterThanAverage] #[3 4 6] |
| 172 | + |
| 173 | +## Indexing like this can also be used to assign values to elements of the |
| 174 | +## array. This is particularly useful if you want to filter an array, say by |
| 175 | +## making sure that all of its values are above/below a certain threshold: |
| 176 | +sqStdDev = np.std(squareArray) # std(...) returns the standard deviation of |
| 177 | + # all the elements in the given array |
| 178 | +clampedSqArray = np.array(squareArray.copy(), dtype=float) |
| 179 | + # make a copy of squareArray that will |
| 180 | + # be "clamped". It will only contain |
| 181 | + # values within one standard deviation |
| 182 | + # of the mean. Values that are too low |
| 183 | + # or too high will be set to the min |
| 184 | + # and max respectively. We set |
| 185 | + # dtype=float because sqAverage |
| 186 | + # and sqStdDev are floating point |
| 187 | + # numbers, and we don't want to |
| 188 | + # truncate them down to integers. |
| 189 | +clampedSqArray[ (squareArray-sqAverage) > sqStdDev ] = sqAverage+sqStdDev |
| 190 | +clampedSqArray[ (squareArray-sqAverage) < -sqStdDev ] = sqAverage-sqStdDev |
| 191 | +print clampedSqArray # [[ 1. 2. 3. ] |
| 192 | + # [ 3.90272394 0.31949828 3.90272394] |
| 193 | + # [ 1. 1. 1. ]] |
| 194 | + |
| 195 | + |
| 196 | +## Multiplying and dividing arrays by numbers does what you'd expect. It |
| 197 | +## multiplies/divides element-wise |
| 198 | +print squareArray * 2 # [[ 2 4 6] |
| 199 | + # [ 8 0 12] |
| 200 | + # [ 2 2 2]] |
| 201 | + |
| 202 | +## Addition works similarly: |
| 203 | +print squareArray + np.ones( (3,3) ) #[[ 2. 3. 4.] |
| 204 | + # [ 5. 1. 7.] |
| 205 | + # [ 2. 2. 2.]] |
| 206 | + |
| 207 | +## Multiplying two arrays together (of the same size) is also element wise |
| 208 | +print squareArray * np.arange(1,10).reshape( (3,3) ) #[[ 1 4 9] |
| 209 | + # [16 0 36] |
| 210 | + # [ 7 8 9]] |
| 211 | + |
| 212 | +## Unless you use the dot(...) function, which does matrix multiplication |
| 213 | +## from linear algebra: |
| 214 | +matA = np.array( [[1,2],[3,4]] ) |
| 215 | +matB = np.array( [[5,6],[7,8]] ) |
| 216 | +print np.dot(matA,matB) #[[19 22] |
| 217 | + # [43 50]] |
| 218 | + |
| 219 | +## And that's it! There's a lot more to the numpy library, and there are a few |
| 220 | +## things I skipped over here, such as what happens when array dimensions |
| 221 | +## don't line up when you're indexing or multiplying them together, so if |
| 222 | +## you're interested, I strongly suggest you head over to the scipy wiki's |
| 223 | +## numpy tutorial for a more in depth look at using numpy arrays: |
| 224 | +## |
| 225 | +## http://www.scipy.org/Tentative_NumPy_Tutorial |
0 commit comments