initial commit
add url to tutorial

Create README.md

linear regression tutorial

update a comment for linear regression

add Classifier class and change main() to test()

rename classifier to regressor. oops!

add line of best fit for absolute error

add k-means example

fix bug and add link to tutorial

update readme

add bayes classifier

add knn

add tutorial links

matricize linear regression

matricize regressor also

matricize regressor also

add predict and fit to regressor

add logistic regression for mnist

add tutorial link logistic

add linear regression 1-d class code, add GMM for some reason was not added yet

oops, now add GMM

add r-squared

add 2d and poly examples

wip logistic regression class

add course urls

ann examples

add class links

try batch on donut problem ann

oops

more oopses

change slow code too

nlp class

fix lsa

add url

more links

misc stuff

oops

add new class

update comment

add gpu script

add course links

add one more

i forget

cnn

update cnn class

add unsupervised class

oops

erase unneeded line

use eye

renet test

fix reshaping

add GRU

add real mnist

add batches and downcast

add all scan version

unsupervised deep learning

updates

visualize features

add class url

fix load and save

use get_value instead of eval

visualize better

e-commerce example

visualize lda

ann updates

update tf

update deep unsupervised + a test

minor unsupervised fix

some wip files

add lstm wiki + minor fixes

compartmentalize gru,lstm add decent embeddings

add visualize embeddings

add vanishing gradient demo and xwing

30 epochs

add url to rnn

rrnn variable learning rate

add linear regression examples

wip

add word2vec and glove

update

remove some unnecessary comments

add rnn init

cnn additions

rntn

update util

print out

remove divide by len labels, only look at root for scoring

extra comment

increment j

recursive nn tensorflow

faster train score rntn

add theano renn and rntn

messing around with rntn

add pos hmm

make sure hmm class init exists

nlp2 url

wiki data split by paragraph

update gitignore

better cost unsupervised2

ssl

add dropout files

fix class name

overfitting

bias in right order

add clustering extras + urls

supervised class

update comment

add visualization

airline example

add bonus message

remove irrelevant comments

minor changes

numpy class

add nonlinearity to tf cnn

tf load and save

remove unnecessary code

small update to logistic code

bayesian ab testing

tiny fix

update url

fix

add word2idx json

add batch + tf rnn

update tensorflow dropout

update bias correctly

comment out the right derivative

bayesian examples

k-means additions

add url

add regularization code and overfitting code for linear and logistic

supervised class regression

init for unsupervised class

init for ann2

ensembles class

tiny fix

alt nesterov

add dlc url ab testing

add dlc url to ann

add dlc url to ann2

update cnn urls

update hmm urls

update linear urls

update logistic urls

update nlp urls

update urls nlp2

update urls numpy

update urls rnn

update urls supervised

update urls supervised2

update urls unsupervised

update urls unsupervised2

add course url to readme

cleanup

change it back

remove old hmm

change it back

add tic tac toe to rl

add policy iteration and value iteration

fix dt

update readme with course links

update

update comment

tiny update

more correct

add url

add course url

update

remove old line

add course urls and rl files

add dlc url

re-add linear programming linear regression

update

cnn typo

fix relu derivative

cast y to int

misc updates

update tf

update for tf1.0

tiny xor update

update xor label

change var names

initial commit rl2

update readme

update rl2

make tf example compatible with python 3

tiny update

add done flag

use list copy

make purity work

different optimizer

use brown corpus instead of wiki

add brown corpus

update glove

"a" only gets updated at top of loop in q-learning

updating a to a2 is ineffectual as it gets overwritten at top of loop before use anyway. q-learning is off policy so that line is misleading

tiny fix

plot mean images

add new examples

one more

add brown

update

add sklearn example

update

add brown

oops

oops

Set theme jekyll-theme-cayman and migrate Page Generator content

Update index.md

test

tf scan

hmm tf

tf language model

actual tf language model

updates

different way of catching loops

minor change

extra help finding files

help finding files cnn

oops

add extra reading ann 2

Updating cnn_theano code according to theano API last update.

update

update

add extra reading rl

extra reading rl2

add hints for data

oops

update name

clean up

more reading

add relu

update

add new theano

fix

tf unsupervised deep learning

update custom cnn

test single autoencoder

theano autoencoder

tiny update

tiny update

softplus

py 3

test other means

change it back

better rmsprop

make it like tensorflow

more materials

python3 compatibility for web service example

add updated stuff that had not been pushed

reading material

fix ucb1

change back numbers

fix

fix s,a,r tuples

Fix conv

gans

add link

more links

minor updates

small changes

tiny fix

misc updates

new examples

update kmeans mnist

just for fun

oops
Lazy Programmer authored and lazyprogrammer committed Aug 24, 2017
1 parent 54f4201 commit 3009035
Showing 311 changed files with 724,037 additions and 0 deletions.
5 changes: 5 additions & 0 deletions .gitignore
@@ -0,0 +1,5 @@
*.DS_Store
*.pyc
large_files
large_files/*
nlp_class2/chunking/*
67 changes: 67 additions & 0 deletions README.md
@@ -0,0 +1,67 @@
machine_learning_examples
=========================

A collection of machine learning examples and tutorials.

Find associated tutorials at https://lazyprogrammer.me

Find associated courses at https://deeplearningcourses.com


Direct Course Links
===================

Deep Learning Prerequisites: The Numpy Stack in Python
https://deeplearningcourses.com/c/deep-learning-prerequisites-the-numpy-stack-in-python

Deep Learning Prerequisites: Linear Regression in Python
https://deeplearningcourses.com/c/data-science-linear-regression-in-python

Deep Learning Prerequisites: Logistic Regression in Python
https://deeplearningcourses.com/c/data-science-logistic-regression-in-python

Deep Learning in Python
https://deeplearningcourses.com/c/data-science-deep-learning-in-python

Cluster Analysis and Unsupervised Machine Learning in Python
https://deeplearningcourses.com/c/cluster-analysis-unsupervised-machine-learning-python

Data Science: Supervised Machine Learning in Python
https://deeplearningcourses.com/c/data-science-supervised-machine-learning-in-python

Bayesian Machine Learning in Python: A/B Testing
https://deeplearningcourses.com/c/bayesian-machine-learning-in-python-ab-testing

Easy Natural Language Processing in Python
https://deeplearningcourses.com/c/data-science-natural-language-processing-in-python

Practical Deep Learning in Theano and TensorFlow
https://deeplearningcourses.com/c/data-science-deep-learning-in-theano-tensorflow

Ensemble Machine Learning in Python: Random Forest and AdaBoost
https://deeplearningcourses.com/c/machine-learning-in-python-random-forest-adaboost

Deep Learning: Convolutional Neural Networks in Python
https://deeplearningcourses.com/c/deep-learning-convolutional-neural-networks-theano-tensorflow

Unsupervised Deep Learning in Python
https://deeplearningcourses.com/c/unsupervised-deep-learning-in-python

Unsupervised Machine Learning: Hidden Markov Models in Python
https://deeplearningcourses.com/c/unsupervised-machine-learning-hidden-markov-models-in-python

Deep Learning: Recurrent Neural Networks in Python
https://deeplearningcourses.com/c/deep-learning-recurrent-neural-networks-in-python

Advanced Natural Language Processing: Deep Learning in Python
https://deeplearningcourses.com/c/natural-language-processing-with-deep-learning-in-python

Artificial Intelligence: Reinforcement Learning in Python
https://deeplearningcourses.com/c/artificial-intelligence-reinforcement-learning-in-python

Advanced AI: Deep Reinforcement Learning in Python
https://deeplearningcourses.com/c/deep-reinforcement-learning-in-python

Deep Learning: GANs and Variational Autoencoders
https://deeplearningcourses.com/c/deep-learning-gans-and-variational-autoencoders

68 changes: 68 additions & 0 deletions ab_testing/bayesian_bandit.py
@@ -0,0 +1,68 @@
# From the course: Bayesian Machine Learning in Python: A/B Testing
# https://deeplearningcourses.com/c/bayesian-machine-learning-in-python-ab-testing
# https://www.udemy.com/bayesian-machine-learning-in-python-ab-testing
import matplotlib.pyplot as plt
import numpy as np
from scipy.stats import beta


NUM_TRIALS = 2000
BANDIT_PROBABILITIES = [0.2, 0.5, 0.75]


class Bandit(object):
    def __init__(self, p):
        self.p = p  # the true win rate
        self.a = 1  # Beta(1, 1) prior
        self.b = 1

    def pull(self):
        # draw a reward: 1 with probability p, else 0
        return np.random.random() < self.p

    def sample(self):
        # draw from the current Beta posterior
        return np.random.beta(self.a, self.b)

    def update(self, x):
        # conjugate update: a win increments a, a loss increments b
        self.a += x
        self.b += 1 - x


def plot(bandits, trial):
    x = np.linspace(0, 1, 200)
    for b in bandits:
        y = beta.pdf(x, b.a, b.b)
        plt.plot(x, y, label="real p: %.4f" % b.p)
    plt.title("Bandit distributions after %s trials" % trial)
    plt.legend()
    plt.show()


def experiment():
    bandits = [Bandit(p) for p in BANDIT_PROBABILITIES]

    sample_points = [5, 10, 20, 50, 100, 200, 500, 1000, 1500, 1999]
    for i in range(NUM_TRIALS):

        # take a sample from each bandit
        bestb = None
        maxsample = -1
        allsamples = []  # collect these just to print for debugging
        for b in bandits:
            sample = b.sample()
            allsamples.append("%.4f" % sample)
            if sample > maxsample:
                maxsample = sample
                bestb = b
        if i in sample_points:
            print("current samples: %s" % allsamples)
            plot(bandits, i)

        # pull the arm for the bandit with the largest sample
        x = bestb.pull()

        # update the distribution for the bandit whose arm we just pulled
        bestb.update(x)


if __name__ == "__main__":
    experiment()
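
As a quick sanity check (an illustrative addition, not part of the original commit), the Beta posterior of a single Bandit should concentrate near its true win rate after many pulls. A minimal sketch, appended to the script above:

b = Bandit(0.75)
for _ in range(10000):
    b.update(b.pull())
print("posterior mean: %.4f (true p: %.2f)" % (b.a / float(b.a + b.b), b.p))  # expect roughly 0.75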
63 changes: 63 additions & 0 deletions ab_testing/chisquare.py
@@ -0,0 +1,63 @@
# From the course: Bayesian Machine Learning in Python: A/B Testing
# https://deeplearningcourses.com/c/bayesian-machine-learning-in-python-ab-testing
# https://www.udemy.com/bayesian-machine-learning-in-python-ab-testing
import numpy as np
import matplotlib.pyplot as plt
from scipy.stats import chi2, chi2_contingency

# contingency table
#              click      no click
# --------------------------------
# ad A    |      a            b
# ad B    |      c            d
#
# chi^2 = (ad - bc)^2 (a + b + c + d) / [ (a + b)(c + d)(a + c)(b + d) ]
# degrees of freedom = (#cols - 1) x (#rows - 1) = (2 - 1)(2 - 1) = 1

# short example

# T = np.array([[36, 14], [30, 25]])
# c2 = np.linalg.det(T)**2 * T.sum() / ( T[0].sum()*T[1].sum()*T[:,0].sum()*T[:,1].sum() )
# p_value = 1 - chi2.cdf(x=c2, df=1)

# equivalent (via expected counts, see below):
# (36-31.429)**2/31.429 + (14-18.571)**2/18.571 + (30-34.571)**2/34.571 + (25-20.429)**2/20.429
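# the same statistic via expected counts (an illustrative aside, not in the
# original file): E[i,j] = row_sum[i] * col_sum[j] / total
# E = np.outer(T.sum(axis=1), T.sum(axis=0)) / float(T.sum())
# c2 = ((T - E)**2 / E).sum()  # matches the determinant form above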


class DataGenerator:
    def __init__(self, p1, p2):
        self.p1 = p1
        self.p2 = p2

    def next(self):
        # simulate one impression for each ad
        click1 = 1 if (np.random.random() < self.p1) else 0
        click2 = 1 if (np.random.random() < self.p2) else 0
        return click1, click2


def get_p_value(T):
    # same as scipy.stats.chi2_contingency(T, correction=False)
    det = T[0,0]*T[1,1] - T[0,1]*T[1,0]
    c2 = float(det) / T[0].sum() * det / T[1].sum() * T.sum() / T[:,0].sum() / T[:,1].sum()
    p = 1 - chi2.cdf(x=c2, df=1)
    return p


def run_experiment(p1, p2, N):
    data = DataGenerator(p1, p2)
    p_values = np.empty(N)
    T = np.zeros((2, 2)).astype(np.float32)
    for i in range(N):
        c1, c2 = data.next()
        T[0,c1] += 1
        T[1,c2] += 1
        # ignore the first 10 values: the statistic is unstable with so little data
        if i < 10:
            p_values[i] = None
        else:
            p_values[i] = get_p_value(T)
    plt.plot(p_values)
    plt.plot(np.ones(N)*0.05)
    plt.show()


run_experiment(0.1, 0.11, 20000)
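
Since get_p_value claims agreement with scipy, a two-line check (an illustrative addition, not in the commit) makes the equivalence concrete:

T = np.array([[36., 14.], [30., 25.]])
print(get_p_value(T))
print(chi2_contingency(T, correction=False)[1])  # scipy returns (chi2, p, dof, expected)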
37 changes: 37 additions & 0 deletions ab_testing/ci_comparison.py
@@ -0,0 +1,37 @@
# From the course: Bayesian Machine Learning in Python: A/B Testing
# https://deeplearningcourses.com/c/bayesian-machine-learning-in-python-ab-testing
# https://www.udemy.com/bayesian-machine-learning-in-python-ab-testing
import matplotlib.pyplot as plt
import numpy as np
from scipy.stats import beta, norm

T = 501  # number of coin tosses
true_ctr = 0.5
a, b = 1, 1  # beta priors
plot_indices = (10, 20, 30, 50, 100, 200, 500)
data = np.empty(T)
for i in range(T):
    x = 1 if np.random.random() < true_ctr else 0
    data[i] = x

    # update a and b
    a += x
    b += 1 - x

    if i in plot_indices:
        # maximum likelihood estimate of ctr
        n = i + 1  # number of samples collected so far
        p = data[:n].mean()  # include the current sample in the estimate
        std = np.sqrt(p*(1-p)/n)

        # gaussian approximation of the posterior
        x = np.linspace(0, 1, 200)
        g = norm.pdf(x, loc=p, scale=std)
        plt.plot(x, g, label='Gaussian Approximation')

        # beta posterior
        posterior = beta.pdf(x, a=a, b=b)
        plt.plot(x, posterior, label='Beta Posterior')
        plt.legend()
        plt.title("N = %s" % n)
        plt.show()
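
For large n the two curves should nearly coincide. The Beta posterior's own mean and standard deviation can be printed for comparison (an illustrative addition, using the final a, b, p, and std left over from the loop above):

print("beta mean: %.4f, beta std: %.4f" % (beta.mean(a=a, b=b), beta.std(a=a, b=b)))
print("mle  mean: %.4f, mle  std: %.4f" % (p, std))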
34 changes: 34 additions & 0 deletions ab_testing/convergence.py
@@ -0,0 +1,34 @@
# From the course: Bayesian Machine Learning in Python: A/B Testing
# https://deeplearningcourses.com/c/bayesian-machine-learning-in-python-ab-testing
# https://www.udemy.com/bayesian-machine-learning-in-python-ab-testing
import matplotlib.pyplot as plt
import numpy as np
from bayesian_bandit import Bandit


def run_experiment(p1, p2, p3, N):
    bandits = [Bandit(p1), Bandit(p2), Bandit(p3)]

    data = np.empty(N)

    for i in range(N):
        # thompson sampling: play the bandit with the largest posterior sample
        j = np.argmax([b.sample() for b in bandits])
        x = bandits[j].pull()
        bandits[j].update(x)

        # for the plot
        data[i] = x
    cumulative_average_ctr = np.cumsum(data) / (np.arange(N) + 1)

    # plot moving average ctr
    plt.plot(cumulative_average_ctr)
    plt.plot(np.ones(N)*p1)
    plt.plot(np.ones(N)*p2)
    plt.plot(np.ones(N)*p3)
    plt.ylim((0,1))
    plt.xscale('log')
    plt.show()


run_experiment(0.2, 0.25, 0.3, 100000)
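
A natural follow-up metric (not in the original commit) is total regret, the reward lost relative to always playing the best arm. A minimal sketch, assuming it is placed inside run_experiment after the loop:

best_p = max(p1, p2, p3)
total_regret = N * best_p - data.sum()  # expected reward of the best arm minus what we earned
print("total regret: %.1f (%.6f per play)" % (total_regret, total_regret / N))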
27 changes: 27 additions & 0 deletions ab_testing/demo.py
@@ -0,0 +1,27 @@
# From the course: Bayesian Machine Learning in Python: A/B Testing
# https://deeplearningcourses.com/c/bayesian-machine-learning-in-python-ab-testing
# https://www.udemy.com/bayesian-machine-learning-in-python-ab-testing
import numpy as np
import matplotlib.pyplot as plt
from scipy.stats import beta

def plot(a, b, trial, ctr):
    x = np.linspace(0, 1, 200)
    y = beta.pdf(x, a, b)
    mean = float(a) / (a + b)
    plt.plot(x, y)
    plt.title("Distributions after %s trials, true rate = %.1f, mean = %.2f" % (trial, ctr, mean))
    plt.show()

true_ctr = 0.3
a, b = 1, 1  # beta parameters
show = [0, 5, 10, 25, 50, 100, 200, 300, 500, 700, 1000, 1500]
for t in range(1501):
    coin_toss_result = (np.random.random() < true_ctr)
    if coin_toss_result:
        a += 1
    else:
        b += 1

    if t in show:
        plot(a, b, t+1, true_ctr)
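
Because the prior is Beta(1, 1), the posterior mean after t tosses is (1 + successes) / (2 + tosses), so a one-line check at the end of the script (an illustrative addition) should land near the true rate:

print("final posterior mean: %.4f vs true ctr: %.1f" % (float(a) / (a + b), true_ctr))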
23 changes: 23 additions & 0 deletions ab_testing/ttest.py
@@ -0,0 +1,23 @@
# From the course: Bayesian Machine Learning in Python: A/B Testing
# https://deeplearningcourses.com/c/bayesian-machine-learning-in-python-ab-testing
# https://www.udemy.com/bayesian-machine-learning-in-python-ab-testing
import numpy as np
from scipy import stats

# generate data
N = 10
a = np.random.randn(N) + 2  # mean 2, variance 1
b = np.random.randn(N)      # mean 0, variance 1

# roll your own t-test:
var_a = a.var(ddof=1)  # unbiased estimator: divide by N-1 instead of N
var_b = b.var(ddof=1)
s = np.sqrt( (var_a + var_b) / 2 )  # pooled standard deviation (equal sample sizes)
t = (a.mean() - b.mean()) / (s * np.sqrt(2.0/N))  # t-statistic
df = 2*N - 2  # degrees of freedom
p = 1 - stats.t.cdf(np.abs(t), df=df)  # one-sided test p-value
print("t:\t", t, "p:\t", 2*p)  # two-sided test p-value

# built-in t-test:
t2, p2 = stats.ttest_ind(a, b)
print("t2:\t", t2, "p2:\t", p2)
