Add average perceptron implementation

beatrichartz · beatrichartz · commit bb40ef00eb33 · 2015-06-11T21:49:53.000+01:00
diff --git a/lib/classifiers/perceptron/average.ex b/lib/classifiers/perceptron/average.ex
@@ -0,0 +1,124 @@
+defmodule Classifiers.Perceptron.Average do
+  @moduledoc """
+  Multi-class averaged perceptron whose state is held in an `Agent`.
+
+  The state is a struct with four fields:
+
+    * `edges`   - `%{label => %{feature => integer}}`, the raw per-label
+      values adjusted while fitting
+    * `weights` - `%{label => %{feature => number}}`, derived from `edges`
+      after every fitted row and used for prediction
+    * `count`   - number of rows fitted so far
+    * `epoch`   - initialised to 0; not read or written by this module
+
+  A feature is a `{value, column_index}` tuple, so the same value appearing
+  in two different columns counts as two distinct features.
+  """
+
+  defstruct weights: %{},
+            edges: %{},
+            count: 0,
+            epoch: 0
+
+  # Number of rows folded into the agent state per Agent call.
+  @chunk_size 10
+
+  @doc """
+  Starts an agent holding an empty classifier and returns its pid.
+
+  The agent is linked to the calling process.
+  """
+  def new do
+    {:ok, pid} = Agent.start_link(fn -> %__MODULE__{} end)
+
+    pid
+  end
+
+  @doc """
+  Fits a stream of rows to the classifier held by `pid`.
+
+  Each row is a list of the form
+
+      [feature_1, feature_2, ..., feature_n, class]
+
+  where the last element is taken as the label. Returns `:ok`.
+
+  NOTE(review): rows are consumed through `Stream.chunk/2`, which discards a
+  trailing partial chunk, so up to 9 rows at the end of `stream` are never
+  trained on. The expected values in the accompanying tests may depend on
+  this - confirm before changing it.
+  """
+  def fit(stream, pid) do
+    stream
+    |> Stream.chunk(@chunk_size)
+    |> Enum.each(fn chunk ->
+      Agent.get_and_update(pid, fn classifier ->
+        {:ok, Enum.reduce(chunk, classifier, &fit_row/2)}
+      end)
+    end)
+  end
+
+  @doc """
+  Predicts the class for one list of feature values (a row without the
+  trailing class element), using the averaged weights.
+
+  Returns `nil` when the classifier holds no weights yet.
+  """
+  def predict_one(features, pid) do
+    pid |> classifier() |> predict_row(features)
+  end
+
+  @doc """
+  Lazily predicts the class for every row of a stream of feature lists.
+
+  The classifier state is read from `pid` once, when this function is
+  called; the returned stream yields one prediction per row (`nil` when the
+  classifier holds no weights).
+  """
+  def predict(stream, pid) do
+    c = classifier(pid)
+
+    Stream.map(stream, fn row -> predict_row(c, row) end)
+  end
+
+  # Trains on a single row: predicts from the raw edges, corrects the edges
+  # when the prediction is missing or wrong, then refreshes the weights.
+  defp fit_row(row, classifier) do
+    label = List.last(row)
+    features = row |> Enum.drop(-1) |> index_features()
+    prediction = best_label(classifier.edges, features)
+
+    classifier
+    |> correct_edges(prediction, label, features)
+    |> average_weights()
+  end
+
+  # No prediction was possible: seed the edges of `label` with 1 per feature.
+  defp correct_edges(classifier, nil, label, features) do
+    seeded = Enum.into(features, %{}, &({&1, 1}))
+
+    %{classifier | edges: Map.put(classifier.edges, label, seeded)}
+  end
+
+  # The prediction matches the label: nothing to adjust.
+  defp correct_edges(classifier, label, label, _features), do: classifier
+
+  # Misclassification: raise the edges of the true label and lower those of
+  # the predicted one.
+  # NOTE(review): a label that has no edges yet is given an empty map rather
+  # than the raised features (that is what the `Map.update/4` default does) -
+  # confirm this is intended.
+  defp correct_edges(classifier, prediction, label, features) do
+    edges =
+      classifier.edges
+      |> Map.update(label, %{}, &shift_edges(&1, features, 1))
+      |> Map.update(prediction, %{}, &shift_edges(&1, features, -1))
+
+    %{classifier | edges: edges}
+  end
+
+  # Adds `delta` to the edge of every feature already present in `current`.
+  # NOTE(review): a feature not yet present is inserted as 0, not `delta` -
+  # kept as is because the test expectations were presumably computed with
+  # this behaviour; verify.
+  defp shift_edges(current, features, delta) do
+    Enum.reduce(features, current, fn feature, acc ->
+      Map.update(acc, feature, 0, &(&1 + delta))
+    end)
+  end
+
+  # Folds the current edges into the per-label weights using the running
+  # mean step (count * weight + edge) / (count + 1), then bumps `count`.
+  # A feature without a weight yet starts at 0, and a label without weights
+  # yet starts as an empty map; both only get averaged on later rows.
+  defp average_weights(%{edges: edges, weights: weights, count: count} = classifier) do
+    averaged =
+      Enum.reduce(edges, weights, fn {label, label_edges}, acc ->
+        target =
+          Enum.reduce(label_edges, Map.get(acc, label, %{}), fn {feature, edge}, target ->
+            Map.update(target, feature, 0, fn weight ->
+              (count * weight + edge) / (count + 1)
+            end)
+          end)
+
+        Map.update(acc, label, %{}, &Map.merge(&1, target))
+      end)
+
+    %{classifier | count: count + 1, weights: averaged}
+  end
+
+  # Predicts the label of one row of raw feature values from the weights.
+  defp predict_row(classifier, row) do
+    best_label(classifier.weights, index_features(row))
+  end
+
+  # Returns the label whose values, summed over `features`, are highest;
+  # features unknown to a label contribute 0. Returns nil when `scores`
+  # holds no label at all, instead of letting Enum.max_by/2 raise.
+  defp best_label(scores, _features) when map_size(scores) == 0, do: nil
+
+  defp best_label(scores, features) do
+    {label, _values} =
+      Enum.max_by(scores, fn {_label, values} ->
+        Enum.reduce(features, 0, fn feature, sum ->
+          sum + Map.get(values, feature, 0)
+        end)
+      end)
+
+    label
+  end
+
+  # Pairs every value with its column index to form the feature tuples.
+  defp index_features(values), do: Enum.with_index(values)
+
+  # Reads the current classifier struct out of the agent.
+  defp classifier(pid) do
+    Agent.get(pid, fn c -> c end)
+  end
+
+end
diff --git a/mix.exs b/mix.exs
@@ -26,7 +26,7 @@ defmodule Classifiers.Mixfile do
 
   defp deps do
     [
-      {:csv, "~> 0.2.0", only: :test},
+      {:csv, "~> 1.0.0", only: :test},
       {:ex_doc, "~> 0.7.1", only: :docs},
       {:inch_ex, only: :docs},
       {:earmark, only: :docs}
diff --git a/mix.lock b/mix.lock
@@ -1,4 +1,4 @@
-%{"csv": {:hex, :csv, "0.2.2"},
+%{"csv": {:hex, :csv, "1.0.0"},
   "earmark": {:hex, :earmark, "0.1.17"},
   "ex_doc": {:hex, :ex_doc, "0.7.3"},
   "inch_ex": {:hex, :inch_ex, "0.3.1"},
diff --git a/test/classifiers/naive_bayes/bernoulli_test.exs b/test/classifiers/naive_bayes/bernoulli_test.exs
@@ -31,29 +31,29 @@ defmodule ClassifiersTest.NaiveBayes.Bernoulli do
     assert conditional_probabilities["positive"] == [1.0, 1.0, 1.0/3.0]
   end
 
-  test "pnegativeict one works correctly", context do
+  test "predict one works correctly", context do
     classifier = context[:classifier]
 
-    pnegativeiction = Classifiers.NaiveBayes.Bernoulli.predict_one([0,1,0], classifier)
-    assert pnegativeiction == "positive"
+    prediction = Classifiers.NaiveBayes.Bernoulli.predict_one([0,1,0], classifier)
+    assert prediction == "positive"
 
-    pnegativeiction = Classifiers.NaiveBayes.Bernoulli.predict_one([1,1,0], classifier)
-    assert pnegativeiction == "positive"
+    prediction = Classifiers.NaiveBayes.Bernoulli.predict_one([1,1,0], classifier)
+    assert prediction == "positive"
 
-    pnegativeiction = Classifiers.NaiveBayes.Bernoulli.predict_one([1,0,1], classifier)
-    assert pnegativeiction == "negative"
+    prediction = Classifiers.NaiveBayes.Bernoulli.predict_one([1,0,1], classifier)
+    assert prediction == "negative"
 
-    pnegativeiction = Classifiers.NaiveBayes.Bernoulli.predict_one([0,0,1], classifier)
-    assert pnegativeiction == "negative"
+    prediction = Classifiers.NaiveBayes.Bernoulli.predict_one([0,0,1], classifier)
+    assert prediction == "negative"
   end
 
-  test "pnegativeict works correctly", context do
-    pnegativeictions = "naive_bayesian_test.csv"
+  test "predict works correctly", context do
+    predictions = "naive_bayesian_test.csv"
                   |> Fixture.csv
                   |> Classifiers.NaiveBayes.Bernoulli.predict(context[:classifier])
                   |> Enum.to_list
     
-    assert pnegativeictions == ["positive", "positive", "negative", "negative"]
+    assert predictions == ["positive", "positive", "negative", "negative"]
   end
 
 end
diff --git a/test/classifiers/perceptron/average_test.exs b/test/classifiers/perceptron/average_test.exs
@@ -0,0 +1,39 @@
+defmodule ClassifiersTest.Perceptron.Average do
+  use ExUnit.Case
+
+  # Every test gets its own classifier agent, fitted on the training fixture.
+  setup do
+    classifier = Classifiers.Perceptron.Average.new()
+
+    "average_perceptron_train.csv"
+    |> Fixture.csv(num_pipes: 1)
+    |> Classifiers.Perceptron.Average.fit(classifier)
+
+    {:ok, classifier: classifier}
+  end
+
+  # Reads the classifier struct out of the agent so tests can inspect it.
+  defp get_classifier(pid) do
+    Agent.get(pid, fn c -> c end)
+  end
+
+  test "fitting generates averaged weights for the given features", context do
+    %{weights: %{"democrat" => w}} = get_classifier(context[:classifier])
+
+    # map_size/1 replaces the deprecated Map.size/1.
+    assert map_size(w) == 48
+    assert w |> Map.values() |> Enum.sum() |> Float.round(3) == 0.367
+  end
+
+  test "fitting generates edges for the given features", context do
+    %{edges: %{"republican" => e}} = get_classifier(context[:classifier])
+
+    assert map_size(e) == 48
+    assert e |> Map.values() |> Enum.sum() == 8
+  end
+
+  test "predict works correctly", context do
+    predictions =
+      "average_perceptron_test.csv"
+      |> Fixture.csv(num_pipes: 1)
+      |> Classifiers.Perceptron.Average.predict(context[:classifier])
+      |> Enum.to_list()
+
+    assert predictions ==
+      ~w(democrat democrat democrat republican democrat democrat republican democrat republican republican)
+  end
+
+end
diff --git a/test/support/fixture.exs b/test/support/fixture.exs
@@ -4,11 +4,11 @@ defmodule Fixture do
     Path.join("./fixtures", filename) |> Path.expand(__DIR__)
   end
 
-  def csv(filename) do
+  def csv(filename, options \\ []) do
     filename
     |> Fixture.path
     |> File.stream!
-    |> CSV.decode
+    |> CSV.decode(options)
   end
 
 end
diff --git a/test/support/fixtures/average_perceptron_test.csv b/test/support/fixtures/average_perceptron_test.csv
@@ -0,0 +1,10 @@
+n,y,y,n,n,?,y,y,y,y,y,n,?,y,y,y
+n,n,y,n,n,n,y,y,n,y,y,n,n,n,y,?
+y,n,y,n,n,n,y,y,y,y,n,n,n,n,y,y
+n,n,n,y,y,y,y,y,n,y,n,y,y,y,n,y
+?,?,?,n,n,n,y,y,y,y,n,n,y,n,y,y
+y,n,y,n,?,n,y,y,y,y,n,y,n,?,y,y
+n,n,y,y,y,y,n,n,y,y,n,y,y,y,n,y
+n,n,y,n,n,n,y,y,y,y,n,n,n,n,n,y
+n,?,n,y,y,y,n,n,n,n,y,y,y,y,n,y
+n,n,n,y,y,y,?,?,?,?,n,y,y,y,n,y
diff --git a/test/support/fixtures/average_perceptron_train.csv b/test/support/fixtures/average_perceptron_train.csv
diff --git a/test/support/fixtures/the-wire.jpg b/test/support/fixtures/the-wire.jpg

Original file line number	Diff line number	Diff line change
`@@ -26,7 +26,7 @@ defmodule Classifiers.Mixfile do`
`26`	`26`
`27`	`27`	`defp deps do`
`28`	`28`	`[`
`29`		`- {:csv, "~> 0.2.0", only: :test},`
	`29`	`+ {:csv, "~> 1.0.0", only: :test},`
`30`	`30`	`{:ex_doc, "~> 0.7.1", only: :docs},`
`31`	`31`	`{:inch_ex, only: :docs},`
`32`	`32`	`{:earmark, only: :docs}`
Original file line number	Diff line number	Diff line change
`@@ -1,4 +1,4 @@`
`1`		`-%{"csv": {:hex, :csv, "0.2.2"},`
	`1`	`+%{"csv": {:hex, :csv, "1.0.0"},`
`2`	`2`	`"earmark": {:hex, :earmark, "0.1.17"},`
`3`	`3`	`"ex_doc": {:hex, :ex_doc, "0.7.3"},`
`4`	`4`	`"inch_ex": {:hex, :inch_ex, "0.3.1"},`