Skip to content

Commit

Permalink
Changes for performance as well as some general refactoring
Browse files Browse the repository at this point in the history
  • Loading branch information
cheerfulstoic committed Apr 7, 2017
1 parent 21b19e9 commit ff298a8
Show file tree
Hide file tree
Showing 3 changed files with 31 additions and 30 deletions.
32 changes: 11 additions & 21 deletions lib/core_extensions/array.rb
Original file line number Diff line number Diff line change
@@ -1,29 +1,19 @@
class Array
def classification
collect(&:last)
end

# calculate information entropy
def entropy
return 0 if empty?

info = {}
each do |i|
info[i] = !info[i] ? 1 : (info[i] + 1)
each_with_object(Hash.new(0)) do |i, result|
result[i] += 1
end.values.inject(0) do |sum, count|
percentage = count.to_f / length
sum + -percentage * Math.log2(percentage)
end

result(info, length)
end
end

private

def result(info, total)
final = 0
info.each do |_symbol, count|
next unless count > 0
percentage = count.to_f / total
final += -percentage * Math.log(percentage) / Math.log(2.0)
module ArrayClassification
refine Array do
def classification
collect(&:last)
end
final
end
end

2 changes: 1 addition & 1 deletion lib/decisiontree.rb
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
require File.dirname(__FILE__) + '/decisiontree/id3_tree.rb'
require 'core_extensions/object'
require 'core_extensions/array'
require File.dirname(__FILE__) + '/decisiontree/id3_tree.rb'
27 changes: 19 additions & 8 deletions lib/decisiontree/id3_tree.rb
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,8 @@
module DecisionTree
Node = Struct.new(:attribute, :threshold, :gain)

using ArrayClassification

class ID3Tree
def initialize(attributes, data, default, type)
@used = {}
Expand Down Expand Up @@ -119,10 +121,14 @@ def id3_continuous(data, attributes, attribute)
def id3_discrete(data, attributes, attribute)
index = attributes.index(attribute)

values = Set.new
data.each { |d| values << d[index] }
partitions = values.to_a.sort.collect { |val| data.select { |d| d[index] == val } }
remainder = partitions.collect { |p| (p.size.to_f / data.size) * p.classification.entropy }.inject(0) { |a, e| e += a }
values = data.map { |row| row[index] }.uniq
remainder = values.sort.inject(0) do |sum, val|
classification = data.each_with_object([]) do |row, result|
result << row.last if row[index] == val
end

sum + ((classification.size.to_f / data.size) * classification.entropy)
end

[data.classification.entropy - remainder, index]
end
Expand Down Expand Up @@ -324,6 +330,7 @@ def predict(test)

class Bagging
attr_accessor :classifiers

def initialize(attributes, data, default, type)
@classifiers = []
@type = type
Expand All @@ -333,10 +340,13 @@ def initialize(attributes, data, default, type)
end

def train(data = @data, attributes = @attributes, default = @default)
@classifiers = []
10.times { @classifiers << Ruleset.new(attributes, data, default, @type) }
@classifiers.each do |c|
c.train(data, attributes, default)
@classifiers = 5.times.map do |i|
Ruleset.new(attributes, data, default, @type)
end

@classifiers.each_with_index do |classifier, index|
puts "Processing classifier ##{index + 1}"
classifier.train(data, attributes, default)
end
end

Expand All @@ -352,3 +362,4 @@ def predict(test)
end
end
end

0 comments on commit ff298a8

Please sign in to comment.