Skip to content

Commit

Permalink
canonical example working with new Dedupe api class
Browse files: browse the repository at this point in the history
  • Loading branch information
fgregg committed Sep 13, 2012
1 parent ae38870 commit 8a8ecbe
Show file tree
Hide file tree
Showing 2 changed files with 9 additions and 6 deletions.
10 changes: 5 additions & 5 deletions dedupe/dedupe.py
Original file line number Diff line number Diff line change
Expand Up @@ -157,15 +157,15 @@ def initializeTraining(self, training_file=None) :
if training_file :
(self.training_pairs,
self.training_data) = self.readTraining(training_source,
training_data)
self.training_data)



def trainX(self, training_source=None, data_d) :

elif (training_source.__class__ is not str
or not isinstance(training_source, types.FunctionType):
def trainX(self, data_d, training_source=None) :
if (training_source.__class__ is not str
or not isinstance(training_source, types.FunctionType)):
raise ValueError

self.data_d = sampleDict(data_d, 700)

if training_source.__class__ is str:
Expand Down
5 changes: 4 additions & 1 deletion examples/canonical_example.py
Original file line number Diff line number Diff line change
Expand Up @@ -89,7 +89,10 @@ def printPairs(pairs):
num_training_dupes,
num_training_distinct)

deduper.trainingDistance()
deduper.training_data = dedupe.training_sample.addTrainingData(deduper.training_pairs,
deduper.data_model,
deduper.training_data)

deduper.train()

deduper.findDuplicates(data_d)
Expand Down

0 comments on commit 8a8ecbe

Please sign in to comment.