diff --git a/dedupe/dedupe.py b/dedupe/dedupe.py index 5f912d027..ae04f2330 100644 --- a/dedupe/dedupe.py +++ b/dedupe/dedupe.py @@ -157,15 +157,15 @@ def initializeTraining(self, training_file=None) : if training_file : (self.training_pairs, self.training_data) = self.readTraining(training_source, - training_data) + self.training_data) - def trainX(self, training_source=None, data_d) : - - elif (training_source.__class__ is not str - or not isinstance(training_source, types.FunctionType): + def trainX(self, data_d, training_source=None) : + if (training_source.__class__ is not str + or not isinstance(training_source, types.FunctionType)): raise ValueError + self.data_d = sampleDict(data_d, 700) if training_source.__class__ is str: diff --git a/examples/canonical_example.py b/examples/canonical_example.py index f655d15e3..28d5529bb 100644 --- a/examples/canonical_example.py +++ b/examples/canonical_example.py @@ -89,7 +89,10 @@ def printPairs(pairs): num_training_dupes, num_training_distinct) - deduper.trainingDistance() + deduper.training_data = dedupe.training_sample.addTrainingData(deduper.training_pairs, + deduper.data_model, + deduper.training_data) + deduper.train() deduper.findDuplicates(data_d)