Skip to content

Commit

Permalink
updated consoleLabel function to not take in data_d, added more documentation to dedupe
Browse files Browse the repository at this point in the history
  • Loading branch information
derekeder committed Sep 13, 2012
1 parent ae38870 commit ad69f06
Show file tree
Hide file tree
Showing 2 changed files with 12 additions and 6 deletions.
3 changes: 3 additions & 0 deletions dedupe/dedupe.py
Original file line number Diff line number Diff line change
Expand Up @@ -145,6 +145,9 @@ def _initializeSettings(self, fields):
self.predicates = None

def initializeTraining(self, training_file=None) :
"""
"""
n_fields = len(self.data_model['fields'])

field_dtype = [('names', 'a20', n_fields),
Expand Down
15 changes: 9 additions & 6 deletions dedupe/training_sample.py
Original file line number Diff line number Diff line change
Expand Up @@ -97,9 +97,15 @@ def activeLearning(data_d,
# pop the next most uncertain pair off of record distances

record_distances = record_distances[:, uncertain_indices]
uncertain_pairs = (record_distances['pairs'])[0:1]
uncertain_pair_ids = (record_distances['pairs'])[0:1]
record_distances = record_distances[1:]

uncertain_pairs = []
for pair in uncertain_pair_ids :
record_pair = [data_d[instance] for instance in pair]
record_pair = [tuple(record_pair)]
uncertain_pairs.append(record_pair)

labeled_pairs = labelPairFunction(uncertain_pairs,
data_d,
data_model)
Expand Down Expand Up @@ -151,19 +157,16 @@ def addTrainingData(labeled_pairs, data_model, training_data=[]):
return training_data


def consoleLabel(uncertain_pairs, data_d, data_model):
def consoleLabel(uncertain_pairs, data_model):
duplicates = []
nonduplicates = []

fields = [field for field in data_model['fields']
if data_model['fields'][field]['type'] != 'Interaction']

for pair in uncertain_pairs:
for record_pair in uncertain_pairs:
label = ''

record_pair = [data_d[instance] for instance in pair]
record_pair = tuple(record_pair)

for pair in record_pair:
for field in fields:
print field, ': ', pair[field]
Expand Down

0 comments on commit ad69f06

Please sign in to comment.