Skip to content

Commit

Permalink
make sure to only use positive votes from nadya.jp
Browse files Browse the repository at this point in the history
  • Loading branch information
Rob Speer committed Feb 1, 2018
1 parent dfca826 commit 9c1bc3e
Show file tree
Hide file tree
Showing 2 changed files with 36 additions and 24 deletions.
13 changes: 12 additions & 1 deletion conceptnet5/readers/conceptnet4.py
Original file line number Diff line number Diff line change
Expand Up @@ -317,9 +317,17 @@ def build_sources(parts_dict, preposition_fix=False):
return sources


# TODO: this doesn't need to be a class
class CN4Builder(object):
def __init__(self, weight=1.):
"""
Create a builder for processing a source of ConceptNet-4-style
assertions.

The optional `weight` parameter is a multiplier, stored on the instance
as `self.weight`. `handle_assertion` multiplies the weight computed by
`build_sources` by this value to get the weight of each edge, so the
default of 1 leaves that weight unchanged. It can be set lower than 1
for GWAPs, where we don't necessarily trust that every edge is a real
assertion about common sense.
"""
self.weight = weight

def handle_assertion(self, parts_dict):
Expand Down Expand Up @@ -377,6 +385,9 @@ def handle_assertion(self, parts_dict):
rel=relation, start=start, end=end,
dataset=dataset, license=Licenses.cc_attribution,
sources=[source_dict], surfaceText=frame_text,

# The edge weight is the weight computed by build_sources,
# times the multiplier set on this instance
weight=weight * self.weight
)

Expand Down
47 changes: 24 additions & 23 deletions conceptnet5/readers/nadya.py
Original file line number Diff line number Diff line change
Expand Up @@ -64,29 +64,30 @@ def handle_line(line, builder):
freq = int(freq)
vote = int(vote)

# Create the parts_dict that CN4Builder expects
parts_dict = {
'lang': lang,
'polarity': freq,
'cnet4_id': cnet4_id,
'relname': relname,
'frame_text': frame_text,
'startText': start_text,
'endText': end_text,
# In the case of nadya.jp, it's not important to track the creator
# separately from the voters -- they were all doing the same
# thing.
#
# Each voter just shows up as the source of a separate
# edge, which is what the CN4Builder ultimately does with the
# votes anyway. The only reason the CN4Builder takes more complex
# input is to handle weird edge cases.
'creator': voter,
'votes': [],
'activity': 'nadya.jp',
'goodness': 3
}
yield from builder.handle_assertion(parts_dict)
if vote > 0:
# Create the parts_dict that CN4Builder expects
parts_dict = {
'lang': lang,
'polarity': freq,
'cnet4_id': cnet4_id,
'relname': relname,
'frame_text': frame_text,
'startText': start_text,
'endText': end_text,
# In the case of nadya.jp, it's not important to track the creator
# separately from the voters -- they were all doing the same
# thing.
#
# Each voter just shows up as the source of a separate
# edge, which is what the CN4Builder ultimately does with the
# votes anyway. The only reason the CN4Builder takes more complex
# input is to handle weird edge cases.
'creator': voter,
'votes': [],
'activity': 'nadya.jp',
'goodness': 3
}
yield from builder.handle_assertion(parts_dict)


def handle_file(input_filename, output_file):
Expand Down

0 comments on commit 9c1bc3e

Please sign in to comment.