Skip to content

Commit

Permalink
Remove the leading space in the tldr preference dataset (#1773)
Browse files Browse the repository at this point in the history
  • Loading branch information
vwxyzjn authored Jun 26, 2024
1 parent 7965b78 commit 3479606
Showing 1 changed file with 3 additions and 2 deletions.
5 changes: 3 additions & 2 deletions examples/datasets/tldr_preference.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,8 +63,9 @@ def process(row):
format_str = cnndm_format_str if row["batch"] in cnndm_batches else tldr_format_str
row["prompt"] = format_str.format(**row["info"])
choice = row["choice"]
chosen = row["summaries"][choice]["text"]
rejected = row["summaries"][1 - choice]["text"]
# strip surrounding whitespace: the raw summary text starts with a leading space
chosen = row["summaries"][choice]["text"].strip()
rejected = row["summaries"][1 - choice]["text"].strip()
row["chosen"] = [{"role": "user", "content": row["prompt"]}, {"role": "assistant", "content": chosen}]
row["rejected"] = [{"role": "user", "content": row["prompt"]}, {"role": "assistant", "content": rejected}]
return row
Expand Down

0 comments on commit 3479606

Please sign in to comment.