Skip to content

Commit 05833ad

Browse files
committed
PR feedback
1 parent cbdf35e commit 05833ad

File tree

1 file changed

+16
-10
lines changed

1 file changed

+16
-10
lines changed

src/oumi/datasets/vision_language/coco_captions.py

+16 -10
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,12 @@
22
from oumi.core.registry import register_dataset
33
from oumi.core.types.turn import Conversation, Message, Role, Type
44

5+
_COCO_COLUMN_SENTENCES = "sentences"
6+
_COCO_COLUMN_RAW = "raw"
7+
_COCO_COLUMN_IMAGE = "image"
8+
_COCO_COLUMN_PATH = "path"
9+
_COCO_COLUMN_BYTES = "bytes"
10+
511

612
@register_dataset("coco_captions")
713
class COCOCaptionsDataset(VisionLanguageSftDataset):
@@ -12,43 +18,43 @@ def transform_conversation(self, example: dict) -> Conversation:
1218
"""Transform a single conversation example into a Conversation object."""
1319
input_text = self.default_prompt
1420

15-
for required_key in ("sentences", "image"):
21+
for required_key in (_COCO_COLUMN_SENTENCES, _COCO_COLUMN_IMAGE):
1622
if required_key not in example:
1723
raise ValueError(
1824
"Training example doesn't contain '{required_key}' key. "
1925
f"Available keys: {example.keys()}."
2026
)
2127

22-
if "raw" not in example["sentences"]:
28+
if _COCO_COLUMN_RAW not in example[_COCO_COLUMN_SENTENCES]:
2329
raise ValueError(
24-
"Training example doesn't contain 'sentences.raw' key. "
25-
f"Available keys under 'sentences.': {example['sentences'].keys()}."
30+
"Training example doesn't contain 'sentences.raw' key. Available keys "
31+
f"under 'sentences.': {example[_COCO_COLUMN_SENTENCES].keys()}."
2632
)
27-
output_text = example["sentences"]["raw"]
33+
output_text = example[_COCO_COLUMN_SENTENCES][_COCO_COLUMN_RAW]
2834

2935
messages = [Message(role=Role.USER, content=input_text)]
3036

31-
if "bytes" in example["image"]:
37+
if _COCO_COLUMN_BYTES in example[_COCO_COLUMN_IMAGE]:
3238
messages.append(
3339
Message(
3440
role=Role.USER,
35-
binary=example["image"]["bytes"],
41+
binary=example[_COCO_COLUMN_IMAGE][_COCO_COLUMN_BYTES],
3642
type=Type.IMAGE_BINARY,
3743
)
3844
)
39-
elif "path" in example["image"]:
45+
elif _COCO_COLUMN_PATH in example[_COCO_COLUMN_IMAGE]:
4046
messages.append(
4147
Message(
4248
role=Role.USER,
43-
content=example["image"]["path"],
49+
content=example[_COCO_COLUMN_IMAGE][_COCO_COLUMN_PATH],
4450
type=Type.IMAGE_PATH,
4551
)
4652
)
4753
else:
4854
raise ValueError(
4955
"Training example contains none of required keys: "
5056
"'image.bytes', 'image.path'. "
51-
f"Available keys under 'image.': {example['image'].keys()}."
57+
f"Available keys under 'image.': {example[_COCO_COLUMN_IMAGE].keys()}."
5258
)
5359

5460
messages.append(Message(role=Role.ASSISTANT, content=output_text))

0 commit comments

Comments
 (0)