Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix discrepancies with the specs #742

Merged
merged 9 commits into from
Sep 24, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
52 changes: 26 additions & 26 deletions datasets/1.0/bigcode-the-stack/metadata.json
Original file line number Diff line number Diff line change
Expand Up @@ -121,7 +121,7 @@
{
"@type": "cr:Field",
"@id": "default/content",
"name": "content",
"name": "default/content",
"description": "The content of the file.",
"dataType": "sc:Text",
"source": {
Expand All @@ -136,7 +136,7 @@
{
"@type": "cr:Field",
"@id": "default/language",
"name": "language",
"name": "default/language",
"description": "Programming language of the file.",
"dataType": "sc:Text",
"source": {
Expand All @@ -151,7 +151,7 @@
{
"@type": "cr:Field",
"@id": "default/hexsha",
"name": "hexsha",
"name": "default/hexsha",
"description": "Unique git hash of file.",
"dataType": "sc:Text",
"source": {
Expand All @@ -166,7 +166,7 @@
{
"@type": "cr:Field",
"@id": "default/size",
"name": "size",
"name": "default/size",
"description": "Size of the uncompressed file..",
"dataType": "sc:Integer",
"source": {
Expand All @@ -181,7 +181,7 @@
{
"@type": "cr:Field",
"@id": "default/ext",
"name": "ext",
"name": "default/ext",
"description": "File extension.",
"dataType": "sc:Text",
"source": {
Expand All @@ -196,7 +196,7 @@
{
"@type": "cr:Field",
"@id": "default/lang",
"name": "lang",
"name": "default/lang",
"description": "Hash of the file.",
"dataType": "sc:Text",
"source": {
Expand All @@ -211,7 +211,7 @@
{
"@type": "cr:Field",
"@id": "default/max_stars_repo_path",
"name": "max_stars_repo_path",
"name": "default/max_stars_repo_path",
"description": "Path to file in repo containing this file with maximum number of stars.",
"dataType": "sc:Text",
"source": {
Expand All @@ -226,7 +226,7 @@
{
"@type": "cr:Field",
"@id": "default/max_stars_repo_name",
"name": "max_stars_repo_name",
"name": "default/max_stars_repo_name",
"description": "Name of repo containing this file with maximum number of stars.",
"dataType": "sc:Text",
"source": {
Expand All @@ -241,7 +241,7 @@
{
"@type": "cr:Field",
"@id": "default/max_stars_repo_head_hexsha",
"name": "max_stars_repo_head_hexsha",
"name": "default/max_stars_repo_head_hexsha",
"description": "Hexsha of repository head with the maximum of stars.",
"dataType": "sc:Text",
"source": {
Expand All @@ -256,7 +256,7 @@
{
"@type": "cr:Field",
"@id": "default/max_stars_count",
"name": "max_stars_count",
"name": "default/max_stars_count",
"description": "Number of stars in repository.",
"dataType": "sc:Text",
"source": {
Expand All @@ -271,7 +271,7 @@
{
"@type": "cr:Field",
"@id": "default/max_stars_repo_stars_event_min_datetime",
"name": "max_stars_repo_stars_event_min_datetime",
"name": "default/max_stars_repo_stars_event_min_datetime",
"description": "First timestamp of a stars event.",
"dataType": "sc:Text",
"source": {
Expand All @@ -286,7 +286,7 @@
{
"@type": "cr:Field",
"@id": "default/max_stars_repo_stars_event_max_datetime",
"name": "max_stars_repo_stars_event_max_datetime",
"name": "default/max_stars_repo_stars_event_max_datetime",
"description": "Last timestamp of a stars event.",
"dataType": "sc:Text",
"source": {
Expand All @@ -301,7 +301,7 @@
{
"@type": "cr:Field",
"@id": "default/max_issues_repo_path",
"name": "max_issues_repo_path",
"name": "default/max_issues_repo_path",
"description": "Path to file in repo containing this file with maximum number of issues.",
"dataType": "sc:Text",
"source": {
Expand All @@ -316,7 +316,7 @@
{
"@type": "cr:Field",
"@id": "default/max_issues_repo_head_hexsha",
"name": "max_issues_repo_head_hexsha",
"name": "default/max_issues_repo_head_hexsha",
"description": "Hexsha of repository head with the maximum of issues.",
"dataType": "sc:Text",
"source": {
Expand All @@ -331,7 +331,7 @@
{
"@type": "cr:Field",
"@id": "default/max_issues_count",
"name": "max_issues_count",
"name": "default/max_issues_count",
"description": "Number of issues in repository.",
"dataType": "sc:Integer",
"source": {
Expand All @@ -346,7 +346,7 @@
{
"@type": "cr:Field",
"@id": "default/max_issues_repo_issues_event_min_datetime",
"name": "max_issues_repo_issues_event_min_datetime",
"name": "default/max_issues_repo_issues_event_min_datetime",
"description": "First timestamp of an issues event.",
"dataType": "sc:Text",
"source": {
Expand All @@ -361,7 +361,7 @@
{
"@type": "cr:Field",
"@id": "default/max_issues_repo_issues_event_max_datetime",
"name": "max_issues_repo_issues_event_max_datetime",
"name": "default/max_issues_repo_issues_event_max_datetime",
"description": "Last timestamp of an issues event.",
"dataType": "sc:Text",
"source": {
Expand All @@ -376,7 +376,7 @@
{
"@type": "cr:Field",
"@id": "default/max_forks_repo_path",
"name": "max_forks_repo_path",
"name": "default/max_forks_repo_path",
"description": "Path to file in repo containing this file with maximum number of forks.",
"dataType": "sc:Text",
"source": {
Expand All @@ -391,7 +391,7 @@
{
"@type": "cr:Field",
"@id": "default/max_forks_repo_name",
"name": "max_forks_repo_name",
"name": "default/max_forks_repo_name",
"description": "Name of repo containing this file with maximum number of forks.",
"dataType": "sc:Text",
"source": {
Expand All @@ -406,7 +406,7 @@
{
"@type": "cr:Field",
"@id": "default/max_forks_repo_head_hexsha",
"name": "max_forks_repo_head_hexsha",
"name": "default/max_forks_repo_head_hexsha",
"description": "Hexsha of repository head with the maximum of forks.",
"dataType": "sc:Text",
"source": {
Expand All @@ -421,7 +421,7 @@
{
"@type": "cr:Field",
"@id": "default/max_forks_count",
"name": "max_forks_count",
"name": "default/max_forks_count",
"description": "Number of forks in repository.",
"dataType": "sc:Integer",
"source": {
Expand All @@ -436,7 +436,7 @@
{
"@type": "cr:Field",
"@id": "default/max_forks_repo_forks_event_min_datetime",
"name": "max_forks_repo_forks_event_min_datetime",
"name": "default/max_forks_repo_forks_event_min_datetime",
"description": "First timestamp of a forks event.",
"dataType": "sc:Text",
"source": {
Expand All @@ -451,7 +451,7 @@
{
"@type": "cr:Field",
"@id": "default/max_forks_repo_forks_event_max_datetime",
"name": "max_forks_repo_forks_event_max_datetime",
"name": "default/max_forks_repo_forks_event_max_datetime",
"description": "Last timestamp of a forks event.",
"dataType": "sc:Text",
"source": {
Expand All @@ -466,7 +466,7 @@
{
"@type": "cr:Field",
"@id": "default/avg_line_length",
"name": "avg_line_length",
"name": "default/avg_line_length",
"description": "The average line-length of the file.",
"dataType": "sc:Float",
"source": {
Expand All @@ -481,7 +481,7 @@
{
"@type": "cr:Field",
"@id": "default/max_line_length",
"name": "max_line_length",
"name": "default/max_line_length",
"description": "The maximum line-length of the file.",
"dataType": "sc:Integer",
"source": {
Expand All @@ -496,7 +496,7 @@
{
"@type": "cr:Field",
"@id": "default/alphanum_fraction",
"name": "alphanum_fraction",
"name": "default/alphanum_fraction",
"description": "The fraction of characters in the file that are alphabetical or numerical characters.",
"dataType": "sc:Float",
"source": {
Expand Down
38 changes: 19 additions & 19 deletions datasets/1.0/coco2014-mini/metadata.json
Original file line number Diff line number Diff line change
Expand Up @@ -104,14 +104,14 @@
{
"@type": "cr:Field",
"@id": "split_enums/name",
"name": "name",
"name": "split_enums/name",
"description": "One of: train, val, test.",
"dataType": "sc:Text"
},
{
"@type": "cr:Field",
"@id": "split_enums/url",
"name": "url",
"name": "split_enums/url",
"description": "Corresponding mlcommons.org definition URL",
"dataType": [
"wd:Q3985153",
Expand All @@ -121,16 +121,16 @@
],
"data": [
{
"name": "train",
"url": "https://mlcommons.org/definitions/training_split"
"split_enums/name": "train",
"split_enums/url": "https://mlcommons.org/definitions/training_split"
},
{
"name": "val",
"url": "https://mlcommons.org/definitions/validation_split"
"split_enums/name": "val",
"split_enums/url": "https://mlcommons.org/definitions/validation_split"
},
{
"name": "test",
"url": "https://mlcommons.org/definitions/test_split"
"split_enums/name": "test",
"split_enums/url": "https://mlcommons.org/definitions/test_split"
}
]
},
Expand All @@ -145,7 +145,7 @@
{
"@type": "cr:Field",
"@id": "images/image_filename",
"name": "image_filename",
"name": "images/image_filename",
"description": "The filename of the image. eg: COCO_train2014_000000000003.jpg",
"dataType": "sc:Text",
"source": {
Expand All @@ -160,7 +160,7 @@
{
"@type": "cr:Field",
"@id": "images/image_content",
"name": "image_content",
"name": "images/image_content",
"description": "The content of the image.",
"dataType": "sc:ImageObject",
"source": {
Expand All @@ -175,7 +175,7 @@
{
"@type": "cr:Field",
"@id": "images/split",
"name": "split",
"name": "images/split",
"dataType": [
"wd:Q3985153",
"sc:Text"
Expand Down Expand Up @@ -210,7 +210,7 @@
{
"@type": "cr:Field",
"@id": "captions/id",
"name": "id",
"name": "captions/id",
"description": "The ID of the caption",
"dataType": "sc:Integer",
"source": {
Expand All @@ -225,7 +225,7 @@
{
"@type": "cr:Field",
"@id": "captions/image_id",
"name": "image_id",
"name": "captions/image_id",
"description": "The ID of the image",
"dataType": "sc:Integer",
"source": {
Expand All @@ -240,7 +240,7 @@
{
"@type": "cr:Field",
"@id": "captions/caption",
"name": "caption",
"name": "captions/caption",
"description": "The caption",
"dataType": [
"wd:Q18585177",
Expand All @@ -258,7 +258,7 @@
{
"@type": "cr:Field",
"@id": "captions/split",
"name": "split",
"name": "captions/split",
"dataType": [
"wd:Q3985153",
"sc:Text"
Expand Down Expand Up @@ -290,7 +290,7 @@
{
"@type": "cr:Field",
"@id": "bounding_boxes/id",
"name": "id",
"name": "bounding_boxes/id",
"description": "The ID of the annotation.",
"dataType": "sc:Integer",
"source": {
Expand All @@ -305,7 +305,7 @@
{
"@type": "cr:Field",
"@id": "bounding_boxes/image_id",
"name": "image_id",
"name": "bounding_boxes/image_id",
"description": "The ID of the image.",
"dataType": "sc:Integer",
"source": {
Expand All @@ -320,7 +320,7 @@
{
"@type": "cr:Field",
"@id": "bounding_boxes/bbox",
"name": "bbox",
"name": "bounding_boxes/bbox",
"description": "The bounding box on the image.",
"dataType": "cr:BoundingBox",
"source": {
Expand All @@ -335,7 +335,7 @@
{
"@type": "cr:Field",
"@id": "bounding_boxes/area",
"name": "area",
"name": "bounding_boxes/area",
"description": "The area of the bounding box.",
"dataType": "sc:Integer",
"source": {
Expand Down
8 changes: 4 additions & 4 deletions datasets/1.0/coco2014-mini/output/bounding_boxes.jsonl
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
{"id": 86, "image_id": 318556, "bbox": "[116.95, 305.86, 285.3, 266.03]", "area": 54652}
{"id": 89, "image_id": 116100, "bbox": "[245.54, 208.17, 40.14, 19.1]", "area": 421}
{"id": 93, "image_id": 318556, "bbox": "[288.4, 18.07, 211.6, 331.33]", "area": 53535}
{"id": 113, "image_id": 116100, "bbox": "[126.5, 475.24, 77.68, 76.73]", "area": 3892}
{"bounding_boxes/id": 86, "bounding_boxes/image_id": 318556, "bounding_boxes/bbox": "[116.95, 305.86, 285.3, 266.03]", "bounding_boxes/area": 54652}
{"bounding_boxes/id": 89, "bounding_boxes/image_id": 116100, "bounding_boxes/bbox": "[245.54, 208.17, 40.14, 19.1]", "bounding_boxes/area": 421}
{"bounding_boxes/id": 93, "bounding_boxes/image_id": 318556, "bounding_boxes/bbox": "[288.4, 18.07, 211.6, 331.33]", "bounding_boxes/area": 53535}
{"bounding_boxes/id": 113, "bounding_boxes/image_id": 116100, "bounding_boxes/bbox": "[126.5, 475.24, 77.68, 76.73]", "bounding_boxes/area": 3892}
8 changes: 4 additions & 4 deletions datasets/1.0/coco2014-mini/output/captions.jsonl
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
{"id": 48, "image_id": 318556, "caption": "A very clean and well decorated empty bathroom", "split": "train"}
{"id": 67, "image_id": 116100, "caption": "A panoramic view of a kitchen and all of its appliances.", "split": "train"}
{"id": 126, "image_id": 318556, "caption": "A blue and white bathroom with butterfly themed wall tiles.", "split": "train"}
{"id": 148, "image_id": 116100, "caption": "A panoramic photo of a kitchen and dining room", "split": "train"}
{"captions/id": 48, "captions/image_id": 318556, "captions/caption": "A very clean and well decorated empty bathroom", "captions/split": "train"}
{"captions/id": 67, "captions/image_id": 116100, "captions/caption": "A panoramic view of a kitchen and all of its appliances.", "captions/split": "train"}
{"captions/id": 126, "captions/image_id": 318556, "captions/caption": "A blue and white bathroom with butterfly themed wall tiles.", "captions/split": "train"}
{"captions/id": 148, "captions/image_id": 116100, "captions/caption": "A panoramic photo of a kitchen and dining room", "captions/split": "train"}
4 changes: 2 additions & 2 deletions datasets/1.0/coco2014-mini/output/images.jsonl
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
{"image_filename": "COCO_train2014_000000467840.jpg", "image_content": "<PIL.JpegImagePlugin.JpegImageFile image mode=RGB size=3x2 at <MEMORY_ADDRESS>>", "split": "train"}
{"image_filename": "COCO_train2014_000000533055.jpg", "image_content": "<PIL.JpegImagePlugin.JpegImageFile image mode=RGB size=3x2 at <MEMORY_ADDRESS>>", "split": "train"}
{"images/image_filename": "COCO_train2014_000000467840.jpg", "images/image_content": "<PIL.JpegImagePlugin.JpegImageFile image mode=RGB size=3x2 at <MEMORY_ADDRESS>>", "images/split": "train"}
{"images/image_filename": "COCO_train2014_000000533055.jpg", "images/image_content": "<PIL.JpegImagePlugin.JpegImageFile image mode=RGB size=3x2 at <MEMORY_ADDRESS>>", "images/split": "train"}
Loading
Loading