Skip to content
This repository has been archived by the owner on Nov 3, 2023. It is now read-only.

Commit

Permalink
Add VQA V2.0 and Visual Dialog V0.9. (#54)
Browse files Browse the repository at this point in the history
* Move COCO image path to --download_path
* add VQA_v2.0
* add VisDial_V0.9

Summary:
Fix the VQA, VisualDialog, and VQA v2.0 tasks based on the last update of ParlAI.

Test Plan:
python examples/display_data.py -t vqa_coco2014
python examples/display_data.py -t vqa_coco2014_v2
python examples/display_data.py -t visdial
  • Loading branch information
jiasenlu authored and alexholdenmiller committed May 12, 2017
1 parent 1854066 commit 10453fd
Show file tree
Hide file tree
Showing 9 changed files with 397 additions and 19 deletions.
2 changes: 1 addition & 1 deletion parlai/core/dialog_teacher.py
Original file line number Diff line number Diff line change
Expand Up @@ -265,7 +265,7 @@ def get(self, episode_idx, entry_idx=0):
table['reward'] = entry[2]
if len(entry) > 3:
table['label_candidates'] = entry[3]
if len(entry) > 4 and not opt.get('no_images', False):
if len(entry) > 4 and not self.opt.get('no_images', False):
table['image'] = load_image(self.opt, entry[4])


Expand Down
5 changes: 5 additions & 0 deletions parlai/tasks/visdial/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
# Copyright (c) 2017-present, Facebook, Inc.
# All rights reserved.
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree. An additional grant
# of patent rights can be found in the PATENTS file in the same directory.
86 changes: 86 additions & 0 deletions parlai/tasks/visdial/agents.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,86 @@
# Copyright (c) 2017-present, Facebook, Inc.
# All rights reserved.
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree. An additional grant
# of patent rights can be found in the PATENTS file in the same directory.

from parlai.core.dialog_teacher import DialogTeacher
from .build import build, buildImage

from PIL import Image
import json
import random
import os

def _path(opt):
    """Ensure the VisDial data and COCO images are built, then locate them.

    The split is taken from the part of ``opt['datatype']`` before the first
    ``':'``; only ``train`` and ``valid`` are supported.

    Returns a ``(data_path, image_path)`` tuple where ``data_path`` is the
    VisDial v0.9 JSON file for the split and ``image_path`` is the filename
    prefix of that split's COCO images (the teacher appends the zero-padded
    image id and ``.jpg`` to it).

    Raises:
        RuntimeError: if the split is neither ``train`` nor ``valid``.
    """
    # Both build steps run before the datatype is inspected.
    build(opt)
    buildImage(opt)

    # split name -> (json file suffix, image filename prefix)
    known_splits = {
        'train': ('train', os.path.join('train2014', 'COCO_train2014_')),
        'valid': ('val', os.path.join('val2014', 'COCO_val2014_')),
    }

    split = opt['datatype'].split(':')[0]
    if split not in known_splits:
        raise RuntimeError('Not valid datatype.')
    json_suffix, image_prefix = known_splits[split]

    root = opt['datapath']
    json_name = 'visdial_0.9_' + json_suffix + '.json'
    data_path = os.path.join(root, 'VisDial-v0.9', json_name)
    image_path = os.path.join(root, 'COCO-IMG', image_prefix)
    return data_path, image_path


def _image_loader(path):
    """Open the image file at ``path`` and return it as an RGB PIL image."""
    opened = Image.open(path)
    return opened.convert('RGB')


class DefaultTeacher(DialogTeacher):
    """
    This version of VisDial inherits from the core Dialog Teacher, which just
    requires it to define an iterator over its data `setup_data` in order to
    inherit basic metrics, an `act` function, and enables
    Hogwild training with shared memory with no extra work.
    """

    def __init__(self, opt, shared=None):
        """Resolve the data/image paths for the split and hand off to the base.

        ``opt['datafile']`` is overwritten with the VisDial JSON path before
        the base class constructor runs.
        """
        self.datatype = opt['datatype']
        data_path, self.image_path = _path(opt)
        opt['datafile'] = data_path
        self.id = 'visdial'

        super().__init__(opt, shared)

    def setup_data(self, path):
        """Iterate over the VisDial JSON file at ``path``, one entry per turn.

        Each item yielded is ``(entry, new_episode)`` where ``entry`` is
        ``(question, [answer], reward, answer_options, image_path)`` and
        ``new_episode`` is True only for the first turn of a dialog, so all
        question/answer rounds about one image form a single episode.
        """
        print('loading: ' + path)
        with open(path) as data_file:
            self.visdial = json.load(data_file)

        # Questions and answers are stored once and referenced by index.
        self.questions = self.visdial['data']['questions']
        self.answers = self.visdial['data']['answers']

        for dialog in self.visdial['data']['dialogs']:
            # Every turn of a dialog refers to the same COCO image.
            img_path = self.image_path + '%012d.jpg' % (dialog['image_id'])

            for i, qa in enumerate(dialog['dialog']):
                question = self.questions[qa['question']]
                answer = [self.answers[qa['answer']]]
                answer_options = [
                    self.answers[ans_id] for ans_id in qa['answer_options']
                ]
                # The previous code compared i against len(dialog['dialog']),
                # which enumerate never reaches, and then yielded True for
                # every turn, splitting each dialog into one-turn episodes.
                # The flag is the new-episode marker of the DialogTeacher
                # setup_data contract, so only the first turn sets it.
                # NOTE(review): the reward slot is the string 'None', kept
                # unchanged here -- confirm whether None was intended.
                yield (question, answer, 'None', answer_options,
                       img_path), i == 0
60 changes: 60 additions & 0 deletions parlai/tasks/visdial/build.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
# Copyright (c) 2017-present, Facebook, Inc.
# All rights reserved.
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree. An additional grant
# of patent rights can be found in the PATENTS file in the same directory.
# Download and build the data if it does not exist.

import parlai.core.build_data as build_data
import os


def buildImage(opt):
    """Download and unpack the COCO 2014 image archives if not already built.

    The images go to ``<opt['datapath']>/COCO-IMG``. When ``build_data``
    reports that directory as built, nothing is done. Otherwise the directory
    is recreated, the train/val/test archives are downloaded and unpacked,
    and the directory is marked as done.
    """
    dpath = os.path.join(opt['datapath'], 'COCO-IMG')
    if build_data.built(dpath):
        return

    print('[building image data: ' + dpath + ']')
    build_data.remove_dir(dpath)
    build_data.make_dir(dpath)

    url = 'http://msvocds.blob.core.windows.net/coco2014/'
    archives = ('train2014.zip', 'val2014.zip', 'test2014.zip')

    # All downloads complete before any archive is unpacked.
    for archive in archives:
        build_data.download(dpath, url + archive)
    for archive in archives:
        build_data.untar(dpath, archive, False)

    # Mark the data as built.
    build_data.mark_done(dpath)


def build(opt):
    """Download and unpack the VisDial v0.9 dialog data if not already built.

    The data goes to ``<opt['datapath']>/VisDial-v0.9``. When ``build_data``
    reports that directory as built, nothing is done. Otherwise the directory
    is recreated, the train and val archives are downloaded and unpacked, and
    the directory is marked as done.
    """
    dpath = os.path.join(opt['datapath'], 'VisDial-v0.9')
    if build_data.built(dpath):
        return

    print('[building data: ' + dpath + ']')
    build_data.remove_dir(dpath)
    build_data.make_dir(dpath)

    url = 'https://computing.ece.vt.edu/~abhshkdz/data/visdial/'
    archives = ('visdial_0.9_train.zip', 'visdial_0.9_val.zip')

    # Fetch both archives first, then unpack them in the same order.
    for archive in archives:
        build_data.download(dpath, url + archive)
    for archive in archives:
        build_data.untar(dpath, archive)

    # Mark the data as built.
    build_data.mark_done(dpath)
6 changes: 3 additions & 3 deletions parlai/tasks/vqa_coco2014/agents.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,9 +12,9 @@
import random
import os


def _path(opt):
build(opt)
buildImage(opt)
dt = opt['datatype'].split(':')[0]

if dt == 'train':
Expand All @@ -36,7 +36,7 @@ def _path(opt):
annotation_path = os.path.join(opt['datapath'], 'VQA-COCO2014',
annotation_suffix + '_annotations.json')

image_path = os.path.join(opt['datapath'], 'VQA-COCO2014', img_suffix)
image_path = os.path.join(opt['datapath'], 'COCO-IMG', img_suffix)

return data_path, annotation_path, image_path

Expand Down Expand Up @@ -100,7 +100,7 @@ def act(self):
self.episode_idx = (self.episode_idx + self.step_size) % len(self)
if self.episode_idx == len(self) - self.step_size:
self.epochDone = True
# always showing the same index now.

qa = self.ques['questions'][self.episode_idx]
question = qa['question']
image_id = qa['image_id']
Expand Down
36 changes: 21 additions & 15 deletions parlai/tasks/vqa_coco2014/build.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,22 +9,30 @@
import os


def buildImage(dpath):
print('[building image data: ' + dpath + ']')
# download the image data.
fname1 = 'train2014.zip'
fname2 = 'val2014.zip'
fname3 = 'test2014.zip'
def buildImage(opt):
dpath = os.path.join(opt['datapath'], 'COCO-IMG')

url = 'http://msvocds.blob.core.windows.net/coco2014/'
if not build_data.built(dpath):
print('[building image data: ' + dpath + ']')
build_data.remove_dir(dpath)
build_data.make_dir(dpath)
# download the image data.
fname1 = 'train2014.zip'
fname2 = 'val2014.zip'
fname3 = 'test2014.zip'

build_data.download(os.path.join(dpath, fname1), url + fname1, False)
build_data.download(os.path.join(dpath, fname2), url + fname2, False)
build_data.download(os.path.join(dpath, fname3), url + fname3, False)
url = 'http://msvocds.blob.core.windows.net/coco2014/'

build_data.untar(dpath, fname1)
build_data.untar(dpath, fname2)
build_data.untar(dpath, fname3)
build_data.download(dpath, url + fname1)
build_data.download(dpath, url + fname2)
build_data.download(dpath, url + fname3)

build_data.untar(dpath, fname1, False)
build_data.untar(dpath, fname2, False)
build_data.untar(dpath, fname3, False)

# Mark the data as built.
build_data.mark_done(dpath)



Expand Down Expand Up @@ -61,7 +69,5 @@ def build(opt):
build_data.untar(dpath, fname4)
build_data.untar(dpath, fname5)

buildImage(dpath)

# Mark the data as built.
build_data.mark_done(dpath)
5 changes: 5 additions & 0 deletions parlai/tasks/vqa_coco2014_v2/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
# Copyright (c) 2017-present, Facebook, Inc.
# All rights reserved.
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree. An additional grant
# of patent rights can be found in the PATENTS file in the same directory.
146 changes: 146 additions & 0 deletions parlai/tasks/vqa_coco2014_v2/agents.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,146 @@
# Copyright (c) 2017-present, Facebook, Inc.
# All rights reserved.
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree. An additional grant
# of patent rights can be found in the PATENTS file in the same directory.

from parlai.core.agents import Teacher
from .build import build, buildImage

from PIL import Image
import json
import random
import os
import pdb

def _path(opt):
    """Ensure the VQA v2 data and COCO images are built, then locate them.

    The split is taken from the part of ``opt['datatype']`` before the first
    ``':'``. ``train`` and ``valid`` select their own files; any other value
    falls back to the test files.

    Returns a ``(data_path, annotation_path, image_path)`` tuple: the
    questions JSON, the annotations JSON, and the filename prefix of the
    split's COCO images (the teacher appends the zero-padded image id and
    ``.jpg``). For the test fallback the annotation suffix is the literal
    string ``'None'``.
    """
    # Both build steps run before the datatype is inspected.
    build(opt)
    buildImage(opt)

    # split name -> (question suffix, annotation suffix, image prefix)
    known_splits = {
        'train': (
            'v2_OpenEnded_mscoco_train2014',
            'v2_mscoco_train2014',
            os.path.join('train2014', 'COCO_train2014_'),
        ),
        'valid': (
            'v2_OpenEnded_mscoco_val2014',
            'v2_mscoco_val2014',
            os.path.join('val2014', 'COCO_val2014_'),
        ),
    }
    test_split = (
        'v2_OpenEnded_mscoco_test2015',
        'None',
        os.path.join('test2014', 'COCO_test2014_'),
    )

    split = opt['datatype'].split(':')[0]
    ques_suffix, annotation_suffix, image_prefix = known_splits.get(
        split, test_split)

    root = opt['datapath']
    data_path = os.path.join(root, 'VQA-COCO2014-v2',
                             ques_suffix + '_questions.json')
    annotation_path = os.path.join(root, 'VQA-COCO2014-v2',
                                   annotation_suffix + '_annotations.json')
    image_path = os.path.join(root, 'COCO-IMG', image_prefix)
    return data_path, annotation_path, image_path


def _image_loader(opt, path):
"""
Loads the appropriate image from the image_id and returns PIL Image format.
"""
if not opt.get('no_images', False):
return Image.open(path).convert('RGB')
else:
return None


class OeTeacher(Teacher):
    """
    VQA v2.0 Open-Ended teacher, which loads the json vqa data and implements
    its own `act` method for interacting with a student agent.
    """

    def __init__(self, opt, shared=None):
        """Load the questions/annotations, or reuse them from ``shared``.

        ``shared`` is the dict produced by `share`; when it carries the
        already-parsed questions they are reused instead of re-reading the
        JSON files.
        """
        super().__init__(opt)
        self.datatype = opt['datatype']
        data_path, annotation_path, self.image_path = _path(opt)

        if shared and 'ques' in shared:
            self.ques = shared['ques']
            if 'annotation' in shared:
                self.annotation = shared['annotation']
            # This path skips _setup_data, so the length has to be set here;
            # without it __len__ and act fail on teachers built from shared
            # data.
            self.len = len(self.ques['questions'])
        else:
            self._setup_data(data_path, annotation_path)

        # for ordered data in batch mode (especially, for validation and
        # testing), each teacher in the batch gets a start index and a step
        # size so they all process disparate sets of the data
        self.step_size = opt.get('batchsize', 1)
        self.data_offset = opt.get('batchindex', 0)

        self.reset()

    def __len__(self):
        """Return the number of questions in the loaded split."""
        return self.len

    def reset(self):
        """Rewind to the start of the epoch and clear the pending labels."""
        # Reset the dialog so that it is at the start of the epoch,
        # and all metrics are reset.
        super().reset()
        self.lastY = None
        # One stride before the offset, so the first ordered act() lands
        # exactly on data_offset.
        self.episode_idx = self.data_offset - self.step_size

    def observe(self, observation):
        """Process observation for metrics."""
        if self.lastY is not None:
            self.metrics.update(observation, self.lastY)
            self.lastY = None
        return observation

    def act(self):
        """Return the next question as a single-turn episode.

        For datatype ``train`` a random question is drawn; otherwise the
        index advances by ``step_size`` and wraps around. Labels are attached
        only when the datatype starts with ``train``; for every non-test
        datatype the answers are kept in ``lastY`` for `observe`.
        """
        if self.datatype == 'train':
            self.episode_idx = random.randrange(self.len)
        else:
            # Advance by the stride prepared in __init__/reset; stepping by 1
            # made every teacher in a batch walk overlapping questions.
            self.episode_idx = (
                self.episode_idx + self.step_size) % self.len

        qa = self.ques['questions'][self.episode_idx]
        question = qa['question']
        image_id = qa['image_id']

        img_path = self.image_path + '%012d.jpg' % (image_id)

        action = {
            'image': _image_loader(self.opt, img_path),
            'text': question,
            'episode_done': True
        }

        if not self.datatype.startswith('test'):
            # Annotations are looked up by the same index as the questions.
            anno = self.annotation['annotations'][self.episode_idx]
            self.lastY = [ans['answer'] for ans in anno['answers']]

        if self.datatype.startswith('train'):
            action['labels'] = self.lastY

        return action

    def share(self):
        """Extend the base shared dict with the parsed questions/annotations."""
        shared = super().share()
        shared['ques'] = self.ques
        if hasattr(self, 'annotation'):
            shared['annotation'] = self.annotation
        return shared

    def _setup_data(self, data_path, annotation_path):
        """Read the questions JSON and, for non-test datatypes, annotations."""
        print('loading: ' + data_path)
        with open(data_path) as data_file:
            self.ques = json.load(data_file)

        # Same test check as act(), so every datatype that act() treats as
        # test also skips the annotation file here.
        if not self.datatype.startswith('test'):
            print('loading: ' + annotation_path)
            with open(annotation_path) as data_file:
                self.annotation = json.load(data_file)

        self.len = len(self.ques['questions'])

class DefaultTeacher(OeTeacher):
    """Default teacher for this task: the open-ended teacher, unchanged."""
Loading

0 comments on commit 10453fd

Please sign in to comment.