Skip to content

Commit

Permalink
Frame primitive outputs a dataframe of "(frame_dir, num_frames)"
Browse files Browse the repository at this point in the history
  • Loading branch information
YIWEI-CHEN committed Sep 15, 2021
1 parent aea1d2b commit 7135358
Show file tree
Hide file tree
Showing 3 changed files with 50 additions and 45 deletions.
8 changes: 3 additions & 5 deletions autovideo/base/supervised_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
import typing
from urllib.parse import urlparse

import pandas as pd
import torch

from d3m import container
Expand Down Expand Up @@ -59,10 +60,7 @@ def fit(self, *, timeout: float = None, iterations: int = None) -> CallResult[No
def set_training_data(self, *, inputs: Inputs, outputs: Outputs) -> None:
self._inputs = inputs
self._outputs = outputs
try:
self._media_dir = urlparse(self._inputs.metadata.query_column(0)['location_base_uris'][0]).path
except KeyError:
pass
self._frame_list = pd.concat([self._inputs, self._outputs], axis=1).to_numpy()

@abc.abstractmethod
def _fit(self, *, timeout: float = None, iterations: int = None):
Expand All @@ -82,4 +80,4 @@ def produce(self, *, inputs: container.DataFrame, timeout: float = None, iterati
make the predictions
"""


39 changes: 23 additions & 16 deletions autovideo/common/frame_extraction.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@

__all__ = ('ExtractFramesPrimitive',)

Inputs = container.Dataset
Inputs = container.DataFrame
Outputs = container.DataFrame

logger = logging.getLogger(__name__)
Expand Down Expand Up @@ -69,15 +69,9 @@ class ExtractFramesPrimitive(transformer.TransformerPrimitiveBase[Inputs, Output
def produce(self, *, inputs: Inputs, timeout: float = None, iterations: int = None) -> base.CallResult[Outputs]:
num_worker = self.hyperparams['num_worker']

# empty output
outputs = Outputs()

selector = ['learningData', metadata_base.ALL_ELEMENTS, 1]
location_base_uris = inputs.metadata.query(selector)['location_base_uris']
media_types = inputs.metadata.query(selector)['media_types']

location_base_uris = inputs.metadata.query_column(0)['location_base_uris']
media_dir = urlparse(location_base_uris[0]).path[:-1]
ext = media_types[0].split('/')[-1]
ext = inputs.iloc[0, 0].split('.')[-1]

out_dir = os.path.join(media_dir, 'frames')
os.makedirs(out_dir, exist_ok=True)
Expand All @@ -89,12 +83,13 @@ def produce(self, *, inputs: Inputs, timeout: float = None, iterations: int = No
if os.path.splitext(os.path.basename(p))[0] not in done_list:
_fullpath_list.append(p)
fullpath_list = _fullpath_list
if len(fullpath_list) == 0:
return base.CallResult(outputs)
vid_list = list(map(lambda p: p.split('/')[-1], fullpath_list))
pool = Pool(num_worker)
pool.map(self._dump_frames, zip(
fullpath_list, vid_list, range(len(vid_list)), [out_dir] * len(vid_list)))
if len(fullpath_list) != 0:
vid_list = list(map(lambda p: p.split('/')[-1], fullpath_list))
pool = Pool(num_worker)
pool.map(self._dump_frames, zip(
fullpath_list, vid_list, range(len(vid_list)), [out_dir] * len(vid_list)))

outputs = self._get_frame_list(media_dir=media_dir, inputs=inputs)
return base.CallResult(outputs)

def _dump_frames(self, vid_item):
Expand All @@ -114,6 +109,18 @@ def _dump_frames(self, vid_item):
print('[Warning] length inconsistent!'
'Early stop with {} out of {} frames'.format(i + 1, len(vr)))
break
print('{} done with {} frames'.format(vid_name, len(vr)))
logger.info('{} done with {} frames'.format(vid_name, len(vr)))
sys.stdout.flush()
return True

def _get_frame_list(self, media_dir, inputs):
    """Build the (frame_dir, num_frames) table for the videos in ``inputs``.

    For every row of ``inputs`` the value in its ``"video"`` column is
    stripped of its file extension and joined onto ``<media_dir>/frames``
    to form that video's frame directory. The number of entries that
    ``os.listdir`` reports for the directory is recorded as its frame count.

    Args:
        media_dir: Directory that contains the ``frames`` sub-directory.
        inputs: DataFrame iterated row by row; each row is indexed with
            ``"video"``.

    Returns:
        An ``Outputs`` frame with the columns ``frame_dir`` and
        ``num_frames``, one row per input row, in input order.
    """
    frames_root = os.path.join(media_dir, 'frames')

    def _describe(video_file):
        # Frame directories are named after the video file minus its extension.
        stem, _ext = os.path.splitext(video_file)
        target_dir = os.path.join(frames_root, stem)
        # os.listdir is called on every directory, so each one must exist.
        return target_dir, len(os.listdir(target_dir))

    records = [_describe(record["video"]) for _, record in inputs.iterrows()]
    return Outputs(records, columns=["frame_dir", "num_frames"])
48 changes: 24 additions & 24 deletions autovideo/utils/d3m_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -68,52 +68,52 @@ def build_pipeline(config):
pipeline_description = Pipeline()
pipeline_description.add_input(name='inputs')

#Step 0: Extract frames by extension
step_0 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.autovideo.common.extract_frames'))
#Step 0: Denormalise
step_0 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.autovideo.common.denormalize'))
step_0.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='inputs.0')
step_0.add_output('produce')
pipeline_description.add_step(step_0)

#Step 1: Denormalise
step_1 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.autovideo.common.denormalize'))
step_1.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='inputs.0')
#Step 1: Dataset to DataFrame
step_1 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.autovideo.common.dataset_to_dataframe'))
step_1.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference=f'steps.{step_0.index}.produce')
step_1.add_output('produce')
pipeline_description.add_step(step_1)

#Step 2: Dataset to DataFrame
step_2 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.autovideo.common.dataset_to_dataframe'))
step_2.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.1.produce')
#Step 2: Column Parser
step_2 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.autovideo.common.column_parser'))
step_2.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference=f'steps.{step_1.index}.produce')
step_2.add_output('produce')
pipeline_description.add_step(step_2)

#Step 3: Column Parser
step_3 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.autovideo.common.column_parser'))
step_3.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.2.produce')
#Step 3: Extract columns by semantic types - Attributes
step_3 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.autovideo.common.extract_columns_by_semantic_types'))
step_3.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference=f'steps.{step_2.index}.produce')
step_3.add_output('produce')
step_3.add_hyperparameter(name='semantic_types', argument_type=ArgumentType.VALUE,
data=['https://metadata.datadrivendiscovery.org/types/Attribute'])
pipeline_description.add_step(step_3)

#Step 4: Extract columns by semantic types - Attributes
#Step 4: Extract Columns by semantic types - Target
step_4 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.autovideo.common.extract_columns_by_semantic_types'))
step_4.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.3.produce')
step_4.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference=f'steps.{step_1.index}.produce')
step_4.add_output('produce')
step_4.add_hyperparameter(name='semantic_types', argument_type=ArgumentType.VALUE,
data=['https://metadata.datadrivendiscovery.org/types/Attribute'])
data=['https://metadata.datadrivendiscovery.org/types/TrueTarget'])
pipeline_description.add_step(step_4)

#Step 5: Extract Columns by semantic types - Target
step_5 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.autovideo.common.extract_columns_by_semantic_types'))
step_5.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.2.produce')
#Step 5: Extract frames by extension
step_5 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.autovideo.common.extract_frames'))
step_5.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference=f'steps.{step_3.index}.produce')
step_5.add_output('produce')
step_5.add_hyperparameter(name='semantic_types', argument_type=ArgumentType.VALUE,
data=['https://metadata.datadrivendiscovery.org/types/TrueTarget'])
pipeline_description.add_step(step_5)

#Step 6: Video primitive
algorithm = config.pop('algorithm', None)
alg_python_path = 'd3m.primitives.autovideo.recognition.' + algorithm
step_6 = PrimitiveStep(primitive=index.get_primitive(alg_python_path))
step_6.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.4.produce')
step_6.add_argument(name='outputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.5.produce')
step_6.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference=f'steps.{step_5.index}.produce')
step_6.add_argument(name='outputs', argument_type=ArgumentType.CONTAINER, data_reference=f'steps.{step_4.index}.produce')
# Add hyperparameters
for key, value in config.items():
step_6.add_hyperparameter(name=key, argument_type=ArgumentType.VALUE, data=value)
Expand All @@ -122,14 +122,14 @@ def build_pipeline(config):

#Step 7: Construct the predictions
step_7 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.autovideo.common.construct_predictions'))
step_7.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.6.produce')
step_7.add_argument(name='reference', argument_type=ArgumentType.CONTAINER, data_reference='steps.3.produce')
step_7.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference=f'steps.{step_6.index}.produce')
step_7.add_argument(name='reference', argument_type=ArgumentType.CONTAINER, data_reference=f'steps.{step_2.index}.produce')
step_7.add_output('produce')
step_7.add_hyperparameter(name = 'use_columns', argument_type=ArgumentType.VALUE, data = [0,1])
pipeline_description.add_step(step_7)

# Final Output
pipeline_description.add_output(name='output predictions', data_reference='steps.7.produce')
pipeline_description.add_output(name='output predictions', data_reference=f'steps.{step_7.index}.produce')

return pipeline_description

0 comments on commit 7135358

Please sign in to comment.