brainscore_vision/model_helpers/activations/temporal/core/__init__.py
@@ -1,3 +1,3 @@
 from .extractor import ActivationsExtractor
-from .executor import BatchExecutor
+from .executor import BatchExecutor, OnlineExecutor
 from .inferencer import *

brainscore_vision/model_helpers/activations/temporal/core/executor.py (594 changes: 594 additions, 0 deletions)

Large diffs are not rendered by default.
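
The full 594-line executor.py is not rendered here. Based on its call sites elsewhere in this diff (construction in the inferencer base, add_stimuli/execute in OnlineTemporalInferencer), its public surface looks roughly like the sketch below. This is a reconstruction of the interface only, not the PR's implementation:

```python
from typing import Callable, Dict, Hashable, List
import numpy as np

class OnlineExecutor:
    """Interface sketch inferred from call sites; the real class lives in this
    PR's executor.py, which is not rendered above."""
    def __init__(self, get_activations: Callable, preprocessing: Callable,
                 batch_size: int, batch_padding: bool,
                 batch_grouper: Callable[..., Hashable], max_workers: int,
                 num_classes: int):
        # Mirrors BatchExecutor's positional signature, plus num_classes for
        # the online readout head.
        self.num_classes = num_classes
        self._stimuli = []  # queued (stimulus, label, train_flag) triples

    def add_stimuli(self, stimuli: List[tuple]) -> None:
        # Queue (stimulus, label, train_flag) triples for the next execute().
        self._stimuli.extend(stimuli)

    def execute(self, layers: List[str], train_flag: bool) -> Dict[str, List[np.ndarray]]:
        # Run the model over the queued stimuli (updating the readout when
        # train_flag is True) and return per-layer lists of activations.
        raise NotImplementedError  # interface sketch only
```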

brainscore_vision/model_helpers/activations/temporal/core/extractor.py
@@ -13,6 +13,7 @@
 from brainscore_vision.model_helpers.utils import fullname
 from result_caching import store_xarray
 from .inferencer import Inferencer
+from .inferencer.video import OnlineTemporalInferencer
 from ..inputs import Stimulus


@@ -69,7 +70,9 @@ def __call__(
         if number_of_trials is not None and (number_of_trials > 1 or require_variance):
             self._logger.warning("CAUTION: number_of_trials > 1 or require_variance=True is not supported yet. "
                                  "Bypassing...")
-        if isinstance(stimuli, StimulusSet):
+        if isinstance(self.inferencer, OnlineTemporalInferencer):
+            return self.online_stimulus_set(stimulus_set=stimuli, layers=layers, stimuli_identifier=stimuli_identifier)
+        elif isinstance(stimuli, StimulusSet):
             return self.from_stimulus_set(stimulus_set=stimuli, layers=layers, stimuli_identifier=stimuli_identifier)
         else:
             return self.from_paths(stimuli_paths=stimuli, layers=layers, stimuli_identifier=stimuli_identifier)
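
With an OnlineTemporalInferencer, every call now takes the online path; note that this branch forwards stimuli as stimulus_set=stimuli, so it assumes a StimulusSet even though the signature also accepts plain paths. A hedged sketch of the resulting call flow (`extractor` and `stimulus_set` are hypothetical):

```python
# Hedged sketch of the new dispatch above.
assembly = extractor(stimulus_set, layers=['features'])
# inferencer is an OnlineTemporalInferencer, so this resolves to:
#   extractor.online_stimulus_set(stimulus_set=stimulus_set,
#                                 layers=['features'], stimuli_identifier=None)
```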
@@ -116,6 +119,29 @@ def from_paths(
         activations = self._expand_paths(activations, original_paths=stimuli_paths)
         return activations

+    def online_stimulus_set(
+            self,
+            stimulus_set : StimulusSet,
+            layers : List[str],
+            stimuli_identifier : str = None,
+    ):
+        """
+        :param stimuli_identifier: a stimuli identifier for the stored results file.
+            False to disable saving. None to use `stimulus_set.identifier`.
+        """
+        if stimuli_identifier is None and hasattr(stimulus_set, 'identifier'):
+            stimuli_identifier = stimulus_set.identifier
+        for hook in self._stimulus_set_hooks.copy().values():  # copy to avoid stale handles
+            stimulus_set = hook(stimulus_set)
+        stimuli_paths = [(str(stimulus_set.get_stimulus(stimulus_id)), label, train_flag)
+                         for (stimulus_id, label, train_flag) in zip(stimulus_set['stimulus_id'],
+                                                                     stimulus_set['label'],
+                                                                     stimulus_set['train_flag'])
+                         ]
+        activations = self._from_paths(stimuli_paths=stimuli_paths, layers=layers)
+        activations = attach_stimulus_set_meta(activations, stimulus_set)
+        return activations

     @store_xarray(identifier_ignore=['stimuli_paths', 'layers'], combine_fields={'layers': 'layer'})
     def _from_paths_stored(self, identifier, layers, stimuli_identifier, stimuli_paths):
         stimuli_paths.sort()
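
The online path reads three columns from the StimulusSet: stimulus_id, label, and train_flag. A minimal sketch of a compatible StimulusSet (column names from the code above; paths and values are hypothetical):

```python
import pandas as pd
from brainio.stimuli import StimulusSet

# Hypothetical StimulusSet for the online path; only the three columns read
# by online_stimulus_set above are required beyond the usual metadata.
stimulus_set = StimulusSet(pd.DataFrame({
    'stimulus_id': ['clip_000', 'clip_001'],
    'label': [3, 7],              # per-video readout target
    'train_flag': [True, False],  # True rows train the online readout
}))
stimulus_set.stimulus_paths = {'clip_000': '/videos/clip_000.mp4',
                               'clip_001': '/videos/clip_001.mp4'}
stimulus_set.identifier = 'my_video_set'
```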
brainscore_vision/model_helpers/activations/temporal/inferencer/base.py
@@ -11,7 +11,7 @@
 from brainio.assemblies import NeuroidAssembly, walk_coords
 from brainscore_vision.model_helpers.utils import fullname

-from brainscore_vision.model_helpers.activations.temporal.core.executor import BatchExecutor
+from brainscore_vision.model_helpers.activations.temporal.core.executor import BatchExecutor, OnlineExecutor
 from brainscore_vision.model_helpers.activations.temporal.utils import stack_with_nan_padding, batch_2d_resize
 from brainscore_vision.model_helpers.activations.temporal.inputs import Stimulus

@@ -84,6 +84,8 @@ def __init__(
             batch_grouper : Callable[[Stimulus], Hashable] = None,
             batch_padding : bool = False,
             max_workers : int = None,
+            online_execution : bool = False,
+            num_classes : int = 1,
             *args,
             **kwargs
     ):
@@ -96,6 +98,9 @@
         self.visual_degrees = visual_degrees
         self.dtype = dtype
         self._executor = BatchExecutor(get_activations, preprocessing, batch_size, batch_padding, batch_grouper, max_workers)
+        if online_execution:
+            self._executor = OnlineExecutor(get_activations, preprocessing, batch_size, batch_padding, batch_grouper,
+                                            max_workers, num_classes)
         self._stimulus_set_hooks = {}
         self._batch_activations_hooks = {}
         self._logger = logging.getLogger(fullname(self))
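Note that `__init__` first constructs a BatchExecutor unconditionally and then replaces it when online_execution is set. A sketch of an equivalent construction that picks the executor class up front (behavior unchanged; names as in the diff):

```python
# Equivalent sketch: select the executor class before constructing it.
executor_args = (get_activations, preprocessing, batch_size, batch_padding,
                 batch_grouper, max_workers)
self._executor = (OnlineExecutor(*executor_args, num_classes) if online_execution
                  else BatchExecutor(*executor_args))
```
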
brainscore_vision/model_helpers/activations/temporal/inferencer/video/__init__.py
@@ -1,2 +1,2 @@
-from .base import TemporalInferencer
+from .base import TemporalInferencer, OnlineTemporalInferencer
 from .temporal_context import *
brainscore_vision/model_helpers/activations/temporal/inferencer/video/base.py
@@ -1,11 +1,17 @@
 import numpy as np
 from typing import Union, Tuple, Callable, Hashable, List, Dict
 from pathlib import Path
+from tqdm.auto import tqdm
+from collections import OrderedDict
+
+import gc

 from brainscore_vision.model_helpers.activations.temporal.inputs import Video, Stimulus
 from brainscore_vision.model_helpers.activations.temporal.utils import assembly_align_to_fps, stack_with_nan_padding
+from brainio.assemblies import NeuroidAssembly
+
+from brainscore_vision.model_helpers.activations.temporal.core.executor import OnlineExecutor

 from ..base import Inferencer
 from . import time_aligner as time_aligners

@@ -132,3 +138,152 @@ def _check_video(self, video: Video):
             assert self.num_frames[0] <= estimated_num_frames <= self.num_frames[1], f"The number of frames must be within {self.num_frames}, but got {estimated_num_frames}"
         if self.duration is not None:
             assert self.duration[0] <= video.duration <= self.duration[1], f"The duration must be within {self.duration}, but got {video.duration}"


+class OnlineTemporalInferencer(Inferencer):
+    """Inferencer for video stimuli. The model takes video stimuli as input and generates activations over time.
+    The activations are then aligned to video time by the time_aligner specified in the constructor, resampled
+    to the fps specified in the constructor (self.fps), and finally packaged into a NeuroidAssembly.
+
+    NOTE: for all time_alignment methods, time bins are inferred from the longest video only,
+    ignoring all other input videos.
+
+    Example:
+        temporal_inferencer = OnlineTemporalInferencer(..., fps=10)
+        model_assembly = temporal_inferencer(video_paths[1000ms], layers)
+        model_assembly.time_bins -> [(0, 100), (100, 200), ..., (900, 1000)]  # 1000ms, 10fps
+
+    Parameters
+    ----------
+    fps: float
+        frame rate of the model sampling.
+
+    num_frames: int, or (int, int)
+        - If None, the model accepts videos of any length.
+        - If a single int is passed, specifies how many frames the model takes.
+        - If a tuple of two ints is passed, specifies the range of the number of frames the model takes (inclusive). Use np.inf for an unbounded maximum.
+
+    duration: float, or (float, float)
+        - If None, the model accepts videos of any length.
+        - If a single float is passed, specifies the duration of video the model takes, in ms.
+        - If a tuple of two floats is passed, specifies the range of durations the model takes (inclusive). Use np.inf for an unbounded maximum.
+
+    time_alignment: str
+        specifies the method used to align the activations in time.
+        The options and specifications are in the time_aligners module. The current options are:
+        - evenly_spaced: align the activations to have evenly spaced time bins across the whole video time span.
+        - ignore_time: ignore the time information and make a single time bin spanning the entire video.
+        - estimate_layer_fps: estimate the fps of the layer based on the video fps.
+        - per_frame_aligned: align the activations to the video frames.
+
+    convert_img_to_video: bool
+        whether to convert input images to videos.
+    img_duration: float
+        duration of the converted images, in ms. Only takes effect if convert_img_to_video is True.
+    batch_size: int
+        number of stimuli to process in each batch.
+    batch_grouper: function
+        function that takes a stimulus and returns the property by which stimuli can be grouped.
+    """
+    def __init__(
+            self,
+            *args,
+            fps : float,
+            num_frames : Union[int, Tuple[int, int]] = None,
+            duration : Union[float, Tuple[float, float]] = None,
+            time_alignment : str = "evenly_spaced",
+            convert_img_to_video : bool = True,
+            img_duration : float = 1000.0,
+            batch_size : int = 32,
+            online_execution : bool = False,
+            batch_grouper : Callable[[Video], Hashable] = lambda video: (round(video.duration, 6), video.fps),  # not including video.frame_size because most preprocessors will change the frame size to be the same
+            **kwargs,
+    ):
+        super().__init__(*args, stimulus_type=Video, batch_size=batch_size,
+                         batch_grouper=batch_grouper, online_execution=online_execution,
+                         **kwargs)
+        # the executor (BatchExecutor or OnlineExecutor) is chosen in the parent constructor
+        self.fps = fps
+        self.num_frames = self._make_range(num_frames, type="num_frames")
+        self.duration = self._make_range(duration, type="duration")
+        assert hasattr(time_aligners, time_alignment), f"Unknown time alignment method: {time_alignment}"
+        self.time_aligner = getattr(time_aligners, time_alignment)
+
+        if convert_img_to_video:
+            assert img_duration is not None, "img_duration should be specified if convert_img_to_video is True"
+        self.img_duration = img_duration
+        self.convert_to_video = convert_img_to_video
+
+    @property
+    def identifier(self) -> str:
+        id = f"{super().identifier}.{self.time_aligner.__name__}.fps={float(self.fps)}"
+        if self.convert_to_video:
+            id += f".img_dur={float(self.img_duration)}"
+        return id
+
+    def load_stimulus(self, path: Union[str, Path]) -> Video:
+        path, label, train_flag = path
+        if self.convert_to_video and Stimulus.is_image_path(path):
+            video = Video.from_img_path(path, self.img_duration, self.fps)
+        else:
+            video = Video.from_path(path)
+        video = video.set_fps(self.fps)
+        self._check_video(video)
+        return video, label, train_flag
+
+    # given the (path, label, train_flag) triples and the layers, return the model activations as a NeuroidAssembly
+    def __call__(self, paths: List[Union[str, Path]], layers: List[str]):
+        stimuli = self.load_stimuli(paths)
+        paths = [path for (path, label, train_flag) in paths]
+        layer_activations = self.inference(stimuli, layers)
+        stimuli = [stim for (stim, label, train_flag) in stimuli]
+        layer_assemblies = OrderedDict()
+        for layer in tqdm(layers, desc="Packaging layers"):
+            layer_assemblies[layer] = self.package_layer(layer_activations[layer], self.layer_activation_format[layer], stimuli)
+            del layer_activations[layer]
+            gc.collect()  # reduce memory usage
+        model_assembly = self.package(layer_assemblies, paths)
+        return model_assembly
+
+    # process the list of stimuli and return the activations (list of np.array,
+    # whose length is the number of stimuli) of the specified layers
+    def inference(self, stimuli : List[Stimulus], layers : List[str]) -> Dict[str, List[np.array]]:
+        self._executor.add_stimuli(stimuli)
+        train_flag = stimuli[0][2]
+        return self._executor.execute(layers, train_flag)
+
+    def package_layer(
+            self,
+            layer_activations : List[np.array],
+            layer_spec : str,
+            stimuli : List[Stimulus]
+    ):
+        assert len(layer_activations) == len(stimuli)
+        longest_stimulus = stimuli[np.argmax(np.array([stimulus.duration for stimulus in stimuli]))]
+        ignore_time = self.time_aligner is time_aligners.ignore_time
+        channels = self._map_dims(layer_spec)
+        layer_activations = stack_with_nan_padding(layer_activations)
+        assembly = self._package(layer_activations, ["stimulus_path"] + channels)
+        # align to the longest stimulus
+        assembly = self.time_aligner(assembly, longest_stimulus)
+        if "channel_temporal" in channels and not ignore_time:
+            channels.remove("channel_temporal")
+        assembly = self._stack_neuroid(assembly, channels)
+        if not ignore_time:
+            assembly = assembly_align_to_fps(assembly, self.fps)
+        return assembly
+
+    def _make_range(self, num, type="num_frames"):
+        if num is None:
+            return (1 if type == 'num_frames' else 0, np.inf)
+        if isinstance(num, (tuple, list)):
+            return num
+        else:
+            return (num, num)
+
+    def _check_video(self, video: Video):
+        if self.num_frames is not None:
+            estimated_num_frames = int(self.fps * video.duration / 1000)
+            assert self.num_frames[0] <= estimated_num_frames <= self.num_frames[1], f"The number of frames must be within {self.num_frames}, but got {estimated_num_frames}"
+        if self.duration is not None:
+            assert self.duration[0] <= video.duration <= self.duration[1], f"The duration must be within {self.duration}, but got {video.duration}"
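
For orientation, a hedged end-to-end sketch of the new inferencer; `get_activations`, `preprocessing`, and the layer spec are hypothetical placeholders, and stimuli are the (path, label, train_flag) triples unpacked by load_stimulus above:

```python
# Hedged usage sketch, not code from this PR.
inferencer = OnlineTemporalInferencer(
    get_activations, preprocessing,
    layer_activation_format={'features': 'CTHW'},  # hypothetical layer spec
    fps=10, num_frames=(1, 64),
    online_execution=True,
)
paths = [('/videos/a.mp4', 0, True), ('/videos/b.mp4', 1, True)]
assembly = inferencer(paths, layers=['features'])
# _check_video's frame estimate: a 1000 ms video at fps=10
# -> int(10 * 1000 / 1000) = 10 frames, which must fall within num_frames.
```
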
brainscore_vision/model_helpers/brain_transformation/__init__.py (13 changes: 8 additions, 5 deletions)
@@ -2,7 +2,7 @@
 from brainscore_vision.model_helpers.brain_transformation.temporal import TemporalAligned
 from brainscore_vision.model_interface import BrainModel
 from brainscore_vision.utils import LazyLoad
-from .behavior import BehaviorArbiter, LabelBehavior, ProbabilitiesMapping, OddOneOut
+from .behavior import BehaviorArbiter, LabelBehavior, ProbabilitiesMapping, OddOneOut, VideoReadoutMapping
 from .neural import LayerMappedModel, LayerSelection, LayerScores

 STANDARD_REGION_BENCHMARKS = {
@@ -21,7 +21,7 @@ class ModelCommitment(BrainModel):

     def __init__(self, identifier,
                  activations_model, layers, behavioral_readout_layer=None, region_layer_map=None,
-                 visual_degrees=8):
+                 visual_degrees=8, num_classes=1):
         self.layers = layers
         self.activations_model = activations_model
         # We set the visual degrees of the ActivationsExtractorHelper here to avoid changing its signature.
@@ -46,9 +46,12 @@ def __init__(self, identifier,
                                                    layer=behavioral_readout_layer)
         odd_one_out = OddOneOut(identifier=identifier, activations_model=activations_model,
                                 layer=behavioral_readout_layer)
+        video_readout_behavior = VideoReadoutMapping(identifier=identifier, activations_model=activations_model,
+                                                     layer=behavioral_readout_layer, num_classes=num_classes)
         self.behavior_model = BehaviorArbiter({BrainModel.Task.label: logits_behavior,
                                                BrainModel.Task.probabilities: probabilities_behavior,
                                                BrainModel.Task.odd_one_out: odd_one_out,
+                                               BrainModel.Task.video_readout: video_readout_behavior,
                                                })
         self.do_behavior = False

@@ -62,9 +65,9 @@ def start_task(self, task: BrainModel.Task, *args, **kwargs):
         else:
             self.do_behavior = False

-    def look_at(self, stimuli, number_of_trials: int = 1, require_variance: bool = False):
+    def look_at(self, stimuli, number_of_trials: int = 1, require_variance: bool = False, **kwargs):
         if self.do_behavior:
-            return self.behavior_model.look_at(stimuli, number_of_trials=number_of_trials, require_variance=require_variance)
+            return self.behavior_model.look_at(stimuli, number_of_trials=number_of_trials, require_variance=require_variance, **kwargs)
         else:
             return self.layer_model.look_at(stimuli, number_of_trials=number_of_trials)

@@ -94,4 +97,4 @@ def __getitem__(self, region):
     def commit_region(self, region):
         benchmark = self.region_benchmarks[region]
         best_layer = self.layer_selection(selection_identifier=region, benchmark=benchmark)
-        self[region] = best_layer
\ No newline at end of file
+        self[region] = best_layer
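
Composed at the BrainModel level, a hedged sketch of the new task wiring (identifier, layers, and activations_model are placeholders; BrainModel.Task.video_readout and VideoReadoutMapping are assumed to be defined alongside this PR):

```python
# Hedged sketch; placeholders throughout.
model = ModelCommitment(
    identifier='my-temporal-model',
    activations_model=activations_model,  # e.g. built with online_execution=True
    layers=['features'],
    behavioral_readout_layer='features',
    num_classes=10,                       # forwarded to VideoReadoutMapping
)
model.start_task(BrainModel.Task.video_readout)
predictions = model.look_at(stimulus_set)  # extra kwargs now pass through look_at
```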