Make labels an attribute of label_keys, update tests and spec

rly · May 29, 2020 · 53a83eb · 53a83eb
1 parent ec18a4e
commit 53a83eb
Show file tree

Hide file tree

Showing 6 changed files with 70 additions and 55 deletions.
diff --git a/README.md b/README.md
@@ -1,8 +1,15 @@
 # ndx-events Extension for NWB
 
+This is an NWB extension for storing event information and TTL pulses. Events can be:
+1. Simple events. These are stored in the `Events` type. The `Events` type consists of only a name, a description,
+and a 1D array of timestamps. This type should be used instead of a `TimeSeries` when the time series has no data.
+2. Labeled events. These are stored in the `LabeledEvents` type. The `LabeledEvents` type expands on the `Events` type
+by adding a 1D array of integer label keys, with the same length as the timestamps, and a 1D array of labels. The label
+keys are indices into the array of labels. This can be used to encode more information about individual events, such as
+the reward values for reward events.
+
 ## Installation
 
-TODO:
 ```
 pip install ndx-events
 ```

diff --git a/spec/ndx-events.extensions.yaml b/spec/ndx-events.extensions.yaml
@@ -30,34 +30,34 @@ groups:
   doc: A list of timestamps, stored in seconds, of an event that can have different
     labels. For example, this type could represent the times that reward was given,
     as well as which of three different types of reward was given. In this case, the
-    'label_keys' dataset would contain values {0, 1, 2}, and the 'labels' dataset
-    would contain three text elements, where the first (index 0) specifies the name
-    of the reward associated with a label_keys = 0, the second (index 1) specifies
-    the name of the reward associated with a label_keys = 1, etc. The labels do not
-    have to start at 0 and do not need to be sequential, e.g. the 'label_keys' dataset
-    could contain values {0, 10, 100}, and the 'labels' dataset could contain 101
-    values, where labels[0] is 'No reward', labels[10] is '10% reward', labels[100]
-    is 'Full reward', and all other entries in 'labels' are the empty string.
+    'label_keys' dataset would contain values {0, 1, 2}, its 'labels' attribute would
+    contain three text elements, where the first (index 0) specifies the name of the
+    reward associated with label_keys = 0, the second (index 1) specifies the name
+    of the reward associated with label_keys = 1, etc. The labels do not have to start
+    at 0 and do not need to be continuous, e.g. the 'label_keys' dataset could contain
+    values {0, 10, 100}, and the 'labels' attribute could contain 101 values, where
+    labels[0] is 'No reward', labels[10] is '10% reward', labels[100] is 'Full reward',
+    and all other entries in 'labels' are the empty string.
   datasets:
   - name: label_keys
     dtype: uint8
     dims:
     - num_events
     shape:
     - null
-    doc: Integer labels that map onto strings using the mapping in the 'labels' dataset.
-      Values must be 0 or greater and need not be sequential. This dataset should
-      have the same number of elements as the 'timestamps' dataset.
-  - name: labels
-    dtype: text
-    dims:
-    - num_labels
-    shape:
-    - null
-    doc: Mapping from an integer (the zero-based index) to a string, used to understand
-      the integer values in the 'label_keys' dataset. Use an empty string to represent
-      a label value that is not mapped to any text. Use '' to represent any values
-      that are None or empty.
+    doc: Integer labels that map onto strings using the mapping in the 'labels' array
+      attribute. This dataset should have the same number of elements as the 'timestamps'
+      dataset.
+    attributes:
+    - name: labels
+      dtype: text
+      dims:
+      - num_labels
+      shape:
+      - null
+      doc: Mapping from an unsigned integer (the zero-based index) to a string, used
+        to understand the values in the 'label_keys' dataset. Use an empty string
+        to represent a label value that is not mapped to any text.
 - neurodata_type_def: TTLs
   neurodata_type_inc: LabeledEvents
   doc: Data type to hold timestamps of TTL pulses. The 'label_keys' dataset contains
@@ -90,7 +90,7 @@ groups:
     neurodata_type_inc: VectorData
     dtype: text
     doc: Label for each event type.
-  - name: description
+  - name: event_description
     neurodata_type_inc: VectorData
     dtype: text
     doc: Description for each event type.
diff --git a/src/pynwb/ndx_events/events.py b/src/pynwb/ndx_events/events.py
@@ -140,5 +140,4 @@ def __init__(self, **kwargs):
             allow_extra=True)
     def add_event_type(self, **kwargs):
         """Add an event type as a row to this table."""
-        # TODO columns do not exist and are hitting table.py line 377 for a name clash
         super().add_row(**kwargs)
diff --git a/src/pynwb/ndx_events/io/events.py b/src/pynwb/ndx_events/io/events.py
@@ -6,7 +6,7 @@
 from hdmf.utils import getargs, docval
 from hdmf.spec import AttributeSpec
 
-from ..events import Events, AnnotatedEvents
+from ..events import Events, LabeledEvents, AnnotatedEvents
 
 
 @register_map(Events)
@@ -19,6 +19,15 @@ def __init__(self, spec):
         self.map_spec('resolution', timestamps_spec.get_attribute('resolution'))
 
 
+@register_map(LabeledEvents)
+class LabeledEventsMap(EventsMap):
+
+    def __init__(self, spec):
+        super().__init__(spec)
+        label_keys_spec = self.spec.get_dataset('label_keys')
+        self.map_spec('labels', label_keys_spec.get_attribute('labels'))
+
+
 @register_map(AnnotatedEvents)
 class AnnotatedEventsMap(DynamicTableMap):
 

diff --git a/src/pynwb/tests/unit/test_events.py b/src/pynwb/tests/unit/test_events.py
@@ -120,7 +120,7 @@ def test_add_event_type(self):
             id=3
         )
         self.assertEqual(events.id.data, [3])
-        self.assertEqual(events.event_times.data, [1., 2., 3.])
+        self.assertEqual(events['event_times'][0], [1., 2., 3.])
         self.assertEqual(events.colnames, ('event_times', 'label', 'event_description'))
         self.assertEqual(len(events.columns), 4)
         self.assertEqual(events.columns[0].name, 'event_times_index')

diff --git a/src/spec/create_extension_spec.py b/src/spec/create_extension_spec.py
@@ -33,14 +33,14 @@ def main():
                 name='unit',
                 dtype='text',
                 value='seconds',
-                doc="Unit of measurement for timestamps, which is fixed to 'seconds'."
+                doc="Unit of measurement for timestamps, which is fixed to 'seconds'.",
             ),
             NWBAttributeSpec(
                 name='resolution',
                 dtype='float32',
                 doc=('The smallest possible difference between two event times. Usually 1 divided by the event time '
                      'sampling rate on the data acquisition system.'),
-                required=False
+                required=False,
             )
         ]
     )
@@ -53,30 +53,30 @@ def main():
             NWBAttributeSpec(
                 name='description',
                 dtype='text',
-                doc='Description of the event.'
-            )
+                doc='Description of the event.',
+            ),
         ],
         datasets=[timestamps]
     )
 
-    label_keys = NWBDatasetSpec(
-        name='label_keys',
-        dtype='uint8',
-        dims=['num_events'],
-        shape=[None],
-        doc=("Integer labels that map onto strings using the mapping in the 'labels' dataset. Values must "
-             "be 0 or greater and need not be sequential. This dataset should have the same number of elements "
-             "as the 'timestamps' dataset.")
-    )
-
-    labels = NWBDatasetSpec(
+    labels = NWBAttributeSpec(
         name='labels',
         dtype='text',
         dims=['num_labels'],
         shape=[None],
-        doc=("Mapping from an integer (the zero-based index) to a string, used to understand the integer "
+        doc=("Mapping from an unsigned integer (the zero-based index) to a string, used to understand the "
              "values in the 'label_keys' dataset. Use an empty string to represent a label value that is not "
-             "mapped to any text. Use '' to represent any values that are None or empty.")
+             "mapped to any text."),
+    )
+
+    label_keys = NWBDatasetSpec(
+        name='label_keys',
+        dtype='uint8',
+        dims=['num_events'],
+        shape=[None],
+        doc=("Integer labels that map onto strings using the mapping in the 'labels' array attribute. This dataset "
+             "should have the same number of elements as the 'timestamps' dataset."),
+        attributes=[labels],
     )
 
     labeled_events = NWBGroupSpec(
@@ -85,13 +85,13 @@ def main():
         doc=("A list of timestamps, stored in seconds, of an event that can have different labels. For example, "
              "this type could represent the times that reward was given, as well as which of three different "
              "types of reward was given. In this case, the 'label_keys' dataset would contain values {0, 1, 2}, "
-             "and the 'labels' dataset would contain three text elements, where the first (index 0) specifies the "
-             "name of the reward associated with a label_keys = 0, the second (index 1) specifies the name of the "
-             "reward associated with a label_keys = 1, etc. The labels do not have to start at 0 and do not need to "
-             "be sequential, e.g. the 'label_keys' dataset could contain values {0, 10, 100}, and the 'labels' "
-             "dataset could contain 101 values, where labels[0] is 'No reward', labels[10] is '10% reward', "
+             "its 'labels' attribute would contain three text elements, where the first (index 0) specifies the "
+             "name of the reward associated with label_keys = 0, the second (index 1) specifies the name of the "
+             "reward associated with label_keys = 1, etc. The labels do not have to start at 0 and do not need to "
+             "be continuous, e.g. the 'label_keys' dataset could contain values {0, 10, 100}, and the 'labels' "
+             "attribute could contain 101 values, where labels[0] is 'No reward', labels[10] is '10% reward', "
              "labels[100] is 'Full reward', and all other entries in 'labels' are the empty string."),
-        datasets=[label_keys, labels]
+        datasets=[label_keys],
     )
 
     ttls = NWBGroupSpec(
@@ -101,7 +101,7 @@ def main():
              "and the 'labels' dataset contains user-defined labels associated with each pulse value. The value at "
              "index n of the 'labels' dataset corresponds to a pulse value of n. For example, the first value (index "
              "0) of the 'labels' dataset corresponds to a pulse value of 0. See the LabeledEvents type for more "
-             "details.")
+             "details."),
     )
 
     event_times_index = NWBDatasetSpec(
@@ -121,8 +121,8 @@ def main():
                 dtype='float32',
                 doc=('The smallest possible difference between two event times. Usually 1 divided by the event time '
                      'sampling rate on the data acquisition system.'),
-                required=False
-            )
+                required=False,
+            ),
         ],
     )
 
@@ -134,7 +134,7 @@ def main():
     )
 
     description_col = NWBDatasetSpec(
-        name='description',
+        name='event_description',
         neurodata_type_inc='VectorData',
         dtype='text',
         doc='Description for each event type.',
@@ -144,9 +144,9 @@ def main():
         neurodata_type_def='AnnotatedEvents',
         neurodata_type_inc='DynamicTable',
         doc=("Table to hold event timestamps and event metadata relevant to data preprocessing and analysis. Each "
-             "row corresponds to a different event type. Use the 'event_time' dataset to store timestamps for each "
+             "row corresponds to a different event type. Use the 'event_times' dataset to store timestamps for each "
              "event type. Add user-defined columns to add metadata for each event type or event time."),
-        datasets=[event_times_index, event_times, label_col, description_col]
+        datasets=[event_times_index, event_times, label_col, description_col],
     )
 
     new_data_types = [events, labeled_events, ttls, annotated_events]