From 53a83eb267241ada7777235b48c34abfc119a8a5 Mon Sep 17 00:00:00 2001
From: Ryan Ly <rly@lbl.gov>
Date: Thu, 28 May 2020 17:20:31 -0700
Subject: [PATCH] Make labels an attribute of label_keys, update tests and spec

---
 README.md                           |  9 ++++-
 spec/ndx-events.extensions.yaml     | 44 +++++++++++-----------
 src/pynwb/ndx_events/events.py      |  1 -
 src/pynwb/ndx_events/io/events.py   | 11 +++++-
 src/pynwb/tests/unit/test_events.py |  2 +-
 src/spec/create_extension_spec.py   | 58 ++++++++++++++---------------
 6 files changed, 70 insertions(+), 55 deletions(-)

diff --git a/README.md b/README.md
index d574d72..4348f90 100644
--- a/README.md
+++ b/README.md
@@ -1,8 +1,15 @@
 # ndx-events Extension for NWB
 
+This is an NWB extension for storing event information and TTL pulses. Events can be:
+1. Simple events. These are stored in the `Events` type. The `Events` type consists of only a name, a description,
+and a 1D array of timestamps. This should be used instead of a `TimeSeries` when the time series has no data.
+2. Labeled events. These are stored in the `LabeledEvents` type. The `LabeledEvents` type expands on the `Events` type
+by adding a 1D array of integer label keys, with the same length as the timestamps, and a 1D array of labels. The
+label keys are indices into the array of labels. This can be used to encode more information about individual events,
+such as the reward values for reward events.
+
 ## Installation
 
-TODO:
 ```
 pip install ndx-events
 ```
diff --git a/spec/ndx-events.extensions.yaml b/spec/ndx-events.extensions.yaml
index 044c0ea..9fb6b9e 100644
--- a/spec/ndx-events.extensions.yaml
+++ b/spec/ndx-events.extensions.yaml
@@ -30,14 +30,14 @@ groups:
   doc: A list of timestamps, stored in seconds, of an event that can have different
     labels. For example, this type could represent the times that reward was given,
     as well as which of three different types of reward was given. In this case, the
-    'label_keys' dataset would contain values {0, 1, 2}, and the 'labels' dataset
-    would contain three text elements, where the first (index 0) specifies the name
-    of the reward associated with a label_keys = 0, the second (index 1) specifies
-    the name of the reward associated with a label_keys = 1, etc. The labels do not
-    have to start at 0 and do not need to be sequential, e.g. the 'label_keys' dataset
-    could contain values {0, 10, 100}, and the 'labels' dataset could contain 101
-    values, where labels[0] is 'No reward', labels[10] is '10% reward', labels[100]
-    is 'Full reward', and all other entries in 'labels' are the empty string.
+    'label_keys' dataset would contain values {0, 1, 2}, its 'labels' attribute would
+    contain three text elements, where the first (index 0) specifies the name of the
+    reward associated with label_keys = 0, the second (index 1) specifies the name
+    of the reward associated with label_keys = 1, etc. The labels do not have to start
+    at 0 and do not need to be continuous, e.g. the 'label_keys' dataset could contain
+    values {0, 10, 100}, and the 'labels' attribute could contain 101 values, where
+    labels[0] is 'No reward', labels[10] is '10% reward', labels[100] is 'Full reward',
+    and all other entries in 'labels' are the empty string.
   datasets:
   - name: label_keys
     dtype: uint8
@@ -45,19 +45,19 @@ groups:
     - num_events
     shape:
     - null
-    doc: Integer labels that map onto strings using the mapping in the 'labels' dataset.
-      Values must be 0 or greater and need not be sequential. This dataset should
-      have the same number of elements as the 'timestamps' dataset.
-  - name: labels
-    dtype: text
-    dims:
-    - num_labels
-    shape:
-    - null
-    doc: Mapping from an integer (the zero-based index) to a string, used to understand
-      the integer values in the 'label_keys' dataset. Use an empty string to represent
-      a label value that is not mapped to any text. Use '' to represent any values
-      that are None or empty.
+    doc: Integer labels that map onto strings using the mapping in the 'labels' array
+      attribute. This dataset should have the same number of elements as the 'timestamps'
+      dataset.
+    attributes:
+    - name: labels
+      dtype: text
+      dims:
+      - num_labels
+      shape:
+      - null
+      doc: Mapping from an unsigned integer (the zero-based index) to a string, used
+        to understand the values in the 'label_keys' dataset. Use an empty string
+        to represent a label value that is not mapped to any text.
 - neurodata_type_def: TTLs
   neurodata_type_inc: LabeledEvents
   doc: Data type to hold timestamps of TTL pulses. The 'label_keys' dataset contains
@@ -90,7 +90,7 @@ groups:
     neurodata_type_inc: VectorData
     dtype: text
     doc: Label for each event type.
-  - name: description
+  - name: event_description
     neurodata_type_inc: VectorData
     dtype: text
     doc: Description for each event type.
diff --git a/src/pynwb/ndx_events/events.py b/src/pynwb/ndx_events/events.py
index 1e788cb..985386a 100644
--- a/src/pynwb/ndx_events/events.py
+++ b/src/pynwb/ndx_events/events.py
@@ -140,5 +140,4 @@ def __init__(self, **kwargs):
             allow_extra=True)
     def add_event_type(self, **kwargs):
         """Add an event type as a row to this table."""
-        # TODO columns do not exist and are hitting table.py line 377 for a name clash
         super().add_row(**kwargs)
diff --git a/src/pynwb/ndx_events/io/events.py b/src/pynwb/ndx_events/io/events.py
index c39b89e..48357ca 100644
--- a/src/pynwb/ndx_events/io/events.py
+++ b/src/pynwb/ndx_events/io/events.py
@@ -6,7 +6,7 @@
 from hdmf.utils import getargs, docval
 from hdmf.spec import AttributeSpec
 
-from ..events import Events, AnnotatedEvents
+from ..events import Events, LabeledEvents, AnnotatedEvents
 
 
 @register_map(Events)
@@ -19,6 +19,15 @@ def __init__(self, spec):
         self.map_spec('resolution', timestamps_spec.get_attribute('resolution'))
 
 
+@register_map(LabeledEvents)
+class LabeledEventsMap(EventsMap):
+    """Mapper for LabeledEvents; maps 'labels' to the 'labels' attribute of the 'label_keys' dataset spec."""
+    def __init__(self, spec):
+        super().__init__(spec)
+        label_keys_spec = self.spec.get_dataset('label_keys')
+        self.map_spec('labels', label_keys_spec.get_attribute('labels'))
+
+
 @register_map(AnnotatedEvents)
 class AnnotatedEventsMap(DynamicTableMap):
 
diff --git a/src/pynwb/tests/unit/test_events.py b/src/pynwb/tests/unit/test_events.py
index c8e09e4..5a96090 100644
--- a/src/pynwb/tests/unit/test_events.py
+++ b/src/pynwb/tests/unit/test_events.py
@@ -120,7 +120,7 @@ def test_add_event_type(self):
             id=3
         )
         self.assertEqual(events.id.data, [3])
-        self.assertEqual(events.event_times.data, [1., 2., 3.])
+        self.assertEqual(events['event_times'][0], [1., 2., 3.])
         self.assertEqual(events.colnames, ('event_times', 'label', 'event_description'))
         self.assertEqual(len(events.columns), 4)
         self.assertEqual(events.columns[0].name, 'event_times_index')
diff --git a/src/spec/create_extension_spec.py b/src/spec/create_extension_spec.py
index 054dc86..a8f64f3 100644
--- a/src/spec/create_extension_spec.py
+++ b/src/spec/create_extension_spec.py
@@ -33,14 +33,14 @@ def main():
                 name='unit',
                 dtype='text',
                 value='seconds',
-                doc="Unit of measurement for timestamps, which is fixed to 'seconds'."
+                doc="Unit of measurement for timestamps, which is fixed to 'seconds'.",
             ),
             NWBAttributeSpec(
                 name='resolution',
                 dtype='float32',
                 doc=('The smallest possible difference between two event times. Usually 1 divided by the event time '
                      'sampling rate on the data acquisition system.'),
-                required=False
+                required=False,
             )
         ]
     )
@@ -53,30 +53,30 @@ def main():
             NWBAttributeSpec(
                 name='description',
                 dtype='text',
-                doc='Description of the event.'
-            )
+                doc='Description of the event.',
+            ),
         ],
         datasets=[timestamps]
     )
 
-    label_keys = NWBDatasetSpec(
-        name='label_keys',
-        dtype='uint8',
-        dims=['num_events'],
-        shape=[None],
-        doc=("Integer labels that map onto strings using the mapping in the 'labels' dataset. Values must "
-             "be 0 or greater and need not be sequential. This dataset should have the same number of elements "
-             "as the 'timestamps' dataset.")
-    )
-
-    labels = NWBDatasetSpec(
+    labels = NWBAttributeSpec(
         name='labels',
         dtype='text',
         dims=['num_labels'],
         shape=[None],
-        doc=("Mapping from an integer (the zero-based index) to a string, used to understand the integer "
+        doc=("Mapping from an unsigned integer (the zero-based index) to a string, used to understand the "
              "values in the 'label_keys' dataset. Use an empty string to represent a label value that is not "
-             "mapped to any text. Use '' to represent any values that are None or empty.")
+             "mapped to any text."),
+    )
+
+    label_keys = NWBDatasetSpec(
+        name='label_keys',
+        dtype='uint8',
+        dims=['num_events'],
+        shape=[None],
+        doc=("Integer labels that map onto strings using the mapping in the 'labels' array attribute. This dataset "
+             "should have the same number of elements as the 'timestamps' dataset."),
+        attributes=[labels],
     )
 
     labeled_events = NWBGroupSpec(
@@ -85,13 +85,13 @@ def main():
         doc=("A list of timestamps, stored in seconds, of an event that can have different labels. For example, "
              "this type could represent the times that reward was given, as well as which of three different "
              "types of reward was given. In this case, the 'label_keys' dataset would contain values {0, 1, 2}, "
-             "and the 'labels' dataset would contain three text elements, where the first (index 0) specifies the "
-             "name of the reward associated with a label_keys = 0, the second (index 1) specifies the name of the "
-             "reward associated with a label_keys = 1, etc. The labels do not have to start at 0 and do not need to "
-             "be sequential, e.g. the 'label_keys' dataset could contain values {0, 10, 100}, and the 'labels' "
-             "dataset could contain 101 values, where labels[0] is 'No reward', labels[10] is '10% reward', "
+             "its 'labels' attribute would contain three text elements, where the first (index 0) specifies the "
+             "name of the reward associated with label_keys = 0, the second (index 1) specifies the name of the "
+             "reward associated with label_keys = 1, etc. The labels do not have to start at 0 and do not need to "
+             "be continuous, e.g. the 'label_keys' dataset could contain values {0, 10, 100}, and the 'labels' "
+             "attribute could contain 101 values, where labels[0] is 'No reward', labels[10] is '10% reward', "
              "labels[100] is 'Full reward', and all other entries in 'labels' are the empty string."),
-        datasets=[label_keys, labels]
+        datasets=[label_keys],
     )
 
     ttls = NWBGroupSpec(
@@ -101,7 +101,7 @@ def main():
              "and the 'labels' dataset contains user-defined labels associated with each pulse value. The value at "
              "index n of the 'labels' dataset corresponds to a pulse value of n. For example, the first value (index "
              "0) of the 'labels' dataset corresponds to a pulse value of 0. See the LabeledEvents type for more "
-             "details.")
+             "details."),
     )
 
     event_times_index = NWBDatasetSpec(
@@ -121,8 +121,8 @@ def main():
                 dtype='float32',
                 doc=('The smallest possible difference between two event times. Usually 1 divided by the event time '
                      'sampling rate on the data acquisition system.'),
-                required=False
-            )
+                required=False,
+            ),
         ],
     )
 
@@ -134,7 +134,7 @@ def main():
     )
 
     description_col = NWBDatasetSpec(
-        name='description',
+        name='event_description',
         neurodata_type_inc='VectorData',
         dtype='text',
         doc='Description for each event type.',
@@ -144,9 +144,9 @@ def main():
         neurodata_type_def='AnnotatedEvents',
         neurodata_type_inc='DynamicTable',
         doc=("Table to hold event timestamps and event metadata relevant to data preprocessing and analysis. Each "
-             "row corresponds to a different event type. Use the 'event_time' dataset to store timestamps for each "
+             "row corresponds to a different event type. Use the 'event_times' dataset to store timestamps for each "
              "event type. Add user-defined columns to add metadata for each event type or event time."),
-        datasets=[event_times_index, event_times, label_col, description_col]
+        datasets=[event_times_index, event_times, label_col, description_col],
     )
 
     new_data_types = [events, labeled_events, ttls, annotated_events]