Mutable variables! #9

Merged · 1 commit · Jan 29, 2014
55 changes: 12 additions & 43 deletions src/polyglot/backends.py
@@ -57,30 +57,10 @@ def sync(self):


class ScipyVariable(variable.Variable):

def __init__(self, scipy_var):
object.__setattr__(self, 'v', scipy_var)

def _allocate(self):
return variable.Variable(dims=(), data=0)

@property
def attributes(self):
return self.v._attributes

def __getattribute__(self, key):
"""
Here we give some of the attributes of self.data preference over
attributes in the object itself.
"""
if key == 'v':
return object.__getattribute__(self, 'v')
elif hasattr(self.v, key):
return object.__getattribute__(self.v, key)
elif not hasattr(self, key) and hasattr(self.v.data, key):
return getattr(self.v.data, key)
else:
return object.__getattribute__(self, key)
self._dimensions = scipy_var.dimensions
self._data = scipy_var.data
self._attributes = scipy_var._attributes
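
For context on what the simplified ScipyVariable leans on: scipy.io.netcdf variables already expose `dimensions`, `data` and `_attributes`, which is all the wrapper copies now. A rough sketch below; the file name is made up and the exact `scipy.io.netcdf` spelling varies between SciPy releases, so treat it as illustration rather than part of the PR.

```python
# Illustration only: the three fields the simplified ScipyVariable copies
# already exist on scipy.io.netcdf variables.
import numpy as np
from scipy.io import netcdf

f = netcdf.netcdf_file('example.nc', 'w')   # hypothetical file name
f.createDimension('x', 3)
v = f.createVariable('v', 'f8', ('x',))
v[:] = np.arange(3.0)
f.close()

f = netcdf.netcdf_file('example.nc', 'r')
scipy_var = f.variables['v']
print(scipy_var.dimensions)    # ('x',)
print(scipy_var.data)          # [ 0.  1.  2.]
print(scipy_var._attributes)   # {}
f.close()
```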


class ScipyDataStore(object):
@@ -145,12 +125,10 @@ def sync(self):
class NetCDF4Variable(variable.Variable):

def __init__(self, nc4_variable):
object.__setattr__(self, 'data',
variable.LazyVariableData(nc4_variable))
object.__setattr__(self, '_attributes', None)

def _allocate(self):
return variable.Variable(dims=(), data=0)
self._nc4_variable = nc4_variable
self._dimensions = nc4_variable.dimensions
self._data = nc4_variable
self._attributes = None

@property
def attributes(self):
@@ -166,22 +144,13 @@ def attributes(self):
# you would find that any packed variables in the original
# netcdf file would now have been scaled twice!
packing_attributes = ['scale_factor', 'add_offset']
keys = [k for k in self.ncattrs() if not k in packing_attributes]
attr_dict = variable.AttributesDict((k, self.data.getncattr(k))
for k in keys)
object.__setattr__(self, '_attributes', attr_dict)
keys = [k for k in self._nc4_variable.ncattrs()
if not k in packing_attributes]
attr_dict = variable.AttributesDict(
(k, self._nc4_variable.getncattr(k)) for k in keys)
self._attributes = attr_dict
return self._attributes
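
Putting the scale_factor/add_offset comment above into numbers — this sketch is not from the PR, it just shows the double-unpacking that filtering those attributes avoids:

```python
# netCDF4 already applies scale_factor/add_offset when reading packed data,
# so copying those attributes onto the unpacked values and applying them
# again would scale the data twice.
import numpy as np

packed = np.array([10, 20, 30], dtype='int16')
scale_factor, add_offset = 0.5, 100.0

unpacked_once = packed * scale_factor + add_offset          # what netCDF4 hands back
unpacked_twice = unpacked_once * scale_factor + add_offset  # the bug being avoided
print(unpacked_once)    # [ 105.  110.  115.]
print(unpacked_twice)   # [ 152.5  155.   157.5]
```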

def __getattr__(self, attr):
"""__getattr__ is overloaded to selectively expose some of the
attributes of the underlying nc4 variable"""
if attr == 'data':
return object.__getattribute__(self, 'data')
elif hasattr(self.data, attr):
return getattr(self.data, attr)
else:
return object.__getattribute__(self, attr)


class NetCDF4DataStore(object):

6 changes: 0 additions & 6 deletions src/polyglot/data.py
@@ -604,12 +604,6 @@ def update(self, other):
# if a dimension is a new one it gets added, if the dimension already
# exists we confirm that they are identical (or throw an exception)
for (name, length) in other.dimensions.iteritems():
if (name == other.record_dimension and
name != self.record_dimension):
raise ValueError(
("record dimensions do not match: "
"self: %s, other: %s") %
(self.record_dimension, other.record_dimension))
if not name in self.dimensions:
self.create_dimension(name, length)
else:
233 changes: 107 additions & 126 deletions src/polyglot/variable.py
@@ -90,98 +90,142 @@ def __eq__(self, other):
return True


def _expand_key(key, ndim):
"""Given a key for getting an item from an ndarray, expand the key to an
equivalent key which is a tuple with length equal to the number of
dimensions
"""
if not isinstance(key, tuple):
key = (key,)
new_key = [slice(None)] * ndim
new_key[:len(key)] = key
return tuple(new_key)
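
A quick usage sketch of _expand_key — this assumes the package layout implied by src/polyglot/variable.py is importable as polyglot.variable:

```python
# Partial keys are padded with full slices so every dimension gets an entry.
from polyglot.variable import _expand_key  # assumed import path

print(_expand_key(0, 3))
# (0, slice(None, None, None), slice(None, None, None))
print(_expand_key((0, slice(2)), 3))
# (0, slice(None, 2, None), slice(None, None, None))
```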


class Variable(object):
"""
A netcdf-like variable consisting of dimensions, data and attributes
which describe a single variable. A single variable object is not
fully described outside the context of its parent Dataset.
"""
def __init__(self, dims, data, attributes=None):
object.__setattr__(self, 'dimensions', dims)
object.__setattr__(self, 'data', data)
if len(dims) != data.ndim:
raise ValueError('data must have same shape as the number of '
'dimensions')
self._dimensions = tuple(dims)
self._data = data
if attributes is None:
attributes = {}
object.__setattr__(self, 'attributes', AttributesDict(attributes))
self._attributes = AttributesDict(attributes)

def _allocate(self):
return self.__class__(dims=(), data=0)
@property
def dimensions(self):
return self._dimensions

def __getattribute__(self, key):
@property
def data(self):
"""
Here we give some of the attributes of self.data preference over
attributes in the object itself.
The variable's data as a numpy.ndarray
"""
if key in ['dtype', 'shape', 'size', 'ndim', 'nbytes',
'flat', '__iter__', 'view']:
return getattr(self.data, key)
else:
return object.__getattribute__(self, key)

def __setattr__(self, attr, value):
""""__setattr__ is overloaded to prevent operations that could
cause loss of data consistency. If you really intend to update
dir(self), use the self.__dict__.update method or the
super(type(a), self).__setattr__ method to bypass."""
raise AttributeError, "Object is tamper-proof"
if not isinstance(self._data, np.ndarray):
self._data = np.asarray(self._data[...])
return self._data

@data.setter
def data(self, value):
value = np.asarray(value)
if value.shape != self.shape:
raise ValueError("replacement data must match the Variable's "
"shape")
self._data = value
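
Rough sketch of the lazy coercion in the new data property; LazyArray is a made-up stand-in for something like a netCDF4 variable, and the polyglot import path is assumed, not confirmed by the diff:

```python
# Anything array-like is only turned into an ndarray the first time .data is read.
import numpy as np
from polyglot import variable  # assumed import path

class LazyArray(object):
    ndim = 1                     # enough for Variable's dimension check
    def __getitem__(self, key):
        print('loading data now')
        return np.arange(3)[key]

v = variable.Variable(('x',), LazyArray())
print(v.dimensions)  # ('x',) -- nothing loaded yet
print(v.data)        # prints 'loading data now', then [0 1 2]
```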

@property
def dtype(self):
return self._data.dtype

@property
def shape(self):
return self._data.shape

@property
def size(self):
return self._data.size

@property
def ndim(self):
return self._data.ndim

def __delattr__(self, attr):
raise AttributeError, "Object is tamper-proof"
def __len__(self):
return len(self._data)

def __getitem__(self, index):
"""__getitem__ is overloaded to access the underlying numpy data"""
return self.data[index]
def __getitem__(self, key):
"""
Return a new Variable object whose contents are consistent with getting
the provided key from the underlying data
"""
key = _expand_key(key, self.ndim)
dimensions = [dim for k, dim in zip(key, self.dimensions)
if not isinstance(k, int)]
return Variable(dimensions, self._data[key], self.attributes)
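
Indexing behaviour in the new __getitem__, sketched under the same assumed import:

```python
# Integer keys drop their dimension, slices keep it, and the result is a
# new Variable rather than a bare array.
import numpy as np
from polyglot import variable  # assumed import path

v = variable.Variable(('x', 'y'), np.arange(6).reshape(2, 3))
print(v[0].dimensions)      # ('y',)
print(v[:, :2].dimensions)  # ('x', 'y')
print(v[:, :2].shape)       # (2, 2)
```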

def __setitem__(self, index, data):
def __setitem__(self, key, value):
"""__setitem__ is overloaded to access the underlying numpy data"""
self.data[index] = data
self.data[key] = value

def __iter__(self):
"""
Iterate over the contents of this Variable
"""
for n in range(len(self)):
yield self[n]

def __hash__(self):
"""__hash__ is overloaded to guarantee that two variables with the same
attributes and np.data values have the same hash (the converse is not true)"""
return hash((self.dimensions,
frozenset((k,v.tostring()) if isinstance(v,np.ndarray) else (k,v)
for (k,v) in self.attributes.items()),
self.data.tostring()))
@property
def attributes(self):
return self._attributes

def __len__(self):
"""__len__ is overloaded to access the underlying numpy data"""
return self.data.__len__()
def copy(self):
"""
Returns a shallow copy of the current object.
"""
return self.__copy__()

def _copy(self, deepcopy=False):
# deepcopies should always be of a numpy view of the data, not the data
# itself, because non-memory backends don't necessarily have deepcopy
# defined sensibly (this is a problem for netCDF4 variables)
data = copy.deepcopy(self.data) if deepcopy else self._data
# note:
# dimensions is already an immutable tuple
# attributes will be copied when the new Variable is created
return Variable(self.dimensions, data, self.attributes)

def __copy__(self):
"""
Returns a shallow copy of the current object.
"""
# Create the simplest possible dummy object and then overwrite it
obj = self._allocate()
object.__setattr__(obj, 'dimensions', self.dimensions)
object.__setattr__(obj, 'data', self.data)
object.__setattr__(obj, 'attributes', self.attributes)
return obj
return self._copy(deepcopy=False)

def __deepcopy__(self, memo=None):
"""
Returns a deep copy of the current object.

memo does nothing but is required for compatibility with copy.deepcopy
"""
# Create the simplest possible dummy object and then overwrite it
obj = self._allocate()
# tuples are immutable
object.__setattr__(obj, 'dimensions', self.dimensions)
object.__setattr__(obj, 'data', self.data[:].copy())
object.__setattr__(obj, 'attributes', self.attributes.copy())
return obj
return self._copy(deepcopy=True)
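
The shallow/deep split in _copy, sketched under the same assumed import:

```python
# Shallow copies share the underlying array; deep copies get their own.
import copy
import numpy as np
from polyglot import variable  # assumed import path

v = variable.Variable(('x',), np.arange(3))
shallow = copy.copy(v)
deep = copy.deepcopy(v)
shallow.data[0] = 99
print(v.data[0])     # 99 -- data is shared with the shallow copy
print(deep.data[0])  # 0  -- the deep copy owns its data
```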

# mutable objects should not be hashable
__hash__ = None

def __eq__(self, other):
if self.dimensions != other.dimensions or \
(self.data.tostring() != other.data.tostring()):
return False
if not self.attributes == other.attributes:
try:
return (self.dimensions == other.dimensions
and np.all(self.data == other.data)
and self.attributes == other.attributes)
except AttributeError:
return False
return True

def __ne__(self, other):
return not self.__eq__(other)
return not self == other

def __str__(self):
"""Create a ncdump-like summary of the object"""
@@ -230,10 +274,7 @@ def views(self, slicers):
for i, dim in enumerate(self.dimensions):
if dim in slicers:
slices[i] = slicers[dim]
# Shallow copy
obj = copy.copy(self)
object.__setattr__(obj, 'data', self.data[slices])
return obj
return self[tuple(slices)]

def view(self, s, dim):
"""Return a new Variable object whose contents are a view of the object
@@ -244,9 +285,7 @@ def view(self, s, dim):
s : slice
The slice representing the range of the values to extract.
dim : string
The dimension to slice along. If multiple dimensions equal
dim (e.g. a correlation matrix), then the slicing is done
only along the first matching dimension.
The dimension to slice along.

Returns
-------
@@ -261,7 +300,7 @@ def view(self, s, dim):
--------
take
"""
return self.views({dim : s})
return self.views({dim: s})
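
A small sketch of slicing by dimension name via view/views (same assumed import; the shapes follow from the slices applied above):

```python
import numpy as np
from polyglot import variable  # assumed import path

v = variable.Variable(('x', 'y'), np.arange(6).reshape(2, 3))
print(v.view(slice(1), dim='y').shape)                # (2, 1)
print(v.views({'x': slice(1), 'y': slice(2)}).shape)  # (1, 2)
```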

def take(self, indices, dim):
"""Return a new Variable object whose contents are sliced from
@@ -293,65 +332,7 @@ def take(self, indices, dim):
raise ValueError('indices should have a single dimension')
# When dim appears repeatedly in self.dimensions, using the index()
# method gives us only the first one, which is the desired behavior
axis = list(self.dimensions).index(dim)
# Deep copy
obj = copy.deepcopy(self)
# In case data is lazy we need to slice out all the data before taking.
object.__setattr__(obj, 'data', self.data[:].take(indices, axis=axis))
return obj

class LazyVariableData(object):
"""
This object wraps around a Variable object (though
it only really makes sense to use it with a class that
extends variable.Variable). The result masquerades as
variable data, but doesn't actually try accessing the
data until indexing is attempted.

For example, imagine you have some variable that was
derived from an opendap dataset, 'nc'.

var = nc['massive_variable']

if you wanted to check the data type of var:

var.data.dtype

you would find that it might involve downloading all
of the actual data, then inspecting the resulting
numpy array. But with this wrapper calling:

nc['massive_variable'].data.someattribute

will first inspect the Variable object to see if it has
the desired attribute and only then will it suck down the
actual numpy array and request 'someattribute'.
"""
def __init__(self, lazy_variable):
self.lazyvar = lazy_variable

def __eq__(self, other):
return self.lazyvar[:] == other

def __ne__(self, other):
return self.lazyvar[:] != other

def __getitem__(self, key):
return self.lazyvar[key]

def __setitem__(self, key, value):
if not isinstance(self.lazyvar, Variable):
self.lazyvar = Variable(self.lazyvar.dimensions,
data = self.lazyvar[:],
dtype = self.lazyvar.dtype,
shape = self.lazyvar.shape,
attributes = self.lazyvar.attributes)
self.lazyvar.__setitem__(key, value)

def __getattr__(self, attr):
"""__getattr__ is overloaded to selectively expose some of the
attributes of the underlying lazy variable"""
if hasattr(self.lazyvar, attr):
return getattr(self.lazyvar, attr)
else:
return getattr(self.lazyvar[:], attr)
axis = self.dimensions.index(dim)
# take only works on actual numpy arrays
data = self.data.take(indices, axis=axis)
return Variable(self.dimensions, data, self.attributes)
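
And a sketch of take along a named dimension, again under the assumed polyglot import:

```python
# Fancy-index along a dimension by name instead of by positional axis.
import numpy as np
from polyglot import variable  # assumed import path

v = variable.Variable(('x', 'y'), np.arange(6).reshape(2, 3))
w = v.take(np.array([2, 0]), dim='y')
print(w.dimensions)  # ('x', 'y')
print(w.data)
# [[2 0]
#  [5 3]]
```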