Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
166 changes: 164 additions & 2 deletions arkouda/categorical.py
Original file line number Diff line number Diff line change
@@ -1,15 +1,16 @@
from __future__ import annotations
from typing import cast, List, Union
from typing import cast, List, Optional, Union
import numpy as np # type: ignore
from typeguard import typechecked
from arkouda.strings import Strings
from arkouda.pdarrayclass import pdarray
from arkouda.pdarrayclass import pdarray, RegistrationError, unregister_pdarray_by_name
from arkouda.groupbyclass import GroupBy
from arkouda.pdarraycreation import zeros, zeros_like, arange
from arkouda.dtypes import resolve_scalar_dtype, str_scalars
from arkouda.dtypes import int64 as akint64
from arkouda.sorting import argsort
from arkouda.pdarraysetops import concatenate, in1d
from arkouda.logger import getArkoudaLogger

__all__ = ['Categorical']

Expand Down Expand Up @@ -46,11 +47,13 @@ class Categorical:

"""
BinOps = frozenset(["==", "!="])
RegisterablePieces = frozenset(["categories", "codes", "permutation", "segments"])
objtype = "category"
permutation = None
segments = None

def __init__(self, values, **kwargs) -> None:
self.logger = getArkoudaLogger(name=__class__.__name__) # type: ignore
if 'codes' in kwargs and 'categories' in kwargs:
# This initialization is called by Categorical.from_codes()
# The values arg is ignored
Expand All @@ -77,6 +80,7 @@ def __init__(self, values, **kwargs) -> None:
self.nlevels = self.categories.size
self.ndim = self.codes.ndim
self.shape = self.codes.shape
self.name : Optional[str] = None

@classmethod
@typechecked
Expand Down Expand Up @@ -529,3 +533,161 @@ def concatenate(self, others : List[Categorical], ordered : bool=True) -> Catego
ordered=ordered)
newvals = wherediditgo[oldvals]
return Categorical.from_codes(newvals, newidx)

@typechecked
def register(self, user_defined_name: str) -> Categorical:
    """
    Register this Categorical object and underlying components with the Arkouda server

    Parameters
    ----------
    user_defined_name : str
        user defined name the Categorical is to be registered under,
        this will be the root name for underlying components; each
        component is registered as "<user_defined_name>.<component>"

    Returns
    -------
    Categorical
        The same Categorical which is now registered with the arkouda server and has an updated name.
        This is an in-place modification, the original is returned to support a fluent programming style.
        Please note you cannot register two different Categoricals with the same name.

    Raises
    ------
    TypeError
        Raised if user_defined_name is not a str
    RegistrationError
        If the server was unable to register the Categorical with the user_defined_name,
        or if any registerable component of this Categorical is missing (None)

    See also
    --------
    unregister, attach, unregister_categorical_by_name, is_registered

    Notes
    -----
    Objects registered with the server are immune to deletion until
    they are unregistered.
    """
    # Resolve every component up front. permutation and segments default to
    # None at class level, so a Categorical may not carry all of them.
    pieces = {p: getattr(self, p) for p in Categorical.RegisterablePieces}

    # Fail before contacting the server: otherwise a None component would
    # raise AttributeError part-way through and leave a partially registered
    # Categorical, which is_registered() reports as an error state.
    missing = sorted(p for p, piece in pieces.items() if piece is None)
    if missing:
        raise RegistrationError(
            f"Cannot register Categorical as {user_defined_name}; "
            f"missing components: {', '.join(missing)}"
        )

    for p, piece in pieces.items():
        piece.register(f"{user_defined_name}.{p}")

    # Only record the name once every component has been registered
    self.name = user_defined_name
    return self

def unregister(self) -> None:
    """
    Unregister this Categorical object, and each of its registerable
    components, from the arkouda server. The object must previously have
    been registered using register() and/or attached to using attach()

    Raises
    ------
    RegistrationError
        If the object is already unregistered (it has no name) or if there
        is a server error when attempting to unregister

    See also
    --------
    register, attach, unregister_categorical_by_name, is_registered

    Notes
    -----
    Objects registered with the server are immune to deletion until
    they are unregistered.
    """
    # A Categorical without a name was never registered/attached
    if not self.name:
        raise RegistrationError("This item does not have a name and does not appear to be registered.")

    # Each component unregisters itself under its own registered name
    for piece_name in Categorical.RegisterablePieces:
        component = getattr(self, piece_name)
        component.unregister()

    # Clear our internal Categorical object name
    self.name = None

def is_registered(self) -> np.bool_:
    """
    Report whether this Categorical is contained in the registry

    Returns
    -------
    numpy.bool
        True iff every registerable component is registered, False if
        none of them are

    Raises
    ------
    RegistrationError
        Raised if there's a server-side error or a mis-match of registered
        components (some, but not all, components are registered)

    See Also
    --------
    register, attach, unregister, unregister_categorical_by_name

    Notes
    -----
    Objects registered with the server are immune to deletion until
    they are unregistered.
    """
    # Ask every component for its own registration status
    flags: List[np.bool_] = []
    for piece_name in Categorical.RegisterablePieces:
        flags.append(getattr(self, piece_name).is_registered())

    some_registered = np.any(flags)
    # A mix of registered and unregistered components is an error state
    if some_registered and not np.all(flags):
        raise RegistrationError(f"Not all registerable components of Categorical {self.name} are registered.")

    return np.bool_(some_registered)

@staticmethod
@typechecked
def attach(user_defined_name: str) -> Categorical:
    """
    Return a Categorical object attached to the components that were
    registered in the arkouda server under the given name using register()

    Parameters
    ----------
    user_defined_name : str
        user defined name which Categorical object was registered under

    Returns
    -------
    Categorical
        The Categorical object created by re-attaching to the corresponding server components

    Raises
    ------
    TypeError
        if user_defined_name is not a string

    See Also
    --------
    register, is_registered, unregister, unregister_categorical_by_name
    """
    # Re-attach each component through its own class; components are
    # registered as "<user_defined_name>.<component>"
    categories = Strings.attach(f"{user_defined_name}.categories")
    codes = pdarray.attach(f"{user_defined_name}.codes")
    permutation = pdarray.attach(f"{user_defined_name}.permutation")
    segments = pdarray.attach(f"{user_defined_name}.segments")

    # With codes and categories supplied, the constructor ignores the values arg
    attached = Categorical(None,
                           categories=categories,
                           codes=codes,
                           permutation=permutation,
                           segments=segments)
    attached.name = user_defined_name  # Update our name
    return attached

@staticmethod
@typechecked
def unregister_categorical_by_name(user_defined_name: str) -> None:
    """
    Unregister, by name, a Categorical object which was registered
    with the arkouda server via register()

    Parameters
    ----------
    user_defined_name : str
        Name under which the Categorical object was registered

    Raises
    ------
    TypeError
        if user_defined_name is not a string
    RegistrationError
        if there is an issue attempting to unregister any underlying components

    See Also
    --------
    register, unregister, attach, is_registered
    """
    # The categories component is a Strings object and is unregistered through Strings
    Strings.unregister_strings_by_name(f"{user_defined_name}.categories")

    # The remaining three components are plain pdarrays
    for piece_name in ("codes", "permutation", "segments"):
        unregister_pdarray_by_name(f"{user_defined_name}.{piece_name}")
32 changes: 15 additions & 17 deletions arkouda/pdarrayclass.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
__all__ = ["pdarray", "info", "clear", "any", "all", "is_sorted", "list_registry", "list_symbol_table",
"sum", "prod", "min", "max", "argmin", "argmax", "mean", "var", "std", "mink",
"maxk", "argmink", "argmaxk", "attach_pdarray",
"unregister_pdarray", "RegistrationError"]
"unregister_pdarray_by_name", "RegistrationError"]

logger = getArkoudaLogger(name='pdarrayclass')

Expand Down Expand Up @@ -1001,7 +1001,7 @@ def register(self, user_defined_name: str) -> pdarray:

See also
--------
attach, unregister
attach, unregister, is_registered, list_registry, unregister_pdarray_by_name

Notes
-----
Expand Down Expand Up @@ -1048,7 +1048,7 @@ def unregister(self) -> None:

See also
--------
register, unregister
register, attach, is_registered, unregister_pdarray_by_name, list_registry

Notes
-----
Expand All @@ -1064,8 +1064,8 @@ def unregister(self) -> None:
>>> # ...other work...
>>> b.unregister()
"""
unregister_pdarray(self)
unregister_pdarray_by_name(self.name)

# class method self is not passed in
# invoke with ak.pdarray.attach('user_defined_name')
@staticmethod
Expand All @@ -1092,7 +1092,7 @@ class method to return a pdarray attached to the registered name in the arkouda

See also
--------
register, unregister
register, unregister, is_registered, unregister_pdarray_by_name, list_registry

Notes
-----
Expand Down Expand Up @@ -1846,7 +1846,7 @@ class method to return a pdarray attached to the registered name in the arkouda

See also
--------
register, unregister_pdarray
register, unregister, is_registered, unregister_pdarray_by_name, list_registry

Notes
-----
Expand All @@ -1860,20 +1860,22 @@ class method to return a pdarray attached to the registered name in the arkouda
>>> # potentially disconnect from server and reconnect to server
>>> b = ak.attach_pdarray("my_zeros")
>>> # ...other work...
>>> ak.unregister_pdarray(b)
>>> b.unregister()
"""
repMsg = generic_msg(cmd="attach", args="{}".format(user_defined_name))
return create_pdarray(repMsg)


@typechecked
def unregister_pdarray(pda: Union[str,pdarray]) -> None:
def unregister_pdarray_by_name(user_defined_name:str) -> None:
"""
Unregister a pdarray in the arkouda server which was previously
Unregister a named pdarray in the arkouda server which was previously
registered using register() and/or attached to using attach_pdarray()

Parameters
----------
user_defined_name : str
user defined name which array was registered under

Returns
-------
Expand All @@ -1886,7 +1888,7 @@ def unregister_pdarray(pda: Union[str,pdarray]) -> None:

See also
--------
register, unregister_pdarray
register, unregister, is_registered, list_registry, attach

Notes
-----
Expand All @@ -1900,13 +1902,9 @@ def unregister_pdarray(pda: Union[str,pdarray]) -> None:
>>> # potentially disconnect from server and reconnect to server
>>> b = ak.attach_pdarray("my_zeros")
>>> # ...other work...
>>> ak.unregister_pdarray(b)
>>> ak.unregister_pdarray_by_name("my_zeros")
"""
if isinstance(pda, pdarray):
repMsg = generic_msg(cmd="unregister", args="{}".format(pda.name))

if isinstance(pda, str):
repMsg = generic_msg(cmd="unregister", args="{}".format(pda))
repMsg = generic_msg(cmd="unregister", args=user_defined_name)


# TODO In the future move this to a specific errors file
Expand Down
42 changes: 33 additions & 9 deletions arkouda/strings.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
from __future__ import annotations
from typing import cast, Tuple, Union
from typing import cast, Optional, Tuple, Union
from typeguard import typechecked
from arkouda.client import generic_msg
from arkouda.pdarrayclass import pdarray, create_pdarray, parse_single_value, list_registry
from arkouda.pdarrayclass import pdarray, create_pdarray, parse_single_value, unregister_pdarray_by_name, RegistrationError
from arkouda.logger import getArkoudaLogger
import numpy as np # type: ignore
from arkouda.dtypes import npstr, int_scalars, str_scalars
Expand Down Expand Up @@ -96,7 +96,7 @@ def __init__(self, offset_attrib : Union[pdarray,str],
raise ValueError(e)

self.dtype = npstr
self.name:Union[str, None] = None
self.name:Optional[str] = None
self.logger = getArkoudaLogger(name=__class__.__name__) # type: ignore

def __iter__(self):
Expand Down Expand Up @@ -846,7 +846,12 @@ def is_registered(self) -> np.bool_:
RuntimeError
Raised if there's a server-side error thrown
"""
return np.bool_(self.offsets.is_registered() and self.bytes.is_registered())
parts_registered = [np.bool_(self.offsets.is_registered()), self.bytes.is_registered()]
if np.any(parts_registered) and not np.all(parts_registered): # test for error
raise RegistrationError(f"Not all registerable components of Strings {self.name} are registered.")

return np.bool_(np.any(parts_registered))


@typechecked
def register(self, user_defined_name: str) -> Strings:
Expand Down Expand Up @@ -887,8 +892,8 @@ def register(self, user_defined_name: str) -> Strings:
Registered names/Strings objects in the server are immune to deletion
until they are unregistered.
"""
self.offsets.register(user_defined_name+'_offsets')
self.bytes.register(user_defined_name+'_bytes')
self.offsets.register(f"{user_defined_name}.offsets")
self.bytes.register(f"{user_defined_name}.bytes")
self.name = user_defined_name
return self

Expand All @@ -911,7 +916,7 @@ def unregister(self) -> None:

See also
--------
register, unregister
register, attach

Notes
-----
Expand All @@ -920,6 +925,7 @@ def unregister(self) -> None:
"""
self.offsets.unregister()
self.bytes.unregister()
self.name = None

@staticmethod
@typechecked
Expand Down Expand Up @@ -952,7 +958,25 @@ class method to return a Strings object attached to the registered name in the a
Registered names/Strings objects in the server are immune to deletion
until they are unregistered.
"""
s = Strings(pdarray.attach(user_defined_name+'_offsets'),
pdarray.attach(user_defined_name+'_bytes'))
s = Strings(pdarray.attach(f"{user_defined_name}.offsets"),
pdarray.attach(f"{user_defined_name}.bytes"))
s.name = user_defined_name
return s

@staticmethod
@typechecked
def unregister_strings_by_name(user_defined_name: str) -> None:
    """
    Unregister, by name, a Strings object in the arkouda server which was
    previously registered via register()

    Parameters
    ----------
    user_defined_name : str
        The registered name of the Strings object

    See also
    --------
    register, unregister, attach, is_registered
    """
    # A Strings object is registered as two pdarrays named
    # "<user_defined_name>.bytes" and "<user_defined_name>.offsets"
    for component in ("bytes", "offsets"):
        unregister_pdarray_by_name(f"{user_defined_name}.{component}")
Loading