Skip to content

Commit

Permalink
Drop unused categories in ExperimentAxisQuery.to_anndata (#204)
Browse files Browse the repository at this point in the history
Co-authored-by: John Kerl <kerl.john.r@gmail.com>
  • Loading branch information
nguyenv and johnkerl authored Jul 24, 2024
1 parent d9e9f70 commit 5c0c30d
Showing 1 changed file with 17 additions and 1 deletion.
18 changes: 17 additions & 1 deletion python-spec/src/somacore/query/query.py
Original file line number Diff line number Diff line change
Expand Up @@ -277,6 +277,7 @@ def to_anndata(
obsp_layers: Sequence[str] = (),
varm_layers: Sequence[str] = (),
varp_layers: Sequence[str] = (),
drop_levels: bool = False,
) -> anndata.AnnData:
"""
Executes the query and returns the result as an in-memory ``AnnData`` object.
Expand All @@ -295,10 +296,14 @@ def to_anndata(
Additional varm layers to read and return in the varm slot.
varp_layers:
Additional varp layers to read and return in the varp slot.
drop_levels:
Whether to drop unused categories from categorical columns of the
axis dataframes (``obs`` and ``var``). Defaults to False: categories that are
present in the SOMA Experiment but absent from the query output are kept.
Lifecycle: maturing
"""
return self._read(
ad = self._read(
X_name,
column_names=column_names or AxisColumnNames(obs=None, var=None),
X_layers=X_layers,
Expand All @@ -308,6 +313,17 @@ def to_anndata(
varp_layers=varp_layers,
).to_anndata()

# Drop unused categories on axis dataframes if requested
if drop_levels:
for name in ad.obs:
if pd.api.types.is_categorical_dtype(ad.obs[name]):
ad.obs[name] = ad.obs[name].cat.remove_unused_categories()
for name in ad.var:
if pd.api.types.is_categorical_dtype(ad.var[name]):
ad.var[name] = ad.var[name].cat.remove_unused_categories()

return ad

# Context management

def close(self) -> None:
Expand Down

0 comments on commit 5c0c30d

Please sign in to comment.