Skip to content

Commit

Permalink
added n_components and K as user-controlled parameters
Browse files Browse the repository at this point in the history
  • Loading branch information
michael-alperovich committed Jan 10, 2022
1 parent 0374b80 commit 9863d78
Show file tree
Hide file tree
Showing 3 changed files with 19 additions and 8 deletions.
7 changes: 4 additions & 3 deletions ddqc/ddqc.py
Original file line number Diff line number Diff line change
Expand Up @@ -134,15 +134,16 @@ def _boxplot_sorted(df, column, by, hline_x=None, log=False):
# n_genes_lower_bound - maximum cutoff for n genes
# percent_mito_upper_bound - minimum cutoff for percent mito
# return_df_qc: return a dataframe with cluster labels and thresholds for each metric
def ddqc_metrics(data: MultimodalData, res=1.3, method="mad", threshold=2, basic_n_genes=100, basic_percent_mito=80,
mito_prefix="MT-",
def ddqc_metrics(data: MultimodalData, res=1.3, n_components=50, K=20, method="mad", threshold=2, basic_n_genes=100,
basic_percent_mito=80, mito_prefix="MT-",
ribo_prefix="^RP[SL][[:digit:]]|^RPLP[[:digit:]]|^RPSA", do_counts=True, do_genes=True, do_mito=True,
do_ribo=False, n_genes_lower_bound=200, percent_mito_upper_bound=10, random_state=29,
return_df_qc=False,
display_plots=True) -> Union[None, pd.DataFrame]:
assert isinstance(data, MultimodalData)
obs_copy, var_copy, uns_copy = _cluster_data(data, basic_n_genes, basic_percent_mito, mito_prefix, ribo_prefix,
resolution=res, random_state=random_state)
resolution=res, n_components=n_components, K=K,
random_state=random_state)

df_qc = pd.DataFrame({"cluster_labels": data.obs["louvain_labels"].values}, index=data.obs_names)

Expand Down
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@

setup(
name='ddqc',
version='0.1.0',
version='0.1.1',
description='Description',
url='https://github.com/ayshwaryas/ddqc',
author='Author',
Expand Down
18 changes: 14 additions & 4 deletions tutorials/ddqc_tutorial.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -108,7 +108,7 @@
" - **Percent of mitochondrial transcripts**: keep cells that have percent_mito less than median + 2 MADs\n",
"- In order to prevent the removal of healthy cells in clusters with high median n_genes and low percent_mito there are additional bounds for those thresholds:\n",
" - Cluster-level threshold for n_genes can't be greater than 200 (default). If it is greater, it will be set to 200.\n",
" - Cluster-level threshold for percent_mito can't be lower than 10 (default). If it is lower, it will be set to 200."
" - Cluster-level threshold for percent_mito can't be lower than 10 (default). If it is lower, it will be set to 10."
]
},
{
Expand All @@ -124,6 +124,8 @@
"source": [
"If you want to customize the filtering you can use the following parameters:\n",
"- `res`: float - clustering resolution (default: 1.3)\n",
"- `n_components`: int - number of PCA components (default: 50)\n",
"- `K`: int - number of nearest neighbors passed to the Pegasus `neighbors` function (default: 20)\n",
"- `method`: string - statistic on which the threshold would be calculated (default: \"mad\", available options \"mad\", \"outlier\")\n",
"- `threshold`: float - parameter for the selected method (default: 2)\n",
"- `basic_n_genes`: int - parameter for the initial QC n_genes filtering (default: 100)\n",
Expand Down Expand Up @@ -635,7 +637,11 @@
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"metadata": {
"pycharm": {
"name": "#%%\n"
}
},
"outputs": [
{
"name": "stdout",
Expand All @@ -653,7 +659,11 @@
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"metadata": {
"pycharm": {
"name": "#%%\n"
}
},
"outputs": [],
"source": []
}
Expand All @@ -679,4 +689,4 @@
},
"nbformat": 4,
"nbformat_minor": 4
}
}

0 comments on commit 9863d78

Please sign in to comment.