From 9426edaae9418a0fa89963f965ca38c5be738e69 Mon Sep 17 00:00:00 2001
From: Johannes Ojanen <johannes.ojanen@gmail.com>
Date: Wed, 3 Jan 2024 14:46:34 +0200
Subject: [PATCH] Update README, requirements, setup.py version, and tutorial notebook; remove empty license.md

---
 README.md                |  5 +--
 license.md               |  0
 requirements.txt         |  5 +--
 setup.py                 |  1 +
 tutorials/tutorial.ipynb | 85 +++++++++++++++++++---------------------
 5 files changed, 45 insertions(+), 51 deletions(-)
 delete mode 100644 license.md

diff --git a/README.md b/README.md
index 84c9cef..b6177f1 100644
--- a/README.md
+++ b/README.md
@@ -10,13 +10,12 @@ Any and all comments/criticism/suggestions enthusiastically received! :-)
 
 ## Required packages
 
-- numpy
 - pandas
-- scrublet    
 - loompy
 - scanpy
-- anndata
 - scikit-learn
+- scrublet
+
 
 
 ## Installation
diff --git a/license.md b/license.md
deleted file mode 100644
index e69de29..0000000
diff --git a/requirements.txt b/requirements.txt
index f6e3b95..746daba 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,8 +1,5 @@
-numpy
 pandas
-scrublet
 loompy
 scanpy
-anndata
 scikit-learn
-leidenalg
\ No newline at end of file
+scrublet
diff --git a/setup.py b/setup.py
index a8bfb87..a894825 100644
--- a/setup.py
+++ b/setup.py
@@ -3,6 +3,7 @@
 
 setup(
     name='qclus',
+    version='0.1.0',
     description='Description',
     url='https://github.com/johannesojanen/qclus',
     author='Eloi Schauch and Johannes Ojanen',
diff --git a/tutorials/tutorial.ipynb b/tutorials/tutorial.ipynb
index 2e6edd8..1dc96d4 100644
--- a/tutorials/tutorial.ipynb
+++ b/tutorials/tutorial.ipynb
@@ -13,13 +13,16 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 1,
+   "execution_count": 4,
    "metadata": {},
    "outputs": [],
    "source": [
+    "import sys\n",
+    "sys.path.append('..')\n",
+    "\n",
     "import qclus\n",
+    "from qclus.gene_lists import *\n",
     "import scanpy as sc\n",
-    "import numpy as np\n",
     "import pandas as pd\n",
     "\n",
     "import warnings\n",
@@ -28,60 +31,54 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 2,
+   "execution_count": 5,
    "metadata": {},
    "outputs": [],
    "source": [
-    "counts_path = \"../samples/filtered_feature_bc_matrix_66.h5\"\n",
-    "loompy_path = \"../samples/counts_counts_CAD_66.loom\"\n",
+    "counts_path = \"../../samples/filtered_feature_bc_matrix_739.h5\"\n",
+    "loompy_path = \"../../samples/counts_counts_CAD_739.loom\"\n",
+    "\n",
     "adata = qclus.run_qclus(counts_path, \n",
-    "                        loompy_path,\n",
+    "                        loompy_path, \n",
+    "                        gene_set_dict=celltype_gene_set_dict, \n",
+    "                        nucl_gene_set=nucl_genes_50, \n",
+    "                        minimum_genes=500, \n",
+    "                        maximum_genes=6000, \n",
+    "                        max_mito_perc=40, \n",
+    "                        clustering_features=['pct_counts_nonCM', \n",
+    "                                        'pct_counts_nucl_30', \n",
+    "                                        'pct_counts_MT', \n",
+    "                                        'pct_counts_CM_cyto', \n",
+    "                                        'pct_counts_CM_nucl', \n",
+    "                                        'fraction_unspliced'], \n",
     "                        clustering_k=4, \n",
     "                        clusters_to_select=[\"0\", \"1\", \"2\"], \n",
+    "                        scrublet_filter=True,\n",
+    "                        scrublet_expected_rate=0.06, \n",
+    "                        scrublet_minimum_counts=2, \n",
+    "                        scrublet_minimum_cells=3, \n",
+    "                        scrublet_minimum_gene_variability_pctl=85, \n",
+    "                        scrublet_n_pcs=30, \n",
+    "                        scrublet_thresh=0.1, \n",
+    "                        outlier_filter=True, \n",
     "                        outlier_unspliced_diff=0.1, \n",
-    "                        outlier_mito_diff=5)\n",
-    "\n",
-    "\n",
-    "run_qclus(counts_path, loompy_path, \n",
-    "                    gene_set_dict=celltype_gene_set_dict, \n",
-    "                    nucl_gene_set=nucl_genes_50, \n",
-    "                    minimum_genes=500, \n",
-    "                    maximum_genes=6000, \n",
-    "                    max_mito_perc=40, \n",
-    "                    clustering_features=['pct_counts_nonCM', \n",
-    "                                    'pct_counts_nucl_30', \n",
-    "                                    'pct_counts_MT', \n",
-    "                                    'pct_counts_CM_cyto', \n",
-    "                                    'pct_counts_CM_nucl', \n",
-    "                                    'fraction_unspliced'], \n",
-    "                    clustering_k=4, \n",
-    "                    clusters_to_select=[\"0\", \"1\", \"2\"], \n",
-    "                    scrublet_filter=True,\n",
-    "                    scrublet_expected_rate=0.06, \n",
-    "                    scrublet_minimum_counts=2, \n",
-    "                    scrublet_minimum_cells=3, \n",
-    "                    scrublet_minimum_gene_variability_pctl=85, \n",
-    "                    scrublet_n_pcs=30, \n",
-    "                    scrublet_thresh=0.1, \n",
-    "                    outlier_filter=True, \n",
-    "                    outlier_unspliced_diff=0.1, \n",
-    "                    outlier_mito_diff=5)"
+    "                        outlier_mito_diff=5)"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 3,
+   "execution_count": 6,
    "metadata": {},
    "outputs": [
     {
      "data": {
       "text/plain": [
-       "AnnData object with n_obs × n_vars = 31670 × 36601\n",
+       "AnnData object with n_obs × n_vars = 3404 × 36601\n",
        "    obs: 'fraction_unspliced', 'pct_counts_MT', 'total_counts', 'n_genes_by_counts', 'qclus'\n",
        "    var: 'gene_ids', 'feature_types', 'genome'"
       ]
      },
-     "execution_count": 3,
+     "execution_count": 6,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -92,21 +89,21 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 5,
+   "execution_count": 7,
    "metadata": {},
    "outputs": [
     {
      "data": {
       "text/plain": [
-       "outlier filter       11197\n",
-       "passed                8989\n",
-       "clustering filter     6542\n",
-       "scrublet filter       4819\n",
-       "initial filter         123\n",
+       "passed               2389\n",
+       "initial filter        338\n",
+       "scrublet filter       274\n",
+       "outlier filter        229\n",
+       "clustering filter     174\n",
        "Name: qclus, dtype: int64"
       ]
      },
-     "execution_count": 5,
+     "execution_count": 7,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -414,7 +411,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.9.16"
+   "version": "3.9.13"
   },
   "orig_nbformat": 4,
   "vscode": {