|
54 | 54 | }
|
55 | 55 | ],
|
56 | 56 | "source": [
|
| 57 | + "import os\n", |
| 58 | + "import tempfile\n", |
| 59 | + "\n", |
| 60 | + "import matplotlib.pyplot as plt\n", |
57 | 61 | "import mudata\n",
|
58 |
| - "import scanpy as sc\n", |
59 | 62 | "import numpy as np\n",
|
60 |
| - "import matplotlib.pyplot as plt\n", |
| 63 | + "import scanpy as sc\n", |
61 | 64 | "import scvi\n",
|
62 | 65 | "import seaborn as sns\n",
|
63 | 66 | "import torch\n",
|
64 |
| - "import os\n", |
65 |
| - "import tempfile\n", |
66 |
| - "\n", |
67 | 67 | "from scvi.external import METHYLVI"
|
68 | 68 | ]
|
69 | 69 | },
|
|
155 | 155 | "if not os.path.exists(mdata_path):\n",
|
156 | 156 | " os.system(f\"wget -q -O {mdata_path} https://figshare.com/ndownloader/files/49632108\")\n",
|
157 | 157 | "\n",
|
158 |
| - "mdata = mudata.read_h5mu(\n", |
159 |
| - " mdata_path\n", |
160 |
| - ")\n", |
| 158 | + "mdata = mudata.read_h5mu(mdata_path)\n", |
161 | 159 | "mdata.mod"
|
162 | 160 | ]
|
163 | 161 | },
|
|
185 | 183 | }
|
186 | 184 | ],
|
187 | 185 | "source": [
|
188 |
| - "mdata['mCG'].layers" |
| 186 | + "mdata[\"mCG\"].layers" |
189 | 187 | ]
|
190 | 188 | },
|
191 | 189 | {
|
|
224 | 222 | }
|
225 | 223 | ],
|
226 | 224 | "source": [
|
227 |
| - "mdata['mCG'].X" |
| 225 | + "mdata[\"mCG\"].X" |
228 | 226 | ]
|
229 | 227 | },
|
230 | 228 | {
|
|
240 | 238 | "metadata": {},
|
241 | 239 | "outputs": [],
|
242 | 240 | "source": [
|
243 |
| - "sc.tl.pca(mdata['mCG'])\n", |
244 |
| - "sc.tl.pca(mdata['mCH'])\n", |
| 241 | + "sc.tl.pca(mdata[\"mCG\"])\n", |
| 242 | + "sc.tl.pca(mdata[\"mCH\"])\n", |
245 | 243 | "\n",
|
246 |
| - "ch_pcs = mdata['mCH'].obsm['X_pca']\n", |
247 |
| - "cg_pcs = mdata['mCG'].obsm['X_pca']\n", |
| 244 | + "ch_pcs = mdata[\"mCH\"].obsm[\"X_pca\"]\n", |
| 245 | + "cg_pcs = mdata[\"mCG\"].obsm[\"X_pca\"]\n", |
248 | 246 | "\n",
|
249 | 247 | "# standardize the values of PCs from both modalities\n",
|
250 | 248 | "cg_pcs = cg_pcs / cg_pcs.std()\n",
|
|
253 | 251 | "# total_pcs\n",
|
254 | 252 | "total_pcs = np.hstack([ch_pcs, cg_pcs])\n",
|
255 | 253 | "\n",
|
256 |
| - "mdata.obsm['X_pca'] = total_pcs" |
| 254 | + "mdata.obsm[\"X_pca\"] = total_pcs" |
257 | 255 | ]
|
258 | 256 | },
|
259 | 257 | {
|
|
290 | 288 | "\n",
|
291 | 289 | "fig, ax = plt.subplots(1, 2, figsize=(11, 5))\n",
|
292 | 290 | "\n",
|
293 |
| - "sc.pl.umap(mdata, color='mCG:Platform', ax=ax[0], show=False, title=\"Sequencing protocol\")\n", |
294 |
| - "sc.pl.umap(mdata, color='mCG:CoarseType', ax=ax[1], show=False, title=\"Cell type\")\n", |
| 291 | + "sc.pl.umap(mdata, color=\"mCG:Platform\", ax=ax[0], show=False, title=\"Sequencing protocol\")\n", |
| 292 | + "sc.pl.umap(mdata, color=\"mCG:CoarseType\", ax=ax[1], show=False, title=\"Cell type\")\n", |
295 | 293 | "\n",
|
296 | 294 | "plt.subplots_adjust(wspace=0.5)"
|
297 | 295 | ]
|
|
316 | 314 | "source": [
|
317 | 315 | "Before training our model, we'll use methylVI's `setup_mudata` function to prepare our `MuData` object for training. \n",
|
318 | 316 | "\n",
|
319 |
| - "First, we need to tell methylVI which modalities in our MuData object to consider via the `methylation_contexts` argument. Here we'll jointly model both CpG and non-CpG methylation features, so we'll set this argument to a list containing the names of both modalities. Next, methylVI directly models the total coverage and number of methylated cytosines in each region. Thus, for each modality in our `MuData` object, we need layers containing the coverage in each region (specified by `cov_layer`) and layers with the number of methylated cytosines (specified by `mc_layer`). Finally, we'll provide methylVI with a categorical covariate specifying the sequencing protocol used for each cell." |
| 317 | + "First, we need to tell methylVI which modalities in our MuData object to consider via the `methylation_contexts` argument. Here we'll jointly model both CpG and non-CpG methylation features, so we'll set this argument to a list containing the names of both modalities. Next, methylVI directly models the total coverage and number of methylated cytosines in each region. Thus, for each modality in our `MuData` object, we need layers containing the coverage in each region (specified by `cov_layer`) and layers with the number of methylated cytosines (specified by `mc_layer`). Finally, we'll provide methylVI with a categorical covariate specifying the sequencing protocol used for each cell.\n" |
320 | 318 | ]
|
321 | 319 | },
|
322 | 320 | {
|
|
336 | 334 | ]
|
337 | 335 | },
|
338 | 336 | {
|
339 |
| - "metadata": {}, |
340 | 337 | "cell_type": "markdown",
|
| 338 | + "metadata": {}, |
341 | 339 | "source": [
|
342 | 340 | "```{note}\n",
|
343 |
| - "Specify the modality of each argument via the `modalities` dictionary, which maps layer/key arguments to MuData modalities. In our case, both the `mCG` and `mCH` modalities contain the all of the fields specified in the `categorical_covariate_keys` argument (i.e., `Protocol`) in their respective `.obs`, so we arbitrarily choose `mCG` here.\n", |
| 341 | + "Specify the modality of each argument via the `modalities` dictionary, which maps layer/key arguments to MuData modalities. In our case, both the `mCG` and `mCH` modalities contain the all of the fields specified in the `categorical_covariate_keys` argument (i.e., `Protocol`) in their respective `.obs`, so we arbitrarily choose `mCG` here\n", |
344 | 342 | "```"
|
345 | 343 | ]
|
346 | 344 | },
|
|
360 | 358 | "name": "stdout",
|
361 | 359 | "output_type": "stream",
|
362 | 360 | "text": [
|
363 |
| - "\u001B[34mINFO \u001B[0m The model has been initialized \n" |
| 361 | + "\u001b[34mINFO \u001b[0m The model has been initialized \n" |
364 | 362 | ]
|
365 | 363 | },
|
366 | 364 | {
|
|
407 | 405 | "metadata": {},
|
408 | 406 | "outputs": [],
|
409 | 407 | "source": [
|
410 |
| - "mdata.obsm['methylVI'] = model.get_latent_representation()" |
| 408 | + "mdata.obsm[\"methylVI\"] = model.get_latent_representation()" |
411 | 409 | ]
|
412 | 410 | },
|
413 | 411 | {
|
|
439 | 437 | }
|
440 | 438 | ],
|
441 | 439 | "source": [
|
442 |
| - "sc.pp.neighbors(mdata, use_rep='methylVI')\n", |
| 440 | + "sc.pp.neighbors(mdata, use_rep=\"methylVI\")\n", |
443 | 441 | "sc.tl.umap(mdata)\n",
|
444 | 442 | "\n",
|
445 | 443 | "fig, ax = plt.subplots(1, 2, figsize=(11, 5))\n",
|
446 | 444 | "\n",
|
447 |
| - "sc.pl.umap(mdata, color='mCG:Platform', ax=ax[0], show=False, title=\"Sequencing protocol\")\n", |
448 |
| - "sc.pl.umap(mdata, color='mCG:CoarseType', ax=ax[1], show=False, title=\"Cell type\")\n", |
| 445 | + "sc.pl.umap(mdata, color=\"mCG:Platform\", ax=ax[0], show=False, title=\"Sequencing protocol\")\n", |
| 446 | + "sc.pl.umap(mdata, color=\"mCG:CoarseType\", ax=ax[1], show=False, title=\"Cell type\")\n", |
449 | 447 | "\n",
|
450 | 448 | "plt.subplots_adjust(wspace=0.5)"
|
451 | 449 | ]
|
|
0 commit comments