-
Notifications
You must be signed in to change notification settings - Fork 8
Added Batch Integration Benchmarking and Auto Benchmarking Logs #8
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -4,4 +4,5 @@ __pycache__/ | |
| outputs/ | ||
| *.sif | ||
| *agent_systems/ | ||
| agent_systems/ | ||
| agent_systems/ | ||
| *.pyc | ||
| Original file line number | Diff line number | Diff line change | ||||
|---|---|---|---|---|---|---|
| @@ -0,0 +1,43 @@ | ||||||
| { | ||||||
| "agents": { | ||||||
| "master_agent": { | ||||||
| "prompt": "You are the master agent. Analyze every user request and delegate the task to the appropriate expert: the general coder for standard single-cell analysis or the integration expert for batch correction and data integration tasks. Respond ONLY with a delegation command.", | ||||||
| "neighbors": { | ||||||
| "delegate_to_general": { | ||||||
| "target_agent": "general_coder", | ||||||
| "description": "Delegate for general single-cell tasks like QC, normalization, and plotting." | ||||||
| }, | ||||||
| "delegate_to_integration": { | ||||||
| "target_agent": "integration_expert", | ||||||
| "description": "Delegate for complex data integration and batch correction using scvi-tools." | ||||||
| } | ||||||
| } | ||||||
| }, | ||||||
| "general_coder": { | ||||||
| "prompt": "You are the *general scRNA-seq coder*. You handle standard single-cell analysis tasks like data loading, QC, filtering, normalization, and basic plotting using scanpy. You are not an expert in data integration.\n\nExample of a task you would perform:\n```python\nimport scanpy as sc\n\n# Assume 'adata' is a loaded AnnData object\n# Basic QC and filtering\nsc.pp.filter_cells(adata, min_genes=200)\nsc.pp.filter_genes(adata, min_cells=3)\nadata.var['mt'] = adata.var_names.str.startswith('MT-')\nsc.pp.calculate_qc_metrics(adata, qc_vars=['mt'], inplace=True)\n\n# Normalize and find highly variable genes\nsc.pp.normalize_total(adata, target_sum=1e4)\nsc.pp.log1p(adata)\nsc.pp.highly_variable_genes(adata, min_mean=0.0125, max_mean=3, min_disp=0.5)\n\n# Run PCA\nsc.tl.pca(adata, svd_solver='arpack')\n\nprint('Standard analysis complete. PCA is in adata.obsm[\"X_pca\"].')\n```", | ||||||
| "neighbors": { | ||||||
| "delegate_to_master": { | ||||||
| "target_agent": "master_agent", | ||||||
| "description": "Return to the master agent if you are not the correct expert." | ||||||
| }, | ||||||
| "delegate_to_integration": { | ||||||
| "target_agent": "integration_expert", | ||||||
| "description": "Delegate to this expert for complex data integration and batch correction." | ||||||
| } | ||||||
| } | ||||||
| }, | ||||||
| "integration_expert": { | ||||||
| "prompt": "You are the *integration expert*. You specialize in combining multiple single-cell datasets and correcting for batch effects using scvi-tools.\n\nExample of a task you would perform:\n```python\nimport scvi\nimport scanpy as sc\n\n# Assume 'adata' is loaded and preprocessed with a 'batch' column\n# Find highly variable genes across batches for integration\nsc.pp.highly_variable_genes(\n adata,\n n_top_genes=2000,\n subset=True,\n layer='counts',\n flavor='seurat_v3',\n batch_key='batch'\n)\n\n# Set up the AnnData object for the scVI model\nscvi.model.SCVI.setup_anndata(adata, layer='counts', batch_key='batch')\n\n# Create and train the scVI model\nmodel = scvi.model.SCVI(adata, n_layers=2, n_latent=30)\nmodel.train()\n\n# Store the integrated latent representation in the AnnData object\nadata.obsm['X_scVI'] = model.get_latent_representation()\n\nprint('Integration complete. Integrated embedding is in adata.obsm[\"X_scVI\"].')\n``` you remeber to wrap your code in triple backticks and python", | ||||||
|
||||||
| "prompt": "You are the *integration expert*. You specialize in combining multiple single-cell datasets and correcting for batch effects using scvi-tools.\n\nExample of a task you would perform:\n```python\nimport scvi\nimport scanpy as sc\n\n# Assume 'adata' is loaded and preprocessed with a 'batch' column\n# Find highly variable genes across batches for integration\nsc.pp.highly_variable_genes(\n adata,\n n_top_genes=2000,\n subset=True,\n layer='counts',\n flavor='seurat_v3',\n batch_key='batch'\n)\n\n# Set up the AnnData object for the scVI model\nscvi.model.SCVI.setup_anndata(adata, layer='counts', batch_key='batch')\n\n# Create and train the scVI model\nmodel = scvi.model.SCVI(adata, n_layers=2, n_latent=30)\nmodel.train()\n\n# Store the integrated latent representation in the AnnData object\nadata.obsm['X_scVI'] = model.get_latent_representation()\n\nprint('Integration complete. Integrated embedding is in adata.obsm[\"X_scVI\"].')\n``` you remeber to wrap your code in triple backticks and python", | |
| "prompt": "You are the *integration expert*. You specialize in combining multiple single-cell datasets and correcting for batch effects using scvi-tools.\n\nExample of a task you would perform:\n```python\nimport scvi\nimport scanpy as sc\n\n# Assume 'adata' is loaded and preprocessed with a 'batch' column\n# Find highly variable genes across batches for integration\nsc.pp.highly_variable_genes(\n adata,\n n_top_genes=2000,\n subset=True,\n layer='counts',\n flavor='seurat_v3',\n batch_key='batch'\n)\n\n# Set up the AnnData object for the scVI model\nscvi.model.SCVI.setup_anndata(adata, layer='counts', batch_key='batch')\n\n# Create and train the scVI model\nmodel = scvi.model.SCVI(adata, n_layers=2, n_latent=30)\nmodel.train()\n\n# Store the integrated latent representation in the AnnData object\nadata.obsm['X_scVI'] = model.get_latent_representation()\n\nprint('Integration complete. Integrated embedding is in adata.obsm[\"X_scVI\"].')\n``` you remember to wrap your code in triple backticks and python", |
| Original file line number | Diff line number | Diff line change | ||||
|---|---|---|---|---|---|---|
| @@ -0,0 +1,32 @@ | ||||||
| # --- New metric class using scib-metrics ------------------------------------ | ||||||
| from scib_metrics.benchmark import Benchmarker | ||||||
| from typing import Dict | ||||||
| import anndata | ||||||
| import numpy as np | ||||||
|
|
||||||
|
Comment on lines +4 to +6
|
||||||
| import anndata | |
| import numpy as np |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,13 @@ | ||
| { | ||
| "soma_joinid": 7, | ||
| "citation": "Publication: https://doi.org/10.1038/s41586-024-07944-6 Dataset Version: https://datasets.cellxgene.cziscience.com/463451bb-78a0-447f-9555-b05d11472d09.h5ad curated and distributed by CZ CELLxGENE Discover in Collection: https://cellxgene.cziscience.com/collections/fc19ae6c-d7c1-4dce-b703-62c5d52061b4", | ||
| "collection_id": "fc19ae6c-d7c1-4dce-b703-62c5d52061b4", | ||
| "collection_name": "A spatial human thymus cell atlas mapped to a continuous tissue axis", | ||
| "collection_doi": "10.1038/s41586-024-07944-6", | ||
| "collection_doi_label": "Yayon et al. (2024) Nature", | ||
| "dataset_id": "fbd69faa-b0c5-45ba-89c9-da938a7f5a14", | ||
| "dataset_version_id": "463451bb-78a0-447f-9555-b05d11472d09", | ||
| "dataset_title": "thymus scRNA-seq atlas - myeloid p2 subset", | ||
| "dataset_h5ad_path": "fbd69faa-b0c5-45ba-89c9-da938a7f5a14.h5ad", | ||
| "dataset_total_cell_count": 843 | ||
| } |
| Original file line number | Diff line number | Diff line change | ||||
|---|---|---|---|---|---|---|
|
|
@@ -214,23 +214,27 @@ def build_system(a: Agent) -> str: | |||||
|
|
||||||
| history.append({"role": "user", "content": feedback}) | ||||||
| display(console, "user", feedback) | ||||||
|
|
||||||
| if benchmark_module: | ||||||
| console.print("\n[bold]Next message (blank = continue, 'benchmark' to run benchmarks, 'exit' to quit):[/bold]") | ||||||
| else: | ||||||
| console.print("\n[bold]Next message (blank = continue, 'exit' to quit):[/bold]") | ||||||
| try: | ||||||
| user_in = input().strip() | ||||||
| except (EOFError, KeyboardInterrupt): | ||||||
| user_in = "exit" | ||||||
| if user_in.lower() in {"exit", "quit"}: | ||||||
|
|
||||||
| def input_loop(): | ||||||
| if benchmark_module: | ||||||
| console.print("\n[bold]Next message (blank = continue, 'benchmark' to run benchmarks, 'exit' to quit):[/bold]") | ||||||
| else: | ||||||
| console.print("\n[bold]Next message (blank = continue, 'exit' to quit):[/bold]") | ||||||
| try: | ||||||
| user_in = input().strip() | ||||||
| except (EOFError, KeyboardInterrupt): | ||||||
| user_in = "exit" | ||||||
| if user_in.lower() in {"exit", "quit"}: | ||||||
| return "break" | ||||||
| if user_in.lower() == "benchmark" and benchmark_module: | ||||||
| run_benchmark(mgr, benchmark_module) | ||||||
| input_loop() # Recurse to continue the loop after benchmarks | ||||||
|
||||||
| input_loop() # Recurse to continue the loop after benchmarks | |
| return input_loop() # Recurse to continue the loop after benchmarks |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -49,4 +49,4 @@ harmonypy | |
|
|
||
| # Additional Tools | ||
| rapids-singlecell | ||
| scib | ||
| scib-metrics | ||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This line is inside the loop that appends each command, causing it to repeat multiple times. Move it outside the loop so it appears just once.