diff --git a/.devcontainer/devcontainer.json b/.devcontainer/devcontainer.json index 4ecfbfe3..b290e090 100644 --- a/.devcontainer/devcontainer.json +++ b/.devcontainer/devcontainer.json @@ -10,15 +10,7 @@ "vscode": { // Set *default* container specific settings.json values on container create. "settings": { - "python.defaultInterpreterPath": "/opt/conda/bin/python", - "python.linting.enabled": true, - "python.linting.pylintEnabled": true, - "python.formatting.autopep8Path": "/opt/conda/bin/autopep8", - "python.formatting.yapfPath": "/opt/conda/bin/yapf", - "python.linting.flake8Path": "/opt/conda/bin/flake8", - "python.linting.pycodestylePath": "/opt/conda/bin/pycodestyle", - "python.linting.pydocstylePath": "/opt/conda/bin/pydocstyle", - "python.linting.pylintPath": "/opt/conda/bin/pylint" + "python.defaultInterpreterPath": "/opt/conda/bin/python" }, // Add the IDs of extensions you want installed when the container is created. diff --git a/.editorconfig b/.editorconfig index dcb6e046..29bf7275 100644 --- a/.editorconfig +++ b/.editorconfig @@ -25,10 +25,23 @@ end_of_line = unset insert_final_newline = unset trim_trailing_whitespace = unset indent_style = unset -indent_size = unset +[/subworkflows/nf-core/**] +charset = unset +end_of_line = unset +insert_final_newline = unset +trim_trailing_whitespace = unset +indent_style = unset [/assets/email*] indent_size = unset [/assets/*.Rmd] indent_size = unset + +# ignore Readme +[README.md] +indent_style = unset + +# ignore python +[*.{py,md}] +indent_style = unset diff --git a/.github/CONTRIBUTING.md b/.github/CONTRIBUTING.md index 902a3782..e316f2a6 100644 --- a/.github/CONTRIBUTING.md +++ b/.github/CONTRIBUTING.md @@ -9,9 +9,8 @@ Please use the pre-filled template to save time. However, don't be put off by this template - other more general issues and suggestions are welcome! Contributions to the code are even more welcome ;) -:::info -If you need help using or modifying nf-core/airrflow then the best place to ask is on the nf-core Slack [#airrflow](https://nfcore.slack.com/channels/airrflow) channel ([join our Slack here](https://nf-co.re/join/slack)). -::: +> [!NOTE] +> If you need help using or modifying nf-core/airrflow then the best place to ask is on the nf-core Slack [#airrflow](https://nfcore.slack.com/channels/airrflow) channel ([join our Slack here](https://nf-co.re/join/slack)). ## Contribution workflow @@ -27,6 +26,12 @@ If you're not used to this workflow with git, you can start with some [docs from ## Tests +You have the option to test your changes locally by running the pipeline. For receiving warnings about process selectors and other `debug` information, it is recommended to use the debug profile. Execute all the tests with the following command: + +```bash +nf-test test --profile debug,test,docker --verbose +``` + When you create a pull request with changes, [GitHub Actions](https://github.com/features/actions) will run automatic tests. Typically, pull-requests are only fully reviewed when these tests are passing, though of course we can help out before then. @@ -87,7 +92,7 @@ Once there, use `nf-core schema build` to add to `nextflow_schema.json`. Sensible defaults for process resource requirements (CPUs / memory / time) for a process should be defined in `conf/base.config`. These should generally be specified generic with `withLabel:` selectors so they can be shared across multiple processes/steps of the pipeline. 
A nf-core standard set of labels that should be followed where possible can be seen in the [nf-core pipeline template](https://github.com/nf-core/tools/blob/master/nf_core/pipeline-template/conf/base.config), which has the default process as a single core-process, and then different levels of multi-core configurations for increasingly large memory requirements defined with standardised labels. -The process resources can be passed on to the tool dynamically within the process with the `${task.cpu}` and `${task.memory}` variables in the `script:` block. +The process resources can be passed on to the tool dynamically within the process with the `${task.cpus}` and `${task.memory}` variables in the `script:` block. ### Naming schemes diff --git a/.github/PULL_REQUEST_TEMPLATE.md b/.github/PULL_REQUEST_TEMPLATE.md index 2318ed30..c13f8d82 100644 --- a/.github/PULL_REQUEST_TEMPLATE.md +++ b/.github/PULL_REQUEST_TEMPLATE.md @@ -18,7 +18,8 @@ Learn more about contributing: [CONTRIBUTING.md](https://github.com/nf-core/airr - [ ] If you've added a new tool - have you followed the pipeline conventions in the [contribution docs](https://github.com/nf-core/airrflow/tree/master/.github/CONTRIBUTING.md) - [ ] If necessary, also make a PR on the nf-core/airrflow _branch_ on the [nf-core/test-datasets](https://github.com/nf-core/test-datasets) repository. - [ ] Make sure your code lints (`nf-core lint`). -- [ ] Ensure the test suite passes (`nextflow run . -profile test,docker --outdir `). +- [ ] Ensure the test suite passes (`nf-test test main.nf.test -profile test,docker`). +- [ ] Check for unexpected warnings in debug mode (`nextflow run . -profile debug,test,docker --outdir `). - [ ] Usage Documentation in `docs/usage.md` is updated. - [ ] Output Documentation in `docs/output.md` is updated. - [ ] `CHANGELOG.md` is updated. 
diff --git a/.github/workflows/awsfulltest.yml b/.github/workflows/awsfulltest.yml index 6b25b2c6..62ab5695 100644 --- a/.github/workflows/awsfulltest.yml +++ b/.github/workflows/awsfulltest.yml @@ -28,7 +28,7 @@ jobs: } profiles: test_full - - uses: actions/upload-artifact@v3 + - uses: actions/upload-artifact@v4 with: name: Tower debug log file path: | diff --git a/.github/workflows/awstest.yml b/.github/workflows/awstest.yml index 269c6875..869b2ab2 100644 --- a/.github/workflows/awstest.yml +++ b/.github/workflows/awstest.yml @@ -25,7 +25,7 @@ jobs: } profiles: test - - uses: actions/upload-artifact@v3 + - uses: actions/upload-artifact@v4 with: name: Tower debug log file path: | diff --git a/.github/workflows/branch.yml b/.github/workflows/branch.yml index 5090169a..d216e1c1 100644 --- a/.github/workflows/branch.yml +++ b/.github/workflows/branch.yml @@ -19,7 +19,7 @@ jobs: # NOTE - this doesn't currently work if the PR is coming from a fork, due to limitations in GitHub actions secrets - name: Post PR comment if: failure() - uses: mshick/add-pr-comment@v1 + uses: mshick/add-pr-comment@b8f338c590a895d50bcbfa6c5859251edc8952fc # v2 with: message: | ## This PR is against the `master` branch :x: diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 5cd19a48..515482aa 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -28,13 +28,16 @@ jobs: - "latest-everything" steps: - name: Check out pipeline code - uses: actions/checkout@v3 + uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4 - name: Install Nextflow uses: nf-core/setup-nextflow@v1 with: version: "${{ matrix.NXF_VER }}" + - name: Disk space cleanup + uses: jlumbroso/free-disk-space@54081f138730dfa15788a46383842cd2f914a1be # v1.3.1 + - name: Run pipeline with test data run: | nextflow run ${GITHUB_WORKSPACE} -profile test,docker --outdir ./results @@ -49,7 +52,17 @@ jobs: - "23.04.0" - "latest-everything" profile: - ["test_tcr", "test_no_umi", "test_nocluster", "test_fetchimgt", "test_assembled_hs", "test_assembled_mm"] + [ + "test_tcr", + "test_no_umi", + "test_nocluster", + "test_fetchimgt", + "test_assembled_hs", + "test_assembled_mm", + "test_10x_sc", + "test_clontech_umi", + "test_nebnext_umi", + ] fail-fast: false steps: - name: Check out pipeline code diff --git a/.github/workflows/ci_immcantation.yml b/.github/workflows/ci_immcantation.yml index d74bb5f9..9301f01a 100644 --- a/.github/workflows/ci_immcantation.yml +++ b/.github/workflows/ci_immcantation.yml @@ -1,4 +1,4 @@ -name: nf-core CI immcantation +name: Immcantation # This workflow runs the pipeline with the minimal test dataset to check that it completes without any syntax errors on: pull_request: diff --git a/.github/workflows/clean-up.yml b/.github/workflows/clean-up.yml index 694e90ec..0b6b1f27 100644 --- a/.github/workflows/clean-up.yml +++ b/.github/workflows/clean-up.yml @@ -10,7 +10,7 @@ jobs: issues: write pull-requests: write steps: - - uses: actions/stale@v7 + - uses: actions/stale@28ca1036281a5e5922ead5184a1bbf96e5fc984e # v9 with: stale-issue-message: "This issue has been tagged as awaiting-changes or awaiting-feedback by an nf-core contributor. Remove stale label or add a comment otherwise this issue will be closed in 20 days." stale-pr-message: "This PR has been tagged as awaiting-changes or awaiting-feedback by an nf-core contributor. Remove stale label or add a comment if it is still useful." 
diff --git a/.github/workflows/download_pipeline.yml b/.github/workflows/download_pipeline.yml new file mode 100644 index 00000000..08622fd5 --- /dev/null +++ b/.github/workflows/download_pipeline.yml @@ -0,0 +1,72 @@ +name: Test successful pipeline download with 'nf-core download' + +# Run the workflow when: +# - dispatched manually +# - when a PR is opened or reopened to master branch +# - the head branch of the pull request is updated, i.e. if fixes for a release are pushed last minute to dev. +on: + workflow_dispatch: + inputs: + testbranch: + description: "The specific branch you wish to utilize for the test execution of nf-core download." + required: true + default: "dev" + pull_request: + types: + - opened + branches: + - master + pull_request_target: + branches: + - master + +env: + NXF_ANSI_LOG: false + +jobs: + download: + runs-on: ubuntu-latest + steps: + - name: Install Nextflow + uses: nf-core/setup-nextflow@v1 + + - uses: actions/setup-python@0a5c61591373683505ea898e09a3ea4f39ef2b9c # v5 + with: + python-version: "3.11" + architecture: "x64" + - uses: eWaterCycle/setup-singularity@931d4e31109e875b13309ae1d07c70ca8fbc8537 # v7 + with: + singularity-version: 3.8.3 + + - name: Install dependencies + run: | + python -m pip install --upgrade pip + pip install git+https://github.com/nf-core/tools.git@dev + + - name: Get the repository name and current branch set as environment variable + run: | + echo "REPO_LOWERCASE=${GITHUB_REPOSITORY,,}" >> ${GITHUB_ENV} + echo "REPOTITLE_LOWERCASE=$(basename ${GITHUB_REPOSITORY,,})" >> ${GITHUB_ENV} + echo "REPO_BRANCH=${{ github.event.inputs.testbranch || 'dev' }}" >> ${GITHUB_ENV} + + - name: Download the pipeline + env: + NXF_SINGULARITY_CACHEDIR: ./ + run: | + nf-core download ${{ env.REPO_LOWERCASE }} \ + --revision ${{ env.REPO_BRANCH }} \ + --outdir ./${{ env.REPOTITLE_LOWERCASE }} \ + --compress "none" \ + --container-system 'singularity' \ + --container-library "quay.io" -l "docker.io" -l "ghcr.io" \ + --container-cache-utilisation 'amend' \ + --download-configuration + + - name: Inspect download + run: tree ./${{ env.REPOTITLE_LOWERCASE }} + + - name: Run the downloaded pipeline + env: + NXF_SINGULARITY_CACHEDIR: ./ + NXF_SINGULARITY_HOME_MOUNT: true + run: nextflow run ./${{ env.REPOTITLE_LOWERCASE }}/$( sed 's/\W/_/g' <<< ${{ env.REPO_BRANCH }}) -stub -profile test,singularity --outdir ./results diff --git a/.github/workflows/fix-linting.yml b/.github/workflows/fix-linting.yml index 515f7455..8dda78ab 100644 --- a/.github/workflows/fix-linting.yml +++ b/.github/workflows/fix-linting.yml @@ -4,7 +4,7 @@ on: types: [created] jobs: - deploy: + fix-linting: # Only run if comment is on a PR with the main repo, and if it contains the magic keywords if: > contains(github.event.comment.html_url, '/pull/') && @@ -13,10 +13,17 @@ jobs: runs-on: ubuntu-latest steps: # Use the @nf-core-bot token to check out so we can push later - - uses: actions/checkout@v3 + - uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4 with: token: ${{ secrets.nf_core_bot_auth_token }} + # indication that the linting is being fixed + - name: React on comment + uses: peter-evans/create-or-update-comment@71345be0265236311c031f5c7866368bd1eff043 # v4 + with: + comment-id: ${{ github.event.comment.id }} + reactions: eyes + # Action runs on the issue comment, so we don't get the PR by default # Use the gh cli to check out the PR - name: Checkout Pull Request @@ -24,32 +31,59 @@ jobs: env: GITHUB_TOKEN: ${{ secrets.nf_core_bot_auth_token }} - - uses: 
actions/setup-node@v3 + # Install and run pre-commit + - uses: actions/setup-python@0a5c61591373683505ea898e09a3ea4f39ef2b9c # v5 + with: + python-version: 3.11 - - name: Install Prettier - run: npm install -g prettier @prettier/plugin-php + - name: Install pre-commit + run: pip install pre-commit - # Check that we actually need to fix something - - name: Run 'prettier --check' - id: prettier_status - run: | - if prettier --check ${GITHUB_WORKSPACE}; then - echo "result=pass" >> $GITHUB_OUTPUT - else - echo "result=fail" >> $GITHUB_OUTPUT - fi + - name: Run pre-commit + id: pre-commit + run: pre-commit run --all-files + continue-on-error: true - - name: Run 'prettier --write' - if: steps.prettier_status.outputs.result == 'fail' - run: prettier --write ${GITHUB_WORKSPACE} + # indication that the linting has finished + - name: react if linting finished succesfully + if: steps.pre-commit.outcome == 'success' + uses: peter-evans/create-or-update-comment@71345be0265236311c031f5c7866368bd1eff043 # v4 + with: + comment-id: ${{ github.event.comment.id }} + reactions: "+1" - name: Commit & push changes - if: steps.prettier_status.outputs.result == 'fail' + id: commit-and-push + if: steps.pre-commit.outcome == 'failure' run: | git config user.email "core@nf-co.re" git config user.name "nf-core-bot" git config push.default upstream git add . git status - git commit -m "[automated] Fix linting with Prettier" + git commit -m "[automated] Fix code linting" git push + + - name: react if linting errors were fixed + id: react-if-fixed + if: steps.commit-and-push.outcome == 'success' + uses: peter-evans/create-or-update-comment@71345be0265236311c031f5c7866368bd1eff043 # v4 + with: + comment-id: ${{ github.event.comment.id }} + reactions: hooray + + - name: react if linting errors were not fixed + if: steps.commit-and-push.outcome == 'failure' + uses: peter-evans/create-or-update-comment@71345be0265236311c031f5c7866368bd1eff043 # v4 + with: + comment-id: ${{ github.event.comment.id }} + reactions: confused + + - name: react if linting errors were not fixed + if: steps.commit-and-push.outcome == 'failure' + uses: peter-evans/create-or-update-comment@71345be0265236311c031f5c7866368bd1eff043 # v4 + with: + issue-number: ${{ github.event.issue.number }} + body: | + @${{ github.actor }} I tried to fix the linting errors, but it didn't work. Please fix them manually. + See [CI log](https://github.com/nf-core/airrflow/actions/runs/${{ github.run_id }}) for more details. 
diff --git a/.github/workflows/linting.yml b/.github/workflows/linting.yml index b8bdd214..073e1876 100644 --- a/.github/workflows/linting.yml +++ b/.github/workflows/linting.yml @@ -11,72 +11,33 @@ on: types: [published] jobs: - EditorConfig: + pre-commit: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4 - - uses: actions/setup-node@v3 - - - name: Install editorconfig-checker - run: npm install -g editorconfig-checker - - - name: Run ECLint check - run: editorconfig-checker -exclude README.md $(find .* -type f | grep -v '.git\|.py\|.md\|json\|yml\|yaml\|html\|css\|work\|.nextflow\|build\|nf_core.egg-info\|log.txt\|Makefile') - - Prettier: - runs-on: ubuntu-latest - steps: - - uses: actions/checkout@v3 - - - uses: actions/setup-node@v3 - - - name: Install Prettier - run: npm install -g prettier - - - name: Run Prettier --check - run: prettier --check ${GITHUB_WORKSPACE} - - PythonBlack: - runs-on: ubuntu-latest - steps: - - uses: actions/checkout@v3 - - - name: Check code lints with Black - uses: psf/black@stable - - # If the above check failed, post a comment on the PR explaining the failure - - name: Post PR comment - if: failure() - uses: mshick/add-pr-comment@v1 + - name: Set up Python 3.11 + uses: actions/setup-python@0a5c61591373683505ea898e09a3ea4f39ef2b9c # v5 with: - message: | - ## Python linting (`black`) is failing - - To keep the code consistent with lots of contributors, we run automated code consistency checks. - To fix this CI test, please run: - - * Install [`black`](https://black.readthedocs.io/en/stable/): `pip install black` - * Fix formatting errors in your pipeline: `black .` - - Once you push these changes the test should pass, and you can hide this comment :+1: + python-version: 3.11 + cache: "pip" - We highly recommend setting up Black in your code editor so that this formatting is done automatically on save. Ask about it on Slack for help! + - name: Install pre-commit + run: pip install pre-commit - Thanks again for your contribution! 
- repo-token: ${{ secrets.GITHUB_TOKEN }} - allow-repeats: false + - name: Run pre-commit + run: pre-commit run --all-files nf-core: runs-on: ubuntu-latest steps: - name: Check out pipeline code - uses: actions/checkout@v3 + uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4 - name: Install Nextflow uses: nf-core/setup-nextflow@v1 - - uses: actions/setup-python@v4 + - uses: actions/setup-python@0a5c61591373683505ea898e09a3ea4f39ef2b9c # v5 with: python-version: "3.11" architecture: "x64" @@ -99,7 +60,7 @@ jobs: - name: Upload linting log file artifact if: ${{ always() }} - uses: actions/upload-artifact@v3 + uses: actions/upload-artifact@5d5d22a31266ced268874388b861e4b58bb5c2f3 # v4 with: name: linting-logs path: | diff --git a/.github/workflows/linting_comment.yml b/.github/workflows/linting_comment.yml index 0bbcd30f..b706875f 100644 --- a/.github/workflows/linting_comment.yml +++ b/.github/workflows/linting_comment.yml @@ -11,7 +11,7 @@ jobs: runs-on: ubuntu-latest steps: - name: Download lint results - uses: dawidd6/action-download-artifact@v2 + uses: dawidd6/action-download-artifact@f6b0bace624032e30a85a8fd9c1a7f8f611f5737 # v3 with: workflow: linting.yml workflow_conclusion: completed @@ -21,7 +21,7 @@ jobs: run: echo "pr_number=$(cat linting-logs/PR_number.txt)" >> $GITHUB_OUTPUT - name: Post PR comment - uses: marocchino/sticky-pull-request-comment@v2 + uses: marocchino/sticky-pull-request-comment@331f8f5b4215f0445d3c07b4967662a32a2d3e31 # v2 with: GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} number: ${{ steps.pr_number.outputs.pr_number }} diff --git a/.github/workflows/release-announcments.yml b/.github/workflows/release-announcements.yml similarity index 80% rename from .github/workflows/release-announcments.yml rename to .github/workflows/release-announcements.yml index 6ad33927..d468aeaa 100644 --- a/.github/workflows/release-announcments.yml +++ b/.github/workflows/release-announcements.yml @@ -9,6 +9,11 @@ jobs: toot: runs-on: ubuntu-latest steps: + - name: get topics and convert to hashtags + id: get_topics + run: | + curl -s https://nf-co.re/pipelines.json | jq -r '.remote_workflows[] | select(.full_name == "${{ github.repository }}") | .topics[]' | awk '{print "#"$0}' | tr '\n' ' ' >> $GITHUB_OUTPUT + - uses: rzr/fediverse-action@master with: access-token: ${{ secrets.MASTODON_ACCESS_TOKEN }} @@ -20,11 +25,13 @@ jobs: Please see the changelog: ${{ github.event.release.html_url }} + ${{ steps.get_topics.outputs.GITHUB_OUTPUT }} #nfcore #openscience #nextflow #bioinformatics + send-tweet: runs-on: ubuntu-latest steps: - - uses: actions/setup-python@v4 + - uses: actions/setup-python@0a5c61591373683505ea898e09a3ea4f39ef2b9c # v5 with: python-version: "3.10" - name: Install dependencies @@ -56,7 +63,7 @@ jobs: bsky-post: runs-on: ubuntu-latest steps: - - uses: zentered/bluesky-post-action@v0.0.2 + - uses: zentered/bluesky-post-action@80dbe0a7697de18c15ad22f4619919ceb5ccf597 # v0.1.0 with: post: | Pipeline release! ${{ github.repository }} v${{ github.event.release.tag_name }} - ${{ github.event.release.name }}! 
diff --git a/.gitignore b/.gitignore index 8a83d028..9f348012 100644 --- a/.gitignore +++ b/.gitignore @@ -11,3 +11,5 @@ package-lock.json .idea/ nf-params.json .vscode/ +tests/ +test_flow/ diff --git a/.gitpod.yml b/.gitpod.yml index 25488dcc..105a1821 100644 --- a/.gitpod.yml +++ b/.gitpod.yml @@ -4,16 +4,17 @@ tasks: command: | pre-commit install --install-hooks nextflow self-update + - name: unset JAVA_TOOL_OPTIONS + command: | + unset JAVA_TOOL_OPTIONS vscode: extensions: # based on nf-core.nf-core-extensionpack - - codezombiech.gitignore # Language support for .gitignore files - # - cssho.vscode-svgviewer # SVG viewer - esbenp.prettier-vscode # Markdown/CommonMark linting and style checking for Visual Studio Code - - eamodio.gitlens # Quickly glimpse into whom, why, and when a line or code block was changed - EditorConfig.EditorConfig # override user/workspace settings with settings found in .editorconfig files - Gruntfuggly.todo-tree # Display TODO and FIXME in a tree view in the activity bar - mechatroner.rainbow-csv # Highlight columns in csv files in different colors - # - nextflow.nextflow # Nextflow syntax highlighting + # - nextflow.nextflow # Nextflow syntax highlighting - oderwat.indent-rainbow # Highlight indentation level - streetsidesoftware.code-spell-checker # Spelling checker for source code + - charliermarsh.ruff # Code linter Ruff diff --git a/.nf-core.yml b/.nf-core.yml index 4ae6f7f0..59995351 100644 --- a/.nf-core.yml +++ b/.nf-core.yml @@ -3,4 +3,12 @@ lint: - conf/igenomes.config multiqc_config: - report_comment + nextflow_config: + - config_defaults: + - params.miairr + - params.report_rmd + - params.report_css + - params.report_logo + - params.report_logo_img + - params.config_profile_url repository_type: pipeline diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 0c31cdb9..af57081f 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -1,5 +1,10 @@ repos: - repo: https://github.com/pre-commit/mirrors-prettier - rev: "v2.7.1" + rev: "v3.1.0" hooks: - id: prettier + - repo: https://github.com/editorconfig-checker/editorconfig-checker.python + rev: "2.7.3" + hooks: + - id: editorconfig-checker + alias: ec diff --git a/CHANGELOG.md b/CHANGELOG.md index 3484b265..1cc3c622 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,6 +3,30 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/) and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.html). 
+## [3.3.0] - 2024-03-31 Confringo + +### `Added` + +- [#294](https://github.com/nf-core/airrflow/pull/294) Merge template updates nf-core/tools v2.11.1 +- [#299](https://github.com/nf-core/airrflow/pull/299) Add profile for common NEB and TAKARA protocols +- [#289](https://github.com/nf-core/airrflow/pull/289) Add possibility to merge multi-lane samples when starting from fastq files +- [#289](https://github.com/nf-core/airrflow/pull/289) Add possibility to run cellranger for scVDJseq data + +### `Fixed` + +- [#294](https://github.com/nf-core/airrflow/pull/294) Removed optional output from FilterQuality to not fail silently +- [#293](https://github.com/nf-core/airrflow/pull/293) Clonal_threshold is validated to be 'auto' or number greater than zero +- [#295](https://github.com/nf-core/airrflow/pull/295) Fixed airrflow report sequence plot and add path to clonal analysis reports +- [#310](https://github.com/nf-core/airrflow/pull/310) Bump versions to 3.3.0, update containers and software versions + +### `Dependencies` + +| Dependency | Old version | New version | +| ---------- | ----------- | ----------- | +| multiqc | 1.19 | 1.21 | +| enchantr | 0.1.9 | 0.1.11 | +| igblast | 1.19.0 | 1.22.0 | + ## [3.2.0] - 2023-10-27 Expecto patronum ### `Added` diff --git a/CITATIONS.md b/CITATIONS.md index 9faa369d..5dd7833c 100644 --- a/CITATIONS.md +++ b/CITATIONS.md @@ -1,5 +1,11 @@ # nf-core/airrflow: Citations +## [nf-core/airrflow](https://doi.org/10.1101/2024.01.18.576147) + +> **nf-core/airrflow: an adaptive immune receptor repertoire analysis workflow employing the Immcantation framework** +> +> Gisela Gabernet, Susanna Marquez, Robert Bjornson, Alexander Peltzer, Hailong Meng, Edel Aron, Noah Y. Lee, Cole Jensen, David Ladd, Friederike Hanssen, Simon Heumos, nf-core community, Gur Yaari, Markus C. Kowarik, Sven Nahnsen, Steven H. Kleinstein. BioRxiv. 2024. doi: [10.1101/2024.01.18.576147](https://doi.org/10.1101/2024.01.18.576147). + ## [nf-core](https://pubmed.ncbi.nlm.nih.gov/32055031/) > Ewels PA, Peltzer A, Fillinger S, Patel H, Alneberg J, Wilm A, Garcia MU, Di Tommaso P, Nahnsen S. The nf-core framework for community-curated bioinformatics pipelines. Nat Biotechnol. 2020 Mar;38(3):276-278. doi: 10.1038/s41587-020-0439-x. PubMed PMID: 32055031. @@ -46,13 +52,13 @@ > Hoehn K, Van der Heiden J, Zhou J, Lunter G, Pybus O, Kleinstein S (2019). “Repertoire-wide phylogenetic models of B cell molecular evolution reveal evolutionary signatures of aging and vaccination.” PNAS. -- [TIgGER](https://doi.org/10.1073/pnas.1417683112) +- [IgBLAST](https://www.ncbi.nlm.nih.gov/pmc/articles/PMC3692102/) - > Gadala-maria, D., Yaari, G., Uduman, M., & Kleinstein, S. H. (2015). Automated analysis of high-throughput B-cell sequencing data reveals a high frequency of novel immunoglobulin V gene segment alleles. Proceedings of the National Academy of Sciences, 112(8), 1–9. + > Ye J, Ma N, Madden TL, Ostell JM. (2013). IgBLAST: an immunoglobulin variable domain sequence analysis tool. Nucleic Acids Res. - [Fastp](https://doi.org/10.1093/bioinformatics/bty560) - > Shifu Chen, Yanqing Zhou, Yaru Chen, Jia Gu, fastp: an ultra-fast all-in-one FASTQ preprocessor, Bioinformatics. 2018 Sept 1; 34(17):i884–i890. doi: 10.1093/bioinformatics/bty560. + > Shifu Chen, Yanqing Zhou, Yaru Chen, Jia Gu, fastp: an ultra-fast all-in-one FASTQ preprocessor, Bioinformatics. 2018 Sept 1; 34(17), i884–i890. doi: 10.1093/bioinformatics/bty560. 
- [pRESTO](https://doi.org/10.1093/bioinformatics/btu138) @@ -60,7 +66,7 @@ - [SHazaM, Change-O](https://doi.org/10.1093/bioinformatics/btv359) - > Gupta, N. T., Vander Heiden, J. A., Uduman, M., Gadala-Maria, D., Yaari, G., & Kleinstein, S. H. (2015). Change-O: a toolkit for analyzing large-scale B cell immunoglobulin repertoire sequencing data: Table 1. Bioinformatics, 31(20), 3356–3358. + > Gupta, N. T., Vander Heiden, J. A., Uduman, M., Gadala-Maria, D., Yaari, G., & Kleinstein, S. H. (2015). Change-O: a toolkit for analyzing large-scale B cell immunoglobulin repertoire sequencing data. Bioinformatics, 31(20), 3356–3358. - [Alakazam](https://doi.org/10.1126/scitranslmed.3008879) @@ -80,11 +86,7 @@ - [IgPhyML](https://www.pnas.org/doi/10.1073/pnas.1906020116) - > Hoehn K, Van der Heiden J, Zhou J, Lunter G, Pybus O, Kleinstein S (2019). “Repertoire-wide phylogenetic models of B cell molecular evolution reveal evolutionary signatures of aging and vaccination.” PNAS. - -- [TIgGER](https://doi.org/10.1073/pnas.1417683112) - - > Gadala-maria, D., Yaari, G., Uduman, M., & Kleinstein, S. H. (2015). Automated analysis of high-throughput B-cell sequencing data reveals a high frequency of novel immunoglobulin V gene segment alleles. Proceedings of the National Academy of Sciences, 112(8), 1–9. + > Hoehn K, Van der Heiden J, Zhou J, Lunter G, Pybus O, Kleinstein S (2019). “Repertoire-wide phylogenetic models of B cell molecular evolution reveal evolutionary signatures of aging and vaccination. PNAS, 116(45) 22664-22672." - [MultiQC](https://pubmed.ncbi.nlm.nih.gov/27312411/) diff --git a/README.md b/README.md index 7b11703f..6e1ff6e2 100644 --- a/README.md +++ b/README.md @@ -1,14 +1,19 @@ -# ![nf-core/airrflow](docs/images/nf-core-airrflow_logo_light.png#gh-light-mode-only) ![nf-core/airrflow](docs/images/nf-core-airrflow_logo_dark.png#gh-dark-mode-only) - +
+ nf-core/airrflow
[![GitHub Actions CI Status](https://github.com/nf-core/airrflow/workflows/nf-core%20CI/badge.svg)](https://github.com/nf-core/airrflow/actions?query=workflow%3A%22nf-core+CI%22) [![GitHub Actions Linting Status](https://github.com/nf-core/airrflow/workflows/nf-core%20linting/badge.svg)](https://github.com/nf-core/airrflow/actions?query=workflow%3A%22nf-core+linting%22) [![AWS CI](https://img.shields.io/badge/CI%20tests-full%20size-FF9900?labelColor=000000&logo=Amazon%20AWS)](https://nf-co.re/airrflow/results) [![Cite with Zenodo](http://img.shields.io/badge/DOI-10.5281/zenodo.2642009-1073c8?labelColor=000000)](https://doi.org/10.5281/zenodo.2642009) +[![nf-test](https://img.shields.io/badge/unit_tests-nf--test-337ab7.svg)](https://www.nf-test.com) [![Nextflow](https://img.shields.io/badge/nextflow%20DSL2-%E2%89%A523.04.0-23aa62.svg)](https://www.nextflow.io/) [![run with conda](http://img.shields.io/badge/run%20with-conda-3EB049?labelColor=000000&logo=anaconda)](https://docs.conda.io/en/latest/) [![run with docker](https://img.shields.io/badge/run%20with-docker-0db7ed?labelColor=000000&logo=docker)](https://www.docker.com/) [![run with singularity](https://img.shields.io/badge/run%20with-singularity-1d355c.svg?labelColor=000000)](https://sylabs.io/docs/) -[![Launch on Nextflow Tower](https://img.shields.io/badge/Launch%20%F0%9F%9A%80-Nextflow%20Tower-%234256e7)](https://tower.nf/launch?pipeline=https://github.com/nf-core/airrflow) +[![Launch on Seqera Platform](https://img.shields.io/badge/Launch%20%F0%9F%9A%80-Seqera%20Platform-%234256e7)](https://tower.nf/launch?pipeline=https://github.com/nf-core/airrflow) [![Get help on Slack](http://img.shields.io/badge/slack-nf--core%20%23airrflow-4A154B?labelColor=000000&logo=slack)](https://nfcore.slack.com/channels/airrflow) [![Follow on Twitter](http://img.shields.io/badge/twitter-%40nf__core-1DA1F2?labelColor=000000&logo=twitter)](https://twitter.com/nf_core) [![Follow on Mastodon](https://img.shields.io/badge/mastodon-nf__core-6364ff?labelColor=FFFFFF&logo=mastodon)](https://mstdn.science/@nf_core) @@ -16,7 +21,7 @@ ## Introduction -**nf-core/airrflow** is a bioinformatics best-practice pipeline to analyze B-cell or T-cell repertoire sequencing data. It makes use of the [Immcantation](https://immcantation.readthedocs.io) toolset. The input data can be targeted amplicon bulk sequencing data of the V, D, J and C regions of the B/T-cell receptor with multiplex PCR or 5' RACE protocol, or assembled reads (bulk or single cell). +**nf-core/airrflow** is a bioinformatics best-practice pipeline to analyze B-cell or T-cell repertoire sequencing data. It makes use of the [Immcantation](https://immcantation.readthedocs.io) toolset. The input data can be targeted amplicon bulk sequencing data of the V, D, J and C regions of the B/T-cell receptor with multiplex PCR or 5' RACE protocol, single-cell VDJ sequencing using the 10xGenomics libraries, or assembled reads (bulk or single-cell). ![nf-core/airrflow overview](docs/images/airrflow_workflow_overview.png) @@ -30,18 +35,25 @@ nf-core/airrflow allows the end-to-end processing of BCR and TCR bulk and single ![nf-core/airrflow overview](docs/images/metro-map-airrflow.png) -1. QC and sequence assembly (bulk only) - -- Raw read quality control, adapter trimming and clipping (`Fastp`). -- Filter sequences by base quality (`pRESTO FilterSeq`). -- Mask amplicon primers (`pRESTO MaskPrimers`). -- Pair read mates (`pRESTO PairSeq`). 
-- For UMI-based sequencing: - - Cluster sequences according to similarity (optional for insufficient UMI diversity) (`pRESTO ClusterSets`). - - Build consensus of sequences with the same UMI barcode (`pRESTO BuildConsensus`). -- Assemble R1 and R2 read mates (`pRESTO AssemblePairs`). -- Remove and annotate read duplicates (`pRESTO CollapseSeq`). -- Filter out sequences that do not have at least 2 duplicates (`pRESTO SplitSeq`). +1. QC and sequence assembly + +- Bulk + - Raw read quality control, adapter trimming and clipping (`Fastp`). + - Filter sequences by base quality (`pRESTO FilterSeq`). + - Mask amplicon primers (`pRESTO MaskPrimers`). + - Pair read mates (`pRESTO PairSeq`). + - For UMI-based sequencing: + - Cluster sequences according to similarity (optional for insufficient UMI diversity) (`pRESTO ClusterSets`). + - Build consensus of sequences with the same UMI barcode (`pRESTO BuildConsensus`). + - Assemble R1 and R2 read mates (`pRESTO AssemblePairs`). + - Remove and annotate read duplicates (`pRESTO CollapseSeq`). + - Filter out sequences that do not have at least 2 duplicates (`pRESTO SplitSeq`). +- single cell + - cellranger vdj + - Assemble contigs + - Annotate contigs + - Call cells + - Generate clonotypes 2. V(D)J annotation and filtering (bulk and single-cell) @@ -77,11 +89,8 @@ nf-core/airrflow allows the end-to-end processing of BCR and TCR bulk and single ## Usage -:::note -If you are new to Nextflow and nf-core, please refer to [this page](https://nf-co.re/docs/usage/installation) on how -to set-up Nextflow. Make sure to [test your setup](https://nf-co.re/docs/usage/introduction#how-to-run-a-pipeline) -with `-profile test` before running the workflow on actual data. -::: +> [!NOTE] +> If you are new to Nextflow and nf-core, please refer to [this page](https://nf-co.re/docs/usage/installation) on how to set-up Nextflow. Make sure to [test your setup](https://nf-co.re/docs/usage/introduction#how-to-run-a-pipeline) with `-profile test` before running the workflow on actual data. First, ensure that the pipeline tests run on your infrastructure: @@ -102,7 +111,7 @@ A typical command to run the pipeline from **bulk raw fastq files** is: ```bash nextflow run nf-core/airrflow \ --r 3.2.0 \ +-r \ -profile \ --mode fastq \ --input input_samplesheet.tsv \ @@ -114,11 +123,23 @@ nextflow run nf-core/airrflow \ --outdir ./results ``` +A typical command to run the pipeline from **single cell raw fastq files** (10X genomics) is: + +```bash +nextflow run nf-core/airrflow -r dev \ +-profile \ +--mode fastq \ +--input input_samplesheet.tsv \ +--library_generation_method sc_10x_genomics \ +--reference_10x reference/refdata-cellranger-vdj-GRCh38-alts-ensembl-5.0.0.tar.gz \ +--outdir ./results +``` + A typical command to run the pipeline from **single-cell AIRR rearrangement tables or assembled bulk sequencing fasta** data is: ```bash nextflow run nf-core/airrflow \ --r 3.2.0 \ +-r \ -profile \ --input input_samplesheet.tsv \ --mode assembled \ @@ -143,11 +164,17 @@ For more details about the output files and reports, please refer to the ## Credits -nf-core/airrflow was written by [Gisela Gabernet](https://github.com/ggabernet), [Susanna Marquez](https://github.com/ssnn-airr), [Alexander Peltzer](@apeltzer) and [Simon Heumos](@subwaystation). 
+nf-core/airrflow was originally written by: + +- [Gisela Gabernet](https://github.com/ggabernet) +- [Susanna Marquez](https://github.com/ssnn-airr) +- [Alexander Peltzer](@apeltzer) +- [Simon Heumos](@subwaystation) -Further contributors to the pipeline are: +We thank the following people for their extensive assistance in the development of the pipeline: -- [@dladd](https://github.com/dladd) +- [David Ladd](https://github.com/dladd) +- [Friederike Hanssen](https://github.com/ggabernet/friederikehanssen) ## Contributions and Support @@ -157,9 +184,17 @@ For further information or help, don't hesitate to get in touch on the [Slack `# ## Citations -If you use nf-core/airrflow for your analysis, please cite it using the following DOI: [10.5281/zenodo.2642009](https://doi.org/10.5281/zenodo.2642009) +If you use nf-core/airrflow for your analysis, please cite the preprint as follows: + +> **nf-core/airrflow: an adaptive immune receptor repertoire analysis workflow employing the Immcantation framework** +> +> Gisela Gabernet, Susanna Marquez, Robert Bjornson, Alexander Peltzer, Hailong Meng, Edel Aron, Noah Y. Lee, Cole Jensen, David Ladd, Friederike Hanssen, Simon Heumos, nf-core community, Gur Yaari, Markus C. Kowarik, Sven Nahnsen, Steven H. Kleinstein. +> +> BioRxiv. 2024. doi: [10.1101/2024.01.18.576147](https://doi.org/10.1101/2024.01.18.576147). + +The specific pipeline version using the following DOI: [10.5281/zenodo.2642009](https://doi.org/10.5281/zenodo.2642009) -An extensive list of references for the tools used by the pipeline can be found in the [`CITATIONS.md`](CITATIONS.md) file. +Please also cite all the tools that are being used by the pipeline. An extensive list of references for the tools used by the pipeline can be found in the [`CITATIONS.md`](CITATIONS.md) file. You can cite the `nf-core` publication as follows: diff --git a/assets/email_template.html b/assets/email_template.html index 2603a841..43c3875d 100644 --- a/assets/email_template.html +++ b/assets/email_template.html @@ -12,7 +12,7 @@ -
nf-core/airrflow v${version}
+
nf-core/airrflow ${version}
Run Name: $runName
<% if (!success){ diff --git a/assets/email_template.txt b/assets/email_template.txt index 4c539b92..3629a189 100644 --- a/assets/email_template.txt +++ b/assets/email_template.txt @@ -4,7 +4,7 @@ |\\ | |__ __ / ` / \\ |__) |__ } { | \\| | \\__, \\__/ | \\ |___ \\`-._,-`-, `._,._,' - nf-core/airrflow v${version} + nf-core/airrflow ${version} ---------------------------------------------------- Run Name: $runName diff --git a/assets/methods_description_template.yml b/assets/methods_description_template.yml index e4acf864..6a94b1e2 100644 --- a/assets/methods_description_template.yml +++ b/assets/methods_description_template.yml @@ -3,11 +3,9 @@ description: "Suggested text and references to use when describing pipeline usag section_name: "nf-core/airrflow Methods Description" section_href: "https://github.com/nf-core/airrflow" plot_type: "html" -## TODO nf-core: Update the HTML below to your preferred methods description, e.g. add publication citation for this pipeline -## You inject any metadata in the Nextflow '${workflow}' object data: |
Methods
-
Data was processed using nf-core/airrflow v${workflow.manifest.version} ${doi_text} of the nf-core collection of workflows (Ewels et al., 2020), utilising reproducible software environments from the Bioconda (Grüning et al., 2018) and Biocontainers (da Veiga Leprevost et al., 2017) projects.
+
Data was processed using nf-core/airrflow v${workflow.manifest.version} (${doi_text}; Gabernet et al., 2024) of the nf-core collection of workflows (Ewels et al., 2020), utilising reproducible software environments from the Bioconda (Grüning et al., 2018) and Biocontainers (da Veiga Leprevost et al., 2017) projects.
The pipeline was executed with Nextflow v${workflow.nextflow.version} (Di Tommaso et al., 2017) with the following command:
${workflow.commandLine}
${tool_citations}
diff --git a/assets/multiqc_config.yml b/assets/multiqc_config.yml index b54b1946..8bc5e1a7 100644 --- a/assets/multiqc_config.yml +++ b/assets/multiqc_config.yml @@ -1,5 +1,5 @@ -report_comment: - This report has been generated by the nf-core/airrflow +report_comment: > + This report has been generated by the nf-core/airrflow analysis pipeline. For information about how to interpret these results, please see the documentation. @@ -27,3 +27,5 @@ report_section_order: order: -1002 export_plots: true + +disable_version_detection: true diff --git a/assets/nf-core-airrflow_logo_light.png b/assets/nf-core-airrflow_logo_light.png index 2c7265f7..fb7707da 100644 Binary files a/assets/nf-core-airrflow_logo_light.png and b/assets/nf-core-airrflow_logo_light.png differ diff --git a/assets/repertoire_comparison.Rmd b/assets/repertoire_comparison.Rmd index 64d51a89..27ebbbad 100644 --- a/assets/repertoire_comparison.Rmd +++ b/assets/repertoire_comparison.Rmd @@ -16,7 +16,7 @@ output: --- -title: "Repertoire analysis" +title: "Airrflow results summary report" subtitle: "Interactive summary report produced by the nf-core/airrflow pipeline" date: '`r format(Sys.Date(), "%B %d, %Y")`' output: html_document @@ -48,6 +48,19 @@ datadir <- "." ``` +# AIRR repertoire results + +The repertoires per subject after clonal analysis can be found in the +subdirectory [clonal_analysis/define_clones/all_reps_clone_report/repertoires](clonal_analysis/define_clones/all_reps_clone_report/repertoires). + +Additionally, html reports summarizing the results are provided: + +- Report summarizing the clonal threshold found per each specified cloning group [clonal_analysis/find_threshold/all_reps_dist_report/index.html](clonal_analysis/find_threshold/all_reps_dist_report/index.html). +- Report summarizing the repertoire properties for all the samples [clonal_analysis/define_clones/all_reps_clone_report/index.html](clonal_analysis/define_clones/all_reps_clone_report/index.html). +- Report summarizing the lineage trees for each specified cloning group [clonal_analysis/dowser_lineages/](clonal_analysis/dowser_lineages/). + +A full description of the pipeline results can be found on the Output section of the [nf-core/airrflow website](https://nf-co.re/airrflow). + # Number of sequences ## Sequence assembly steps @@ -85,7 +98,7 @@ tryCatch( { ) ``` -## Filtering and clonal analysis steps +## V(D)J gene assignment and QC Number of sequences for each of the samples after each of the downstream filtering and clonal analysis steps. The full table can be found under [Table_sequences_assembled](repertoire_comparison/Sequence_numbers_summary/Table_sequences_assembled.tsv). 
@@ -97,11 +110,11 @@ if (any(is.na(tab_seqs_assembled$sample_id))) { tab_seqs_assembled$sample_id <- sapply(tab_seqs_assembled$file_0, function(x) unlist(strsplit(as.character(x), "_"))[1]) } -dat <- tidyr::pivot_wider(tab_seqs_assembled, - id_cols=sample_id, - names_from=task, - values_from=to_num_seqs) -dat <- dat %>% dplyr::relocate(any_of(c("sample_id","ConvertDb-fasta", "AssignGenes-igblast", "MakeDB-igblast", "FilterQuality", +dat <- tab_seqs_assembled %>% + tidyr::pivot_wider(id_cols=sample_id, + names_from=task, + values_from=to_num_seqs) +dat <- dat %>% dplyr::select(any_of(c("sample_id","ConvertDb-fasta", "AssignGenes-igblast", "MakeDB-igblast", "FilterQuality", "ParseDb-split", "FilterJunctionMod3","AddMetadata","SingleCellQC","CreateGermlines", "RemoveChimeric","CollapseDuplicates","ClonePass"))) %>% dplyr::arrange(sample_id) @@ -112,12 +125,17 @@ write.table(dat, file=paste0(seq_dir,"/Table_sequences_assembled.tsv"), sep="\t" ```{r assembled_seq_numbers_plot, echo=FALSE, warning=FALSE, results='asis'} -tab_seqs_assembled$task <- factor(tab_seqs_assembled$task, levels=c("AssignGenes-igblast", "MakeDB-igblast", - "FilterQuality", - "ParseDb-split", "FilterJunctionMod3", "AddMetadata", - "CreateGermlines", "RemoveChimeric", "CollapseDuplicates", - "ClonePass")) -tab_seqs_assembled <- tab_seqs_assembled[!grepl("productive-F",tab_seqs_assembled$to_name),] +tab_seqs_assembled <- tab_seqs_assembled %>% + filter( !grepl("-fail.tsv", to_name) ) %>% + filter( !grepl("productive-F.tsv", to_name) ) %>% + dplyr::filter( task %in% c("sample_id","AssignGenes-igblast", "MakeDB-igblast", "FilterQuality", + "ParseDb-split", "FilterJunctionMod3","AddMetadata","SingleCellQC","CreateGermlines", + "RemoveChimeric","CollapseDuplicates","ClonePass")) + +tab_seqs_assembled$task <- factor(tab_seqs_assembled$task, levels=c("AssignGenes-igblast", "MakeDB-igblast", "FilterQuality", + "ParseDb-split", "FilterJunctionMod3", "AddMetadata", "SingleCellQC", + "CreateGermlines", "RemoveChimeric", "CollapseDuplicates", + "ClonePass")) seqs_plot_assembled <- ggplot(data=tab_seqs_assembled, aes(x=task, y=to_num_seqs, group=sample_id)) + diff --git a/assets/schema_input.json b/assets/schema_input.json index 635dcd18..bb0ee0ba 100644 --- a/assets/schema_input.json +++ b/assets/schema_input.json @@ -10,7 +10,8 @@ "sample_id": { "type": "string", "pattern": "^\\S+$", - "errorMessage": "Sample name must be provided and cannot contain spaces." + "errorMessage": "Sample name must be provided and cannot contain spaces", + "meta": ["id"] }, "subject_id": { "type": "string", diff --git a/assets/slackreport.json b/assets/slackreport.json index bd9523d9..14549b87 100644 --- a/assets/slackreport.json +++ b/assets/slackreport.json @@ -3,7 +3,7 @@ { "fallback": "Plain-text summary of the attachment.", "color": "<% if (success) { %>good<% } else { %>danger<%} %>", - "author_name": "nf-core/airrflow v${version} - ${runName}", + "author_name": "nf-core/airrflow ${version} - ${runName}", "author_icon": "https://www.nextflow.io/docs/latest/_static/favicon.ico", "text": "<% if (success) { %>Pipeline completed successfully!<% } else { %>Pipeline completed with errors<% } %>", "fields": [ diff --git a/bin/check_samplesheet.py b/bin/check_samplesheet.py index 0a7dca71..9867c446 100755 --- a/bin/check_samplesheet.py +++ b/bin/check_samplesheet.py @@ -124,11 +124,6 @@ def check_samplesheet(file_in, assembled): ) ) else: - if any(tab["single_cell"].tolist()): - print_error( - "Some single cell column values are TRUE. 
The raw mode only accepts bulk samples. If processing single cell samples, please set the `--mode assembled` flag, and provide an AIRR rearrangement as input." - ) - for col in required_columns_raw: if col not in header: print("ERROR: Please check samplesheet header: {} ".format(",".join(header))) @@ -165,9 +160,12 @@ def check_samplesheet(file_in, assembled): ## Check that sample ids are unique if len(tab["sample_id"]) != len(set(tab["sample_id"])): - print_error( - "Sample IDs are not unique! The sample IDs in the input samplesheet should be unique for each sample." - ) + if assembled: + print_error( + "Sample IDs are not unique! The sample IDs in the input samplesheet should be unique for each sample." + ) + else: + print("WARNING: Sample IDs are not unique! FastQs with the same sample ID will be merged.") ## Check that pcr_target_locus is IG or TR for val in tab["pcr_target_locus"]: diff --git a/bin/fetch_igblastdb.sh b/bin/fetch_igblastdb.sh index 50a341fa..a44e414b 100755 --- a/bin/fetch_igblastdb.sh +++ b/bin/fetch_igblastdb.sh @@ -49,7 +49,7 @@ fi # Fetch database wget -q -r -nH --cut-dirs=5 --no-parent \ - ftp://ftp.ncbi.nih.gov/blast/executables/igblast/release/database \ + ftp://ftp.ncbi.nlm.nih.gov/blast/executables/igblast/release/database \ -P ${OUTDIR}/database # Extract @@ -59,11 +59,11 @@ tar -C ${OUTDIR}/database -xf ${OUTDIR}/database/rhesus_monkey_VJ.tar if $DOWNLOAD_ALL; then # Fetch internal_data wget -q -r -nH --cut-dirs=5 --no-parent \ - ftp://ftp.ncbi.nih.gov/blast/executables/igblast/release/old_internal_data \ + ftp://ftp.ncbi.nlm.nih.gov/blast/executables/igblast/release/old_internal_data \ -P ${OUTDIR}/internal_data # Fetch optional_file wget -q -r -nH --cut-dirs=5 --no-parent \ - ftp://ftp.ncbi.nih.gov/blast/executables/igblast/release/old_optional_file \ + ftp://ftp.ncbi.nlm.nih.gov/blast/executables/igblast/release/old_optional_file \ -P ${OUTDIR}/optional_file fi diff --git a/bin/log_parsing.py b/bin/log_parsing.py index dce9c831..d262f51b 100755 --- a/bin/log_parsing.py +++ b/bin/log_parsing.py @@ -52,7 +52,7 @@ df_process_list = [] for process in processes: - find = subprocess.check_output(["find", process, "-name", "*command_log.txt"]) + find = subprocess.check_output(["find", process, "-name", "*command_log*"]) log_files = find.decode().split("\n") log_files = list(filter(None, log_files)) @@ -90,50 +90,37 @@ elif process in ["mask_primers", "filter_by_sequence_quality"]: s_code = [] + s_readtype = [] output_file = [] - seqs_R1 = [] - seqs_R2 = [] - pass_R1 = [] - pass_R2 = [] - fail_R1 = [] - fail_R2 = [] + n_seqs = [] + n_pass = [] + n_fail = [] process_name = [] for logfile in log_files: - c = 0 + if "_R1" in logfile: + s_readtype.append("R1") + elif "_R2" in logfile: + s_readtype.append("R2") with open(logfile, "r") as f: for line in f: if " START>" in line: - if c < 1: - s_code.append(logfile.split("/")[1].split("_command_log")[0]) - - process_name.append(process) + s_code.append(logfile.split("/")[1].split("_command_log")[0]) + process_name.append(process) elif "SEQUENCES>" in line: - if c < 1: - seqs_R1.append(line.strip().removeprefix("SEQUENCES> ")) - else: - seqs_R2.append(line.strip().removeprefix("SEQUENCES> ")) + n_seqs.append(line.strip().removeprefix("SEQUENCES> ")) elif "PASS>" in line: - if c < 1: - pass_R1.append(line.strip().removeprefix("PASS> ")) - else: - pass_R2.append(line.strip().removeprefix("PASS> ")) + n_pass.append(line.strip().removeprefix("PASS> ")) elif "FAIL>" in line: - if c < 1: - 
fail_R1.append(line.strip().removeprefix("FAIL> ")) - c += 1 - else: - fail_R2.append(line.strip().removeprefix("FAIL> ")) + n_fail.append(line.strip().removeprefix("FAIL> ")) df_process = pd.DataFrame.from_dict( { "Sample": s_code, - "start_R1": seqs_R1, - "start_R2": seqs_R2, - "pass_R1": pass_R1, - "pass_R2": pass_R2, - "fail_R1": fail_R1, - "fail_R2": fail_R2, + "readtype": s_readtype, + "start": n_seqs, + "pass": n_pass, + "fail": n_fail, "process": process_name, } ) @@ -344,48 +331,13 @@ df_process_list.append(df_process) -# Getting table colnames - -colnames = [ - "Sample", - "Sequences_R1", - "Sequences_R2", - "Filtered_quality_R1", - "Filtered_quality_R2", - "Mask_primers_R1", - "Mask_primers_R2", - "Paired", - "Build_consensus", - "Assemble_pairs", - "Unique", - "Representative_2", - "Igblast", -] - - -values = [ - df_process_list[0].sort_values(by=["Sample"]).iloc[:, 0].tolist(), - df_process_list[0].sort_values(by=["Sample"]).loc[:, "start_R1"].tolist(), - df_process_list[0].sort_values(by=["Sample"]).loc[:, "start_R2"].tolist(), - df_process_list[0].sort_values(by=["Sample"]).loc[:, "pass_R1"].tolist(), - df_process_list[0].sort_values(by=["Sample"]).loc[:, "pass_R2"].tolist(), - df_process_list[1].sort_values(by=["Sample"]).loc[:, "pass_R1"].tolist(), - df_process_list[1].sort_values(by=["Sample"]).loc[:, "pass_R2"].tolist(), - df_process_list[2].sort_values(by=["Sample"]).loc[:, "pass_pairs"].tolist(), - df_process_list[4].sort_values(by=["Sample"]).loc[:, "pass_pairs"].tolist(), - df_process_list[5].sort_values(by=["Sample"]).loc[:, "pass_pairs"].tolist(), - df_process_list[6].sort_values(by=["Sample"]).loc[:, "unique"].tolist(), - df_process_list[7].sort_values(by=["Sample"]).loc[:, "repres_2"].tolist(), - df_process_list[7].sort_values(by=["Sample"]).loc[:, "pass_igblast"].tolist(), -] - # Tables provide extra info and help debugging df_process_list[0].to_csv( path_or_buf="Table_all_details_filter_quality.tsv", sep="\t", header=True, - index=False, + index=True, ) df_process_list[1].to_csv(path_or_buf="Table_all_details_mask_primers.tsv", sep="\t", header=True, index=False) df_process_list[2].to_csv(path_or_buf="Table_all_details_paired.tsv", sep="\t", header=True, index=False) @@ -393,7 +345,7 @@ path_or_buf="Table_all_details_build_consensus.tsv", sep="\t", header=True, - index=False, + index=True, ) df_process_list[4].to_csv(path_or_buf="Table_all_details_repaired.tsv", sep="\t", header=True, index=False) df_process_list[5].to_csv( @@ -413,6 +365,43 @@ index=False, ) +# Getting table colnames + +colnames = [ + "Sample", + "Sequences_R1", + "Sequences_R2", + "Filtered_quality_R1", + "Filtered_quality_R2", + "Mask_primers_R1", + "Mask_primers_R2", + "Paired", + "Build_consensus", + "Assemble_pairs", + "Unique", + "Representative_2", + "Igblast", +] + +print(df_process_list[0].sort_values(by=["Sample"]).pivot(index="Sample", columns="readtype")) + +values = [ + df_process_list[2].sort_values(by=["Sample"]).iloc[:, 0].tolist(), + df_process_list[0].sort_values(by=["Sample"]).pivot(index="Sample", columns="readtype")["start"]["R1"].tolist(), + df_process_list[0].sort_values(by=["Sample"]).pivot(index="Sample", columns="readtype")["start"]["R2"].tolist(), + df_process_list[0].sort_values(by=["Sample"]).pivot(index="Sample", columns="readtype")["pass"]["R1"].tolist(), + df_process_list[0].sort_values(by=["Sample"]).pivot(index="Sample", columns="readtype")["pass"]["R2"].tolist(), + df_process_list[1].sort_values(by=["Sample"]).pivot(index="Sample", 
columns="readtype")["pass"]["R1"].tolist(), + df_process_list[1].sort_values(by=["Sample"]).pivot(index="Sample", columns="readtype")["pass"]["R2"].tolist(), + df_process_list[2].sort_values(by=["Sample"]).loc[:, "pass_pairs"].tolist(), + df_process_list[4].sort_values(by=["Sample"]).loc[:, "pass_pairs"].tolist(), + df_process_list[5].sort_values(by=["Sample"]).loc[:, "pass_pairs"].tolist(), + df_process_list[6].sort_values(by=["Sample"]).loc[:, "unique"].tolist(), + df_process_list[7].sort_values(by=["Sample"]).loc[:, "repres_2"].tolist(), + df_process_list[7].sort_values(by=["Sample"]).loc[:, "pass_igblast"].tolist(), +] + + final_table = dict(zip(colnames, values)) print(final_table) df_final_table = pd.DataFrame.from_dict(final_table) diff --git a/bin/reveal_add_metadata.R b/bin/reveal_add_metadata.R index f2ff5b5f..1745da89 100755 --- a/bin/reveal_add_metadata.R +++ b/bin/reveal_add_metadata.R @@ -61,8 +61,12 @@ if (!("INPUTID" %in% names(opt))) { # Read metadata file metadata <- read.csv(opt$METADATA, sep = "\t", header = TRUE, stringsAsFactors = F) +# Merging samples over multiple lanes introduces multi-rows per sample +# We expect only one row per sample metadata <- metadata %>% - filter(sample_id == opt$INPUTID) + dplyr::filter(sample_id == opt$INPUTID) %>% + dplyr::select(!starts_with("filename_")) %>% + dplyr::distinct() if (nrow(metadata) != 1) { stop("Expecting nrow(metadata) == 1; nrow(metadata) == ", nrow(metadata), " found") @@ -81,10 +85,7 @@ internal_fields <- "id", "filetype", "valid_single_cell", - "valid_pcr_target_locus", - "filename_R1", - "filename_R2", - "filename_I1" + "valid_pcr_target_locus" ) metadata <- metadata[, !colnames(metadata) %in% internal_fields] diff --git a/conf/clontech_umi_bcr.config b/conf/clontech_umi_bcr.config new file mode 100644 index 00000000..d2d17e01 --- /dev/null +++ b/conf/clontech_umi_bcr.config @@ -0,0 +1,41 @@ +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Nextflow config file for running minimal tests +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Defines input files and everything required to run a fast and simple pipeline test. 
+ + Use as follows: + nextflow run nf-core/airrflow -profile clontech_umi_bcr, --outdir + +---------------------------------------------------------------------------------------- +*/ + +params { + config_profile_name = 'Takara Bio / Clontech SMARTer v2' + config_profile_description = 'Profile to run pipeline for the Takara Bio / Clontech SMARTer v2 (UMI) BCR protocol profile' + + mode = 'fastq' + + library_generation_method = 'dt_5p_race_umi' + + cprimers = 'https://bitbucket.org/kleinstein/immcantation/raw/c98269b194e9c6262fe3b098be3600ba7f64b85c/protocols/Universal/Human_IG_CRegion_RC.fasta' + + // primer options + cprimer_position = 'R1' + cprimer_start = 0 + vprimer_start = 0 + umi_length = 12 + umi_position = 'R2' + cluster_sets = false + + + // Mask primer options + maskprimers_align = true + primer_extract_len = 7 + primer_mask_mode = 'cut' + primer_maxlen = 70 + primer_r1_maxerror = 0.2 + assemblepairs_sequential = true + primer_consensus = 0.6 + isotype_column = 'cregion' +} diff --git a/conf/clontech_umi_tcr.config b/conf/clontech_umi_tcr.config new file mode 100644 index 00000000..d620dcee --- /dev/null +++ b/conf/clontech_umi_tcr.config @@ -0,0 +1,44 @@ +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Nextflow config file for running minimal tests +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Defines input files and everything required to run a fast and simple pipeline test. + + Use as follows: + nextflow run nf-core/airrflow -profile clontech_umi_tcr, --outdir + +---------------------------------------------------------------------------------------- +*/ + +params { + config_profile_name = 'Takara Bio / Clontech SMARTer v2 TCR' + config_profile_description = 'Profile to run pipeline for the Takara Bio / Clontech SMARTer v2 (UMI) TCR protocol profile' + + mode = 'fastq' + + library_generation_method = 'dt_5p_race_umi' + + cprimers = 'https://bitbucket.org/kleinstein/immcantation/raw/16f94088c1df5c7a0ee1c9ea8b403cd4d2488e8a/protocols/Universal/Human_TR_CRegion_RC.fasta' + + // primer options + cprimer_position = 'R1' + cprimer_start = 0 + vprimer_start = 0 + umi_length = 12 + umi_position = 'R2' + cluster_sets = false + + + // Mask primer options + maskprimers_align = true + primer_extract_len = 7 + primer_mask_mode = 'cut' + primer_maxlen = 70 + primer_r1_maxerror = 0.2 + assemblepairs_sequential = true + primer_consensus = 0.6 + + // TCR options + clonal_threshold = 0 + skip_lineage = true +} diff --git a/conf/modules.config b/conf/modules.config index df8fad6f..a20f0bad 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -138,7 +138,7 @@ process { ] } - withName: PRESTO_MASKPRIMERS { + withName: PRESTO_MASKPRIMERS_UMI { publishDir = [ path: { "${params.outdir}/presto/02-maskprimers/${meta.id}" }, mode: params.publish_dir_mode, @@ -146,6 +146,36 @@ process { ] } + withName: PRESTO_MASKPRIMERS_ALIGN { + publishDir = [ + path: { "${params.outdir}/presto/02-maskprimers/${meta.id}" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + ext.args = '--skiprc --pf CREGION' + ext.args2 = '-f ID CREGION ERROR' + } + + withName: PRESTO_ALIGN_CREGION { + publishDir = [ + path: { "${params.outdir}/presto/internal_cregion/${meta.id}" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename } + ] + ext.args = '--skiprc --revpr --pf CREGION' + ext.args2 = '-f ID PRIMER ERROR --outname cregion_alignment' + } + + withName: PRESTO_MASKPRIMERS_EXTRACT { + publishDir = [ + path: { "${params.outdir}/presto/02-maskprimers/${meta.id}" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + ext.args = '--barcode --bf BARCODE' + ext.args2 = '-f ID PRIMER ERROR PRSTART' + } + withName: PRESTO_MASKPRIMERS_POSTASSEMBLY_SANS_UMI { publishDir = [ path: { "${params.outdir}/presto/03-maskprimers/${meta.id}" }, @@ -160,6 +190,16 @@ process { mode: params.publish_dir_mode, saveAs: { filename -> filename.equals('versions.yml') ? null : filename } ] + ext.args = "--coord illumina" + } + + withName: PRESTO_PAIRSEQ_ALIGN { + publishDir = [ + path: { "${params.outdir}/presto/03-pairseq/${meta.id}" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + ext.args = '--1f CREGION --coord illumina' } withName: PRESTO_CLUSTERSETS { @@ -184,9 +224,20 @@ process { mode: params.publish_dir_mode, saveAs: { filename -> filename.equals('versions.yml') ? null : filename } ] - ext.args = '' - ext.args2 = '' - ext.args3 = 'ID BARCODE SEQCOUNT PRIMER PRCOUNT PRCONS PRFREQ CONSCOUNT' + ext.args = '--pf PRIMER' + ext.args2 = '--pf PRIMER' + ext.args3 = '-f ID BARCODE SEQCOUNT PRIMER PRCOUNT PRCONS PRFREQ CONSCOUNT ERROR' + } + + withName: PRESTO_BUILDCONSENSUS_ALIGN { + publishDir = [ + path: { "${params.outdir}/presto/06-build-consensus/${meta.id}" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + ext.args = '--pf CREGION' + ext.args2 = '--pf CREGION' + ext.args3 = '-f ID BARCODE SEQCOUNT PRIMER PRCOUNT PRCONS PRFREQ CONSCOUNT ERROR' } withName: PRESTO_POSTCONSENSUS_PAIRSEQ { @@ -207,6 +258,16 @@ process { ext.args2 = '-f ID BARCODE SEQCOUNT PRIMER PRCOUNT PRCONS PRFREQ CONSCOUNT LENGTH OVERLAP ERROR PVALUE' } + withName: PRESTO_ASSEMBLEPAIRS_SEQUENTIAL { + publishDir = [ + path: { "${params.outdir}/presto/08-assemble-pairs/${meta.id}" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + ext.args = '--coord presto --rc tail --1f CONSCOUNT --2f CONSCOUNT PRCONS --minlen 8 --maxerror 0.3 --alpha 1e-5 --scanrev --minident 0.5 --evalue 1e-5 --maxhits 100 --aligner blastn' + ext.args2 = '-f ID REFID LENGTH OVERLAP GAP ERROR PVALUE EVALUE1 EVALUE2 IDENTITY FIELDS1 FIELDS2' + } + withName: PRESTO_ASSEMBLEPAIRS_SANS_UMI { publishDir = [ path: { "${params.outdir}/presto/01-assemble-pairs/${meta.id}" }, @@ -232,6 +293,14 @@ process { ext.args = 'PRCONS PRCONS' } + withName: PRESTO_PARSEHEADERS_CREGION { + publishDir = [ + enabled: false + ] + ext.subcommand = 'rename' + ext.args = '-f PRCONS -k CREGION' + } + withName: PRESTO_PARSEHEADERS_PRIMERS_SANS_UMI { publishDir = [ enabled: false @@ -256,6 +325,26 @@ process { ext.args2 = '-f HEADER DUPCOUNT CONSCOUNT' } + withName: PRESTO_COLLAPSESEQ_ALIGN { + publishDir = [ + path: { "${params.outdir}/presto/09-collapseseq/${meta.id}" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename } + ] + ext.args = '-n 0 --inner --uf CREGION --cf CONSCOUNT --act sum --keepmiss' + ext.args2 = '-f HEADER DUPCOUNT CONSCOUNT' + } + + withName: PRESTO_COLLAPSESEQ_CREGION { + publishDir = [ + path: { "${params.outdir}/presto/09-collapseseq/${meta.id}" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + ext.args = '-n 0 --inner --uf PRCONS CREGION --cf CONSCOUNT --act sum --keepmiss' + ext.args2 = '-f HEADER DUPCOUNT CONSCOUNT' + } + withName: PRESTO_COLLAPSESEQ_SANS_UMI { publishDir = [ path: { "${params.outdir}/presto/04-collapseseq/${meta.id}" }, @@ -453,7 +542,6 @@ process { ] ext.args = ['outname':'', 'model':'hierarchical', 'method':'nt', 'linkage':'single', - 'skip_convergence':true, 'min_n':30] } @@ -465,7 +553,6 @@ process { ] ext.args = ['outname':'', 'model':'hierarchical', 'method':'nt', 'linkage':'single', - 'skip_convergence':false, 'min_n':30] } @@ -487,7 +574,7 @@ process { withName: AIRRFLOW_REPORT { publishDir = [ - path: { "${params.outdir}/repertoire_analysis" }, + path: { "${params.outdir}" }, mode: params.publish_dir_mode, saveAs: { filename -> filename.equals('versions.yml') ? null : filename } ] @@ -511,7 +598,7 @@ process { } withName: 'MULTIQC' { - ext.args = params.multiqc_title ? "--title \"$params.multiqc_title\"" : '' + ext.args = { params.multiqc_title ? "--title \"$params.multiqc_title\"" : '' } publishDir = [ path: { "${params.outdir}/multiqc" }, mode: params.publish_dir_mode, diff --git a/conf/nebnext_umi_bcr.config b/conf/nebnext_umi_bcr.config new file mode 100644 index 00000000..d6bb6d5b --- /dev/null +++ b/conf/nebnext_umi_bcr.config @@ -0,0 +1,39 @@ +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Nextflow config file for running minimal tests +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Defines input files and everything required to run a fast and simple pipeline test. 
+ + Use as follows: + nextflow run nf-core/airrflow -profile nebnext_umi_bcr, --outdir + +---------------------------------------------------------------------------------------- +*/ + +params { + config_profile_name = 'NEBNext - AbSeq BCR profile' + config_profile_description = 'Profile to run pipeline for the NEBNext - AbSeq (UMI) BCR experimental protocol' + + mode = 'fastq' + cprimers = 'https://bitbucket.org/kleinstein/immcantation/raw/354f49228a43b4c2858d67fb09886126b314e317/protocols/AbSeq/AbSeq_R1_Human_IG_Primers.fasta' + race_linker = 'https://bitbucket.org/kleinstein/immcantation/raw/354f49228a43b4c2858d67fb09886126b314e317/protocols/AbSeq/AbSeq_R2_TS.fasta' + + library_generation_method = 'dt_5p_race_umi' + cprimer_position = 'R1' + cprimer_start = 0 + umi_length = 17 + umi_position = 'R2' + cluster_sets = false + + //presto options + primer_r1_maxerror = 0.2 + primer_r2_maxerror = 0.5 + assemblepairs_sequential = true + maskprimers_align = false + align_cregion = true + internal_cregion_sequences = 'https://bitbucket.org/kleinstein/immcantation/raw/2025594fd9a2a64df4444070171d6fc00c4e78c7/protocols/AbSeq/AbSeq_Human_IG_InternalCRegion.fasta' + cregion_maxlen = 100 + cregion_maxerror = 0.3 + cregion_mask_mode = 'tag' + isotype_column = 'cregion' +} diff --git a/conf/nebnext_umi_tcr.config b/conf/nebnext_umi_tcr.config new file mode 100644 index 00000000..e030d952 --- /dev/null +++ b/conf/nebnext_umi_tcr.config @@ -0,0 +1,41 @@ +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Nextflow config file for running minimal tests +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Defines input files and everything required to run a fast and simple pipeline test. 
+
+    Use as follows:
+        nextflow run nf-core/airrflow -profile nebnext_umi_tcr, --outdir
+
+----------------------------------------------------------------------------------------
+*/
+
+params {
+    config_profile_name = 'NEBNext - AbSeq TCR profile'
+    config_profile_description = 'Profile to run pipeline for the NEBNext - AbSeq (UMI) TCR experimental protocol'
+
+    mode = 'fastq'
+    cprimers = 'https://bitbucket.org/kleinstein/immcantation/raw/16f94088c1df5c7a0ee1c9ea8b403cd4d2488e8a/protocols/AbSeq/AbSeq_R1_Human_TR_Primers.fasta'
+    race_linker = 'https://bitbucket.org/kleinstein/immcantation/raw/354f49228a43b4c2858d67fb09886126b314e317/protocols/AbSeq/AbSeq_R2_TS.fasta'
+
+    library_generation_method = 'dt_5p_race_umi'
+    cprimer_position = 'R1'
+    cprimer_start = 0
+    umi_length = 17
+    umi_position = 'R2'
+    cluster_sets = false
+
+    //presto options
+    primer_r1_maxerror = 0.2
+    primer_r2_maxerror = 0.5
+    assemblepairs_sequential = true
+    maskprimers_align = false
+    align_cregion = false
+    cregion_maxlen = 100
+    cregion_maxerror = 0.3
+    cregion_mask_mode = 'tag'
+
+    //TCR options
+    clonal_threshold = 0
+    skip_lineage = true
+}
diff --git a/conf/test.config b/conf/test.config
index 4ca62b41..1bf667f7 100644
--- a/conf/test.config
+++ b/conf/test.config
@@ -34,6 +34,7 @@ params {
     umi_start = 6
     umi_position = 'R1'
     index_file = true
+    isotype_column = 'c_primer'
 }
 
 process{
diff --git a/conf/test_10x_sc.config b/conf/test_10x_sc.config
new file mode 100644
index 00000000..76936ef9
--- /dev/null
+++ b/conf/test_10x_sc.config
@@ -0,0 +1,28 @@
+/*
+ * -------------------------------------------------
+ *  Nextflow config file for running tests
+ * -------------------------------------------------
+ * Defines bundled input files and everything required
+ * to run a fast and simple test. Use as follows:
+ *   nextflow run nf-core/airrflow -profile test_10x_sc,
+ */
+
+params {
+    config_profile_name = 'Test 10xGenomics single cell data'
+    config_profile_description = 'Minimal test dataset to check pipeline function with raw single cell data from 10xGenomics'
+
+    // Limit resources so that this can run on GitHub Actions
+    max_cpus = 2
+    max_memory = 6.GB
+    max_time = 48.h
+
+    // params
+    mode = 'fastq'
+    library_generation_method = 'sc_10x_genomics'
+    clonal_threshold = 0
+
+
+    // Input data
+    input = 'https://raw.githubusercontent.com/nf-core/test-datasets/airrflow/testdata-sc/10x_sc_raw.tsv'
+    reference_10x = 'https://raw.githubusercontent.com/nf-core/test-datasets/airrflow/testdata-sc/refdata-cellranger-vdj-GRCh38-alts-ensembl-5.0.0.tar.gz'
+}
diff --git a/conf/test_clontech_umi.config b/conf/test_clontech_umi.config
new file mode 100644
index 00000000..2263d057
--- /dev/null
+++ b/conf/test_clontech_umi.config
@@ -0,0 +1,32 @@
+/*
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+    Nextflow config file for running minimal tests
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+    Defines input files and everything required to run a fast and simple pipeline test.
+ + Use as follows: + nextflow run nf-core/airrflow -profile test_clontech_umi, --outdir + +---------------------------------------------------------------------------------------- +*/ +includeConfig 'clontech_umi_bcr.config' + +params { + config_profile_name = 'Test profile for TAKARA protocol' + config_profile_description = 'Minimal test dataset to check pipeline function' + + // Limit resources so that this can run on GitHub Actions + max_cpus = 2 + max_memory = '6.GB' + max_time = '6.h' + + // Input data + input = 'https://raw.githubusercontent.com/nf-core/test-datasets/airrflow/testdata-clontech/samplesheet.tsv' + + imgtdb_base = 'https://raw.githubusercontent.com/nf-core/test-datasets/airrflow/database-cache/imgtdb_base.zip' + igblast_base = 'https://raw.githubusercontent.com/nf-core/test-datasets/airrflow/database-cache/igblast_base.zip' + + clonal_threshold = 0.1 + skip_lineage = true + +} diff --git a/conf/test_fetchimgt.config b/conf/test_fetchimgt.config index e223d687..cc6608f7 100644 --- a/conf/test_fetchimgt.config +++ b/conf/test_fetchimgt.config @@ -33,6 +33,7 @@ params { umi_start = 6 umi_position = 'R1' index_file = true + isotype_column = 'c_primer' } process{ diff --git a/conf/test_nebnext_umi.config b/conf/test_nebnext_umi.config new file mode 100644 index 00000000..d1712c8d --- /dev/null +++ b/conf/test_nebnext_umi.config @@ -0,0 +1,33 @@ +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Nextflow config file for running minimal tests +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Defines input files and everything required to run a fast and simple pipeline test. + + Use as follows: + nextflow run nf-core/airrflow -profile test_nebnext_umi, --outdir + +---------------------------------------------------------------------------------------- +*/ + +includeConfig 'nebnext_umi_bcr.config' + +params { + config_profile_name = 'Test profile for NEBNext protocol' + config_profile_description = 'Minimal test dataset to check pipeline function' + + // Limit resources so that this can run on GitHub Actions + max_cpus = 2 + max_memory = '6.GB' + max_time = '6.h' + + // Input data + input = 'https://raw.githubusercontent.com/nf-core/test-datasets/airrflow/testdata-neb/samplesheet.tsv' + + imgtdb_base = 'https://raw.githubusercontent.com/nf-core/test-datasets/airrflow/database-cache/imgtdb_base.zip' + igblast_base = 'https://raw.githubusercontent.com/nf-core/test-datasets/airrflow/database-cache/igblast_base.zip' + + clonal_threshold = 0.1 + skip_lineage = true + +} diff --git a/conf/test_no_umi.config b/conf/test_no_umi.config index f2952cb3..e17a6526 100644 --- a/conf/test_no_umi.config +++ b/conf/test_no_umi.config @@ -24,6 +24,7 @@ params { vprimer_start = 4 primer_revpr = true umi_length = 0 + isotype_column = 'c_primer' // Input data input = 'https://raw.githubusercontent.com/nf-core/test-datasets/airrflow/testdata-no-umi/Metadata_test-no-umi_airr.tsv' diff --git a/conf/test_nocluster.config b/conf/test_nocluster.config index 418dd6c7..469de7b8 100644 --- a/conf/test_nocluster.config +++ b/conf/test_nocluster.config @@ -36,6 +36,7 @@ params { cluster_sets = false index_file = true clonal_threshold = 0.15 + isotype_column = 'c_primer' } process{ diff --git a/conf/test_raw_immcantation_devel.config b/conf/test_raw_immcantation_devel.config index fcaba109..b309cb60 100644 --- a/conf/test_raw_immcantation_devel.config +++ b/conf/test_raw_immcantation_devel.config @@ -36,6 +36,7 @@ params { 
umi_start = 6
     umi_position = 'R1'
     index_file = true
+    isotype_column = 'c_primer'
 }
 
 process{
diff --git a/conf/test_tcr.config b/conf/test_tcr.config
index 5010c010..fb878caa 100644
--- a/conf/test_tcr.config
+++ b/conf/test_tcr.config
@@ -25,6 +25,7 @@ params {
     library_generation_method = 'dt_5p_race_umi'
     cprimer_position = 'R1'
     clonal_threshold = 0
+    skip_lineage = true
 
     // Input data
diff --git a/docs/images/airrflow_workflow_overview.pdf b/docs/images/airrflow_workflow_overview.pdf
new file mode 100644
index 00000000..b081f97c
Binary files /dev/null and b/docs/images/airrflow_workflow_overview.pdf differ
diff --git a/docs/images/airrflow_workflow_overview.png b/docs/images/airrflow_workflow_overview.png
index 17b8eac2..d930c4d2 100644
Binary files a/docs/images/airrflow_workflow_overview.png and b/docs/images/airrflow_workflow_overview.png differ
diff --git a/docs/images/airrflow_workflow_overview.svg b/docs/images/airrflow_workflow_overview.svg
deleted file mode 100644
index 5a208af6..00000000
--- a/docs/images/airrflow_workflow_overview.svg
+++ /dev/null
@@ -1,2743 +0,0 @@
[2,743 deleted lines of SVG markup omitted: source of the old workflow overview figure, now tracked as PNG/PDF; the only recoverable text is the credit line "CC-BY 4.0. Design originally by Zandra Fagernäs" and scattered tool labels]
diff --git a/docs/images/metro-map-airrflow.pdf b/docs/images/metro-map-airrflow.pdf
new file mode 100644
index 00000000..b55e1477
Binary files /dev/null and b/docs/images/metro-map-airrflow.pdf differ
diff --git a/docs/images/metro-map-airrflow.png b/docs/images/metro-map-airrflow.png
index 9230e3ec..ac4ee67e 100644
Binary files a/docs/images/metro-map-airrflow.png and b/docs/images/metro-map-airrflow.png differ
diff --git a/docs/images/metro-map-airrflow.svg b/docs/images/metro-map-airrflow.svg
deleted file mode 100644
index 1117253c..00000000
--- a/docs/images/metro-map-airrflow.svg
+++ /dev/null
@@ -1,5312 +0,0 @@
[5,312 deleted lines of SVG markup omitted: source of the old airrflow metro-map figure, now tracked as PNG/PDF]
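The protocol presets added above (`clontech_umi_bcr`, `clontech_umi_tcr`, `nebnext_umi_bcr`, `nebnext_umi_tcr`) and their companion `test_*` profiles are ordinary Nextflow configuration profiles: at runtime they are combined with a container profile, and the test profiles bundle their own input data so only an output directory has to be supplied. A minimal sketch of the two kinds of invocation, assuming Docker is available locally; the samplesheet and output paths are placeholders:

```bash
# Run the Takara/Clontech SMARTer BCR preset on your own bulk fastq data
# (input_samplesheet.tsv is a placeholder for your TSV samplesheet)
nextflow run nf-core/airrflow \
    -profile clontech_umi_bcr,docker \
    --input input_samplesheet.tsv \
    --outdir results

# Exercise the same preset against the bundled test dataset
nextflow run nf-core/airrflow \
    -profile test_clontech_umi,docker \
    --outdir test_results
```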
diff --git a/docs/images/nf-core-airrflow_logo_dark.png b/docs/images/nf-core-airrflow_logo_dark.png
index 2ced6a8f..b1c9e6df 100644
Binary files a/docs/images/nf-core-airrflow_logo_dark.png and b/docs/images/nf-core-airrflow_logo_dark.png differ
diff --git a/docs/images/nf-core-airrflow_logo_light.png b/docs/images/nf-core-airrflow_logo_light.png
index 2c7265f7..f346829a 100644
Binary files a/docs/images/nf-core-airrflow_logo_light.png and b/docs/images/nf-core-airrflow_logo_light.png differ
diff --git a/docs/usage.md b/docs/usage.md
index aafc8349..69b7696b 100644
--- a/docs/usage.md
+++ b/docs/usage.md
@@ -29,6 +29,28 @@ nextflow run nf-core/airrflow \
 --outdir results
 ```
 
+You can optionally set a protocol profile if you're running the pipeline with data from one of the supported profiles. The full list of supported profiles can be found in the section [Supported protocol profiles](#supported-protocol-profiles). An example command running the NEBNext UMI protocol profile with docker containers is:
+
+```bash
+nextflow run nf-core/airrflow \
+-profile nebnext_umi,docker \
+--mode fastq \
+--input input_samplesheet.tsv \
+--outdir results
+```
+
+A typical command to run the pipeline from **single cell raw fastq files** is:
+
+```bash
+nextflow run nf-core/airrflow -r dev \
+-profile \
+--mode fastq \
+--input input_samplesheet.tsv \
+--library_generation_method sc_10x_genomics \
+--reference_10x reference/refdata-cellranger-vdj-GRCh38-alts-ensembl-5.0.0.tar.gz \
+--outdir ./results
+```
+
 A typical command for running the pipeline departing from **single-cell AIRR rearrangement tables or assembled bulk sequencing fasta** data is:
 
 ```bash
@@ -39,7 +61,7 @@ nextflow run nf-core/airrflow \
 --outdir results
 ```
 
-Check the section [Input samplesheet](#input-samplesheet) below for instructions on how to create the samplesheet, and the [Supported library generation protocols](#supported-bulk-library-generation-methods-protocols) section below for examples on how to run the pipeline for different bulk sequencing protocols.
+Check the section [Input samplesheet](#input-samplesheet) below for instructions on how to create the samplesheet, and the [Supported library generation protocols](#supported-bulk-library-generation-methods-protocols) section below for examples on how to run the pipeline for the different bulk sequencing protocols and for the 10xGenomics single cell sequencing protocol.
 
 For more information about the parameters, please refer to the [parameters documentation](https://nf-co.re/airrflow/parameters).
 
 The command above will launch the pipeline with the `docker` configuration profile. See below for more information about profiles.
@@ -101,7 +123,7 @@ If you wish to share such profile (such as upload as supplementary material for
 
 ## Input samplesheet
 
-### Fastq input samplesheet (bulk sequencing only)
+### Fastq input samplesheet (bulk sequencing)
 
 The required input file for processing raw BCR or TCR bulk targeted sequencing data is a sample sheet in TSV format (tab separated).
The columns `sample_id`, `filename_R1`, `filename_R2`, `subject_id`, `species`, `tissue`, `pcr_target_locus`, `single_cell`, `sex`, `age` and `biomaterial_provider` are required. An example samplesheet is:
@@ -121,7 +143,7 @@ The required input file for processing raw BCR or TCR bulk targeted sequencing d
 - `biomaterial_provider`: Institution / research group that provided the samples.
 - `sex`: Subject biological sex (`female`, `male`, etc.).
 - `age`: Subject biological age.
-- `single_cell`: TRUE or FALSE. Fastq input samplesheet only supports a FALSE value.
+- `single_cell`: TRUE or FALSE.
 
 Other optional columns can be added. These columns will be available when building the contrasts for the repertoire comparison report. It is recommended that these columns also follow the AIRR nomenclature. Examples are:
 
@@ -133,6 +155,25 @@ Other optional columns can be added. These columns will be available when buildi
 
 The metadata specified in the input file will then be automatically annotated in a column with the same header in the tables generated by the pipeline.
 
+### Fastq input samplesheet (single cell sequencing)
+
+The required input file for processing raw BCR or TCR single cell targeted sequencing data is a sample sheet in TSV format (tab separated). The columns `sample_id`, `filename_R1`, `filename_R2`, `subject_id`, `species`, `tissue`, `pcr_target_locus`, `single_cell`, `sex`, `age` and `biomaterial_provider` are required. You can refer to the bulk fastq input section for documentation on the individual columns.
+An example samplesheet is:
+
+| sample_id | filename_R1 | filename_R2 | subject_id | species | pcr_target_locus | tissue | sex | age | biomaterial_provider | single_cell | intervention | collection_time_point_relative | cell_subset |
+| --------- | ------------------------------- | ------------------------------- | ---------- | ------- | ---------------- | ------ | ------ | --- | -------------------- | ----------- | -------------- | ------------------------------ | ------------ |
+| sample01 | sample1_S1_L001_R1_001.fastq.gz | sample1_S1_L001_R2_001.fastq.gz | Subject02 | human | IG | blood | NA | 53 | sequencing_facility | FALSE | Drug_treatment | Baseline | plasmablasts |
+| sample02 | sample2_S1_L001_R1_001.fastq.gz | sample2_S1_L001_R2_001.fastq.gz | Subject02 | human | TR | blood | female | 78 | sequencing_facility | FALSE | Drug_treatment | Baseline | plasmablasts |
+
+> FASTQ files must conform to the 10xGenomics cellranger naming conventions
>**`[SAMPLE-NAME]`_S1_L00`[LANE-NUMBER]` _`[READ-TYPE]`\_001.fastq.gz** +> +> Read type is one of +> +> - `I1`: Sample index read (optional) +> - `I2`: Sample index read (optional) +> - `R1`: Read 1 +> - `R2`: Read 2 + ### Assembled input samplesheet (bulk or single-cell sequencing) The required input file for processing raw BCR or TCR bulk targeted sequencing data is a sample sheet in TSV format (tab separated). The columns `sample_id`, `filename`, `subject_id`, `species`, `tissue`, `single_cell`, `sex`, `age` and `biomaterial_provider` are required. All fields are explained in the previous section, with the only difference being that there is only one `filename` column for the assembled input samplesheet. The provided file will be different from assembled single-cell or bulk data: @@ -165,6 +206,76 @@ nf-core/airrflow offers full support for the [AIRR standards 1.4](https://docs.a | biomaterial_provider | Samplesheet column | | Name of sample biomaterial provider | | library_generation_method | Parameter | `--library_generation_method` | Generic type of library generation | +## Supported protocol profiles + +### NEBNext Immune Sequencing Kit + +- [New England Biolabs NEBNext Immune sequencing kit](https://www.neb.com/en-us/products/e6320-nebnext-immune-sequencing-kit-human#Product%20Information) + +You can use the `nebnext_umi_bcr` or `nebnext_umi_tcr` preset defaults for analyzing bulk fastq sequencing data that was generated with the NEB Immune Profiling kit. An example using docker containers for the analysis is: + +```bash +nextflow run nf-core/airrflow -r \ +-profile nebnext_umi_bcr,docker \ +--input input_samplesheet.tsv \ +--outdir results +``` + +This profile executes the commands based on the pRESTO pre-set pipeline [presto-abseq.sh](https://bitbucket.org/kleinstein/immcantation/src/master/pipelines/presto-abseq.sh). A summary of the performed steps is: + +- Filter sequences by base quality. +- Score and mask the provided R1 primers and R2 template switch oligo. Primer defaults are taken from the [Immcantation repository](https://bitbucket.org/kleinstein/immcantation/src/master/protocols/AbSeq/). +- Pair sequences, build UMI consensus sequence. +- Assemble read pairs with the pRESTO `AssemblePairs sequential` option. +- Align and annotate the internal C Region (for the BCR specific protocol) for a more specific isotype annotation. +- Remove duplicate sequences and filter to sequences with at least 2 supporting sources. + +Please note that the default primer sequences and internal CRegion sequences are for human. If you wish to run this protocol on mouse or other species, please provide the alternative primers: + +```bash +nextflow run nf-core/airrflow -r \ +-profile nebnext_umi_bcr,docker \ +--input input_samplesheet.tsv \ +--cprimers \ +--internal_cregion_sequences \ +--outdir results +``` + +### Clontech / Takara SMARTer Human BCR Profiling kit + +- [TaKaRa SMARTer Human BCR kit](https://www.takarabio.com/products/next-generation-sequencing/immune-profiling/human-repertoire/human-bcr-profiling-kit-for-illumina-sequencing) + +You can use the `clontech_umi_bcr` or `clontech_umi_tcr` preset defaults for analyzing bulk fastq sequencing data that was generated with the Takara SMARTer Human Profiling kit. 
An example using docker containers for the analysis is:
+
+```bash
+nextflow run nf-core/airrflow -r \
+-profile clontech_umi_bcr,docker \
+--input input_samplesheet.tsv \
+--outdir results
+```
+
+This profile executes the sequence assembly commands based on the pRESTO pre-set pipeline [presto-clontech-umi.sh](https://bitbucket.org/kleinstein/immcantation/src/master/pipelines/presto-clontech-umi.sh). A summary of the performed steps is:
+
+- Filter sequences by base quality.
+- Align and annotate the universal C region sequences in the R1 reads. Defaults are taken from the [Immcantation repository](https://bitbucket.org/kleinstein/immcantation/src/master/protocols/Universal/).
+- Identify the primer sequences and UMI (12 nt length) in the R2 reads.
+- Pair sequences, build UMI consensus sequence.
+- Assemble read pairs with the pRESTO `AssemblePairs sequential` option.
+- Align and annotate the C Region sequences.
+- Remove duplicate sequences and filter to sequences with at least 2 supporting sources.
+
+After the sequence assembly steps, the remaining steps are common for all protocols.
+
+Please note that the default primer sequences and internal CRegion sequences are for human. If you wish to run this protocol on mouse or other species, please provide the alternative primer sequences:
+
+```bash
+nextflow run nf-core/airrflow -r \
+-profile clontech_umi_bcr,docker \
+--input input_samplesheet.tsv \
+--cprimers \
+--outdir results
+```
+
 ## Supported bulk library generation methods (protocols)
 
 When processing bulk sequencing data departing from raw `fastq` reads, several sequencing protocols are supported which can be provided with the parameter `--library_generation_method`.
@@ -300,7 +411,7 @@ This sequencing type requires setting `--library_generation_method race_5p_umi`
 
 #### Takara Bio SMARTer Human BCR
 
-The read configuration when sequenicng with the TAKARA Bio SMARTer Human BCR protocol is the following:
+The read configuration when sequencing with the TAKARA Bio SMARTer Human BCR protocol is the following:
 
 ![nf-core/airrflow](images/TAKARA_RACE_BCR.png)
 
@@ -369,6 +480,37 @@ The UMI barcodes are typically read from an index file but sometimes can be prov
 
 - No UMIs in R1 or R2 reads: if no UMIs are present in the samples, specify `--umi_length 0` to use the sans-UMI subworkflow.
 
+## Supported single cell library generation methods (protocols)
+
+When processing single cell sequencing data departing from raw `fastq` reads, currently only a `--library_generation_method` to support 10xGenomics data is available.
+
+| Library generation methods | Description | Name in pipeline | Commercial protocols |
+| -------------------------- | ----------------------------------------------------------------------------------------------------------- | ---------------- | -------------------- |
+| RT(RHP)+PCR | sequencing data produced from Chromium single cell 5'V(D)J libraries containing cellular barcodes and UMIs. | sc_10x_genomics | 10xGenomics |
+
+### 10xGenomics
+
+This sequencing type requires setting `--library_generation_method sc_10x_genomics`.
+The `cellranger vdj` automatically uses the Chromium cellular barcodes and UMIs to perform sequence assembly, paired clonotype calling and to assemble V(D)J transcripts per cell.
+Examples are provided below to run airrflow to process 10xGenomics raw FASTQ data.
+ +```bash +nextflow run nf-core/airrflow -r dev \ +-profile \ +--mode fastq \ +--input input_samplesheet.tsv \ +--library_generation_method sc_10x_genomics \ +--reference_10x reference/refdata-cellranger-vdj-GRCh38-alts-ensembl-5.0.0.tar.gz \ +--outdir ./results +``` + +#### 10xGenomics reference + +10xGenomics requires a reference. This can be provided using the `--reference_10x` parameter. + +- The 10xGenomics reference can be downloaded from the [download page](https://www.10xgenomics.com/support/software/cell-ranger/downloads) +- To generate a V(D)J segment fasta file as reference from IMGT one can follow the [cellranger docs](https://support.10xgenomics.com/single-cell-vdj/software/pipelines/latest/advanced/references#imgt). + ## Core Nextflow arguments :::note diff --git a/lib/NfcoreTemplate.groovy b/lib/NfcoreTemplate.groovy deleted file mode 100755 index 01b8653d..00000000 --- a/lib/NfcoreTemplate.groovy +++ /dev/null @@ -1,352 +0,0 @@ -// -// This file holds several functions used within the nf-core pipeline template. -// - -import org.yaml.snakeyaml.Yaml -import groovy.json.JsonOutput - -class NfcoreTemplate { - - // - // Check AWS Batch related parameters have been specified correctly - // - public static void awsBatch(workflow, params) { - if (workflow.profile.contains('awsbatch')) { - // Check params.awsqueue and params.awsregion have been set if running on AWSBatch - assert (params.awsqueue && params.awsregion) : "Specify correct --awsqueue and --awsregion parameters on AWSBatch!" - // Check outdir paths to be S3 buckets if running on AWSBatch - assert params.outdir.startsWith('s3:') : "Outdir not on S3 - specify S3 Bucket to run on AWSBatch!" - } - } - - // - // Warn if a -profile or Nextflow config has not been provided to run the pipeline - // - public static void checkConfigProvided(workflow, log) { - if (workflow.profile == 'standard' && workflow.configFiles.size() <= 1) { - log.warn "[$workflow.manifest.name] You are attempting to run the pipeline without any custom configuration!\n\n" + - "This will be dependent on your local compute environment but can be achieved via one or more of the following:\n" + - " (1) Using an existing pipeline profile e.g. `-profile docker` or `-profile singularity`\n" + - " (2) Using an existing nf-core/configs for your Institution e.g. `-profile crick` or `-profile uppmax`\n" + - " (3) Using your own local custom config e.g. `-c /path/to/your/custom.config`\n\n" + - "Please refer to the quick start section and usage docs for the pipeline.\n " - } - } - - // - // Generate version string - // - public static String version(workflow) { - String version_string = "" - - if (workflow.manifest.version) { - def prefix_v = workflow.manifest.version[0] != 'v' ? 
'v' : '' - version_string += "${prefix_v}${workflow.manifest.version}" - } - - if (workflow.commitId) { - def git_shortsha = workflow.commitId.substring(0, 7) - version_string += "-g${git_shortsha}" - } - - return version_string - } - - // - // Construct and send completion email - // - public static void email(workflow, params, summary_params, projectDir, log, multiqc_report=[]) { - - // Set up the e-mail variables - def subject = "[$workflow.manifest.name] Successful: $workflow.runName" - if (!workflow.success) { - subject = "[$workflow.manifest.name] FAILED: $workflow.runName" - } - - def summary = [:] - for (group in summary_params.keySet()) { - summary << summary_params[group] - } - - def misc_fields = [:] - misc_fields['Date Started'] = workflow.start - misc_fields['Date Completed'] = workflow.complete - misc_fields['Pipeline script file path'] = workflow.scriptFile - misc_fields['Pipeline script hash ID'] = workflow.scriptId - if (workflow.repository) misc_fields['Pipeline repository Git URL'] = workflow.repository - if (workflow.commitId) misc_fields['Pipeline repository Git Commit'] = workflow.commitId - if (workflow.revision) misc_fields['Pipeline Git branch/tag'] = workflow.revision - misc_fields['Nextflow Version'] = workflow.nextflow.version - misc_fields['Nextflow Build'] = workflow.nextflow.build - misc_fields['Nextflow Compile Timestamp'] = workflow.nextflow.timestamp - - def email_fields = [:] - email_fields['version'] = NfcoreTemplate.version(workflow) - email_fields['runName'] = workflow.runName - email_fields['success'] = workflow.success - email_fields['dateComplete'] = workflow.complete - email_fields['duration'] = workflow.duration - email_fields['exitStatus'] = workflow.exitStatus - email_fields['errorMessage'] = (workflow.errorMessage ?: 'None') - email_fields['errorReport'] = (workflow.errorReport ?: 'None') - email_fields['commandLine'] = workflow.commandLine - email_fields['projectDir'] = workflow.projectDir - email_fields['summary'] = summary << misc_fields - - // On success try attach the multiqc report - def mqc_report = null - try { - if (workflow.success) { - mqc_report = multiqc_report.getVal() - if (mqc_report.getClass() == ArrayList && mqc_report.size() >= 1) { - if (mqc_report.size() > 1) { - log.warn "[$workflow.manifest.name] Found multiple reports from process 'MULTIQC', will use only one" - } - mqc_report = mqc_report[0] - } - } - } catch (all) { - if (multiqc_report) { - log.warn "[$workflow.manifest.name] Could not attach MultiQC report to summary email" - } - } - - // Check if we are only sending emails on failure - def email_address = params.email - if (!params.email && params.email_on_fail && !workflow.success) { - email_address = params.email_on_fail - } - - // Render the TXT template - def engine = new groovy.text.GStringTemplateEngine() - def tf = new File("$projectDir/assets/email_template.txt") - def txt_template = engine.createTemplate(tf).make(email_fields) - def email_txt = txt_template.toString() - - // Render the HTML template - def hf = new File("$projectDir/assets/email_template.html") - def html_template = engine.createTemplate(hf).make(email_fields) - def email_html = html_template.toString() - - // Render the sendmail template - def max_multiqc_email_size = (params.containsKey('max_multiqc_email_size') ? 
params.max_multiqc_email_size : 0) as nextflow.util.MemoryUnit - def smail_fields = [ email: email_address, subject: subject, email_txt: email_txt, email_html: email_html, projectDir: "$projectDir", mqcFile: mqc_report, mqcMaxSize: max_multiqc_email_size.toBytes() ] - def sf = new File("$projectDir/assets/sendmail_template.txt") - def sendmail_template = engine.createTemplate(sf).make(smail_fields) - def sendmail_html = sendmail_template.toString() - - // Send the HTML e-mail - Map colors = logColours(params.monochrome_logs) - if (email_address) { - try { - if (params.plaintext_email) { throw GroovyException('Send plaintext e-mail, not HTML') } - // Try to send HTML e-mail using sendmail - [ 'sendmail', '-t' ].execute() << sendmail_html - log.info "-${colors.purple}[$workflow.manifest.name]${colors.green} Sent summary e-mail to $email_address (sendmail)-" - } catch (all) { - // Catch failures and try with plaintext - def mail_cmd = [ 'mail', '-s', subject, '--content-type=text/html', email_address ] - if ( mqc_report.size() <= max_multiqc_email_size.toBytes() ) { - mail_cmd += [ '-A', mqc_report ] - } - mail_cmd.execute() << email_html - log.info "-${colors.purple}[$workflow.manifest.name]${colors.green} Sent summary e-mail to $email_address (mail)-" - } - } - - // Write summary e-mail HTML to a file - def output_d = new File("${params.outdir}/pipeline_info/") - if (!output_d.exists()) { - output_d.mkdirs() - } - def output_hf = new File(output_d, "pipeline_report.html") - output_hf.withWriter { w -> w << email_html } - def output_tf = new File(output_d, "pipeline_report.txt") - output_tf.withWriter { w -> w << email_txt } - } - - // - // Construct and send a notification to a web server as JSON - // e.g. Microsoft Teams and Slack - // - public static void IM_notification(workflow, params, summary_params, projectDir, log) { - def hook_url = params.hook_url - - def summary = [:] - for (group in summary_params.keySet()) { - summary << summary_params[group] - } - - def misc_fields = [:] - misc_fields['start'] = workflow.start - misc_fields['complete'] = workflow.complete - misc_fields['scriptfile'] = workflow.scriptFile - misc_fields['scriptid'] = workflow.scriptId - if (workflow.repository) misc_fields['repository'] = workflow.repository - if (workflow.commitId) misc_fields['commitid'] = workflow.commitId - if (workflow.revision) misc_fields['revision'] = workflow.revision - misc_fields['nxf_version'] = workflow.nextflow.version - misc_fields['nxf_build'] = workflow.nextflow.build - misc_fields['nxf_timestamp'] = workflow.nextflow.timestamp - - def msg_fields = [:] - msg_fields['version'] = NfcoreTemplate.version(workflow) - msg_fields['runName'] = workflow.runName - msg_fields['success'] = workflow.success - msg_fields['dateComplete'] = workflow.complete - msg_fields['duration'] = workflow.duration - msg_fields['exitStatus'] = workflow.exitStatus - msg_fields['errorMessage'] = (workflow.errorMessage ?: 'None') - msg_fields['errorReport'] = (workflow.errorReport ?: 'None') - msg_fields['commandLine'] = workflow.commandLine.replaceFirst(/ +--hook_url +[^ ]+/, "") - msg_fields['projectDir'] = workflow.projectDir - msg_fields['summary'] = summary << misc_fields - - // Render the JSON template - def engine = new groovy.text.GStringTemplateEngine() - // Different JSON depending on the service provider - // Defaults to "Adaptive Cards" (https://adaptivecards.io), except Slack which has its own format - def json_path = hook_url.contains("hooks.slack.com") ? 
"slackreport.json" : "adaptivecard.json" - def hf = new File("$projectDir/assets/${json_path}") - def json_template = engine.createTemplate(hf).make(msg_fields) - def json_message = json_template.toString() - - // POST - def post = new URL(hook_url).openConnection(); - post.setRequestMethod("POST") - post.setDoOutput(true) - post.setRequestProperty("Content-Type", "application/json") - post.getOutputStream().write(json_message.getBytes("UTF-8")); - def postRC = post.getResponseCode(); - if (! postRC.equals(200)) { - log.warn(post.getErrorStream().getText()); - } - } - - // - // Dump pipeline parameters in a json file - // - public static void dump_parameters(workflow, params) { - def output_d = new File("${params.outdir}/pipeline_info/") - if (!output_d.exists()) { - output_d.mkdirs() - } - - def timestamp = new java.util.Date().format( 'yyyy-MM-dd_HH-mm-ss') - def output_pf = new File(output_d, "params_${timestamp}.json") - def jsonStr = JsonOutput.toJson(params) - output_pf.text = JsonOutput.prettyPrint(jsonStr) - } - - // - // Print pipeline summary on completion - // - public static void summary(workflow, params, log) { - Map colors = logColours(params.monochrome_logs) - if (workflow.success) { - if (workflow.stats.ignoredCount == 0) { - log.info "-${colors.purple}[$workflow.manifest.name]${colors.green} Pipeline completed successfully${colors.reset}-" - } else { - log.info "-${colors.purple}[$workflow.manifest.name]${colors.yellow} Pipeline completed successfully, but with errored process(es) ${colors.reset}-" - } - } else { - log.info "-${colors.purple}[$workflow.manifest.name]${colors.red} Pipeline completed with errors${colors.reset}-" - } - } - - // - // ANSII Colours used for terminal logging - // - public static Map logColours(Boolean monochrome_logs) { - Map colorcodes = [:] - - // Reset / Meta - colorcodes['reset'] = monochrome_logs ? '' : "\033[0m" - colorcodes['bold'] = monochrome_logs ? '' : "\033[1m" - colorcodes['dim'] = monochrome_logs ? '' : "\033[2m" - colorcodes['underlined'] = monochrome_logs ? '' : "\033[4m" - colorcodes['blink'] = monochrome_logs ? '' : "\033[5m" - colorcodes['reverse'] = monochrome_logs ? '' : "\033[7m" - colorcodes['hidden'] = monochrome_logs ? '' : "\033[8m" - - // Regular Colors - colorcodes['black'] = monochrome_logs ? '' : "\033[0;30m" - colorcodes['red'] = monochrome_logs ? '' : "\033[0;31m" - colorcodes['green'] = monochrome_logs ? '' : "\033[0;32m" - colorcodes['yellow'] = monochrome_logs ? '' : "\033[0;33m" - colorcodes['blue'] = monochrome_logs ? '' : "\033[0;34m" - colorcodes['purple'] = monochrome_logs ? '' : "\033[0;35m" - colorcodes['cyan'] = monochrome_logs ? '' : "\033[0;36m" - colorcodes['white'] = monochrome_logs ? '' : "\033[0;37m" - - // Bold - colorcodes['bblack'] = monochrome_logs ? '' : "\033[1;30m" - colorcodes['bred'] = monochrome_logs ? '' : "\033[1;31m" - colorcodes['bgreen'] = monochrome_logs ? '' : "\033[1;32m" - colorcodes['byellow'] = monochrome_logs ? '' : "\033[1;33m" - colorcodes['bblue'] = monochrome_logs ? '' : "\033[1;34m" - colorcodes['bpurple'] = monochrome_logs ? '' : "\033[1;35m" - colorcodes['bcyan'] = monochrome_logs ? '' : "\033[1;36m" - colorcodes['bwhite'] = monochrome_logs ? '' : "\033[1;37m" - - // Underline - colorcodes['ublack'] = monochrome_logs ? '' : "\033[4;30m" - colorcodes['ured'] = monochrome_logs ? '' : "\033[4;31m" - colorcodes['ugreen'] = monochrome_logs ? '' : "\033[4;32m" - colorcodes['uyellow'] = monochrome_logs ? '' : "\033[4;33m" - colorcodes['ublue'] = monochrome_logs ? 
'' : "\033[4;34m" - colorcodes['upurple'] = monochrome_logs ? '' : "\033[4;35m" - colorcodes['ucyan'] = monochrome_logs ? '' : "\033[4;36m" - colorcodes['uwhite'] = monochrome_logs ? '' : "\033[4;37m" - - // High Intensity - colorcodes['iblack'] = monochrome_logs ? '' : "\033[0;90m" - colorcodes['ired'] = monochrome_logs ? '' : "\033[0;91m" - colorcodes['igreen'] = monochrome_logs ? '' : "\033[0;92m" - colorcodes['iyellow'] = monochrome_logs ? '' : "\033[0;93m" - colorcodes['iblue'] = monochrome_logs ? '' : "\033[0;94m" - colorcodes['ipurple'] = monochrome_logs ? '' : "\033[0;95m" - colorcodes['icyan'] = monochrome_logs ? '' : "\033[0;96m" - colorcodes['iwhite'] = monochrome_logs ? '' : "\033[0;97m" - - // Bold High Intensity - colorcodes['biblack'] = monochrome_logs ? '' : "\033[1;90m" - colorcodes['bired'] = monochrome_logs ? '' : "\033[1;91m" - colorcodes['bigreen'] = monochrome_logs ? '' : "\033[1;92m" - colorcodes['biyellow'] = monochrome_logs ? '' : "\033[1;93m" - colorcodes['biblue'] = monochrome_logs ? '' : "\033[1;94m" - colorcodes['bipurple'] = monochrome_logs ? '' : "\033[1;95m" - colorcodes['bicyan'] = monochrome_logs ? '' : "\033[1;96m" - colorcodes['biwhite'] = monochrome_logs ? '' : "\033[1;97m" - - return colorcodes - } - - // - // Does what is says on the tin - // - public static String dashedLine(monochrome_logs) { - Map colors = logColours(monochrome_logs) - return "-${colors.dim}----------------------------------------------------${colors.reset}-" - } - - // - // nf-core logo - // - public static String logo(workflow, monochrome_logs) { - Map colors = logColours(monochrome_logs) - String workflow_version = NfcoreTemplate.version(workflow) - String.format( - """\n - ${dashedLine(monochrome_logs)} - ${colors.green},--.${colors.black}/${colors.green},-.${colors.reset} - ${colors.blue} ___ __ __ __ ___ ${colors.green}/,-._.--~\'${colors.reset} - ${colors.blue} |\\ | |__ __ / ` / \\ |__) |__ ${colors.yellow}} {${colors.reset} - ${colors.blue} | \\| | \\__, \\__/ | \\ |___ ${colors.green}\\`-._,-`-,${colors.reset} - ${colors.green}`._,._,\'${colors.reset} - ${colors.purple} ${workflow.manifest.name} ${workflow_version}${colors.reset} - ${dashedLine(monochrome_logs)} - """.stripIndent() - ) - } -} diff --git a/lib/Utils.groovy b/lib/Utils.groovy deleted file mode 100644 index 8d030f4e..00000000 --- a/lib/Utils.groovy +++ /dev/null @@ -1,47 +0,0 @@ -// -// This file holds several Groovy functions that could be useful for any Nextflow pipeline -// - -import org.yaml.snakeyaml.Yaml - -class Utils { - - // - // When running with -profile conda, warn if channels have not been set-up appropriately - // - public static void checkCondaChannels(log) { - Yaml parser = new Yaml() - def channels = [] - try { - def config = parser.load("conda config --show channels".execute().text) - channels = config.channels - } catch(NullPointerException | IOException e) { - log.warn "Could not verify conda channel configuration." - return - } - - // Check that all channels are present - // This channel list is ordered by required channel priority. 
- def required_channels_in_order = ['conda-forge', 'bioconda', 'defaults'] - def channels_missing = ((required_channels_in_order as Set) - (channels as Set)) as Boolean - - // Check that they are in the right order - def channel_priority_violation = false - def n = required_channels_in_order.size() - for (int i = 0; i < n - 1; i++) { - channel_priority_violation |= !(channels.indexOf(required_channels_in_order[i]) < channels.indexOf(required_channels_in_order[i+1])) - } - - if (channels_missing | channel_priority_violation) { - log.warn "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n" + - " There is a problem with your Conda configuration!\n\n" + - " You will need to set-up the conda-forge and bioconda channels correctly.\n" + - " Please refer to https://bioconda.github.io/\n" + - " The observed channel order is \n" + - " ${channels}\n" + - " but the following channel order is required:\n" + - " ${required_channels_in_order}\n" + - "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~" - } - } -} diff --git a/lib/WorkflowAirrflow.groovy b/lib/WorkflowAirrflow.groovy deleted file mode 100755 index a87bf915..00000000 --- a/lib/WorkflowAirrflow.groovy +++ /dev/null @@ -1,114 +0,0 @@ -// -// This file holds several functions specific to the workflow/bcellmagic.nf in the nf-core/bcellmagic pipeline -// - -import nextflow.Nextflow -import groovy.text.SimpleTemplateEngine - -class WorkflowAirrflow { - - // - // Check and validate parameters - // - public static void initialise(params, log) { - } - - // - // Get workflow summary for MultiQC - // - public static String paramsSummaryMultiqc(workflow, summary) { - String summary_section = '' - for (group in summary.keySet()) { - def group_params = summary.get(group) // This gets the parameters of that particular group - if (group_params) { - summary_section += "

$group

\n" - summary_section += "
\n" - for (param in group_params.keySet()) { - summary_section += "
$param
${group_params.get(param) ?: 'N/A'}
\n" - } - summary_section += '
\n' - } - } - - String yaml_file_text = "id: '${workflow.manifest.name.replace('/', '-')}-summary'\n" - yaml_file_text += "description: ' - this information is collected when the pipeline is started.'\n" - yaml_file_text += "section_name: '${workflow.manifest.name} Workflow Summary'\n" - yaml_file_text += "section_href: 'https://github.com/${workflow.manifest.name}'\n" - yaml_file_text += "plot_type: 'html'\n" - yaml_file_text += 'data: |\n' - yaml_file_text += "${summary_section}" - return yaml_file_text - } - - // - // Generate methods description for MultiQC - // - - public static String toolCitationText(params) { - - // Can use ternary operators to dynamically construct based conditions, e.g. params["run_xyz"] ? "Tool (Foo et al. 2023)" : "", - // Uncomment function in methodsDescriptionText to render in MultiQC report - def citation_text = [ - "Tools used in the workflow included:", - "FastQC (Andrews 2010),", - "MultiQC (Ewels et al. 2016)", - "." - ].join(' ').trim() - - return citation_text - } - - public static String toolBibliographyText(params) { - - // Can use ternary operators to dynamically construct based conditions, e.g. params["run_xyz"] ? "
  • Author (2023) Pub name, Journal, DOI
  • " : "", - // Uncomment function in methodsDescriptionText to render in MultiQC report - def reference_text = [ - "
  • Andrews S, (2010) FastQC, URL: https://www.bioinformatics.babraham.ac.uk/projects/fastqc/).
  • ", - "
  • Ewels, P., Magnusson, M., Lundin, S., & Käller, M. (2016). MultiQC: summarize analysis results for multiple tools and samples in a single report. Bioinformatics , 32(19), 3047–3048. doi: /10.1093/bioinformatics/btw354
  • " - ].join(' ').trim() - - return reference_text - } - - public static String methodsDescriptionText(run_workflow, mqc_methods_yaml, params) { - // Convert to a named map so can be used as with familar NXF ${workflow} variable syntax in the MultiQC YML file - def meta = [:] - meta.workflow = run_workflow.toMap() - meta['manifest_map'] = run_workflow.manifest.toMap() - - // Pipeline DOI - meta["doi_text"] = meta.manifest_map.doi ? "(doi: ${meta.manifest_map.doi})" : "" - meta["nodoi_text"] = meta.manifest_map.doi ? "": "
  • If available, make sure to update the text to include the Zenodo DOI of version of the pipeline used.
  • " - - // Tool references - meta["tool_citations"] = "" - meta["tool_bibliography"] = "" - - // Only uncomment below if logic in toolCitationText/toolBibliographyText has been filled! - //meta["tool_citations"] = toolCitationText(params).replaceAll(", \\.", ".").replaceAll("\\. \\.", ".").replaceAll(", \\.", ".") - //meta["tool_bibliography"] = toolBibliographyText(params) - - - def methods_text = mqc_methods_yaml.text - - def engine = new SimpleTemplateEngine() - def description_html = engine.createTemplate(methods_text).make(meta) - - return description_html - } - - // - // Exit pipeline if incorrect --genome key provided - // - private static void genomeExistsError(params, log) { - if (params.genomes && params.genome && !params.genomes.containsKey(params.genome)) { - def error_string = "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n" + - " Genome '${params.genome}' not found in any config files provided to the pipeline.\n" + - " Currently, the available genome keys are:\n" + - " ${params.genomes.keySet().join(", ")}\n" + - "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~" - Nextflow.error(error_string) - } - } - -} diff --git a/lib/WorkflowMain.groovy b/lib/WorkflowMain.groovy deleted file mode 100755 index 538ea8a4..00000000 --- a/lib/WorkflowMain.groovy +++ /dev/null @@ -1,62 +0,0 @@ -// -// This file holds several functions specific to the main.nf workflow in the nf-core/airrflow pipeline -// - -import nextflow.Nextflow - -class WorkflowMain { - - // - // Citation string for pipeline - // - public static String citation(workflow) { - return "If you use ${workflow.manifest.name} for your analysis please cite:\n\n" + - "* The pipeline\n" + - " https://doi.org/10.5281/zenodo.2642009\n\n" + - "* The nf-core framework\n" + - " https://doi.org/10.1038/s41587-020-0439-x\n\n" + - "* Software dependencies\n" + - " https://github.com/${workflow.manifest.name}/blob/master/CITATIONS.md" - } - - - // - // Validate parameters and print summary to screen - // - public static void initialise(workflow, params, log) { - - // Print workflow version and exit on --version - if (params.version) { - String workflow_version = NfcoreTemplate.version(workflow) - log.info "${workflow.manifest.name} ${workflow_version}" - System.exit(0) - } - - // Check that a -profile or Nextflow config has been provided to run the pipeline - NfcoreTemplate.checkConfigProvided(workflow, log) - - // Check that conda channels are set-up correctly - if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) { - Utils.checkCondaChannels(log) - } - - // Check AWS batch settings - NfcoreTemplate.awsBatch(workflow, params) - - // Check input has been provided - if (!params.input) { - Nextflow.error("Please provide an input samplesheet to the pipeline e.g. '--input samplesheet.csv'") - } - } - // - // Get attribute from genome config file e.g. 
fasta - // - public static Object getGenomeAttribute(params, attribute) { - if (params.genomes && params.genome && params.genomes.containsKey(params.genome)) { - if (params.genomes[ params.genome ].containsKey(attribute)) { - return params.genomes[ params.genome ][ attribute ] - } - } - return null - } -} diff --git a/lib/nfcore_external_java_deps.jar b/lib/nfcore_external_java_deps.jar deleted file mode 100644 index 805c8bb5..00000000 Binary files a/lib/nfcore_external_java_deps.jar and /dev/null differ diff --git a/main.nf b/main.nf index 24de2277..2f2ca54a 100644 --- a/main.nf +++ b/main.nf @@ -13,42 +13,92 @@ nextflow.enable.dsl = 2 /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - VALIDATE & PRINT PARAMETER SUMMARY + IMPORT FUNCTIONS / MODULES / SUBWORKFLOWS / WORKFLOWS ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ -include { validateParameters; paramsHelp } from 'plugin/nf-validation' +include { AIRRFLOW } from './workflows/airrflow' +include { PIPELINE_INITIALISATION } from './subworkflows/local/utils_nfcore_airrflow_pipeline' +include { PIPELINE_COMPLETION } from './subworkflows/local/utils_nfcore_airrflow_pipeline' -// Print help message if needed -if (params.help) { - def logo = NfcoreTemplate.logo(workflow, params.monochrome_logs) - def citation = '\n' + WorkflowMain.citation(workflow) + '\n' - def String command = "nextflow run ${workflow.manifest.name} --input samplesheet.csv --genome GRCh37 -profile docker" - log.info logo + paramsHelp(command) + citation + NfcoreTemplate.dashedLine(params.monochrome_logs) - System.exit(0) -} +include { getGenomeAttribute } from './subworkflows/local/utils_nfcore_airrflow_pipeline' -// Validate input parameters -if (params.validate_params) { - validateParameters() -} +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + GENOME PARAMETER VALUES +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +*/ -WorkflowMain.initialise(workflow, params, log) /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - RUN ALL WORKFLOWS + NAMED WORKFLOWS FOR PIPELINE ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ -include { AIRRFLOW } from './workflows/airrflow' - +// +// WORKFLOW: Run main analysis pipeline depending on type of input +// workflow NFCORE_AIRRFLOW { - AIRRFLOW() + + take: + samplesheet // channel: samplesheet read in from --input + + main: + + // + // WORKFLOW: Run pipeline + // + AIRRFLOW ( + samplesheet + ) + + emit: + multiqc_report = AIRRFLOW.out.multiqc_report // channel: /path/to/multiqc_report.html + } +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + RUN MAIN WORKFLOW +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +*/ workflow { - NFCORE_AIRRFLOW() + + main: + + // + // SUBWORKFLOW: Run initialisation tasks + // + PIPELINE_INITIALISATION ( + params.version, + params.help, + params.validate_params, + params.monochrome_logs, + args, + params.outdir, + params.input + ) + + // + // WORKFLOW: Run main workflow + // + NFCORE_AIRRFLOW ( + PIPELINE_INITIALISATION.out.samplesheet + ) + + // + // SUBWORKFLOW: Run completion tasks + // + PIPELINE_COMPLETION ( + params.email, + params.email_on_fail, + params.plaintext_email, + params.outdir, + params.monochrome_logs, + params.hook_url, + 
NFCORE_AIRRFLOW.out.multiqc_report + ) } /* diff --git a/modules.json b/modules.json index 5eb6872f..9350ace5 100644 --- a/modules.json +++ b/modules.json @@ -5,27 +5,56 @@ "https://github.com/nf-core/modules.git": { "modules": { "nf-core": { - "custom/dumpsoftwareversions": { + "cat/fastq": { "branch": "master", - "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", + "git_sha": "02fd5bd7275abad27aad32d5c852e0a9b1b98882", + "installed_by": ["modules"] + }, + "cellranger/mkvdjref": { + "branch": "master", + "git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5", + "installed_by": ["modules"] + }, + "cellranger/vdj": { + "branch": "master", + "git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5", "installed_by": ["modules"] }, "fastp": { "branch": "master", - "git_sha": "d497a4868ace3302016ea8ed4b395072d5e833cd", + "git_sha": "003920c7f9a8ae19b69a97171922880220bedf56", "installed_by": ["modules"] }, "fastqc": { "branch": "master", - "git_sha": "cfd937a668919d948f6fcbf4218e79de50c2f36f", + "git_sha": "f4ae1d942bd50c5c0b9bd2de1393ce38315ba57c", "installed_by": ["modules"] }, "multiqc": { "branch": "master", - "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", + "git_sha": "b7ebe95761cd389603f9cc0e0dc384c0f663815a", "installed_by": ["modules"] } } + }, + "subworkflows": { + "nf-core": { + "utils_nextflow_pipeline": { + "branch": "master", + "git_sha": "5caf7640a9ef1d18d765d55339be751bb0969dfa", + "installed_by": ["subworkflows"] + }, + "utils_nfcore_pipeline": { + "branch": "master", + "git_sha": "5caf7640a9ef1d18d765d55339be751bb0969dfa", + "installed_by": ["subworkflows"] + }, + "utils_nfvalidation_plugin": { + "branch": "master", + "git_sha": "5caf7640a9ef1d18d765d55339be751bb0969dfa", + "installed_by": ["subworkflows"] + } + } } } } diff --git a/modules/local/airrflow_report/airrflow_report.nf b/modules/local/airrflow_report/airrflow_report.nf index ecac2e49..fafbd052 100644 --- a/modules/local/airrflow_report/airrflow_report.nf +++ b/modules/local/airrflow_report/airrflow_report.nf @@ -6,8 +6,8 @@ process AIRRFLOW_REPORT { error "nf-core/airrflow currently does not support Conda. Please use a container profile instead." } container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'docker.io/immcantation/airrflow:3.2.0': - 'docker.io/immcantation/airrflow:3.2.0' }" + 'docker.io/immcantation/airrflow:3.3.0': + 'docker.io/immcantation/airrflow:3.3.0' }" input: tuple val(meta), path(tab) // sequence tsv table in AIRR format @@ -21,9 +21,6 @@ process AIRRFLOW_REPORT { path "versions.yml" , emit: versions path("repertoire_comparison"), emit: results_folder path("*.html"), emit: report_html - path(repertoire_report) - path(css) - path(logo) script: """ diff --git a/modules/local/changeo/changeo_assigngenes.nf b/modules/local/changeo/changeo_assigngenes.nf index a51331cf..ff080b72 100644 --- a/modules/local/changeo/changeo_assigngenes.nf +++ b/modules/local/changeo/changeo_assigngenes.nf @@ -3,10 +3,10 @@ process CHANGEO_ASSIGNGENES { label 'process_low' label 'immcantation' - conda "bioconda::changeo=1.3.0 bioconda::igblast=1.19.0 conda-forge::wget=1.20.1" + conda "bioconda::changeo=1.3.0 bioconda::igblast=1.22.0 conda-forge::wget=1.20.1" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/mulled-v2-7d8e418eb73acc6a80daea8e111c94cf19a4ecfd:00534555924705cdf2f7ac48b4b8b4083527ca58-1' : - 'biocontainers/mulled-v2-7d8e418eb73acc6a80daea8e111c94cf19a4ecfd:00534555924705cdf2f7ac48b4b8b4083527ca58-1' }" + 'https://depot.galaxyproject.org/singularity/mulled-v2-7d8e418eb73acc6a80daea8e111c94cf19a4ecfd:a9ee25632c9b10bbb012da76e6eb539acca8f9cd-1' : + 'biocontainers/mulled-v2-7d8e418eb73acc6a80daea8e111c94cf19a4ecfd:a9ee25632c9b10bbb012da76e6eb539acca8f9cd-1' }" input: tuple val(meta), path(reads) // reads in fasta format diff --git a/modules/local/changeo/changeo_convertdb_fasta.nf b/modules/local/changeo/changeo_convertdb_fasta.nf index d348cfbc..4cb8f022 100644 --- a/modules/local/changeo/changeo_convertdb_fasta.nf +++ b/modules/local/changeo/changeo_convertdb_fasta.nf @@ -4,16 +4,16 @@ process CHANGEO_CONVERTDB_FASTA { label 'immcantation' - conda "bioconda::changeo=1.3.0 bioconda::igblast=1.19.0 conda-forge::wget=1.20.1" + conda "bioconda::changeo=1.3.0 bioconda::igblast=1.22.0 conda-forge::wget=1.20.1" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/mulled-v2-7d8e418eb73acc6a80daea8e111c94cf19a4ecfd:00534555924705cdf2f7ac48b4b8b4083527ca58-1' : - 'biocontainers/mulled-v2-7d8e418eb73acc6a80daea8e111c94cf19a4ecfd:00534555924705cdf2f7ac48b4b8b4083527ca58-1' }" + 'https://depot.galaxyproject.org/singularity/mulled-v2-7d8e418eb73acc6a80daea8e111c94cf19a4ecfd:a9ee25632c9b10bbb012da76e6eb539acca8f9cd-1' : + 'biocontainers/mulled-v2-7d8e418eb73acc6a80daea8e111c94cf19a4ecfd:a9ee25632c9b10bbb012da76e6eb539acca8f9cd-1' }" input: tuple val(meta), path(tab) // sequence tsv in AIRR format output: - tuple val(meta), path("*.fasta"), emit: fasta // sequence tsv in AIRR format + tuple val(meta), path("*.fasta"), emit: fasta // sequence in fasta format path "versions.yml" , emit: versions path "*_command_log.txt" , emit: logs diff --git a/modules/local/changeo/changeo_creategermlines.nf b/modules/local/changeo/changeo_creategermlines.nf index feef1d2b..d424377a 100644 --- a/modules/local/changeo/changeo_creategermlines.nf +++ b/modules/local/changeo/changeo_creategermlines.nf @@ -4,10 +4,10 @@ process CHANGEO_CREATEGERMLINES { label 'immcantation' - conda "bioconda::changeo=1.3.0 bioconda::igblast=1.19.0 conda-forge::wget=1.20.1" + conda "bioconda::changeo=1.3.0 bioconda::igblast=1.22.0 conda-forge::wget=1.20.1" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/mulled-v2-7d8e418eb73acc6a80daea8e111c94cf19a4ecfd:00534555924705cdf2f7ac48b4b8b4083527ca58-1' : - 'biocontainers/mulled-v2-7d8e418eb73acc6a80daea8e111c94cf19a4ecfd:00534555924705cdf2f7ac48b4b8b4083527ca58-1' }" + 'https://depot.galaxyproject.org/singularity/mulled-v2-7d8e418eb73acc6a80daea8e111c94cf19a4ecfd:a9ee25632c9b10bbb012da76e6eb539acca8f9cd-1' : + 'biocontainers/mulled-v2-7d8e418eb73acc6a80daea8e111c94cf19a4ecfd:a9ee25632c9b10bbb012da76e6eb539acca8f9cd-1' }" input: tuple val(meta), path(tab) // sequence tsv table in AIRR format diff --git a/modules/local/changeo/changeo_makedb.nf b/modules/local/changeo/changeo_makedb.nf index e392cda9..a71d6282 100644 --- a/modules/local/changeo/changeo_makedb.nf +++ b/modules/local/changeo/changeo_makedb.nf @@ -4,10 +4,11 @@ process CHANGEO_MAKEDB { label 'immcantation' - conda "bioconda::changeo=1.3.0 bioconda::igblast=1.19.0 conda-forge::wget=1.20.1" + conda "bioconda::changeo=1.3.0 bioconda::igblast=1.22.0 conda-forge::wget=1.20.1" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/mulled-v2-7d8e418eb73acc6a80daea8e111c94cf19a4ecfd:00534555924705cdf2f7ac48b4b8b4083527ca58-1' : - 'biocontainers/mulled-v2-7d8e418eb73acc6a80daea8e111c94cf19a4ecfd:00534555924705cdf2f7ac48b4b8b4083527ca58-1' }" + //TODO: update mulled containers when available + 'https://depot.galaxyproject.org/singularity/mulled-v2-7d8e418eb73acc6a80daea8e111c94cf19a4ecfd:a9ee25632c9b10bbb012da76e6eb539acca8f9cd-1' : + 'biocontainers/mulled-v2-7d8e418eb73acc6a80daea8e111c94cf19a4ecfd:a9ee25632c9b10bbb012da76e6eb539acca8f9cd-1' }" input: tuple val(meta), path(reads) // reads in fasta format diff --git a/modules/local/changeo/changeo_parsedb_select.nf b/modules/local/changeo/changeo_parsedb_select.nf index 9f592e7f..2bba4916 100644 --- a/modules/local/changeo/changeo_parsedb_select.nf +++ b/modules/local/changeo/changeo_parsedb_select.nf @@ -4,10 +4,10 @@ process CHANGEO_PARSEDB_SELECT { label 'immcantation' - conda "bioconda::changeo=1.3.0 bioconda::igblast=1.19.0 conda-forge::wget=1.20.1" + conda "bioconda::changeo=1.3.0 bioconda::igblast=1.22.0 conda-forge::wget=1.20.1" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/mulled-v2-7d8e418eb73acc6a80daea8e111c94cf19a4ecfd:00534555924705cdf2f7ac48b4b8b4083527ca58-1' : - 'biocontainers/mulled-v2-7d8e418eb73acc6a80daea8e111c94cf19a4ecfd:00534555924705cdf2f7ac48b4b8b4083527ca58-1' }" + 'https://depot.galaxyproject.org/singularity/mulled-v2-7d8e418eb73acc6a80daea8e111c94cf19a4ecfd:a9ee25632c9b10bbb012da76e6eb539acca8f9cd-1' : + 'biocontainers/mulled-v2-7d8e418eb73acc6a80daea8e111c94cf19a4ecfd:a9ee25632c9b10bbb012da76e6eb539acca8f9cd-1' }" input: tuple val(meta), path(tab) // sequence tsv in AIRR format diff --git a/modules/local/changeo/changeo_parsedb_split.nf b/modules/local/changeo/changeo_parsedb_split.nf index 354c3019..f67405bb 100644 --- a/modules/local/changeo/changeo_parsedb_split.nf +++ b/modules/local/changeo/changeo_parsedb_split.nf @@ -4,10 +4,10 @@ process CHANGEO_PARSEDB_SPLIT { label 'immcantation' - conda "bioconda::changeo=1.3.0 bioconda::igblast=1.19.0 conda-forge::wget=1.20.1" + conda "bioconda::changeo=1.3.0 bioconda::igblast=1.22.0 conda-forge::wget=1.20.1" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/mulled-v2-7d8e418eb73acc6a80daea8e111c94cf19a4ecfd:00534555924705cdf2f7ac48b4b8b4083527ca58-1' : - 'biocontainers/mulled-v2-7d8e418eb73acc6a80daea8e111c94cf19a4ecfd:00534555924705cdf2f7ac48b4b8b4083527ca58-1' }" + 'https://depot.galaxyproject.org/singularity/mulled-v2-7d8e418eb73acc6a80daea8e111c94cf19a4ecfd:a9ee25632c9b10bbb012da76e6eb539acca8f9cd-1' : + 'biocontainers/mulled-v2-7d8e418eb73acc6a80daea8e111c94cf19a4ecfd:a9ee25632c9b10bbb012da76e6eb539acca8f9cd-1' }" input: tuple val(meta), path(tab) // sequence tsv in AIRR format diff --git a/modules/local/enchantr/collapse_duplicates.nf b/modules/local/enchantr/collapse_duplicates.nf index af640cf5..ebec7209 100644 --- a/modules/local/enchantr/collapse_duplicates.nf +++ b/modules/local/enchantr/collapse_duplicates.nf @@ -8,8 +8,8 @@ process COLLAPSE_DUPLICATES { error "nf-core/airrflow currently does not support Conda. Please use a container profile instead." } container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'docker.io/immcantation/airrflow:3.2.0': - 'docker.io/immcantation/airrflow:3.2.0' }" + 'docker.io/immcantation/airrflow:3.3.0': + 'docker.io/immcantation/airrflow:3.3.0' }" input: tuple val(meta), path(tabs) // tuple [val(meta), sequence tsv in AIRR format ] diff --git a/modules/local/enchantr/define_clones.nf b/modules/local/enchantr/define_clones.nf index e24c75d9..b6cf9ec8 100644 --- a/modules/local/enchantr/define_clones.nf +++ b/modules/local/enchantr/define_clones.nf @@ -25,8 +25,8 @@ process DEFINE_CLONES { error "nf-core/airrflow currently does not support Conda. Please use a container profile instead." } container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'docker.io/immcantation/airrflow:3.2.0': - 'docker.io/immcantation/airrflow:3.2.0' }" + 'docker.io/immcantation/airrflow:3.3.0': + 'docker.io/immcantation/airrflow:3.3.0' }" input: tuple val(meta), path(tabs) // meta, sequence tsv in AIRR format @@ -56,11 +56,13 @@ process DEFINE_CLONES { 'imgt_db'='${imgt_base}', \\ 'species'='auto', \\ 'cloneby'='${params.cloneby}', \\ - 'outputby'='${params.cloneby}', + 'outputby'='${params.cloneby}', \\ 'force'=FALSE, \\ 'threshold'=${thr}, \\ - 'singlecell'='${params.singlecell}','outdir'=getwd(), \\ - 'nproc'=${task.cpus},\\ + 'singlecell'='${params.singlecell}', \\ + 'outdir'=getwd(), \\ + 'isotype_column'='${params.isotype_column}', \\ + 'nproc'=${task.cpus}, \\ 'log'='${meta.id}_clone_command_log' ${args}))" cp -r enchantr ${meta.id}_clone_report && rm -rf enchantr diff --git a/modules/local/enchantr/detect_contamination.nf b/modules/local/enchantr/detect_contamination.nf index 0267b81a..1ed10e8e 100644 --- a/modules/local/enchantr/detect_contamination.nf +++ b/modules/local/enchantr/detect_contamination.nf @@ -9,8 +9,8 @@ process DETECT_CONTAMINATION { error "nf-core/airrflow currently does not support Conda. Please use a container profile instead." } container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'docker.io/immcantation/airrflow:3.2.0': - 'docker.io/immcantation/airrflow:3.2.0' }" + 'docker.io/immcantation/airrflow:3.3.0': + 'docker.io/immcantation/airrflow:3.3.0' }" input: path(tabs) diff --git a/modules/local/enchantr/dowser_lineages.nf b/modules/local/enchantr/dowser_lineages.nf index b9a8de8e..e50a9e07 100644 --- a/modules/local/enchantr/dowser_lineages.nf +++ b/modules/local/enchantr/dowser_lineages.nf @@ -25,8 +25,8 @@ process DOWSER_LINEAGES { error "nf-core/airrflow currently does not support Conda. Please use a container profile instead." } container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'docker.io/immcantation/airrflow:3.2.0': - 'docker.io/immcantation/airrflow:3.2.0' }" + 'docker.io/immcantation/airrflow:3.3.0': + 'docker.io/immcantation/airrflow:3.3.0' }" input: tuple val(meta), path(tabs) diff --git a/modules/local/enchantr/find_threshold.nf b/modules/local/enchantr/find_threshold.nf index 08178111..89b1c3b8 100644 --- a/modules/local/enchantr/find_threshold.nf +++ b/modules/local/enchantr/find_threshold.nf @@ -25,8 +25,8 @@ process FIND_THRESHOLD { error "nf-core/airrflow currently does not support Conda. Please use a container profile instead." } container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'docker.io/immcantation/airrflow:3.2.0': - 'docker.io/immcantation/airrflow:3.2.0' }" + 'docker.io/immcantation/airrflow:3.3.0': + 'docker.io/immcantation/airrflow:3.3.0' }" input: diff --git a/modules/local/enchantr/remove_chimeric.nf b/modules/local/enchantr/remove_chimeric.nf index 32522aa2..76f4e0b5 100644 --- a/modules/local/enchantr/remove_chimeric.nf +++ b/modules/local/enchantr/remove_chimeric.nf @@ -9,8 +9,8 @@ process REMOVE_CHIMERIC { error "nf-core/airrflow currently does not support Conda. Please use a container profile instead." } container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'docker.io/immcantation/airrflow:3.2.0': - 'docker.io/immcantation/airrflow:3.2.0' }" + 'docker.io/immcantation/airrflow:3.3.0': + 'docker.io/immcantation/airrflow:3.3.0' }" input: diff --git a/modules/local/enchantr/report_file_size.nf b/modules/local/enchantr/report_file_size.nf index 804ebd61..ece9d93f 100644 --- a/modules/local/enchantr/report_file_size.nf +++ b/modules/local/enchantr/report_file_size.nf @@ -10,8 +10,8 @@ process REPORT_FILE_SIZE { error "nf-core/airrflow currently does not support Conda. Please use a container profile instead." } container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'docker.io/immcantation/airrflow:3.2.0': - 'docker.io/immcantation/airrflow:3.2.0' }" + 'docker.io/immcantation/airrflow:3.3.0': + 'docker.io/immcantation/airrflow:3.3.0' }" input: path logs diff --git a/modules/local/enchantr/single_cell_qc.nf b/modules/local/enchantr/single_cell_qc.nf index 36733e4d..6b232155 100644 --- a/modules/local/enchantr/single_cell_qc.nf +++ b/modules/local/enchantr/single_cell_qc.nf @@ -24,8 +24,8 @@ process SINGLE_CELL_QC { error "nf-core/airrflow currently does not support Conda. Please use a container profile instead." } container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'docker.io/immcantation/airrflow:3.2.0': - 'docker.io/immcantation/airrflow:3.2.0' }" + 'docker.io/immcantation/airrflow:3.3.0': + 'docker.io/immcantation/airrflow:3.3.0' }" input: path(tabs) diff --git a/modules/local/enchantr/validate_input.nf b/modules/local/enchantr/validate_input.nf index 224b391a..0dcd884e 100644 --- a/modules/local/enchantr/validate_input.nf +++ b/modules/local/enchantr/validate_input.nf @@ -10,8 +10,8 @@ process VALIDATE_INPUT { error "nf-core/airrflow currently does not support Conda. Please use a container profile instead." } container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'docker.io/immcantation/airrflow:3.2.0': - 'docker.io/immcantation/airrflow:3.2.0' }" + 'docker.io/immcantation/airrflow:3.3.0': + 'docker.io/immcantation/airrflow:3.3.0' }" input: file samplesheet diff --git a/modules/local/fetch_databases.nf b/modules/local/fetch_databases.nf index b1d10ea3..2deb3cb4 100644 --- a/modules/local/fetch_databases.nf +++ b/modules/local/fetch_databases.nf @@ -3,10 +3,10 @@ process FETCH_DATABASES { label 'process_low' label 'immcantation' - conda "bioconda::changeo=1.3.0 bioconda::igblast=1.19.0 conda-forge::wget=1.20.1" + conda "bioconda::changeo=1.3.0 bioconda::igblast=1.22.0 conda-forge::wget=1.20.1" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/mulled-v2-7d8e418eb73acc6a80daea8e111c94cf19a4ecfd:00534555924705cdf2f7ac48b4b8b4083527ca58-1' : - 'biocontainers/mulled-v2-7d8e418eb73acc6a80daea8e111c94cf19a4ecfd:00534555924705cdf2f7ac48b4b8b4083527ca58-1' }" + 'https://depot.galaxyproject.org/singularity/mulled-v2-7d8e418eb73acc6a80daea8e111c94cf19a4ecfd:a9ee25632c9b10bbb012da76e6eb539acca8f9cd-1' : + 'biocontainers/mulled-v2-7d8e418eb73acc6a80daea8e111c94cf19a4ecfd:a9ee25632c9b10bbb012da76e6eb539acca8f9cd-1' }" output: path("igblast_base"), emit: igblast diff --git a/modules/local/presto/presto_assemblepairs_sequential.nf b/modules/local/presto/presto_assemblepairs_sequential.nf new file mode 100644 index 00000000..7d9cabb0 --- /dev/null +++ b/modules/local/presto/presto_assemblepairs_sequential.nf @@ -0,0 +1,37 @@ +process PRESTO_ASSEMBLEPAIRS_SEQUENTIAL { + tag "$meta.id" + label 'process_long_parallelized' + label 'immcantation' + + conda "bioconda::presto=0.7.1 bioconda::igblast=1.22.0 conda-forge::wget=1.20.1 conda-forge::biopython=1.79" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/mulled-v2-865ad74e0cfd6de39e9e3ade759d826fce726425:eb705f041731c0cf4f5e7595889cf9d14a05031b-0' : + 'biocontainers/mulled-v2-865ad74e0cfd6de39e9e3ade759d826fce726425:eb705f041731c0cf4f5e7595889cf9d14a05031b-0' }" + + input: + tuple val(meta), path(R1), path(R2) // reads in fastq format + path(igblast) // igblast references + + output: + tuple val(meta), path("*_assemble-pass.fastq"), emit: reads + path("*_command_log.txt"), emit: logs + path("*.log") + path("*_table.tab") + path "versions.yml" , emit: versions + + script: + def args = task.ext.args ?: '' + def args2 = task.ext.args2 ?: '' + """ + AssemblePairs.py sequential -1 $R2 -2 $R1 --nproc ${task.cpus} \\ + -r "${igblast}/fasta/imgt_${meta.species}_${meta.locus.toLowerCase()}_v.fasta" \\ + $args \\ + --outname ${meta.id} --log ${meta.id}.log > ${meta.id}_command_log.txt + ParseLog.py -l ${meta.id}.log $args2 + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + presto: \$( AssemblePairs.py --version | awk -F' ' '{print \$2}' ) + END_VERSIONS + """ +} diff --git a/modules/local/presto/presto_buildconsensus.nf b/modules/local/presto/presto_buildconsensus.nf index 6f5d9b20..2a85c3ea 100644 --- a/modules/local/presto/presto_buildconsensus.nf +++ b/modules/local/presto/presto_buildconsensus.nf @@ -25,9 +25,9 @@ process PRESTO_BUILDCONSENSUS { def args2 = task.ext.args2 ?: '' def args3 = task.ext.args3 ?: '' """ - BuildConsensus.py -s $R1 --bf ${barcode_field} --nproc ${task.cpus} --pf PRIMER --prcons ${params.primer_consensus} --maxerror ${params.buildconsensus_maxerror} --maxgap ${params.buildconsensus_maxgap} ${args} --outname ${meta.id}_R1 --log ${meta.id}_R1.log > ${meta.id}_command_log.txt - BuildConsensus.py -s $R2 --bf ${barcode_field} --nproc ${task.cpus} --pf PRIMER --prcons ${params.primer_consensus} --maxerror ${params.buildconsensus_maxerror} --maxgap ${params.buildconsensus_maxgap} ${args2} --outname ${meta.id}_R2 --log ${meta.id}_R2.log >> ${meta.id}_command_log.txt - ParseLog.py -l ${meta.id}_R1.log ${meta.id}_R2.log -f ${args3} + BuildConsensus.py -s $R1 --bf ${barcode_field} --nproc ${task.cpus} --prcons ${params.primer_consensus} --maxerror ${params.buildconsensus_maxerror} --maxgap ${params.buildconsensus_maxgap} ${args} --outname ${meta.id}_R1 --log ${meta.id}_R1.log > ${meta.id}_command_log.txt + BuildConsensus.py -s $R2 --bf ${barcode_field} --nproc ${task.cpus} --prcons ${params.primer_consensus} --maxerror ${params.buildconsensus_maxerror} --maxgap ${params.buildconsensus_maxgap} ${args2} --outname ${meta.id}_R2 --log ${meta.id}_R2.log >> ${meta.id}_command_log.txt + ParseLog.py -l ${meta.id}_R1.log ${meta.id}_R2.log ${args3} cat <<-END_VERSIONS > versions.yml "${task.process}": diff --git a/modules/local/presto/presto_filterseq.nf b/modules/local/presto/presto_filterseq.nf index 4af4267b..a7733147 100644 --- a/modules/local/presto/presto_filterseq.nf +++ b/modules/local/presto/presto_filterseq.nf @@ -13,7 +13,7 @@ process PRESTO_FILTERSEQ { output: tuple val(meta), path("*R1_quality-pass.fastq"), path("*R2_quality-pass.fastq") , emit: reads - path "*_command_log.txt" , emit: logs + path "*_command_log_R?.txt" , emit: logs path "versions.yml" , emit: versions path "*_R1.log" path "*_R2.log" @@ -21,8 +21,8 @@ process PRESTO_FILTERSEQ { script: """ - FilterSeq.py quality -s $R1 -q ${params.filterseq_q} --outname ${meta.id}_R1 --log ${R1.baseName}_R1.log --nproc ${task.cpus} > ${meta.id}_command_log.txt - FilterSeq.py quality -s $R2 -q ${params.filterseq_q} 
--outname ${meta.id}_R2 --log ${R2.baseName}_R2.log --nproc ${task.cpus} >> ${meta.id}_command_log.txt + FilterSeq.py quality -s $R1 -q ${params.filterseq_q} --outname ${meta.id}_R1 --log ${R1.baseName}_R1.log --nproc ${task.cpus} > ${meta.id}_command_log_R1.txt + FilterSeq.py quality -s $R2 -q ${params.filterseq_q} --outname ${meta.id}_R2 --log ${R2.baseName}_R2.log --nproc ${task.cpus} >> ${meta.id}_command_log_R2.txt ParseLog.py -l ${R1.baseName}_R1.log ${R2.baseName}_R2.log -f ID QUALITY cat <<-END_VERSIONS > versions.yml diff --git a/modules/local/presto/presto_maskprimers.nf b/modules/local/presto/presto_maskprimers.nf index 99aab4dd..48e66a84 100644 --- a/modules/local/presto/presto_maskprimers.nf +++ b/modules/local/presto/presto_maskprimers.nf @@ -15,7 +15,7 @@ process PRESTO_MASKPRIMERS { output: tuple val(meta), path("*_R1_primers-pass.fastq"), path("*_R2_primers-pass.fastq") , emit: reads - path "*_command_log.txt", emit: logs + path "*_command_log_R?.txt", emit: logs path "*_R1.log" path "*_R2.log" path "*.tab", emit: log_tab @@ -28,8 +28,8 @@ process PRESTO_MASKPRIMERS { def primer_start_R1 = (params.index_file | params.umi_position == 'R1') ? "--start ${params.umi_length + params.cprimer_start} --barcode" : "--start ${params.cprimer_start}" def primer_start_R2 = (params.umi_position == 'R2') ? "--start ${params.umi_length + params.vprimer_start} --barcode" : "--start ${params.vprimer_start}" """ - MaskPrimers.py score --nproc ${task.cpus} -s $R1 -p ${cprimers} $primer_start_R1 $revpr --maxerror ${params.primer_maxerror} --mode ${params.primer_mask_mode} --outname ${meta.id}_R1 --log ${meta.id}_R1.log > ${meta.id}_command_log.txt - MaskPrimers.py score --nproc ${task.cpus} -s $R2 -p ${vprimers} $primer_start_R2 $revpr --maxerror ${params.primer_maxerror} --mode ${params.primer_mask_mode} --outname ${meta.id}_R2 --log ${meta.id}_R2.log >> ${meta.id}_command_log.txt + MaskPrimers.py score --nproc ${task.cpus} -s $R1 -p ${cprimers} $primer_start_R1 $revpr --maxerror ${params.primer_r1_maxerror} --mode ${params.primer_mask_mode} --outname ${meta.id}_R1 --log ${meta.id}_R1.log > ${meta.id}_command_log_R1.txt + MaskPrimers.py score --nproc ${task.cpus} -s $R2 -p ${vprimers} $primer_start_R2 $revpr --maxerror ${params.primer_r2_maxerror} --mode ${params.primer_mask_mode} --outname ${meta.id}_R2 --log ${meta.id}_R2.log > ${meta.id}_command_log_R2.txt ParseLog.py -l ${meta.id}_R1.log ${meta.id}_R2.log -f ID PRIMER ERROR cat <<-END_VERSIONS > versions.yml @@ -41,8 +41,8 @@ process PRESTO_MASKPRIMERS { def primer_start_R1 = (params.index_file | params.umi_position == 'R1') ? "--start ${params.umi_length + params.vprimer_start} --barcode" : "--start ${params.vprimer_start}" def primer_start_R2 = (params.umi_position == 'R2') ? 
"--start ${params.umi_length + params.cprimer_start} --barcode" : "--start ${params.cprimer_start}" """ - MaskPrimers.py score --nproc ${task.cpus} -s $R1 -p ${vprimers} $primer_start_R1 $revpr --maxerror ${params.primer_maxerror} --mode ${params.primer_mask_mode} --outname ${meta.id}_R1 --log ${meta.id}_R1.log > ${meta.id}_command_log.txt - MaskPrimers.py score --nproc ${task.cpus} -s $R2 -p ${cprimers} $primer_start_R2 $revpr --maxerror ${params.primer_maxerror} --mode ${params.primer_mask_mode} --outname ${meta.id}_R2 --log ${meta.id}_R2.log >> ${meta.id}_command_log.txt + MaskPrimers.py score --nproc ${task.cpus} -s $R1 -p ${vprimers} $primer_start_R1 $revpr --maxerror ${params.primer_r1_maxerror} --mode ${params.primer_mask_mode} --outname ${meta.id}_R1 --log ${meta.id}_R1.log > ${meta.id}_command_log_R1.txt + MaskPrimers.py score --nproc ${task.cpus} -s $R2 -p ${cprimers} $primer_start_R2 $revpr --maxerror ${params.primer_r2_maxerror} --mode ${params.primer_mask_mode} --outname ${meta.id}_R2 --log ${meta.id}_R2.log > ${meta.id}_command_log_R2.txt ParseLog.py -l "${meta.id}_R1.log" "${meta.id}_R2.log" -f ID PRIMER ERROR cat <<-END_VERSIONS > versions.yml diff --git a/modules/local/presto/presto_maskprimers_align.nf b/modules/local/presto/presto_maskprimers_align.nf new file mode 100644 index 00000000..055e5d93 --- /dev/null +++ b/modules/local/presto/presto_maskprimers_align.nf @@ -0,0 +1,45 @@ +process PRESTO_MASKPRIMERS_ALIGN { + tag "$meta.id" + label "process_high" + label 'immcantation' + + conda "bioconda::presto=0.7.1" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/presto:0.7.1--pyhdfd78af_0' : + 'biocontainers/presto:0.7.1--pyhdfd78af_0' }" + + input: + tuple val(meta), path(R1) + path(cprimers) + val(max_len) + val(max_error) + val(mask_mode) + + output: + tuple val(meta), path("*_R1_primers-pass.fastq") , emit: reads + path "*_command_log_R1.txt", emit: logs + path "*_R1.log" + path "*.tab", emit: log_tab + path "versions.yml" , emit: versions + + script: + def args = task.ext.args?: '' + def args2 = task.ext.args2?: '' + """ + MaskPrimers.py align --nproc ${task.cpus} \\ + -s $R1 \\ + -p ${cprimers} \\ + --maxlen ${max_len} \\ + --maxerror ${max_error} \\ + --mode ${mask_mode} \\ + $args \\ + --outname ${meta.id}_R1 \\ + --log ${meta.id}_R1.log > ${meta.id}_command_log_R1.txt + ParseLog.py -l ${meta.id}_R1.log $args2 + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + presto: \$( MaskPrimers.py --version | awk -F' ' '{print \$2}' ) + END_VERSIONS + """ +} diff --git a/modules/local/presto/presto_maskprimers_extract.nf b/modules/local/presto/presto_maskprimers_extract.nf new file mode 100644 index 00000000..661389e0 --- /dev/null +++ b/modules/local/presto/presto_maskprimers_extract.nf @@ -0,0 +1,40 @@ +process PRESTO_MASKPRIMERS_EXTRACT { + tag "$meta.id" + label "process_high" + label 'immcantation' + + conda "bioconda::presto=0.7.1" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/presto:0.7.1--pyhdfd78af_0' : + 'biocontainers/presto:0.7.1--pyhdfd78af_0' }" + + input: + tuple val(meta), path(R2) + + output: + tuple val(meta), path("*_R2_primers-pass.fastq") , emit: reads + path "*_command_log_R2.txt", emit: logs + path "*_R2.log" + path "*.tab", emit: log_tab + path "versions.yml" , emit: versions + + script: + def args = task.ext.args?: '' + def args2 = task.ext.args2?: '' + """ + MaskPrimers.py extract --nproc ${task.cpus} \\ + -s $R2 \\ + --start ${params.umi_length} \\ + --len ${params.primer_extract_len} \\ + $args \\ + --mode ${params.primer_mask_mode} \\ + --outname ${meta.id}_R2 \\ + --log ${meta.id}_R2.log >> ${meta.id}_command_log_R2.txt + ParseLog.py -l ${meta.id}_R2.log $args2 + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + presto: \$( MaskPrimers.py --version | awk -F' ' '{print \$2}' ) + END_VERSIONS + """ +} diff --git a/modules/local/presto/presto_maskprimers_postassembly.nf b/modules/local/presto/presto_maskprimers_postassembly.nf index 982b6f46..91427c90 100644 --- a/modules/local/presto/presto_maskprimers_postassembly.nf +++ b/modules/local/presto/presto_maskprimers_postassembly.nf @@ -24,10 +24,10 @@ process PRESTO_MASKPRIMERS_POSTASSEMBLY { def revpr = params.primer_revpr ? '--revpr' : '' if (params.cprimer_position == "R1") { """ - MaskPrimers.py score --nproc ${task.cpus} -s $reads -p ${cprimers} --start ${params.cprimer_start} --maxerror ${params.primer_maxerror} \ + MaskPrimers.py score --nproc ${task.cpus} -s $reads -p ${cprimers} --start ${params.cprimer_start} --maxerror ${params.primer_r1_maxerror} \ --mode ${params.primer_mask_mode} --outname ${meta.id}-FWD \ --log ${meta.id}-FWD.log > ${meta.id}_command_log.txt - MaskPrimers.py score --nproc ${task.cpus} -s ${meta.id}-FWD_primers-pass.fastq -p ${vprimers} --start ${params.vprimer_start} --maxerror ${params.primer_maxerror} \ + MaskPrimers.py score --nproc ${task.cpus} -s ${meta.id}-FWD_primers-pass.fastq -p ${vprimers} --start ${params.vprimer_start} --maxerror ${params.primer_r2_maxerror} \ --mode ${params.primer_mask_mode} --outname ${meta.id}-REV $revpr \ --log ${meta.id}-REV.log >> ${meta.id}_command_log.txt ParseLog.py -l ${meta.id}-FWD.log ${meta.id}-REV.log -f ID PRIMER ERROR @@ -39,10 +39,10 @@ process PRESTO_MASKPRIMERS_POSTASSEMBLY { """ } else if (params.cprimer_position == "R2") { """ - MaskPrimers.py score --nproc ${task.cpus} -s $reads -p ${vprimers} --start ${params.cprimer_start} --maxerror ${params.primer_maxerror} \ + MaskPrimers.py score --nproc ${task.cpus} -s $reads -p ${vprimers} --start ${params.vprimer_start} --maxerror ${params.primer_r1_maxerror} \ --mode ${params.primer_mask_mode} --outname ${meta.id}-FWD \ --log ${meta.id}-FWD.log > ${meta.id}_command_log.txt - MaskPrimers.py score --nproc ${task.cpus} -s ${meta.id}-FWD_primers-pass.fastq -p ${cprimers} --start ${params.vprimer_start} --maxerror ${params.primer_maxerror} \ + MaskPrimers.py score --nproc ${task.cpus} -s ${meta.id}-FWD_primers-pass.fastq -p ${cprimers} --start ${params.cprimer_start} --maxerror ${params.primer_r2_maxerror} \ --mode ${params.primer_mask_mode} --outname ${meta.id}-REV $revpr \ --log ${meta.id}-REV.log >> ${meta.id}_command_log.txt ParseLog.py -l ${meta.id}-FWD.log ${meta.id}-REV.log -f ID PRIMER ERROR diff --git a/modules/local/presto/presto_pairseq.nf b/modules/local/presto/presto_pairseq.nf index 1027c880..40ac33b1 100644 --- a/modules/local/presto/presto_pairseq.nf +++ b/modules/local/presto/presto_pairseq.nf @@ 
-18,8 +18,9 @@ process PRESTO_PAIRSEQ { script: def copyfield = (params.index_file | params.umi_position == 'R1') ? "--1f BARCODE" : "--2f BARCODE" + def args = task.ext.args?: '' """ - PairSeq.py -1 ${meta.id}_R1.fastq -2 ${meta.id}_R2.fastq $copyfield --coord illumina > ${meta.id}_command_log.txt + PairSeq.py -1 ${meta.id}_R1.fastq -2 ${meta.id}_R2.fastq $copyfield $args > ${meta.id}_command_log.txt cat <<-END_VERSIONS > versions.yml "${task.process}": diff --git a/modules/local/reveal/add_meta_to_tab.nf b/modules/local/reveal/add_meta_to_tab.nf index 0423695b..67c930d6 100644 --- a/modules/local/reveal/add_meta_to_tab.nf +++ b/modules/local/reveal/add_meta_to_tab.nf @@ -7,8 +7,8 @@ process ADD_META_TO_TAB { error "nf-core/airrflow currently does not support Conda. Please use a container profile instead." } container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'docker.io/immcantation/airrflow:3.2.0': - 'docker.io/immcantation/airrflow:3.2.0' }" + 'docker.io/immcantation/airrflow:3.3.0': + 'docker.io/immcantation/airrflow:3.3.0' }" cache 'deep' // Without 'deep' this process would run when using -resume diff --git a/modules/local/reveal/filter_junction_mod3.nf b/modules/local/reveal/filter_junction_mod3.nf index 75a06eac..c373ddbf 100644 --- a/modules/local/reveal/filter_junction_mod3.nf +++ b/modules/local/reveal/filter_junction_mod3.nf @@ -7,8 +7,8 @@ process FILTER_JUNCTION_MOD3 { error "nf-core/airrflow currently does not support Conda. Please use a container profile instead." } container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'docker.io/immcantation/airrflow:3.2.0': - 'docker.io/immcantation/airrflow:3.2.0' }" + 'docker.io/immcantation/airrflow:3.3.0': + 'docker.io/immcantation/airrflow:3.3.0' }" input: tuple val(meta), path(tab) // sequence tsv in AIRR format diff --git a/modules/local/reveal/filter_quality.nf b/modules/local/reveal/filter_quality.nf index a20d029e..46062cb9 100644 --- a/modules/local/reveal/filter_quality.nf +++ b/modules/local/reveal/filter_quality.nf @@ -7,14 +7,14 @@ process FILTER_QUALITY { error "nf-core/airrflow currently does not support Conda. Please use a container profile instead." } container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'docker.io/immcantation/airrflow:3.2.0': - 'docker.io/immcantation/airrflow:3.2.0' }" + 'docker.io/immcantation/airrflow:3.3.0': + 'docker.io/immcantation/airrflow:3.3.0' }" input: tuple val(meta), path(tab) // sequence tsv in AIRR format output: - tuple val(meta), path("*quality-pass.tsv"), optional:true, emit: tab // sequence tsv in AIRR format + tuple val(meta), path("*quality-pass.tsv"), emit: tab // sequence tsv in AIRR format path("*_command_log.txt"), emit: logs //process logs path "versions.yml", emit: versions diff --git a/modules/local/unzip_cellrangerdb.nf b/modules/local/unzip_cellrangerdb.nf new file mode 100644 index 00000000..429c0db9 --- /dev/null +++ b/modules/local/unzip_cellrangerdb.nf @@ -0,0 +1,29 @@ +process UNZIP_CELLRANGERDB { + tag "unzip_cellrangerdb" + label 'process_single' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+        'https://depot.galaxyproject.org/singularity/ubuntu:20.04' :
+        'nf-core/ubuntu:20.04' }"
+
+    input:
+    path(archive)
+
+    output:
+    path("$unzipped") , emit: unzipped
+    path "versions.yml", emit: versions
+
+    script:
+    unzipped = archive.toString() - '.tar.gz'
+    """
+    echo "${unzipped}"
+
+    tar -xzvf ${archive}
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        unzip_cellrangerdb: \$(echo \$(tar --version 2>&1) | sed 's/^.*(GNU tar) //; s/ Copyright.*\$//')
+    END_VERSIONS
+    """
+}
diff --git a/modules/nf-core/cat/fastq/environment.yml b/modules/nf-core/cat/fastq/environment.yml
new file mode 100644
index 00000000..bff93add
--- /dev/null
+++ b/modules/nf-core/cat/fastq/environment.yml
@@ -0,0 +1,7 @@
+name: cat_fastq
+channels:
+  - conda-forge
+  - bioconda
+  - defaults
+dependencies:
+  - conda-forge::sed=4.7
diff --git a/modules/nf-core/cat/fastq/main.nf b/modules/nf-core/cat/fastq/main.nf
new file mode 100644
index 00000000..3d963784
--- /dev/null
+++ b/modules/nf-core/cat/fastq/main.nf
@@ -0,0 +1,80 @@
+process CAT_FASTQ {
+    tag "$meta.id"
+    label 'process_single'
+
+    conda "${moduleDir}/environment.yml"
+    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+        'https://depot.galaxyproject.org/singularity/ubuntu:20.04' :
+        'nf-core/ubuntu:20.04' }"
+
+    input:
+    tuple val(meta), path(reads, stageAs: "input*/*")
+
+    output:
+    tuple val(meta), path("*.merged.fastq.gz"), emit: reads
+    path "versions.yml" , emit: versions
+
+    when:
+    task.ext.when == null || task.ext.when
+
+    script:
+    def args = task.ext.args ?: ''
+    def prefix = task.ext.prefix ?: "${meta.id}"
+    def readList = reads instanceof List ? reads.collect{ it.toString() } : [reads.toString()]
+    if (meta.single_end) {
+        if (readList.size >= 1) {
+            """
+            cat ${readList.join(' ')} > ${prefix}.merged.fastq.gz
+
+            cat <<-END_VERSIONS > versions.yml
+            "${task.process}":
+                cat: \$(echo \$(cat --version 2>&1) | sed 's/^.*coreutils) //; s/ .*\$//')
+            END_VERSIONS
+            """
+        }
+    } else {
+        if (readList.size >= 2) {
+            def read1 = []
+            def read2 = []
+            readList.eachWithIndex{ v, ix -> ( ix & 1 ? read2 : read1 ) << v }
+            """
+            cat ${read1.join(' ')} > ${prefix}_1.merged.fastq.gz
+            cat ${read2.join(' ')} > ${prefix}_2.merged.fastq.gz
+
+            cat <<-END_VERSIONS > versions.yml
+            "${task.process}":
+                cat: \$(echo \$(cat --version 2>&1) | sed 's/^.*coreutils) //; s/ .*\$//')
+            END_VERSIONS
+            """
+        }
+    }
+
+    stub:
+    def prefix = task.ext.prefix ?: "${meta.id}"
+    def readList = reads instanceof List ? reads.collect{ it.toString() } : [reads.toString()]
+    if (meta.single_end) {
+        if (readList.size > 1) {
+            """
+            touch ${prefix}.merged.fastq.gz
+
+            cat <<-END_VERSIONS > versions.yml
+            "${task.process}":
+                cat: \$(echo \$(cat --version 2>&1) | sed 's/^.*coreutils) //; s/ .*\$//')
+            END_VERSIONS
+            """
+        }
+    } else {
+        if (readList.size > 2) {
+            """
+            touch ${prefix}_1.merged.fastq.gz
+            touch ${prefix}_2.merged.fastq.gz
+
+            cat <<-END_VERSIONS > versions.yml
+            "${task.process}":
+                cat: \$(echo \$(cat --version 2>&1) | sed 's/^.*coreutils) //; s/ .*\$//')
+            END_VERSIONS
+            """
+        }
+    }
+
+}
diff --git a/modules/nf-core/cat/fastq/meta.yml b/modules/nf-core/cat/fastq/meta.yml
new file mode 100644
index 00000000..db4ac3c7
--- /dev/null
+++ b/modules/nf-core/cat/fastq/meta.yml
@@ -0,0 +1,42 @@
+name: cat_fastq
+description: Concatenates fastq files
+keywords:
+  - cat
+  - fastq
+  - concatenate
+tools:
+  - cat:
+      description: |
+        The cat utility reads files sequentially, writing them to the standard output.
+ documentation: https://www.gnu.org/software/coreutils/manual/html_node/cat-invocation.html + licence: ["GPL-3.0-or-later"] +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - reads: + type: file + description: | + List of input FastQ files to be concatenated. +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - reads: + type: file + description: Merged fastq file + pattern: "*.{merged.fastq.gz}" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@joseespinosa" + - "@drpatelh" +maintainers: + - "@joseespinosa" + - "@drpatelh" diff --git a/modules/nf-core/cat/fastq/tests/main.nf.test b/modules/nf-core/cat/fastq/tests/main.nf.test new file mode 100644 index 00000000..dab2e14c --- /dev/null +++ b/modules/nf-core/cat/fastq/tests/main.nf.test @@ -0,0 +1,138 @@ +nextflow_process { + + name "Test Process CAT_FASTQ" + script "../main.nf" + process "CAT_FASTQ" + tag "modules" + tag "modules_nfcore" + tag "cat" + tag "cat/fastq" + + test("test_cat_fastq_single_end") { + + when { + params { + outdir = "$outputDir" + } + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:true ], // meta map + [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true)] + ]) + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + + test("test_cat_fastq_paired_end") { + + when { + params { + outdir = "$outputDir" + } + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test2_1.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test2_2.fastq.gz', checkIfExists: true)] + ]) + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + + test("test_cat_fastq_single_end_same_name") { + + when { + params { + outdir = "$outputDir" + } + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:true ], // meta map + [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true)] + ]) + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + + test("test_cat_fastq_paired_end_same_name") { + + when { + params { + outdir = "$outputDir" + } + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 
'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true)] + ]) + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + + test("test_cat_fastq_single_end_single_file") { + + when { + params { + outdir = "$outputDir" + } + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:true ], // meta map + [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true)] + ]) + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } +} diff --git a/modules/nf-core/cat/fastq/tests/main.nf.test.snap b/modules/nf-core/cat/fastq/tests/main.nf.test.snap new file mode 100644 index 00000000..43dfe28f --- /dev/null +++ b/modules/nf-core/cat/fastq/tests/main.nf.test.snap @@ -0,0 +1,169 @@ +{ + "test_cat_fastq_single_end": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": true + }, + "test.merged.fastq.gz:md5,ee314a9bd568d06617171b0c85f508da" + ] + ], + "1": [ + "versions.yml:md5,d42d6e24d67004608495883e00bd501b" + ], + "reads": [ + [ + { + "id": "test", + "single_end": true + }, + "test.merged.fastq.gz:md5,ee314a9bd568d06617171b0c85f508da" + ] + ], + "versions": [ + "versions.yml:md5,d42d6e24d67004608495883e00bd501b" + ] + } + ], + "timestamp": "2024-01-17T17:30:39.816981" + }, + "test_cat_fastq_single_end_same_name": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": true + }, + "test.merged.fastq.gz:md5,3ad9406595fafec8172368f9cd0b6a22" + ] + ], + "1": [ + "versions.yml:md5,d42d6e24d67004608495883e00bd501b" + ], + "reads": [ + [ + { + "id": "test", + "single_end": true + }, + "test.merged.fastq.gz:md5,3ad9406595fafec8172368f9cd0b6a22" + ] + ], + "versions": [ + "versions.yml:md5,d42d6e24d67004608495883e00bd501b" + ] + } + ], + "timestamp": "2024-01-17T17:32:35.229332" + }, + "test_cat_fastq_single_end_single_file": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": true + }, + "test.merged.fastq.gz:md5,4161df271f9bfcd25d5845a1e220dbec" + ] + ], + "1": [ + "versions.yml:md5,d42d6e24d67004608495883e00bd501b" + ], + "reads": [ + [ + { + "id": "test", + "single_end": true + }, + "test.merged.fastq.gz:md5,4161df271f9bfcd25d5845a1e220dbec" + ] + ], + "versions": [ + "versions.yml:md5,d42d6e24d67004608495883e00bd501b" + ] + } + ], + "timestamp": "2024-01-17T17:34:00.058829" + }, + "test_cat_fastq_paired_end_same_name": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + [ + "test_1.merged.fastq.gz:md5,3ad9406595fafec8172368f9cd0b6a22", + "test_2.merged.fastq.gz:md5,a52cab0b840c7178b0ea83df1fdbe8d5" + ] + ] + ], + "1": [ + "versions.yml:md5,d42d6e24d67004608495883e00bd501b" + ], + "reads": [ + [ + { + "id": "test", + "single_end": false + }, + [ + "test_1.merged.fastq.gz:md5,3ad9406595fafec8172368f9cd0b6a22", + "test_2.merged.fastq.gz:md5,a52cab0b840c7178b0ea83df1fdbe8d5" + ] + ] + ], + "versions": [ + "versions.yml:md5,d42d6e24d67004608495883e00bd501b" + ] + } + ], + "timestamp": "2024-01-17T17:33:33.031555" + }, + "test_cat_fastq_paired_end": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + [ + "test_1.merged.fastq.gz:md5,3ad9406595fafec8172368f9cd0b6a22", + "test_2.merged.fastq.gz:md5,a52cab0b840c7178b0ea83df1fdbe8d5" + ] + ] + ], + "1": [ + 
"versions.yml:md5,d42d6e24d67004608495883e00bd501b" + ], + "reads": [ + [ + { + "id": "test", + "single_end": false + }, + [ + "test_1.merged.fastq.gz:md5,3ad9406595fafec8172368f9cd0b6a22", + "test_2.merged.fastq.gz:md5,a52cab0b840c7178b0ea83df1fdbe8d5" + ] + ] + ], + "versions": [ + "versions.yml:md5,d42d6e24d67004608495883e00bd501b" + ] + } + ], + "timestamp": "2024-01-17T17:32:02.270935" + } +} \ No newline at end of file diff --git a/modules/nf-core/cat/fastq/tests/tags.yml b/modules/nf-core/cat/fastq/tests/tags.yml new file mode 100644 index 00000000..6ac43614 --- /dev/null +++ b/modules/nf-core/cat/fastq/tests/tags.yml @@ -0,0 +1,2 @@ +cat/fastq: + - modules/nf-core/cat/fastq/** diff --git a/modules/nf-core/cellranger/mkvdjref/environment.yml b/modules/nf-core/cellranger/mkvdjref/environment.yml new file mode 100644 index 00000000..e151aeea --- /dev/null +++ b/modules/nf-core/cellranger/mkvdjref/environment.yml @@ -0,0 +1,5 @@ +name: cellranger_mkvdjref +channels: + - conda-forge + - bioconda + - defaults diff --git a/modules/nf-core/cellranger/mkvdjref/main.nf b/modules/nf-core/cellranger/mkvdjref/main.nf new file mode 100644 index 00000000..64e877f7 --- /dev/null +++ b/modules/nf-core/cellranger/mkvdjref/main.nf @@ -0,0 +1,38 @@ +process CELLRANGER_MKVDJREF { + tag "$fasta" + label 'process_high' + + container "nf-core/cellranger:7.1.0" + + input: + path fasta + path gtf + val reference_name + + output: + path "${reference_name}", emit: reference + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + // Exit if running this module with -profile conda / -profile mamba + if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) { + error "CELLRANGER_MKVDJREF module does not support Conda. Please use Docker / Singularity / Podman instead." + } + def args = task.ext.args ?: '' + """ + cellranger \\ + mkvdjref \\ + --genome=$reference_name \\ + --fasta=$fasta \\ + --genes=$gtf \\ + $args + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + cellranger: \$(echo \$( cellranger --version 2>&1) | sed 's/^.*[^0-9]\\([0-9]*\\.[0-9]*\\.[0-9]*\\).*\$/\\1/' ) + END_VERSIONS + """ +} diff --git a/modules/nf-core/cellranger/mkvdjref/meta.yml b/modules/nf-core/cellranger/mkvdjref/meta.yml new file mode 100644 index 00000000..6ee8956a --- /dev/null +++ b/modules/nf-core/cellranger/mkvdjref/meta.yml @@ -0,0 +1,43 @@ +name: cellranger_mkvdjref +description: Module to build the VDJ reference needed by the 10x Genomics Cell Ranger tool. Uses the cellranger mkvdjref command. +keywords: + - reference + - mkvdjref + - index + - immunoprofiling + - single-cell + - cellranger +tools: + - cellranger: + description: Cell Ranger processes data from 10X Genomics Chromium kits. `cellranger vdj` takes FASTQ files from `cellranger mkfastq` or `bcl2fastq` for V(D)J libraries and performs sequence assembly and paired clonotype calling. It uses the Chromium cellular barcodes and UMIs to assemble V(D)J transcripts per cell. Clonotypes and CDR3 sequences are output as a `.vloupe` file which can be loaded into Loupe V(D)J Browser. 
+ homepage: https://support.10xgenomics.com/single-cell-vdj/software/pipelines/latest/what-is-cell-ranger + documentation: https://support.10xgenomics.com/single-cell-vdj/software/pipelines/latest/advanced/references + tool_dev_url: https://support.10xgenomics.com/single-cell-vdj/software/pipelines/latest/advanced/references + licence: 10x Genomics EULA +input: + - fasta: + type: file + description: Reference genome FASTA file + pattern: "*.{fasta,fa}" + - genes: + type: file + description: Reference transcriptome GTF file + pattern: "*.gtf" + - genome: + type: string + description: The name to give the new reference folder, e.g. `my_vdj_ref` + pattern: str +output: + - reference: + type: directory + description: Folder containing all the reference indices needed by Cell Ranger + - versions: + type: file + description: File containing software version + pattern: "versions.yml" +authors: + - "@ggabernet" + - "@klkeys" +maintainers: + - "@ggabernet" + - "@klkeys" diff --git a/modules/nf-core/cellranger/vdj/environment.yml b/modules/nf-core/cellranger/vdj/environment.yml new file mode 100644 index 00000000..2ddcc0c6 --- /dev/null +++ b/modules/nf-core/cellranger/vdj/environment.yml @@ -0,0 +1,5 @@ +name: cellranger_vdj +channels: + - conda-forge + - bioconda + - defaults diff --git a/modules/nf-core/cellranger/vdj/main.nf b/modules/nf-core/cellranger/vdj/main.nf new file mode 100644 index 00000000..45a920f0 --- /dev/null +++ b/modules/nf-core/cellranger/vdj/main.nf @@ -0,0 +1,57 @@ +process CELLRANGER_VDJ { + tag "${meta.id}" + label 'process_high' + + container "nf-core/cellranger:7.1.0" + + input: + tuple val(meta), path(reads) + path reference + + output: + tuple val(meta), path("**/outs/**"), emit: outs + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + // Exit if running this module with -profile conda / -profile mamba + if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) { + error "CELLRANGER_VDJ module does not support Conda. Please use Docker / Singularity / Podman instead." + } + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def reference_name = reference.name + """ + cellranger \\ + vdj \\ + --id='${prefix}' \\ + --fastqs=. \\ + --reference=$reference_name \\ + --localcores=${task.cpus} \\ + --localmem=${task.memory.toGiga()} \\ + $args + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + cellranger: \$(echo \$( cellranger --version 2>&1) | sed 's/^.*[^0-9]\\([0-9]*\\.[0-9]*\\.[0-9]*\\).*\$/\\1/' ) + END_VERSIONS + """ + + stub: + // Exit if running this module with -profile conda / -profile mamba + if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) { + error "CELLRANGER_VDJ module does not support Conda. Please use Docker / Singularity / Podman instead." + } + def prefix = task.ext.prefix ?: "${meta.id}" + """ + mkdir -p "${meta.id}/outs/" + touch ${meta.id}/outs/fake_file.txt + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + cellranger: \$(echo \$( cellranger --version 2>&1) | sed 's/^.*[^0-9]\\([0-9]*\\.[0-9]*\\.[0-9]*\\).*\$/\\1/' ) + END_VERSIONS + """ +} diff --git a/modules/nf-core/cellranger/vdj/meta.yml b/modules/nf-core/cellranger/vdj/meta.yml new file mode 100644 index 00000000..3f4d7788 --- /dev/null +++ b/modules/nf-core/cellranger/vdj/meta.yml @@ -0,0 +1,48 @@ +name: cellranger_vdj +description: Module to use Cell Ranger's pipelines analyze sequencing data produced from Chromium Single Cell Immune Profiling. 
+keywords: + - align + - vdj + - reference + - immunoprofiling + - single-cell + - cellranger +tools: + - cellranger: + description: Cell Ranger processes data from 10X Genomics Chromium kits. `cellranger vdj` takes FASTQ files from `cellranger mkfastq` or `bcl2fastq` for V(D)J libraries and performs sequence assembly and paired clonotype calling. It uses the Chromium cellular barcodes and UMIs to assemble V(D)J transcripts per cell. Clonotypes and CDR3 sequences are output as a `.vloupe` file which can be loaded into Loupe V(D)J Browser. + homepage: https://support.10xgenomics.com/single-cell-vdj/software/pipelines/latest/what-is-cell-ranger + documentation: https://support.10xgenomics.com/single-cell-vdj/software/pipelines/latest/tutorial/tutorial-vdj + tool_dev_url: https://support.10xgenomics.com/single-cell-vdj/software/pipelines/latest/tutorial/tutorial-vdj + licence: 10x Genomics EULA +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - reads: + type: file + description: | + List of input FastQ files of size 1 and 2 for single-end and paired-end data, + respectively. + pattern: "${Sample_Name}_S1_L00${Lane_Number}_${I1,I2,R1,R2}_001.fastq.gz" + - reference: + type: directory + description: Folder containing all the reference indices needed by Cell Ranger +output: + - outs: + type: file + description: Files containing the outputs of Cell Ranger, see official 10X Genomics documentation for a complete list + pattern: "${meta.id}/outs/*" + - versions: + type: file + description: File containing software version + pattern: "versions.yml" +authors: + - "@ggabernet" + - "@Emiller88" + - "@klkeys" +maintainers: + - "@ggabernet" + - "@Emiller88" + - "@klkeys" diff --git a/modules/nf-core/custom/dumpsoftwareversions/main.nf b/modules/nf-core/custom/dumpsoftwareversions/main.nf deleted file mode 100644 index ebc87273..00000000 --- a/modules/nf-core/custom/dumpsoftwareversions/main.nf +++ /dev/null @@ -1,24 +0,0 @@ -process CUSTOM_DUMPSOFTWAREVERSIONS { - label 'process_single' - - // Requires `pyyaml` which does not have a dedicated container but is in the MultiQC container - conda "bioconda::multiqc=1.14" - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/multiqc:1.14--pyhdfd78af_0' : - 'biocontainers/multiqc:1.14--pyhdfd78af_0' }" - - input: - path versions - - output: - path "software_versions.yml" , emit: yml - path "software_versions_mqc.yml", emit: mqc_yml - path "versions.yml" , emit: versions - - when: - task.ext.when == null || task.ext.when - - script: - def args = task.ext.args ?: '' - template 'dumpsoftwareversions.py' -} diff --git a/modules/nf-core/custom/dumpsoftwareversions/meta.yml b/modules/nf-core/custom/dumpsoftwareversions/meta.yml deleted file mode 100644 index c32657de..00000000 --- a/modules/nf-core/custom/dumpsoftwareversions/meta.yml +++ /dev/null @@ -1,36 +0,0 @@ -# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/yaml-schema.json -name: custom_dumpsoftwareversions -description: Custom module used to dump software versions within the nf-core pipeline template -keywords: - - custom - - dump - - version -tools: - - custom: - description: Custom module used to dump software versions within the nf-core pipeline template - homepage: https://github.com/nf-core/tools - documentation: https://github.com/nf-core/tools - licence: ["MIT"] -input: - - versions: - type: file - description: YML file containing software versions - pattern: "*.yml" - -output: - - yml: - type: file - description: Standard YML file containing software versions - pattern: "software_versions.yml" - - mqc_yml: - type: file - description: MultiQC custom content YML file containing software versions - pattern: "software_versions_mqc.yml" - - versions: - type: file - description: File containing software versions - pattern: "versions.yml" - -authors: - - "@drpatelh" - - "@grst" diff --git a/modules/nf-core/custom/dumpsoftwareversions/templates/dumpsoftwareversions.py b/modules/nf-core/custom/dumpsoftwareversions/templates/dumpsoftwareversions.py deleted file mode 100755 index da033408..00000000 --- a/modules/nf-core/custom/dumpsoftwareversions/templates/dumpsoftwareversions.py +++ /dev/null @@ -1,101 +0,0 @@ -#!/usr/bin/env python - - -"""Provide functions to merge multiple versions.yml files.""" - - -import yaml -import platform -from textwrap import dedent - - -def _make_versions_html(versions): - """Generate a tabular HTML output of all versions for MultiQC.""" - html = [ - dedent( - """\\ - - - - - - - - - - """ - ) - ] - for process, tmp_versions in sorted(versions.items()): - html.append("") - for i, (tool, version) in enumerate(sorted(tmp_versions.items())): - html.append( - dedent( - f"""\\ - - - - - - """ - ) - ) - html.append("") - html.append("
    Process Name Software Version
    {process if (i == 0) else ''}{tool}{version}
    ") - return "\\n".join(html) - - -def main(): - """Load all version files and generate merged output.""" - versions_this_module = {} - versions_this_module["${task.process}"] = { - "python": platform.python_version(), - "yaml": yaml.__version__, - } - - with open("$versions") as f: - versions_by_process = yaml.load(f, Loader=yaml.BaseLoader) | versions_this_module - - # aggregate versions by the module name (derived from fully-qualified process name) - versions_by_module = {} - for process, process_versions in versions_by_process.items(): - module = process.split(":")[-1] - try: - if versions_by_module[module] != process_versions: - raise AssertionError( - "We assume that software versions are the same between all modules. " - "If you see this error-message it means you discovered an edge-case " - "and should open an issue in nf-core/tools. " - ) - except KeyError: - versions_by_module[module] = process_versions - - versions_by_module["Workflow"] = { - "Nextflow": "$workflow.nextflow.version", - "$workflow.manifest.name": "$workflow.manifest.version", - } - - versions_mqc = { - "id": "software_versions", - "section_name": "${workflow.manifest.name} Software Versions", - "section_href": "https://github.com/${workflow.manifest.name}", - "plot_type": "html", - "description": "are collected at run time from the software output.", - "data": _make_versions_html(versions_by_module), - } - - with open("software_versions.yml", "w") as f: - yaml.dump(versions_by_module, f, default_flow_style=False) - with open("software_versions_mqc.yml", "w") as f: - yaml.dump(versions_mqc, f, default_flow_style=False) - - with open("versions.yml", "w") as f: - yaml.dump(versions_this_module, f, default_flow_style=False) - - -if __name__ == "__main__": - main() diff --git a/modules/nf-core/fastp/environment.yml b/modules/nf-core/fastp/environment.yml new file mode 100644 index 00000000..70389e66 --- /dev/null +++ b/modules/nf-core/fastp/environment.yml @@ -0,0 +1,7 @@ +name: fastp +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::fastp=0.23.4 diff --git a/modules/nf-core/fastp/main.nf b/modules/nf-core/fastp/main.nf index 831b7f12..2a3b679e 100644 --- a/modules/nf-core/fastp/main.nf +++ b/modules/nf-core/fastp/main.nf @@ -2,7 +2,7 @@ process FASTP { tag "$meta.id" label 'process_medium' - conda "bioconda::fastp=0.23.4" + conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 'https://depot.galaxyproject.org/singularity/fastp:0.23.4--h5f740d0_0' : 'biocontainers/fastp:0.23.4--h5f740d0_0' }" @@ -45,7 +45,7 @@ process FASTP { $adapter_list \\ $fail_fastq \\ $args \\ - 2> ${prefix}.fastp.log \\ + 2> >(tee ${prefix}.fastp.log >&2) \\ | gzip -c > ${prefix}.fastp.fastq.gz cat <<-END_VERSIONS > versions.yml @@ -66,7 +66,7 @@ process FASTP { $adapter_list \\ $fail_fastq \\ $args \\ - 2> ${prefix}.fastp.log + 2> >(tee ${prefix}.fastp.log >&2) cat <<-END_VERSIONS > versions.yml "${task.process}": @@ -91,7 +91,7 @@ process FASTP { --thread $task.cpus \\ --detect_adapter_for_pe \\ $args \\ - 2> ${prefix}.fastp.log + 2> >(tee ${prefix}.fastp.log >&2) cat <<-END_VERSIONS > versions.yml "${task.process}": @@ -99,4 +99,22 @@ process FASTP { END_VERSIONS """ } + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + def is_single_output = task.ext.args?.contains('--interleaved_in') || meta.single_end + def touch_reads = is_single_output ? 
"${prefix}.fastp.fastq.gz" : "${prefix}_1.fastp.fastq.gz ${prefix}_2.fastp.fastq.gz" + def touch_merged = (!is_single_output && save_merged) ? "touch ${prefix}.merged.fastq.gz" : "" + """ + touch $touch_reads + touch "${prefix}.fastp.json" + touch "${prefix}.fastp.html" + touch "${prefix}.fastp.log" + $touch_merged + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + fastp: \$(fastp --version 2>&1 | sed -e "s/fastp //g") + END_VERSIONS + """ } diff --git a/modules/nf-core/fastp/meta.yml b/modules/nf-core/fastp/meta.yml index 197ea7ca..c22a16ab 100644 --- a/modules/nf-core/fastp/meta.yml +++ b/modules/nf-core/fastp/meta.yml @@ -33,7 +33,6 @@ input: - save_merged: type: boolean description: Specify true to save all merged reads to the a file ending in `*.merged.fastq.gz` - output: - meta: type: map @@ -71,3 +70,6 @@ output: authors: - "@drpatelh" - "@kevinmenden" +maintainers: + - "@drpatelh" + - "@kevinmenden" diff --git a/modules/nf-core/fastp/tests/main.nf.test b/modules/nf-core/fastp/tests/main.nf.test new file mode 100644 index 00000000..9b3f9a38 --- /dev/null +++ b/modules/nf-core/fastp/tests/main.nf.test @@ -0,0 +1,723 @@ +nextflow_process { + + name "Test Process FASTP" + script "../main.nf" + process "FASTP" + tag "modules" + tag "modules_nfcore" + tag "fastp" + + test("test_fastp_single_end") { + + when { + params { + outdir = "$outputDir" + } + process { + """ + adapter_fasta = [] + save_trimmed_fail = false + save_merged = false + + input[0] = Channel.of([ + [ id:'test', single_end:true ], + [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true) ] + ]) + input[1] = adapter_fasta + input[2] = save_trimmed_fail + input[3] = save_merged + """ + } + } + + then { + def html_text = [ "Q20 bases:12.922000 K (92.984097%)", + "single end (151 cycles)" ] + def log_text = [ "Q20 bases: 12922(92.9841%)", + "reads passed filter: 99" ] + def read_lines = ["@ERR5069949.2151832 NS500628:121:HK3MMAFX2:2:21208:10793:15304/1", + "TCATAAACCAAAGCACTCACAGTGTCAACAATTTCAGCAGGACAACGCCGACAAGTTCCGAGGAACATGTCTGGACCTATAGTTTTCATAAGTCTACACACTGAATTGAAATATTCTGGTTCTAGTGTGCCCTTAGTTAGCAATGTGCGT", + "AAAAAAEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEAAEEEEAEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEAAEEEEE + { assert path(process.out.reads.get(0).get(1)).linesGzip.contains(read_line) } + } + }, + { html_text.each { html_part -> + { assert path(process.out.html.get(0).get(1)).getText().contains(html_part) } + } + }, + { assert snapshot(process.out.json).match("test_fastp_single_end_json") }, + { log_text.each { log_part -> + { assert path(process.out.log.get(0).get(1)).getText().contains(log_part) } + } + }, + { + assert snapshot( + ( + [process.out.reads[0][0].toString()] + // meta + process.out.reads.collect { file(it[1]).getName() } + + process.out.json.collect { file(it[1]).getName() } + + process.out.html.collect { file(it[1]).getName() } + + process.out.log.collect { file(it[1]).getName() } + + process.out.reads_fail.collect { file(it[1]).getName() } + + process.out.reads_merged.collect { file(it[1]).getName() } + ).sort() + ).match("test_fastp_single_end-_match") + }, + { assert snapshot(process.out.versions).match("versions_single_end") } + ) + } + } + + test("test_fastp_single_end-stub") { + + options '-stub' + + when { + params { + outdir = "$outputDir" + } + process { + """ + adapter_fasta = [] + save_trimmed_fail = false + save_merged = false + + input[0] = Channel.of([ + [ id:'test', single_end:true ], + [ 
file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true) ] + ]) + input[1] = adapter_fasta + input[2] = save_trimmed_fail + input[3] = save_merged + """ + } + } + + then { + + assertAll( + { assert process.success }, + { + assert snapshot( + ( + [process.out.reads[0][0].toString()] + // meta + process.out.reads.collect { file(it[1]).getName() } + + process.out.json.collect { file(it[1]).getName() } + + process.out.html.collect { file(it[1]).getName() } + + process.out.log.collect { file(it[1]).getName() } + + process.out.reads_fail.collect { file(it[1]).getName() } + + process.out.reads_merged.collect { file(it[1]).getName() } + ).sort() + ).match("test_fastp_single_end-for_stub_match") + }, + { assert snapshot(process.out.versions).match("versions_single_end_stub") } + ) + } + } + + test("test_fastp_paired_end") { + + when { + params { + outdir = "$outputDir" + } + process { + """ + adapter_fasta = [] + save_trimmed_fail = false + save_merged = false + + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true) ] + ]) + input[1] = adapter_fasta + input[2] = save_trimmed_fail + input[3] = save_merged + """ + } + } + + then { + def html_text = [ "Q20 bases:25.719000 K (93.033098%)", + "The input has little adapter percentage (~0.000000%), probably it's trimmed before."] + def log_text = [ "No adapter detected for read1", + "Q30 bases: 12281(88.3716%)"] + def json_text = ['"passed_filter_reads": 198'] + def read1_lines = ["@ERR5069949.2151832 NS500628:121:HK3MMAFX2:2:21208:10793:15304/1", + "TCATAAACCAAAGCACTCACAGTGTCAACAATTTCAGCAGGACAACGCCGACAAGTTCCGAGGAACATGTCTGGACCTATAGTTTTCATAAGTCTACACACTGAATTGAAATATTCTGGTTCTAGTGTGCCCTTAGTTAGCAATGTGCGT", + "AAAAAAEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEAAEEEEAEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEAAEEEEE + { assert path(process.out.reads.get(0).get(1).get(0)).linesGzip.contains(read1_line) } + } + }, + { read2_lines.each { read2_line -> + { assert path(process.out.reads.get(0).get(1).get(1)).linesGzip.contains(read2_line) } + } + }, + { html_text.each { html_part -> + { assert path(process.out.html.get(0).get(1)).getText().contains(html_part) } + } + }, + { json_text.each { json_part -> + { assert path(process.out.json.get(0).get(1)).getText().contains(json_part) } + } + }, + { log_text.each { log_part -> + { assert path(process.out.log.get(0).get(1)).getText().contains(log_part) } + } + }, + { + assert snapshot( + ( + [process.out.reads[0][0].toString()] + // meta + process.out.reads.collect { it[1].collect { item -> file(item).getName() } } + + process.out.json.collect { file(it[1]).getName() } + + process.out.html.collect { file(it[1]).getName() } + + process.out.log.collect { file(it[1]).getName() } + + process.out.reads_fail.collect { file(it[1]).getName() } + + process.out.reads_merged.collect { file(it[1]).getName() } + ).sort() + ).match("test_fastp_paired_end_match") + }, + { assert snapshot(process.out.versions).match("versions_paired_end") } + ) + } + } + + test("test_fastp_paired_end-stub") { + + options '-stub' + + when { + params { + outdir = "$outputDir" + } + process { + """ + adapter_fasta = [] + save_trimmed_fail = false + save_merged = false + + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + [ 
file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true) ] + ]) + input[1] = adapter_fasta + input[2] = save_trimmed_fail + input[3] = save_merged + """ + } + } + + then { + assertAll( + { assert process.success }, + { + assert snapshot( + ( + [process.out.reads[0][0].toString()] + // meta + process.out.reads.collect { it[1].collect { item -> file(item).getName() } } + + process.out.json.collect { file(it[1]).getName() } + + process.out.html.collect { file(it[1]).getName() } + + process.out.log.collect { file(it[1]).getName() } + + process.out.reads_fail.collect { file(it[1]).getName() } + + process.out.reads_merged.collect { file(it[1]).getName() } + ).sort() + ).match("test_fastp_paired_end-for_stub_match") + }, + { assert snapshot(process.out.versions).match("versions_paired_end-stub") } + ) + } + } + + test("fastp test_fastp_interleaved") { + config './nextflow.config' + when { + params { + outdir = "$outputDir" + } + process { + """ + adapter_fasta = [] + save_trimmed_fail = false + save_merged = false + + input[0] = Channel.of([ + [ id:'test', single_end:true ], // meta map + [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_interleaved.fastq.gz', checkIfExists: true) ] + ]) + input[1] = adapter_fasta + input[2] = save_trimmed_fail + input[3] = save_merged + """ + } + } + + then { + def html_text = [ "Q20 bases:25.719000 K (93.033098%)", + "paired end (151 cycles + 151 cycles)"] + def log_text = [ "Q20 bases: 12922(92.9841%)", + "reads passed filter: 198"] + def read_lines = [ "@ERR5069949.2151832 NS500628:121:HK3MMAFX2:2:21208:10793:15304/1", + "TCATAAACCAAAGCACTCACAGTGTCAACAATTTCAGCAGGACAACGCCGACAAGTTCCGAGGAACATGTCTGGACCTATAGTTTTCATAAGTCTACACACTGAATTGAAATATTCTGGTTCTAGTGTGCCCTTAGTTAGCAATGTGCGT", + "AAAAAAEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEAAEEEEAEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEAAEEEEE + { assert path(process.out.reads.get(0).get(1)).linesGzip.contains(read_line) } + } + }, + { html_text.each { html_part -> + { assert path(process.out.html.get(0).get(1)).getText().contains(html_part) } + } + }, + { assert snapshot(process.out.json).match("fastp test_fastp_interleaved_json") }, + { log_text.each { log_part -> + { assert path(process.out.log.get(0).get(1)).getText().contains(log_part) } + } + }, + { + assert snapshot( + ( + [process.out.reads[0][0].toString()] + // meta + process.out.reads.collect { file(it[1]).getName() } + + process.out.json.collect { file(it[1]).getName() } + + process.out.html.collect { file(it[1]).getName() } + + process.out.log.collect { file(it[1]).getName() } + + process.out.reads_fail.collect { file(it[1]).getName() } + + process.out.reads_merged.collect { file(it[1]).getName() } + ).sort() + ).match("test_fastp_interleaved-_match") + }, + { assert snapshot(process.out.versions).match("versions_interleaved") } + ) + } + } + + test("fastp test_fastp_interleaved-stub") { + + options '-stub' + + config './nextflow.config' + when { + params { + outdir = "$outputDir" + } + process { + """ + adapter_fasta = [] + save_trimmed_fail = false + save_merged = false + + input[0] = Channel.of([ + [ id:'test', single_end:true ], // meta map + [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_interleaved.fastq.gz', checkIfExists: true) ] + ]) + input[1] = adapter_fasta + input[2] = save_trimmed_fail + input[3] = 
save_merged + """ + } + } + + then { + assertAll( + { assert process.success }, + { + assert snapshot( + ( + [process.out.reads[0][0].toString()] + // meta + process.out.reads.collect { file(it[1]).getName() } + + process.out.json.collect { file(it[1]).getName() } + + process.out.html.collect { file(it[1]).getName() } + + process.out.log.collect { file(it[1]).getName() } + + process.out.reads_fail.collect { file(it[1]).getName() } + + process.out.reads_merged.collect { file(it[1]).getName() } + ).sort() + ).match("test_fastp_interleaved-for_stub_match") + }, + { assert snapshot(process.out.versions).match("versions_interleaved-stub") } + ) + } + } + + test("test_fastp_single_end_trim_fail") { + + when { + params { + outdir = "$outputDir" + } + process { + """ + adapter_fasta = [] + save_trimmed_fail = true + save_merged = false + + input[0] = Channel.of([ + [ id:'test', single_end:true ], // meta map + [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true) ] + ]) + input[1] = adapter_fasta + input[2] = save_trimmed_fail + input[3] = save_merged + """ + } + } + + then { + def html_text = [ "Q20 bases:12.922000 K (92.984097%)", + "single end (151 cycles)"] + def log_text = [ "Q20 bases: 12922(92.9841%)", + "reads passed filter: 99" ] + def read_lines = [ "@ERR5069949.2151832 NS500628:121:HK3MMAFX2:2:21208:10793:15304/1", + "TCATAAACCAAAGCACTCACAGTGTCAACAATTTCAGCAGGACAACGCCGACAAGTTCCGAGGAACATGTCTGGACCTATAGTTTTCATAAGTCTACACACTGAATTGAAATATTCTGGTTCTAGTGTGCCCTTAGTTAGCAATGTGCGT", + "AAAAAAEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEAAEEEEAEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEAAEEEEE + { assert path(process.out.reads.get(0).get(1)).linesGzip.contains(read_line) } + } + }, + { failed_read_lines.each { failed_read_line -> + { assert path(process.out.reads_fail.get(0).get(1)).linesGzip.contains(failed_read_line) } + } + }, + { html_text.each { html_part -> + { assert path(process.out.html.get(0).get(1)).getText().contains(html_part) } + } + }, + { assert snapshot(process.out.json).match("test_fastp_single_end_trim_fail_json") }, + { log_text.each { log_part -> + { assert path(process.out.log.get(0).get(1)).getText().contains(log_part) } + } + }, + { assert snapshot(process.out.versions).match("versions_single_end_trim_fail") } + ) + } + } + + test("test_fastp_paired_end_trim_fail") { + + when { + params { + outdir = "$outputDir" + } + process { + """ + adapter_fasta = [] + save_trimmed_fail = true + save_merged = false + + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true)] + ]) + input[1] = adapter_fasta + input[2] = save_trimmed_fail + input[3] = save_merged + """ + } + } + + then { + def html_text = [ "Q20 bases:25.719000 K (93.033098%)", + "The input has little adapter percentage (~0.000000%), probably it's trimmed before."] + def log_text = [ "No adapter detected for read1", + "Q30 bases: 12281(88.3716%)"] + def json_text = ['"passed_filter_reads": 198'] + def read1_lines = ["@ERR5069949.2151832 NS500628:121:HK3MMAFX2:2:21208:10793:15304/1", + "TCATAAACCAAAGCACTCACAGTGTCAACAATTTCAGCAGGACAACGCCGACAAGTTCCGAGGAACATGTCTGGACCTATAGTTTTCATAAGTCTACACACTGAATTGAAATATTCTGGTTCTAGTGTGCCCTTAGTTAGCAATGTGCGT", + 
"AAAAAAEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEAAEEEEAEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEAAEEEEE + { assert path(process.out.reads.get(0).get(1).get(0)).linesGzip.contains(read1_line) } + } + }, + { read2_lines.each { read2_line -> + { assert path(process.out.reads.get(0).get(1).get(1)).linesGzip.contains(read2_line) } + } + }, + { failed_read2_lines.each { failed_read2_line -> + { assert path(process.out.reads_fail.get(0).get(1).get(1)).linesGzip.contains(failed_read2_line) } + } + }, + { html_text.each { html_part -> + { assert path(process.out.html.get(0).get(1)).getText().contains(html_part) } + } + }, + { json_text.each { json_part -> + { assert path(process.out.json.get(0).get(1)).getText().contains(json_part) } + } + }, + { log_text.each { log_part -> + { assert path(process.out.log.get(0).get(1)).getText().contains(log_part) } + } + }, + { assert snapshot(process.out.versions).match("versions_paired_end_trim_fail") } + ) + } + } + + test("test_fastp_paired_end_merged") { + + when { + params { + outdir = "$outputDir" + } + process { + """ + adapter_fasta = [] + save_trimmed_fail = false + save_merged = true + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true) ] + ]) + input[1] = adapter_fasta + input[2] = save_trimmed_fail + input[3] = save_merged + """ + } + } + + then { + def html_text = [ "
    "] + def log_text = [ "Merged and filtered:", + "total reads: 75", + "total bases: 13683"] + def json_text = ['"merged_and_filtered": {', '"total_reads": 75', '"total_bases": 13683'] + def read1_lines = [ "@ERR5069949.1066259 NS500628:121:HK3MMAFX2:1:11312:18369:8333/1", + "CCTTATGACAGCAAGAACTGTGTATGATGATGGTGCTAGGAGAGTGTGGACACTTATGAATGTCTTGACACTCGTTTATAAAGTTTATTATGGTAATGCTTTAGATCAAGCCATTTCCATGTGGGCTCTTATAATCTCTGTTACTTC", + "AAAAAEAEEAEEEEEEEEEEEEEEEEAEEEEAEEEEEEEEAEEEEEEEEEEEEEEEEE/EAEEEEEE/6EEEEEEEEEEAEEAEEE/EE/AEEAEEEEEAEEEA/EEAAEAE + { assert path(process.out.reads.get(0).get(1).get(0)).linesGzip.contains(read1_line) } + } + }, + { read2_lines.each { read2_line -> + { assert path(process.out.reads.get(0).get(1).get(1)).linesGzip.contains(read2_line) } + } + }, + { read_merged_lines.each { read_merged_line -> + { assert path(process.out.reads_merged.get(0).get(1)).linesGzip.contains(read_merged_line) } + } + }, + { html_text.each { html_part -> + { assert path(process.out.html.get(0).get(1)).getText().contains(html_part) } + } + }, + { json_text.each { json_part -> + { assert path(process.out.json.get(0).get(1)).getText().contains(json_part) } + } + }, + { log_text.each { log_part -> + { assert path(process.out.log.get(0).get(1)).getText().contains(log_part) } + } + }, + { + assert snapshot( + ( + [process.out.reads[0][0].toString()] + // meta + process.out.reads.collect { it[1].collect { item -> file(item).getName() } } + + process.out.json.collect { file(it[1]).getName() } + + process.out.html.collect { file(it[1]).getName() } + + process.out.log.collect { file(it[1]).getName() } + + process.out.reads_fail.collect { file(it[1]).getName() } + + process.out.reads_merged.collect { file(it[1]).getName() } + ).sort() + ).match("test_fastp_paired_end_merged_match") + }, + { assert snapshot(process.out.versions).match("versions_paired_end_merged") } + ) + } + } + + test("test_fastp_paired_end_merged-stub") { + + options '-stub' + + when { + params { + outdir = "$outputDir" + } + process { + """ + adapter_fasta = [] + save_trimmed_fail = false + save_merged = true + + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true) ] + ]) + input[1] = adapter_fasta + input[2] = save_trimmed_fail + input[3] = save_merged + """ + } + } + + then { + assertAll( + { assert process.success }, + { + assert snapshot( + ( + [process.out.reads[0][0].toString()] + // meta + process.out.reads.collect { it[1].collect { item -> file(item).getName() } } + + process.out.json.collect { file(it[1]).getName() } + + process.out.html.collect { file(it[1]).getName() } + + process.out.log.collect { file(it[1]).getName() } + + process.out.reads_fail.collect { file(it[1]).getName() } + + process.out.reads_merged.collect { file(it[1]).getName() } + ).sort() + ).match("test_fastp_paired_end_merged-for_stub_match") + }, + { assert snapshot(process.out.versions).match("versions_paired_end_merged_stub") } + ) + } + } + + test("test_fastp_paired_end_merged_adapterlist") { + + when { + params { + outdir = "$outputDir" + } + process { + """ + adapter_fasta = Channel.of([ file(params.modules_testdata_base_path + 'delete_me/fastp/adapters.fasta', checkIfExists: true) ]) + save_trimmed_fail = false + save_merged = true + + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta 
map + [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true) ] + ]) + input[1] = adapter_fasta + input[2] = save_trimmed_fail + input[3] = save_merged + """ + } + } + + then { + def html_text = [ "
    "] + def log_text = [ "Merged and filtered:", + "total reads: 75", + "total bases: 13683"] + def json_text = ['"merged_and_filtered": {', '"total_reads": 75', '"total_bases": 13683',"--adapter_fasta"] + def read1_lines = ["@ERR5069949.1066259 NS500628:121:HK3MMAFX2:1:11312:18369:8333/1", + "CCTTATGACAGCAAGAACTGTGTATGATGATGGTGCTAGGAGAGTGTGGACACTTATGAATGTCTTGACACTCGTTTATAAAGTTTATTATGGTAATGCTTTAGATCAAGCCATTTCCATGTGGGCTCTTATAATCTCTGTTACTTC", + "AAAAAEAEEAEEEEEEEEEEEEEEEEAEEEEAEEEEEEEEAEEEEEEEEEEEEEEEEE/EAEEEEEE/6EEEEEEEEEEAEEAEEE/EE/AEEAEEEEEAEEEA/EEAAEAE + { assert path(process.out.reads.get(0).get(1).get(0)).linesGzip.contains(read1_line) } + } + }, + { read2_lines.each { read2_line -> + { assert path(process.out.reads.get(0).get(1).get(1)).linesGzip.contains(read2_line) } + } + }, + { read_merged_lines.each { read_merged_line -> + { assert path(process.out.reads_merged.get(0).get(1)).linesGzip.contains(read_merged_line) } + } + }, + { html_text.each { html_part -> + { assert path(process.out.html.get(0).get(1)).getText().contains(html_part) } + } + }, + { json_text.each { json_part -> + { assert path(process.out.json.get(0).get(1)).getText().contains(json_part) } + } + }, + { log_text.each { log_part -> + { assert path(process.out.log.get(0).get(1)).getText().contains(log_part) } + } + }, + { assert snapshot(process.out.versions).match("versions_paired_end_merged_adapterlist") } + ) + } + } +} diff --git a/modules/nf-core/fastp/tests/main.nf.test.snap b/modules/nf-core/fastp/tests/main.nf.test.snap new file mode 100644 index 00000000..b4c0e1dd --- /dev/null +++ b/modules/nf-core/fastp/tests/main.nf.test.snap @@ -0,0 +1,330 @@ +{ + "fastp test_fastp_interleaved_json": { + "content": [ + [ + [ + { + "id": "test", + "single_end": true + }, + "test.fastp.json:md5,168f516f7bd4b7b6c32da7cba87299a4" + ] + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-01-17T18:08:06.123035" + }, + "test_fastp_paired_end_merged-for_stub_match": { + "content": [ + [ + [ + "test_1.fastp.fastq.gz", + "test_2.fastp.fastq.gz" + ], + "test.fastp.html", + "test.fastp.json", + "test.fastp.log", + "test.merged.fastq.gz", + "{id=test, single_end=false}" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-01-17T18:10:13.467574" + }, + "versions_interleaved": { + "content": [ + [ + "versions.yml:md5,48ffc994212fb1fc9f83a74fa69c9f02" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-01T11:56:24.615634793" + }, + "test_fastp_single_end_json": { + "content": [ + [ + [ + { + "id": "test", + "single_end": true + }, + "test.fastp.json:md5,c852d7a6dba5819e4ac8d9673bedcacc" + ] + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-01-17T18:06:00.223817" + }, + "versions_paired_end": { + "content": [ + [ + "versions.yml:md5,48ffc994212fb1fc9f83a74fa69c9f02" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-01T11:55:42.333545689" + }, + "test_fastp_paired_end_match": { + "content": [ + [ + [ + "test_1.fastp.fastq.gz", + "test_2.fastp.fastq.gz" + ], + "test.fastp.html", + "test.fastp.json", + "test.fastp.log", + "{id=test, single_end=false}" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-01T12:03:06.431833729" + }, + "test_fastp_interleaved-_match": { + "content": [ + [ + "test.fastp.fastq.gz", + "test.fastp.html", + "test.fastp.json", + "test.fastp.log", + "{id=test, 
single_end=true}" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-01T12:03:37.827323085" + }, + "test_fastp_paired_end_merged_match": { + "content": [ + [ + [ + "test_1.fastp.fastq.gz", + "test_2.fastp.fastq.gz" + ], + "test.fastp.html", + "test.fastp.json", + "test.fastp.log", + "test.merged.fastq.gz", + "{id=test, single_end=false}" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-01T12:08:44.496251446" + }, + "versions_single_end_stub": { + "content": [ + [ + "versions.yml:md5,48ffc994212fb1fc9f83a74fa69c9f02" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-01T11:55:27.354051299" + }, + "versions_interleaved-stub": { + "content": [ + [ + "versions.yml:md5,48ffc994212fb1fc9f83a74fa69c9f02" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-01T11:56:46.535528418" + }, + "versions_single_end_trim_fail": { + "content": [ + [ + "versions.yml:md5,48ffc994212fb1fc9f83a74fa69c9f02" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-01T11:59:03.724591407" + }, + "test_fastp_paired_end-for_stub_match": { + "content": [ + [ + [ + "test_1.fastp.fastq.gz", + "test_2.fastp.fastq.gz" + ], + "test.fastp.html", + "test.fastp.json", + "test.fastp.log", + "{id=test, single_end=false}" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-01-17T18:07:15.398827" + }, + "versions_paired_end-stub": { + "content": [ + [ + "versions.yml:md5,48ffc994212fb1fc9f83a74fa69c9f02" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-01T11:56:06.50017282" + }, + "versions_single_end": { + "content": [ + [ + "versions.yml:md5,48ffc994212fb1fc9f83a74fa69c9f02" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-01T11:55:07.67921647" + }, + "versions_paired_end_merged_stub": { + "content": [ + [ + "versions.yml:md5,48ffc994212fb1fc9f83a74fa69c9f02" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-01T11:59:47.350653154" + }, + "test_fastp_interleaved-for_stub_match": { + "content": [ + [ + "test.fastp.fastq.gz", + "test.fastp.html", + "test.fastp.json", + "test.fastp.log", + "{id=test, single_end=true}" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-01-17T18:08:06.127974" + }, + "versions_paired_end_trim_fail": { + "content": [ + [ + "versions.yml:md5,48ffc994212fb1fc9f83a74fa69c9f02" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-01T11:59:18.140484878" + }, + "test_fastp_single_end-for_stub_match": { + "content": [ + [ + "test.fastp.fastq.gz", + "test.fastp.html", + "test.fastp.json", + "test.fastp.log", + "{id=test, single_end=true}" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-01-17T18:06:00.244202" + }, + "test_fastp_single_end-_match": { + "content": [ + [ + "test.fastp.fastq.gz", + "test.fastp.html", + "test.fastp.json", + "test.fastp.log", + "{id=test, single_end=true}" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-01T11:57:30.791982648" + }, + "versions_paired_end_merged_adapterlist": { + "content": [ + [ + "versions.yml:md5,48ffc994212fb1fc9f83a74fa69c9f02" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + 
}, + "timestamp": "2024-02-01T12:05:37.845370554" + }, + "versions_paired_end_merged": { + "content": [ + [ + "versions.yml:md5,48ffc994212fb1fc9f83a74fa69c9f02" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-01T11:59:32.860543858" + }, + "test_fastp_single_end_trim_fail_json": { + "content": [ + [ + [ + { + "id": "test", + "single_end": true + }, + "test.fastp.json:md5,9a7ee180f000e8d00c7fb67f06293eb5" + ] + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-01-17T18:08:41.942317" + } +} \ No newline at end of file diff --git a/modules/nf-core/fastp/tests/nextflow.config b/modules/nf-core/fastp/tests/nextflow.config new file mode 100644 index 00000000..0f7849ad --- /dev/null +++ b/modules/nf-core/fastp/tests/nextflow.config @@ -0,0 +1,6 @@ +process { + + withName: FASTP { + ext.args = "--interleaved_in" + } +} diff --git a/modules/nf-core/fastp/tests/tags.yml b/modules/nf-core/fastp/tests/tags.yml new file mode 100644 index 00000000..c1afcce7 --- /dev/null +++ b/modules/nf-core/fastp/tests/tags.yml @@ -0,0 +1,2 @@ +fastp: + - modules/nf-core/fastp/** diff --git a/modules/nf-core/fastqc/environment.yml b/modules/nf-core/fastqc/environment.yml new file mode 100644 index 00000000..1787b38a --- /dev/null +++ b/modules/nf-core/fastqc/environment.yml @@ -0,0 +1,7 @@ +name: fastqc +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::fastqc=0.12.1 diff --git a/modules/nf-core/fastqc/main.nf b/modules/nf-core/fastqc/main.nf index 67209f79..9e19a74c 100644 --- a/modules/nf-core/fastqc/main.nf +++ b/modules/nf-core/fastqc/main.nf @@ -2,7 +2,7 @@ process FASTQC { tag "$meta.id" label 'process_medium' - conda "bioconda::fastqc=0.12.1" + conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 'https://depot.galaxyproject.org/singularity/fastqc:0.12.1--hdfd78af_0' : 'biocontainers/fastqc:0.12.1--hdfd78af_0' }" @@ -37,7 +37,7 @@ process FASTQC { cat <<-END_VERSIONS > versions.yml "${task.process}": - fastqc: \$( fastqc --version | sed -e "s/FastQC v//g" ) + fastqc: \$( fastqc --version | sed '/FastQC v/!d; s/.*v//' ) END_VERSIONS """ @@ -49,7 +49,7 @@ process FASTQC { cat <<-END_VERSIONS > versions.yml "${task.process}": - fastqc: \$( fastqc --version | sed -e "s/FastQC v//g" ) + fastqc: \$( fastqc --version | sed '/FastQC v/!d; s/.*v//' ) END_VERSIONS """ } diff --git a/modules/nf-core/fastqc/tests/main.nf.test b/modules/nf-core/fastqc/tests/main.nf.test index 6437a144..70edae4d 100644 --- a/modules/nf-core/fastqc/tests/main.nf.test +++ b/modules/nf-core/fastqc/tests/main.nf.test @@ -3,24 +3,20 @@ nextflow_process { name "Test Process FASTQC" script "../main.nf" process "FASTQC" + tag "modules" tag "modules_nfcore" tag "fastqc" - test("Single-Read") { + test("sarscov2 single-end [fastq]") { when { - params { - outdir = "$outputDir" - } process { """ - input[0] = [ + input[0] = Channel.of([ [ id: 'test', single_end:true ], - [ - file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true) - ] - ] + [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true) ] + ]) """ } } @@ -28,14 +24,189 @@ nextflow_process { then { assertAll ( { assert process.success }, + // NOTE The report contains the date inside it, which means that the md5sum is stable per day, but not longer than that. So you can't md5sum it. // looks like this:
    Mon 2 Oct 2023
    test.gz
    // https://github.com/nf-core/modules/pull/3903#issuecomment-1743620039 - { assert process.out.html.get(0).get(1) ==~ ".*/test_fastqc.html" }, - { assert path(process.out.html.get(0).get(1)).getText().contains("File typeConventional base calls") }, - { assert snapshot(process.out.versions).match("versions") }, - { assert process.out.zip.get(0).get(1) ==~ ".*/test_fastqc.zip" } + + { assert process.out.html[0][1] ==~ ".*/test_fastqc.html" }, + { assert process.out.zip[0][1] ==~ ".*/test_fastqc.zip" }, + { assert path(process.out.html[0][1]).text.contains("File typeConventional base calls") }, + + { assert snapshot(process.out.versions).match("fastqc_versions_single") } + ) + } + } + + test("sarscov2 paired-end [fastq]") { + + when { + process { + """ + input[0] = Channel.of([ + [id: 'test', single_end: false], // meta map + [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true) ] + ]) + """ + } + } + + then { + assertAll ( + { assert process.success }, + + { assert process.out.html[0][1][0] ==~ ".*/test_1_fastqc.html" }, + { assert process.out.html[0][1][1] ==~ ".*/test_2_fastqc.html" }, + { assert process.out.zip[0][1][0] ==~ ".*/test_1_fastqc.zip" }, + { assert process.out.zip[0][1][1] ==~ ".*/test_2_fastqc.zip" }, + { assert path(process.out.html[0][1][0]).text.contains("File typeConventional base calls") }, + { assert path(process.out.html[0][1][1]).text.contains("File typeConventional base calls") }, + + { assert snapshot(process.out.versions).match("fastqc_versions_paired") } + ) + } + } + + test("sarscov2 interleaved [fastq]") { + + when { + process { + """ + input[0] = Channel.of([ + [id: 'test', single_end: false], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_interleaved.fastq.gz', checkIfExists: true) + ]) + """ + } + } + + then { + assertAll ( + { assert process.success }, + + { assert process.out.html[0][1] ==~ ".*/test_fastqc.html" }, + { assert process.out.zip[0][1] ==~ ".*/test_fastqc.zip" }, + { assert path(process.out.html[0][1]).text.contains("File typeConventional base calls") }, + + { assert snapshot(process.out.versions).match("fastqc_versions_interleaved") } ) } } + + test("sarscov2 paired-end [bam]") { + + when { + process { + """ + input[0] = Channel.of([ + [id: 'test', single_end: false], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true) + ]) + """ + } + } + + then { + assertAll ( + { assert process.success }, + + { assert process.out.html[0][1] ==~ ".*/test_fastqc.html" }, + { assert process.out.zip[0][1] ==~ ".*/test_fastqc.zip" }, + { assert path(process.out.html[0][1]).text.contains("File typeConventional base calls") }, + + { assert snapshot(process.out.versions).match("fastqc_versions_bam") } + ) + } + } + + test("sarscov2 multiple [fastq]") { + + when { + process { + """ + input[0] = Channel.of([ + [id: 'test', single_end: false], // meta map + [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test2_1.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 
'genomics/sarscov2/illumina/fastq/test2_2.fastq.gz', checkIfExists: true) ] + ]) + """ + } + } + + then { + assertAll ( + { assert process.success }, + + { assert process.out.html[0][1][0] ==~ ".*/test_1_fastqc.html" }, + { assert process.out.html[0][1][1] ==~ ".*/test_2_fastqc.html" }, + { assert process.out.html[0][1][2] ==~ ".*/test_3_fastqc.html" }, + { assert process.out.html[0][1][3] ==~ ".*/test_4_fastqc.html" }, + { assert process.out.zip[0][1][0] ==~ ".*/test_1_fastqc.zip" }, + { assert process.out.zip[0][1][1] ==~ ".*/test_2_fastqc.zip" }, + { assert process.out.zip[0][1][2] ==~ ".*/test_3_fastqc.zip" }, + { assert process.out.zip[0][1][3] ==~ ".*/test_4_fastqc.zip" }, + { assert path(process.out.html[0][1][0]).text.contains("File typeConventional base calls") }, + { assert path(process.out.html[0][1][1]).text.contains("File typeConventional base calls") }, + { assert path(process.out.html[0][1][2]).text.contains("File typeConventional base calls") }, + { assert path(process.out.html[0][1][3]).text.contains("File typeConventional base calls") }, + + { assert snapshot(process.out.versions).match("fastqc_versions_multiple") } + ) + } + } + + test("sarscov2 custom_prefix") { + + when { + process { + """ + input[0] = Channel.of([ + [ id:'mysample', single_end:true ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true) + ]) + """ + } + } + + then { + assertAll ( + { assert process.success }, + + { assert process.out.html[0][1] ==~ ".*/mysample_fastqc.html" }, + { assert process.out.zip[0][1] ==~ ".*/mysample_fastqc.zip" }, + { assert path(process.out.html[0][1]).text.contains("File typeConventional base calls") }, + + { assert snapshot(process.out.versions).match("fastqc_versions_custom_prefix") } + ) + } + } + + test("sarscov2 single-end [fastq] - stub") { + + options "-stub" + + when { + process { + """ + input[0] = Channel.of([ + [ id: 'test', single_end:true ], + [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true) ] + ]) + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot(process.out.html.collect { file(it[1]).getName() } + + process.out.zip.collect { file(it[1]).getName() } + + process.out.versions ).match("fastqc_stub") } + ) + } + } + } diff --git a/modules/nf-core/fastqc/tests/main.nf.test.snap b/modules/nf-core/fastqc/tests/main.nf.test.snap index 636a32ce..86f7c311 100644 --- a/modules/nf-core/fastqc/tests/main.nf.test.snap +++ b/modules/nf-core/fastqc/tests/main.nf.test.snap @@ -1,10 +1,88 @@ { - "versions": { + "fastqc_versions_interleaved": { "content": [ [ "versions.yml:md5,e1cc25ca8af856014824abd842e93978" ] ], - "timestamp": "2023-10-09T23:40:54+0000" + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-01-31T17:40:07.293713" + }, + "fastqc_stub": { + "content": [ + [ + "test.html", + "test.zip", + "versions.yml:md5,e1cc25ca8af856014824abd842e93978" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-01-31T17:31:01.425198" + }, + "fastqc_versions_multiple": { + "content": [ + [ + "versions.yml:md5,e1cc25ca8af856014824abd842e93978" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-01-31T17:40:55.797907" + }, + "fastqc_versions_bam": { + "content": [ + [ + "versions.yml:md5,e1cc25ca8af856014824abd842e93978" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + 
"timestamp": "2024-01-31T17:40:26.795862" + }, + "fastqc_versions_single": { + "content": [ + [ + "versions.yml:md5,e1cc25ca8af856014824abd842e93978" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-01-31T17:39:27.043675" + }, + "fastqc_versions_paired": { + "content": [ + [ + "versions.yml:md5,e1cc25ca8af856014824abd842e93978" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-01-31T17:39:47.584191" + }, + "fastqc_versions_custom_prefix": { + "content": [ + [ + "versions.yml:md5,e1cc25ca8af856014824abd842e93978" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-01-31T17:41:14.576531" } } \ No newline at end of file diff --git a/modules/nf-core/multiqc/environment.yml b/modules/nf-core/multiqc/environment.yml new file mode 100644 index 00000000..ca39fb67 --- /dev/null +++ b/modules/nf-core/multiqc/environment.yml @@ -0,0 +1,7 @@ +name: multiqc +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::multiqc=1.21 diff --git a/modules/nf-core/multiqc/main.nf b/modules/nf-core/multiqc/main.nf index 1fc387be..47ac352f 100644 --- a/modules/nf-core/multiqc/main.nf +++ b/modules/nf-core/multiqc/main.nf @@ -1,10 +1,10 @@ process MULTIQC { label 'process_single' - conda "bioconda::multiqc=1.14" + conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/multiqc:1.14--pyhdfd78af_0' : - 'biocontainers/multiqc:1.14--pyhdfd78af_0' }" + 'https://depot.galaxyproject.org/singularity/multiqc:1.21--pyhdfd78af_0' : + 'biocontainers/multiqc:1.21--pyhdfd78af_0' }" input: path multiqc_files, stageAs: "?/*" @@ -25,12 +25,14 @@ process MULTIQC { def args = task.ext.args ?: '' def config = multiqc_config ? "--config $multiqc_config" : '' def extra_config = extra_multiqc_config ? "--config $extra_multiqc_config" : '' + def logo = multiqc_logo ? /--cl-config 'custom_logo: "${multiqc_logo}"'/ : '' """ multiqc \\ --force \\ $args \\ $config \\ $extra_config \\ + $logo \\ . 
cat <<-END_VERSIONS > versions.yml @@ -41,7 +43,7 @@ process MULTIQC { stub: """ - touch multiqc_data + mkdir multiqc_data touch multiqc_plots touch multiqc_report.html diff --git a/modules/nf-core/multiqc/meta.yml b/modules/nf-core/multiqc/meta.yml index f93b5ee5..45a9bc35 100644 --- a/modules/nf-core/multiqc/meta.yml +++ b/modules/nf-core/multiqc/meta.yml @@ -1,5 +1,4 @@ -# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/yaml-schema.json -name: MultiQC +name: multiqc description: Aggregate results from bioinformatics analyses across many samples into a single report keywords: - QC @@ -13,7 +12,6 @@ tools: homepage: https://multiqc.info/ documentation: https://multiqc.info/docs/ licence: ["GPL-3.0-or-later"] - input: - multiqc_files: type: file @@ -31,7 +29,6 @@ input: type: file description: Optional logo file for MultiQC pattern: "*.{png}" - output: - report: type: file @@ -54,3 +51,8 @@ authors: - "@bunop" - "@drpatelh" - "@jfy133" +maintainers: + - "@abhi18av" + - "@bunop" + - "@drpatelh" + - "@jfy133" diff --git a/modules/nf-core/multiqc/tests/main.nf.test b/modules/nf-core/multiqc/tests/main.nf.test new file mode 100644 index 00000000..f1c4242e --- /dev/null +++ b/modules/nf-core/multiqc/tests/main.nf.test @@ -0,0 +1,84 @@ +nextflow_process { + + name "Test Process MULTIQC" + script "../main.nf" + process "MULTIQC" + + tag "modules" + tag "modules_nfcore" + tag "multiqc" + + test("sarscov2 single-end [fastqc]") { + + when { + process { + """ + input[0] = Channel.of(file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastqc/test_fastqc.zip', checkIfExists: true)) + input[1] = [] + input[2] = [] + input[3] = [] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert process.out.report[0] ==~ ".*/multiqc_report.html" }, + { assert process.out.data[0] ==~ ".*/multiqc_data" }, + { assert snapshot(process.out.versions).match("multiqc_versions_single") } + ) + } + + } + + test("sarscov2 single-end [fastqc] [config]") { + + when { + process { + """ + input[0] = Channel.of(file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastqc/test_fastqc.zip', checkIfExists: true)) + input[1] = Channel.of(file("https://github.com/nf-core/tools/raw/dev/nf_core/pipeline-template/assets/multiqc_config.yml", checkIfExists: true)) + input[2] = [] + input[3] = [] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert process.out.report[0] ==~ ".*/multiqc_report.html" }, + { assert process.out.data[0] ==~ ".*/multiqc_data" }, + { assert snapshot(process.out.versions).match("multiqc_versions_config") } + ) + } + } + + test("sarscov2 single-end [fastqc] - stub") { + + options "-stub" + + when { + process { + """ + input[0] = Channel.of(file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastqc/test_fastqc.zip', checkIfExists: true)) + input[1] = [] + input[2] = [] + input[3] = [] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out.report.collect { file(it).getName() } + + process.out.data.collect { file(it).getName() } + + process.out.plots.collect { file(it).getName() } + + process.out.versions ).match("multiqc_stub") } + ) + } + + } +} diff --git a/modules/nf-core/multiqc/tests/main.nf.test.snap b/modules/nf-core/multiqc/tests/main.nf.test.snap new file mode 100644 index 00000000..bfebd802 --- /dev/null +++ b/modules/nf-core/multiqc/tests/main.nf.test.snap @@ -0,0 +1,41 @@ +{ + "multiqc_versions_single": { + "content": [ 
+ [ + "versions.yml:md5,21f35ee29416b9b3073c28733efe4b7d" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-29T08:48:55.657331" + }, + "multiqc_stub": { + "content": [ + [ + "multiqc_report.html", + "multiqc_data", + "multiqc_plots", + "versions.yml:md5,21f35ee29416b9b3073c28733efe4b7d" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-29T08:49:49.071937" + }, + "multiqc_versions_config": { + "content": [ + [ + "versions.yml:md5,21f35ee29416b9b3073c28733efe4b7d" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-29T08:49:25.457567" + } +} \ No newline at end of file diff --git a/modules/nf-core/multiqc/tests/tags.yml b/modules/nf-core/multiqc/tests/tags.yml new file mode 100644 index 00000000..bea6c0d3 --- /dev/null +++ b/modules/nf-core/multiqc/tests/tags.yml @@ -0,0 +1,2 @@ +multiqc: + - modules/nf-core/multiqc/** diff --git a/nextflow.config b/nextflow.config index 1b772655..dd398999 100644 --- a/nextflow.config +++ b/nextflow.config @@ -15,6 +15,7 @@ params { miairr="$projectDir/assets/reveal/mapping_MiAIRR_BioSample_v1.3.1.tsv" index_file = false + // ---------------------------- // sequencing protocol options // ---------------------------- @@ -47,14 +48,34 @@ params { // -------------------------- // sequence assembly options // -------------------------- + // Filter sequences filterseq_q = 20 - primer_maxerror = 0.2 + + // Mask primers + + primer_r1_maxerror = 0.2 + primer_r2_maxerror = 0.2 primer_mask_mode = 'cut' + maskprimers_align = false + primer_extract_len = 0 + primer_maxlen = 50 + + // Build consensus primer_consensus = 0.6 buildconsensus_maxerror = 0.1 buildconsensus_maxgap = 0.5 cluster_sets = true + // Assemble pairs + assemblepairs_sequential = false + + // internal cregion + align_cregion = false + internal_cregion_sequences = null + cregion_maxlen = 100 + cregion_maxerror = 0.3 + cregion_mask_mode = 'tag' + // ----------------------- // vdj annotation options // ----------------------- @@ -64,12 +85,13 @@ params { imgtdb_base = 'https://raw.githubusercontent.com/nf-core/test-datasets/airrflow/database-cache/imgtdb_base.zip' fetch_imgt = false save_databases = true + isotype_column = 'c_call' // ----------------------- // bulk filtering options // ----------------------- remove_chimeric = false - detect_contamination = null + detect_contamination = false collapseby = 'sample_id' // ----------------------- @@ -95,13 +117,19 @@ params { report_logo = "$projectDir/assets/nf-core-airrflow_logo_light.png" report_logo_img = "$projectDir/assets/nf-core-airrflow_logo_reports.png" + // ----------------------- + // Single cell raw input options + // ----------------------- + reference_10x = null + + // ----------------------- // generic nf-core options // ----------------------- // References - igenomes_base = 's3://ngi-igenomes/igenomes' - igenomes_ignore = true + igenomes_base = 's3://ngi-igenomes/igenomes' + igenomes_ignore = true // MultiQC options skip_multiqc = false @@ -130,7 +158,6 @@ params { config_profile_contact = null config_profile_url = null - // Max resource options // Defaults only, expecting to be overwritten max_memory = '128.GB' @@ -157,7 +184,7 @@ try { } // Load nf-core/airrflow custom profiles from different institutions. -// Warning: Uncomment only if a pipeline-specific instititutional config already exists on nf-core/configs! 
+// Warning: Uncomment only if a pipeline-specific institutional config already exists on nf-core/configs! // try { // includeConfig "${params.custom_config_base}/pipeline/airrflow.config" // } catch (Exception e) { @@ -168,6 +195,7 @@ profiles { dumpHashes = true process.beforeScript = 'echo $HOSTNAME' cleanup = false + nextflow.enable.configProcessNamesValidation = true } conda { conda.enabled = true @@ -176,6 +204,7 @@ profiles { podman.enabled = false shifter.enabled = false charliecloud.enabled = false + channels = ['conda-forge', 'bioconda', 'defaults'] apptainer.enabled = false } mamba { @@ -190,16 +219,16 @@ profiles { } docker { docker.enabled = true - docker.userEmulation = true conda.enabled = false singularity.enabled = false podman.enabled = false shifter.enabled = false charliecloud.enabled = false apptainer.enabled = false + docker.runOptions = '-u $(id -u):$(id -g)' } arm { - docker.runOptions = '-u $(id -u):$(id -g) --platform=linux/amd64' + docker.runOptions = '-u $(id -u):$(id -g) --platform=linux/amd64' } singularity { conda.enabled = false @@ -266,7 +295,13 @@ profiles { test_assembled_immcantation_devel_mm { includeConfig 'conf/test_assembled_immcantation_devel_mm.config' } test_nocluster { includeConfig 'conf/test_nocluster.config' } test_fetchimgt { includeConfig 'conf/test_fetchimgt.config' } - test_igblast { includeConfig 'conf/test_igblast.config' } + test_10x_sc { includeConfig 'conf/test_10x_sc.config' } + test_clontech_umi { includeConfig 'conf/test_clontech_umi.config' } + test_nebnext_umi { includeConfig 'conf/test_nebnext_umi.config' } + nebnext_umi_tcr { includeConfig 'conf/nebnext_umi_tcr.config' } + nebnext_umi_bcr { includeConfig 'conf/nebnext_umi_bcr.config' } + clontech_umi_bcr { includeConfig 'conf/clontech_umi_bcr.config' } + clontech_umi_tcr { includeConfig 'conf/clontech_umi_tcr.config' } } // Set default registry for Apptainer, Docker, Podman and Singularity independent of -profile @@ -279,7 +314,7 @@ singularity.registry = 'quay.io' // Nextflow plugins plugins { - id 'nf-validation' // Validation of pipeline parameters and creation of an input channel from a sample sheet + id 'nf-validation@1.1.3' // Validation of pipeline parameters and creation of an input channel from a sample sheet } // Load igenomes.config if required @@ -304,6 +339,9 @@ env { // Capture exit codes from upstream processes when piping process.shell = ['/bin/bash', '-euo', 'pipefail'] +// Disable process selector warnings by default. Use debug profile to enable warnings. +nextflow.enable.configProcessNamesValidation = false + def trace_timestamp = new java.util.Date().format( 'yyyy-MM-dd_HH-mm-ss') timeline { enabled = true @@ -329,7 +367,7 @@ manifest { description = """B and T cell repertoire analysis pipeline with the Immcantation framework.""" mainScript = 'main.nf' nextflowVersion = '!>=23.04.0' - version = '3.2.0' + version = '3.3.0' doi = '10.5281/zenodo.2642009' } diff --git a/nextflow_schema.json b/nextflow_schema.json index 7117deb4..3902f9ed 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -16,9 +16,9 @@ "type": "string", "format": "file-path", "exists": true, + "schema": "assets/schema_input.json", "mimetype": "text/csv", "pattern": "^\\S+\\.tsv$", - "schema": "assets/schema_input.json", "description": "Path to comma-separated file containing information about the samples in the experiment.", "help_text": "You will need to create a design file with information about the samples in your experiment before running the pipeline. 
Use this parameter to specify its location. It has to be a comma-separated file with 3 columns, and a header row. See [usage docs](https://nf-co.re/airrflow/usage#samplesheet-input).", "fa_icon": "fas fa-file-csv" @@ -45,7 +45,7 @@ }, "miairr": { "type": "string", - "default": "airrflow/assets/reveal/mapping_MiAIRR_BioSample_v1.3.1.tsv", + "default": "${projectDir}/assets/reveal/mapping_MiAIRR_BioSample_v1.3.1.tsv", "description": "Path to MiAIRR-BioSample mapping", "fa_icon": "fas fa-table" } @@ -61,8 +61,8 @@ "type": "string", "fa_icon": "fas fa-flask", "description": "Protocol used for the V(D)J amplicon sequencing library generation.", - "enum": ["specific_pcr_umi", "specific_pcr", "dt_5p_race", "dt_5p_race_umi"], - "help_text": "Available protocols are:\n- `specific_pcr_umi`: RT-PCR using transcript-specific primers containing UMIs.\n- `specific_pcr`: RT-PCR using transcript-specific primers.\n- `dt_5p_race_umi`: 5\u2019-RACE PCR using oligo-dT primers and template switch primers containing UMI.\n- `dt_5p_race`: 5\u2019-RACE PCR (i.e. RT is followed by a template switch (TS) step) using oligo-dT primers." + "enum": ["specific_pcr_umi", "specific_pcr", "dt_5p_race", "dt_5p_race_umi", "sc_10x_genomics"], + "help_text": "Available protocols are:\n- `specific_pcr_umi`: RT-PCR using transcript-specific primers containing UMIs.\n- `specific_pcr`: RT-PCR using transcript-specific primers.\n- `dt_5p_race_umi`: 5\u2019-RACE PCR using oligo-dT primers and template switch primers containing UMI.\n- `dt_5p_race`: 5\u2019-RACE PCR (i.e. RT is followed by a template switch (TS) step) using oligo-dT primers.\n- `sc_10x_genomics`:10x genomics library preparation protocol for scVDJ sequencing." }, "race_linker": { "type": "string", @@ -167,7 +167,6 @@ }, "adapter_fasta": { "type": "string", - "default": "None", "fa_icon": "fas fa-file", "description": "Fasta file with adapter sequences to be trimmed." }, @@ -219,23 +218,17 @@ "description": "Quality threshold for pRESTO FilterSeq sequence filtering.", "fa_icon": "fas fa-filter" }, - "primer_maxerror": { - "type": "number", - "default": 0.2, - "description": "Maximum primer scoring error in the pRESTO MaskPrimer step for the C and/or V region primers identification.", - "fa_icon": "fas fa-align-center" - }, "primer_consensus": { "type": "number", "default": 0.6, "description": "Maximum error for building the primer consensus in the pRESTO Buildconsensus step.", - "fa_icon": "fas fa-align-center" + "fa_icon": "fas fa-align-left" }, "primer_mask_mode": { "type": "string", "default": "cut", "description": "Masking mode for the pRESTO MaskPrimer step. Available: cut, mask, trim, tag.", - "enum": ["cut", "mask", "trim", "tag"], + "enum": ["cut", "mask", "tag", "trim"], "help_text": "The primer masking modes will perform the following actions:\n\n* `cut`: remove both the primer region and the preceding sequence.\n* `mask`: replace the primer region with Ns and remove the preceding sequence.\n* `trim`: remove the region preceding the primer, but leave the primer region intact.\n* `tag`: leave the input sequence unmodified.", "fa_icon": "fas fa-mask" }, @@ -254,8 +247,70 @@ "cluster_sets": { "type": "boolean", "default": true, - "fa_icon": "fas fa-layer-group", + "fa_icon": "fas fa-align-center", "description": "Cluster sequences by similarity regardless of any annotation with pRESTO ClusterSets and annotate the cluster ID additionally to the UMI barcode." 
+ }, + "primer_r1_maxerror": { + "type": "number", + "default": 0.2, + "fa_icon": "fas fa-align-left", + "description": "Maximum allowed error for R1 primer alignment." + }, + "primer_r2_maxerror": { + "type": "number", + "default": 0.2, + "fa_icon": "fas fa-align-right", + "description": "Maximum allowed error for R2 primer alignment." + }, + "maskprimers_align": { + "type": "boolean", + "fa_icon": "fas fa-align-center", + "description": "Align primers instead of scoring them. Used for protocols without primer fixed positions." + }, + "primer_extract_len": { + "type": "integer", + "default": 0, + "fa_icon": "fas fa-align-center", + "description": "Length of the extracted primers with MaskPrimer extract." + }, + "primer_maxlen": { + "type": "integer", + "default": 50, + "fa_icon": "fas fa-align-center", + "description": "Maximum allowed primer length when aligning the primers." + }, + "assemblepairs_sequential": { + "type": "boolean", + "fa_icon": "fas fa-align-center", + "description": "Use AssemblePairs sequential instead of AssemblePairs align when assembling read pairs." + }, + "align_cregion": { + "type": "boolean", + "fa_icon": "fas fa-align-center", + "description": "Align internal C-region for a more precise isotype characterization." + }, + "internal_cregion_sequences": { + "type": "string", + "fa_icon": "fas fa-align-center", + "description": "Provide internal C-region sequences for a more precise C-region characterization. Then also set the `align_cregion` flag." + }, + "cregion_maxlen": { + "type": "integer", + "default": 100, + "fa_icon": "fas fa-align-center", + "description": "Maximum allowed length when aligning the internal C-region." + }, + "cregion_maxerror": { + "type": "number", + "default": 0.3, + "fa_icon": "fas fa-align-center", + "description": "Maximum allowed error when aligning the internal C-region." + }, + "cregion_mask_mode": { + "type": "string", + "default": "tag", + "fa_icon": "fas fa-mask", + "description": "Mask mode for C-region alignment." } }, "fa_icon": "fas fa-align-center" @@ -299,6 +354,13 @@ "type": "boolean", "description": "Set this flag to fetch the IMGT reference data at runtime.", "fa_icon": "fas fa-cloud-download-alt" + }, + "isotype_column": { + "type": "string", + "description": "Set the column in the AIRR rearrangement file that isotype information should be gathered from.", + "default": "c_call", + "fa_icon": "fas fa-border-all", + "help_text": "Default is the `c_call` column. For bulk protocols one can use the c_primers or cregion columns (check `align_cregion`). The primer or cregions fasta file header need to contain the strings `IGHA`, `IGHD`, `IGHG`... for the isotypes to be properly parsed. This will be used for plotting mutation frequency by isotype in the clonal analysis report." } }, "fa_icon": "fas fa-edit" @@ -335,6 +397,16 @@ "default": "", "properties": { "clonal_threshold": { + "oneOf": [ + { + "type": "string", + "enum": ["auto"] + }, + { + "type": "number", + "minimum": 0 + } + ], "type": ["string", "number"], "default": "auto", "fa_icon": "fab fa-pagelines", @@ -383,6 +455,22 @@ "help_text": "By default, the pipeline will define clones for each of the samples, as two sequences having the same V-gene assignment, C-gene assignment, J-gene assignment, and junction length. Additionally, the similarity of the CDR3 sequences will be assessed by Hamming distances. \n\nA distance threshold for determining if two sequences come from the same clone or not is automatically determined by the process find threshold. 
Alternatively, a hamming distance threshold can be manually set by setting the `--clonal_threshold` parameter.", "fa_icon": "fab fa-pagelines" }, + "single_cell_analysis_options": { + "title": "Single cell analysis options", + "type": "object", + "description": "Options specific for raw single cell input.", + "default": "", + "properties": { + "reference_10x": { + "type": "string", + "description": "Path to the reference directory required by cellranger. Can either be directory or tar.gz.", + "help_text": "See for [IMGT](https://support.10xgenomics.com/single-cell-vdj/software/pipelines/latest/advanced/references#imgt) or [default](https://www.10xgenomics.com/support/software/cell-ranger/downloads).", + "fa_icon": "fas fa-database" + } + }, + "help_text": "Options for running raw single cell data.", + "fa_icon": "fab fa-pagelines" + }, "report_options": { "title": "Report options", "type": "object", @@ -685,6 +773,9 @@ { "$ref": "#/definitions/reference_genome_options" }, + { + "$ref": "#/definitions/single_cell_analysis_options" + }, { "$ref": "#/definitions/institutional_config_options" }, diff --git a/pyproject.toml b/pyproject.toml index 0d62beb6..56110621 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,10 +1,15 @@ -# Config file for Python. Mostly used to configure linting of bin/check_samplesheet.py with Black. +# Config file for Python. Mostly used to configure linting of bin/*.py with Ruff. # Should be kept the same as nf-core/tools to avoid fighting with template synchronisation. -[tool.black] +[tool.ruff] line-length = 120 -target_version = ["py37", "py38", "py39", "py310"] +target-version = "py38" +cache-dir = "~/.cache/ruff" -[tool.isort] -profile = "black" -known_first_party = ["nf_core"] -multi_line_output = 3 +[tool.ruff.lint] +select = ["I", "E1", "E4", "E7", "E9", "F", "UP", "N"] + +[tool.ruff.lint.isort] +known-first-party = ["nf_core"] + +[tool.ruff.lint.per-file-ignores] +"__init__.py" = ["E402", "F401"] diff --git a/subworkflows/local/bulk_qc_and_filter.nf b/subworkflows/local/bulk_qc_and_filter.nf index 247a0341..34b082d1 100644 --- a/subworkflows/local/bulk_qc_and_filter.nf +++ b/subworkflows/local/bulk_qc_and_filter.nf @@ -23,7 +23,7 @@ workflow BULK_QC_AND_FILTER { ch_imgt.collect() ) ch_logs = ch_logs.mix(CHANGEO_CREATEGERMLINES.out.logs) - ch_versions = ch_versions.mix(CHANGEO_CREATEGERMLINES.out.versions.ifEmpty(null)) + ch_versions = ch_versions.mix(CHANGEO_CREATEGERMLINES.out.versions) // Remove chimera REMOVE_CHIMERIC( @@ -31,7 +31,7 @@ workflow BULK_QC_AND_FILTER { ch_imgt.collect() ) ch_logs = ch_logs.mix(REMOVE_CHIMERIC.out.logs) - ch_versions = ch_versions.mix(REMOVE_CHIMERIC.out.versions.ifEmpty(null)) + ch_versions = ch_versions.mix(REMOVE_CHIMERIC.out.versions) ch_bulk_chimeric_pass = REMOVE_CHIMERIC.out.tab @@ -51,14 +51,14 @@ workflow BULK_QC_AND_FILTER { .collect() ) ch_logs = ch_logs.mix(DETECT_CONTAMINATION.out.logs) - ch_versions = ch_versions.mix(DETECT_CONTAMINATION.out.versions.ifEmpty(null)) + ch_versions = ch_versions.mix(DETECT_CONTAMINATION.out.versions) } COLLAPSE_DUPLICATES( ch_bulk_chimeric_pass ) - ch_versions = ch_versions.mix(COLLAPSE_DUPLICATES.out.versions.ifEmpty(null)) + ch_versions = ch_versions.mix(COLLAPSE_DUPLICATES.out.versions) ch_logs = ch_logs.mix(COLLAPSE_DUPLICATES.out.logs) emit: diff --git a/subworkflows/local/clonal_analysis.nf b/subworkflows/local/clonal_analysis.nf index 437aa5a9..887aed92 100644 --- a/subworkflows/local/clonal_analysis.nf +++ b/subworkflows/local/clonal_analysis.nf @@ -105,6 +105,7 @@ 
workflow CLONAL_ANALYSIS { ch_imgt.collect(), ch_all_repertoires_cloned_samplesheet ) + ch_versions = DEFINE_CLONES_REPORT.out.versions } // prepare ch for dowser lineages diff --git a/subworkflows/local/databases.nf b/subworkflows/local/databases.nf new file mode 100644 index 00000000..594b340e --- /dev/null +++ b/subworkflows/local/databases.nf @@ -0,0 +1,54 @@ +include { FETCH_DATABASES } from '../../modules/local/fetch_databases' +include { UNZIP_DB as UNZIP_IGBLAST } from '../../modules/local/unzip_db' +include { UNZIP_DB as UNZIP_IMGT } from '../../modules/local/unzip_db' + +workflow DATABASES { + + take: + + main: + ch_versions = Channel.empty() + + // FETCH DATABASES + if( !params.fetch_imgt ){ + if (params.igblast_base.endsWith(".zip")) { + Channel.fromPath("${params.igblast_base}") + .ifEmpty{ error "IGBLAST DB not found: ${params.igblast_base}" } + .set { ch_igblast_zipped } + UNZIP_IGBLAST( ch_igblast_zipped.collect() ) + ch_igblast = UNZIP_IGBLAST.out.unzipped + ch_versions = ch_versions.mix(UNZIP_IGBLAST.out.versions) + } else { + Channel.fromPath("${params.igblast_base}") + .ifEmpty { error "IGBLAST DB not found: ${params.igblast_base}" } + .set { ch_igblast } + } + } + + if( !params.fetch_imgt ){ + if (params.imgtdb_base.endsWith(".zip")) { + Channel.fromPath("${params.imgtdb_base}") + .ifEmpty{ error "IMGTDB not found: ${params.imgtdb_base}" } + .set { ch_imgt_zipped } + UNZIP_IMGT( ch_imgt_zipped.collect() ) + ch_imgt = UNZIP_IMGT.out.unzipped + ch_versions = ch_versions.mix(UNZIP_IMGT.out.versions) + } else { + Channel.fromPath("${params.imgtdb_base}") + .ifEmpty { error "IMGT DB not found: ${params.imgtdb_base}" } + .set { ch_imgt } + } + } + + if (params.fetch_imgt) { + FETCH_DATABASES() + ch_igblast = FETCH_DATABASES.out.igblast + ch_imgt = FETCH_DATABASES.out.imgt + ch_versions = ch_versions.mix(FETCH_DATABASES.out.versions) + } + + emit: + versions = ch_versions + imgt = ch_imgt + igblast = ch_igblast +} diff --git a/subworkflows/local/fastq_input_check.nf b/subworkflows/local/fastq_input_check.nf index 52f87190..e14cfc21 100644 --- a/subworkflows/local/fastq_input_check.nf +++ b/subworkflows/local/fastq_input_check.nf @@ -3,8 +3,8 @@ */ include { SAMPLESHEET_CHECK } from '../../modules/local/samplesheet_check' -//TODO: when enchantr supports input samplesheet from raw sequencing, update code here to commented one. 
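The new `DATABASES` subworkflow above centralises reference handling: it takes no inputs, either unzips local `--igblast_base` / `--imgtdb_base` archives or fetches them from IMGT when `params.fetch_imgt` is set, and emits `igblast`, `imgt` and `versions` channels. A minimal sketch of how a calling workflow could consume those emissions; the include path and emission names come from the file above, while the caller itself is hypothetical and not the pipeline's actual entry point:

```groovy
// Hypothetical caller; only DATABASES and its emissions are taken from the diff above.
include { DATABASES } from './subworkflows/local/databases'

workflow EXAMPLE_REFERENCE_PREP {
    main:
    DATABASES()                             // no take: inputs; behaviour is driven by params.fetch_imgt etc.
    ch_igblast  = DATABASES.out.igblast     // IgBLAST reference directory (unzipped if a .zip was given)
    ch_imgt     = DATABASES.out.imgt        // IMGT germline reference directory
    ch_versions = DATABASES.out.versions    // versions.yml channel to mix into the pipeline-wide channel
}
```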
-//include { VALIDATE_INPUT } from '../../modules/local/enchantr/validate_input' +include { CAT_FASTQ } from '../../modules/nf-core/cat/fastq/main' + workflow FASTQ_INPUT_CHECK { take: @@ -15,22 +15,42 @@ workflow FASTQ_INPUT_CHECK { .tsv .splitCsv ( header:true, sep:'\t' ) .map { create_fastq_channels(it) } + .dump (tag: 'fastq_channel_before_merge_samples') + .groupTuple(by: [0]) + .dump(tag: 'fastq_channel_after_merge_samples_grouped') + .branch { + meta, fastqs -> + single: fastqs.size() == 1 + return [ meta, fastqs.flatten() ] + multiple: fastqs.size() > 1 + return [ meta, fastqs.flatten() ] + } .set { ch_reads } - // VALIDATE_INPUT( - // samplesheet, - // params.miairr, - // params.collapseby, - // params.cloneby - // ) - - // VALIDATE_INPUT.out.validated_input - // .splitCsv(header: true, sep:'\t') - // .map { get_meta(it) } - // .set{ ch_reads } + + ch_versions = SAMPLESHEET_CHECK.out.versions + + // Merge multi-lane sample fastq for protocols except for 10x genomics (cellranger handles multi-fastq per sample) + if (params.library_generation_method == 'sc_10x_genomics') { + + ch_merged_reads = ch_reads.single.mix( ch_reads.multiple ) + + } else { + + CAT_FASTQ ( + ch_reads.multiple + ) + .reads + .mix( ch_reads.single ) + .dump (tag: 'fastq_channel_after_merge_samples') + .set { ch_merged_reads } + + ch_versions = ch_versions.mix( CAT_FASTQ.out.versions ) + + } emit: - reads = ch_reads // channel: [ val(meta), [ reads ] ] - versions = SAMPLESHEET_CHECK.out.versions // channel: [ versions.yml ] + reads = ch_merged_reads // channel: [ val(meta), [ reads ] ] + versions = ch_versions // channel: [ versions.yml ] samplesheet = SAMPLESHEET_CHECK.out.tsv // tsv metadata file } @@ -47,6 +67,7 @@ def create_fastq_channels(LinkedHashMap col) { meta.filetype = "fastq" meta.single_cell = col.single_cell.toLowerCase() meta.locus = col.pcr_target_locus + meta.single_end = false def array = [] if (!file(col.filename_R1).exists()) { diff --git a/subworkflows/local/presto_sans_umi.nf b/subworkflows/local/presto_sans_umi.nf index 59a2db57..8d75856d 100644 --- a/subworkflows/local/presto_sans_umi.nf +++ b/subworkflows/local/presto_sans_umi.nf @@ -33,25 +33,25 @@ workflow PRESTO_SANS_UMI { params.save_trimmed, save_merged ) - ch_versions = ch_versions.mix(FASTP.out.versions.ifEmpty([])) + ch_versions = ch_versions.mix(FASTP.out.versions) ch_gunzip = FASTP.out.reads.map{ meta,reads -> [meta, reads[0], reads[1]] } // gunzip fastq.gz to fastq GUNZIP_SANS_UMI ( ch_gunzip ) - ch_versions = ch_versions.mix(GUNZIP_SANS_UMI.out.versions.ifEmpty(null)) + ch_versions = ch_versions.mix(GUNZIP_SANS_UMI.out.versions) // Assemble read pairs PRESTO_ASSEMBLEPAIRS_SANS_UMI ( GUNZIP_SANS_UMI.out.reads ) - ch_versions = ch_versions.mix(PRESTO_ASSEMBLEPAIRS_SANS_UMI.out.versions.ifEmpty(null)) + ch_versions = ch_versions.mix(PRESTO_ASSEMBLEPAIRS_SANS_UMI.out.versions) // Filter sequences by quality score PRESTO_FILTERSEQ_POSTASSEMBLY_SANS_UMI ( PRESTO_ASSEMBLEPAIRS_SANS_UMI.out.reads ) - ch_versions = ch_versions.mix(PRESTO_FILTERSEQ_POSTASSEMBLY_SANS_UMI.out.versions.ifEmpty(null)) + ch_versions = ch_versions.mix(PRESTO_FILTERSEQ_POSTASSEMBLY_SANS_UMI.out.versions) // Mask primers PRESTO_MASKPRIMERS_POSTASSEMBLY_SANS_UMI ( @@ -59,41 +59,41 @@ workflow PRESTO_SANS_UMI { ch_cprimers.collect(), ch_vprimers.collect() ) - ch_versions = ch_versions.mix(PRESTO_MASKPRIMERS_POSTASSEMBLY_SANS_UMI.out.versions.ifEmpty(null)) + ch_versions = ch_versions.mix(PRESTO_MASKPRIMERS_POSTASSEMBLY_SANS_UMI.out.versions) // Generate QC stats 
after reads paired and filtered but before collapsed FASTQC_POSTASSEMBLY_SANS_UMI ( PRESTO_MASKPRIMERS_POSTASSEMBLY_SANS_UMI.out.reads ) - ch_versions = ch_versions.mix(FASTQC_POSTASSEMBLY_SANS_UMI.out.versions.ifEmpty(null)) + ch_versions = ch_versions.mix(FASTQC_POSTASSEMBLY_SANS_UMI.out.versions) // Annotate primers in C_PRIMER and V_PRIMER field PRESTO_PARSEHEADERS_PRIMERS_SANS_UMI ( PRESTO_MASKPRIMERS_POSTASSEMBLY_SANS_UMI.out.reads ) - ch_versions = ch_versions.mix(PRESTO_PARSEHEADERS_PRIMERS_SANS_UMI.out.versions.ifEmpty(null)) + ch_versions = ch_versions.mix(PRESTO_PARSEHEADERS_PRIMERS_SANS_UMI.out.versions) // Annotate metadata on primer headers PRESTO_PARSEHEADERS_METADATA_SANS_UMI ( PRESTO_PARSEHEADERS_PRIMERS_SANS_UMI.out.reads ) - ch_versions = ch_versions.mix(PRESTO_PARSEHEADERS_METADATA_SANS_UMI.out.versions.ifEmpty(null)) + ch_versions = ch_versions.mix(PRESTO_PARSEHEADERS_METADATA_SANS_UMI.out.versions) // Mark and count duplicate sequences (DUPCOUNT) PRESTO_COLLAPSESEQ_SANS_UMI ( PRESTO_PARSEHEADERS_METADATA_SANS_UMI.out.reads ) - ch_versions = ch_versions.mix(PRESTO_COLLAPSESEQ_SANS_UMI.out.versions.ifEmpty(null)) + ch_versions = ch_versions.mix(PRESTO_COLLAPSESEQ_SANS_UMI.out.versions) // Filter out sequences with less than 2 representative duplicates PRESTO_SPLITSEQ_SANS_UMI ( PRESTO_COLLAPSESEQ_SANS_UMI.out.reads ) - ch_versions = ch_versions.mix(PRESTO_SPLITSEQ_SANS_UMI.out.versions.ifEmpty(null)) + ch_versions = ch_versions.mix(PRESTO_SPLITSEQ_SANS_UMI.out.versions) emit: fasta = PRESTO_SPLITSEQ_SANS_UMI.out.fasta - software = ch_versions + versions = ch_versions fastp_reads_json = FASTP.out.json.collect{ meta,json -> json } fastp_reads_html = FASTP.out.html.collect{ meta,html -> html } fastqc_postassembly_gz = FASTQC_POSTASSEMBLY_SANS_UMI.out.zip diff --git a/subworkflows/local/presto_umi.nf b/subworkflows/local/presto_umi.nf index 0c5b92aa..89e8d5f6 100644 --- a/subworkflows/local/presto_umi.nf +++ b/subworkflows/local/presto_umi.nf @@ -9,16 +9,25 @@ include { FASTP } from '../../modules/n //PRESTO include { PRESTO_FILTERSEQ as PRESTO_FILTERSEQ_UMI } from '../../modules/local/presto/presto_filterseq' include { PRESTO_MASKPRIMERS as PRESTO_MASKPRIMERS_UMI } from '../../modules/local/presto/presto_maskprimers' +include { PRESTO_MASKPRIMERS_ALIGN } from '../../modules/local/presto/presto_maskprimers_align' +include { PRESTO_MASKPRIMERS_EXTRACT } from '../../modules/local/presto/presto_maskprimers_extract' +include { PRESTO_MASKPRIMERS_ALIGN as PRESTO_ALIGN_CREGION } from '../../modules/local/presto/presto_maskprimers_align' include { PRESTO_PAIRSEQ as PRESTO_PAIRSEQ_UMI } from '../../modules/local/presto/presto_pairseq' +include { PRESTO_PAIRSEQ as PRESTO_PAIRSEQ_ALIGN } from '../../modules/local/presto/presto_pairseq' include { PRESTO_CLUSTERSETS as PRESTO_CLUSTERSETS_UMI } from '../../modules/local/presto/presto_clustersets' include { PRESTO_PARSE_CLUSTER as PRESTO_PARSE_CLUSTER_UMI } from '../../modules/local/presto/presto_parse_cluster' include { PRESTO_BUILDCONSENSUS as PRESTO_BUILDCONSENSUS_UMI } from '../../modules/local/presto/presto_buildconsensus' +include { PRESTO_BUILDCONSENSUS as PRESTO_BUILDCONSENSUS_ALIGN } from '../../modules/local/presto/presto_buildconsensus' include { PRESTO_POSTCONSENSUS_PAIRSEQ as PRESTO_POSTCONSENSUS_PAIRSEQ_UMI } from '../../modules/local/presto/presto_postconsensus_pairseq' include { PRESTO_ASSEMBLEPAIRS as PRESTO_ASSEMBLEPAIRS_UMI } from '../../modules/local/presto/presto_assemblepairs' +include { 
PRESTO_ASSEMBLEPAIRS_SEQUENTIAL } from '../../modules/local/presto/presto_assemblepairs_sequential' include { PRESTO_PARSEHEADERS as PRESTO_PARSEHEADERS_COLLAPSE_UMI } from '../../modules/local/presto/presto_parseheaders' +include { PRESTO_PARSEHEADERS as PRESTO_PARSEHEADERS_CREGION } from '../../modules/local/presto/presto_parseheaders' include { PRESTO_PARSEHEADERS_PRIMERS as PRESTO_PARSEHEADERS_PRIMERS_UMI } from '../../modules/local/presto/presto_parseheaders_primers' include { PRESTO_PARSEHEADERS_METADATA as PRESTO_PARSEHEADERS_METADATA_UMI } from '../../modules/local/presto/presto_parseheaders_metadata' include { PRESTO_COLLAPSESEQ as PRESTO_COLLAPSESEQ_UMI } from '../../modules/local/presto/presto_collapseseq' +include { PRESTO_COLLAPSESEQ as PRESTO_COLLAPSESEQ_ALIGN } from '../../modules/local/presto/presto_collapseseq' +include { PRESTO_COLLAPSESEQ as PRESTO_COLLAPSESEQ_CREGION } from '../../modules/local/presto/presto_collapseseq' include { PRESTO_SPLITSEQ as PRESTO_SPLITSEQ_UMI} from '../../modules/local/presto/presto_splitseq' @@ -28,6 +37,8 @@ workflow PRESTO_UMI { ch_cprimers // channel: [ cprimers.fasta ] ch_vprimers // channel: [ vprimers.fasta ] ch_adapter_fasta // channel: [ adapters.fasta ] + ch_internal_cregion // channel: [ internal_cregions.fasta ] + ch_igblast main: @@ -48,7 +59,7 @@ workflow PRESTO_UMI { params.save_trimmed, save_merged ) - ch_versions = ch_versions.mix(FASTP.out.versions.ifEmpty([])) + ch_versions = ch_versions.mix(FASTP.out.versions) //ch for merge umi ch_meta_R1_R2 = FASTP.out.reads @@ -60,7 +71,7 @@ workflow PRESTO_UMI { MERGE_UMI ( ch_meta_R1_R2_index ) ch_gunzip = MERGE_UMI.out.reads - ch_versions = ch_versions.mix(MERGE_UMI.out.versions.ifEmpty(null)) + ch_versions = ch_versions.mix(MERGE_UMI.out.versions) } else { @@ -73,7 +84,7 @@ workflow PRESTO_UMI { params.save_trimmed, save_merged ) - ch_versions = ch_versions.mix(FASTP.out.versions.ifEmpty([])) + ch_versions = ch_versions.mix(FASTP.out.versions) ch_rename_fastq_umi = FASTP.out.reads.map{ meta,reads -> [meta, reads[0], reads[1]] } @@ -84,114 +95,222 @@ workflow PRESTO_UMI { // gunzip fastq.gz to fastq GUNZIP_UMI ( ch_gunzip ) - ch_versions = ch_versions.mix(GUNZIP_UMI.out.versions.ifEmpty(null)) + ch_versions = ch_versions.mix(GUNZIP_UMI.out.versions) // Filter sequences by quality score PRESTO_FILTERSEQ_UMI ( GUNZIP_UMI.out.reads ) - ch_versions = ch_versions.mix(PRESTO_FILTERSEQ_UMI.out.versions.ifEmpty(null)) + ch_versions = ch_versions.mix(PRESTO_FILTERSEQ_UMI.out.versions) // Mask primers - PRESTO_MASKPRIMERS_UMI ( - PRESTO_FILTERSEQ_UMI.out.reads, - ch_cprimers.collect(), - ch_vprimers.collect() - ) - ch_versions = ch_versions.mix(PRESTO_MASKPRIMERS_UMI.out.versions.ifEmpty(null)) + if (params.maskprimers_align) { + + ch_reads_R1 = PRESTO_FILTERSEQ_UMI.out.reads + .map{ reads -> [reads[0], reads[1]] }.dump(tag: 'ch_reads_R1') + ch_reads_R2 = PRESTO_FILTERSEQ_UMI.out.reads + .map{ reads -> [reads[0], reads[2]] }.dump(tag: 'ch_reads_R2') + PRESTO_MASKPRIMERS_ALIGN( + ch_reads_R1, + ch_cprimers.collect(), + params.primer_maxlen, + params.primer_r1_maxerror, + params.primer_mask_mode + ) + PRESTO_MASKPRIMERS_EXTRACT( + ch_reads_R2 + ) - // Pre-consensus pair - PRESTO_PAIRSEQ_UMI ( - PRESTO_MASKPRIMERS_UMI.out.reads - ) - ch_versions = ch_versions.mix(PRESTO_PAIRSEQ_UMI.out.versions.ifEmpty(null)) + ch_versions = ch_versions.mix(PRESTO_MASKPRIMERS_ALIGN.out.versions) + ch_versions = ch_versions.mix(PRESTO_MASKPRIMERS_EXTRACT.out.versions) + // Merge again R1 and R2 by sample ID. 
+ ch_maskprimers_reads_R1 = PRESTO_MASKPRIMERS_ALIGN.out.reads.map{ reads -> [reads[0].id, reads[0], reads[1]]}.dump(tag: 'ch_maskprimers_reads_R1') + ch_maskprimers_reads_R2 = PRESTO_MASKPRIMERS_EXTRACT.out.reads.map{ reads -> [reads[0].id, reads[0], reads[1]]}.dump(tag: 'ch_maskprimers_reads_R2') + ch_maskprimers_reads = ch_maskprimers_reads_R1.join(ch_maskprimers_reads_R2) + .map{ it -> [it[1], it[2], it[4]] }.dump(tag: 'ch_maskprimers_reads_after_remerge') + + ch_maskprimers_logs = PRESTO_MASKPRIMERS_ALIGN.out.logs + ch_maskprimers_logs = ch_maskprimers_logs.mix(PRESTO_MASKPRIMERS_EXTRACT.out.logs) + + PRESTO_PAIRSEQ_ALIGN( ch_maskprimers_reads ) + ch_versions = ch_versions.mix(PRESTO_PAIRSEQ_ALIGN.out.versions) + ch_for_clustersets = PRESTO_PAIRSEQ_ALIGN.out.reads + ch_pairseq_logs = PRESTO_PAIRSEQ_ALIGN.out.logs + + } else { + + PRESTO_MASKPRIMERS_UMI ( + PRESTO_FILTERSEQ_UMI.out.reads, + ch_cprimers.collect(), + ch_vprimers.collect() + ) + ch_versions = ch_versions.mix(PRESTO_MASKPRIMERS_UMI.out.versions) + ch_maskprimers_logs = PRESTO_MASKPRIMERS_UMI.out.logs + + // Pre-consensus pair + PRESTO_PAIRSEQ_UMI ( + PRESTO_MASKPRIMERS_UMI.out.reads + ) + ch_versions = ch_versions.mix(PRESTO_PAIRSEQ_UMI.out.versions) + ch_for_clustersets = PRESTO_PAIRSEQ_UMI.out.reads + ch_pairseq_logs = PRESTO_PAIRSEQ_UMI.out.logs + + } if (params.cluster_sets) { // Cluster sequences by similarity PRESTO_CLUSTERSETS_UMI ( - PRESTO_PAIRSEQ_UMI.out.reads + ch_for_clustersets ) - ch_versions = ch_versions.mix(PRESTO_CLUSTERSETS_UMI.out.versions.ifEmpty(null)) + ch_versions = ch_versions.mix(PRESTO_CLUSTERSETS_UMI.out.versions) // Annotate cluster into barcode field PRESTO_PARSE_CLUSTER_UMI ( PRESTO_CLUSTERSETS_UMI.out.reads ) - ch_versions = ch_versions.mix(PRESTO_PARSE_CLUSTER_UMI.out.versions.ifEmpty(null)) + ch_versions = ch_versions.mix(PRESTO_PARSE_CLUSTER_UMI.out.versions) ch_for_buildconsensus = PRESTO_PARSE_CLUSTER_UMI.out.reads ch_clustersets_logs = PRESTO_CLUSTERSETS_UMI.out.logs.collect() } else { - ch_for_buildconsensus = PRESTO_PAIRSEQ_UMI.out.reads + ch_for_buildconsensus = ch_for_clustersets ch_clustersets_logs = Channel.empty() } // Build consensus of sequences with same UMI barcode - PRESTO_BUILDCONSENSUS_UMI ( - ch_for_buildconsensus - ) - ch_versions = ch_versions.mix(PRESTO_BUILDCONSENSUS_UMI.out.versions.ifEmpty(null)) + if (params.maskprimers_align) { + PRESTO_BUILDCONSENSUS_ALIGN ( + ch_for_buildconsensus + ) + ch_versions = ch_versions.mix(PRESTO_BUILDCONSENSUS_ALIGN.out.versions) + ch_postconsensus = PRESTO_BUILDCONSENSUS_ALIGN.out.reads + ch_buildconsensus_logs = PRESTO_BUILDCONSENSUS_ALIGN.out.logs + } else { + PRESTO_BUILDCONSENSUS_UMI ( + ch_for_buildconsensus + ) + ch_versions = ch_versions.mix(PRESTO_BUILDCONSENSUS_UMI.out.versions) + ch_postconsensus = PRESTO_BUILDCONSENSUS_UMI.out.reads + ch_buildconsensus_logs = PRESTO_BUILDCONSENSUS_UMI.out.logs + } // Post-consensus pair PRESTO_POSTCONSENSUS_PAIRSEQ_UMI ( - PRESTO_BUILDCONSENSUS_UMI.out.reads + ch_postconsensus ) - ch_versions = ch_versions.mix(PRESTO_POSTCONSENSUS_PAIRSEQ_UMI.out.versions.ifEmpty(null)) + ch_versions = ch_versions.mix(PRESTO_POSTCONSENSUS_PAIRSEQ_UMI.out.versions) - // Assemble read pairs - PRESTO_ASSEMBLEPAIRS_UMI ( - PRESTO_POSTCONSENSUS_PAIRSEQ_UMI.out.reads - ) - ch_versions = ch_versions.mix(PRESTO_ASSEMBLEPAIRS_UMI.out.versions.ifEmpty(null)) + if (params.assemblepairs_sequential){ + // Assemble read pairs sequential + PRESTO_ASSEMBLEPAIRS_SEQUENTIAL ( + PRESTO_POSTCONSENSUS_PAIRSEQ_UMI.out.reads, + 
ch_igblast.collect() + ) + ch_versions = ch_versions.mix(PRESTO_ASSEMBLEPAIRS_SEQUENTIAL.out.versions) + ch_assemblepairs_reads = PRESTO_ASSEMBLEPAIRS_SEQUENTIAL.out.reads + ch_assemblepairs_logs = PRESTO_ASSEMBLEPAIRS_SEQUENTIAL.out.logs + } else { + // Assemble read pairs align + PRESTO_ASSEMBLEPAIRS_UMI ( + PRESTO_POSTCONSENSUS_PAIRSEQ_UMI.out.reads + ) + ch_versions = ch_versions.mix(PRESTO_ASSEMBLEPAIRS_UMI.out.versions) + ch_assemblepairs_reads = PRESTO_ASSEMBLEPAIRS_UMI.out.reads + ch_assemblepairs_logs = PRESTO_ASSEMBLEPAIRS_UMI.out.logs + } + + + if (params.align_cregion) { + PRESTO_ALIGN_CREGION( + ch_assemblepairs_reads, + ch_internal_cregion.collect(), + params.cregion_maxlen, + params.cregion_maxerror, + params.cregion_mask_mode + ) + ch_parseheaders_reads = PRESTO_ALIGN_CREGION.out.reads + ch_versions = ch_versions.mix(PRESTO_ALIGN_CREGION.out.versions) + } else { + ch_parseheaders_reads = ch_assemblepairs_reads + } // Generate QC stats after reads paired and filtered but before collapsed FASTQC_POSTASSEMBLY_UMI ( - PRESTO_ASSEMBLEPAIRS_UMI.out.reads + ch_assemblepairs_reads ) - ch_versions = ch_versions.mix(FASTQC_POSTASSEMBLY_UMI.out.versions.ifEmpty(null)) + ch_versions = ch_versions.mix(FASTQC_POSTASSEMBLY_UMI.out.versions) // Combine UMI duplicate count PRESTO_PARSEHEADERS_COLLAPSE_UMI ( - PRESTO_ASSEMBLEPAIRS_UMI.out.reads + ch_parseheaders_reads ) - ch_versions = ch_versions.mix(PRESTO_PARSEHEADERS_COLLAPSE_UMI.out.versions.ifEmpty(null)) + ch_versions = ch_versions.mix(PRESTO_PARSEHEADERS_COLLAPSE_UMI.out.versions) - // Annotate primers in C_PRIMER and V_PRIMER field - PRESTO_PARSEHEADERS_PRIMERS_UMI ( - PRESTO_PARSEHEADERS_COLLAPSE_UMI.out.reads - ) - ch_versions = ch_versions.mix(PRESTO_PARSEHEADERS_PRIMERS_UMI.out.versions.ifEmpty(null)) + // Annotate primer fields and collapse duplicates + if (params.maskprimers_align) { + // Rename primer field to CREGION + PRESTO_PARSEHEADERS_CREGION ( + PRESTO_PARSEHEADERS_COLLAPSE_UMI.out.reads + ) + ch_versions = ch_versions.mix(PRESTO_PARSEHEADERS_CREGION.out.versions) - // Annotate metadata on primer headers - PRESTO_PARSEHEADERS_METADATA_UMI ( - PRESTO_PARSEHEADERS_PRIMERS_UMI.out.reads - ) - ch_versions = ch_versions.mix(PRESTO_PARSEHEADERS_METADATA_UMI.out.versions.ifEmpty(null)) + // Collapse duplicates + PRESTO_COLLAPSESEQ_ALIGN ( + PRESTO_PARSEHEADERS_CREGION.out.reads + ) + ch_versions = ch_versions.mix(PRESTO_COLLAPSESEQ_ALIGN.out.versions) + ch_collapsed = PRESTO_COLLAPSESEQ_ALIGN.out.reads + ch_collapse_logs = PRESTO_COLLAPSESEQ_ALIGN.out.logs - // Mark and count duplicate sequences with different UMI barcodes (DUPCOUNT) - PRESTO_COLLAPSESEQ_UMI ( - PRESTO_PARSEHEADERS_METADATA_UMI.out.reads + } else { + // Annotate primers in C_PRIMER and V_PRIMER field + PRESTO_PARSEHEADERS_PRIMERS_UMI ( + PRESTO_PARSEHEADERS_COLLAPSE_UMI.out.reads + ) + ch_versions = ch_versions.mix(PRESTO_PARSEHEADERS_PRIMERS_UMI.out.versions) + + if (params.align_cregion) { + PRESTO_COLLAPSESEQ_CREGION ( + PRESTO_PARSEHEADERS_PRIMERS_UMI.out.reads + ) + ch_versions = ch_versions.mix(PRESTO_COLLAPSESEQ_CREGION.out.versions) + ch_collapsed = PRESTO_COLLAPSESEQ_CREGION.out.reads + ch_collapse_logs = PRESTO_COLLAPSESEQ_CREGION.out.logs + } else { + // Collapse duplicates + PRESTO_COLLAPSESEQ_UMI ( + PRESTO_PARSEHEADERS_PRIMERS_UMI.out.reads + ) + ch_versions = ch_versions.mix(PRESTO_COLLAPSESEQ_UMI.out.versions) + ch_collapsed = PRESTO_COLLAPSESEQ_UMI.out.reads + ch_collapse_logs = PRESTO_COLLAPSESEQ_UMI.out.logs + } + } + + // Annotate metadata 
on read headers + PRESTO_PARSEHEADERS_METADATA_UMI ( + ch_collapsed ) - ch_versions = ch_versions.mix(PRESTO_COLLAPSESEQ_UMI.out.versions.ifEmpty(null)) + ch_versions = ch_versions.mix(PRESTO_PARSEHEADERS_METADATA_UMI.out.versions) // Filter out sequences with less than 2 representative duplicates with different UMIs PRESTO_SPLITSEQ_UMI ( - PRESTO_COLLAPSESEQ_UMI.out.reads + PRESTO_PARSEHEADERS_METADATA_UMI.out.reads ) - ch_versions = ch_versions.mix(PRESTO_SPLITSEQ_UMI.out.versions.ifEmpty(null)) + ch_versions = ch_versions.mix(PRESTO_SPLITSEQ_UMI.out.versions) emit: fasta = PRESTO_SPLITSEQ_UMI.out.fasta - software = ch_versions + versions = ch_versions fastp_reads_json = FASTP.out.json.collect{ meta,json -> json } fastp_reads_html = FASTP.out.html.collect{ meta,html -> html } fastqc_postassembly_gz = FASTQC_POSTASSEMBLY_UMI.out.zip presto_filterseq_logs = PRESTO_FILTERSEQ_UMI.out.logs - presto_maskprimers_logs = PRESTO_MASKPRIMERS_UMI.out.logs.collect() - presto_pairseq_logs = PRESTO_PAIRSEQ_UMI.out.logs.collect() + presto_maskprimers_logs = ch_maskprimers_logs.collect() + presto_pairseq_logs = ch_pairseq_logs.collect() presto_clustersets_logs = ch_clustersets_logs - presto_buildconsensus_logs = PRESTO_BUILDCONSENSUS_UMI.out.logs.collect() + presto_buildconsensus_logs = ch_buildconsensus_logs.collect() presto_postconsensus_pairseq_logs = PRESTO_POSTCONSENSUS_PAIRSEQ_UMI.out.logs.collect() - presto_assemblepairs_logs = PRESTO_ASSEMBLEPAIRS_UMI.out.logs.collect() - presto_collapseseq_logs = PRESTO_COLLAPSESEQ_UMI.out.logs.collect() + presto_assemblepairs_logs = ch_assemblepairs_logs.collect() + presto_collapseseq_logs = ch_collapse_logs.collect() presto_splitseq_logs = PRESTO_SPLITSEQ_UMI.out.logs.collect() } diff --git a/subworkflows/local/repertoire_analysis_reporting.nf b/subworkflows/local/repertoire_analysis_reporting.nf index 1bd61a08..cbcf7456 100644 --- a/subworkflows/local/repertoire_analysis_reporting.nf +++ b/subworkflows/local/repertoire_analysis_reporting.nf @@ -30,7 +30,7 @@ workflow REPERTOIRE_ANALYSIS_REPORTING { main: ch_versions = Channel.empty() - if (params.mode == "fastq") { + if (params.mode == "fastq" && !params.library_generation_method in ["sc_10x_genomics"]) { PARSE_LOGS( ch_presto_filterseq_logs, ch_presto_maskprimers_logs, @@ -71,7 +71,7 @@ workflow REPERTOIRE_ANALYSIS_REPORTING { AIRRFLOW_REPORT( ch_repertoires, ch_parsed_logs.collect().ifEmpty([]), - REPORT_FILE_SIZE.out.table.ifEmpty([]), + REPORT_FILE_SIZE.out.table.collect().ifEmpty([]), ch_report_rmd, ch_report_css, ch_report_logo diff --git a/subworkflows/local/sc_raw_input.nf b/subworkflows/local/sc_raw_input.nf new file mode 100644 index 00000000..735a8c10 --- /dev/null +++ b/subworkflows/local/sc_raw_input.nf @@ -0,0 +1,99 @@ +include { CELLRANGER_VDJ } from '../../modules/nf-core/cellranger/vdj/main' +include { UNZIP_CELLRANGERDB } from '../../modules/local/unzip_cellrangerdb' +include { RENAME_FILE as RENAME_FILE_TSV } from '../../modules/local/rename_file' +include { CHANGEO_CONVERTDB_FASTA as CHANGEO_CONVERTDB_FASTA_FROM_AIRR } from '../../modules/local/changeo/changeo_convertdb_fasta' +include { FASTQ_INPUT_CHECK } from '../../subworkflows/local/fastq_input_check' + + +workflow SC_RAW_INPUT { + + take: + ch_input + + main: + + ch_versions = Channel.empty() + ch_logs = Channel.empty() + + // + // read in samplesheet, validate and stage input fies + // + FASTQ_INPUT_CHECK( + ch_input + ) + ch_versions = ch_versions.mix(FASTQ_INPUT_CHECK.out.versions) + + ch_reads = FASTQ_INPUT_CHECK.out.reads + + // 
validate library generation method parameter + if (params.vprimers) { + error "The single-cell 10X genomics library generation method does not require V-region primers, please provide a reference file instead or select another library method option." + } else if (params.race_linker) { + error "The single-cell 10X genomics library generation method does not require the --race_linker parameter, please provide a reference file instead or select another library method option." + } + if (params.cprimers) { + error "The single-cell 10X genomics library generation method does not require C-region primers, please provide a reference file instead or select another library method option." + } + if (params.umi_length > 0) { + error "The single-cell 10X genomics library generation method does not require to set the UMI length, please provide a reference file instead or select another library method option." + } + if (params.reference_10x) { + // necessary to allow tar.gz files as input so that tests can run + if (params.reference_10x.endsWith(".tar.gz")){ + UNZIP_CELLRANGERDB( + params.reference_10x + ) + UNZIP_CELLRANGERDB.out.unzipped.set { ch_sc_reference } + } else { + ch_sc_reference = Channel.fromPath(params.reference_10x, checkIfExists: true) + } + } else { + error "The single-cell 10X genomics library generation method requires you to provide a reference file." + } + + // run cellranger vdj + CELLRANGER_VDJ ( + ch_reads, + ch_sc_reference.collect() + ) + ch_versions = ch_versions.mix(CELLRANGER_VDJ.out.versions) + + ch_cellranger_out = CELLRANGER_VDJ.out.outs + + ch_cellranger_out + .map { meta, out_files -> + [ meta, out_files.find { it.endsWith("airr_rearrangement.tsv") } ] + } + .set { ch_cellranger_airr } + + // TODO : add VALIDATE_INPUT Module + // this module requires input in csv format... Might need to create this in an extra module + + // rename tsv file to unique name + RENAME_FILE_TSV( + ch_cellranger_airr + ) + .set { ch_renamed_tsv } + + // convert airr tsv to fasta (cellranger does not create any fasta with clonotype information) + CHANGEO_CONVERTDB_FASTA_FROM_AIRR( + RENAME_FILE_TSV.out.file + ) + + ch_versions = CHANGEO_CONVERTDB_FASTA_FROM_AIRR.out.versions + + ch_fasta = CHANGEO_CONVERTDB_FASTA_FROM_AIRR.out.fasta + + // TODO: here you can add support for MiXCR sc protocols. 
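The `SC_RAW_INPUT` subworkflow defined here checks that no primer or UMI options are set for the 10x protocol, stages the `--reference_10x` directory (unzipping a `.tar.gz` if needed), runs `cellranger vdj`, and converts the per-sample `airr_rearrangement.tsv` to FASTA (see the emit block just below). A minimal sketch of how it could be invoked when `--library_generation_method sc_10x_genomics` is selected; the caller and the channel names on the left are illustrative assumptions, only the subworkflow name and its emissions come from this file:

```groovy
// Hypothetical caller; SC_RAW_INPUT and its emissions come from the subworkflow above.
include { SC_RAW_INPUT } from './subworkflows/local/sc_raw_input'

workflow EXAMPLE_SC_VDJ {
    main:
    ch_input = Channel.fromPath(params.input, checkIfExists: true)   // samplesheet TSV
    SC_RAW_INPUT( ch_input )
    ch_fasta    = SC_RAW_INPUT.out.fasta      // AIRR rearrangements converted to FASTA for V(D)J annotation
    ch_airr     = SC_RAW_INPUT.out.airr       // cellranger airr_rearrangement.tsv per sample
    ch_versions = SC_RAW_INPUT.out.versions
}
```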
+ + + emit: + versions = ch_versions + // complete cellranger output + outs = ch_cellranger_out + // cellranger output in airr format + airr = ch_cellranger_airr + // cellranger output converted to FASTA format + fasta = ch_fasta + samplesheet = FASTQ_INPUT_CHECK.out.samplesheet +} diff --git a/subworkflows/local/sequence_assembly.nf b/subworkflows/local/sequence_assembly.nf index 26bfd3cd..d001b124 100644 --- a/subworkflows/local/sequence_assembly.nf +++ b/subworkflows/local/sequence_assembly.nf @@ -38,7 +38,6 @@ include { PRESTO_SANS_UMI } from '../../subworkflows/local/presto_sa // // MODULE: Installed directly from nf-core/modules // -include { FASTQC } from '../../modules/nf-core/fastqc/main' /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -46,11 +45,11 @@ include { FASTQC } from '../../modules/nf-core/fastqc/main' ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ - workflow SEQUENCE_ASSEMBLY { take: - ch_input // channel: + ch_input // channel: reads + ch_igblast main: @@ -84,6 +83,11 @@ workflow SEQUENCE_ASSEMBLY { if (params.umi_length < 2) { error "The 'specific_pcr_umi' library generation method requires setting the '--umi_length' to a value greater than 1." } + if (params.internal_cregion_sequences) { + ch_internal_cregion = Channel.fromPath(params.internal_cregion_sequences, checkIfExists: true) + } else { + ch_internal_cregion = Channel.of([]) + } } else if (params.library_generation_method == 'specific_pcr') { if (params.vprimers) { ch_vprimers_fasta = Channel.fromPath(params.vprimers, checkIfExists: true) @@ -103,11 +107,16 @@ workflow SEQUENCE_ASSEMBLY { } else { params.umi_length = 0 } + if (params.internal_cregion_sequences) { + error "Please do not set '--internal_cregion_sequences' when using the 'specific_pcr' library generation method without UMIs." + } } else if (params.library_generation_method == 'dt_5p_race_umi') { if (params.vprimers) { error "The oligo-dT 5'-RACE UMI library generation method does not accept V-region primers, please provide a linker with '--race_linker' instead or select another library method option." } else if (params.race_linker) { ch_vprimers_fasta = Channel.fromPath(params.race_linker, checkIfExists: true) + } else if (params.maskprimers_align) { + ch_vprimers_fasta = Channel.of([]) } else { error "The oligo-dT 5'-RACE UMI library generation method requires a linker or Template Switch Oligo sequence, please provide it with the option '--race_linker'." } @@ -119,11 +128,18 @@ workflow SEQUENCE_ASSEMBLY { if (params.umi_length < 2) { error "The oligo-dT 5'-RACE UMI 'dt_5p_race_umi' library generation method requires specifying the '--umi_length' to a value greater than 1." } + if (params.internal_cregion_sequences) { + ch_internal_cregion = Channel.fromPath(params.internal_cregion_sequences, checkIfExists: true) + } else { + ch_internal_cregion = Channel.of([]) + } } else if (params.library_generation_method == 'dt_5p_race') { if (params.vprimers) { error "The oligo-dT 5'-RACE library generation method does not accept V-region primers, please provide a linker with '--race_linker' instead or select another library method option." 
} else if (params.race_linker) { ch_vprimers_fasta = Channel.fromPath(params.race_linker, checkIfExists: true) + } else if (params.maskprimers_align) { + ch_vprimers_fasta = Channel.of([]) } else { error "The oligo-dT 5'-RACE library generation method requires a linker or Template Switch Oligo sequence, please provide it with the option '--race_linker'." } @@ -137,6 +153,9 @@ workflow SEQUENCE_ASSEMBLY { } else { params.umi_length = 0 } + if (params.internal_cregion_sequences) { + error "Please do not set '--internal_cregion_sequences' when using the 'dt_5p_race' library generation method without UMIs." + } } else { error "The provided library generation method is not supported. Please check the docs for `--library_generation_method`." } @@ -145,7 +164,8 @@ workflow SEQUENCE_ASSEMBLY { if (params.index_file & params.umi_position == 'R2') {error "Please do not set `--umi_position` option if index file with UMIs is provided."} if (params.umi_length < 0) {error "Please provide the UMI barcode length in the option `--umi_length`. To run without UMIs, set umi_length to 0."} if (!params.index_file & params.umi_start != 0) {error "Setting a UMI start position is only allowed when providing the UMIs in a separate index read file. If so, please provide the `--index_file` flag as well."} - + if (params.maskprimers_align & params.umi_position == 'R1') {error "The maskprimers align option is only supported with UMI barcodes in the R2 reads (reads containing V region)."} + if (params.maskprimers_align & params.cprimer_position == 'R2') {error "The maskprimers align option is only supported with Cprimers in the R1 reads (reads containing C region)."} // // SUBWORKFLOW: Read in samplesheet, validate and stage input files @@ -168,7 +188,7 @@ workflow SEQUENCE_ASSEMBLY { ch_adapter_fasta ) ch_presto_fasta = PRESTO_SANS_UMI.out.fasta - ch_presto_software = PRESTO_SANS_UMI.out.software + ch_presto_software = PRESTO_SANS_UMI.out.versions ch_fastp_reads_html = PRESTO_SANS_UMI.out.fastp_reads_html ch_fastp_reads_json = PRESTO_SANS_UMI.out.fastp_reads_json ch_fastqc_postassembly = PRESTO_SANS_UMI.out.fastqc_postassembly_gz @@ -190,10 +210,12 @@ workflow SEQUENCE_ASSEMBLY { ch_reads, ch_cprimers_fasta, ch_vprimers_fasta, - ch_adapter_fasta + ch_adapter_fasta, + ch_internal_cregion, + ch_igblast.collect() ) ch_presto_fasta = PRESTO_UMI.out.fasta - ch_presto_software = PRESTO_UMI.out.software + ch_presto_software = PRESTO_UMI.out.versions ch_fastp_reads_html = PRESTO_UMI.out.fastp_reads_html ch_fastp_reads_json = PRESTO_UMI.out.fastp_reads_json ch_fastqc_postassembly = PRESTO_UMI.out.fastqc_postassembly_gz diff --git a/subworkflows/local/single_cell_qc_and_filtering.nf b/subworkflows/local/single_cell_qc_and_filtering.nf index 9de2701e..47cd520b 100644 --- a/subworkflows/local/single_cell_qc_and_filtering.nf +++ b/subworkflows/local/single_cell_qc_and_filtering.nf @@ -28,7 +28,7 @@ workflow SINGLE_CELL_QC_AND_FILTERING { .set{ch_repertoire_after_scqc_with_sampleid} ch_logs = ch_logs.mix(SINGLE_CELL_QC.out.logs) - ch_versions = ch_versions.mix(SINGLE_CELL_QC.out.versions.ifEmpty(null)) + ch_versions = ch_versions.mix(SINGLE_CELL_QC.out.versions) ch_repertoire_after_scqc_withmeta = ch_onlymeta.join(ch_repertoire_after_scqc_with_sampleid) .map{ it -> [ it[1], it[2] ]} diff --git a/subworkflows/local/utils_nfcore_airrflow_pipeline/main.nf b/subworkflows/local/utils_nfcore_airrflow_pipeline/main.nf new file mode 100644 index 00000000..cbdd9668 --- /dev/null +++ b/subworkflows/local/utils_nfcore_airrflow_pipeline/main.nf 
@@ -0,0 +1,251 @@ +// +// Subworkflow with functionality specific to the nf-core/airrflow pipeline +// + +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + IMPORT FUNCTIONS / MODULES / SUBWORKFLOWS +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +*/ + +include { UTILS_NFVALIDATION_PLUGIN } from '../../nf-core/utils_nfvalidation_plugin' +include { paramsSummaryMap } from 'plugin/nf-validation' +include { fromSamplesheet } from 'plugin/nf-validation' +include { UTILS_NEXTFLOW_PIPELINE } from '../../nf-core/utils_nextflow_pipeline' +include { completionEmail } from '../../nf-core/utils_nfcore_pipeline' +include { completionSummary } from '../../nf-core/utils_nfcore_pipeline' +include { dashedLine } from '../../nf-core/utils_nfcore_pipeline' +include { nfCoreLogo } from '../../nf-core/utils_nfcore_pipeline' +include { imNotification } from '../../nf-core/utils_nfcore_pipeline' +include { UTILS_NFCORE_PIPELINE } from '../../nf-core/utils_nfcore_pipeline' +include { workflowCitation } from '../../nf-core/utils_nfcore_pipeline' + +/* +======================================================================================== + SUBWORKFLOW TO INITIALISE PIPELINE +======================================================================================== +*/ + +workflow PIPELINE_INITIALISATION { + + take: + version // boolean: Display version and exit + help // boolean: Display help text + validate_params // boolean: Boolean whether to validate parameters against the schema at runtime + monochrome_logs // boolean: Do not use coloured log outputs + nextflow_cli_args // array: List of positional nextflow CLI args + outdir // string: The output directory where the results will be saved + input // string: Path to input samplesheet + + main: + + ch_versions = Channel.empty() + + // + // Print version and exit if required and dump pipeline parameters to JSON file + // + UTILS_NEXTFLOW_PIPELINE ( + version, + true, + outdir, + workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1 + ) + + // + // Validate parameters and generate parameter summary to stdout + // + pre_help_text = nfCoreLogo(monochrome_logs) + post_help_text = '\n' + workflowCitation() + '\n' + dashedLine(monochrome_logs) + def String workflow_command = "nextflow run ${workflow.manifest.name} -profile --input samplesheet.csv --outdir " + UTILS_NFVALIDATION_PLUGIN ( + help, + workflow_command, + pre_help_text, + post_help_text, + validate_params, + "nextflow_schema.json" + ) + + // + // Check config provided to the pipeline + // + UTILS_NFCORE_PIPELINE ( + nextflow_cli_args + ) + // + // Custom validation for pipeline parameters + // + validateInputParameters() + + // + // Create channel from input file provided through params.input + // + ch_samplesheet = Channel.fromPath(input, checkIfExists: true) + + emit: + samplesheet = ch_samplesheet + versions = ch_versions +} + +/* +======================================================================================== + SUBWORKFLOW FOR PIPELINE COMPLETION +======================================================================================== +*/ + +workflow PIPELINE_COMPLETION { + + take: + email // string: email address + email_on_fail // string: email address sent on pipeline failure + plaintext_email // boolean: Send plain-text email instead of HTML + outdir // path: Path to output directory where results will be published + monochrome_logs // boolean: Disable ANSI colour codes in log output + 
hook_url // string: hook URL for notifications + multiqc_report // string: Path to MultiQC report + + main: + + summary_params = paramsSummaryMap(workflow, parameters_schema: "nextflow_schema.json") + + // + // Completion email and summary + // + workflow.onComplete { + if (email || email_on_fail) { + completionEmail(summary_params, email, email_on_fail, plaintext_email, outdir, monochrome_logs, multiqc_report.toList()) + } + + completionSummary(monochrome_logs) + + if (hook_url) { + imNotification(summary_params, hook_url) + } + } +} + +/* +======================================================================================== + FUNCTIONS +======================================================================================== +*/ +// +// Check and validate pipeline parameters +// +def validateInputParameters() { + genomeExistsError() +} + +// +// Validate channels from input samplesheet +// +def validateInputSamplesheet(input) { + def (metas, fastqs) = input[1..2] + + // Check that multiple runs of the same sample are of the same datatype i.e. single-end / paired-end + def endedness_ok = metas.collect{ it.single_end }.unique().size == 1 + if (!endedness_ok) { + error("Please check input samplesheet -> Multiple runs of a sample must be of the same datatype i.e. single-end or paired-end: ${metas[0].id}") + } + + return [ metas[0], fastqs ] +} +// +// Get attribute from genome config file e.g. fasta +// +def getGenomeAttribute(attribute) { + if (params.genomes && params.genome && params.genomes.containsKey(params.genome)) { + if (params.genomes[ params.genome ].containsKey(attribute)) { + return params.genomes[ params.genome ][ attribute ] + } + } + return null +} + +// +// Exit pipeline if incorrect --genome key provided +// +def genomeExistsError() { + if (params.genomes && params.genome && !params.genomes.containsKey(params.genome)) { + def error_string = "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n" + + " Genome '${params.genome}' not found in any config files provided to the pipeline.\n" + + " Currently, the available genome keys are:\n" + + " ${params.genomes.keySet().join(", ")}\n" + + "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~" + error(error_string) + } +} + +// +// Generate methods description for MultiQC +// +def toolCitationText() { + // Can use ternary operators to dynamically construct based conditions, e.g. params["run_xyz"] ? "Tool (Foo et al. 2023)" : "", + // Uncomment function in methodsDescriptionText to render in MultiQC report + def citation_text = [ + "Tools used in the workflow included:", + "pRESTO (Vander Heiden et al. 2014),", + "IgBLAST (Ye et al. 2013),", + "Alakazam (Stern et al. 2014),", + "Shazam (Gupta et al. 2015),", + "Change-O (Gupta et al. 2015),", + "SCOPer (Gupta et al. 2017, Nouri et al. 2018),", + "Dowser (Hoehn et al. 2022),", + "IgPhyML (Hoehn et al. 2019)", + "FastQC (Andrews 2010),", + "MultiQC (Ewels et al. 2016)", + "Fastp (Chen et al. 2018),", + "." + ].join(' ').trim() + + return citation_text +} + +def toolBibliographyText() { + // Add bibliographic entries to this list. + // Can use ternary operators to dynamically construct based conditions, e.g. params["run_xyz"] ? "
<li>Author (2023) Pub name, Journal, DOI</li>" : "", + // Uncomment function in methodsDescriptionText to render in MultiQC report + def reference_text = [ + "
<li>Andrews S, (2010) FastQC, URL: https://www.bioinformatics.babraham.ac.uk/projects/fastqc/).</li>", + "
<li>Chen S, Zhou Y, Chen Y, Gu J. (2018) fastp: an ultra-fast all-in-one FASTQ preprocessor. Bioinformatics, 34(17), i884-i890. doi: 10.1093/bioinformatics/bty560</li>", + "
<li>Ewels, P., Magnusson, M., Lundin, S., & Käller, M. (2016). MultiQC: summarize analysis results for multiple tools and samples in a single report. Bioinformatics, 32(19), 3047-3048. doi: 10.1093/bioinformatics/btw354.</li>", + "
<li>Gupta NT, Vander Heiden JA, Uduman M, Gadala-Maria D, Yaari G, Kleinstein SH. (2015) Change-O: a toolkit for analyzing large-scale B cell immunoglobulin repertoire sequencing data. Bioinformatics, 31(20), 3356-3358. doi: 10.1093/bioinformatics/btv359.</li>", + "
<li>Gupta NT, Adams K, Briggs A, Timberlake S, Vigneault F, Kleinstein S (2017). Hierarchical clustering can identify B cell clones with high confidence in Ig repertoire sequencing data. The Journal of Immunology, 2489-2499.</li>", + "
<li>Hoehn KB, Van der Heiden JA, Zhou JQ, Lunter, G, Pybus, OG, & Kleinstein SH. (2019) Repertoire-wide phylogenetic models of B cell molecular evolution reveal evolutionary signatures of aging and vaccination. PNAS, 116(45) 22664-22672. https://www.pnas.org/doi/10.1073/pnas.1906020116</li>", + "
<li>Hoehn K, Pybus O, Kleinstein S (2022). Phylogenetic analysis of migration, differentiation, and class switching in B cells. PLoS Computational Biology. https://doi.org/10.1371/journal.pcbi.1009885.</li>", + "
<li>Nouri N, Kleinstein S (2018). A spectral clustering-based method for identifying clones from high-throughput B cell repertoire sequencing data. Bioinformatics, i341-i349.</li>", + "
<li>Stern JN, Yaari G, Vander Heiden JA, Church G, Donahue WF, Hintzen RQ, ... O'Connor, K.C. (2014) B cells populating the multiple sclerosis brain mature in the draining cervical lymph nodes. Sci Transl Med, 6(248), 248ra107. doi: 10.1126/scitranslmed.aaa3822.</li>", + "
<li>Vander Heiden, J. A., Yaari, G., Uduman, M., Stern, J. N. H., O'Connor, K. C., Hafler, D. A., … Kleinstein, S. H. (2014). pRESTO: a toolkit for processing high-throughput sequencing raw reads of lymphocyte receptor repertoires. Bioinformatics, 30(13), 1930–1932.</li>", + "
<li>Ye J, Ma N, Madden TL, Ostell JM. (2013). IgBLAST: an immunoglobulin variable domain sequence analysis tool. Nucleic Acids Res. https://www.ncbi.nlm.nih.gov/pmc/articles/PMC3692102/.</li>" + + ].join(' ').trim() + + return reference_text +} + +def methodsDescriptionText(mqc_methods_yaml) { + // Convert to a named map so it can be used with the familiar NXF ${workflow} variable syntax in the MultiQC YML file + def meta = [:] + meta.workflow = workflow.toMap() + meta["manifest_map"] = workflow.manifest.toMap() + + // Pipeline DOI + meta["doi_text"] = meta.manifest_map.doi ? "(doi: ${meta.manifest_map.doi})" : "" + meta["nodoi_text"] = meta.manifest_map.doi ? "": "
<li>If available, make sure to update the text to include the Zenodo DOI of version of the pipeline used.</li>" + + // Tool references + meta["tool_citations"] = "" + meta["tool_bibliography"] = "" + + // Tool citations + meta["tool_citations"] = toolCitationText().replaceAll(", \\.", ".").replaceAll("\\. \\.", ".").replaceAll(", \\.", ".") + meta["tool_bibliography"] = toolBibliographyText() + + + def methods_text = mqc_methods_yaml.text + + def engine = new groovy.text.SimpleTemplateEngine() + def description_html = engine.createTemplate(methods_text).make(meta) + + return description_html.toString() +} diff --git a/subworkflows/local/vdj_annotation.nf b/subworkflows/local/vdj_annotation.nf index d30375c6..4ac2b9df 100644 --- a/subworkflows/local/vdj_annotation.nf +++ b/subworkflows/local/vdj_annotation.nf @@ -1,6 +1,3 @@ -include { FETCH_DATABASES } from '../../modules/local/fetch_databases' -include { UNZIP_DB as UNZIP_IGBLAST } from '../../modules/local/unzip_db' -include { UNZIP_DB as UNZIP_IMGT } from '../../modules/local/unzip_db' include { CHANGEO_ASSIGNGENES } from '../../modules/local/changeo/changeo_assigngenes' include { CHANGEO_MAKEDB } from '../../modules/local/changeo/changeo_makedb' include { CHANGEO_PARSEDB_SPLIT } from '../../modules/local/changeo/changeo_parsedb_split' @@ -15,59 +12,20 @@ workflow VDJ_ANNOTATION { take: ch_fasta // [meta, fasta] ch_validated_samplesheet + ch_igblast + ch_imgt main: ch_versions = Channel.empty() ch_logs = Channel.empty() - // FETCH DATABASES - // TODO: this can take a long time, and the progress shows 0%. Would be - // nice to have some better progress reporting. - // And maybe run this as 2 separate steps, one for IMGT and one for IgBLAST? - if( !params.fetch_imgt ){ - if (params.igblast_base.endsWith(".zip")) { - Channel.fromPath("${params.igblast_base}") - .ifEmpty{ error "IGBLAST DB not found: ${params.igblast_base}" } - .set { ch_igblast_zipped } - UNZIP_IGBLAST( ch_igblast_zipped.collect() ) - ch_igblast = UNZIP_IGBLAST.out.unzipped - ch_versions = ch_versions.mix(UNZIP_IGBLAST.out.versions.ifEmpty(null)) - } else { - Channel.fromPath("${params.igblast_base}") - .ifEmpty { error "IGBLAST DB not found: ${params.igblast_base}" } - .set { ch_igblast } - } - } - - if( !params.fetch_imgt ){ - if (params.imgtdb_base.endsWith(".zip")) { - Channel.fromPath("${params.imgtdb_base}") - .ifEmpty{ error "IMGTDB not found: ${params.imgtdb_base}" } - .set { ch_imgt_zipped } - UNZIP_IMGT( ch_imgt_zipped.collect() ) - ch_imgt = UNZIP_IMGT.out.unzipped - ch_versions = ch_versions.mix(UNZIP_IMGT.out.versions.ifEmpty(null)) - } else { - Channel.fromPath("${params.imgtdb_base}") - .ifEmpty { error "IMGT DB not found: ${params.imgtdb_base}" } - .set { ch_imgt } - } - } - - if (params.fetch_imgt) { - FETCH_DATABASES() - ch_igblast = FETCH_DATABASES.out.igblast - ch_imgt = FETCH_DATABASES.out.imgt - ch_versions = ch_versions.mix(FETCH_DATABASES.out.versions.ifEmpty(null)) - } - CHANGEO_ASSIGNGENES ( ch_fasta, ch_igblast.collect() ) ch_logs = ch_logs.mix(CHANGEO_ASSIGNGENES.out.logs) - ch_versions = ch_versions.mix(CHANGEO_ASSIGNGENES.out.versions.ifEmpty(null)) + ch_versions = ch_versions.mix(CHANGEO_ASSIGNGENES.out.versions) CHANGEO_MAKEDB ( CHANGEO_ASSIGNGENES.out.fasta, @@ -75,7 +33,7 @@ workflow VDJ_ANNOTATION { ch_imgt.collect() ) ch_logs = ch_logs.mix(CHANGEO_MAKEDB.out.logs) - ch_versions = ch_versions.mix(CHANGEO_MAKEDB.out.versions.ifEmpty(null)) + ch_versions = ch_versions.mix(CHANGEO_MAKEDB.out.versions) ch_assigned_tab = CHANGEO_MAKEDB.out.tab ch_assignment_logs = CHANGEO_MAKEDB.out.logs @@ -88,25 +46,25 @@ workflow VDJ_ANNOTATION { 
ch_assigned_tab ) ch_logs = ch_logs.mix(FILTER_QUALITY.out.logs) - ch_versions = ch_versions.mix(FILTER_QUALITY.out.versions.ifEmpty(null)) + ch_versions = ch_versions.mix(FILTER_QUALITY.out.versions) if (params.productive_only) { CHANGEO_PARSEDB_SPLIT ( FILTER_QUALITY.out.tab ) ch_logs = ch_logs.mix(CHANGEO_PARSEDB_SPLIT.out.logs) - ch_versions = ch_versions.mix(CHANGEO_PARSEDB_SPLIT.out.versions.ifEmpty(null)) + ch_versions = ch_versions.mix(CHANGEO_PARSEDB_SPLIT.out.versions) // Apply filter: junction length multiple of 3 FILTER_JUNCTION_MOD3( CHANGEO_PARSEDB_SPLIT.out.tab ) ch_logs = ch_logs.mix(FILTER_JUNCTION_MOD3.out.logs) - ch_versions = ch_versions.mix(FILTER_JUNCTION_MOD3.out.versions.ifEmpty(null)) - ch_repertoire = FILTER_JUNCTION_MOD3.out.tab.ifEmpty(null) + ch_versions = ch_versions.mix(FILTER_JUNCTION_MOD3.out.versions) + ch_repertoire = FILTER_JUNCTION_MOD3.out.tab } else { - ch_repertoire = FILTER_QUALITY.out.tab.ifEmpty(null) + ch_repertoire = FILTER_QUALITY.out.tab } ADD_META_TO_TAB( @@ -114,7 +72,7 @@ workflow VDJ_ANNOTATION { ch_validated_samplesheet ) ch_logs = ch_logs.mix(ADD_META_TO_TAB.out.logs) - ch_versions = ch_versions.mix(ADD_META_TO_TAB.out.versions.ifEmpty(null)) + ch_versions = ch_versions.mix(ADD_META_TO_TAB.out.versions) emit: diff --git a/subworkflows/nf-core/utils_nextflow_pipeline/main.nf b/subworkflows/nf-core/utils_nextflow_pipeline/main.nf new file mode 100644 index 00000000..ac31f28f --- /dev/null +++ b/subworkflows/nf-core/utils_nextflow_pipeline/main.nf @@ -0,0 +1,126 @@ +// +// Subworkflow with functionality that may be useful for any Nextflow pipeline +// + +import org.yaml.snakeyaml.Yaml +import groovy.json.JsonOutput +import nextflow.extension.FilesEx + +/* +======================================================================================== + SUBWORKFLOW DEFINITION +======================================================================================== +*/ + +workflow UTILS_NEXTFLOW_PIPELINE { + + take: + print_version // boolean: print version + dump_parameters // boolean: dump parameters + outdir // path: base directory used to publish pipeline results + check_conda_channels // boolean: check conda channels + + main: + + // + // Print workflow version and exit on --version + // + if (print_version) { + log.info "${workflow.manifest.name} ${getWorkflowVersion()}" + System.exit(0) + } + + // + // Dump pipeline parameters to a JSON file + // + if (dump_parameters && outdir) { + dumpParametersToJSON(outdir) + } + + // + // When running with Conda, warn if channels have not been set-up appropriately + // + if (check_conda_channels) { + checkCondaChannels() + } + + emit: + dummy_emit = true +} + +/* +======================================================================================== + FUNCTIONS +======================================================================================== +*/ + +// +// Generate version string +// +def getWorkflowVersion() { + String version_string = "" + if (workflow.manifest.version) { + def prefix_v = workflow.manifest.version[0] != 'v' ? 
'v' : '' + version_string += "${prefix_v}${workflow.manifest.version}" + } + + if (workflow.commitId) { + def git_shortsha = workflow.commitId.substring(0, 7) + version_string += "-g${git_shortsha}" + } + + return version_string +} + +// +// Dump pipeline parameters to a JSON file +// +def dumpParametersToJSON(outdir) { + def timestamp = new java.util.Date().format( 'yyyy-MM-dd_HH-mm-ss') + def filename = "params_${timestamp}.json" + def temp_pf = new File(workflow.launchDir.toString(), ".${filename}") + def jsonStr = JsonOutput.toJson(params) + temp_pf.text = JsonOutput.prettyPrint(jsonStr) + + FilesEx.copyTo(temp_pf.toPath(), "${outdir}/pipeline_info/params_${timestamp}.json") + temp_pf.delete() +} + +// +// When running with -profile conda, warn if channels have not been set-up appropriately +// +def checkCondaChannels() { + Yaml parser = new Yaml() + def channels = [] + try { + def config = parser.load("conda config --show channels".execute().text) + channels = config.channels + } catch(NullPointerException | IOException e) { + log.warn "Could not verify conda channel configuration." + return + } + + // Check that all channels are present + // This channel list is ordered by required channel priority. + def required_channels_in_order = ['conda-forge', 'bioconda', 'defaults'] + def channels_missing = ((required_channels_in_order as Set) - (channels as Set)) as Boolean + + // Check that they are in the right order + def channel_priority_violation = false + def n = required_channels_in_order.size() + for (int i = 0; i < n - 1; i++) { + channel_priority_violation |= !(channels.indexOf(required_channels_in_order[i]) < channels.indexOf(required_channels_in_order[i+1])) + } + + if (channels_missing | channel_priority_violation) { + log.warn "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n" + + " There is a problem with your Conda configuration!\n\n" + + " You will need to set-up the conda-forge and bioconda channels correctly.\n" + + " Please refer to https://bioconda.github.io/\n" + + " The observed channel order is \n" + + " ${channels}\n" + + " but the following channel order is required:\n" + + " ${required_channels_in_order}\n" + + "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~" + } +} diff --git a/subworkflows/nf-core/utils_nextflow_pipeline/meta.yml b/subworkflows/nf-core/utils_nextflow_pipeline/meta.yml new file mode 100644 index 00000000..e5c3a0a8 --- /dev/null +++ b/subworkflows/nf-core/utils_nextflow_pipeline/meta.yml @@ -0,0 +1,38 @@ +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/subworkflows/yaml-schema.json +name: "UTILS_NEXTFLOW_PIPELINE" +description: Subworkflow with functionality that may be useful for any Nextflow pipeline +keywords: + - utility + - pipeline + - initialise + - version +components: [] +input: + - print_version: + type: boolean + description: | + Print the version of the pipeline and exit + - dump_parameters: + type: boolean + description: | + Dump the parameters of the pipeline to a JSON file + - output_directory: + type: directory + description: Path to output dir to write JSON file to. + pattern: "results/" + - check_conda_channel: + type: boolean + description: | + Check if the conda channel priority is correct. 
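The `checkCondaChannels()` helper above, documented here by the `check_conda_channel` input, only warns when a required channel is missing or the priority order is wrong; the check itself is plain Groovy and can be reasoned about in isolation. A standalone sketch of the same logic, run against a hypothetical observed channel list (the required order comes from the function above, the observed list is made up):

```groovy
// Same ordering check as checkCondaChannels(), against a made-up observed list.
def required_channels_in_order = ['conda-forge', 'bioconda', 'defaults']
def observed = ['bioconda', 'conda-forge', 'defaults']   // priority violation: bioconda before conda-forge

def channels_missing = ((required_channels_in_order as Set) - (observed as Set)) as Boolean
def n = required_channels_in_order.size()
def channel_priority_violation = (0..<n - 1).any { i ->
    !(observed.indexOf(required_channels_in_order[i]) < observed.indexOf(required_channels_in_order[i + 1]))
}

println(channels_missing || channel_priority_violation ? 'conda channel set-up needs fixing' : 'conda channels ok')
```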
+output: + - dummy_emit: + type: boolean + description: | + Dummy emit to make nf-core subworkflows lint happy +authors: + - "@adamrtalbot" + - "@drpatelh" +maintainers: + - "@adamrtalbot" + - "@drpatelh" + - "@maxulysse" diff --git a/subworkflows/nf-core/utils_nextflow_pipeline/tests/main.function.nf.test b/subworkflows/nf-core/utils_nextflow_pipeline/tests/main.function.nf.test new file mode 100644 index 00000000..68718e4f --- /dev/null +++ b/subworkflows/nf-core/utils_nextflow_pipeline/tests/main.function.nf.test @@ -0,0 +1,54 @@ + +nextflow_function { + + name "Test Functions" + script "subworkflows/nf-core/utils_nextflow_pipeline/main.nf" + config "subworkflows/nf-core/utils_nextflow_pipeline/tests/nextflow.config" + tag 'subworkflows' + tag 'utils_nextflow_pipeline' + tag 'subworkflows/utils_nextflow_pipeline' + + test("Test Function getWorkflowVersion") { + + function "getWorkflowVersion" + + then { + assertAll( + { assert function.success }, + { assert snapshot(function.result).match() } + ) + } + } + + test("Test Function dumpParametersToJSON") { + + function "dumpParametersToJSON" + + when { + function { + """ + // define inputs of the function here. Example: + input[0] = "$outputDir" + """.stripIndent() + } + } + + then { + assertAll( + { assert function.success } + ) + } + } + + test("Test Function checkCondaChannels") { + + function "checkCondaChannels" + + then { + assertAll( + { assert function.success }, + { assert snapshot(function.result).match() } + ) + } + } +} diff --git a/subworkflows/nf-core/utils_nextflow_pipeline/tests/main.function.nf.test.snap b/subworkflows/nf-core/utils_nextflow_pipeline/tests/main.function.nf.test.snap new file mode 100644 index 00000000..e3f0baf4 --- /dev/null +++ b/subworkflows/nf-core/utils_nextflow_pipeline/tests/main.function.nf.test.snap @@ -0,0 +1,20 @@ +{ + "Test Function getWorkflowVersion": { + "content": [ + "v9.9.9" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-28T12:02:05.308243" + }, + "Test Function checkCondaChannels": { + "content": null, + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-28T12:02:12.425833" + } +} \ No newline at end of file diff --git a/subworkflows/nf-core/utils_nextflow_pipeline/tests/main.workflow.nf.test b/subworkflows/nf-core/utils_nextflow_pipeline/tests/main.workflow.nf.test new file mode 100644 index 00000000..ca964ce8 --- /dev/null +++ b/subworkflows/nf-core/utils_nextflow_pipeline/tests/main.workflow.nf.test @@ -0,0 +1,111 @@ +nextflow_workflow { + + name "Test Workflow UTILS_NEXTFLOW_PIPELINE" + script "../main.nf" + config "subworkflows/nf-core/utils_nextflow_pipeline/tests/nextflow.config" + workflow "UTILS_NEXTFLOW_PIPELINE" + tag 'subworkflows' + tag 'utils_nextflow_pipeline' + tag 'subworkflows/utils_nextflow_pipeline' + + test("Should run no inputs") { + + when { + workflow { + """ + print_version = false + dump_parameters = false + outdir = null + check_conda_channels = false + + input[0] = print_version + input[1] = dump_parameters + input[2] = outdir + input[3] = check_conda_channels + """ + } + } + + then { + assertAll( + { assert workflow.success } + ) + } + } + + test("Should print version") { + + when { + workflow { + """ + print_version = true + dump_parameters = false + outdir = null + check_conda_channels = false + + input[0] = print_version + input[1] = dump_parameters + input[2] = outdir + input[3] = check_conda_channels + """ + } + } + + then { + assertAll( + { assert workflow.success }, + { assert 
workflow.stdout.contains("nextflow_workflow v9.9.9") } + ) + } + } + + test("Should dump params") { + + when { + workflow { + """ + print_version = false + dump_parameters = true + outdir = 'results' + check_conda_channels = false + + input[0] = false + input[1] = true + input[2] = outdir + input[3] = false + """ + } + } + + then { + assertAll( + { assert workflow.success } + ) + } + } + + test("Should not create params JSON if no output directory") { + + when { + workflow { + """ + print_version = false + dump_parameters = true + outdir = null + check_conda_channels = false + + input[0] = false + input[1] = true + input[2] = outdir + input[3] = false + """ + } + } + + then { + assertAll( + { assert workflow.success } + ) + } + } +} diff --git a/subworkflows/nf-core/utils_nextflow_pipeline/tests/nextflow.config b/subworkflows/nf-core/utils_nextflow_pipeline/tests/nextflow.config new file mode 100644 index 00000000..d0a926bf --- /dev/null +++ b/subworkflows/nf-core/utils_nextflow_pipeline/tests/nextflow.config @@ -0,0 +1,9 @@ +manifest { + name = 'nextflow_workflow' + author = """nf-core""" + homePage = 'https://127.0.0.1' + description = """Dummy pipeline""" + nextflowVersion = '!>=23.04.0' + version = '9.9.9' + doi = 'https://doi.org/10.5281/zenodo.5070524' +} diff --git a/subworkflows/nf-core/utils_nextflow_pipeline/tests/tags.yml b/subworkflows/nf-core/utils_nextflow_pipeline/tests/tags.yml new file mode 100644 index 00000000..f8476112 --- /dev/null +++ b/subworkflows/nf-core/utils_nextflow_pipeline/tests/tags.yml @@ -0,0 +1,2 @@ +subworkflows/utils_nextflow_pipeline: + - subworkflows/nf-core/utils_nextflow_pipeline/** diff --git a/subworkflows/nf-core/utils_nfcore_pipeline/main.nf b/subworkflows/nf-core/utils_nfcore_pipeline/main.nf new file mode 100644 index 00000000..a8b55d6f --- /dev/null +++ b/subworkflows/nf-core/utils_nfcore_pipeline/main.nf @@ -0,0 +1,440 @@ +// +// Subworkflow with utility functions specific to the nf-core pipeline template +// + +import org.yaml.snakeyaml.Yaml +import nextflow.extension.FilesEx + +/* +======================================================================================== + SUBWORKFLOW DEFINITION +======================================================================================== +*/ + +workflow UTILS_NFCORE_PIPELINE { + + take: + nextflow_cli_args + + main: + valid_config = checkConfigProvided() + checkProfileProvided(nextflow_cli_args) + + emit: + valid_config +} + +/* +======================================================================================== + FUNCTIONS +======================================================================================== +*/ + +// +// Warn if a -profile or Nextflow config has not been provided to run the pipeline +// +def checkConfigProvided() { + valid_config = true + if (workflow.profile == 'standard' && workflow.configFiles.size() <= 1) { + log.warn "[$workflow.manifest.name] You are attempting to run the pipeline without any custom configuration!\n\n" + + "This will be dependent on your local compute environment but can be achieved via one or more of the following:\n" + + " (1) Using an existing pipeline profile e.g. `-profile docker` or `-profile singularity`\n" + + " (2) Using an existing nf-core/configs for your Institution e.g. `-profile crick` or `-profile uppmax`\n" + + " (3) Using your own local custom config e.g. 
`-c /path/to/your/custom.config`\n\n" + + "Please refer to the quick start section and usage docs for the pipeline.\n " + valid_config = false + } + return valid_config +} + +// +// Exit pipeline if --profile contains spaces +// +def checkProfileProvided(nextflow_cli_args) { + if (workflow.profile.endsWith(',')) { + error "The `-profile` option cannot end with a trailing comma, please remove it and re-run the pipeline!\n" + + "HINT: A common mistake is to provide multiple values separated by spaces e.g. `-profile test, docker`.\n" + } + if (nextflow_cli_args[0]) { + log.warn "nf-core pipelines do not accept positional arguments. The positional argument `${nextflow_cli_args[0]}` has been detected.\n" + + "HINT: A common mistake is to provide multiple values separated by spaces e.g. `-profile test, docker`.\n" + } +} + +// +// Citation string for pipeline +// +def workflowCitation() { + return "If you use ${workflow.manifest.name} for your analysis please cite:\n\n" + + "* The pipeline\n" + + " ${workflow.manifest.doi}\n\n" + + "* The nf-core framework\n" + + " https://doi.org/10.1038/s41587-020-0439-x\n\n" + + "* Software dependencies\n" + + " https://github.com/${workflow.manifest.name}/blob/master/CITATIONS.md" +} + +// +// Generate workflow version string +// +def getWorkflowVersion() { + String version_string = "" + if (workflow.manifest.version) { + def prefix_v = workflow.manifest.version[0] != 'v' ? 'v' : '' + version_string += "${prefix_v}${workflow.manifest.version}" + } + + if (workflow.commitId) { + def git_shortsha = workflow.commitId.substring(0, 7) + version_string += "-g${git_shortsha}" + } + + return version_string +} + +// +// Get software versions for pipeline +// +def processVersionsFromYAML(yaml_file) { + Yaml yaml = new Yaml() + versions = yaml.load(yaml_file).collectEntries { k, v -> [ k.tokenize(':')[-1], v ] } + return yaml.dumpAsMap(versions).trim() +} + +// +// Get workflow version for pipeline +// +def workflowVersionToYAML() { + return """ + Workflow: + $workflow.manifest.name: ${getWorkflowVersion()} + Nextflow: $workflow.nextflow.version + """.stripIndent().trim() +} + +// +// Get channel of software versions used in pipeline in YAML format +// +def softwareVersionsToYAML(ch_versions) { + return ch_versions + .unique() + .map { processVersionsFromYAML(it) } + .unique() + .mix(Channel.of(workflowVersionToYAML())) +} + +// +// Get workflow summary for MultiQC +// +def paramsSummaryMultiqc(summary_params) { + def summary_section = '' + for (group in summary_params.keySet()) { + def group_params = summary_params.get(group) // This gets the parameters of that particular group + if (group_params) { + summary_section += "

    <p style=\"font-size:110%\"><b>$group</b></p>\n" + summary_section += "    <dl class=\"dl-horizontal\">\n" + for (param in group_params.keySet()) { + summary_section += "        <dt>$param</dt><dd><samp>${group_params.get(param) ?: '<span style=\"color:#999999;\">N/A</a>'}</samp></dd>\n" + } + summary_section += "    </dl>
    \n" + } + } + + String yaml_file_text = "id: '${workflow.manifest.name.replace('/','-')}-summary'\n" + yaml_file_text += "description: ' - this information is collected when the pipeline is started.'\n" + yaml_file_text += "section_name: '${workflow.manifest.name} Workflow Summary'\n" + yaml_file_text += "section_href: 'https://github.com/${workflow.manifest.name}'\n" + yaml_file_text += "plot_type: 'html'\n" + yaml_file_text += "data: |\n" + yaml_file_text += "${summary_section}" + + return yaml_file_text +} + +// +// nf-core logo +// +def nfCoreLogo(monochrome_logs=true) { + Map colors = logColours(monochrome_logs) + String.format( + """\n + ${dashedLine(monochrome_logs)} + ${colors.green},--.${colors.black}/${colors.green},-.${colors.reset} + ${colors.blue} ___ __ __ __ ___ ${colors.green}/,-._.--~\'${colors.reset} + ${colors.blue} |\\ | |__ __ / ` / \\ |__) |__ ${colors.yellow}} {${colors.reset} + ${colors.blue} | \\| | \\__, \\__/ | \\ |___ ${colors.green}\\`-._,-`-,${colors.reset} + ${colors.green}`._,._,\'${colors.reset} + ${colors.purple} ${workflow.manifest.name} ${getWorkflowVersion()}${colors.reset} + ${dashedLine(monochrome_logs)} + """.stripIndent() + ) +} + +// +// Return dashed line +// +def dashedLine(monochrome_logs=true) { + Map colors = logColours(monochrome_logs) + return "-${colors.dim}----------------------------------------------------${colors.reset}-" +} + +// +// ANSII colours used for terminal logging +// +def logColours(monochrome_logs=true) { + Map colorcodes = [:] + + // Reset / Meta + colorcodes['reset'] = monochrome_logs ? '' : "\033[0m" + colorcodes['bold'] = monochrome_logs ? '' : "\033[1m" + colorcodes['dim'] = monochrome_logs ? '' : "\033[2m" + colorcodes['underlined'] = monochrome_logs ? '' : "\033[4m" + colorcodes['blink'] = monochrome_logs ? '' : "\033[5m" + colorcodes['reverse'] = monochrome_logs ? '' : "\033[7m" + colorcodes['hidden'] = monochrome_logs ? '' : "\033[8m" + + // Regular Colors + colorcodes['black'] = monochrome_logs ? '' : "\033[0;30m" + colorcodes['red'] = monochrome_logs ? '' : "\033[0;31m" + colorcodes['green'] = monochrome_logs ? '' : "\033[0;32m" + colorcodes['yellow'] = monochrome_logs ? '' : "\033[0;33m" + colorcodes['blue'] = monochrome_logs ? '' : "\033[0;34m" + colorcodes['purple'] = monochrome_logs ? '' : "\033[0;35m" + colorcodes['cyan'] = monochrome_logs ? '' : "\033[0;36m" + colorcodes['white'] = monochrome_logs ? '' : "\033[0;37m" + + // Bold + colorcodes['bblack'] = monochrome_logs ? '' : "\033[1;30m" + colorcodes['bred'] = monochrome_logs ? '' : "\033[1;31m" + colorcodes['bgreen'] = monochrome_logs ? '' : "\033[1;32m" + colorcodes['byellow'] = monochrome_logs ? '' : "\033[1;33m" + colorcodes['bblue'] = monochrome_logs ? '' : "\033[1;34m" + colorcodes['bpurple'] = monochrome_logs ? '' : "\033[1;35m" + colorcodes['bcyan'] = monochrome_logs ? '' : "\033[1;36m" + colorcodes['bwhite'] = monochrome_logs ? '' : "\033[1;37m" + + // Underline + colorcodes['ublack'] = monochrome_logs ? '' : "\033[4;30m" + colorcodes['ured'] = monochrome_logs ? '' : "\033[4;31m" + colorcodes['ugreen'] = monochrome_logs ? '' : "\033[4;32m" + colorcodes['uyellow'] = monochrome_logs ? '' : "\033[4;33m" + colorcodes['ublue'] = monochrome_logs ? '' : "\033[4;34m" + colorcodes['upurple'] = monochrome_logs ? '' : "\033[4;35m" + colorcodes['ucyan'] = monochrome_logs ? '' : "\033[4;36m" + colorcodes['uwhite'] = monochrome_logs ? '' : "\033[4;37m" + + // High Intensity + colorcodes['iblack'] = monochrome_logs ? 
'' : "\033[0;90m" + colorcodes['ired'] = monochrome_logs ? '' : "\033[0;91m" + colorcodes['igreen'] = monochrome_logs ? '' : "\033[0;92m" + colorcodes['iyellow'] = monochrome_logs ? '' : "\033[0;93m" + colorcodes['iblue'] = monochrome_logs ? '' : "\033[0;94m" + colorcodes['ipurple'] = monochrome_logs ? '' : "\033[0;95m" + colorcodes['icyan'] = monochrome_logs ? '' : "\033[0;96m" + colorcodes['iwhite'] = monochrome_logs ? '' : "\033[0;97m" + + // Bold High Intensity + colorcodes['biblack'] = monochrome_logs ? '' : "\033[1;90m" + colorcodes['bired'] = monochrome_logs ? '' : "\033[1;91m" + colorcodes['bigreen'] = monochrome_logs ? '' : "\033[1;92m" + colorcodes['biyellow'] = monochrome_logs ? '' : "\033[1;93m" + colorcodes['biblue'] = monochrome_logs ? '' : "\033[1;94m" + colorcodes['bipurple'] = monochrome_logs ? '' : "\033[1;95m" + colorcodes['bicyan'] = monochrome_logs ? '' : "\033[1;96m" + colorcodes['biwhite'] = monochrome_logs ? '' : "\033[1;97m" + + return colorcodes +} + +// +// Attach the multiqc report to email +// +def attachMultiqcReport(multiqc_report) { + def mqc_report = null + try { + if (workflow.success) { + mqc_report = multiqc_report.getVal() + if (mqc_report.getClass() == ArrayList && mqc_report.size() >= 1) { + if (mqc_report.size() > 1) { + log.warn "[$workflow.manifest.name] Found multiple reports from process 'MULTIQC', will use only one" + } + mqc_report = mqc_report[0] + } + } + } catch (all) { + if (multiqc_report) { + log.warn "[$workflow.manifest.name] Could not attach MultiQC report to summary email" + } + } + return mqc_report +} + +// +// Construct and send completion email +// +def completionEmail(summary_params, email, email_on_fail, plaintext_email, outdir, monochrome_logs=true, multiqc_report=null) { + + // Set up the e-mail variables + def subject = "[$workflow.manifest.name] Successful: $workflow.runName" + if (!workflow.success) { + subject = "[$workflow.manifest.name] FAILED: $workflow.runName" + } + + def summary = [:] + for (group in summary_params.keySet()) { + summary << summary_params[group] + } + + def misc_fields = [:] + misc_fields['Date Started'] = workflow.start + misc_fields['Date Completed'] = workflow.complete + misc_fields['Pipeline script file path'] = workflow.scriptFile + misc_fields['Pipeline script hash ID'] = workflow.scriptId + if (workflow.repository) misc_fields['Pipeline repository Git URL'] = workflow.repository + if (workflow.commitId) misc_fields['Pipeline repository Git Commit'] = workflow.commitId + if (workflow.revision) misc_fields['Pipeline Git branch/tag'] = workflow.revision + misc_fields['Nextflow Version'] = workflow.nextflow.version + misc_fields['Nextflow Build'] = workflow.nextflow.build + misc_fields['Nextflow Compile Timestamp'] = workflow.nextflow.timestamp + + def email_fields = [:] + email_fields['version'] = getWorkflowVersion() + email_fields['runName'] = workflow.runName + email_fields['success'] = workflow.success + email_fields['dateComplete'] = workflow.complete + email_fields['duration'] = workflow.duration + email_fields['exitStatus'] = workflow.exitStatus + email_fields['errorMessage'] = (workflow.errorMessage ?: 'None') + email_fields['errorReport'] = (workflow.errorReport ?: 'None') + email_fields['commandLine'] = workflow.commandLine + email_fields['projectDir'] = workflow.projectDir + email_fields['summary'] = summary << misc_fields + + // On success try attach the multiqc report + def mqc_report = attachMultiqcReport(multiqc_report) + + // Check if we are only sending emails on failure + def 
email_address = email + if (!email && email_on_fail && !workflow.success) { + email_address = email_on_fail + } + + // Render the TXT template + def engine = new groovy.text.GStringTemplateEngine() + def tf = new File("${workflow.projectDir}/assets/email_template.txt") + def txt_template = engine.createTemplate(tf).make(email_fields) + def email_txt = txt_template.toString() + + // Render the HTML template + def hf = new File("${workflow.projectDir}/assets/email_template.html") + def html_template = engine.createTemplate(hf).make(email_fields) + def email_html = html_template.toString() + + // Render the sendmail template + def max_multiqc_email_size = (params.containsKey('max_multiqc_email_size') ? params.max_multiqc_email_size : 0) as nextflow.util.MemoryUnit + def smail_fields = [ email: email_address, subject: subject, email_txt: email_txt, email_html: email_html, projectDir: "${workflow.projectDir}", mqcFile: mqc_report, mqcMaxSize: max_multiqc_email_size.toBytes() ] + def sf = new File("${workflow.projectDir}/assets/sendmail_template.txt") + def sendmail_template = engine.createTemplate(sf).make(smail_fields) + def sendmail_html = sendmail_template.toString() + + // Send the HTML e-mail + Map colors = logColours(monochrome_logs) + if (email_address) { + try { + if (plaintext_email) { throw GroovyException('Send plaintext e-mail, not HTML') } + // Try to send HTML e-mail using sendmail + def sendmail_tf = new File(workflow.launchDir.toString(), ".sendmail_tmp.html") + sendmail_tf.withWriter { w -> w << sendmail_html } + [ 'sendmail', '-t' ].execute() << sendmail_html + log.info "-${colors.purple}[$workflow.manifest.name]${colors.green} Sent summary e-mail to $email_address (sendmail)-" + } catch (all) { + // Catch failures and try with plaintext + def mail_cmd = [ 'mail', '-s', subject, '--content-type=text/html', email_address ] + mail_cmd.execute() << email_html + log.info "-${colors.purple}[$workflow.manifest.name]${colors.green} Sent summary e-mail to $email_address (mail)-" + } + } + + // Write summary e-mail HTML to a file + def output_hf = new File(workflow.launchDir.toString(), ".pipeline_report.html") + output_hf.withWriter { w -> w << email_html } + FilesEx.copyTo(output_hf.toPath(), "${outdir}/pipeline_info/pipeline_report.html"); + output_hf.delete() + + // Write summary e-mail TXT to a file + def output_tf = new File(workflow.launchDir.toString(), ".pipeline_report.txt") + output_tf.withWriter { w -> w << email_txt } + FilesEx.copyTo(output_tf.toPath(), "${outdir}/pipeline_info/pipeline_report.txt"); + output_tf.delete() +} + +// +// Print pipeline summary on completion +// +def completionSummary(monochrome_logs=true) { + Map colors = logColours(monochrome_logs) + if (workflow.success) { + if (workflow.stats.ignoredCount == 0) { + log.info "-${colors.purple}[$workflow.manifest.name]${colors.green} Pipeline completed successfully${colors.reset}-" + } else { + log.info "-${colors.purple}[$workflow.manifest.name]${colors.yellow} Pipeline completed successfully, but with errored process(es) ${colors.reset}-" + } + } else { + log.info "-${colors.purple}[$workflow.manifest.name]${colors.red} Pipeline completed with errors${colors.reset}-" + } +} + +// +// Construct and send a notification to a web server as JSON e.g. 
Microsoft Teams and Slack +// +def imNotification(summary_params, hook_url) { + def summary = [:] + for (group in summary_params.keySet()) { + summary << summary_params[group] + } + + def misc_fields = [:] + misc_fields['start'] = workflow.start + misc_fields['complete'] = workflow.complete + misc_fields['scriptfile'] = workflow.scriptFile + misc_fields['scriptid'] = workflow.scriptId + if (workflow.repository) misc_fields['repository'] = workflow.repository + if (workflow.commitId) misc_fields['commitid'] = workflow.commitId + if (workflow.revision) misc_fields['revision'] = workflow.revision + misc_fields['nxf_version'] = workflow.nextflow.version + misc_fields['nxf_build'] = workflow.nextflow.build + misc_fields['nxf_timestamp'] = workflow.nextflow.timestamp + + def msg_fields = [:] + msg_fields['version'] = getWorkflowVersion() + msg_fields['runName'] = workflow.runName + msg_fields['success'] = workflow.success + msg_fields['dateComplete'] = workflow.complete + msg_fields['duration'] = workflow.duration + msg_fields['exitStatus'] = workflow.exitStatus + msg_fields['errorMessage'] = (workflow.errorMessage ?: 'None') + msg_fields['errorReport'] = (workflow.errorReport ?: 'None') + msg_fields['commandLine'] = workflow.commandLine.replaceFirst(/ +--hook_url +[^ ]+/, "") + msg_fields['projectDir'] = workflow.projectDir + msg_fields['summary'] = summary << misc_fields + + // Render the JSON template + def engine = new groovy.text.GStringTemplateEngine() + // Different JSON depending on the service provider + // Defaults to "Adaptive Cards" (https://adaptivecards.io), except Slack which has its own format + def json_path = hook_url.contains("hooks.slack.com") ? "slackreport.json" : "adaptivecard.json" + def hf = new File("${workflow.projectDir}/assets/${json_path}") + def json_template = engine.createTemplate(hf).make(msg_fields) + def json_message = json_template.toString() + + // POST + def post = new URL(hook_url).openConnection(); + post.setRequestMethod("POST") + post.setDoOutput(true) + post.setRequestProperty("Content-Type", "application/json") + post.getOutputStream().write(json_message.getBytes("UTF-8")); + def postRC = post.getResponseCode(); + if (! 
postRC.equals(200)) { + log.warn(post.getErrorStream().getText()); + } +} diff --git a/subworkflows/nf-core/utils_nfcore_pipeline/meta.yml b/subworkflows/nf-core/utils_nfcore_pipeline/meta.yml new file mode 100644 index 00000000..d08d2434 --- /dev/null +++ b/subworkflows/nf-core/utils_nfcore_pipeline/meta.yml @@ -0,0 +1,24 @@ +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/subworkflows/yaml-schema.json +name: "UTILS_NFCORE_PIPELINE" +description: Subworkflow with utility functions specific to the nf-core pipeline template +keywords: + - utility + - pipeline + - initialise + - version +components: [] +input: + - nextflow_cli_args: + type: list + description: | + Nextflow CLI positional arguments +output: + - success: + type: boolean + description: | + Dummy output to indicate success +authors: + - "@adamrtalbot" +maintainers: + - "@adamrtalbot" + - "@maxulysse" diff --git a/subworkflows/nf-core/utils_nfcore_pipeline/tests/main.function.nf.test b/subworkflows/nf-core/utils_nfcore_pipeline/tests/main.function.nf.test new file mode 100644 index 00000000..1dc317f8 --- /dev/null +++ b/subworkflows/nf-core/utils_nfcore_pipeline/tests/main.function.nf.test @@ -0,0 +1,134 @@ + +nextflow_function { + + name "Test Functions" + script "../main.nf" + config "subworkflows/nf-core/utils_nfcore_pipeline/tests/nextflow.config" + tag "subworkflows" + tag "subworkflows_nfcore" + tag "utils_nfcore_pipeline" + tag "subworkflows/utils_nfcore_pipeline" + + test("Test Function checkConfigProvided") { + + function "checkConfigProvided" + + then { + assertAll( + { assert function.success }, + { assert snapshot(function.result).match() } + ) + } + } + + test("Test Function checkProfileProvided") { + + function "checkProfileProvided" + + when { + function { + """ + input[0] = [] + """ + } + } + + then { + assertAll( + { assert function.success }, + { assert snapshot(function.result).match() } + ) + } + } + + test("Test Function workflowCitation") { + + function "workflowCitation" + + then { + assertAll( + { assert function.success }, + { assert snapshot(function.result).match() } + ) + } + } + + test("Test Function nfCoreLogo") { + + function "nfCoreLogo" + + when { + function { + """ + input[0] = false + """ + } + } + + then { + assertAll( + { assert function.success }, + { assert snapshot(function.result).match() } + ) + } + } + + test("Test Function dashedLine") { + + function "dashedLine" + + when { + function { + """ + input[0] = false + """ + } + } + + then { + assertAll( + { assert function.success }, + { assert snapshot(function.result).match() } + ) + } + } + + test("Test Function without logColours") { + + function "logColours" + + when { + function { + """ + input[0] = true + """ + } + } + + then { + assertAll( + { assert function.success }, + { assert snapshot(function.result).match() } + ) + } + } + + test("Test Function with logColours") { + function "logColours" + + when { + function { + """ + input[0] = false + """ + } + } + + then { + assertAll( + { assert function.success }, + { assert snapshot(function.result).match() } + ) + } + } +} diff --git a/subworkflows/nf-core/utils_nfcore_pipeline/tests/main.function.nf.test.snap b/subworkflows/nf-core/utils_nfcore_pipeline/tests/main.function.nf.test.snap new file mode 100644 index 00000000..1037232c --- /dev/null +++ b/subworkflows/nf-core/utils_nfcore_pipeline/tests/main.function.nf.test.snap @@ -0,0 +1,166 @@ +{ + "Test Function checkProfileProvided": { + "content": null, + "meta": { + "nf-test": "0.8.4", + 
"nextflow": "23.10.1" + }, + "timestamp": "2024-02-28T12:03:03.360873" + }, + "Test Function checkConfigProvided": { + "content": [ + true + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-28T12:02:59.729647" + }, + "Test Function nfCoreLogo": { + "content": [ + "\n\n-\u001b[2m----------------------------------------------------\u001b[0m-\n \u001b[0;32m,--.\u001b[0;30m/\u001b[0;32m,-.\u001b[0m\n\u001b[0;34m ___ __ __ __ ___ \u001b[0;32m/,-._.--~'\u001b[0m\n\u001b[0;34m |\\ | |__ __ / ` / \\ |__) |__ \u001b[0;33m} {\u001b[0m\n\u001b[0;34m | \\| | \\__, \\__/ | \\ |___ \u001b[0;32m\\`-._,-`-,\u001b[0m\n \u001b[0;32m`._,._,'\u001b[0m\n\u001b[0;35m nextflow_workflow v9.9.9\u001b[0m\n-\u001b[2m----------------------------------------------------\u001b[0m-\n" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-28T12:03:10.562934" + }, + "Test Function workflowCitation": { + "content": [ + "If you use nextflow_workflow for your analysis please cite:\n\n* The pipeline\n https://doi.org/10.5281/zenodo.5070524\n\n* The nf-core framework\n https://doi.org/10.1038/s41587-020-0439-x\n\n* Software dependencies\n https://github.com/nextflow_workflow/blob/master/CITATIONS.md" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-28T12:03:07.019761" + }, + "Test Function without logColours": { + "content": [ + { + "reset": "", + "bold": "", + "dim": "", + "underlined": "", + "blink": "", + "reverse": "", + "hidden": "", + "black": "", + "red": "", + "green": "", + "yellow": "", + "blue": "", + "purple": "", + "cyan": "", + "white": "", + "bblack": "", + "bred": "", + "bgreen": "", + "byellow": "", + "bblue": "", + "bpurple": "", + "bcyan": "", + "bwhite": "", + "ublack": "", + "ured": "", + "ugreen": "", + "uyellow": "", + "ublue": "", + "upurple": "", + "ucyan": "", + "uwhite": "", + "iblack": "", + "ired": "", + "igreen": "", + "iyellow": "", + "iblue": "", + "ipurple": "", + "icyan": "", + "iwhite": "", + "biblack": "", + "bired": "", + "bigreen": "", + "biyellow": "", + "biblue": "", + "bipurple": "", + "bicyan": "", + "biwhite": "" + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-28T12:03:17.969323" + }, + "Test Function dashedLine": { + "content": [ + "-\u001b[2m----------------------------------------------------\u001b[0m-" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-28T12:03:14.366181" + }, + "Test Function with logColours": { + "content": [ + { + "reset": "\u001b[0m", + "bold": "\u001b[1m", + "dim": "\u001b[2m", + "underlined": "\u001b[4m", + "blink": "\u001b[5m", + "reverse": "\u001b[7m", + "hidden": "\u001b[8m", + "black": "\u001b[0;30m", + "red": "\u001b[0;31m", + "green": "\u001b[0;32m", + "yellow": "\u001b[0;33m", + "blue": "\u001b[0;34m", + "purple": "\u001b[0;35m", + "cyan": "\u001b[0;36m", + "white": "\u001b[0;37m", + "bblack": "\u001b[1;30m", + "bred": "\u001b[1;31m", + "bgreen": "\u001b[1;32m", + "byellow": "\u001b[1;33m", + "bblue": "\u001b[1;34m", + "bpurple": "\u001b[1;35m", + "bcyan": "\u001b[1;36m", + "bwhite": "\u001b[1;37m", + "ublack": "\u001b[4;30m", + "ured": "\u001b[4;31m", + "ugreen": "\u001b[4;32m", + "uyellow": "\u001b[4;33m", + "ublue": "\u001b[4;34m", + "upurple": "\u001b[4;35m", + "ucyan": "\u001b[4;36m", + "uwhite": "\u001b[4;37m", + "iblack": "\u001b[0;90m", + "ired": "\u001b[0;91m", + "igreen": "\u001b[0;92m", + "iyellow": "\u001b[0;93m", + "iblue": 
"\u001b[0;94m", + "ipurple": "\u001b[0;95m", + "icyan": "\u001b[0;96m", + "iwhite": "\u001b[0;97m", + "biblack": "\u001b[1;90m", + "bired": "\u001b[1;91m", + "bigreen": "\u001b[1;92m", + "biyellow": "\u001b[1;93m", + "biblue": "\u001b[1;94m", + "bipurple": "\u001b[1;95m", + "bicyan": "\u001b[1;96m", + "biwhite": "\u001b[1;97m" + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-28T12:03:21.714424" + } +} \ No newline at end of file diff --git a/subworkflows/nf-core/utils_nfcore_pipeline/tests/main.workflow.nf.test b/subworkflows/nf-core/utils_nfcore_pipeline/tests/main.workflow.nf.test new file mode 100644 index 00000000..8940d32d --- /dev/null +++ b/subworkflows/nf-core/utils_nfcore_pipeline/tests/main.workflow.nf.test @@ -0,0 +1,29 @@ +nextflow_workflow { + + name "Test Workflow UTILS_NFCORE_PIPELINE" + script "../main.nf" + config "subworkflows/nf-core/utils_nfcore_pipeline/tests/nextflow.config" + workflow "UTILS_NFCORE_PIPELINE" + tag "subworkflows" + tag "subworkflows_nfcore" + tag "utils_nfcore_pipeline" + tag "subworkflows/utils_nfcore_pipeline" + + test("Should run without failures") { + + when { + workflow { + """ + input[0] = [] + """ + } + } + + then { + assertAll( + { assert workflow.success }, + { assert snapshot(workflow.out).match() } + ) + } + } +} diff --git a/subworkflows/nf-core/utils_nfcore_pipeline/tests/main.workflow.nf.test.snap b/subworkflows/nf-core/utils_nfcore_pipeline/tests/main.workflow.nf.test.snap new file mode 100644 index 00000000..859d1030 --- /dev/null +++ b/subworkflows/nf-core/utils_nfcore_pipeline/tests/main.workflow.nf.test.snap @@ -0,0 +1,19 @@ +{ + "Should run without failures": { + "content": [ + { + "0": [ + true + ], + "valid_config": [ + true + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-28T12:03:25.726491" + } +} \ No newline at end of file diff --git a/subworkflows/nf-core/utils_nfcore_pipeline/tests/nextflow.config b/subworkflows/nf-core/utils_nfcore_pipeline/tests/nextflow.config new file mode 100644 index 00000000..d0a926bf --- /dev/null +++ b/subworkflows/nf-core/utils_nfcore_pipeline/tests/nextflow.config @@ -0,0 +1,9 @@ +manifest { + name = 'nextflow_workflow' + author = """nf-core""" + homePage = 'https://127.0.0.1' + description = """Dummy pipeline""" + nextflowVersion = '!>=23.04.0' + version = '9.9.9' + doi = 'https://doi.org/10.5281/zenodo.5070524' +} diff --git a/subworkflows/nf-core/utils_nfcore_pipeline/tests/tags.yml b/subworkflows/nf-core/utils_nfcore_pipeline/tests/tags.yml new file mode 100644 index 00000000..ac8523c9 --- /dev/null +++ b/subworkflows/nf-core/utils_nfcore_pipeline/tests/tags.yml @@ -0,0 +1,2 @@ +subworkflows/utils_nfcore_pipeline: + - subworkflows/nf-core/utils_nfcore_pipeline/** diff --git a/subworkflows/nf-core/utils_nfvalidation_plugin/main.nf b/subworkflows/nf-core/utils_nfvalidation_plugin/main.nf new file mode 100644 index 00000000..2585b65d --- /dev/null +++ b/subworkflows/nf-core/utils_nfvalidation_plugin/main.nf @@ -0,0 +1,62 @@ +// +// Subworkflow that uses the nf-validation plugin to render help text and parameter summary +// + +/* +======================================================================================== + IMPORT NF-VALIDATION PLUGIN +======================================================================================== +*/ + +include { paramsHelp } from 'plugin/nf-validation' +include { paramsSummaryLog } from 'plugin/nf-validation' +include { validateParameters } from 
'plugin/nf-validation' + +/* +======================================================================================== + SUBWORKFLOW DEFINITION +======================================================================================== +*/ + +workflow UTILS_NFVALIDATION_PLUGIN { + + take: + print_help // boolean: print help + workflow_command // string: default commmand used to run pipeline + pre_help_text // string: string to be printed before help text and summary log + post_help_text // string: string to be printed after help text and summary log + validate_params // boolean: validate parameters + schema_filename // path: JSON schema file, null to use default value + + main: + + log.debug "Using schema file: ${schema_filename}" + + // Default values for strings + pre_help_text = pre_help_text ?: '' + post_help_text = post_help_text ?: '' + workflow_command = workflow_command ?: '' + + // + // Print help message if needed + // + if (print_help) { + log.info pre_help_text + paramsHelp(workflow_command, parameters_schema: schema_filename) + post_help_text + System.exit(0) + } + + // + // Print parameter summary to stdout + // + log.info pre_help_text + paramsSummaryLog(workflow, parameters_schema: schema_filename) + post_help_text + + // + // Validate parameters relative to the parameter JSON schema + // + if (validate_params){ + validateParameters(parameters_schema: schema_filename) + } + + emit: + dummy_emit = true +} diff --git a/subworkflows/nf-core/utils_nfvalidation_plugin/meta.yml b/subworkflows/nf-core/utils_nfvalidation_plugin/meta.yml new file mode 100644 index 00000000..3d4a6b04 --- /dev/null +++ b/subworkflows/nf-core/utils_nfvalidation_plugin/meta.yml @@ -0,0 +1,44 @@ +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/subworkflows/yaml-schema.json +name: "UTILS_NFVALIDATION_PLUGIN" +description: Use nf-validation to initiate and validate a pipeline +keywords: + - utility + - pipeline + - initialise + - validation +components: [] +input: + - print_help: + type: boolean + description: | + Print help message and exit + - workflow_command: + type: string + description: | + The command to run the workflow e.g. "nextflow run main.nf" + - pre_help_text: + type: string + description: | + Text to print before the help message + - post_help_text: + type: string + description: | + Text to print after the help message + - validate_params: + type: boolean + description: | + Validate the parameters and error if invalid. + - schema_filename: + type: string + description: | + The filename of the schema to validate against. 
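Again purely illustrative (not part of the diff): a typical call of UTILS_NFVALIDATION_PLUGIN during pipeline initialisation. The wrapping workflow name and the exact help command string are assumptions; `params.help` and `params.validate_params` correspond to the options defined in the test schema below.

```nextflow
include { UTILS_NFVALIDATION_PLUGIN } from '../../nf-core/utils_nfvalidation_plugin'

workflow PIPELINE_INITIALISATION {
    main:
    UTILS_NFVALIDATION_PLUGIN (
        params.help,             // print help text and exit when --help is set
        "nextflow run nf-core/airrflow -profile <docker/singularity/...> --input samplesheet.tsv --outdir <OUTDIR>",
        null,                    // pre_help_text  (the subworkflow defaults it to '')
        null,                    // post_help_text (the subworkflow defaults it to '')
        params.validate_params,  // run validateParameters() against the schema
        "nextflow_schema.json"   // schema file at the pipeline root
    )
}
```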
+output: + - dummy_emit: + type: boolean + description: | + Dummy emit to make nf-core subworkflows lint happy +authors: + - "@adamrtalbot" +maintainers: + - "@adamrtalbot" + - "@maxulysse" diff --git a/subworkflows/nf-core/utils_nfvalidation_plugin/tests/main.nf.test b/subworkflows/nf-core/utils_nfvalidation_plugin/tests/main.nf.test new file mode 100644 index 00000000..5784a33f --- /dev/null +++ b/subworkflows/nf-core/utils_nfvalidation_plugin/tests/main.nf.test @@ -0,0 +1,200 @@ +nextflow_workflow { + + name "Test Workflow UTILS_NFVALIDATION_PLUGIN" + script "../main.nf" + workflow "UTILS_NFVALIDATION_PLUGIN" + tag "subworkflows" + tag "subworkflows_nfcore" + tag "plugin/nf-validation" + tag "'plugin/nf-validation'" + tag "utils_nfvalidation_plugin" + tag "subworkflows/utils_nfvalidation_plugin" + + test("Should run nothing") { + + when { + + params { + monochrome_logs = true + test_data = '' + } + + workflow { + """ + help = false + workflow_command = null + pre_help_text = null + post_help_text = null + validate_params = false + schema_filename = "$moduleTestDir/nextflow_schema.json" + + input[0] = help + input[1] = workflow_command + input[2] = pre_help_text + input[3] = post_help_text + input[4] = validate_params + input[5] = schema_filename + """ + } + } + + then { + assertAll( + { assert workflow.success } + ) + } + } + + test("Should run help") { + + + when { + + params { + monochrome_logs = true + test_data = '' + } + workflow { + """ + help = true + workflow_command = null + pre_help_text = null + post_help_text = null + validate_params = false + schema_filename = "$moduleTestDir/nextflow_schema.json" + + input[0] = help + input[1] = workflow_command + input[2] = pre_help_text + input[3] = post_help_text + input[4] = validate_params + input[5] = schema_filename + """ + } + } + + then { + assertAll( + { assert workflow.success }, + { assert workflow.exitStatus == 0 }, + { assert workflow.stdout.any { it.contains('Input/output options') } }, + { assert workflow.stdout.any { it.contains('--outdir') } } + ) + } + } + + test("Should run help with command") { + + when { + + params { + monochrome_logs = true + test_data = '' + } + workflow { + """ + help = true + workflow_command = "nextflow run noorg/doesntexist" + pre_help_text = null + post_help_text = null + validate_params = false + schema_filename = "$moduleTestDir/nextflow_schema.json" + + input[0] = help + input[1] = workflow_command + input[2] = pre_help_text + input[3] = post_help_text + input[4] = validate_params + input[5] = schema_filename + """ + } + } + + then { + assertAll( + { assert workflow.success }, + { assert workflow.exitStatus == 0 }, + { assert workflow.stdout.any { it.contains('nextflow run noorg/doesntexist') } }, + { assert workflow.stdout.any { it.contains('Input/output options') } }, + { assert workflow.stdout.any { it.contains('--outdir') } } + ) + } + } + + test("Should run help with extra text") { + + + when { + + params { + monochrome_logs = true + test_data = '' + } + workflow { + """ + help = true + workflow_command = "nextflow run noorg/doesntexist" + pre_help_text = "pre-help-text" + post_help_text = "post-help-text" + validate_params = false + schema_filename = "$moduleTestDir/nextflow_schema.json" + + input[0] = help + input[1] = workflow_command + input[2] = pre_help_text + input[3] = post_help_text + input[4] = validate_params + input[5] = schema_filename + """ + } + } + + then { + assertAll( + { assert workflow.success }, + { assert workflow.exitStatus == 0 }, + { assert workflow.stdout.any { 
it.contains('pre-help-text') } }, + { assert workflow.stdout.any { it.contains('nextflow run noorg/doesntexist') } }, + { assert workflow.stdout.any { it.contains('Input/output options') } }, + { assert workflow.stdout.any { it.contains('--outdir') } }, + { assert workflow.stdout.any { it.contains('post-help-text') } } + ) + } + } + + test("Should validate params") { + + when { + + params { + monochrome_logs = true + test_data = '' + outdir = 1 + } + workflow { + """ + help = false + workflow_command = null + pre_help_text = null + post_help_text = null + validate_params = true + schema_filename = "$moduleTestDir/nextflow_schema.json" + + input[0] = help + input[1] = workflow_command + input[2] = pre_help_text + input[3] = post_help_text + input[4] = validate_params + input[5] = schema_filename + """ + } + } + + then { + assertAll( + { assert workflow.failed }, + { assert workflow.stdout.any { it.contains('ERROR ~ ERROR: Validation of pipeline parameters failed!') } } + ) + } + } +} diff --git a/subworkflows/nf-core/utils_nfvalidation_plugin/tests/nextflow_schema.json b/subworkflows/nf-core/utils_nfvalidation_plugin/tests/nextflow_schema.json new file mode 100644 index 00000000..7626c1c9 --- /dev/null +++ b/subworkflows/nf-core/utils_nfvalidation_plugin/tests/nextflow_schema.json @@ -0,0 +1,96 @@ +{ + "$schema": "http://json-schema.org/draft-07/schema", + "$id": "https://raw.githubusercontent.com/./master/nextflow_schema.json", + "title": ". pipeline parameters", + "description": "", + "type": "object", + "definitions": { + "input_output_options": { + "title": "Input/output options", + "type": "object", + "fa_icon": "fas fa-terminal", + "description": "Define where the pipeline should find input data and save output data.", + "required": ["outdir"], + "properties": { + "validate_params": { + "type": "boolean", + "description": "Validate parameters?", + "default": true, + "hidden": true + }, + "outdir": { + "type": "string", + "format": "directory-path", + "description": "The output directory where the results will be saved. 
You have to use absolute paths to storage on Cloud infrastructure.", + "fa_icon": "fas fa-folder-open" + }, + "test_data_base": { + "type": "string", + "default": "https://raw.githubusercontent.com/nf-core/test-datasets/modules", + "description": "Base for test data directory", + "hidden": true + }, + "test_data": { + "type": "string", + "description": "Fake test data param", + "hidden": true + } + } + }, + "generic_options": { + "title": "Generic options", + "type": "object", + "fa_icon": "fas fa-file-import", + "description": "Less common options for the pipeline, typically set in a config file.", + "help_text": "These options are common to all nf-core pipelines and allow you to customise some of the core preferences for how the pipeline runs.\n\nTypically these options would be set in a Nextflow config file loaded for all pipeline runs, such as `~/.nextflow/config`.", + "properties": { + "help": { + "type": "boolean", + "description": "Display help text.", + "fa_icon": "fas fa-question-circle", + "hidden": true + }, + "version": { + "type": "boolean", + "description": "Display version and exit.", + "fa_icon": "fas fa-question-circle", + "hidden": true + }, + "logo": { + "type": "boolean", + "default": true, + "description": "Display nf-core logo in console output.", + "fa_icon": "fas fa-image", + "hidden": true + }, + "singularity_pull_docker_container": { + "type": "boolean", + "description": "Pull Singularity container from Docker?", + "hidden": true + }, + "publish_dir_mode": { + "type": "string", + "default": "copy", + "description": "Method used to save pipeline results to output directory.", + "help_text": "The Nextflow `publishDir` option specifies which intermediate files should be saved to the output directory. This option tells the pipeline what method should be used to move these files. 
See [Nextflow docs](https://www.nextflow.io/docs/latest/process.html#publishdir) for details.", + "fa_icon": "fas fa-copy", + "enum": ["symlink", "rellink", "link", "copy", "copyNoFollow", "move"], + "hidden": true + }, + "monochrome_logs": { + "type": "boolean", + "description": "Use monochrome_logs", + "hidden": true + } + } + } + }, + "allOf": [ + { + "$ref": "#/definitions/input_output_options" + }, + { + "$ref": "#/definitions/generic_options" + } + ] +} diff --git a/subworkflows/nf-core/utils_nfvalidation_plugin/tests/tags.yml b/subworkflows/nf-core/utils_nfvalidation_plugin/tests/tags.yml new file mode 100644 index 00000000..60b1cfff --- /dev/null +++ b/subworkflows/nf-core/utils_nfvalidation_plugin/tests/tags.yml @@ -0,0 +1,2 @@ +subworkflows/utils_nfvalidation_plugin: + - subworkflows/nf-core/utils_nfvalidation_plugin/** diff --git a/workflows/airrflow.nf b/workflows/airrflow.nf index abfbe497..41a96d90 100644 --- a/workflows/airrflow.nf +++ b/workflows/airrflow.nf @@ -1,30 +1,9 @@ /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - PRINT PARAMS SUMMARY + IMPORT MODULES / SUBWORKFLOWS / FUNCTIONS ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ -include { paramsSummaryLog; paramsSummaryMap } from 'plugin/nf-validation' - -def logo = NfcoreTemplate.logo(workflow, params.monochrome_logs) -def citation = '\n' + WorkflowMain.citation(workflow) + '\n' -def summary_params = paramsSummaryMap(workflow) - -// Print parameter summary log to screen -log.info logo + paramsSummaryLog(workflow) + citation - -WorkflowAirrflow.initialise(params, log) - -// Check input path parameters to see if they exist -def checkPathParamList = [ params.input, params.multiqc_config ] -for (param in checkPathParamList) { if (param) { file(param, checkIfExists: true) } } - -if (params.input) { - ch_input = Channel.fromPath(params.input, checkIfExists: true) -} else { - error "Please provide input file containing the sample metadata with the '--input' option." -} - /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -38,10 +17,10 @@ ch_multiqc_logo = params.multiqc_logo ? Channel.fromPath( params.mult ch_multiqc_custom_methods_description = params.multiqc_methods_description ? file(params.multiqc_methods_description, checkIfExists: true) : file("$projectDir/assets/methods_description_template.yml", checkIfExists: true) // Report files -ch_report_rmd = Channel.fromPath(params.report_rmd, checkIfExists: true) -ch_report_css = Channel.fromPath(params.report_css, checkIfExists: true) -ch_report_logo = Channel.fromPath(params.report_logo, checkIfExists: true) -ch_report_logo_img = Channel.fromPath(params.report_logo_img, checkIfExists: true) +ch_report_rmd = Channel.fromPath(params.report_rmd, checkIfExists: true) +ch_report_css = Channel.fromPath(params.report_css, checkIfExists: true) +ch_report_logo = Channel.fromPath(params.report_logo, checkIfExists: true) +ch_report_logo_img = Channel.fromPath(params.report_logo_img, checkIfExists: true) /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -54,6 +33,7 @@ include { CHANGEO_CONVERTDB_FASTA as CHANGEO_CONVERTDB_FASTA_FROM_AIRR } from '. 
// // SUBWORKFLOW: Consisting of a mix of local and nf-core/modules // +include { DATABASES } from '../subworkflows/local/databases' include { SEQUENCE_ASSEMBLY } from '../subworkflows/local/sequence_assembly' include { ASSEMBLED_INPUT_CHECK } from '../subworkflows/local/assembled_input_check' include { VDJ_ANNOTATION } from '../subworkflows/local/vdj_annotation' @@ -61,6 +41,8 @@ include { BULK_QC_AND_FILTER } from '../subworkflows/local/bulk_qc_an include { SINGLE_CELL_QC_AND_FILTERING } from '../subworkflows/local/single_cell_qc_and_filtering' include { CLONAL_ANALYSIS } from '../subworkflows/local/clonal_analysis' include { REPERTOIRE_ANALYSIS_REPORTING } from '../subworkflows/local/repertoire_analysis_reporting' +include { SC_RAW_INPUT } from '../subworkflows/local/sc_raw_input' +include { FASTQ_INPUT_CHECK } from '../subworkflows/local/fastq_input_check' /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -71,8 +53,11 @@ include { REPERTOIRE_ANALYSIS_REPORTING } from '../subworkflows/local/repertoire // // MODULE: Installed directly from nf-core/modules // -include { MULTIQC } from '../modules/nf-core/multiqc/main' -include { CUSTOM_DUMPSOFTWAREVERSIONS } from '../modules/nf-core/custom/dumpsoftwareversions/main' +include { MULTIQC } from '../modules/nf-core/multiqc/main' +include { paramsSummaryMap } from 'plugin/nf-validation' +include { paramsSummaryMultiqc } from '../subworkflows/nf-core/utils_nfcore_pipeline' +include { softwareVersionsToYAML } from '../subworkflows/nf-core/utils_nfcore_pipeline' +include { methodsDescriptionText } from '../subworkflows/local/utils_nfcore_airrflow_pipeline' /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -80,201 +65,225 @@ include { CUSTOM_DUMPSOFTWAREVERSIONS } from '../modules/nf-core/custom/dumpsoft ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ -// Info required for completion email and summary -def multiqc_report = [] - workflow AIRRFLOW { - ch_versions = Channel.empty() - ch_reassign_logs = Channel.empty() - - if ( params.mode == "fastq" ) { - - // Perform sequence assembly if input type is fastq - SEQUENCE_ASSEMBLY( ch_input ) - - ch_fasta = SEQUENCE_ASSEMBLY.out.fasta - ch_versions = ch_versions.mix(SEQUENCE_ASSEMBLY.out.versions) - ch_fastp_html = SEQUENCE_ASSEMBLY.out.fastp_reads_html - ch_fastp_json = SEQUENCE_ASSEMBLY.out.fastp_reads_json - ch_fastqc_postassembly_mqc = SEQUENCE_ASSEMBLY.out.fastqc_postassembly - ch_validated_samplesheet = SEQUENCE_ASSEMBLY.out.samplesheet.collect() - - ch_presto_filterseq_logs = SEQUENCE_ASSEMBLY.out.presto_filterseq_logs - ch_presto_maskprimers_logs = SEQUENCE_ASSEMBLY.out.presto_maskprimers_logs - ch_presto_pairseq_logs = SEQUENCE_ASSEMBLY.out.presto_pairseq_logs - ch_presto_clustersets_logs = SEQUENCE_ASSEMBLY.out.presto_clustersets_logs - ch_presto_buildconsensus_logs = SEQUENCE_ASSEMBLY.out.presto_buildconsensus_logs - ch_presto_postconsensus_pairseq_logs = SEQUENCE_ASSEMBLY.out.presto_postconsensus_pairseq_logs - ch_presto_assemblepairs_logs = SEQUENCE_ASSEMBLY.out.presto_assemblepairs_logs - ch_presto_collapseseq_logs = SEQUENCE_ASSEMBLY.out.presto_collapseseq_logs - ch_presto_splitseq_logs = SEQUENCE_ASSEMBLY.out.presto_splitseq_logs - - } else if ( params.mode == "assembled" ) { - - ASSEMBLED_INPUT_CHECK ( - ch_input, - params.miairr, - params.collapseby, - params.cloneby - ) - ch_versions = ch_versions.mix( ASSEMBLED_INPUT_CHECK.out.versions.ifEmpty([]) ) - 
- if (params.reassign) { - CHANGEO_CONVERTDB_FASTA_FROM_AIRR( - ASSEMBLED_INPUT_CHECK.out.ch_tsv + take: + ch_input + + main: + + ch_versions = Channel.empty() + ch_reassign_logs = Channel.empty() + + // Download or fetch databases + DATABASES() + + if ( params.mode == "fastq" ) { + + // SC:Perform sequence assembly if input type is fastq from single-cell sequencing data (currently only 10XGenomics) + if (params.library_generation_method == "sc_10x_genomics") { + + SC_RAW_INPUT( + ch_input + ) + + ch_fasta = SC_RAW_INPUT.out.fasta + ch_versions = ch_versions.mix(SC_RAW_INPUT.out.versions) + ch_cellranger_airr = SC_RAW_INPUT.out.airr + ch_cellranger_out = SC_RAW_INPUT.out.outs + + ch_validated_samplesheet = SC_RAW_INPUT.out.samplesheet.collect() + + ch_presto_filterseq_logs = Channel.empty() + ch_presto_maskprimers_logs = Channel.empty() + ch_presto_pairseq_logs = Channel.empty() + ch_presto_clustersets_logs = Channel.empty() + ch_presto_buildconsensus_logs = Channel.empty() + ch_presto_postconsensus_pairseq_logs = Channel.empty() + ch_presto_assemblepairs_logs = Channel.empty() + ch_presto_collapseseq_logs = Channel.empty() + ch_presto_splitseq_logs = Channel.empty() + ch_fastp_html = Channel.empty() + ch_fastp_json = Channel.empty() + ch_fastqc_postassembly_mqc = Channel.empty() + } else { + // Perform sequence assembly if input type is fastq from bulk sequencing data + SEQUENCE_ASSEMBLY( + ch_input, + DATABASES.out.igblast.collect() + ) + + ch_fasta = SEQUENCE_ASSEMBLY.out.fasta + ch_versions = ch_versions.mix(SEQUENCE_ASSEMBLY.out.versions) + ch_fastp_html = SEQUENCE_ASSEMBLY.out.fastp_reads_html + ch_fastp_json = SEQUENCE_ASSEMBLY.out.fastp_reads_json + ch_fastqc_postassembly_mqc = SEQUENCE_ASSEMBLY.out.fastqc_postassembly + ch_validated_samplesheet = SEQUENCE_ASSEMBLY.out.samplesheet.collect() + ch_presto_filterseq_logs = SEQUENCE_ASSEMBLY.out.presto_filterseq_logs + ch_presto_maskprimers_logs = SEQUENCE_ASSEMBLY.out.presto_maskprimers_logs + ch_presto_pairseq_logs = SEQUENCE_ASSEMBLY.out.presto_pairseq_logs + ch_presto_clustersets_logs = SEQUENCE_ASSEMBLY.out.presto_clustersets_logs + ch_presto_buildconsensus_logs = SEQUENCE_ASSEMBLY.out.presto_buildconsensus_logs + ch_presto_postconsensus_pairseq_logs = SEQUENCE_ASSEMBLY.out.presto_postconsensus_pairseq_logs + ch_presto_assemblepairs_logs = SEQUENCE_ASSEMBLY.out.presto_assemblepairs_logs + ch_presto_collapseseq_logs = SEQUENCE_ASSEMBLY.out.presto_collapseseq_logs + ch_presto_splitseq_logs = SEQUENCE_ASSEMBLY.out.presto_splitseq_logs + } + + } else if ( params.mode == "assembled" ) { + + ASSEMBLED_INPUT_CHECK ( + ch_input, + params.miairr, + params.collapseby, + params.cloneby ) - ch_fasta_from_tsv = CHANGEO_CONVERTDB_FASTA_FROM_AIRR.out.fasta - ch_versions = ch_versions.mix(CHANGEO_CONVERTDB_FASTA_FROM_AIRR.out.versions.ifEmpty([])) - ch_reassign_logs = ch_reassign_logs.mix(CHANGEO_CONVERTDB_FASTA_FROM_AIRR.out.logs) - } else { - ch_fasta_from_tsv = Channel.empty() - } + ch_versions = ch_versions.mix( ASSEMBLED_INPUT_CHECK.out.versions ) + + if (params.reassign) { + CHANGEO_CONVERTDB_FASTA_FROM_AIRR( + ASSEMBLED_INPUT_CHECK.out.ch_tsv + ) + ch_fasta_from_tsv = CHANGEO_CONVERTDB_FASTA_FROM_AIRR.out.fasta + ch_versions = ch_versions.mix(CHANGEO_CONVERTDB_FASTA_FROM_AIRR.out.versions) + ch_reassign_logs = ch_reassign_logs.mix(CHANGEO_CONVERTDB_FASTA_FROM_AIRR.out.logs) + } else { + ch_fasta_from_tsv = Channel.empty() + } + + ch_fasta = ASSEMBLED_INPUT_CHECK.out.ch_fasta.mix(ch_fasta_from_tsv) + ch_validated_samplesheet = 
ASSEMBLED_INPUT_CHECK.out.validated_input.collect()
+
+        ch_presto_filterseq_logs = Channel.empty()
+        ch_presto_maskprimers_logs = Channel.empty()
+        ch_presto_pairseq_logs = Channel.empty()
+        ch_presto_clustersets_logs = Channel.empty()
+        ch_presto_buildconsensus_logs = Channel.empty()
+        ch_presto_postconsensus_pairseq_logs = Channel.empty()
+        ch_presto_assemblepairs_logs = Channel.empty()
+        ch_presto_collapseseq_logs = Channel.empty()
+        ch_presto_splitseq_logs = Channel.empty()
+        ch_fastp_html = Channel.empty()
+        ch_fastp_json = Channel.empty()
+        ch_fastqc_postassembly_mqc = Channel.empty()
-        ch_fasta = ASSEMBLED_INPUT_CHECK.out.ch_fasta.mix(ch_fasta_from_tsv)
-        ch_validated_samplesheet = ASSEMBLED_INPUT_CHECK.out.validated_input.collect()
-
-        ch_presto_filterseq_logs = Channel.empty()
-        ch_presto_maskprimers_logs = Channel.empty()
-        ch_presto_pairseq_logs = Channel.empty()
-        ch_presto_clustersets_logs = Channel.empty()
-        ch_presto_buildconsensus_logs = Channel.empty()
-        ch_presto_postconsensus_pairseq_logs = Channel.empty()
-        ch_presto_assemblepairs_logs = Channel.empty()
-        ch_presto_collapseseq_logs = Channel.empty()
-        ch_presto_splitseq_logs = Channel.empty()
-        ch_fastp_html = Channel.empty()
-        ch_fastp_json = Channel.empty()
-        ch_fastqc_postassembly_mqc = Channel.empty()
-
-    } else {
-        error "Mode parameter value not valid."
-    }
-    // Perform V(D)J annotation and filtering
-    VDJ_ANNOTATION(
-        ch_fasta,
-        ch_validated_samplesheet.collect()
-    )
-    ch_versions = ch_versions.mix( VDJ_ANNOTATION.out.versions.ifEmpty([]))
-
-    // Split bulk and single cell repertoires
-    ch_repertoire_by_processing = VDJ_ANNOTATION.out.repertoire
-        .branch { it ->
-            single: it[0].single_cell == 'true'
-            bulk: it[0].single_cell == 'false'
+    } else {
+        error "Mode parameter value not valid."
    }
-
-    // Bulk: Assign germlines and filtering
-    ch_repertoire_by_processing.bulk
-        .dump(tag: 'bulk')
-
-    BULK_QC_AND_FILTER(
-        ch_repertoire_by_processing.bulk,
-        VDJ_ANNOTATION.out.imgt.collect()
-    )
-    ch_versions = ch_versions.mix( BULK_QC_AND_FILTER.out.versions.ifEmpty([]))
-
-    ch_bulk_filtered = BULK_QC_AND_FILTER.out.repertoires
-
-    // Single cell: QC and filtering
-    ch_repertoire_by_processing.single
-        .dump(tag: 'single')
-
-    SINGLE_CELL_QC_AND_FILTERING(
-        ch_repertoire_by_processing.single
-    )
-    ch_versions = ch_versions.mix( SINGLE_CELL_QC_AND_FILTERING.out.versions.ifEmpty([]) )
-
-    // Mixing bulk and single cell channels for clonal analysis
-    ch_repertoires_for_clones = ch_bulk_filtered
-        .mix(SINGLE_CELL_QC_AND_FILTERING.out.repertoires)
-        .dump(tag: 'sc bulk mix')
-
-    // Clonal analysis
-    CLONAL_ANALYSIS(
-        ch_repertoires_for_clones,
-        VDJ_ANNOTATION.out.imgt.collect(),
-        ch_report_logo_img.collect().ifEmpty([])
-    )
-    ch_versions = ch_versions.mix( CLONAL_ANALYSIS.out.versions.ifEmpty([]))
-
-    if (!params.skip_report){
-        REPERTOIRE_ANALYSIS_REPORTING(
-            ch_presto_filterseq_logs.collect().ifEmpty([]),
-            ch_presto_maskprimers_logs.collect().ifEmpty([]),
-            ch_presto_pairseq_logs.collect().ifEmpty([]),
-            ch_presto_clustersets_logs.collect().ifEmpty([]),
-            ch_presto_buildconsensus_logs.collect().ifEmpty([]),
-            ch_presto_postconsensus_pairseq_logs.collect().ifEmpty([]),
-            ch_presto_assemblepairs_logs.collect().ifEmpty([]),
-            ch_presto_collapseseq_logs.collect().ifEmpty([]),
-            ch_presto_splitseq_logs.collect().ifEmpty([]),
-            ch_reassign_logs.collect().ifEmpty([]),
-            VDJ_ANNOTATION.out.changeo_makedb_logs.collect().ifEmpty([]),
-            VDJ_ANNOTATION.out.logs.collect().ifEmpty([]),
-            BULK_QC_AND_FILTER.out.logs.collect().ifEmpty([]),
-            SINGLE_CELL_QC_AND_FILTERING.out.logs.collect().ifEmpty([]),
-            CLONAL_ANALYSIS.out.logs.collect().ifEmpty([]),
-            CLONAL_ANALYSIS.out.repertoire,
-            ch_input.collect(),
-            ch_report_rmd.collect(),
-            ch_report_css.collect(),
-            ch_report_logo.collect(),
-            ch_validated_samplesheet.collect()
+    // Perform V(D)J annotation and filtering
+    VDJ_ANNOTATION(
+        ch_fasta,
+        ch_validated_samplesheet.collect(),
+        DATABASES.out.igblast.collect(),
+        DATABASES.out.imgt.collect()
    )
-    }
-    ch_versions = ch_versions.mix( REPERTOIRE_ANALYSIS_REPORTING.out.versions )
-    ch_versions.dump(tag: "channel_versions")
-    // Software versions
-    CUSTOM_DUMPSOFTWAREVERSIONS (
-        ch_versions.unique().collectFile(name: 'collated_versions.yml')
-    )
-
-    //
-    // MODULE: MultiQC
-    //
-    if (!params.skip_multiqc) {
-        workflow_summary = WorkflowAirrflow.paramsSummaryMultiqc(workflow, summary_params)
-        ch_workflow_summary = Channel.value(workflow_summary)
-
-        methods_description = WorkflowAirrflow.methodsDescriptionText(workflow, ch_multiqc_custom_methods_description, params)
-        ch_methods_description = Channel.value(methods_description)
-
-        ch_multiqc_files = Channel.empty()
-        ch_multiqc_files = ch_multiqc_files.mix(ch_workflow_summary.collectFile(name: 'workflow_summary_mqc.yaml'))
-        ch_multiqc_files = ch_multiqc_files.mix(ch_methods_description.collectFile(name: 'methods_description_mqc.yaml'))
-        ch_multiqc_files = ch_multiqc_files.mix(CUSTOM_DUMPSOFTWAREVERSIONS.out.mqc_yml.collect())
-        ch_multiqc_files = ch_multiqc_files.mix(ch_fastp_html.ifEmpty([]))
-        ch_multiqc_files = ch_multiqc_files.mix(ch_fastp_json.ifEmpty([]))
-        ch_multiqc_files = ch_multiqc_files.mix(ch_fastqc_postassembly_mqc.collect{it[1]}.ifEmpty([]))
-
-        MULTIQC (
-            ch_multiqc_files.collect(),
-            ch_multiqc_config.collect(),
-            ch_multiqc_custom_config.collect().ifEmpty([]),
-            ch_report_logo.collect().ifEmpty([])
+    ch_versions = ch_versions.mix( VDJ_ANNOTATION.out.versions )
+
+    // Split bulk and single cell repertoires
+    ch_repertoire_by_processing = VDJ_ANNOTATION.out.repertoire
+        .branch { it ->
+            single: it[0].single_cell == 'true'
+            bulk: it[0].single_cell == 'false'
+        }
+
+    // Bulk: Assign germlines and filtering
+    ch_repertoire_by_processing.bulk
+        .dump(tag: 'bulk')
+
+    BULK_QC_AND_FILTER(
+        ch_repertoire_by_processing.bulk,
+        VDJ_ANNOTATION.out.imgt.collect()
    )
-        multiqc_report = MULTIQC.out.report.toList()
-    }
-
-}
+    ch_versions = ch_versions.mix( BULK_QC_AND_FILTER.out.versions )
-/*
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-    COMPLETION EMAIL AND SUMMARY
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-*/
+    ch_bulk_filtered = BULK_QC_AND_FILTER.out.repertoires
-workflow.onComplete {
-    if (params.email || params.email_on_fail) {
-        NfcoreTemplate.email(workflow, params, summary_params, projectDir, log, multiqc_report)
-    }
-    NfcoreTemplate.dump_parameters(workflow, params)
-    NfcoreTemplate.summary(workflow, params, log)
+    // Single cell: QC and filtering
+    ch_repertoire_by_processing.single
+        .dump(tag: 'single')
-    if (params.hook_url) {
-        NfcoreTemplate.IM_notification(workflow, params, summary_params, projectDir, log)
-    }
+    SINGLE_CELL_QC_AND_FILTERING(
+        ch_repertoire_by_processing.single
+    )
+    ch_versions = ch_versions.mix( SINGLE_CELL_QC_AND_FILTERING.out.versions )
+
+    // Mixing bulk and single cell channels for clonal analysis
+    ch_repertoires_for_clones = ch_bulk_filtered
+        .mix(SINGLE_CELL_QC_AND_FILTERING.out.repertoires)
+        .dump(tag: 'sc bulk mix')
+
+    // Clonal analysis
+    CLONAL_ANALYSIS(
+        ch_repertoires_for_clones,
+        VDJ_ANNOTATION.out.imgt.collect(),
+        ch_report_logo_img.collect().ifEmpty([])
+    )
+    ch_versions = ch_versions.mix( CLONAL_ANALYSIS.out.versions)
+
+    if (!params.skip_report){
+        REPERTOIRE_ANALYSIS_REPORTING(
+            ch_presto_filterseq_logs.collect().ifEmpty([]),
+            ch_presto_maskprimers_logs.collect().ifEmpty([]),
+            ch_presto_pairseq_logs.collect().ifEmpty([]),
+            ch_presto_clustersets_logs.collect().ifEmpty([]),
+            ch_presto_buildconsensus_logs.collect().ifEmpty([]),
+            ch_presto_postconsensus_pairseq_logs.collect().ifEmpty([]),
+            ch_presto_assemblepairs_logs.collect().ifEmpty([]),
+            ch_presto_collapseseq_logs.collect().ifEmpty([]),
+            ch_presto_splitseq_logs.collect().ifEmpty([]),
+            ch_reassign_logs.collect().ifEmpty([]),
+            VDJ_ANNOTATION.out.changeo_makedb_logs.collect().ifEmpty([]),
+            VDJ_ANNOTATION.out.logs.collect().ifEmpty([]),
+            BULK_QC_AND_FILTER.out.logs.collect().ifEmpty([]),
+            SINGLE_CELL_QC_AND_FILTERING.out.logs.collect().ifEmpty([]),
+            CLONAL_ANALYSIS.out.logs.collect().ifEmpty([]),
+            CLONAL_ANALYSIS.out.repertoire,
+            ch_input.collect(),
+            ch_report_rmd.collect(),
+            ch_report_css.collect(),
+            ch_report_logo.collect(),
+            ch_validated_samplesheet.collect()
+        )
+    }
+    ch_versions = ch_versions.mix( REPERTOIRE_ANALYSIS_REPORTING.out.versions )
+    ch_versions.dump(tag: "channel_versions")
+
+    //
+    // Collate and save software versions
+    //
+    softwareVersionsToYAML(ch_versions)
+        .collectFile(storeDir: "${params.outdir}/pipeline_info", name: 'nf_core_pipeline_software_mqc_versions.yml', sort: true, newLine: true)
+        .set { ch_collated_versions }
+
+
+    // MODULE: MultiQC
+
+    if (!params.skip_multiqc) {
+        summary_params = paramsSummaryMap(workflow, parameters_schema: "nextflow_schema.json")
+        ch_workflow_summary = Channel.value(paramsSummaryMultiqc(summary_params))
+
+        ch_multiqc_custom_methods_description = params.multiqc_methods_description ? file(params.multiqc_methods_description, checkIfExists: true) : file("$projectDir/assets/methods_description_template.yml", checkIfExists: true)
+        ch_methods_description = Channel.value(methodsDescriptionText(ch_multiqc_custom_methods_description))
+
+        ch_multiqc_files = Channel.empty()
+        ch_multiqc_files = ch_multiqc_files.mix(ch_workflow_summary.collectFile(name: 'workflow_summary_mqc.yaml'))
+        ch_multiqc_files = ch_multiqc_files.mix(ch_methods_description.collectFile(name: 'methods_description_mqc.yaml'))
+        ch_multiqc_files = ch_multiqc_files.mix(ch_collated_versions)
+        ch_multiqc_files = ch_multiqc_files.mix(ch_fastp_html.collect().ifEmpty([]))
+        ch_multiqc_files = ch_multiqc_files.mix(ch_fastp_json.collect().ifEmpty([]))
+        ch_multiqc_files = ch_multiqc_files.mix(ch_fastqc_postassembly_mqc.collect{it[1]}.ifEmpty([]))
+
+        MULTIQC (
+            ch_multiqc_files.collect(),
+            ch_multiqc_config.toList(),
+            ch_multiqc_custom_config.toList(),
+            ch_report_logo.toList()
+        )
+        multiqc_report = MULTIQC.out.report.toList()
+    }
+    emit:
+    multiqc_report = MULTIQC.out.report.toList() // channel: /path/to/multiqc_report.html
+    versions = ch_versions // channel: [ path(versions.yml) ]
}

/*