diff --git a/.devcontainer/devcontainer.json b/.devcontainer/devcontainer.json new file mode 100644 index 0000000..b290e09 --- /dev/null +++ b/.devcontainer/devcontainer.json @@ -0,0 +1,20 @@ +{ + "name": "nfcore", + "image": "nfcore/gitpod:latest", + "remoteUser": "gitpod", + "runArgs": ["--privileged"], + + // Configure tool-specific properties. + "customizations": { + // Configure properties specific to VS Code. + "vscode": { + // Set *default* container specific settings.json values on container create. + "settings": { + "python.defaultInterpreterPath": "/opt/conda/bin/python" + }, + + // Add the IDs of extensions you want installed when the container is created. + "extensions": ["ms-python.python", "ms-python.vscode-pylance", "nf-core.nf-core-extensionpack"] + } + } +} diff --git a/.editorconfig b/.editorconfig index 2813def..b06688a 100644 --- a/.editorconfig +++ b/.editorconfig @@ -8,7 +8,7 @@ trim_trailing_whitespace = true indent_size = 4 indent_style = space -[*.{md,yml,yaml,html,css,scss,js,cff}] +[*.{md,yml,yaml,html,css,scss,js}] indent_size = 2 # These files are edited and tested upstream in nf-core/modules @@ -18,43 +18,34 @@ end_of_line = unset insert_final_newline = unset trim_trailing_whitespace = unset indent_style = unset -indent_size = unset - [/subworkflows/nf-core/**] charset = unset end_of_line = unset insert_final_newline = unset trim_trailing_whitespace = unset indent_style = unset + +[/assets/email*] indent_size = unset -# These files are edited and tested upstream in pfr/modules -[/modules/pfr/**] +# ignore python and markdown +[*.{py,md}] +indent_style = unset + +# These files are edited and tested upstream in gallvp/modules,subworkflows +[/modules/gallvp/**] charset = unset end_of_line = unset insert_final_newline = unset trim_trailing_whitespace = unset indent_style = unset -indent_size = unset -[/subworkflows/pfr/**] +[/subworkflows/gallvp/**] charset = unset end_of_line = unset insert_final_newline = unset trim_trailing_whitespace = unset indent_style = unset -indent_size = unset - -[/assets/email*] -indent_size = unset - -# ignore Readme -[README.md] -indent_style = unset - -# ignore python -[*.{py}] -indent_style = unset # ignore perl [*.{pl,pm}] diff --git a/.gitattributes b/.gitattributes new file mode 100644 index 0000000..7a2dabc --- /dev/null +++ b/.gitattributes @@ -0,0 +1,4 @@ +*.config linguist-language=nextflow +*.nf.test linguist-language=nextflow +modules/nf-core/** linguist-generated +subworkflows/nf-core/** linguist-generated diff --git a/.github/.dockstore.yml b/.github/.dockstore.yml new file mode 100644 index 0000000..191fabd --- /dev/null +++ b/.github/.dockstore.yml @@ -0,0 +1,6 @@ +# Dockstore config version, not pipeline version +version: 1.2 +workflows: + - subclass: nfl + primaryDescriptorPath: /nextflow.config + publish: True diff --git a/.github/CONTRIBUTING.md b/.github/CONTRIBUTING.md new file mode 100644 index 0000000..ba376bf --- /dev/null +++ b/.github/CONTRIBUTING.md @@ -0,0 +1,118 @@ +# plant-food-research-open/genepal: Contributing Guidelines + +Hi there! +Many thanks for taking an interest in improving plant-food-research-open/genepal. + +We try to manage the required tasks for plant-food-research-open/genepal using GitHub issues, you probably came to this page when creating one. +Please use the pre-filled template to save time. + +However, don't be put off by this template - other more general issues and suggestions are welcome! 
+Contributions to the code are even more welcome ;) + +## Contribution workflow + +If you'd like to write some code for plant-food-research-open/genepal, the standard workflow is as follows: + +1. Check that there isn't already an issue about your idea in the [plant-food-research-open/genepal issues](https://github.com/plant-food-research-open/genepal/issues) to avoid duplicating work. If there isn't one already, please create one so that others know you're working on this +2. [Fork](https://help.github.com/en/github/getting-started-with-github/fork-a-repo) the [plant-food-research-open/genepal repository](https://github.com/plant-food-research-open/genepal) to your GitHub account +3. Make the necessary changes / additions within your forked repository following [Pipeline conventions](#pipeline-contribution-conventions) +4. Use `nf-core schema build` and add any new parameters to the pipeline JSON schema (requires [nf-core tools](https://github.com/nf-core/tools) >= 1.10). +5. Submit a Pull Request against the `dev` branch and wait for the code to be reviewed and merged + +If you're not used to this workflow with git, you can start with some [docs from GitHub](https://help.github.com/en/github/collaborating-with-issues-and-pull-requests) or even their [excellent `git` resources](https://try.github.io/). + +## Tests + +You have the option to test your changes locally by running the pipeline. For receiving warnings about process selectors and other `debug` information, it is recommended to use the debug profile. Execute all the tests with the following command: + +```bash +nf-test test --profile debug,test,docker --verbose +``` + +When you create a pull request with changes, [GitHub Actions](https://github.com/features/actions) will run automatic tests. +Typically, pull-requests are only fully reviewed when these tests are passing, though of course we can help out before then. + +There are typically two types of tests that run: + +### Lint tests + +`nf-core` has a [set of guidelines](https://nf-co.re/developers/guidelines) which all pipelines must adhere to. +To enforce these and ensure that all pipelines stay in sync, we have developed a helper tool which runs checks on the pipeline code. This is in the [nf-core/tools repository](https://github.com/nf-core/tools) and once installed can be run locally with the `nf-core lint ` command. + +If any failures or warnings are encountered, please follow the listed URL for more documentation. + +### Pipeline tests + +Each `nf-core` pipeline should be set up with a minimal set of test-data. +`GitHub Actions` then runs the pipeline on this data to ensure that it exits successfully. +If there are any failures then the automated tests fail. +These tests are run both with the latest available version of `Nextflow` and also the minimum required version that is stated in the pipeline code. + +## Patch + +:warning: Only in the unlikely and regretful event of a release happening with a bug. + +- On your own fork, make a new branch `patch` based on `upstream/master`. +- Fix the bug, and bump version (X.Y.Z+1). +- A PR should be made on `master` from patch to directly address this particular bug. + +## Pipeline contribution conventions + +To make the plant-food-research-open/genepal code and processing logic more understandable for new contributors and to ensure quality, we semi-standardise the way the code and other contributions are written. + +### Adding a new step + +If you wish to contribute a new step, please use the following coding standards: + +1.
Define the corresponding input channel into your new process from the expected previous process channel +2. Write the process block (see below). +3. Define the output channel if needed (see below). +4. Add any new parameters to `nextflow.config` with a default (see below). +5. Add any new parameters to `nextflow_schema.json` with help text (via the `nf-core schema build` tool). +6. Add sanity checks and validation for all relevant parameters. +7. Perform local tests to validate that the new code works as expected. +8. If applicable, add a new test command in `.github/workflow/ci.yml`. +9. Update MultiQC config `assets/multiqc_config.yml` so relevant suffixes, file name clean up and module plots are in the appropriate order. If applicable, add a [MultiQC](https://multiqc.info/) module. +10. Add a description of the output files and if relevant any appropriate images from the MultiQC report to `docs/output.md`. + +### Default values + +Parameters should be initialised / defined with default values in `nextflow.config` under the `params` scope. + +Once there, use `nf-core schema build` to add to `nextflow_schema.json`. + +### Default processes resource requirements + +Sensible defaults for process resource requirements (CPUs / memory / time) for a process should be defined in `conf/base.config`. These should generally be specified generically with `withLabel:` selectors so they can be shared across multiple processes/steps of the pipeline. An nf-core standard set of labels that should be followed where possible can be seen in the [nf-core pipeline template](https://github.com/nf-core/tools/blob/master/nf_core/pipeline-template/conf/base.config), which has the default process as a single core-process, and then different levels of multi-core configurations for increasingly large memory requirements defined with standardised labels. + +The process resources can be passed on to the tool dynamically within the process with the `${task.cpus}` and `${task.memory}` variables in the `script:` block. + +### Naming schemes + +Please use the following naming schemes, to make it easy to understand what is going where. + +- initial process channel: `ch_output_from_` +- intermediate and terminal channels: `ch__for_` + +### Nextflow version bumping + +If you are using a new feature from core Nextflow, you may bump the minimum required version of nextflow in the pipeline with: `nf-core bump-version --nextflow . [min-nf-version]` + +### Images and figures + +For overview images and other documents we follow the nf-core [style guidelines and examples](https://nf-co.re/developers/design_guidelines). + +## GitHub Codespaces + +This repo includes a devcontainer configuration which will create a GitHub Codespaces environment for Nextflow development! This is an online developer environment that runs in your browser, complete with VSCode and a terminal.
+ +To get started: + +- Open the repo in [Codespaces](https://github.com/plant-food-research-open/genepal/codespaces) +- Tools installed + - nf-core + - Nextflow + +Devcontainer specs: + +- [DevContainer config](.devcontainer/devcontainer.json) diff --git a/.github/ISSUE_TEMPLATE/bug_report.yml b/.github/ISSUE_TEMPLATE/bug_report.yml new file mode 100644 index 0000000..0e3b51d --- /dev/null +++ b/.github/ISSUE_TEMPLATE/bug_report.yml @@ -0,0 +1,54 @@ +name: Bug report +description: Report something that is broken or incorrect +labels: bug +body: + - type: textarea + id: description + attributes: + label: Description of the bug + description: A clear and concise description of what the bug is. + validations: + required: true + - type: textarea + id: command_used + attributes: + label: Command used and terminal output + description: Steps to reproduce the behaviour. Please paste the command you used + to launch the pipeline and the output from your terminal. + render: console + placeholder: "$ nextflow run ... + + + Some output where something broke + + " + - type: textarea + id: files + attributes: + label: Relevant files + description: "Please drag and drop the relevant files here. Create a `.zip` archive + if the extension is not allowed. + + Your verbose log file `.nextflow.log` is often useful _(this is a hidden file + in the directory where you launched the pipeline)_ as well as custom Nextflow + configuration files. + + " + - type: textarea + id: system + attributes: + label: System information + description: "* Nextflow version _(eg. 23.04.0)_ + + * Hardware _(eg. HPC, Desktop, Cloud)_ + + * Executor _(eg. slurm, local, awsbatch)_ + + * Container engine: _(e.g. Docker, Singularity, Conda, Podman, Shifter, Charliecloud, + or Apptainer)_ + + * OS _(eg. CentOS Linux, macOS, Linux Mint)_ + + * Version of plant-food-research-open/genepal _(eg. 1.1, 1.5, 1.8.2)_ + + " diff --git a/.github/ISSUE_TEMPLATE/feature_request.yml b/.github/ISSUE_TEMPLATE/feature_request.yml new file mode 100644 index 0000000..15a0288 --- /dev/null +++ b/.github/ISSUE_TEMPLATE/feature_request.yml @@ -0,0 +1,11 @@ +name: Feature request +description: Suggest an idea for the plant-food-research-open/genepal pipeline +labels: enhancement +body: + - type: textarea + id: description + attributes: + label: Description of feature + description: Please describe your suggestion for a new feature. It might help to describe a problem or use case, plus any alternatives that you have considered. + validations: + required: true diff --git a/.github/PULL_REQUEST_TEMPLATE.md b/.github/PULL_REQUEST_TEMPLATE.md new file mode 100644 index 0000000..9687358 --- /dev/null +++ b/.github/PULL_REQUEST_TEMPLATE.md @@ -0,0 +1,25 @@ + + +## PR checklist + +- [ ] This comment contains a description of changes (with reason). +- [ ] If you've fixed a bug or added code that should be tested, add tests! +- [ ] If you've added a new tool - have you followed the pipeline conventions in the [contribution docs](https://github.com/plant-food-research-open/genepal/tree/master/.github/CONTRIBUTING.md) +- [ ] Make sure your code lints (`nf-core lint`). +- [ ] Ensure the test suite passes (`nextflow run . -profile test,docker --outdir `). +- [ ] Check for unexpected warnings in debug mode (`nextflow run . -profile debug,test,docker --outdir `). +- [ ] Usage Documentation in `docs/usage.md` is updated. +- [ ] Output Documentation in `docs/output.md` is updated. +- [ ] `CHANGELOG.md` is updated. 
+- [ ] `README.md` is updated (including new tool citations and authors/contributors). diff --git a/.github/contributors.sh b/.github/contributors.sh new file mode 100755 index 0000000..e694e49 --- /dev/null +++ b/.github/contributors.sh @@ -0,0 +1,5 @@ +#!/usr/bin/env bash + +module_authors=$(find ./modules -name meta.yml | xargs -I {} grep -A20 'authors:' {} | grep '\- ' | tr -d '"' | tr '[:upper:]' '[:lower:]' | awk '{print $2}') +workflow_authors=$(find ./subworkflows -name meta.yml | xargs -I {} grep -A20 'authors:' {} | grep '\- ' | tr -d '"' | tr '[:upper:]' '[:lower:]' | awk '{print $2}') +echo -e "${module_authors}\n${workflow_authors}" | sort -V | uniq -c | sort -k1,1 -r | awk '{print $2}' | sed -n 's|@\(.*\)||p' diff --git a/version_check.sh b/.github/version_checks.sh similarity index 100% rename from version_check.sh rename to .github/version_checks.sh diff --git a/.github/workflows/branch.yml b/.github/workflows/branch.yml new file mode 100644 index 0000000..d1d686b --- /dev/null +++ b/.github/workflows/branch.yml @@ -0,0 +1,44 @@ +name: nf-core branch protection +# This workflow is triggered on PRs to master branch on the repository +# It fails when someone tries to make a PR against the nf-core `master` branch instead of `dev` +on: + pull_request_target: + branches: [master] + +jobs: + test: + runs-on: ubuntu-latest + steps: + # PRs to the nf-core repo master branch are only ok if coming from the nf-core repo `dev` or any `patch` branches + - name: Check PRs + if: github.repository == 'Plant-Food-Research-Open/genepal' + run: | + { [[ ${{github.event.pull_request.head.repo.full_name }} == Plant-Food-Research-Open/genepal ]] && [[ $GITHUB_HEAD_REF == "dev" ]]; } || [[ $GITHUB_HEAD_REF == "patch" ]] + + # If the above check failed, post a comment on the PR explaining the failure + # NOTE - this doesn't currently work if the PR is coming from a fork, due to limitations in GitHub actions secrets + - name: Post PR comment + if: failure() + uses: mshick/add-pr-comment@b8f338c590a895d50bcbfa6c5859251edc8952fc # v2 + with: + message: | + ## This PR is against the `master` branch :x: + + * Do not close this PR + * Click _Edit_ and change the `base` to `dev` + * This CI test will remain failed until you push a new commit + + --- + + Hi @${{ github.event.pull_request.user.login }}, + + It looks like this pull-request has been made against the [${{github.event.pull_request.head.repo.full_name }}](https://github.com/${{github.event.pull_request.head.repo.full_name }}) `master` branch. + The `master` branch on nf-core repositories should always contain code from the latest release. + Because of this, PRs to `master` are only allowed if they come from the [${{github.event.pull_request.head.repo.full_name }}](https://github.com/${{github.event.pull_request.head.repo.full_name }}) `dev` branch. + + You do not need to close this PR, you can change the target branch to `dev` by clicking the _"Edit"_ button at the top of this page. + Note that even after this, the test will continue to show as failing until you push a new commit. + + Thanks again for your contribution!
+ repo-token: ${{ secrets.GITHUB_TOKEN }} + allow-repeats: false diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml new file mode 100644 index 0000000..e697dec --- /dev/null +++ b/.github/workflows/ci.yml @@ -0,0 +1,58 @@ +name: nf-core CI +# This workflow runs the pipeline with the minimal test dataset to check that it completes without any syntax errors +on: + push: + branches: + - dev + pull_request: + release: + types: [published] + +env: + NXF_ANSI_LOG: false + +concurrency: + group: "${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}" + cancel-in-progress: true + +jobs: + test: + name: Run pipeline with test data + if: "${{ github.event_name != 'push' || (github.event_name == 'push' && github.repository == 'Plant-Food-Research-Open/genepal') }}" + runs-on: ubuntu-latest + strategy: + matrix: + NXF_VER: + - "23.04.0" + TEST_PARAMS: + - minimal + - short + - nonmasked + - stub + OPTION_STUB: + - "-stub" + + steps: + - name: Check out pipeline code + uses: actions/checkout@0ad4b8fadaa221de15dcec353f45205ec38ea70b # v4 + + - name: Install Nextflow + uses: nf-core/setup-nextflow@v2 + with: + version: "${{ matrix.NXF_VER }}" + + - name: Disk space cleanup + uses: jlumbroso/free-disk-space@54081f138730dfa15788a46383842cd2f914a1be # v1.3.1 + + - name: Run pipeline with test data + run: | + nextflow run \ + ${GITHUB_WORKSPACE} \ + -profile docker \ + -params-file \ + ./tests/${{ matrix.TEST_PARAMS }}/params.json \ + ${{ matrix.OPTION_STUB }} \ + --outdir ./results \ + --max_cpus 2 \ + --max_memory '6.GB' \ + --max_time '2.h' diff --git a/.github/workflows/clean-up.yml b/.github/workflows/clean-up.yml new file mode 100644 index 0000000..0b6b1f2 --- /dev/null +++ b/.github/workflows/clean-up.yml @@ -0,0 +1,24 @@ +name: "Close user-tagged issues and PRs" +on: + schedule: + - cron: "0 0 * * 0" # Once a week + +jobs: + clean-up: + runs-on: ubuntu-latest + permissions: + issues: write + pull-requests: write + steps: + - uses: actions/stale@28ca1036281a5e5922ead5184a1bbf96e5fc984e # v9 + with: + stale-issue-message: "This issue has been tagged as awaiting-changes or awaiting-feedback by an nf-core contributor. Remove stale label or add a comment otherwise this issue will be closed in 20 days." + stale-pr-message: "This PR has been tagged as awaiting-changes or awaiting-feedback by an nf-core contributor. Remove stale label or add a comment if it is still useful." + close-issue-message: "This issue was closed because it has been tagged as awaiting-changes or awaiting-feedback by an nf-core contributor and then staled for 20 days with no activity." + days-before-stale: 30 + days-before-close: 20 + days-before-pr-close: -1 + any-of-labels: "awaiting-changes,awaiting-feedback" + exempt-issue-labels: "WIP" + exempt-pr-labels: "WIP" + repo-token: "${{ secrets.GITHUB_TOKEN }}" diff --git a/.github/workflows/download_pipeline.yml b/.github/workflows/download_pipeline.yml new file mode 100644 index 0000000..2d20d64 --- /dev/null +++ b/.github/workflows/download_pipeline.yml @@ -0,0 +1,86 @@ +name: Test successful pipeline download with 'nf-core download' + +# Run the workflow when: +# - dispatched manually +# - when a PR is opened or reopened to master branch +# - the head branch of the pull request is updated, i.e. if fixes for a release are pushed last minute to dev. +on: + workflow_dispatch: + inputs: + testbranch: + description: "The specific branch you wish to utilize for the test execution of nf-core download." 
+ required: true + default: "dev" + pull_request: + types: + - opened + - edited + - synchronize + branches: + - master + pull_request_target: + branches: + - master + +env: + NXF_ANSI_LOG: false + +jobs: + download: + runs-on: ubuntu-latest + steps: + - name: Install Nextflow + uses: nf-core/setup-nextflow@v2 + + - name: Disk space cleanup + uses: jlumbroso/free-disk-space@54081f138730dfa15788a46383842cd2f914a1be # v1.3.1 + + - uses: actions/setup-python@82c7e631bb3cdc910f68e0081d67478d79c6982d # v5 + with: + python-version: "3.12" + architecture: "x64" + - uses: eWaterCycle/setup-singularity@931d4e31109e875b13309ae1d07c70ca8fbc8537 # v7 + with: + singularity-version: 3.8.3 + + - name: Install dependencies + run: | + python -m pip install --upgrade pip + pip install git+https://github.com/nf-core/tools.git@dev + + - name: Get the repository name and current branch set as environment variable + run: | + echo "REPO_LOWERCASE=${GITHUB_REPOSITORY,,}" >> ${GITHUB_ENV} + echo "REPOTITLE_LOWERCASE=$(basename ${GITHUB_REPOSITORY,,})" >> ${GITHUB_ENV} + echo "REPO_BRANCH=${{ github.event.inputs.testbranch || 'dev' }}" >> ${GITHUB_ENV} + + - name: Download the pipeline + env: + NXF_SINGULARITY_CACHEDIR: ./ + run: | + nf-core download ${{ env.REPO_LOWERCASE }} \ + --revision ${{ env.REPO_BRANCH }} \ + --outdir ./${{ env.REPOTITLE_LOWERCASE }} \ + --compress "none" \ + --container-system 'singularity' \ + --container-library "quay.io" -l "docker.io" -l "ghcr.io" \ + --container-cache-utilisation 'amend' \ + --download-configuration + + - name: Inspect download + run: tree ./${{ env.REPOTITLE_LOWERCASE }} + + - name: Run the downloaded pipeline (stub) + id: stub_run_pipeline + continue-on-error: true + env: + NXF_SINGULARITY_CACHEDIR: ./ + NXF_SINGULARITY_HOME_MOUNT: true + run: nextflow run ./${{ env.REPOTITLE_LOWERCASE }}/$( sed 's/\W/_/g' <<< ${{ env.REPO_BRANCH }}) -stub -profile test,singularity --outdir ./results + - name: Run the downloaded pipeline (stub run not supported) + id: run_pipeline + if: ${{ job.steps.stub_run_pipeline.status == failure() }} + env: + NXF_SINGULARITY_CACHEDIR: ./ + NXF_SINGULARITY_HOME_MOUNT: true + run: nextflow run ./${{ env.REPOTITLE_LOWERCASE }}/$( sed 's/\W/_/g' <<< ${{ env.REPO_BRANCH }}) -profile test,singularity --outdir ./results diff --git a/.github/workflows/fix-linting.yml b/.github/workflows/fix-linting.yml new file mode 100644 index 0000000..7b4109b --- /dev/null +++ b/.github/workflows/fix-linting.yml @@ -0,0 +1,89 @@ +name: Fix linting from a comment +on: + issue_comment: + types: [created] + +jobs: + fix-linting: + # Only run if comment is on a PR with the main repo, and if it contains the magic keywords + if: > + contains(github.event.comment.html_url, '/pull/') && + contains(github.event.comment.body, '@nf-core-bot fix linting') && + github.repository == 'Plant-Food-Research-Open/genepal' + runs-on: ubuntu-latest + steps: + # Use the @nf-core-bot token to check out so we can push later + - uses: actions/checkout@0ad4b8fadaa221de15dcec353f45205ec38ea70b # v4 + with: + token: ${{ secrets.nf_core_bot_auth_token }} + + # indication that the linting is being fixed + - name: React on comment + uses: peter-evans/create-or-update-comment@71345be0265236311c031f5c7866368bd1eff043 # v4 + with: + comment-id: ${{ github.event.comment.id }} + reactions: eyes + + # Action runs on the issue comment, so we don't get the PR by default + # Use the gh cli to check out the PR + - name: Checkout Pull Request + run: gh pr checkout ${{ github.event.issue.number }} + env: + 
GITHUB_TOKEN: ${{ secrets.nf_core_bot_auth_token }} + + # Install and run pre-commit + - uses: actions/setup-python@82c7e631bb3cdc910f68e0081d67478d79c6982d # v5 + with: + python-version: "3.12" + + - name: Install pre-commit + run: pip install pre-commit + + - name: Run pre-commit + id: pre-commit + run: pre-commit run --all-files + continue-on-error: true + + # indication that the linting has finished + - name: react if linting finished succesfully + if: steps.pre-commit.outcome == 'success' + uses: peter-evans/create-or-update-comment@71345be0265236311c031f5c7866368bd1eff043 # v4 + with: + comment-id: ${{ github.event.comment.id }} + reactions: "+1" + + - name: Commit & push changes + id: commit-and-push + if: steps.pre-commit.outcome == 'failure' + run: | + git config user.email "core@nf-co.re" + git config user.name "nf-core-bot" + git config push.default upstream + git add . + git status + git commit -m "[automated] Fix code linting" + git push + + - name: react if linting errors were fixed + id: react-if-fixed + if: steps.commit-and-push.outcome == 'success' + uses: peter-evans/create-or-update-comment@71345be0265236311c031f5c7866368bd1eff043 # v4 + with: + comment-id: ${{ github.event.comment.id }} + reactions: hooray + + - name: react if linting errors were not fixed + if: steps.commit-and-push.outcome == 'failure' + uses: peter-evans/create-or-update-comment@71345be0265236311c031f5c7866368bd1eff043 # v4 + with: + comment-id: ${{ github.event.comment.id }} + reactions: confused + + - name: react if linting errors were not fixed + if: steps.commit-and-push.outcome == 'failure' + uses: peter-evans/create-or-update-comment@71345be0265236311c031f5c7866368bd1eff043 # v4 + with: + issue-number: ${{ github.event.issue.number }} + body: | + @${{ github.actor }} I tried to fix the linting errors, but it didn't work. Please fix them manually. + See [CI log](https://github.com/plant-food-research-open/genepal/actions/runs/${{ github.run_id }}) for more details. diff --git a/.github/workflows/linting.yml b/.github/workflows/linting.yml new file mode 100644 index 0000000..1fcafe8 --- /dev/null +++ b/.github/workflows/linting.yml @@ -0,0 +1,68 @@ +name: nf-core linting +# This workflow is triggered on pushes and PRs to the repository. +# It runs the `nf-core lint` and markdown lint tests to ensure +# that the code meets the nf-core guidelines. 
+on: + push: + branches: + - dev + pull_request: + release: + types: [published] + +jobs: + pre-commit: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@0ad4b8fadaa221de15dcec353f45205ec38ea70b # v4 + + - name: Set up Python 3.12 + uses: actions/setup-python@82c7e631bb3cdc910f68e0081d67478d79c6982d # v5 + with: + python-version: "3.12" + + - name: Install pre-commit + run: pip install pre-commit + + - name: Run pre-commit + run: pre-commit run --all-files + + nf-core: + runs-on: ubuntu-latest + steps: + - name: Check out pipeline code + uses: actions/checkout@0ad4b8fadaa221de15dcec353f45205ec38ea70b # v4 + + - name: Install Nextflow + uses: nf-core/setup-nextflow@v2 + + - uses: actions/setup-python@82c7e631bb3cdc910f68e0081d67478d79c6982d # v5 + with: + python-version: "3.12" + architecture: "x64" + + - name: Install dependencies + run: | + python -m pip install --upgrade pip + pip install nf-core + + - name: Run nf-core lint + env: + GITHUB_COMMENTS_URL: ${{ github.event.pull_request.comments_url }} + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + GITHUB_PR_COMMIT: ${{ github.event.pull_request.head.sha }} + run: nf-core -l lint_log.txt lint --dir ${GITHUB_WORKSPACE} --markdown lint_results.md + + - name: Save PR number + if: ${{ always() }} + run: echo ${{ github.event.pull_request.number }} > PR_number.txt + + - name: Upload linting log file artifact + if: ${{ always() }} + uses: actions/upload-artifact@65462800fd760344b1a7b4382951275a0abb4808 # v4 + with: + name: linting-logs + path: | + lint_log.txt + lint_results.md + PR_number.txt diff --git a/.github/workflows/linting_comment.yml b/.github/workflows/linting_comment.yml new file mode 100644 index 0000000..40acc23 --- /dev/null +++ b/.github/workflows/linting_comment.yml @@ -0,0 +1,28 @@ +name: nf-core linting comment +# This workflow is triggered after the linting action is complete +# It posts an automated comment to the PR, even if the PR is coming from a fork + +on: + workflow_run: + workflows: ["nf-core linting"] + +jobs: + test: + runs-on: ubuntu-latest + steps: + - name: Download lint results + uses: dawidd6/action-download-artifact@09f2f74827fd3a8607589e5ad7f9398816f540fe # v3 + with: + workflow: linting.yml + workflow_conclusion: completed + + - name: Get PR number + id: pr_number + run: echo "pr_number=$(cat linting-logs/PR_number.txt)" >> $GITHUB_OUTPUT + + - name: Post PR comment + uses: marocchino/sticky-pull-request-comment@331f8f5b4215f0445d3c07b4967662a32a2d3e31 # v2 + with: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + number: ${{ steps.pr_number.outputs.pr_number }} + path: linting-logs/lint_results.md diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml deleted file mode 100644 index 04497a0..0000000 --- a/.github/workflows/test.yml +++ /dev/null @@ -1,71 +0,0 @@ -# Adopted from https://github.com/nf-core/modules/blob/master/.github/workflows/test.yml - -name: Lint and -stub on Linux/Docker -on: - push: - branches: [main] - pull_request: - branches: [main] - -# Cancel if a newer run is started -concurrency: - group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }} - cancel-in-progress: true - -jobs: - pre-commit: - runs-on: ubuntu-latest - steps: - - uses: actions/checkout@v4 - - - name: Set up Python 3.11 - uses: actions/setup-python@v5 - with: - python-version: 3.11 - cache: "pip" - - - name: Install pre-commit - run: pip install pre-commit - - - name: Run pre-commit - run: pre-commit run --all-files - - stub-test: - runs-on: ubuntu-latest - name: Run stub test 
with docker - env: - NXF_ANSI_LOG: false - - steps: - - uses: actions/checkout@v4 - - - name: Install Nextflow - uses: nf-core/setup-nextflow@v1 - with: - version: "23.04.4" - - - name: Run stub-test - run: | - nextflow run \ - main.nf \ - -profile local,docker \ - -stub \ - -params-file tests/stub/params.json - - confirm-pass: - runs-on: ubuntu-latest - needs: [pre-commit, stub-test] - if: always() - steps: - - name: All tests ok - if: ${{ success() || !contains(needs.*.result, 'failure') }} - run: exit 0 - - name: One or more tests failed - if: ${{ contains(needs.*.result, 'failure') }} - run: exit 1 - - - name: debug-print - if: always() - run: | - echo "toJSON(needs) = ${{ toJSON(needs) }}" - echo "toJSON(needs.*.result) = ${{ toJSON(needs.*.result) }}" diff --git a/.gitignore b/.gitignore index 8bdcb04..c9a1e95 100644 --- a/.gitignore +++ b/.gitignore @@ -1,15 +1,12 @@ .nextflow* work/ +data/ +null/ results/ .DS_Store -*.code-workspace -.screenrc -.*.sw? -__pycache__ -*.pyo +testing/ +testing* *.pyc *.stdout *.stderr - -pangene-test/ diff --git a/.gitpod.yml b/.gitpod.yml new file mode 100644 index 0000000..105a182 --- /dev/null +++ b/.gitpod.yml @@ -0,0 +1,20 @@ +image: nfcore/gitpod:latest +tasks: + - name: Update Nextflow and setup pre-commit + command: | + pre-commit install --install-hooks + nextflow self-update + - name: unset JAVA_TOOL_OPTIONS + command: | + unset JAVA_TOOL_OPTIONS + +vscode: + extensions: # based on nf-core.nf-core-extensionpack + - esbenp.prettier-vscode # Markdown/CommonMark linting and style checking for Visual Studio Code + - EditorConfig.EditorConfig # override user/workspace settings with settings found in .editorconfig files + - Gruntfuggly.todo-tree # Display TODO and FIXME in a tree view in the activity bar + - mechatroner.rainbow-csv # Highlight columns in csv files in different colors + # - nextflow.nextflow # Nextflow syntax highlighting + - oderwat.indent-rainbow # Highlight indentation level + - streetsidesoftware.code-spell-checker # Spelling checker for source code + - charliermarsh.ruff # Code linter Ruff diff --git a/.nf-core.yml b/.nf-core.yml index 3805dc8..9dc0394 100644 --- a/.nf-core.yml +++ b/.nf-core.yml @@ -1 +1,33 @@ +lint: + files_exist: + - CODE_OF_CONDUCT.md + - assets/nf-core-genepal_logo_light.png + - docs/images/nf-core-genepal_logo_light.png + - docs/images/nf-core-genepal_logo_dark.png + - .github/ISSUE_TEMPLATE/config.yml + - .github/workflows/awstest.yml + - .github/workflows/awsfulltest.yml + - conf/igenomes.config + - assets/multiqc_config.yml + files_unchanged: + - CODE_OF_CONDUCT.md + - assets/nf-core-genepal_logo_light.png + - docs/images/nf-core-genepal_logo_light.png + - docs/images/nf-core-genepal_logo_dark.png + - .github/ISSUE_TEMPLATE/bug_report.yml + - .github/workflows/branch.yml + - docs/README.md + - LICENSE + - .gitignore + nextflow_config: + - manifest.name + - manifest.homePage + - config_defaults: + - params.ribo_database_manifest + multiqc_config: false +nf_core_version: 2.14.1 repository_type: pipeline +template: + prefix: plant-food-research-open + skip: + - igenomes diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index b942ccd..1168e07 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -1,10 +1,11 @@ repos: - repo: https://github.com/pre-commit/mirrors-prettier - rev: "v4.0.0-alpha.8" + rev: "v3.1.0" hooks: - id: prettier additional_dependencies: - prettier@3.2.5 + - repo: https://github.com/editorconfig-checker/editorconfig-checker.python rev: "2.7.3" hooks: @@ -16,7 
+17,7 @@ repos: name: Version checks language: system entry: > - ./version_check.sh + .github/version_checks.sh always_run: true fail_fast: true pass_filenames: false diff --git a/.prettierignore b/.prettierignore index 543341f..437d763 100644 --- a/.prettierignore +++ b/.prettierignore @@ -1,18 +1,12 @@ -includes/Maven_Pro/ - -# gitignore +email_template.html +adaptivecard.json +slackreport.json .nextflow* work/ +data/ results/ .DS_Store -*.code-workspace -.screenrc -.*.sw? -__pycache__ -*.pyo +testing/ +testing* *.pyc - -*.stdout -*.stderr - -pangene-test/ +bin/ diff --git a/CHANGELOG.md b/CHANGELOG.md index be03a5c..414d5d7 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,8 +1,63 @@ -# PlantandFoodResearch/pangene: Changelog +# plant-food-research-open/genepal: Changelog The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/) and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). +## 0.4.0 - [04-Oct-2024] + +### `Added` + +1. Added `orthofinder_annotations` param +2. Added `FASTA_GFF_ORTHOFINDER` sub-workflow +3. Added evaluation by BUSCO [#41](https://github.com/plant-food-research-open/genepal/issues/41) +4. Included common tax ids for eggnog mapper [#27](https://github.com/plant-food-research-open/genepal/issues/27) +5. Implemented hierarchical naming scheme: geneI.tJ, geneI.tJ.exonK, geneI.tJ.cdsK [#19](https://github.com/plant-food-research-open/genepal/issues/19), [#34](https://github.com/plant-food-research-open/genepal/issues/34) +6. Now sorting list of bam and list of fastq before cat to avoid resume cache misses +7. Allowed BAM files for RNA evidence [#3](https://github.com/plant-food-research-open/genepal/issues/3) +8. Added `GXF_FASTA_AGAT_SPADDINTRONS_SPEXTRACTSEQUENCES` sub-workflow for splice type statistics [#11](https://github.com/plant-food-research-open/genepal/issues/11) +9. Changed `orthofinder_annotations` from FASTA/GFF to protein FASTA [#43](https://github.com/plant-food-research-open/genepal/issues/43) +10. Added param `enforce_full_intron_support` to turn on/off strict model purging by TSEBRA [#21](https://github.com/plant-food-research-open/genepal/issues/21) +11. Added param `filter_liftoff_by_hints` to evaluate liftoff models with TSEBRA to make sure they have the same level of evidence as BRAKER [#28](https://github.com/plant-food-research-open/genepal/issues/28) +12. Added a script to automatically check module version updates +13. Reduced `BRAKER3` threads to 8 [#55](https://github.com/plant-food-research-open/genepal/issues/55) +14. Now the final annotations are stored in the `annotations` folder [#53](https://github.com/plant-food-research-open/genepal/issues/53) +15. Now a single `fasta` file can be directly specified for `protein_evidence` +16. `eggnogmapper_db_dir` is not a required parameter anymore +17. `eggnogmapper_tax_scope` is now set to 1 (root div) by default +18. Added a `test` profile based on public data +19. Added parameter `add_attrs_to_proteins_fasta` to enable/disable addition of decoded gff attributes to proteins fasta [#58](https://github.com/plant-food-research-open/genepal/issues/58) +20. Added a check for input assemblies. If an assembly is smaller than 1 MB (or 300KB in zipped format), the pipeline errors out before starting the downstream processes [#47](https://github.com/plant-food-research-open/genepal/issues/47) +21. Now `REPEATMASKER` GFF output is saved via `CUSTOM_RMOUTTOGFF3` [#54](https://github.com/plant-food-research-open/genepal/issues/54) +22. 
Added `benchmark` column to the input sheet and used `GFFCOMPARE` to perform benchmarking [#63](https://github.com/plant-food-research-open/genepal/issues/63) +23. Added `SEQKIT_RMDUP` to detect duplicate sequence and wrap the fasta to 80 characters +24. Updated parameter section labels for annotation and post-annotation filtering [#64](https://github.com/plant-food-research-open/genepal/issues/64) +25. Updated modules and sub-workflows + +### `Fixed` + +1. Fixed BRAKER spellings [#36](https://github.com/plant-food-research-open/genepal/issues/36) +2. Fixed liftoff failure when lifting off from a single reference [#40](https://github.com/plant-food-research-open/genepal/issues/40) +3. Added versions from GFF_STORE sub-workflows [#33](https://github.com/plant-food-research-open/genepal/issues/33) + +### `Dependencies` + +1. NextFlow!>=23.04.4 +2. nf-validation=1.1.3 + +### `Deprecated` + +1. Renamed `external_protein_fastas` param to `protein_evidence` +2. Renamed `fastq` param to `rna_evidence` +3. Renamed `braker_allow_isoforms` param to `allow_isoforms` +4. Moved liftoffID from gene level to mRNA/transcript level +5. Moved `version_check.sh` to `.github/version_checks.sh` +6. Removed dependency on for `BRAKER3` and `REPEATMASKER` modules which are now installed from +7. Removed dependency on +8. Now the final annotations are not stored in the `final` folder +9. Now BRAKER3 outputs are not saved by default [#53](https://github.com/plant-food-research-open/genepal/issues/53) and saved under `etc` folder when enabled +10. Removed `local` profile. Local executor is the default when no executor is specified. Therefore, the `local` profile was not needed. +11. Removed `CUSTOM_DUMPSOFTWAREVERSIONS` + ## 0.3.3 - [18-Jun-2024] ### `Added` @@ -62,8 +117,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 2. Changed license to MIT 3. Updated `.editorconfig` 4. Moved .literature to test/ branch -5. Renamed `pangene_local` to `local_pangene` -6. Renamed `pangene_pfr` to `pfr_pangene` +5. Renamed `genepal_local` to `local_genepal` +6. Renamed `genepal_pfr` to `pfr_genepal` 7. Added versioning checking 8. Updated github workflow to use pre-commit instead of prettier and editorconfig check 9. Added central singularity cache dir for pfr config @@ -80,7 +135,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 20. Now using a txt file for `params.external_protein_fastas` 21. Now using nf-validation for `params.liftoff_annotations` 22. Now using nf-validation for all the parameters -23. Added `PURGE_BREAKER_MODELS` sub-workflow +23. Added `PURGE_BRAKER_MODELS` sub-workflow 24. Added `GFF_EGGNOGMAPPER` sub-workflow 25. Now using a custom version of `GFFREAD` which supports `meta` and `fasta` 26. Now using TSEBRA to purge models which do not have full intron support from BRAKER hints @@ -95,7 +150,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 35. `external_protein_fastas` and `eggnogmapper_db_dir` are not mandatory parameters 36. Added contributors 37. Add a document for the pipeline parameters -38. Updated `pfr_pangene` and `pfr/profile.config` +38. Updated `pfr_genepal` and `pfr/profile.config` 39. Now using local tests/stub files for GitHub CI 40. Now removing iso-forms left by TSEBRA using `AGAT_SPFILTERFEATUREFROMKILLLIST` 41. 
Added `pyproject.toml` diff --git a/CITATIONS.md b/CITATIONS.md new file mode 100644 index 0000000..a76afa2 --- /dev/null +++ b/CITATIONS.md @@ -0,0 +1,123 @@ +# plant-food-research-open/genepal: Citations + +## [nf-core](https://pubmed.ncbi.nlm.nih.gov/32055031/) + +> Ewels PA, Peltzer A, Fillinger S, Patel H, Alneberg J, Wilm A, Garcia MU, Di Tommaso P, Nahnsen S. The nf-core framework for community-curated bioinformatics pipelines. Nat Biotechnol. 2020 Mar;38(3):276-278. doi: 10.1038/s41587-020-0439-x. PubMed PMID: 32055031. + +## [Nextflow](https://pubmed.ncbi.nlm.nih.gov/28398311/) + +> Di Tommaso P, Chatzou M, Floden EW, Barja PP, Palumbo E, Notredame C. Nextflow enables reproducible computational workflows. Nat Biotechnol. 2017 Apr 11;35(4):316-319. doi: 10.1038/nbt.3820. PubMed PMID: 28398311. + +## Pipeline tools + +- py_fasta_validator, [MIT](https://github.com/linsalrob/py_fasta_validator/blob/master/LICENSE) + + > Edwards, R.A. 2019. fasta_validate: a fast and efficient fasta validator written in pure C. doi: + +- GenomeTools, [ISC](http://genometools.org/license.html) + + > Gremme G, Steinbiss S, Kurtz S. 2013. "GenomeTools: A Comprehensive Software Library for Efficient Processing of Structured Genome Annotations," in IEEE/ACM Transactions on Computational Biology and Bioinformatics, vol. 10, no. 3, pp. 645-656, May 2013, doi: + +- SAMTOOLS, [MIT/Expat](https://github.com/samtools/samtools/blob/develop/LICENSE) + + > Danecek P, Bonfield JK, Liddle J, Marshall J, Ohan V, Pollard MO, Whitwham A, Keane T, McCarthy SA, Davies RM, Li H. 2021. Twelve years of SAMtools and BCFtools, GigaScience, Volume 10, Issue 2, February 2021, giab008, + +- BUSCO, [MIT](https://gitlab.com/ezlab/busco/-/blob/master/LICENSE) + + > Manni M, Berkeley MR, Seppey M, Simão FA, Zdobnov EM. 2021. BUSCO Update: Novel and Streamlined Workflows along with Broader and Deeper Phylogenetic Coverage for Scoring of Eukaryotic, Prokaryotic, and Viral Genomes, Molecular Biology and Evolution, Volume 38, Issue 10, October 2021, Pages 4647–4654, + +- GFFREAD, [MIT](https://github.com/gpertea/gffread/blob/master/LICENSE) + + > Pertea G, Pertea M. GFF Utilities: GffRead and GffCompare. F1000Res. 2020 Apr 28;9:ISCB Comm J-304. doi: . PMID: 32489650; PMCID: PMC7222033. + +- SEQKIT, [MIT](https://github.com/shenwei356/seqkit/blob/master/LICENSE) + + > Shen W, Le S, Li Y, Hu F. 2016. SeqKit: A Cross-Platform and Ultrafast Toolkit for FASTA/Q File Manipulation. PLoS ONE 11(10): e0163962. + +- FASTP, [MIT](https://github.com/OpenGene/fastp/blob/master/LICENSE) + + > Chen S, Zhou Y, Chen Y, Gu J. 2018. fastp: an ultra-fast all-in-one FASTQ preprocessor, Bioinformatics, Volume 34, Issue 17, 01 September 2018, Pages i884–i890, + +- FASTQC, [GPL v3](https://github.com/s-andrews/FastQC/blob/master/LICENSE.txt) + + > + +- AGAT, [GPL v3](https://github.com/NBISweden/AGAT/blob/master/LICENSE) + + > Dainat J. AGAT: Another Gff Analysis Toolkit to handle annotations in any GTF/GFF format. (Version v0.8.0). Zenodo. https://www.doi.org/10.5281/zenodo.3552717 + +- BRAKER, [Artistic license-1.0](https://github.com/Gaius-Augustus/BRAKER/blob/master/LICENSE.TXT) + + > Stanke, M., Diekhans, M., Baertsch, R. and Haussler, D. (2008). Using native and syntenically mapped cDNA alignments to improve de novo gene finding. Bioinformatics, doi: 10.1093/bioinformatics/btn013. + + > Stanke. M., Schöffmann, O., Morgenstern, B. and Waack, S. (2006). 
Gene prediction in eukaryotes with a generalized hidden Markov model that uses hints from external sources. BMC Bioinformatics 7, 62. + + > Gabriel, L., Bruna, T., Hoff, K. J., Ebel, M., Lomsadze, A., Borodovsky, M., Stanke, M. (2023). BRAKER3: Fully Automated Genome Annotation Using RNA-Seq and Protein Evidence with GeneMark-ETP, AUGUSTUS and TSEBRA. bioRxiV, doi: 10.1101/2023.06.10.54444910.1101/2023.01.01.474747. + + > Bruna, T., Lomsadze, A., Borodovsky, M. (2023). GeneMark-ETP: Automatic Gene Finding in Eukaryotic Genomes in Consistence with Extrinsic Data. bioRxiv, doi: 10.1101/2023.01.13.524024. + + > Kovaka, S., Zimin, A. V., Pertea, G. M., Razaghi, R., Salzberg, S. L., & Pertea, M. (2019). Transcriptome assembly from long-read RNA-seq alignments with StringTie2. Genome biology, 20(1):1-13. + +- EDTA, [GPL v3](https://github.com/oushujun/EDTA/blob/master/LICENSE) + + > Ou S., Su W., Liao Y., Chougule K., Agda J. R. A., Hellinga A. J., Lugo C. S. B., Elliott T. A., Ware D., Peterson T., Jiang N., Hirsch C. N. and Hufford M. B. (2019). Benchmarking Transposable Element Annotation Methods for Creation of a Streamlined, Comprehensive Pipeline. Genome Biol. 20(1): 275. doi: + +- RepeatMasker, [Open Software License v. 2.1](https://github.com/rmhubley/RepeatMasker/blob/master/LICENSE) + + > + +- eggnog-mapper, [GPL v3](https://github.com/eggnogdb/eggnog-mapper/blob/master/LICENSE.txt) + + > eggNOG-mapper v2: functional annotation, orthology assignments, and domain prediction at the metagenomic scale. Carlos P. Cantalapiedra, Ana Hernandez-Plaza, Ivica Letunic, Peer Bork, Jaime Huerta-Cepas. 2021. Molecular Biology and Evolution, msab293, + + > eggNOG 5.0: a hierarchical, functionally and phylogenetically annotated orthology resource based on 5090 organisms and 2502 viruses. Jaime Huerta-Cepas, Damian Szklarczyk, Davide Heller, Ana Hernández-Plaza, Sofia K Forslund, Helen Cook, Daniel R Mende, Ivica Letunic, Thomas Rattei, Lars J Jensen, Christian von Mering, Peer Bork Nucleic Acids Res. 2019 Jan 8; 47(Database issue): D309–D314. doi: 10.1093/nar/gky1085 + + > Sensitive protein alignments at tree-of-life scale using DIAMOND. Buchfink B, Reuter K, Drost HG. 2021. Nature Methods 18, 366–368 (2021). + +- Liftoff, [GPL v3](https://github.com/agshumate/Liftoff/blob/master/LICENSE.md) + + > Shumate, Alaina, and Steven L. Salzberg. 2020. “Liftoff: Accurate Mapping of Gene Annotations.” Bioinformatics , December. + +- OrthoFinder, [GPL v3](https://github.com/davidemms/OrthoFinder/blob/master/License.md) + + > Emms, D.M. and Kelly, S. (2019) OrthoFinder: phylogenetic orthology inference for comparative genomics. Genome Biology 20:238 + + > Emms, D.M. and Kelly, S. (2015) OrthoFinder: solving fundamental biases in whole genome comparisons dramatically improves orthogroup inference accuracy. Genome Biology 16:157 + +- RepeatModeler, [Open Software License v. 2.1](https://github.com/Dfam-consortium/RepeatModeler/blob/master/LICENSE) + + > + +- sortmerna, [GPL v3](https://github.com/sortmerna/sortmerna/blob/master/LICENSE.txt) + + > Kopylova E., Noé L. and Touzet H., "SortMeRNA: Fast and accurate filtering of ribosomal RNAs in metatranscriptomic data", Bioinformatics (2012), doi: . + +- STAR, [MIT](https://github.com/alexdobin/STAR/blob/master/LICENSE) + + > Dobin A, Davis CA, Schlesinger F, Drenkow J, Zaleski C, Jha S, Batut P, Chaisson M, Gingeras TR. STAR: ultrafast universal RNA-seq aligner. Bioinformatics. 2013 Jan 1;29(1):15-21. doi: . Epub 2012 Oct 25. PMID: 23104886; PMCID: PMC3530905. 
+ +- TSEBRA, [The Artistic License 2.0](https://github.com/Gaius-Augustus/TSEBRA/blob/main/bin/LICENSE.txt) + + > Gabriel, L., Hoff, K.J., Brůna, T. et al. TSEBRA: transcript selector for BRAKER. BMC Bioinformatics 22, 566 (2021). + +## Software packaging/containerisation tools + +- [Anaconda](https://anaconda.com) + + > Anaconda Software Distribution. Computer software. Vers. 2-2.4.0. Anaconda, Nov. 2016. Web. + +- [Bioconda](https://pubmed.ncbi.nlm.nih.gov/29967506/) + + > Grüning B, Dale R, Sjödin A, Chapman BA, Rowe J, Tomkins-Tinch CH, Valieris R, Köster J; Bioconda Team. Bioconda: sustainable and comprehensive software distribution for the life sciences. Nat Methods. 2018 Jul;15(7):475-476. doi: 10.1038/s41592-018-0046-7. PubMed PMID: 29967506. + +- [BioContainers](https://pubmed.ncbi.nlm.nih.gov/28379341/) + + > da Veiga Leprevost F, Grüning B, Aflitos SA, Röst HL, Uszkoreit J, Barsnes H, Vaudel M, Moreno P, Gatto L, Weber J, Bai M, Jimenez RC, Sachsenberg T, Pfeuffer J, Alvarez RV, Griss J, Nesvizhskii AI, Perez-Riverol Y. BioContainers: an open-source and community-driven framework for software standardization. Bioinformatics. 2017 Aug 15;33(16):2580-2582. doi: 10.1093/bioinformatics/btx192. PubMed PMID: 28379341; PubMed Central PMCID: PMC5870671. + +- [Docker](https://dl.acm.org/doi/10.5555/2600239.2600241) + + > Merkel, D. (2014). Docker: lightweight linux containers for consistent development and deployment. Linux Journal, 2014(239), 2. doi: 10.5555/2600239.2600241. + +- [Singularity](https://pubmed.ncbi.nlm.nih.gov/28494014/) + + > Kurtzer GM, Sochat V, Bauer MW. Singularity: Scientific containers for mobility of compute. PLoS One. 2017 May 11;12(5):e0177459. doi: 10.1371/journal.pone.0177459. eCollection 2017. PubMed PMID: 28494014; PubMed Central PMCID: PMC5426675. 
diff --git a/README.md b/README.md index d255a36..e84dd72 100644 --- a/README.md +++ b/README.md @@ -1,30 +1,83 @@ -# PANGENE +[![GitHub Actions CI Status](https://github.com/plant-food-research-open/genepal/actions/workflows/ci.yml/badge.svg)](https://github.com/plant-food-research-open/genepal/actions/workflows/ci.yml) +[![GitHub Actions Linting Status](https://github.com/plant-food-research-open/genepal/actions/workflows/linting.yml/badge.svg)](https://github.com/plant-food-research-open/genepal/actions/workflows/linting.yml)[![Cite with Zenodo](http://img.shields.io/badge/DOI-10.5281/zenodo.XXXXXXX-1073c8?labelColor=000000)](https://doi.org/10.5281/zenodo.XXXXXXX) +[![nf-test](https://img.shields.io/badge/unit_tests-nf--test-337ab7.svg)](https://www.nf-test.com) -[![Lint/stub on Linux/Docker](https://github.com/PlantandFoodResearch/pangene/actions/workflows/test.yml/badge.svg)](https://github.com/PlantandFoodResearch/pangene/actions/workflows/test.yml) +[![Nextflow](https://img.shields.io/badge/nextflow%20DSL2-%E2%89%A523.04.0-23aa62.svg)](https://www.nextflow.io/) +[![run with conda ❌](http://img.shields.io/badge/run%20with-conda%20❌-3EB049?labelColor=000000&logo=anaconda)](https://docs.conda.io/en/latest/) +[![run with docker](https://img.shields.io/badge/run%20with-docker-0db7ed?labelColor=000000&logo=docker)](https://www.docker.com/) +[![run with singularity](https://img.shields.io/badge/run%20with-singularity-1d355c.svg?labelColor=000000)](https://sylabs.io/docs/) +[![Launch on Seqera Platform](https://img.shields.io/badge/Launch%20%F0%9F%9A%80-Seqera%20Platform-%234256e7)](https://cloud.seqera.io/launch?pipeline=https://github.com/plant-food-research-open/genepal) -A NextFlow pipeline for pan-genome annotation. It can also be used for annotation of a single genome. +## Introduction -## Flowchart +**plant-food-research-open/genepal** is a bioinformatics pipeline for single genome, multiple genomes and pan-genome annotation. An overview is shown in the [Pipeline Flowchart](#pipeline-flowchart) and the references for the tools are listed in [CITATIONS.md](./CITATIONS.md). -

+## Pipeline Flowchart -## Alpha Release +

-This release is not fully documented and under alpha testing by the Bioinformatics Team. There are several [outstanding issues](https://github.com/PlantandFoodResearch/pangene/issues) which will be addressed before a general release. +- [FASTA VALIDATOR](https://github.com/linsalrob/fasta_validator): Validate genome fasta +- [REPEATMODELER](https://github.com/Dfam-consortium/RepeatModeler) or [EDTA](https://github.com/oushujun/EDTA): Create TE library +- [REPEATMASKER](https://github.com/rmhubley/RepeatMasker): Soft mask the genome fasta +- [FASTQC](https://www.bioinformatics.babraham.ac.uk/projects/fastqc), [FASTP](https://github.com/OpenGene/fastp), [SORTMERNA](https://github.com/sortmerna/sortmerna): QC, trim and filter RNASeq evidence +- [STAR](https://github.com/alexdobin/STAR): RNASeq alignment +- [BRAKER](https://github.com/Gaius-Augustus/BRAKER): Annotate the genome fasta +- [LIFTOFF](https://github.com/agshumate/Liftoff): Liftoff annotations from reference genome fasta/gff +- [TSEBRA](https://github.com/Gaius-Augustus/TSEBRA), [AGAT](https://github.com/NBISweden/AGAT): Merge BRAKER and Liftoff annotations +- [EGGNOG-MAPPER](https://github.com/eggnogdb/eggnog-mapper): Add functional annotation to gff +- [ORTHOFINDER](https://github.com/davidemms/OrthoFinder): Perform phylogenetic orthology inference across input genomes +- [GENOMETOOLS](https://github.com/genometools/genometools), [GFFREAD](https://github.com/gpertea/gffread): Final GFF format validation and extraction of protein sequences +- [BUSCO](https://gitlab.com/ezlab/busco): Completeness statistics for genome and annotation through proteins -## Plant&Food Users +## Usage -Download the pipeline to your `/workspace/$USER` folder. Change the parameters defined in the [pfr/params.json](./pfr/params.json) file. Submit the pipeline to SLURM for execution. For a description of the parameters, see [parameters.md](./docs/parameters.md). +Refer to [usage](./docs/usage.md), [parameters](./docs/parameters.md) and [output](./docs/output.md) documents for details. + +> [!NOTE] +> If you are new to Nextflow and nf-core, please refer to [this page](https://nf-co.re/docs/usage/installation) on how to set-up Nextflow. Make sure to [test your setup](https://nf-co.re/docs/usage/introduction#how-to-run-a-pipeline) with `-profile test` before running the workflow on actual data. + +First, prepare an assemblysheet with your input genomes that looks as follows: + +`assemblysheet.csv`: + +```csv +tag ,fasta ,is_masked +a_thaliana ,/path/to/genome.fa ,yes +``` + +Each row represents an input genome and the fields are: + +- `tag:` A unique tag which represents the genome throughout the pipeline +- `fasta:` fasta file for the genome +- `is_masked`: yes or no to denote whether the fasta file is already masked or not + +At minimum, a file with proteins as evidence is also required. Now, you can run the pipeline using: + +```bash +nextflow run plant-food-research-open/genepal \ + -profile \ + --input assemblysheet.csv \ + --protein_evidence proteins.faa \ + --outdir +``` + +> [!WARNING] +> Please provide pipeline parameters via the CLI or Nextflow `-params-file` option. Custom config files including those provided by the `-c` Nextflow option can be used to provide any configuration _**except for parameters**_; +> see [docs](https://nf-co.re/usage/configuration#custom-configuration-files). + +### Plant&Food Users + +Download the pipeline to your `/workspace/$USER` folder. Change the parameters defined in the [pfr/params.json](./pfr/params.json) file. 
Submit the pipeline to SLURM for execution. ```bash -sbatch ./pfr_pangene +sbatch ./pfr_genepal ``` ## Credits -plantandfoodresearch/pangene workflows were originally scripted by Jason Shiller. Usman Rashid wrote the NextFLow pipeline. +plant-food-research-open/genepal workflows were originally scripted by Jason Shiller ([@jasonshiller](https://github.com/jasonshiller)). Usman Rashid ([@gallvp](https://github.com/gallvp)) wrote the Nextflow pipeline. -We thank the following people for their extensive assistance in the development of this pipeline. +We thank the following people for their extensive assistance in the development of this pipeline: - Cecilia Deng [@CeciliaDeng](https://github.com/CeciliaDeng) - Charles David [@charlesdavid](https://github.com/charlesdavid) @@ -34,30 +87,46 @@ We thank the following people for their extensive assistance in the development - Susan Thomson [@cflsjt](https://github.com/cflsjt) - Ting-Hsuan Chen [@ting-hsuan-chen](https://github.com/ting-hsuan-chen) -The pipeline uses nf-core modules contributed by following authors. +The pipeline uses nf-core modules contributed by following authors: - - - - - - - - - - + - - - - - + + + + + + + + + + + + + + + + + + + + + + +## Contributions and Support + +If you would like to contribute to this pipeline, please see the [contributing guidelines](.github/CONTRIBUTING.md). ## Citations + + + +An extensive list of references for the tools used by the pipeline can be found in the [`CITATIONS.md`](CITATIONS.md) file. + This pipeline uses code and infrastructure developed and maintained by the [nf-core](https://nf-co.re) community, reused here under the [MIT license](https://github.com/nf-core/tools/blob/master/LICENSE). > **The nf-core framework for community-curated bioinformatics pipelines.** diff --git a/assets/adaptivecard.json b/assets/adaptivecard.json new file mode 100644 index 0000000..24315fd --- /dev/null +++ b/assets/adaptivecard.json @@ -0,0 +1,67 @@ +{ + "type": "message", + "attachments": [ + { + "contentType": "application/vnd.microsoft.card.adaptive", + "contentUrl": null, + "content": { + "\$schema": "http://adaptivecards.io/schemas/adaptive-card.json", + "msteams": { + "width": "Full" + }, + "type": "AdaptiveCard", + "version": "1.2", + "body": [ + { + "type": "TextBlock", + "size": "Large", + "weight": "Bolder", + "color": "<% if (success) { %>Good<% } else { %>Attention<%} %>", + "text": "plant-food-research-open/genepal v${version} - ${runName}", + "wrap": true + }, + { + "type": "TextBlock", + "spacing": "None", + "text": "Completed at ${dateComplete} (duration: ${duration})", + "isSubtle": true, + "wrap": true + }, + { + "type": "TextBlock", + "text": "<% if (success) { %>Pipeline completed successfully!<% } else { %>Pipeline completed with errors. 
The full error message was: ${errorReport}.<% } %>", + "wrap": true + }, + { + "type": "TextBlock", + "text": "The command used to launch the workflow was as follows:", + "wrap": true + }, + { + "type": "TextBlock", + "text": "${commandLine}", + "isSubtle": true, + "wrap": true + } + ], + "actions": [ + { + "type": "Action.ShowCard", + "title": "Pipeline Configuration", + "card": { + "type": "AdaptiveCard", + "\$schema": "http://adaptivecards.io/schemas/adaptive-card.json", + "body": [ + { + "type": "FactSet", + "facts": [<% out << summary.collect{ k,v -> "{\"title\": \"$k\", \"value\" : \"$v\"}"}.join(",\n") %> + ] + } + ] + } + } + ] + } + } + ] +} diff --git a/assets/email_template.html b/assets/email_template.html new file mode 100644 index 0000000..c7f9d61 --- /dev/null +++ b/assets/email_template.html @@ -0,0 +1,53 @@ + + + + + + + + plant-food-research-open/genepal Pipeline Report + + +
+ + + +

plant-food-research-open/genepal ${version}

+

Run Name: $runName

+ +<% if (!success){ + out << """ +
+

plant-food-research-open/genepal execution completed unsuccessfully!

+

The exit status of the task that caused the workflow execution to fail was: $exitStatus.

+

The full error message was:

+
${errorReport}
+
+ """ +} else { + out << """ +
+ plant-food-research-open/genepal execution completed successfully! +
+ """ +} +%> + +

The workflow was completed at $dateComplete (duration: $duration)

+

The command used to launch the workflow was as follows:

+
$commandLine
+ +

Pipeline Configuration:

+ + + <% out << summary.collect{ k,v -> "" }.join("\n") %> + +
$k
$v
+ +

plant-food-research-open/genepal

+

https://github.com/plant-food-research-open/genepal

+ +
+ + + diff --git a/assets/email_template.txt b/assets/email_template.txt new file mode 100644 index 0000000..2bc5fc1 --- /dev/null +++ b/assets/email_template.txt @@ -0,0 +1,31 @@ +Run Name: $runName + +<% if (success){ + out << "## plant-food-research-open/genepal execution completed successfully! ##" +} else { + out << """#################################################### +## plant-food-research-open/genepal execution completed unsuccessfully! ## +#################################################### +The exit status of the task that caused the workflow execution to fail was: $exitStatus. +The full error message was: + +${errorReport} +""" +} %> + + +The workflow was completed at $dateComplete (duration: $duration) + +The command used to launch the workflow was as follows: + + $commandLine + + + +Pipeline Configuration: +----------------------- +<% out << summary.collect{ k,v -> " - $k: $v" }.join("\n") %> + +-- +plant-food-research-open/genepal +https://github.com/plant-food-research-open/genepal diff --git a/assets/rrna-db-test.txt b/assets/rrna-db-test.txt deleted file mode 100644 index 20116f9..0000000 --- a/assets/rrna-db-test.txt +++ /dev/null @@ -1 +0,0 @@ -https://raw.githubusercontent.com/biocore/sortmerna/v4.3.4/data/rRNA_databases/silva-euk-28s-id98.fasta diff --git a/assets/schema_input.json b/assets/schema_input.json index 287b222..0824476 100644 --- a/assets/schema_input.json +++ b/assets/schema_input.json @@ -1,7 +1,7 @@ { "$schema": "http://json-schema.org/draft-07/schema", - "$id": "https://raw.githubusercontent.com/plantandfoodresearch/pangene/master/assets/schema_input.json", - "title": "plantandfoodresearch/pangene pipeline - params.input schema", + "$id": "https://raw.githubusercontent.com/plant-food-research-open/genepal/master/assets/schema_input.json", + "title": "plant-food-research-open/genepal pipeline - params.input schema", "description": "Schema for the file provided with params.input", "type": "array", "items": { @@ -63,6 +63,19 @@ ], "errorMessage": "BRAKER hints GFF/GFF3 file path cannot contain spaces and must have extension '.gff.gz', '.gff3.gz', '.gff' or '.gff3'", "dependentRequired": ["braker_gff3"] + }, + "benchmark": { + "anyOf": [ + { + "type": "string", + "pattern": "^\\S+\\.gff(3)?$" + }, + { + "type": "string", + "maxLength": 0 + } + ], + "errorMessage": "GFF/GFF3 file for benchmarking cannot contain spaces and must have extension '.gff' or '.gff3'" } }, "required": ["tag", "fasta", "is_masked"] diff --git a/assets/schema_liftoff.json b/assets/schema_liftoff.json index f4bb651..3f925b1 100644 --- a/assets/schema_liftoff.json +++ b/assets/schema_liftoff.json @@ -1,8 +1,8 @@ { "$schema": "http://json-schema.org/draft-07/schema", - "$id": "https://raw.githubusercontent.com/plantandfoodresearch/pangene/master/assets/schema_fastq.json", - "title": "plantandfoodresearch/pangene pipeline - params.fastq schema", - "description": "Schema for the file provided with params.fastq", + "$id": "https://raw.githubusercontent.com/plant-food-research-open/genepal/master/assets/schema_liftoff.json", + "title": "plant-food-research-open/genepal pipeline - params.liftoff_annotations schema", + "description": "Schema for the file provided with params.liftoff_annotations", "type": "array", "items": { "type": "object", diff --git a/assets/schema_orthofinder.json b/assets/schema_orthofinder.json new file mode 100644 index 0000000..c568143 --- /dev/null +++ b/assets/schema_orthofinder.json @@ -0,0 +1,25 @@ +{ + "$schema": "http://json-schema.org/draft-07/schema", + 
"$id": "https://raw.githubusercontent.com/plant-food-research-open/genepal/master/assets/schema_orthofinder.json", + "title": "plant-food-research-open/genepal pipeline - params.orthofinder_annotations schema", + "description": "Schema for the file provided with params.orthofinder_annotations", + "type": "array", + "items": { + "type": "object", + "properties": { + "tag": { + "type": "string", + "pattern": "^\\w+$", + "errorMessage": "Annotation tags must be provided and can only contain alphanumeric characters including '_'", + "unique": true + }, + "fasta": { + "type": "string", + "pattern": "^\\S+\\.(fa|fas|pep|faa|fasta)(\\.gz)?$", + "errorMessage": "Proteins FASTA file path cannot contain spaces and must have extension '.(fa|fas|pep|faa|fasta)' or '.(fa|fas|pep|faa|fasta).gz'. Orthofinder does not allow 'fna' extension", + "unique": true + } + }, + "required": ["tag", "fasta"] + } +} diff --git a/assets/schema_fastq.json b/assets/schema_rna.json similarity index 59% rename from assets/schema_fastq.json rename to assets/schema_rna.json index 0890ce3..6cc681c 100644 --- a/assets/schema_fastq.json +++ b/assets/schema_rna.json @@ -1,8 +1,8 @@ { "$schema": "http://json-schema.org/draft-07/schema", - "$id": "https://raw.githubusercontent.com/plantandfoodresearch/pangene/master/assets/schema_fastq.json", - "title": "plantandfoodresearch/pangene pipeline - params.fastq schema", - "description": "Schema for the file provided with params.fastq", + "$id": "https://raw.githubusercontent.com/plant-food-research-open/genepal/master/assets/schema_rna.json", + "title": "plant-food-research-open/genepal pipeline - params.rna_evidence schema", + "description": "Schema for the file provided with params.rna_evidence", "type": "array", "items": { "type": "object", @@ -13,13 +13,14 @@ "errorMessage": "Sample name must be provided and cannot contain spaces", "meta": ["id"] }, - "fastq_1": { + "file_1": { "type": "string", - "pattern": "^\\S+\\.f(ast)?q\\.gz$", - "errorMessage": "FastQ file for reads 1 must be provided, cannot contain spaces and must have extension '.fq.gz' or '.fastq.gz'" + "pattern": "^\\S+\\.(f(ast)?q\\.gz|bam)$", + "errorMessage": "FASTQ file for reads 1 or BAM file containing mapped reads must be provided, cannot contain spaces and must have extension '.fq.gz', '.fastq.gz' or '.bam'", + "unique": true }, - "fastq_2": { - "errorMessage": "FastQ file for reads 2 cannot contain spaces and must have extension '.fq.gz' or '.fastq.gz'", + "file_2": { + "errorMessage": "FASTQ file for reads 2 cannot contain spaces and must have extension '.fq.gz' or '.fastq.gz'", "anyOf": [ { "type": "string", @@ -30,7 +31,8 @@ "maxLength": 0 } ], - "dependentRequired": ["fastq_1"] + "dependentRequired": ["file_1"], + "unique": true }, "target_assemblies": { "type": "string", @@ -39,6 +41,6 @@ "meta": ["target_assemblies"] } }, - "required": ["sample", "fastq_1", "target_assemblies"] + "required": ["sample", "file_1", "target_assemblies"] } } diff --git a/assets/sendmail_template.txt b/assets/sendmail_template.txt new file mode 100644 index 0000000..8611aca --- /dev/null +++ b/assets/sendmail_template.txt @@ -0,0 +1,53 @@ +To: $email +Subject: $subject +Mime-Version: 1.0 +Content-Type: multipart/related;boundary="nfcoremimeboundary" + +--nfcoremimeboundary +Content-Type: text/html; charset=utf-8 + +$email_html + +--nfcoremimeboundary +Content-Type: image/png;name="plant-food-research-open-genepal_logo.png" +Content-Transfer-Encoding: base64 +Content-ID: +Content-Disposition: inline; 
filename="plant-food-research-open-genepal_logo_light.png" + +<% out << new File("$projectDir/assets/plant-food-research-open-genepal_logo_light.png"). + bytes. + encodeBase64(). + toString(). + tokenize( '\n' )*. + toList()*. + collate( 76 )*. + collect { it.join() }. + flatten(). + join( '\n' ) %> + +<% +if (mqcFile){ +def mqcFileObj = new File("$mqcFile") +if (mqcFileObj.length() < mqcMaxSize){ +out << """ +--nfcoremimeboundary +Content-Type: text/html; name=\"multiqc_report\" +Content-Transfer-Encoding: base64 +Content-ID: +Content-Disposition: attachment; filename=\"${mqcFileObj.getName()}\" + +${mqcFileObj. + bytes. + encodeBase64(). + toString(). + tokenize( '\n' )*. + toList()*. + collate( 76 )*. + collect { it.join() }. + flatten(). + join( '\n' )} +""" +}} +%> + +--nfcoremimeboundary-- diff --git a/assets/slackreport.json b/assets/slackreport.json new file mode 100644 index 0000000..6c2ea45 --- /dev/null +++ b/assets/slackreport.json @@ -0,0 +1,34 @@ +{ + "attachments": [ + { + "fallback": "Plain-text summary of the attachment.", + "color": "<% if (success) { %>good<% } else { %>danger<%} %>", + "author_name": "plant-food-research-open/genepal ${version} - ${runName}", + "author_icon": "https://www.nextflow.io/docs/latest/_static/favicon.ico", + "text": "<% if (success) { %>Pipeline completed successfully!<% } else { %>Pipeline completed with errors<% } %>", + "fields": [ + { + "title": "Command used to launch the workflow", + "value": "```${commandLine}```", + "short": false + } + <% + if (!success) { %> + , + { + "title": "Full error message", + "value": "```${errorReport}```", + "short": false + }, + { + "title": "Pipeline configuration", + "value": "<% out << summary.collect{ k,v -> k == "hook_url" ? "_${k}_: (_hidden_)" : ( ( v.class.toString().contains('Path') || ( v.class.toString().contains('String') && v.contains('/') ) ) ? "_${k}_: `${v}`" : (v.class.toString().contains('DateTime') ? 
("_${k}_: " + v.format(java.time.format.DateTimeFormatter.ofLocalizedDateTime(java.time.format.FormatStyle.MEDIUM))) : "_${k}_: ${v}") ) }.join(",\n") %>", + "short": false + } + <% } + %> + ], + "footer": "Completed at <% out << dateComplete.format(java.time.format.DateTimeFormatter.ofLocalizedDateTime(java.time.format.FormatStyle.MEDIUM)) %> (duration: ${duration})" + } + ] +} diff --git a/assets/tsebra-1form.cfg b/assets/tsebra-1form.cfg deleted file mode 100644 index 2b076fa..0000000 --- a/assets/tsebra-1form.cfg +++ /dev/null @@ -1,16 +0,0 @@ -# Weight for each hint source -# Values have to be >= 0 -P 1 -E 20 -C 1 -M 1 -# Required fraction of supported introns or supported start/stop-codons for a transcript -# Values have to be in [0,1] -intron_support 1.0 -stasto_support 2 -# Allowed difference for each feature -# Values have to be in [0,1] -e_1 0 -e_2 0 -e_3 0 -e_4 0 diff --git a/assets/tsebra-default.cfg b/assets/tsebra-template.cfg similarity index 75% rename from assets/tsebra-default.cfg rename to assets/tsebra-template.cfg index 8ec1a98..fa06c50 100644 --- a/assets/tsebra-default.cfg +++ b/assets/tsebra-template.cfg @@ -6,11 +6,11 @@ C 1 M 1 # Required fraction of supported introns or supported start/stop-codons for a transcript # Values have to be in [0,1] -intron_support 1.0 +intron_support PARAM_INTRON_SUPPORT stasto_support 2 # Allowed difference for each feature # Values have to be in [0,1] -e_1 0.1 -e_2 0.5 -e_3 0.05 -e_4 0.2 +e_1 PARAM_E1 +e_2 PARAM_E2 +e_3 PARAM_E3 +e_4 PARAM_E4 diff --git a/conf/base.config b/conf/base.config index 16824cd..06a63be 100644 --- a/conf/base.config +++ b/conf/base.config @@ -1,118 +1,77 @@ -profiles { - pfr { - process { - executor = 'slurm' - } - - apptainer { - envWhitelist = 'APPTAINER_BINDPATH,APPTAINER_BIND' - cacheDir = "/workspace/pangene/singularity" - } - } - - local { - process { - executor = 'local' - } - } - - apptainer { - apptainer.enabled = true - apptainer.autoMounts= true - apptainer.registry = 'quay.io' - } - - docker { - docker.enabled = true - docker.runOptions = '-u $(id -u):$(id -g) --platform=linux/amd64' - docker.registry = 'quay.io' - } -} +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + plant-food-research-open/genepal Nextflow base config file +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + A 'blank slate' config file, appropriate for general use on most high performance + compute environments. Assumes that all software is installed and available on + the PATH. Runs in `local` mode - all jobs will be run on the logged in environment. +---------------------------------------------------------------------------------------- +*/ process { - cpus = { check_max( 1 * task.attempt, 'cpus' ) } - memory = { check_max( 6.GB * task.attempt, 'memory' ) } - time = { check_max( 4.h * task.attempt, 'time' ) } + cpus = { check_max( 1 * task.attempt, 'cpus' ) } + memory = { check_max( 6.GB * task.attempt, 'memory' ) } + time = { check_max( 4.h * task.attempt, 'time' ) } - errorStrategy = { task.exitStatus in [140,143,137,104,134,139] ? 'retry' : 'finish' } - maxRetries = 1 - maxErrors = '-1' + errorStrategy = { task.exitStatus in ((130..145) + 104) ? 'retry' : 'finish' } + maxRetries = 1 + maxErrors = '-1' + // Process-specific resource requirements + // NOTE - Please try and re-use the labels below as much as possible. + // These labels are used and recognised by default in DSL2 files hosted on nf-core/modules. 
+ // If possible, it would be nice to keep the same label naming convention when + // adding in your local modules too. + // See https://www.nextflow.io/docs/latest/config.html#config-process-selectors withLabel:process_single { - cpus = { check_max( 1 , 'cpus' ) } - memory = { check_max( 6.GB * task.attempt, 'memory' ) } - time = { check_max( 4.h * task.attempt, 'time' ) } + cpus = { check_max( 1 , 'cpus' ) } + memory = { check_max( 6.GB * task.attempt, 'memory' ) } + time = { check_max( 4.h * task.attempt, 'time' ) } } withLabel:process_low { - cpus = { check_max( 2 * task.attempt, 'cpus' ) } - memory = { check_max( 12.GB * task.attempt, 'memory' ) } - time = { check_max( 4.h * task.attempt, 'time' ) } + cpus = { check_max( 2 * task.attempt, 'cpus' ) } + memory = { check_max( 12.GB * task.attempt, 'memory' ) } + time = { check_max( 4.h * task.attempt, 'time' ) } } withLabel:process_medium { - cpus = { check_max( 6 * task.attempt, 'cpus' ) } - memory = { check_max( 36.GB * task.attempt, 'memory' ) } - time = { check_max( 8.h * task.attempt, 'time' ) } + cpus = { check_max( 6 * task.attempt, 'cpus' ) } + memory = { check_max( 36.GB * task.attempt, 'memory' ) } + time = { check_max( 8.h * task.attempt, 'time' ) } } withLabel:process_high { - cpus = { check_max( 12 * task.attempt, 'cpus' ) } - memory = { check_max( 72.GB * task.attempt, 'memory' ) } - time = { check_max( 16.h * task.attempt, 'time' ) } + cpus = { check_max( 12 * task.attempt, 'cpus' ) } + memory = { check_max( 72.GB * task.attempt, 'memory' ) } + time = { check_max( 16.h * task.attempt, 'time' ) } } withLabel:process_long { - time = { check_max( 20.h * task.attempt, 'time' ) } + time = { check_max( 20.h * task.attempt, 'time' ) } } withLabel:process_high_memory { - memory = { check_max( 200.GB * task.attempt, 'memory' ) } + memory = { check_max( 200.GB * task.attempt, 'memory' ) } + } + withLabel:error_ignore { + errorStrategy = 'ignore' + } + withLabel:error_retry { + errorStrategy = 'retry' + maxRetries = 2 } withName:EGGNOGMAPPER { - time = { check_max( 1.day * task.attempt, 'time' ) } + time = { check_max( 1.day * task.attempt, 'time' ) } } - withName:REPEATMASKER { - time = { check_max( 2.days * task.attempt, 'time' ) } + withName:REPEATMASKER_REPEATMASKER { + time = { check_max( 2.days * task.attempt, 'time' ) } } withName:REPEATMODELER_REPEATMODELER { - time = { check_max( 5.days * task.attempt, 'time' ) } + time = { check_max( 5.days * task.attempt, 'time' ) } } withName:EDTA_EDTA { - time = { check_max( 7.days * task.attempt, 'time' ) } + time = { check_max( 7.days * task.attempt, 'time' ) } } withName:BRAKER3 { - time = { check_max( 7.days * task.attempt, 'time' ) } - } - withName:CUSTOM_DUMPSOFTWAREVERSIONS { - cache = false - } -} - -def check_max(obj, type) { - if (type == 'memory') { - try { - if (obj.compareTo(params.max_memory as nextflow.util.MemoryUnit) == 1) - return params.max_memory as nextflow.util.MemoryUnit - else - return obj - } catch (all) { - println " ### ERROR ### Max memory '${params.max_memory}' is not valid! Using default value: $obj" - return obj - } - } else if (type == 'time') { - try { - if (obj.compareTo(params.max_time as nextflow.util.Duration) == 1) - return params.max_time as nextflow.util.Duration - else - return obj - } catch (all) { - println " ### ERROR ### Max time '${params.max_time}' is not valid! 
Using default value: $obj" - return obj - } - } else if (type == 'cpus') { - try { - return Math.min( obj, params.max_cpus as int ) - } catch (all) { - println " ### ERROR ### Max cpus '${params.max_cpus}' is not valid! Using default value: $obj" - return obj - } + cpus = { check_max( 8 * task.attempt, 'cpus' ) } + time = { check_max( 7.days * task.attempt, 'time' ) } } } diff --git a/conf/modules.config b/conf/modules.config index f5e0cbf..83688e5 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -1,5 +1,23 @@ -process { - withName: 'EDTA_EDTA' { +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Config file for defining DSL2 per module options and publishing paths +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Available keys to override module options: + ext.args = Additional arguments appended to command in module. + ext.args2 = Second set of arguments appended to command in module (multi-tool modules). + ext.args3 = Third set of arguments appended to command in module (multi-tool modules). + ext.prefix = File name prefix for output files. +---------------------------------------------------------------------------------------- +*/ + +process { // SUBWORKFLOW: PREPARE_ASSEMBLY + + withName: '.*:PREPARE_ASSEMBLY:SEQKIT_RMDUP' { + ext.args = '--by-seq --ignore-case -w 80' + ext.prefix = { "${meta.id}.seqkit.rmdup" } + } + + withName: '.*:PREPARE_ASSEMBLY:FASTA_EDTA_LAI:EDTA_EDTA' { ext.args = [ params.edta_is_sensitive ? "--sensitive 1" : "--sensitive 0", "--anno 0", @@ -8,24 +26,24 @@ process { publishDir = [ path: { "${params.outdir}/edta" }, - mode: "copy", + mode: params.publish_dir_mode, pattern: '*.EDTA.TElib.fa', enabled: params.save_annotated_te_lib ] } - withName: 'REPEATMODELER_REPEATMODELER' { + withName: '.*:PREPARE_ASSEMBLY:REPEATMODELER_REPEATMODELER' { ext.args = '-LTRStruct' publishDir = [ path: { "${params.outdir}/repeatmodeler" }, - mode: "copy", + mode: params.publish_dir_mode, pattern: '*.fa', enabled: params.save_annotated_te_lib ] } - withName: 'REPEATMASKER' { + withName: '.*:PREPARE_ASSEMBLY:REPEATMASKER_REPEATMASKER' { ext.args = [ "-no_is", "-xsmall", @@ -33,78 +51,89 @@ process { publishDir = [ path: { "${params.outdir}/repeatmasker" }, - mode: "copy", + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals("versions.yml") ? null : filename }, + enabled: params.repeatmasker_save_outputs + ] + } + + withName: '.*:PREPARE_ASSEMBLY:CUSTOM_RMOUTTOGFF3' { + publishDir = [ + path: { "${params.outdir}/repeatmasker" }, + mode: params.publish_dir_mode, saveAs: { filename -> filename.equals("versions.yml") ? null : filename }, enabled: params.repeatmasker_save_outputs ] } } -if(!params.skip_fastqc) { - process { - withName: '.*:FASTQ_FASTQC_UMITOOLS_FASTP:FASTQC_RAW' { +process { // SUBWORKFLOW: PREPROCESS_RNASEQ + if(!params.skip_fastqc) { + withName: '.*:PREPROCESS_RNASEQ:FASTQ_FASTQC_UMITOOLS_FASTP:FASTQC_RAW' { ext.args = '--quiet' + + publishDir = [ + path: { "${params.outdir}/fastqc_raw" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename }, + ] } - withName: '.*:FASTQ_FASTQC_UMITOOLS_FASTP:FASTQC_TRIM' { + withName: '.*:PREPROCESS_RNASEQ:FASTQ_FASTQC_UMITOOLS_FASTP:FASTQC_TRIM' { ext.args = '--quiet' publishDir = [ - path: { "${params.outdir}/fastp/fastqc" }, - mode: "copy", + path: { "${params.outdir}/fastqc_trim" }, + mode: params.publish_dir_mode, saveAs: { filename -> filename.equals('versions.yml') ? null : filename }, ] } } -} -if(!params.skip_fastp) { - process { - withName: '.*:FASTQ_FASTQC_UMITOOLS_FASTP:FASTP' { + if(!params.skip_fastp) { + withName: '.*:PREPROCESS_RNASEQ:FASTQ_FASTQC_UMITOOLS_FASTP:FASTP' { ext.args = params.extra_fastp_args ?: '' publishDir = [ [ path: { "${params.outdir}/fastp/html" }, - mode: "copy", + mode: params.publish_dir_mode, pattern: "*.{html}" ], [ path: { "${params.outdir}/fastp/json" }, - mode: "copy", + mode: params.publish_dir_mode, pattern: "*.{json}" ], [ path: { "${params.outdir}/fastp/log" }, - mode: "copy", + mode: params.publish_dir_mode, pattern: "*.log" ], [ path: { "${params.outdir}/fastp" }, - mode: "copy", + mode: params.publish_dir_mode, pattern: "*.fastq.gz", enabled: params.save_trimmed ] ] } } -} -if (params.remove_ribo_rna) { - process { - withName: SORTMERNA_INDEX { + if (params.remove_ribo_rna) { + withName: '.*:PREPROCESS_RNASEQ:SORTMERNA_INDEX' { ext.args = '--index 1' } - withName: SORTMERNA_READS { + withName: '.*:PREPROCESS_RNASEQ:SORTMERNA_READS' { ext.args = '--index 0 --num_alignments 1 -v' publishDir = [ [ path: { "${params.outdir}/sortmerna" }, - mode: "copy", + mode: params.publish_dir_mode, pattern: "*.log" ], [ path: { "${params.outdir}/sortmerna" }, - mode: "copy", + mode: params.publish_dir_mode, pattern: "*.fastq.gz", enabled: params.save_non_ribo_reads ] @@ -113,8 +142,8 @@ if (params.remove_ribo_rna) { } } -process { - withName: STAR_ALIGN { +process { // SUBWORKFLOW: ALIGN_RNASEQ + withName: '.*:ALIGN_RNASEQ:STAR_ALIGN' { ext.args = [ "--outSAMstrandField intronMotif", "--outSAMtype BAM SortedByCoordinate", @@ -125,7 +154,7 @@ process { ext.prefix = { "${meta.id}" } publishDir = [ path: { "${params.outdir}/star/alignment" }, - mode: "copy", + mode: params.publish_dir_mode, saveAs: { filename -> filename.equals('versions.yml') ? null : filename }, enabled: params.star_save_outputs ] @@ -134,73 +163,78 @@ process { withName: '.*:ALIGN_RNASEQ:SAMTOOLS_CAT' { publishDir = [ path: { "${params.outdir}/star/cat_bam" }, - mode: "copy", + mode: params.publish_dir_mode, saveAs: { filename -> filename.equals('versions.yml') ? null : filename }, enabled: params.save_cat_bam ] } +} - withName: BRAKER3 { +process { // SUBWORKFLOW: FASTA_BRAKER3 + withName: '.*:FASTA_BRAKER3:BRAKER3' { ext.args = [ "--gff3", params.braker_extra_args ? params.braker_extra_args.split("\\s(?=--)") : '' ].flatten().unique(false).join(' ').trim() ext.prefix = { "${meta.id}" } publishDir = [ - path: { "${params.outdir}/braker/" }, - mode: "copy", + path: { "${params.outdir}/etc/braker/" }, + mode: params.publish_dir_mode, saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename }, + enabled: params.braker_save_outputs ] } } -if(params.liftoff_annotations) { - process { - withName: LIFTOFF { - ext.args = [ - '-exclude_partial', - '-copies', - '-polish', - "-a $params.liftoff_coverage", - "-s $params.liftoff_identity" - ].join(' ').trim() - } +process { // SUBWORKFLOW: FASTA_LIFTOFF + withName: '.*:FASTA_LIFTOFF:LIFTOFF' { + ext.args = [ + '-exclude_partial', + '-copies', + '-polish', + "-a $params.liftoff_coverage", + "-s $params.liftoff_identity" + ].join(' ').trim() + } - withName: '.*:FASTA_LIFTOFF:GFFREAD_BEFORE_LIFTOFF' { - ext.args = '--no-pseudo --keep-genes' - } + withName: '.*:FASTA_LIFTOFF:GFFREAD_BEFORE_LIFTOFF' { + ext.args = '--no-pseudo --keep-genes' + } - withName: MERGE_LIFTOFF_ANNOTATIONS { - ext.prefix = { "${meta.id}.merged.liftoffs" } - } + withName: '.*:FASTA_LIFTOFF:MERGE_LIFTOFF_ANNOTATIONS' { + ext.prefix = { "${meta.id}.merged.liftoffs" } + } - withName: '.*:FASTA_LIFTOFF:AGAT_SPFILTERFEATUREFROMKILLLIST' { - ext.prefix = { "${meta.id}.invalid.orf.purged" } - } + withName: '.*:FASTA_LIFTOFF:AGAT_SPFILTERFEATUREFROMKILLLIST' { + ext.prefix = { "${meta.id}.invalid.orf.purged" } + } - withName: '.*:FASTA_LIFTOFF:GFFREAD_AFTER_LIFTOFF' { - ext.prefix = { "${meta.id}.liftoff" } - ext.args = '--keep-genes' - } + withName: '.*:FASTA_LIFTOFF:GFFREAD_AFTER_LIFTOFF' { + ext.prefix = { "${meta.id}.liftoff" } + ext.args = '--keep-genes' + } + + withName: '.*:FASTA_LIFTOFF:GFF_TSEBRA_SPFILTERFEATUREFROMKILLLIST:AGAT_CONVERTSPGFF2GTF' { + ext.args = '--gtf_version relax' + } + + withName: '.*:FASTA_LIFTOFF:GFF_TSEBRA_SPFILTERFEATUREFROMKILLLIST:KILL_TSEBRA_ISOFORMS' { + ext.prefix = { "${meta.id}.liftoff.1form" } } } -process { +process { // SUBWORKFLOW: PURGE_BRAKER_MODELS - withName: 'AGAT_CONVERTSPGFF2GTF' { + withName: '.*:PURGE_BRAKER_MODELS:GFF_TSEBRA_SPFILTERFEATUREFROMKILLLIST:AGAT_CONVERTSPGFF2GTF' { ext.args = '--gtf_version relax' } - withName: 'KILL_TSEBRA_ISOFORMS' { + withName: '.*:PURGE_BRAKER_MODELS:GFF_TSEBRA_SPFILTERFEATUREFROMKILLLIST:KILL_TSEBRA_ISOFORMS' { ext.prefix = { "${meta.id}.1form" } } - - withName: 'AGAT_SPFILTERFEATUREFROMKILLLIST' { - ext.prefix = { "${meta.id}.purged" } - } } -process { +process { // SUBWORKFLOW: GFF_MERGE_CLEANUP withName: '.*:GFF_MERGE_CLEANUP:AGAT_SPMERGEANNOTATIONS' { ext.prefix = { "${meta.id}.liftoff.braker" } } @@ -210,12 +244,12 @@ process { } } -process { - withName: GFF2FASTA_FOR_EGGNOGMAPPER { +process { // SUBWORKFLOW: GFF_EGGNOGMAPPER + withName: '.*:GFF_EGGNOGMAPPER:GFF2FASTA_FOR_EGGNOGMAPPER' { ext.args = '-y' } - withName: EGGNOGMAPPER { + withName: '.*:GFF_EGGNOGMAPPER:EGGNOGMAPPER' { ext.args = [ "--evalue $params.eggnogmapper_evalue", "--pident $params.eggnogmapper_pident", @@ -226,38 +260,125 @@ process { ].join(' ').trim() publishDir = [ - path: { "${params.outdir}/final/$meta.id" }, - mode: "copy", + path: { "${params.outdir}/annotations/$meta.id" }, + mode: params.publish_dir_mode, saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename }, ] } } -process { +process { // SUBWORKFLOW: PURGE_NOHIT_MODELS withName: '.*:PURGE_NOHIT_MODELS:AGAT_SPFILTERFEATUREFROMKILLLIST' { ext.prefix = { "${meta.id}.nohits.purged" } } } -process { - withName: 'FINAL_GFF_CHECK' { +process { // SUBWORKFLOW: GFF_STORE + withName: '.*:GFF_STORE:FINAL_GFF_CHECK' { ext.args = '-tidy -retainids -sort' publishDir = [ - path: { "${params.outdir}/final/$meta.id" }, - mode: "copy", + path: { "${params.outdir}/annotations/$meta.id" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename }, + ] + } + + withName: '.*:GFF_STORE:EXTRACT_PROTEINS' { + ext.args = params.add_attrs_to_proteins_fasta ? '-F -D -y' : '-y' + ext.prefix = { "${meta.id}.pep" } + + publishDir = [ + path: { "${params.outdir}/annotations/$meta.id" }, + mode: params.publish_dir_mode, saveAs: { filename -> filename.equals('versions.yml') ? null : filename }, ] } } -process { - withName: 'CUSTOM_DUMPSOFTWAREVERSIONS' { +process { // SUBWORKFLOW: FASTA_ORTHOFINDER + withName: '.*:FASTA_ORTHOFINDER:FASTA_GUNZIP:GUNZIP' { + ext.prefix = { "${meta.id}.pep" } + } + + withName: '.*:FASTA_ORTHOFINDER:ORTHOFINDER' { publishDir = [ - path: "$params.outdir/pipeline_info", - pattern: "software_versions.yml", - mode: "copy", - enabled: true + path: { "${params.outdir}/orthofinder" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename }, + ] + } +} + +process { // SUBWORKFLOW: FASTA_GXF_BUSCO_PLOT + withName: '.*:FASTA_GXF_BUSCO_PLOT:BUSCO_ASSEMBLY' { + ext.args = '--metaeuk' + publishDir = [ + path: { "${params.outdir}/busco/fasta" }, + mode: params.publish_dir_mode, + pattern: 'short_summary.*.txt', + saveAs: { filename -> "short_summary.specific.${meta.id}.${lineage}.txt" } + ] + } + + withName: '.*:FASTA_GXF_BUSCO_PLOT:PLOT_ASSEMBLY' { + ext.prefix = 'busco_figure' + publishDir = [ + path: { "${params.outdir}/busco/fasta" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals("versions.yml") ? null : filename } + ] + } + + withName: '.*:FASTA_GXF_BUSCO_PLOT:EXTRACT_PROTEINS' { + ext.args = '-y' + } + + withName: '.*:FASTA_GXF_BUSCO_PLOT:BUSCO_ANNOTATION' { + ext.args = '--metaeuk' + publishDir = [ + path: { "${params.outdir}/busco/gff" }, + mode: params.publish_dir_mode, + pattern: 'short_summary.*.txt', + saveAs: { filename -> "short_summary.specific.${meta.id}.${lineage}.txt" } + ] + } + + withName: '.*:FASTA_GXF_BUSCO_PLOT:PLOT_ANNOTATION' { + ext.prefix = 'busco_figure' + publishDir = [ + path: { "${params.outdir}/busco/gff" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals("versions.yml") ? null : filename } + ] + } +} + +process { // SUBWORKFLOW: GXF_FASTA_AGAT_SPADDINTRONS_SPEXTRACTSEQUENCES + withName: '.*:GXF_FASTA_AGAT_SPADDINTRONS_SPEXTRACTSEQUENCES:AGAT_SPEXTRACTSEQUENCES' { + ext.args = '-t intron' + } +} + +process { // Universal + withName: AGAT_SPFILTERFEATUREFROMKILLLIST { + ext.prefix = { "${meta.id}.purged" } + } + + withName: SAVE_MARKED_GFF3 { + publishDir = [ + path: { "${params.outdir}/etc/splicing_marked" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename }, + ] + } + + withName: BENCHMARK { + publishDir = [ + path: { "${params.outdir}/benchmark" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename }, + pattern: '*.stats' ] } } diff --git a/conf/test.config b/conf/test.config new file mode 100644 index 0000000..d4c9cc0 --- /dev/null +++ b/conf/test.config @@ -0,0 +1,28 @@ +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Nextflow config file for running minimal tests +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Defines input files and everything required to run a fast and simple pipeline test. + + Use as follows: + nextflow run plant-food-research-open/genepal -profile test, --outdir + +---------------------------------------------------------------------------------------- +*/ + +params { + config_profile_name = 'Test profile' + config_profile_description = 'Minimal test dataset to check pipeline function' + + // Tested with these minimal resources + max_cpus = 8 + max_memory = '32.GB' + max_time = '6.h' + + // Input data + input = "${projectDir}/tests/minimal/assemblysheet.csv" + protein_evidence = 'https://raw.githubusercontent.com/Gaius-Augustus/BRAKER/f58479fe5bb13a9e51c3ca09cb9e137cab3b8471/example/proteins.fa' + + braker_extra_args = '--gm_max_intergenic 10000 --skipOptimize' // Added for faster test execution! Do not use with actual data! + busco_lineage_datasets = 'eudicots_odb10' +} diff --git a/conf/test_full.config b/conf/test_full.config new file mode 100644 index 0000000..8c88716 --- /dev/null +++ b/conf/test_full.config @@ -0,0 +1,28 @@ +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Nextflow config file for running full-size tests +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Defines input files and everything required to run a full size pipeline test. + + Use as follows: + nextflow run plant-food-research-open/genepal -profile test_full, --outdir + +---------------------------------------------------------------------------------------- +*/ + +params { + config_profile_name = 'Full test profile' + config_profile_description = 'Full test dataset to check pipeline function' + + // Tested with these minimal resources + max_cpus = 8 + max_memory = '32.GB' + max_time = '6.h' + + // Input data + input = "${projectDir}/tests/minimal/assemblysheet.csv" + protein_evidence = 'https://raw.githubusercontent.com/Gaius-Augustus/BRAKER/f58479fe5bb13a9e51c3ca09cb9e137cab3b8471/example/proteins.fa' + + braker_extra_args = '--gm_max_intergenic 10000 --skipOptimize' // Added for faster test execution! Do not use with actual data! + busco_lineage_datasets = 'eudicots_odb10' +} diff --git a/docs/README.md b/docs/README.md new file mode 100644 index 0000000..b019955 --- /dev/null +++ b/docs/README.md @@ -0,0 +1,10 @@ +# plant-food-research-open/genepal: Documentation + +The plant-food-research-open/genepal documentation is split into the following pages: + +- [Usage](usage.md) + - An overview of how the pipeline works, how to run it and a description of all of the different command-line flags. +- [Parameters](parameters.md) + - An exhaustive list of pipeline parameters with their descriptions. +- [Output](output.md) + - An overview of the different results produced by the pipeline and how to interpret them. 
diff --git a/docs/contributors.sh b/docs/contributors.sh deleted file mode 100755 index 8dbfc36..0000000 --- a/docs/contributors.sh +++ /dev/null @@ -1,5 +0,0 @@ -#!/usr/bin/env bash - -module_authors=$(find ./modules -name meta.yml | xargs -I {} grep -A20 'authors:' {} | grep '\- ' | tr -d '[-" ]' | tr '[:upper:]' '[:lower:]') -workflow_authors=$(find ./subworkflows -name meta.yml | xargs -I {} grep -A20 'authors:' {} | grep '\- ' | tr -d '[-" ]' | tr '[:upper:]' '[:lower:]') -echo -e "${module_authors}\n${workflow_authors}" | sort -V | uniq | sed -n 's|@\(.*\)||p' diff --git a/docs/img/genepal.drawio b/docs/img/genepal.drawio new file mode 100644 index 0000000..232762a --- /dev/null +++ b/docs/img/genepal.drawio @@ -0,0 +1,444 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/docs/img/genepal.png b/docs/img/genepal.png new file mode 100644 index 0000000..f5c27eb Binary files /dev/null and b/docs/img/genepal.png differ diff --git a/docs/img/pangene.drawio b/docs/img/pangene.drawio deleted file mode 100644 index 0c61685..0000000 --- a/docs/img/pangene.drawio +++ /dev/null @@ -1,432 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - diff --git a/docs/img/pangene.png b/docs/img/pangene.png deleted file mode 100644 index a81e1c1..0000000 Binary files a/docs/img/pangene.png and /dev/null differ diff --git a/docs/output.md b/docs/output.md new file mode 100644 index 0000000..5fe498d --- /dev/null +++ b/docs/output.md @@ -0,0 +1,205 @@ +# plant-food-research-open/genepal: Output + +## Introduction + +This document describes the output produced by the pipeline. + +The directories listed below will be created in the results directory after the pipeline has finished. All paths are relative to the top-level results directory. 
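+As a quick orientation, the top-level layout of the results directory may look roughly as follows. This is an illustrative sketch only: the directory names are those described in the sections below, and which of them actually appear depends on the parameters used (several directories are only written when their corresponding `save_*` parameters are enabled).
+
+```text
+results/
+├── annotations/    # final annotation files and functional annotations
+├── busco/          # annotation and assembly completeness statistics
+├── etc/            # intermediary files such as BRAKER outputs (optional)
+├── orthofinder/    # orthology inference results
+├── repeatmasker/   # soft-masking outputs (optional)
+├── star/           # RNASeq alignments (optional)
+└── pipeline_info/  # Nextflow and pipeline reports
+```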
+ +## Pipeline overview + +The pipeline is built using [Nextflow](https://www.nextflow.io/) and processes data using the following steps: + + + +- [Repeat annotation](#repeat-annotation) +- [Repeat masking](#repeat-masking) +- [RNASeq trimming, filtering and QC](#rnaseq-trimming-filtering-and-qc) +- [RNASeq alignment](#rnaseq-alignment) +- [Annotation with BRAKER](#annotation-with-braker) +- [Annotation with Liftoff](#annotation-with-liftoff) +- [Annotation filtering and merging](#annotation-filtering-and-merging) +- [Functional annotation](#functional-annotation) +- [Orthology inference](#orthology-inference) +- [Final annotation files](#final-annotation-files) +- [Annotation QC](#annotation-qc) + +### Repeat annotation + +
+Output files + +- `repeatmodeler/` + - `*.fa`: Repeat library +- `edta/` + - `*.EDTA.TElib.fa`: Repeat library + +
+
+A repeat library is created with either [REPEATMODELER](https://github.com/Dfam-consortium/RepeatModeler) or [EDTA](https://github.com/oushujun/EDTA). The choice of tool is specified by the `repeat_annotator` parameter (default: `repeatmodeler`). Repeat annotation outputs are saved to the output directory only if the `save_annotated_te_lib` parameter is set to `true` (default: `false`).
+
+### Repeat masking
+
+Output files + +- `repeatmasker/` + - `*.masked`: Masked assembly + +
+
+Soft masking of the repeats is performed with [REPEATMASKER](https://github.com/rmhubley/RepeatMasker) using the repeat library prepared in the previous step. Masking outputs are saved to the output directory only if the `repeatmasker_save_outputs` parameter is set to `true` (default: `false`).
+
+### RNASeq trimming, filtering and QC
+
+Output files + +- `fastqc_raw/` + - `*.html`: HTML QC report for a sample before trimming + - `*.zip`: Zipped QC files for a sample before trimming +- `fastqc_trim/` + - `*.html`: HTML QC report for a sample after trimming + - `*.zip`: Zipped QC files for a sample after trimming +- `fastp/` + - `html/` + - `*.fastp.html`: HTML trimming report for a sample + - `json/` + - `*.fastp.json`: Trimming statistics for a sample + - `log/` + - `*.fastp.log`: Trimming log for a sample + - `*_{1,2}.fail.fastq.gz`: Reads which failed trimming + - `*.paired.fail.fastq.gz`: Pairs of reads which failed trimming + - `*.merged.fastq.gz`: Reads which passed trimming. For paired reads, reads 1 and 2 are merged into a single file +- `sortmerna/` + - `*.sortmerna.log`: Filtering log for a sample + - `*_{1,2}.non_rRNA.fastq.gz`: Filtered reads + +
+ +RNASeq reads are trimmed with [FASTP](https://github.com/OpenGene/fastp) and are QC'ed with [FASTQC](https://www.bioinformatics.babraham.ac.uk/projects/fastqc). Ribosomal reads are filtered out using [SORTMERNA](https://github.com/sortmerna/sortmerna). Trimmed reads are only stored to the output directory if the `save_trimmed` parameter is set to `true` (default: `false`). Reads filtered by [SORTMERNA](https://github.com/sortmerna/sortmerna) are stored to the output directory if the `save_non_ribo_reads` parameter is set to `true` (default: `false`). + +### RNASeq alignment + +
+Output files + +- `star/` + - `alignment/` + - `X.on.Y.Aligned.sortedByCoord.out.bam`: Sorted BAM file of read alignments for sample `X` against reference `Y` + - `X.on.Y.Log.final.out`: STAR final log file for sample `X` against reference `Y` + - `cat_bam/` + - `Y.bam`: A single BAM file for reference `Y` created by concatenating alignments from sample-wise `*.on.Y.Aligned.sortedByCoord.out.bam` files + +
+
+RNASeq alignment is performed with [STAR](https://github.com/alexdobin/STAR). Alignment files are only stored to the output directory if the `star_save_outputs` parameter is set to `true` (default: `false`). Concatenated BAM files are stored to the output directory if the `save_cat_bam` parameter is set to `true` (default: `false`).
+
+### Annotation with BRAKER
+
+Output files + +- `etc/braker/` + - `Y/` + - `braker.gff3`: Gene models predicted by BRAKER in GFF3 format + - `braker.gtf`: Gene models predicted by BRAKER in GTF format + - `braker.codingseq`: Coding sequences for the predicted genes + - `braker.aa`: Protein sequences for the predicted genes + - `braker.log`: BRAKER log file + - `hintsfile.gff`: Evidential hints used by BRAKER in GFF format + - `what-to-cite.txt`: A list of references which must be cited when reporting outputs created by BRAKER + +
+[BRAKER](https://github.com/Gaius-Augustus/BRAKER) is used to annotate each genome assembly using the provided protein and RNASeq evidence. Outputs from BRAKER are stored to the output directory if the `braker_save_outputs` parameter is set to `true` (default: `false`).
+
+> [!CAUTION]
+>
+> BRAKER outputs are not the final outputs of the pipeline, which is why they are not stored by default. These are only intermediary files.
+>
+> The pipeline further processes the BRAKER predictions and stores the final validated outputs in the `annotations` directory. The `braker_save_outputs` option is only provided to allow a manual resume of the pipeline for advanced use cases.
+
+### Annotation with Liftoff
+
+Gene models are lifted from reference assembly(ies) to the target assembly using [LIFTOFF](https://github.com/agshumate/Liftoff). Currently, the outputs from Liftoff are considered intermediary and an option to store them in the output directory is not available.
+
+### Annotation filtering and merging
+
+Annotations obtained from [BRAKER](https://github.com/Gaius-Augustus/BRAKER) and [LIFTOFF](https://github.com/agshumate/Liftoff) are filtered with [TSEBRA](https://github.com/Gaius-Augustus/TSEBRA) and merged with [AGAT](https://github.com/NBISweden/AGAT). Currently, the outputs from these processes are considered intermediary and an option to store them in the output directory is not available.
+
+### Functional annotation
+
+Output files + +- `annotations/` + - `Y/` + - `Y.emapper.annotations`: TSV with the annotation results + - `Y.emapper.hits`: TSV with the search results + - `Y.emapper.seed_orthologs`: TSV with the results from parsing the hits, linking queries with seed orthologs + +
+ +Functional annotation of the gene models from BRAKER and Liftoff is performed with [EGGNOG-MAPPER](https://github.com/eggnogdb/eggnog-mapper). + +### Orthology inference + +
+Output files + +- `orthofinder/` + - `genepal/*` + +
+ +If more than one genome is included in the pipeline, [ORTHOFINDER](https://github.com/davidemms/OrthoFinder) is used to perform an orthology inference. + +### Final annotation files + +
+Output files + +- `annotations/` + - `Y/` + - `Y.gt.gff3`: Final annotation file for genome `Y` which contains gene models and their functional annotations + - `Y.pep.fasta`: Protein sequences for the gene models + +
+ +The final annotation files are saved in GFF3 format validated with [GENOMETOOLS](https://github.com/genometools/genometools) and FASTA format obtained with [GFFREAD](https://github.com/gpertea/gffread). + +### Annotation QC + +
+Output files + +- `busco/` + - `gff/` + - `short_summary.specific.Y.eudicots_odb10.txt`: BUSCO summary for annotations from genome `Y` against the `eudicots_odb10` database + - `busco_figure`: BUSCO summary figure including statistics for annotations from all the genomes + - `fasta/` + - `short_summary.specific.Y.eudicots_odb10.txt`: BUSCO summary for genome `Y` against the `eudicots_odb10` database + - `busco_figure`: BUSCO summary figure including statistics for all the genomes +- `etc/` + - `splicing_marked/` + - `Y.gff3`: Final annotation file for genome `Y` which contains gene models and their functional annotations. Additionally, the intron features are marked as canonical or non-canonical and the splice motif is also added an attribute. + +
+
+The completeness of the annotations is checked with [BUSCO](https://gitlab.com/ezlab/busco). To provide a comparative baseline, the completeness of the genomes is also checked. Moreover, the canonical/non-canonical splicing of the introns is also assessed by the pipeline.
+
+### Pipeline information
+
+Output files + +- `pipeline_info/` + - Reports generated by Nextflow: `execution_report.html`, `execution_timeline.html`, `execution_trace.txt` and `pipeline_dag.dot`/`pipeline_dag.svg`. + - Reports generated by the pipeline: `pipeline_report.html`, `pipeline_report.txt` and `software_versions.yml`. The `pipeline_report*` files will only be present if the `--email` / `--email_on_fail` parameter's are used when running the pipeline. + - Reformatted samplesheet files used as input to the pipeline: `samplesheet.valid.csv`. + - Parameters used by the pipeline run: `params.json`. + +
+ +[Nextflow](https://www.nextflow.io/docs/latest/tracing.html) provides excellent functionality for generating various reports relevant to the running and execution of the pipeline. This will allow you to troubleshoot errors with the running of the pipeline, and also provide you with other information such as launch commands, run times and resource usage. diff --git a/docs/parameters.md b/docs/parameters.md index 8403d49..0bb7871 100644 --- a/docs/parameters.md +++ b/docs/parameters.md @@ -1,18 +1,19 @@ -# plantandfoodresearch/pangene pipeline parameters +# plant-food-research-open/genepal pipeline parameters -A NextFlow pipeline for pan-genome annotation +A Nextflow pipeline for single genome, multiple genomes and pan-genome annotation ## Input/output options -| Parameter | Description | Type | Default | Required | Hidden | -| ------------------------- | ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | --------- | --------- | -------- | ------ | -| `input` | Target assemblies listed in a CSV sheet
HelpFASTA and other associated files for target assemblies provided as a CSV sheet
| `string` | | True | | -| `external_protein_fastas` | External protein fastas listed in a text sheet
HelpA text file listing FASTA files to provide protein evidence for annotation
| `string` | | True | | -| `eggnogmapper_db_dir` | Eggnogmapper database directory | `string` | | True | | -| `eggnogmapper_tax_scope` | Eggnogmapper taxonomy scopre | `integer` | | True | | -| `fastq` | FASTQ samples listed in a CSV sheet
HelpFASTQ files for RNASeq samples corresponding to each target assembly provided in a CSV sheet
| `string` | | | | -| `liftoff_annotations` | Reference annotations listed in a CSV sheet
HelpFASTA and GFF3 files for reference annotations for liftoff listed in a CSV sheet
| `string` | | | | -| `outdir` | The output directory where the results will be saved
Help Use absolute paths to storage on Cloud infrastructure
| `string` | ./results | True | | +| Parameter | Description | Type | Default | Required | Hidden | +| ------------------------- | -------------------------------------------------------------------------------------------------------- | --------- | ------- | -------- | ------ | +| `input` | Target assemblies listed in a CSV sheet | `string` | | True | | +| `protein_evidence` | Protein evidence provided as a fasta file or multiple fasta files listed in a plain txt file | `string` | | True | | +| `eggnogmapper_db_dir` | Eggnogmapper database directory | `string` | | | | +| `eggnogmapper_tax_scope` | Eggnogmapper taxonomy scopre. Eukaryota: 2759, Viridiplantae: 33090, Archaea: 2157, Bacteria: 2, root: 1 | `integer` | 1 | | | +| `rna_evidence` | FASTQ/BAM samples listed in a CSV sheet | `string` | | | | +| `liftoff_annotations` | Reference annotations listed in a CSV sheet | `string` | | | | +| `orthofinder_annotations` | Additional annotations for orthology listed in a CSV sheet | `string` | | | | +| `outdir` | The output directory where the results will be saved | `string` | | True | | ## Repeat annotation options @@ -36,7 +37,7 @@ A NextFlow pipeline for pan-genome annotation | `save_non_ribo_reads` | Save FASTQ files after Ribosomal RNA removal or not? | `boolean` | | | | | `ribo_database_manifest` | Ribosomal RNA fastas listed in a text sheet | `string` | ${projectDir}/assets/rrna-db-defaults.txt | | | -## RNAseq alignment options +## RNASeq alignment options | Parameter | Description | Type | Default | Required | Hidden | | ------------------------ | ------------------------------------------------- | --------- | ------- | -------- | ------ | @@ -47,22 +48,76 @@ A NextFlow pipeline for pan-genome annotation ## Annotation options -| Parameter | Description | Type | Default | Required | Hidden | -| --------------------------- | --------------------------------------------------------------------------------- | --------- | ------- | -------- | ------ | -| `braker_extra_args` | Extra arguments for BRAKER | `string` | | | | -| `braker_allow_isoforms` | Allow multiple isoforms for gene models | `boolean` | True | | | -| `liftoff_coverage` | Liftoff coverage parameter | `number` | 0.9 | | | -| `liftoff_identity` | Liftoff identity parameter | `number` | 0.9 | | | -| `eggnogmapper_evalue` | Only report alignments below or equal the e-value threshold | `number` | 1e-05 | | | -| `eggnogmapper_pident` | Only report alignments above or equal to the given percentage of identity (0-100) | `integer` | 35 | | | -| `eggnogmapper_purge_nohits` | Purge transcripts which do not have a hit against eggnog | `boolean` | | | | +| Parameter | Description | Type | Default | Required | Hidden | +| --------------------- | --------------------------------------------------------------------------------- | --------- | ------- | -------- | ------ | +| `braker_extra_args` | Extra arguments for BRAKER | `string` | | | | +| `liftoff_coverage` | Liftoff coverage parameter | `number` | 0.9 | | | +| `liftoff_identity` | Liftoff identity parameter | `number` | 0.9 | | | +| `eggnogmapper_evalue` | Only report alignments below or equal the e-value threshold | `number` | 1e-05 | | | +| `eggnogmapper_pident` | Only report alignments above or equal to the given percentage of identity (0-100) | `integer` | 35 | | | + +## Post-annotation filtering options + +| Parameter | Description | Type | Default | Required | Hidden | +| ----------------------------- | ----------------------------------------------------------------- | 
--------- | ------- | -------- | ------ | +| `allow_isoforms` | Allow multiple isoforms for gene models | `boolean` | True | | | +| `enforce_full_intron_support` | Require every model to have external evidence for all its introns | `boolean` | True | | | +| `filter_liftoff_by_hints` | Use BRAKER hints to filter Liftoff models | `boolean` | True | | | +| `eggnogmapper_purge_nohits` | Purge transcripts which do not have a hit against eggnog | `boolean` | | | | + +## Annotation output options + +| Parameter | Description | Type | Default | Required | Hidden | +| ----------------------------- | ------------------------------------ | --------- | ------- | -------- | ------ | +| `braker_save_outputs` | Save BRAKER files | `boolean` | | | | +| `add_attrs_to_proteins_fasta` | Add gff attributes to proteins fasta | `boolean` | | | | + +## Evaluation options + +| Parameter | Description | Type | Default | Required | Hidden | +| ------------------------ | --------------------------------------------------------------------------- | --------- | --------------- | -------- | ------ | +| `busco_skip` | Skip evaluation by BUSCO | `boolean` | | | | +| `busco_lineage_datasets` | BUSCO lineages as a space-separated list: 'fungi_odb10 microsporidia_odb10' | `string` | eukaryota_odb10 | | | ## Max job request options Set the top limit for requested resources for any single job. -| Parameter | Description | Type | Default | Required | Hidden | -| ------------ | --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | --------- | ------- | -------- | ------ | -| `max_cpus` | Maximum number of CPUs that can be requested for any single job.
HelpUse to set an upper-limit for the CPU requirement for each process. Should be an integer e.g. `--max_cpus 1`
| `integer` | 12 | | True | -| `max_memory` | Maximum amount of memory that can be requested for any single job.
HelpUse to set an upper-limit for the memory requirement for each process. Should be a string in the format integer-unit e.g. `--max_memory '8.GB'`
| `string` | 200.GB | | True | -| `max_time` | Maximum amount of time that can be requested for any single job.
HelpUse to set an upper-limit for the time requirement for each process. Should be a string in the format integer-unit e.g. `--max_time '2.h'`
| `string` | 7.day | | True | +| Parameter | Description | Type | Default | Required | Hidden | +| ------------ | ---------------------------------------------------------------------------------- | --------- | ------- | -------- | ------ | +| `max_cpus` | Maximum number of CPUs that can be requested for any single job. | `integer` | 16 | | | +| `max_memory` | Maximum amount of memory that can be requested for any single job. Example: '8.GB' | `string` | 72.GB | | | +| `max_time` | Maximum amount of time that can be requested for any single job. Example: '1.day' | `string` | 7.day | | | + +## Institutional config options + +Parameters used to describe centralised config profiles. These should not be edited. + +| Parameter | Description | Type | Default | Required | Hidden | +| ---------------------------- | ----------------------------------------- | -------- | -------------------------------------------------------- | -------- | ------ | +| `custom_config_version` | Git commit id for Institutional configs. | `string` | master | | True | +| `custom_config_base` | Base directory for Institutional configs. | `string` | https://raw.githubusercontent.com/nf-core/configs/master | | True | +| `config_profile_name` | Institutional config name. | `string` | | | True | +| `config_profile_description` | Institutional config description. | `string` | | | True | +| `config_profile_contact` | Institutional config contact information. | `string` | | | True | +| `config_profile_url` | Institutional config URL link. | `string` | | | True | + +## Generic options + +Less common options for the pipeline, typically set in a config file. + +| Parameter | Description | Type | Default | Required | Hidden | +| ---------------------------------- | ----------------------------------------------------------------------- | --------- | -------------------------------------------------------- | -------- | ------ | +| `help` | Display help text. | `boolean` | | | True | +| `version` | Display version and exit. | `boolean` | | | True | +| `publish_dir_mode` | Method used to save pipeline results to output directory. | `string` | copy | | True | +| `email` | Email address for completion summary. | `string` | | | True | +| `email_on_fail` | Email address for completion summary, only when pipeline fails. | `string` | | | True | +| `plaintext_email` | Send plain-text email instead of HTML. | `boolean` | | | True | +| `monochrome_logs` | Do not use coloured log outputs. | `boolean` | | | True | +| `hook_url` | Incoming hook URL for messaging service | `string` | | | True | +| `validate_params` | Boolean whether to validate parameters against the schema at runtime | `boolean` | True | | True | +| `validationShowHiddenParams` | Show all params when using `--help` | `boolean` | | | True | +| `validationFailUnrecognisedParams` | Validation of parameters fails when an unrecognised parameter is found. | `boolean` | | | True | +| `validationLenientMode` | Validation of parameters in lenient more. | `boolean` | | | True | +| `pipelines_testdata_base_path` | Base path for pipeline test datasets | `string` | https://raw.githubusercontent.com/nf-core/test-datasets/ | | True | diff --git a/docs/usage.md b/docs/usage.md new file mode 100644 index 0000000..107f26b --- /dev/null +++ b/docs/usage.md @@ -0,0 +1,332 @@ +# plant-food-research-open/genepal: Usage + +> [!NOTE] +> +> This document does not describe every pipeline parameter. For an exhaustive list of parameters, see [parameters.md](./parameters.md). 
+ +- [Assemblysheet input](#assemblysheet-input) +- [Protein evidence](#protein-evidence) +  - [BRAKER workflow](#braker-workflow) +- [RNASeq evidence](#rnaseq-evidence) +  - [BRAKER workflow](#braker-workflow-1) +  - [Preprocessing](#preprocessing) +  - [Alignment](#alignment) +- [Liftoff annotations](#liftoff-annotations) +- [EggNOG-mapper DB](#eggnog-mapper-db) +- [Orthology inference input](#orthology-inference-input) +- [Iso-forms and full intron support](#iso-forms-and-full-intron-support) +- [Running the pipeline](#running-the-pipeline) +  - [Updating the pipeline](#updating-the-pipeline) +  - [Reproducibility](#reproducibility) +- [Core Nextflow arguments](#core-nextflow-arguments) +  - [`-profile`](#-profile) +  - [`-resume`](#-resume) +  - [`-c`](#-c) +- [Custom configuration](#custom-configuration) +  - [Resource requests](#resource-requests) +  - [Custom Containers](#custom-containers) +  - [Custom Tool Arguments](#custom-tool-arguments) +  - [nf-core/configs](#nf-coreconfigs) +- [Azure Resource Requests](#azure-resource-requests) +- [Running in the background](#running-in-the-background) +- [Nextflow memory requirements](#nextflow-memory-requirements) + +## Assemblysheet input + +> ✅ Mandatory `--input` + +Before running the pipeline, you will need to create an assemblysheet with information about the genome assemblies you would like to annotate. Use the `input` parameter to specify its location. It has to be a comma-separated file with at least three columns and a header row. + +- `tag:` A unique tag which represents the target assembly throughout the pipeline. The `tag` and the `fasta` file name should not be the same (e.g. `tag.fasta`), as this can create file name collisions in the pipeline or overwrite files. It is also good practice to make all the input files read-only. +- `fasta:` FASTA file for the genome +- `is_masked:` Whether the FASTA is masked or not. Use yes/no to indicate the masking status. If the assembly is not masked, the pipeline will soft mask it before annotating it. +- `te_lib [Optional]`: If an assembly is not masked and a TE library which can be used to mask it is available, the path of the TE library FASTA file can be provided here. If this column is absent and the assembly is not masked, the pipeline will first create a TE library so that it can soft mask the assembly. + +## Protein evidence + +> ✅ Mandatory `--protein_evidence` + +Protein evidence can be provided in two ways: as a single FASTA file, or as a plain text file listing multiple FASTA files. The extension of the text file must be `txt`. + +### BRAKER workflow + +With these two parameters, the pipeline has sufficient inputs to execute the [BRAKER workflow C](https://github.com/Gaius-Augustus/BRAKER/tree/f58479fe5bb13a9e51c3ca09cb9e137cab3b8471?tab=readme-ov-file#overview-of-modes-for-running-braker) (see Figure 4) in which GeneMark-EP+ is trained on protein spliced alignments, then GeneMark-EP+ generates training data for AUGUSTUS, which then performs the final gene prediction. + +## RNASeq evidence + +> ❔ Optional `--rna_evidence` + +RNASeq evidence must be provided through a samplesheet in CSV format which has the following columns, + +- `sample:` A sample identifier. The `sample` identifiers have to be the same when you have re-sequenced the same sample more than once, e.g. to increase sequencing depth. The pipeline will concatenate the raw reads before performing any downstream analysis.
+- `file_1:` A FASTQ or BAM file +- `file_2:` A FASTQ file with the mate reads if `file_1` is also a FASTQ file and the sample is paired-end. +- `target_assemblies:` A semicolon `;` separated list of assembly tags from the [assemblysheet input](#assemblysheet-input). If `file_1` points to a BAM file, only a single assembly can be listed under `target_assemblies` for that sample. FASTQ data from `file_1` and `file_2` is aligned against each target assembly. BAM data from `file_1` is considered already aligned against the target assembly and is directly fed to BRAKER. + +### BRAKER workflow + +If RNASeq evidence is provided, the pipeline executes the [BRAKER workflow D](https://github.com/Gaius-Augustus/BRAKER/tree/f58479fe5bb13a9e51c3ca09cb9e137cab3b8471?tab=readme-ov-file#overview-of-modes-for-running-braker) (see Figure 4) in which GeneMark-ETP is trained with both protein and RNASeq evidence and the training data generated by GeneMark-ETP is used to optimise AUGUSTUS for the final gene predictions. + +### Preprocessing + +RNASeq reads provided in FASTQ files are trimmed with [FASTP](https://github.com/OpenGene/fastp) by default. No extra arguments are passed to FASTP by default; additional parameters can be provided with the `--extra_fastp_args` parameter. After trimming, any sample which does not have at least `10000` reads left is dropped. This threshold can be specified with the `--min_trimmed_reads` parameter. If trimming has already been performed or is not desired, it can be skipped by setting the `--skip_fastp` flag to `true`. + +Optionally, [SORTMERNA](https://github.com/sortmerna/sortmerna) can be activated by setting the `--remove_ribo_rna` flag to `true`. A default list of rRNA databases is pre-configured and can be seen in the [assets/rrna-db-defaults.txt](../assets/rrna-db-defaults.txt) file. A path to a custom list of databases can be specified with the `--ribo_database_manifest` parameter. + +### Alignment + +RNASeq evidence provided as FASTQ files is aligned using [STAR](https://github.com/alexdobin/STAR). The default alignment parameters are, + +```bash +--outSAMstrandField intronMotif \ +--outSAMtype BAM SortedByCoordinate \ +--readFilesCommand gunzip -c \ +--alignIntronMax $star_max_intron_length +``` + +where `--star_max_intron_length` is a pipeline parameter with a default value of `16000`. In our experience, the performance of BRAKER predictions is fairly sensitive to this parameter, and its value should be based on an estimate of the intron lengths in the genes of the target _species_. Additional STAR parameters can be specified with `--star_align_extra_args`. + +> [!WARNING] +> +> If pre-aligned RNASeq data is provided as a BAM file and the alignment was not performed with the `--outSAMstrandField intronMotif` parameter, the pipeline may throw an error. + +## Liftoff annotations + +> ❔ Optional `--liftoff_annotations` + +In addition to gene prediction with BRAKER, the pipeline also enables gene model transfer from one or more reference assemblies to all the target assemblies. The reference assemblies and the associated gene models must be specified through a CSV file with the following two columns, + +- `fasta:` Reference assembly genome in a FASTA file +- `gff3:` Reference assembly gene models in a GFF3 file + +[LIFTOFF](https://github.com/agshumate/Liftoff) is used for lifting over the models.
The default LIFTOFF parameters are, + +```bash +-exclude_partial \ +-copies \ +-polish \ +-a $liftoff_coverage \ +-s $liftoff_identity +``` + +where `--liftoff_coverage` and `--liftoff_identity` are pipeline parameters and their default values are `0.9`. After the liftover, the pipeline filters out any model which is marked as `valid_ORF=False` by [LIFTOFF](https://github.com/agshumate/Liftoff). Then, the BRAKER and LIFTOFF annotations are merged together. During this merge, LIFTOFF models are given precedence over BRAKER models. In regions where a LIFTOFF model overlaps a BRAKER model, the BRAKER model is dropped. + +## EggNOG-mapper DB + +> ❔ Optional `--eggnogmapper_db_dir`, `--eggnogmapper_tax_scope` + +EggNOG-mapper is used to add functional annotations to the gene models. The EggNOG-mapper database must be downloaded manually before running the pipeline. The database is available at . The path to the database folder must be provided with the `--eggnogmapper_db_dir` parameter. The pipeline assumes the following directory structure for the database path. + +```bash +/path/to/db +├── eggnog.db +├── eggnog.taxa.db +├── eggnog.taxa.db.traverse.pkl +├── eggnog_proteins.dmnd +├── mmseqs +│ ├── mmseqs.db +│ ├── mmseqs.db.dbtype +│ ├── mmseqs.db.index +│ ├── mmseqs.db.lookup +│ ├── mmseqs.db.source +│ ├── mmseqs.db_h +│ ├── mmseqs.db_h.dbtype +│ └── mmseqs.db_h.index +└── pfam + ├── Pfam-A.clans.tsv.gz + ├── Pfam-A.hmm + ├── Pfam-A.hmm.h3f + ├── Pfam-A.hmm.h3i + ├── Pfam-A.hmm.h3m + ├── Pfam-A.hmm.h3m.ssi + ├── Pfam-A.hmm.h3p + └── Pfam-A.hmm.idmap +``` + +An appropriate taxonomic scope for the mapper can be specified with the `--eggnogmapper_tax_scope` parameter; otherwise, the pipeline uses the default value of `1` for the taxonomic scope. Common taxonomic scopes are Eukaryota: `2759`, Viridiplantae: `33090`, Archaea: `2157`, Bacteria: `2` and root: `1`. For a comprehensive list of available scopes, see . + +## Orthology inference input + +> ❔ Optional `--orthofinder_annotations` + +If there is more than one target assembly, orthology inference is performed with [ORTHOFINDER](https://github.com/davidemms/OrthoFinder). Additional annotations can be directly provided for the orthology inference with the `--orthofinder_annotations` parameter. This should be the path to a CSV file with the following two columns, + +- `tag:` A unique tag which represents the annotation. The `tag` and the `fasta` file name should not be the same (e.g. `tag.fasta`), as this can create file name collisions in the pipeline or overwrite files. It is also good practice to make all the input files read-only. +- `fasta:` FASTA file containing protein sequences. + +## Iso-forms and full intron support + +By default, the pipeline allows multiple isoforms from BRAKER. This behavior can be changed by setting the `--allow_isoforms` flag to `false`. Moreover, every intron of every model from BRAKER and LIFTOFF must have support from protein or RNASeq evidence. This is enforced with [TSEBRA](https://github.com/Gaius-Augustus/TSEBRA). This requirement can be removed by setting the `--enforce_full_intron_support` flag to `false`, or applied only to BRAKER models by setting the `--filter_liftoff_by_hints` flag to `false`.
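+
+To illustrate the sheet formats described above, here is a minimal, purely hypothetical assemblysheet. The tags and file paths are illustrative only; the first three columns are required and `te_lib` is optional.
+
+```csv
+tag,fasta,is_masked,te_lib
+asm_a,/path/to/assembly_a.fasta,no,/path/to/te_lib_a.fasta
+asm_b,/path/to/assembly_b.fasta,yes,
+```
+
+An equally hypothetical samplesheet for `--rna_evidence` might look like the following; note the semicolon-separated `target_assemblies` for the FASTQ sample and the single target assembly for the pre-aligned BAM sample.
+
+```csv
+sample,file_1,file_2,target_assemblies
+leaf_rep1,/path/to/leaf_rep1_R1.fastq.gz,/path/to/leaf_rep1_R2.fastq.gz,asm_a;asm_b
+root_rep1,/path/to/root_rep1.bam,,asm_a
+```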
+ +## Running the pipeline + +The typical command for running the pipeline is as follows: + +```bash +nextflow run plant-food-research-open/genepal \ +    -profile <docker/singularity/.../institute> \ +    --input assemblysheet.csv \ +    --protein_evidence proteins.faa \ +    --outdir <OUTDIR> +``` + +This will launch the pipeline with the `docker` configuration profile. See below for more information about profiles. + +Note that the pipeline will create the following files in your working directory: + +```bash +work # Directory containing the nextflow working files +<OUTDIR> # Finished results in specified location (defined with --outdir) +.nextflow.log # Log file from Nextflow +# Other nextflow hidden files, eg. history of pipeline runs and old logs. +``` + +If you wish to repeatedly use the same parameters for multiple runs, rather than specifying each flag in the command, you can specify these in a params file. + +Pipeline settings can be provided in a `yaml` or `json` file via `-params-file <file>`. + +> [!WARNING] +> Do not use `-c <file>` to specify parameters as this will result in errors. Custom config files specified with `-c` must only be used for [tuning process resource specifications](https://nf-co.re/docs/usage/configuration#tuning-workflow-resources), other infrastructural tweaks (such as output directories), or module arguments (args). + +The above pipeline run specified with a params file in yaml format: + +```bash +nextflow run plant-food-research-open/genepal -profile docker -params-file params.yaml +``` + +with `params.yaml` containing: + +```yaml +input: './assemblysheet.csv' +outdir: './results/' +protein_evidence: './proteins.faa' +<...> +``` + +You can also generate such `YAML`/`JSON` files via [nf-core/launch](https://nf-co.re/launch). + +### Updating the pipeline + +When you run the above command, Nextflow automatically pulls the pipeline code from GitHub and stores it as a cached version. When running the pipeline after this, it will always use the cached version if available - even if the pipeline has been updated since. To make sure that you're running the latest version of the pipeline, make sure that you regularly update the cached version of the pipeline: + +```bash +nextflow pull plant-food-research-open/genepal +``` + +### Reproducibility + +It is a good idea to specify a pipeline version when running the pipeline on your data. This ensures that a specific version of the pipeline code and software are used when you run your pipeline. If you keep using the same tag, you'll be running the same version of the pipeline, even if there have been changes to the code since. + +First, go to the [plant-food-research-open/genepal releases page](https://github.com/plant-food-research-open/genepal/releases) and find the latest pipeline version - numeric only (eg. `1.3.1`). Then specify this when running the pipeline with `-r` (one hyphen) - eg. `-r 1.3.1`. Of course, you can switch to another version by changing the number after the `-r` flag. + +This version number will be logged in reports when you run the pipeline, so that you'll know what you used when you look back in the future. For example, at the bottom of the MultiQC reports. + +To further assist in reproducibility, you can share and re-use [parameter files](#running-the-pipeline) to repeat pipeline runs with the same settings without having to write out a command with every single parameter.
+ +> [!TIP] +> If you wish to share such a profile (for example, as supplementary material for an academic publication), make sure NOT to include cluster-specific paths to files or institution-specific profiles. + +## Core Nextflow arguments + +> [!NOTE] +> These options are part of Nextflow and use a _single_ hyphen (pipeline parameters use a double-hyphen). + +### `-profile` + +Use this parameter to choose a configuration profile. Profiles can give configuration presets for different compute environments. + +Several generic profiles are bundled with the pipeline which instruct the pipeline to use software packaged using different methods (Docker, Singularity, Podman, Shifter, Charliecloud, Apptainer, Conda) - see below. + +> [!NOTE] +> We highly recommend the use of Docker or Singularity containers for full pipeline reproducibility, however when this is not possible, Conda is also supported. + +The pipeline also dynamically loads configurations from [https://github.com/nf-core/configs](https://github.com/nf-core/configs) when it runs, making multiple config profiles for various institutional clusters available at run time. For more information and to see if your system is available in these configs please see the [nf-core/configs documentation](https://github.com/nf-core/configs#documentation). + +Note that multiple profiles can be loaded, for example: `-profile test,docker` - the order of arguments is important! +They are loaded in sequence, so later profiles can overwrite earlier profiles. + +If `-profile` is not specified, the pipeline will run locally and expect all software to be installed and available on the `PATH`. This is _not_ recommended, since it can lead to different results on different machines dependent on the computer environment. + +- `test` +  - A profile with a complete configuration for automated testing +  - Includes links to test data so needs no other parameters +- `docker` +  - A generic configuration profile to be used with [Docker](https://docker.com/) +- `singularity` +  - A generic configuration profile to be used with [Singularity](https://sylabs.io/docs/) +- `podman` +  - A generic configuration profile to be used with [Podman](https://podman.io/) +- `shifter` +  - A generic configuration profile to be used with [Shifter](https://nersc.gitlab.io/development/shifter/how-to-use/) +- `charliecloud` +  - A generic configuration profile to be used with [Charliecloud](https://hpc.github.io/charliecloud/) +- `apptainer` +  - A generic configuration profile to be used with [Apptainer](https://apptainer.org/) +- `wave` +  - A generic configuration profile to enable [Wave](https://seqera.io/wave/) containers. Use together with one of the above (requires Nextflow `24.03.0-edge` or later). +- `conda` +  - A generic configuration profile to be used with [Conda](https://conda.io/docs/). Please only use Conda as a last resort, i.e. when it's not possible to run the pipeline with Docker, Singularity, Podman, Shifter, Charliecloud, or Apptainer. + +### `-resume` + +Specify this when restarting a pipeline. Nextflow will use cached results from any pipeline steps where the inputs are the same, continuing from where it got to previously. For input to be considered the same, not only the names must be identical but the files' contents as well. For more info about this parameter, see [this blog post](https://www.nextflow.io/blog/2019/demystifying-nextflow-resume.html). + +You can also supply a run name to resume a specific run: `-resume [run-name]`.
Use the `nextflow log` command to show previous run names. + +### `-c` + +Specify the path to a specific config file (this is a core Nextflow command). See the [nf-core website documentation](https://nf-co.re/usage/configuration) for more information. + +## Custom configuration + +### Resource requests + +Whilst the default requirements set within the pipeline will hopefully work for most people and with most input data, you may find that you want to customise the compute resources that the pipeline requests. Each step in the pipeline has a default set of requirements for number of CPUs, memory and time. For most of the steps in the pipeline, if the job exits with any of the error codes specified [here](https://github.com/nf-core/rnaseq/blob/4c27ef5610c87db00c3c5a3eed10b1d161abf575/conf/base.config#L18) it will automatically be resubmitted with higher requests (2 x original, then 3 x original). If it still fails after the third attempt then the pipeline execution is stopped. + +To change the resource requests, please see the [max resources](https://nf-co.re/docs/usage/configuration#max-resources) and [tuning workflow resources](https://nf-co.re/docs/usage/configuration#tuning-workflow-resources) sections of the nf-core website. + +### Custom Containers + +In some cases, you may wish to change which container or conda environment a step of the pipeline uses for a particular tool. By default, nf-core pipelines use containers and software from the [biocontainers](https://biocontainers.pro/) or [bioconda](https://bioconda.github.io/) projects. However, in some cases the pipeline-specified version may be out of date. + +To use a different container from the default container or conda environment specified in a pipeline, please see the [updating tool versions](https://nf-co.re/docs/usage/configuration#updating-tool-versions) section of the nf-core website. + +### Custom Tool Arguments + +A pipeline might not always support every possible argument or option of a particular tool used in the pipeline. Fortunately, nf-core pipelines provide some freedom to users to insert additional parameters that the pipeline does not include by default. + +To learn how to provide additional arguments to a particular tool of the pipeline, please see the [customising tool arguments](https://nf-co.re/docs/usage/configuration#customising-tool-arguments) section of the nf-core website. + +### nf-core/configs + +In most cases, you will only need to create a custom config as a one-off but if you and others within your organisation are likely to be running nf-core pipelines regularly and need to use the same settings, it may be a good idea to request that your custom config file is uploaded to the `nf-core/configs` git repository. Before you do this, please test that the config file works with your pipeline of choice using the `-c` parameter. You can then create a pull request to the `nf-core/configs` repository with the addition of your config file, associated documentation file (see examples in [`nf-core/configs/docs`](https://github.com/nf-core/configs/tree/master/docs)), and amending [`nfcore_custom.config`](https://github.com/nf-core/configs/blob/master/nfcore_custom.config) to include your custom profile. + +See the main [Nextflow documentation](https://www.nextflow.io/docs/latest/config.html) for more information about creating your own configuration files.
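+
+As a minimal sketch, a custom config passed with `-c` to raise the resources of a single step might look like the following. The `BRAKER3` process name is taken from this pipeline's modules; the resource values are purely illustrative and should be adjusted to your data and your cluster, and the selector may need a wildcard (e.g. `'.*:BRAKER3'`) depending on how the process is nested in the workflow.
+
+```groovy
+// custom_resources.config -- example only
+process {
+    withName: 'BRAKER3' {
+        cpus   = 16
+        memory = 64.GB
+        time   = 48.h
+    }
+}
+```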
+ +If you have any questions or issues please send us a message on [Slack](https://nf-co.re/join/slack) on the [`#configs` channel](https://nfcore.slack.com/channels/configs). + +## Azure Resource Requests + +To be used with the `azurebatch` profile by specifying `-profile azurebatch`. +We recommend providing a compute `params.vm_type` of `Standard_D16_v3` VMs by default, but these options can be changed if required. + +Note that the choice of VM size depends on your quota and the overall workload during the analysis. +For a thorough list, please refer to the [Azure Sizes for virtual machines in Azure](https://docs.microsoft.com/en-us/azure/virtual-machines/sizes). + +## Running in the background + +Nextflow handles job submissions and supervises the running jobs. The Nextflow process must run until the pipeline is finished. + +The Nextflow `-bg` flag launches Nextflow in the background, detached from your terminal so that the workflow does not stop if you log out of your session. The logs are saved to a file. + +Alternatively, you can use `screen` / `tmux` or a similar tool to create a detached session which you can log back into at a later time. +Some HPC setups also allow you to run nextflow within a cluster job submitted to your job scheduler (from where it submits more jobs). + +## Nextflow memory requirements + +In some cases, the Nextflow Java virtual machines can start to request a large amount of memory. +We recommend adding the following line to your environment to limit this (typically in `~/.bashrc` or `~/.bash_profile`): + +```bash +NXF_OPTS='-Xms1g -Xmx4g' +``` diff --git a/local_pangene b/local_genepal similarity index 64% rename from local_pangene rename to local_genepal index 8a1aa8a..525f930 100755 --- a/local_pangene +++ b/local_genepal @@ -14,8 +14,11 @@ F_BOLD="\033[1m" nextflow run \ main.nf \ - -profile local,docker \ + -profile docker,test \ -resume \ $stub \ - -params-file pangene-test/params.json \ - --eggnogmapper_db_dir ../dbs/emapperdb/5.0.2 + --max_cpus 8 \ + --max_memory '32.GB' \ + --eggnogmapper_tax_scope 33090 \ + --eggnogmapper_db_dir ../dbs/emapperdb/5.0.2 \ + --outdir results diff --git a/main.nf b/main.nf index 067425b..ae1f9e5 100755 --- a/main.nf +++ b/main.nf @@ -1,17 +1,137 @@ #!/usr/bin/env nextflow +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +    plant-food-research-open/genepal +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +    Github : https://github.com/plant-food-research-open/genepal +---------------------------------------------------------------------------------------- +*/ -nextflow.enable.dsl=2 +nextflow.enable.dsl = 2 -include { validateParameters } from 'plugin/nf-validation' +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +    IMPORT FUNCTIONS / MODULES / SUBWORKFLOWS / WORKFLOWS +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +*/ -validateParameters() +include { GENEPAL                 } from './workflows/genepal' +include { PIPELINE_INITIALISATION } from './subworkflows/local/utils_nfcore_genepal_pipeline' +include { PIPELINE_COMPLETION     } from './subworkflows/local/utils_nfcore_genepal_pipeline' -include { PANGENE } from './workflows/pangene.nf' +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +    NAMED WORKFLOWS FOR PIPELINE +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +*/ + +// +// WORKFLOW: Run
main analysis pipeline depending on type of input +// +workflow PLANTFOODRESEARCHOPEN_GENEPAL { + + take: + ch_target_assembly + ch_tar_assm_str + ch_is_masked + ch_te_library + ch_braker_annotation + ch_braker_ex_asm_str + ch_benchmark_gff + ch_rna_fq + ch_rna_bam + ch_rna_bam_by_assembly + ch_sortmerna_fastas + ch_ext_prot_fastas + ch_liftoff_fasta + ch_liftoff_gff + ch_tsebra_config + ch_orthofinder_pep + + main: + // + // WORKFLOW: Run pipeline + // + GENEPAL( + ch_target_assembly, + ch_tar_assm_str, + ch_is_masked, + ch_te_library, + ch_braker_annotation, + ch_braker_ex_asm_str, + ch_benchmark_gff, + ch_rna_fq, + ch_rna_bam, + ch_rna_bam_by_assembly, + ch_sortmerna_fastas, + ch_ext_prot_fastas, + ch_liftoff_fasta, + ch_liftoff_gff, + ch_tsebra_config, + ch_orthofinder_pep + ) -workflow { - PFR_PANGENE() } +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + RUN MAIN WORKFLOW +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +*/ + +workflow { + + main: + + // + // SUBWORKFLOW: Run initialisation tasks + // + PIPELINE_INITIALISATION ( + params.version, + params.help, + params.validate_params, + params.monochrome_logs, + args, + params.outdir, + params.input + ) -workflow PFR_PANGENE { - PANGENE() + // + // WORKFLOW: Run main workflow + // + PLANTFOODRESEARCHOPEN_GENEPAL( + PIPELINE_INITIALISATION.out.target_assembly, + PIPELINE_INITIALISATION.out.tar_assm_str, + PIPELINE_INITIALISATION.out.is_masked, + PIPELINE_INITIALISATION.out.te_library, + PIPELINE_INITIALISATION.out.braker_annotation, + PIPELINE_INITIALISATION.out.braker_ex_asm_str, + PIPELINE_INITIALISATION.out.benchmark_gff, + PIPELINE_INITIALISATION.out.rna_fq, + PIPELINE_INITIALISATION.out.rna_bam, + PIPELINE_INITIALISATION.out.rna_bam_by_assembly, + PIPELINE_INITIALISATION.out.sortmerna_fastas, + PIPELINE_INITIALISATION.out.ext_prot_fastas, + PIPELINE_INITIALISATION.out.liftoff_fasta, + PIPELINE_INITIALISATION.out.liftoff_gff, + PIPELINE_INITIALISATION.out.tsebra_config, + PIPELINE_INITIALISATION.out.orthofinder_pep + ) + + // + // SUBWORKFLOW: Run completion tasks + // + PIPELINE_COMPLETION ( + params.email, + params.email_on_fail, + params.plaintext_email, + params.outdir, + params.monochrome_logs, + params.hook_url + ) } + +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + THE END +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +*/ diff --git a/modules.json b/modules.json index 27dc611..682ed80 100644 --- a/modules.json +++ b/modules.json @@ -1,79 +1,88 @@ { - "name": "PlantandFoodResearch/pangene", - "homePage": "https://github.com/PlantandFoodResearch/pangene", + "name": "plant-food-research-open/genepal", + "homePage": "https://github.com/plant-food-research-open/genepal", "repos": { - "git@github.com:PlantandFoodResearch/nxf-modules.git": { + "https://github.com/GallVp/nxf-components.git": { "modules": { - "pfr": { - "agat/spfilterfeaturefromkilllist": { + "gallvp": { + "agat/spaddintrons": { "branch": "main", - "git_sha": "10b046eaac396f279c08e7e2bb067482ccd9c74e", - "installed_by": ["modules"] + "git_sha": "a8939d36280e7d9037c7cf164eeede19e46546a4", + "installed_by": ["gxf_fasta_agat_spaddintrons_spextractsequences"] }, - "agat/spmergeannotations": { + "agat/spextractsequences": { + "branch": "main", + "git_sha": "a8939d36280e7d9037c7cf164eeede19e46546a4", + "installed_by": ["gxf_fasta_agat_spaddintrons_spextractsequences"] + }, + "braker3": { 
"branch": "main", - "git_sha": "10b046eaac396f279c08e7e2bb067482ccd9c74e", + "git_sha": "a8939d36280e7d9037c7cf164eeede19e46546a4", "installed_by": ["modules"] }, + "busco/busco": { + "branch": "main", + "git_sha": "92d59e5f578a2929b75f7588985b9bf451f4c370", + "installed_by": ["fasta_gxf_busco_plot"] + }, + "busco/generateplot": { + "branch": "main", + "git_sha": "ae9714c21ede9199a3118e3c20b65484aa73e232", + "installed_by": ["fasta_gxf_busco_plot"] + }, "custom/restoregffids": { "branch": "main", - "git_sha": "10b046eaac396f279c08e7e2bb067482ccd9c74e", + "git_sha": "a8939d36280e7d9037c7cf164eeede19e46546a4", "installed_by": ["fasta_edta_lai"] }, - "custom/shortenfastaids": { + "custom/rmouttogff3": { "branch": "main", - "git_sha": "10b046eaac396f279c08e7e2bb067482ccd9c74e", - "installed_by": ["fasta_edta_lai"] + "git_sha": "cf6e6dee79b29313ed6cda962908056a15f8c531", + "installed_by": ["modules"] }, - "edta/edta": { + "custom/shortenfastaids": { "branch": "main", - "git_sha": "10b046eaac396f279c08e7e2bb067482ccd9c74e", + "git_sha": "a8939d36280e7d9037c7cf164eeede19e46546a4", "installed_by": ["fasta_edta_lai"] }, - "ltrretriever/lai": { + "edta/edta": { "branch": "main", - "git_sha": "10b046eaac396f279c08e7e2bb067482ccd9c74e", + "git_sha": "a8939d36280e7d9037c7cf164eeede19e46546a4", "installed_by": ["fasta_edta_lai"] }, - "repeatmodeler/builddatabase": { + "gffread": { "branch": "main", - "git_sha": "10b046eaac396f279c08e7e2bb067482ccd9c74e", - "installed_by": ["modules"] + "git_sha": "ae9714c21ede9199a3118e3c20b65484aa73e232", + "installed_by": ["fasta_gxf_busco_plot"] }, - "repeatmodeler/repeatmodeler": { + "ltrretriever/lai": { "branch": "main", - "git_sha": "10b046eaac396f279c08e7e2bb067482ccd9c74e", - "installed_by": ["modules"] + "git_sha": "ae9714c21ede9199a3118e3c20b65484aa73e232", + "installed_by": ["fasta_edta_lai"] }, - "tsebra": { + "repeatmasker/repeatmasker": { "branch": "main", - "git_sha": "a3b86c357980e5244cb313027c1d980d89c19ef4", + "git_sha": "a8939d36280e7d9037c7cf164eeede19e46546a4", "installed_by": ["modules"] } } }, "subworkflows": { - "pfr": { + "gallvp": { "fasta_edta_lai": { "branch": "main", - "git_sha": "10b046eaac396f279c08e7e2bb067482ccd9c74e", + "git_sha": "7bf6fbca23edc94490ffa6709f52b2f71c6fb130", "installed_by": ["subworkflows"] - } - } - } - }, - "git@github.com:kherronism/nf-modules.git": { - "modules": { - "kherronism": { - "braker3": { - "branch": "dev", - "git_sha": "b01fec253f3b73b24e3f166a96d4beb49e58b0a6", - "installed_by": ["modules"] }, - "repeatmasker": { - "branch": "dev", - "git_sha": "6778d5bb4c9f3d597753c699226fcde8d0811bfb", - "installed_by": ["modules"] + "fasta_gxf_busco_plot": { + "branch": "main", + "git_sha": "7bf6fbca23edc94490ffa6709f52b2f71c6fb130", + "installed_by": ["subworkflows"] + }, + "gxf_fasta_agat_spaddintrons_spextractsequences": { + "branch": "main", + "git_sha": "7bf6fbca23edc94490ffa6709f52b2f71c6fb130", + "installed_by": ["subworkflows"] } } } @@ -83,99 +92,127 @@ "nf-core": { "agat/convertspgff2gtf": { "branch": "master", - "git_sha": "15f1cf0a1a12da63839c336eb1ecd96d03320e94", + "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", "installed_by": ["modules"] }, "agat/convertspgxf2gxf": { "branch": "master", - "git_sha": "71ccbccbd498af48c33939e1123517340bab3d6f", + "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", "installed_by": ["modules"] }, - "cat/cat": { + "agat/spfilterfeaturefromkilllist": { "branch": "master", - "git_sha": "9437e6053dccf4aafa022bfd6e7e9de67e625af8", + "git_sha": 
"666652151335353eef2fcd58880bcef5bc2928e1", "installed_by": ["modules"] }, - "cat/fastq": { + "agat/spmergeannotations": { + "branch": "master", + "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", + "installed_by": ["modules"] + }, + "cat/cat": { "branch": "master", - "git_sha": "0997b47c93c06b49aa7b3fefda87e728312cf2ca", + "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", "installed_by": ["modules"] }, - "custom/dumpsoftwareversions": { + "cat/fastq": { "branch": "master", - "git_sha": "de45447d060b8c8b98575bc637a4a575fd0638e1", + "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", "installed_by": ["modules"] }, "eggnogmapper": { "branch": "master", - "git_sha": "9be0d5f9aeb31bded6780b5b589de7158ccf2c7b", - "installed_by": ["modules"], - "patch": "modules/nf-core/eggnogmapper/eggnogmapper.diff" + "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", + "installed_by": ["modules"] }, "fastavalidator": { "branch": "master", - "git_sha": "89ff95427f695086369d7927a3c17cea2a37a382", + "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", "installed_by": ["modules"] }, "fastp": { "branch": "master", - "git_sha": "95cf5fe0194c7bf5cb0e3027a2eb7e7c89385080", + "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", "installed_by": ["fastq_fastqc_umitools_fastp"] }, "fastqc": { "branch": "master", - "git_sha": "285a50500f9e02578d90b3ce6382ea3c30216acd", + "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", "installed_by": ["fastq_fastqc_umitools_fastp", "modules"] }, "gffcompare": { "branch": "master", - "git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5", + "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", "installed_by": ["modules"] }, "gffread": { "branch": "master", - "git_sha": "b1b959609bda44341120aed1766329909f54b8d0", - "installed_by": ["modules"], - "patch": "modules/nf-core/gffread/gffread.diff" + "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", + "installed_by": ["modules"] }, "gt/gff3": { "branch": "master", - "git_sha": "3c77ca9aac783e76c3614a06db3bfe4fef619bde", + "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", "installed_by": ["modules"] }, "gunzip": { "branch": "master", - "git_sha": "3a5fef109d113b4997c9822198664ca5f2716208", + "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", "installed_by": ["modules"] }, "liftoff": { "branch": "master", - "git_sha": "8ce34a40589137b75b65dfe8bb334c9b94f1d6c8", + "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", + "installed_by": ["modules"] + }, + "orthofinder": { + "branch": "master", + "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", + "installed_by": ["modules"] + }, + "repeatmodeler/builddatabase": { + "branch": "master", + "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", + "installed_by": ["modules"] + }, + "repeatmodeler/repeatmodeler": { + "branch": "master", + "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", "installed_by": ["modules"] }, "samtools/cat": { "branch": "master", - "git_sha": "f4596fe0bdc096cf53ec4497e83defdb3a94ff62", + "git_sha": "b13f07be4c508d6ff6312d354d09f2493243e208", + "installed_by": ["modules"] + }, + "seqkit/rmdup": { + "branch": "master", + "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", "installed_by": ["modules"] }, "sortmerna": { "branch": "master", - "git_sha": "df05c8db5195867c0bc7b92c1788115b66f0d17d", + "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", "installed_by": ["modules"] }, "star/align": { "branch": "master", - "git_sha": "a21faa6a3481af92a343a10926f59c189a2c16c9", + "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", 
"installed_by": ["modules"] }, "star/genomegenerate": { "branch": "master", - "git_sha": "a21faa6a3481af92a343a10926f59c189a2c16c9", + "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", + "installed_by": ["modules"] + }, + "tsebra": { + "branch": "master", + "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", "installed_by": ["modules"] }, "umitools/extract": { "branch": "master", - "git_sha": "d2c5e76f291379f3dd403e48e46ed7e6ba5da744", + "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", "installed_by": ["fastq_fastqc_umitools_fastp"] } } @@ -184,7 +221,22 @@ "nf-core": { "fastq_fastqc_umitools_fastp": { "branch": "master", - "git_sha": "cabcc0dadf8366aa7a9930066a7b3dd90d9825d5", + "git_sha": "46eca555142d6e597729fcb682adcc791796f514", + "installed_by": ["subworkflows"] + }, + "utils_nextflow_pipeline": { + "branch": "master", + "git_sha": "d20fb2a9cc3e2835e9d067d1046a63252eb17352", + "installed_by": ["subworkflows"] + }, + "utils_nfcore_pipeline": { + "branch": "master", + "git_sha": "2fdce49d30c0254f76bc0f13c55c17455c1251ab", + "installed_by": ["subworkflows"] + }, + "utils_nfvalidation_plugin": { + "branch": "master", + "git_sha": "5caf7640a9ef1d18d765d55339be751bb0969dfa", "installed_by": ["subworkflows"] } } diff --git a/modules/gallvp/agat/spaddintrons/environment.yml b/modules/gallvp/agat/spaddintrons/environment.yml new file mode 100644 index 0000000..fd42731 --- /dev/null +++ b/modules/gallvp/agat/spaddintrons/environment.yml @@ -0,0 +1,6 @@ +channels: + - conda-forge + - bioconda + +dependencies: + - "bioconda::agat=1.4.0" diff --git a/modules/gallvp/agat/spaddintrons/main.nf b/modules/gallvp/agat/spaddintrons/main.nf new file mode 100644 index 0000000..164b3d3 --- /dev/null +++ b/modules/gallvp/agat/spaddintrons/main.nf @@ -0,0 +1,50 @@ +process AGAT_SPADDINTRONS { + tag "$meta.id" + label 'process_single' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/agat:1.4.0--pl5321hdfd78af_0' : + 'biocontainers/agat:1.4.0--pl5321hdfd78af_0' }" + + input: + tuple val(meta), path(gxf) + path config + + output: + tuple val(meta), path("*.gff") , emit: gff + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def config_arg = config ? "-c $config" : '' + if( "$gxf" == "${prefix}.gff" ) error "Input and output names are the same, use \"task.ext.prefix\" to disambiguate!" + """ + agat_sp_add_introns.pl \\ + $args \\ + -g $gxf \\ + $config_arg \\ + -o ${prefix}.gff + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + agat: \$(agat_sp_add_introns.pl -h | sed -n 's/.*(AGAT) - Version: \\(.*\\) .*/\\1/p') + END_VERSIONS + """ + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + if( "$gxf" == "${prefix}.gff" ) error "Input and output names are the same, use \"task.ext.prefix\" to disambiguate!" 
+ """ + touch ${prefix}.gff + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + agat: \$(agat_sp_add_introns.pl -h | sed -n 's/.*(AGAT) - Version: \\(.*\\) .*/\\1/p') + END_VERSIONS + """ +} diff --git a/modules/gallvp/agat/spaddintrons/meta.yml b/modules/gallvp/agat/spaddintrons/meta.yml new file mode 100644 index 0000000..d5d13ac --- /dev/null +++ b/modules/gallvp/agat/spaddintrons/meta.yml @@ -0,0 +1,56 @@ +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/meta-schema.json +name: "agat_spaddintrons" +description: | + The script aims to add intron features to gtf/gff file without intron features. +keywords: + - genomics + - gff + - gtf + - add + - intron + - feature +tools: + - "agat": + description: "Another Gff Analysis Toolkit (AGAT). Suite of tools to handle gene + annotations in any GTF/GFF format." + homepage: "https://agat.readthedocs.io/en/latest/" + documentation: "https://agat.readthedocs.io/en/latest/tools/agat_sp_add_introns.html" + tool_dev_url: "https://github.com/NBISweden/AGAT" + doi: "10.5281/zenodo.3552717" + licence: ["GPL v3"] + identifier: biotools:AGAT +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'sample1', single_end:false ]` + - gxf: + type: file + description: Input GFF3/GTF file + pattern: "*.{gff,gff3,gtf}" + - - config: + type: file + description: | + Input agat config file. By default AGAT takes as input agat_config.yaml file from the working directory if any, otherwise it takes the orignal agat_config.yaml shipped with AGAT. To get the agat_config.yaml locally type: "agat config --expose". The --config option gives you the possibility to use your own AGAT config file (located elsewhere or named differently). + pattern: "*.yaml" +output: + - gff: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'sample1', single_end:false ] + - "*.gff": + type: file + description: Output GFF file. 
+ pattern: "*.gff" + - versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@GallVp" +maintainers: + - "@GallVp" diff --git a/modules/gallvp/agat/spaddintrons/tests/main.nf.test b/modules/gallvp/agat/spaddintrons/tests/main.nf.test new file mode 100644 index 0000000..6c4bd03 --- /dev/null +++ b/modules/gallvp/agat/spaddintrons/tests/main.nf.test @@ -0,0 +1,61 @@ +nextflow_process { + + name "Test Process AGAT_SPADDINTRONS" + script "../main.nf" + process "AGAT_SPADDINTRONS" + + tag "modules" + tag "modules_gallvp" + tag "agat" + tag "agat/spaddintrons" + + test("homo_sapiens - genome - gtf") { + + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/chr1/genome.gtf', checkIfExists: true) + ] + input[1] = [] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + + test("homo_sapiens - genome - gtf - stub") { + + options '-stub' + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/chr1/genome.gtf', checkIfExists: true) + ] + input[1] = [] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + +} diff --git a/modules/pfr/agat/spfilterfeaturefromkilllist/tests/main.nf.test.snap b/modules/gallvp/agat/spaddintrons/tests/main.nf.test.snap similarity index 52% rename from modules/pfr/agat/spfilterfeaturefromkilllist/tests/main.nf.test.snap rename to modules/gallvp/agat/spaddintrons/tests/main.nf.test.snap index bbc8cea..6947711 100644 --- a/modules/pfr/agat/spfilterfeaturefromkilllist/tests/main.nf.test.snap +++ b/modules/gallvp/agat/spaddintrons/tests/main.nf.test.snap @@ -1,68 +1,72 @@ { - "sarscov2-genome_gff3-stub": { + "homo_sapiens - genome - gtf - stub": { "content": [ { "0": [ [ { - "id": "test" + "id": "test", + "single_end": false }, "test.gff:md5,d41d8cd98f00b204e9800998ecf8427e" ] ], "1": [ - "versions.yml:md5,ace24108f514da465e068372b18d4651" + "versions.yml:md5,480dfe983a8b4469c1dff8b7a08d855d" ], "gff": [ [ { - "id": "test" + "id": "test", + "single_end": false }, "test.gff:md5,d41d8cd98f00b204e9800998ecf8427e" ] ], "versions": [ - "versions.yml:md5,ace24108f514da465e068372b18d4651" + "versions.yml:md5,480dfe983a8b4469c1dff8b7a08d855d" ] } ], "meta": { "nf-test": "0.8.4", - "nextflow": "23.10.1" + "nextflow": "24.04.2" }, - "timestamp": "2024-04-02T13:29:56.638311" + "timestamp": "2024-06-24T16:22:13.888389" }, - "sarscov2-genome_gff3": { + "homo_sapiens - genome - gtf": { "content": [ { "0": [ [ { - "id": "test" + "id": "test", + "single_end": false }, - "test.gff:md5,df19e1b84ba6f691d20c72b397c88abf" + "test.gff:md5,a798830c6ad0a27b1564b98e1a1fc3aa" ] ], "1": [ - "versions.yml:md5,ace24108f514da465e068372b18d4651" + "versions.yml:md5,480dfe983a8b4469c1dff8b7a08d855d" ], "gff": [ [ { - "id": "test" + "id": "test", + "single_end": false }, - "test.gff:md5,df19e1b84ba6f691d20c72b397c88abf" + "test.gff:md5,a798830c6ad0a27b1564b98e1a1fc3aa" ] ], "versions": [ - "versions.yml:md5,ace24108f514da465e068372b18d4651" + "versions.yml:md5,480dfe983a8b4469c1dff8b7a08d855d" ] } ], "meta": { "nf-test": "0.8.4", - "nextflow": "23.10.1" + "nextflow": "24.04.2" }, - "timestamp": "2024-04-02T13:11:42.236263" + "timestamp": "2024-06-24T15:21:51.693488" } } \ No 
newline at end of file diff --git a/modules/gallvp/agat/spextractsequences/environment.yml b/modules/gallvp/agat/spextractsequences/environment.yml new file mode 100644 index 0000000..214b22f --- /dev/null +++ b/modules/gallvp/agat/spextractsequences/environment.yml @@ -0,0 +1,6 @@ +channels: + - conda-forge + - bioconda + +dependencies: + - bioconda::agat=1.4.0 diff --git a/modules/gallvp/agat/spextractsequences/main.nf b/modules/gallvp/agat/spextractsequences/main.nf new file mode 100644 index 0000000..ccf4b4d --- /dev/null +++ b/modules/gallvp/agat/spextractsequences/main.nf @@ -0,0 +1,52 @@ +process AGAT_SPEXTRACTSEQUENCES { + tag "$meta.id" + label 'process_single' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/agat:1.4.0--pl5321hdfd78af_0' : + 'biocontainers/agat:1.4.0--pl5321hdfd78af_0' }" + + input: + tuple val(meta), path(gxf) + path fasta + path config + + output: + tuple val(meta), path("*.fasta") , emit: fasta + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def config_arg = config ? "-c $config" : '' + if( "$fasta" == "${prefix}.fasta" ) error "Input and output names are the same, use \"task.ext.prefix\" to disambiguate!" + """ + agat_sp_extract_sequences.pl \\ + $args \\ + -g $gxf \\ + -f $fasta \\ + $config_arg \\ + -o ${prefix}.fasta + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + agat: \$(agat_sp_extract_sequences.pl -h | sed -n 's/.*(AGAT) - Version: \\(.*\\) .*/\\1/p') + END_VERSIONS + """ + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + if( "$fasta" == "${prefix}.fasta" ) error "Input and output names are the same, use \"task.ext.prefix\" to disambiguate!" + """ + touch ${prefix}.fasta + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + agat: \$(agat_sp_extract_sequences.pl -h | sed -n 's/.*(AGAT) - Version: \\(.*\\) .*/\\1/p') + END_VERSIONS + """ +} diff --git a/modules/gallvp/agat/spextractsequences/meta.yml b/modules/gallvp/agat/spextractsequences/meta.yml new file mode 100644 index 0000000..a3c1dc6 --- /dev/null +++ b/modules/gallvp/agat/spextractsequences/meta.yml @@ -0,0 +1,61 @@ +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/meta-schema.json +name: "agat_spextractsequences" +description: | + This script extracts sequences in fasta format according to features described + in a gff file. +keywords: + - genomics + - gff + - extract + - fasta + - sequence + - feature +tools: + - "agat": + description: "Another Gff Analysis Toolkit (AGAT). Suite of tools to handle gene + annotations in any GTF/GFF format." + homepage: "https://agat.readthedocs.io/en/latest/" + documentation: "https://agat.readthedocs.io/en/latest/tools/agat_sp_extract_sequences.html" + tool_dev_url: "https://github.com/NBISweden/AGAT" + doi: "10.5281/zenodo.3552717" + licence: ["GPL v3"] + identifier: biotools:AGAT +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'sample1', single_end:false ]` + - gxf: + type: file + description: Input GFF3/GTF file + pattern: "*.{gff,gff3,gtf}" + - - fasta: + type: file + description: Input FASTA file + pattern: "*.{fa,fsa,faa,fasta}" + - - config: + type: file + description: | + Input agat config file. 
By default AGAT takes as input agat_config.yaml file from the working directory if any, otherwise it takes the orignal agat_config.yaml shipped with AGAT. To get the agat_config.yaml locally type: "agat config --expose". The --config option gives you the possibility to use your own AGAT config file (located elsewhere or named differently). + pattern: "*.yaml" +output: + - fasta: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'sample1', single_end:false ] + - "*.fasta": + type: file + description: Output FASTA file. + pattern: "*.fasta" + - versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@GallVp" +maintainers: + - "@GallVp" diff --git a/modules/gallvp/agat/spextractsequences/tests/main.nf.test b/modules/gallvp/agat/spextractsequences/tests/main.nf.test new file mode 100644 index 0000000..72b5f99 --- /dev/null +++ b/modules/gallvp/agat/spextractsequences/tests/main.nf.test @@ -0,0 +1,62 @@ +nextflow_process { + + name "Test Process AGAT_SPEXTRACTSEQUENCES" + script "../main.nf" + process "AGAT_SPEXTRACTSEQUENCES" + + tag "modules" + tag "modules_gallvp" + tag "agat" + tag "agat/spextractsequences" + + test("sarscov2 - gff - fasta") { + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.gff3', checkIfExists: true) + ] + input[1] = file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) + input[2] = [] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + + test("sarscov2 - gff - fasta - stub") { + + options '-stub' + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.gff3', checkIfExists: true) + ] + input[1] = file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) + input[2] = [] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + +} diff --git a/modules/gallvp/agat/spextractsequences/tests/main.nf.test.snap b/modules/gallvp/agat/spextractsequences/tests/main.nf.test.snap new file mode 100644 index 0000000..4ee76fe --- /dev/null +++ b/modules/gallvp/agat/spextractsequences/tests/main.nf.test.snap @@ -0,0 +1,72 @@ +{ + "sarscov2 - gff - fasta": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + "test.fasta:md5,e4a8c6c1d2b33c92240e9d2ba07aecc2" + ] + ], + "1": [ + "versions.yml:md5,b31a3d3961871a1e453919bc3b45bf2f" + ], + "fasta": [ + [ + { + "id": "test", + "single_end": false + }, + "test.fasta:md5,e4a8c6c1d2b33c92240e9d2ba07aecc2" + ] + ], + "versions": [ + "versions.yml:md5,b31a3d3961871a1e453919bc3b45bf2f" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-06-21T16:20:58.856035" + }, + "sarscov2 - gff - fasta - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + "test.fasta:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + "versions.yml:md5,b31a3d3961871a1e453919bc3b45bf2f" + ], + "fasta": [ + [ + { + "id": "test", + "single_end": false + }, + "test.fasta:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions": [ + "versions.yml:md5,b31a3d3961871a1e453919bc3b45bf2f" + ] + 
} + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-06-21T16:21:04.022292" + } +} \ No newline at end of file diff --git a/modules/gallvp/braker3/main.nf b/modules/gallvp/braker3/main.nf new file mode 100644 index 0000000..3cba6f8 --- /dev/null +++ b/modules/gallvp/braker3/main.nf @@ -0,0 +1,96 @@ +process BRAKER3 { + tag "${meta.id}" + label 'process_high' + + container "docker.io/teambraker/braker3:v3.0.7.5" + + input: + tuple val(meta), path(fasta) + path bam + path rnaseq_sets_dirs + path rnaseq_sets_ids + path proteins + path hintsfile + + output: + tuple val(meta), path("$prefix/braker.gtf") , emit: gtf + tuple val(meta), path("$prefix/braker.codingseq") , emit: cds + tuple val(meta), path("$prefix/braker.aa") , emit: aa + tuple val(meta), path("$prefix/braker.log") , emit: log + tuple val(meta), path("$prefix/hintsfile.gff") , emit: hintsfile , optional: true + tuple val(meta), path("$prefix/braker.gff3") , emit: gff3 , optional: true + tuple val(meta), path("$prefix/what-to-cite.txt") , emit: citations + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + prefix = task.ext.prefix ?: "${meta.id}" + def rna_ids = rnaseq_sets_ids ? "--rnaseq_sets_ids=$rnaseq_sets_ids" : '' + def rna_dirs = rnaseq_sets_dirs ? "--rnaseq_sets_dirs=$rnaseq_sets_dirs" : '' + def bam = bam ? "--bam=$bam" : '' + def proteins = proteins ? "--prot_seq=$proteins" : '' + def hints = hintsfile ? "--hints=$hintsfile" : '' + def new_species = args.contains('--species')? '' : '--species new_species' + """ + cp -r \$AUGUSTUS_CONFIG_PATH \\ + augustus_config + + chmod -R a+w \\ + augustus_config + + perl -p -e 's/^(>\\S+).*\$/\$1/' \\ + $fasta \\ + > ${prefix}.name.only.genome.masked.fasta + + braker.pl \\ + --genome ${prefix}.name.only.genome.masked.fasta \\ + $new_species \\ + --workingdir $prefix \\ + --AUGUSTUS_CONFIG_PATH "\$(pwd)/augustus_config" \\ + --threads $task.cpus \\ + $rna_ids \\ + $rna_dirs \\ + $bam \\ + $proteins \\ + $hints \\ + $args + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + braker3: \$(braker.pl --version 2>/dev/null | sed 's/braker.pl version //') + augustus: \$(augustus --version |& sed -n 's/AUGUSTUS (\\(.*\\)) is a gene .*/\\1/p') + genemark-etp: \$(echo "\$(gmetp.pl || echo '')" | sed -n 's/ETP version \\(.*\\)/\\1/p') + prothint: \$(prothint.py --version | sed 's/prothint.py //1') + END_VERSIONS + """ + + stub: + def args = task.ext.args ?: '' + prefix = task.ext.prefix ?: "${meta.id}" + def rna_ids = rnaseq_sets_ids ? "--rnaseq_sets_ids=$rnaseq_sets_ids" : '' + def hints = hintsfile ? "--hints=$hintsfile" : '' + def touch_hints = (rna_ids || bam || proteins || hints) ? "touch $prefix/hintsfile.gff" : '' + def touch_gff = args.contains('--gff3') ? 
"touch $prefix/braker.gff3" : '' + """ + mkdir "$prefix" + + touch "$prefix/braker.gtf" + touch "$prefix/braker.codingseq" + touch "$prefix/braker.aa" + $touch_hints + touch "$prefix/braker.log" + touch "$prefix/what-to-cite.txt" + $touch_gff + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + braker3: \$(braker.pl --version 2>/dev/null | sed 's/braker.pl version //') + augustus: \$(augustus --version |& sed -n 's/AUGUSTUS (\\(.*\\)) is a gene .*/\\1/p') + genemark-etp: \$(echo "\$(gmetp.pl || echo '')" | sed -n 's/ETP version \\(.*\\)/\\1/p') + prothint: \$(prothint.py --version | sed 's/prothint.py //1') + END_VERSIONS + """ +} diff --git a/modules/gallvp/braker3/meta.yml b/modules/gallvp/braker3/meta.yml new file mode 100644 index 0000000..7e79adb --- /dev/null +++ b/modules/gallvp/braker3/meta.yml @@ -0,0 +1,122 @@ +name: braker3 +description: | + Gene prediction in novel genomes using RNA-seq and protein homology information +keywords: + - genome + - annotation + - braker + - gff + - gtf +tools: + - braker3: + description: "BRAKER3 is a pipeline for fully automated prediction of protein + coding gene structures using protein and RNA-seq and protein homology information" + homepage: https://github.com/Gaius-Augustus/BRAKER + documentation: https://github.com/Gaius-Augustus/BRAKER + tool_dev_url: https://github.com/Gaius-Augustus/BRAKER + doi: "10.13140/RG.2.2.20047.36004" + licence: ["Artistic-1.0"] + identifier: biotools:braker3 + +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test' ] + - fasta: + type: file + description: Genome assembly fasta + pattern: "*.{fasta,fa,fas,faa,fna}" + - - bam: + type: file + description: BAM file of RNA-seq data to be passed to --bam + pattern: "*.bam" + - - rnaseq_sets_dirs: + type: file + description: Directories of RNA-seq data sets to be passed to --rnaseq_sets_dirs + - - rnaseq_sets_ids: + type: file + description: IDs of RNA-seq data sets to be passed to --rnaseq_sets_ids + - - proteins: + type: file + description: Protein evidence to be passed to --proteins + pattern: "*.{fasta,fa,fas,faa}" + - - hintsfile: + type: file + description: Hintsfile to be passed to --hintsfile + pattern: "*.{gff, gtf, gff3}" +output: + - gtf: + - meta: + type: file + description: Gene transfer format file as output by BRAKER3 + pattern: "*.{gtf}" + - $prefix/braker.gtf: + type: file + description: Gene transfer format file as output by BRAKER3 + pattern: "*.{gtf}" + - cds: + - meta: + type: file + description: Coding sequence file as output by BRAKER3 + pattern: "*.{codingseq}" + - $prefix/braker.codingseq: + type: file + description: Coding sequence file as output by BRAKER3 + pattern: "*.{codingseq}" + - aa: + - meta: + type: file + description: Protein sequence file as output by BRAKER3 + pattern: "*.{aa}" + - $prefix/braker.aa: + type: file + description: Protein sequence file as output by BRAKER3 + pattern: "*.{aa}" + - log: + - meta: + type: file + description: BRAKER3 log file + pattern: "*.log" + - $prefix/braker.log: + type: file + description: BRAKER3 log file + pattern: "*.log" + - hintsfile: + - meta: + type: file + description: Hints file as output by BRAKER3 + pattern: "*hintsfile.{gff}" + - $prefix/hintsfile.gff: + type: file + description: Hints file as output by BRAKER3 + pattern: "*hintsfile.{gff}" + - gff3: + - meta: + type: file + description: GFF3 file as output by BRAKER3 + pattern: "*.{gff3}" + - $prefix/braker.gff3: + type: file + description: GFF3 file as output by BRAKER3 + 
pattern: "*.{gff3}" + - citations: + - meta: + type: file + description: BRAKER3 citations + pattern: "what-to-cite.txt" + - $prefix/what-to-cite.txt: + type: file + description: BRAKER3 citations + pattern: "what-to-cite.txt" + - versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@kherronism" + - "@GallVp" +maintainers: + - "@GallVp" diff --git a/modules/gallvp/braker3/tests/main.nf.test b/modules/gallvp/braker3/tests/main.nf.test new file mode 100644 index 0000000..6302ea2 --- /dev/null +++ b/modules/gallvp/braker3/tests/main.nf.test @@ -0,0 +1,77 @@ +nextflow_process { + + name "Test Process BRAKER3" + script "../main.nf" + config "./nextflow.config" + process "BRAKER3" + + tag "modules" + tag "modules_gallvp" + tag "braker3" + + test("braker3 - test - 4") { + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:false ], // meta map + file('https://raw.githubusercontent.com/Gaius-Augustus/BRAKER/f58479fe5bb13a9e51c3ca09cb9e137cab3b8471/example/genome.fa', checkIfExists: true) + ] + input[1] = file('http://topaz.gatech.edu/GeneMark/Braker/RNAseq.bam', checkIfExists: true) + input[2] = [] + input[3] = [] + input[4] = [] + input[5] = [] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + process.out.aa, + process.out.cds, + process.out.citations, + process.out.gff3, + process.out.gtf, + process.out.hintsfile, + process.out.versions, + ).match() }, + { assert file(process.out.log[0][1]).text.contains('BRAKER RUN FINISHED') } + ) + } + + } + + test("sarscov2 - fasta - stub") { + + options '-stub' + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true), + ] + input[1] = [] + input[2] = [] + input[3] = [] + input[4] = [] + input[5] = [] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + +} diff --git a/modules/gallvp/braker3/tests/main.nf.test.snap b/modules/gallvp/braker3/tests/main.nf.test.snap new file mode 100644 index 0000000..e9dc283 --- /dev/null +++ b/modules/gallvp/braker3/tests/main.nf.test.snap @@ -0,0 +1,181 @@ +{ + "sarscov2 - fasta - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + "braker.gtf:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + [ + { + "id": "test", + "single_end": false + }, + "braker.codingseq:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "2": [ + [ + { + "id": "test", + "single_end": false + }, + "braker.aa:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "3": [ + [ + { + "id": "test", + "single_end": false + }, + "braker.log:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "4": [ + + ], + "5": [ + + ], + "6": [ + [ + { + "id": "test", + "single_end": false + }, + "what-to-cite.txt:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "7": [ + "versions.yml:md5,465227c41b8b4aae2dea6ec249676935" + ], + "aa": [ + [ + { + "id": "test", + "single_end": false + }, + "braker.aa:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "cds": [ + [ + { + "id": "test", + "single_end": false + }, + "braker.codingseq:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "citations": [ + [ + { + "id": "test", + "single_end": false + }, + "what-to-cite.txt:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "gff3": [ + + ], + "gtf": [ + [ + { + "id": "test", + "single_end": false + }, + 
"braker.gtf:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "hintsfile": [ + + ], + "log": [ + [ + { + "id": "test", + "single_end": false + }, + "braker.log:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions": [ + "versions.yml:md5,465227c41b8b4aae2dea6ec249676935" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.3" + }, + "timestamp": "2024-07-25T16:01:15.128244" + }, + "braker3 - test - 4": { + "content": [ + [ + [ + { + "id": "test", + "single_end": false + }, + "braker.aa:md5,56ec6600d2843fe4a7fc9913e6b1448a" + ] + ], + [ + [ + { + "id": "test", + "single_end": false + }, + "braker.codingseq:md5,47f7f312e4f5b3854b960a373717a0f7" + ] + ], + [ + [ + { + "id": "test", + "single_end": false + }, + "what-to-cite.txt:md5,2ab67eaf09994751a09f5de8de7160a7" + ] + ], + [ + + ], + [ + [ + { + "id": "test", + "single_end": false + }, + "braker.gtf:md5,22a968505188b7aa57c37fa85f570295" + ] + ], + [ + [ + { + "id": "test", + "single_end": false + }, + "hintsfile.gff:md5,d42b89e12af7ba66ae25fd977146640a" + ] + ], + [ + "versions.yml:md5,465227c41b8b4aae2dea6ec249676935" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.3" + }, + "timestamp": "2024-07-25T16:03:42.343716" + } +} \ No newline at end of file diff --git a/modules/gallvp/braker3/tests/nextflow.config b/modules/gallvp/braker3/tests/nextflow.config new file mode 100644 index 0000000..bf8441b --- /dev/null +++ b/modules/gallvp/braker3/tests/nextflow.config @@ -0,0 +1,5 @@ +process { + withName: BRAKER3 { + ext.args = '--species=arabidopsis --skipAllTraining' // Not required, but significantly cuts the runtime + } +} diff --git a/modules/gallvp/busco/busco/environment.yml b/modules/gallvp/busco/busco/environment.yml new file mode 100644 index 0000000..5b918b4 --- /dev/null +++ b/modules/gallvp/busco/busco/environment.yml @@ -0,0 +1,5 @@ +channels: + - conda-forge + - bioconda +dependencies: + - bioconda::busco=5.7.1 diff --git a/modules/gallvp/busco/busco/main.nf b/modules/gallvp/busco/busco/main.nf new file mode 100644 index 0000000..98cf5b0 --- /dev/null +++ b/modules/gallvp/busco/busco/main.nf @@ -0,0 +1,107 @@ +process BUSCO_BUSCO { + tag "$meta.id" + label 'process_medium' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/busco:5.7.1--pyhdfd78af_0': + 'biocontainers/busco:5.7.1--pyhdfd78af_0' }" + + input: + tuple val(meta), path(fasta, stageAs:'tmp_input/*') + val mode // Required: One of genome, proteins, or transcriptome + val lineage // Required: lineage to check against, "auto" enables --auto-lineage instead + path busco_lineages_path // Recommended: path to busco lineages - downloads if not set + path config_file // Optional: busco configuration file + + output: + tuple val(meta), path("*-busco.batch_summary.txt") , emit: batch_summary + tuple val(meta), path("short_summary.*.txt") , emit: short_summaries_txt , optional: true + tuple val(meta), path("short_summary.*.json") , emit: short_summaries_json , optional: true + tuple val(meta), path("*-busco/*/run_*/full_table.tsv") , emit: full_table , optional: true + tuple val(meta), path("*-busco/*/run_*/missing_busco_list.tsv") , emit: missing_busco_list , optional: true + tuple val(meta), path("*-busco/*/run_*/single_copy_proteins.faa") , emit: single_copy_proteins , optional: true + tuple val(meta), path("*-busco/*/run_*/busco_sequences") , emit: seq_dir + tuple val(meta), path("*-busco/*/translated_proteins") , emit: translated_dir , optional: true + tuple val(meta), path("*-busco") , emit: busco_dir + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + if ( mode !in [ 'genome', 'proteins', 'transcriptome' ] ) { + error "Mode must be one of 'genome', 'proteins', or 'transcriptome'." + } + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}-${lineage}" + def busco_config = config_file ? "--config $config_file" : '' + def busco_lineage = lineage.equals('auto') ? '--auto-lineage' : "--lineage_dataset ${lineage}" + def busco_lineage_dir = busco_lineages_path ? "--download_path ${busco_lineages_path}" : '' + """ + # Nextflow changes the container --entrypoint to /bin/bash (container default entrypoint: /usr/local/env-execute) + # Check for container variable initialisation script and source it. + if [ -f "/usr/local/env-activate.sh" ]; then + set +u # Otherwise, errors out because of various unbound variables + . "/usr/local/env-activate.sh" + set -u + fi + + # If the augustus config directory is not writable, then copy to writeable area + if [ ! -w "\${AUGUSTUS_CONFIG_PATH}" ]; then + # Create writable tmp directory for augustus + AUG_CONF_DIR=\$( mktemp -d -p \$PWD ) + cp -r \$AUGUSTUS_CONFIG_PATH/* \$AUG_CONF_DIR + export AUGUSTUS_CONFIG_PATH=\$AUG_CONF_DIR + echo "New AUGUSTUS_CONFIG_PATH=\${AUGUSTUS_CONFIG_PATH}" + fi + + # Ensure the input is uncompressed + INPUT_SEQS=input_seqs + mkdir "\$INPUT_SEQS" + cd "\$INPUT_SEQS" + for FASTA in ../tmp_input/*; do + if [ "\${FASTA##*.}" == 'gz' ]; then + gzip -cdf "\$FASTA" > \$( basename "\$FASTA" .gz ) + else + ln -s "\$FASTA" . + fi + done + cd .. + + busco \\ + --cpu $task.cpus \\ + --in "\$INPUT_SEQS" \\ + --out ${prefix}-busco \\ + --mode $mode \\ + $busco_lineage \\ + $busco_lineage_dir \\ + $busco_config \\ + $args + + # clean up + rm -rf "\$INPUT_SEQS" + + # Move files to avoid staging/publishing issues + mv ${prefix}-busco/batch_summary.txt ${prefix}-busco.batch_summary.txt + mv ${prefix}-busco/*/short_summary.*.{json,txt} . || echo "Short summaries were not available: No genes were found."
+ + cat <<-END_VERSIONS > versions.yml + "${task.process}": + busco: \$( busco --version 2>&1 | sed 's/^BUSCO //' ) + END_VERSIONS + """ + + stub: + def prefix = task.ext.prefix ?: "${meta.id}-${lineage}" + def fasta_name = files(fasta).first().name - '.gz' + """ + touch ${prefix}-busco.batch_summary.txt + mkdir -p ${prefix}-busco/$fasta_name/run_${lineage}/busco_sequences + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + busco: \$( busco --version 2>&1 | sed 's/^BUSCO //' ) + END_VERSIONS + """ +} diff --git a/modules/gallvp/busco/busco/meta.yml b/modules/gallvp/busco/busco/meta.yml new file mode 100644 index 0000000..7cb6d69 --- /dev/null +++ b/modules/gallvp/busco/busco/meta.yml @@ -0,0 +1,152 @@ +name: busco_busco +description: Benchmarking Universal Single Copy Orthologs +keywords: + - quality control + - genome + - transcriptome + - proteome +tools: + - busco: + description: BUSCO provides measures for quantitative assessment of genome assembly, + gene set, and transcriptome completeness based on evolutionarily informed expectations + of gene content from near-universal single-copy orthologs selected from OrthoDB. + homepage: https://busco.ezlab.org/ + documentation: https://busco.ezlab.org/busco_userguide.html + tool_dev_url: https://gitlab.com/ezlab/busco + doi: "10.1007/978-1-4939-9173-0_14" + licence: ["MIT"] + identifier: biotools:busco +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - fasta: + type: file + description: Nucleic or amino acid sequence file in FASTA format. + pattern: "*.{fasta,fna,fa,fasta.gz,fna.gz,fa.gz}" + - - mode: + type: string + description: The mode to run Busco in. One of genome, proteins, or transcriptome + pattern: "{genome,proteins,transcriptome}" + - - lineage: + type: string + description: The BUSCO lineage to use, or "auto" to automatically select lineage + - - busco_lineages_path: + type: directory + description: Path to local BUSCO lineages directory. + - - config_file: + type: file + description: Path to BUSCO config file. +output: + - batch_summary: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*-busco.batch_summary.txt": + type: file + description: Summary of all sequence files analyzed + pattern: "*-busco.batch_summary.txt" + - short_summaries_txt: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - short_summary.*.txt: + type: file + description: Short Busco summary in plain text format + pattern: "short_summary.*.txt" + - short_summaries_json: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - short_summary.*.json: + type: file + description: Short Busco summary in JSON format + pattern: "short_summary.*.json" + - full_table: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*-busco/*/run_*/full_table.tsv": + type: file + description: Full BUSCO results table + pattern: "full_table.tsv" + - missing_busco_list: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - "*-busco/*/run_*/missing_busco_list.tsv": + type: file + description: List of missing BUSCOs + pattern: "missing_busco_list.tsv" + - single_copy_proteins: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*-busco/*/run_*/single_copy_proteins.faa": + type: file + description: Fasta file of single copy proteins (transcriptome mode) + pattern: "single_copy_proteins.faa" + - seq_dir: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*-busco/*/run_*/busco_sequences": + type: directory + description: BUSCO sequence directory + pattern: "busco_sequences" + - translated_dir: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*-busco/*/translated_proteins": + type: directory + description: Six frame translations of each transcript made by the transcriptome + mode + pattern: "translated_dir" + - busco_dir: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*-busco": + type: directory + description: BUSCO lineage specific output + pattern: "*-busco" + - versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@priyanka-surana" + - "@charles-plessy" + - "@mahesh-panchal" + - "@muffato" + - "@jvhagey" + - "@gallvp" +maintainers: + - "@priyanka-surana" + - "@charles-plessy" + - "@mahesh-panchal" + - "@muffato" + - "@jvhagey" + - "@gallvp" diff --git a/modules/gallvp/busco/busco/tests/main.nf.test b/modules/gallvp/busco/busco/tests/main.nf.test new file mode 100644 index 0000000..e0eb735 --- /dev/null +++ b/modules/gallvp/busco/busco/tests/main.nf.test @@ -0,0 +1,415 @@ +nextflow_process { + + name "Test Process BUSCO_BUSCO" + script "../main.nf" + process "BUSCO_BUSCO" + + tag "modules" + tag "modules_gallvp" + tag "busco" + tag "busco/busco" + + test("test_busco_genome_single_fasta") { + + config './nextflow.config' + + when { + process { + """ + input[0] = [ + [ id:'test' ], // meta map + file(params.modules_testdata_base_path + 'genomics/prokaryotes/bacteroides_fragilis/genome/genome.fna.gz', checkIfExists: true) + ] + input[1] = 'genome' + input[2] = 'bacteria_odb10' // Launch with 'auto' to use --auto-lineage, and specified lineages // 'auto' removed from test due to memory issues + input[3] = [] // Download busco lineage + input[4] = [] // No config + """ + } + } + + then { + assert process.success + + with(path(process.out.short_summaries_txt[0][1]).text) { + assert contains('BUSCO version') + assert contains('The lineage dataset is') + assert contains('BUSCO was run in mode') + assert contains('Complete BUSCOs') + assert contains('Missing BUSCOs') + assert contains('Dependencies and versions') + } + + with(path(process.out.short_summaries_json[0][1]).text) { + assert contains('one_line_summary') + assert contains('mode') + assert contains('dataset') + } + + assert snapshot( + process.out.batch_summary[0][1], + process.out.full_table[0][1], + process.out.missing_busco_list[0][1], + process.out.versions[0] + ).match() + + with(file(process.out.seq_dir[0][1]).listFiles().collect { it.name }) { + assert contains('single_copy_busco_sequences.tar.gz') + assert contains('multi_copy_busco_sequences.tar.gz') + assert contains('fragmented_busco_sequences.tar.gz') + } + + 
with(path("${process.out.busco_dir[0][1]}/logs/busco.log").text) { + assert contains('DEBUG:busco.run_BUSCO') + assert contains('Results from dataset') + assert contains('how to cite BUSCO') + } + + assert process.out.single_copy_proteins == [] + assert process.out.translated_dir == [] + } + } + + test("test_busco_genome_multi_fasta") { + + config './nextflow.config' + + when { + process { + """ + input[0] = [ + [ id:'test' ], // meta map + [ + file(params.modules_testdata_base_path + 'genomics/prokaryotes/bacteroides_fragilis/genome/genome.fna.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/prokaryotes/candidatus_portiera_aleyrodidarum/genome/genome.fasta', checkIfExists: true) + ] + ] + input[1] = 'genome' + input[2] = 'bacteria_odb10' + input[3] = [] + input[4] = [] + """ + } + } + + then { + assert process.success + + with(path(process.out.short_summaries_txt[0][1][0]).text) { + assert contains('BUSCO version') + assert contains('The lineage dataset is') + assert contains('BUSCO was run in mode') + assert contains('Complete BUSCOs') + assert contains('Missing BUSCOs') + assert contains('Dependencies and versions') + } + + with(path(process.out.short_summaries_txt[0][1][1]).text) { + assert contains('BUSCO version') + assert contains('The lineage dataset is') + assert contains('BUSCO was run in mode') + assert contains('Complete BUSCOs') + assert contains('Missing BUSCOs') + assert contains('Dependencies and versions') + } + + with(path(process.out.short_summaries_json[0][1][0]).text) { + assert contains('one_line_summary') + assert contains('mode') + assert contains('dataset') + } + + with(path(process.out.short_summaries_json[0][1][1]).text) { + assert contains('one_line_summary') + assert contains('mode') + assert contains('dataset') + } + + assert snapshot( + file(process.out.batch_summary[0][1]).name, + process.out.full_table[0][1], + process.out.missing_busco_list[0][1], + process.out.versions[0] + ).match() + + with(file(process.out.seq_dir[0][1][0]).listFiles().collect { it.name }) { + assert contains('single_copy_busco_sequences.tar.gz') + assert contains('multi_copy_busco_sequences.tar.gz') + assert contains('fragmented_busco_sequences.tar.gz') + } + + with(file(process.out.seq_dir[0][1][1]).listFiles().collect { it.name }) { + assert contains('single_copy_busco_sequences.tar.gz') + assert contains('multi_copy_busco_sequences.tar.gz') + assert contains('fragmented_busco_sequences.tar.gz') + } + + with(path("${process.out.busco_dir[0][1]}/logs/busco.log").text) { + assert contains('DEBUG:busco.run_BUSCO') + assert contains('Results from dataset') + assert contains('how to cite BUSCO') + } + + assert process.out.single_copy_proteins == [] + assert process.out.translated_dir == [] + } + + } + + test("test_busco_eukaryote_metaeuk") { + + config './nextflow.metaeuk.config' + + when { + process { + """ + input[0] = [ + [ id:'test' ], // meta map + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true) + ] + input[1] = 'genome' + input[2] = 'eukaryota_odb10' + input[3] = [] + input[4] = [] + """ + } + } + + then { + assert process.success + + with(path(process.out.short_summaries_txt[0][1]).text) { + assert contains('BUSCO version') + assert contains('The lineage dataset is') + assert contains('BUSCO was run in mode') + assert contains('Complete BUSCOs') + assert contains('Missing BUSCOs') + assert contains('Dependencies and versions') + } + + with(path(process.out.short_summaries_json[0][1]).text) { 
+ assert contains('one_line_summary') + assert contains('mode') + assert contains('dataset') + } + + assert snapshot( + process.out.batch_summary[0][1], + process.out.full_table[0][1], + process.out.missing_busco_list[0][1], + process.out.versions[0] + ).match() + + with(file(process.out.seq_dir[0][1]).listFiles().collect { it.name }) { + assert contains('single_copy_busco_sequences.tar.gz') + assert contains('multi_copy_busco_sequences.tar.gz') + assert contains('fragmented_busco_sequences.tar.gz') + } + + with(path("${process.out.busco_dir[0][1]}/logs/busco.log").text) { + assert contains('DEBUG:busco.run_BUSCO') + assert contains('Results from dataset') + assert contains('how to cite BUSCO') + + } + + assert process.out.single_copy_proteins == [] + assert process.out.translated_dir == [] + } + + } + + test("test_busco_eukaryote_augustus") { + + config './nextflow.augustus.config' + + when { + process { + """ + input[0] = [ + [ id:'test' ], // meta map + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true) + ] + input[1] = 'genome' + input[2] = 'eukaryota_odb10' + input[3] = [] + input[4] = [] + """ + } + } + + then { + assert process.success + + assert snapshot( + process.out.batch_summary[0][1], + process.out.versions[0] + ).match() + + with(path("${process.out.busco_dir[0][1]}/logs/busco.log").text) { + assert contains('DEBUG:busco.run_BUSCO') + assert contains('Augustus did not recognize any genes') + + } + + assert process.out.short_summaries_json == [] + assert process.out.short_summaries_txt == [] + assert process.out.missing_busco_list == [] + assert process.out.full_table == [] + assert process.out.single_copy_proteins == [] + assert process.out.translated_dir == [] + } + + } + + test("test_busco_protein") { + + config './nextflow.config' + + when { + process { + """ + input[0] = [ + [ id:'test' ], // meta map + file(params.modules_testdata_base_path + 'genomics/prokaryotes/candidatus_portiera_aleyrodidarum/genome/proteome.fasta', checkIfExists: true) + ] + input[1] = 'proteins' + input[2] = 'bacteria_odb10' + input[3] = [] + input[4] = [] + """ + } + } + + then { + assert process.success + + with(path(process.out.short_summaries_txt[0][1]).text) { + assert contains('BUSCO version') + assert contains('The lineage dataset is') + assert contains('BUSCO was run in mode') + assert contains('Complete BUSCOs') + assert contains('Missing BUSCOs') + assert contains('Dependencies and versions') + } + + with(path(process.out.short_summaries_json[0][1]).text) { + assert contains('one_line_summary') + assert contains('mode') + assert contains('dataset') + } + + assert snapshot( + process.out.batch_summary[0][1], + process.out.full_table[0][1], + process.out.missing_busco_list[0][1], + process.out.versions[0] + ).match() + + with(file(process.out.seq_dir[0][1]).listFiles().collect { it.name }) { + assert contains('single_copy_busco_sequences.tar.gz') + assert contains('multi_copy_busco_sequences.tar.gz') + assert contains('fragmented_busco_sequences.tar.gz') + } + + with(path("${process.out.busco_dir[0][1]}/logs/busco.log").text) { + assert contains('DEBUG:busco.run_BUSCO') + assert contains('Results from dataset') + assert contains('how to cite BUSCO') + } + + assert process.out.single_copy_proteins == [] + assert process.out.translated_dir == [] + } + + } + + test("test_busco_transcriptome") { + + config './nextflow.config' + + when { + process { + """ + input[0] = [ + [ id:'test' ], // meta map + file(params.modules_testdata_base_path 
+ 'genomics/prokaryotes/bacteroides_fragilis/illumina/fasta/test1.contigs.fa.gz', checkIfExists: true) + ] + input[1] = 'transcriptome' + input[2] = 'bacteria_odb10' + input[3] = [] + input[4] = [] + """ + } + } + + then { + assert process.success + + with(path(process.out.short_summaries_txt[0][1]).text) { + assert contains('BUSCO version') + assert contains('The lineage dataset is') + assert contains('BUSCO was run in mode') + assert contains('Complete BUSCOs') + assert contains('Missing BUSCOs') + assert contains('Dependencies and versions') + } + + with(path(process.out.short_summaries_json[0][1]).text) { + assert contains('one_line_summary') + assert contains('mode') + assert contains('dataset') + } + + assert snapshot( + process.out.batch_summary[0][1], + process.out.full_table[0][1], + process.out.missing_busco_list[0][1], + process.out.translated_dir[0][1], + process.out.single_copy_proteins[0][1], + process.out.versions[0] + ).match() + + with(file(process.out.seq_dir[0][1]).listFiles().collect { it.name }) { + assert contains('single_copy_busco_sequences.tar.gz') + assert contains('multi_copy_busco_sequences.tar.gz') + assert contains('fragmented_busco_sequences.tar.gz') + } + + with(path("${process.out.busco_dir[0][1]}/logs/busco.log").text) { + assert contains('DEBUG:busco.run_BUSCO') + assert contains('Results from dataset') + assert contains('how to cite BUSCO') + } + } + + } + + test("minimal-stub") { + + options '-stub' + + when { + process { + """ + input[0] = [ + [ id:'test' ], // meta map + file(params.modules_testdata_base_path + 'genomics/prokaryotes/bacteroides_fragilis/genome/genome.fna.gz', checkIfExists: true) + ] + input[1] = 'genome' + input[2] = 'bacteria_odb10' + input[3] = [] + input[4] = [] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + +} diff --git a/modules/gallvp/busco/busco/tests/main.nf.test.snap b/modules/gallvp/busco/busco/tests/main.nf.test.snap new file mode 100644 index 0000000..825ddb9 --- /dev/null +++ b/modules/gallvp/busco/busco/tests/main.nf.test.snap @@ -0,0 +1,230 @@ +{ + "minimal-stub": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test-bacteria_odb10-busco.batch_summary.txt:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + + ], + "2": [ + + ], + "3": [ + + ], + "4": [ + + ], + "5": [ + + ], + "6": [ + [ + { + "id": "test" + }, + [ + + ] + ] + ], + "7": [ + + ], + "8": [ + [ + { + "id": "test" + }, + [ + [ + [ + [ + + ] + ] + ] + ] + ] + ], + "9": [ + "versions.yml:md5,3fc94714b95c2dc15399a4229d9dd1d9" + ], + "batch_summary": [ + [ + { + "id": "test" + }, + "test-bacteria_odb10-busco.batch_summary.txt:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "busco_dir": [ + [ + { + "id": "test" + }, + [ + [ + [ + [ + + ] + ] + ] + ] + ] + ], + "full_table": [ + + ], + "missing_busco_list": [ + + ], + "seq_dir": [ + [ + { + "id": "test" + }, + [ + + ] + ] + ], + "short_summaries_json": [ + + ], + "short_summaries_txt": [ + + ], + "single_copy_proteins": [ + + ], + "translated_dir": [ + + ], + "versions": [ + "versions.yml:md5,3fc94714b95c2dc15399a4229d9dd1d9" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-05-03T13:28:04.451297" + }, + "test_busco_eukaryote_augustus": { + "content": [ + "test-eukaryota_odb10-busco.batch_summary.txt:md5,3ea3bdc423a461dae514d816bdc61c89", + "versions.yml:md5,3fc94714b95c2dc15399a4229d9dd1d9" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + 
"timestamp": "2024-05-03T13:26:36.974986" + }, + "test_busco_genome_single_fasta": { + "content": [ + "test-bacteria_odb10-busco.batch_summary.txt:md5,21b3fb771cf36be917cc451540d999be", + "full_table.tsv:md5,638fe7590f442c57361554dae330eca1", + "missing_busco_list.tsv:md5,1530af4fe7673a6d001349537bcd410a", + "versions.yml:md5,3fc94714b95c2dc15399a4229d9dd1d9" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-05-03T13:22:45.07816" + }, + "test_busco_genome_multi_fasta": { + "content": [ + "test-bacteria_odb10-busco.batch_summary.txt", + [ + "full_table.tsv:md5,c657edcc7d0de0175869717551df6e83", + "full_table.tsv:md5,638fe7590f442c57361554dae330eca1" + ], + [ + "missing_busco_list.tsv:md5,aceb66e347a353cb7fca8e2a725f9112", + "missing_busco_list.tsv:md5,1530af4fe7673a6d001349537bcd410a" + ], + "versions.yml:md5,3fc94714b95c2dc15399a4229d9dd1d9" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-05-03T13:23:50.255602" + }, + "test_busco_eukaryote_metaeuk": { + "content": [ + "test-eukaryota_odb10-busco.batch_summary.txt:md5,ff6d8277e452a83ce9456bbee666feb6", + "full_table.tsv:md5,92b1b1d5cb5ea0e2093d16f00187e8c7", + "missing_busco_list.tsv:md5,0352e563de290bf804c708323c35a9e3", + "versions.yml:md5,3fc94714b95c2dc15399a4229d9dd1d9" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-05-03T13:25:38.159041" + }, + "test_busco_transcriptome": { + "content": [ + "test-bacteria_odb10-busco.batch_summary.txt:md5,8734b3f379c4c0928e5dd4ea1873dc64", + "full_table.tsv:md5,1b2ce808fdafa744c56b5f781551272d", + "missing_busco_list.tsv:md5,a6931b6470262b997b8b99ea0f1d14a4", + [ + "1024388at2.faa:md5,797d603d262a6595a112e25b73e878b0", + "1054741at2.faa:md5,cd4b928cba6b19b4437746ba507e7195", + "1093223at2.faa:md5,df9549708e5ffcfaee6a74dd70a0e5dc", + "1151822at2.faa:md5,12726afc1cdc40c13392e1596e93df3a", + "143460at2.faa:md5,d887431fd988a5556a523440f02d9594", + "1491686at2.faa:md5,d03362d19979b27306c192f1c74a84e5", + "1504821at2.faa:md5,4f5f6e5c57bac0092c1d85ded73d7e67", + "1574817at2.faa:md5,1153e55998c2929eacad2aed7d08d248", + "1592033at2.faa:md5,bb7a59e5f3a57ba12d10dabf4c77ab57", + "1623045at2.faa:md5,8fe38155feb1802beb97ef7714837bf5", + "1661836at2.faa:md5,6c6d592c2fbb0d7a4e5e1f47a15644f0", + "1674344at2.faa:md5,bb41b44e53565a54cadf0b780532fe08", + "1698718at2.faa:md5,f233860000028eb00329aa85236c71e5", + "1990650at2.faa:md5,34a2d29c5f8b6253159ddb7a43fa1829", + "223233at2.faa:md5,dec6705c7846c989296e73942f953cbc", + "402899at2.faa:md5,acc0f271f9a586d2ce1ee41669b22999", + "505485at2.faa:md5,aa0391f8fa5d9bd19b30d844d5a99845", + "665824at2.faa:md5,47f8ad43b6a6078206feb48c2e552793", + "776861at2.faa:md5,f8b90c13f7c6be828dea3bb920195e3d", + "874197at2.faa:md5,8d22a35a768debe6f376fc695d233a69", + "932854at2.faa:md5,2eff2de1ab83b22f3234a529a44e22bb", + "95696at2.faa:md5,247bfd1aef432f7b5456307768e9149c" + ], + "single_copy_proteins.faa:md5,73e2c5d6a9b0f01f2deea3cc5f21b764", + "versions.yml:md5,3fc94714b95c2dc15399a4229d9dd1d9" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-05-03T13:27:53.992893" + }, + "test_busco_protein": { + "content": [ + "test-bacteria_odb10-busco.batch_summary.txt:md5,f5a782378f9f94a748aa907381fdef91", + "full_table.tsv:md5,812ab6a0496fccab774643cf40c4f2a8", + "missing_busco_list.tsv:md5,aceb66e347a353cb7fca8e2a725f9112", + "versions.yml:md5,3fc94714b95c2dc15399a4229d9dd1d9" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + 
}, + "timestamp": "2024-05-03T13:27:12.724862" + } +} diff --git a/modules/gallvp/busco/busco/tests/nextflow.augustus.config b/modules/gallvp/busco/busco/tests/nextflow.augustus.config new file mode 100644 index 0000000..84daa69 --- /dev/null +++ b/modules/gallvp/busco/busco/tests/nextflow.augustus.config @@ -0,0 +1,5 @@ +process { + withName: 'BUSCO_BUSCO' { + ext.args = '--tar --augustus' + } +} diff --git a/modules/gallvp/busco/busco/tests/nextflow.config b/modules/gallvp/busco/busco/tests/nextflow.config new file mode 100644 index 0000000..1ec3fec --- /dev/null +++ b/modules/gallvp/busco/busco/tests/nextflow.config @@ -0,0 +1,5 @@ +process { + withName: 'BUSCO_BUSCO' { + ext.args = '--tar' + } +} diff --git a/modules/gallvp/busco/busco/tests/nextflow.metaeuk.config b/modules/gallvp/busco/busco/tests/nextflow.metaeuk.config new file mode 100644 index 0000000..c141844 --- /dev/null +++ b/modules/gallvp/busco/busco/tests/nextflow.metaeuk.config @@ -0,0 +1,5 @@ +process { + withName: 'BUSCO_BUSCO' { + ext.args = '--tar --metaeuk' + } +} diff --git a/modules/gallvp/busco/busco/tests/old_test.yml b/modules/gallvp/busco/busco/tests/old_test.yml new file mode 100644 index 0000000..75177f5 --- /dev/null +++ b/modules/gallvp/busco/busco/tests/old_test.yml @@ -0,0 +1,624 @@ +- name: busco test_busco_genome_single_fasta + command: nextflow run ./tests/modules/nf-core/busco -entry test_busco_genome_single_fasta -c ./tests/config/nextflow.config + tags: + - busco + files: + - path: output/busco/short_summary.specific.bacteria_odb10.genome.fna.json + contains: + - "one_line_summary" + - "mode" + - "dataset" + - path: output/busco/short_summary.specific.bacteria_odb10.genome.fna.txt + contains: + - "BUSCO version" + - "The lineage dataset is" + - "BUSCO was run in mode" + - "Complete BUSCOs" + - "Missing BUSCOs" + - "Dependencies and versions" + - path: output/busco/test-bacteria_odb10-busco.batch_summary.txt + md5sum: bc2440f8a68d7fbf931ff911c1c3fdfa + - path: output/busco/test-bacteria_odb10-busco/genome.fna/logs/bbtools_err.log + - path: output/busco/test-bacteria_odb10-busco/genome.fna/logs/bbtools_out.log + md5sum: 9caf1a1434414c78562eb0bbb9c0e53f + - path: output/busco/test-bacteria_odb10-busco/genome.fna/logs/hmmsearch_err.log + - path: output/busco/test-bacteria_odb10-busco/genome.fna/logs/hmmsearch_out.log + contains: + - "# hmmsearch :: search profile(s) against a sequence database" + - "# target sequence database:" + - "Internal pipeline statistics summary:" + - "[ok]" + - path: output/busco/test-bacteria_odb10-busco/genome.fna/logs/prodigal_err.log + md5sum: 538510cfc7483498210f01e53fe035ad + - path: output/busco/test-bacteria_odb10-busco/genome.fna/logs/prodigal_out.log + md5sum: 61050b0706addc9498b2088a2d6efa9a + - path: output/busco/test-bacteria_odb10-busco/genome.fna/prodigal_output/.checkpoint + contains: + - "Tool: prodigal" + - "Completed" + - "jobs" + - path: output/busco/test-bacteria_odb10-busco/genome.fna/prodigal_output/predicted_genes/predicted.faa + md5sum: 836e9a80d33d8b89168f07ddc13ee991 + - path: output/busco/test-bacteria_odb10-busco/genome.fna/prodigal_output/predicted_genes/predicted.fna + md5sum: 20eeb75f86842e6e136f02bca8b73a9f + - path: output/busco/test-bacteria_odb10-busco/genome.fna/prodigal_output/predicted_genes/tmp/prodigal_mode_single_code_11.faa + md5sum: 836e9a80d33d8b89168f07ddc13ee991 + - path: output/busco/test-bacteria_odb10-busco/genome.fna/prodigal_output/predicted_genes/tmp/prodigal_mode_single_code_11.fna + md5sum: 20eeb75f86842e6e136f02bca8b73a9f + 
- path: output/busco/test-bacteria_odb10-busco/genome.fna/prodigal_output/predicted_genes/tmp/prodigal_mode_single_code_11_err.log + md5sum: 538510cfc7483498210f01e53fe035ad + - path: output/busco/test-bacteria_odb10-busco/genome.fna/prodigal_output/predicted_genes/tmp/prodigal_mode_single_code_11_out.log + md5sum: 61050b0706addc9498b2088a2d6efa9a + - path: output/busco/test-bacteria_odb10-busco/genome.fna/run_bacteria_odb10/.bbtools_output/.checkpoint + contains: + - "Tool: bbtools" + - "Completed" + - "jobs" + - path: output/busco/test-bacteria_odb10-busco/genome.fna/run_bacteria_odb10/busco_sequences/fragmented_busco_sequences.tar.gz + - path: output/busco/test-bacteria_odb10-busco/genome.fna/run_bacteria_odb10/busco_sequences/multi_copy_busco_sequences.tar.gz + - path: output/busco/test-bacteria_odb10-busco/genome.fna/run_bacteria_odb10/busco_sequences/single_copy_busco_sequences.tar.gz + - path: output/busco/test-bacteria_odb10-busco/genome.fna/run_bacteria_odb10/full_table.tsv + md5sum: c56edab1dc1522e993c25ae2b730799f + - path: output/busco/test-bacteria_odb10-busco/genome.fna/run_bacteria_odb10/hmmer_output.tar.gz + - path: output/busco/test-bacteria_odb10-busco/genome.fna/run_bacteria_odb10/missing_busco_list.tsv + md5sum: b533ef30270f27160acce85a22d01bf5 + - path: output/busco/test-bacteria_odb10-busco/genome.fna/run_bacteria_odb10/short_summary.json + contains: + - "one_line_summary" + - "mode" + - "lineage_dataset" + - path: output/busco/test-bacteria_odb10-busco/genome.fna/run_bacteria_odb10/short_summary.txt + contains: + - "# BUSCO version is:" + - "Results:" + - "busco:" + - path: output/busco/test-bacteria_odb10-busco/logs/busco.log + contains: + - "DEBUG:busco.run_BUSCO" + - "Results from dataset" + - "how to cite BUSCO" + - path: output/busco/versions.yml + +- name: busco test_busco_genome_multi_fasta + command: nextflow run ./tests/modules/nf-core/busco -entry test_busco_genome_multi_fasta -c ./tests/config/nextflow.config + tags: + - busco + files: + - path: output/busco/short_summary.specific.bacteria_odb10.genome.fasta.json + contains: + - "one_line_summary" + - "mode" + - "dataset" + - path: output/busco/short_summary.specific.bacteria_odb10.genome.fasta.txt + contains: + - "BUSCO version" + - "The lineage dataset is" + - "BUSCO was run in mode" + - "Complete BUSCOs" + - "Missing BUSCOs" + - "Dependencies and versions" + - path: output/busco/short_summary.specific.bacteria_odb10.genome.fna.json + contains: + - "one_line_summary" + - "mode" + - "dataset" + - path: output/busco/short_summary.specific.bacteria_odb10.genome.fna.txt + contains: + - "BUSCO version" + - "The lineage dataset is" + - "BUSCO was run in mode" + - "Complete BUSCOs" + - "Missing BUSCOs" + - "Dependencies and versions" + - path: output/busco/test-bacteria_odb10-busco.batch_summary.txt + md5sum: 8c64c1a28b086ef2ee444f99cbed5f7d + - path: output/busco/test-bacteria_odb10-busco/genome.fasta/logs/bbtools_err.log + - path: output/busco/test-bacteria_odb10-busco/genome.fasta/logs/bbtools_out.log + md5sum: 8f047bdb33264d22a83920bc2c63f29a + - path: output/busco/test-bacteria_odb10-busco/genome.fasta/logs/hmmsearch_err.log + - path: output/busco/test-bacteria_odb10-busco/genome.fasta/logs/hmmsearch_out.log + contains: + - "# hmmsearch :: search profile(s) against a sequence database" + - "# target sequence database:" + - "Internal pipeline statistics summary:" + - "[ok]" + - path: output/busco/test-bacteria_odb10-busco/genome.fasta/logs/prodigal_err.log + md5sum: c1fdc6977332f53dfe7f632733bb4585 + - 
path: output/busco/test-bacteria_odb10-busco/genome.fasta/logs/prodigal_out.log + md5sum: 50752acb1c5a20be886bfdfc06635bcb + - path: output/busco/test-bacteria_odb10-busco/genome.fasta/prodigal_output/.checkpoint + contains: + - "Tool: prodigal" + - "Completed" + - "jobs" + - path: output/busco/test-bacteria_odb10-busco/genome.fasta/prodigal_output/predicted_genes/predicted.faa + md5sum: 8166471fc5f08c82fd5643ab42327f9d + - path: output/busco/test-bacteria_odb10-busco/genome.fasta/prodigal_output/predicted_genes/predicted.fna + md5sum: ddc508a18f60e7f3314534df50cdf8ca + - path: output/busco/test-bacteria_odb10-busco/genome.fasta/prodigal_output/predicted_genes/tmp/prodigal_mode_single_code_11.faa + md5sum: 8166471fc5f08c82fd5643ab42327f9d + - path: output/busco/test-bacteria_odb10-busco/genome.fasta/prodigal_output/predicted_genes/tmp/prodigal_mode_single_code_11.fna + md5sum: ddc508a18f60e7f3314534df50cdf8ca + - path: output/busco/test-bacteria_odb10-busco/genome.fasta/prodigal_output/predicted_genes/tmp/prodigal_mode_single_code_11_err.log + md5sum: c1fdc6977332f53dfe7f632733bb4585 + - path: output/busco/test-bacteria_odb10-busco/genome.fasta/prodigal_output/predicted_genes/tmp/prodigal_mode_single_code_11_out.log + md5sum: 50752acb1c5a20be886bfdfc06635bcb + - path: output/busco/test-bacteria_odb10-busco/genome.fasta/prodigal_output/predicted_genes/tmp/prodigal_mode_single_code_4.faa + md5sum: e56fd59c38248dc21ac94355dca98121 + - path: output/busco/test-bacteria_odb10-busco/genome.fasta/prodigal_output/predicted_genes/tmp/prodigal_mode_single_code_4.fna + md5sum: b365f84bf99c68357952e0b98ed7ce42 + - path: output/busco/test-bacteria_odb10-busco/genome.fasta/prodigal_output/predicted_genes/tmp/prodigal_mode_single_code_4_err.log + md5sum: e5f14d7925ba14a0f9850542f3739894 + - path: output/busco/test-bacteria_odb10-busco/genome.fasta/prodigal_output/predicted_genes/tmp/prodigal_mode_single_code_4_out.log + md5sum: d41971bfc1b621d4ffd2633bc47017ea + - path: output/busco/test-bacteria_odb10-busco/genome.fasta/run_bacteria_odb10/.bbtools_output/.checkpoint + contains: + - "Tool: bbtools" + - "Completed" + - "jobs" + - path: output/busco/test-bacteria_odb10-busco/genome.fasta/run_bacteria_odb10/busco_sequences/fragmented_busco_sequences.tar.gz + - path: output/busco/test-bacteria_odb10-busco/genome.fasta/run_bacteria_odb10/busco_sequences/multi_copy_busco_sequences.tar.gz + - path: output/busco/test-bacteria_odb10-busco/genome.fasta/run_bacteria_odb10/busco_sequences/single_copy_busco_sequences.tar.gz + - path: output/busco/test-bacteria_odb10-busco/genome.fasta/run_bacteria_odb10/full_table.tsv + md5sum: c9651b88b10871abc260ee655898e828 + - path: output/busco/test-bacteria_odb10-busco/genome.fasta/run_bacteria_odb10/hmmer_output.tar.gz + - path: output/busco/test-bacteria_odb10-busco/genome.fasta/run_bacteria_odb10/missing_busco_list.tsv + md5sum: 9939309df2da5419de88c32d1435c779 + - path: output/busco/test-bacteria_odb10-busco/genome.fasta/run_bacteria_odb10/short_summary.json + contains: + - "one_line_summary" + - "mode" + - "dataset" + - path: output/busco/test-bacteria_odb10-busco/genome.fasta/run_bacteria_odb10/short_summary.txt + contains: + - "# BUSCO version is:" + - "Results:" + - "busco:" + - path: output/busco/test-bacteria_odb10-busco/genome.fna/logs/bbtools_err.log + - path: output/busco/test-bacteria_odb10-busco/genome.fna/logs/bbtools_out.log + md5sum: 9caf1a1434414c78562eb0bbb9c0e53f + - path: output/busco/test-bacteria_odb10-busco/genome.fna/logs/hmmsearch_err.log + - path: 
output/busco/test-bacteria_odb10-busco/genome.fna/logs/hmmsearch_out.log + contains: + - "# hmmsearch :: search profile(s) against a sequence database" + - "# target sequence database:" + - "Internal pipeline statistics summary:" + - "[ok]" + - path: output/busco/test-bacteria_odb10-busco/genome.fna/logs/prodigal_err.log + md5sum: 538510cfc7483498210f01e53fe035ad + - path: output/busco/test-bacteria_odb10-busco/genome.fna/logs/prodigal_out.log + md5sum: 61050b0706addc9498b2088a2d6efa9a + - path: output/busco/test-bacteria_odb10-busco/genome.fna/prodigal_output/.checkpoint + contains: + - "Tool: prodigal" + - "Completed" + - "jobs" + - path: output/busco/test-bacteria_odb10-busco/genome.fna/prodigal_output/predicted_genes/predicted.faa + md5sum: 836e9a80d33d8b89168f07ddc13ee991 + - path: output/busco/test-bacteria_odb10-busco/genome.fna/prodigal_output/predicted_genes/predicted.fna + md5sum: 20eeb75f86842e6e136f02bca8b73a9f + - path: output/busco/test-bacteria_odb10-busco/genome.fna/prodigal_output/predicted_genes/tmp/prodigal_mode_single_code_11.faa + md5sum: 836e9a80d33d8b89168f07ddc13ee991 + - path: output/busco/test-bacteria_odb10-busco/genome.fna/prodigal_output/predicted_genes/tmp/prodigal_mode_single_code_11.fna + md5sum: 20eeb75f86842e6e136f02bca8b73a9f + - path: output/busco/test-bacteria_odb10-busco/genome.fna/prodigal_output/predicted_genes/tmp/prodigal_mode_single_code_11_err.log + md5sum: 538510cfc7483498210f01e53fe035ad + - path: output/busco/test-bacteria_odb10-busco/genome.fna/prodigal_output/predicted_genes/tmp/prodigal_mode_single_code_11_out.log + md5sum: 61050b0706addc9498b2088a2d6efa9a + - path: output/busco/test-bacteria_odb10-busco/genome.fna/run_bacteria_odb10/.bbtools_output/.checkpoint + contains: + - "Tool: bbtools" + - "Completed" + - "jobs" + - path: output/busco/test-bacteria_odb10-busco/genome.fna/run_bacteria_odb10/busco_sequences/fragmented_busco_sequences.tar.gz + - path: output/busco/test-bacteria_odb10-busco/genome.fna/run_bacteria_odb10/busco_sequences/multi_copy_busco_sequences.tar.gz + - path: output/busco/test-bacteria_odb10-busco/genome.fna/run_bacteria_odb10/busco_sequences/single_copy_busco_sequences.tar.gz + - path: output/busco/test-bacteria_odb10-busco/genome.fna/run_bacteria_odb10/full_table.tsv + md5sum: c56edab1dc1522e993c25ae2b730799f + - path: output/busco/test-bacteria_odb10-busco/genome.fna/run_bacteria_odb10/hmmer_output.tar.gz + - path: output/busco/test-bacteria_odb10-busco/genome.fna/run_bacteria_odb10/missing_busco_list.tsv + md5sum: b533ef30270f27160acce85a22d01bf5 + - path: output/busco/test-bacteria_odb10-busco/genome.fna/run_bacteria_odb10/short_summary.json + contains: + - "one_line_summary" + - "mode" + - "dataset" + - path: output/busco/test-bacteria_odb10-busco/genome.fna/run_bacteria_odb10/short_summary.txt + contains: + - "# BUSCO version is:" + - "Results:" + - "busco:" + - path: output/busco/test-bacteria_odb10-busco/logs/busco.log + contains: + - "DEBUG:busco.run_BUSCO" + - "Results from dataset" + - "how to cite BUSCO" + - path: output/busco/versions.yml + +- name: busco test_busco_eukaryote_metaeuk + command: nextflow run ./tests/modules/nf-core/busco -entry test_busco_eukaryote_metaeuk -c ./tests/config/nextflow.config + tags: + - busco + files: + - path: output/busco/short_summary.specific.eukaryota_odb10.genome.fasta.json + contains: + - "one_line_summary" + - "mode" + - "dataset" + - path: output/busco/short_summary.specific.eukaryota_odb10.genome.fasta.txt + contains: + - "BUSCO version" + - "The lineage dataset 
is" + - "BUSCO was run in mode" + - "Complete BUSCOs" + - "Missing BUSCOs" + - "Dependencies and versions" + - path: output/busco/test-eukaryota_odb10-busco.batch_summary.txt + md5sum: ff6d8277e452a83ce9456bbee666feb6 + - path: output/busco/test-eukaryota_odb10-busco/genome.fasta/logs/bbtools_err.log + - path: output/busco/test-eukaryota_odb10-busco/genome.fasta/logs/bbtools_out.log + md5sum: e63debaa653f18f7405d936050abc093 + - path: output/busco/test-eukaryota_odb10-busco/genome.fasta/logs/hmmsearch_err.log + - path: output/busco/test-eukaryota_odb10-busco/genome.fasta/logs/hmmsearch_out.log + - path: output/busco/test-eukaryota_odb10-busco/genome.fasta/logs/metaeuk_run1_err.log + - path: output/busco/test-eukaryota_odb10-busco/genome.fasta/logs/metaeuk_run1_out.log + - path: output/busco/test-eukaryota_odb10-busco/genome.fasta/logs/metaeuk_run2_err.log + - path: output/busco/test-eukaryota_odb10-busco/genome.fasta/logs/metaeuk_run2_out.log + - path: output/busco/test-eukaryota_odb10-busco/genome.fasta/run_eukaryota_odb10/.bbtools_output/.checkpoint + contains: + - "Tool: bbtools" + - "Completed" + - "jobs" + - path: output/busco/test-eukaryota_odb10-busco/genome.fasta/run_eukaryota_odb10/busco_sequences/fragmented_busco_sequences.tar.gz + - path: output/busco/test-eukaryota_odb10-busco/genome.fasta/run_eukaryota_odb10/busco_sequences/multi_copy_busco_sequences.tar.gz + - path: output/busco/test-eukaryota_odb10-busco/genome.fasta/run_eukaryota_odb10/busco_sequences/single_copy_busco_sequences.tar.gz + - path: output/busco/test-eukaryota_odb10-busco/genome.fasta/run_eukaryota_odb10/full_table.tsv + md5sum: bd880e90b9e5620a58943a3e0f9ff16b + - path: output/busco/test-eukaryota_odb10-busco/genome.fasta/run_eukaryota_odb10/hmmer_output.tar.gz + - path: output/busco/test-eukaryota_odb10-busco/genome.fasta/run_eukaryota_odb10/metaeuk_output/.checkpoint + contains: + - "Tool: metaeuk" + - "Completed" + - "jobs" + - path: output/busco/test-eukaryota_odb10-busco/genome.fasta/run_eukaryota_odb10/metaeuk_output/combined_pred_proteins.fas + - path: output/busco/test-eukaryota_odb10-busco/genome.fasta/run_eukaryota_odb10/metaeuk_output/initial_results/genome.fasta.codon.fas + - path: output/busco/test-eukaryota_odb10-busco/genome.fasta/run_eukaryota_odb10/metaeuk_output/initial_results/genome.fasta.fas + - path: output/busco/test-eukaryota_odb10-busco/genome.fasta/run_eukaryota_odb10/metaeuk_output/initial_results/genome.fasta.gff + - path: output/busco/test-eukaryota_odb10-busco/genome.fasta/run_eukaryota_odb10/metaeuk_output/initial_results/genome.fasta.headersMap.tsv + - path: output/busco/test-eukaryota_odb10-busco/genome.fasta/run_eukaryota_odb10/metaeuk_output/refseq_db_rerun.faa + md5sum: d80b8fa4cb5ed0d47d63d6aa93635bc2 + - path: output/busco/test-eukaryota_odb10-busco/genome.fasta/run_eukaryota_odb10/metaeuk_output/rerun_results/genome.fasta.codon.fas + - path: output/busco/test-eukaryota_odb10-busco/genome.fasta/run_eukaryota_odb10/metaeuk_output/rerun_results/genome.fasta.fas + - path: output/busco/test-eukaryota_odb10-busco/genome.fasta/run_eukaryota_odb10/metaeuk_output/rerun_results/genome.fasta.gff + - path: output/busco/test-eukaryota_odb10-busco/genome.fasta/run_eukaryota_odb10/metaeuk_output/rerun_results/genome.fasta.headersMap.tsv + - path: output/busco/test-eukaryota_odb10-busco/genome.fasta/run_eukaryota_odb10/missing_busco_list.tsv + md5sum: 1e8e79c540fd2e69ba0d2659d9eb2988 + - path: output/busco/test-eukaryota_odb10-busco/genome.fasta/run_eukaryota_odb10/short_summary.json + 
contains: + - "one_line_summary" + - "mode" + - "dataset" + - path: output/busco/test-eukaryota_odb10-busco/genome.fasta/run_eukaryota_odb10/short_summary.txt + contains: + - "# BUSCO version is:" + - "Results:" + - "busco:" + - path: output/busco/test-eukaryota_odb10-busco/logs/busco.log + contains: + - "DEBUG:busco.run_BUSCO" + - "Results from dataset" + - "how to cite BUSCO" + - path: output/busco/versions.yml + +- name: busco test_busco_eukaryote_augustus + command: nextflow run ./tests/modules/nf-core/busco -entry test_busco_eukaryote_augustus -c ./tests/config/nextflow.config + tags: + - busco + files: + - path: output/busco/short_summary.specific.eukaryota_odb10.genome.fasta.json + contains: + - "one_line_summary" + - "mode" + - "dataset" + - path: output/busco/short_summary.specific.eukaryota_odb10.genome.fasta.txt + contains: + - "BUSCO version" + - "The lineage dataset is" + - "BUSCO was run in mode" + - "Complete BUSCOs" + - "Missing BUSCOs" + - "Dependencies and versions" + - path: output/busco/test-eukaryota_odb10-busco.batch_summary.txt + md5sum: ff6d8277e452a83ce9456bbee666feb6 + - path: output/busco/test-eukaryota_odb10-busco/genome.fasta/logs/bbtools_err.log + - path: output/busco/test-eukaryota_odb10-busco/genome.fasta/logs/bbtools_out.log + md5sum: e63debaa653f18f7405d936050abc093 + - path: output/busco/test-eukaryota_odb10-busco/genome.fasta/logs/hmmsearch_err.log + - path: output/busco/test-eukaryota_odb10-busco/genome.fasta/logs/hmmsearch_out.log + - path: output/busco/test-eukaryota_odb10-busco/genome.fasta/logs/metaeuk_run1_err.log + - path: output/busco/test-eukaryota_odb10-busco/genome.fasta/logs/metaeuk_run1_out.log + contains: + - "metaeuk" + - "easy-predict" + - "Compute score and coverage" + - "Time for processing:" + - path: output/busco/test-eukaryota_odb10-busco/genome.fasta/logs/metaeuk_run2_err.log + - path: output/busco/test-eukaryota_odb10-busco/genome.fasta/logs/metaeuk_run2_out.log + contains: + - "metaeuk" + - "easy-predict" + - "Compute score and coverage" + - "Time for processing:" + - path: output/busco/test-eukaryota_odb10-busco/genome.fasta/run_eukaryota_odb10/.bbtools_output/.checkpoint + contains: + - "Tool: bbtools" + - "Completed" + - "jobs" + - path: output/busco/test-eukaryota_odb10-busco/genome.fasta/run_eukaryota_odb10/busco_sequences/fragmented_busco_sequences.tar.gz + - path: output/busco/test-eukaryota_odb10-busco/genome.fasta/run_eukaryota_odb10/busco_sequences/multi_copy_busco_sequences.tar.gz + - path: output/busco/test-eukaryota_odb10-busco/genome.fasta/run_eukaryota_odb10/busco_sequences/single_copy_busco_sequences.tar.gz + - path: output/busco/test-eukaryota_odb10-busco/genome.fasta/run_eukaryota_odb10/full_table.tsv + md5sum: bd880e90b9e5620a58943a3e0f9ff16b + - path: output/busco/test-eukaryota_odb10-busco/genome.fasta/run_eukaryota_odb10/hmmer_output.tar.gz + - path: output/busco/test-eukaryota_odb10-busco/genome.fasta/run_eukaryota_odb10/metaeuk_output/.checkpoint + contains: + - "Tool: metaeuk" + - "Completed" + - "jobs" + - path: output/busco/test-eukaryota_odb10-busco/genome.fasta/run_eukaryota_odb10/metaeuk_output/combined_pred_proteins.fas + - path: output/busco/test-eukaryota_odb10-busco/genome.fasta/run_eukaryota_odb10/metaeuk_output/initial_results/genome.fasta.codon.fas + - path: output/busco/test-eukaryota_odb10-busco/genome.fasta/run_eukaryota_odb10/metaeuk_output/initial_results/genome.fasta.fas + - path: 
output/busco/test-eukaryota_odb10-busco/genome.fasta/run_eukaryota_odb10/metaeuk_output/initial_results/genome.fasta.gff + - path: output/busco/test-eukaryota_odb10-busco/genome.fasta/run_eukaryota_odb10/metaeuk_output/initial_results/genome.fasta.headersMap.tsv + - path: output/busco/test-eukaryota_odb10-busco/genome.fasta/run_eukaryota_odb10/metaeuk_output/refseq_db_rerun.faa + md5sum: d80b8fa4cb5ed0d47d63d6aa93635bc2 + - path: output/busco/test-eukaryota_odb10-busco/genome.fasta/run_eukaryota_odb10/metaeuk_output/rerun_results/genome.fasta.codon.fas + - path: output/busco/test-eukaryota_odb10-busco/genome.fasta/run_eukaryota_odb10/metaeuk_output/rerun_results/genome.fasta.fas + - path: output/busco/test-eukaryota_odb10-busco/genome.fasta/run_eukaryota_odb10/metaeuk_output/rerun_results/genome.fasta.gff + - path: output/busco/test-eukaryota_odb10-busco/genome.fasta/run_eukaryota_odb10/metaeuk_output/rerun_results/genome.fasta.headersMap.tsv + - path: output/busco/test-eukaryota_odb10-busco/genome.fasta/run_eukaryota_odb10/missing_busco_list.tsv + md5sum: 1e8e79c540fd2e69ba0d2659d9eb2988 + - path: output/busco/test-eukaryota_odb10-busco/genome.fasta/run_eukaryota_odb10/short_summary.json + contains: + - "one_line_summary" + - "mode" + - "dataset" + - path: output/busco/test-eukaryota_odb10-busco/genome.fasta/run_eukaryota_odb10/short_summary.txt + contains: + - "# BUSCO version is:" + - "Results:" + - "busco:" + - path: output/busco/test-eukaryota_odb10-busco/logs/busco.log + contains: + - "DEBUG:busco.run_BUSCO" + - "Results from dataset" + - "how to cite BUSCO" + - path: output/busco/versions.yml + +- name: busco test_busco_protein + command: nextflow run ./tests/modules/nf-core/busco -entry test_busco_protein -c ./tests/config/nextflow.config + tags: + - busco + files: + - path: output/busco/short_summary.specific.bacteria_odb10.proteome.fasta.json + contains: + - "one_line_summary" + - "mode" + - "dataset" + - path: output/busco/short_summary.specific.bacteria_odb10.proteome.fasta.txt + contains: + - "BUSCO version" + - "The lineage dataset is" + - "BUSCO was run in mode" + - "Complete BUSCOs" + - "Missing BUSCOs" + - "Dependencies and versions" + - path: output/busco/test-bacteria_odb10-busco.batch_summary.txt + md5sum: 7a65e6cbb6c56a2ea4e739ae0aa3297d + - path: output/busco/test-bacteria_odb10-busco/logs/busco.log + contains: + - "DEBUG:busco.run_BUSCO" + - "Results from dataset" + - "how to cite BUSCO" + - path: output/busco/test-bacteria_odb10-busco/proteome.fasta/logs/hmmsearch_err.log + - path: output/busco/test-bacteria_odb10-busco/proteome.fasta/logs/hmmsearch_out.log + contains: + - "# hmmsearch :: search profile(s) against a sequence database" + - "# target sequence database:" + - "Internal pipeline statistics summary:" + - "[ok]" + - path: output/busco/test-bacteria_odb10-busco/proteome.fasta/run_bacteria_odb10/busco_sequences/fragmented_busco_sequences.tar.gz + - path: output/busco/test-bacteria_odb10-busco/proteome.fasta/run_bacteria_odb10/busco_sequences/multi_copy_busco_sequences.tar.gz + - path: output/busco/test-bacteria_odb10-busco/proteome.fasta/run_bacteria_odb10/busco_sequences/single_copy_busco_sequences.tar.gz + - path: output/busco/test-bacteria_odb10-busco/proteome.fasta/run_bacteria_odb10/full_table.tsv + md5sum: 0e34f1011cd83ea1d5d5103ec62b8922 + - path: output/busco/test-bacteria_odb10-busco/proteome.fasta/run_bacteria_odb10/hmmer_output.tar.gz + - path: output/busco/test-bacteria_odb10-busco/proteome.fasta/run_bacteria_odb10/missing_busco_list.tsv + 
md5sum: 9939309df2da5419de88c32d1435c779 + - path: output/busco/test-bacteria_odb10-busco/proteome.fasta/run_bacteria_odb10/short_summary.json + contains: + - "one_line_summary" + - "mode" + - "dataset" + - path: output/busco/test-bacteria_odb10-busco/proteome.fasta/run_bacteria_odb10/short_summary.txt + contains: + - "# BUSCO version is:" + - "Results:" + - "busco:" + - path: output/busco/versions.yml + +- name: busco test_busco_transcriptome + command: nextflow run ./tests/modules/nf-core/busco -entry test_busco_transcriptome -c ./tests/config/nextflow.config + tags: + - busco + files: + - path: output/busco/short_summary.specific.bacteria_odb10.test1.contigs.fa.json + contains: + - "one_line_summary" + - "mode" + - "dataset" + - path: output/busco/short_summary.specific.bacteria_odb10.test1.contigs.fa.txt + contains: + - "BUSCO version" + - "The lineage dataset is" + - "BUSCO was run in mode" + - "Complete BUSCOs" + - "Missing BUSCOs" + - "Dependencies and versions" + - path: output/busco/test-bacteria_odb10-busco.batch_summary.txt + md5sum: 46118ecf60d1b87d22b96d80f4f03632 + - path: output/busco/test-bacteria_odb10-busco/logs/busco.log + contains: + - "DEBUG:busco.run_BUSCO" + - "Results from dataset" + - "how to cite BUSCO" + - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/blast_db/.checkpoint + contains: + - "Tool: makeblastdb" + - "Completed" + - "jobs" + - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/blast_db/test1.contigs.fa.ndb + md5sum: 3788c017fe5e6f0f58224e9cdd21822b + - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/blast_db/test1.contigs.fa.nhr + md5sum: 8ecd2ce392bb5e25ddbe1d85f879582e + - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/blast_db/test1.contigs.fa.nin + - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/blast_db/test1.contigs.fa.njs + - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/blast_db/test1.contigs.fa.not + md5sum: 0c340e376c7e85d19f82ec1a833e6a6e + - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/blast_db/test1.contigs.fa.nsq + md5sum: 532d5c0a7ea00fe95ca3c97cb3be6198 + - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/blast_db/test1.contigs.fa.ntf + md5sum: de1250813f0c7affc6d12dac9d0fb6bb + - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/blast_db/test1.contigs.fa.nto + md5sum: ff74bd41f9cc9b011c63a32c4f7693bf + - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/logs/hmmsearch_err.log + - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/logs/hmmsearch_out.log + contains: + - "# hmmsearch :: search profile(s) against a sequence database" + - "# target sequence database:" + - "Internal pipeline statistics summary:" + - "[ok]" + - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/logs/makeblastdb_err.log + - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/logs/makeblastdb_out.log + contains: + - "Building a new DB" + - "Adding sequences from FASTA" + - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/logs/tblastn_err.log + - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/logs/tblastn_out.log + - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/run_bacteria_odb10/blast_output/.checkpoint + contains: + - "Tool: tblastn" + - "Completed" + - "jobs" + - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/run_bacteria_odb10/blast_output/coordinates.tsv + md5sum: cc30eed321944af293452bdbcfc24292 + - path: 
output/busco/test-bacteria_odb10-busco/test1.contigs.fa/run_bacteria_odb10/blast_output/sequences/k141_101.temp + md5sum: 73e9c65fc83fedc58f57f09b08f08238 + - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/run_bacteria_odb10/blast_output/sequences/k141_119.temp + md5sum: 7fa4cc7955ec0cc36330a221c579b975 + - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/run_bacteria_odb10/blast_output/sequences/k141_129.temp + md5sum: 6f1601c875d019e3f6f1f98ed8e988d4 + - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/run_bacteria_odb10/blast_output/sequences/k141_138.temp + md5sum: 3f8e034686cd240c2330650d791bcae2 + - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/run_bacteria_odb10/blast_output/sequences/k141_143.temp + md5sum: df3dfa8e9ba30ed70cf75b5e7abf2179 + - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/run_bacteria_odb10/blast_output/sequences/k141_172.temp + md5sum: 7d463e0e6cf7169bc9077d8dc776dda1 + - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/run_bacteria_odb10/blast_output/sequences/k141_178.temp + md5sum: 2288edf7fa4f88f51b4cf4d94086f77e + - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/run_bacteria_odb10/blast_output/sequences/k141_188.temp + md5sum: 029906abbad6d87fc57830dd548cac24 + - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/run_bacteria_odb10/blast_output/sequences/k141_195.temp + md5sum: 4937f3b348774a31b1160a00297c29cc + - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/run_bacteria_odb10/blast_output/sequences/k141_210.temp + md5sum: afcb20ba4c466479d6b91c8c62251e1f + - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/run_bacteria_odb10/blast_output/sequences/k141_232.temp + md5sum: 2e1e823ce017345bd998191a39fa9924 + - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/run_bacteria_odb10/blast_output/sequences/k141_268.temp + md5sum: 08c2d82c34ecffbe1c638b410349412e + - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/run_bacteria_odb10/blast_output/sequences/k141_29.temp + md5sum: cd9b63cf93524284781535c888313764 + - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/run_bacteria_odb10/blast_output/sequences/k141_44.temp + md5sum: d1929b742b24ebe379bf4801ca882dca + - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/run_bacteria_odb10/blast_output/sequences/k141_58.temp + md5sum: 69215765b010c05336538cb322c900b3 + - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/run_bacteria_odb10/blast_output/sequences/k141_72.temp + md5sum: 6feaa1cc3b0899a147ea9d466878f3e3 + - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/run_bacteria_odb10/blast_output/sequences/k141_80.temp + md5sum: 13625eae14e860a96ce17cd4e37e9d01 + - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/run_bacteria_odb10/blast_output/sequences/k141_81.temp + md5sum: e14b2484649b0dbc8926815c207b806d + - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/run_bacteria_odb10/blast_output/sequences/k141_93.temp + md5sum: 6902c93691df00e690faea914c71839e + - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/run_bacteria_odb10/blast_output/sequences/k141_97.temp + md5sum: 0a0d9d38a83acbd5ad43c29cdf429988 + - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/run_bacteria_odb10/blast_output/tblastn.tsv + contains: + - "TBLASTN" + - "BLAST processed" + - "queries" + - path: 
output/busco/test-bacteria_odb10-busco/test1.contigs.fa/run_bacteria_odb10/busco_sequences/fragmented_busco_sequences.tar.gz + - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/run_bacteria_odb10/busco_sequences/multi_copy_busco_sequences.tar.gz + - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/run_bacteria_odb10/busco_sequences/single_copy_busco_sequences.tar.gz + - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/run_bacteria_odb10/full_table.tsv + md5sum: 24df25199e13c88bd892fc3e7b541ca0 + - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/run_bacteria_odb10/hmmer_output.tar.gz + - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/run_bacteria_odb10/missing_busco_list.tsv + md5sum: e7232e2b8cca4fdfdd9e363b39ebbc81 + - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/run_bacteria_odb10/short_summary.json + contains: + - "one_line_summary" + - "mode" + - "dataset" + - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/run_bacteria_odb10/short_summary.txt + contains: + - "# BUSCO version is:" + - "Results:" + - "busco:" + - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/run_bacteria_odb10/single_copy_proteins.faa + md5sum: e04b9465733577ae6e4bccb7aa01e720 + - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/translated_proteins/1024388at2.faa + md5sum: 7333c39a20258f20c7019ea0cd83157c + - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/translated_proteins/1054741at2.faa + md5sum: ebb481e77a824685fbe04d8a2f3a0d7d + - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/translated_proteins/1093223at2.faa + md5sum: 34621c7d499034e8f8e6b92fd4020a93 + - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/translated_proteins/1151822at2.faa + md5sum: aa89ca381c1c70c9c4e1380351ca7c2a + - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/translated_proteins/143460at2.faa + md5sum: f2e91d78b8dd3722840378789f29e8c8 + - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/translated_proteins/1491686at2.faa + md5sum: 73c25aef5c9cba7f4151804941b146ea + - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/translated_proteins/1504821at2.faa + md5sum: cda556018d1f84ebe517e89f6fc107d0 + - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/translated_proteins/1574817at2.faa + md5sum: a9096c9fb8b25c78a72871ab0463acdc + - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/translated_proteins/1592033at2.faa + md5sum: e463d25ce186c0cebfd749474f3a4c64 + - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/translated_proteins/1623045at2.faa + md5sum: f2cfd241590c6d8377286d6135480937 + - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/translated_proteins/1661836at2.faa + md5sum: 586569546fb9861502468e3d9ba2775c + - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/translated_proteins/1674344at2.faa + md5sum: 24c658bee14ad84b062d81ad96642eb8 + - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/translated_proteins/1698718at2.faa + md5sum: 0b8e26ddf5149bbd8805be7af125208d + - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/translated_proteins/1990650at2.faa + md5sum: 159320712ee01fb2ccb31a25df44eead + - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/translated_proteins/223233at2.faa + md5sum: 812629c0b06ac3d18661c2ca78de0c08 + - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/translated_proteins/402899at2.faa + md5sum: 
f7ff4e1591342d30b77392a2e84b57d9 + - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/translated_proteins/505485at2.faa + md5sum: 7b34a24fc49c540d46fcf96ff5129564 + - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/translated_proteins/665824at2.faa + md5sum: 4cff2df64f6bcaff8bc19c234c8bcccd + - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/translated_proteins/776861at2.faa + md5sum: 613af7a3fea30ea2bece66f603b9284a + - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/translated_proteins/874197at2.faa + md5sum: a7cd1b13c9ef91c7ef4e31614166f197 + - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/translated_proteins/932854at2.faa + md5sum: fe313ffd5efdb0fed887a04fba352552 + - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/translated_proteins/95696at2.faa + md5sum: 4e1f30a2fea4dfbf9bb7fae2700622a0 + - path: output/busco/versions.yml diff --git a/modules/gallvp/busco/busco/tests/tags.yml b/modules/gallvp/busco/busco/tests/tags.yml new file mode 100644 index 0000000..7c4d283 --- /dev/null +++ b/modules/gallvp/busco/busco/tests/tags.yml @@ -0,0 +1,2 @@ +busco/busco: + - "modules/nf-core/busco/busco/**" diff --git a/modules/pfr/edta/edta/environment.yml b/modules/gallvp/busco/generateplot/environment.yml similarity index 75% rename from modules/pfr/edta/edta/environment.yml rename to modules/gallvp/busco/generateplot/environment.yml index 63160e8..766c0f4 100644 --- a/modules/pfr/edta/edta/environment.yml +++ b/modules/gallvp/busco/generateplot/environment.yml @@ -1,9 +1,7 @@ --- # yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json -name: "edta_edta" channels: - conda-forge - bioconda - - defaults dependencies: - - "bioconda::edta=2.1.0" + - bioconda::busco=5.7.1 diff --git a/modules/gallvp/busco/generateplot/main.nf b/modules/gallvp/busco/generateplot/main.nf new file mode 100644 index 0000000..6a4b339 --- /dev/null +++ b/modules/gallvp/busco/generateplot/main.nf @@ -0,0 +1,45 @@ +process BUSCO_GENERATEPLOT { + label 'process_single' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/busco:5.7.1--pyhdfd78af_0': + 'biocontainers/busco:5.7.1--pyhdfd78af_0' }" + + input: + path short_summary_txt, stageAs: 'busco/*' + + output: + path '*.png' , emit: png + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: 'busco_figure' + """ + generate_plot.py \\ + $args \\ + -wd busco + + mv ./busco/busco_figure.png ${prefix}.png + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + busco: \$( busco --version 2>&1 | sed 's/^BUSCO //' ) + END_VERSIONS + """ + + stub: + def prefix = task.ext.prefix ?: 'busco_figure' + """ + touch ${prefix}.png + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + busco: \$( busco --version 2>&1 | sed 's/^BUSCO //' ) + END_VERSIONS + """ +} diff --git a/modules/gallvp/busco/generateplot/meta.yml b/modules/gallvp/busco/generateplot/meta.yml new file mode 100644 index 0000000..72ad2c9 --- /dev/null +++ b/modules/gallvp/busco/generateplot/meta.yml @@ -0,0 +1,40 @@ +name: "busco_generateplot" +description: BUSCO plot generation tool +keywords: + - genome + - fasta + - annotation + - busco + - transcriptome + - quality control +tools: + - busco: + description: BUSCO provides measures for quantitative assessment of genome assembly, + gene set, and transcriptome completeness based on evolutionarily informed expectations + of gene content from near-universal single-copy orthologs selected from OrthoDB. + homepage: https://busco.ezlab.org/ + documentation: https://busco.ezlab.org/busco_userguide.html + tool_dev_url: https://gitlab.com/ezlab/busco + doi: "10.1007/978-1-4939-9173-0_14" + licence: ["MIT"] + identifier: biotools:busco +input: + - - short_summary_txt: + type: file + description: One or more short summary txt files from BUSCO + pattern: "short_summary.*.txt" +output: + - png: + - "*.png": + type: file + description: A summary plot in png format + pattern: "*.png" + - versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@GallVp" +maintainers: + - "@GallVp" diff --git a/modules/gallvp/busco/generateplot/tests/main.nf.test b/modules/gallvp/busco/generateplot/tests/main.nf.test new file mode 100644 index 0000000..6a43fa7 --- /dev/null +++ b/modules/gallvp/busco/generateplot/tests/main.nf.test @@ -0,0 +1,72 @@ +nextflow_process { + + name "Test Process BUSCO_GENERATEPLOT" + script "../main.nf" + process "BUSCO_GENERATEPLOT" + + tag "modules" + tag "modules_gallvp" + tag "busco" + tag "busco/busco" + tag "busco/generateplot" + + test("bacteroides_fragilis-genome_fna_gz") { + + setup { + run("BUSCO_BUSCO") { + script "../../busco" + process { + """ + input[0] = [ + [ id:'test' ], // meta map + file(params.modules_testdata_base_path + 'genomics/prokaryotes/bacteroides_fragilis/genome/genome.fna.gz', checkIfExists: true) + ] + input[1] = 'genome' + input[2] = 'bacteria_odb10' + input[3] = [] + input[4] = [] + """ + } + } + } + + when { + process { + """ + input[0] = BUSCO_BUSCO.out.short_summaries_txt.map { meta, summary -> summary } + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out.versions).match("versions") }, + { assert process.out.png != null } // PNGs with same data but different meta-data. Not sure how to get around this, yet! 
+ ) + } + + } + + test("stub") { + + options "-stub" + + when { + process { + """ + input[0] = file(params.modules_testdata_base_path + 'genomics/prokaryotes/bacteroides_fragilis/genome/genome.fna.gz', checkIfExists: true) + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + +} \ No newline at end of file diff --git a/modules/gallvp/busco/generateplot/tests/main.nf.test.snap b/modules/gallvp/busco/generateplot/tests/main.nf.test.snap new file mode 100644 index 0000000..d9773ec --- /dev/null +++ b/modules/gallvp/busco/generateplot/tests/main.nf.test.snap @@ -0,0 +1,37 @@ +{ + "versions": { + "content": [ + [ + "versions.yml:md5,726fa3440ea3a0b2e9d032d7e4d25e74" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-05-03T15:40:01.523993" + }, + "stub": { + "content": [ + { + "0": [ + "busco_figure.png:md5,d41d8cd98f00b204e9800998ecf8427e" + ], + "1": [ + "versions.yml:md5,726fa3440ea3a0b2e9d032d7e4d25e74" + ], + "png": [ + "busco_figure.png:md5,d41d8cd98f00b204e9800998ecf8427e" + ], + "versions": [ + "versions.yml:md5,726fa3440ea3a0b2e9d032d7e4d25e74" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-05-03T15:40:11.864276" + } +} \ No newline at end of file diff --git a/modules/gallvp/busco/generateplot/tests/tags.yml b/modules/gallvp/busco/generateplot/tests/tags.yml new file mode 100644 index 0000000..b6548a6 --- /dev/null +++ b/modules/gallvp/busco/generateplot/tests/tags.yml @@ -0,0 +1,2 @@ +busco/generateplot: + - "modules/nf-core/busco/generateplot/**" diff --git a/modules/gallvp/custom/restoregffids/environment.yml b/modules/gallvp/custom/restoregffids/environment.yml new file mode 100644 index 0000000..68eaa88 --- /dev/null +++ b/modules/gallvp/custom/restoregffids/environment.yml @@ -0,0 +1,6 @@ +channels: + - conda-forge + - bioconda + +dependencies: + - "python=3.10.2" diff --git a/modules/pfr/custom/restoregffids/main.nf b/modules/gallvp/custom/restoregffids/main.nf similarity index 100% rename from modules/pfr/custom/restoregffids/main.nf rename to modules/gallvp/custom/restoregffids/main.nf diff --git a/modules/pfr/custom/restoregffids/meta.yml b/modules/gallvp/custom/restoregffids/meta.yml similarity index 51% rename from modules/pfr/custom/restoregffids/meta.yml rename to modules/gallvp/custom/restoregffids/meta.yml index 4e42b82..dea7577 100644 --- a/modules/pfr/custom/restoregffids/meta.yml +++ b/modules/gallvp/custom/restoregffids/meta.yml @@ -1,4 +1,3 @@ ---- # yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/meta-schema.json name: "custom_restoregffids" description: | @@ -22,36 +21,39 @@ tools: documentation: "https://docs.python.org/3/" tool_dev_url: "https://github.com/python/cpython" licence: ["MIT"] + identifier: "" input: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. `[ id:'test' ]` - - gff3: - type: file - description: Input gff3 file - pattern: "*.{gff,gff3}" - - ids_tsv: - type: file - description: | - A TSV file with original (first column) and new ids (second column) - if id change was required - pattern: "*.tsv" + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
`[ id:'test' ]` + - gff3: + type: file + description: Input gff3 file + pattern: "*.{gff,gff3}" + - - ids_tsv: + type: file + description: | + A TSV file with original (first column) and new ids (second column) + if id change was required + pattern: "*.tsv" output: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. `[ id:'test' ]` - restored_ids_gff3: - type: file - description: GFF3 file with restored ids - pattern: "*.restored.ids.gff3" + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'test' ]` + - "*.restored.ids.gff3": + type: file + description: GFF3 file with restored ids + pattern: "*.restored.ids.gff3" - versions: - type: file - description: File containing software versions - pattern: "versions.yml" + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" authors: - "@GallVp" maintainers: diff --git a/modules/pfr/custom/restoregffids/templates/restore_gff_ids.py b/modules/gallvp/custom/restoregffids/templates/restore_gff_ids.py similarity index 90% rename from modules/pfr/custom/restoregffids/templates/restore_gff_ids.py rename to modules/gallvp/custom/restoregffids/templates/restore_gff_ids.py index d0699de..2bde7ab 100755 --- a/modules/pfr/custom/restoregffids/templates/restore_gff_ids.py +++ b/modules/gallvp/custom/restoregffids/templates/restore_gff_ids.py @@ -10,7 +10,7 @@ def create_name_mapping_from_tsv(file_path): dictionary = {} - with open(file_path, "r") as tsv_file: + with open(file_path) as tsv_file: for line in tsv_file: columns = line.strip().split("\\t") if len(columns) != 2: @@ -24,11 +24,11 @@ def create_name_mapping_from_tsv(file_path): def restore_gff3_ids(new_to_orig_ids, file_path, output_file_name): # Write versions - with open(f"versions.yml", "w") as f_versions: + with open("versions.yml", "w") as f_versions: f_versions.write('"${task.process}":\\n') f_versions.write(f" python: {python_version()}\\n") - with open(file_path, "r") as input_gff3_file: + with open(file_path) as input_gff3_file: input_lines = input_gff3_file.readlines() with open(output_file_name, "w") as output_gff_file: diff --git a/modules/pfr/custom/restoregffids/tests/main.nf.test b/modules/gallvp/custom/restoregffids/tests/main.nf.test similarity index 85% rename from modules/pfr/custom/restoregffids/tests/main.nf.test rename to modules/gallvp/custom/restoregffids/tests/main.nf.test index cc374b7..2c248c9 100644 --- a/modules/pfr/custom/restoregffids/tests/main.nf.test +++ b/modules/gallvp/custom/restoregffids/tests/main.nf.test @@ -5,7 +5,7 @@ nextflow_process { process "CUSTOM_RESTOREGFFIDS" tag "modules" - tag "modules_nfcore" + tag "modules_gallvp" tag "custom" tag "custom/restoregffids" @@ -15,7 +15,7 @@ nextflow_process { """ input[0] = [ [ id:'test', single_end:false ], // meta map - file(params.test_data['sarscov2']['genome']['genome_gff3'], checkIfExists: true) + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.gff3', checkIfExists: true) ] input[1] = Channel.of('Chr1\tMT192765.1').collectFile(name: 'id_map.tsv', newLine: true) """ @@ -43,7 +43,7 @@ nextflow_process { """ input[0] = [ [ id:'test', single_end:false ], // meta map - file(params.test_data['sarscov2']['genome']['genome_gff3'], checkIfExists: true) + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.gff3', checkIfExists: true) ] input[1] = Channel.of('Chr1\tMT192765.1').collectFile(name: 'id_map.tsv', newLine: true) """ @@ -54,7 +54,7 @@ 
nextflow_process { assertAll( { assert process.success }, { assert process.out.restored_ids_gff3 != null }, - { assert snapshot(process.out.versions).match("versions") } + { assert snapshot(process.out.versions).match("versions_stub") } ) } diff --git a/modules/pfr/custom/restoregffids/tests/main.nf.test.snap b/modules/gallvp/custom/restoregffids/tests/main.nf.test.snap similarity index 71% rename from modules/pfr/custom/restoregffids/tests/main.nf.test.snap rename to modules/gallvp/custom/restoregffids/tests/main.nf.test.snap index ffe43e7..ebe850a 100644 --- a/modules/pfr/custom/restoregffids/tests/main.nf.test.snap +++ b/modules/gallvp/custom/restoregffids/tests/main.nf.test.snap @@ -28,14 +28,34 @@ ] } ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, "timestamp": "2023-12-07T13:49:30.047425" }, + "versions_stub": { + "content": [ + [ + "versions.yml:md5,32d31c4f1da9a3d1be013fd163e5867e" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-05-10T15:26:11.66528" + }, "versions": { "content": [ [ "versions.yml:md5,32d31c4f1da9a3d1be013fd163e5867e" ] ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, "timestamp": "2023-12-07T13:49:30.071175" } } \ No newline at end of file diff --git a/modules/pfr/tsebra/environment.yml b/modules/gallvp/custom/rmouttogff3/environment.yml similarity index 75% rename from modules/pfr/tsebra/environment.yml rename to modules/gallvp/custom/rmouttogff3/environment.yml index 3505512..d2f633e 100644 --- a/modules/pfr/tsebra/environment.yml +++ b/modules/gallvp/custom/rmouttogff3/environment.yml @@ -1,9 +1,7 @@ --- # yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json -name: "tsebra" channels: - conda-forge - bioconda - - defaults dependencies: - - "bioconda::tsebra=1.1.2.4" + - "bioconda::perl-bioperl=1.7.8" diff --git a/modules/gallvp/custom/rmouttogff3/main.nf b/modules/gallvp/custom/rmouttogff3/main.nf new file mode 100644 index 0000000..49ca098 --- /dev/null +++ b/modules/gallvp/custom/rmouttogff3/main.nf @@ -0,0 +1,34 @@ +process CUSTOM_RMOUTTOGFF3 { + tag "$meta.id" + label 'process_single' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/perl-bioperl:1.7.8--hdfd78af_1': + 'biocontainers/perl-bioperl:1.7.8--hdfd78af_1' }" + + input: + tuple val(meta), path(rmout) + + output: + tuple val(meta), path("*.gff3") , emit: gff3 + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + shell: + prefix = task.ext.prefix ?: "${meta.id}" + template 'rmouttogff3.pl' + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + """ + touch ${prefix}.gff3 + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + perl: \$(perl --version | sed -n 's|This is perl.*(v\\(.*\\)) .*|\\1|p' ) + END_VERSIONS + """ +} diff --git a/modules/gallvp/custom/rmouttogff3/meta.yml b/modules/gallvp/custom/rmouttogff3/meta.yml new file mode 100644 index 0000000..1f20ed8 --- /dev/null +++ b/modules/gallvp/custom/rmouttogff3/meta.yml @@ -0,0 +1,46 @@ +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/meta-schema.json +name: "custom_rmouttogff3" +description: Convert RepeatMasker out file to gff3 +keywords: + - genomics + - repeat + - gff +tools: + - "perl": + description: "Bioinformatics Toolkit" + homepage: "https://www.perl.org" + documentation: "https://www.perl.org" + tool_dev_url: "https://www.perl.org" + licence: ["GPL"] + identifier: "" + +input: + # Only when we have meta + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'sample1' ]` + - rmout: + type: file + description: RepeatMasker out file + pattern: "*.out" +output: + - gff3: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'sample1' ]` + - "*.gff3": + type: file + description: GFF3 formatted output + - versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@GallVp" +maintainers: + - "@GallVp" diff --git a/modules/gallvp/custom/rmouttogff3/templates/rmouttogff3.pl b/modules/gallvp/custom/rmouttogff3/templates/rmouttogff3.pl new file mode 100644 index 0000000..46ba2ed --- /dev/null +++ b/modules/gallvp/custom/rmouttogff3/templates/rmouttogff3.pl @@ -0,0 +1,92 @@ +#!/usr/bin/env perl +use strict; +use warnings; + +# Originally written by Ross Crowhurst +# Adapted by Usman Rashid for Nextflow +# AS IS WHERE IS - USE AT YOUR OWN RISK +# License: MIT + +=head1 DESCRIPTION + +Converts a RepeatMasker .out file to gff3 format. The +standard gff output from RepeatMasker is gff version 2. + +RepeatMasker "out.gff" + +seq1 RepeatMasker similarity 1 1295 28.1 - . Target "Motif:Gypsy7-PTR_I-int" 3544 4847 + +RepeatMasker "out" file has the following format: + +SW perc perc perc query position in query matching repeat position in repeat +score div. del. ins. sequence begin end (left) repeat class/family begin end (left) ID + +4634 28.1 1.2 0.5 seq1 1 1295 (0) C Gypsy7-PTR_I-int LTR/Gypsy (1215) 4847 3544 1 + +After conversion to gff3: + +seq1 RepeatMasker dispersed_repeat 1 1295 4634 - . 
ID=1_seq1_1_1295_Gypsy7-PTR_I-int;Name=Gypsy7-PTR_I-int;class=LTR;family=Gypsy;percDiv=28.1;percDel=1.2;percIns=0.5 + +Notes: + +- The Target attribute is not added in this implementation + +=cut + +my $repeatmaskerOutFile = "!{rmout}"; +my $gff3Outfile = "!{prefix}.gff3"; + +my $source = "RepeatMasker"; +my $type = "dispersed_repeat"; + +open(IN, "<$repeatmaskerOutFile") or die "ERROR can not open repeatmasker out file\n"; +open(OUT, ">$gff3Outfile") or die "ERROR can not open gff3 out file\n"; +select OUT; print OUT "##gff-version 3\n"; +my $lastqName = ""; +while ( my $line = <IN> ) +{ + next if ($line =~ m/^$/); + next if ($line =~ m/(perc|score|SW)/); + chomp $line; + $line =~ s/^([ ]+)//; + $line =~ s/ / /g; + $line =~ s/ /\t/g; + $line =~ s/([\t]+)/\t/g; + my ($SWscore, $percDiv, $percDel, $percIns, $qName, $qStart, $qEnd, $left, $ori, $repeatName, $repeatClassFamily, $rStart, $rEnd, $rLeft, $rId, @junk) = split/\t/, $line; + ($ori eq "C") and $ori = "-"; + my $id = join("_", $rId, $qName, $qStart, $qEnd, $repeatName); + my ($class, $family) = split/\//, $repeatClassFamily; + $class ||= "na"; + $family ||= "na"; + my $gff3Line = join("\t", + $qName, + "$source", + "$type", + $qStart, + $qEnd, + $SWscore, + $ori, + ".", + "ID=$id;Name=$repeatName;class=$class;family=$family;percDiv=$percDiv;percDel=$percDel;percIns=$percIns"); + if (($lastqName ne $qName) and ($lastqName ne "")) + { + select OUT; print OUT "###\n"; + } + select OUT; print OUT "$gff3Line\n"; + $lastqName = $qName; +} +select OUT; print OUT "###\n"; + +close(OUT); +close(IN); + +# Capture the Perl version +my $perl_version = `perl --version`; +$perl_version =~ s/.*\(v(.*?)\).*/$1/s; + +# Open the file and write the YAML content +open my $fh, '>', 'versions.yml' or die "Could not open versions.yml file"; +print $fh qq{!{task.process}:\n perl: $perl_version\n}; +close $fh; + +exit(0); diff --git a/modules/gallvp/custom/rmouttogff3/tests/main.nf.test b/modules/gallvp/custom/rmouttogff3/tests/main.nf.test new file mode 100644 index 0000000..6b9b7d9 --- /dev/null +++ b/modules/gallvp/custom/rmouttogff3/tests/main.nf.test @@ -0,0 +1,68 @@ +nextflow_process { + + name "Test Process CUSTOM_RMOUTTOGFF3" + script "../main.nf" + process "CUSTOM_RMOUTTOGFF3" + + tag "modules" + tag "modules_gallvp" + tag "custom" + tag "custom/rmouttogff3" + tag "repeatmasker/repeatmasker" + + setup { + run("REPEATMASKER_REPEATMASKER") { + script "../../../repeatmasker/repeatmasker/main.nf" + process { + """ + input[0] = [ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) + ] + input[1] = [] + """ + } + } + } + + test("sarscov2 - genome - fasta - repeatmasker - out") { + + when { + process { + """ + input[0] = REPEATMASKER_REPEATMASKER.out.out + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + + test("sarscov2 - genome - fasta - repeatmasker - out - stub") { + + options "-stub" + + when { + process { + """ + input[0] = REPEATMASKER_REPEATMASKER.out.out + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + +} diff --git a/modules/gallvp/custom/rmouttogff3/tests/main.nf.test.snap b/modules/gallvp/custom/rmouttogff3/tests/main.nf.test.snap new file mode 100644 index 0000000..f41cc92 --- /dev/null +++ b/modules/gallvp/custom/rmouttogff3/tests/main.nf.test.snap @@ -0,0 +1,72 @@ +{ + "sarscov2 - 
genome - fasta - repeatmasker - out - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + "test.gff3:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + "versions.yml:md5,46901143ed4508b93cb4b64cd0b352f2" + ], + "gff3": [ + [ + { + "id": "test", + "single_end": false + }, + "test.gff3:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions": [ + "versions.yml:md5,46901143ed4508b93cb4b64cd0b352f2" + ] + } + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": "2024-09-27T16:27:50.770327" + }, + "sarscov2 - genome - fasta - repeatmasker - out": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + "test.gff3:md5,689df952b225e56b521d12f0dfab2ab8" + ] + ], + "1": [ + "versions.yml:md5,029655f0760e918db2ef104e09d379c1" + ], + "gff3": [ + [ + { + "id": "test", + "single_end": false + }, + "test.gff3:md5,689df952b225e56b521d12f0dfab2ab8" + ] + ], + "versions": [ + "versions.yml:md5,029655f0760e918db2ef104e09d379c1" + ] + } + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": "2024-09-27T16:49:03.738128" + } +} \ No newline at end of file diff --git a/modules/gallvp/custom/shortenfastaids/environment.yml b/modules/gallvp/custom/shortenfastaids/environment.yml new file mode 100644 index 0000000..04ccfc9 --- /dev/null +++ b/modules/gallvp/custom/shortenfastaids/environment.yml @@ -0,0 +1,7 @@ +channels: + - conda-forge + - bioconda + +dependencies: + - biopython==1.75 + - python==3.8.13 diff --git a/modules/pfr/custom/shortenfastaids/main.nf b/modules/gallvp/custom/shortenfastaids/main.nf similarity index 88% rename from modules/pfr/custom/shortenfastaids/main.nf rename to modules/gallvp/custom/shortenfastaids/main.nf index 92762ef..3cdaa07 100644 --- a/modules/pfr/custom/shortenfastaids/main.nf +++ b/modules/gallvp/custom/shortenfastaids/main.nf @@ -12,7 +12,7 @@ process CUSTOM_SHORTENFASTAIDS { output: tuple val(meta), path("*.short.ids.fasta") , emit: short_ids_fasta , optional: true - tuple val(meta), path("*.short.ids.tsv") , emit: short_ids_tsv , optional: true + tuple val(meta), path("*.short.ids.tsv") , emit: short_ids_tsv path "versions.yml" , emit: versions when: @@ -25,6 +25,10 @@ process CUSTOM_SHORTENFASTAIDS { stub: def prefix = task.ext.prefix ?: "${meta.id}" """ + echo \\ + 'IDs have acceptable length and character. No change required.' \\ + > ${meta.id}.short.ids.tsv + cat <<-END_VERSIONS > versions.yml "${task.process}": python: \$(python --version | cut -d' ' -f2) diff --git a/modules/gallvp/custom/shortenfastaids/meta.yml b/modules/gallvp/custom/shortenfastaids/meta.yml new file mode 100644 index 0000000..bb14b0a --- /dev/null +++ b/modules/gallvp/custom/shortenfastaids/meta.yml @@ -0,0 +1,66 @@ +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/meta-schema.json +name: "custom_shortenfastaids" +description: | + Shortens fasta IDs and produces a new fasta along with a TSV table + consisting of original (first column) and new IDs (second column). + This module is helpful when some tools like EDTA implicitly shorten + the IDs without producing the ID map, leading to downstream mismatch + in IDs across files. +keywords: + - genome + - fasta + - ID + - shorten +tools: + - "biopython": + description: | + Biopython is a set of freely available tools for biological computation written in Python by + an international team of developers. 
+ homepage: "https://biopython.org" + documentation: "https://biopython.org/wiki/Documentation" + tool_dev_url: "https://github.com/biopython/biopython" + doi: "10.1093/bioinformatics/btp163" + licence: ["MIT"] + identifier: "" +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'test' ]` + - fasta: + type: file + description: Input fasta file + pattern: "*.{fsa,fa,fasta}" +output: + - short_ids_fasta: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'test' ]` + - "*.short.ids.fasta": + type: file + description: Fasta file with shortened ids if id change is required + pattern: "*.{fsa,fa,fasta}" + - short_ids_tsv: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'test' ]` + - "*.short.ids.tsv": + type: file + description: | + A TSV file with original (first column) and new ids (second column) + if id change is required + pattern: "*.tsv" + - versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@GallVp" +maintainers: + - "@GallVp" diff --git a/modules/pfr/custom/shortenfastaids/templates/shorten_fasta_ids.py b/modules/gallvp/custom/shortenfastaids/templates/shorten_fasta_ids.py similarity index 85% rename from modules/pfr/custom/shortenfastaids/templates/shorten_fasta_ids.py rename to modules/gallvp/custom/shortenfastaids/templates/shorten_fasta_ids.py index 54f35bf..9d9c6e1 100755 --- a/modules/pfr/custom/shortenfastaids/templates/shorten_fasta_ids.py +++ b/modules/gallvp/custom/shortenfastaids/templates/shorten_fasta_ids.py @@ -1,11 +1,11 @@ #!/usr/bin/env python3 import re - -from Bio import SeqIO from importlib.metadata import version from platform import python_version +from Bio import SeqIO + # The input fasta file path fasta_file_path = "$fasta" output_files_prefix = "$prefix" @@ -61,12 +61,7 @@ def do_id_need_to_change(id_and_description, silent=False): def do_ids_need_to_change(ids_and_descriptions, silent=False): - return any( - [ - do_id_need_to_change(id_and_description, silent) - for id_and_description in ids_and_descriptions - ] - ) + return any([do_id_need_to_change(id_and_description, silent) for id_and_description in ids_and_descriptions]) def extract_common_patterns(ids): @@ -76,9 +71,7 @@ def extract_common_patterns(ids): for pattern in set(patterns): pattern_counts[pattern] = pattern_counts.get(pattern, 0) + 1 - common_patterns = [ - pattern for pattern, count in pattern_counts.items() if count >= 2 - ] + common_patterns = [pattern for pattern, count in pattern_counts.items() if count >= 2] if len(common_patterns) < 1: return {} @@ -122,15 +115,11 @@ def shorten_id_by_pattern_replacement(patterns_dict, id): for pattern in matches_for_id: shortened_id = re.sub( - r"({})".format(re.escape(pattern)), + rf"({re.escape(pattern)})", patterns_dict[pattern], shortened_id, ) - return ( - shortened_id - if shortened_id[len(shortened_id) - 1] != "_" - else shortened_id[0 : (len(shortened_id) - 1)] - ) + return shortened_id if shortened_id[len(shortened_id) - 1] != "_" else shortened_id[0 : (len(shortened_id) - 1)] def match_substrings(substrings, target_string): @@ -158,24 +147,22 @@ def fail_if_new_ids_not_valid(ids): input_ids = [x[0] for x in input_ids_and_descriptions] # Write versions - with open(f"versions.yml", "w") as f_versions: + with open("versions.yml", "w") as f_versions: f_versions.write('"${task.process}":\\n') f_versions.write(f" python: 
{python_version()}\\n") f_versions.write(f" biopython: {version('biopython')}\\n") if not do_ids_need_to_change(input_ids_and_descriptions): print("IDs have acceptable length and character. No change required.") + with open(f"{output_files_prefix}.short.ids.tsv", "w") as f: + f.write("IDs have acceptable length and character. No change required.") exit(0) - new_ids = shorten_ids( - input_ids_and_descriptions, extract_common_patterns(input_ids) - ) + new_ids = shorten_ids(input_ids_and_descriptions, extract_common_patterns(input_ids)) fail_if_new_ids_not_valid(new_ids) with open(f"{output_files_prefix}.short.ids.tsv", "w") as f: for input_id, new_id in zip(input_ids, new_ids): f.write(f"{input_id}\\t{new_id}\\n") - write_fasta_with_new_ids( - fasta_file_path, zip(input_ids, new_ids), output_files_prefix - ) + write_fasta_with_new_ids(fasta_file_path, zip(input_ids, new_ids), output_files_prefix) diff --git a/modules/pfr/custom/shortenfastaids/tests/main.nf.test b/modules/gallvp/custom/shortenfastaids/tests/main.nf.test similarity index 61% rename from modules/pfr/custom/shortenfastaids/tests/main.nf.test rename to modules/gallvp/custom/shortenfastaids/tests/main.nf.test index efff639..8eb2099 100644 --- a/modules/pfr/custom/shortenfastaids/tests/main.nf.test +++ b/modules/gallvp/custom/shortenfastaids/tests/main.nf.test @@ -5,7 +5,7 @@ nextflow_process { process "CUSTOM_SHORTENFASTAIDS" tag "modules" - tag "modules_nfcore" + tag "modules_gallvp" tag "custom" tag "custom/shortenfastaids" @@ -16,7 +16,7 @@ nextflow_process { """ input[0] = [ [ id:'test' ], - file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true) + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true) ] """ } @@ -25,10 +25,7 @@ nextflow_process { then { assertAll( { assert process.success }, - { assert snapshot(process.out).match() }, - { assert snapshot(process.out.versions).match("versions") }, - { assert process.out.short_ids_fasta == [] }, - { assert process.out.short_ids_tsv == [] } + { assert snapshot(process.out).match() } ) } @@ -41,7 +38,7 @@ nextflow_process { """ input[0] = [ [ id:'test' ], - file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true) + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) ] """ } @@ -50,8 +47,7 @@ nextflow_process { then { assertAll( { assert process.success }, - { assert snapshot(process.out).match() }, - { assert snapshot(process.out.versions).match("versions") } + { assert snapshot(process.out).match() } ) } @@ -64,7 +60,7 @@ nextflow_process { """ input[0] = [ [ id:'test' ], - file(params.test_data['homo_sapiens']['genome']['genome2_fasta'], checkIfExists: true) + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome2.fasta', checkIfExists: true) ] """ } @@ -73,8 +69,7 @@ nextflow_process { then { assertAll( { assert process.success }, - { assert snapshot(process.out).match() }, - { assert snapshot(process.out.versions).match("versions") } + { assert snapshot(process.out).match() } ) } @@ -95,8 +90,7 @@ nextflow_process { then { assertAll( { assert process.success }, - { assert snapshot(process.out).match() }, - { assert snapshot(process.out.versions).match("versions") } + { assert snapshot(process.out).match() } ) } @@ -111,7 +105,7 @@ nextflow_process { """ input[0] = [ [ id:'test' ], - file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true) + 
file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true) ] """ } @@ -120,9 +114,7 @@ nextflow_process { then { assertAll( { assert process.success }, - { assert snapshot(process.out.versions).match("versions") }, - { assert process.out.short_ids_fasta == [] }, - { assert process.out.short_ids_tsv == [] } + { assert snapshot(process.out).match() } ) } diff --git a/modules/pfr/custom/shortenfastaids/tests/main.nf.test.snap b/modules/gallvp/custom/shortenfastaids/tests/main.nf.test.snap similarity index 72% rename from modules/pfr/custom/shortenfastaids/tests/main.nf.test.snap rename to modules/gallvp/custom/shortenfastaids/tests/main.nf.test.snap index 8fed1b9..2506ebd 100644 --- a/modules/pfr/custom/shortenfastaids/tests/main.nf.test.snap +++ b/modules/gallvp/custom/shortenfastaids/tests/main.nf.test.snap @@ -42,15 +42,50 @@ ] } ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, "timestamp": "2023-12-07T13:33:05.523745" }, - "versions": { + "stub": { "content": [ - [ - "versions.yml:md5,e5704a53ebea373dac3a93ae800d48ba" - ] + { + "0": [ + + ], + "1": [ + [ + { + "id": "test" + }, + "test.short.ids.tsv:md5,fcf920d9a7b57a1e3c29a9e88673330f" + ] + ], + "2": [ + "versions.yml:md5,e5704a53ebea373dac3a93ae800d48ba" + ], + "short_ids_fasta": [ + + ], + "short_ids_tsv": [ + [ + { + "id": "test" + }, + "test.short.ids.tsv:md5,fcf920d9a7b57a1e3c29a9e88673330f" + ] + ], + "versions": [ + "versions.yml:md5,e5704a53ebea373dac3a93ae800d48ba" + ] + } ], - "timestamp": "2023-12-07T13:30:30.361527" + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-06-04T13:58:30.161542" }, "homo_sapiens-genome_fasta-no_change": { "content": [ @@ -59,7 +94,12 @@ ], "1": [ - + [ + { + "id": "test" + }, + "test.short.ids.tsv:md5,642382addc4beba37088b1ebe09d38cf" + ] ], "2": [ "versions.yml:md5,e5704a53ebea373dac3a93ae800d48ba" @@ -68,14 +108,23 @@ ], "short_ids_tsv": [ - + [ + { + "id": "test" + }, + "test.short.ids.tsv:md5,642382addc4beba37088b1ebe09d38cf" + ] ], "versions": [ "versions.yml:md5,e5704a53ebea373dac3a93ae800d48ba" ] } ], - "timestamp": "2023-12-07T13:32:54.220188" + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.1" + }, + "timestamp": "2024-06-02T20:54:17.945233" }, "homo_sapiens-genome2_fasta-length_change": { "content": [ @@ -120,6 +169,10 @@ ] } ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, "timestamp": "2023-12-07T13:33:01.924483" }, "sarscov2-genome_fasta-pattern_change": { @@ -165,6 +218,10 @@ ] } ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, "timestamp": "2023-12-07T13:32:58.12885" } } \ No newline at end of file diff --git a/modules/pfr/edta/edta/main.nf b/modules/gallvp/edta/edta/main.nf similarity index 98% rename from modules/pfr/edta/edta/main.nf rename to modules/gallvp/edta/edta/main.nf index a81c528..cddac07 100644 --- a/modules/pfr/edta/edta/main.nf +++ b/modules/gallvp/edta/edta/main.nf @@ -2,7 +2,6 @@ process EDTA_EDTA { tag "$meta.id" label 'process_high' - conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
'https://depot.galaxyproject.org/singularity/edta:2.1.0--hdfd78af_1': 'biocontainers/edta:2.1.0--hdfd78af_1' }" diff --git a/modules/gallvp/edta/edta/meta.yml b/modules/gallvp/edta/edta/meta.yml new file mode 100644 index 0000000..c24adc0 --- /dev/null +++ b/modules/gallvp/edta/edta/meta.yml @@ -0,0 +1,111 @@ +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/meta-schema.json +name: "edta_edta" +description: Extensive de-novo TE Annotator (EDTA) +keywords: + - genome + - repeat + - annotation + - transposable-elements +tools: + - "edta": + description: Extensive de-novo TE Annotator (EDTA) + homepage: "https://github.com/oushujun/EDTA" + documentation: "https://github.com/oushujun/EDTA" + tool_dev_url: "https://github.com/oushujun/EDTA" + doi: "10.1186/s13059-019-1905-y" + licence: ["GPL v3"] + identifier: "" +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'test' ]` + - fasta: + type: file + description: Genome fasta file + pattern: "*.{fsa,fa,fasta}" + - - cds: + type: file + description: | + A FASTA file containing the coding sequence (no introns, UTRs, nor TEs) + of this genome or its close relative + pattern: "*.{fsa,fa,fasta}" + - - curatedlib: + type: file + description: | + A curated library to keep consistent naming and classification for known TEs + pattern: "*.liban" + - - rmout: + type: file + description: | + Homology-based TE annotation instead of using the EDTA library for masking in + RepeatMasker .out format + pattern: "*.out" + - - exclude: + type: file + description: Exclude regions (bed format) from TE masking in the MAKER.masked + output + pattern: "*.bed" +output: + - log: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'test' ]` + - "*.log": + type: file + description: Log emitted by EDTA + pattern: "*.log" + - te_lib_fasta: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'test' ]` + - "*.EDTA.TElib.fa": + type: file + description: A non-redundant TE library in fasta format + pattern: "*.EDTA.TElib.fa" + - pass_list: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'test' ]` + - "*.EDTA.pass.list": + type: file + description: A summary table of intact LTR-RTs with coordinate and structural + information + pattern: "*.EDTA.pass.list" + - out_file: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'test' ]` + - "*.EDTA.out": + type: file + description: RepeatMasker annotation of all LTR sequences in the genome + pattern: "*.EDTA.out" + - te_anno_gff3: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
`[ id:'test' ]` + - "*.EDTA.TEanno.gff3": + type: file + description: A gff3 file containing both structurally intact and fragmented + TE annotations + pattern: "*.EDTA.TEanno.gff3" + - versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@GallVp" +maintainers: + - "@GallVp" diff --git a/modules/gallvp/edta/edta/tests/main.nf.test b/modules/gallvp/edta/edta/tests/main.nf.test new file mode 100644 index 0000000..b3ec30c --- /dev/null +++ b/modules/gallvp/edta/edta/tests/main.nf.test @@ -0,0 +1,84 @@ +nextflow_process { + + name "Test Process EDTA_EDTA" + script "../main.nf" + process "EDTA_EDTA" + config "./nextflow.config" + + tag "modules" + tag "modules_gallvp" + tag "edta" + tag "edta/edta" + tag "modules/nf-core/gunzip" + + test("actinidia_chinensis-genome_1_fasta_gz") { + + setup { + run("GUNZIP") { + script "../../../../../modules/nf-core/gunzip" + + process { + """ + input[0] = [ + [ id:'test' ], + file(params.modules_testdata_base_path + 'genomics/eukaryotes/actinidia_chinensis/genome/chr1/genome.fasta.gz', checkIfExists: true) + ] + """ + } + } + } + + when { + process { + """ + input[0] = GUNZIP.out.gunzip + input[1] = [] + input[2] = [] + input[3] = [] + input[4] = [] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out.versions).match("versions") }, + { assert path(process.out.te_lib_fasta[0][1]).text.contains('LTR/Copia') }, + { assert path(process.out.pass_list[0][1]).text.contains('Copia') }, + { assert process.out.out_file == [] }, + { assert process.out.te_anno_gff3 == [] } + ) + } + + } + + test("stub") { + + options "-stub" + + when { + process { + """ + input[0] = [ + [ id:'test' ], // meta map + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true) + ] + input[1] = [] + input[2] = [] + input[3] = [] + input[4] = [] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + +} \ No newline at end of file diff --git a/modules/gallvp/edta/edta/tests/main.nf.test.snap b/modules/gallvp/edta/edta/tests/main.nf.test.snap new file mode 100644 index 0000000..d989e01 --- /dev/null +++ b/modules/gallvp/edta/edta/tests/main.nf.test.snap @@ -0,0 +1,81 @@ +{ + "versions": { + "content": [ + [ + "versions.yml:md5,f9e6b414c1eb81520a9fdbb15f797405" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-05-10T14:43:10.298103" + }, + "stub": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test.log:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + [ + { + "id": "test" + }, + "test.EDTA.TElib.fa:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "2": [ + + ], + "3": [ + + ], + "4": [ + + ], + "5": [ + "versions.yml:md5,f9e6b414c1eb81520a9fdbb15f797405" + ], + "log": [ + [ + { + "id": "test" + }, + "test.log:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "out_file": [ + + ], + "pass_list": [ + + ], + "te_anno_gff3": [ + + ], + "te_lib_fasta": [ + [ + { + "id": "test" + }, + "test.EDTA.TElib.fa:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions": [ + "versions.yml:md5,f9e6b414c1eb81520a9fdbb15f797405" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-05-10T14:43:16.561778" + } +} \ No newline at end of file diff --git a/modules/gallvp/edta/edta/tests/nextflow.config b/modules/gallvp/edta/edta/tests/nextflow.config new file mode 100644 
index 0000000..ac46798 --- /dev/null +++ b/modules/gallvp/edta/edta/tests/nextflow.config @@ -0,0 +1,5 @@ +process { + withName: EDTA_EDTA { + ext.args = '--anno 0' + } +} diff --git a/modules/gallvp/gffread/environment.yml b/modules/gallvp/gffread/environment.yml new file mode 100644 index 0000000..ee23984 --- /dev/null +++ b/modules/gallvp/gffread/environment.yml @@ -0,0 +1,5 @@ +channels: + - conda-forge + - bioconda +dependencies: + - bioconda::gffread=0.12.7 diff --git a/modules/gallvp/gffread/main.nf b/modules/gallvp/gffread/main.nf new file mode 100644 index 0000000..da55cba --- /dev/null +++ b/modules/gallvp/gffread/main.nf @@ -0,0 +1,60 @@ +process GFFREAD { + tag "$meta.id" + label 'process_low' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/gffread:0.12.7--hdcf5f25_4' : + 'biocontainers/gffread:0.12.7--hdcf5f25_4' }" + + input: + tuple val(meta), path(gff) + path fasta + + output: + tuple val(meta), path("*.gtf") , emit: gtf , optional: true + tuple val(meta), path("*.gff3") , emit: gffread_gff , optional: true + tuple val(meta), path("*.fasta"), emit: gffread_fasta , optional: true + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def extension = args.contains("-T") ? 'gtf' : ( ( ['-w', '-x', '-y' ].any { args.contains(it) } ) ? 'fasta' : 'gff3' ) + def fasta_arg = fasta ? "-g $fasta" : '' + def output_name = "${prefix}.${extension}" + def output = extension == "fasta" ? "$output_name" : "-o $output_name" + def args_sorted = args.replaceAll(/(.*)(-[wxy])(.*)/) { all, pre, param, post -> "$pre $post $param" }.trim() + // args_sorted = Move '-w', '-x', and '-y' to the end of the args string as gffread expects the file name after these parameters + if ( "$output_name" in [ "$gff", "$fasta" ] ) error "Input and output names are the same, use \"task.ext.prefix\" to disambiguate!" + """ + gffread \\ + $gff \\ + $fasta_arg \\ + $args_sorted \\ + $output + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + gffread: \$(gffread --version 2>&1) + END_VERSIONS + """ + + stub: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def extension = args.contains("-T") ? 'gtf' : ( ( ['-w', '-x', '-y' ].any { args.contains(it) } ) ? 'fasta' : 'gff3' ) + def output_name = "${prefix}.${extension}" + if ( "$output_name" in [ "$gff", "$fasta" ] ) error "Input and output names are the same, use \"task.ext.prefix\" to disambiguate!" + """ + touch $output_name + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + gffread: \$(gffread --version 2>&1) + END_VERSIONS + """ +} diff --git a/modules/gallvp/gffread/meta.yml b/modules/gallvp/gffread/meta.yml new file mode 100644 index 0000000..bebe7f5 --- /dev/null +++ b/modules/gallvp/gffread/meta.yml @@ -0,0 +1,75 @@ +name: gffread +description: Validate, filter, convert and perform various other operations on GFF + files +keywords: + - gff + - conversion + - validation +tools: + - gffread: + description: GFF/GTF utility providing format conversions, region filtering, FASTA + sequence extraction and more. 
+ homepage: http://ccb.jhu.edu/software/stringtie/gff.shtml#gffread + documentation: http://ccb.jhu.edu/software/stringtie/gff.shtml#gffread + tool_dev_url: https://github.com/gpertea/gffread + doi: 10.12688/f1000research.23297.1 + licence: ["MIT"] + identifier: biotools:gffread +input: + - - meta: + type: map + description: | + Groovy Map containing meta data + e.g. [ id:'test' ] + - gff: + type: file + description: A reference file in either the GFF3, GFF2 or GTF format. + pattern: "*.{gff, gtf}" + - - fasta: + type: file + description: A multi-fasta file with the genomic sequences + pattern: "*.{fasta,fa,faa,fas,fsa}" +output: + - gtf: + - meta: + type: map + description: | + Groovy Map containing meta data + e.g. [ id:'test' ] + - "*.gtf": + type: file + description: GTF file resulting from the conversion of the GFF input file if + '-T' argument is present + pattern: "*.{gtf}" + - gffread_gff: + - meta: + type: map + description: | + Groovy Map containing meta data + e.g. [ id:'test' ] + - "*.gff3": + type: file + description: GFF3 file resulting from the conversion of the GFF input file if + '-T' argument is absent + pattern: "*.gff3" + - gffread_fasta: + - meta: + type: map + description: | + Groovy Map containing meta data + e.g. [ id:'test' ] + - "*.fasta": + type: file + description: Fasta file produced when either of '-w', '-x', '-y' parameters + is present + pattern: "*.fasta" + - versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@edmundmiller" +maintainers: + - "@edmundmiller" + - "@gallvp" diff --git a/modules/gallvp/gffread/tests/main.nf.test b/modules/gallvp/gffread/tests/main.nf.test new file mode 100644 index 0000000..17b2ee6 --- /dev/null +++ b/modules/gallvp/gffread/tests/main.nf.test @@ -0,0 +1,223 @@ +nextflow_process { + + name "Test Process GFFREAD" + script "../main.nf" + process "GFFREAD" + + tag "gffread" + tag "modules_gallvp" + tag "modules" + + test("sarscov2-gff3-gtf") { + + config "./nextflow.config" + + when { + params { + outdir = "$outputDir" + } + process { + """ + input[0] = [ + [id: 'test'], + file(params.modules_testdata_base_path + "genomics/sarscov2/genome/genome.gff3", checkIfExists: true) + ] + input[1] = [] + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot(process.out).match() }, + { assert process.out.gffread_gff == [] }, + { assert process.out.gffread_fasta == [] } + ) + } + + } + + test("sarscov2-gff3-gtf-stub") { + + options '-stub' + config "./nextflow.config" + + when { + params { + outdir = "$outputDir" + } + process { + """ + input[0] = [ + [id: 'test'], + file(params.modules_testdata_base_path + "genomics/sarscov2/genome/genome.gff3", checkIfExists: true) + ] + input[1] = [] + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot(process.out).match() }, + { assert process.out.gffread_gff == [] }, + { assert process.out.gffread_fasta == [] } + ) + } + + } + + test("sarscov2-gff3-gff3") { + + config "./nextflow-gff3.config" + + when { + params { + outdir = "$outputDir" + } + process { + """ + input[0] = [ + [id: 'test'], + file(params.modules_testdata_base_path + "genomics/sarscov2/genome/genome.gff3", checkIfExists: true) + ] + input[1] = [] + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot(process.out).match() }, + { assert process.out.gtf == [] }, + { assert process.out.gffread_fasta == [] } + ) + } + + } + + test("sarscov2-gff3-gff3-stub") { + + 
options '-stub' + config "./nextflow-gff3.config" + + when { + params { + outdir = "$outputDir" + } + process { + """ + input[0] = [ + [id: 'test'], + file(params.modules_testdata_base_path + "genomics/sarscov2/genome/genome.gff3", checkIfExists: true) + ] + input[1] = [] + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot(process.out).match() }, + { assert process.out.gtf == [] }, + { assert process.out.gffread_fasta == [] } + ) + } + + } + + test("sarscov2-gff3-fasta") { + + config "./nextflow-fasta.config" + + when { + params { + outdir = "$outputDir" + } + process { + """ + input[0] = [ + [id: 'test'], + file(params.modules_testdata_base_path + "genomics/sarscov2/genome/genome.gff3", checkIfExists: true) + ] + input[1] = file(params.modules_testdata_base_path + "genomics/sarscov2/genome/genome.fasta", checkIfExists: true) + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot(process.out).match() }, + { assert process.out.gtf == [] }, + { assert process.out.gffread_gff == [] } + ) + } + + } + + test("sarscov2-gff3-fasta-stub") { + + options '-stub' + config "./nextflow-fasta.config" + + when { + params { + outdir = "$outputDir" + } + process { + """ + input[0] = [ + [id: 'test'], + file(params.modules_testdata_base_path + "genomics/sarscov2/genome/genome.gff3", checkIfExists: true) + ] + input[1] = file(params.modules_testdata_base_path + "genomics/sarscov2/genome/genome.fasta", checkIfExists: true) + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot(process.out).match() }, + { assert process.out.gtf == [] }, + { assert process.out.gffread_gff == [] } + ) + } + + } + + test("sarscov2-gff3-fasta-fail-catch") { + + options '-stub' + config "./nextflow-fasta.config" + + when { + params { + outdir = "$outputDir" + } + process { + """ + input[0] = [ + [id: 'genome'], + file(params.modules_testdata_base_path + "genomics/sarscov2/genome/genome.gff3", checkIfExists: true) + ] + input[1] = file(params.modules_testdata_base_path + "genomics/sarscov2/genome/genome.fasta", checkIfExists: true) + """ + } + } + + then { + assertAll ( + { assert ! 
process.success }, + { assert process.stdout.toString().contains("Input and output names are the same") } + ) + } + + } + +} diff --git a/modules/gallvp/gffread/tests/main.nf.test.snap b/modules/gallvp/gffread/tests/main.nf.test.snap new file mode 100644 index 0000000..1526232 --- /dev/null +++ b/modules/gallvp/gffread/tests/main.nf.test.snap @@ -0,0 +1,272 @@ +{ + "sarscov2-gff3-gtf": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test.gtf:md5,1ea0ae98d3388e0576407dc4a24ef428" + ] + ], + "1": [ + + ], + "2": [ + + ], + "3": [ + "versions.yml:md5,05f671c6c6e530acedad0af0a5948dbd" + ], + "gffread_fasta": [ + + ], + "gffread_gff": [ + + ], + "gtf": [ + [ + { + "id": "test" + }, + "test.gtf:md5,1ea0ae98d3388e0576407dc4a24ef428" + ] + ], + "versions": [ + "versions.yml:md5,05f671c6c6e530acedad0af0a5948dbd" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-04-09T10:48:56.496187" + }, + "sarscov2-gff3-gff3": { + "content": [ + { + "0": [ + + ], + "1": [ + [ + { + "id": "test" + }, + "test.gff3:md5,c4e5da6267c6bee5899a2c204ae1ad91" + ] + ], + "2": [ + + ], + "3": [ + "versions.yml:md5,05f671c6c6e530acedad0af0a5948dbd" + ], + "gffread_fasta": [ + + ], + "gffread_gff": [ + [ + { + "id": "test" + }, + "test.gff3:md5,c4e5da6267c6bee5899a2c204ae1ad91" + ] + ], + "gtf": [ + + ], + "versions": [ + "versions.yml:md5,05f671c6c6e530acedad0af0a5948dbd" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-04-09T10:49:00.892782" + }, + "sarscov2-gff3-gtf-stub": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test.gtf:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + + ], + "2": [ + + ], + "3": [ + "versions.yml:md5,05f671c6c6e530acedad0af0a5948dbd" + ], + "gffread_fasta": [ + + ], + "gffread_gff": [ + + ], + "gtf": [ + [ + { + "id": "test" + }, + "test.gtf:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions": [ + "versions.yml:md5,05f671c6c6e530acedad0af0a5948dbd" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-04-09T11:11:26.975666" + }, + "sarscov2-gff3-fasta-stub": { + "content": [ + { + "0": [ + + ], + "1": [ + + ], + "2": [ + [ + { + "id": "test" + }, + "test.fasta:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "3": [ + "versions.yml:md5,05f671c6c6e530acedad0af0a5948dbd" + ], + "gffread_fasta": [ + [ + { + "id": "test" + }, + "test.fasta:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "gffread_gff": [ + + ], + "gtf": [ + + ], + "versions": [ + "versions.yml:md5,05f671c6c6e530acedad0af0a5948dbd" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-04-09T11:11:44.34792" + }, + "sarscov2-gff3-gff3-stub": { + "content": [ + { + "0": [ + + ], + "1": [ + [ + { + "id": "test" + }, + "test.gff3:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "2": [ + + ], + "3": [ + "versions.yml:md5,05f671c6c6e530acedad0af0a5948dbd" + ], + "gffread_fasta": [ + + ], + "gffread_gff": [ + [ + { + "id": "test" + }, + "test.gff3:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "gtf": [ + + ], + "versions": [ + "versions.yml:md5,05f671c6c6e530acedad0af0a5948dbd" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-04-09T11:11:35.221671" + }, + "sarscov2-gff3-fasta": { + "content": [ + { + "0": [ + + ], + "1": [ + + ], + "2": [ + [ + { + "id": "test" + }, + "test.fasta:md5,5f8108fb51739a0588ccf0a251de919a" + ] + ], + "3": [ + 
"versions.yml:md5,05f671c6c6e530acedad0af0a5948dbd" + ], + "gffread_fasta": [ + [ + { + "id": "test" + }, + "test.fasta:md5,5f8108fb51739a0588ccf0a251de919a" + ] + ], + "gffread_gff": [ + + ], + "gtf": [ + + ], + "versions": [ + "versions.yml:md5,05f671c6c6e530acedad0af0a5948dbd" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-04-09T10:54:02.88143" + } +} \ No newline at end of file diff --git a/modules/gallvp/gffread/tests/nextflow-fasta.config b/modules/gallvp/gffread/tests/nextflow-fasta.config new file mode 100644 index 0000000..ac6cb14 --- /dev/null +++ b/modules/gallvp/gffread/tests/nextflow-fasta.config @@ -0,0 +1,5 @@ +process { + withName: GFFREAD { + ext.args = '-w -S' + } +} diff --git a/modules/gallvp/gffread/tests/nextflow-gff3.config b/modules/gallvp/gffread/tests/nextflow-gff3.config new file mode 100644 index 0000000..afe0830 --- /dev/null +++ b/modules/gallvp/gffread/tests/nextflow-gff3.config @@ -0,0 +1,5 @@ +process { + withName: GFFREAD { + ext.args = '' + } +} diff --git a/modules/gallvp/gffread/tests/nextflow.config b/modules/gallvp/gffread/tests/nextflow.config new file mode 100644 index 0000000..74b2509 --- /dev/null +++ b/modules/gallvp/gffread/tests/nextflow.config @@ -0,0 +1,5 @@ +process { + withName: GFFREAD { + ext.args = '-T' + } +} diff --git a/modules/gallvp/gffread/tests/tags.yml b/modules/gallvp/gffread/tests/tags.yml new file mode 100644 index 0000000..0557606 --- /dev/null +++ b/modules/gallvp/gffread/tests/tags.yml @@ -0,0 +1,2 @@ +gffread: + - modules/nf-core/gffread/** diff --git a/modules/pfr/ltrretriever/lai/environment.yml b/modules/gallvp/ltrretriever/lai/environment.yml similarity index 84% rename from modules/pfr/ltrretriever/lai/environment.yml rename to modules/gallvp/ltrretriever/lai/environment.yml index e0e4968..f1c392a 100644 --- a/modules/pfr/ltrretriever/lai/environment.yml +++ b/modules/gallvp/ltrretriever/lai/environment.yml @@ -1,9 +1,7 @@ --- # yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json -name: "ltrretriever_lai" channels: - conda-forge - bioconda - - defaults dependencies: - "bioconda::LTR_retriever=2.9.9" diff --git a/modules/pfr/ltrretriever/lai/main.nf b/modules/gallvp/ltrretriever/lai/main.nf similarity index 100% rename from modules/pfr/ltrretriever/lai/main.nf rename to modules/gallvp/ltrretriever/lai/main.nf diff --git a/modules/gallvp/ltrretriever/lai/meta.yml b/modules/gallvp/ltrretriever/lai/meta.yml new file mode 100644 index 0000000..56efccc --- /dev/null +++ b/modules/gallvp/ltrretriever/lai/meta.yml @@ -0,0 +1,78 @@ +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/meta-schema.json +name: "ltrretriever_lai" +description: | + Estimates the mean LTR sequence identity in the genome. The input genome fasta should + have short alphanumeric IDs without comments +keywords: + - genomics + - annotation + - repeat + - long terminal retrotransposon + - retrotransposon + - stats + - qc +tools: + - "lai": + description: Assessing genome assembly quality using the LTR Assembly Index (LAI) + homepage: "https://github.com/oushujun/LTR_retriever" + documentation: "https://github.com/oushujun/LTR_retriever" + tool_dev_url: "https://github.com/oushujun/LTR_retriever" + doi: "10.1093/nar/gky730" + licence: ["GPL v3"] + identifier: "" +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
`[ id:'sample1' ]` + - fasta: + type: file + description: The genome file that is used to generate everything + pattern: "*.{fsa,fa,fasta}" + - - pass_list: + type: file + description: A list of intact LTR-RTs generated by LTR_retriever + pattern: "*.pass.list" + - - annotation_out: + type: file + description: RepeatMasker annotation of all LTR sequences in the genome + pattern: "*.out" + - - monoploid_seqs: + type: file + description: | + This parameter is mainly for ployploid genomes. User provides a list of + sequence names that represent a monoploid (1x). LAI will be calculated only + on these sequences if provided. + pattern: "*.txt" +output: + - log: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'sample1', single_end:false ]` + - "*.LAI.log": + type: file + description: Log from LAI + pattern: "*.LAI.log" + - lai_out: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'sample1', single_end:false ]` + - "*.LAI.out": + type: file + description: | + Output file from LAI if LAI is able to estimate the index from the inputs + pattern: "*.LAI.out" + - versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@GallVp" +maintainers: + - "@GallVp" diff --git a/modules/pfr/ltrretriever/lai/tests/main.nf.test b/modules/gallvp/ltrretriever/lai/tests/main.nf.test similarity index 89% rename from modules/pfr/ltrretriever/lai/tests/main.nf.test rename to modules/gallvp/ltrretriever/lai/tests/main.nf.test index a617811..e428918 100644 --- a/modules/pfr/ltrretriever/lai/tests/main.nf.test +++ b/modules/gallvp/ltrretriever/lai/tests/main.nf.test @@ -6,7 +6,7 @@ nextflow_process { config "./nextflow.config" tag "modules" - tag "modules_nfcore" + tag "modules_gallvp" tag "gunzip" tag "ltrretriever" tag "ltrretriever/ltrretriever" @@ -26,7 +26,7 @@ nextflow_process { """ input[0] = [ [ id:'test' ], - file(params.test_data['actinidia_chinensis']['genome']['genome_21_fasta_gz'], checkIfExists: true) + file(params.modules_testdata_base_path + 'genomics/eukaryotes/actinidia_chinensis/genome/chr1/genome.fasta.gz', checkIfExists: true) ] """ } @@ -114,7 +114,7 @@ nextflow_process { input[0] = [ [ id:'test' ], - file(params.test_data['actinidia_chinensis']['genome']['genome_21_fasta_gz'], checkIfExists: true) + file(params.modules_testdata_base_path + 'genomics/eukaryotes/actinidia_chinensis/genome/chr1/genome.fasta.gz', checkIfExists: true) ] input[1] = pass_list.toPath() input[2] = out_file.toPath() @@ -145,7 +145,7 @@ nextflow_process { input[0] = [ [ id:'test' ], - file(params.test_data['actinidia_chinensis']['genome']['genome_21_fasta_gz'], checkIfExists: true) + file(params.modules_testdata_base_path + 'genomics/eukaryotes/actinidia_chinensis/genome/chr1/genome.fasta.gz', checkIfExists: true) ] input[1] = pass_list.toPath() input[2] = out_file.toPath() @@ -163,4 +163,4 @@ nextflow_process { } -} \ No newline at end of file +} diff --git a/modules/pfr/ltrretriever/lai/tests/main.nf.test.snap b/modules/gallvp/ltrretriever/lai/tests/main.nf.test.snap similarity index 100% rename from modules/pfr/ltrretriever/lai/tests/main.nf.test.snap rename to modules/gallvp/ltrretriever/lai/tests/main.nf.test.snap diff --git a/modules/pfr/ltrretriever/lai/tests/nextflow.config b/modules/gallvp/ltrretriever/lai/tests/nextflow.config similarity index 100% rename from modules/pfr/ltrretriever/lai/tests/nextflow.config rename to 
modules/gallvp/ltrretriever/lai/tests/nextflow.config diff --git a/modules/pfr/ltrretriever/lai/tests/tags.yml b/modules/gallvp/ltrretriever/lai/tests/tags.yml similarity index 100% rename from modules/pfr/ltrretriever/lai/tests/tags.yml rename to modules/gallvp/ltrretriever/lai/tests/tags.yml diff --git a/modules/gallvp/repeatmasker/repeatmasker/environment.yml b/modules/gallvp/repeatmasker/repeatmasker/environment.yml new file mode 100644 index 0000000..8c4d8c1 --- /dev/null +++ b/modules/gallvp/repeatmasker/repeatmasker/environment.yml @@ -0,0 +1,6 @@ +channels: + - conda-forge + - bioconda + +dependencies: + - "bioconda::repeatmasker=4.1.5" diff --git a/modules/gallvp/repeatmasker/repeatmasker/main.nf b/modules/gallvp/repeatmasker/repeatmasker/main.nf new file mode 100644 index 0000000..4b17414 --- /dev/null +++ b/modules/gallvp/repeatmasker/repeatmasker/main.nf @@ -0,0 +1,62 @@ +process REPEATMASKER_REPEATMASKER { + tag "$meta.id" + label 'process_high' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/repeatmasker:4.1.5--pl5321hdfd78af_0': + 'biocontainers/repeatmasker:4.1.5--pl5321hdfd78af_0' }" + + input: + tuple val(meta), path(fasta) + path(lib) + + output: + tuple val(meta), path("${prefix}.masked") , emit: masked + tuple val(meta), path("${prefix}.out") , emit: out + tuple val(meta), path("${prefix}.tbl") , emit: tbl + tuple val(meta), path("${prefix}.gff") , emit: gff , optional: true + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + prefix = task.ext.prefix ?: "${meta.id}" + def lib_arg = lib ? "-lib $lib" : '' + """ + RepeatMasker \\ + $lib_arg \\ + -pa ${task.cpus} \\ + -dir ${prefix} \\ + ${args} \\ + ${fasta} + + mv $prefix/${fasta}.masked ${prefix}.masked + mv $prefix/${fasta}.out ${prefix}.out + mv $prefix/${fasta}.tbl ${prefix}.tbl + mv $prefix/${fasta}.out.gff ${prefix}.gff || echo "GFF is not produced" + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + repeatmasker: \$(RepeatMasker -v | sed 's/RepeatMasker version //1') + END_VERSIONS + """ + + stub: + prefix = task.ext.prefix ?: "${meta.id}" + def args = task.ext.args ?: '' + def touch_gff = args.contains('-gff') ? "touch ${prefix}.gff" : '' + """ + touch ${prefix}.masked + touch ${prefix}.out + touch ${prefix}.tbl + $touch_gff + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + repeatmasker: \$(RepeatMasker -v | sed 's/RepeatMasker version //1') + END_VERSIONS + """ +} diff --git a/modules/gallvp/repeatmasker/repeatmasker/meta.yml b/modules/gallvp/repeatmasker/repeatmasker/meta.yml new file mode 100644 index 0000000..8731edf --- /dev/null +++ b/modules/gallvp/repeatmasker/repeatmasker/meta.yml @@ -0,0 +1,84 @@ +name: repeatmasker_repeatmasker +description: | + Screening DNA sequences for interspersed repeats and low complexity DNA sequences + +keywords: + - genome + - annotation + - repeat + - mask + +tools: + - repeatmasker: + description: | + RepeatMasker is a program that screens DNA sequences for interspersed + repeats and low complexity DNA sequences + homepage: "https://www.repeatmasker.org/" + documentation: "https://www.repeatmasker.org/webrepeatmaskerhelp.html" + tool_dev_url: "https://github.com/rmhubley/RepeatMasker" + licence: ["Open Software License v. 
2.1"] + identifier: biotools:repeatmasker + +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test' ] + - fasta: + type: file + description: Genome assembly + pattern: "*.{fasta,fa,fas,fsa,faa,fna}" + - - lib: + type: file + description: Custom library (e.g. from another species) + pattern: "*.{fasta,fa,fas,fsa,faa,fna}" +output: + - masked: + - meta: + type: file + description: Masked fasta + pattern: "*.masked" + - ${prefix}.masked: + type: file + description: Masked fasta + pattern: "*.masked" + - out: + - meta: + type: file + description: Out file + pattern: "*.out" + - ${prefix}.out: + type: file + description: Out file + pattern: "*.out" + - tbl: + - meta: + type: file + description: tbl file + pattern: "*.tbl" + - ${prefix}.tbl: + type: file + description: tbl file + pattern: "*.tbl" + - gff: + - meta: + type: file + description: GFF file + pattern: "*.gff" + - ${prefix}.gff: + type: file + description: GFF file + pattern: "*.gff" + - versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@kherronism" + - "@gallvp" + +maintainers: + - "@kherronism" + - "@gallvp" diff --git a/modules/gallvp/repeatmasker/repeatmasker/tests/main.nf.test b/modules/gallvp/repeatmasker/repeatmasker/tests/main.nf.test new file mode 100644 index 0000000..d55e5f4 --- /dev/null +++ b/modules/gallvp/repeatmasker/repeatmasker/tests/main.nf.test @@ -0,0 +1,68 @@ +nextflow_process { + + name "Test Process REPEATMASKER_REPEATMASKER" + script "../main.nf" + process "REPEATMASKER_REPEATMASKER" + + tag "modules" + tag "modules_gallvp" + tag "repeatmasker" + tag "repeatmasker/repeatmasker" + + test("sarscov2 - genome - fasta") { + + config "./nextflow.config" + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) + ] + input[1] = [] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + process.out.masked, + process.out.out, + process.out.gff, + process.out.versions + ).match() }, + { assert file(process.out.tbl[0][1]).text.contains('run with rmblastn') } + ) + } + + } + + test("sarscov2 - genome - fasta - stub") { + + options '-stub' + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) + ] + input[1] = [] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + +} diff --git a/modules/gallvp/repeatmasker/repeatmasker/tests/main.nf.test.snap b/modules/gallvp/repeatmasker/repeatmasker/tests/main.nf.test.snap new file mode 100644 index 0000000..2e584de --- /dev/null +++ b/modules/gallvp/repeatmasker/repeatmasker/tests/main.nf.test.snap @@ -0,0 +1,118 @@ +{ + "sarscov2 - genome - fasta": { + "content": [ + [ + [ + { + "id": "test", + "single_end": false + }, + "test.masked:md5,c0eb8dd958ce3b4b1fdc7fcb6b0d5161" + ] + ], + [ + [ + { + "id": "test", + "single_end": false + }, + "test.out:md5,8610cb2b8d87356bf2ab0a895c065752" + ] + ], + [ + [ + { + "id": "test", + "single_end": false + }, + "test.gff:md5,289cdcae609a8c450a20080107ea6351" + ] + ], + [ + "versions.yml:md5,1386abb5112b809c321da8ddc598c573" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.3" + }, + "timestamp": 
"2024-08-07T14:25:15.979032" + }, + "sarscov2 - genome - fasta - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + "test.masked:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + [ + { + "id": "test", + "single_end": false + }, + "test.out:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "2": [ + [ + { + "id": "test", + "single_end": false + }, + "test.tbl:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "3": [ + + ], + "4": [ + "versions.yml:md5,1386abb5112b809c321da8ddc598c573" + ], + "gff": [ + + ], + "masked": [ + [ + { + "id": "test", + "single_end": false + }, + "test.masked:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "out": [ + [ + { + "id": "test", + "single_end": false + }, + "test.out:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "tbl": [ + [ + { + "id": "test", + "single_end": false + }, + "test.tbl:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions": [ + "versions.yml:md5,1386abb5112b809c321da8ddc598c573" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.3" + }, + "timestamp": "2024-08-07T13:39:51.365612" + } +} \ No newline at end of file diff --git a/modules/gallvp/repeatmasker/repeatmasker/tests/nextflow.config b/modules/gallvp/repeatmasker/repeatmasker/tests/nextflow.config new file mode 100644 index 0000000..6e4b600 --- /dev/null +++ b/modules/gallvp/repeatmasker/repeatmasker/tests/nextflow.config @@ -0,0 +1,5 @@ +process { + withName: REPEATMASKER_REPEATMASKER { + ext.args = '-no_is -gff' // Not required but significantly cuts the runtime + } +} diff --git a/modules/kherronism/braker3/main.nf b/modules/kherronism/braker3/main.nf deleted file mode 100644 index 9a15143..0000000 --- a/modules/kherronism/braker3/main.nf +++ /dev/null @@ -1,90 +0,0 @@ -process BRAKER3 { - tag "${meta.id}" - label 'process_high' - - container "gallvp/teambraker_braker3:v1.0.6" - - input: - tuple val(meta), path(fasta) - path bam - path rnaseq_sets_dirs - path rnaseq_sets_ids - path proteins - path hintsfile - - output: - tuple val(meta), path("${prefix}/braker.gtf") , emit: gtf - tuple val(meta), path("${prefix}/braker.codingseq"), emit: cds - tuple val(meta), path("${prefix}/braker.aa") , emit: aa - tuple val(meta), path("${prefix}/hintsfile.gff") , emit: hintsfile, optional: true - tuple val(meta), path("${prefix}/braker.log") , emit: log - tuple val(meta), path("${prefix}/what-to-cite.txt"), emit: citations - tuple val(meta), path("${prefix}/braker.gff3") , emit: gff3 , optional: true - path "versions.yml" , emit: versions - - when: - task.ext.when == null || task.ext.when - - script: - def args = task.ext.args ?: '' - prefix = task.ext.prefix ?: "${meta.id}" - - def test_mode = args.contains('--testMode') // Custom flag for test data - def args_fmt = test_mode ? args.replace('--testMode', '') : args - - def rna_ids = rnaseq_sets_ids ? "--rnaseq_sets_ids=${rnaseq_sets_ids}" : '' - def rna_dirs = rnaseq_sets_dirs ? "--rnaseq_sets_dirs=${rnaseq_sets_dirs}" : '' - def bam = bam && !test_mode ? "--bam=${bam}" : '' - def proteins = proteins && !test_mode ? "--prot_seq=${proteins}" : '' - def hints = hintsfile ? "--hints=${hintsfile}" : '' - - def new_species = args.contains('--species') ? 
'' : "--species new_species" - """ - cp -r /usr/share/augustus/config augustus_config - - perl -p -e 's/^(>\\S+).*\$/\$1/' \\ - ${fasta} \\ - > ${prefix}.name.only.genome.masked.fasta - - braker.pl \\ - --genome ${prefix}.name.only.genome.masked.fasta \\ - ${new_species} \\ - --workingdir ${prefix} \\ - --AUGUSTUS_CONFIG_PATH "\$(pwd)/augustus_config" \\ - --threads ${task.cpus} \\ - ${rna_ids} \\ - ${rna_dirs} \\ - ${bam} \\ - ${proteins} \\ - ${hints} \\ - ${args_fmt} - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - braker3: \$(braker.pl --version 2>/dev/null | sed 's/braker.pl version//') - END_VERSIONS - """ - - stub: - def args = task.ext.args ?: '' - prefix = task.ext.prefix ?: "${meta.id}" - def rna_ids = rnaseq_sets_ids ? "--rnaseq_sets_ids=${rnaseq_sets_ids}" : '' - def touch_hints = (rna_ids || bam || proteins || hints) ? "touch ${prefix}/hintsfile.gff" : '' - def touch_gff = args.contains('--gff3') ? "touch ${prefix}/braker.gff3" : '' - """ - mkdir "$prefix" - - touch "${prefix}/braker.gtf" - touch "${prefix}/braker.codingseq" - touch "${prefix}/braker.aa" - $touch_hints - touch "${prefix}/braker.log" - touch "${prefix}/what-to-cite.txt" - $touch_gff - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - braker3: \$(braker.pl --version 2>/dev/null | sed 's/braker.pl version//') - END_VERSIONS - """ -} diff --git a/modules/kherronism/braker3/meta.yml b/modules/kherronism/braker3/meta.yml deleted file mode 100644 index ed4da40..0000000 --- a/modules/kherronism/braker3/meta.yml +++ /dev/null @@ -1,74 +0,0 @@ -name: braker3 -description: | - Gene prediction in novel genomes using RNA-seq and protein homology information -keywords: - - genome - - annotation - -tools: - - braker3: - description: "BRAKER3 is a pipeline for fully automated prediction of - protein coding gene structures using protein and RNA-seq and protein homology - information" - homepage: "https://github.com/Gaius-Augustus/BRAKER" - documentation: "https://github.com/Gaius-Augustus/BRAKER" - tool_dev_url: "https://github.com/Gaius-Augustus/BRAKER" - doi: "10.13140/RG.2.2.20047.36004" - licence: ["Artistic-1.0"] - -input: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. 
[ id:'test', single_end:false ] - - fasta: - type: file - description: Genome assembly fasta - pattern: "*.{fasta,fa,fas,faa,fna}" - - rnaseq_sets_ids: - type: file - description: IDs of RNA-seq data sets to be passed to --rnaseq_sets_ids - - rnaseq_sets_dirs: - type: file - description: Directories of RNA-seq data sets to be passed to --rnaseq_sets_dirs - - bam: - type: file - description: BAM file of RNA-seq data to be passed to --bam - pattern: "*.bam" - - proteins: - type: file - description: Protein evidence to be passed to --proteins - pattern: "*.{fasta,fa,fas,faa}" - - hintsfile: - type: file - description: Hintsfile to be passed to --hintsfile - pattern: "*.{gff, gtf, gff3}" -output: - - gtf: - type: file - description: Gene transfer format file as output by BRAKER3 - pattern: "*.{gtf}" - - cds: - type: file - description: Coding sequence file as output by BRAKER3 - pattern: "*.{codingseq}" - - aa: - type: file - description: Protein sequence file as output by BRAKER3 - pattern: "*.{aa}" - - hintsfile: - type: file - description: Hints file as output by BRAKER3 - pattern: "*hintsfile.{gff}" - - gff3: - type: file - description: GFF3 file as output by BRAKER3 - pattern: "*.{gff3}" - - versions: - type: file - description: File containing software versions - pattern: "versions.yml" - -authors: - - "@kherronism" diff --git a/modules/kherronism/braker3/tests/name.only.sh b/modules/kherronism/braker3/tests/name.only.sh deleted file mode 100755 index 172e651..0000000 --- a/modules/kherronism/braker3/tests/name.only.sh +++ /dev/null @@ -1,4 +0,0 @@ -#!/usr/bin/env bash - -perl -p -e 's/^(>\S+).*$/$1/' \ - modules/kherronism/braker3/tests/test.fa diff --git a/modules/kherronism/braker3/tests/test.fa b/modules/kherronism/braker3/tests/test.fa deleted file mode 100644 index 8b195b8..0000000 --- a/modules/kherronism/braker3/tests/test.fa +++ /dev/null @@ -1,16 +0,0 @@ ->chr1 This is with four spaces and a space and a tab -AAACCCTAAACCCTAAACCCTAAACCCTAAACCCTAAACCCTAAACCCTAAACCCTAAAC -CCTAAACCCTAAACCCTAAACCCTAAACCCTAAACCCTAAACCCTAAACCCTAAACCCTA ->chr2 This is with four spaces -TAAACCCTAAACCCTAAACCCTAAACCCTAAACCCTAAACCCTAAACCCTAAACCCTAAA -CCCTAAACCCTAAACCCTAAACCCTAAACCCTAAACCCTAAACCCTAAACCCTAAACCCT -AAACCCTAAACCCTAAACCCTAAACCCTAAACCCTAAACCCTAAACCCTAAACCCTAAAC ->chr3 This is with a single space -TAAACCCTAAACCCTAAACCCTAAACCCTAAACCCTAAACCCTAAACCCTAAACCCTAAA -CCCTAAACCCTAAACCCTAAACCCTAAACCCTAAACCCTAAACCCTAAACCCTAAACCCT ->chrX -AACCCTAAACCCTAAACCCTAAACCCTAAACCCTAAACCCTAAACCCTAAACCCTAAACC -CTAAACCCTAAACCCTAAACCCTAAACCCTAAACCCTAAACCCTAAACCCTAAACCCTAA ->chrY This desc is with tab and another tab | and a vertical slash -AACCCTAAACCCTAAACCCTAAACCCTAAACCCTAAACCCTAAACCCTAAACCCTAAACC -CTAAACCCTAAACCCTAAACCCTAAACCCTAAACCCTAAACCCTAAACCCTAAACCCTAA diff --git a/modules/kherronism/repeatmasker/main.nf b/modules/kherronism/repeatmasker/main.nf deleted file mode 100644 index fdab29e..0000000 --- a/modules/kherronism/repeatmasker/main.nf +++ /dev/null @@ -1,59 +0,0 @@ -process REPEATMASKER { - tag "$meta.id" - label 'process_high' - - conda "bioconda::repeatmasker=4.1.5" - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/repeatmasker:4.1.5--pl5321hdfd78af_0': - 'biocontainers/repeatmasker:4.1.5--pl5321hdfd78af_0' }" - - input: - tuple val(meta), path(fasta) - path(lib) - - output: - tuple val(meta), path("${meta.id}/*.f*a.masked") , emit: fasta_masked - tuple val(meta), path("${meta.id}/*.f*a.out") , emit: fasta_out - tuple val(meta), path("${meta.id}/*.f*a.tbl") , emit: fasta_tbl - tuple val(meta), path("${meta.id}/*.f*a.cat.gz") , emit: fasta_cat_gz , optional: true - tuple val(meta), path("${meta.id}/*.f*a.out.gff"), emit: fasta_out_gff, optional: true - tuple val(meta), path("${meta.id}/*.f*a.align") , emit: fasta_align , optional: true - path "versions.yml" , emit: versions - - when: - task.ext.when == null || task.ext.when - - script: - def args = task.ext.args ?: '' - def prefix = task.ext.prefix ?: "${meta.id}" - def VERSION = '4.1.5' // WARN: Version information not provided by tool on CLI. Please update this string when bumping container versions. - """ - RepeatMasker \\ - -lib ${lib} \\ - -pa ${task.cpus} \\ - -dir ${prefix} \\ - ${args} \\ - ${fasta} - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - repeatmasker: ${VERSION} - END_VERSIONS - """ - - stub: - def prefix = task.ext.prefix ?: "${meta.id}" - def VERSION = '4.1.5' // WARN: Version information not provided by tool on CLI. Please update this string when bumping container versions. - """ - mkdir "$meta.id" - - touch "${meta.id}/${meta.id}.fasta.masked" - touch "${meta.id}/${meta.id}.fasta.out" - touch "${meta.id}/${meta.id}.fasta.tbl" - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - repeatmasker: ${VERSION} - END_VERSIONS - """ -} diff --git a/modules/kherronism/repeatmasker/meta.yml b/modules/kherronism/repeatmasker/meta.yml deleted file mode 100644 index 0cab608..0000000 --- a/modules/kherronism/repeatmasker/meta.yml +++ /dev/null @@ -1,47 +0,0 @@ -name: repeatmasker -description: | - Screening DNA sequences for interspersed repeats and low complexity DNA sequences - -keywords: - - genome - - annotation - - repeats - -tools: - - repeatmasker: - description: | - RepeatMasker is a program that screens DNA sequences for interspersed - repeats and low complexity DNA sequences - homepage: "https://www.repeatmasker.org/" - documentation: "https://www.repeatmasker.org/webrepeatmaskerhelp.html" - tool_dev_url: "https://github.com/rmhubley/RepeatMasker" - doi: "10.1073/pnas.1921046117" - licence: ["Open Software License v. 2.1"] - -input: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. 
[ id:'test', single_end:false ] - - fasta: - type: file - description: Genome assembly - pattern: "*.{fasta,fa,fas,faa,fna}" - -output: - - rm_fasta: - type: file - description: Gene transfer format file as output by BRAKER3 - pattern: "*.{-families.fa}" - - rm_stk: - type: - description: "" - pattern: "*.{stk}" - - versions: - type: file - description: File containing software versions - pattern: "versions.yml" - -authors: - - "@kherronism" diff --git a/modules/local/utils.nf b/modules/local/utils.nf deleted file mode 100644 index 7e1c86a..0000000 --- a/modules/local/utils.nf +++ /dev/null @@ -1,40 +0,0 @@ -def idFromFileName(fileName) { - - def trial = ( fileName - ).replaceFirst( - /\.f(ast)?q$/, '' - ).replaceFirst( - /\.f(asta|sa|a|as|aa)?$/, '' - ).replaceFirst( - /\.gff(3)?$/, '' - ).replaceFirst( - /\.gz$/, '' - ) - - if ( trial == fileName ) { return fileName } - - return idFromFileName ( trial ) -} - -def validateFastqMetadata(metas, fqs, permAssString) { - def permAssList = permAssString.split(",") - - // Check if each listed assembly is permissible - metas.each { meta -> - if ( meta.target_assemblies.any { !permAssList.contains( it ) } ) { - exit 1, "Sample ${meta.id} targets ${meta.target_assemblies} which are not in $permAssList" - } - } - - // Check if multiple runs of a sample have the same target assemblies - if ( metas.collect { meta -> meta.target_assemblies }.unique().size() > 1 ) { - error "Multiple runs of sample ${metas.first().id} must target same assemblies" - } - - // Check if multiple runs of a sample have the same endedness - if ( metas.collect { meta -> meta.single_end }.unique().size() > 1 ) { - error "Multiple runs of sample ${metas.first().id} must have same endedness" - } - - [ metas.first(), fqs ] -} diff --git a/modules/nf-core/agat/convertspgff2gtf/environment.yml b/modules/nf-core/agat/convertspgff2gtf/environment.yml index 381154f..0410ee7 100644 --- a/modules/nf-core/agat/convertspgff2gtf/environment.yml +++ b/modules/nf-core/agat/convertspgff2gtf/environment.yml @@ -1,7 +1,5 @@ -name: agat_convertspgff2gtf channels: - conda-forge - bioconda - - defaults dependencies: - - bioconda::agat=1.0.0 + - bioconda::agat=1.4.0 diff --git a/modules/nf-core/agat/convertspgff2gtf/main.nf b/modules/nf-core/agat/convertspgff2gtf/main.nf index 8f1f8b4..38af025 100644 --- a/modules/nf-core/agat/convertspgff2gtf/main.nf +++ b/modules/nf-core/agat/convertspgff2gtf/main.nf @@ -4,8 +4,8 @@ process AGAT_CONVERTSPGFF2GTF { conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/agat:1.0.0--pl5321hdfd78af_0' : - 'biocontainers/agat:1.0.0--pl5321hdfd78af_0' }" + 'https://depot.galaxyproject.org/singularity/agat:1.4.0--pl5321hdfd78af_0' : + 'biocontainers/agat:1.4.0--pl5321hdfd78af_0' }" input: tuple val(meta), path(gff) @@ -29,7 +29,7 @@ process AGAT_CONVERTSPGFF2GTF { cat <<-END_VERSIONS > versions.yml "${task.process}": - agat: \$(agat_convert_sp_gff2gtf.pl --help | sed '4!d; s/.*v//') + agat: \$(agat_convert_sp_gff2gtf.pl --help | sed -n 's/.*(AGAT) - Version: \\(.*\\) .*/\\1/p') END_VERSIONS """ @@ -42,7 +42,7 @@ process AGAT_CONVERTSPGFF2GTF { cat <<-END_VERSIONS > versions.yml "${task.process}": - agat: \$(agat_convert_sp_gff2gtf.pl --help | sed '4!d; s/.*v//') + agat: \$(agat_convert_sp_gff2gtf.pl --help | sed -n 's/.*(AGAT) - Version: \\(.*\\) .*/\\1/p') END_VERSIONS """ } diff --git a/modules/nf-core/agat/convertspgff2gtf/meta.yml b/modules/nf-core/agat/convertspgff2gtf/meta.yml index dcdc8d9..5330167 100644 --- a/modules/nf-core/agat/convertspgff2gtf/meta.yml +++ b/modules/nf-core/agat/convertspgff2gtf/meta.yml @@ -8,36 +8,50 @@ keywords: - conversion tools: - agat: - description: "AGAT is a toolkit for manipulation and getting information from GFF/GTF files" + description: "AGAT is a toolkit for manipulation and getting information from + GFF/GTF files" homepage: "https://github.com/NBISweden/AGAT" documentation: "https://agat.readthedocs.io/" tool_dev_url: "https://github.com/NBISweden/AGAT" doi: "10.5281/zenodo.3552717" licence: ["GPL v3"] + identifier: biotools:AGAT input: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test', single_end:false ] - - gff: - type: file - description: Annotation file in GFF3/GTF format - pattern: "*.{gff, gtf}" + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - gff: + type: file + description: Annotation file in GFF3/GTF format + pattern: "*.{gff, gtf}" output: - output_gtf: - type: file - description: Annotation file in GTF format - pattern: "*.{gtf}" + - meta: + type: file + description: Annotation file in GTF format + pattern: "*.{gtf}" + - "*.agat.gtf": + type: file + description: Annotation file in GTF format + pattern: "*.{gtf}" - log: - type: file - description: Log file of the conversion process - pattern: "*.{log}" + - meta: + type: file + description: Log file of the conversion process + pattern: "*.{log}" + - "*.log": + type: file + description: Log file of the conversion process + pattern: "*.{log}" - versions: - type: file - description: File containing software versions - pattern: "versions.yml" + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" authors: - "@toniher" maintainers: - "@toniher" + - "@gallvp" diff --git a/modules/nf-core/agat/convertspgff2gtf/tests/main.nf.test b/modules/nf-core/agat/convertspgff2gtf/tests/main.nf.test index 9accfec..401f455 100644 --- a/modules/nf-core/agat/convertspgff2gtf/tests/main.nf.test +++ b/modules/nf-core/agat/convertspgff2gtf/tests/main.nf.test @@ -16,7 +16,7 @@ nextflow_process { """ input[0] = [ [ id: 'test' ], // meta map - file(params.test_data['sarscov2']['genome']['genome_gff3'], checkIfExists: true) + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.gff3', checkIfExists: true) ] """ } @@ -42,7 +42,7 @@ nextflow_process { """ input[0] = [ [ id: 'test' ], // meta map - file(params.test_data['sarscov2']['genome']['genome_gff3'], checkIfExists: true) + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.gff3', checkIfExists: true) ] """ } diff --git a/modules/nf-core/agat/convertspgff2gtf/tests/main.nf.test.snap b/modules/nf-core/agat/convertspgff2gtf/tests/main.nf.test.snap index 6193be8..4088c71 100644 --- a/modules/nf-core/agat/convertspgff2gtf/tests/main.nf.test.snap +++ b/modules/nf-core/agat/convertspgff2gtf/tests/main.nf.test.snap @@ -4,10 +4,14 @@ [ "test.agat.gtf", "genome.gff3.agat.log", - "versions.yml:md5,dcbde1b24eb36571645f2d4bd4b4e551" + "versions.yml:md5,dcc621fac77aa683287f6a0d61e10395" ] ], - "timestamp": "2023-12-24T23:36:49.538312808" + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-05-08T10:05:11.177573" }, "sarscov2 - genome [gff3]": { "content": [ @@ -16,13 +20,17 @@ { "id": "test" }, - "test.agat.gtf:md5,bbe333239767d048eb8392bba6856616" + "test.agat.gtf:md5,be1c396ac00fd5a84dc08a36d84ff8c5" ] ], [ - "versions.yml:md5,dcbde1b24eb36571645f2d4bd4b4e551" + "versions.yml:md5,dcc621fac77aa683287f6a0d61e10395" ] ], - "timestamp": "2023-12-24T23:36:39.319717066" + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-05-08T10:05:06.695419" } } \ No newline at end of file diff --git a/modules/nf-core/agat/convertspgxf2gxf/environment.yml b/modules/nf-core/agat/convertspgxf2gxf/environment.yml index 6ed34fa..0410ee7 100644 --- a/modules/nf-core/agat/convertspgxf2gxf/environment.yml +++ b/modules/nf-core/agat/convertspgxf2gxf/environment.yml @@ -1,7 +1,5 @@ -name: agat_convertspgxf2gxf channels: - conda-forge - bioconda - - defaults dependencies: - bioconda::agat=1.4.0 diff --git a/modules/nf-core/agat/convertspgxf2gxf/meta.yml b/modules/nf-core/agat/convertspgxf2gxf/meta.yml index 0ef9881..d9468ec 100644 --- a/modules/nf-core/agat/convertspgxf2gxf/meta.yml +++ 
b/modules/nf-core/agat/convertspgxf2gxf/meta.yml @@ -8,35 +8,48 @@ keywords: - conversion tools: - agat: - description: "AGAT is a toolkit for manipulation and getting information from GFF/GTF files" + description: "AGAT is a toolkit for manipulation and getting information from + GFF/GTF files" homepage: "https://github.com/NBISweden/AGAT" documentation: "https://agat.readthedocs.io/" tool_dev_url: "https://github.com/NBISweden/AGAT" doi: "10.5281/zenodo.3552717" licence: ["GPL v3"] + identifier: biotools:AGAT input: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test', single_end:false ] - - gxf: - type: file - description: Annotation file in GFF3/GTF format - pattern: "*.{gff, gtf}" + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - gxf: + type: file + description: Annotation file in GFF3/GTF format + pattern: "*.{gff, gtf}" output: - output_gff: - type: file - description: Cleaned annotation file in GFF3 format - pattern: "*.{gff}" + - meta: + type: file + description: Cleaned annotation file in GFF3 format + pattern: "*.{gff}" + - "*.agat.gff": + type: file + description: Cleaned annotation file in GFF3 format + pattern: "*.{gff}" - log: - type: file - description: Log file of the conversion process - pattern: "*.{log}" + - meta: + type: file + description: Log file of the conversion process + pattern: "*.{log}" + - "*.log": + type: file + description: Log file of the conversion process + pattern: "*.{log}" - versions: - type: file - description: File containing software versions - pattern: "versions.yml" + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" authors: - "@toniher" maintainers: diff --git a/modules/nf-core/agat/convertspgxf2gxf/tests/main.nf.test b/modules/nf-core/agat/convertspgxf2gxf/tests/main.nf.test index db85991..d8d7bc2 100644 --- a/modules/nf-core/agat/convertspgxf2gxf/tests/main.nf.test +++ b/modules/nf-core/agat/convertspgxf2gxf/tests/main.nf.test @@ -16,7 +16,7 @@ nextflow_process { """ input[0] = [ [ id: 'test' ], // meta map - file(params.test_data['sarscov2']['genome']['genome_gtf'], checkIfExists: true) + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.gtf', checkIfExists: true) ] """ } @@ -42,7 +42,7 @@ nextflow_process { """ input[0] = [ [ id: 'test' ], // meta map - file(params.test_data['sarscov2']['genome']['genome_gtf'], checkIfExists: true) + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.gtf', checkIfExists: true) ] """ } diff --git a/modules/pfr/custom/restoregffids/environment.yml b/modules/nf-core/agat/spfilterfeaturefromkilllist/environment.yml similarity index 74% rename from modules/pfr/custom/restoregffids/environment.yml rename to modules/nf-core/agat/spfilterfeaturefromkilllist/environment.yml index 2450c45..a6b5f2b 100644 --- a/modules/pfr/custom/restoregffids/environment.yml +++ b/modules/nf-core/agat/spfilterfeaturefromkilllist/environment.yml @@ -1,9 +1,7 @@ --- # yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json -name: "custom_restoregffids" channels: - conda-forge - bioconda - - defaults dependencies: - - "python=3.10.2" + - "bioconda::agat=1.4.0" diff --git a/modules/pfr/agat/spfilterfeaturefromkilllist/main.nf b/modules/nf-core/agat/spfilterfeaturefromkilllist/main.nf similarity index 93% rename from 
modules/pfr/agat/spfilterfeaturefromkilllist/main.nf rename to modules/nf-core/agat/spfilterfeaturefromkilllist/main.nf index ffb91d1..4918ed7 100644 --- a/modules/pfr/agat/spfilterfeaturefromkilllist/main.nf +++ b/modules/nf-core/agat/spfilterfeaturefromkilllist/main.nf @@ -4,8 +4,8 @@ process AGAT_SPFILTERFEATUREFROMKILLLIST { conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/agat:1.3.3--pl5321hdfd78af_0': - 'biocontainers/agat:1.3.3--pl5321hdfd78af_0' }" + 'https://depot.galaxyproject.org/singularity/agat:1.4.0--pl5321hdfd78af_0': + 'biocontainers/agat:1.4.0--pl5321hdfd78af_0' }" input: tuple val(meta), path(gff) diff --git a/modules/nf-core/agat/spfilterfeaturefromkilllist/meta.yml b/modules/nf-core/agat/spfilterfeaturefromkilllist/meta.yml new file mode 100644 index 0000000..d6c8dbb --- /dev/null +++ b/modules/nf-core/agat/spfilterfeaturefromkilllist/meta.yml @@ -0,0 +1,61 @@ +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/meta-schema.json +name: "agat_spfilterfeaturefromkilllist" +description: | + The script aims to remove features based on a kill list. The default behaviour is to look at the features's ID. + If the feature has an ID (case insensitive) listed among the kill list it will be removed. /!\ Removing a level1 + or level2 feature will automatically remove all linked subfeatures, and removing all children of a feature will + automatically remove this feature too. +keywords: + - genomics + - gff + - remove + - feature +tools: + - "agat": + description: "Another Gff Analysis Toolkit (AGAT). Suite of tools to handle gene + annotations in any GTF/GFF format." + homepage: "https://agat.readthedocs.io/en/latest/" + documentation: "https://agat.readthedocs.io/en/latest/tools/agat_sp_filter_feature_from_kill_list.html" + tool_dev_url: "https://github.com/NBISweden/AGAT" + doi: "10.5281/zenodo.3552717" + licence: ["GPL v3"] + identifier: biotools:AGAT +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'sample1', single_end:false ]` + - gff: + type: file + description: Input GFF3 file that will be read + pattern: "*.{gff,gff3}" + - - kill_list: + type: file + description: Kill list. One value per line. + pattern: "*.txt" + - - config: + type: file + description: | + Input agat config file. By default AGAT takes as input agat_config.yaml file from the working directory if any, otherwise it takes the orignal agat_config.yaml shipped with AGAT. To get the agat_config.yaml locally type: "agat config --expose". The --config option gives you the possibility to use your own AGAT config file (located elsewhere or named differently). + pattern: "*.yaml" +output: + - gff: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'sample1', single_end:false ] + - "*.gff": + type: file + description: Output GFF file. 
+ pattern: "*.gff" + - versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@GallVp" +maintainers: + - "@GallVp" diff --git a/modules/pfr/agat/spfilterfeaturefromkilllist/tests/main.nf.test b/modules/nf-core/agat/spfilterfeaturefromkilllist/tests/main.nf.test similarity index 56% rename from modules/pfr/agat/spfilterfeaturefromkilllist/tests/main.nf.test rename to modules/nf-core/agat/spfilterfeaturefromkilllist/tests/main.nf.test index 891e0a7..82a3c30 100644 --- a/modules/pfr/agat/spfilterfeaturefromkilllist/tests/main.nf.test +++ b/modules/nf-core/agat/spfilterfeaturefromkilllist/tests/main.nf.test @@ -9,14 +9,14 @@ nextflow_process { tag "agat" tag "agat/spfilterfeaturefromkilllist" - test("sarscov2-genome_gff3") { + test("sarscov2 - gff3") { when { process { """ input[0] = [ [ id:'test' ], // meta map - file(params.test_data['sarscov2']['genome']['genome_gff3'], checkIfExists: true) + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.gff3', checkIfExists: true) ] def kill_list = "unknown_transcript_1" @@ -39,7 +39,37 @@ nextflow_process { } - test("sarscov2-genome_gff3-stub") { + test("sarscov2 - gff3 - config") { + + when { + process { + """ + input[0] = [ + [ id:'test' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.gff3', checkIfExists: true) + ] + + def kill_list = "unknown_transcript_1" + def kill_list_file = new File('kill.list.txt') + kill_list_file.text = kill_list + + input[1] = kill_list_file.toPath() + + input[2] = file(params.modules_testdata_base_path + 'generic/config/agat_config.yaml', checkIfExists: true) + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + + test("sarscov2 - gff3 - stub") { options "-stub" @@ -48,7 +78,7 @@ nextflow_process { """ input[0] = [ [ id:'test' ], // meta map - file(params.test_data['sarscov2']['genome']['genome_gff3'], checkIfExists: true) + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.gff3', checkIfExists: true) ] def kill_list = "unknown_transcript_1" diff --git a/modules/nf-core/agat/spfilterfeaturefromkilllist/tests/main.nf.test.snap b/modules/nf-core/agat/spfilterfeaturefromkilllist/tests/main.nf.test.snap new file mode 100644 index 0000000..8322d0f --- /dev/null +++ b/modules/nf-core/agat/spfilterfeaturefromkilllist/tests/main.nf.test.snap @@ -0,0 +1,101 @@ +{ + "sarscov2 - gff3 - config": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test.gff:md5,df19e1b84ba6f691d20c72b397c88abf" + ] + ], + "1": [ + "versions.yml:md5,e2962240799182aee69421c746be183a" + ], + "gff": [ + [ + { + "id": "test" + }, + "test.gff:md5,df19e1b84ba6f691d20c72b397c88abf" + ] + ], + "versions": [ + "versions.yml:md5,e2962240799182aee69421c746be183a" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-07-03T15:32:54.707393" + }, + "sarscov2 - gff3 - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test.gff:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + "versions.yml:md5,e2962240799182aee69421c746be183a" + ], + "gff": [ + [ + { + "id": "test" + }, + "test.gff:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions": [ + "versions.yml:md5,e2962240799182aee69421c746be183a" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-07-03T15:32:59.888053" + }, + "sarscov2 - gff3": { + "content": [ 
+ { + "0": [ + [ + { + "id": "test" + }, + "test.gff:md5,df19e1b84ba6f691d20c72b397c88abf" + ] + ], + "1": [ + "versions.yml:md5,e2962240799182aee69421c746be183a" + ], + "gff": [ + [ + { + "id": "test" + }, + "test.gff:md5,df19e1b84ba6f691d20c72b397c88abf" + ] + ], + "versions": [ + "versions.yml:md5,e2962240799182aee69421c746be183a" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-07-03T15:32:47.772624" + } +} \ No newline at end of file diff --git a/modules/pfr/agat/spmergeannotations/environment.yml b/modules/nf-core/agat/spmergeannotations/environment.yml similarity index 71% rename from modules/pfr/agat/spmergeannotations/environment.yml rename to modules/nf-core/agat/spmergeannotations/environment.yml index 6df7aea..a6b5f2b 100644 --- a/modules/pfr/agat/spmergeannotations/environment.yml +++ b/modules/nf-core/agat/spmergeannotations/environment.yml @@ -1,9 +1,7 @@ --- # yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json -name: "agat_spmergeannotations" channels: - conda-forge - bioconda - - defaults dependencies: - - "bioconda::agat=1.3.3" + - "bioconda::agat=1.4.0" diff --git a/modules/pfr/agat/spmergeannotations/main.nf b/modules/nf-core/agat/spmergeannotations/main.nf similarity index 83% rename from modules/pfr/agat/spmergeannotations/main.nf rename to modules/nf-core/agat/spmergeannotations/main.nf index 7738ac5..e0861c0 100644 --- a/modules/pfr/agat/spmergeannotations/main.nf +++ b/modules/nf-core/agat/spmergeannotations/main.nf @@ -4,8 +4,8 @@ process AGAT_SPMERGEANNOTATIONS { conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/agat:1.3.3--pl5321hdfd78af_0': - 'biocontainers/agat:1.3.3--pl5321hdfd78af_0' }" + 'https://depot.galaxyproject.org/singularity/agat:1.4.0--pl5321hdfd78af_0': + 'biocontainers/agat:1.4.0--pl5321hdfd78af_0' }" input: tuple val(meta), path(gffs) @@ -22,8 +22,8 @@ process AGAT_SPMERGEANNOTATIONS { def args = task.ext.args ?: '' def prefix = task.ext.prefix ?: "${meta.id}" def config_param = config ? "--config $config" : '' - def gff_param = "$gffs".split(' ').collect { "--gff $it" }.join(' ') - def file_names = gffs.collect { "$it" } + def file_names = "$gffs".split(' ') + def gff_param = file_names.collect { "--gff $it" }.join(' ') if ( file_names.contains ( "${prefix}.gff" ) ) error "Input and output names are the same, use \"task.ext.prefix\" to disambiguate!" """ agat_sp_merge_annotations.pl \\ @@ -40,7 +40,7 @@ process AGAT_SPMERGEANNOTATIONS { stub: def prefix = task.ext.prefix ?: "${meta.id}" - def file_names = gffs.collect { "$it" } + def file_names = "$gffs".split(' ') if ( file_names.contains ( "${prefix}.gff" ) ) error "Input and output names are the same, use \"task.ext.prefix\" to disambiguate!" """ touch ${prefix}.gff diff --git a/modules/nf-core/agat/spmergeannotations/meta.yml b/modules/nf-core/agat/spmergeannotations/meta.yml new file mode 100644 index 0000000..fd5083b --- /dev/null +++ b/modules/nf-core/agat/spmergeannotations/meta.yml @@ -0,0 +1,57 @@ +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/meta-schema.json +name: "agat_spmergeannotations" +description: | + This script merge different gff annotation files in one. It uses the AGAT parser that takes care of duplicated names and fixes other oddities met in those files. 
+keywords: + - genomics + - gff + - merge + - combine +tools: + - "agat": + description: "Another Gff Analysis Toolkit (AGAT). Suite of tools to handle gene + annotations in any GTF/GFF format." + homepage: "https://agat.readthedocs.io/en/latest/" + documentation: "https://agat.readthedocs.io/en/latest/tools/agat_sp_merge_annotations.html" + tool_dev_url: "https://github.com/NBISweden/AGAT" + doi: "10.5281/zenodo.3552717" + licence: ["GPL v3"] + identifier: biotools:AGAT +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'sample1' ]` + - gffs: + type: list + description: A list of GFFs to merge + pattern: "[ *.{gff,gff3} ]" + - - config: + type: file + description: | + Input agat config file. By default AGAT takes as input agat_config.yaml file from the working directory if any, + otherwise it takes the orignal agat_config.yaml shipped with AGAT. To get the agat_config.yaml + locally type: "agat config --expose". The --config option gives you the possibility to use your + own AGAT config file (located elsewhere or named differently). + pattern: "*.yaml" +output: + - gff: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'sample1' ]` + - "*.gff": + type: file + description: Output GFF file. + pattern: "*.gff" + - versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@GallVp" +maintainers: + - "@GallVp" diff --git a/modules/nf-core/agat/spmergeannotations/tests/main.nf.test b/modules/nf-core/agat/spmergeannotations/tests/main.nf.test new file mode 100644 index 0000000..3f500fa --- /dev/null +++ b/modules/nf-core/agat/spmergeannotations/tests/main.nf.test @@ -0,0 +1,130 @@ +nextflow_process { + + name "Test Process AGAT_SPMERGEANNOTATIONS" + script "../main.nf" + process "AGAT_SPMERGEANNOTATIONS" + + tag "modules" + tag "modules_nfcore" + tag "agat" + tag "agat/spmergeannotations" + + test("candidatus_portiera_aleyrodidarum - multi_gffs") { + + when { + process { + """ + input[0] = [ + [ id:'test' ], // meta map + [ + file(params.modules_testdata_base_path + 'genomics/prokaryotes/candidatus_portiera_aleyrodidarum/genome/gff/test1.gff', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/prokaryotes/candidatus_portiera_aleyrodidarum/genome/gff/test2.gff', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/prokaryotes/candidatus_portiera_aleyrodidarum/genome/gff/test3.gff', checkIfExists: true) + ] + ] + + input[1] = [] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert path(process.out.gff[0][1]).text.contains('AGAT gene') }, + { assert snapshot(process.out.versions).match("versions") } + ) + } + + } + + test("candidatus_portiera_aleyrodidarum - multi_gffs - stub") { + + options '-stub' + + when { + process { + """ + input[0] = [ + [ id:'test' ], // meta map + [ + file(params.modules_testdata_base_path + 'genomics/prokaryotes/candidatus_portiera_aleyrodidarum/genome/gff/test1.gff', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/prokaryotes/candidatus_portiera_aleyrodidarum/genome/gff/test2.gff', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/prokaryotes/candidatus_portiera_aleyrodidarum/genome/gff/test3.gff', checkIfExists: true) + ] + ] + + input[1] = [] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + + 
test("candidatus_portiera_aleyrodidarum - multi_gffs - config") { + + when { + process { + """ + input[0] = [ + [ id:'test' ], // meta map + [ + file(params.modules_testdata_base_path + 'genomics/prokaryotes/candidatus_portiera_aleyrodidarum/genome/gff/test1.gff', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/prokaryotes/candidatus_portiera_aleyrodidarum/genome/gff/test2.gff', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/prokaryotes/candidatus_portiera_aleyrodidarum/genome/gff/test3.gff', checkIfExists: true) + ] + ] + + input[1] = file(params.modules_testdata_base_path + 'generic/config/agat_config.yaml', checkIfExists: true) + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert path(process.out.gff[0][1]).text.contains('AGAT gene') }, + { assert snapshot(process.out.versions).match("versions_config") } + ) + } + + } + + test("candidatus_portiera_aleyrodidarum - multi_gffs - stub - config") { + + options '-stub' + + when { + process { + """ + input[0] = [ + [ id:'test' ], // meta map + [ + file(params.modules_testdata_base_path + 'genomics/prokaryotes/candidatus_portiera_aleyrodidarum/genome/gff/test1.gff', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/prokaryotes/candidatus_portiera_aleyrodidarum/genome/gff/test2.gff', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/prokaryotes/candidatus_portiera_aleyrodidarum/genome/gff/test3.gff', checkIfExists: true) + ] + ] + + input[1] = file(params.modules_testdata_base_path + 'generic/config/agat_config.yaml', checkIfExists: true) + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + +} \ No newline at end of file diff --git a/modules/nf-core/agat/spmergeannotations/tests/main.nf.test.snap b/modules/nf-core/agat/spmergeannotations/tests/main.nf.test.snap new file mode 100644 index 0000000..5b56cd9 --- /dev/null +++ b/modules/nf-core/agat/spmergeannotations/tests/main.nf.test.snap @@ -0,0 +1,92 @@ +{ + "candidatus_portiera_aleyrodidarum - multi_gffs - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test.gff:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + "versions.yml:md5,bb159018d6a64ae51339f7c886ad28d7" + ], + "gff": [ + [ + { + "id": "test" + }, + "test.gff:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions": [ + "versions.yml:md5,bb159018d6a64ae51339f7c886ad28d7" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-07-15T13:23:28.495387" + }, + "versions": { + "content": [ + [ + "versions.yml:md5,bb159018d6a64ae51339f7c886ad28d7" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-07-15T13:23:23.220341" + }, + "versions_config": { + "content": [ + [ + "versions.yml:md5,bb159018d6a64ae51339f7c886ad28d7" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-07-15T13:25:34.519378" + }, + "candidatus_portiera_aleyrodidarum - multi_gffs - stub - config": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test.gff:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + "versions.yml:md5,bb159018d6a64ae51339f7c886ad28d7" + ], + "gff": [ + [ + { + "id": "test" + }, + "test.gff:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions": [ + "versions.yml:md5,bb159018d6a64ae51339f7c886ad28d7" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + 
"timestamp": "2024-07-15T13:23:43.811463" + } +} \ No newline at end of file diff --git a/modules/nf-core/agat/spmergeannotations/tests/tags.yml b/modules/nf-core/agat/spmergeannotations/tests/tags.yml new file mode 100644 index 0000000..de92188 --- /dev/null +++ b/modules/nf-core/agat/spmergeannotations/tests/tags.yml @@ -0,0 +1,2 @@ +agat/spmergeannotations: + - "modules/nf-core/agat/spmergeannotations/**" diff --git a/modules/nf-core/cat/cat/environment.yml b/modules/nf-core/cat/cat/environment.yml index 17a04ef..9b01c86 100644 --- a/modules/nf-core/cat/cat/environment.yml +++ b/modules/nf-core/cat/cat/environment.yml @@ -1,7 +1,5 @@ -name: cat_cat channels: - conda-forge - bioconda - - defaults dependencies: - conda-forge::pigz=2.3.4 diff --git a/modules/nf-core/cat/cat/main.nf b/modules/nf-core/cat/cat/main.nf index adbdbd7..2862c64 100644 --- a/modules/nf-core/cat/cat/main.nf +++ b/modules/nf-core/cat/cat/main.nf @@ -76,4 +76,3 @@ def getFileSuffix(filename) { def match = filename =~ /^.*?((\.\w{1,5})?(\.\w{1,5}\.gz$))/ return match ? match[0][1] : filename.substring(filename.lastIndexOf('.')) } - diff --git a/modules/nf-core/cat/cat/meta.yml b/modules/nf-core/cat/cat/meta.yml index 00a8db0..81778a0 100644 --- a/modules/nf-core/cat/cat/meta.yml +++ b/modules/nf-core/cat/cat/meta.yml @@ -9,25 +9,32 @@ tools: description: Just concatenation documentation: https://man7.org/linux/man-pages/man1/cat.1.html licence: ["GPL-3.0-or-later"] + identifier: "" input: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test', single_end:false ] - - files_in: - type: file - description: List of compressed / uncompressed files - pattern: "*" + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - files_in: + type: file + description: List of compressed / uncompressed files + pattern: "*" output: - - versions: - type: file - description: File containing software versions - pattern: "versions.yml" - file_out: - type: file - description: Concatenated file. Will be gzipped if file_out ends with ".gz" - pattern: "${file_out}" + - meta: + type: file + description: Concatenated file. Will be gzipped if file_out ends with ".gz" + pattern: "${file_out}" + - ${prefix}: + type: file + description: Concatenated file. 
Will be gzipped if file_out ends with ".gz" + pattern: "${file_out}" + - versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" authors: - "@erikrikarddaniel" - "@FriederikeHanssen" diff --git a/modules/nf-core/cat/cat/tests/main.nf.test b/modules/nf-core/cat/cat/tests/main.nf.test index fcee2d1..9cb1617 100644 --- a/modules/nf-core/cat/cat/tests/main.nf.test +++ b/modules/nf-core/cat/cat/tests/main.nf.test @@ -29,7 +29,8 @@ nextflow_process { then { assertAll( { assert !process.success }, - { assert process.stdout.toString().contains("The name of the input file can't be the same as for the output prefix") } + { assert process.stdout.toString().contains("The name of the input file can't be the same as for the output prefix") }, + { assert snapshot(process.out.versions).match() } ) } } @@ -83,8 +84,12 @@ nextflow_process { def lines = path(process.out.file_out.get(0).get(1)).linesGzip assertAll( { assert process.success }, - { assert snapshot(lines[0..5]).match("test_cat_zipped_zipped_lines") }, - { assert snapshot(lines.size()).match("test_cat_zipped_zipped_size")} + { assert snapshot( + lines[0..5], + lines.size(), + process.out.versions + ).match() + } ) } } @@ -142,8 +147,12 @@ nextflow_process { def lines = path(process.out.file_out.get(0).get(1)).linesGzip assertAll( { assert process.success }, - { assert snapshot(lines[0..5]).match("test_cat_unzipped_zipped_lines") }, - { assert snapshot(lines.size()).match("test_cat_unzipped_zipped_size")} + { assert snapshot( + lines[0..5], + lines.size(), + process.out.versions + ).match() + } ) } } @@ -170,8 +179,12 @@ nextflow_process { def lines = path(process.out.file_out.get(0).get(1)).linesGzip assertAll( { assert process.success }, - { assert snapshot(lines[0..5]).match("test_cat_one_file_unzipped_zipped_lines") }, - { assert snapshot(lines.size()).match("test_cat_one_file_unzipped_zipped_size")} + { assert snapshot( + lines[0..5], + lines.size(), + process.out.versions + ).match() + } ) } } diff --git a/modules/nf-core/cat/cat/tests/main.nf.test.snap b/modules/nf-core/cat/cat/tests/main.nf.test.snap index 423571b..b7623ee 100644 --- a/modules/nf-core/cat/cat/tests/main.nf.test.snap +++ b/modules/nf-core/cat/cat/tests/main.nf.test.snap @@ -1,10 +1,4 @@ { - "test_cat_unzipped_zipped_size": { - "content": [ - 375 - ], - "timestamp": "2023-10-16T14:33:08.049445686" - }, "test_cat_unzipped_unzipped": { "content": [ { @@ -34,6 +28,10 @@ ] } ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.3" + }, "timestamp": "2023-10-16T14:32:18.500464399" }, "test_cat_zipped_unzipped": { @@ -65,9 +63,13 @@ ] } ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.3" + }, "timestamp": "2023-10-16T14:32:49.642741302" }, - "test_cat_zipped_zipped_lines": { + "test_cat_zipped_zipped": { "content": [ [ "MT192765.1\tGenbank\ttranscript\t259\t29667\t.\t+\t.\tID=unknown_transcript_1;geneID=orf1ab;gene_name=orf1ab", @@ -76,11 +78,31 @@ "MT192765.1\tGenbank\tCDS\t13461\t21548\t.\t+\t0\tParent=unknown_transcript_1;exception=\"ribosomal slippage\";gbkey=CDS;gene=orf1ab;note=\"pp1ab;translated=by -1 ribosomal frameshift\";product=\"orf1ab polyprotein\";protein_id=QIK50426.1", "MT192765.1\tGenbank\tCDS\t21556\t25377\t.\t+\t0\tParent=unknown_transcript_1;gbkey=CDS;gene=S;note=\"structural protein\";product=\"surface glycoprotein\";protein_id=QIK50427.1", "MT192765.1\tGenbank\tgene\t21556\t25377\t.\t+\t.\tParent=unknown_transcript_1" + ], + 78, + [ + "versions.yml:md5,115ed6177ebcff24eb99d503fa5ef894" + 
] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.3" + }, + "timestamp": "2024-07-22T11:51:46.802978" + }, + "test_cat_name_conflict": { + "content": [ + [ + ] ], - "timestamp": "2023-10-16T14:32:33.629048645" + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.3" + }, + "timestamp": "2024-07-22T11:51:29.45394" }, - "test_cat_unzipped_zipped_lines": { + "test_cat_one_file_unzipped_zipped": { "content": [ [ ">MT192765.1 Severe acute respiratory syndrome coronavirus 2 isolate SARS-CoV-2/human/USA/PC00101P/2020, complete genome", @@ -89,11 +111,19 @@ "TAACTCGTCTATCTTCTGCAGGCTGCTTACGGTTTCGTCCGTGTTGCAGCCGATCATCAGCACATCTAGGTTTTGTCCGG", "GTGTGACCGAAAGGTAAGATGGAGAGCCTTGTCCCTGGTTTCAACGAGAAAACACACGTCCAACTCAGTTTGCCTGTTTT", "ACAGGTTCGCGACGTGCTCGTACGTGGCTTTGGAGACTCCGTGGAGGAGGTCTTATCAGAGGCACGTCAACATCTTAAAG" + ], + 374, + [ + "versions.yml:md5,115ed6177ebcff24eb99d503fa5ef894" ] ], - "timestamp": "2023-10-16T14:33:08.038830506" + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.3" + }, + "timestamp": "2024-07-22T11:52:02.774016" }, - "test_cat_one_file_unzipped_zipped_lines": { + "test_cat_unzipped_zipped": { "content": [ [ ">MT192765.1 Severe acute respiratory syndrome coronavirus 2 isolate SARS-CoV-2/human/USA/PC00101P/2020, complete genome", @@ -102,20 +132,16 @@ "TAACTCGTCTATCTTCTGCAGGCTGCTTACGGTTTCGTCCGTGTTGCAGCCGATCATCAGCACATCTAGGTTTTGTCCGG", "GTGTGACCGAAAGGTAAGATGGAGAGCCTTGTCCCTGGTTTCAACGAGAAAACACACGTCCAACTCAGTTTGCCTGTTTT", "ACAGGTTCGCGACGTGCTCGTACGTGGCTTTGGAGACTCCGTGGAGGAGGTCTTATCAGAGGCACGTCAACATCTTAAAG" + ], + 375, + [ + "versions.yml:md5,115ed6177ebcff24eb99d503fa5ef894" ] ], - "timestamp": "2023-10-16T14:33:21.39642399" - }, - "test_cat_zipped_zipped_size": { - "content": [ - 78 - ], - "timestamp": "2023-10-16T14:32:33.641869244" - }, - "test_cat_one_file_unzipped_zipped_size": { - "content": [ - 374 - ], - "timestamp": "2023-10-16T14:33:21.4094373" + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.3" + }, + "timestamp": "2024-07-22T11:51:57.581523" } } \ No newline at end of file diff --git a/modules/nf-core/cat/fastq/environment.yml b/modules/nf-core/cat/fastq/environment.yml index 8c69b12..c7eb9bd 100644 --- a/modules/nf-core/cat/fastq/environment.yml +++ b/modules/nf-core/cat/fastq/environment.yml @@ -1,7 +1,5 @@ -name: cat_fastq channels: - conda-forge - bioconda - - defaults dependencies: - conda-forge::coreutils=8.30 diff --git a/modules/nf-core/cat/fastq/main.nf b/modules/nf-core/cat/fastq/main.nf index f132b2a..b68e5f9 100644 --- a/modules/nf-core/cat/fastq/main.nf +++ b/modules/nf-core/cat/fastq/main.nf @@ -53,9 +53,9 @@ process CAT_FASTQ { def prefix = task.ext.prefix ?: "${meta.id}" def readList = reads instanceof List ? 
reads.collect{ it.toString() } : [reads.toString()] if (meta.single_end) { - if (readList.size > 1) { + if (readList.size >= 1) { """ - touch ${prefix}.merged.fastq.gz + echo '' | gzip > ${prefix}.merged.fastq.gz cat <<-END_VERSIONS > versions.yml "${task.process}": @@ -64,10 +64,10 @@ process CAT_FASTQ { """ } } else { - if (readList.size > 2) { + if (readList.size >= 2) { """ - touch ${prefix}_1.merged.fastq.gz - touch ${prefix}_2.merged.fastq.gz + echo '' | gzip > ${prefix}_1.merged.fastq.gz + echo '' | gzip > ${prefix}_2.merged.fastq.gz cat <<-END_VERSIONS > versions.yml "${task.process}": diff --git a/modules/nf-core/cat/fastq/meta.yml b/modules/nf-core/cat/fastq/meta.yml index db4ac3c..91ff2fb 100644 --- a/modules/nf-core/cat/fastq/meta.yml +++ b/modules/nf-core/cat/fastq/meta.yml @@ -10,30 +10,33 @@ tools: The cat utility reads files sequentially, writing them to the standard output. documentation: https://www.gnu.org/software/coreutils/manual/html_node/cat-invocation.html licence: ["GPL-3.0-or-later"] + identifier: "" input: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test', single_end:false ] - - reads: - type: file - description: | - List of input FastQ files to be concatenated. + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - reads: + type: file + description: | + List of input FastQ files to be concatenated. output: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test', single_end:false ] - reads: - type: file - description: Merged fastq file - pattern: "*.{merged.fastq.gz}" + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.merged.fastq.gz": + type: file + description: Merged fastq file + pattern: "*.{merged.fastq.gz}" - versions: - type: file - description: File containing software versions - pattern: "versions.yml" + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" authors: - "@joseespinosa" - "@drpatelh" diff --git a/modules/nf-core/cat/fastq/tests/main.nf.test b/modules/nf-core/cat/fastq/tests/main.nf.test index dab2e14..f88a78b 100644 --- a/modules/nf-core/cat/fastq/tests/main.nf.test +++ b/modules/nf-core/cat/fastq/tests/main.nf.test @@ -1,3 +1,5 @@ +// NOTE The version snaps may not be consistant +// https://github.com/nf-core/modules/pull/4087#issuecomment-1767948035 nextflow_process { name "Test Process CAT_FASTQ" @@ -11,9 +13,6 @@ nextflow_process { test("test_cat_fastq_single_end") { when { - params { - outdir = "$outputDir" - } process { """ input[0] = Channel.of([ @@ -36,9 +35,6 @@ nextflow_process { test("test_cat_fastq_paired_end") { when { - params { - outdir = "$outputDir" - } process { """ input[0] = Channel.of([ @@ -63,9 +59,6 @@ nextflow_process { test("test_cat_fastq_single_end_same_name") { when { - params { - outdir = "$outputDir" - } process { """ input[0] = Channel.of([ @@ -88,9 +81,6 @@ nextflow_process { test("test_cat_fastq_paired_end_same_name") { when { - params { - outdir = "$outputDir" - } process { """ input[0] = Channel.of([ @@ -115,9 +105,129 @@ nextflow_process { test("test_cat_fastq_single_end_single_file") { when { - params { - outdir = "$outputDir" + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:true ], // meta map + [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', 
checkIfExists: true)] + ]) + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + + test("test_cat_fastq_single_end - stub") { + + options "-stub" + + when { + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:true ], // meta map + [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true)] + ]) + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + + test("test_cat_fastq_paired_end - stub") { + + options "-stub" + + when { + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test2_1.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test2_2.fastq.gz', checkIfExists: true)] + ]) + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + + test("test_cat_fastq_single_end_same_name - stub") { + + options "-stub" + + when { + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:true ], // meta map + [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true)] + ]) + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + + test("test_cat_fastq_paired_end_same_name - stub") { + + options "-stub" + + when { + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true)] + ]) + """ } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + + test("test_cat_fastq_single_end_single_file - stub") { + + options "-stub" + + when { process { """ input[0] = Channel.of([ diff --git a/modules/nf-core/cat/fastq/tests/main.nf.test.snap b/modules/nf-core/cat/fastq/tests/main.nf.test.snap index 43dfe28..aec119a 100644 --- a/modules/nf-core/cat/fastq/tests/main.nf.test.snap +++ b/modules/nf-core/cat/fastq/tests/main.nf.test.snap @@ -28,6 +28,10 @@ ] } ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, "timestamp": "2024-01-17T17:30:39.816981" }, "test_cat_fastq_single_end_same_name": { @@ -59,6 +63,10 @@ ] } ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, "timestamp": "2024-01-17T17:32:35.229332" }, "test_cat_fastq_single_end_single_file": { @@ -90,6 +98,10 @@ ] } ], + "meta": { + 
"nf-test": "0.8.4", + "nextflow": "24.04.2" + }, "timestamp": "2024-01-17T17:34:00.058829" }, "test_cat_fastq_paired_end_same_name": { @@ -127,8 +139,123 @@ ] } ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, "timestamp": "2024-01-17T17:33:33.031555" }, + "test_cat_fastq_single_end - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": true + }, + "test.merged.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "1": [ + "versions.yml:md5,d42d6e24d67004608495883e00bd501b" + ], + "reads": [ + [ + { + "id": "test", + "single_end": true + }, + "test.merged.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "versions": [ + "versions.yml:md5,d42d6e24d67004608495883e00bd501b" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-07-05T12:07:28.244999" + }, + "test_cat_fastq_paired_end_same_name - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + [ + "test_1.merged.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940", + "test_2.merged.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ] + ], + "1": [ + "versions.yml:md5,d42d6e24d67004608495883e00bd501b" + ], + "reads": [ + [ + { + "id": "test", + "single_end": false + }, + [ + "test_1.merged.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940", + "test_2.merged.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ] + ], + "versions": [ + "versions.yml:md5,d42d6e24d67004608495883e00bd501b" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-07-05T12:07:57.070911" + }, + "test_cat_fastq_single_end_same_name - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": true + }, + "test.merged.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "1": [ + "versions.yml:md5,d42d6e24d67004608495883e00bd501b" + ], + "reads": [ + [ + { + "id": "test", + "single_end": true + }, + "test.merged.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "versions": [ + "versions.yml:md5,d42d6e24d67004608495883e00bd501b" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-07-05T12:07:46.796254" + }, "test_cat_fastq_paired_end": { "content": [ { @@ -164,6 +291,86 @@ ] } ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, "timestamp": "2024-01-17T17:32:02.270935" + }, + "test_cat_fastq_paired_end - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + [ + "test_1.merged.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940", + "test_2.merged.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ] + ], + "1": [ + "versions.yml:md5,d42d6e24d67004608495883e00bd501b" + ], + "reads": [ + [ + { + "id": "test", + "single_end": false + }, + [ + "test_1.merged.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940", + "test_2.merged.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ] + ], + "versions": [ + "versions.yml:md5,d42d6e24d67004608495883e00bd501b" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-07-05T12:07:37.807553" + }, + "test_cat_fastq_single_end_single_file - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": true + }, + "test.merged.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "1": [ + "versions.yml:md5,d42d6e24d67004608495883e00bd501b" + ], + "reads": [ + [ + { + "id": "test", + "single_end": true + }, + "test.merged.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "versions": [ + 
"versions.yml:md5,d42d6e24d67004608495883e00bd501b" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-07-05T12:14:51.861264" } } \ No newline at end of file diff --git a/modules/nf-core/custom/dumpsoftwareversions/environment.yml b/modules/nf-core/custom/dumpsoftwareversions/environment.yml deleted file mode 100644 index b48ced2..0000000 --- a/modules/nf-core/custom/dumpsoftwareversions/environment.yml +++ /dev/null @@ -1,7 +0,0 @@ -name: custom_dumpsoftwareversions -channels: - - conda-forge - - bioconda - - defaults -dependencies: - - bioconda::multiqc=1.20 diff --git a/modules/nf-core/custom/dumpsoftwareversions/main.nf b/modules/nf-core/custom/dumpsoftwareversions/main.nf deleted file mode 100644 index 105f926..0000000 --- a/modules/nf-core/custom/dumpsoftwareversions/main.nf +++ /dev/null @@ -1,24 +0,0 @@ -process CUSTOM_DUMPSOFTWAREVERSIONS { - label 'process_single' - - // Requires `pyyaml` which does not have a dedicated container but is in the MultiQC container - conda "${moduleDir}/environment.yml" - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/multiqc:1.20--pyhdfd78af_0' : - 'biocontainers/multiqc:1.20--pyhdfd78af_0' }" - - input: - path versions - - output: - path "software_versions.yml" , emit: yml - path "software_versions_mqc.yml", emit: mqc_yml - path "versions.yml" , emit: versions - - when: - task.ext.when == null || task.ext.when - - script: - def args = task.ext.args ?: '' - template 'dumpsoftwareversions.py' -} diff --git a/modules/nf-core/custom/dumpsoftwareversions/meta.yml b/modules/nf-core/custom/dumpsoftwareversions/meta.yml deleted file mode 100644 index 5f15a5f..0000000 --- a/modules/nf-core/custom/dumpsoftwareversions/meta.yml +++ /dev/null @@ -1,37 +0,0 @@ -# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/meta-schema.json -name: custom_dumpsoftwareversions -description: Custom module used to dump software versions within the nf-core pipeline template -keywords: - - custom - - dump - - version -tools: - - custom: - description: Custom module used to dump software versions within the nf-core pipeline template - homepage: https://github.com/nf-core/tools - documentation: https://github.com/nf-core/tools - licence: ["MIT"] -input: - - versions: - type: file - description: YML file containing software versions - pattern: "*.yml" -output: - - yml: - type: file - description: Standard YML file containing software versions - pattern: "software_versions.yml" - - mqc_yml: - type: file - description: MultiQC custom content YML file containing software versions - pattern: "software_versions_mqc.yml" - - versions: - type: file - description: File containing software versions - pattern: "versions.yml" -authors: - - "@drpatelh" - - "@grst" -maintainers: - - "@drpatelh" - - "@grst" diff --git a/modules/nf-core/custom/dumpsoftwareversions/templates/dumpsoftwareversions.py b/modules/nf-core/custom/dumpsoftwareversions/templates/dumpsoftwareversions.py deleted file mode 100755 index da03340..0000000 --- a/modules/nf-core/custom/dumpsoftwareversions/templates/dumpsoftwareversions.py +++ /dev/null @@ -1,101 +0,0 @@ -#!/usr/bin/env python - - -"""Provide functions to merge multiple versions.yml files.""" - - -import yaml -import platform -from textwrap import dedent - - -def _make_versions_html(versions): - """Generate a tabular HTML output of all versions for MultiQC.""" - html = [ 
- dedent( - """\\ - <style> - #nf-core-versions tbody:nth-child(even) { - background-color: #f2f2f2; - } - </style> - <table class="table" style="width:100%" id="nf-core-versions"> - <thead> - <tr> - <th> Process Name </th> - <th> Software </th> - <th> Version  </th> - </tr> - </thead> - """ - ) - ] - for process, tmp_versions in sorted(versions.items()): - html.append("<tbody>") - for i, (tool, version) in enumerate(sorted(tmp_versions.items())): - html.append( - dedent( - f"""\\ - <tr> - <td><samp>{process if (i == 0) else ''}</samp></td> - <td><samp>{tool}</samp></td> - <td><samp>{version}</samp></td> - </tr> - """ - ) - ) - html.append("</tbody>") - html.append("</table>
") - return "\\n".join(html) - - -def main(): - """Load all version files and generate merged output.""" - versions_this_module = {} - versions_this_module["${task.process}"] = { - "python": platform.python_version(), - "yaml": yaml.__version__, - } - - with open("$versions") as f: - versions_by_process = yaml.load(f, Loader=yaml.BaseLoader) | versions_this_module - - # aggregate versions by the module name (derived from fully-qualified process name) - versions_by_module = {} - for process, process_versions in versions_by_process.items(): - module = process.split(":")[-1] - try: - if versions_by_module[module] != process_versions: - raise AssertionError( - "We assume that software versions are the same between all modules. " - "If you see this error-message it means you discovered an edge-case " - "and should open an issue in nf-core/tools. " - ) - except KeyError: - versions_by_module[module] = process_versions - - versions_by_module["Workflow"] = { - "Nextflow": "$workflow.nextflow.version", - "$workflow.manifest.name": "$workflow.manifest.version", - } - - versions_mqc = { - "id": "software_versions", - "section_name": "${workflow.manifest.name} Software Versions", - "section_href": "https://github.com/${workflow.manifest.name}", - "plot_type": "html", - "description": "are collected at run time from the software output.", - "data": _make_versions_html(versions_by_module), - } - - with open("software_versions.yml", "w") as f: - yaml.dump(versions_by_module, f, default_flow_style=False) - with open("software_versions_mqc.yml", "w") as f: - yaml.dump(versions_mqc, f, default_flow_style=False) - - with open("versions.yml", "w") as f: - yaml.dump(versions_this_module, f, default_flow_style=False) - - -if __name__ == "__main__": - main() diff --git a/modules/nf-core/custom/dumpsoftwareversions/tests/main.nf.test b/modules/nf-core/custom/dumpsoftwareversions/tests/main.nf.test deleted file mode 100644 index b1e1630..0000000 --- a/modules/nf-core/custom/dumpsoftwareversions/tests/main.nf.test +++ /dev/null @@ -1,43 +0,0 @@ -nextflow_process { - - name "Test Process CUSTOM_DUMPSOFTWAREVERSIONS" - script "../main.nf" - process "CUSTOM_DUMPSOFTWAREVERSIONS" - tag "modules" - tag "modules_nfcore" - tag "custom" - tag "dumpsoftwareversions" - tag "custom/dumpsoftwareversions" - - test("Should run without failures") { - when { - process { - """ - def tool1_version = ''' - TOOL1: - tool1: 0.11.9 - '''.stripIndent() - - def tool2_version = ''' - TOOL2: - tool2: 1.9 - '''.stripIndent() - - input[0] = Channel.of(tool1_version, tool2_version).collectFile() - """ - } - } - - then { - assertAll( - { assert process.success }, - { assert snapshot( - process.out.versions, - file(process.out.mqc_yml[0]).readLines()[0..10], - file(process.out.yml[0]).readLines()[0..7] - ).match() - } - ) - } - } -} diff --git a/modules/nf-core/custom/dumpsoftwareversions/tests/main.nf.test.snap b/modules/nf-core/custom/dumpsoftwareversions/tests/main.nf.test.snap deleted file mode 100644 index 5f59a93..0000000 --- a/modules/nf-core/custom/dumpsoftwareversions/tests/main.nf.test.snap +++ /dev/null @@ -1,33 +0,0 @@ -{ - "Should run without failures": { - "content": [ - [ - "versions.yml:md5,76d454d92244589d32455833f7c1ba6d" - ], - [ - "data: \"\\n\\n \\n \\n \\n \\n \\n \\n \\n\\", - " \\n\\n\\n \\n \\n\\", - " \\ \\n\\n\\n\\n \\n \\", - " \\ \\n \\n\\n\\n\\n\\", - " \\n\\n \\n \\n\\", - " \\ \\n\\n\\n\\n\\n\\n \\n\\", - " \\ \\n \\n\\n\\n\\n\\", - " \\n\\n \\n \\n\\" - ], - [ - "CUSTOM_DUMPSOFTWAREVERSIONS:", - " python: 
3.11.7", - " yaml: 5.4.1", - "TOOL1:", - " tool1: 0.11.9", - "TOOL2:", - " tool2: '1.9'", - "Workflow:" - ] - ], - "timestamp": "2024-01-09T23:01:18.710682" - } -} \ No newline at end of file diff --git a/modules/nf-core/custom/dumpsoftwareversions/tests/tags.yml b/modules/nf-core/custom/dumpsoftwareversions/tests/tags.yml deleted file mode 100644 index 405aa24..0000000 --- a/modules/nf-core/custom/dumpsoftwareversions/tests/tags.yml +++ /dev/null @@ -1,2 +0,0 @@ -custom/dumpsoftwareversions: - - modules/nf-core/custom/dumpsoftwareversions/** diff --git a/modules/nf-core/eggnogmapper/eggnogmapper.diff b/modules/nf-core/eggnogmapper/eggnogmapper.diff deleted file mode 100644 index b38223d..0000000 --- a/modules/nf-core/eggnogmapper/eggnogmapper.diff +++ /dev/null @@ -1,53 +0,0 @@ -Changes in module 'nf-core/eggnogmapper' ---- modules/nf-core/eggnogmapper/meta.yml -+++ modules/nf-core/eggnogmapper/meta.yml -@@ -60,3 +60,6 @@ - pattern: "versions.yml" - authors: - - "@vagkaratzas" -+maintainers: -+ - "@vagkaratzas" -+ - "@gallvp" - ---- modules/nf-core/eggnogmapper/main.nf -+++ modules/nf-core/eggnogmapper/main.nf -@@ -1,6 +1,6 @@ - process EGGNOGMAPPER { - tag "$meta.id" -- label 'process_long' -+ label 'process_high' - - conda "${moduleDir}/environment.yml" - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? -@@ -23,11 +23,13 @@ - task.ext.when == null || task.ext.when - - script: -- def args = task.ext.args ?: '' -- def prefix = task.ext.prefix ?: "${meta.id}" -- def is_compressed = fasta.name.endsWith(".gz") -- def fasta_name = fasta.name.replace(".gz", "") -- def dbmem = task.memory.toMega() > 40000 ? '--dbmem' : '' -+ def args = task.ext.args ?: '' -+ def prefix = task.ext.prefix ?: "${meta.id}" -+ def is_compressed = fasta.name.endsWith(".gz") -+ def fasta_name = fasta.name.replace(".gz", "") -+ def dbmem = task.memory.toMega() > 40000 ? '--dbmem' : '' -+ def database_arg = eggnog_db ? "--database $eggnog_db" : '' -+ def dmnd_db_arg = eggnog_diamond_db ? "--dmnd_db $eggnog_diamond_db" : '' - """ - if [ "$is_compressed" == "true" ]; then - gzip -c -d $fasta > $fasta_name -@@ -38,8 +40,8 @@ - -i ${fasta_name} \\ - --data_dir ${eggnog_data_dir} \\ - -m diamond \\ -- --dmnd_db ${eggnog_diamond_db} \\ -- --database ${eggnog_db} \\ -+ $dmnd_db_arg \\ -+ $database_arg \\ - --output ${prefix} \\ - ${dbmem} \\ - $args - -************************************************************ diff --git a/modules/nf-core/eggnogmapper/environment.yml b/modules/nf-core/eggnogmapper/environment.yml index f4fb6fd..2092ea5 100644 --- a/modules/nf-core/eggnogmapper/environment.yml +++ b/modules/nf-core/eggnogmapper/environment.yml @@ -1,7 +1,5 @@ -name: eggnogmapper channels: - conda-forge - bioconda - - defaults dependencies: - bioconda::eggnog-mapper=2.1.12 diff --git a/modules/nf-core/eggnogmapper/main.nf b/modules/nf-core/eggnogmapper/main.nf index 134451d..2489b7f 100644 --- a/modules/nf-core/eggnogmapper/main.nf +++ b/modules/nf-core/eggnogmapper/main.nf @@ -25,8 +25,8 @@ process EGGNOGMAPPER { script: def args = task.ext.args ?: '' def prefix = task.ext.prefix ?: "${meta.id}" - def is_compressed = fasta.name.endsWith(".gz") - def fasta_name = fasta.name.replace(".gz", "") + def is_compressed = fasta.extension == '.gz' ? true : false + def fasta_name = is_compressed ? fasta.baseName : "$fasta" def dbmem = task.memory.toMega() > 40000 ? '--dbmem' : '' def database_arg = eggnog_db ? "--database $eggnog_db" : '' def dmnd_db_arg = eggnog_diamond_db ? 
"--dmnd_db $eggnog_diamond_db" : '' @@ -53,8 +53,8 @@ process EGGNOGMAPPER { """ stub: - def args = task.ext.args ?: '' - def prefix = task.ext.prefix ?: "${meta.id}" + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" """ touch ${prefix}.emapper.annotations touch ${prefix}.emapper.seed_orthologs diff --git a/modules/nf-core/eggnogmapper/meta.yml b/modules/nf-core/eggnogmapper/meta.yml index b07c27e..8cf836d 100644 --- a/modules/nf-core/eggnogmapper/meta.yml +++ b/modules/nf-core/eggnogmapper/meta.yml @@ -1,4 +1,3 @@ ---- # yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/yaml-schema.json name: "eggnogmapper" description: Fast genome-wide functional annotation through orthology assignment. @@ -14,50 +13,71 @@ tools: tool_dev_url: "https://github.com/eggnogdb/eggnog-mapper" doi: "10.1093/molbev/msab293" licence: ["AGPL v3"] + identifier: "" input: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. `[ id:'test', single_end:false ]` - - fasta: - type: file - description: Database of sequences in FASTA format - pattern: "*.{fasta,fa,fasta.gz,fa.gz}" - - eggnog_db: - type: file - description: The eggnog database file (e.g. eggnog-mapper/data/eggnog.db) - pattern: "*.db" - - eggnog_data_dir: - type: directory - description: Directory containing eggnog database files (e.g. eggnog-mapper/data) - pattern: "*" - - eggnog_diamond_db: - type: file - description: The eggnog Diamond protein database file (e.g. eggnog-mapper/data/eggnog_proteins.dmnd) - pattern: "*.dmnd" + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'test', single_end:false ]` + - fasta: + type: file + description: Database of sequences in FASTA format + pattern: "*.{fasta,fa,fasta.gz,fa.gz}" + - - eggnog_db: + type: file + description: The eggnog database file (e.g. eggnog-mapper/data/eggnog.db) + pattern: "*.db" + - - eggnog_data_dir: + type: directory + description: Directory containing eggnog database files (e.g. eggnog-mapper/data) + pattern: "*" + - - meta2: + type: map + description: | + Groovy Map containing database information + e.g. `[ id:'test' ]` + - eggnog_diamond_db: + type: file + description: The eggnog Diamond protein database file (e.g. eggnog-mapper/data/eggnog_proteins.dmnd) + pattern: "*.dmnd" output: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. `[ id:'test', single_end:false ]` - annotations: - type: file - description: TSV with the results from the annotation phase - pattern: "*.emapper.annotations" + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'test', single_end:false ]` + - "*.emapper.annotations": + type: file + description: TSV with the results from the annotation phase + pattern: "*.emapper.annotations" - orthologs: - type: file - description: TSV with the results from parsing the hits, linking queries with seed orthologs (with commented metadata) - pattern: "*.emapper.seed_orthologs" + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
`[ id:'test', single_end:false ]` + - "*.emapper.seed_orthologs": + type: file + description: TSV with the results from parsing the hits, linking queries with + seed orthologs (with commented metadata) + pattern: "*.emapper.seed_orthologs" - hits: - type: file - description: TSV with the results from the Diamond search phase - pattern: "*.emapper.hits" + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'test', single_end:false ]` + - "*.emapper.hits": + type: file + description: TSV with the results from the Diamond search phase + pattern: "*.emapper.hits" - versions: - type: file - description: File containing software versions - pattern: "versions.yml" + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" authors: - "@vagkaratzas" maintainers: diff --git a/modules/nf-core/eggnogmapper/tests/main.nf.test b/modules/nf-core/eggnogmapper/tests/main.nf.test index fb707ea..5f36b44 100644 --- a/modules/nf-core/eggnogmapper/tests/main.nf.test +++ b/modules/nf-core/eggnogmapper/tests/main.nf.test @@ -8,21 +8,21 @@ nextflow_process { tag "eggnogmapper" tag "diamond/makedb" - test("Should search for protein annotations against the eggnogmapper db") { - - setup { - run("DIAMOND_MAKEDB") { - script "../../diamond/makedb/main.nf" - process { - """ - input[0] = [ [id:'test2'], file(params.test_data['sarscov2']['genome']['proteome_fasta'], checkIfExists: true) ] - input[1] = [] - input[2] = [] - input[3] = [] - """ - } + setup { + run("DIAMOND_MAKEDB") { + script "../../diamond/makedb/main.nf" + process { + """ + input[0] = [ [id:'test2'], file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/proteome.fasta', checkIfExists: true) ] + input[1] = [] + input[2] = [] + input[3] = [] + """ } } + } + + test("Should search for protein annotations against the eggnogmapper db") { when { params { @@ -30,7 +30,7 @@ nextflow_process { } process { """ - input[0] = [ [id:'test'], file(params.test_data['sarscov2']['genome']['proteome_fasta'], checkIfExists: true) ] + input[0] = [ [id:'test'], file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/proteome.fasta', checkIfExists: true) ] eggnog_db = file("https://github.com/nf-core/test-datasets/raw/eddf5b0e3336e0f93c81d4b4843b07257f9efaec/data/delete_me/eggnogmapper/eggnog.db", checkIfExists: true) eggnog_db.copyTo("${workDir}/tmp/eggnog.db") eggnog_data_dir = "${workDir}/tmp/" @@ -47,7 +47,46 @@ nextflow_process { { assert path(process.out.annotations.get(0).get(1)).readLines().contains("ENSSASP00005000002.1\tENSSASP00005000002.1\t0.0\t14179.0\tCOG0498@1|root,COG0498@2|Bacteria,1MUWQ@1224|Proteobacteria,2VHR6@28216|Betaproteobacteria,2KUMA@206389|Rhodocyclales\t1224|Proteobacteria\tE\tthreonine synthase\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-") }, { assert path(process.out.orthologs.get(0).get(1)).readLines().contains("ENSSASP00005000002.1\tENSSASP00005000002.1\t0.0\t14179.0\t1\t7096\t1\t7096\t100.0\t100.0\t100.0") }, { assert snapshot(process.out.hits).match("hits") }, - { assert process.out.versions } + { assert snapshot(process.out.versions).match("versions") } + ) + } + + } + + test("Should search for protein annotations against the eggnogmapper db -- empty-params") { + + when { + params { + outdir = "$outputDir" + } + process { + """ + eggnog_db = file("https://github.com/nf-core/test-datasets/raw/eddf5b0e3336e0f93c81d4b4843b07257f9efaec/data/delete_me/eggnogmapper/eggnog.db", checkIfExists: true) + eggnog_db.copyTo("${workDir}/tmp/eggnog.db") + 
+ ch_synced_inputs = DIAMOND_MAKEDB.out.db.map { meta, dmnd -> + dmnd.copyTo("${workDir}/tmp/eggnog_proteins.dmnd") + + return true + } + | combine ( Channel.fromPath( "${workDir}/tmp/" ) ) + eggnog_data_dir = ch_synced_inputs.map { sync_status, data_dir -> data_dir } + + input[0] = [ [id:'test'], file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/proteome.fasta', checkIfExists: true) ] + input[1] = [] + input[2] = eggnog_data_dir + input[3] = [[], []] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert path(process.out.annotations.get(0).get(1)).readLines().contains("ENSSASP00005000002.1\tENSSASP00005000002.1\t0.0\t14179.0\tCOG0498@1|root,COG0498@2|Bacteria,1MUWQ@1224|Proteobacteria,2VHR6@28216|Betaproteobacteria,2KUMA@206389|Rhodocyclales\t1224|Proteobacteria\tE\tthreonine synthase\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-") }, + { assert path(process.out.orthologs.get(0).get(1)).readLines().contains("ENSSASP00005000002.1\tENSSASP00005000002.1\t0.0\t14179.0\t1\t7096\t1\t7096\t100.0\t100.0\t100.0") }, + { assert snapshot(process.out.hits).match("hits--empty-params") }, + { assert snapshot(process.out.versions).match("versions--empty-params") } ) } diff --git a/modules/nf-core/eggnogmapper/tests/main.nf.test.snap b/modules/nf-core/eggnogmapper/tests/main.nf.test.snap index 4e1c837..170e8c9 100644 --- a/modules/nf-core/eggnogmapper/tests/main.nf.test.snap +++ b/modules/nf-core/eggnogmapper/tests/main.nf.test.snap @@ -10,6 +10,51 @@ ] ] ], - "timestamp": "2023-11-08T20:43:50.173213923" + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-04-12T13:43:01.751295" + }, + "versions--empty-params": { + "content": [ + [ + "versions.yml:md5,d3e4efad28b5a924585ea3dfcf72c32c" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-04-15T11:42:23.737523" + }, + "versions": { + "content": [ + [ + "versions.yml:md5,d3e4efad28b5a924585ea3dfcf72c32c" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-04-12T13:45:26.555465" + }, + "hits--empty-params": { + "content": [ + [ + [ + { + "id": "test" + }, + "test.emapper.hits:md5,864b7a1f902893d8aee6621baeab7be8" + ] + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-04-15T11:42:23.690105" } } \ No newline at end of file diff --git a/modules/nf-core/fastavalidator/environment.yml b/modules/nf-core/fastavalidator/environment.yml index 70f346e..44d55c1 100644 --- a/modules/nf-core/fastavalidator/environment.yml +++ b/modules/nf-core/fastavalidator/environment.yml @@ -1,9 +1,7 @@ --- # yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json -name: "fastavalidator" channels: - conda-forge - bioconda - - defaults dependencies: - "bioconda::py_fasta_validator=0.6" diff --git a/modules/nf-core/fastavalidator/meta.yml b/modules/nf-core/fastavalidator/meta.yml index c5c4371..94198e6 100644 --- a/modules/nf-core/fastavalidator/meta.yml +++ b/modules/nf-core/fastavalidator/meta.yml @@ -1,4 +1,3 @@ ---- # yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/meta-schema.json name: "fastavalidator" description: | @@ -19,34 +18,43 @@ tools: tool_dev_url: "https://github.com/linsalrob/py_fasta_validator" doi: "10.5281/zenodo.5002710" licence: ["MIT"] + identifier: "" input: - - meta: - type: map - description: | - Groovy Map containing file information - e.g. 
[ id:'test' ] - - fasta: - type: file - description: Input fasta file - pattern: "*.fasta" + - - meta: + type: map + description: | + Groovy Map containing file information + e.g. [ id:'test' ] + - fasta: + type: file + description: Input fasta file + pattern: "*.fasta" output: - - meta: - type: map - description: | - Groovy Map containing file information - e.g. [ id:'test' ] - success_log: - type: file - description: Log file for successful validation - pattern: "*.success.log" + - meta: + type: map + description: | + Groovy Map containing file information + e.g. [ id:'test' ] + - "*.success.log": + type: file + description: Log file for successful validation + pattern: "*.success.log" - error_log: - type: file - description: Log file for failed validation - pattern: "*.error.log" + - meta: + type: map + description: | + Groovy Map containing file information + e.g. [ id:'test' ] + - "*.error.log": + type: file + description: Log file for failed validation + pattern: "*.error.log" - versions: - type: file - description: File containing software versions - pattern: "versions.yml" + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" authors: - "@gallvp" maintainers: diff --git a/modules/nf-core/fastavalidator/tests/main.nf.test b/modules/nf-core/fastavalidator/tests/main.nf.test index bb8c22c..39b00d8 100644 --- a/modules/nf-core/fastavalidator/tests/main.nf.test +++ b/modules/nf-core/fastavalidator/tests/main.nf.test @@ -15,7 +15,7 @@ nextflow_process { """ input[0] = [ [ id:'test', single_end:false ], // meta map - file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true) + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) ] """ } @@ -40,7 +40,7 @@ nextflow_process { """ input[0] = [ [ id:'test', single_end:false ], // meta map - file(params.test_data['sarscov2']['genome']['genome_gff3'], checkIfExists: true) + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.gff3', checkIfExists: true) ] """ } diff --git a/modules/nf-core/fastp/environment.yml b/modules/nf-core/fastp/environment.yml index 70389e6..26d4aca 100644 --- a/modules/nf-core/fastp/environment.yml +++ b/modules/nf-core/fastp/environment.yml @@ -1,7 +1,5 @@ -name: fastp channels: - conda-forge - bioconda - - defaults dependencies: - bioconda::fastp=0.23.4 diff --git a/modules/nf-core/fastp/main.nf b/modules/nf-core/fastp/main.nf index 4fc19b7..e1b9f56 100644 --- a/modules/nf-core/fastp/main.nf +++ b/modules/nf-core/fastp/main.nf @@ -10,6 +10,7 @@ process FASTP { input: tuple val(meta), path(reads) path adapter_fasta + val discard_trimmed_pass val save_trimmed_fail val save_merged @@ -18,9 +19,9 @@ process FASTP { tuple val(meta), path('*.json') , emit: json tuple val(meta), path('*.html') , emit: html tuple val(meta), path('*.log') , emit: log - path "versions.yml" , emit: versions tuple val(meta), path('*.fail.fastq.gz') , optional:true, emit: reads_fail tuple val(meta), path('*.merged.fastq.gz'), optional:true, emit: reads_merged + path "versions.yml" , emit: versions when: task.ext.when == null || task.ext.when @@ -30,6 +31,8 @@ process FASTP { def prefix = task.ext.prefix ?: "${meta.id}" def adapter_list = adapter_fasta ? "--adapter_fasta ${adapter_fasta}" : "" def fail_fastq = save_trimmed_fail && meta.single_end ? "--failed_out ${prefix}.fail.fastq.gz" : save_trimmed_fail && !meta.single_end ? 
"--failed_out ${prefix}.paired.fail.fastq.gz --unpaired1 ${prefix}_1.fail.fastq.gz --unpaired2 ${prefix}_2.fail.fastq.gz" : '' + def out_fq1 = discard_trimmed_pass ?: ( meta.single_end ? "--out1 ${prefix}.fastp.fastq.gz" : "--out1 ${prefix}_1.fastp.fastq.gz" ) + def out_fq2 = discard_trimmed_pass ?: "--out2 ${prefix}_2.fastp.fastq.gz" // Added soft-links to original fastqs for consistent naming in MultiQC // Use single ended for interleaved. Add --interleaved_in in config. if ( task.ext.args?.contains('--interleaved_in') ) { @@ -59,7 +62,7 @@ process FASTP { fastp \\ --in1 ${prefix}.fastq.gz \\ - --out1 ${prefix}.fastp.fastq.gz \\ + $out_fq1 \\ --thread $task.cpus \\ --json ${prefix}.fastp.json \\ --html ${prefix}.fastp.html \\ @@ -81,8 +84,8 @@ process FASTP { fastp \\ --in1 ${prefix}_1.fastq.gz \\ --in2 ${prefix}_2.fastq.gz \\ - --out1 ${prefix}_1.fastp.fastq.gz \\ - --out2 ${prefix}_2.fastp.fastq.gz \\ + $out_fq1 \\ + $out_fq2 \\ --json ${prefix}.fastp.json \\ --html ${prefix}.fastp.html \\ $adapter_list \\ @@ -103,14 +106,16 @@ process FASTP { stub: def prefix = task.ext.prefix ?: "${meta.id}" def is_single_output = task.ext.args?.contains('--interleaved_in') || meta.single_end - def touch_reads = is_single_output ? "${prefix}.fastp.fastq.gz" : "${prefix}_1.fastp.fastq.gz ${prefix}_2.fastp.fastq.gz" - def touch_merged = (!is_single_output && save_merged) ? "touch ${prefix}.merged.fastq.gz" : "" + def touch_reads = (discard_trimmed_pass) ? "" : (is_single_output) ? "echo '' | gzip > ${prefix}.fastp.fastq.gz" : "echo '' | gzip > ${prefix}_1.fastp.fastq.gz ; echo '' | gzip > ${prefix}_2.fastp.fastq.gz" + def touch_merged = (!is_single_output && save_merged) ? "echo '' | gzip > ${prefix}.merged.fastq.gz" : "" + def touch_fail_fastq = (!save_trimmed_fail) ? "" : meta.single_end ? "echo '' | gzip > ${prefix}.fail.fastq.gz" : "echo '' | gzip > ${prefix}.paired.fail.fastq.gz ; echo '' | gzip > ${prefix}_1.fail.fastq.gz ; echo '' | gzip > ${prefix}_2.fail.fastq.gz" """ - touch $touch_reads + $touch_reads + $touch_fail_fastq + $touch_merged touch "${prefix}.fastp.json" touch "${prefix}.fastp.html" touch "${prefix}.fastp.log" - $touch_merged cat <<-END_VERSIONS > versions.yml "${task.process}": diff --git a/modules/nf-core/fastp/meta.yml b/modules/nf-core/fastp/meta.yml index c22a16a..159404d 100644 --- a/modules/nf-core/fastp/meta.yml +++ b/modules/nf-core/fastp/meta.yml @@ -11,62 +11,100 @@ tools: documentation: https://github.com/OpenGene/fastp doi: 10.1093/bioinformatics/bty560 licence: ["MIT"] + identifier: biotools:fastp input: - - meta: - type: map - description: | - Groovy Map containing sample information. Use 'single_end: true' to specify single ended or interleaved FASTQs. Use 'single_end: false' for paired-end reads. - e.g. [ id:'test', single_end:false ] - - reads: - type: file - description: | - List of input FastQ files of size 1 and 2 for single-end and paired-end data, - respectively. If you wish to run interleaved paired-end data, supply as single-end data - but with `--interleaved_in` in your `modules.conf`'s `ext.args` for the module. - - adapter_fasta: - type: file - description: File in FASTA format containing possible adapters to remove. 
- pattern: "*.{fasta,fna,fas,fa}" - - save_trimmed_fail: - type: boolean - description: Specify true to save files that failed to pass trimming thresholds ending in `*.fail.fastq.gz` - - save_merged: - type: boolean - description: Specify true to save all merged reads to the a file ending in `*.merged.fastq.gz` + - - meta: + type: map + description: | + Groovy Map containing sample information. Use 'single_end: true' to specify single ended or interleaved FASTQs. Use 'single_end: false' for paired-end reads. + e.g. [ id:'test', single_end:false ] + - reads: + type: file + description: | + List of input FastQ files of size 1 and 2 for single-end and paired-end data, + respectively. If you wish to run interleaved paired-end data, supply as single-end data + but with `--interleaved_in` in your `modules.conf`'s `ext.args` for the module. + - - adapter_fasta: + type: file + description: File in FASTA format containing possible adapters to remove. + pattern: "*.{fasta,fna,fas,fa}" + - - discard_trimmed_pass: + type: boolean + description: Specify true to not write any reads that pass trimming thresholds. + | This can be used to use fastp for the output report only. + - - save_trimmed_fail: + type: boolean + description: Specify true to save files that failed to pass trimming thresholds + ending in `*.fail.fastq.gz` + - - save_merged: + type: boolean + description: Specify true to save all merged reads to a file ending in `*.merged.fastq.gz` output: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test', single_end:false ] - reads: - type: file - description: The trimmed/modified/unmerged fastq reads - pattern: "*fastp.fastq.gz" + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.fastp.fastq.gz": + type: file + description: The trimmed/modified/unmerged fastq reads + pattern: "*fastp.fastq.gz" - json: - type: file - description: Results in JSON format - pattern: "*.json" + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.json": + type: file + description: Results in JSON format + pattern: "*.json" - html: - type: file - description: Results in HTML format - pattern: "*.html" + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.html": + type: file + description: Results in HTML format + pattern: "*.html" - log: - type: file - description: fastq log file - pattern: "*.log" - - versions: - type: file - description: File containing software versions - pattern: "versions.yml" + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.log": + type: file + description: fastq log file + pattern: "*.log" - reads_fail: - type: file - description: Reads the failed the preprocessing - pattern: "*fail.fastq.gz" + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.fail.fastq.gz": + type: file + description: Reads the failed the preprocessing + pattern: "*fail.fastq.gz" - reads_merged: - type: file - description: Reads that were successfully merged - pattern: "*.{merged.fastq.gz}" + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - "*.merged.fastq.gz": + type: file + description: Reads that were successfully merged + pattern: "*.{merged.fastq.gz}" + - versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" authors: - "@drpatelh" - "@kevinmenden" diff --git a/modules/nf-core/fastp/tests/main.nf.test b/modules/nf-core/fastp/tests/main.nf.test index 6f1f489..30dbb8a 100644 --- a/modules/nf-core/fastp/tests/main.nf.test +++ b/modules/nf-core/fastp/tests/main.nf.test @@ -10,221 +10,290 @@ nextflow_process { test("test_fastp_single_end") { when { - params { - outdir = "$outputDir" - } + process { """ - adapter_fasta = [] - save_trimmed_fail = false - save_merged = false - input[0] = Channel.of([ [ id:'test', single_end:true ], [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true) ] ]) - input[1] = adapter_fasta - input[2] = save_trimmed_fail - input[3] = save_merged + input[1] = [] + input[2] = false + input[3] = false + input[4] = false """ } } then { - def html_text = [ "Q20 bases:") }, - - { assert snapshot(process.out.versions).match("fastqc_versions_single") } + { assert process.success }, + // NOTE The report contains the date inside it, which means that the md5sum is stable per day, but not longer than that. So you can't md5sum it. + // looks like this:
<div id="header_filename">Mon 2 Oct 2023</div>
+ // <div>test.gz</div>
+ // https://github.com/nf-core/modules/pull/3903#issuecomment-1743620039 + { assert process.out.html[0][1] ==~ ".*/test_fastqc.html" }, + { assert process.out.zip[0][1] ==~ ".*/test_fastqc.zip" }, + { assert path(process.out.html[0][1]).text.contains("") }, + { assert snapshot(process.out.versions).match() } ) } } @@ -54,16 +51,14 @@ nextflow_process { then { assertAll ( - { assert process.success }, - - { assert process.out.html[0][1][0] ==~ ".*/test_1_fastqc.html" }, - { assert process.out.html[0][1][1] ==~ ".*/test_2_fastqc.html" }, - { assert process.out.zip[0][1][0] ==~ ".*/test_1_fastqc.zip" }, - { assert process.out.zip[0][1][1] ==~ ".*/test_2_fastqc.zip" }, - { assert path(process.out.html[0][1][0]).text.contains("") }, - { assert path(process.out.html[0][1][1]).text.contains("") }, - - { assert snapshot(process.out.versions).match("fastqc_versions_paired") } + { assert process.success }, + { assert process.out.html[0][1][0] ==~ ".*/test_1_fastqc.html" }, + { assert process.out.html[0][1][1] ==~ ".*/test_2_fastqc.html" }, + { assert process.out.zip[0][1][0] ==~ ".*/test_1_fastqc.zip" }, + { assert process.out.zip[0][1][1] ==~ ".*/test_2_fastqc.zip" }, + { assert path(process.out.html[0][1][0]).text.contains("") }, + { assert path(process.out.html[0][1][1]).text.contains("") }, + { assert snapshot(process.out.versions).match() } ) } } @@ -83,13 +78,11 @@ nextflow_process { then { assertAll ( - { assert process.success }, - - { assert process.out.html[0][1] ==~ ".*/test_fastqc.html" }, - { assert process.out.zip[0][1] ==~ ".*/test_fastqc.zip" }, - { assert path(process.out.html[0][1]).text.contains("") }, - - { assert snapshot(process.out.versions).match("fastqc_versions_interleaved") } + { assert process.success }, + { assert process.out.html[0][1] ==~ ".*/test_fastqc.html" }, + { assert process.out.zip[0][1] ==~ ".*/test_fastqc.zip" }, + { assert path(process.out.html[0][1]).text.contains("") }, + { assert snapshot(process.out.versions).match() } ) } } @@ -109,13 +102,11 @@ nextflow_process { then { assertAll ( - { assert process.success }, - - { assert process.out.html[0][1] ==~ ".*/test_fastqc.html" }, - { assert process.out.zip[0][1] ==~ ".*/test_fastqc.zip" }, - { assert path(process.out.html[0][1]).text.contains("") }, - - { assert snapshot(process.out.versions).match("fastqc_versions_bam") } + { assert process.success }, + { assert process.out.html[0][1] ==~ ".*/test_fastqc.html" }, + { assert process.out.zip[0][1] ==~ ".*/test_fastqc.zip" }, + { assert path(process.out.html[0][1]).text.contains("") }, + { assert snapshot(process.out.versions).match() } ) } } @@ -138,22 +129,20 @@ nextflow_process { then { assertAll ( - { assert process.success }, - - { assert process.out.html[0][1][0] ==~ ".*/test_1_fastqc.html" }, - { assert process.out.html[0][1][1] ==~ ".*/test_2_fastqc.html" }, - { assert process.out.html[0][1][2] ==~ ".*/test_3_fastqc.html" }, - { assert process.out.html[0][1][3] ==~ ".*/test_4_fastqc.html" }, - { assert process.out.zip[0][1][0] ==~ ".*/test_1_fastqc.zip" }, - { assert process.out.zip[0][1][1] ==~ ".*/test_2_fastqc.zip" }, - { assert process.out.zip[0][1][2] ==~ ".*/test_3_fastqc.zip" }, - { assert process.out.zip[0][1][3] ==~ ".*/test_4_fastqc.zip" }, - { assert path(process.out.html[0][1][0]).text.contains("") }, - { assert path(process.out.html[0][1][1]).text.contains("") }, - { assert path(process.out.html[0][1][2]).text.contains("") }, - { assert path(process.out.html[0][1][3]).text.contains("") }, - - { assert 
snapshot(process.out.versions).match("fastqc_versions_multiple") } + { assert process.success }, + { assert process.out.html[0][1][0] ==~ ".*/test_1_fastqc.html" }, + { assert process.out.html[0][1][1] ==~ ".*/test_2_fastqc.html" }, + { assert process.out.html[0][1][2] ==~ ".*/test_3_fastqc.html" }, + { assert process.out.html[0][1][3] ==~ ".*/test_4_fastqc.html" }, + { assert process.out.zip[0][1][0] ==~ ".*/test_1_fastqc.zip" }, + { assert process.out.zip[0][1][1] ==~ ".*/test_2_fastqc.zip" }, + { assert process.out.zip[0][1][2] ==~ ".*/test_3_fastqc.zip" }, + { assert process.out.zip[0][1][3] ==~ ".*/test_4_fastqc.zip" }, + { assert path(process.out.html[0][1][0]).text.contains("") }, + { assert path(process.out.html[0][1][1]).text.contains("") }, + { assert path(process.out.html[0][1][2]).text.contains("") }, + { assert path(process.out.html[0][1][3]).text.contains("") }, + { assert snapshot(process.out.versions).match() } ) } } @@ -173,21 +162,18 @@ nextflow_process { then { assertAll ( - { assert process.success }, - - { assert process.out.html[0][1] ==~ ".*/mysample_fastqc.html" }, - { assert process.out.zip[0][1] ==~ ".*/mysample_fastqc.zip" }, - { assert path(process.out.html[0][1]).text.contains("") }, - - { assert snapshot(process.out.versions).match("fastqc_versions_custom_prefix") } + { assert process.success }, + { assert process.out.html[0][1] ==~ ".*/mysample_fastqc.html" }, + { assert process.out.zip[0][1] ==~ ".*/mysample_fastqc.zip" }, + { assert path(process.out.html[0][1]).text.contains("") }, + { assert snapshot(process.out.versions).match() } ) } } test("sarscov2 single-end [fastq] - stub") { - options "-stub" - + options "-stub" when { process { """ @@ -201,12 +187,123 @@ nextflow_process { then { assertAll ( - { assert process.success }, - { assert snapshot(process.out.html.collect { file(it[1]).getName() } + - process.out.zip.collect { file(it[1]).getName() } + - process.out.versions ).match("fastqc_stub") } + { assert process.success }, + { assert snapshot(process.out).match() } ) } } + test("sarscov2 paired-end [fastq] - stub") { + + options "-stub" + when { + process { + """ + input[0] = Channel.of([ + [id: 'test', single_end: false], // meta map + [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true) ] + ]) + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + + test("sarscov2 interleaved [fastq] - stub") { + + options "-stub" + when { + process { + """ + input[0] = Channel.of([ + [id: 'test', single_end: false], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_interleaved.fastq.gz', checkIfExists: true) + ]) + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + + test("sarscov2 paired-end [bam] - stub") { + + options "-stub" + when { + process { + """ + input[0] = Channel.of([ + [id: 'test', single_end: false], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true) + ]) + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + + test("sarscov2 multiple [fastq] - stub") { + + options "-stub" + when { + process { + """ + input[0] = Channel.of([ + [id: 
'test', single_end: false], // meta map + [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test2_1.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test2_2.fastq.gz', checkIfExists: true) ] + ]) + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + + test("sarscov2 custom_prefix - stub") { + + options "-stub" + when { + process { + """ + input[0] = Channel.of([ + [ id:'mysample', single_end:true ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true) + ]) + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } } diff --git a/modules/nf-core/fastqc/tests/main.nf.test.snap b/modules/nf-core/fastqc/tests/main.nf.test.snap index 86f7c31..d5db309 100644 --- a/modules/nf-core/fastqc/tests/main.nf.test.snap +++ b/modules/nf-core/fastqc/tests/main.nf.test.snap @@ -1,88 +1,392 @@ { - "fastqc_versions_interleaved": { + "sarscov2 custom_prefix": { "content": [ [ "versions.yml:md5,e1cc25ca8af856014824abd842e93978" ] ], "meta": { - "nf-test": "0.8.4", - "nextflow": "23.10.1" + "nf-test": "0.9.0", + "nextflow": "24.04.3" }, - "timestamp": "2024-01-31T17:40:07.293713" + "timestamp": "2024-07-22T11:02:16.374038" }, - "fastqc_stub": { + "sarscov2 single-end [fastq] - stub": { "content": [ - [ - "test.html", - "test.zip", - "versions.yml:md5,e1cc25ca8af856014824abd842e93978" - ] + { + "0": [ + [ + { + "id": "test", + "single_end": true + }, + "test.html:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + [ + { + "id": "test", + "single_end": true + }, + "test.zip:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "2": [ + "versions.yml:md5,e1cc25ca8af856014824abd842e93978" + ], + "html": [ + [ + { + "id": "test", + "single_end": true + }, + "test.html:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions": [ + "versions.yml:md5,e1cc25ca8af856014824abd842e93978" + ], + "zip": [ + [ + { + "id": "test", + "single_end": true + }, + "test.zip:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.3" + }, + "timestamp": "2024-07-22T11:02:24.993809" + }, + "sarscov2 custom_prefix - stub": { + "content": [ + { + "0": [ + [ + { + "id": "mysample", + "single_end": true + }, + "mysample.html:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + [ + { + "id": "mysample", + "single_end": true + }, + "mysample.zip:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "2": [ + "versions.yml:md5,e1cc25ca8af856014824abd842e93978" + ], + "html": [ + [ + { + "id": "mysample", + "single_end": true + }, + "mysample.html:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions": [ + "versions.yml:md5,e1cc25ca8af856014824abd842e93978" + ], + "zip": [ + [ + { + "id": "mysample", + "single_end": true + }, + "mysample.zip:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + } ], "meta": { - "nf-test": "0.8.4", - "nextflow": "23.10.1" + "nf-test": "0.9.0", + "nextflow": "24.04.3" }, - "timestamp": "2024-01-31T17:31:01.425198" + "timestamp": "2024-07-22T11:03:10.93942" }, - "fastqc_versions_multiple": { + "sarscov2 interleaved [fastq]": { 
"content": [ [ "versions.yml:md5,e1cc25ca8af856014824abd842e93978" ] ], "meta": { - "nf-test": "0.8.4", - "nextflow": "23.10.1" + "nf-test": "0.9.0", + "nextflow": "24.04.3" }, - "timestamp": "2024-01-31T17:40:55.797907" + "timestamp": "2024-07-22T11:01:42.355718" }, - "fastqc_versions_bam": { + "sarscov2 paired-end [bam]": { "content": [ [ "versions.yml:md5,e1cc25ca8af856014824abd842e93978" ] ], "meta": { - "nf-test": "0.8.4", - "nextflow": "23.10.1" + "nf-test": "0.9.0", + "nextflow": "24.04.3" }, - "timestamp": "2024-01-31T17:40:26.795862" + "timestamp": "2024-07-22T11:01:53.276274" }, - "fastqc_versions_single": { + "sarscov2 multiple [fastq]": { "content": [ [ "versions.yml:md5,e1cc25ca8af856014824abd842e93978" ] ], "meta": { - "nf-test": "0.8.4", - "nextflow": "23.10.1" + "nf-test": "0.9.0", + "nextflow": "24.04.3" }, - "timestamp": "2024-01-31T17:39:27.043675" + "timestamp": "2024-07-22T11:02:05.527626" }, - "fastqc_versions_paired": { + "sarscov2 paired-end [fastq]": { "content": [ [ "versions.yml:md5,e1cc25ca8af856014824abd842e93978" ] ], "meta": { - "nf-test": "0.8.4", - "nextflow": "23.10.1" + "nf-test": "0.9.0", + "nextflow": "24.04.3" + }, + "timestamp": "2024-07-22T11:01:31.188871" + }, + "sarscov2 paired-end [fastq] - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + "test.html:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + [ + { + "id": "test", + "single_end": false + }, + "test.zip:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "2": [ + "versions.yml:md5,e1cc25ca8af856014824abd842e93978" + ], + "html": [ + [ + { + "id": "test", + "single_end": false + }, + "test.html:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions": [ + "versions.yml:md5,e1cc25ca8af856014824abd842e93978" + ], + "zip": [ + [ + { + "id": "test", + "single_end": false + }, + "test.zip:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.3" + }, + "timestamp": "2024-07-22T11:02:34.273566" + }, + "sarscov2 multiple [fastq] - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + "test.html:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + [ + { + "id": "test", + "single_end": false + }, + "test.zip:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "2": [ + "versions.yml:md5,e1cc25ca8af856014824abd842e93978" + ], + "html": [ + [ + { + "id": "test", + "single_end": false + }, + "test.html:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions": [ + "versions.yml:md5,e1cc25ca8af856014824abd842e93978" + ], + "zip": [ + [ + { + "id": "test", + "single_end": false + }, + "test.zip:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.3" }, - "timestamp": "2024-01-31T17:39:47.584191" + "timestamp": "2024-07-22T11:03:02.304411" }, - "fastqc_versions_custom_prefix": { + "sarscov2 single-end [fastq]": { "content": [ [ "versions.yml:md5,e1cc25ca8af856014824abd842e93978" ] ], "meta": { - "nf-test": "0.8.4", - "nextflow": "23.10.1" + "nf-test": "0.9.0", + "nextflow": "24.04.3" + }, + "timestamp": "2024-07-22T11:01:19.095607" + }, + "sarscov2 interleaved [fastq] - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + "test.html:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + [ + { + "id": "test", + "single_end": false + }, + "test.zip:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "2": [ + "versions.yml:md5,e1cc25ca8af856014824abd842e93978" + ], + "html": [ + 
[ + { + "id": "test", + "single_end": false + }, + "test.html:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions": [ + "versions.yml:md5,e1cc25ca8af856014824abd842e93978" + ], + "zip": [ + [ + { + "id": "test", + "single_end": false + }, + "test.zip:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.3" + }, + "timestamp": "2024-07-22T11:02:44.640184" + }, + "sarscov2 paired-end [bam] - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + "test.html:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + [ + { + "id": "test", + "single_end": false + }, + "test.zip:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "2": [ + "versions.yml:md5,e1cc25ca8af856014824abd842e93978" + ], + "html": [ + [ + { + "id": "test", + "single_end": false + }, + "test.html:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions": [ + "versions.yml:md5,e1cc25ca8af856014824abd842e93978" + ], + "zip": [ + [ + { + "id": "test", + "single_end": false + }, + "test.zip:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.3" }, - "timestamp": "2024-01-31T17:41:14.576531" + "timestamp": "2024-07-22T11:02:53.550742" } } \ No newline at end of file diff --git a/modules/nf-core/gffcompare/environment.yml b/modules/nf-core/gffcompare/environment.yml index bcd633e..2b52417 100644 --- a/modules/nf-core/gffcompare/environment.yml +++ b/modules/nf-core/gffcompare/environment.yml @@ -1,7 +1,5 @@ -name: gffcompare channels: - conda-forge - bioconda - - defaults dependencies: - bioconda::gffcompare=0.12.6 diff --git a/modules/nf-core/gffcompare/meta.yml b/modules/nf-core/gffcompare/meta.yml index 674f08c..7c3a91e 100644 --- a/modules/nf-core/gffcompare/meta.yml +++ b/modules/nf-core/gffcompare/meta.yml @@ -13,79 +13,129 @@ tools: tool_dev_url: "https://github.com/gpertea/gffcompare" doi: "10.12688/f1000research.23297.1" licence: ["MIT"] + identifier: biotools:gffcompare input: - - meta: - type: map - description: | - Groovy Map containing meta data - e.g. [ id:'test', single_end:false ] - - gtfs: - type: file - description: | - GTF/GFF files - e.g. [ 'file_1.gtf', 'file_2.gtf' ] - pattern: "*.{gtf,gff}" - - fasta: - type: file - description: Genome reference fasta file (optional) - pattern: "*.{fasta,fa}" - - fai: - type: file - description: Index for fasta file - pattern: "*.fai" - - reference_gtf: - type: file - description: Reference annotation in gtf/gff format (optional) - pattern: "*.{gtf,gff}" + - - meta: + type: map + description: | + Groovy Map containing meta data + e.g. [ id:'test', single_end:false ] + - gtfs: + type: file + description: | + GTF/GFF files + e.g. [ 'file_1.gtf', 'file_2.gtf' ] + pattern: "*.{gtf,gff}" + - - meta2: + type: map + description: | + Groovy Map containing meta data + e.g. [ id:'test', single_end:false ] + - fasta: + type: file + description: Genome reference fasta file (optional) + pattern: "*.{fasta,fa}" + - fai: + type: file + description: Index for fasta file + pattern: "*.fai" + - - meta3: + type: map + description: | + Groovy Map containing meta data + e.g. [ id:'test', single_end:false ] + - reference_gtf: + type: file + description: Reference annotation in gtf/gff format (optional) + pattern: "*.{gtf,gff}" output: - - meta: - type: map - description: | - Groovy Map containing meta data - e.g. 
[ id:'test', single_end:false ] - annotated_gtf: - type: file - description: | - Annotated gtf file when reference gtf is provided (optional) - pattern: "*.annotated.gtf" + - meta: + type: map + description: | + Groovy Map containing meta data + e.g. [ id:'test', single_end:false ] + - "*.annotated.gtf": + type: file + description: | + Annotated gtf file when reference gtf is provided (optional) + pattern: "*.annotated.gtf" - combined_gtf: - type: file - description: | - Combined gtf file when multiple input files are - provided (optional) - pattern: "*.annotated.gtf" + - meta: + type: map + description: | + Groovy Map containing meta data + e.g. [ id:'test', single_end:false ] + - "*.combined.gtf": + type: file + description: | + Combined gtf file when multiple input files are + provided (optional) + pattern: "*.annotated.gtf" - tmap: - type: file - description: | - File listing the most closely matching reference transcript - for each query transcript (optional) - pattern: "*.tmap" + - meta: + type: map + description: | + Groovy Map containing meta data + e.g. [ id:'test', single_end:false ] + - "*.tmap": + type: file + description: | + File listing the most closely matching reference transcript + for each query transcript (optional) + pattern: "*.tmap" - refmap: - type: file - description: | - File listing the reference transcripts with overlapping - query transcripts (optional) - pattern: "*.refmap" + - meta: + type: map + description: | + Groovy Map containing meta data + e.g. [ id:'test', single_end:false ] + - "*.refmap": + type: file + description: | + File listing the reference transcripts with overlapping + query transcripts (optional) + pattern: "*.refmap" - loci: - type: file - description: File with loci - pattern: "*.loci" + - meta: + type: map + description: | + Groovy Map containing meta data + e.g. [ id:'test', single_end:false ] + - "*.loci": + type: file + description: File with loci + pattern: "*.loci" - stats: - type: file - description: | - File with stats for input transcripts as compared to - reference alternatively stats for the combined gtf - pattern: "*.stats" + - meta: + type: map + description: | + Groovy Map containing meta data + e.g. [ id:'test', single_end:false ] + - "*.stats": + type: file + description: | + File with stats for input transcripts as compared to + reference alternatively stats for the combined gtf + pattern: "*.stats" - tracking: - type: file - description: | - This file matches transcripts up between samples - pattern: "*.tracking" + - meta: + type: map + description: | + Groovy Map containing meta data + e.g. 
[ id:'test', single_end:false ] + - "*.tracking": + type: file + description: | + This file matches transcripts up between samples + pattern: "*.tracking" - versions: - type: file - description: File containing software versions - pattern: "versions.yml" + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" authors: - "@jemten" maintainers: - "@jemten" + - "@gallvp" diff --git a/modules/nf-core/gffcompare/tests/main.nf.test b/modules/nf-core/gffcompare/tests/main.nf.test new file mode 100644 index 0000000..258733a --- /dev/null +++ b/modules/nf-core/gffcompare/tests/main.nf.test @@ -0,0 +1,103 @@ + +nextflow_process { + + name "Test Process GFFCOMPARE" + script "../main.nf" + process "GFFCOMPARE" + + tag "modules" + tag "modules_nfcore" + tag "gffcompare" + + test("test-gffcompare") { + + when { + process { + """ + input[0] = [ + [ id:'test' ], // meta map + [ file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.gff3', checkIfExists: true) ], + ] + input[1] = [ + [ id:'sarscov2' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta.fai', checkIfExists: true) + ] + input[2] = [ + [ id:'sarscov2' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.gtf', checkIfExists: true) + ] + + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + + test("test-gffcompare-combine") { + + when { + process { + """ + input[0] = [ [ id:'test' ], // meta map + [ + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.gff3', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.gtf', checkIfExists: true) + ] + ] + input[1] = [ + [ id:'sarscov2' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta.fai', checkIfExists: true) + ] + input[2] = [[id:'sarscov2'], []] + + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + + test("test-gffcompare-combine-stub") { + options '-stub' + when { + process { + """ + input[0] = [ [ id:'test' ], // meta map + [ + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.gff3', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.gtf', checkIfExists: true) + ] + ] + input[1] = [ + [ id:'sarscov2' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta.fai', checkIfExists: true) + ] + input[2] = [[id:'sarscov2'], []] + + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + +} diff --git a/modules/nf-core/gffcompare/tests/main.nf.test.snap b/modules/nf-core/gffcompare/tests/main.nf.test.snap new file mode 100644 index 0000000..9db3c0a --- /dev/null +++ b/modules/nf-core/gffcompare/tests/main.nf.test.snap @@ -0,0 +1,365 @@ +{ + "test-gffcompare": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test.annotated.gtf:md5,9fdcea941cb003026e1ab04a45c3944c" + ] + ], + "1": [ + + ], + "2": [ + [ + { + 
"id": "test" + }, + "test.genome.gff3.tmap:md5,8935511caaf122288167f8d908eb2632" + ] + ], + "3": [ + [ + { + "id": "test" + }, + "test.genome.gff3.refmap:md5,60461ca3548615db12d31c53c5356ee5" + ] + ], + "4": [ + [ + { + "id": "test" + }, + "test.loci:md5,b637f88bbedda47aee859f7bc20c36ab" + ] + ], + "5": [ + [ + { + "id": "test" + }, + "test.stats:md5,a7c5a8cd8aa7188a2152e894f2355f10" + ] + ], + "6": [ + [ + { + "id": "test" + }, + "test.tracking:md5,344cd0bea36e5aab3a0824f2aaf7b824" + ] + ], + "7": [ + "versions.yml:md5,6f40a0e2547d9e3f4cf8e142ae5c17b6" + ], + "annotated_gtf": [ + [ + { + "id": "test" + }, + "test.annotated.gtf:md5,9fdcea941cb003026e1ab04a45c3944c" + ] + ], + "combined_gtf": [ + + ], + "loci": [ + [ + { + "id": "test" + }, + "test.loci:md5,b637f88bbedda47aee859f7bc20c36ab" + ] + ], + "refmap": [ + [ + { + "id": "test" + }, + "test.genome.gff3.refmap:md5,60461ca3548615db12d31c53c5356ee5" + ] + ], + "stats": [ + [ + { + "id": "test" + }, + "test.stats:md5,a7c5a8cd8aa7188a2152e894f2355f10" + ] + ], + "tmap": [ + [ + { + "id": "test" + }, + "test.genome.gff3.tmap:md5,8935511caaf122288167f8d908eb2632" + ] + ], + "tracking": [ + [ + { + "id": "test" + }, + "test.tracking:md5,344cd0bea36e5aab3a0824f2aaf7b824" + ] + ], + "versions": [ + "versions.yml:md5,6f40a0e2547d9e3f4cf8e142ae5c17b6" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.4" + }, + "timestamp": "2024-08-29T19:08:50.819004" + }, + "test-gffcompare-combine-stub": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test.annotated.gtf:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + [ + { + "id": "test" + }, + "test.combined.gtf:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "2": [ + [ + { + "id": "test" + }, + "test.tmap:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "3": [ + [ + { + "id": "test" + }, + "test.refmap:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "4": [ + [ + { + "id": "test" + }, + "test.loci:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "5": [ + [ + { + "id": "test" + }, + "test.stats:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "6": [ + [ + { + "id": "test" + }, + "test.tracking:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "7": [ + "versions.yml:md5,6f40a0e2547d9e3f4cf8e142ae5c17b6" + ], + "annotated_gtf": [ + [ + { + "id": "test" + }, + "test.annotated.gtf:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "combined_gtf": [ + [ + { + "id": "test" + }, + "test.combined.gtf:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "loci": [ + [ + { + "id": "test" + }, + "test.loci:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "refmap": [ + [ + { + "id": "test" + }, + "test.refmap:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "stats": [ + [ + { + "id": "test" + }, + "test.stats:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "tmap": [ + [ + { + "id": "test" + }, + "test.tmap:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "tracking": [ + [ + { + "id": "test" + }, + "test.tracking:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions": [ + "versions.yml:md5,6f40a0e2547d9e3f4cf8e142ae5c17b6" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.4" + }, + "timestamp": "2024-08-29T19:10:25.943352" + }, + "test-gffcompare-combine": { + "content": [ + { + "0": [ + + ], + "1": [ + [ + { + "id": "test" + }, + "test.combined.gtf:md5,deebda20162eddb07b53d1da393a92a1" + ] + ], + "2": [ + [ + { + "id": "test" + }, + [ + "test.genome.gff3.tmap:md5,6f3866608c9d2b45d169dbc15a6372c4", + "test.genome.gtf.tmap:md5,6f3866608c9d2b45d169dbc15a6372c4" + ] 
+ ] + ], + "3": [ + + ], + "4": [ + [ + { + "id": "test" + }, + "test.loci:md5,2c020db4486b1997f68555825065c28d" + ] + ], + "5": [ + [ + { + "id": "test" + }, + "test.stats:md5,040cf90e0d649479173a5c846928cd67" + ] + ], + "6": [ + [ + { + "id": "test" + }, + "test.tracking:md5,55b10c257c1a07224cb5d995c799f782" + ] + ], + "7": [ + "versions.yml:md5,6f40a0e2547d9e3f4cf8e142ae5c17b6" + ], + "annotated_gtf": [ + + ], + "combined_gtf": [ + [ + { + "id": "test" + }, + "test.combined.gtf:md5,deebda20162eddb07b53d1da393a92a1" + ] + ], + "loci": [ + [ + { + "id": "test" + }, + "test.loci:md5,2c020db4486b1997f68555825065c28d" + ] + ], + "refmap": [ + + ], + "stats": [ + [ + { + "id": "test" + }, + "test.stats:md5,040cf90e0d649479173a5c846928cd67" + ] + ], + "tmap": [ + [ + { + "id": "test" + }, + [ + "test.genome.gff3.tmap:md5,6f3866608c9d2b45d169dbc15a6372c4", + "test.genome.gtf.tmap:md5,6f3866608c9d2b45d169dbc15a6372c4" + ] + ] + ], + "tracking": [ + [ + { + "id": "test" + }, + "test.tracking:md5,55b10c257c1a07224cb5d995c799f782" + ] + ], + "versions": [ + "versions.yml:md5,6f40a0e2547d9e3f4cf8e142ae5c17b6" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.4" + }, + "timestamp": "2024-08-29T19:10:21.420587" + } +} \ No newline at end of file diff --git a/modules/nf-core/gffread/environment.yml b/modules/nf-core/gffread/environment.yml index c6df58a..ee23984 100644 --- a/modules/nf-core/gffread/environment.yml +++ b/modules/nf-core/gffread/environment.yml @@ -1,7 +1,5 @@ -name: gffread channels: - conda-forge - bioconda - - defaults dependencies: - bioconda::gffread=0.12.7 diff --git a/modules/nf-core/gffread/gffread.diff b/modules/nf-core/gffread/gffread.diff deleted file mode 100644 index fa3668c..0000000 --- a/modules/nf-core/gffread/gffread.diff +++ /dev/null @@ -1,675 +0,0 @@ -Changes in module 'nf-core/gffread' ---- modules/nf-core/gffread/environment.yml -+++ modules/nf-core/gffread/environment.yml -@@ -4,4 +4,4 @@ - - bioconda - - defaults - dependencies: -- - bioconda::gffread=0.12.1 -+ - bioconda::gffread=0.12.7 - ---- modules/nf-core/gffread/meta.yml -+++ modules/nf-core/gffread/meta.yml -@@ -13,11 +13,25 @@ - doi: 10.12688/f1000research.23297.1 - licence: ["MIT"] - input: -+ - meta: -+ type: map -+ description: | -+ Groovy Map containing meta data -+ e.g. [ id:'test' ] - - gff: - type: file - description: A reference file in either the GFF3, GFF2 or GTF format. - pattern: "*.{gff, gtf}" -+ - fasta: -+ type: file -+ description: A multi-fasta file with the genomic sequences -+ pattern: "*.{fasta,fa,faa,fas,fsa}" - output: -+ - meta: -+ type: map -+ description: | -+ Groovy Map containing meta data -+ e.g. 
[ id:'test' ] - - gtf: - type: file - description: GTF file resulting from the conversion of the GFF input file if '-T' argument is present -@@ -25,7 +39,11 @@ - - gffread_gff: - type: file - description: GFF3 file resulting from the conversion of the GFF input file if '-T' argument is absent -- pattern: "*.{gff3}" -+ pattern: "*.gff3" -+ - gffread_fasta: -+ type: file -+ description: Fasta file produced when either of '-w', '-x', '-y' parameters is present -+ pattern: "*.fasta" - - versions: - type: file - description: File containing software versions -@@ -34,3 +52,4 @@ - - "@edmundmiller" - maintainers: - - "@edmundmiller" -+ - "@gallvp" - ---- modules/nf-core/gffread/main.nf -+++ modules/nf-core/gffread/main.nf -@@ -1,32 +1,59 @@ - process GFFREAD { -- tag "$gff" -+ tag "$meta.id" - label 'process_low' - - conda "${moduleDir}/environment.yml" - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? -- 'https://depot.galaxyproject.org/singularity/gffread:0.12.1--h8b12597_0' : -- 'biocontainers/gffread:0.12.1--h8b12597_0' }" -+ 'https://depot.galaxyproject.org/singularity/gffread:0.12.7--hdcf5f25_4' : -+ 'biocontainers/gffread:0.12.7--hdcf5f25_4' }" - - input: -- path gff -+ tuple val(meta), path(gff) -+ path fasta - - output: -- path "*.gtf" , emit: gtf , optional: true -- path "*.gff3" , emit: gffread_gff , optional: true -- path "versions.yml" , emit: versions -+ tuple val(meta), path("*.gtf") , emit: gtf , optional: true -+ tuple val(meta), path("*.gff3") , emit: gffread_gff , optional: true -+ tuple val(meta), path("*.fasta"), emit: gffread_fasta , optional: true -+ path "versions.yml" , emit: versions - - when: - task.ext.when == null || task.ext.when - - script: -- def args = task.ext.args ?: '' -- def prefix = task.ext.prefix ?: "${gff.baseName}" -- def extension = args.contains("-T") ? 'gtf' : 'gffread.gff3' -+ def args = task.ext.args ?: '' -+ def prefix = task.ext.prefix ?: "${meta.id}" -+ def extension = args.contains("-T") ? 'gtf' : ( ( ['-w', '-x', '-y' ].any { args.contains(it) } ) ? 'fasta' : 'gff3' ) -+ def fasta_arg = fasta ? "-g $fasta" : '' -+ def output_name = "${prefix}.${extension}" -+ def output = extension == "fasta" ? "$output_name" : "-o $output_name" -+ def args_sorted = args.replaceAll(/(.*)(-[wxy])(.*)/) { all, pre, param, post -> "$pre $post $param" }.trim() -+ if ( "$output_name" in [ "$gff", "$fasta" ] ) error "Input and output names are the same, use \"task.ext.prefix\" to disambiguate!" - """ - gffread \\ - $gff \\ -- $args \\ -- -o ${prefix}.${extension} -+ $fasta_arg \\ -+ $args_sorted \\ -+ $output -+ -+ cat <<-END_VERSIONS > versions.yml -+ "${task.process}": -+ gffread: \$(gffread --version 2>&1) -+ END_VERSIONS -+ """ -+ -+ stub: -+ def args = task.ext.args ?: '' -+ def prefix = task.ext.prefix ?: "${meta.id}" -+ def extension = args.contains("-T") ? 'gtf' : ( ( ['-w', '-x', '-y' ].any { args.contains(it) } ) ? 'fasta' : 'gff3' ) -+ def fasta_arg = fasta ? "-g $fasta" : '' -+ def output_name = "${prefix}.${extension}" -+ def output = extension == "fasta" ? "$output_name" : "-o $output_name" -+ def args_sorted = args.replaceAll(/(.*)(-[wxy])(.*)/) { all, pre, param, post -> "$pre $post $param" }.trim() -+ if ( "$output_name" in [ "$gff", "$fasta" ] ) error "Input and output names are the same, use \"task.ext.prefix\" to disambiguate!" 
-+ """ -+ touch $output_name -+ - cat <<-END_VERSIONS > versions.yml - "${task.process}": - gffread: \$(gffread --version 2>&1) - ---- modules/nf-core/gffread/tests/main.nf.test.snap -+++ modules/nf-core/gffread/tests/main.nf.test.snap -@@ -1,24 +1,272 @@ - { - "sarscov2-gff3-gtf": { - "content": [ -- [ -- "genome.gtf:md5,2394072d7d31530dfd590c4a117bf6e3" -- ], -- [ -- "versions.yml:md5,a71b6cdfa528dd206a238ec64bae13d6" -- ] -- ], -- "timestamp": "2024-01-23T20:00:32.688779117" -+ { -+ "0": [ -+ [ -+ { -+ "id": "test" -+ }, -+ "test.gtf:md5,1ea0ae98d3388e0576407dc4a24ef428" -+ ] -+ ], -+ "1": [ -+ -+ ], -+ "2": [ -+ -+ ], -+ "3": [ -+ "versions.yml:md5,05f671c6c6e530acedad0af0a5948dbd" -+ ], -+ "gffread_fasta": [ -+ -+ ], -+ "gffread_gff": [ -+ -+ ], -+ "gtf": [ -+ [ -+ { -+ "id": "test" -+ }, -+ "test.gtf:md5,1ea0ae98d3388e0576407dc4a24ef428" -+ ] -+ ], -+ "versions": [ -+ "versions.yml:md5,05f671c6c6e530acedad0af0a5948dbd" -+ ] -+ } -+ ], -+ "meta": { -+ "nf-test": "0.8.4", -+ "nextflow": "23.10.1" -+ }, -+ "timestamp": "2024-04-09T10:48:56.496187" - }, - "sarscov2-gff3-gff3": { - "content": [ -- [ -- "genome.gffread.gff3:md5,a7d40d99dcddac23ac673c473279ea2d" -- ], -- [ -- "versions.yml:md5,a71b6cdfa528dd206a238ec64bae13d6" -- ] -- ], -- "timestamp": "2024-01-23T20:07:11.457356625" -+ { -+ "0": [ -+ -+ ], -+ "1": [ -+ [ -+ { -+ "id": "test" -+ }, -+ "test.gff3:md5,c4e5da6267c6bee5899a2c204ae1ad91" -+ ] -+ ], -+ "2": [ -+ -+ ], -+ "3": [ -+ "versions.yml:md5,05f671c6c6e530acedad0af0a5948dbd" -+ ], -+ "gffread_fasta": [ -+ -+ ], -+ "gffread_gff": [ -+ [ -+ { -+ "id": "test" -+ }, -+ "test.gff3:md5,c4e5da6267c6bee5899a2c204ae1ad91" -+ ] -+ ], -+ "gtf": [ -+ -+ ], -+ "versions": [ -+ "versions.yml:md5,05f671c6c6e530acedad0af0a5948dbd" -+ ] -+ } -+ ], -+ "meta": { -+ "nf-test": "0.8.4", -+ "nextflow": "23.10.1" -+ }, -+ "timestamp": "2024-04-09T10:49:00.892782" -+ }, -+ "sarscov2-gff3-gtf-stub": { -+ "content": [ -+ { -+ "0": [ -+ [ -+ { -+ "id": "test" -+ }, -+ "test.gtf:md5,d41d8cd98f00b204e9800998ecf8427e" -+ ] -+ ], -+ "1": [ -+ -+ ], -+ "2": [ -+ -+ ], -+ "3": [ -+ "versions.yml:md5,05f671c6c6e530acedad0af0a5948dbd" -+ ], -+ "gffread_fasta": [ -+ -+ ], -+ "gffread_gff": [ -+ -+ ], -+ "gtf": [ -+ [ -+ { -+ "id": "test" -+ }, -+ "test.gtf:md5,d41d8cd98f00b204e9800998ecf8427e" -+ ] -+ ], -+ "versions": [ -+ "versions.yml:md5,05f671c6c6e530acedad0af0a5948dbd" -+ ] -+ } -+ ], -+ "meta": { -+ "nf-test": "0.8.4", -+ "nextflow": "23.10.1" -+ }, -+ "timestamp": "2024-04-09T11:11:26.975666" -+ }, -+ "sarscov2-gff3-fasta-stub": { -+ "content": [ -+ { -+ "0": [ -+ -+ ], -+ "1": [ -+ -+ ], -+ "2": [ -+ [ -+ { -+ "id": "test" -+ }, -+ "test.fasta:md5,d41d8cd98f00b204e9800998ecf8427e" -+ ] -+ ], -+ "3": [ -+ "versions.yml:md5,05f671c6c6e530acedad0af0a5948dbd" -+ ], -+ "gffread_fasta": [ -+ [ -+ { -+ "id": "test" -+ }, -+ "test.fasta:md5,d41d8cd98f00b204e9800998ecf8427e" -+ ] -+ ], -+ "gffread_gff": [ -+ -+ ], -+ "gtf": [ -+ -+ ], -+ "versions": [ -+ "versions.yml:md5,05f671c6c6e530acedad0af0a5948dbd" -+ ] -+ } -+ ], -+ "meta": { -+ "nf-test": "0.8.4", -+ "nextflow": "23.10.1" -+ }, -+ "timestamp": "2024-04-09T11:11:44.34792" -+ }, -+ "sarscov2-gff3-gff3-stub": { -+ "content": [ -+ { -+ "0": [ -+ -+ ], -+ "1": [ -+ [ -+ { -+ "id": "test" -+ }, -+ "test.gff3:md5,d41d8cd98f00b204e9800998ecf8427e" -+ ] -+ ], -+ "2": [ -+ -+ ], -+ "3": [ -+ "versions.yml:md5,05f671c6c6e530acedad0af0a5948dbd" -+ ], -+ "gffread_fasta": [ -+ -+ ], -+ "gffread_gff": [ -+ [ -+ { -+ "id": "test" -+ }, -+ 
"test.gff3:md5,d41d8cd98f00b204e9800998ecf8427e" -+ ] -+ ], -+ "gtf": [ -+ -+ ], -+ "versions": [ -+ "versions.yml:md5,05f671c6c6e530acedad0af0a5948dbd" -+ ] -+ } -+ ], -+ "meta": { -+ "nf-test": "0.8.4", -+ "nextflow": "23.10.1" -+ }, -+ "timestamp": "2024-04-09T11:11:35.221671" -+ }, -+ "sarscov2-gff3-fasta": { -+ "content": [ -+ { -+ "0": [ -+ -+ ], -+ "1": [ -+ -+ ], -+ "2": [ -+ [ -+ { -+ "id": "test" -+ }, -+ "test.fasta:md5,5f8108fb51739a0588ccf0a251de919a" -+ ] -+ ], -+ "3": [ -+ "versions.yml:md5,05f671c6c6e530acedad0af0a5948dbd" -+ ], -+ "gffread_fasta": [ -+ [ -+ { -+ "id": "test" -+ }, -+ "test.fasta:md5,5f8108fb51739a0588ccf0a251de919a" -+ ] -+ ], -+ "gffread_gff": [ -+ -+ ], -+ "gtf": [ -+ -+ ], -+ "versions": [ -+ "versions.yml:md5,05f671c6c6e530acedad0af0a5948dbd" -+ ] -+ } -+ ], -+ "meta": { -+ "nf-test": "0.8.4", -+ "nextflow": "23.10.1" -+ }, -+ "timestamp": "2024-04-09T10:54:02.88143" - } - } ---- modules/nf-core/gffread/tests/main.nf.test -+++ modules/nf-core/gffread/tests/main.nf.test -@@ -18,47 +18,203 @@ - } - process { - """ -- input[0] = file(params.modules_testdata_base_path + "genomics/sarscov2/genome/genome.gff3", checkIfExists: true) -- """ -- } -- } -- -- then { -- assertAll ( -- { assert process.success }, -- { assert snapshot( -- process.out.gtf, -- process.out.versions -- ).match() }, -+ input[0] = [ -+ [id: 'test'], -+ file(params.modules_testdata_base_path + "genomics/sarscov2/genome/genome.gff3", checkIfExists: true) -+ ] -+ input[1] = [] -+ """ -+ } -+ } -+ -+ then { -+ assertAll ( -+ { assert process.success }, -+ { assert snapshot(process.out).match() }, -+ { assert process.out.gffread_gff == [] }, -+ { assert process.out.gffread_fasta == [] } -+ ) -+ } -+ -+ } -+ -+ test("sarscov2-gff3-gtf-stub") { -+ -+ options '-stub' -+ config "./nextflow.config" -+ -+ when { -+ params { -+ outdir = "$outputDir" -+ } -+ process { -+ """ -+ input[0] = [ -+ [id: 'test'], -+ file(params.modules_testdata_base_path + "genomics/sarscov2/genome/genome.gff3", checkIfExists: true) -+ ] -+ input[1] = [] -+ """ -+ } -+ } -+ -+ then { -+ assertAll ( -+ { assert process.success }, -+ { assert snapshot(process.out).match() }, -+ { assert process.out.gffread_gff == [] }, -+ { assert process.out.gffread_fasta == [] } -+ ) -+ } -+ -+ } -+ -+ test("sarscov2-gff3-gff3") { -+ -+ config "./nextflow-gff3.config" -+ -+ when { -+ params { -+ outdir = "$outputDir" -+ } -+ process { -+ """ -+ input[0] = [ -+ [id: 'test'], -+ file(params.modules_testdata_base_path + "genomics/sarscov2/genome/genome.gff3", checkIfExists: true) -+ ] -+ input[1] = [] -+ """ -+ } -+ } -+ -+ then { -+ assertAll ( -+ { assert process.success }, -+ { assert snapshot(process.out).match() }, -+ { assert process.out.gtf == [] }, -+ { assert process.out.gffread_fasta == [] } -+ ) -+ } -+ -+ } -+ -+ test("sarscov2-gff3-gff3-stub") { -+ -+ options '-stub' -+ config "./nextflow-gff3.config" -+ -+ when { -+ params { -+ outdir = "$outputDir" -+ } -+ process { -+ """ -+ input[0] = [ -+ [id: 'test'], -+ file(params.modules_testdata_base_path + "genomics/sarscov2/genome/genome.gff3", checkIfExists: true) -+ ] -+ input[1] = [] -+ """ -+ } -+ } -+ -+ then { -+ assertAll ( -+ { assert process.success }, -+ { assert snapshot(process.out).match() }, -+ { assert process.out.gtf == [] }, -+ { assert process.out.gffread_fasta == [] } -+ ) -+ } -+ -+ } -+ -+ test("sarscov2-gff3-fasta") { -+ -+ config "./nextflow-fasta.config" -+ -+ when { -+ params { -+ outdir = "$outputDir" -+ } -+ process { -+ """ -+ input[0] = [ -+ [id: 'test'], 
-+ file(params.modules_testdata_base_path + "genomics/sarscov2/genome/genome.gff3", checkIfExists: true) -+ ] -+ input[1] = file(params.modules_testdata_base_path + "genomics/sarscov2/genome/genome.fasta", checkIfExists: true) -+ """ -+ } -+ } -+ -+ then { -+ assertAll ( -+ { assert process.success }, -+ { assert snapshot(process.out).match() }, -+ { assert process.out.gtf == [] }, - { assert process.out.gffread_gff == [] } - ) - } - - } - -- test("sarscov2-gff3-gff3") { -- -- config "./nextflow-gff3.config" -- -- when { -- params { -- outdir = "$outputDir" -- } -- process { -- """ -- input[0] = file(params.modules_testdata_base_path + "genomics/sarscov2/genome/genome.gff3", checkIfExists: true) -- """ -- } -- } -- -- then { -- assertAll ( -- { assert process.success }, -- { assert snapshot( -- process.out.gffread_gff, -- process.out.versions -- ).match() }, -- { assert process.out.gtf == [] }, -+ test("sarscov2-gff3-fasta-stub") { -+ -+ options '-stub' -+ config "./nextflow-fasta.config" -+ -+ when { -+ params { -+ outdir = "$outputDir" -+ } -+ process { -+ """ -+ input[0] = [ -+ [id: 'test'], -+ file(params.modules_testdata_base_path + "genomics/sarscov2/genome/genome.gff3", checkIfExists: true) -+ ] -+ input[1] = file(params.modules_testdata_base_path + "genomics/sarscov2/genome/genome.fasta", checkIfExists: true) -+ """ -+ } -+ } -+ -+ then { -+ assertAll ( -+ { assert process.success }, -+ { assert snapshot(process.out).match() }, -+ { assert process.out.gtf == [] }, -+ { assert process.out.gffread_gff == [] } -+ ) -+ } -+ -+ } -+ -+ test("sarscov2-gff3-fasta-fail-catch") { -+ -+ options '-stub' -+ config "./nextflow-fasta.config" -+ -+ when { -+ params { -+ outdir = "$outputDir" -+ } -+ process { -+ """ -+ input[0] = [ -+ [id: 'genome'], -+ file(params.modules_testdata_base_path + "genomics/sarscov2/genome/genome.gff3", checkIfExists: true) -+ ] -+ input[1] = file(params.modules_testdata_base_path + "genomics/sarscov2/genome/genome.fasta", checkIfExists: true) -+ """ -+ } -+ } -+ -+ then { -+ assertAll ( -+ { assert ! process.success }, -+ { assert process.stdout.toString().contains("Input and output names are the same") } - ) - } - - ---- /dev/null -+++ modules/nf-core/gffread/tests/nextflow-fasta.config -@@ -0,0 +1,5 @@ -+process { -+ withName: GFFREAD { -+ ext.args = '-w -S' -+ } -+} - -************************************************************ diff --git a/modules/nf-core/gffread/main.nf b/modules/nf-core/gffread/main.nf index cfd3e2f..da55cba 100644 --- a/modules/nf-core/gffread/main.nf +++ b/modules/nf-core/gffread/main.nf @@ -28,6 +28,7 @@ process GFFREAD { def output_name = "${prefix}.${extension}" def output = extension == "fasta" ? "$output_name" : "-o $output_name" def args_sorted = args.replaceAll(/(.*)(-[wxy])(.*)/) { all, pre, param, post -> "$pre $post $param" }.trim() + // args_sorted = Move '-w', '-x', and '-y' to the end of the args string as gffread expects the file name after these parameters if ( "$output_name" in [ "$gff", "$fasta" ] ) error "Input and output names are the same, use \"task.ext.prefix\" to disambiguate!" """ gffread \\ @@ -46,10 +47,7 @@ process GFFREAD { def args = task.ext.args ?: '' def prefix = task.ext.prefix ?: "${meta.id}" def extension = args.contains("-T") ? 'gtf' : ( ( ['-w', '-x', '-y' ].any { args.contains(it) } ) ? 'fasta' : 'gff3' ) - def fasta_arg = fasta ? "-g $fasta" : '' def output_name = "${prefix}.${extension}" - def output = extension == "fasta" ? 
"$output_name" : "-o $output_name" - def args_sorted = args.replaceAll(/(.*)(-[wxy])(.*)/) { all, pre, param, post -> "$pre $post $param" }.trim() if ( "$output_name" in [ "$gff", "$fasta" ] ) error "Input and output names are the same, use \"task.ext.prefix\" to disambiguate!" """ touch $output_name diff --git a/modules/nf-core/gffread/meta.yml b/modules/nf-core/gffread/meta.yml index c060282..bebe7f5 100644 --- a/modules/nf-core/gffread/meta.yml +++ b/modules/nf-core/gffread/meta.yml @@ -1,53 +1,73 @@ name: gffread -description: Validate, filter, convert and perform various other operations on GFF files +description: Validate, filter, convert and perform various other operations on GFF + files keywords: - gff - conversion - validation tools: - gffread: - description: GFF/GTF utility providing format conversions, region filtering, FASTA sequence extraction and more. + description: GFF/GTF utility providing format conversions, region filtering, FASTA + sequence extraction and more. homepage: http://ccb.jhu.edu/software/stringtie/gff.shtml#gffread documentation: http://ccb.jhu.edu/software/stringtie/gff.shtml#gffread tool_dev_url: https://github.com/gpertea/gffread doi: 10.12688/f1000research.23297.1 licence: ["MIT"] + identifier: biotools:gffread input: - - meta: - type: map - description: | - Groovy Map containing meta data - e.g. [ id:'test' ] - - gff: - type: file - description: A reference file in either the GFF3, GFF2 or GTF format. - pattern: "*.{gff, gtf}" - - fasta: - type: file - description: A multi-fasta file with the genomic sequences - pattern: "*.{fasta,fa,faa,fas,fsa}" + - - meta: + type: map + description: | + Groovy Map containing meta data + e.g. [ id:'test' ] + - gff: + type: file + description: A reference file in either the GFF3, GFF2 or GTF format. + pattern: "*.{gff, gtf}" + - - fasta: + type: file + description: A multi-fasta file with the genomic sequences + pattern: "*.{fasta,fa,faa,fas,fsa}" output: - - meta: - type: map - description: | - Groovy Map containing meta data - e.g. [ id:'test' ] - gtf: - type: file - description: GTF file resulting from the conversion of the GFF input file if '-T' argument is present - pattern: "*.{gtf}" + - meta: + type: map + description: | + Groovy Map containing meta data + e.g. [ id:'test' ] + - "*.gtf": + type: file + description: GTF file resulting from the conversion of the GFF input file if + '-T' argument is present + pattern: "*.{gtf}" - gffread_gff: - type: file - description: GFF3 file resulting from the conversion of the GFF input file if '-T' argument is absent - pattern: "*.gff3" + - meta: + type: map + description: | + Groovy Map containing meta data + e.g. [ id:'test' ] + - "*.gff3": + type: file + description: GFF3 file resulting from the conversion of the GFF input file if + '-T' argument is absent + pattern: "*.gff3" - gffread_fasta: - type: file - description: Fasta file produced when either of '-w', '-x', '-y' parameters is present - pattern: "*.fasta" + - meta: + type: map + description: | + Groovy Map containing meta data + e.g. 
[ id:'test' ] + - "*.fasta": + type: file + description: Fasta file produced when either of '-w', '-x', '-y' parameters + is present + pattern: "*.fasta" - versions: - type: file - description: File containing software versions - pattern: "versions.yml" + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" authors: - "@edmundmiller" maintainers: diff --git a/modules/nf-core/gt/gff3/environment.yml b/modules/nf-core/gt/gff3/environment.yml index 8289fb3..666eb47 100644 --- a/modules/nf-core/gt/gff3/environment.yml +++ b/modules/nf-core/gt/gff3/environment.yml @@ -1,9 +1,7 @@ --- # yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json -name: "gt_gff3" channels: - conda-forge - bioconda - - defaults dependencies: - "bioconda::genometools-genometools=1.6.5" diff --git a/modules/nf-core/gt/gff3/meta.yml b/modules/nf-core/gt/gff3/meta.yml index 5cecd8d..62c4cbc 100644 --- a/modules/nf-core/gt/gff3/meta.yml +++ b/modules/nf-core/gt/gff3/meta.yml @@ -1,7 +1,7 @@ ---- # yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/meta-schema.json name: "gt_gff3" -description: "GenomeTools gt-gff3 utility to parse, possibly transform, and output GFF3 files" +description: "GenomeTools gt-gff3 utility to parse, possibly transform, and output + GFF3 files" keywords: - genome - gff3 @@ -14,34 +14,43 @@ tools: tool_dev_url: "https://github.com/genometools/genometools" doi: "10.1109/TCBB.2013.68" licence: ["ISC"] + identifier: "" input: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. `[ id:'test' ]` - - gff3: - type: file - description: Input gff3 file - pattern: "*.{gff,gff3}" + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'test' ]` + - gff3: + type: file + description: Input gff3 file + pattern: "*.{gff,gff3}" output: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. `[ id:'test' ]` - gt_gff3: - type: file - description: Parsed gff3 file produced only if there is no parsing error - pattern: "*.gt.gff3" + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'test' ]` + - "*.gt.gff3": + type: file + description: Parsed gff3 file produced only if there is no parsing error + pattern: "*.gt.gff3" - error_log: - type: file - description: Error log if gt-gff3 failed to parse the input gff3 file - pattern: "*.error.log" + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
`[ id:'test' ]` + - "*.error.log": + type: file + description: Error log if gt-gff3 failed to parse the input gff3 file + pattern: "*.error.log" - versions: - type: file - description: File containing software versions - pattern: "versions.yml" + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" authors: - "@gallvp" maintainers: diff --git a/modules/nf-core/gt/gff3/tests/main.nf.test b/modules/nf-core/gt/gff3/tests/main.nf.test index cb44bc8..46c7da3 100644 --- a/modules/nf-core/gt/gff3/tests/main.nf.test +++ b/modules/nf-core/gt/gff3/tests/main.nf.test @@ -16,7 +16,7 @@ nextflow_process { """ input[0] = [ [ id:'test' ], // meta map - file(params.test_data['sarscov2']['genome']['genome_gff3'], checkIfExists: true) + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.gff3', checkIfExists: true) ] """ } diff --git a/modules/nf-core/gunzip/environment.yml b/modules/nf-core/gunzip/environment.yml index 25910b3..c779485 100644 --- a/modules/nf-core/gunzip/environment.yml +++ b/modules/nf-core/gunzip/environment.yml @@ -1,7 +1,7 @@ -name: gunzip channels: - conda-forge - bioconda - - defaults dependencies: - - conda-forge::sed=4.7 + - conda-forge::grep=3.11 + - conda-forge::sed=4.8 + - conda-forge::tar=1.34 diff --git a/modules/nf-core/gunzip/main.nf b/modules/nf-core/gunzip/main.nf index 468a6f2..5e67e3b 100644 --- a/modules/nf-core/gunzip/main.nf +++ b/modules/nf-core/gunzip/main.nf @@ -4,8 +4,8 @@ process GUNZIP { conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/ubuntu:20.04' : - 'nf-core/ubuntu:20.04' }" + 'https://depot.galaxyproject.org/singularity/ubuntu:22.04' : + 'nf-core/ubuntu:22.04' }" input: tuple val(meta), path(archive) @@ -18,8 +18,11 @@ process GUNZIP { task.ext.when == null || task.ext.when script: - def args = task.ext.args ?: '' - gunzip = archive.toString() - '.gz' + def args = task.ext.args ?: '' + def extension = ( archive.toString() - '.gz' ).tokenize('.')[-1] + def name = archive.toString() - '.gz' - ".$extension" + def prefix = task.ext.prefix ?: name + gunzip = prefix + ".$extension" """ # Not calling gunzip itself because it creates files # with the original group ownership rather than the @@ -37,7 +40,11 @@ process GUNZIP { """ stub: - gunzip = archive.toString() - '.gz' + def args = task.ext.args ?: '' + def extension = ( archive.toString() - '.gz' ).tokenize('.')[-1] + def name = archive.toString() - '.gz' - ".$extension" + def prefix = task.ext.prefix ?: name + gunzip = prefix + ".$extension" """ touch $gunzip cat <<-END_VERSIONS > versions.yml diff --git a/modules/nf-core/gunzip/meta.yml b/modules/nf-core/gunzip/meta.yml index 231034f..9066c03 100644 --- a/modules/nf-core/gunzip/meta.yml +++ b/modules/nf-core/gunzip/meta.yml @@ -10,25 +10,32 @@ tools: gzip is a file format and a software application used for file compression and decompression. documentation: https://www.gnu.org/software/gzip/manual/gzip.html licence: ["GPL-3.0-or-later"] + identifier: "" input: - - meta: - type: map - description: | - Optional groovy Map containing meta information - e.g. [ id:'test', single_end:false ] - - archive: - type: file - description: File to be compressed/uncompressed - pattern: "*.*" + - - meta: + type: map + description: | + Optional groovy Map containing meta information + e.g. 
[ id:'test', single_end:false ] + - archive: + type: file + description: File to be compressed/uncompressed + pattern: "*.*" output: - gunzip: - type: file - description: Compressed/uncompressed file - pattern: "*.*" + - meta: + type: file + description: Compressed/uncompressed file + pattern: "*.*" + - $gunzip: + type: file + description: Compressed/uncompressed file + pattern: "*.*" - versions: - type: file - description: File containing software versions - pattern: "versions.yml" + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" authors: - "@joseespinosa" - "@drpatelh" @@ -37,3 +44,4 @@ maintainers: - "@joseespinosa" - "@drpatelh" - "@jfy133" + - "@gallvp" diff --git a/modules/nf-core/gunzip/tests/main.nf.test b/modules/nf-core/gunzip/tests/main.nf.test index 6406008..776211a 100644 --- a/modules/nf-core/gunzip/tests/main.nf.test +++ b/modules/nf-core/gunzip/tests/main.nf.test @@ -33,4 +33,89 @@ nextflow_process { } + test("Should run without failures - prefix") { + + config './nextflow.config' + + when { + params { + outdir = "$outputDir" + } + process { + """ + input[0] = Channel.of([ + [ id: 'test' ], + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true) + ] + ) + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + + test("Should run without failures - stub") { + + options '-stub' + + when { + params { + outdir = "$outputDir" + } + process { + """ + input[0] = Channel.of([ + [], + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true) + ] + ) + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + + test("Should run without failures - prefix - stub") { + + options '-stub' + config './nextflow.config' + + when { + params { + outdir = "$outputDir" + } + process { + """ + input[0] = Channel.of([ + [ id: 'test' ], + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true) + ] + ) + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + } diff --git a/modules/nf-core/gunzip/tests/main.nf.test.snap b/modules/nf-core/gunzip/tests/main.nf.test.snap index 720fd9f..069967e 100644 --- a/modules/nf-core/gunzip/tests/main.nf.test.snap +++ b/modules/nf-core/gunzip/tests/main.nf.test.snap @@ -1,4 +1,70 @@ { + "Should run without failures - prefix - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test.xyz.fastq:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + "versions.yml:md5,54376d32aca20e937a4ec26dac228e84" + ], + "gunzip": [ + [ + { + "id": "test" + }, + "test.xyz.fastq:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions": [ + "versions.yml:md5,54376d32aca20e937a4ec26dac228e84" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-06-25T11:35:10.861293" + }, + "Should run without failures - stub": { + "content": [ + { + "0": [ + [ + [ + + ], + "test_1.fastq:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + "versions.yml:md5,54376d32aca20e937a4ec26dac228e84" + ], + "gunzip": [ + [ + [ + + ], + "test_1.fastq:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions": [ + "versions.yml:md5,54376d32aca20e937a4ec26dac228e84" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + 
"nextflow": "24.04.2" + }, + "timestamp": "2024-06-25T11:35:05.857145" + }, "Should run without failures": { "content": [ { @@ -26,6 +92,43 @@ ] } ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, "timestamp": "2023-10-17T15:35:37.690477896" + }, + "Should run without failures - prefix": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test.xyz.fastq:md5,4161df271f9bfcd25d5845a1e220dbec" + ] + ], + "1": [ + "versions.yml:md5,54376d32aca20e937a4ec26dac228e84" + ], + "gunzip": [ + [ + { + "id": "test" + }, + "test.xyz.fastq:md5,4161df271f9bfcd25d5845a1e220dbec" + ] + ], + "versions": [ + "versions.yml:md5,54376d32aca20e937a4ec26dac228e84" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-06-25T11:33:32.921739" } } \ No newline at end of file diff --git a/modules/nf-core/gunzip/tests/nextflow.config b/modules/nf-core/gunzip/tests/nextflow.config new file mode 100644 index 0000000..dec7764 --- /dev/null +++ b/modules/nf-core/gunzip/tests/nextflow.config @@ -0,0 +1,5 @@ +process { + withName: GUNZIP { + ext.prefix = { "${meta.id}.xyz" } + } +} diff --git a/modules/nf-core/liftoff/environment.yml b/modules/nf-core/liftoff/environment.yml index 8761c9b..94c10a3 100644 --- a/modules/nf-core/liftoff/environment.yml +++ b/modules/nf-core/liftoff/environment.yml @@ -1,9 +1,6 @@ ---- -# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json -name: "liftoff" channels: - conda-forge - bioconda - - defaults + dependencies: - - "bioconda::liftoff=1.6.3" + - bioconda::liftoff=1.6.3 diff --git a/modules/nf-core/liftoff/meta.yml b/modules/nf-core/liftoff/meta.yml index 10e502c..a8ed079 100644 --- a/modules/nf-core/liftoff/meta.yml +++ b/modules/nf-core/liftoff/meta.yml @@ -1,4 +1,3 @@ ---- # yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/meta-schema.json name: "liftoff" description: | @@ -20,51 +19,66 @@ tools: tool_dev_url: "https://github.com/agshumate/Liftoff" doi: "10.1093/bioinformatics/bty191" licence: ["GPL v3 License"] + identifier: biotools:liftoff input: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. `[ id:'test' ]` - - target_fa: - type: file - description: Target assembly in fasta format - pattern: "*.{fsa,fa,fasta}" - - ref_fa: - type: file - description: Reference assembly in fasta format - pattern: "*.{fsa,fa,fasta}" - - ref_annotation: - type: file - description: Reference assembly annotations in gtf or gff3 format - pattern: "*.{gtf,gff3}" - - ref_db: - type: file - description: | - Name of feature database; if not specified, the -g argument must - be provided and a database will be built automatically + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'test' ]` + - target_fa: + type: file + description: Target assembly in fasta format + pattern: "*.{fsa,fa,fasta}" + - - ref_fa: + type: file + description: Reference assembly in fasta format + pattern: "*.{fsa,fa,fasta}" + - - ref_annotation: + type: file + description: Reference assembly annotations in gtf or gff3 format + pattern: "*.{gtf,gff3}" + - - ref_db: + type: file + description: | + Name of feature database; if not specified, the -g argument must + be provided and a database will be built automatically output: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. 
`[ id:'test' ]` - gff3: - type: file - description: Lifted annotations for the target assembly in gff3 format - pattern: "*.gff3" + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'test' ]` + - ${prefix}.gff3: + type: file + description: Lifted annotations for the target assembly in gff3 format + pattern: "*.gff3" - polished_gff3: - type: file - description: Polished lifted annotations for the target assembly in gff3 format - pattern: "*.polished.gff3" + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'test' ]` + - "*.polished.gff3": + type: file + description: Polished lifted annotations for the target assembly in gff3 format + pattern: "*.polished.gff3" - unmapped_txt: - type: file - description: List of unmapped reference annotations - pattern: "*.unmapped.txt" + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'test' ]` + - "*.unmapped.txt": + type: file + description: List of unmapped reference annotations + pattern: "*.unmapped.txt" - versions: - type: file - description: File containing software versions - pattern: "versions.yml" + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" authors: - "@GallVp" maintainers: diff --git a/modules/nf-core/liftoff/tests/main.nf.test b/modules/nf-core/liftoff/tests/main.nf.test index fc0f567..c8ebf26 100644 --- a/modules/nf-core/liftoff/tests/main.nf.test +++ b/modules/nf-core/liftoff/tests/main.nf.test @@ -21,7 +21,7 @@ nextflow_process { """ input[0] = [ [ id:'test' ], - file(params.test_data['homo_sapiens']['genome']['genome_1_fasta'], checkIfExists: true) + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/chr1/genome.fasta.gz', checkIfExists: true) ] """ } @@ -33,11 +33,11 @@ nextflow_process { """ input[0] = [ [ id:'test' ], - file(params.test_data['homo_sapiens']['genome']['genome_21_fasta'], checkIfExists: true) + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/chr21/sequence/genome.fasta', checkIfExists: true) ] input[1] = GUNZIP.out.gunzip.map { meta, file -> file } input[2] = [ - file(params.test_data['homo_sapiens']['genome']['genome_1_gtf'], checkIfExists: true) + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/chr1/genome.gtf', checkIfExists: true) ] input[3] = [] """ @@ -67,7 +67,7 @@ nextflow_process { """ input[0] = [ [ id:'test' ], - file(params.test_data['homo_sapiens']['genome']['genome_1_fasta'], checkIfExists: true) + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/chr1/genome.fasta.gz', checkIfExists: true) ] """ } @@ -79,11 +79,11 @@ nextflow_process { """ input[0] = [ [ id:'test' ], - file(params.test_data['homo_sapiens']['genome']['genome_21_fasta'], checkIfExists: true) + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/chr21/sequence/genome.fasta', checkIfExists: true) ] input[1] = GUNZIP.out.gunzip.map { meta, file -> file } input[2] = [ - file(params.test_data['homo_sapiens']['genome']['genome_1_gtf'], checkIfExists: true) + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/chr1/genome.gtf', checkIfExists: true) ] input[3] = [] """ diff --git a/modules/nf-core/liftoff/tests/tags.yml b/modules/nf-core/liftoff/tests/tags.yml deleted file mode 100644 index 4d0adb6..0000000 --- a/modules/nf-core/liftoff/tests/tags.yml +++ /dev/null @@ -1,2 +0,0 @@ -liftoff: - - "modules/nf-core/liftoff/**" diff --git 
a/modules/nf-core/orthofinder/environment.yml b/modules/nf-core/orthofinder/environment.yml new file mode 100644 index 0000000..68c475f --- /dev/null +++ b/modules/nf-core/orthofinder/environment.yml @@ -0,0 +1,6 @@ +channels: + - conda-forge + - bioconda +dependencies: + - bioconda::diamond=2.1.9 + - bioconda::orthofinder=2.5.5 diff --git a/modules/nf-core/orthofinder/main.nf b/modules/nf-core/orthofinder/main.nf new file mode 100644 index 0000000..a47c4de --- /dev/null +++ b/modules/nf-core/orthofinder/main.nf @@ -0,0 +1,80 @@ +process ORTHOFINDER { + tag "$meta.id" + label 'process_high' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/orthofinder:2.5.5--hdfd78af_2': + 'biocontainers/orthofinder:2.5.5--hdfd78af_2' }" + + input: + tuple val(meta), path(fastas, stageAs: 'input/') + tuple val(meta2), path(prior_run) + + output: + tuple val(meta), path("$prefix") , emit: orthofinder + tuple val(meta), path("$prefix/WorkingDirectory") , emit: working + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + prefix = task.ext.prefix ?: "${meta.id}" + def include_command = prior_run ? "-b $prior_run" : '' + + """ + mkdir temp_pickle + + orthofinder \\ + -t $task.cpus \\ + -a $task.cpus \\ + -p temp_pickle \\ + -f input \\ + -n $prefix \\ + $include_command \\ + $args + + if [ -e input/OrthoFinder/Results_$prefix ]; then + mv input/OrthoFinder/Results_$prefix $prefix + fi + + if [ -e ${prior_run}/OrthoFinder/Results_$prefix ]; then + mv ${prior_run}/OrthoFinder/Results_$prefix $prefix + fi + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + orthofinder: \$(orthofinder -h | sed -n 's/.*version \\(.*\\) Copy.*/\\1/p') + END_VERSIONS + """ + + stub: + def args = task.ext.args ?: '' + prefix = task.ext.prefix ?: "${meta.id}" + def include_command = prior_run ? "-b $prior_run" : '' + + """ + mkdir -p $prefix/Comparative_Genomics_Statistics + mkdir $prefix/Gene_Duplication_Events + mkdir $prefix/Gene_Trees + mkdir $prefix/Orthogroup_Sequences + mkdir $prefix/Orthogroups + mkdir $prefix/Orthologues + mkdir $prefix/Phylogenetic_Hierarchical_Orthogroups + mkdir $prefix/Phylogenetically_Misplaced_Genes + mkdir $prefix/Putative_Xenologs + mkdir $prefix/Resolved_Gene_Trees + mkdir $prefix/Single_Copy_Orthologue_Sequences + mkdir $prefix/Species_Tree + mkdir $prefix/WorkingDirectory + + touch $prefix/Log.txt + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + orthofinder: \$(orthofinder -h | sed -n 's/.*version \\(.*\\) Copy.*/\\1/p') + END_VERSIONS + """ +} diff --git a/modules/nf-core/orthofinder/meta.yml b/modules/nf-core/orthofinder/meta.yml new file mode 100644 index 0000000..4aeb46b --- /dev/null +++ b/modules/nf-core/orthofinder/meta.yml @@ -0,0 +1,71 @@ +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/meta-schema.json +name: "orthofinder" +description: OrthoFinder is a fast, accurate and comprehensive platform for comparative + genomics. +keywords: + - genomics + - orthogroup + - orthologs + - gene + - duplication + - tree + - phylogeny +tools: + - "orthofinder": + description: "Accurate inference of orthogroups, orthologues, gene trees and rooted + species tree made easy!" 
+ homepage: "https://github.com/davidemms/OrthoFinder" + documentation: "https://github.com/davidemms/OrthoFinder" + tool_dev_url: "https://github.com/davidemms/OrthoFinder" + doi: "10.1186/s13059-019-1832-y" + licence: ["GPL v3"] + identifier: biotools:OrthoFinder + +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'sample1' ]` + - fastas: + type: list + description: Input fasta files + pattern: "*.{fa,faa,fasta,fas,pep}" + - - meta2: + type: map + description: | + Groovy Map containing a name + e.g. `[ id:'folder1' ]` + - prior_run: + type: directory + description: | + A folder container containing a previous WorkingDirectory from Orthofinder. +output: + - orthofinder: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'sample1' ]` + - $prefix: + type: directory + description: Orthofinder output directory + - working: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'sample1' ]` + - $prefix/WorkingDirectory: + type: directory + description: Orthofinder output WorkingDirectory (used for the orthofinder resume + function) + - versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@GallVp" +maintainers: + - "@GallVp" diff --git a/modules/nf-core/orthofinder/tests/main.nf.test b/modules/nf-core/orthofinder/tests/main.nf.test new file mode 100644 index 0000000..aa68d1d --- /dev/null +++ b/modules/nf-core/orthofinder/tests/main.nf.test @@ -0,0 +1,161 @@ +import groovy.io.FileType + +nextflow_process { + + name "Test Process ORTHOFINDER" + script "../main.nf" + process "ORTHOFINDER" + + tag "modules" + tag "modules_nfcore" + tag "orthofinder" + tag "untar" + + test("sarscov2 - candidatus_portiera_aleyrodidarum - proteome") { + + when { + process { + """ + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/proteome.fasta', checkIfExists: true) + .copyTo("${workDir}/sarscov2.fasta") + + file(params.modules_testdata_base_path + 'genomics/prokaryotes/candidatus_portiera_aleyrodidarum/genome/proteome.fasta', checkIfExists: true) + .copyTo("${workDir}/candidatus_portiera_aleyrodidarum.fasta") + + def file_a = file("${workDir}/sarscov2.fasta", checkIfExists:true) + def file_b = file("${workDir}/candidatus_portiera_aleyrodidarum.fasta", checkIfExists:true) + + input[0] = [ + [ id:'test', single_end:false ], + [ file_a, file_b ] + ] + input[1] = [ + [], + [] + ] + """ + } + } + + then { + assert process.success + + def all_files = [] + + file(process.out.orthofinder[0][1]).eachFileRecurse (FileType.FILES) { file -> + all_files << file + } + + def stable_file_names = [ + 'Statistics_PerSpecies.tsv', + 'SpeciesTree_Gene_Duplications_0.5_Support.txt', + 'SpeciesTree_rooted.txt' + ] + + def stable_files = all_files.findAll { it.name in stable_file_names } + + assert snapshot( + stable_files.toSorted(), + process.out.versions[0] + ).match() + } + + } + + + test("sarscov2 - candidatus_portiera_aleyrodidarum - proteome - resume") { + + + setup { + run("UNTAR") { + script "../../untar/main.nf" + process { + """ + input[0] = [ [ id:'test1' ], // meta map + file(params.modules_testdata_base_path + 'delete_me/orthofinder/WorkingDirectory.tar.gz', checkIfExists: true) + ] + """ + } + } + } + + when { + process { + """ + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/proteome.fasta', checkIfExists: true) + .copyTo("${workDir}/sarscov2.fasta") + + def file_a = 
file("https://raw.githubusercontent.com/nf-core/test-datasets/proteinfold/testdata/sequences/H1065.fasta") + def file_c = UNTAR.out.untar + input[0] = [ + [ id:'test_2', single_end:false ], + [ file_a ] + ] + input[1] = UNTAR.out.untar + """ + } + } + + then { + assert process.success + + def all_files = [] + + file(process.out.orthofinder[0][1]).eachFileRecurse (FileType.FILES) { file -> + all_files << file + } + + def stable_file_names = [ + 'Statistics_PerSpecies.tsv', + 'OrthologuesStats_Totals.tsv', + 'Duplications_per_Species_Tree_Node.tsv' + ] + + def stable_files = all_files.findAll { it.name in stable_file_names } + + assert snapshot( + stable_files.toSorted(), + process.out.versions[0] + ).match() + } + + } + + test("sarscov2 - candidatus_portiera_aleyrodidarum - proteome - stub") { + + options '-stub' + + when { + process { + """ + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/proteome.fasta', checkIfExists: true) + .copyTo("${workDir}/sarscov2.fasta") + + file(params.modules_testdata_base_path + 'genomics/prokaryotes/candidatus_portiera_aleyrodidarum/genome/proteome.fasta', checkIfExists: true) + .copyTo("${workDir}/candidatus_portiera_aleyrodidarum.fasta") + + def file_a = file("${workDir}/sarscov2.fasta", checkIfExists:true) + def file_b = file("${workDir}/candidatus_portiera_aleyrodidarum.fasta", checkIfExists:true) + + input[0] = [ + [ id:'test', single_end:false ], + [ file_a, file_b ] + ] + input[1] = [ + [], + [] + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + +} diff --git a/modules/nf-core/orthofinder/tests/main.nf.test.snap b/modules/nf-core/orthofinder/tests/main.nf.test.snap new file mode 100644 index 0000000..f2c7b91 --- /dev/null +++ b/modules/nf-core/orthofinder/tests/main.nf.test.snap @@ -0,0 +1,171 @@ +{ + "sarscov2 - candidatus_portiera_aleyrodidarum - proteome": { + "content": [ + [ + "Statistics_PerSpecies.tsv:md5,984b5011a34d54527fe17896bfa36a2d", + "SpeciesTree_Gene_Duplications_0.5_Support.txt:md5,8b7a673e2e8b6d1aeb697f2bb88afa18", + "SpeciesTree_rooted.txt:md5,4d5ea525feebe479fca0c0768271ba81" + ], + "versions.yml:md5,86b472c85626aac1840eec0769016f5c" + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": "2024-09-03T10:59:02.895708598" + }, + "sarscov2 - candidatus_portiera_aleyrodidarum - proteome - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + [ + [ + + ], + [ + + ], + [ + + ], + "Log.txt:md5,d41d8cd98f00b204e9800998ecf8427e", + [ + + ], + [ + + ], + [ + + ], + [ + + ], + [ + + ], + [ + + ], + [ + + ], + [ + + ], + [ + + ], + [ + + ] + ] + ] + ], + "1": [ + [ + { + "id": "test", + "single_end": false + }, + [ + + ] + ] + ], + "2": [ + "versions.yml:md5,86b472c85626aac1840eec0769016f5c" + ], + "orthofinder": [ + [ + { + "id": "test", + "single_end": false + }, + [ + [ + + ], + [ + + ], + [ + + ], + "Log.txt:md5,d41d8cd98f00b204e9800998ecf8427e", + [ + + ], + [ + + ], + [ + + ], + [ + + ], + [ + + ], + [ + + ], + [ + + ], + [ + + ], + [ + + ], + [ + + ] + ] + ] + ], + "versions": [ + "versions.yml:md5,86b472c85626aac1840eec0769016f5c" + ], + "working": [ + [ + { + "id": "test", + "single_end": false + }, + [ + + ] + ] + ] + } + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": "2024-09-03T11:07:31.319665056" + }, + "sarscov2 - candidatus_portiera_aleyrodidarum - proteome - resume": { + "content": [ + [ + 
"Duplications_per_Species_Tree_Node.tsv:md5,addc6f5ceec40bd82b00038d1872a27c", + "OrthologuesStats_Totals.tsv:md5,20d243abef226051a43cb37e922fc3eb", + "Statistics_PerSpecies.tsv:md5,83174c383b6c6828d1cc9b3be1679890" + ], + "versions.yml:md5,86b472c85626aac1840eec0769016f5c" + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": "2024-09-03T11:04:10.916947006" + } +} \ No newline at end of file diff --git a/modules/nf-core/orthofinder/tests/tags.yml b/modules/nf-core/orthofinder/tests/tags.yml new file mode 100644 index 0000000..f386e25 --- /dev/null +++ b/modules/nf-core/orthofinder/tests/tags.yml @@ -0,0 +1,2 @@ +orthofinder: + - "modules/nf-core/orthofinder/**" diff --git a/modules/pfr/repeatmodeler/builddatabase/environment.yml b/modules/nf-core/repeatmodeler/builddatabase/environment.yml similarity index 81% rename from modules/pfr/repeatmodeler/builddatabase/environment.yml rename to modules/nf-core/repeatmodeler/builddatabase/environment.yml index ecc282e..5314307 100644 --- a/modules/pfr/repeatmodeler/builddatabase/environment.yml +++ b/modules/nf-core/repeatmodeler/builddatabase/environment.yml @@ -1,9 +1,7 @@ --- # yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json -name: "repeatmodeler_builddatabase" channels: - conda-forge - bioconda - - defaults dependencies: - "bioconda::repeatmodeler=2.0.5" diff --git a/modules/pfr/repeatmodeler/builddatabase/main.nf b/modules/nf-core/repeatmodeler/builddatabase/main.nf similarity index 100% rename from modules/pfr/repeatmodeler/builddatabase/main.nf rename to modules/nf-core/repeatmodeler/builddatabase/main.nf diff --git a/modules/nf-core/repeatmodeler/builddatabase/meta.yml b/modules/nf-core/repeatmodeler/builddatabase/meta.yml new file mode 100644 index 0000000..cc78cf0 --- /dev/null +++ b/modules/nf-core/repeatmodeler/builddatabase/meta.yml @@ -0,0 +1,47 @@ +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/meta-schema.json +name: "repeatmodeler_builddatabase" +description: Create a database for RepeatModeler +keywords: + - genomics + - fasta + - repeat +tools: + - "repeatmodeler": + description: "RepeatModeler is a de-novo repeat family identification and modeling + package." + homepage: "https://github.com/Dfam-consortium/RepeatModeler" + documentation: "https://github.com/Dfam-consortium/RepeatModeler" + tool_dev_url: "https://github.com/Dfam-consortium/RepeatModeler" + licence: ["Open Software License v2.1"] + identifier: biotools:repeatmodeler + +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'sample1' ]` + - fasta: + type: file + description: Fasta file + pattern: "*.{fasta,fsa,fa}" +output: + - db: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
`[ id:'sample1' ]` + - ${prefix}.*: + type: file + description: Database files for repeatmodeler + pattern: "`${prefix}.*`" + - versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@GallVp" +maintainers: + - "@GallVp" diff --git a/modules/pfr/repeatmodeler/builddatabase/tests/main.nf.test b/modules/nf-core/repeatmodeler/builddatabase/tests/main.nf.test similarity index 69% rename from modules/pfr/repeatmodeler/builddatabase/tests/main.nf.test rename to modules/nf-core/repeatmodeler/builddatabase/tests/main.nf.test index fdeda4a..78b78a6 100644 --- a/modules/pfr/repeatmodeler/builddatabase/tests/main.nf.test +++ b/modules/nf-core/repeatmodeler/builddatabase/tests/main.nf.test @@ -16,7 +16,7 @@ nextflow_process { """ input[0] = [ [ id:'test' ], // meta map - file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true) + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) ] """ } @@ -26,7 +26,8 @@ nextflow_process { assertAll( { assert process.success }, { assert snapshot(process.out.versions).match("versions") }, - { assert snapshot(process.out.db[0][1].collect { file(it).name }.sort().toString()).match("for-stub-match") } + { assert snapshot(process.out.db[0][1].collect { file(it).name }.sort().toString()).match("db") }, + { assert snapshot(process.out.db[0][1].findAll { ! ( "$it"[-3..-1] in [ 'nin', 'njs' ] ) } ).match("stable_md5") } ) } @@ -41,7 +42,7 @@ nextflow_process { """ input[0] = [ [ id:'test' ], // meta map - file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true) + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) ] """ } @@ -50,11 +51,10 @@ nextflow_process { then { assertAll( { assert process.success }, - { assert snapshot(process.out.versions).match("versions") }, - { assert snapshot(process.out.db[0][1].collect { file(it).name }.sort().toString()).match("for-stub-match") } + { assert snapshot(process.out).match() } ) } } -} \ No newline at end of file +} diff --git a/modules/nf-core/repeatmodeler/builddatabase/tests/main.nf.test.snap b/modules/nf-core/repeatmodeler/builddatabase/tests/main.nf.test.snap new file mode 100644 index 0000000..1f1a551 --- /dev/null +++ b/modules/nf-core/repeatmodeler/builddatabase/tests/main.nf.test.snap @@ -0,0 +1,92 @@ +{ + "sarscov2-genome_fasta-stub": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + [ + "test.nhr:md5,d41d8cd98f00b204e9800998ecf8427e", + "test.nin:md5,d41d8cd98f00b204e9800998ecf8427e", + "test.njs:md5,d41d8cd98f00b204e9800998ecf8427e", + "test.nnd:md5,d41d8cd98f00b204e9800998ecf8427e", + "test.nni:md5,d41d8cd98f00b204e9800998ecf8427e", + "test.nog:md5,d41d8cd98f00b204e9800998ecf8427e", + "test.nsq:md5,d41d8cd98f00b204e9800998ecf8427e", + "test.translation:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + ], + "1": [ + "versions.yml:md5,7944637266bc3e2726899eaad5e46c87" + ], + "db": [ + [ + { + "id": "test" + }, + [ + "test.nhr:md5,d41d8cd98f00b204e9800998ecf8427e", + "test.nin:md5,d41d8cd98f00b204e9800998ecf8427e", + "test.njs:md5,d41d8cd98f00b204e9800998ecf8427e", + "test.nnd:md5,d41d8cd98f00b204e9800998ecf8427e", + "test.nni:md5,d41d8cd98f00b204e9800998ecf8427e", + "test.nog:md5,d41d8cd98f00b204e9800998ecf8427e", + "test.nsq:md5,d41d8cd98f00b204e9800998ecf8427e", + "test.translation:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + ], + "versions": [ + 
"versions.yml:md5,7944637266bc3e2726899eaad5e46c87" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-04-02T12:06:44.261566" + }, + "versions": { + "content": [ + [ + "versions.yml:md5,7944637266bc3e2726899eaad5e46c87" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-01-09T15:14:48.807063" + }, + "stable_md5": { + "content": [ + [ + "test.nhr:md5,1a41cb6d0b00c28f62ad60e75ae2f6fc", + "test.nnd:md5,2002e13acf59079a1a5782c918894579", + "test.nni:md5,26a954ba0fd80983b550d8f6b8b35ff8", + "test.nog:md5,30896f123998e926ea2237b89091e7fe", + "test.nsq:md5,982cbc7d9e38743b9b1037588862b9da", + "test.translation:md5,ccbb119522c09daa976a9015ba999329" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-04-23T10:03:41.669433" + }, + "db": { + "content": [ + "[test.nhr, test.nin, test.njs, test.nnd, test.nni, test.nog, test.nsq, test.translation]" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-04-02T12:08:36.94713" + } +} \ No newline at end of file diff --git a/modules/nf-core/repeatmodeler/builddatabase/tests/tags.yml b/modules/nf-core/repeatmodeler/builddatabase/tests/tags.yml new file mode 100644 index 0000000..c524294 --- /dev/null +++ b/modules/nf-core/repeatmodeler/builddatabase/tests/tags.yml @@ -0,0 +1,2 @@ +repeatmodeler/builddatabase: + - "modules/nf-core/repeatmodeler/builddatabase/**" diff --git a/modules/pfr/repeatmodeler/repeatmodeler/environment.yml b/modules/nf-core/repeatmodeler/repeatmodeler/environment.yml similarity index 81% rename from modules/pfr/repeatmodeler/repeatmodeler/environment.yml rename to modules/nf-core/repeatmodeler/repeatmodeler/environment.yml index 2422071..5314307 100644 --- a/modules/pfr/repeatmodeler/repeatmodeler/environment.yml +++ b/modules/nf-core/repeatmodeler/repeatmodeler/environment.yml @@ -1,9 +1,7 @@ --- # yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json -name: "repeatmodeler_repeatmodeler" channels: - conda-forge - bioconda - - defaults dependencies: - "bioconda::repeatmodeler=2.0.5" diff --git a/modules/pfr/repeatmodeler/repeatmodeler/main.nf b/modules/nf-core/repeatmodeler/repeatmodeler/main.nf similarity index 85% rename from modules/pfr/repeatmodeler/repeatmodeler/main.nf rename to modules/nf-core/repeatmodeler/repeatmodeler/main.nf index c7df9ca..9d0449f 100644 --- a/modules/pfr/repeatmodeler/repeatmodeler/main.nf +++ b/modules/nf-core/repeatmodeler/repeatmodeler/main.nf @@ -1,6 +1,6 @@ process REPEATMODELER_REPEATMODELER { tag "$meta.id" - label 'process_high' + label 'process_medium' conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
@@ -11,10 +11,10 @@ process REPEATMODELER_REPEATMODELER { tuple val(meta), path(db) output: - tuple val(meta), path("*.fa") , emit: fasta - tuple val(meta), path("*.stk") , emit: stk - tuple val(meta), path("*.log") , emit: log - path "versions.yml" , emit: versions + tuple val(meta), path("*.fa") , emit: fasta + tuple val(meta), path("*.stk"), emit: stk + tuple val(meta), path("*.log"), emit: log + path "versions.yml" , emit: versions when: task.ext.when == null || task.ext.when diff --git a/modules/nf-core/repeatmodeler/repeatmodeler/meta.yml b/modules/nf-core/repeatmodeler/repeatmodeler/meta.yml new file mode 100644 index 0000000..6693ae9 --- /dev/null +++ b/modules/nf-core/repeatmodeler/repeatmodeler/meta.yml @@ -0,0 +1,68 @@ +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/meta-schema.json +name: "repeatmodeler_repeatmodeler" +description: Performs de novo transposable element (TE) family identification with + RepeatModeler +keywords: + - genomics + - fasta + - repeat + - transposable element +tools: + - "repeatmodeler": + description: "RepeatModeler is a de-novo repeat family identification and modeling + package." + homepage: "https://github.com/Dfam-consortium/RepeatModeler" + documentation: "https://github.com/Dfam-consortium/RepeatModeler" + tool_dev_url: "https://github.com/Dfam-consortium/RepeatModeler" + licence: ["Open Software License v2.1"] + identifier: biotools:repeatmodeler +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'sample1' ]` + - db: + type: file + description: RepeatModeler database files generated with REPEATMODELER_BUILDDATABASE + pattern: "*" +output: + - fasta: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'sample1' ]` + - "*.fa": + type: file + description: Consensus repeat sequences + pattern: "*.fa" + - stk: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'sample1' ]` + - "*.stk": + type: file + description: Seed alignments + pattern: "*.stk" + - log: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
`[ id:'sample1' ]` + - "*.log": + type: file + description: A summarized log of the run + pattern: "*.log" + - versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@GallVp" +maintainers: + - "@GallVp" diff --git a/modules/pfr/repeatmodeler/repeatmodeler/tests/main.nf.test b/modules/nf-core/repeatmodeler/repeatmodeler/tests/main.nf.test similarity index 56% rename from modules/pfr/repeatmodeler/repeatmodeler/tests/main.nf.test rename to modules/nf-core/repeatmodeler/repeatmodeler/tests/main.nf.test index dd7185f..829e222 100644 --- a/modules/pfr/repeatmodeler/repeatmodeler/tests/main.nf.test +++ b/modules/nf-core/repeatmodeler/repeatmodeler/tests/main.nf.test @@ -14,13 +14,13 @@ nextflow_process { setup { run("REPEATMODELER_BUILDDATABASE") { - script "../../../../pfr/repeatmodeler/builddatabase" + script "../../../../nf-core/repeatmodeler/builddatabase" process { """ input[0] = [ [ id:'test' ], // meta map - file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true) + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true) ] """ } @@ -41,16 +41,7 @@ nextflow_process { { assert snapshot(process.out.fasta).match("fasta") }, { assert snapshot(process.out.stk).match("stk") }, { assert file(process.out.log[0][1]).text.contains('1 families discovered.') }, - { assert snapshot(process.out.versions).match("versions") }, - { - assert snapshot( - ( - process.out.fasta.collect { file(it[1]).getName() } + - process.out.stk.collect { file(it[1]).getName() } + - process.out.log.collect { file(it[1]).getName() } - ).sort() - ).match("for-stub-match") - } + { assert snapshot(process.out.versions).match("versions") } ) } @@ -65,7 +56,7 @@ nextflow_process { """ input[0] = [ [ id:'test' ], // meta map - file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true) + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true) ] """ } @@ -74,16 +65,7 @@ nextflow_process { then { assertAll( { assert process.success }, - { assert snapshot(process.out.versions).match("versions") }, - { - assert snapshot( - ( - process.out.fasta.collect { file(it[1]).getName() } + - process.out.stk.collect { file(it[1]).getName() } + - process.out.log.collect { file(it[1]).getName() } - ).sort() - ).match("for-stub-match") - } + { assert snapshot(process.out).match() } ) } diff --git a/modules/nf-core/repeatmodeler/repeatmodeler/tests/main.nf.test.snap b/modules/nf-core/repeatmodeler/repeatmodeler/tests/main.nf.test.snap new file mode 100644 index 0000000..e923952 --- /dev/null +++ b/modules/nf-core/repeatmodeler/repeatmodeler/tests/main.nf.test.snap @@ -0,0 +1,113 @@ +{ + "versions": { + "content": [ + [ + "versions.yml:md5,1bb6846ecf1304c262eaef4d3de60cf9" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-01-09T15:06:55.753492" + }, + "homo_sapiens-genome_fasta-stub": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test.fa:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + [ + { + "id": "test" + }, + "test.stk:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "2": [ + [ + { + "id": "test" + }, + "test.log:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "3": [ + "versions.yml:md5,1bb6846ecf1304c262eaef4d3de60cf9" + ], + "fasta": [ + [ + { + "id": "test" + }, + "test.fa:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "log": [ + [ + { + "id": 
"test" + }, + "test.log:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "stk": [ + [ + { + "id": "test" + }, + "test.stk:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions": [ + "versions.yml:md5,1bb6846ecf1304c262eaef4d3de60cf9" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-04-29T13:16:41.45166" + }, + "stk": { + "content": [ + [ + [ + { + "id": "test" + }, + "test.stk:md5,acd01ad35763c11315e2297a4f051d57" + ] + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-01-09T15:06:55.740963" + }, + "fasta": { + "content": [ + [ + [ + { + "id": "test" + }, + "test.fa:md5,e25326771341204e1f8054d9529411e5" + ] + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-01-09T15:06:55.737658" + } +} \ No newline at end of file diff --git a/modules/nf-core/repeatmodeler/repeatmodeler/tests/tags.yml b/modules/nf-core/repeatmodeler/repeatmodeler/tests/tags.yml new file mode 100644 index 0000000..df65110 --- /dev/null +++ b/modules/nf-core/repeatmodeler/repeatmodeler/tests/tags.yml @@ -0,0 +1,2 @@ +repeatmodeler/repeatmodeler: + - "modules/nf-core/repeatmodeler/repeatmodeler/**" diff --git a/modules/nf-core/samtools/cat/environment.yml b/modules/nf-core/samtools/cat/environment.yml index 75f10f7..62054fc 100644 --- a/modules/nf-core/samtools/cat/environment.yml +++ b/modules/nf-core/samtools/cat/environment.yml @@ -1,8 +1,8 @@ -name: samtools_cat +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json channels: - conda-forge - bioconda - - defaults dependencies: - - bioconda::samtools=1.19.2 - - bioconda::htslib=1.19.1 + - bioconda::htslib=1.21 + - bioconda::samtools=1.21 diff --git a/modules/nf-core/samtools/cat/main.nf b/modules/nf-core/samtools/cat/main.nf index 06615aa..0490b81 100644 --- a/modules/nf-core/samtools/cat/main.nf +++ b/modules/nf-core/samtools/cat/main.nf @@ -4,8 +4,8 @@ process SAMTOOLS_CAT { conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/samtools:1.19.2--h50ea8bc_0' : - 'biocontainers/samtools:1.19.2--h50ea8bc_0' }" + 'https://depot.galaxyproject.org/singularity/samtools:1.21--h50ea8bc_0' : + 'biocontainers/samtools:1.21--h50ea8bc_0' }" input: tuple val(meta), path(input_files, stageAs: "?/*") @@ -26,7 +26,6 @@ process SAMTOOLS_CAT { """ samtools \\ cat \\ - --threads ${task.cpus-1} \\ $args \\ -o ${prefix}.${file_type} \\ $input_files diff --git a/modules/nf-core/samtools/cat/meta.yml b/modules/nf-core/samtools/cat/meta.yml index 3541e0c..dfb0f78 100644 --- a/modules/nf-core/samtools/cat/meta.yml +++ b/modules/nf-core/samtools/cat/meta.yml @@ -1,4 +1,3 @@ ---- # yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/yaml-schema.json name: samtools_cat description: Concatenate BAM or CRAM file @@ -17,34 +16,43 @@ tools: documentation: http://www.htslib.org/doc/samtools.html doi: 10.1093/bioinformatics/btp352 licence: ["MIT"] + identifier: biotools:samtools input: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test', single_end:false ] - - input_files: - type: file - description: BAM/CRAM files - pattern: "*.{bam,cram}" + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - input_files: + type: file + description: BAM/CRAM files + pattern: "*.{bam,cram}" output: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test', single_end:false ] - bam: - type: file - description: Concatenated BAM file - pattern: "*.{bam}" + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - ${prefix}.bam: + type: file + description: Concatenated BAM file + pattern: "*.{bam}" - cram: - type: file - description: Concatenated CRAM file - pattern: "*.{cram}" + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - ${prefix}.cram: + type: file + description: Concatenated CRAM file + pattern: "*.{cram}" - versions: - type: file - description: File containing software versions - pattern: "versions.yml" + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" authors: - "@matthdsm" maintainers: diff --git a/modules/nf-core/samtools/cat/tests/main.nf.test.snap b/modules/nf-core/samtools/cat/tests/main.nf.test.snap index f99cdd6..9af1b19 100644 --- a/modules/nf-core/samtools/cat/tests/main.nf.test.snap +++ b/modules/nf-core/samtools/cat/tests/main.nf.test.snap @@ -14,14 +14,14 @@ "bams_stub_versions": { "content": [ [ - "versions.yml:md5,e214a92343158372aa79dabe0fb0064a" + "versions.yml:md5,cd29ae344fb0bf5635527e1cb7a7d95f" ] ], "meta": { - "nf-test": "0.8.4", - "nextflow": "24.01.0" + "nf-test": "0.9.0", + "nextflow": "24.04.4" }, - "timestamp": "2024-02-13T16:15:40.92408626" + "timestamp": "2024-09-16T07:47:51.511914861" }, "bams_bam": { "content": [ @@ -58,13 +58,13 @@ "bams_versions": { "content": [ [ - "versions.yml:md5,e214a92343158372aa79dabe0fb0064a" + "versions.yml:md5,cd29ae344fb0bf5635527e1cb7a7d95f" ] ], "meta": { - "nf-test": "0.8.4", - "nextflow": "24.01.0" + "nf-test": "0.9.0", + "nextflow": "24.04.4" }, - "timestamp": "2024-02-13T16:15:33.224336325" + "timestamp": "2024-09-16T08:47:50.783194958" } } \ No newline at end of file diff --git a/modules/nf-core/seqkit/rmdup/environment.yml b/modules/nf-core/seqkit/rmdup/environment.yml new file mode 100644 index 0000000..4f8058a --- /dev/null +++ b/modules/nf-core/seqkit/rmdup/environment.yml @@ -0,0 +1,7 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json +channels: + - conda-forge + - bioconda +dependencies: + - "bioconda::seqkit=2.8.1" diff --git a/modules/nf-core/seqkit/rmdup/main.nf b/modules/nf-core/seqkit/rmdup/main.nf new file mode 100644 index 0000000..410bb83 --- /dev/null +++ b/modules/nf-core/seqkit/rmdup/main.nf @@ -0,0 +1,66 @@ +process SEQKIT_RMDUP { + tag "$meta.id" + label 'process_low' + // File IO can be a bottleneck. See: https://bioinf.shenwei.me/seqkit/usage/#parallelization-of-cpu-intensive-jobs + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/seqkit:2.8.1--h9ee0642_0': + 'biocontainers/seqkit:2.8.1--h9ee0642_0' }" + + input: + tuple val(meta), path(fastx) + + output: + tuple val(meta), path("${prefix}.${extension}") , emit: fastx + tuple val(meta), path("*.log") , emit: log + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + prefix = task.ext.prefix ?: "${meta.id}" + extension = "fastq" + if ("$fastx" ==~ /.+\.fasta|.+\.fasta.gz|.+\.fa|.+\.fa.gz|.+\.fas|.+\.fas.gz|.+\.fna|.+\.fna.gz|.+\.fsa|.+\.fsa.gz/ ) { + extension = "fasta" + } + extension = fastx.toString().endsWith('.gz') ? "${extension}.gz" : extension + // SeqKit/rmdup takes care of compressing the output: https://bioinf.shenwei.me/seqkit/usage/#rmdup + if("${prefix}.${extension}" == "$fastx") error "Input and output names are the same, use \"task.ext.prefix\" to disambiguate!" + """ + seqkit \\ + rmdup \\ + --threads $task.cpus \\ + $args \\ + $fastx \\ + -o ${prefix}.${extension} \\ + 2> >(tee ${prefix}.log >&2) + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + seqkit: \$(seqkit version | cut -d' ' -f2) + END_VERSIONS + """ + + stub: + prefix = task.ext.prefix ?: "${meta.id}" + extension = "fastq" + if ("$fastx" ==~ /.+\.fasta|.+\.fasta.gz|.+\.fa|.+\.fa.gz|.+\.fas|.+\.fas.gz|.+\.fna|.+\.fna.gz|.+\.fsa|.+\.fsa.gz/ ) { + extension = "fasta" + } + extension = fastx.toString().endsWith('.gz') ? "${extension}.gz" : extension + if("${prefix}.${extension}" == "$fastx") error "Input and output names are the same, use \"task.ext.prefix\" to disambiguate!" + """ + touch ${prefix}.${extension} + echo \\ + '[INFO] 0 duplicated records removed' \\ + > ${prefix}.log + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + seqkit: \$(seqkit version | cut -d' ' -f2) + END_VERSIONS + """ +} diff --git a/modules/nf-core/seqkit/rmdup/meta.yml b/modules/nf-core/seqkit/rmdup/meta.yml new file mode 100644 index 0000000..22e29c1 --- /dev/null +++ b/modules/nf-core/seqkit/rmdup/meta.yml @@ -0,0 +1,59 @@ +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/meta-schema.json +name: "seqkit_rmdup" +description: Transforms sequences (extract ID, filter by length, remove gaps, reverse + complement...) +keywords: + - genomics + - fasta + - fastq + - remove + - duplicates +tools: + - "seqkit": + description: "A cross-platform and ultrafast toolkit for FASTA/Q file manipulation" + homepage: "https://bioinf.shenwei.me/seqkit/" + documentation: "https://bioinf.shenwei.me/seqkit/usage/" + tool_dev_url: "https://github.com/shenwei356/seqkit" + doi: "10.1371/journal.pone.0163962" + licence: ["MIT"] + identifier: biotools:seqkit +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'sample1' ]` + - fastx: + type: file + description: Input fasta/fastq file + pattern: "*.{fsa,fas,fa,fasta,fastq,fq,fsa.gz,fas.gz,fa.gz,fasta.gz,fastq.gz,fq.gz}" +output: + - fastx: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'sample1' ]` + - ${prefix}.${extension}: + type: file + description: Output fasta/fastq file + pattern: "*.{fasta,fasta.gz,fastq,fastq.gz}" + - log: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
`[ id:'sample1' ]` + - "*.log": + type: file + description: Log containing information regarding removed duplicates + pattern: "*.log" + - versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@GallVp" +maintainers: + - "@GallVp" diff --git a/modules/nf-core/seqkit/rmdup/tests/main.nf.test b/modules/nf-core/seqkit/rmdup/tests/main.nf.test new file mode 100644 index 0000000..e990443 --- /dev/null +++ b/modules/nf-core/seqkit/rmdup/tests/main.nf.test @@ -0,0 +1,173 @@ +nextflow_process { + + name "Test Process SEQKIT_RMDUP" + script "../main.nf" + process "SEQKIT_RMDUP" + + tag "modules" + tag "modules_nfcore" + tag "seqkit" + tag "seqkit/rmdup" + + test("sarscov2-genome_fasta") { + when { + process { + """ + input[0] = [ + [ id:'test' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() }, + { assert path(process.out.log[0][1]).text.contains('0 duplicated records removed') } + ) + } + + } + + test("repeated-fasta") { + when { + process { + """ + def repeated_fasta = file('repeated.fasta') + repeated_fasta.text = '>A\\nAGCTAGCTAGCT\\n>B\\nAGCTAGCTAGCT\\n>A\\nAGCTAGCTAGCT' + + input[0] = [ + [ id:'test' ], // meta map + repeated_fasta + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() }, + { assert path(process.out.log[0][1]).text.contains('1 duplicated records removed') } + ) + } + + } + + test("sarscov2-genome_fasta_gz") { + when { + process { + """ + input[0] = [ + [ id:'test' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta.gz', checkIfExists: true) + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() }, + { assert path(process.out.log[0][1]).text.contains('0 duplicated records removed') } + ) + } + + } + + test("sarscov2-test_1_fastq_gz") { + when { + process { + """ + input[0] = [ + [ id:'test' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true) + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() }, + { assert path(process.out.log[0][1]).text.contains('0 duplicated records removed') } + ) + } + + } + + test("file_name_conflict-fail_with_error") { + when { + process { + """ + input[0] = [ + [ id:'test_1' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true) + ] + """ + } + } + + then { + assertAll( + { assert !process.success }, + { assert process.stdout.toString().contains("Input and output names are the same") } + ) + } + + } + + test("sarscov2-genome_fasta-stub") { + + options '-stub' + + when { + process { + """ + input[0] = [ + [ id:'test' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() }, + { assert path(process.out.log[0][1]).text.contains('0 duplicated records removed') } + ) + } + + } + + test("file_name_conflict-fail_with_error-stub") { + + options '-stub' + + when { + process { + """ + input[0] = [ + [ id:'genome' ], // meta map + 
file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) + ] + """ + } + } + + then { + assertAll( + { assert !process.success }, + { assert process.stdout.toString().contains("Input and output names are the same") } + ) + } + + } + +} diff --git a/modules/nf-core/seqkit/rmdup/tests/main.nf.test.snap b/modules/nf-core/seqkit/rmdup/tests/main.nf.test.snap new file mode 100644 index 0000000..68c415c --- /dev/null +++ b/modules/nf-core/seqkit/rmdup/tests/main.nf.test.snap @@ -0,0 +1,247 @@ +{ + "sarscov2-genome_fasta-stub": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test.fasta:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + [ + { + "id": "test" + }, + "test.log:md5,cf833211befdf890bb6b2a3cd0b91853" + ] + ], + "2": [ + "versions.yml:md5,d2b8da3c114c2bd1c6606030df55b6aa" + ], + "fastx": [ + [ + { + "id": "test" + }, + "test.fasta:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "log": [ + [ + { + "id": "test" + }, + "test.log:md5,cf833211befdf890bb6b2a3cd0b91853" + ] + ], + "versions": [ + "versions.yml:md5,d2b8da3c114c2bd1c6606030df55b6aa" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.1" + }, + "timestamp": "2024-05-27T19:40:01.6034" + }, + "sarscov2-test_1_fastq_gz": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test.fastq.gz:md5,4161df271f9bfcd25d5845a1e220dbec" + ] + ], + "1": [ + [ + { + "id": "test" + }, + "test.log:md5,a41135cfe024baaf42f135583fe73f0d" + ] + ], + "2": [ + "versions.yml:md5,d2b8da3c114c2bd1c6606030df55b6aa" + ], + "fastx": [ + [ + { + "id": "test" + }, + "test.fastq.gz:md5,4161df271f9bfcd25d5845a1e220dbec" + ] + ], + "log": [ + [ + { + "id": "test" + }, + "test.log:md5,a41135cfe024baaf42f135583fe73f0d" + ] + ], + "versions": [ + "versions.yml:md5,d2b8da3c114c2bd1c6606030df55b6aa" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.1" + }, + "timestamp": "2024-05-27T19:37:48.551195" + }, + "sarscov2-genome_fasta": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test.fasta:md5,483f4a5dfe60171c86ee9b7e6dff908b" + ] + ], + "1": [ + [ + { + "id": "test" + }, + "test.log:md5,a41135cfe024baaf42f135583fe73f0d" + ] + ], + "2": [ + "versions.yml:md5,d2b8da3c114c2bd1c6606030df55b6aa" + ], + "fastx": [ + [ + { + "id": "test" + }, + "test.fasta:md5,483f4a5dfe60171c86ee9b7e6dff908b" + ] + ], + "log": [ + [ + { + "id": "test" + }, + "test.log:md5,a41135cfe024baaf42f135583fe73f0d" + ] + ], + "versions": [ + "versions.yml:md5,d2b8da3c114c2bd1c6606030df55b6aa" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.1" + }, + "timestamp": "2024-05-27T19:37:38.821528" + }, + "sarscov2-genome_fasta_gz": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test.fasta.gz:md5,483f4a5dfe60171c86ee9b7e6dff908b" + ] + ], + "1": [ + [ + { + "id": "test" + }, + "test.log:md5,a41135cfe024baaf42f135583fe73f0d" + ] + ], + "2": [ + "versions.yml:md5,d2b8da3c114c2bd1c6606030df55b6aa" + ], + "fastx": [ + [ + { + "id": "test" + }, + "test.fasta.gz:md5,483f4a5dfe60171c86ee9b7e6dff908b" + ] + ], + "log": [ + [ + { + "id": "test" + }, + "test.log:md5,a41135cfe024baaf42f135583fe73f0d" + ] + ], + "versions": [ + "versions.yml:md5,d2b8da3c114c2bd1c6606030df55b6aa" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.1" + }, + "timestamp": "2024-05-27T19:37:43.723054" + }, + "repeated-fasta": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test.fasta:md5,7510a742291241e7d7556bf720caf65c" + ] + ], + "1": [ + [ + { + "id": "test" + 
}, + "test.log:md5,314c0aaef0f832a217a3f6ce3f8bc117" + ] + ], + "2": [ + "versions.yml:md5,d2b8da3c114c2bd1c6606030df55b6aa" + ], + "fastx": [ + [ + { + "id": "test" + }, + "test.fasta:md5,7510a742291241e7d7556bf720caf65c" + ] + ], + "log": [ + [ + { + "id": "test" + }, + "test.log:md5,314c0aaef0f832a217a3f6ce3f8bc117" + ] + ], + "versions": [ + "versions.yml:md5,d2b8da3c114c2bd1c6606030df55b6aa" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.1" + }, + "timestamp": "2024-05-27T19:52:34.545807" + } +} \ No newline at end of file diff --git a/modules/nf-core/seqkit/rmdup/tests/tags.yml b/modules/nf-core/seqkit/rmdup/tests/tags.yml new file mode 100644 index 0000000..e732db3 --- /dev/null +++ b/modules/nf-core/seqkit/rmdup/tests/tags.yml @@ -0,0 +1,2 @@ +seqkit/rmdup: + - "modules/nf-core/seqkit/rmdup/**" diff --git a/modules/nf-core/sortmerna/environment.yml b/modules/nf-core/sortmerna/environment.yml index f40f995..fab4408 100644 --- a/modules/nf-core/sortmerna/environment.yml +++ b/modules/nf-core/sortmerna/environment.yml @@ -1,7 +1,5 @@ -name: sortmerna channels: - conda-forge - bioconda - - defaults dependencies: - bioconda::sortmerna=4.3.6 diff --git a/modules/nf-core/sortmerna/meta.yml b/modules/nf-core/sortmerna/meta.yml index c0a2a58..3b43d00 100644 --- a/modules/nf-core/sortmerna/meta.yml +++ b/modules/nf-core/sortmerna/meta.yml @@ -8,67 +8,84 @@ keywords: - ribosomal RNA tools: - SortMeRNA: - description: The core algorithm is based on approximate seeds and allows for sensitive analysis of NGS reads. The main application of SortMeRNA is filtering rRNA from metatranscriptomic data. SortMeRNA takes as input files of reads (fasta, fastq, fasta.gz, fastq.gz) and one or multiple rRNA database file(s), and sorts apart aligned and rejected reads into two files. Additional applications include clustering and taxonomy assignation available through QIIME v1.9.1. SortMeRNA works with Illumina, Ion Torrent and PacBio data, and can produce SAM and BLAST-like alignments. + description: The core algorithm is based on approximate seeds and allows for sensitive + analysis of NGS reads. The main application of SortMeRNA is filtering rRNA from + metatranscriptomic data. SortMeRNA takes as input files of reads (fasta, fastq, + fasta.gz, fastq.gz) and one or multiple rRNA database file(s), and sorts apart + aligned and rejected reads into two files. Additional applications include clustering + and taxonomy assignation available through QIIME v1.9.1. SortMeRNA works with + Illumina, Ion Torrent and PacBio data, and can produce SAM and BLAST-like alignments. homepage: https://hpc.nih.gov/apps/sortmeRNA.html documentation: https://github.com/biocore/sortmerna/wiki/ licence: ["GPL-3.0-or-later"] + identifier: biotools:sortmerna input: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test', single_end:false ] - - reads: - type: file - description: | - List of input FastQ files of size 1 and 2 for single-end and paired-end data, - respectively. - - meta2: - type: map - description: | - Groovy Map containing reference information - e.g. [ id:'test' ] - - fastas: - type: file - description: | - Path to reference file(s) - - meta3: - type: map - description: | - Groovy Map containing index information - e.g. [ id:'test' ] - - index: - type: directory - description: | - Path to index directory of a previous sortmerna run + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - reads: + type: file + description: | + List of input FastQ files of size 1 and 2 for single-end and paired-end data, + respectively. + - - meta2: + type: map + description: | + Groovy Map containing reference information + e.g. [ id:'test' ] + - fastas: + type: file + description: | + Path to reference file(s) + - - meta3: + type: map + description: | + Groovy Map containing index information + e.g. [ id:'test' ] + - index: + type: directory + description: | + Path to index directory of a previous sortmerna run output: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test', single_end:false ], or reference information from an - indexing-only run - reads: - type: file - description: The filtered fastq reads - pattern: "*fastq.gz" + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ], or reference information from an + indexing-only run + - "*non_rRNA.fastq.gz": + type: file + description: The filtered fastq reads + pattern: "*fastq.gz" - log: - type: file - description: SortMeRNA log file - pattern: "*sortmerna.log" - - meta2: - type: map - description: | - Groovy Map containing reference information - e.g. [ id:'test' ] + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ], or reference information from an + indexing-only run + - "*.log": + type: file + description: SortMeRNA log file + pattern: "*sortmerna.log" - index: - type: directory - description: | - Path to index directory generated by sortmern + - meta2: + type: map + description: | + Groovy Map containing reference information + e.g. [ id:'test' ] + - idx: + type: directory + description: | + Path to index directory generated by sortmern - versions: - type: file - description: File containing software versions - pattern: "versions.yml" + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" authors: - "@drpatelh" - "@mashehu" diff --git a/modules/nf-core/star/align/environment.yml b/modules/nf-core/star/align/environment.yml index 8bd58cf..1debc4c 100644 --- a/modules/nf-core/star/align/environment.yml +++ b/modules/nf-core/star/align/environment.yml @@ -1,10 +1,9 @@ -name: star_align channels: - conda-forge - bioconda - - defaults + dependencies: - - bioconda::star=2.7.10a - - bioconda::samtools=1.18 - bioconda::htslib=1.18 + - bioconda::samtools=1.18 + - bioconda::star=2.7.10a - conda-forge::gawk=5.1.0 diff --git a/modules/nf-core/star/align/main.nf b/modules/nf-core/star/align/main.nf index 8e9c48b..ae67e00 100644 --- a/modules/nf-core/star/align/main.nf +++ b/modules/nf-core/star/align/main.nf @@ -81,6 +81,8 @@ process STAR_ALIGN { stub: def prefix = task.ext.prefix ?: "${meta.id}" """ + echo "" | gzip > ${prefix}.unmapped_1.fastq.gz + echo "" | gzip > ${prefix}.unmapped_2.fastq.gz touch ${prefix}Xd.out.bam touch ${prefix}.Log.final.out touch ${prefix}.Log.out @@ -89,8 +91,6 @@ process STAR_ALIGN { touch ${prefix}.toTranscriptome.out.bam touch ${prefix}.Aligned.unsort.out.bam touch ${prefix}.Aligned.sortedByCoord.out.bam - touch ${prefix}.unmapped_1.fastq.gz - touch ${prefix}.unmapped_2.fastq.gz touch ${prefix}.tab touch ${prefix}.SJ.out.tab touch ${prefix}.ReadsPerGene.out.tab diff --git a/modules/nf-core/star/align/meta.yml b/modules/nf-core/star/align/meta.yml index e80dbb7..d30556b 100644 --- a/modules/nf-core/star/align/meta.yml +++ 
b/modules/nf-core/star/align/meta.yml @@ -14,97 +14,189 @@ tools: manual: https://github.com/alexdobin/STAR/blob/master/doc/STARmanual.pdf doi: 10.1093/bioinformatics/bts635 licence: ["MIT"] + identifier: biotools:star input: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test', single_end:false ] - - reads: - type: file - description: | - List of input FastQ files of size 1 and 2 for single-end and paired-end data, - respectively. - - meta2: - type: map - description: | - Groovy Map containing reference information - e.g. [ id:'test' ] - - index: - type: directory - description: STAR genome index - pattern: "star" - - meta3: - type: map - description: | - Groovy Map containing reference information - e.g. [ id:'test' ] - - gtf: - type: file - description: Annotation GTF file - pattern: "*.{gtf}" - - star_ignore_sjdbgtf: - type: boolean - description: Ignore annotation GTF file - - seq_platform: - type: string - description: Sequencing platform - - seq_center: - type: string - description: Sequencing center + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - reads: + type: file + description: | + List of input FastQ files of size 1 and 2 for single-end and paired-end data, + respectively. + - - meta2: + type: map + description: | + Groovy Map containing reference information + e.g. [ id:'test' ] + - index: + type: directory + description: STAR genome index + pattern: "star" + - - meta3: + type: map + description: | + Groovy Map containing reference information + e.g. [ id:'test' ] + - gtf: + type: file + description: Annotation GTF file + pattern: "*.{gtf}" + - - star_ignore_sjdbgtf: + type: boolean + description: Ignore annotation GTF file + - - seq_platform: + type: string + description: Sequencing platform + - - seq_center: + type: string + description: Sequencing center output: - - bam: - type: file - description: Output BAM file containing read alignments - pattern: "*.{bam}" - log_final: - type: file - description: STAR final log file - pattern: "*Log.final.out" + - meta: + type: file + description: STAR final log file + pattern: "*Log.final.out" + - "*Log.final.out": + type: file + description: STAR final log file + pattern: "*Log.final.out" - log_out: - type: file - description: STAR lot out file - pattern: "*Log.out" + - meta: + type: file + description: STAR lot out file + pattern: "*Log.out" + - "*Log.out": + type: file + description: STAR lot out file + pattern: "*Log.out" - log_progress: - type: file - description: STAR log progress file - pattern: "*Log.progress.out" + - meta: + type: file + description: STAR log progress file + pattern: "*Log.progress.out" + - "*Log.progress.out": + type: file + description: STAR log progress file + pattern: "*Log.progress.out" - versions: - type: file - description: File containing software versions - pattern: "versions.yml" + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" + - bam: + - meta: + type: file + description: Output BAM file containing read alignments + pattern: "*.{bam}" + - "*d.out.bam": + type: file + description: Output BAM file containing read alignments + pattern: "*.{bam}" - bam_sorted: - type: file - description: Sorted BAM file of read alignments (optional) - pattern: "*sortedByCoord.out.bam" + - meta: + type: file + description: Sorted BAM file of read alignments (optional) + pattern: "*sortedByCoord.out.bam" + - "*sortedByCoord.out.bam": + 
type: file + description: Sorted BAM file of read alignments (optional) + pattern: "*sortedByCoord.out.bam" - bam_transcript: - type: file - description: Output BAM file of transcriptome alignment (optional) - pattern: "*toTranscriptome.out.bam" + - meta: + type: file + description: Output BAM file of transcriptome alignment (optional) + pattern: "*toTranscriptome.out.bam" + - "*toTranscriptome.out.bam": + type: file + description: Output BAM file of transcriptome alignment (optional) + pattern: "*toTranscriptome.out.bam" - bam_unsorted: - type: file - description: Unsorted BAM file of read alignments (optional) - pattern: "*Aligned.unsort.out.bam" + - meta: + type: file + description: Unsorted BAM file of read alignments (optional) + pattern: "*Aligned.unsort.out.bam" + - "*Aligned.unsort.out.bam": + type: file + description: Unsorted BAM file of read alignments (optional) + pattern: "*Aligned.unsort.out.bam" - fastq: - type: file - description: Unmapped FastQ files (optional) - pattern: "*fastq.gz" + - meta: + type: file + description: Unmapped FastQ files (optional) + pattern: "*fastq.gz" + - "*fastq.gz": + type: file + description: Unmapped FastQ files (optional) + pattern: "*fastq.gz" - tab: - type: file - description: STAR output tab file(s) (optional) - pattern: "*.tab" + - meta: + type: file + description: STAR output tab file(s) (optional) + pattern: "*.tab" + - "*.tab": + type: file + description: STAR output tab file(s) (optional) + pattern: "*.tab" + - spl_junc_tab: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.SJ.out.tab": + type: file + description: STAR output splice junction tab file + pattern: "*.SJ.out.tab" + - read_per_gene_tab: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.ReadsPerGene.out.tab": + type: file + description: STAR output read per gene tab file + pattern: "*.ReadsPerGene.out.tab" - junction: - type: file - description: STAR chimeric junction output file (optional) - pattern: "*.out.junction" + - meta: + type: file + description: STAR chimeric junction output file (optional) + pattern: "*.out.junction" + - "*.out.junction": + type: file + description: STAR chimeric junction output file (optional) + pattern: "*.out.junction" + - sam: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - "*.out.sam": + type: file + description: STAR output SAM file + pattern: "*.out.sam" - wig: - type: file - description: STAR output wiggle format file(s) (optional) - pattern: "*.wig" + - meta: + type: file + description: STAR output wiggle format file(s) (optional) + pattern: "*.wig" + - "*.wig": + type: file + description: STAR output wiggle format file(s) (optional) + pattern: "*.wig" - bedgraph: - type: file - description: STAR output bedGraph format file(s) (optional) - pattern: "*.bg" + - meta: + type: file + description: STAR output bedGraph format file(s) (optional) + pattern: "*.bg" + - "*.bg": + type: file + description: STAR output bedGraph format file(s) (optional) + pattern: "*.bg" authors: - "@kevinmenden" - "@drpatelh" diff --git a/modules/nf-core/star/align/tests/main.nf.test b/modules/nf-core/star/align/tests/main.nf.test index 6ecd778..2d9f72d 100644 --- a/modules/nf-core/star/align/tests/main.nf.test +++ b/modules/nf-core/star/align/tests/main.nf.test @@ -9,27 +9,367 @@ nextflow_process { tag "star/align" tag "star/genomegenerate" - setup { - run("STAR_GENOMEGENERATE") { - script "../../../star/genomegenerate/main.nf" + test("homo_sapiens - single_end") { + config "./nextflow.config" + + setup { + run("STAR_GENOMEGENERATE") { + script "../../../star/genomegenerate/main.nf" + process { + """ + input[0] = Channel.of([ + [ id:'test_fasta' ], + [ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true) ] + ]) + input[1] = Channel.of([ + [ id:'test_gtf' ], + [ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.gtf', checkIfExists: true) ] + ]) + """ + } + } + } + + when { process { """ input[0] = Channel.of([ - [ id:'test_fasta' ], - [ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true) ] + [ id:'test', single_end:true ], // meta map + [ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/fastq/test_rnaseq_1.fastq.gz', checkIfExists: true) ] ]) - input[1] = Channel.of([ + input[1] = STAR_GENOMEGENERATE.out.index + input[2] = Channel.of([ [ id:'test_gtf' ], [ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.gtf', checkIfExists: true) ] ]) + input[3] = false + input[4] = 'illumina' + input[5] = false """ } } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + file(process.out.log_final[0][1]).name, + file(process.out.log_out[0][1]).name, + file(process.out.log_progress[0][1]).name, + process.out.bam, + process.out.bam_sorted, + process.out.bam_transcript, + process.out.bam_unsorted, + process.out.bedgraph, + process.out.fastq, + process.out.junction, + process.out.read_per_gene_tab, + process.out.sam, + process.out.spl_junc_tab, + process.out.tab, + process.out.wig, + process.out.versions + ).match() } + ) + } } - test("homo_sapiens - single_end") { + test("homo_sapiens - paired_end") { + config "./nextflow.config" + + setup { + run("STAR_GENOMEGENERATE") { + script "../../../star/genomegenerate/main.nf" + process { + """ + input[0] = Channel.of([ + [ id:'test_fasta' ], + [ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true) ] + ]) + input[1] = Channel.of([ + [ id:'test_gtf' ], + [ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.gtf', checkIfExists: true) ] + ]) + """ + } + } + } + + when { + process { + """ + input[0] = Channel.of([ + [ id:'test', 
single_end:false ], // meta map + [ + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/fastq/test_rnaseq_1.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/fastq/test_rnaseq_2.fastq.gz', checkIfExists: true) + ] + ]) + input[1] = STAR_GENOMEGENERATE.out.index + input[2] = Channel.of([ + [ id:'test_gtf' ], + [ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.gtf', checkIfExists: true) ] + ]) + input[3] = false + input[4] = 'illumina' + input[5] = false + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + file(process.out.log_final[0][1]).name, + file(process.out.log_out[0][1]).name, + file(process.out.log_progress[0][1]).name, + process.out.bam, + process.out.bam_sorted, + process.out.bam_transcript, + process.out.bam_unsorted, + process.out.bedgraph, + process.out.fastq, + process.out.junction, + process.out.read_per_gene_tab, + process.out.sam, + process.out.spl_junc_tab, + process.out.tab, + process.out.wig, + process.out.versions + ).match() } + ) + } + } + + test("homo_sapiens - paired_end - arriba") { + config "./nextflow.arriba.config" + + setup { + run("STAR_GENOMEGENERATE") { + script "../../../star/genomegenerate/main.nf" + process { + """ + input[0] = Channel.of([ + [ id:'test_fasta' ], + [ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true) ] + ]) + input[1] = Channel.of([ + [ id:'test_gtf' ], + [ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.gtf', checkIfExists: true) ] + ]) + """ + } + } + } + + when { + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + [ + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/fastq/test_rnaseq_1.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/fastq/test_rnaseq_2.fastq.gz', checkIfExists: true) + ] + ]) + input[1] = STAR_GENOMEGENERATE.out.index + input[2] = Channel.of([ + [ id:'test_gtf' ], + [ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.gtf', checkIfExists: true) ] + ]) + input[3] = false + input[4] = 'illumina' + input[5] = false + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + file(process.out.log_final[0][1]).name, + file(process.out.log_out[0][1]).name, + file(process.out.log_progress[0][1]).name, + process.out.bam, + process.out.bam_sorted, + process.out.bam_transcript, + process.out.bam_unsorted, + process.out.bedgraph, + process.out.fastq, + process.out.junction, + process.out.read_per_gene_tab, + process.out.sam, + process.out.spl_junc_tab, + process.out.tab, + process.out.wig, + process.out.versions + ).match() } + ) + } + } + + test("homo_sapiens - paired_end - starfusion") { + config "./nextflow.starfusion.config" + + setup { + run("STAR_GENOMEGENERATE") { + script "../../../star/genomegenerate/main.nf" + process { + """ + input[0] = Channel.of([ + [ id:'test_fasta' ], + [ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true) ] + ]) + input[1] = Channel.of([ + [ id:'test_gtf' ], + [ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.gtf', checkIfExists: true) ] + ]) + """ + } + } + } + + when { + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + [ + file(params.modules_testdata_base_path 
+ 'genomics/homo_sapiens/illumina/fastq/test_rnaseq_1.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/fastq/test_rnaseq_2.fastq.gz', checkIfExists: true) + ] + ]) + input[1] = STAR_GENOMEGENERATE.out.index + input[2] = Channel.of([ + [ id:'test_gtf' ], + [ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.gtf', checkIfExists: true) ] + ]) + input[3] = false + input[4] = 'illumina' + input[5] = false + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + file(process.out.log_final[0][1]).name, + file(process.out.log_out[0][1]).name, + file(process.out.log_progress[0][1]).name, + process.out.bam, + process.out.bam_sorted, + process.out.bam_transcript, + process.out.bam_unsorted, + process.out.bedgraph, + process.out.fastq, + process.out.junction, + process.out.read_per_gene_tab, + process.out.sam, + process.out.spl_junc_tab, + process.out.tab, + process.out.wig, + process.out.versions + ).match() } + ) + } + } + + test("homo_sapiens - paired_end - multiple") { config "./nextflow.config" + setup { + run("STAR_GENOMEGENERATE") { + script "../../../star/genomegenerate/main.nf" + process { + """ + input[0] = Channel.of([ + [ id:'test_fasta' ], + [ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true) ] + ]) + input[1] = Channel.of([ + [ id:'test_gtf' ], + [ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.gtf', checkIfExists: true) ] + ]) + """ + } + } + } + + when { + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + [ + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/fastq/test_rnaseq_1.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/fastq/test_rnaseq_2.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/fastq/test_rnaseq_1.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/fastq/test_rnaseq_2.fastq.gz', checkIfExists: true) + ] + ]) + input[1] = STAR_GENOMEGENERATE.out.index + input[2] = Channel.of([ + [ id:'test_gtf' ], + [ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.gtf', checkIfExists: true) ] + ]) + input[3] = false + input[4] = 'illumina' + input[5] = false + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + file(process.out.log_final[0][1]).name, + file(process.out.log_out[0][1]).name, + file(process.out.log_progress[0][1]).name, + process.out.bam, + process.out.bam_sorted, + process.out.bam_transcript, + process.out.bam_unsorted, + process.out.bedgraph, + process.out.fastq, + process.out.junction, + process.out.read_per_gene_tab, + process.out.sam, + process.out.spl_junc_tab, + process.out.tab, + process.out.wig, + process.out.versions + ).match() } + ) + } + } + + test("homo_sapiens - single_end - stub") { + options "-stub" + config "./nextflow.config" + + setup { + run("STAR_GENOMEGENERATE") { + script "../../../star/genomegenerate/main.nf" + process { + """ + input[0] = Channel.of([ + [ id:'test_fasta' ], + [ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true) ] + ]) + input[1] = Channel.of([ + [ id:'test_gtf' ], + [ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.gtf', checkIfExists: true) 
] + ]) + """ + } + } + } + when { process { """ @@ -52,29 +392,33 @@ nextflow_process { then { assertAll( { assert process.success }, - { assert snapshot(file(process.out.log_final[0][1]).name).match("homo_sapiens - single_end - log_final") }, - { assert snapshot(file(process.out.log_out[0][1]).name).match("homo_sapiens - single_end - log_out") }, - { assert snapshot(process.out.bam).match("homo_sapiens - single_end - bam") }, - { assert snapshot(process.out.bam_sorted).match("homo_sapiens - single_end - bam_sorted") }, - { assert snapshot(process.out.bam_transcript).match("homo_sapiens - single_end - bam_transcript") }, - { assert snapshot(process.out.bam_unsorted).match("homo_sapiens - single_end - bam_unsorted") }, - { assert snapshot(process.out.bedgraph).match("homo_sapiens - single_end - bedgraph") }, - { assert snapshot(process.out.fastq).match("homo_sapiens - single_end - fastq") }, - { assert snapshot(process.out.junction).match("homo_sapiens - single_end - junction") }, - { assert snapshot(process.out.log_progress).match("homo_sapiens - single_end - log_progress") }, - { assert snapshot(process.out.read_per_gene_tab).match("homo_sapiens - single_end - read_per_gene_tab") }, - { assert snapshot(process.out.sam).match("homo_sapiens - single_end - sam") }, - { assert snapshot(process.out.spl_junc_tab).match("homo_sapiens - single_end - spl_junc_tab") }, - { assert snapshot(process.out.tab).match("homo_sapiens - single_end - tab") }, - { assert snapshot(process.out.wig).match("homo_sapiens - single_end - wig") }, - { assert snapshot(process.out.versions).match("homo_sapiens - single_end - versions") } + { assert snapshot(process.out).match() } ) } } - test("homo_sapiens - paired_end") { + test("homo_sapiens - paired_end - stub") { + options "-stub" config "./nextflow.config" + setup { + run("STAR_GENOMEGENERATE") { + script "../../../star/genomegenerate/main.nf" + process { + """ + input[0] = Channel.of([ + [ id:'test_fasta' ], + [ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true) ] + ]) + input[1] = Channel.of([ + [ id:'test_gtf' ], + [ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.gtf', checkIfExists: true) ] + ]) + """ + } + } + } + when { process { """ @@ -100,29 +444,33 @@ nextflow_process { then { assertAll( { assert process.success }, - { assert snapshot(file(process.out.log_final[0][1]).name).match("homo_sapiens - paired_end - log_final") }, - { assert snapshot(file(process.out.log_out[0][1]).name).match("homo_sapiens - paired_end - log_out") }, - { assert snapshot(process.out.bam).match("homo_sapiens - paired_end - bam") }, - { assert snapshot(process.out.bam_sorted).match("homo_sapiens - paired_end - bam_sorted") }, - { assert snapshot(process.out.bam_transcript).match("homo_sapiens - paired_end - bam_transcript") }, - { assert snapshot(process.out.bam_unsorted).match("homo_sapiens - paired_end - bam_unsorted") }, - { assert snapshot(process.out.bedgraph).match("homo_sapiens - paired_end - bedgraph") }, - { assert snapshot(process.out.fastq).match("homo_sapiens - paired_end - fastq") }, - { assert snapshot(process.out.junction).match("homo_sapiens - paired_end - junction") }, - { assert snapshot(process.out.log_progress).match("homo_sapiens - paired_end - log_progress") }, - { assert snapshot(process.out.read_per_gene_tab).match("homo_sapiens - paired_end - read_per_gene_tab") }, - { assert snapshot(process.out.sam).match("homo_sapiens - paired_end - sam") }, - { assert 
snapshot(process.out.spl_junc_tab).match("homo_sapiens - paired_end - spl_junc_tab") }, - { assert snapshot(process.out.tab).match("homo_sapiens - paired_end - tab") }, - { assert snapshot(process.out.wig).match("homo_sapiens - paired_end - wig") }, - { assert snapshot(process.out.versions).match("homo_sapiens - paired_end - versions") } + { assert snapshot(process.out).match() } ) } } - test("homo_sapiens - paired_end - arriba") { + test("homo_sapiens - paired_end - arriba - stub") { + options "-stub" config "./nextflow.arriba.config" + setup { + run("STAR_GENOMEGENERATE") { + script "../../../star/genomegenerate/main.nf" + process { + """ + input[0] = Channel.of([ + [ id:'test_fasta' ], + [ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true) ] + ]) + input[1] = Channel.of([ + [ id:'test_gtf' ], + [ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.gtf', checkIfExists: true) ] + ]) + """ + } + } + } + when { process { """ @@ -148,29 +496,33 @@ nextflow_process { then { assertAll( { assert process.success }, - { assert snapshot(file(process.out.log_final[0][1]).name).match("homo_sapiens - paired_end - arriba - log_final") }, - { assert snapshot(file(process.out.log_out[0][1]).name).match("homo_sapiens - paired_end - arriba - log_out") }, - { assert snapshot(file(process.out.log_progress[0][1]).name).match("homo_sapiens - paired_end - arriba - log_progress") }, - { assert snapshot(process.out.bam).match("homo_sapiens - paired_end - arriba - bam") }, - { assert snapshot(process.out.bam_sorted).match("homo_sapiens - paired_end - arriba - bam_sorted") }, - { assert snapshot(process.out.bam_transcript).match("homo_sapiens - paired_end - arriba - bam_transcript") }, - { assert snapshot(process.out.bam_unsorted).match("homo_sapiens - paired_end - arriba - bam_unsorted") }, - { assert snapshot(process.out.bedgraph).match("homo_sapiens - paired_end - arriba - bedgraph") }, - { assert snapshot(process.out.fastq).match("homo_sapiens - paired_end - arriba - fastq") }, - { assert snapshot(process.out.junction).match("homo_sapiens - paired_end - arriba - junction") }, - { assert snapshot(process.out.read_per_gene_tab).match("homo_sapiens - paired_end - arriba - read_per_gene_tab") }, - { assert snapshot(process.out.sam).match("homo_sapiens - paired_end - arriba - sam") }, - { assert snapshot(process.out.spl_junc_tab).match("homo_sapiens - paired_end - arriba - spl_junc_tab") }, - { assert snapshot(process.out.tab).match("homo_sapiens - paired_end - arriba - tab") }, - { assert snapshot(process.out.wig).match("homo_sapiens - paired_end - arriba - wig") }, - { assert snapshot(process.out.versions).match("homo_sapiens - paired_end - arriba - versions") } + { assert snapshot(process.out).match() } ) } } - test("homo_sapiens - paired_end - starfusion") { + test("homo_sapiens - paired_end - starfusion - stub") { + options "-stub" config "./nextflow.starfusion.config" + setup { + run("STAR_GENOMEGENERATE") { + script "../../../star/genomegenerate/main.nf" + process { + """ + input[0] = Channel.of([ + [ id:'test_fasta' ], + [ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true) ] + ]) + input[1] = Channel.of([ + [ id:'test_gtf' ], + [ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.gtf', checkIfExists: true) ] + ]) + """ + } + } + } + when { process { """ @@ -196,29 +548,33 @@ nextflow_process { then { assertAll( { assert process.success }, - { 
assert snapshot(file(process.out.log_final[0][1]).name).match("homo_sapiens - paired_end - starfusion - log_final") }, - { assert snapshot(file(process.out.log_out[0][1]).name).match("homo_sapiens - paired_end - starfusion - log_out") }, - { assert snapshot(file(process.out.log_progress[0][1]).name).match("homo_sapiens - paired_end - starfusion - log_progress") }, - { assert snapshot(process.out.bam).match("homo_sapiens - paired_end - starfusion - bam") }, - { assert snapshot(process.out.bam_sorted).match("homo_sapiens - paired_end - starfusion - bam_sorted") }, - { assert snapshot(process.out.bam_transcript).match("homo_sapiens - paired_end - starfusion - bam_transcript") }, - { assert snapshot(process.out.bam_unsorted).match("homo_sapiens - paired_end - starfusion - bam_unsorted") }, - { assert snapshot(process.out.bedgraph).match("homo_sapiens - paired_end - starfusion - bedgraph") }, - { assert snapshot(process.out.fastq).match("homo_sapiens - paired_end - starfusion - fastq") }, - { assert snapshot(process.out.junction).match("homo_sapiens - paired_end - starfusion - junction") }, - { assert snapshot(process.out.read_per_gene_tab).match("homo_sapiens - paired_end - starfusion - read_per_gene_tab") }, - { assert snapshot(process.out.sam).match("homo_sapiens - paired_end - starfusion - sam") }, - { assert snapshot(process.out.spl_junc_tab).match("homo_sapiens - paired_end - starfusion - spl_junc_tab") }, - { assert snapshot(process.out.tab).match("homo_sapiens - paired_end - starfusion - tab") }, - { assert snapshot(process.out.wig).match("homo_sapiens - paired_end - starfusion - wig") }, - { assert snapshot(process.out.versions).match("homo_sapiens - paired_end - starfusion - versions") } + { assert snapshot(process.out).match() } ) } } - test("homo_sapiens - paired_end - multiple") { + test("homo_sapiens - paired_end - multiple - stub") { + options "-stub" config "./nextflow.config" + setup { + run("STAR_GENOMEGENERATE") { + script "../../../star/genomegenerate/main.nf" + process { + """ + input[0] = Channel.of([ + [ id:'test_fasta' ], + [ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true) ] + ]) + input[1] = Channel.of([ + [ id:'test_gtf' ], + [ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.gtf', checkIfExists: true) ] + ]) + """ + } + } + } + when { process { """ @@ -246,22 +602,7 @@ nextflow_process { then { assertAll( { assert process.success }, - { assert snapshot(file(process.out.log_final[0][1]).name).match("homo_sapiens - paired_end - multiple - log_final") }, - { assert snapshot(file(process.out.log_out[0][1]).name).match("homo_sapiens - paired_end - multiple - log_out") }, - { assert snapshot(file(process.out.log_progress[0][1]).name).match("homo_sapiens - paired_end - multiple - log_progress") }, - { assert snapshot(process.out.bam).match("homo_sapiens - paired_end - multiple - bam") }, - { assert snapshot(process.out.bam_sorted).match("homo_sapiens - paired_end - multiple - bam_sorted") }, - { assert snapshot(process.out.bam_transcript).match("homo_sapiens - paired_end - multiple - bam_transcript") }, - { assert snapshot(process.out.bam_unsorted).match("homo_sapiens - paired_end - multiple - bam_unsorted") }, - { assert snapshot(process.out.bedgraph).match("homo_sapiens - paired_end - multiple - bedgraph") }, - { assert snapshot(process.out.fastq).match("homo_sapiens - paired_end - multiple - fastq") }, - { assert snapshot(process.out.junction).match("homo_sapiens - paired_end - 
multiple - junction") }, - { assert snapshot(process.out.read_per_gene_tab).match("homo_sapiens - paired_end - multiple - read_per_gene_tab") }, - { assert snapshot(process.out.sam).match("homo_sapiens - paired_end - multiple - sam") }, - { assert snapshot(process.out.spl_junc_tab).match("homo_sapiens - paired_end - multiple - spl_junc_tab") }, - { assert snapshot(process.out.tab).match("homo_sapiens - paired_end - multiple - tab") }, - { assert snapshot(process.out.wig).match("homo_sapiens - paired_end - multiple - wig") }, - { assert snapshot(process.out.versions).match("homo_sapiens - paired_end - multiple - versions") } + { assert snapshot(process.out).match() } ) } } diff --git a/modules/nf-core/star/align/tests/main.nf.test.snap b/modules/nf-core/star/align/tests/main.nf.test.snap index 08edb91..c814eb5 100644 --- a/modules/nf-core/star/align/tests/main.nf.test.snap +++ b/modules/nf-core/star/align/tests/main.nf.test.snap @@ -1,382 +1,1170 @@ { - "homo_sapiens - paired_end - multiple - bam_sorted": { + "homo_sapiens - single_end - stub": { "content": [ - [ - [ - { - "id": "test", - "single_end": false - }, - "test.Aligned.sortedByCoord.out.bam:md5,ab07c21d63ab0a6c07d171d213c81d5a" + { + "0": [ + [ + { + "id": "test", + "single_end": true + }, + "test.Log.final.out:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + [ + { + "id": "test", + "single_end": true + }, + "test.Log.out:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "10": [ + [ + { + "id": "test", + "single_end": true + }, + "test.SJ.out.tab:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "11": [ + [ + { + "id": "test", + "single_end": true + }, + "test.ReadsPerGene.out.tab:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "12": [ + [ + { + "id": "test", + "single_end": true + }, + "test.Chimeric.out.junction:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "13": [ + [ + { + "id": "test", + "single_end": true + }, + "test.out.sam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "14": [ + [ + { + "id": "test", + "single_end": true + }, + "test.Signal.UniqueMultiple.str1.out.wig:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "15": [ + [ + { + "id": "test", + "single_end": true + }, + "test.Signal.UniqueMultiple.str1.out.bg:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "2": [ + [ + { + "id": "test", + "single_end": true + }, + "test.Log.progress.out:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "3": [ + "versions.yml:md5,2e6b6d8809f5a17f38f4d27c45dcb22f" + ], + "4": [ + [ + { + "id": "test", + "single_end": true + }, + [ + "test.Aligned.sortedByCoord.out.bam:md5,d41d8cd98f00b204e9800998ecf8427e", + "test.sortedByCoord.out.bam:md5,d41d8cd98f00b204e9800998ecf8427e", + "testXd.out.bam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + ], + "5": [ + [ + { + "id": "test", + "single_end": true + }, + [ + "test.Aligned.sortedByCoord.out.bam:md5,d41d8cd98f00b204e9800998ecf8427e", + "test.sortedByCoord.out.bam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + ], + "6": [ + [ + { + "id": "test", + "single_end": true + }, + "test.toTranscriptome.out.bam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "7": [ + [ + { + "id": "test", + "single_end": true + }, + "test.Aligned.unsort.out.bam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "8": [ + [ + { + "id": "test", + "single_end": true + }, + [ + "test.unmapped_1.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940", + "test.unmapped_2.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ] + ], + "9": [ + [ + { + "id": "test", + "single_end": true + }, + [ + 
"test.ReadsPerGene.out.tab:md5,d41d8cd98f00b204e9800998ecf8427e", + "test.SJ.out.tab:md5,d41d8cd98f00b204e9800998ecf8427e", + "test.tab:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + ], + "bam": [ + [ + { + "id": "test", + "single_end": true + }, + [ + "test.Aligned.sortedByCoord.out.bam:md5,d41d8cd98f00b204e9800998ecf8427e", + "test.sortedByCoord.out.bam:md5,d41d8cd98f00b204e9800998ecf8427e", + "testXd.out.bam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + ], + "bam_sorted": [ + [ + { + "id": "test", + "single_end": true + }, + [ + "test.Aligned.sortedByCoord.out.bam:md5,d41d8cd98f00b204e9800998ecf8427e", + "test.sortedByCoord.out.bam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + ], + "bam_transcript": [ + [ + { + "id": "test", + "single_end": true + }, + "test.toTranscriptome.out.bam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "bam_unsorted": [ + [ + { + "id": "test", + "single_end": true + }, + "test.Aligned.unsort.out.bam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "bedgraph": [ + [ + { + "id": "test", + "single_end": true + }, + "test.Signal.UniqueMultiple.str1.out.bg:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "fastq": [ + [ + { + "id": "test", + "single_end": true + }, + [ + "test.unmapped_1.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940", + "test.unmapped_2.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ] + ], + "junction": [ + [ + { + "id": "test", + "single_end": true + }, + "test.Chimeric.out.junction:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "log_final": [ + [ + { + "id": "test", + "single_end": true + }, + "test.Log.final.out:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "log_out": [ + [ + { + "id": "test", + "single_end": true + }, + "test.Log.out:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "log_progress": [ + [ + { + "id": "test", + "single_end": true + }, + "test.Log.progress.out:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "read_per_gene_tab": [ + [ + { + "id": "test", + "single_end": true + }, + "test.ReadsPerGene.out.tab:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "sam": [ + [ + { + "id": "test", + "single_end": true + }, + "test.out.sam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "spl_junc_tab": [ + [ + { + "id": "test", + "single_end": true + }, + "test.SJ.out.tab:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "tab": [ + [ + { + "id": "test", + "single_end": true + }, + [ + "test.ReadsPerGene.out.tab:md5,d41d8cd98f00b204e9800998ecf8427e", + "test.SJ.out.tab:md5,d41d8cd98f00b204e9800998ecf8427e", + "test.tab:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + ], + "versions": [ + "versions.yml:md5,2e6b6d8809f5a17f38f4d27c45dcb22f" + ], + "wig": [ + [ + { + "id": "test", + "single_end": true + }, + "test.Signal.UniqueMultiple.str1.out.wig:md5,d41d8cd98f00b204e9800998ecf8427e" + ] ] - ] + } ], - "timestamp": "2023-12-04T18:01:19.968225733" + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.3" + }, + "timestamp": "2024-07-22T15:16:04.712114" }, - "homo_sapiens - paired_end - multiple - wig": { + "homo_sapiens - paired_end - arriba - stub": { "content": [ - [ - - ] + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + "test.Log.final.out:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + [ + { + "id": "test", + "single_end": false + }, + "test.Log.out:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "10": [ + [ + { + "id": "test", + "single_end": false + }, + "test.SJ.out.tab:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "11": [ + [ + { + "id": "test", + "single_end": false + }, + 
"test.ReadsPerGene.out.tab:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "12": [ + [ + { + "id": "test", + "single_end": false + }, + "test.Chimeric.out.junction:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "13": [ + [ + { + "id": "test", + "single_end": false + }, + "test.out.sam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "14": [ + [ + { + "id": "test", + "single_end": false + }, + "test.Signal.UniqueMultiple.str1.out.wig:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "15": [ + [ + { + "id": "test", + "single_end": false + }, + "test.Signal.UniqueMultiple.str1.out.bg:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "2": [ + [ + { + "id": "test", + "single_end": false + }, + "test.Log.progress.out:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "3": [ + "versions.yml:md5,2e6b6d8809f5a17f38f4d27c45dcb22f" + ], + "4": [ + [ + { + "id": "test", + "single_end": false + }, + [ + "test.Aligned.sortedByCoord.out.bam:md5,d41d8cd98f00b204e9800998ecf8427e", + "test.sortedByCoord.out.bam:md5,d41d8cd98f00b204e9800998ecf8427e", + "testXd.out.bam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + ], + "5": [ + [ + { + "id": "test", + "single_end": false + }, + [ + "test.Aligned.sortedByCoord.out.bam:md5,d41d8cd98f00b204e9800998ecf8427e", + "test.sortedByCoord.out.bam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + ], + "6": [ + [ + { + "id": "test", + "single_end": false + }, + "test.toTranscriptome.out.bam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "7": [ + [ + { + "id": "test", + "single_end": false + }, + "test.Aligned.unsort.out.bam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "8": [ + [ + { + "id": "test", + "single_end": false + }, + [ + "test.unmapped_1.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940", + "test.unmapped_2.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ] + ], + "9": [ + [ + { + "id": "test", + "single_end": false + }, + [ + "test.ReadsPerGene.out.tab:md5,d41d8cd98f00b204e9800998ecf8427e", + "test.SJ.out.tab:md5,d41d8cd98f00b204e9800998ecf8427e", + "test.tab:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + ], + "bam": [ + [ + { + "id": "test", + "single_end": false + }, + [ + "test.Aligned.sortedByCoord.out.bam:md5,d41d8cd98f00b204e9800998ecf8427e", + "test.sortedByCoord.out.bam:md5,d41d8cd98f00b204e9800998ecf8427e", + "testXd.out.bam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + ], + "bam_sorted": [ + [ + { + "id": "test", + "single_end": false + }, + [ + "test.Aligned.sortedByCoord.out.bam:md5,d41d8cd98f00b204e9800998ecf8427e", + "test.sortedByCoord.out.bam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + ], + "bam_transcript": [ + [ + { + "id": "test", + "single_end": false + }, + "test.toTranscriptome.out.bam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "bam_unsorted": [ + [ + { + "id": "test", + "single_end": false + }, + "test.Aligned.unsort.out.bam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "bedgraph": [ + [ + { + "id": "test", + "single_end": false + }, + "test.Signal.UniqueMultiple.str1.out.bg:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "fastq": [ + [ + { + "id": "test", + "single_end": false + }, + [ + "test.unmapped_1.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940", + "test.unmapped_2.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ] + ], + "junction": [ + [ + { + "id": "test", + "single_end": false + }, + "test.Chimeric.out.junction:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "log_final": [ + [ + { + "id": "test", + "single_end": false + }, + "test.Log.final.out:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "log_out": 
[ + [ + { + "id": "test", + "single_end": false + }, + "test.Log.out:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "log_progress": [ + [ + { + "id": "test", + "single_end": false + }, + "test.Log.progress.out:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "read_per_gene_tab": [ + [ + { + "id": "test", + "single_end": false + }, + "test.ReadsPerGene.out.tab:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "sam": [ + [ + { + "id": "test", + "single_end": false + }, + "test.out.sam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "spl_junc_tab": [ + [ + { + "id": "test", + "single_end": false + }, + "test.SJ.out.tab:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "tab": [ + [ + { + "id": "test", + "single_end": false + }, + [ + "test.ReadsPerGene.out.tab:md5,d41d8cd98f00b204e9800998ecf8427e", + "test.SJ.out.tab:md5,d41d8cd98f00b204e9800998ecf8427e", + "test.tab:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + ], + "versions": [ + "versions.yml:md5,2e6b6d8809f5a17f38f4d27c45dcb22f" + ], + "wig": [ + [ + { + "id": "test", + "single_end": false + }, + "test.Signal.UniqueMultiple.str1.out.wig:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + } ], - "timestamp": "2023-11-23T13:29:01.857804" + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.3" + }, + "timestamp": "2024-07-22T15:16:28.874293" }, - "homo_sapiens - paired_end - arriba - tab": { + "homo_sapiens - single_end": { "content": [ + "test.Log.final.out", + "test.Log.out", + "test.Log.progress.out", [ [ { "id": "test", - "single_end": false + "single_end": true }, - "test.SJ.out.tab:md5,5155c9fd1f787ad6d7d80987fb06219c" + "test.Aligned.sortedByCoord.out.bam:md5,c6cfaccaf91bc7fdabed3cfe236d4535" ] - ] - ], - "timestamp": "2023-12-04T17:56:12.347549723" - }, - "homo_sapiens - single_end - wig": { - "content": [ + ], [ - - ] - ], - "timestamp": "2023-11-23T13:22:55.24701" - }, - "homo_sapiens - paired_end - sam": { - "content": [ + [ + { + "id": "test", + "single_end": true + }, + "test.Aligned.sortedByCoord.out.bam:md5,c6cfaccaf91bc7fdabed3cfe236d4535" + ] + ], [ - ] - ], - "timestamp": "2023-11-23T13:23:33.383818" - }, - "homo_sapiens - paired_end - arriba - versions": { - "content": [ + ], [ - "versions.yml:md5,2e6b6d8809f5a17f38f4d27c45dcb22f" - ] - ], - "timestamp": "2023-12-04T17:56:12.431212643" - }, - "homo_sapiens - paired_end - multiple - bedgraph": { - "content": [ + + ], [ [ { "id": "test", - "single_end": false + "single_end": true }, [ - "test.Signal.Unique.str1.out.bg:md5,d7bf8b70b436ca048a62513e1d0ece3a", - "test.Signal.UniqueMultiple.str1.out.bg:md5,686d58493b9eb445b56ace4d67f76ef6" + "test.Signal.Unique.str1.out.bg:md5,c56fc1472776fb927eaf62d973da5f9a", + "test.Signal.UniqueMultiple.str1.out.bg:md5,e93373cf6f2a2a9506e2efdb260cdd4f" ] ] - ] - ], - "timestamp": "2023-12-04T18:01:20.07119229" - }, - "homo_sapiens - paired_end - read_per_gene_tab": { - "content": [ + ], [ - ] - ], - "timestamp": "2023-11-23T13:23:33.368841" - }, - "homo_sapiens - paired_end - arriba - bedgraph": { - "content": [ + ], [ - ] - ], - "timestamp": "2023-11-23T13:25:07.102537" - }, - "homo_sapiens - single_end - junction": { - "content": [ + ], [ - ] - ], - "timestamp": "2023-11-23T13:22:55.185369" - }, - "homo_sapiens - paired_end - arriba - spl_junc_tab": { - "content": [ + ], + [ + + ], [ [ { "id": "test", - "single_end": false + "single_end": true }, - "test.SJ.out.tab:md5,5155c9fd1f787ad6d7d80987fb06219c" + "test.SJ.out.tab:md5,75a516ab950fb958f40b29996474949c" ] - ] - ], - "timestamp": "2023-12-04T17:56:12.268388251" - }, - "homo_sapiens - 
single_end - sam": { - "content": [ + ], [ - - ] - ], - "timestamp": "2023-11-23T13:22:55.216183" - }, - "homo_sapiens - paired_end - fastq": { - "content": [ + [ + { + "id": "test", + "single_end": true + }, + "test.SJ.out.tab:md5,75a516ab950fb958f40b29996474949c" + ] + ], [ - ] - ], - "timestamp": "2023-11-23T13:23:33.327236" - }, - "homo_sapiens - single_end - versions": { - "content": [ + ], [ "versions.yml:md5,2e6b6d8809f5a17f38f4d27c45dcb22f" ] ], - "timestamp": "2023-12-04T17:53:26.664210196" - }, - "homo_sapiens - paired_end - multiple - log_out": { - "content": [ - "test.Log.out" - ], - "timestamp": "2023-11-23T13:29:01.022176" - }, - "homo_sapiens - paired_end - arriba - fastq": { - "content": [ - [ - - ] - ], - "timestamp": "2023-11-23T13:25:07.15277" - }, - "homo_sapiens - paired_end - multiple - junction": { - "content": [ - [ - - ] - ], - "timestamp": "2023-11-23T13:29:01.52923" + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.3" + }, + "timestamp": "2024-07-22T18:02:34.35338" }, - "homo_sapiens - paired_end - multiple - spl_junc_tab": { + "homo_sapiens - paired_end": { "content": [ + "test.Log.final.out", + "test.Log.out", + "test.Log.progress.out", [ [ { "id": "test", "single_end": false }, - "test.SJ.out.tab:md5,069877e053714e23010fe4e1c003b4a2" + "test.Aligned.sortedByCoord.out.bam:md5,b9ee1c607e07323bc1652ef3babb543f" ] - ] - ], - "timestamp": "2023-12-04T18:01:20.189486201" - }, - "homo_sapiens - paired_end - starfusion - log_final": { - "content": [ - "test.Log.final.out" - ], - "timestamp": "2023-11-23T13:27:55.905883" - }, - "homo_sapiens - paired_end - starfusion - fastq": { - "content": [ - [ - - ] - ], - "timestamp": "2023-11-23T13:27:56.192302" - }, - "homo_sapiens - paired_end - multiple - sam": { - "content": [ - [ - - ] - ], - "timestamp": "2023-11-23T13:29:01.661837" - }, - "homo_sapiens - paired_end - multiple - log_final": { - "content": [ - "test.Log.final.out" - ], - "timestamp": "2023-11-23T13:29:00.966417" - }, - "homo_sapiens - paired_end - starfusion - bam": { - "content": [ + ], [ [ { "id": "test", "single_end": false }, - "test.Aligned.out.bam:md5,bcad07b838f6762fc01eea52b5cd3f84" + "test.Aligned.sortedByCoord.out.bam:md5,b9ee1c607e07323bc1652ef3babb543f" ] - ] - ], - "timestamp": "2023-12-04T17:59:58.53235164" - }, - "homo_sapiens - paired_end - arriba - junction": { - "content": [ + ], [ - ] - ], - "timestamp": "2023-11-23T13:25:07.202776" - }, - "homo_sapiens - single_end - bedgraph": { - "content": [ + ], + [ + + ], [ [ { "id": "test", - "single_end": true + "single_end": false }, [ - "test.Signal.Unique.str1.out.bg:md5,c56fc1472776fb927eaf62d973da5f9a", - "test.Signal.UniqueMultiple.str1.out.bg:md5,e93373cf6f2a2a9506e2efdb260cdd4f" + "test.Signal.Unique.str1.out.bg:md5,d7bf8b70b436ca048a62513e1d0ece3a", + "test.Signal.UniqueMultiple.str1.out.bg:md5,686d58493b9eb445b56ace4d67f76ef6" ] ] - ] - ], - "timestamp": "2023-12-04T17:53:26.394863748" - }, - "homo_sapiens - paired_end - arriba - read_per_gene_tab": { - "content": [ + ], [ - ] - ], - "timestamp": "2023-11-23T13:25:07.251962" - }, - "homo_sapiens - paired_end - starfusion - bam_sorted": { - "content": [ + ], [ - ] - ], - "timestamp": "2023-11-23T13:27:56.040843" - }, - "homo_sapiens - single_end - bam_unsorted": { - "content": [ + ], [ - ] - ], - "timestamp": "2023-11-23T13:22:55.154172" - }, - "homo_sapiens - paired_end - bam": { - "content": [ + ], + [ + + ], [ [ { "id": "test", "single_end": false }, - "test.Aligned.sortedByCoord.out.bam:md5,b9ee1c607e07323bc1652ef3babb543f" + 
"test.SJ.out.tab:md5,844af19ab0fc8cd9a3f75228445aca0d" ] - ] - ], - "timestamp": "2023-12-04T17:54:11.934832258" - }, - "homo_sapiens - paired_end - arriba - bam_transcript": { - "content": [ + ], + [ + [ + { + "id": "test", + "single_end": false + }, + "test.SJ.out.tab:md5,844af19ab0fc8cd9a3f75228445aca0d" + ] + ], [ - ] - ], - "timestamp": "2023-11-23T13:25:06.998817" - }, - "homo_sapiens - paired_end - log_out": { - "content": [ - "test.Log.out" - ], - "timestamp": "2023-11-23T13:23:33.259699" - }, - "homo_sapiens - paired_end - arriba - log_out": { - "content": [ - "test.Log.out" - ], - "timestamp": "2023-11-23T13:25:06.849451" - }, - "homo_sapiens - paired_end - multiple - versions": { - "content": [ + ], [ "versions.yml:md5,2e6b6d8809f5a17f38f4d27c45dcb22f" ] ], - "timestamp": "2023-12-04T18:01:20.393705142" + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.3" + }, + "timestamp": "2024-07-22T18:03:16.701923" }, - "homo_sapiens - paired_end - starfusion - bam_transcript": { + "homo_sapiens - paired_end - multiple - stub": { "content": [ - [ - - ] + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + "test.Log.final.out:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + [ + { + "id": "test", + "single_end": false + }, + "test.Log.out:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "10": [ + [ + { + "id": "test", + "single_end": false + }, + "test.SJ.out.tab:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "11": [ + [ + { + "id": "test", + "single_end": false + }, + "test.ReadsPerGene.out.tab:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "12": [ + [ + { + "id": "test", + "single_end": false + }, + "test.Chimeric.out.junction:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "13": [ + [ + { + "id": "test", + "single_end": false + }, + "test.out.sam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "14": [ + [ + { + "id": "test", + "single_end": false + }, + "test.Signal.UniqueMultiple.str1.out.wig:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "15": [ + [ + { + "id": "test", + "single_end": false + }, + "test.Signal.UniqueMultiple.str1.out.bg:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "2": [ + [ + { + "id": "test", + "single_end": false + }, + "test.Log.progress.out:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "3": [ + "versions.yml:md5,2e6b6d8809f5a17f38f4d27c45dcb22f" + ], + "4": [ + [ + { + "id": "test", + "single_end": false + }, + [ + "test.Aligned.sortedByCoord.out.bam:md5,d41d8cd98f00b204e9800998ecf8427e", + "test.sortedByCoord.out.bam:md5,d41d8cd98f00b204e9800998ecf8427e", + "testXd.out.bam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + ], + "5": [ + [ + { + "id": "test", + "single_end": false + }, + [ + "test.Aligned.sortedByCoord.out.bam:md5,d41d8cd98f00b204e9800998ecf8427e", + "test.sortedByCoord.out.bam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + ], + "6": [ + [ + { + "id": "test", + "single_end": false + }, + "test.toTranscriptome.out.bam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "7": [ + [ + { + "id": "test", + "single_end": false + }, + "test.Aligned.unsort.out.bam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "8": [ + [ + { + "id": "test", + "single_end": false + }, + [ + "test.unmapped_1.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940", + "test.unmapped_2.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ] + ], + "9": [ + [ + { + "id": "test", + "single_end": false + }, + [ + "test.ReadsPerGene.out.tab:md5,d41d8cd98f00b204e9800998ecf8427e", + "test.SJ.out.tab:md5,d41d8cd98f00b204e9800998ecf8427e", + 
"test.tab:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + ], + "bam": [ + [ + { + "id": "test", + "single_end": false + }, + [ + "test.Aligned.sortedByCoord.out.bam:md5,d41d8cd98f00b204e9800998ecf8427e", + "test.sortedByCoord.out.bam:md5,d41d8cd98f00b204e9800998ecf8427e", + "testXd.out.bam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + ], + "bam_sorted": [ + [ + { + "id": "test", + "single_end": false + }, + [ + "test.Aligned.sortedByCoord.out.bam:md5,d41d8cd98f00b204e9800998ecf8427e", + "test.sortedByCoord.out.bam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + ], + "bam_transcript": [ + [ + { + "id": "test", + "single_end": false + }, + "test.toTranscriptome.out.bam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "bam_unsorted": [ + [ + { + "id": "test", + "single_end": false + }, + "test.Aligned.unsort.out.bam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "bedgraph": [ + [ + { + "id": "test", + "single_end": false + }, + "test.Signal.UniqueMultiple.str1.out.bg:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "fastq": [ + [ + { + "id": "test", + "single_end": false + }, + [ + "test.unmapped_1.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940", + "test.unmapped_2.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ] + ], + "junction": [ + [ + { + "id": "test", + "single_end": false + }, + "test.Chimeric.out.junction:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "log_final": [ + [ + { + "id": "test", + "single_end": false + }, + "test.Log.final.out:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "log_out": [ + [ + { + "id": "test", + "single_end": false + }, + "test.Log.out:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "log_progress": [ + [ + { + "id": "test", + "single_end": false + }, + "test.Log.progress.out:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "read_per_gene_tab": [ + [ + { + "id": "test", + "single_end": false + }, + "test.ReadsPerGene.out.tab:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "sam": [ + [ + { + "id": "test", + "single_end": false + }, + "test.out.sam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "spl_junc_tab": [ + [ + { + "id": "test", + "single_end": false + }, + "test.SJ.out.tab:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "tab": [ + [ + { + "id": "test", + "single_end": false + }, + [ + "test.ReadsPerGene.out.tab:md5,d41d8cd98f00b204e9800998ecf8427e", + "test.SJ.out.tab:md5,d41d8cd98f00b204e9800998ecf8427e", + "test.tab:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + ], + "versions": [ + "versions.yml:md5,2e6b6d8809f5a17f38f4d27c45dcb22f" + ], + "wig": [ + [ + { + "id": "test", + "single_end": false + }, + "test.Signal.UniqueMultiple.str1.out.wig:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + } ], - "timestamp": "2023-11-23T13:27:56.082408" + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.3" + }, + "timestamp": "2024-07-22T15:16:51.360287" }, - "homo_sapiens - paired_end - starfusion - tab": { + "homo_sapiens - paired_end - multiple": { "content": [ + "test.Log.final.out", + "test.Log.out", + "test.Log.progress.out", [ [ { "id": "test", "single_end": false }, - "test.SJ.out.tab:md5,19c3faa1bfa9a0cc5e4c45f17065b53a" + "test.Aligned.sortedByCoord.out.bam:md5,ab07c21d63ab0a6c07d171d213c81d5a" ] - ] - ], - "timestamp": "2023-12-04T17:59:58.818041322" - }, - "homo_sapiens - single_end - fastq": { - "content": [ + ], + [ + [ + { + "id": "test", + "single_end": false + }, + "test.Aligned.sortedByCoord.out.bam:md5,ab07c21d63ab0a6c07d171d213c81d5a" + ] + ], [ - ] - ], - "timestamp": "2023-11-23T13:22:55.175307" - }, - "homo_sapiens - 
paired_end - tab": { - "content": [ + ], + [ + + ], [ [ { "id": "test", "single_end": false }, - "test.SJ.out.tab:md5,844af19ab0fc8cd9a3f75228445aca0d" + [ + "test.Signal.Unique.str1.out.bg:md5,d7bf8b70b436ca048a62513e1d0ece3a", + "test.Signal.UniqueMultiple.str1.out.bg:md5,686d58493b9eb445b56ace4d67f76ef6" + ] ] - ] - ], - "timestamp": "2023-12-04T17:54:12.255481058" - }, - "homo_sapiens - paired_end - starfusion - bedgraph": { - "content": [ + ], [ - ] - ], - "timestamp": "2023-11-23T13:27:56.155413" - }, - "homo_sapiens - single_end - bam_transcript": { - "content": [ + ], [ - ] - ], - "timestamp": "2023-11-23T13:22:55.144852" - }, - "homo_sapiens - paired_end - versions": { - "content": [ + ], [ - "versions.yml:md5,2e6b6d8809f5a17f38f4d27c45dcb22f" - ] - ], - "timestamp": "2023-12-04T17:54:12.343840482" - }, - "homo_sapiens - paired_end - multiple - tab": { - "content": [ + + ], + [ + + ], [ [ { @@ -385,385 +1173,801 @@ }, "test.SJ.out.tab:md5,069877e053714e23010fe4e1c003b4a2" ] - ] - ], - "timestamp": "2023-12-04T18:01:20.291692062" - }, - "homo_sapiens - single_end - bam": { - "content": [ + ], [ [ { "id": "test", - "single_end": true + "single_end": false }, - "test.Aligned.sortedByCoord.out.bam:md5,c6cfaccaf91bc7fdabed3cfe236d4535" + "test.SJ.out.tab:md5,069877e053714e23010fe4e1c003b4a2" ] - ] - ], - "timestamp": "2023-12-04T17:53:26.265642675" - }, - "homo_sapiens - paired_end - arriba - wig": { - "content": [ + ], [ + ], + [ + "versions.yml:md5,2e6b6d8809f5a17f38f4d27c45dcb22f" ] ], - "timestamp": "2023-11-23T13:25:07.444214" + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.3" + }, + "timestamp": "2024-07-22T13:13:28.987438" }, - "homo_sapiens - paired_end - log_progress": { + "homo_sapiens - paired_end - stub": { "content": [ - [ - [ - { - "id": "test", - "single_end": false - }, - "test.Log.progress.out:md5,b2bd061d6cbaaf3d6d3b1fed547f69b8" + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + "test.Log.final.out:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + [ + { + "id": "test", + "single_end": false + }, + "test.Log.out:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "10": [ + [ + { + "id": "test", + "single_end": false + }, + "test.SJ.out.tab:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "11": [ + [ + { + "id": "test", + "single_end": false + }, + "test.ReadsPerGene.out.tab:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "12": [ + [ + { + "id": "test", + "single_end": false + }, + "test.Chimeric.out.junction:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "13": [ + [ + { + "id": "test", + "single_end": false + }, + "test.out.sam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "14": [ + [ + { + "id": "test", + "single_end": false + }, + "test.Signal.UniqueMultiple.str1.out.wig:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "15": [ + [ + { + "id": "test", + "single_end": false + }, + "test.Signal.UniqueMultiple.str1.out.bg:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "2": [ + [ + { + "id": "test", + "single_end": false + }, + "test.Log.progress.out:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "3": [ + "versions.yml:md5,2e6b6d8809f5a17f38f4d27c45dcb22f" + ], + "4": [ + [ + { + "id": "test", + "single_end": false + }, + [ + "test.Aligned.sortedByCoord.out.bam:md5,d41d8cd98f00b204e9800998ecf8427e", + "test.sortedByCoord.out.bam:md5,d41d8cd98f00b204e9800998ecf8427e", + "testXd.out.bam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + ], + "5": [ + [ + { + "id": "test", + "single_end": false + }, + [ + 
"test.Aligned.sortedByCoord.out.bam:md5,d41d8cd98f00b204e9800998ecf8427e", + "test.sortedByCoord.out.bam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + ], + "6": [ + [ + { + "id": "test", + "single_end": false + }, + "test.toTranscriptome.out.bam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "7": [ + [ + { + "id": "test", + "single_end": false + }, + "test.Aligned.unsort.out.bam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "8": [ + [ + { + "id": "test", + "single_end": false + }, + [ + "test.unmapped_1.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940", + "test.unmapped_2.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ] + ], + "9": [ + [ + { + "id": "test", + "single_end": false + }, + [ + "test.ReadsPerGene.out.tab:md5,d41d8cd98f00b204e9800998ecf8427e", + "test.SJ.out.tab:md5,d41d8cd98f00b204e9800998ecf8427e", + "test.tab:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + ], + "bam": [ + [ + { + "id": "test", + "single_end": false + }, + [ + "test.Aligned.sortedByCoord.out.bam:md5,d41d8cd98f00b204e9800998ecf8427e", + "test.sortedByCoord.out.bam:md5,d41d8cd98f00b204e9800998ecf8427e", + "testXd.out.bam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + ], + "bam_sorted": [ + [ + { + "id": "test", + "single_end": false + }, + [ + "test.Aligned.sortedByCoord.out.bam:md5,d41d8cd98f00b204e9800998ecf8427e", + "test.sortedByCoord.out.bam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + ], + "bam_transcript": [ + [ + { + "id": "test", + "single_end": false + }, + "test.toTranscriptome.out.bam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "bam_unsorted": [ + [ + { + "id": "test", + "single_end": false + }, + "test.Aligned.unsort.out.bam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "bedgraph": [ + [ + { + "id": "test", + "single_end": false + }, + "test.Signal.UniqueMultiple.str1.out.bg:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "fastq": [ + [ + { + "id": "test", + "single_end": false + }, + [ + "test.unmapped_1.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940", + "test.unmapped_2.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ] + ], + "junction": [ + [ + { + "id": "test", + "single_end": false + }, + "test.Chimeric.out.junction:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "log_final": [ + [ + { + "id": "test", + "single_end": false + }, + "test.Log.final.out:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "log_out": [ + [ + { + "id": "test", + "single_end": false + }, + "test.Log.out:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "log_progress": [ + [ + { + "id": "test", + "single_end": false + }, + "test.Log.progress.out:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "read_per_gene_tab": [ + [ + { + "id": "test", + "single_end": false + }, + "test.ReadsPerGene.out.tab:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "sam": [ + [ + { + "id": "test", + "single_end": false + }, + "test.out.sam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "spl_junc_tab": [ + [ + { + "id": "test", + "single_end": false + }, + "test.SJ.out.tab:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "tab": [ + [ + { + "id": "test", + "single_end": false + }, + [ + "test.ReadsPerGene.out.tab:md5,d41d8cd98f00b204e9800998ecf8427e", + "test.SJ.out.tab:md5,d41d8cd98f00b204e9800998ecf8427e", + "test.tab:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + ], + "versions": [ + "versions.yml:md5,2e6b6d8809f5a17f38f4d27c45dcb22f" + ], + "wig": [ + [ + { + "id": "test", + "single_end": false + }, + "test.Signal.UniqueMultiple.str1.out.wig:md5,d41d8cd98f00b204e9800998ecf8427e" + ] ] - ] - ], - "timestamp": 
"2023-12-04T17:54:12.126063825" - }, - "homo_sapiens - paired_end - arriba - log_final": { - "content": [ - "test.Log.final.out" - ], - "timestamp": "2023-11-23T13:25:06.829799" - }, - "homo_sapiens - paired_end - bam_unsorted": { - "content": [ - [ - - ] - ], - "timestamp": "2023-11-23T13:23:33.300509" - }, - "homo_sapiens - paired_end - arriba - sam": { - "content": [ - [ - - ] + } ], - "timestamp": "2023-11-23T13:25:07.300383" + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.3" + }, + "timestamp": "2024-07-22T15:16:16.798018" }, - "homo_sapiens - paired_end - multiple - bam": { + "homo_sapiens - paired_end - starfusion": { "content": [ + "test.Log.final.out", + "test.Log.out", + "test.Log.progress.out", [ [ { "id": "test", "single_end": false }, - "test.Aligned.sortedByCoord.out.bam:md5,ab07c21d63ab0a6c07d171d213c81d5a" + "test.Aligned.out.bam:md5,bcad07b838f6762fc01eea52b5cd3f84" ] - ] - ], - "timestamp": "2023-12-04T18:01:19.851247126" - }, - "homo_sapiens - paired_end - multiple - fastq": { - "content": [ + ], [ - ] - ], - "timestamp": "2023-11-23T13:29:01.462257" - }, - "homo_sapiens - single_end - bam_sorted": { - "content": [ - [ - [ - { - "id": "test", - "single_end": true - }, - "test.Aligned.sortedByCoord.out.bam:md5,c6cfaccaf91bc7fdabed3cfe236d4535" - ] - ] - ], - "timestamp": "2023-12-04T17:53:26.335457371" - }, - "homo_sapiens - paired_end - arriba - bam_sorted": { - "content": [ + ], [ - ] - ], - "timestamp": "2023-11-23T13:25:06.94699" - }, - "homo_sapiens - paired_end - starfusion - junction": { - "content": [ - [ - [ - { - "id": "test", - "single_end": false - }, - "test.Chimeric.out.junction:md5,c10ef219f4a30e83711b995bc5e40dba" - ] - ] - ], - "timestamp": "2023-12-04T17:59:58.641115828" - }, - "homo_sapiens - single_end - tab": { - "content": [ + ], [ - [ - { - "id": "test", - "single_end": true - }, - "test.SJ.out.tab:md5,75a516ab950fb958f40b29996474949c" - ] - ] - ], - "timestamp": "2023-12-04T17:53:26.580593434" - }, - "homo_sapiens - paired_end - starfusion - versions": { - "content": [ + + ], [ - "versions.yml:md5,2e6b6d8809f5a17f38f4d27c45dcb22f" - ] - ], - "timestamp": "2023-12-04T17:59:58.907317103" - }, - "homo_sapiens - paired_end - multiple - bam_unsorted": { - "content": [ + + ], [ - ] - ], - "timestamp": "2023-11-23T13:29:01.330463" - }, - "homo_sapiens - paired_end - arriba - log_progress": { - "content": [ - "test.Log.progress.out" - ], - "timestamp": "2023-11-23T13:25:06.86866" - }, - "homo_sapiens - paired_end - bedgraph": { - "content": [ + ], [ [ { "id": "test", "single_end": false }, - [ - "test.Signal.Unique.str1.out.bg:md5,d7bf8b70b436ca048a62513e1d0ece3a", - "test.Signal.UniqueMultiple.str1.out.bg:md5,686d58493b9eb445b56ace4d67f76ef6" - ] + "test.Chimeric.out.junction:md5,c10ef219f4a30e83711b995bc5e40dba" ] - ] - ], - "timestamp": "2023-12-04T17:54:12.064121304" - }, - "homo_sapiens - paired_end - starfusion - bam_unsorted": { - "content": [ - [ - - ] - ], - "timestamp": "2023-11-23T13:27:56.118974" - }, - "homo_sapiens - paired_end - starfusion - read_per_gene_tab": { - "content": [ + ], [ - ] - ], - "timestamp": "2023-11-23T13:27:56.264699" - }, - "homo_sapiens - paired_end - multiple - log_progress": { - "content": [ - "test.Log.progress.out" - ], - "timestamp": "2023-11-23T13:29:01.076947" - }, - "homo_sapiens - paired_end - arriba - bam_unsorted": { - "content": [ + ], [ - ] - ], - "timestamp": "2023-11-23T13:25:07.050409" - }, - "homo_sapiens - paired_end - bam_sorted": { - "content": [ + ], [ [ { "id": "test", "single_end": false }, 
- "test.Aligned.sortedByCoord.out.bam:md5,b9ee1c607e07323bc1652ef3babb543f" + "test.SJ.out.tab:md5,19c3faa1bfa9a0cc5e4c45f17065b53a" ] - ] - ], - "timestamp": "2023-12-04T17:54:12.002180537" - }, - "homo_sapiens - single_end - spl_junc_tab": { - "content": [ + ], [ [ { "id": "test", - "single_end": true + "single_end": false }, - "test.SJ.out.tab:md5,75a516ab950fb958f40b29996474949c" + "test.SJ.out.tab:md5,19c3faa1bfa9a0cc5e4c45f17065b53a" ] + ], + [ + + ], + [ + "versions.yml:md5,2e6b6d8809f5a17f38f4d27c45dcb22f" ] ], - "timestamp": "2023-12-04T17:53:26.50932751" + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.3" + }, + "timestamp": "2024-07-22T13:10:55.371956" }, - "homo_sapiens - paired_end - starfusion - spl_junc_tab": { + "homo_sapiens - paired_end - arriba": { "content": [ + "test.Log.final.out", + "test.Log.out", + "test.Log.progress.out", [ [ { "id": "test", "single_end": false }, - "test.SJ.out.tab:md5,19c3faa1bfa9a0cc5e4c45f17065b53a" + "test.Aligned.out.bam:md5,c1b1747f5873f2d17762725636e891d5" ] - ] - ], - "timestamp": "2023-12-04T17:59:58.731699486" - }, - "homo_sapiens - single_end - log_out": { - "content": [ - "test.Log.out" - ], - "timestamp": "2023-11-23T13:22:55.126286" - }, - "homo_sapiens - paired_end - log_final": { - "content": [ - "test.Log.final.out" - ], - "timestamp": "2023-11-23T13:23:33.253884" - }, - "homo_sapiens - single_end - log_final": { - "content": [ - "test.Log.final.out" - ], - "timestamp": "2023-11-23T13:22:55.11799" - }, - "homo_sapiens - paired_end - bam_transcript": { - "content": [ + ], [ - ] - ], - "timestamp": "2023-11-23T13:23:33.287684" - }, - "homo_sapiens - paired_end - starfusion - log_progress": { - "content": [ - "test.Log.progress.out" - ], - "timestamp": "2023-11-23T13:27:55.971484" - }, - "homo_sapiens - paired_end - multiple - bam_transcript": { - "content": [ + ], [ - ] - ], - "timestamp": "2023-11-23T13:29:01.264176" - }, - "homo_sapiens - paired_end - multiple - read_per_gene_tab": { - "content": [ + ], [ - ] - ], - "timestamp": "2023-11-23T13:29:01.596406" - }, - "homo_sapiens - single_end - read_per_gene_tab": { - "content": [ + ], [ - ] - ], - "timestamp": "2023-11-23T13:22:55.205936" - }, - "homo_sapiens - paired_end - junction": { - "content": [ + ], [ - ] - ], - "timestamp": "2023-11-23T13:23:33.340653" - }, - "homo_sapiens - paired_end - spl_junc_tab": { - "content": [ + ], [ - [ - { - "id": "test", - "single_end": false - }, - "test.SJ.out.tab:md5,844af19ab0fc8cd9a3f75228445aca0d" - ] - ] - ], - "timestamp": "2023-12-04T17:54:12.185730856" - }, - "homo_sapiens - paired_end - starfusion - sam": { - "content": [ + + ], [ - ] - ], - "timestamp": "2023-11-23T13:27:56.300637" - }, - "homo_sapiens - paired_end - arriba - bam": { - "content": [ + ], + [ + + ], [ [ { "id": "test", "single_end": false }, - "test.Aligned.out.bam:md5,c1b1747f5873f2d17762725636e891d5" + "test.SJ.out.tab:md5,5155c9fd1f787ad6d7d80987fb06219c" ] - ] - ], - "timestamp": "2023-12-04T17:56:12.190560178" - }, - "homo_sapiens - single_end - log_progress": { - "content": [ + ], [ [ { "id": "test", - "single_end": true + "single_end": false }, - "test.Log.progress.out:md5,b2bd061d6cbaaf3d6d3b1fed547f69b8" + "test.SJ.out.tab:md5,5155c9fd1f787ad6d7d80987fb06219c" ] - ] - ], - "timestamp": "2023-12-04T17:53:26.450352138" - }, - "homo_sapiens - paired_end - starfusion - wig": { - "content": [ + ], [ - ] - ], - "timestamp": "2023-11-23T13:27:56.422018" - }, - "homo_sapiens - paired_end - wig": { - "content": [ + ], [ - + 
"versions.yml:md5,2e6b6d8809f5a17f38f4d27c45dcb22f" ] ], - "timestamp": "2023-11-23T13:23:33.429457" + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.3" + }, + "timestamp": "2024-07-22T13:05:10.7534" }, - "homo_sapiens - paired_end - starfusion - log_out": { + "homo_sapiens - paired_end - starfusion - stub": { "content": [ - "test.Log.out" + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + "test.Log.final.out:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + [ + { + "id": "test", + "single_end": false + }, + "test.Log.out:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "10": [ + [ + { + "id": "test", + "single_end": false + }, + "test.SJ.out.tab:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "11": [ + [ + { + "id": "test", + "single_end": false + }, + "test.ReadsPerGene.out.tab:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "12": [ + [ + { + "id": "test", + "single_end": false + }, + "test.Chimeric.out.junction:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "13": [ + [ + { + "id": "test", + "single_end": false + }, + "test.out.sam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "14": [ + [ + { + "id": "test", + "single_end": false + }, + "test.Signal.UniqueMultiple.str1.out.wig:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "15": [ + [ + { + "id": "test", + "single_end": false + }, + "test.Signal.UniqueMultiple.str1.out.bg:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "2": [ + [ + { + "id": "test", + "single_end": false + }, + "test.Log.progress.out:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "3": [ + "versions.yml:md5,2e6b6d8809f5a17f38f4d27c45dcb22f" + ], + "4": [ + [ + { + "id": "test", + "single_end": false + }, + [ + "test.Aligned.sortedByCoord.out.bam:md5,d41d8cd98f00b204e9800998ecf8427e", + "test.sortedByCoord.out.bam:md5,d41d8cd98f00b204e9800998ecf8427e", + "testXd.out.bam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + ], + "5": [ + [ + { + "id": "test", + "single_end": false + }, + [ + "test.Aligned.sortedByCoord.out.bam:md5,d41d8cd98f00b204e9800998ecf8427e", + "test.sortedByCoord.out.bam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + ], + "6": [ + [ + { + "id": "test", + "single_end": false + }, + "test.toTranscriptome.out.bam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "7": [ + [ + { + "id": "test", + "single_end": false + }, + "test.Aligned.unsort.out.bam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "8": [ + [ + { + "id": "test", + "single_end": false + }, + [ + "test.unmapped_1.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940", + "test.unmapped_2.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ] + ], + "9": [ + [ + { + "id": "test", + "single_end": false + }, + [ + "test.ReadsPerGene.out.tab:md5,d41d8cd98f00b204e9800998ecf8427e", + "test.SJ.out.tab:md5,d41d8cd98f00b204e9800998ecf8427e", + "test.tab:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + ], + "bam": [ + [ + { + "id": "test", + "single_end": false + }, + [ + "test.Aligned.sortedByCoord.out.bam:md5,d41d8cd98f00b204e9800998ecf8427e", + "test.sortedByCoord.out.bam:md5,d41d8cd98f00b204e9800998ecf8427e", + "testXd.out.bam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + ], + "bam_sorted": [ + [ + { + "id": "test", + "single_end": false + }, + [ + "test.Aligned.sortedByCoord.out.bam:md5,d41d8cd98f00b204e9800998ecf8427e", + "test.sortedByCoord.out.bam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + ], + "bam_transcript": [ + [ + { + "id": "test", + "single_end": false + }, + "test.toTranscriptome.out.bam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "bam_unsorted": 
[ + [ + { + "id": "test", + "single_end": false + }, + "test.Aligned.unsort.out.bam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "bedgraph": [ + [ + { + "id": "test", + "single_end": false + }, + "test.Signal.UniqueMultiple.str1.out.bg:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "fastq": [ + [ + { + "id": "test", + "single_end": false + }, + [ + "test.unmapped_1.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940", + "test.unmapped_2.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ] + ], + "junction": [ + [ + { + "id": "test", + "single_end": false + }, + "test.Chimeric.out.junction:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "log_final": [ + [ + { + "id": "test", + "single_end": false + }, + "test.Log.final.out:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "log_out": [ + [ + { + "id": "test", + "single_end": false + }, + "test.Log.out:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "log_progress": [ + [ + { + "id": "test", + "single_end": false + }, + "test.Log.progress.out:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "read_per_gene_tab": [ + [ + { + "id": "test", + "single_end": false + }, + "test.ReadsPerGene.out.tab:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "sam": [ + [ + { + "id": "test", + "single_end": false + }, + "test.out.sam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "spl_junc_tab": [ + [ + { + "id": "test", + "single_end": false + }, + "test.SJ.out.tab:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "tab": [ + [ + { + "id": "test", + "single_end": false + }, + [ + "test.ReadsPerGene.out.tab:md5,d41d8cd98f00b204e9800998ecf8427e", + "test.SJ.out.tab:md5,d41d8cd98f00b204e9800998ecf8427e", + "test.tab:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + ], + "versions": [ + "versions.yml:md5,2e6b6d8809f5a17f38f4d27c45dcb22f" + ], + "wig": [ + [ + { + "id": "test", + "single_end": false + }, + "test.Signal.UniqueMultiple.str1.out.wig:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + } ], - "timestamp": "2023-11-23T13:27:55.93945" + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.3" + }, + "timestamp": "2024-07-22T15:16:40.64399" } } \ No newline at end of file diff --git a/modules/nf-core/star/genomegenerate/environment.yml b/modules/nf-core/star/genomegenerate/environment.yml index 791f255..1debc4c 100644 --- a/modules/nf-core/star/genomegenerate/environment.yml +++ b/modules/nf-core/star/genomegenerate/environment.yml @@ -1,10 +1,9 @@ -name: star_genomegenerate channels: - conda-forge - bioconda - - defaults + dependencies: - - bioconda::samtools=1.18 - bioconda::htslib=1.18 + - bioconda::samtools=1.18 - bioconda::star=2.7.10a - conda-forge::gawk=5.1.0 diff --git a/modules/nf-core/star/genomegenerate/meta.yml b/modules/nf-core/star/genomegenerate/meta.yml index 1061e1b..33c1f65 100644 --- a/modules/nf-core/star/genomegenerate/meta.yml +++ b/modules/nf-core/star/genomegenerate/meta.yml @@ -14,37 +14,40 @@ tools: manual: https://github.com/alexdobin/STAR/blob/master/doc/STARmanual.pdf doi: 10.1093/bioinformatics/bts635 licence: ["MIT"] + identifier: biotools:star input: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test', single_end:false ] - - fasta: - type: file - description: Fasta file of the reference genome - - meta2: - type: map - description: | - Groovy Map containing reference information - e.g. [ id:'test' ] - - gtf: - type: file - description: GTF file of the reference genome + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - fasta: + type: file + description: Fasta file of the reference genome + - - meta2: + type: map + description: | + Groovy Map containing reference information + e.g. [ id:'test' ] + - gtf: + type: file + description: GTF file of the reference genome output: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test', single_end:false ] - index: - type: directory - description: Folder containing the star index files - pattern: "star" + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - star: + type: directory + description: Folder containing the star index files + pattern: "star" - versions: - type: file - description: File containing software versions - pattern: "versions.yml" + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" authors: - "@kevinmenden" - "@drpatelh" diff --git a/modules/nf-core/star/genomegenerate/tests/main.nf.test b/modules/nf-core/star/genomegenerate/tests/main.nf.test index c17c8ba..4d619c4 100644 --- a/modules/nf-core/star/genomegenerate/tests/main.nf.test +++ b/modules/nf-core/star/genomegenerate/tests/main.nf.test @@ -28,15 +28,15 @@ nextflow_process { then { assertAll( { assert process.success }, - { assert snapshot(file(process.out.index[0][1]).listFiles().collect { it.getName() }.sort().toString()).match("fasta_gtf_index") }, - { assert snapshot(process.out.versions).match("fasta_gtf_versions") } + { assert snapshot( + file(process.out.index[0][1]).listFiles().collect { it.getName() }.sort().toString(), + process.out.versions) + .match() } ) } } - test("fasta_gtf_stub") { - - options '-stub' + test("fasta") { when { process { @@ -45,10 +45,7 @@ nextflow_process { [ id:'test_fasta' ], [ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true) ] ]) - input[1] = Channel.of([ - [ id:'test_gtf' ], - [ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.gtf', checkIfExists: true) ] - ]) + input[1] = Channel.of([ [], [] ]) """ } } @@ -56,13 +53,17 @@ nextflow_process { then { assertAll( { assert process.success }, - { assert snapshot(file(process.out.index[0][1]).listFiles().collect { it.getName() }.sort().toString()).match("fasta_gtf_stub_index") }, - { assert snapshot(process.out.versions).match("fasta_gtf_stub_versions") } + { assert snapshot( + file(process.out.index[0][1]).listFiles().collect { it.getName() }.sort().toString(), + process.out.versions + ).match() } ) } } - test("fasta") { + test("fasta_gtf_stub") { + + options '-stub' when { process { @@ -71,7 +72,10 @@ nextflow_process { [ id:'test_fasta' ], [ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true) ] ]) - input[1] = Channel.of([ [], [] ]) + input[1] = Channel.of([ + [ id:'test_gtf' ], + [ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.gtf', checkIfExists: true) ] + ]) """ } } @@ -79,11 +83,9 @@ nextflow_process { then { assertAll( { assert process.success }, - { assert snapshot(file(process.out.index[0][1]).listFiles().collect { it.getName() }.sort().toString()).match("fasta_index") }, - { assert snapshot(process.out.versions).match("fasta_versions") } + { assert snapshot(process.out).match() } ) } - } test("fasta_stub") { @@ -105,11 +107,8 @@ nextflow_process { then { assertAll( { assert process.success }, - { assert 
snapshot(file(process.out.index[0][1]).listFiles().collect { it.getName() }.sort().toString()).match("fasta_stub_index") }, - { assert snapshot(process.out.versions).match("fasta_stub_versions") } + { assert snapshot(process.out).match() } ) } - } - } diff --git a/modules/nf-core/star/genomegenerate/tests/main.nf.test.snap b/modules/nf-core/star/genomegenerate/tests/main.nf.test.snap index 5653d6e..207f4b4 100644 --- a/modules/nf-core/star/genomegenerate/tests/main.nf.test.snap +++ b/modules/nf-core/star/genomegenerate/tests/main.nf.test.snap @@ -1,90 +1,148 @@ { - "fasta_gtf_versions": { + "fasta_gtf": { "content": [ + "[Genome, Log.out, SA, SAindex, chrLength.txt, chrName.txt, chrNameLength.txt, chrStart.txt, exonGeTrInfo.tab, exonInfo.tab, geneInfo.tab, genomeParameters.txt, sjdbInfo.txt, sjdbList.fromGTF.out.tab, sjdbList.out.tab, transcriptInfo.tab]", [ "versions.yml:md5,46b8f1f34bb7f23892cd1eb249ed4d7f" ] ], "meta": { - "nf-test": "0.8.4", - "nextflow": "23.04.3" + "nf-test": "0.9.0", + "nextflow": "24.04.3" }, - "timestamp": "2024-02-01T15:54:31.798555" + "timestamp": "2024-07-22T14:55:35.478401" }, - "fasta_stub_versions": { + "fasta_gtf_stub": { "content": [ - [ - "versions.yml:md5,46b8f1f34bb7f23892cd1eb249ed4d7f" - ] - ], - "meta": { - "nf-test": "0.8.4", - "nextflow": "23.04.3" - }, - "timestamp": "2024-02-01T15:55:07.521209" - }, - "fasta_gtf_stub_index": { - "content": [ - "[Genome, Log.out, SA, SAindex, chrLength.txt, chrName.txt, chrNameLength.txt, chrStart.txt, exonGeTrInfo.tab, exonInfo.tab, geneInfo.tab, genomeParameters.txt, sjdbInfo.txt, sjdbList.fromGTF.out.tab, sjdbList.out.tab, transcriptInfo.tab]" - ], - "meta": { - "nf-test": "0.8.4", - "nextflow": "23.04.3" - }, - "timestamp": "2024-02-01T15:54:46.478098" - }, - "fasta_gtf_stub_versions": { - "content": [ - [ - "versions.yml:md5,46b8f1f34bb7f23892cd1eb249ed4d7f" - ] + { + "0": [ + [ + { + "id": "test_fasta" + }, + [ + "Genome:md5,d41d8cd98f00b204e9800998ecf8427e", + "Log.out:md5,d41d8cd98f00b204e9800998ecf8427e", + "SA:md5,d41d8cd98f00b204e9800998ecf8427e", + "SAindex:md5,d41d8cd98f00b204e9800998ecf8427e", + "chrLength.txt:md5,d41d8cd98f00b204e9800998ecf8427e", + "chrName.txt:md5,d41d8cd98f00b204e9800998ecf8427e", + "chrNameLength.txt:md5,d41d8cd98f00b204e9800998ecf8427e", + "chrStart.txt:md5,d41d8cd98f00b204e9800998ecf8427e", + "exonGeTrInfo.tab:md5,d41d8cd98f00b204e9800998ecf8427e", + "exonInfo.tab:md5,d41d8cd98f00b204e9800998ecf8427e", + "geneInfo.tab:md5,d41d8cd98f00b204e9800998ecf8427e", + "genomeParameters.txt:md5,d41d8cd98f00b204e9800998ecf8427e", + "sjdbInfo.txt:md5,d41d8cd98f00b204e9800998ecf8427e", + "sjdbList.fromGTF.out.tab:md5,d41d8cd98f00b204e9800998ecf8427e", + "sjdbList.out.tab:md5,d41d8cd98f00b204e9800998ecf8427e", + "transcriptInfo.tab:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + ], + "1": [ + "versions.yml:md5,46b8f1f34bb7f23892cd1eb249ed4d7f" + ], + "index": [ + [ + { + "id": "test_fasta" + }, + [ + "Genome:md5,d41d8cd98f00b204e9800998ecf8427e", + "Log.out:md5,d41d8cd98f00b204e9800998ecf8427e", + "SA:md5,d41d8cd98f00b204e9800998ecf8427e", + "SAindex:md5,d41d8cd98f00b204e9800998ecf8427e", + "chrLength.txt:md5,d41d8cd98f00b204e9800998ecf8427e", + "chrName.txt:md5,d41d8cd98f00b204e9800998ecf8427e", + "chrNameLength.txt:md5,d41d8cd98f00b204e9800998ecf8427e", + "chrStart.txt:md5,d41d8cd98f00b204e9800998ecf8427e", + "exonGeTrInfo.tab:md5,d41d8cd98f00b204e9800998ecf8427e", + "exonInfo.tab:md5,d41d8cd98f00b204e9800998ecf8427e", + "geneInfo.tab:md5,d41d8cd98f00b204e9800998ecf8427e", + 
"genomeParameters.txt:md5,d41d8cd98f00b204e9800998ecf8427e", + "sjdbInfo.txt:md5,d41d8cd98f00b204e9800998ecf8427e", + "sjdbList.fromGTF.out.tab:md5,d41d8cd98f00b204e9800998ecf8427e", + "sjdbList.out.tab:md5,d41d8cd98f00b204e9800998ecf8427e", + "transcriptInfo.tab:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + ], + "versions": [ + "versions.yml:md5,46b8f1f34bb7f23892cd1eb249ed4d7f" + ] + } ], "meta": { - "nf-test": "0.8.4", - "nextflow": "23.04.3" + "nf-test": "0.9.0", + "nextflow": "24.04.3" }, - "timestamp": "2024-02-01T15:54:46.491657" + "timestamp": "2024-07-22T14:55:57.247585" }, - "fasta_index": { + "fasta_stub": { "content": [ - "[Genome, Log.out, SA, SAindex, chrLength.txt, chrName.txt, chrNameLength.txt, chrStart.txt, genomeParameters.txt]" + { + "0": [ + [ + { + "id": "test_fasta" + }, + [ + "Genome:md5,d41d8cd98f00b204e9800998ecf8427e", + "Log.out:md5,d41d8cd98f00b204e9800998ecf8427e", + "SA:md5,d41d8cd98f00b204e9800998ecf8427e", + "SAindex:md5,d41d8cd98f00b204e9800998ecf8427e", + "chrLength.txt:md5,d41d8cd98f00b204e9800998ecf8427e", + "chrName.txt:md5,d41d8cd98f00b204e9800998ecf8427e", + "chrNameLength.txt:md5,d41d8cd98f00b204e9800998ecf8427e", + "chrStart.txt:md5,d41d8cd98f00b204e9800998ecf8427e", + "genomeParameters.txt:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + ], + "1": [ + "versions.yml:md5,46b8f1f34bb7f23892cd1eb249ed4d7f" + ], + "index": [ + [ + { + "id": "test_fasta" + }, + [ + "Genome:md5,d41d8cd98f00b204e9800998ecf8427e", + "Log.out:md5,d41d8cd98f00b204e9800998ecf8427e", + "SA:md5,d41d8cd98f00b204e9800998ecf8427e", + "SAindex:md5,d41d8cd98f00b204e9800998ecf8427e", + "chrLength.txt:md5,d41d8cd98f00b204e9800998ecf8427e", + "chrName.txt:md5,d41d8cd98f00b204e9800998ecf8427e", + "chrNameLength.txt:md5,d41d8cd98f00b204e9800998ecf8427e", + "chrStart.txt:md5,d41d8cd98f00b204e9800998ecf8427e", + "genomeParameters.txt:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + ], + "versions": [ + "versions.yml:md5,46b8f1f34bb7f23892cd1eb249ed4d7f" + ] + } ], "meta": { - "nf-test": "0.8.4", - "nextflow": "23.04.3" + "nf-test": "0.9.0", + "nextflow": "24.04.3" }, - "timestamp": "2024-02-01T15:54:57.552329" + "timestamp": "2024-07-22T14:56:07.01742" }, - "fasta_versions": { + "fasta": { "content": [ + "[Genome, Log.out, SA, SAindex, chrLength.txt, chrName.txt, chrNameLength.txt, chrStart.txt, genomeParameters.txt]", [ "versions.yml:md5,46b8f1f34bb7f23892cd1eb249ed4d7f" ] ], "meta": { - "nf-test": "0.8.4", - "nextflow": "23.04.3" - }, - "timestamp": "2024-02-01T15:54:57.560541" - }, - "fasta_gtf_index": { - "content": [ - "[Genome, Log.out, SA, SAindex, chrLength.txt, chrName.txt, chrNameLength.txt, chrStart.txt, exonGeTrInfo.tab, exonInfo.tab, geneInfo.tab, genomeParameters.txt, sjdbInfo.txt, sjdbList.fromGTF.out.tab, sjdbList.out.tab, transcriptInfo.tab]" - ], - "meta": { - "nf-test": "0.8.4", - "nextflow": "23.04.3" - }, - "timestamp": "2024-02-01T15:54:31.786814" - }, - "fasta_stub_index": { - "content": [ - "[Genome, Log.out, SA, SAindex, chrLength.txt, chrName.txt, chrNameLength.txt, chrStart.txt, genomeParameters.txt]" - ], - "meta": { - "nf-test": "0.8.4", - "nextflow": "23.04.3" + "nf-test": "0.9.0", + "nextflow": "24.04.3" }, - "timestamp": "2024-02-01T15:55:07.517472" + "timestamp": "2024-07-22T14:55:45.48784" } } \ No newline at end of file diff --git a/modules/nf-core/tsebra/environment.yml b/modules/nf-core/tsebra/environment.yml new file mode 100644 index 0000000..f189f6b --- /dev/null +++ b/modules/nf-core/tsebra/environment.yml @@ -0,0 +1,7 @@ +--- +# 
yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json +channels: + - conda-forge + - bioconda +dependencies: + - "bioconda::tsebra=1.1.2.5" diff --git a/modules/pfr/tsebra/main.nf b/modules/nf-core/tsebra/main.nf similarity index 77% rename from modules/pfr/tsebra/main.nf rename to modules/nf-core/tsebra/main.nf index c92ade7..2b0673a 100644 --- a/modules/pfr/tsebra/main.nf +++ b/modules/nf-core/tsebra/main.nf @@ -2,10 +2,11 @@ process TSEBRA { tag "$meta.id" label 'process_single' + // WARN: Version information not provided by tool on CLI. Please update this string when bumping container versions. conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/tsebra:1.1.2.4--pyhca03a8a_0': - 'biocontainers/tsebra:1.1.2.4--pyhca03a8a_0' }" + 'https://depot.galaxyproject.org/singularity/tsebra:1.1.2.5--pyhca03a8a_0': + 'biocontainers/tsebra:1.1.2.5--pyhca03a8a_0' }" input: tuple val(meta), path(gtfs) @@ -28,7 +29,7 @@ process TSEBRA { def hints_arg = '-e ' + hints_files.collect { "$it" }.join(',') def keep_arg = keep_gtfs ? ( '-k ' + keep_gtfs.collect { "$it" }.join(',') ) : '' def config_arg = config ? "-c $config" : '' - def VERSION = '1.1.2.4' + def VERSION = '1.1.2.5' // WARN: Version information not provided by tool on CLI. Please update this string when bumping container versions. """ tsebra.py \\ $gtf_arg \\ @@ -48,7 +49,7 @@ process TSEBRA { stub: def args = task.ext.args ?: '' def prefix = task.ext.prefix ?: "${meta.id}" - def VERSION = '1.1.2.4' + def VERSION = '1.1.2.5' // WARN: Version information not provided by tool on CLI. Please update this string when bumping container versions. """ touch ${prefix}.gtf touch ${prefix}.tsv diff --git a/modules/nf-core/tsebra/meta.yml b/modules/nf-core/tsebra/meta.yml new file mode 100644 index 0000000..b7808ef --- /dev/null +++ b/modules/nf-core/tsebra/meta.yml @@ -0,0 +1,77 @@ +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/meta-schema.json +name: "tsebra" +description: Transcript Selector for BRAKER TSEBRA combines gene predictions by selecing + transcripts based on their extrisic evidence support +keywords: + - genomics + - transcript + - selector + - gene + - prediction + - evidence +tools: + - "tsebra": + description: TSEBRA is a combiner tool that selects transcripts from gene predictions + based on the support by extrisic evidence in form of introns and start/stop + codons + homepage: "https://github.com/Gaius-Augustus/TSEBRA" + documentation: "https://github.com/Gaius-Augustus/TSEBRA" + tool_dev_url: "https://github.com/Gaius-Augustus/TSEBRA" + doi: "10.1186/s12859-021-04482-0" + licence: ["Artistic-2.0"] + identifier: biotools:tsebra + +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'sample1' ]` + - gtfs: + type: list + description: List of gene prediction files in gtf + pattern: "*.gtf" + - - hints_files: + type: list + description: List of files containing extrinsic evidence in gff + pattern: "*.gff" + - - keep_gtfs: + type: list + description: | + List of gene prediction files in gtf. 
These gene sets are used the same way as other inputs, but TSEBRA ensures that all + transcripts from these gene sets are included in the output + pattern: "*.gtf" + - - config: + type: file + description: Configuration file that sets the parameter for TSEBRA + pattern: "*.cfg" +output: + - tsebra_gtf: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'sample1' ]` + - "*.gtf": + type: file + description: Output file for the combined gene predictions in gtf + pattern: "*.gtf" + - tsebra_scores: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'sample1' ]` + - "*.tsv": + type: file + description: Transcript scores as a table + pattern: "*.tsv" + - versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@GallVp" +maintainers: + - "@GallVp" diff --git a/modules/pfr/tsebra/tests/main.nf.test b/modules/nf-core/tsebra/tests/main.nf.test similarity index 70% rename from modules/pfr/tsebra/tests/main.nf.test rename to modules/nf-core/tsebra/tests/main.nf.test index ddf7a6c..6e45110 100644 --- a/modules/pfr/tsebra/tests/main.nf.test +++ b/modules/nf-core/tsebra/tests/main.nf.test @@ -7,32 +7,32 @@ nextflow_process { tag "modules" tag "modules_nfcore" tag "tsebra" - tag "nf-core/gunzip/main" + tag "gunzip" test("actinidia_chinensis-genome") { setup { run('GUNZIP', alias: 'GUNZIP_GTF') { - script "../../../nf-core/gunzip/main" + script "../../../nf-core/gunzip" process { """ input[0] = [ [ id:'test' ], // meta map - file(params.test_data['actinidia_chinensis']['genome']['genome_1_gtf_gz'], checkIfExists: true) + file(params.modules_testdata_base_path + 'genomics/eukaryotes/actinidia_chinensis/genome/chr1/genome.gtf.gz', checkIfExists: true) ] """ } } run('GUNZIP', alias: 'GUNZIP_HINTS') { - script "../../../nf-core/gunzip/main" + script "../../../nf-core/gunzip" process { """ input[0] = [ [ id:'test' ], // meta map - file(params.test_data['actinidia_chinensis']['genome']['genome_1_hints_gff_gz'], checkIfExists: true) + file(params.modules_testdata_base_path + 'genomics/eukaryotes/actinidia_chinensis/genome/chr1/genome.hints.gff.gz', checkIfExists: true) ] """ } @@ -68,10 +68,10 @@ nextflow_process { """ input[0] = [ [ id:'test' ], // meta map - [ file(params.test_data['actinidia_chinensis']['genome']['genome_1_gtf_gz'], checkIfExists: true) ] + [ file(params.modules_testdata_base_path + 'genomics/eukaryotes/actinidia_chinensis/genome/chr1/genome.gtf.gz', checkIfExists: true) ] ] input[1] = [ - file(params.test_data['actinidia_chinensis']['genome']['genome_1_hints_gff_gz'], checkIfExists: true) + file(params.modules_testdata_base_path + 'genomics/eukaryotes/actinidia_chinensis/genome/chr1/genome.hints.gff.gz', checkIfExists: true) ] input[2] = [] input[3] = [] diff --git a/modules/pfr/tsebra/tests/main.nf.test.snap b/modules/nf-core/tsebra/tests/main.nf.test.snap similarity index 84% rename from modules/pfr/tsebra/tests/main.nf.test.snap rename to modules/nf-core/tsebra/tests/main.nf.test.snap index 4d9e15f..bbe880d 100644 --- a/modules/pfr/tsebra/tests/main.nf.test.snap +++ b/modules/nf-core/tsebra/tests/main.nf.test.snap @@ -19,7 +19,7 @@ ] ], "2": [ - "versions.yml:md5,6d60045f4f9b66baa508c174ae6a6408" + "versions.yml:md5,4e3a44cee65282ac56b08e2bbc4f1d46" ], "tsebra_gtf": [ [ @@ -38,15 +38,15 @@ ] ], "versions": [ - "versions.yml:md5,6d60045f4f9b66baa508c174ae6a6408" + "versions.yml:md5,4e3a44cee65282ac56b08e2bbc4f1d46" ] } ], "meta": { 
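For orientation, a minimal sketch of how the nf-core/tsebra module documented above could be wired into a workflow. The channel names and file paths are hypothetical, and the optional keep-lists and config are passed as empty lists, mirroring the module's second test case:

```groovy
include { TSEBRA } from './modules/nf-core/tsebra/main'

workflow {
    // [ meta, [ gtf(s) ] ] from BRAKER or another predictor (illustrative values)
    ch_gtfs  = Channel.of( [ [ id:'test' ], [ file('braker.gtf') ] ] )
    ch_hints = Channel.of( [ file('hints.gff') ] )   // extrinsic evidence in GFF

    TSEBRA ( ch_gtfs, ch_hints, [], [] )             // no keep_gtfs, default TSEBRA config

    TSEBRA.out.tsebra_gtf.view()                     // combined gene predictions (*.gtf)
}
```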
"nf-test": "0.8.4", - "nextflow": "23.10.1" + "nextflow": "24.04.1" }, - "timestamp": "2024-04-11T12:20:49.618044" + "timestamp": "2024-05-28T16:27:56.639849" }, "actinidia_chinensis-genome": { "content": [ @@ -68,7 +68,7 @@ ] ], "2": [ - "versions.yml:md5,6d60045f4f9b66baa508c174ae6a6408" + "versions.yml:md5,4e3a44cee65282ac56b08e2bbc4f1d46" ], "tsebra_gtf": [ [ @@ -87,14 +87,14 @@ ] ], "versions": [ - "versions.yml:md5,6d60045f4f9b66baa508c174ae6a6408" + "versions.yml:md5,4e3a44cee65282ac56b08e2bbc4f1d46" ] } ], "meta": { "nf-test": "0.8.4", - "nextflow": "23.10.1" + "nextflow": "24.04.1" }, - "timestamp": "2024-04-11T12:20:45.666076" + "timestamp": "2024-05-28T16:27:52.049367" } } \ No newline at end of file diff --git a/modules/nf-core/tsebra/tests/tags.yml b/modules/nf-core/tsebra/tests/tags.yml new file mode 100644 index 0000000..c76165b --- /dev/null +++ b/modules/nf-core/tsebra/tests/tags.yml @@ -0,0 +1,2 @@ +tsebra: + - "modules/nf-core/tsebra/**" diff --git a/modules/nf-core/umitools/extract/environment.yml b/modules/nf-core/umitools/extract/environment.yml index aab452d..9f9e03c 100644 --- a/modules/nf-core/umitools/extract/environment.yml +++ b/modules/nf-core/umitools/extract/environment.yml @@ -1,7 +1,5 @@ -name: umitools_extract channels: - conda-forge - bioconda - - defaults dependencies: - bioconda::umi_tools=1.1.5 diff --git a/modules/nf-core/umitools/extract/main.nf b/modules/nf-core/umitools/extract/main.nf index 8719e5f..b97900e 100644 --- a/modules/nf-core/umitools/extract/main.nf +++ b/modules/nf-core/umitools/extract/main.nf @@ -53,4 +53,22 @@ process UMITOOLS_EXTRACT { END_VERSIONS """ } + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + if (meta.single_end) { + output_command = "echo '' | gzip > ${prefix}.umi_extract.fastq.gz" + } else { + output_command = "echo '' | gzip > ${prefix}.umi_extract_1.fastq.gz ;" + output_command += "echo '' | gzip > ${prefix}.umi_extract_2.fastq.gz" + } + """ + touch ${prefix}.umi_extract.log + ${output_command} + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + umitools: \$( umi_tools --version | sed '/version:/!d; s/.*: //' ) + END_VERSIONS + """ } diff --git a/modules/nf-core/umitools/extract/meta.yml b/modules/nf-core/umitools/extract/meta.yml index 7695b27..648ffbd 100644 --- a/modules/nf-core/umitools/extract/meta.yml +++ b/modules/nf-core/umitools/extract/meta.yml @@ -1,5 +1,6 @@ name: umitools_extract -description: Extracts UMI barcode from a read and add it to the read name, leaving any sample barcode in place +description: Extracts UMI barcode from a read and add it to the read name, leaving + any sample barcode in place keywords: - UMI - barcode @@ -8,38 +9,49 @@ keywords: tools: - umi_tools: description: > - UMI-tools contains tools for dealing with Unique Molecular Identifiers (UMIs)/Random Molecular Tags (RMTs) and single cell RNA-Seq cell barcodes + UMI-tools contains tools for dealing with Unique Molecular Identifiers (UMIs)/Random + Molecular Tags (RMTs) and single cell RNA-Seq cell barcodes documentation: https://umi-tools.readthedocs.io/en/latest/ license: "MIT" + identifier: "" input: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test', single_end:false ] - - reads: - type: list - description: | - List of input FASTQ files whose UMIs will be extracted. + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - reads: + type: list + description: | + List of input FASTQ files whose UMIs will be extracted. output: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test', single_end:false ] - reads: - type: file - description: > - Extracted FASTQ files. | For single-end reads, pattern is \${prefix}.umi_extract.fastq.gz. | For paired-end reads, pattern is \${prefix}.umi_extract_{1,2}.fastq.gz. - pattern: "*.{fastq.gz}" + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.fastq.gz": + type: file + description: > + Extracted FASTQ files. | For single-end reads, pattern is \${prefix}.umi_extract.fastq.gz. + | For paired-end reads, pattern is \${prefix}.umi_extract_{1,2}.fastq.gz. + pattern: "*.{fastq.gz}" - log: - type: file - description: Logfile for umi_tools - pattern: "*.{log}" + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.log": + type: file + description: Logfile for umi_tools + pattern: "*.{log}" - versions: - type: file - description: File containing software versions - pattern: "versions.yml" + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" authors: - "@drpatelh" - "@grst" diff --git a/modules/nf-core/umitools/extract/tests/main.nf.test b/modules/nf-core/umitools/extract/tests/main.nf.test index 2a8eba1..bb8a065 100644 --- a/modules/nf-core/umitools/extract/tests/main.nf.test +++ b/modules/nf-core/umitools/extract/tests/main.nf.test @@ -9,7 +9,7 @@ nextflow_process { tag "umitools" tag "umitools/extract" - test("Should run without failures") { + test("single end") { when { process { @@ -24,7 +24,82 @@ nextflow_process { then { assertAll ( { assert process.success }, - { assert snapshot(process.out.versions).match("versions") } + { assert snapshot( + process.out.reads.collect { it.collect { it instanceof Map ? it : file(it).name }}, + process.out.log.collect { it.collect { it instanceof Map ? it : file(it).name }}, + process.out.versions + ).match() } + ) + } + } + + test("single end - stub") { + + options "-stub" + + when { + process { + """ + input[0] = [ [ id:'test', single_end:true ], // meta map + [ file(params.modules_testdata_base_path + "genomics/sarscov2/illumina/fastq/test_1.fastq.gz", checkIfExists: true) ] + ] + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + + test("pair end") { + + when { + process { + """ + input[0] = [ [ id:'test', single_end:false ], // meta map + [ file(params.modules_testdata_base_path + "genomics/sarscov2/illumina/fastq/test_1.fastq.gz", checkIfExists: true), + file(params.modules_testdata_base_path + "genomics/sarscov2/illumina/fastq/test_2.fastq.gz", checkIfExists: true) ] + ] + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot( + file(process.out.reads[0][1][0]).name, + file(process.out.reads[0][1][1]).name, + process.out.log.collect { it.collect { it instanceof Map ? 
it : file(it).name }}, + process.out.versions + ).match() } + ) + } + } + + test("pair end - stub") { + + options "-stub" + + when { + process { + """ + input[0] = [ [ id:'test', single_end:false ], // meta map + [ file(params.modules_testdata_base_path + "genomics/sarscov2/illumina/fastq/test_1.fastq.gz", checkIfExists: true), + file(params.modules_testdata_base_path + "genomics/sarscov2/illumina/fastq/test_2.fastq.gz", checkIfExists: true) ] + ] + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot(process.out).match() } ) } } diff --git a/modules/nf-core/umitools/extract/tests/main.nf.test.snap b/modules/nf-core/umitools/extract/tests/main.nf.test.snap index bf82701..b115905 100644 --- a/modules/nf-core/umitools/extract/tests/main.nf.test.snap +++ b/modules/nf-core/umitools/extract/tests/main.nf.test.snap @@ -1,14 +1,167 @@ { - "versions": { + "pair end - stub": { "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + [ + "test.umi_extract_1.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940", + "test.umi_extract_2.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ] + ], + "1": [ + [ + { + "id": "test", + "single_end": false + }, + "test.umi_extract.log:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "2": [ + "versions.yml:md5,568d243174c081a0301e74ed42e59b48" + ], + "log": [ + [ + { + "id": "test", + "single_end": false + }, + "test.umi_extract.log:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "reads": [ + [ + { + "id": "test", + "single_end": false + }, + [ + "test.umi_extract_1.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940", + "test.umi_extract_2.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ] + ], + "versions": [ + "versions.yml:md5,568d243174c081a0301e74ed42e59b48" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-07-02T15:05:20.008312" + }, + "single end - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": true + }, + "test.umi_extract.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "1": [ + [ + { + "id": "test", + "single_end": true + }, + "test.umi_extract.log:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "2": [ + "versions.yml:md5,568d243174c081a0301e74ed42e59b48" + ], + "log": [ + [ + { + "id": "test", + "single_end": true + }, + "test.umi_extract.log:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "reads": [ + [ + { + "id": "test", + "single_end": true + }, + "test.umi_extract.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "versions": [ + "versions.yml:md5,568d243174c081a0301e74ed42e59b48" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-07-02T15:04:12.145999" + }, + "pair end": { + "content": [ + "test.umi_extract_1.fastq.gz", + "test.umi_extract_2.fastq.gz", + [ + [ + { + "id": "test", + "single_end": false + }, + "test.umi_extract.log" + ] + ], + [ + "versions.yml:md5,568d243174c081a0301e74ed42e59b48" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-07-02T15:21:09.578031" + }, + "single end": { + "content": [ + [ + [ + { + "id": "test", + "single_end": true + }, + "test.umi_extract.fastq.gz" + ] + ], + [ + [ + { + "id": "test", + "single_end": true + }, + "test.umi_extract.log" + ] + ], [ "versions.yml:md5,568d243174c081a0301e74ed42e59b48" ] ], "meta": { "nf-test": "0.8.4", - "nextflow": "23.10.1" + "nextflow": "24.04.2" }, - "timestamp": "2024-02-16T10:01:33.326046137" + "timestamp": "2024-07-02T15:03:52.464606" } } \ 
No newline at end of file diff --git a/modules/pfr/agat/spfilterfeaturefromkilllist/environment.yml b/modules/pfr/agat/spfilterfeaturefromkilllist/environment.yml deleted file mode 100644 index b0811b4..0000000 --- a/modules/pfr/agat/spfilterfeaturefromkilllist/environment.yml +++ /dev/null @@ -1,9 +0,0 @@ ---- -# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json -name: "agat_spfilterfeaturefromkilllist" -channels: - - conda-forge - - bioconda - - defaults -dependencies: - - "bioconda::agat=1.3.3" diff --git a/modules/pfr/agat/spfilterfeaturefromkilllist/meta.yml b/modules/pfr/agat/spfilterfeaturefromkilllist/meta.yml deleted file mode 100644 index d408fe7..0000000 --- a/modules/pfr/agat/spfilterfeaturefromkilllist/meta.yml +++ /dev/null @@ -1,60 +0,0 @@ ---- -# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/meta-schema.json -name: "agat_spfilterfeaturefromkilllist" -description: | - The script aims to remove features based on a kill list. The default behaviour is to look at the features's ID. - If the feature has an ID (case insensitive) listed among the kill list it will be removed. /!\ Removing a level1 - or level2 feature will automatically remove all linked subfeatures, and removing all children of a feature will - automatically remove this feature too. -keywords: - - genomics - - gff - - remove - - feature -tools: - - "agat": - description: "Another Gff Analysis Toolkit (AGAT). Suite of tools to handle gene annotations in any GTF/GFF format." - homepage: "https://agat.readthedocs.io/en/latest/" - documentation: "https://agat.readthedocs.io/en/latest/tools/agat_sp_filter_feature_from_kill_list.html" - tool_dev_url: "https://github.com/NBISweden/AGAT" - doi: "10.5281/zenodo.3552717" - licence: ["GPL v3"] -input: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. `[ id:'sample1', single_end:false ]` - - gff: - type: file - description: Input GFF3 file that will be read - pattern: "*.{gff,gff3}" - - kill_list: - type: file - description: Kill list. One value per line. - pattern: "*.txt" - - config: - type: file - description: | - Input agat config file. By default AGAT takes as input agat_config.yaml file from the working directory if any, otherwise it takes the orignal agat_config.yaml shipped with AGAT. To get the agat_config.yaml locally type: "agat config --expose". The --config option gives you the possibility to use your own AGAT config file (located elsewhere or named differently). - pattern: "*.yaml" - -output: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. `[ id:'sample1', single_end:false ] - - versions: - type: file - description: File containing software versions - pattern: "versions.yml" - - gff: - type: file - description: Output GFF file. 
- pattern: "*.gff" - -authors: - - "@GallVp" -maintainers: - - "@GallVp" diff --git a/modules/pfr/agat/spfilterfeaturefromkilllist/tests/tags.yml b/modules/pfr/agat/spfilterfeaturefromkilllist/tests/tags.yml deleted file mode 100644 index 2ab17b0..0000000 --- a/modules/pfr/agat/spfilterfeaturefromkilllist/tests/tags.yml +++ /dev/null @@ -1,2 +0,0 @@ -agat/spfilterfeaturefromkilllist: - - "modules/pfr/agat/spfilterfeaturefromkilllist/**" diff --git a/modules/pfr/agat/spmergeannotations/meta.yml b/modules/pfr/agat/spmergeannotations/meta.yml deleted file mode 100644 index afa9ddd..0000000 --- a/modules/pfr/agat/spmergeannotations/meta.yml +++ /dev/null @@ -1,54 +0,0 @@ ---- -# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/meta-schema.json -name: "agat_spmergeannotations" -description: | - This script merge different gff annotation files in one. It uses the AGAT parser that takes care of duplicated names and fixes other oddities met in those files. -keywords: - - genomics - - gff - - merge - - combine -tools: - - "agat": - description: "Another Gff Analysis Toolkit (AGAT). Suite of tools to handle gene annotations in any GTF/GFF format." - homepage: "https://agat.readthedocs.io/en/latest/" - documentation: "https://agat.readthedocs.io/en/latest/tools/agat_sp_merge_annotations.html" - tool_dev_url: "https://github.com/NBISweden/AGAT" - doi: "10.5281/zenodo.3552717" - licence: ["GPL v3"] -input: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. `[ id:'sample1' ]` - - gffs: - type: list - description: A list of GFFs to merge - pattern: "[ *.{gff,gff3} ]" - - config: - type: file - description: | - Input agat config file. By default AGAT takes as input agat_config.yaml file from the working directory if any, - otherwise it takes the orignal agat_config.yaml shipped with AGAT. To get the agat_config.yaml - locally type: "agat config --expose". The --config option gives you the possibility to use your - own AGAT config file (located elsewhere or named differently). - pattern: "*.yaml" -output: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. `[ id:'sample1' ]` - - versions: - type: file - description: File containing software versions - pattern: "versions.yml" - - gff: - type: file - description: Output GFF file. 
- pattern: "*.gff" -authors: - - "@GallVp" -maintainers: - - "@GallVp" diff --git a/modules/pfr/agat/spmergeannotations/tests/main.nf.test b/modules/pfr/agat/spmergeannotations/tests/main.nf.test deleted file mode 100644 index 5e25599..0000000 --- a/modules/pfr/agat/spmergeannotations/tests/main.nf.test +++ /dev/null @@ -1,71 +0,0 @@ -nextflow_process { - - name "Test Process AGAT_SPMERGEANNOTATIONS" - script "../main.nf" - process "AGAT_SPMERGEANNOTATIONS" - - tag "modules" - tag "modules_nfcore" - tag "agat" - tag "agat/spmergeannotations" - - test("candidatus_portiera_aleyrodidarum-multi_gffs") { - - when { - process { - """ - input[0] = [ - [ id:'test' ], // meta map - [ - file(params.test_data['candidatus_portiera_aleyrodidarum']['genome']['test1_gff'], checkIfExists: true), - file(params.test_data['candidatus_portiera_aleyrodidarum']['genome']['test2_gff'], checkIfExists: true), - file(params.test_data['candidatus_portiera_aleyrodidarum']['genome']['test3_gff'], checkIfExists: true), - ] - ] - - input[1] = [] - """ - } - } - - then { - assertAll( - { assert process.success }, - { assert path(process.out.gff[0][1]).text.contains('AGAT gene') }, - { assert snapshot(process.out.versions).match("versions") } - ) - } - - } - - test("candidatus_portiera_aleyrodidarum-multi_gffs-stub") { - - options '-stub' - - when { - process { - """ - input[0] = [ - [ id:'test' ], // meta map - [ - file(params.test_data['candidatus_portiera_aleyrodidarum']['genome']['test1_gff'], checkIfExists: true), - file(params.test_data['candidatus_portiera_aleyrodidarum']['genome']['test2_gff'], checkIfExists: true), - file(params.test_data['candidatus_portiera_aleyrodidarum']['genome']['test3_gff'], checkIfExists: true), - ] - ] - - input[1] = [] - """ - } - } - - then { - assertAll( - { assert process.success }, - { assert snapshot(process.out).match() } - ) - } - - } - -} \ No newline at end of file diff --git a/modules/pfr/agat/spmergeannotations/tests/main.nf.test.snap b/modules/pfr/agat/spmergeannotations/tests/main.nf.test.snap deleted file mode 100644 index c7e2154..0000000 --- a/modules/pfr/agat/spmergeannotations/tests/main.nf.test.snap +++ /dev/null @@ -1,47 +0,0 @@ -{ - "versions": { - "content": [ - [ - "versions.yml:md5,0ae449590befbaac9269ad8a7a84b66d" - ] - ], - "meta": { - "nf-test": "0.8.4", - "nextflow": "23.10.1" - }, - "timestamp": "2024-04-02T17:08:15.459625" - }, - "candidatus_portiera_aleyrodidarum-multi_gffs-stub": { - "content": [ - { - "0": [ - [ - { - "id": "test" - }, - "test.gff:md5,d41d8cd98f00b204e9800998ecf8427e" - ] - ], - "1": [ - "versions.yml:md5,0ae449590befbaac9269ad8a7a84b66d" - ], - "gff": [ - [ - { - "id": "test" - }, - "test.gff:md5,d41d8cd98f00b204e9800998ecf8427e" - ] - ], - "versions": [ - "versions.yml:md5,0ae449590befbaac9269ad8a7a84b66d" - ] - } - ], - "meta": { - "nf-test": "0.8.4", - "nextflow": "23.10.1" - }, - "timestamp": "2024-04-02T17:08:20.581403" - } -} \ No newline at end of file diff --git a/modules/pfr/agat/spmergeannotations/tests/tags.yml b/modules/pfr/agat/spmergeannotations/tests/tags.yml deleted file mode 100644 index 7d9b839..0000000 --- a/modules/pfr/agat/spmergeannotations/tests/tags.yml +++ /dev/null @@ -1,2 +0,0 @@ -agat/spmergeannotations: - - "modules/pfr/agat/spmergeannotations/**" diff --git a/modules/pfr/custom/restoregffids/tests/tags.yml b/modules/pfr/custom/restoregffids/tests/tags.yml deleted file mode 100644 index 1d4b9a8..0000000 --- a/modules/pfr/custom/restoregffids/tests/tags.yml +++ /dev/null @@ -1,2 +0,0 @@ -custom/restoregffids: 
- - "modules/pfr/custom/restoregffids/**" diff --git a/modules/pfr/custom/shortenfastaids/environment.yml b/modules/pfr/custom/shortenfastaids/environment.yml deleted file mode 100644 index e80fa7c..0000000 --- a/modules/pfr/custom/shortenfastaids/environment.yml +++ /dev/null @@ -1,11 +0,0 @@ ---- -# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json -name: "custom_shortenfastaids" -channels: - - conda-forge - - bioconda - - defaults - -dependencies: - - biopython==1.75 - - python=3.8 diff --git a/modules/pfr/custom/shortenfastaids/meta.yml b/modules/pfr/custom/shortenfastaids/meta.yml deleted file mode 100644 index 2425810..0000000 --- a/modules/pfr/custom/shortenfastaids/meta.yml +++ /dev/null @@ -1,58 +0,0 @@ ---- -# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/meta-schema.json -name: "custom_shortenfastaids" -description: | - Shortens fasta IDs and produces a new fasta along with a TSV table - consisting of original (first column) and new IDs (second column). - This module is helpful when some tools like EDTA implicitly shorten - the IDs without producing the ID map, leading to downstream mismatch - in IDs across files. -keywords: - - genome - - fasta - - ID - - shorten -tools: - - "biopython": - description: | - Biopython is a set of freely available tools for biological computation written in Python by - an international team of developers. - homepage: "https://biopython.org" - documentation: "https://biopython.org/wiki/Documentation" - tool_dev_url: "https://github.com/biopython/biopython" - doi: "10.1093/bioinformatics/btp163" - licence: ["MIT"] -input: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. `[ id:'test' ]` - - fasta: - type: file - description: Input fasta file - pattern: "*.{fsa,fa,fasta}" -output: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. 
`[ id:'test' ]` - - short_ids_fasta: - type: file - description: Fasta file with shortened ids if id change is required - pattern: "*.{fsa,fa,fasta}" - - short_ids_tsv: - type: file - description: | - A TSV file with original (first column) and new ids (second column) - if id change is required - pattern: "*.tsv" - - versions: - type: file - description: File containing software versions - pattern: "versions.yml" -authors: - - "@GallVp" -maintainers: - - "@GallVp" diff --git a/modules/pfr/custom/shortenfastaids/tests/tags.yml b/modules/pfr/custom/shortenfastaids/tests/tags.yml deleted file mode 100644 index 4715b64..0000000 --- a/modules/pfr/custom/shortenfastaids/tests/tags.yml +++ /dev/null @@ -1,2 +0,0 @@ -custom/shortenfastaids: - - "modules/pfr/custom/shortenfastaids/**" diff --git a/modules/pfr/edta/edta/meta.yml b/modules/pfr/edta/edta/meta.yml deleted file mode 100644 index 52503b8..0000000 --- a/modules/pfr/edta/edta/meta.yml +++ /dev/null @@ -1,82 +0,0 @@ ---- -# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/meta-schema.json -name: "edta_edta" -description: Extensive de-novo TE Annotator (EDTA) -keywords: - - genome - - repeat - - annotation - - transposable-elements -tools: - - "edta": - description: Extensive de-novo TE Annotator (EDTA) - homepage: "https://github.com/oushujun/EDTA" - documentation: "https://github.com/oushujun/EDTA" - tool_dev_url: "https://github.com/oushujun/EDTA" - doi: "10.1186/s13059-019-1905-y" - licence: ["GPL v3"] -input: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. `[ id:'test' ]` - - fasta: - type: file - description: Genome fasta file - pattern: "*.{fsa,fa,fasta}" - - cds: - type: file - description: | - A FASTA file containing the coding sequence (no introns, UTRs, nor TEs) - of this genome or its close relative - pattern: "*.{fsa,fa,fasta}" - - curatedlib: - type: file - description: | - A curated library to keep consistent naming and classification for known TEs - pattern: "*.liban" - - rmout: - type: file - description: | - Homology-based TE annotation instead of using the EDTA library for masking in - RepeatMasker .out format - pattern: "*.out" - - exclude: - type: file - description: Exclude regions (bed format) from TE masking in the MAKER.masked output - pattern: "*.bed" -output: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. 
`[ id:'test' ]` - - log: - type: file - description: Log emitted by EDTA - pattern: "*.log" - - te_lib_fasta: - type: file - description: A non-redundant TE library in fasta format - pattern: "*.EDTA.TElib.fa" - - pass_list: - type: file - description: A summary table of intact LTR-RTs with coordinate and structural information - pattern: "*.EDTA.pass.list" - - out_file: - type: file - description: RepeatMasker annotation of all LTR sequences in the genome - pattern: "*.EDTA.out" - - te_anno_gff3: - type: file - description: A gff3 file containing both structurally intact and fragmented TE annotations - pattern: "*.EDTA.TEanno.gff3" - - versions: - type: file - description: File containing software versions - pattern: "versions.yml" -authors: - - "@GallVp" -maintainers: - - "@GallVp" diff --git a/modules/pfr/edta/edta/tests/main.nf.test b/modules/pfr/edta/edta/tests/main.nf.test deleted file mode 100644 index 7601876..0000000 --- a/modules/pfr/edta/edta/tests/main.nf.test +++ /dev/null @@ -1,77 +0,0 @@ -nextflow_process { - - name "Test Process EDTA_EDTA" - script "../main.nf" - process "EDTA_EDTA" - config "./nextflow.config" - - tag "modules" - tag "modules_nfcore" - tag "edta" - tag "edta/edta" - - test("homo_sapiens-genome_fasta") { - - when { - process { - """ - input[0] = Channel.of(file(params.test_data['homo_sapiens']['genome']['genome_21_fasta'], checkIfExists: true)) - | map { f -> - ( - ['>Chr21'] + - f.readLines().subList(66666.toInteger(), 116666.toInteger()) // 4 MB to 7 MB; 60 bases per line - ).join('\\n') - } - | collectFile(name: 'genome_3_to_10_mb.fasta') - | map { f -> [ [ id: 'test'], f ] } - input[1] = [] - input[2] = [] - input[3] = [] - input[4] = [] - """ - } - } - - then { - assertAll( - { assert process.success }, - { assert snapshot(process.out.versions).match("versions") } - ) - } - - } - - test("stub") { - - options "-stub" - - when { - process { - """ - input[0] = [ - [ id:'test' ], // meta map - file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true) - ] - input[1] = [] - input[2] = [] - input[3] = [] - input[4] = [] - """ - } - } - - then { - assertAll( - { assert process.success }, - { assert snapshot(process.out.versions).match("versions") }, - { assert process.out.log != null }, - { assert process.out.te_lib_fasta != null }, - { assert process.out.pass_list != null }, - { assert process.out.out_file != null }, - { assert process.out.te_anno_gff3 != null } - ) - } - - } - -} \ No newline at end of file diff --git a/modules/pfr/edta/edta/tests/nextflow.config b/modules/pfr/edta/edta/tests/nextflow.config deleted file mode 100644 index e58e10e..0000000 --- a/modules/pfr/edta/edta/tests/nextflow.config +++ /dev/null @@ -1,3 +0,0 @@ -process { - ext.args = '--anno 1' -} diff --git a/modules/pfr/edta/edta/tests/tags.yml b/modules/pfr/edta/edta/tests/tags.yml deleted file mode 100644 index 180ae6d..0000000 --- a/modules/pfr/edta/edta/tests/tags.yml +++ /dev/null @@ -1,2 +0,0 @@ -edta/edta: - - "modules/pfr/edta/edta/**" diff --git a/modules/pfr/ltrretriever/lai/meta.yml b/modules/pfr/ltrretriever/lai/meta.yml deleted file mode 100644 index f84cf6c..0000000 --- a/modules/pfr/ltrretriever/lai/meta.yml +++ /dev/null @@ -1,70 +0,0 @@ ---- -# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/meta-schema.json -name: "ltrretriever_lai" -description: | - Estimates the mean LTR sequence identity in the genome. 
The input genome fasta should - have short alphanumeric IDs without comments -keywords: - - genomics - - annotation - - repeat - - long terminal retrotransposon - - retrotransposon - - stats - - qc -tools: - - "lai": - description: Assessing genome assembly quality using the LTR Assembly Index (LAI) - homepage: "https://github.com/oushujun/LTR_retriever" - documentation: "https://github.com/oushujun/LTR_retriever" - tool_dev_url: "https://github.com/oushujun/LTR_retriever" - doi: "10.1093/nar/gky730" - licence: ["GPL v3"] -input: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. `[ id:'sample1' ]` - - fasta: - type: file - description: The genome file that is used to generate everything - pattern: "*.{fsa,fa,fasta}" - - pass_list: - type: file - description: A list of intact LTR-RTs generated by LTR_retriever - pattern: "*.pass.list" - - annotation_out: - type: file - description: RepeatMasker annotation of all LTR sequences in the genome - pattern: "*.out" - - monoploid_seqs: - type: file - description: | - This parameter is mainly for ployploid genomes. User provides a list of - sequence names that represent a monoploid (1x). LAI will be calculated only - on these sequences if provided. - pattern: "*.txt" -output: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. `[ id:'sample1', single_end:false ]` - - log: - type: file - description: Log from LAI - pattern: "*.LAI.log" - - lai_out: - type: file - description: | - Output file from LAI if LAI is able to estimate the index from the inputs - pattern: "*.LAI.out" - - versions: - type: file - description: File containing software versions - pattern: "versions.yml" -authors: - - "@GallVp" -maintainers: - - "@GallVp" diff --git a/modules/pfr/repeatmodeler/builddatabase/meta.yml b/modules/pfr/repeatmodeler/builddatabase/meta.yml deleted file mode 100644 index d3aa931..0000000 --- a/modules/pfr/repeatmodeler/builddatabase/meta.yml +++ /dev/null @@ -1,44 +0,0 @@ ---- -# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/meta-schema.json -name: "repeatmodeler_builddatabase" -description: Create a database for RepeatModeler -keywords: - - genomics - - fasta - - repeat -tools: - - "repeatmodeler": - description: "RepeatModeler is a de-novo repeat family identification and modeling package." - homepage: "https://github.com/Dfam-consortium/RepeatModeler" - documentation: "https://github.com/Dfam-consortium/RepeatModeler" - tool_dev_url: "https://github.com/Dfam-consortium/RepeatModeler" - licence: ["Open Software License v2.1"] - -input: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. `[ id:'sample1' ]` - - fasta: - type: file - description: Fasta file - pattern: "*.{fasta,fsa,fa}" -output: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. 
`[ id:'sample1' ]` - - db: - type: file - description: Database files for repeatmodeler - pattern: "`${prefix}.*`" - - versions: - type: file - description: File containing software versions - pattern: "versions.yml" -authors: - - "@GallVp" -maintainers: - - "@GallVp" diff --git a/modules/pfr/repeatmodeler/builddatabase/tests/main.nf.test.snap b/modules/pfr/repeatmodeler/builddatabase/tests/main.nf.test.snap deleted file mode 100644 index cda327e..0000000 --- a/modules/pfr/repeatmodeler/builddatabase/tests/main.nf.test.snap +++ /dev/null @@ -1,16 +0,0 @@ -{ - "versions": { - "content": [ - [ - "versions.yml:md5,7944637266bc3e2726899eaad5e46c87" - ] - ], - "timestamp": "2024-01-09T15:14:48.807063" - }, - "for-stub-match": { - "content": [ - "[test.nhr, test.nin, test.njs, test.nnd, test.nni, test.nog, test.nsq, test.translation]" - ], - "timestamp": "2024-01-09T15:14:48.81702" - } -} \ No newline at end of file diff --git a/modules/pfr/repeatmodeler/builddatabase/tests/tags.yml b/modules/pfr/repeatmodeler/builddatabase/tests/tags.yml deleted file mode 100644 index 426540d..0000000 --- a/modules/pfr/repeatmodeler/builddatabase/tests/tags.yml +++ /dev/null @@ -1,2 +0,0 @@ -repeatmodeler/builddatabase: - - "modules/pfr/repeatmodeler/builddatabase/**" diff --git a/modules/pfr/repeatmodeler/repeatmodeler/meta.yml b/modules/pfr/repeatmodeler/repeatmodeler/meta.yml deleted file mode 100644 index 29bb795..0000000 --- a/modules/pfr/repeatmodeler/repeatmodeler/meta.yml +++ /dev/null @@ -1,52 +0,0 @@ ---- -# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/meta-schema.json -name: "repeatmodeler_repeatmodeler" -description: Performs de novo transposable element (TE) family identification with RepeatModeler -keywords: - - genomics - - fasta - - repeat - - transposable element -tools: - - "repeatmodeler": - description: "RepeatModeler is a de-novo repeat family identification and modeling package." - homepage: "https://github.com/Dfam-consortium/RepeatModeler" - documentation: "https://github.com/Dfam-consortium/RepeatModeler" - tool_dev_url: "https://github.com/Dfam-consortium/RepeatModeler" - licence: ["Open Software License v2.1"] -input: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. `[ id:'sample1' ]` - - db: - type: file - description: RepeatModeler database files generated with REPEATMODELER_BUILDDATABASE - pattern: "*" -output: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. 
`[ id:'sample1' ]` - - fasta: - type: file - description: Consensus repeat sequences - pattern: "*.fa" - - stk: - type: file - description: Seed alignments - pattern: "*.stk" - - log: - type: file - description: A summarized log of the run - pattern: "*.log" - - versions: - type: file - description: File containing software versions - pattern: "versions.yml" -authors: - - "@GallVp" -maintainers: - - "@GallVp" diff --git a/modules/pfr/repeatmodeler/repeatmodeler/tests/main.nf.test.snap b/modules/pfr/repeatmodeler/repeatmodeler/tests/main.nf.test.snap deleted file mode 100644 index 051dd60..0000000 --- a/modules/pfr/repeatmodeler/repeatmodeler/tests/main.nf.test.snap +++ /dev/null @@ -1,46 +0,0 @@ -{ - "versions": { - "content": [ - [ - "versions.yml:md5,1bb6846ecf1304c262eaef4d3de60cf9" - ] - ], - "timestamp": "2024-01-09T15:06:55.753492" - }, - "stk": { - "content": [ - [ - [ - { - "id": "test" - }, - "test.stk:md5,acd01ad35763c11315e2297a4f051d57" - ] - ] - ], - "timestamp": "2024-01-09T15:06:55.740963" - }, - "for-stub-match": { - "content": [ - [ - "test.fa", - "test.log", - "test.stk" - ] - ], - "timestamp": "2024-01-09T15:06:55.759971" - }, - "fasta": { - "content": [ - [ - [ - { - "id": "test" - }, - "test.fa:md5,e25326771341204e1f8054d9529411e5" - ] - ] - ], - "timestamp": "2024-01-09T15:06:55.737658" - } -} \ No newline at end of file diff --git a/modules/pfr/repeatmodeler/repeatmodeler/tests/tags.yml b/modules/pfr/repeatmodeler/repeatmodeler/tests/tags.yml deleted file mode 100644 index 648cc93..0000000 --- a/modules/pfr/repeatmodeler/repeatmodeler/tests/tags.yml +++ /dev/null @@ -1,2 +0,0 @@ -repeatmodeler/repeatmodeler: - - "modules/pfr/repeatmodeler/repeatmodeler/**" diff --git a/modules/pfr/tsebra/meta.yml b/modules/pfr/tsebra/meta.yml deleted file mode 100644 index 18660d4..0000000 --- a/modules/pfr/tsebra/meta.yml +++ /dev/null @@ -1,66 +0,0 @@ ---- -# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/meta-schema.json -name: "tsebra" -description: Transcript Selector for BRAKER TSEBRA combines gene predictions by selecing transcripts based on their extrisic evidence support -keywords: - - genomics - - transcript - - selector - - gene - - prediction - - evidence -tools: - - "tsebra": - description: TSEBRA is a combiner tool that selects transcripts from gene predictions based on the support by extrisic evidence in form of introns and start/stop codons - homepage: "https://github.com/Gaius-Augustus/TSEBRA" - documentation: "https://github.com/Gaius-Augustus/TSEBRA" - tool_dev_url: "https://github.com/Gaius-Augustus/TSEBRA" - doi: "10.1186/s12859-021-04482-0" - licence: ["Artistic-2.0"] - -input: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. `[ id:'sample1' ]` - - gtfs: - type: list - description: List of gene prediction files in gtf - pattern: "*.gtf" - - hints_files: - type: list - description: List of files containing extrinsic evidence in gff - pattern: "*.gff" - - keep_gtfs: - type: list - description: | - List of gene prediction files in gtf. These gene sets are used the same way as other inputs, but TSEBRA ensures that all - transcripts from these gene sets are included in the output - pattern: "*.gtf" - - config: - type: file - description: Configuration file that sets the parameter for TSEBRA - pattern: "*.cfg" -output: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. 
`[ id:'sample1' ]` - - versions: - type: file - description: File containing software versions - pattern: "versions.yml" - - tsebra_gtf: - type: file - description: Output file for the combined gene predictions in gtf - pattern: "*.gtf" - - tsebra_scores: - type: file - description: Transcript scores as a table - pattern: "*.tsv" -authors: - - "@GallVp" -maintainers: - - "@GallVp" diff --git a/modules/pfr/tsebra/tests/tags.yml b/modules/pfr/tsebra/tests/tags.yml deleted file mode 100644 index 7594182..0000000 --- a/modules/pfr/tsebra/tests/tags.yml +++ /dev/null @@ -1,2 +0,0 @@ -tsebra: - - "modules/pfr/tsebra/**" diff --git a/nextflow.config b/nextflow.config index 1b191f6..705d021 100644 --- a/nextflow.config +++ b/nextflow.config @@ -1,83 +1,311 @@ -includeConfig './conf/base.config' +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + plant-food-research-open/genepal Nextflow config file +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Default config options for all compute environments +---------------------------------------------------------------------------------------- +*/ +// Global default params, used in configs params { // Input/output options - input = null - external_protein_fastas = null - eggnogmapper_db_dir = null - eggnogmapper_tax_scope = null - fastq = null - liftoff_annotations = null - outdir = "./results" + input = null + protein_evidence = null + eggnogmapper_db_dir = null + eggnogmapper_tax_scope = 1 + rna_evidence = null + liftoff_annotations = null + orthofinder_annotations = null + outdir = null // Repeat annotation options - repeat_annotator = 'repeatmodeler' - save_annotated_te_lib = false - edta_is_sensitive = false - repeatmasker_save_outputs = false + repeat_annotator = 'repeatmodeler' + save_annotated_te_lib = false + edta_is_sensitive = false + repeatmasker_save_outputs = false // RNASeq pre-processing options - skip_fastqc = false - skip_fastp = false - min_trimmed_reads = 10000 - extra_fastp_args = "" - save_trimmed = false - remove_ribo_rna = false - save_non_ribo_reads = false - ribo_database_manifest = "${projectDir}/assets/rrna-db-defaults.txt" - - // RNAseq alignment options - star_max_intron_length = 16000 - star_align_extra_args = "" - star_save_outputs = false - save_cat_bam = false + skip_fastqc = false + skip_fastp = false + min_trimmed_reads = 10000 + extra_fastp_args = null + save_trimmed = false + remove_ribo_rna = false + save_non_ribo_reads = false + ribo_database_manifest = "${projectDir}/assets/rrna-db-defaults.txt" + + // RNASeq alignment options + star_max_intron_length = 16000 + star_align_extra_args = null + star_save_outputs = false + save_cat_bam = false // Annotation options - braker_extra_args = "" - braker_allow_isoforms = true - liftoff_coverage = 0.9 - liftoff_identity = 0.9 - eggnogmapper_evalue = 0.00001 - eggnogmapper_pident = 35 - eggnogmapper_purge_nohits = false - - // Max job request options - max_cpus = 12 - max_memory = '200.GB' - max_time = '7.day' - - // Infrastructure options - validationSkipDuplicateCheck= true - validationS3PathCheck = true + braker_extra_args = null + liftoff_coverage = 0.9 + liftoff_identity = 0.9 + eggnogmapper_evalue = 0.00001 + eggnogmapper_pident = 35 + + // Post-annotation filtering options + allow_isoforms = true + enforce_full_intron_support = true + filter_liftoff_by_hints = true + eggnogmapper_purge_nohits = false + + // Annotation output options + braker_save_outputs = false + 
add_attrs_to_proteins_fasta = false + + // Evaluation options + busco_skip = false + busco_lineage_datasets = 'eukaryota_odb10' + + // Boilerplate options + outdir = null + publish_dir_mode = 'copy' + email = null + email_on_fail = null + plaintext_email = false + monochrome_logs = false + hook_url = null + help = false + version = false + pipelines_testdata_base_path = 'https://raw.githubusercontent.com/nf-core/test-datasets/' + + // Config options + config_profile_name = null + config_profile_description = null + custom_config_version = 'master' + custom_config_base = "https://raw.githubusercontent.com/nf-core/configs/${params.custom_config_version}" + config_profile_contact = null + config_profile_url = null + + // Max resource options + // Defaults only, expecting to be overwritten + max_memory = '72.GB' + max_cpus = 16 + max_time = '7.day' + + // Schema validation default options + validationFailUnrecognisedParams = false + validationLenientMode = false + validationSchemaIgnoreParams = 'genomes,igenomes_base' + validationShowHiddenParams = false + validate_params = true + } -manifest { - name = 'pangene' - author = """Usman Rashid, Jason Shiller""" - homePage = 'https://github.com/PlantandFoodResearch/pan-gene' - description = """A NextFlow pipeline for pan-genome annotation""" - mainScript = 'main.nf' - nextflowVersion = '!>=23.04.4' - version = '0.3.3' - doi = '' +// Load base.config by default for all pipelines +includeConfig 'conf/base.config' + +// Load nf-core custom profiles from different Institutions +try { + includeConfig "${params.custom_config_base}/nfcore_custom.config" +} catch (Exception e) { + System.err.println("WARNING: Could not load nf-core/config profiles: ${params.custom_config_base}/nfcore_custom.config") } -def trace_timestamp = new java.util.Date().format( 'yyyy-MM-dd_HH-mm-ss') +// Load plant-food-research-open/genepal custom profiles from different institutions. 
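Taken together with the defaults above, a minimal launch only needs the three parameters marked as required further down in nextflow_schema.json (--input, --protein_evidence, --outdir); everything else falls back to this params block. A hedged example with illustrative file names:

```bash
nextflow run plant-food-research-open/genepal \
    -profile docker \
    --input assemblies.csv \
    --protein_evidence proteins.fa \
    --outdir results
```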
+// try { +// includeConfig "${params.custom_config_base}/pipeline/genepal.config" +// } catch (Exception e) { +// System.err.println("WARNING: Could not load nf-core/config/genepal profiles: ${params.custom_config_base}/pipeline/genepal.config") +// } +profiles { + debug { + dumpHashes = true + process.beforeScript = 'echo $HOSTNAME' + cleanup = false + nextflow.enable.configProcessNamesValidation = true + } + conda { + conda.enabled = true + docker.enabled = false + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + conda.channels = ['conda-forge', 'bioconda', 'defaults'] + apptainer.enabled = false + } + mamba { + conda.enabled = true + conda.useMamba = true + docker.enabled = false + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + apptainer.enabled = false + } + docker { + docker.enabled = true + conda.enabled = false + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + apptainer.enabled = false + docker.runOptions = '-u $(id -u):$(id -g)' + } + arm { + docker.runOptions = '-u $(id -u):$(id -g) --platform=linux/amd64' + } + singularity { + singularity.enabled = true + singularity.autoMounts = true + conda.enabled = false + docker.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + apptainer.enabled = false + } + podman { + podman.enabled = true + conda.enabled = false + docker.enabled = false + singularity.enabled = false + shifter.enabled = false + charliecloud.enabled = false + apptainer.enabled = false + } + shifter { + shifter.enabled = true + conda.enabled = false + docker.enabled = false + singularity.enabled = false + podman.enabled = false + charliecloud.enabled = false + apptainer.enabled = false + } + charliecloud { + charliecloud.enabled = true + conda.enabled = false + docker.enabled = false + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + apptainer.enabled = false + } + apptainer { + apptainer.enabled = true + apptainer.autoMounts = true + conda.enabled = false + docker.enabled = false + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + wave { + apptainer.ociAutoPull = true + singularity.ociAutoPull = true + wave.enabled = true + wave.freeze = true + wave.strategy = 'conda,container' + } + gitpod { + executor.name = 'local' + executor.cpus = 4 + executor.memory = 8.GB + } + test { includeConfig 'conf/test.config' } + test_full { includeConfig 'conf/test_full.config' } +} + +// Set default registry for Apptainer, Docker, Podman and Singularity independent of -profile +// Will not be used unless Apptainer / Docker / Podman / Singularity are enabled +// Set to your registry if you have a mirror of containers +apptainer.registry = 'quay.io' +docker.registry = 'quay.io' +podman.registry = 'quay.io' +singularity.registry = 'quay.io' + +// Nextflow plugins +plugins { + id 'nf-validation@1.1.3' // Validation of pipeline parameters and creation of an input channel from a sample sheet +} + +// Export these variables to prevent local Python/R libraries from conflicting with those in the container +// The JULIA depot path has been adjusted to a fixed path `/usr/local/share/julia` that needs to be used for packages in the container. +// See https://apeltzer.github.io/post/03-julia-lang-nextflow/ for details on that. 
Once we have a common agreement on where to keep Julia packages, this is adjustable. + +env { + PYTHONNOUSERSITE = 1 + R_PROFILE_USER = "/.Rprofile" + R_ENVIRON_USER = "/.Renviron" + JULIA_DEPOT_PATH = "/usr/local/share/julia" +} + +// Capture exit codes from upstream processes when piping +process.shell = ['/bin/bash', '-euo', 'pipefail'] + +// Disable process selector warnings by default. Use debug profile to enable warnings. +nextflow.enable.configProcessNamesValidation = false + +def trace_timestamp = new java.util.Date().format( 'yyyy-MM-dd_HH-mm-ss') timeline { - enabled = true - file = "${params.outdir}/pipeline_info/execution_timeline_${trace_timestamp}.html" + enabled = true + file = "${params.outdir}/pipeline_info/execution_timeline_${trace_timestamp}.html" } report { - enabled = true - file = "${params.outdir}/pipeline_info/execution_report_${trace_timestamp}.html" + enabled = true + file = "${params.outdir}/pipeline_info/execution_report_${trace_timestamp}.html" } trace { - enabled = true - file = "${params.outdir}/pipeline_info/execution_trace_${trace_timestamp}.txt" + enabled = true + file = "${params.outdir}/pipeline_info/execution_trace_${trace_timestamp}.txt" +} +dag { + enabled = true + file = "${params.outdir}/pipeline_info/pipeline_dag_${trace_timestamp}.html" } -plugins { - id 'nf-validation@1.1.3' +manifest { + name = 'plant-food-research-open/genepal' + author = """Usman Rashid, Jason Shiller""" + homePage = 'https://github.com/plant-food-research-open/genepal' + description = """A Nextflow pipeline for single genome, multiple genomes and pan-genome annotation""" + mainScript = 'main.nf' + nextflowVersion = '!>=23.04.0' + version = '0.4.0' + doi = '' } -includeConfig './conf/modules.config' +// Load modules.config for DSL2 module specific options +includeConfig 'conf/modules.config' + +// Function to ensure that resource requirements don't go beyond +// a maximum limit +def check_max(obj, type) { + if (type == 'memory') { + try { + if (obj.compareTo(params.max_memory as nextflow.util.MemoryUnit) == 1) + return params.max_memory as nextflow.util.MemoryUnit + else + return obj + } catch (all) { + println " ### ERROR ### Max memory '${params.max_memory}' is not valid! Using default value: $obj" + return obj + } + } else if (type == 'time') { + try { + if (obj.compareTo(params.max_time as nextflow.util.Duration) == 1) + return params.max_time as nextflow.util.Duration + else + return obj + } catch (all) { + println " ### ERROR ### Max time '${params.max_time}' is not valid! Using default value: $obj" + return obj + } + } else if (type == 'cpus') { + try { + return Math.min( obj, params.max_cpus as int ) + } catch (all) { + println " ### ERROR ### Max cpus '${params.max_cpus}' is not valid! 
Using default value: $obj"
+            return obj
+        }
+    }
+}
diff --git a/nextflow_schema.json b/nextflow_schema.json
index 988f87a..6cbe0d1 100644
--- a/nextflow_schema.json
+++ b/nextflow_schema.json
@@ -1,8 +1,8 @@
 {
     "$schema": "http://json-schema.org/draft-07/schema",
-    "$id": "https://raw.githubusercontent.com/plantandfoodresearch/pangene/master/nextflow_schema.json",
-    "title": "plantandfoodresearch/pangene pipeline parameters",
-    "description": "A NextFlow pipeline for pan-genome annotation",
+    "$id": "https://raw.githubusercontent.com/plant-food-research-open/genepal/master/nextflow_schema.json",
+    "title": "plant-food-research-open/genepal pipeline parameters",
+    "description": "A Nextflow pipeline for single genome, multiple genomes and pan-genome annotation",
     "type": "object",
     "definitions": {
         "input_output_options": {
@@ -10,7 +10,7 @@
             "type": "object",
             "fa_icon": "fas fa-terminal",
             "description": "",
-            "required": ["input", "outdir", "external_protein_fastas", "eggnogmapper_db_dir", "eggnogmapper_tax_scope"],
+            "required": ["input", "protein_evidence", "outdir"],
             "properties": {
                 "input": {
                     "type": "string",
@@ -19,36 +19,36 @@
                     "schema": "assets/schema_input.json",
                     "pattern": "^\\S+\\.csv$",
                     "description": "Target assemblies listed in a CSV sheet",
-                    "fa_icon": "fas fa-file-csv",
-                    "help_text": "FASTA and other associated files for target assemblies provided as a CSV sheet"
+                    "fa_icon": "fas fa-file-csv"
                 },
-                "external_protein_fastas": {
+                "protein_evidence": {
                     "type": "string",
-                    "description": "External protein fastas listed in a text sheet",
-                    "help_text": "A text file listing FASTA files to provide protein evidence for annotation",
+                    "description": "Protein evidence provided as a fasta file or multiple fasta files listed in a plain txt file",
                     "format": "file-path",
-                    "mimetype": "text/txt",
+                    "pattern": "^\\S+\\.(txt|fa|faa|fna|fsa|fas|fasta)(\\.gz)?$",
                     "fa_icon": "far fa-file-alt"
                 },
                 "eggnogmapper_db_dir": {
                     "type": "string",
                     "description": "Eggnogmapper database directory",
-                    "format": "directory-path"
+                    "format": "directory-path",
+                    "fa_icon": "fas fa-folder-open"
                 },
                 "eggnogmapper_tax_scope": {
                     "type": "integer",
-                    "description": "Eggnogmapper taxonomy scopre",
-                    "minimum": 0
+                    "description": "Eggnogmapper taxonomy scope.
Eukaryota: 2759, Viridiplantae: 33090, Archaea: 2157, Bacteria: 2, root: 1", + "minimum": 1, + "default": 1, + "fa_icon": "fas fa-hashtag" }, - "fastq": { + "rna_evidence": { "type": "string", "format": "file-path", "mimetype": "text/csv", - "schema": "assets/schema_fastq.json", + "schema": "assets/schema_rna.json", "pattern": "^\\S+\\.csv$", - "help_text": "FASTQ files for RNASeq samples corresponding to each target assembly provided in a CSV sheet", "fa_icon": "fas fa-file-csv", - "description": "FASTQ samples listed in a CSV sheet" + "description": "FASTQ/BAM samples listed in a CSV sheet" }, "liftoff_annotations": { "type": "string", @@ -57,16 +57,22 @@ "schema": "assets/schema_liftoff.json", "pattern": "^\\S+\\.csv$", "description": "Reference annotations listed in a CSV sheet", - "help_text": "FASTA and GFF3 files for reference annotations for liftoff listed in a CSV sheet", + "fa_icon": "fas fa-file-csv" + }, + "orthofinder_annotations": { + "type": "string", + "format": "file-path", + "mimetype": "text/csv", + "schema": "/assets/schema_orthofinder.json", + "pattern": "^\\S+\\.csv$", + "description": "Additional annotations for orthology listed in a CSV sheet", "fa_icon": "fas fa-file-csv" }, "outdir": { "type": "string", "format": "directory-path", "description": "The output directory where the results will be saved", - "fa_icon": "fas fa-folder-open", - "default": "./results", - "help_text": " Use absolute paths to storage on Cloud infrastructure" + "fa_icon": "fas fa-folder-open" } } }, @@ -80,21 +86,26 @@ "type": "string", "default": "repeatmodeler", "enum": ["edta", "repeatmodeler"], - "description": "'edta' or 'repeatmodeler'" + "description": "'edta' or 'repeatmodeler'", + "fa_icon": "fas fa-tasks" }, "save_annotated_te_lib": { "type": "boolean", - "description": "Save annotated TE library or not?" + "description": "Save annotated TE library or not?", + "fa_icon": "fas fa-question-circle" }, "edta_is_sensitive": { "type": "boolean", - "description": "Use '--sensitive 1' flag with EDTA or not?" + "description": "Use '--sensitive 1' flag with EDTA or not?", + "fa_icon": "fas fa-question-circle" }, "repeatmasker_save_outputs": { "type": "boolean", - "description": "Save the repeat-masked genome or not?" + "description": "Save the repeat-masked genome or not?", + "fa_icon": "fas fa-question-circle" } - } + }, + "fa_icon": "fab fa-adn" }, "rnaseq_pre_processing_options": { "title": "RNASeq pre-processing options", @@ -104,45 +115,55 @@ "properties": { "skip_fastqc": { "type": "boolean", - "description": "Skip FASTQC or not?" + "description": "Skip FASTQC or not?", + "fa_icon": "fas fa-question-circle" }, "skip_fastp": { "type": "boolean", - "description": "Skip trimming by FASTQP or not?" + "description": "Skip trimming by FASTQP or not?", + "fa_icon": "fas fa-question-circle" }, "min_trimmed_reads": { "type": "integer", "default": 10000, "description": "Exclude a sample if its reads after trimming are below this number", - "minimum": 0 + "minimum": 0, + "fa_icon": "fas fa-hashtag" }, "extra_fastp_args": { "type": "string", - "description": "Extra FASTP arguments" + "description": "Extra FASTP arguments", + "fa_icon": "fas fa-terminal" }, "save_trimmed": { "type": "boolean", - "description": "Save FASTQ files after trimming or not?" + "description": "Save FASTQ files after trimming or not?", + "fa_icon": "fas fa-question-circle" }, "remove_ribo_rna": { "type": "boolean", - "description": "Remove Ribosomal RNA or not?" 
+ "description": "Remove Ribosomal RNA or not?", + "fa_icon": "fas fa-question-circle" }, "save_non_ribo_reads": { "type": "boolean", - "description": "Save FASTQ files after Ribosomal RNA removal or not?" + "description": "Save FASTQ files after Ribosomal RNA removal or not?", + "fa_icon": "fas fa-question-circle" }, "ribo_database_manifest": { "type": "string", - "default": "${projectDir}/assets/rrna-db-defaults.txt", - "description": "Ribosomal RNA fastas listed in a text sheet", "format": "file-path", - "mimetype": "text/txt" + "exists": true, + "mimetype": "text/plain", + "default": "${projectDir}/assets/rrna-db-defaults.txt", + "fa_icon": "fas fa-database", + "description": "Ribosomal RNA fastas listed in a text sheet" } - } + }, + "fa_icon": "fas fa-cut" }, "rnaseq_alignment_options": { - "title": "RNAseq alignment options", + "title": "RNASeq alignment options", "type": "object", "description": "", "default": "", @@ -151,21 +172,26 @@ "type": "integer", "default": 16000, "minimum": 0, - "description": "Maximum intron length for STAR alignment" + "description": "Maximum intron length for STAR alignment", + "fa_icon": "fas fa-hashtag" }, "star_align_extra_args": { "type": "string", - "description": "EXTRA arguments for STAR" + "description": "EXTRA arguments for STAR", + "fa_icon": "fas fa-terminal" }, "star_save_outputs": { "type": "boolean", - "description": "Save BAM files from STAR or not?" + "description": "Save BAM files from STAR or not?", + "fa_icon": "fas fa-question-circle" }, "save_cat_bam": { "type": "boolean", - "description": "SAVE a concatenated BAM file per assembly or not?" + "description": "SAVE a concatenated BAM file per assembly or not?", + "fa_icon": "fas fa-question-circle" } - } + }, + "fa_icon": "fas fa-align-center" }, "annotation_options": { "title": "Annotation options", @@ -175,40 +201,109 @@ "properties": { "braker_extra_args": { "type": "string", - "description": "Extra arguments for BRAKER" - }, - "braker_allow_isoforms": { - "type": "boolean", - "default": true, - "description": "Allow multiple isoforms for gene models" + "description": "Extra arguments for BRAKER", + "fa_icon": "fas fa-terminal" }, "liftoff_coverage": { "type": "number", "default": 0.9, "minimum": 0, "maximum": 1, - "description": "Liftoff coverage parameter" + "description": "Liftoff coverage parameter", + "fa_icon": "fas fa-hashtag" }, "liftoff_identity": { "type": "number", "default": 0.9, - "description": "Liftoff identity parameter" + "description": "Liftoff identity parameter", + "fa_icon": "fas fa-hashtag" }, "eggnogmapper_evalue": { "type": "number", "default": 1e-5, - "description": "Only report alignments below or equal the e-value threshold" + "description": "Only report alignments below or equal the e-value threshold", + "fa_icon": "fas fa-hashtag" }, "eggnogmapper_pident": { "type": "integer", "default": 35, "description": "Only report alignments above or equal to the given percentage of identity (0-100)", "minimum": 0, - "maximum": 100 + "maximum": 100, + "fa_icon": "fas fa-hashtag" + } + }, + "fa_icon": "fab fa-adn" + }, + "post_annotation_filtering_options": { + "title": "Post-annotation filtering options", + "type": "object", + "description": "", + "default": "", + "fa_icon": "fas fa-filter", + "properties": { + "allow_isoforms": { + "type": "boolean", + "default": true, + "description": "Allow multiple isoforms for gene models", + "fa_icon": "fas fa-question-circle" + }, + "enforce_full_intron_support": { + "type": "boolean", + "default": true, + "fa_icon": "fas 
fa-question-circle", + "description": "Require every model to have external evidence for all its introns" + }, + "filter_liftoff_by_hints": { + "type": "boolean", + "default": true, + "fa_icon": "fas fa-question-circle", + "description": "Use BRAKER hints to filter Liftoff models" }, "eggnogmapper_purge_nohits": { "type": "boolean", - "description": "Purge transcripts which do not have a hit against eggnog" + "description": "Purge transcripts which do not have a hit against eggnog", + "fa_icon": "fas fa-question-circle" + } + } + }, + "annotation_output_options": { + "title": "Annotation output options", + "type": "object", + "description": "", + "default": "", + "fa_icon": "fas fa-download", + "properties": { + "braker_save_outputs": { + "type": "boolean", + "description": "Save BRAKER files", + "fa_icon": "fas fa-question-circle" + }, + "add_attrs_to_proteins_fasta": { + "type": "boolean", + "fa_icon": "fas fa-question-circle", + "description": "Add gff attributes to proteins fasta" + } + } + }, + "evaluation_options": { + "title": "Evaluation options", + "type": "object", + "description": "", + "default": "", + "fa_icon": "fas fa-balance-scale-right", + "properties": { + "busco_skip": { + "type": "boolean", + "fa_icon": "fas fa-fast-forward", + "description": "Skip evaluation by BUSCO" + }, + "busco_lineage_datasets": { + "type": "string", + "pattern": "^(\\w+_odb10\\s)*\\w+_odb10$", + "fa_icon": "fas fa-tree", + "description": "BUSCO lineages as a space-separated list: 'fungi_odb10 microsporidia_odb10'", + "default": "eukaryota_odb10" } } }, @@ -222,47 +317,161 @@ "max_cpus": { "type": "integer", "description": "Maximum number of CPUs that can be requested for any single job.", - "default": 12, - "fa_icon": "fas fa-microchip", - "hidden": true, - "help_text": "Use to set an upper-limit for the CPU requirement for each process. Should be an integer e.g. `--max_cpus 1`" + "default": 16, + "fa_icon": "fas fa-microchip" }, "max_memory": { "type": "string", - "description": "Maximum amount of memory that can be requested for any single job.", - "default": "200.GB", + "description": "Maximum amount of memory that can be requested for any single job. Example: '8.GB'", + "default": "72.GB", "fa_icon": "fas fa-memory", - "pattern": "^\\d+(\\.\\d+)?\\.?\\s*(K|M|G|T)?B$", - "hidden": true, - "help_text": "Use to set an upper-limit for the memory requirement for each process. Should be a string in the format integer-unit e.g. `--max_memory '8.GB'`" + "pattern": "^\\d+(\\.\\d+)?\\.?\\s*(K|M|G|T)?B$" }, "max_time": { "type": "string", - "description": "Maximum amount of time that can be requested for any single job.", + "description": "Maximum amount of time that can be requested for any single job. Example: '1.day'", "default": "7.day", "fa_icon": "far fa-clock", - "pattern": "^(\\d+\\.?\\s*(s|m|h|d|day)\\s*)+$", + "pattern": "^(\\d+\\.?\\s*(s|m|h|d|day)\\s*)+$" + } + } + }, + "institutional_config_options": { + "title": "Institutional config options", + "type": "object", + "fa_icon": "fas fa-university", + "description": "Parameters used to describe centralised config profiles. These should not be edited.", + "help_text": "The centralised nf-core configuration profiles use a handful of pipeline parameters to describe themselves. This information is then printed to the Nextflow log when you run a pipeline. 
You should not need to change these values when you run a pipeline.", + "properties": { + "custom_config_version": { + "type": "string", + "description": "Git commit id for Institutional configs.", + "default": "master", + "hidden": true, + "fa_icon": "fas fa-users-cog" + }, + "custom_config_base": { + "type": "string", + "description": "Base directory for Institutional configs.", + "default": "https://raw.githubusercontent.com/nf-core/configs/master", + "hidden": true, + "fa_icon": "fas fa-users-cog" + }, + "config_profile_name": { + "type": "string", + "description": "Institutional config name.", + "hidden": true, + "fa_icon": "fas fa-users-cog" + }, + "config_profile_description": { + "type": "string", + "description": "Institutional config description.", + "hidden": true, + "fa_icon": "fas fa-users-cog" + }, + "config_profile_contact": { + "type": "string", + "description": "Institutional config contact information.", + "hidden": true, + "fa_icon": "fas fa-users-cog" + }, + "config_profile_url": { + "type": "string", + "description": "Institutional config URL link.", "hidden": true, - "help_text": "Use to set an upper-limit for the time requirement for each process. Should be a string in the format integer-unit e.g. `--max_time '2.h'`" + "fa_icon": "fas fa-users-cog" } } }, - "infrastructure_options": { - "title": "Infrastructure options", + "generic_options": { + "title": "Generic options", "type": "object", "fa_icon": "fas fa-file-import", - "description": "", - "help_text": "", + "description": "Less common options for the pipeline, typically set in a config file.", + "help_text": "These options are common to all nf-core pipelines and allow you to customise some of the core preferences for how the pipeline runs.\n\nTypically these options would be set in a Nextflow config file loaded for all pipeline runs, such as `~/.nextflow/config`.", "properties": { - "validationSkipDuplicateCheck": { + "help": { "type": "boolean", - "default": true, + "description": "Display help text.", + "fa_icon": "fas fa-question-circle", + "hidden": true + }, + "version": { + "type": "boolean", + "description": "Display version and exit.", + "fa_icon": "fas fa-question-circle", + "hidden": true + }, + "publish_dir_mode": { + "type": "string", + "default": "copy", + "description": "Method used to save pipeline results to output directory.", + "fa_icon": "fas fa-copy", + "enum": ["symlink", "rellink", "link", "copy", "copyNoFollow", "move"], + "hidden": true + }, + "email": { + "type": "string", + "description": "Email address for completion summary.", + "fa_icon": "fas fa-envelope", + "pattern": "^([a-zA-Z0-9_\\-\\.]+)@([a-zA-Z0-9_\\-\\.]+)\\.([a-zA-Z]{2,5})$", + "hidden": true + }, + "email_on_fail": { + "type": "string", + "description": "Email address for completion summary, only when pipeline fails.", + "fa_icon": "fas fa-exclamation-triangle", + "pattern": "^([a-zA-Z0-9_\\-\\.]+)@([a-zA-Z0-9_\\-\\.]+)\\.([a-zA-Z]{2,5})$", "hidden": true }, - "validationS3PathCheck": { + "plaintext_email": { "type": "boolean", + "description": "Send plain-text email instead of HTML.", + "fa_icon": "fas fa-remove-format", + "hidden": true + }, + "monochrome_logs": { + "type": "boolean", + "description": "Do not use coloured log outputs.", + "fa_icon": "fas fa-palette", + "hidden": true + }, + "hook_url": { + "type": "string", + "description": "Incoming hook URL for messaging service", + "fa_icon": "fas fa-people-group", + "hidden": true + }, + "validate_params": { + "type": "boolean", + "description": "Boolean whether to 
validate parameters against the schema at runtime", "default": true, + "fa_icon": "fas fa-check-square", + "hidden": true + }, + "validationShowHiddenParams": { + "type": "boolean", + "fa_icon": "far fa-eye-slash", + "description": "Show all params when using `--help`", + "hidden": true + }, + "validationFailUnrecognisedParams": { + "type": "boolean", + "fa_icon": "far fa-check-circle", + "description": "Validation of parameters fails when an unrecognised parameter is found.", "hidden": true + }, + "validationLenientMode": { + "type": "boolean", + "fa_icon": "far fa-check-circle", + "description": "Validation of parameters in lenient more.", + "hidden": true + }, + "pipelines_testdata_base_path": { + "type": "string", + "default": "https://raw.githubusercontent.com/nf-core/test-datasets/", + "hidden": true, + "description": "Base path for pipeline test datasets" } } } @@ -283,11 +492,23 @@ { "$ref": "#/definitions/annotation_options" }, + { + "$ref": "#/definitions/post_annotation_filtering_options" + }, + { + "$ref": "#/definitions/annotation_output_options" + }, + { + "$ref": "#/definitions/evaluation_options" + }, { "$ref": "#/definitions/max_job_request_options" }, { - "$ref": "#/definitions/infrastructure_options" + "$ref": "#/definitions/institutional_config_options" + }, + { + "$ref": "#/definitions/generic_options" } ] } diff --git a/pfr/params.json b/pfr/params.json index cb7d801..5314052 100644 --- a/pfr/params.json +++ b/pfr/params.json @@ -1,8 +1,10 @@ { - "input": "/workspace/pangene/test_data/assemblysheet.csv", - "external_protein_fastas": "/workspace/pangene/test_data/external-protein-fastas.txt", + "input": "/workspace/genepal/testdata/0.4.0/assemblysheet.csv", + "protein_evidence": "/workspace/genepal/testdata/0.4.0/external-protein-fastas.txt", "eggnogmapper_db_dir": "/workspace/ComparativeDataSources/emapperdb/5.0.2", "eggnogmapper_tax_scope": 33090, - "fastq": "/workspace/pangene/test_data/fastqsheet.csv", - "liftoff_annotations": "/workspace/pangene/test_data/liftoffannotations.csv" + "rna_evidence": "/workspace/genepal/testdata/0.4.0/rnasheet.csv", + "liftoff_annotations": "/workspace/genepal/testdata/0.4.0/liftoffannotations.csv", + "busco_lineage_datasets": "embryophyta_odb10", + "outdir": "results" } diff --git a/pfr/profile.config b/pfr/profile.config index b0eba29..ce04c6f 100644 --- a/pfr/profile.config +++ b/pfr/profile.config @@ -6,12 +6,12 @@ profiles { apptainer { envWhitelist = "APPTAINER_BINDPATH,APPTAINER_BIND" - cacheDir = "/workspace/pangene/singularity" + cacheDir = "/workspace/genepal/singularity" } } } -// params { -// config_profile_name = 'Plant&Food profile' -// config_profile_description = 'Plant&Food profile using SLURM in combination with Apptainer' -// } +params { + config_profile_name = 'Plant&Food profile' + config_profile_description = 'Plant&Food profile using SLURM in combination with Apptainer' +} diff --git a/pfr_pangene b/pfr_genepal similarity index 83% rename from pfr_pangene rename to pfr_genepal index 9b31b44..d4f0fca 100644 --- a/pfr_pangene +++ b/pfr_genepal @@ -1,13 +1,13 @@ #!/bin/bash -e -#SBATCH --job-name PANGENE +#SBATCH --job-name GENEPAL #SBATCH --time=14-00:00:00 #SBATCH --nodes=1 #SBATCH --ntasks=1 #SBATCH --cpus-per-task=1 -#SBATCH --output pfr_pangene.stdout -#SBATCH --error pfr_pangene.stderr +#SBATCH --output pfr_genepal.stdout +#SBATCH --error pfr_genepal.stderr #SBATCH --mem=4G full_test_flag=0 @@ -26,9 +26,8 @@ while getopts "t" opt; do done shift $((OPTIND -1)) -ml unload perl ml apptainer/1.1 -ml 
nextflow/23.04.4
+ml nextflow/24.04.3
 
 export TMPDIR="/workspace/$USER/tmp"
 export APPTAINER_BINDPATH="$APPTAINER_BINDPATH,$TMPDIR:$TMPDIR,$TMPDIR:/tmp"
@@ -38,7 +37,8 @@ if [ $full_test_flag -eq 1 ]; then
         main.nf \
         -c pfr/profile.config \
         -profile pfr,apptainer,test_full \
-        -resume
+        -resume \
+        --outdir results
 else
     nextflow \
         main.nf \
diff --git a/pyproject.toml b/pyproject.toml
deleted file mode 100644
index 5611062..0000000
--- a/pyproject.toml
+++ /dev/null
@@ -1,15 +0,0 @@
-# Config file for Python. Mostly used to configure linting of bin/*.py with Ruff.
-# Should be kept the same as nf-core/tools to avoid fighting with template synchronisation.
-[tool.ruff]
-line-length = 120
-target-version = "py38"
-cache-dir = "~/.cache/ruff"
-
-[tool.ruff.lint]
-select = ["I", "E1", "E4", "E7", "E9", "F", "UP", "N"]
-
-[tool.ruff.lint.isort]
-known-first-party = ["nf_core"]
-
-[tool.ruff.lint.per-file-ignores]
-"__init__.py" = ["E402", "F401"]
diff --git a/subworkflows/gallvp/fasta_edta_lai/main.nf b/subworkflows/gallvp/fasta_edta_lai/main.nf
new file mode 100644
index 0000000..1d4dd85
--- /dev/null
+++ b/subworkflows/gallvp/fasta_edta_lai/main.nf
@@ -0,0 +1,170 @@
+include { CUSTOM_SHORTENFASTAIDS    } from '../../../modules/gallvp/custom/shortenfastaids/main'
+include { EDTA_EDTA                 } from '../../../modules/gallvp/edta/edta/main'
+include { LTRRETRIEVER_LAI          } from '../../../modules/gallvp/ltrretriever/lai/main'
+include { CUSTOM_RESTOREGFFIDS      } from '../../../modules/gallvp/custom/restoregffids/main'
+
+workflow FASTA_EDTA_LAI {
+
+    take:
+    ch_fasta            // channel: [ val(meta), fasta ]
+    ch_monoploid_seqs   // channel: [ val(meta2), txt ]; Optional: Set to [] if not needed
+                        // val(meta) from ch_fasta and val(meta2) from ch_monoploid_seqs are
+                        // only required to have the same `id`
+    skip_lai            // val(true|false)
+
+    main:
+    ch_versions = Channel.empty()
+
+    // Prepare input channels
+    ch_monoploid_seqs_plain = ( ch_monoploid_seqs ?: Channel.empty() )
+        | filter { meta2, seqs -> seqs }
+        // Cater to channel: [ meta2, [] ]
+        | map { meta2, seqs -> [ meta2.id, seqs ] }
+
+    // MODULE: CUSTOM_SHORTENFASTAIDS
+    CUSTOM_SHORTENFASTAIDS ( ch_fasta )
+
+    ch_short_ids_tsv = CUSTOM_SHORTENFASTAIDS.out.short_ids_tsv
+    ch_shortenfastaids_branch = ch_short_ids_tsv
+        | branch { meta, tsv ->
+            change: !
tsv.text.contains('IDs have acceptable length and character') + nonchange: tsv.text.contains('IDs have acceptable length and character') + } + + ch_short_ids_fasta = ch_shortenfastaids_branch.nonchange + | join( + ch_fasta + ) + | map { meta, tsv, fasta -> [ meta, fasta ] } + | mix( + ch_shortenfastaids_branch.change + | join( + CUSTOM_SHORTENFASTAIDS.out.short_ids_fasta + ) + | map { meta, tsv, fasta -> [ meta, fasta ] } + ) + + ch_versions = ch_versions.mix(CUSTOM_SHORTENFASTAIDS.out.versions.first()) + + // collectFile: Map monoploid seqs to short IDs + ch_short_monoploid_seqs = ch_short_ids_tsv + | map { meta, tsv -> [ meta.id, tsv ] } + | join(ch_monoploid_seqs_plain) + | map { id, tsv, seqs -> + map_monoploid_seqs_to_new_ids(id, tsv, seqs) + } + | collectFile(newLine:true) + | map { seqs -> + def id = seqs.name.split('.mapped.monoploid.seqs.txt')[0] + + [ id, seqs ] + } + | join( + ch_short_ids_tsv + | map { meta, tsv -> [ meta.id, meta, tsv ] } + ) + | map { id, seqs, meta, tsv -> [ meta, seqs ] } + + // MODULE: EDTA_EDTA + EDTA_EDTA( + ch_short_ids_fasta, + [], + [], + [], + [] + ) + + ch_pass_list = EDTA_EDTA.out.pass_list + ch_out_file = EDTA_EDTA.out.out_file + ch_pass_out = ch_pass_list.join(ch_out_file) + ch_te_lib_fasta = EDTA_EDTA.out.te_lib_fasta + ch_te_anno_gff3 = EDTA_EDTA.out.te_anno_gff3 + ch_versions = ch_versions.mix(EDTA_EDTA.out.versions.first()) + + ch_short_ids_fasta_mono = ch_short_ids_fasta + | join( + ch_short_monoploid_seqs, + by:0, + remainder: true + ) + // Danger! This partial join can fail + | filter { meta, fasta, seqs -> fasta } + // This filter safeguards against fail on upstream + // process failure: https://github.com/nextflow-io/nextflow/issues/5043 + // fasta may come from upstream processes + // seqs also comes from upstream processes, it is optional + // and may not be present for some of the combinations + | map { meta, fasta, seqs -> [ meta, fasta, seqs ?: [] ] } + + ch_lai_inputs = skip_lai + ? Channel.empty() + : ch_short_ids_fasta_mono + | join( + ch_pass_out + ) + | map { meta, fasta, seqs, pass, out -> + [ meta, fasta, pass, out, seqs ] + } + LTRRETRIEVER_LAI( + ch_lai_inputs.map { meta, fasta, pass, out, seqs -> [ meta, fasta ] }, + ch_lai_inputs.map { meta, fasta, pass, out, seqs -> pass }, + ch_lai_inputs.map { meta, fasta, pass, out, seqs -> out }, + ch_lai_inputs.map { meta, fasta, pass, out, seqs -> seqs } + ) + + ch_lai_log = LTRRETRIEVER_LAI.out.log + ch_lai_out = LTRRETRIEVER_LAI.out.lai_out + ch_versions = ch_versions.mix(LTRRETRIEVER_LAI.out.versions.first()) + + // MODULE: CUSTOM_RESTOREGFFIDS + ch_gff_tsv_branch = ch_te_anno_gff3.join(ch_short_ids_tsv) + | branch { meta, gff, tsv -> + change: ! 
tsv.text.contains('IDs have acceptable length and character')
+            nochange: tsv.text.contains('IDs have acceptable length and character')
+        }
+
+    CUSTOM_RESTOREGFFIDS (
+        ch_gff_tsv_branch.change.map { meta, gff, tsv -> [ meta, gff ] },
+        ch_gff_tsv_branch.change.map { meta, gff, tsv -> tsv }
+    )
+
+    ch_restored_gff = ch_gff_tsv_branch.nochange
+        | map { meta, gff, tsv -> [ meta, gff ] }
+        | mix(CUSTOM_RESTOREGFFIDS.out.restored_ids_gff3)
+
+    ch_versions = ch_versions.mix(CUSTOM_RESTOREGFFIDS.out.versions.first())
+
+    emit:
+    te_lib_fasta = ch_te_lib_fasta // channel: [ val(meta), fasta ]
+    te_anno_gff3 = ch_restored_gff // channel: [ val(meta), gff ]
+    lai_log      = ch_lai_log      // channel: [ val(meta), log ]
+    lai_out      = ch_lai_out      // channel: [ val(meta), out ]
+    versions     = ch_versions     // channel: [ versions.yml ]
+}
+
+def map_monoploid_seqs_to_new_ids(id, short_ids_tsv, monoploid_seqs) {
+
+    def short_ids_head = short_ids_tsv.text.tokenize('\n')[0]
+
+    if (short_ids_head == "IDs have acceptable length and character. No change required.") {
+        return [ "${id}.mapped.monoploid.seqs.txt" ] + monoploid_seqs.text.tokenize('\n')
+    }
+
+    def orig_to_new_ids = [:]
+    short_ids_tsv.text.eachLine { line ->
+        def (original_id, renamed_id) = line.tokenize('\t')
+        orig_to_new_ids[original_id] = renamed_id
+    }
+
+    def mapped_ids = []
+    monoploid_seqs.text.eachLine { original_id ->
+        if (!orig_to_new_ids[original_id]) {
+            error "Failed to find $original_id in ${short_ids_tsv}" +
+                "\nThe short_ids_tsv file is malformed!"
+        }
+
+        mapped_ids.add(orig_to_new_ids[original_id])
+    }
+
+    return [ "${id}.mapped.monoploid.seqs.txt" ] + mapped_ids
+}
diff --git a/subworkflows/pfr/fasta_edta_lai/meta.yml b/subworkflows/gallvp/fasta_edta_lai/meta.yml
similarity index 100%
rename from subworkflows/pfr/fasta_edta_lai/meta.yml
rename to subworkflows/gallvp/fasta_edta_lai/meta.yml
diff --git a/subworkflows/pfr/fasta_edta_lai/tests/main.nf.test b/subworkflows/gallvp/fasta_edta_lai/tests/main.nf.test
similarity index 58%
rename from subworkflows/pfr/fasta_edta_lai/tests/main.nf.test
rename to subworkflows/gallvp/fasta_edta_lai/tests/main.nf.test
index 2c6850d..0b56ee5 100644
--- a/subworkflows/pfr/fasta_edta_lai/tests/main.nf.test
+++ b/subworkflows/gallvp/fasta_edta_lai/tests/main.nf.test
@@ -6,7 +6,7 @@ nextflow_workflow {
     config "./nextflow.config"
 
     tag "subworkflows"
-    tag "subworkflows_nfcore"
+    tag "subworkflows_gallvp"
     tag "subworkflows/fasta_edta_lai"
     tag "fasta_edta_lai"
     tag "modules/nf-core/gunzip"
@@ -15,22 +15,22 @@ nextflow_workflow {
     tag "ltrretriever/lai"
     tag "custom/restoregffids"
 
-    test("actinidia_chinensis-genome_21_fasta_gz") {
+    setup {
+        run("GUNZIP") {
+            script "../../../../modules/nf-core/gunzip"
 
-        setup {
-            run("GUNZIP") {
-                script "../../../../modules/nf-core/gunzip"
-
-                process {
-                    """
-                    input[0] = [
-                        [ id:'test' ],
-                        file(params.test_data['actinidia_chinensis']['genome']['genome_21_fasta_gz'], checkIfExists: true)
-                    ]
-                    """
-                }
+            process {
+                """
+                input[0] = [
+                    [ id:'test' ],
+                    file(params.modules_testdata_base_path + 'genomics/eukaryotes/actinidia_chinensis/genome/chr1/genome.fasta.gz', checkIfExists: true)
+                ]
+                """
             }
         }
+    }
+
+    test("actinidia_chinensis-genome_1_fasta_gz") {
 
         when {
            workflow {
                """
@@ -48,31 +48,17 @@
                { assert file(workflow.out.te_anno_gff3[0][1]).text.contains('Copia_LTR_retrotransposon') },
                { assert file(workflow.out.lai_log[0][1]).text.contains('Calculate LAI:') },
                { assert file(workflow.out.lai_log[0][1]).text.contains('Done!') },
-                { assert
Math.abs(Float.parseFloat(path(workflow.out.lai_out[0][1]).text.split("\n")[1].split("\t")[6]) - 31.29) <= 1.0 }, - { assert file(workflow.out.te_lib_fasta[0][1]).text.contains('#LTR/Copia') } + { assert Math.abs(Float.parseFloat(path(workflow.out.lai_out[0][1]).text.split("\n")[1].split("\t")[6]) - 31.87) <= 2.0 }, + { assert file(workflow.out.te_lib_fasta[0][1]).text.contains('#LTR/Copia') }, + { assert snapshot(workflow.out.versions).match() } ) } } - test("actinidia_chinensis-genome_21_fasta_gz-stub") { + test("actinidia_chinensis-genome_1_fasta_gz-stub") { options '-stub' - setup { - run("GUNZIP") { - script "../../../../modules/nf-core/gunzip" - - process { - """ - input[0] = [ - [ id:'test' ], - file(params.test_data['actinidia_chinensis']['genome']['genome_21_fasta_gz'], checkIfExists: true) - ] - """ - } - } - } - when { workflow { """ diff --git a/subworkflows/pfr/fasta_edta_lai/tests/main.nf.test.snap b/subworkflows/gallvp/fasta_edta_lai/tests/main.nf.test.snap similarity index 83% rename from subworkflows/pfr/fasta_edta_lai/tests/main.nf.test.snap rename to subworkflows/gallvp/fasta_edta_lai/tests/main.nf.test.snap index 2ab7da2..532ddd5 100644 --- a/subworkflows/pfr/fasta_edta_lai/tests/main.nf.test.snap +++ b/subworkflows/gallvp/fasta_edta_lai/tests/main.nf.test.snap @@ -1,5 +1,5 @@ { - "actinidia_chinensis-genome_21_fasta_gz-stub": { + "actinidia_chinensis-genome_1_fasta_gz-stub": { "content": [ { "0": [ @@ -78,10 +78,24 @@ ] } ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-06-04T14:00:09.252745" + }, + "actinidia_chinensis-genome_1_fasta_gz": { + "content": [ + [ + "versions.yml:md5,0d4bc49e94acb8995ca552d4e666e3ce", + "versions.yml:md5,65666e975bdfd71978843ca963e84d0c", + "versions.yml:md5,754bb19f86be761d90c002a0af2faf1c" + ] + ], "meta": { "nf-test": "0.8.4", "nextflow": "23.10.1" }, - "timestamp": "2024-03-20T18:05:46.667121" + "timestamp": "2024-05-10T13:22:28.063183" } } \ No newline at end of file diff --git a/subworkflows/pfr/fasta_edta_lai/tests/nextflow.config b/subworkflows/gallvp/fasta_edta_lai/tests/nextflow.config similarity index 100% rename from subworkflows/pfr/fasta_edta_lai/tests/nextflow.config rename to subworkflows/gallvp/fasta_edta_lai/tests/nextflow.config diff --git a/subworkflows/gallvp/fasta_gxf_busco_plot/main.nf b/subworkflows/gallvp/fasta_gxf_busco_plot/main.nf new file mode 100644 index 0000000..3e8ad28 --- /dev/null +++ b/subworkflows/gallvp/fasta_gxf_busco_plot/main.nf @@ -0,0 +1,169 @@ +include { BUSCO_BUSCO as BUSCO_ASSEMBLY } from '../../../modules/gallvp/busco/busco/main' +include { BUSCO_GENERATEPLOT as PLOT_ASSEMBLY } from '../../../modules/gallvp/busco/generateplot/main' +include { GFFREAD as EXTRACT_PROTEINS } from '../../../modules/gallvp/gffread/main' +include { BUSCO_BUSCO as BUSCO_ANNOTATION } from '../../../modules/gallvp/busco/busco/main' +include { BUSCO_GENERATEPLOT as PLOT_ANNOTATION } from '../../../modules/gallvp/busco/generateplot/main' + +workflow FASTA_GXF_BUSCO_PLOT { + + take: + ch_fasta // channel: [ val(meta), fasta ] + ch_gxf // channel: [ val(meta2), gxf ]; gxf ~ gff | gff3 | gtf + // + // Meta and meta2 should have same id + + val_mode // val(mode); BUSCO mode to apply to ch_fasta + // - genome, for genome assemblies (DNA) + // - transcriptome, for transcriptome assemblies (DNA) + // - proteins, for annotated gene sets (protein) + // + // If mode is genome, annotations from ch_gxf are evaluated with + // mode proteins, otherwise, evaluation of the annotations is skipped + 
// + val_lineages // [ val(lineage) ] + val_busco_lineages_path // val(path); Optional; Set to [] if not needed + val_busco_config // val(path); Optional; Set to [] if not needed + + main: + ch_versions = Channel.empty() + ch_db_path = val_busco_lineages_path + ? Channel.of(file(val_busco_lineages_path, checkIfExists: true)) + : Channel.of(null) + ch_config_path = val_busco_config + ? Channel.of(file(val_busco_config, checkIfExists: true)) + : Channel.of(null) + + // MODULE: BUSCO_BUSCO as BUSCO_ASSEMBLY + ch_busco_assembly_inputs = ch_fasta + | combine( + Channel.of(val_mode) + ) + | combine( + Channel.fromList(val_lineages) + ) + | map { meta, fasta, mode, lineage -> + [ + meta + [ mode:mode, lineage:lineage ], + fasta, mode, lineage + ] + } + | combine( + ch_db_path + ) + | combine( + ch_config_path + ) + + BUSCO_ASSEMBLY( + ch_busco_assembly_inputs.map { meta, fasta, mode, lineage, db, config -> [ meta, fasta ] }, + ch_busco_assembly_inputs.map { meta, fasta, mode, lineage, db, config -> mode }, + ch_busco_assembly_inputs.map { meta, fasta, mode, lineage, db, config -> lineage }, + ch_busco_assembly_inputs.map { meta, fasta, mode, lineage, db, config -> db ?: [] }, + ch_busco_assembly_inputs.map { meta, fasta, mode, lineage, db, config -> config ?: [] } + ) + + ch_assembly_batch_summary = BUSCO_ASSEMBLY.out.batch_summary + ch_assembly_short_summaries_txt = BUSCO_ASSEMBLY.out.short_summaries_txt + ch_assembly_short_summaries_json = BUSCO_ASSEMBLY.out.short_summaries_json + ch_versions = ch_versions.mix(BUSCO_ASSEMBLY.out.versions.first()) + + // MODULE: BUSCO_GENERATEPLOT as PLOT_ASSEMBLY + ch_assembly_plot_inputs = ch_assembly_short_summaries_txt + | map { meta, txt -> + def lineage_name = meta.lineage.split('_odb')[0] + [ + "short_summary.specific.${meta.lineage}.${meta.id}_${lineage_name}.txt", + txt.text + ] + } + | collectFile + | collect + + PLOT_ASSEMBLY( ch_assembly_plot_inputs ) + + ch_assembly_png = PLOT_ASSEMBLY.out.png + ch_versions = ch_versions.mix(PLOT_ASSEMBLY.out.versions) + + // MODULE: GFFREAD as EXTRACT_PROTEINS + ch_gffread_inputs = ! ( val_mode == 'geno' || val_mode == 'genome' ) + ? 
Channel.empty() + : ch_fasta + | map { meta, fasta -> [ meta.id, meta, fasta ] } + | join( + ch_gxf.map { meta2, gxf -> [ meta2.id, gxf ] } + // Join with matching annotation + // to allow one annotations per fasta + ) + | map { id, meta, fasta, gxf -> [ meta, gxf, fasta ] } + EXTRACT_PROTEINS( + ch_gffread_inputs.map { meta, gxf, fasta -> [ meta, gxf ] }, + ch_gffread_inputs.map { meta, gxf, fasta -> fasta } + ) + + ch_proteins = EXTRACT_PROTEINS.out.gffread_fasta + ch_versions = ch_versions.mix(EXTRACT_PROTEINS.out.versions.first()) + + // MODULE: BUSCO_BUSCO as BUSCO_ANNOTATION + ch_busco_annotation_inputs = ch_proteins + | combine( + Channel.of('proteins') + ) + | combine( + Channel.fromList(val_lineages) + ) + | map { meta, fasta, mode, lineage -> + [ + meta + [ mode:mode, lineage:lineage ], + fasta, mode, lineage + ] + } + | combine( + ch_db_path + ) + | combine( + ch_config_path + ) + + BUSCO_ANNOTATION( + ch_busco_annotation_inputs.map { meta, fasta, mode, lineage, db, config -> [ meta, fasta ] }, + ch_busco_annotation_inputs.map { meta, fasta, mode, lineage, db, config -> mode }, + ch_busco_annotation_inputs.map { meta, fasta, mode, lineage, db, config -> lineage }, + ch_busco_annotation_inputs.map { meta, fasta, mode, lineage, db, config -> db ?: [] }, + ch_busco_annotation_inputs.map { meta, fasta, mode, lineage, db, config -> config ?: [] } + ) + + ch_annotation_batch_summary = BUSCO_ANNOTATION.out.batch_summary + ch_annotation_short_summaries_txt = BUSCO_ANNOTATION.out.short_summaries_txt + ch_annotation_short_summaries_json = BUSCO_ANNOTATION.out.short_summaries_json + ch_versions = ch_versions.mix(BUSCO_ANNOTATION.out.versions.first()) + + // MODULE: BUSCO_GENERATEPLOT as PLOT_ANNOTATION + ch_annotation_plot_inputs = ch_annotation_short_summaries_txt + | map { meta, txt -> + def lineage_name = meta.lineage.split('_odb')[0] + [ + "short_summary.specific.${meta.lineage}.${meta.id}_${lineage_name}.txt", + txt.text + ] + } + | collectFile + | collect + + PLOT_ANNOTATION( ch_annotation_plot_inputs ) + + ch_annotation_png = PLOT_ANNOTATION.out.png + ch_versions = ch_versions.mix(PLOT_ANNOTATION.out.versions) + + + emit: + assembly_batch_summary = ch_assembly_batch_summary // channel: [ meta3, txt ]; meta3 ~ meta + [ val(mode), val(lineage) ] + assembly_short_summaries_txt = ch_assembly_short_summaries_txt // channel: [ meta3, txt ] + assembly_short_summaries_json = ch_assembly_short_summaries_json // channel: [ meta3, json ] + assembly_png = ch_assembly_png // channel: [ png ] + annotation_batch_summary = ch_annotation_batch_summary // channel: [ meta3, txt ] + annotation_short_summaries_txt = ch_annotation_short_summaries_txt // channel: [ meta3, txt ] + annotation_short_summaries_json = ch_annotation_short_summaries_json // channel: [ meta3, json ] + annotation_png = ch_annotation_png // channel: [ png ] + versions = ch_versions // channel: [ versions.yml ] +} + diff --git a/subworkflows/gallvp/fasta_gxf_busco_plot/meta.yml b/subworkflows/gallvp/fasta_gxf_busco_plot/meta.yml new file mode 100644 index 0000000..9226357 --- /dev/null +++ b/subworkflows/gallvp/fasta_gxf_busco_plot/meta.yml @@ -0,0 +1,106 @@ +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/subworkflows/yaml-schema.json +name: "fasta_gxf_busco_plot" +description: | + Runs BUSCO for input assemblies and their annotations in GFF/GFF3/GTF format, and creates summary plots using `BUSCO/generate_plot.py` script +keywords: + - genome + - annotation + - busco + - plot +components: + 
- busco/busco + - busco/generateplot + - gffread +input: + - ch_fasta: + type: file + description: | + Channel containing FASTA files + Structure:[ val(meta), fasta ] + pattern: "*.{fa,faa,fsa,fas,fasta}(.gz)?" + - ch_gxf: + type: file + description: | + Channel containing GFF/GFF3/GTF files + Structure:[ val(meta2), gxf ] + pattern: "*.{gff,gff3,gtf}" + - val_mode: + type: string + description: | + String containing BUSCO mode to apply to ch_fasta files + Structure:val(mode) + - val_lineages: + type: array + description: | + Array of strings representing BUSCO lineage datasets + Structure:[ val(lineage) ] + - val_busco_lineages_path: + type: path + description: | + Path where BUSCO lineages are located or downloaded if not already there. If this input is `[]`, + BUSCO will download the datasets in the task work directory + Structure:val(busco_lineages_path) + - val_busco_config: + type: path + description: | + Path to BUSCO config. It is optional and can be set to `[]` + Structure:val(busco_config) +output: + - assembly_batch_summary: + type: file + description: | + Channel containing BUSCO batch summaries corresponding to fasta files + Structure: [ val(meta), txt ] + pattern: "*.txt" + - assembly_short_summaries_txt: + type: file + description: | + Channel containing BUSCO short summaries corresponding to fasta files + Structure: [ val(meta), txt ] + pattern: "*.txt" + - assembly_short_summaries_json: + type: file + description: | + Channel containing BUSCO short summaries corresponding to fasta files + Structure: [ val(meta), json ] + pattern: "*.json" + - assembly_png: + type: file + description: | + Channel containing summary plot for assemblies + Structure: png + pattern: "*.png" + - annotation_batch_summary: + type: file + description: | + Channel containing BUSCO batch summaries corresponding to annotation files + Structure: [ val(meta), txt ] + pattern: "*.txt" + - annotation_short_summaries_txt: + type: file + description: | + Channel containing BUSCO short summaries corresponding to annotation files + Structure: [ val(meta), txt ] + pattern: "*.txt" + - annotation_short_summaries_json: + type: file + description: | + Channel containing BUSCO short summaries corresponding to annotation files + Structure: [ val(meta), json ] + pattern: "*.json" + - annotation_png: + type: file + description: | + Channel containing summary plot for annotations + Structure: png + pattern: "*.png" + - versions: + type: file + description: | + File containing software versions + Structure: [ path(versions.yml) ] + pattern: "versions.yml" +authors: + - "@GallVp" +maintainers: + - "@GallVp" diff --git a/subworkflows/gallvp/fasta_gxf_busco_plot/tests/main.nf.test b/subworkflows/gallvp/fasta_gxf_busco_plot/tests/main.nf.test new file mode 100644 index 0000000..783be86 --- /dev/null +++ b/subworkflows/gallvp/fasta_gxf_busco_plot/tests/main.nf.test @@ -0,0 +1,104 @@ +nextflow_workflow { + + name "Test Subworkflow FASTA_GXF_BUSCO_PLOT" + script "../main.nf" + workflow "FASTA_GXF_BUSCO_PLOT" + config './nextflow.config' + + tag "subworkflows" + tag "subworkflows_gallvp" + tag "subworkflows/fasta_gxf_busco_plot" + tag "busco" + tag "busco/busco" + tag "busco/generateplot" + tag "gffread" + + test("candidatus_portiera_aleyrodidarum - bacteroides_fragilis - genome") { + + when { + workflow { + """ + input[0] = Channel.of( + [ + [ id:'test' ], // meta map + file(params.modules_testdata_base_path + 'genomics/prokaryotes/candidatus_portiera_aleyrodidarum/genome/genome.fasta', checkIfExists: true) + ], + [ + [ 
id:'test2' ], // meta map + file(params.modules_testdata_base_path + 'genomics/prokaryotes/bacteroides_fragilis/genome/genome.fna.gz', checkIfExists: true) + ] + ) + input[1] = Channel.of( + [ + [ id:'test' ], // meta map + file(params.modules_testdata_base_path + 'genomics/prokaryotes/candidatus_portiera_aleyrodidarum/genome/gff/test1.gff', checkIfExists: true) + ] + ) + input[2] = 'genome' + input[3] = [ 'bacteria_odb10', 'archaea_odb10' ] + input[4] = [] + input[5] = [] + """ + } + } + + then { + assertAll( + { assert workflow.success}, + { assert snapshot( + workflow.out.assembly_batch_summary, + workflow.out.annotation_batch_summary, + workflow.out.versions, + ).match() + }, + + { assert workflow.out.assembly_png != null }, + { assert workflow.out.annotation_png != null }, + + { assert workflow.out.assembly_short_summaries_json != null }, + { assert workflow.out.assembly_short_summaries_txt != null }, + { assert workflow.out.annotation_short_summaries_json != null }, + { assert workflow.out.annotation_short_summaries_txt != null } + ) + } + } + + test("candidatus_portiera_aleyrodidarum - bacteroides_fragilis - genome - stub") { + + options '-stub' + + when { + workflow { + """ + input[0] = Channel.of( + [ + [ id:'test' ], // meta map + file(params.modules_testdata_base_path + 'genomics/prokaryotes/candidatus_portiera_aleyrodidarum/genome/genome.fasta', checkIfExists: true) + ], + [ + [ id:'test2' ], // meta map + file(params.modules_testdata_base_path + 'genomics/prokaryotes/bacteroides_fragilis/genome/genome.fna.gz', checkIfExists: true) + ] + ) + input[1] = Channel.of( + [ + [ id:'test' ], // meta map + file(params.modules_testdata_base_path + 'genomics/prokaryotes/candidatus_portiera_aleyrodidarum/genome/gff/test1.gff', checkIfExists: true) + ] + ) + input[2] = 'genome' + input[3] = [ 'bacteria_odb10', 'archaea_odb10' ] + input[4] = [] + input[5] = [] + """ + } + } + + then { + assertAll( + { assert workflow.success}, + { assert snapshot(workflow.out).match()} + ) + } + } +} diff --git a/subworkflows/gallvp/fasta_gxf_busco_plot/tests/main.nf.test.snap b/subworkflows/gallvp/fasta_gxf_busco_plot/tests/main.nf.test.snap new file mode 100644 index 0000000..118ad7e --- /dev/null +++ b/subworkflows/gallvp/fasta_gxf_busco_plot/tests/main.nf.test.snap @@ -0,0 +1,231 @@ +{ + "candidatus_portiera_aleyrodidarum - bacteroides_fragilis - genome - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "mode": "genome", + "lineage": "archaea_odb10" + }, + "test-archaea_odb10-busco.batch_summary.txt:md5,d41d8cd98f00b204e9800998ecf8427e" + ], + [ + { + "id": "test", + "mode": "genome", + "lineage": "bacteria_odb10" + }, + "test-bacteria_odb10-busco.batch_summary.txt:md5,d41d8cd98f00b204e9800998ecf8427e" + ], + [ + { + "id": "test2", + "mode": "genome", + "lineage": "archaea_odb10" + }, + "test2-archaea_odb10-busco.batch_summary.txt:md5,d41d8cd98f00b204e9800998ecf8427e" + ], + [ + { + "id": "test2", + "mode": "genome", + "lineage": "bacteria_odb10" + }, + "test2-bacteria_odb10-busco.batch_summary.txt:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + + ], + "2": [ + + ], + "3": [ + + ], + "4": [ + [ + { + "id": "test", + "mode": "proteins", + "lineage": "archaea_odb10" + }, + "test-archaea_odb10-busco.batch_summary.txt:md5,d41d8cd98f00b204e9800998ecf8427e" + ], + [ + { + "id": "test", + "mode": "proteins", + "lineage": "bacteria_odb10" + }, + "test-bacteria_odb10-busco.batch_summary.txt:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "5": [ + + ], + "6": [ + + ], + "7": [ + + ], + 
"8": [ + "versions.yml:md5,36b11c442943567e471af0abd474a10b", + "versions.yml:md5,9435355f913e283f60b4fb7ef77dd52a", + "versions.yml:md5,e9d65e2f2f13175e99c5b7f4ae1013b9" + ], + "annotation_batch_summary": [ + [ + { + "id": "test", + "mode": "proteins", + "lineage": "archaea_odb10" + }, + "test-archaea_odb10-busco.batch_summary.txt:md5,d41d8cd98f00b204e9800998ecf8427e" + ], + [ + { + "id": "test", + "mode": "proteins", + "lineage": "bacteria_odb10" + }, + "test-bacteria_odb10-busco.batch_summary.txt:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "annotation_png": [ + + ], + "annotation_short_summaries_json": [ + + ], + "annotation_short_summaries_txt": [ + + ], + "assembly_batch_summary": [ + [ + { + "id": "test", + "mode": "genome", + "lineage": "archaea_odb10" + }, + "test-archaea_odb10-busco.batch_summary.txt:md5,d41d8cd98f00b204e9800998ecf8427e" + ], + [ + { + "id": "test", + "mode": "genome", + "lineage": "bacteria_odb10" + }, + "test-bacteria_odb10-busco.batch_summary.txt:md5,d41d8cd98f00b204e9800998ecf8427e" + ], + [ + { + "id": "test2", + "mode": "genome", + "lineage": "archaea_odb10" + }, + "test2-archaea_odb10-busco.batch_summary.txt:md5,d41d8cd98f00b204e9800998ecf8427e" + ], + [ + { + "id": "test2", + "mode": "genome", + "lineage": "bacteria_odb10" + }, + "test2-bacteria_odb10-busco.batch_summary.txt:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "assembly_png": [ + + ], + "assembly_short_summaries_json": [ + + ], + "assembly_short_summaries_txt": [ + + ], + "versions": [ + "versions.yml:md5,36b11c442943567e471af0abd474a10b", + "versions.yml:md5,9435355f913e283f60b4fb7ef77dd52a", + "versions.yml:md5,e9d65e2f2f13175e99c5b7f4ae1013b9" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-05-13T16:39:45.021811" + }, + "candidatus_portiera_aleyrodidarum - bacteroides_fragilis - genome": { + "content": [ + [ + [ + { + "id": "test", + "mode": "genome", + "lineage": "archaea_odb10" + }, + "test-archaea_odb10-busco.batch_summary.txt:md5,1397d74518a776ad75b16a843bc5b6c1" + ], + [ + { + "id": "test", + "mode": "genome", + "lineage": "bacteria_odb10" + }, + "test-bacteria_odb10-busco.batch_summary.txt:md5,a1186bc25448ac1949bf7790810f7161" + ], + [ + { + "id": "test2", + "mode": "genome", + "lineage": "archaea_odb10" + }, + "test2-archaea_odb10-busco.batch_summary.txt:md5,946582b353a8dba7d6452a71856eca06" + ], + [ + { + "id": "test2", + "mode": "genome", + "lineage": "bacteria_odb10" + }, + "test2-bacteria_odb10-busco.batch_summary.txt:md5,21b3fb771cf36be917cc451540d999be" + ] + ], + [ + [ + { + "id": "test", + "mode": "proteins", + "lineage": "archaea_odb10" + }, + "test-archaea_odb10-busco.batch_summary.txt:md5,95172bd5b1a30e632fc79084ea0ca585" + ], + [ + { + "id": "test", + "mode": "proteins", + "lineage": "bacteria_odb10" + }, + "test-bacteria_odb10-busco.batch_summary.txt:md5,995127c0caecb36205dbf21aa2f9f8a8" + ] + ], + [ + "versions.yml:md5,05d8022e3afb0d5642ed17147b991730", + "versions.yml:md5,36b11c442943567e471af0abd474a10b", + "versions.yml:md5,53987b35fc275297efdaf525937fdca3", + "versions.yml:md5,9435355f913e283f60b4fb7ef77dd52a", + "versions.yml:md5,e9d65e2f2f13175e99c5b7f4ae1013b9" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-05-13T16:39:04.376704" + } +} \ No newline at end of file diff --git a/subworkflows/gallvp/fasta_gxf_busco_plot/tests/nextflow.config b/subworkflows/gallvp/fasta_gxf_busco_plot/tests/nextflow.config new file mode 100644 index 0000000..ddbc644 --- /dev/null +++ 
b/subworkflows/gallvp/fasta_gxf_busco_plot/tests/nextflow.config @@ -0,0 +1,5 @@ +process { + withName: EXTRACT_PROTEINS { + ext.args = '-y' + } +} diff --git a/subworkflows/gallvp/gxf_fasta_agat_spaddintrons_spextractsequences/main.nf b/subworkflows/gallvp/gxf_fasta_agat_spaddintrons_spextractsequences/main.nf new file mode 100644 index 0000000..57c9297 --- /dev/null +++ b/subworkflows/gallvp/gxf_fasta_agat_spaddintrons_spextractsequences/main.nf @@ -0,0 +1,117 @@ +include { AGAT_SPADDINTRONS } from '../../../modules/gallvp/agat/spaddintrons/main' +include { AGAT_SPEXTRACTSEQUENCES } from '../../../modules/gallvp/agat/spextractsequences/main' + +workflow GXF_FASTA_AGAT_SPADDINTRONS_SPEXTRACTSEQUENCES { + + take: + ch_gxf // channel: [ val(meta), gxf ] + ch_fasta // channel: [ val(meta2), fasta ] + + main: + ch_versions = Channel.empty() + + // collectFile: Remove all/partial introns + ch_gxf_purged = ch_gxf + | map { meta, gxf -> + def gxf_lines = gxf.readLines().findAll { line -> + + if ( line.startsWith('#') ) { return true } + + def cols = line.tokenize('\t') + def feat = cols[2].trim().toLowerCase() + + if ( feat == 'intron' ) { return false } + + return true + } + + [ "${meta.id}.nointrons.${gxf.extension}", gxf_lines.join('\n') ] + } + | collectFile + | map { gxf -> [ gxf.baseName.replace('.nointrons', ''), gxf ] } + | join( + ch_gxf.map { meta, gxf -> [ meta.id, meta ] } + ) + | map { id, gxf, meta -> [ meta, gxf ] } + + // MODULE: AGAT_SPADDINTRONS + AGAT_SPADDINTRONS ( ch_gxf_purged, [] ) + + ch_introns_gff = AGAT_SPADDINTRONS.out.gff + ch_versions = ch_versions.mix(AGAT_SPADDINTRONS.out.versions.first()) + + // MODULE: AGAT_SPEXTRACTSEQUENCES + ch_gxf_fasta = ch_introns_gff + | map { meta, gff3 -> [ meta.id, meta, gff3 ] } + | join( + ch_fasta.map { meta2, fasta -> [ meta2.id, fasta ] } + ) + | map { id, meta, gff3, fasta -> [ meta, gff3, fasta ] } + + AGAT_SPEXTRACTSEQUENCES( + ch_gxf_fasta.map { meta, gff3, fasta -> [ meta, gff3 ] }, + ch_gxf_fasta.map { meta, gff3, fasta -> fasta }, + [] // config + ) + + ch_intron_sequences = AGAT_SPEXTRACTSEQUENCES.out.fasta + ch_versions = ch_versions.mix(AGAT_SPEXTRACTSEQUENCES.out.versions.first()) + + // collectFile: splice motifs + ch_splice_motifs = ch_intron_sequences + | map { meta, fasta -> + def splice_motifs = fasta.splitFasta ( record: [id: true, seqString: true] ) + .collect { el -> [ el.id, "${el.seqString[0..1]}${el.seqString[-2..-1]}" ].join('\t') } + + [ "${meta.id}.motifs.tsv", splice_motifs.join('\n') ] + } + | collectFile + | map { tsv -> [ tsv.baseName.replace('.motifs', ''), tsv ] } + | join( + ch_gxf_purged.map { meta, gxf -> [ meta.id, meta ] } + ) + | map { id, tsv, meta -> [ meta, tsv ] } + + // collectFile: Mark gff3 + ch_marked_gff3 = ch_introns_gff + | join ( ch_splice_motifs ) + | map { meta, gff3, tsv -> + def motif_map = [:] + tsv.eachLine { line -> + def cols = line.tokenize('\t') + def id = cols[0] + def motif = cols[1] + + motif_map [ ( id ) ] = motif + } + + def marked_gff3 = gff3.readLines().collect{ line -> + if ( line.startsWith('#') ) { return line } + + def cols = line.tokenize('\t') + def feat = cols[2].trim() + + if ( feat != 'intron' ) { return line } + + def atts = cols[8].trim() + def id = ( atts =~ /ID=([^;]*)/ )[0][1] + + def atts_r = "$atts;splice_motif=${motif_map[id]};canonical_splicing=${motif_map[id]=='GTAG'}" + + return ( cols[0..7] + [ atts_r ] ).join('\t') + } + + [ "${meta.id}.marked.gff3", marked_gff3.join('\n') ] + } + | collectFile + | map { gff3 -> [ 
gff3.baseName.replace('.marked', ''), gff3 ] } + | join( + ch_gxf_purged.map { meta, gxf -> [ meta.id, meta ] } + ) + | map { id, gff3, meta -> [ meta, gff3 ] } + + emit: + motifs_tsv = ch_splice_motifs // channel: [ val(meta), tsv ] + marked_gff3 = ch_marked_gff3 // channel: [ val(meta), gff3 ] + versions = ch_versions // channel: [ versions.yml ] +} diff --git a/subworkflows/gallvp/gxf_fasta_agat_spaddintrons_spextractsequences/meta.yml b/subworkflows/gallvp/gxf_fasta_agat_spaddintrons_spextractsequences/meta.yml new file mode 100644 index 0000000..1863d97 --- /dev/null +++ b/subworkflows/gallvp/gxf_fasta_agat_spaddintrons_spextractsequences/meta.yml @@ -0,0 +1,54 @@ +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/subworkflows/yaml-schema.json +name: "gxf_fasta_agat_spaddintrons_spextractsequences" +description: Add introns, extract intron sequences and mark introns with splice motifs +keywords: + - genomics + - gtf + - gff + - fasta + - intron + - extract + - sequence + - mark + - splice + - motif +components: + - agat/spaddintrons + - agat/spextractsequences +input: + - ch_gxf: + type: file + description: | + The input channel containing the GTF/GFF file + Structure: [ val(meta), path(gxf) ] + pattern: "*.{gtf,gff,gff3}" + - ch_fasta: + type: file + description: | + The input channel containing the fasta file + Structure: [ val(meta), path(fasta) ] + pattern: "*.{fa,fas,fsa,faa,fasta}" +output: + - motifs_tsv: + type: file + description: | + The output channel containing the motifs TSV file with first column containing intron ID and + the second column containing the splice motif + Structure: [ val(meta), path(tsv) ] + pattern: "*.motifs.tsv" + - marked_gff3: + type: file + description: | + The output channel containing the GFF3 file with splicing annotation added to it + Structure: [ val(meta), path(gff3) ] + pattern: "*.marked.gff3" + - versions: + type: file + description: | + File containing software versions + Structure: [ path(versions.yml) ] + pattern: "versions.yml" +authors: + - "@GallVp" +maintainers: + - "@GallVp" diff --git a/subworkflows/gallvp/gxf_fasta_agat_spaddintrons_spextractsequences/tests/main.nf.test b/subworkflows/gallvp/gxf_fasta_agat_spaddintrons_spextractsequences/tests/main.nf.test new file mode 100644 index 0000000..f5242c9 --- /dev/null +++ b/subworkflows/gallvp/gxf_fasta_agat_spaddintrons_spextractsequences/tests/main.nf.test @@ -0,0 +1,83 @@ +nextflow_workflow { + + name "Test Subworkflow GXF_FASTA_AGAT_SPADDINTRONS_SPEXTRACTSEQUENCES" + script "../main.nf" + workflow "GXF_FASTA_AGAT_SPADDINTRONS_SPEXTRACTSEQUENCES" + config './nextflow.config' + + tag "subworkflows" + tag "subworkflows_gallvp" + tag "subworkflows/gxf_fasta_agat_spaddintrons_spextractsequences" + tag "modules/nf-core/gunzip" + tag "agat/spextractsequences" + tag "agat/spaddintrons" + + setup { + run("GUNZIP", alias: 'GUNZIP_FASTA') { + script "../../../../modules/nf-core/gunzip" + + process { + """ + input[0] = [ + [ id:'test' ], + file(params.modules_testdata_base_path + 'genomics/eukaryotes/actinidia_chinensis/genome/chr1/genome.fasta.gz' , checkIfExists: true) + ] + """ + } + } + + run("GUNZIP", alias: 'GUNZIP_GFF3') { + script "../../../../modules/nf-core/gunzip" + + process { + """ + input[0] = [ + [ id:'test' ], + file(params.modules_testdata_base_path + 'genomics/eukaryotes/actinidia_chinensis/genome/chr1/genome.gff3.gz' , checkIfExists: true) + ] + """ + } + } + } + + + test("actinidia_chinensis - fasta - gff3") { + + when { + workflow { + 
""" + input[0] = GUNZIP_GFF3.out.gunzip + input[1] = GUNZIP_FASTA.out.gunzip + """ + } + } + + then { + assertAll( + { assert workflow.success}, + { assert snapshot(workflow.out).match()} + ) + } + } + + test("actinidia_chinensis - fasta - gff3 - stub") { + + options '-stub' + + when { + workflow { + """ + input[0] = GUNZIP_GFF3.out.gunzip + input[1] = GUNZIP_FASTA.out.gunzip + """ + } + } + + then { + assertAll( + { assert workflow.success}, + { assert snapshot(workflow.out).match()} + ) + } + } +} diff --git a/subworkflows/gallvp/gxf_fasta_agat_spaddintrons_spextractsequences/tests/main.nf.test.snap b/subworkflows/gallvp/gxf_fasta_agat_spaddintrons_spextractsequences/tests/main.nf.test.snap new file mode 100644 index 0000000..a52bebe --- /dev/null +++ b/subworkflows/gallvp/gxf_fasta_agat_spaddintrons_spextractsequences/tests/main.nf.test.snap @@ -0,0 +1,104 @@ +{ + "actinidia_chinensis - fasta - gff3": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test.motifs.tsv:md5,feb30ceeb2662125a80539929f2908c5" + ] + ], + "1": [ + [ + { + "id": "test" + }, + "test.marked.gff3:md5,ae8800dc42d5da4a94c93cf09d319fc5" + ] + ], + "2": [ + "versions.yml:md5,1befbe41bc7abbf49767b8dc68877bc7", + "versions.yml:md5,65042e008b2466984150cb219a05291c" + ], + "marked_gff3": [ + [ + { + "id": "test" + }, + "test.marked.gff3:md5,ae8800dc42d5da4a94c93cf09d319fc5" + ] + ], + "motifs_tsv": [ + [ + { + "id": "test" + }, + "test.motifs.tsv:md5,feb30ceeb2662125a80539929f2908c5" + ] + ], + "versions": [ + "versions.yml:md5,1befbe41bc7abbf49767b8dc68877bc7", + "versions.yml:md5,65042e008b2466984150cb219a05291c" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-06-24T16:20:01.32929" + }, + "actinidia_chinensis - fasta - gff3 - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test.motifs.tsv:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + [ + { + "id": "test" + }, + "test.marked.gff3:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "2": [ + "versions.yml:md5,1befbe41bc7abbf49767b8dc68877bc7", + "versions.yml:md5,65042e008b2466984150cb219a05291c" + ], + "marked_gff3": [ + [ + { + "id": "test" + }, + "test.marked.gff3:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "motifs_tsv": [ + [ + { + "id": "test" + }, + "test.motifs.tsv:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions": [ + "versions.yml:md5,1befbe41bc7abbf49767b8dc68877bc7", + "versions.yml:md5,65042e008b2466984150cb219a05291c" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-06-24T16:22:01.840388" + } +} \ No newline at end of file diff --git a/subworkflows/gallvp/gxf_fasta_agat_spaddintrons_spextractsequences/tests/nextflow.config b/subworkflows/gallvp/gxf_fasta_agat_spaddintrons_spextractsequences/tests/nextflow.config new file mode 100644 index 0000000..456093c --- /dev/null +++ b/subworkflows/gallvp/gxf_fasta_agat_spaddintrons_spextractsequences/tests/nextflow.config @@ -0,0 +1,6 @@ +process { + + withName: AGAT_SPEXTRACTSEQUENCES { + ext.args = '-t intron' + } +} diff --git a/subworkflows/local/align_rnaseq.nf b/subworkflows/local/align_rnaseq.nf index 94d8b8d..0ad3b59 100644 --- a/subworkflows/local/align_rnaseq.nf +++ b/subworkflows/local/align_rnaseq.nf @@ -5,6 +5,7 @@ workflow ALIGN_RNASEQ { take: reads_target // channel: [ meta, assembly_id ] trim_reads // channel: [ meta, [ fq ] ] + rna_bam_by_assembly // channel: [ meta, [ bam ] ] assembly_index // channel: [ meta2, star_index ] main: @@ -54,16 +55,20 @@ 
workflow ALIGN_RNASEQ { bam instanceof List ? bam.find { it =~ /Aligned/ } : bam ] } + | mix ( rna_bam_by_assembly ) | groupTuple + | map { meta, bams -> [ meta, bams.flatten() ] } | branch { meta, bamList -> bams: bamList.size() > 1 bam: bamList.size() <= 1 } - SAMTOOLS_CAT ( ch_star_bam_branch.bams ) + SAMTOOLS_CAT( + ch_star_bam_branch.bams.map { meta, bams -> [ meta, bams.toSorted() ] } + ) ch_samtools_bam = SAMTOOLS_CAT.out.bam - | map { meta, bam -> [meta, [bam]] } + | map { meta, bam -> [ meta, [ bam ] ] } | mix( ch_star_bam_branch.bam ) diff --git a/subworkflows/local/fasta_braker3.nf b/subworkflows/local/fasta_braker3.nf index b76bdf7..dcb0e5d 100644 --- a/subworkflows/local/fasta_braker3.nf +++ b/subworkflows/local/fasta_braker3.nf @@ -1,4 +1,4 @@ -include { BRAKER3 } from '../../modules/kherronism/braker3' +include { BRAKER3 } from '../../modules/gallvp/braker3' include { FILE_GUNZIP as BRAKER_GFF3_GUNZIP } from '../../subworkflows/local/file_gunzip' include { FILE_GUNZIP as BRAKER_HINTS_GUNZIP } from '../../subworkflows/local/file_gunzip' diff --git a/subworkflows/local/fasta_liftoff.nf b/subworkflows/local/fasta_liftoff.nf index 01cd776..a46beb3 100644 --- a/subworkflows/local/fasta_liftoff.nf +++ b/subworkflows/local/fasta_liftoff.nf @@ -2,15 +2,21 @@ include { GUNZIP as GUNZIP_FASTA } from '../../mo include { GUNZIP as GUNZIP_GFF } from '../../modules/nf-core/gunzip/main' include { GFFREAD as GFFREAD_BEFORE_LIFTOFF } from '../../modules/nf-core/gffread/main' include { LIFTOFF } from '../../modules/nf-core/liftoff/main' -include { AGAT_SPMERGEANNOTATIONS as MERGE_LIFTOFF_ANNOTATIONS } from '../../modules/pfr/agat/spmergeannotations/main' -include { AGAT_SPFILTERFEATUREFROMKILLLIST } from '../../modules/pfr/agat/spfilterfeaturefromkilllist/main' +include { AGAT_SPMERGEANNOTATIONS as MERGE_LIFTOFF_ANNOTATIONS } from '../../modules/nf-core/agat/spmergeannotations/main' +include { AGAT_SPFILTERFEATUREFROMKILLLIST } from '../../modules/nf-core/agat/spfilterfeaturefromkilllist/main' include { GFFREAD as GFFREAD_AFTER_LIFTOFF } from '../../modules/nf-core/gffread/main' +include { GFF_TSEBRA_SPFILTERFEATUREFROMKILLLIST } from '../../subworkflows/local/gff_tsebra_spfilterfeaturefromkilllist' workflow FASTA_LIFTOFF { take: target_assemby // Channel: [ meta, fasta ] xref_fasta // Channel: [ meta2, fasta ] xref_gff // Channel: [ meta2, gff3 ] + val_filter_liftoff_by_hints // val(true|false) + braker_hints // [ meta, gff ] + tsebra_config // Channel: [ cfg ] + allow_isoforms // val(true|false) + main: ch_versions = Channel.empty() @@ -98,10 +104,15 @@ workflow FASTA_LIFTOFF { [] ) - ch_merged_gff = MERGE_LIFTOFF_ANNOTATIONS.out.gff.mix(ch_merge_inputs.one) + ch_merged_gff = MERGE_LIFTOFF_ANNOTATIONS.out.gff + | mix( + ch_merge_inputs.one + | map { meta, gffs -> [ meta, gffs[0] ] } + // Unlist the upstream groupTuple + ) ch_versions = ch_versions.mix(MERGE_LIFTOFF_ANNOTATIONS.out.versions.first()) - // COLLECTFILE: Transcript level kill list + // COLLECTFILE: Kill list for valid_ORF=False transcripts ch_kill_list = ch_merged_gff | map { meta, gff -> @@ -152,7 +163,24 @@ workflow FASTA_LIFTOFF { ch_attr_trimmed_gff = GFFREAD_AFTER_LIFTOFF.out.gffread_gff ch_versions = ch_versions.mix(GFFREAD_AFTER_LIFTOFF.out.versions.first()) + // SUBWORKFLOW: GFF_TSEBRA_SPFILTERFEATUREFROMKILLLIST + GFF_TSEBRA_SPFILTERFEATUREFROMKILLLIST( + val_filter_liftoff_by_hints ? 
ch_attr_trimmed_gff : Channel.empty(), + braker_hints, + tsebra_config, + allow_isoforms, + 'liftoff' + ) + + ch_tsebra_killed_gff = GFF_TSEBRA_SPFILTERFEATUREFROMKILLLIST.out.tsebra_killed_gff + ch_versions = ch_versions.mix(GFF_TSEBRA_SPFILTERFEATUREFROMKILLLIST.out.versions) + + // Prepare output channel + ch_output_gff = val_filter_liftoff_by_hints + ? ch_tsebra_killed_gff + : ch_attr_trimmed_gff + emit: - gff3 = ch_attr_trimmed_gff // [ meta, gff3 ] + gff3 = ch_output_gff // [ meta, gff3 ] versions = ch_versions // [ versions.yml ] } diff --git a/subworkflows/local/fasta_orthofinder.nf b/subworkflows/local/fasta_orthofinder.nf new file mode 100644 index 0000000..a39e26d --- /dev/null +++ b/subworkflows/local/fasta_orthofinder.nf @@ -0,0 +1,36 @@ +include { FILE_GUNZIP as FASTA_GUNZIP } from '../../subworkflows/local/file_gunzip' +include { ORTHOFINDER } from '../../modules/nf-core/orthofinder/main' + +workflow FASTA_ORTHOFINDER { + take: + ch_pep_fasta // [ meta, fasta ] + ch_external_pep_fasta // [ meta, fasta ] + + main: + ch_versions = Channel.empty() + + // SUBWORKFLOW: FILE_GUNZIP as FASTA_GUNZIP + FASTA_GUNZIP ( ch_external_pep_fasta ) + + ch_fasta_unzipped = FASTA_GUNZIP.out.gunzip + ch_versions = ch_versions.mix(FASTA_GUNZIP.out.versions) + + // MODULE: ORTHOFINDER + ch_orthofinder_peps = ch_fasta_unzipped + | map { meta, fasta -> fasta } + | mix( + ch_pep_fasta.map { meta, fasta -> fasta } + ) + | collect + | filter { it.size() > 1 } + + ORTHOFINDER( + ch_orthofinder_peps.map { fastas -> [ [ id: 'genepal' ], fastas ] }, + [ [], [] ] + ) + + ch_versions = ch_versions.mix(ORTHOFINDER.out.versions) + + emit: + versions = ch_versions // [ versions.yml ] +} diff --git a/subworkflows/local/file_gunzip.nf b/subworkflows/local/file_gunzip.nf index 30f3368..ec45610 100644 --- a/subworkflows/local/file_gunzip.nf +++ b/subworkflows/local/file_gunzip.nf @@ -5,6 +5,9 @@ workflow FILE_GUNZIP { ch_input // channel [ meta, archive ] main: + + ch_versions = Channel.empty() + ch_input_branch = ch_input | branch { meta, archive -> gz: "$archive".endsWith('.gz') @@ -14,7 +17,9 @@ workflow FILE_GUNZIP { // MODULE: GUNZIP GUNZIP ( ch_input_branch.gz ) + ch_versions = ch_versions.mix(GUNZIP.out.versions.first()) + emit: - versions = GUNZIP.out.versions.first() + versions = ch_versions gunzip = GUNZIP.out.gunzip.mix( ch_input_branch.rest ) } diff --git a/subworkflows/local/gff_eggnogmapper.nf b/subworkflows/local/gff_eggnogmapper.nf index 7ea0d19..841a243 100644 --- a/subworkflows/local/gff_eggnogmapper.nf +++ b/subworkflows/local/gff_eggnogmapper.nf @@ -24,7 +24,9 @@ workflow GFF_EGGNOGMAPPER { ch_versions = ch_versions.mix(GFF2FASTA_FOR_EGGNOGMAPPER.out.versions.first()) - ch_eggnogmapper_inputs = ch_gffread_fasta + ch_eggnogmapper_inputs = ! db_folder + ? Channel.empty() + : ch_gffread_fasta | combine(Channel.fromPath(db_folder)) EGGNOGMAPPER( diff --git a/subworkflows/local/gff_merge_cleanup.nf b/subworkflows/local/gff_merge_cleanup.nf index 834af94..fc6c75e 100644 --- a/subworkflows/local/gff_merge_cleanup.nf +++ b/subworkflows/local/gff_merge_cleanup.nf @@ -1,4 +1,4 @@ -include { AGAT_SPMERGEANNOTATIONS } from '../../modules/pfr/agat/spmergeannotations/main' +include { AGAT_SPMERGEANNOTATIONS } from '../../modules/nf-core/agat/spmergeannotations/main' include { GT_GFF3 } from '../../modules/nf-core/gt/gff3/main' include { AGAT_CONVERTSPGXF2GXF } from '../../modules/nf-core/agat/convertspgxf2gxf/main' @@ -56,14 +56,18 @@ workflow GFF_MERGE_CLEANUP { def feat_r = feat == 'transcript' ? 
'mRNA' : feat // Use mRNA inplace of transcript - if ( feat != 'gene' || program != 'Liftoff' ) { + if ( feat_r != 'mRNA' || program != 'Liftoff' ) { return ( cols[0..1] + [ feat_r ] + cols[3..7] + [ atts_r ] ).join('\t') } - def gene_id = ( atts =~ /ID=([^;]*)/ )[0][1] - def atts_g = "liftoffID=$gene_id" + def tx_id = ( atts =~ /ID=([^;]*)/ )[0][1] + def matches = ( atts =~ /liftoffID=([^;]*)/ ) - return ( cols[0..7] + [ atts_g ] ).join('\t') + def liftoffID = matches ? matches[0][1] : tx_id + + def atts_g = "liftoffID=$liftoffID" + + return ( cols[0..1] + [ feat_r ] + cols[3..7] + [ atts_g ] ).join('\t') }.join('\n') @@ -80,11 +84,21 @@ workflow GFF_MERGE_CLEANUP { ch_agat_gff = AGAT_CONVERTSPGXF2GXF.out.output_gff ch_versions = ch_versions.mix(AGAT_CONVERTSPGXF2GXF.out.versions.first()) - // COLLECTFILE: Format AGAT_CONVERTSPGXF2GXF output - ch_final_gff = ch_agat_gff + // COLLECTFILE: Format AGAT_CONVERTSPGXF2GXF output and only allow: [ 'gene', 'mRNA', 'exon', 'CDS' ] + ch_agat_formatted_gff = ch_agat_gff | map { meta, gff -> - def lines = gff.readLines() + def filtered_lines = gff.readLines() + .findAll { line -> + if ( line.startsWith('#') ) { return true } + + def cols = line.split('\t') + def feat = cols[2].trim() + + ( feat in [ 'gene', 'mRNA', 'exon', 'CDS' ] ) + ? true + : false + } .collect { line -> if ( line.startsWith('#') ) { return line } @@ -94,18 +108,69 @@ workflow GFF_MERGE_CLEANUP { def atts = cols[8] def atts_r = atts.replace('-', '').replace('agat', '') - if ( feat != 'gene' || program != 'Liftoff' ) { + if ( feat != 'mRNA' || program != 'Liftoff' ) { return ( cols[0..7] + [ atts_r ] ).join('\t') } def oldID = ( atts =~ /liftoffID=([^;]*)/ )[0][1] def newID = ( atts =~ /ID=([^;]*)/ )[0][1].replace('-', '').replace('agat', '') - def atts_g = "ID=${newID};liftoffID=${oldID}" + def pID = ( atts =~ /Parent=([^;]*)/ )[0][1].replace('-', '').replace('agat', '') + def atts_g = "ID=${newID};Parent=${pID};liftoffID=${oldID}" return ( cols[0..7] + [ atts_g ] ).join('\t') - }.join('\n') - - [ "${meta.id}.agat.cleanup.gff" ] + [ lines ] + } + + def tx_formatted_lines = [] + def current_gene_id = '' + def current_mrna_id = -1 + def current_exon_id = -1 + def current_cds_id = -1 + + filtered_lines.each { line -> + if ( line.startsWith('#') ) { + tx_formatted_lines << line + return + } + + def cols = line.split('\t') + def feat = cols[2] + def atts = cols[8] + def id = ( atts =~ /ID=([^;]*)/ )[0][1] + + if ( feat == 'gene' ) { + tx_formatted_lines << line + current_gene_id = id + current_mrna_id = 0 + return + } + + if ( feat == 'mRNA' ) { + current_mrna_id += 1 + current_exon_id = 0 + current_cds_id = 0 + + def matches = ( atts =~ /liftoffID=([^;]*)/ ) + def liftoffIDStr = matches ? 
";liftoffID=${matches[0][1]}" : '' + + tx_formatted_lines << ( ( cols[0..7] + [ "ID=${current_gene_id}.t${current_mrna_id};Parent=${current_gene_id}${liftoffIDStr}" ] ).join('\t') ) + return + } + + if ( feat == 'exon' ) { + current_exon_id += 1 + tx_formatted_lines << ( ( cols[0..7] + [ "ID=${current_gene_id}.t${current_mrna_id}.exon${current_exon_id};Parent=${current_gene_id}.t${current_mrna_id}" ] ).join('\t') ) + return + } + + if ( feat == 'CDS' ) { + current_cds_id += 1 + tx_formatted_lines << ( ( cols[0..7] + [ "ID=${current_gene_id}.t${current_mrna_id}.cds${current_cds_id};Parent=${current_gene_id}.t${current_mrna_id}" ] ).join('\t') ) + return + } + + } + + [ "${meta.id}.agat.cleanup.gff" ] + [ tx_formatted_lines.join('\n') ] } | collectFile(newLine: true) | map { file -> @@ -113,6 +178,6 @@ workflow GFF_MERGE_CLEANUP { } emit: - gff = ch_final_gff // [ meta, gff ] - versions = ch_versions // [ versions.yml ] + gff = ch_agat_formatted_gff // [ meta, gff ] + versions = ch_versions // [ versions.yml ] } diff --git a/subworkflows/local/gff_store.nf b/subworkflows/local/gff_store.nf index 3326e2a..2c877a9 100644 --- a/subworkflows/local/gff_store.nf +++ b/subworkflows/local/gff_store.nf @@ -1,17 +1,22 @@ import java.net.URLEncoder include { GT_GFF3 as FINAL_GFF_CHECK } from '../../modules/nf-core/gt/gff3/main' +include { GFFREAD as EXTRACT_PROTEINS } from '../../modules/nf-core/gffread/main' workflow GFF_STORE { take: ch_target_gff // [ meta, gff ] ch_eggnogmapper_annotations // [ meta, annotations ] + ch_fasta // [ meta, fasta ] + val_describe_gff // val(true|false) main: ch_versions = Channel.empty() // COLLECTFILE: Add eggnogmapper hits to gff - ch_described_gff = ch_target_gff + ch_described_gff = ! val_describe_gff + ? Channel.empty() + : ch_target_gff | join(ch_eggnogmapper_annotations) | map { meta, gff, annotations -> def tx_annotations = annotations.readLines() @@ -107,13 +112,30 @@ workflow GFF_STORE { } // MODULE: GT_GFF3 as FINAL_GFF_CHECK - FINAL_GFF_CHECK ( ch_described_gff ) + ch_final_check_input = val_describe_gff + ? 
ch_described_gff + : ch_target_gff + + FINAL_GFF_CHECK ( ch_final_check_input ) ch_final_gff = FINAL_GFF_CHECK.out.gt_gff3 ch_versions = ch_versions.mix(FINAL_GFF_CHECK.out.versions.first()) + // MODULE: GFFREAD as EXTRACT_PROTEINS + ch_extraction_inputs = ch_final_gff + | join(ch_fasta) + + EXTRACT_PROTEINS( + ch_extraction_inputs.map { meta, gff, fasta -> [ meta, gff ] }, + ch_extraction_inputs.map { meta, gff, fasta -> fasta } + ) + + ch_final_proteins = EXTRACT_PROTEINS.out.gffread_fasta + ch_versions = ch_versions.mix(EXTRACT_PROTEINS.out.versions.first()) + emit: final_gff = ch_final_gff // [ meta, gff ] + final_proteins = ch_final_proteins // [ meta, fasta ] versions = ch_versions // [ versions.yml ] } diff --git a/subworkflows/local/purge_breaker_models.nf b/subworkflows/local/gff_tsebra_spfilterfeaturefromkilllist.nf similarity index 64% rename from subworkflows/local/purge_breaker_models.nf rename to subworkflows/local/gff_tsebra_spfilterfeaturefromkilllist.nf index db828ab..c4e4fb6 100644 --- a/subworkflows/local/purge_breaker_models.nf +++ b/subworkflows/local/gff_tsebra_spfilterfeaturefromkilllist.nf @@ -1,31 +1,28 @@ include { AGAT_CONVERTSPGFF2GTF } from '../../modules/nf-core/agat/convertspgff2gtf/main' -include { TSEBRA } from '../../modules/pfr/tsebra/main' +include { TSEBRA } from '../../modules/nf-core/tsebra/main' include { AGAT_CONVERTSPGXF2GXF } from '../../modules/nf-core/agat/convertspgxf2gxf/main' -include { AGAT_SPFILTERFEATUREFROMKILLLIST as KILL_TSEBRA_ISOFORMS } from '../../modules/pfr/agat/spfilterfeaturefromkilllist/main' -include { GFFCOMPARE as COMPARE_BRAKER_TO_LIFTOFF } from '../../modules/nf-core/gffcompare/main' -include { AGAT_SPFILTERFEATUREFROMKILLLIST } from '../../modules/pfr/agat/spfilterfeaturefromkilllist/main' -include { GFFCOMPARE as VALIDATE_PURGING_BY_AGAT } from '../../modules/nf-core/gffcompare/main' -include { AGAT_SPMERGEANNOTATIONS as MERGE_BRAKER_LIFTOFF } from '../../modules/pfr/agat/spmergeannotations/main' +include { AGAT_SPFILTERFEATUREFROMKILLLIST as KILL_TSEBRA_ISOFORMS } from '../../modules/nf-core/agat/spfilterfeaturefromkilllist/main' + +workflow GFF_TSEBRA_SPFILTERFEATUREFROMKILLLIST { -workflow PURGE_BREAKER_MODELS { take: - braker_gff3 // [ meta, gff3 ] + input_gff3 // [ meta, gff3 ] braker_hints // [ meta, gff ] - liftoff_gff3 // [ meta, gff3 ] - tsebra_config // val(tsebra_config) - braker_allow_isoforms // val(true|false) + tsebra_config // Channel: [ cfg ] + allow_isoforms // val(true|false) + val_prefix // val(String) main: ch_versions = Channel.empty() // MODULE: AGAT_CONVERTSPGFF2GTF - AGAT_CONVERTSPGFF2GTF ( braker_gff3 ) + AGAT_CONVERTSPGFF2GTF ( input_gff3 ) - ch_braker_gtf = AGAT_CONVERTSPGFF2GTF.out.output_gtf + ch_input_gtf = AGAT_CONVERTSPGFF2GTF.out.output_gtf ch_versions = ch_versions.mix(AGAT_CONVERTSPGFF2GTF.out.versions.first()) // COLLECTFILE: Prepare for TSEBRA - ch_tsebra_input_gtf = ch_braker_gtf + ch_tsebra_input_gtf = ch_input_gtf | map { meta, gtf -> def lines = gtf.readLines() @@ -56,7 +53,7 @@ workflow PURGE_BREAKER_MODELS { // MODULE: TSEBRA ch_tsebra_inputs = ch_tsebra_input_gtf | join(braker_hints) - | combine(Channel.fromPath(tsebra_config)) + | combine(tsebra_config) TSEBRA( ch_tsebra_inputs.map { meta, gtf, gff, cfg -> [ meta, [ gtf ] ] }, ch_tsebra_inputs.map { meta, gtf, gff, cfg -> [ gff ] }, @@ -76,11 +73,22 @@ workflow PURGE_BREAKER_MODELS { if ( line.startsWith('#') ) { return line } def cols = line.split('\t') + def program = cols[1] + def feat = cols[2] + def atts = cols[8] + def 
atts_r = '' // Remove attributes and use AGAT_CONVERTSPGXF2GXF // to create attributes based on sequential layout - return ( cols[0..7] + [ atts_r ] ).join('\t') + if ( feat != 'transcript' || program != 'Liftoff' ) { + return ( cols[0..7] + [ atts_r ] ).join('\t') + } + + def tx_id = atts.trim().replaceFirst('anno1.', '') + def atts_g = "liftoffID $tx_id" + + return ( cols[0..7] + [ atts_g ] ).join('\t') }.join('\n') [ "${meta.id}.gtf" ] + [ lines ] @@ -110,15 +118,15 @@ workflow PURGE_BREAKER_MODELS { return ( cols[0..7] + [ atts_r ] ).join('\t') }.join('\n') - [ "${meta.id}.gff3" ] + [ lines ] + [ "${meta.id}.${val_prefix}.gff3" ] + [ lines ] } | collectFile(newLine: true) | map { file -> - [ [ id: file.baseName ], file ] + [ [ id: file.baseName.replace(".${val_prefix}", '') ], file ] } - // COLLECTFILE: Iso-form kill list if braker_allow_isoforms=true - ch_post_tsebra_kill_list = braker_allow_isoforms + // COLLECTFILE: Iso-form kill list if allow_isoforms=true + ch_post_tsebra_kill_list = allow_isoforms ? Channel.empty() : ch_tsebra_gff | map { meta, gff -> @@ -172,70 +180,7 @@ workflow PURGE_BREAKER_MODELS { } ch_versions = ch_versions.mix(KILL_TSEBRA_ISOFORMS.out.versions.first()) - // MODULE: GFFCOMPARE as COMPARE_BRAKER_TO_LIFTOFF - ch_comparison_inputs = ch_tsebra_killed_gff - | join(liftoff_gff3) - - - COMPARE_BRAKER_TO_LIFTOFF ( - ch_comparison_inputs.map { meta, braker, liftoff -> [ meta, braker ] }, - [ [], [], [] ], - ch_comparison_inputs.map { meta, braker, liftoff -> [ meta, liftoff ] }, - ) - - ch_tracking = COMPARE_BRAKER_TO_LIFTOFF.out.tracking - ch_versions = ch_versions.mix(COMPARE_BRAKER_TO_LIFTOFF.out.versions.first()) - - // COLLECTFILE: Transcript level kill list - ch_kill_list = ch_tracking - | map { meta, tracking -> - - def kept_lines = tracking.readLines() - .findAll { line -> - def cols = line.split('\t') - - ( cols[3] != 'u' ) && ( cols[3] != 'p' ) - } - - def tx_kill_list = kept_lines - .collect { line -> - def cols = line.split('\t') - - def matched = cols[4] =~ /q1:([^\|]+)\|([^\|]+)/ - - matched[0][2].trim() - }.join('\n') - - [ "${meta.id}.kill.list.txt" ] + tx_kill_list - } - | collectFile(newLine: true) - | map { file -> - [ [ id: file.baseName.replace('.kill.list', '') ], file ] - } - - // MODULE: AGAT_SPFILTERFEATUREFROMKILLLIST - ch_agat_kill_inputs = ch_tsebra_killed_gff - | join(ch_kill_list) - - - AGAT_SPFILTERFEATUREFROMKILLLIST( - ch_agat_kill_inputs.map { meta, gff, kill -> [ meta, gff ] }, - ch_agat_kill_inputs.map { meta, gff, kill -> kill }, - [] // default config - ) - - ch_braker_purged_gff = AGAT_SPFILTERFEATUREFROMKILLLIST.out.gff - ch_versions = ch_versions.mix(AGAT_SPFILTERFEATUREFROMKILLLIST.out.versions.first()) - - // Handle case where liftoff is not present - ch_all_braker_gff = ch_tsebra_killed_gff - | join(ch_braker_purged_gff, remainder:true) - | map { meta, tsebra_gff, purged_gff -> - if ( purged_gff ) { return [ meta, purged_gff ] } - if ( tsebra_gff ) { return [ meta, tsebra_gff ] } - } - emit: - braker_purged_gff = ch_all_braker_gff // [ meta, gff3 ] + tsebra_killed_gff = ch_tsebra_killed_gff // [ val(meta), gff ] versions = ch_versions // [ versions.yml ] } diff --git a/subworkflows/local/prepare_assembly.nf b/subworkflows/local/prepare_assembly.nf index c3701bb..cb92113 100644 --- a/subworkflows/local/prepare_assembly.nf +++ b/subworkflows/local/prepare_assembly.nf @@ -1,18 +1,21 @@ include { GUNZIP as GUNZIP_TARGET_ASSEMBLY } from '../../modules/nf-core/gunzip' include { GUNZIP as GUNZIP_TE_LIBRARY } from 
'../../modules/nf-core/gunzip' +include { SEQKIT_RMDUP } from '../../modules/nf-core/seqkit/rmdup/main.nf' include { FASTAVALIDATOR } from '../../modules/nf-core/fastavalidator' -include { REPEATMODELER_BUILDDATABASE } from '../../modules/pfr/repeatmodeler/builddatabase' -include { REPEATMODELER_REPEATMODELER } from '../../modules/pfr/repeatmodeler/repeatmodeler' -include { REPEATMASKER } from '../../modules/kherronism/repeatmasker' +include { REPEATMODELER_BUILDDATABASE } from '../../modules/nf-core/repeatmodeler/builddatabase' +include { REPEATMODELER_REPEATMODELER } from '../../modules/nf-core/repeatmodeler/repeatmodeler' +include { REPEATMASKER_REPEATMASKER } from '../../modules/gallvp/repeatmasker/repeatmasker' +include { CUSTOM_RMOUTTOGFF3 } from '../../modules/gallvp/custom/rmouttogff3' include { STAR_GENOMEGENERATE } from '../../modules/nf-core/star/genomegenerate' -include { FASTA_EDTA_LAI } from '../../subworkflows/pfr/fasta_edta_lai' +include { FASTA_EDTA_LAI } from '../../subworkflows/gallvp/fasta_edta_lai' workflow PREPARE_ASSEMBLY { take: target_assembly // channel: [ meta, fasta ] te_library // channel: [ meta, fasta ] repeat_annotator // val(String), 'repeatmodeler' or 'edta' + repeatmasker_save_outputs // val(true/false) exclude_assemblies // channel: val(assembly_x,assembly_y) ch_is_masked // channel: [ meta, val(true|false) ] @@ -34,18 +37,35 @@ workflow PREPARE_ASSEMBLY { ) ch_versions = ch_versions.mix(GUNZIP_TARGET_ASSEMBLY.out.versions.first()) + // MODULE: SEQKIT_RMDUP + SEQKIT_RMDUP ( ch_gunzip_assembly ) + + ch_nondup_fw_assembly = SEQKIT_RMDUP.out.log + | join(SEQKIT_RMDUP.out.fastx) + | map { meta, error_log, fasta -> + if ( error_log.text.contains('0 duplicated records removed') ) { + return [ meta, fasta ] + } + + log.warn "FASTA validation failed for ${meta.id} due to presence of duplicate sequences.\n" + + "${meta.id} is excluded from further analysis." + + return null + } // Fixed width assembly fasta without duplicates + + ch_versions = ch_versions.mix(SEQKIT_RMDUP.out.versions.first()) // MODULE: FASTAVALIDATOR - FASTAVALIDATOR ( ch_gunzip_assembly ) + FASTAVALIDATOR ( ch_nondup_fw_assembly ) - ch_validated_assembly = ch_gunzip_assembly + ch_validated_assembly = ch_nondup_fw_assembly | join(FASTAVALIDATOR.out.success_log) | map { meta, fasta, log -> [ meta, fasta ] } ch_versions = ch_versions.mix(FASTAVALIDATOR.out.versions.first()) FASTAVALIDATOR.out.error_log | map { meta, log -> - System.err.println("WARNING: FASTAVALIDATOR failed for ${meta.id} with error: ${log}. ${meta.id} is excluded from further analysis.") + log.warn "FASTAVALIDATOR failed for ${meta.id} with error: ${log}. ${meta.id} is excluded from further analysis." 
} // MODULE: GUNZIP_TE_LIBRARY @@ -123,15 +143,25 @@ workflow PREPARE_ASSEMBLY { ch_versions = ch_versions.mix(REPEATMODELER_REPEATMODELER.out.versions.first()) - // MODULE: REPEATMASKER - REPEATMASKER( + // MODULE: REPEATMASKER_REPEATMASKER + REPEATMASKER_REPEATMASKER( ch_assembly_and_te_lib.map { meta, assembly, teLib -> [ meta, assembly ] }, ch_assembly_and_te_lib.map { meta, assembly, teLib -> teLib }, ) ch_masked_assembly = ch_unmasked_masked_branch.masked - | mix(REPEATMASKER.out.fasta_masked) - ch_versions = ch_versions.mix(REPEATMASKER.out.versions.first()) + | mix(REPEATMASKER_REPEATMASKER.out.masked) + + ch_repeatmasker_out = REPEATMASKER_REPEATMASKER.out.out + ch_versions = ch_versions.mix(REPEATMASKER_REPEATMASKER.out.versions.first()) + + // MODULE: CUSTOM_RMOUTTOGFF3 + ch_RMOUTTOGFF3_input = repeatmasker_save_outputs + ? ch_repeatmasker_out + : Channel.empty() + CUSTOM_RMOUTTOGFF3 ( ch_RMOUTTOGFF3_input ) + + ch_versions = ch_versions.mix(CUSTOM_RMOUTTOGFF3.out.versions.first()) // MODULE: STAR_GENOMEGENERATE ch_genomegenerate_inputs = ch_validated_assembly diff --git a/subworkflows/local/prepare_ext_prots.nf b/subworkflows/local/prepare_ext_prots.nf index ee65f77..92c245d 100644 --- a/subworkflows/local/prepare_ext_prots.nf +++ b/subworkflows/local/prepare_ext_prots.nf @@ -20,7 +20,14 @@ workflow PREPARE_EXT_PROTS { ch_ext_prot_gunzip_fastas = GUNZIP.out.gunzip.mix(ch_ext_prot_seqs_branch.rest) | map { meta, filePath -> filePath } | collect - | map { fileList -> [ [ id: "ext_protein_seqs" ], fileList ] } + | map { fileList -> + [ + [ id: "ext_protein_seqs" ], + ( fileList instanceof List ) + ? fileList.toSorted() + : fileList + ] + } ch_versions = ch_versions.mix(GUNZIP.out.versions.first()) diff --git a/subworkflows/local/preprocess_rnaseq.nf b/subworkflows/local/preprocess_rnaseq.nf index 72fa176..b13a3c9 100644 --- a/subworkflows/local/preprocess_rnaseq.nf +++ b/subworkflows/local/preprocess_rnaseq.nf @@ -77,7 +77,7 @@ workflow PREPROCESS_RNASEQ { | join(ch_trim_reads, remainder:true) | map { meta, reads, trimmed -> if (!trimmed) { - System.err.println("WARNING: Dropping ${reads.collect { it.getName() }} as read count after trimming is less than $min_trimmed_reads") + log.warn "Dropping ${reads.collect { it.getName() }} as read count after trimming is less than $min_trimmed_reads" } } diff --git a/subworkflows/local/purge_braker_models.nf b/subworkflows/local/purge_braker_models.nf new file mode 100644 index 0000000..3f0dc05 --- /dev/null +++ b/subworkflows/local/purge_braker_models.nf @@ -0,0 +1,94 @@ +include { GFF_TSEBRA_SPFILTERFEATUREFROMKILLLIST } from '../../subworkflows/local/gff_tsebra_spfilterfeaturefromkilllist' +include { GFFCOMPARE as COMPARE_BRAKER_TO_LIFTOFF } from '../../modules/nf-core/gffcompare/main' +include { AGAT_SPFILTERFEATUREFROMKILLLIST } from '../../modules/nf-core/agat/spfilterfeaturefromkilllist/main' + +workflow PURGE_BRAKER_MODELS { + take: + braker_gff3 // [ meta, gff3 ] + braker_hints // [ meta, gff ] + liftoff_gff3 // [ meta, gff3 ] + tsebra_config // Channel: [ cfg ] + allow_isoforms // val(true|false) + + main: + ch_versions = Channel.empty() + + // SUBWORKFLOW: GFF_TSEBRA_SPFILTERFEATUREFROMKILLLIST + GFF_TSEBRA_SPFILTERFEATUREFROMKILLLIST( + braker_gff3, + braker_hints, + tsebra_config, + allow_isoforms, + 'braker' + ) + + ch_tsebra_killed_gff = GFF_TSEBRA_SPFILTERFEATUREFROMKILLLIST.out.tsebra_killed_gff + ch_versions = ch_versions.mix(GFF_TSEBRA_SPFILTERFEATUREFROMKILLLIST.out.versions) + + // MODULE: GFFCOMPARE as 
COMPARE_BRAKER_TO_LIFTOFF + ch_comparison_inputs = ch_tsebra_killed_gff + | join(liftoff_gff3) + + + COMPARE_BRAKER_TO_LIFTOFF ( + ch_comparison_inputs.map { meta, braker, liftoff -> [ meta, braker ] }, + [ [], [], [] ], + ch_comparison_inputs.map { meta, braker, liftoff -> [ meta, liftoff ] }, + ) + + ch_tracking = COMPARE_BRAKER_TO_LIFTOFF.out.tracking + ch_versions = ch_versions.mix(COMPARE_BRAKER_TO_LIFTOFF.out.versions.first()) + + // COLLECTFILE: Transcript level kill list + ch_kill_list = ch_tracking + | map { meta, tracking -> + + def kept_lines = tracking.readLines() + .findAll { line -> + def cols = line.split('\t') + + ( cols[3] != 'u' ) && ( cols[3] != 'p' ) + } + + def tx_kill_list = kept_lines + .collect { line -> + def cols = line.split('\t') + + def matched = cols[4] =~ /q1:([^\|]+)\|([^\|]+)/ + + matched[0][2].trim() + }.join('\n') + + [ "${meta.id}.kill.list.txt" ] + tx_kill_list + } + | collectFile(newLine: true) + | map { file -> + [ [ id: file.baseName.replace('.kill.list', '') ], file ] + } + + // MODULE: AGAT_SPFILTERFEATUREFROMKILLLIST + ch_agat_kill_inputs = ch_tsebra_killed_gff + | join(ch_kill_list) + + + AGAT_SPFILTERFEATUREFROMKILLLIST( + ch_agat_kill_inputs.map { meta, gff, kill -> [ meta, gff ] }, + ch_agat_kill_inputs.map { meta, gff, kill -> kill }, + [] // default config + ) + + ch_braker_purged_gff = AGAT_SPFILTERFEATUREFROMKILLLIST.out.gff + ch_versions = ch_versions.mix(AGAT_SPFILTERFEATUREFROMKILLLIST.out.versions.first()) + + // Handle case where liftoff is not present + ch_all_braker_gff = ch_tsebra_killed_gff + | join(ch_braker_purged_gff, remainder:true) + | map { meta, tsebra_gff, purged_gff -> + if ( purged_gff ) { return [ meta, purged_gff ] } + if ( tsebra_gff ) { return [ meta, tsebra_gff ] } + } + + emit: + braker_purged_gff = ch_all_braker_gff // [ meta, gff3 ] + versions = ch_versions // [ versions.yml ] +} diff --git a/subworkflows/local/purge_nohit_models.nf b/subworkflows/local/purge_nohit_models.nf index d213dc2..55b970e 100644 --- a/subworkflows/local/purge_nohit_models.nf +++ b/subworkflows/local/purge_nohit_models.nf @@ -1,4 +1,4 @@ -include { AGAT_SPFILTERFEATUREFROMKILLLIST } from '../../modules/pfr/agat/spfilterfeaturefromkilllist/main' +include { AGAT_SPFILTERFEATUREFROMKILLLIST } from '../../modules/nf-core/agat/spfilterfeaturefromkilllist/main' workflow PURGE_NOHIT_MODELS { take: @@ -60,6 +60,11 @@ workflow PURGE_NOHIT_MODELS { ch_versions = ch_versions.mix(AGAT_SPFILTERFEATUREFROMKILLLIST.out.versions.first()) emit: - purged_gff = ch_target_purged_gff.mix(val_purge_nohits ? Channel.empty() : ch_target_gff) + purged_gff = ch_target_purged_gff + | mix( + val_purge_nohits + ? 
Channel.empty() + : ch_target_gff + ) versions = ch_versions // [ versions.yml ] } diff --git a/subworkflows/local/utils_nfcore_genepal_pipeline/main.nf b/subworkflows/local/utils_nfcore_genepal_pipeline/main.nf new file mode 100644 index 0000000..f4510c4 --- /dev/null +++ b/subworkflows/local/utils_nfcore_genepal_pipeline/main.nf @@ -0,0 +1,433 @@ +// +// Subworkflow with functionality specific to the plant-food-research-open/genepal pipeline +// + +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + IMPORT FUNCTIONS / MODULES / SUBWORKFLOWS +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +*/ + +include { UTILS_NFVALIDATION_PLUGIN } from '../../nf-core/utils_nfvalidation_plugin' +include { paramsSummaryMap } from 'plugin/nf-validation' +include { fromSamplesheet } from 'plugin/nf-validation' +include { UTILS_NEXTFLOW_PIPELINE } from '../../nf-core/utils_nextflow_pipeline' +include { completionEmail } from '../../nf-core/utils_nfcore_pipeline' +include { completionSummary } from '../../nf-core/utils_nfcore_pipeline' +include { dashedLine } from '../../nf-core/utils_nfcore_pipeline' +include { nfCoreLogo } from '../../nf-core/utils_nfcore_pipeline' +include { imNotification } from '../../nf-core/utils_nfcore_pipeline' +include { UTILS_NFCORE_PIPELINE } from '../../nf-core/utils_nfcore_pipeline' +include { workflowCitation } from '../../nf-core/utils_nfcore_pipeline' + +/* +======================================================================================== + SUBWORKFLOW TO INITIALISE PIPELINE +======================================================================================== +*/ + +workflow PIPELINE_INITIALISATION { + + take: + version // boolean: Display version and exit + help // boolean: Display help text + validate_params // boolean: Boolean whether to validate parameters against the schema at runtime + monochrome_logs // boolean: Do not use coloured log outputs + nextflow_cli_args // array: List of positional nextflow CLI args + outdir // string: The output directory where the results will be saved + input // string: Path to input samplesheet + + main: + + // + // Print version and exit if required and dump pipeline parameters to JSON file + // + UTILS_NEXTFLOW_PIPELINE ( + version, + true, + outdir, + workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1 + ) + + // + // Validate parameters and generate parameter summary to stdout + // + pre_help_text = nfCoreLogo(monochrome_logs) + post_help_text = '\n' + workflowCitation() + '\n' + dashedLine(monochrome_logs) + def String workflow_command = """nextflow run ${workflow.manifest.name} \\ + -profile \\ + --input assemblysheet.csv \\ + --protein_evidence proteins.faa \\ + --outdir """ + UTILS_NFVALIDATION_PLUGIN ( + help, + workflow_command, + pre_help_text, + post_help_text, + validate_params, + "nextflow_schema.json" + ) + + // + // Check config provided to the pipeline + // + UTILS_NFCORE_PIPELINE ( + nextflow_cli_args + ) + + // + // Create input channels + // + ch_input = Channel.fromSamplesheet('input') + + ch_target_assembly = ch_input + | map { it -> + def tag = it[0] + def fasta = it[1] + + def fasta_file = file(fasta, checkIfExists: true) + + if ( workflow.stubRun ) { + return [ [ id: tag ], fasta_file ] + } + + def is_zipped = fasta.endsWith('.gz') + def sz_thresh = is_zipped ? 
300_000 : 1_000_000 + def fasta_size = fasta_file.size() + + if ( fasta_size < sz_thresh ) { // < 1 MB + error "The assembly represented by tag '$tag' is only $fasta_size bytes. The minimum allowed size is 1 MB!" + } + + [ [ id: tag ], fasta_file ] + } + + ch_tar_assm_str = ch_input + | map { it -> + def tag = it[0].strip() + + tag + } + | collect + | map { it -> + it.join(",") + } + + ch_is_masked = ch_input + | map { it -> + def tag = it[0] + def is_masked = it[2] + + [ [ id: tag ], is_masked == "yes" ] + } + + ch_te_library = ch_input + | map { it -> + def tag = it[0] + def te_fasta = it[3] + + if ( te_fasta ) { + [ [ id:tag ], file(te_fasta, checkIfExists: true) ] + } + } + + ch_braker_annotation = ch_input + | map { it -> + def tag = it[0] + def braker_gff3 = it[4] + def hints_gff = it[5] + + if ( braker_gff3 ) { + [ + [ id: tag ], + file(braker_gff3, checkIfExists: true), + file(hints_gff, checkIfExists: true) + ] + } + } + + ch_braker_ex_asm_str = ch_braker_annotation + | map { meta, braker_gff3, hints_gff -> meta.id } + | collect + | map { it.join(",") } + | ifEmpty( "" ) + + ch_benchmark_gff = ch_input + | map { it -> + def tag = it[0] + def gff = it[6] + + if ( gff ) { + [ + [ id: tag ], + file(gff, checkIfExists: true) + ] + } + } + + ch_rna_branch = ! params.rna_evidence + ? Channel.empty() + : Channel.fromSamplesheet('rna_evidence') + | map { meta, f1, f2 -> + f2 + ? [ meta + [ single_end: false ], [ file(f1, checkIfExists:true), file(f2, checkIfExists:true) ] ] + : [ meta + [ single_end: true ], [ file(f1, checkIfExists:true) ] ] + } + | map { meta, files -> + [ meta + [ target_assemblies: meta.target_assemblies.split(';').sort() ], files ] + } + | branch { meta, files -> + fq: files.first().extension != 'bam' + bam: files.first().extension == 'bam' + } + + ch_rna_fq = ! params.rna_evidence + ? Channel.empty() + : ch_rna_branch.fq + | map { meta, files -> [ meta.id, meta, files ] } + | groupTuple + | combine(ch_tar_assm_str) + | map { id, metas, files, tar_assm_str -> + validateFastqMetadata(metas, files, tar_assm_str) + } + + ch_rna_bam = ! params.rna_evidence + ? Channel.empty() + : ch_rna_branch.bam + | map { meta, files -> [ meta.id, meta, files ] } + | groupTuple + | combine(ch_tar_assm_str) + | flatMap { id, metas, files, tar_assm_str -> + validateBamMetadata(metas, files, tar_assm_str) + } + + // Check if each sample for a given assembly has either bam or fastq files + ch_rna_bam + | flatMap { meta, bams -> + meta.target_assemblies.collect { [ [ meta.id, it ], 'bam' ] } + } + | join( + ch_rna_fq + | flatMap { meta, fqs -> + meta.target_assemblies.collect { [ [ meta.id, it ], 'fq' ] } + } + ) + | map { combination, bam, fq -> + error "Sample ${combination[0]} for assembly ${combination[1]} can not have both fastq and bam files" + } + + ch_rna_bam_by_assembly = ch_rna_bam + | map { meta, bams -> [ [ id: meta.target_assemblies.first() ], bams ] } + | groupTuple + | map { meta, bams -> [ meta, bams.flatten() ] } + + ch_ribo_db = params.remove_ribo_rna + ? file(params.ribo_database_manifest, checkIfExists: true) + : null + + ch_sortmerna_fastas = ch_ribo_db + ? Channel.from(ch_ribo_db ? ch_ribo_db.readLines() : null) + | map { row -> file(row, checkIfExists: true) } + | collect + : Channel.empty() + + ch_ext_prot_fastas = ( params.protein_evidence.endsWith('txt') + ? Channel.fromPath(params.protein_evidence) + | splitText + : Channel.fromPath(params.protein_evidence) + ) + | map { file_path -> + + def file_handle = ( file_path instanceof String ) + ? 
file(file_path.strip(), checkIfExists: true) + : file_path + + [ [ id: idFromFileName( file_handle.baseName ) ], file_handle ] + } + + + ch_liftoff_mm = ! params.liftoff_annotations + ? Channel.empty() + : Channel.fromSamplesheet('liftoff_annotations') + | multiMap { fasta, gff -> + def fastaFile = file(fasta, checkIfExists:true) + + fasta: [ [ id: idFromFileName( fastaFile.baseName ) ], fastaFile ] + gff: [ [ id: idFromFileName( fastaFile.baseName ) ], file(gff, checkIfExists:true) ] + } + + ch_liftoff_fasta = params.liftoff_annotations + ? ch_liftoff_mm.fasta + : Channel.empty() + + ch_liftoff_gff = params.liftoff_annotations + ? ch_liftoff_mm.gff + : Channel.empty() + + ch_tsebra_config = Channel.of ( file("${projectDir}/assets/tsebra-template.cfg", checkIfExists: true) ) + | map { cfg -> + def param_intron_support = params.enforce_full_intron_support ? '1.0' : '0.0' + + def param_e1 = params.allow_isoforms ? '0.1' : '0.0' + def param_e2 = params.allow_isoforms ? '0.5' : '0.0' + def param_e3 = params.allow_isoforms ? '0.05' : '0.0' + def param_e4 = params.allow_isoforms ? '0.2' : '0.0' + + [ + 'tsebra-config.cfg', + cfg + .text + .replace('PARAM_INTRON_SUPPORT', param_intron_support) + .replace('PARAM_E1', param_e1) + .replace('PARAM_E2', param_e2) + .replace('PARAM_E3', param_e3) + .replace('PARAM_E4', param_e4) + ] + } + | collectFile + + + ch_orthofinder_pep = ! params.orthofinder_annotations + ? Channel.empty() + : Channel.fromSamplesheet('orthofinder_annotations') + | map { tag, fasta -> + [ [ id: tag ], file(fasta, checkIfExists:true) ] + } + + emit: + target_assembly = ch_target_assembly + tar_assm_str = ch_tar_assm_str + is_masked = ch_is_masked + te_library = ch_te_library + braker_annotation = ch_braker_annotation + braker_ex_asm_str = ch_braker_ex_asm_str + benchmark_gff = ch_benchmark_gff + rna_fq = ch_rna_fq + rna_bam = ch_rna_bam + rna_bam_by_assembly = ch_rna_bam_by_assembly + sortmerna_fastas = ch_sortmerna_fastas + ext_prot_fastas = ch_ext_prot_fastas + liftoff_fasta = ch_liftoff_fasta + liftoff_gff = ch_liftoff_gff + tsebra_config = ch_tsebra_config + orthofinder_pep = ch_orthofinder_pep +} + +/* +======================================================================================== + SUBWORKFLOW FOR PIPELINE COMPLETION +======================================================================================== +*/ + +workflow PIPELINE_COMPLETION { + + take: + email // string: email address + email_on_fail // string: email address sent on pipeline failure + plaintext_email // boolean: Send plain-text email instead of HTML + outdir // path: Path to output directory where results will be published + monochrome_logs // boolean: Disable ANSI colour codes in log output + hook_url // string: hook URL for notifications + + main: + + summary_params = paramsSummaryMap(workflow, parameters_schema: "nextflow_schema.json") + + // + // Completion email and summary + // + workflow.onComplete { + if (email || email_on_fail) { + completionEmail(summary_params, email, email_on_fail, plaintext_email, outdir, monochrome_logs) + } + + completionSummary(monochrome_logs) + + if (hook_url) { + imNotification(summary_params, hook_url) + } + } + + workflow.onError { + log.error "Pipeline failed. 
Please refer to troubleshooting docs: https://nf-co.re/docs/usage/troubleshooting" + } +} + +/* +======================================================================================== + FUNCTIONS +======================================================================================== +*/ + +// +// Additional validation +// +def idFromFileName(fileName) { + + def trial = ( fileName + ).replaceFirst( + /\.f(ast)?q$/, '' + ).replaceFirst( + /\.f(asta|sa|a|as|aa|na)?$/, '' + ).replaceFirst( + /\.gff(3)?$/, '' + ).replaceFirst( + /\.gz$/, '' + ) + + if ( trial == fileName ) { return fileName } + + return idFromFileName ( trial ) +} + +def validateFastqMetadata(metas, fqs, permAssString) { + def permAssList = permAssString.split(",") + + // Check if each listed assembly is permissible + metas.each { meta -> + if ( meta.target_assemblies.any { !permAssList.contains( it ) } ) { + error "Sample ${meta.id} targets ${meta.target_assemblies} which are not in $permAssList" + } + } + + // Check if multiple runs of a sample have the same target assemblies + if ( metas.collect { meta -> meta.target_assemblies }.unique().size() > 1 ) { + error "Multiple runs of sample ${metas.first().id} must target same assemblies" + } + + // Check if multiple runs of a sample have the same endedness + if ( metas.collect { meta -> meta.single_end }.unique().size() > 1 ) { + error "Multiple runs of sample ${metas.first().id} must have same endedness" + } + + [ metas.first(), fqs ] +} + + +def validateBamMetadata(metas, bams, permAssString) { + def permAssList = permAssString.split(",") + + // Check if each listed assembly is permissible + metas.each { meta -> + if ( meta.target_assemblies.any { !permAssList.contains( it ) } ) { + error "Sample ${meta.id} targets ${meta.target_assemblies} which are not in $permAssList" + } + } + + // Check that when the first file is bam then the second file is absent + bams.findAll { files -> + files.first().extension == 'bam' && files.size() != 1 + } + .each { error "Sample ${metas.first().id} contains both bam and fastq pairs. When a bam file is provided as file_1, a fastq for file_2 is not permitted" } + + // Check that a bam file only targets a single assembly + bams.eachWithIndex { files, index -> + if ( files.first().extension == 'bam' && metas[index].target_assemblies.size() > 1 ) { + error "BAM file for sample ${metas.first().id} can only target one assembly: ${metas[index].target_assemblies}" + } + } + + metas.every { it.target_assemblies == metas.first().target_assemblies } + ? 
[ [ metas.first(), bams.flatten() ] ] + : metas.withIndex().collect { meta, index -> [ meta, bams[index].flatten() ] } +} diff --git a/subworkflows/nf-core/fastq_fastqc_umitools_fastp/main.nf b/subworkflows/nf-core/fastq_fastqc_umitools_fastp/main.nf index 833d82b..ab6cbb3 100644 --- a/subworkflows/nf-core/fastq_fastqc_umitools_fastp/main.nf +++ b/subworkflows/nf-core/fastq_fastqc_umitools_fastp/main.nf @@ -12,9 +12,9 @@ include { FASTP } from '../../../modules/nf-core/fastp/main' // import groovy.json.JsonSlurper -def getFastpReadsAfterFiltering(json_file, min_trimmed_reads) { +def getFastpReadsAfterFiltering(json_file, min_num_reads) { - if (!json_file.text) { return min_trimmed_reads } // Usman Rashid: To allow -stub with FASTP + if ( workflow.stubRun ) { return min_num_reads } def Map json = (Map) new JsonSlurper().parseText(json_file.text).get('summary') return json['after_filtering']['total_reads'].toLong() @@ -22,7 +22,7 @@ def getFastpReadsAfterFiltering(json_file, min_trimmed_reads) { def getFastpAdapterSequence(json_file){ - if (!json_file.text) { return "" } // Usman Rashid: To allow -stub with FASTP + if ( workflow.stubRun ) { return "" } def Map json = (Map) new JsonSlurper().parseText(json_file.text) try{ @@ -97,6 +97,7 @@ workflow FASTQ_FASTQC_UMITOOLS_FASTP { FASTP ( umi_reads, adapter_fasta, + false, // don't want to set discard_trimmed_pass, else there will be no reads output save_trimmed_fail, save_merged ) @@ -114,7 +115,7 @@ workflow FASTQ_FASTQC_UMITOOLS_FASTP { .out .reads .join(trim_json) - .map { meta, reads, json -> [ meta, reads, getFastpReadsAfterFiltering(json, min_trimmed_reads) ] } + .map { meta, reads, json -> [ meta, reads, getFastpReadsAfterFiltering(json, min_trimmed_reads.toLong()) ] } .set { ch_num_trimmed_reads } ch_num_trimmed_reads diff --git a/subworkflows/nf-core/fastq_fastqc_umitools_fastp/tests/main.nf.test b/subworkflows/nf-core/fastq_fastqc_umitools_fastp/tests/main.nf.test index 961b5b4..48ba5f4 100644 --- a/subworkflows/nf-core/fastq_fastqc_umitools_fastp/tests/main.nf.test +++ b/subworkflows/nf-core/fastq_fastqc_umitools_fastp/tests/main.nf.test @@ -4,7 +4,7 @@ nextflow_workflow { script "../main.nf" workflow "FASTQ_FASTQC_UMITOOLS_FASTP" config './nextflow.config' - + tag "subworkflows" tag "subworkflows_nfcore" tag "subworkflows/fastq_fastqc_umitools_fastp" @@ -31,7 +31,7 @@ nextflow_workflow { input[0] = Channel.of([ [ id:'test', single_end:false ], // meta map - [ + [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true), file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true) ] @@ -52,24 +52,23 @@ nextflow_workflow { then { assertAll( { assert workflow.success }, + { assert workflow.out.fastqc_raw_html }, + { assert workflow.out.fastqc_raw_zip }, + { assert workflow.out.fastqc_trim_html }, + { assert workflow.out.fastqc_trim_zip }, + { assert workflow.out.trim_html }, + { assert workflow.out.trim_log }, { assert snapshot( + workflow.out.adapter_seq, workflow.out.reads, - workflow.out.umi_log, workflow.out.trim_json, + workflow.out.trim_read_count, workflow.out.trim_reads_fail, workflow.out.trim_reads_merged, - workflow.out.adapter_seq, - workflow.out.trim_read_count, + workflow.out.umi_log, workflow.out.versions - ).match() - }, - - { assert workflow.out.fastqc_raw_html }, - { assert workflow.out.fastqc_raw_zip }, - { assert workflow.out.trim_html }, - { assert workflow.out.trim_log }, - { assert 
workflow.out.fastqc_trim_html }, - { assert workflow.out.fastqc_trim_zip } + ).match() + } ) } } @@ -91,7 +90,7 @@ nextflow_workflow { input[0] = Channel.of([ [ id:'test', single_end: false ], // meta map - [ + [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true), file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true) ] @@ -112,24 +111,23 @@ nextflow_workflow { then { assertAll( { assert workflow.success }, + { assert workflow.out.trim_html }, + { assert workflow.out.trim_log }, + { assert !workflow.out.fastqc_raw_html }, + { assert !workflow.out.fastqc_raw_zip }, + { assert !workflow.out.fastqc_trim_html }, + { assert !workflow.out.fastqc_trim_zip }, { assert snapshot( + workflow.out.adapter_seq, workflow.out.reads, - workflow.out.umi_log, workflow.out.trim_json, + workflow.out.trim_read_count, workflow.out.trim_reads_fail, workflow.out.trim_reads_merged, - workflow.out.adapter_seq, - workflow.out.trim_read_count, + workflow.out.umi_log, workflow.out.versions - ).match() - }, - - { assert !workflow.out.fastqc_raw_html }, - { assert !workflow.out.fastqc_raw_zip }, - { assert workflow.out.trim_html }, - { assert workflow.out.trim_log }, - { assert !workflow.out.fastqc_trim_html }, - { assert !workflow.out.fastqc_trim_zip } + ).match() + } ) } } @@ -151,7 +149,7 @@ nextflow_workflow { input[0] = Channel.of([ [ id:'test', single_end:false ], // meta map - [ + [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true), file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true) ] @@ -172,23 +170,22 @@ nextflow_workflow { then { assertAll( { assert workflow.success }, + { assert workflow.out.fastqc_raw_html }, + { assert workflow.out.fastqc_raw_zip }, + { assert workflow.out.fastqc_trim_html }, + { assert workflow.out.fastqc_trim_zip }, + { assert workflow.out.trim_html }, + { assert workflow.out.trim_log }, { assert snapshot( + workflow.out.adapter_seq, workflow.out.reads, workflow.out.trim_json, + workflow.out.trim_read_count, workflow.out.trim_reads_fail, workflow.out.trim_reads_merged, - workflow.out.adapter_seq, - workflow.out.trim_read_count, workflow.out.versions - ).match() - }, - - { assert workflow.out.fastqc_raw_html }, - { assert workflow.out.fastqc_raw_zip }, - { assert workflow.out.trim_html }, - { assert workflow.out.trim_log }, - { assert workflow.out.fastqc_trim_html }, - { assert workflow.out.fastqc_trim_zip } + ).match() + } ) } } @@ -211,7 +208,7 @@ nextflow_workflow { input[0] = Channel.of([ [ id:'test', single_end:false ], // meta map - [ + [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true), file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true) ] @@ -232,24 +229,23 @@ nextflow_workflow { then { assertAll( { assert workflow.success }, + { assert workflow.out.fastqc_raw_html }, + { assert workflow.out.fastqc_raw_zip }, + { assert workflow.out.fastqc_trim_html }, + { assert workflow.out.fastqc_trim_zip }, + { assert workflow.out.trim_html }, + { assert workflow.out.trim_log }, { assert snapshot( + workflow.out.adapter_seq, workflow.out.reads, - workflow.out.umi_log, workflow.out.trim_json, + workflow.out.trim_read_count, workflow.out.trim_reads_fail, workflow.out.trim_reads_merged, - workflow.out.adapter_seq, - 
workflow.out.trim_read_count, + workflow.out.umi_log, workflow.out.versions - ).match() - }, - - { assert workflow.out.fastqc_raw_html }, - { assert workflow.out.fastqc_raw_zip }, - { assert workflow.out.trim_html }, - { assert workflow.out.trim_log }, - { assert workflow.out.fastqc_trim_html }, - { assert workflow.out.fastqc_trim_zip } + ).match() + } ) } } @@ -271,7 +267,7 @@ nextflow_workflow { input[0] = Channel.of([ [ id:'test', single_end:false ], // meta map - [ + [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true), file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true) ] @@ -292,24 +288,23 @@ nextflow_workflow { then { assertAll( { assert workflow.success }, + { assert workflow.out.fastqc_raw_html }, + { assert workflow.out.fastqc_raw_zip }, + { assert workflow.out.fastqc_trim_html }, + { assert workflow.out.fastqc_trim_zip }, + { assert workflow.out.trim_html }, + { assert workflow.out.trim_log }, { assert snapshot( + workflow.out.adapter_seq, workflow.out.reads, - workflow.out.umi_log, workflow.out.trim_json, + workflow.out.trim_read_count, workflow.out.trim_reads_fail, workflow.out.trim_reads_merged, - workflow.out.adapter_seq, - workflow.out.trim_read_count, + workflow.out.umi_log, workflow.out.versions - ).match() - }, - - { assert workflow.out.fastqc_raw_html }, - { assert workflow.out.fastqc_raw_zip }, - { assert workflow.out.trim_html }, - { assert workflow.out.trim_log }, - { assert workflow.out.fastqc_trim_html }, - { assert workflow.out.fastqc_trim_zip } + ).match() + } ) } } @@ -331,7 +326,7 @@ nextflow_workflow { input[0] = Channel.of([ [ id:'test', single_end:false ], // meta map - [ + [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true), file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true) ] @@ -352,27 +347,24 @@ nextflow_workflow { then { assertAll( { assert workflow.success }, + { assert workflow.out.fastqc_raw_html }, + { assert workflow.out.fastqc_raw_zip }, + { assert !workflow.out.fastqc_trim_html }, + { assert !workflow.out.fastqc_trim_zip }, + { assert !workflow.out.trim_html }, + { assert !workflow.out.trim_log }, { assert snapshot( + // If we skip trimming then input is output, so not snapshotting + workflow.out.adapter_seq, workflow.out.reads.get(0).get(0), // Reads meta map - // Because the input file is passed to the output file, we have to do check the filename only - file(workflow.out.reads.get(0).get(1).get(0)).name, - file(workflow.out.reads.get(0).get(1).get(1)).name, - workflow.out.umi_log, workflow.out.trim_json, + workflow.out.trim_read_count, workflow.out.trim_reads_fail, workflow.out.trim_reads_merged, - workflow.out.adapter_seq, - workflow.out.trim_read_count, + workflow.out.umi_log, workflow.out.versions - ).match() - }, - - { assert workflow.out.fastqc_raw_html }, - { assert workflow.out.fastqc_raw_zip }, - { assert !workflow.out.trim_html }, - { assert !workflow.out.trim_log }, - { assert !workflow.out.fastqc_trim_html }, - { assert !workflow.out.fastqc_trim_zip } + ).match() + } ) } } @@ -396,7 +388,7 @@ nextflow_workflow { input[0] = Channel.of([ [ id:'test', single_end:false ], // meta map - [ + [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true), file(params.modules_testdata_base_path + 
'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true) ] @@ -417,24 +409,23 @@ nextflow_workflow { then { assertAll( { assert workflow.success }, + { assert workflow.out.fastqc_raw_html }, + { assert workflow.out.fastqc_raw_zip }, + { assert workflow.out.fastqc_trim_html }, + { assert workflow.out.fastqc_trim_zip }, + { assert workflow.out.trim_html }, + { assert workflow.out.trim_log }, { assert snapshot( + workflow.out.adapter_seq, workflow.out.reads, - workflow.out.umi_log, workflow.out.trim_json, + workflow.out.trim_read_count, workflow.out.trim_reads_fail, workflow.out.trim_reads_merged, - workflow.out.adapter_seq, - workflow.out.trim_read_count, + workflow.out.umi_log, workflow.out.versions - ).match() - }, - - { assert workflow.out.fastqc_raw_html }, - { assert workflow.out.fastqc_raw_zip }, - { assert workflow.out.trim_html }, - { assert workflow.out.trim_log }, - { assert workflow.out.fastqc_trim_html }, - { assert workflow.out.fastqc_trim_zip } + ).match() + } ) } } @@ -456,7 +447,7 @@ nextflow_workflow { input[0] = Channel.of([ [ id:'test', single_end:false ], // meta map - [ + [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true), file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true) ] @@ -477,24 +468,23 @@ nextflow_workflow { then { assertAll( { assert workflow.success }, + { assert workflow.out.fastqc_raw_html }, + { assert workflow.out.fastqc_raw_zip }, + { assert workflow.out.fastqc_trim_html }, + { assert workflow.out.fastqc_trim_zip }, + { assert workflow.out.trim_html }, + { assert workflow.out.trim_log }, { assert snapshot( + workflow.out.adapter_seq, workflow.out.reads, - workflow.out.umi_log, workflow.out.trim_json, + workflow.out.trim_read_count, workflow.out.trim_reads_fail, workflow.out.trim_reads_merged, - workflow.out.adapter_seq, - workflow.out.trim_read_count, + workflow.out.umi_log, workflow.out.versions - ).match() - }, - - { assert workflow.out.fastqc_raw_html }, - { assert workflow.out.fastqc_raw_zip }, - { assert workflow.out.trim_html }, - { assert workflow.out.trim_log }, - { assert workflow.out.fastqc_trim_html }, - { assert workflow.out.fastqc_trim_zip } + ).match() + } ) } } @@ -517,7 +507,7 @@ nextflow_workflow { input[0] = Channel.of([ [ id:'test', single_end:false ], // meta map - [ + [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true), file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true) ] @@ -538,24 +528,445 @@ nextflow_workflow { then { assertAll( { assert workflow.success }, + { assert workflow.out.fastqc_raw_html }, + { assert workflow.out.fastqc_raw_zip }, + { assert workflow.out.fastqc_trim_html }, + { assert workflow.out.fastqc_trim_zip }, + { assert workflow.out.trim_html }, + { assert workflow.out.trim_log }, { assert snapshot( + workflow.out.adapter_seq, workflow.out.reads, - workflow.out.umi_log, workflow.out.trim_json, + workflow.out.trim_read_count, workflow.out.trim_reads_fail, workflow.out.trim_reads_merged, - workflow.out.adapter_seq, - workflow.out.trim_read_count, + workflow.out.umi_log, workflow.out.versions - ).match() - }, + ).match() + } + ) + } + } - { assert workflow.out.fastqc_raw_html }, - { assert workflow.out.fastqc_raw_zip }, - { assert workflow.out.trim_html }, - { assert workflow.out.trim_log }, - { assert workflow.out.fastqc_trim_html }, - { assert 
workflow.out.fastqc_trim_zip } + test("sarscov2 paired-end [fastq] - stub") { + + options '-stub' + + when { + workflow { + """ + skip_fastqc = false + with_umi = false + skip_umi_extract = false + umi_discard_read = 1 + skip_trimming = false + adapter_fasta = [] + save_trimmed_fail = false + save_merged = false + min_trimmed_reads = 1 + + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + [ + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true) + ] + ]) + input[1] = skip_fastqc + input[2] = with_umi + input[3] = skip_umi_extract + input[4] = umi_discard_read + input[5] = skip_trimming + input[6] = adapter_fasta + input[7] = save_trimmed_fail + input[8] = save_merged + input[9] = min_trimmed_reads + """ + } + } + + then { + assertAll( + { assert workflow.success }, + { assert snapshot(workflow.out).match() } + ) + } + } + + test("skip_fastqc - stub") { + + options "-stub" + + when { + workflow { + """ + skip_fastqc = true + with_umi = false + skip_umi_extract = false + umi_discard_read = 1 + skip_trimming = false + adapter_fasta = [] + save_trimmed_fail = false + save_merged = false + min_trimmed_reads = 1 + + input[0] = Channel.of([ + [ id:'test', single_end: false ], // meta map + [ + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true) + ] + ]) + input[1] = skip_fastqc + input[2] = with_umi + input[3] = skip_umi_extract + input[4] = umi_discard_read + input[5] = skip_trimming + input[6] = adapter_fasta + input[7] = save_trimmed_fail + input[8] = save_merged + input[9] = min_trimmed_reads + """ + } + } + + then { + assertAll( + { assert workflow.success }, + { assert snapshot(workflow.out).match() } + ) + } + } + + test("with_umi - stub") { + + options "-stub" + + when { + workflow { + """ + skip_fastqc = false + with_umi = true + skip_umi_extract = false + umi_discard_read = 1 + skip_trimming = false + adapter_fasta = [] + save_trimmed_fail = false + save_merged = false + min_trimmed_reads = 1 + + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + [ + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true) + ] + ]) + input[1] = skip_fastqc + input[2] = with_umi + input[3] = skip_umi_extract + input[4] = umi_discard_read + input[5] = skip_trimming + input[6] = adapter_fasta + input[7] = save_trimmed_fail + input[8] = save_merged + input[9] = min_trimmed_reads + """ + } + } + + then { + assertAll( + { assert workflow.success }, + { assert snapshot(workflow.out).match() } + ) + } + } + + + test("skip_umi_extract - stub") { + + options "-stub" + + when { + workflow { + """ + skip_fastqc = false + with_umi = true + skip_umi_extract = true + umi_discard_read = 1 + skip_trimming = false + adapter_fasta = [] + save_trimmed_fail = false + save_merged = false + min_trimmed_reads = 1 + + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + [ + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 
'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true) + ] + ]) + input[1] = skip_fastqc + input[2] = with_umi + input[3] = skip_umi_extract + input[4] = umi_discard_read + input[5] = skip_trimming + input[6] = adapter_fasta + input[7] = save_trimmed_fail + input[8] = save_merged + input[9] = min_trimmed_reads + """ + } + } + + then { + assertAll( + { assert workflow.success }, + { assert snapshot(workflow.out).match() } + ) + } + } + + test("umi_discard_read = 2 - stub") { + + options "-stub" + + when { + workflow { + """ + skip_fastqc = false + with_umi = true + skip_umi_extract = true + umi_discard_read = 2 + skip_trimming = false + adapter_fasta = [] + save_trimmed_fail = false + save_merged = false + min_trimmed_reads = 1 + + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + [ + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true) + ] + ]) + input[1] = skip_fastqc + input[2] = with_umi + input[3] = skip_umi_extract + input[4] = umi_discard_read + input[5] = skip_trimming + input[6] = adapter_fasta + input[7] = save_trimmed_fail + input[8] = save_merged + input[9] = min_trimmed_reads + """ + } + } + + then { + assertAll( + { assert workflow.success }, + { assert snapshot(workflow.out).match() } + ) + } + } + + test("skip_trimming - stub") { + + options "-stub" + + when { + workflow { + """ + skip_fastqc = false + with_umi = false + skip_umi_extract = false + umi_discard_read = 1 + skip_trimming = true + adapter_fasta = [] + save_trimmed_fail = false + save_merged = false + min_trimmed_reads = 1 + + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + [ + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true) + ] + ]) + input[1] = skip_fastqc + input[2] = with_umi + input[3] = skip_umi_extract + input[4] = umi_discard_read + input[5] = skip_trimming + input[6] = adapter_fasta + input[7] = save_trimmed_fail + input[8] = save_merged + input[9] = min_trimmed_reads + """ + } + } + + then { + assertAll( + { assert workflow.success }, + { assert snapshot( + workflow.out.adapter_seq, + workflow.out.fastqc_raw_html, + workflow.out.fastqc_raw_zip, + workflow.out.fastqc_trim_html, + workflow.out.fastqc_trim_zip, + workflow.out.trim_html, + workflow.out.trim_json, + workflow.out.trim_log, + workflow.out.trim_read_count, + workflow.out.trim_reads_fail, + workflow.out.trim_reads_merged, + workflow.out.umi_log, + workflow.out.versions).match() } + ) + } + } + + test("save_trimmed_fail - stub") { + + options "-stub" + + config './nextflow.save_trimmed.config' + + when { + workflow { + """ + skip_fastqc = false + with_umi = false + skip_umi_extract = false + umi_discard_read = 1 + skip_trimming = false + adapter_fasta = [] + save_trimmed_fail = true + save_merged = false + min_trimmed_reads = 1 + + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + [ + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true) + ] + ]) + input[1] = skip_fastqc + input[2] = with_umi + input[3] = skip_umi_extract + input[4] 
= umi_discard_read + input[5] = skip_trimming + input[6] = adapter_fasta + input[7] = save_trimmed_fail + input[8] = save_merged + input[9] = min_trimmed_reads + """ + } + } + + then { + assertAll( + { assert workflow.success }, + { assert snapshot(workflow.out).match() } + ) + } + } + + test("save_merged - stub") { + + options "-stub" + + when { + workflow { + """ + skip_fastqc = false + with_umi = false + skip_umi_extract = false + umi_discard_read = 1 + skip_trimming = false + adapter_fasta = [] + save_trimmed_fail = false + save_merged = true + min_trimmed_reads = 1 + + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + [ + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true) + ] + ]) + input[1] = skip_fastqc + input[2] = with_umi + input[3] = skip_umi_extract + input[4] = umi_discard_read + input[5] = skip_trimming + input[6] = adapter_fasta + input[7] = save_trimmed_fail + input[8] = save_merged + input[9] = min_trimmed_reads + """ + } + } + + then { + assertAll( + { assert workflow.success }, + { assert snapshot(workflow.out).match() } + ) + } + } + + test("min_trimmed_reads = 26 - stub") { + // Subworkflow should stop after FASTP which trims down to 25 reads + + options "-stub" + + when { + workflow { + """ + skip_fastqc = false + with_umi = false + skip_umi_extract = false + umi_discard_read = 1 + skip_trimming = false + adapter_fasta = [] + save_trimmed_fail = false + save_merged = true + min_trimmed_reads = 26 + + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + [ + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true) + ] + ]) + input[1] = skip_fastqc + input[2] = with_umi + input[3] = skip_umi_extract + input[4] = umi_discard_read + input[5] = skip_trimming + input[6] = adapter_fasta + input[7] = save_trimmed_fail + input[8] = save_merged + input[9] = min_trimmed_reads + """ + } + } + + then { + assertAll( + { assert workflow.success }, + { assert snapshot(workflow.out).match() } ) } } diff --git a/subworkflows/nf-core/fastq_fastqc_umitools_fastp/tests/main.nf.test.snap b/subworkflows/nf-core/fastq_fastqc_umitools_fastp/tests/main.nf.test.snap index 3e11d9e..e7d1f51 100644 --- a/subworkflows/nf-core/fastq_fastqc_umitools_fastp/tests/main.nf.test.snap +++ b/subworkflows/nf-core/fastq_fastqc_umitools_fastp/tests/main.nf.test.snap @@ -7,14 +7,8 @@ "id": "test", "single_end": false }, - [ - "test_1.fastp.fastq.gz:md5,67b2bbae47f073e05a97a9c2edce23c7", - "test_2.fastp.fastq.gz:md5,25cbdca08e2083dbd4f0502de6b62f39" - ] + "unspecified" ] - ], - [ - ], [ [ @@ -22,14 +16,11 @@ "id": "test", "single_end": false }, - "test.fastp.json:md5,1e0f8e27e71728e2b63fc64086be95cd" + [ + "test_1.fastp.fastq.gz:md5,67b2bbae47f073e05a97a9c2edce23c7", + "test_2.fastp.fastq.gz:md5,25cbdca08e2083dbd4f0502de6b62f39" + ] ] - ], - [ - - ], - [ - ], [ [ @@ -37,7 +28,7 @@ "id": "test", "single_end": false }, - "unspecified" + "test.fastp.json:md5,1e0f8e27e71728e2b63fc64086be95cd" ] ], [ @@ -48,16 +39,25 @@ }, 198 ] + ], + [ + + ], + [ + + ], + [ + ], [ "versions.yml:md5,85bd0117e5778fff18e3920972a296ad" ] ], "meta": { - "nf-test": "0.8.4", - "nextflow": "23.10.1" + "nf-test": "0.9.0", + "nextflow": "24.04.3" }, - 
"timestamp": "2024-03-18T16:53:49.315194" + "timestamp": "2024-07-22T16:56:01.933832" }, "save_trimmed_fail": { "content": [ @@ -67,14 +67,8 @@ "id": "test", "single_end": false }, - [ - "test_1.fastp.fastq.gz:md5,6ff32a64c5188b9a9192be1398c262c7", - "test_2.fastp.fastq.gz:md5,db0cb7c9977e94ac2b4b446ebd017a8a" - ] + "unspecified" ] - ], - [ - ], [ [ @@ -82,7 +76,10 @@ "id": "test", "single_end": false }, - "test.fastp.json:md5,4c3268ddb50ea5b33125984776aa3519" + [ + "test_1.fastp.fastq.gz:md5,6ff32a64c5188b9a9192be1398c262c7", + "test_2.fastp.fastq.gz:md5,db0cb7c9977e94ac2b4b446ebd017a8a" + ] ] ], [ @@ -91,15 +88,8 @@ "id": "test", "single_end": false }, - [ - "test.paired.fail.fastq.gz:md5,409b687c734cedd7a1fec14d316e1366", - "test_1.fail.fastq.gz:md5,4f273cf3159c13f79e8ffae12f5661f6", - "test_2.fail.fastq.gz:md5,f97b9edefb5649aab661fbc9e71fc995" - ] + "test.fastp.json:md5,4c3268ddb50ea5b33125984776aa3519" ] - ], - [ - ], [ [ @@ -107,7 +97,7 @@ "id": "test", "single_end": false }, - "unspecified" + 162 ] ], [ @@ -116,8 +106,18 @@ "id": "test", "single_end": false }, - 162 + [ + "test.paired.fail.fastq.gz:md5,409b687c734cedd7a1fec14d316e1366", + "test_1.fail.fastq.gz:md5,4f273cf3159c13f79e8ffae12f5661f6", + "test_2.fail.fastq.gz:md5,f97b9edefb5649aab661fbc9e71fc995" + ] ] + ], + [ + + ], + [ + ], [ "versions.yml:md5,85bd0117e5778fff18e3920972a296ad", @@ -126,10 +126,10 @@ ] ], "meta": { - "nf-test": "0.8.4", - "nextflow": "23.10.1" + "nf-test": "0.9.0", + "nextflow": "24.04.3" }, - "timestamp": "2024-03-18T16:51:45.34934" + "timestamp": "2024-07-22T16:57:38.736" }, "skip_umi_extract": { "content": [ @@ -139,14 +139,8 @@ "id": "test", "single_end": false }, - [ - "test_1.fastp.fastq.gz:md5,67b2bbae47f073e05a97a9c2edce23c7", - "test_2.fastp.fastq.gz:md5,25cbdca08e2083dbd4f0502de6b62f39" - ] + "unspecified" ] - ], - [ - ], [ [ @@ -154,14 +148,11 @@ "id": "test", "single_end": false }, - "test.fastp.json:md5,1e0f8e27e71728e2b63fc64086be95cd" + [ + "test_1.fastp.fastq.gz:md5,67b2bbae47f073e05a97a9c2edce23c7", + "test_2.fastp.fastq.gz:md5,25cbdca08e2083dbd4f0502de6b62f39" + ] ] - ], - [ - - ], - [ - ], [ [ @@ -169,7 +160,7 @@ "id": "test", "single_end": false }, - "unspecified" + "test.fastp.json:md5,1e0f8e27e71728e2b63fc64086be95cd" ] ], [ @@ -180,6 +171,15 @@ }, 198 ] + ], + [ + + ], + [ + + ], + [ + ], [ "versions.yml:md5,85bd0117e5778fff18e3920972a296ad", @@ -188,13 +188,22 @@ ] ], "meta": { - "nf-test": "0.8.4", - "nextflow": "23.10.1" + "nf-test": "0.9.0", + "nextflow": "24.04.3" }, - "timestamp": "2024-03-18T12:07:40.34249" + "timestamp": "2024-07-22T16:56:47.905105" }, "umi_discard_read = 2": { "content": [ + [ + [ + { + "id": "test", + "single_end": false + }, + "unspecified" + ] + ], [ [ { @@ -208,7 +217,13 @@ ] ], [ - + [ + { + "id": "test", + "single_end": false + }, + "test.fastp.json:md5,1e0f8e27e71728e2b63fc64086be95cd" + ] ], [ [ @@ -216,7 +231,7 @@ "id": "test", "single_end": false }, - "test.fastp.json:md5,1e0f8e27e71728e2b63fc64086be95cd" + 198 ] ], [ @@ -224,6 +239,251 @@ ], [ + ], + [ + + ], + [ + "versions.yml:md5,85bd0117e5778fff18e3920972a296ad", + "versions.yml:md5,c50aa59475ab901bc6f9a2cf7b1a14e0", + "versions.yml:md5,f3dcaae948e8eed92b4a5557b4c6668e" + ] + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.3" + }, + "timestamp": "2024-07-22T16:57:05.436744" + }, + "umi_discard_read = 2 - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + [ + "test_1.fastp.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940", + 
"test_2.fastp.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ] + ], + "1": [ + [ + { + "id": "test", + "single_end": false + }, + "test.html:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "10": [ + [ + { + "id": "test", + "single_end": false + }, + 1 + ] + ], + "11": [ + [ + { + "id": "test", + "single_end": false + }, + "test.html:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "12": [ + [ + { + "id": "test", + "single_end": false + }, + "test.zip:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "13": [ + "versions.yml:md5,85bd0117e5778fff18e3920972a296ad", + "versions.yml:md5,c50aa59475ab901bc6f9a2cf7b1a14e0", + "versions.yml:md5,f3dcaae948e8eed92b4a5557b4c6668e" + ], + "2": [ + [ + { + "id": "test", + "single_end": false + }, + "test.zip:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "3": [ + + ], + "4": [ + [ + { + "id": "test", + "single_end": false + }, + "" + ] + ], + "5": [ + [ + { + "id": "test", + "single_end": false + }, + "test.fastp.json:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "6": [ + [ + { + "id": "test", + "single_end": false + }, + "test.fastp.html:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "7": [ + [ + { + "id": "test", + "single_end": false + }, + "test.fastp.log:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "8": [ + + ], + "9": [ + + ], + "adapter_seq": [ + [ + { + "id": "test", + "single_end": false + }, + "" + ] + ], + "fastqc_raw_html": [ + [ + { + "id": "test", + "single_end": false + }, + "test.html:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "fastqc_raw_zip": [ + [ + { + "id": "test", + "single_end": false + }, + "test.zip:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "fastqc_trim_html": [ + [ + { + "id": "test", + "single_end": false + }, + "test.html:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "fastqc_trim_zip": [ + [ + { + "id": "test", + "single_end": false + }, + "test.zip:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "reads": [ + [ + { + "id": "test", + "single_end": false + }, + [ + "test_1.fastp.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940", + "test_2.fastp.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ] + ], + "trim_html": [ + [ + { + "id": "test", + "single_end": false + }, + "test.fastp.html:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "trim_json": [ + [ + { + "id": "test", + "single_end": false + }, + "test.fastp.json:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "trim_log": [ + [ + { + "id": "test", + "single_end": false + }, + "test.fastp.log:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "trim_read_count": [ + [ + { + "id": "test", + "single_end": false + }, + 1 + ] + ], + "trim_reads_fail": [ + + ], + "trim_reads_merged": [ + + ], + "umi_log": [ + + ], + "versions": [ + "versions.yml:md5,85bd0117e5778fff18e3920972a296ad", + "versions.yml:md5,c50aa59475ab901bc6f9a2cf7b1a14e0", + "versions.yml:md5,f3dcaae948e8eed92b4a5557b4c6668e" + ] + } + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.3" + }, + "timestamp": "2024-07-22T16:59:27.273892" + }, + "skip_trimming - stub": { + "content": [ + [ + ], [ [ @@ -231,7 +491,7 @@ "id": "test", "single_end": false }, - "unspecified" + "test.html:md5,d41d8cd98f00b204e9800998ecf8427e" ] ], [ @@ -240,20 +500,45 @@ "id": "test", "single_end": false }, - 198 + "test.zip:md5,d41d8cd98f00b204e9800998ecf8427e" ] ], [ - "versions.yml:md5,85bd0117e5778fff18e3920972a296ad", - "versions.yml:md5,c50aa59475ab901bc6f9a2cf7b1a14e0", + + ], + [ + + ], + [ + + ], + [ + + ], + [ + + ], + [ + + ], + [ + + ], + [ + + ], + [ + + ], + [ 
"versions.yml:md5,f3dcaae948e8eed92b4a5557b4c6668e" ] ], "meta": { - "nf-test": "0.8.4", - "nextflow": "23.10.1" + "nf-test": "0.9.0", + "nextflow": "24.04.3" }, - "timestamp": "2024-03-18T12:08:24.141938" + "timestamp": "2024-07-22T16:59:39.247758" }, "save_merged": { "content": [ @@ -263,14 +548,8 @@ "id": "test", "single_end": false }, - [ - "test_1.fastp.fastq.gz:md5,54b726a55e992a869fd3fa778afe1672", - "test_2.fastp.fastq.gz:md5,29d3b33b869f7b63417b8ff07bb128ba" - ] + "unspecified" ] - ], - [ - ], [ [ @@ -278,11 +557,11 @@ "id": "test", "single_end": false }, - "test.fastp.json:md5,b712fd68ed0322f4bec49ff2a5237fcc" + [ + "test_1.fastp.fastq.gz:md5,54b726a55e992a869fd3fa778afe1672", + "test_2.fastp.fastq.gz:md5,29d3b33b869f7b63417b8ff07bb128ba" + ] ] - ], - [ - ], [ [ @@ -290,7 +569,7 @@ "id": "test", "single_end": false }, - "test.merged.fastq.gz:md5,c873bb1ab3fa859dcc47306465e749d5" + "test.fastp.json:md5,b712fd68ed0322f4bec49ff2a5237fcc" ] ], [ @@ -299,8 +578,11 @@ "id": "test", "single_end": false }, - "unspecified" + 75 ] + ], + [ + ], [ [ @@ -308,8 +590,11 @@ "id": "test", "single_end": false }, - 75 + "test.merged.fastq.gz:md5,c873bb1ab3fa859dcc47306465e749d5" ] + ], + [ + ], [ "versions.yml:md5,85bd0117e5778fff18e3920972a296ad", @@ -318,22 +603,20 @@ ] ], "meta": { - "nf-test": "0.8.4", - "nextflow": "23.10.1" + "nf-test": "0.9.0", + "nextflow": "24.04.3" }, - "timestamp": "2024-03-18T12:10:18.546963" + "timestamp": "2024-07-22T16:57:57.472342" }, "skip_trimming": { "content": [ + [ + + ], { "id": "test", "single_end": false }, - "test_1.fastq.gz", - "test_2.fastq.gz", - [ - - ], [ ], @@ -354,72 +637,67 @@ ] ], "meta": { - "nf-test": "0.8.4", - "nextflow": "23.10.1" + "nf-test": "0.9.0", + "nextflow": "24.04.3" }, - "timestamp": "2024-03-19T15:49:26.574759" + "timestamp": "2024-07-22T16:57:19.875543" }, - "sarscov2 paired-end [fastq]": { + "with_umi": { "content": [ [ [ { "id": "test", - "single_end": false + "single_end": true }, - [ - "test_1.fastp.fastq.gz:md5,67b2bbae47f073e05a97a9c2edce23c7", - "test_2.fastp.fastq.gz:md5,25cbdca08e2083dbd4f0502de6b62f39" - ] + "" ] - ], - [ - ], [ [ { "id": "test", - "single_end": false + "single_end": true }, - "test.fastp.json:md5,1e0f8e27e71728e2b63fc64086be95cd" + "test.fastp.fastq.gz:md5,ba8c6c3a7ce718d9a2c5857e2edf53bc" ] - ], - [ - - ], - [ - ], [ [ { "id": "test", - "single_end": false + "single_end": true }, - "unspecified" + "test.fastp.json:md5,d39c5c6d9a2e35fb60d26ced46569af6" ] ], [ [ { "id": "test", - "single_end": false + "single_end": true }, - 198 + 99 ] ], [ + + ], + [ + + ], + [ + "versions.yml:md5,01f264f78de3c6d893c449cc6d3cd721", "versions.yml:md5,85bd0117e5778fff18e3920972a296ad", "versions.yml:md5,c50aa59475ab901bc6f9a2cf7b1a14e0", "versions.yml:md5,f3dcaae948e8eed92b4a5557b4c6668e" ] ], "meta": { - "nf-test": "0.8.4", - "nextflow": "23.10.1" + "nf-test": "0.9.0", + "nextflow": "24.04.3" }, - "timestamp": "2024-03-18T16:53:39.139038" + "timestamp": "2024-07-22T16:56:26.778625" }, "min_trimmed_reads = 26": { "content": [ @@ -429,14 +707,8 @@ "id": "test", "single_end": false }, - [ - "test_1.fastp.fastq.gz:md5,54b726a55e992a869fd3fa778afe1672", - "test_2.fastp.fastq.gz:md5,29d3b33b869f7b63417b8ff07bb128ba" - ] + "unspecified" ] - ], - [ - ], [ [ @@ -444,11 +716,11 @@ "id": "test", "single_end": false }, - "test.fastp.json:md5,b712fd68ed0322f4bec49ff2a5237fcc" + [ + "test_1.fastp.fastq.gz:md5,54b726a55e992a869fd3fa778afe1672", + "test_2.fastp.fastq.gz:md5,29d3b33b869f7b63417b8ff07bb128ba" + ] ] - ], - [ - ], [ [ @@ 
-456,7 +728,7 @@ "id": "test", "single_end": false }, - "test.merged.fastq.gz:md5,c873bb1ab3fa859dcc47306465e749d5" + "test.fastp.json:md5,b712fd68ed0322f4bec49ff2a5237fcc" ] ], [ @@ -465,8 +737,11 @@ "id": "test", "single_end": false }, - "unspecified" + 75 ] + ], + [ + ], [ [ @@ -474,8 +749,11 @@ "id": "test", "single_end": false }, - 75 + "test.merged.fastq.gz:md5,c873bb1ab3fa859dcc47306465e749d5" ] + ], + [ + ], [ "versions.yml:md5,85bd0117e5778fff18e3920972a296ad", @@ -484,66 +762,1646 @@ ] ], "meta": { - "nf-test": "0.8.4", - "nextflow": "23.10.1" + "nf-test": "0.9.0", + "nextflow": "24.04.3" }, - "timestamp": "2024-03-18T11:52:23.849945" + "timestamp": "2024-07-22T16:58:16.36697" }, - "with_umi": { + "min_trimmed_reads = 26 - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + [ + "test_1.fastp.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940", + "test_2.fastp.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ] + ], + "1": [ + [ + { + "id": "test", + "single_end": false + }, + "test.html:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "10": [ + [ + { + "id": "test", + "single_end": false + }, + 26 + ] + ], + "11": [ + [ + { + "id": "test", + "single_end": false + }, + "test.html:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "12": [ + [ + { + "id": "test", + "single_end": false + }, + "test.zip:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "13": [ + "versions.yml:md5,85bd0117e5778fff18e3920972a296ad", + "versions.yml:md5,c50aa59475ab901bc6f9a2cf7b1a14e0", + "versions.yml:md5,f3dcaae948e8eed92b4a5557b4c6668e" + ], + "2": [ + [ + { + "id": "test", + "single_end": false + }, + "test.zip:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "3": [ + + ], + "4": [ + [ + { + "id": "test", + "single_end": false + }, + "" + ] + ], + "5": [ + [ + { + "id": "test", + "single_end": false + }, + "test.fastp.json:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "6": [ + [ + { + "id": "test", + "single_end": false + }, + "test.fastp.html:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "7": [ + [ + { + "id": "test", + "single_end": false + }, + "test.fastp.log:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "8": [ + + ], + "9": [ + [ + { + "id": "test", + "single_end": false + }, + "test.merged.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "adapter_seq": [ + [ + { + "id": "test", + "single_end": false + }, + "" + ] + ], + "fastqc_raw_html": [ + [ + { + "id": "test", + "single_end": false + }, + "test.html:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "fastqc_raw_zip": [ + [ + { + "id": "test", + "single_end": false + }, + "test.zip:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "fastqc_trim_html": [ + [ + { + "id": "test", + "single_end": false + }, + "test.html:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "fastqc_trim_zip": [ + [ + { + "id": "test", + "single_end": false + }, + "test.zip:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "reads": [ + [ + { + "id": "test", + "single_end": false + }, + [ + "test_1.fastp.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940", + "test_2.fastp.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ] + ], + "trim_html": [ + [ + { + "id": "test", + "single_end": false + }, + "test.fastp.html:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "trim_json": [ + [ + { + "id": "test", + "single_end": false + }, + "test.fastp.json:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "trim_log": [ + [ + { + "id": "test", + "single_end": false + }, + "test.fastp.log:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + 
"trim_read_count": [ + [ + { + "id": "test", + "single_end": false + }, + 26 + ] + ], + "trim_reads_fail": [ + + ], + "trim_reads_merged": [ + [ + { + "id": "test", + "single_end": false + }, + "test.merged.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "umi_log": [ + + ], + "versions": [ + "versions.yml:md5,85bd0117e5778fff18e3920972a296ad", + "versions.yml:md5,c50aa59475ab901bc6f9a2cf7b1a14e0", + "versions.yml:md5,f3dcaae948e8eed92b4a5557b4c6668e" + ] + } + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.3" + }, + "timestamp": "2024-07-22T17:00:16.524361" + }, + "with_umi - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": true + }, + "test.fastp.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "1": [ + [ + { + "id": "test", + "single_end": false + }, + "test.html:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "10": [ + [ + { + "id": "test", + "single_end": true + }, + 1 + ] + ], + "11": [ + [ + { + "id": "test", + "single_end": true + }, + "test.html:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "12": [ + [ + { + "id": "test", + "single_end": true + }, + "test.zip:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "13": [ + "versions.yml:md5,01f264f78de3c6d893c449cc6d3cd721", + "versions.yml:md5,85bd0117e5778fff18e3920972a296ad", + "versions.yml:md5,c50aa59475ab901bc6f9a2cf7b1a14e0", + "versions.yml:md5,f3dcaae948e8eed92b4a5557b4c6668e" + ], + "2": [ + [ + { + "id": "test", + "single_end": false + }, + "test.zip:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "3": [ + [ + { + "id": "test", + "single_end": false + }, + "test.umi_extract.log:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "4": [ + [ + { + "id": "test", + "single_end": true + }, + "" + ] + ], + "5": [ + [ + { + "id": "test", + "single_end": true + }, + "test.fastp.json:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "6": [ + [ + { + "id": "test", + "single_end": true + }, + "test.fastp.html:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "7": [ + [ + { + "id": "test", + "single_end": true + }, + "test.fastp.log:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "8": [ + + ], + "9": [ + + ], + "adapter_seq": [ + [ + { + "id": "test", + "single_end": true + }, + "" + ] + ], + "fastqc_raw_html": [ + [ + { + "id": "test", + "single_end": false + }, + "test.html:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "fastqc_raw_zip": [ + [ + { + "id": "test", + "single_end": false + }, + "test.zip:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "fastqc_trim_html": [ + [ + { + "id": "test", + "single_end": true + }, + "test.html:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "fastqc_trim_zip": [ + [ + { + "id": "test", + "single_end": true + }, + "test.zip:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "reads": [ + [ + { + "id": "test", + "single_end": true + }, + "test.fastp.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "trim_html": [ + [ + { + "id": "test", + "single_end": true + }, + "test.fastp.html:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "trim_json": [ + [ + { + "id": "test", + "single_end": true + }, + "test.fastp.json:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "trim_log": [ + [ + { + "id": "test", + "single_end": true + }, + "test.fastp.log:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "trim_read_count": [ + [ + { + "id": "test", + "single_end": true + }, + 1 + ] + ], + "trim_reads_fail": [ + + ], + "trim_reads_merged": [ + + ], + "umi_log": [ + [ + { + "id": "test", + "single_end": false + }, + 
"test.umi_extract.log:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions": [ + "versions.yml:md5,01f264f78de3c6d893c449cc6d3cd721", + "versions.yml:md5,85bd0117e5778fff18e3920972a296ad", + "versions.yml:md5,c50aa59475ab901bc6f9a2cf7b1a14e0", + "versions.yml:md5,f3dcaae948e8eed92b4a5557b4c6668e" + ] + } + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.3" + }, + "timestamp": "2024-07-22T16:58:56.42517" + }, + "skip_fastqc - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + [ + "test_1.fastp.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940", + "test_2.fastp.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ] + ], + "1": [ + + ], + "10": [ + [ + { + "id": "test", + "single_end": false + }, + 1 + ] + ], + "11": [ + + ], + "12": [ + + ], + "13": [ + "versions.yml:md5,85bd0117e5778fff18e3920972a296ad" + ], + "2": [ + + ], + "3": [ + + ], + "4": [ + [ + { + "id": "test", + "single_end": false + }, + "" + ] + ], + "5": [ + [ + { + "id": "test", + "single_end": false + }, + "test.fastp.json:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "6": [ + [ + { + "id": "test", + "single_end": false + }, + "test.fastp.html:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "7": [ + [ + { + "id": "test", + "single_end": false + }, + "test.fastp.log:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "8": [ + + ], + "9": [ + + ], + "adapter_seq": [ + [ + { + "id": "test", + "single_end": false + }, + "" + ] + ], + "fastqc_raw_html": [ + + ], + "fastqc_raw_zip": [ + + ], + "fastqc_trim_html": [ + + ], + "fastqc_trim_zip": [ + + ], + "reads": [ + [ + { + "id": "test", + "single_end": false + }, + [ + "test_1.fastp.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940", + "test_2.fastp.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ] + ], + "trim_html": [ + [ + { + "id": "test", + "single_end": false + }, + "test.fastp.html:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "trim_json": [ + [ + { + "id": "test", + "single_end": false + }, + "test.fastp.json:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "trim_log": [ + [ + { + "id": "test", + "single_end": false + }, + "test.fastp.log:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "trim_read_count": [ + [ + { + "id": "test", + "single_end": false + }, + 1 + ] + ], + "trim_reads_fail": [ + + ], + "trim_reads_merged": [ + + ], + "umi_log": [ + + ], + "versions": [ + "versions.yml:md5,85bd0117e5778fff18e3920972a296ad" + ] + } + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.3" + }, + "timestamp": "2024-07-22T16:58:41.207281" + }, + "save_merged - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + [ + "test_1.fastp.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940", + "test_2.fastp.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ] + ], + "1": [ + [ + { + "id": "test", + "single_end": false + }, + "test.html:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "10": [ + [ + { + "id": "test", + "single_end": false + }, + 1 + ] + ], + "11": [ + [ + { + "id": "test", + "single_end": false + }, + "test.html:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "12": [ + [ + { + "id": "test", + "single_end": false + }, + "test.zip:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "13": [ + "versions.yml:md5,85bd0117e5778fff18e3920972a296ad", + "versions.yml:md5,c50aa59475ab901bc6f9a2cf7b1a14e0", + "versions.yml:md5,f3dcaae948e8eed92b4a5557b4c6668e" + ], + "2": [ + [ + { + "id": "test", + "single_end": false + }, + "test.zip:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "3": [ 
+ + ], + "4": [ + [ + { + "id": "test", + "single_end": false + }, + "" + ] + ], + "5": [ + [ + { + "id": "test", + "single_end": false + }, + "test.fastp.json:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "6": [ + [ + { + "id": "test", + "single_end": false + }, + "test.fastp.html:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "7": [ + [ + { + "id": "test", + "single_end": false + }, + "test.fastp.log:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "8": [ + + ], + "9": [ + [ + { + "id": "test", + "single_end": false + }, + "test.merged.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "adapter_seq": [ + [ + { + "id": "test", + "single_end": false + }, + "" + ] + ], + "fastqc_raw_html": [ + [ + { + "id": "test", + "single_end": false + }, + "test.html:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "fastqc_raw_zip": [ + [ + { + "id": "test", + "single_end": false + }, + "test.zip:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "fastqc_trim_html": [ + [ + { + "id": "test", + "single_end": false + }, + "test.html:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "fastqc_trim_zip": [ + [ + { + "id": "test", + "single_end": false + }, + "test.zip:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "reads": [ + [ + { + "id": "test", + "single_end": false + }, + [ + "test_1.fastp.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940", + "test_2.fastp.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ] + ], + "trim_html": [ + [ + { + "id": "test", + "single_end": false + }, + "test.fastp.html:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "trim_json": [ + [ + { + "id": "test", + "single_end": false + }, + "test.fastp.json:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "trim_log": [ + [ + { + "id": "test", + "single_end": false + }, + "test.fastp.log:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "trim_read_count": [ + [ + { + "id": "test", + "single_end": false + }, + 1 + ] + ], + "trim_reads_fail": [ + + ], + "trim_reads_merged": [ + [ + { + "id": "test", + "single_end": false + }, + "test.merged.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "umi_log": [ + + ], + "versions": [ + "versions.yml:md5,85bd0117e5778fff18e3920972a296ad", + "versions.yml:md5,c50aa59475ab901bc6f9a2cf7b1a14e0", + "versions.yml:md5,f3dcaae948e8eed92b4a5557b4c6668e" + ] + } + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.3" + }, + "timestamp": "2024-07-22T17:00:03.695409" + }, + "sarscov2 paired-end [fastq]": { "content": [ [ [ { "id": "test", - "single_end": true + "single_end": false }, - "test.fastp.fastq.gz:md5,ba8c6c3a7ce718d9a2c5857e2edf53bc" + "unspecified" ] ], [ [ { "id": "test", - "single_end": true + "single_end": false }, - "test.fastp.json:md5,d39c5c6d9a2e35fb60d26ced46569af6" + [ + "test_1.fastp.fastq.gz:md5,67b2bbae47f073e05a97a9c2edce23c7", + "test_2.fastp.fastq.gz:md5,25cbdca08e2083dbd4f0502de6b62f39" + ] ] - ], - [ - - ], - [ - ], [ [ { "id": "test", - "single_end": true + "single_end": false }, - "" + "test.fastp.json:md5,1e0f8e27e71728e2b63fc64086be95cd" ] ], [ [ { "id": "test", - "single_end": true + "single_end": false }, - 99 + 198 ] ], [ - "versions.yml:md5,01f264f78de3c6d893c449cc6d3cd721", + + ], + [ + + ], + [ + + ], + [ "versions.yml:md5,85bd0117e5778fff18e3920972a296ad", "versions.yml:md5,c50aa59475ab901bc6f9a2cf7b1a14e0", "versions.yml:md5,f3dcaae948e8eed92b4a5557b4c6668e" ] ], "meta": { - "nf-test": "0.8.4", - "nextflow": "23.10.1" + "nf-test": "0.9.0", + "nextflow": "24.04.3" + }, + "timestamp": "2024-07-22T16:55:50.614571" + }, + "sarscov2 paired-end 
[fastq] - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + [ + "test_1.fastp.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940", + "test_2.fastp.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ] + ], + "1": [ + [ + { + "id": "test", + "single_end": false + }, + "test.html:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "10": [ + [ + { + "id": "test", + "single_end": false + }, + 1 + ] + ], + "11": [ + [ + { + "id": "test", + "single_end": false + }, + "test.html:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "12": [ + [ + { + "id": "test", + "single_end": false + }, + "test.zip:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "13": [ + "versions.yml:md5,85bd0117e5778fff18e3920972a296ad", + "versions.yml:md5,c50aa59475ab901bc6f9a2cf7b1a14e0", + "versions.yml:md5,f3dcaae948e8eed92b4a5557b4c6668e" + ], + "2": [ + [ + { + "id": "test", + "single_end": false + }, + "test.zip:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "3": [ + + ], + "4": [ + [ + { + "id": "test", + "single_end": false + }, + "" + ] + ], + "5": [ + [ + { + "id": "test", + "single_end": false + }, + "test.fastp.json:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "6": [ + [ + { + "id": "test", + "single_end": false + }, + "test.fastp.html:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "7": [ + [ + { + "id": "test", + "single_end": false + }, + "test.fastp.log:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "8": [ + + ], + "9": [ + + ], + "adapter_seq": [ + [ + { + "id": "test", + "single_end": false + }, + "" + ] + ], + "fastqc_raw_html": [ + [ + { + "id": "test", + "single_end": false + }, + "test.html:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "fastqc_raw_zip": [ + [ + { + "id": "test", + "single_end": false + }, + "test.zip:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "fastqc_trim_html": [ + [ + { + "id": "test", + "single_end": false + }, + "test.html:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "fastqc_trim_zip": [ + [ + { + "id": "test", + "single_end": false + }, + "test.zip:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "reads": [ + [ + { + "id": "test", + "single_end": false + }, + [ + "test_1.fastp.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940", + "test_2.fastp.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ] + ], + "trim_html": [ + [ + { + "id": "test", + "single_end": false + }, + "test.fastp.html:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "trim_json": [ + [ + { + "id": "test", + "single_end": false + }, + "test.fastp.json:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "trim_log": [ + [ + { + "id": "test", + "single_end": false + }, + "test.fastp.log:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "trim_read_count": [ + [ + { + "id": "test", + "single_end": false + }, + 1 + ] + ], + "trim_reads_fail": [ + + ], + "trim_reads_merged": [ + + ], + "umi_log": [ + + ], + "versions": [ + "versions.yml:md5,85bd0117e5778fff18e3920972a296ad", + "versions.yml:md5,c50aa59475ab901bc6f9a2cf7b1a14e0", + "versions.yml:md5,f3dcaae948e8eed92b4a5557b4c6668e" + ] + } + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.3" + }, + "timestamp": "2024-07-22T16:58:29.296468" + }, + "save_trimmed_fail - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + [ + "test_1.fastp.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940", + "test_2.fastp.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ] + ], + "1": [ + [ + { + "id": "test", + "single_end": false + }, + "test.html:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + 
"10": [ + [ + { + "id": "test", + "single_end": false + }, + 1 + ] + ], + "11": [ + [ + { + "id": "test", + "single_end": false + }, + "test.html:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "12": [ + [ + { + "id": "test", + "single_end": false + }, + "test.zip:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "13": [ + "versions.yml:md5,85bd0117e5778fff18e3920972a296ad", + "versions.yml:md5,c50aa59475ab901bc6f9a2cf7b1a14e0", + "versions.yml:md5,f3dcaae948e8eed92b4a5557b4c6668e" + ], + "2": [ + [ + { + "id": "test", + "single_end": false + }, + "test.zip:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "3": [ + + ], + "4": [ + [ + { + "id": "test", + "single_end": false + }, + "" + ] + ], + "5": [ + [ + { + "id": "test", + "single_end": false + }, + "test.fastp.json:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "6": [ + [ + { + "id": "test", + "single_end": false + }, + "test.fastp.html:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "7": [ + [ + { + "id": "test", + "single_end": false + }, + "test.fastp.log:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "8": [ + [ + { + "id": "test", + "single_end": false + }, + [ + "test.paired.fail.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940", + "test_1.fail.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940", + "test_2.fail.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ] + ], + "9": [ + + ], + "adapter_seq": [ + [ + { + "id": "test", + "single_end": false + }, + "" + ] + ], + "fastqc_raw_html": [ + [ + { + "id": "test", + "single_end": false + }, + "test.html:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "fastqc_raw_zip": [ + [ + { + "id": "test", + "single_end": false + }, + "test.zip:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "fastqc_trim_html": [ + [ + { + "id": "test", + "single_end": false + }, + "test.html:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "fastqc_trim_zip": [ + [ + { + "id": "test", + "single_end": false + }, + "test.zip:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "reads": [ + [ + { + "id": "test", + "single_end": false + }, + [ + "test_1.fastp.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940", + "test_2.fastp.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ] + ], + "trim_html": [ + [ + { + "id": "test", + "single_end": false + }, + "test.fastp.html:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "trim_json": [ + [ + { + "id": "test", + "single_end": false + }, + "test.fastp.json:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "trim_log": [ + [ + { + "id": "test", + "single_end": false + }, + "test.fastp.log:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "trim_read_count": [ + [ + { + "id": "test", + "single_end": false + }, + 1 + ] + ], + "trim_reads_fail": [ + [ + { + "id": "test", + "single_end": false + }, + [ + "test.paired.fail.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940", + "test_1.fail.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940", + "test_2.fail.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ] + ], + "trim_reads_merged": [ + + ], + "umi_log": [ + + ], + "versions": [ + "versions.yml:md5,85bd0117e5778fff18e3920972a296ad", + "versions.yml:md5,c50aa59475ab901bc6f9a2cf7b1a14e0", + "versions.yml:md5,f3dcaae948e8eed92b4a5557b4c6668e" + ] + } + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.3" + }, + "timestamp": "2024-07-22T16:59:51.615894" + }, + "skip_umi_extract - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + [ + "test_1.fastp.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940", + 
"test_2.fastp.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ] + ], + "1": [ + [ + { + "id": "test", + "single_end": false + }, + "test.html:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "10": [ + [ + { + "id": "test", + "single_end": false + }, + 1 + ] + ], + "11": [ + [ + { + "id": "test", + "single_end": false + }, + "test.html:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "12": [ + [ + { + "id": "test", + "single_end": false + }, + "test.zip:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "13": [ + "versions.yml:md5,85bd0117e5778fff18e3920972a296ad", + "versions.yml:md5,c50aa59475ab901bc6f9a2cf7b1a14e0", + "versions.yml:md5,f3dcaae948e8eed92b4a5557b4c6668e" + ], + "2": [ + [ + { + "id": "test", + "single_end": false + }, + "test.zip:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "3": [ + + ], + "4": [ + [ + { + "id": "test", + "single_end": false + }, + "" + ] + ], + "5": [ + [ + { + "id": "test", + "single_end": false + }, + "test.fastp.json:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "6": [ + [ + { + "id": "test", + "single_end": false + }, + "test.fastp.html:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "7": [ + [ + { + "id": "test", + "single_end": false + }, + "test.fastp.log:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "8": [ + + ], + "9": [ + + ], + "adapter_seq": [ + [ + { + "id": "test", + "single_end": false + }, + "" + ] + ], + "fastqc_raw_html": [ + [ + { + "id": "test", + "single_end": false + }, + "test.html:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "fastqc_raw_zip": [ + [ + { + "id": "test", + "single_end": false + }, + "test.zip:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "fastqc_trim_html": [ + [ + { + "id": "test", + "single_end": false + }, + "test.html:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "fastqc_trim_zip": [ + [ + { + "id": "test", + "single_end": false + }, + "test.zip:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "reads": [ + [ + { + "id": "test", + "single_end": false + }, + [ + "test_1.fastp.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940", + "test_2.fastp.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ] + ], + "trim_html": [ + [ + { + "id": "test", + "single_end": false + }, + "test.fastp.html:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "trim_json": [ + [ + { + "id": "test", + "single_end": false + }, + "test.fastp.json:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "trim_log": [ + [ + { + "id": "test", + "single_end": false + }, + "test.fastp.log:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "trim_read_count": [ + [ + { + "id": "test", + "single_end": false + }, + 1 + ] + ], + "trim_reads_fail": [ + + ], + "trim_reads_merged": [ + + ], + "umi_log": [ + + ], + "versions": [ + "versions.yml:md5,85bd0117e5778fff18e3920972a296ad", + "versions.yml:md5,c50aa59475ab901bc6f9a2cf7b1a14e0", + "versions.yml:md5,f3dcaae948e8eed92b4a5557b4c6668e" + ] + } + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.3" }, - "timestamp": "2024-03-18T17:31:09.193212" + "timestamp": "2024-07-22T16:59:12.592278" } } \ No newline at end of file diff --git a/subworkflows/nf-core/fastq_fastqc_umitools_fastp/tests/nextflow.config b/subworkflows/nf-core/fastq_fastqc_umitools_fastp/tests/nextflow.config index 12f7b25..0174cae 100644 --- a/subworkflows/nf-core/fastq_fastqc_umitools_fastp/tests/nextflow.config +++ b/subworkflows/nf-core/fastq_fastqc_umitools_fastp/tests/nextflow.config @@ -7,5 +7,5 @@ process { withName: UMICOLLAPSE { ext.prefix = { "${meta.id}.dedup" } } - -} \ No newline at end of file + +} diff --git 
a/subworkflows/nf-core/fastq_fastqc_umitools_fastp/tests/nextflow.save_trimmed.config b/subworkflows/nf-core/fastq_fastqc_umitools_fastp/tests/nextflow.save_trimmed.config index 2430e9d..21207ad 100644 --- a/subworkflows/nf-core/fastq_fastqc_umitools_fastp/tests/nextflow.save_trimmed.config +++ b/subworkflows/nf-core/fastq_fastqc_umitools_fastp/tests/nextflow.save_trimmed.config @@ -3,4 +3,4 @@ process { withName: FASTP { ext.args = "-e 30" } -} \ No newline at end of file +} diff --git a/subworkflows/nf-core/utils_nextflow_pipeline/main.nf b/subworkflows/nf-core/utils_nextflow_pipeline/main.nf new file mode 100644 index 0000000..28e32b2 --- /dev/null +++ b/subworkflows/nf-core/utils_nextflow_pipeline/main.nf @@ -0,0 +1,124 @@ +// +// Subworkflow with functionality that may be useful for any Nextflow pipeline +// + +/* +======================================================================================== + SUBWORKFLOW DEFINITION +======================================================================================== +*/ + +workflow UTILS_NEXTFLOW_PIPELINE { + + take: + print_version // boolean: print version + dump_parameters // boolean: dump parameters + outdir // path: base directory used to publish pipeline results + check_conda_channels // boolean: check conda channels + + main: + + // + // Print workflow version and exit on --version + // + if (print_version) { + log.info "${workflow.manifest.name} ${getWorkflowVersion()}" + System.exit(0) + } + + // + // Dump pipeline parameters to a JSON file + // + if (dump_parameters && outdir) { + dumpParametersToJSON(outdir) + } + + // + // When running with Conda, warn if channels have not been set-up appropriately + // + if (check_conda_channels) { + checkCondaChannels() + } + + emit: + dummy_emit = true +} + +/* +======================================================================================== + FUNCTIONS +======================================================================================== +*/ + +// +// Generate version string +// +def getWorkflowVersion() { + def version_string = "" as String + if (workflow.manifest.version) { + def prefix_v = workflow.manifest.version[0] != 'v' ? 'v' : '' + version_string += "${prefix_v}${workflow.manifest.version}" + } + + if (workflow.commitId) { + def git_shortsha = workflow.commitId.substring(0, 7) + version_string += "-g${git_shortsha}" + } + + return version_string +} + +// +// Dump pipeline parameters to a JSON file +// +def dumpParametersToJSON(outdir) { + def timestamp = new java.util.Date().format( 'yyyy-MM-dd_HH-mm-ss') + def filename = "params_${timestamp}.json" + def temp_pf = new File(workflow.launchDir.toString(), ".${filename}") + def jsonStr = groovy.json.JsonOutput.toJson(params) + temp_pf.text = groovy.json.JsonOutput.prettyPrint(jsonStr) + + nextflow.extension.FilesEx.copyTo(temp_pf.toPath(), "${outdir}/pipeline_info/params_${timestamp}.json") + temp_pf.delete() +} + +// +// When running with -profile conda, warn if channels have not been set-up appropriately +// +def checkCondaChannels() { + def parser = new org.yaml.snakeyaml.Yaml() + def channels = [] + try { + def config = parser.load("conda config --show channels".execute().text) + channels = config.channels + } catch(NullPointerException | IOException e) { + log.warn "Could not verify conda channel configuration." + return + } + + // Check that all channels are present + // This channel list is ordered by required channel priority. 
+ def required_channels_in_order = ['conda-forge', 'bioconda'] + def channels_missing = ((required_channels_in_order as Set) - (channels as Set)) as Boolean + + // Check that they are in the right order + def channel_priority_violation = false + + required_channels_in_order.eachWithIndex { channel, index -> + if (index < required_channels_in_order.size() - 1) { + channel_priority_violation |= !(channels.indexOf(channel) < channels.indexOf(required_channels_in_order[index+1])) + } + } + + if (channels_missing | channel_priority_violation) { + log.warn "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n" + + " There is a problem with your Conda configuration!\n\n" + + " You will need to set-up the conda-forge and bioconda channels correctly.\n" + + " Please refer to https://bioconda.github.io/\n" + + " The observed channel order is \n" + + " ${channels}\n" + + " but the following channel order is required:\n" + + " ${required_channels_in_order}\n" + + "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~" + } +} diff --git a/subworkflows/nf-core/utils_nextflow_pipeline/meta.yml b/subworkflows/nf-core/utils_nextflow_pipeline/meta.yml new file mode 100644 index 0000000..e5c3a0a --- /dev/null +++ b/subworkflows/nf-core/utils_nextflow_pipeline/meta.yml @@ -0,0 +1,38 @@ +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/subworkflows/yaml-schema.json +name: "UTILS_NEXTFLOW_PIPELINE" +description: Subworkflow with functionality that may be useful for any Nextflow pipeline +keywords: + - utility + - pipeline + - initialise + - version +components: [] +input: + - print_version: + type: boolean + description: | + Print the version of the pipeline and exit + - dump_parameters: + type: boolean + description: | + Dump the parameters of the pipeline to a JSON file + - output_directory: + type: directory + description: Path to output dir to write JSON file to. + pattern: "results/" + - check_conda_channel: + type: boolean + description: | + Check if the conda channel priority is correct. +output: + - dummy_emit: + type: boolean + description: | + Dummy emit to make nf-core subworkflows lint happy +authors: + - "@adamrtalbot" + - "@drpatelh" +maintainers: + - "@adamrtalbot" + - "@drpatelh" + - "@maxulysse" diff --git a/subworkflows/nf-core/utils_nextflow_pipeline/tests/main.function.nf.test b/subworkflows/nf-core/utils_nextflow_pipeline/tests/main.function.nf.test new file mode 100644 index 0000000..68718e4 --- /dev/null +++ b/subworkflows/nf-core/utils_nextflow_pipeline/tests/main.function.nf.test @@ -0,0 +1,54 @@ + +nextflow_function { + + name "Test Functions" + script "subworkflows/nf-core/utils_nextflow_pipeline/main.nf" + config "subworkflows/nf-core/utils_nextflow_pipeline/tests/nextflow.config" + tag 'subworkflows' + tag 'utils_nextflow_pipeline' + tag 'subworkflows/utils_nextflow_pipeline' + + test("Test Function getWorkflowVersion") { + + function "getWorkflowVersion" + + then { + assertAll( + { assert function.success }, + { assert snapshot(function.result).match() } + ) + } + } + + test("Test Function dumpParametersToJSON") { + + function "dumpParametersToJSON" + + when { + function { + """ + // define inputs of the function here. 
Example: + input[0] = "$outputDir" + """.stripIndent() + } + } + + then { + assertAll( + { assert function.success } + ) + } + } + + test("Test Function checkCondaChannels") { + + function "checkCondaChannels" + + then { + assertAll( + { assert function.success }, + { assert snapshot(function.result).match() } + ) + } + } +} diff --git a/subworkflows/nf-core/utils_nextflow_pipeline/tests/main.function.nf.test.snap b/subworkflows/nf-core/utils_nextflow_pipeline/tests/main.function.nf.test.snap new file mode 100644 index 0000000..e3f0baf --- /dev/null +++ b/subworkflows/nf-core/utils_nextflow_pipeline/tests/main.function.nf.test.snap @@ -0,0 +1,20 @@ +{ + "Test Function getWorkflowVersion": { + "content": [ + "v9.9.9" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-28T12:02:05.308243" + }, + "Test Function checkCondaChannels": { + "content": null, + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-28T12:02:12.425833" + } +} \ No newline at end of file diff --git a/subworkflows/nf-core/utils_nextflow_pipeline/tests/main.workflow.nf.test b/subworkflows/nf-core/utils_nextflow_pipeline/tests/main.workflow.nf.test new file mode 100644 index 0000000..ca964ce --- /dev/null +++ b/subworkflows/nf-core/utils_nextflow_pipeline/tests/main.workflow.nf.test @@ -0,0 +1,111 @@ +nextflow_workflow { + + name "Test Workflow UTILS_NEXTFLOW_PIPELINE" + script "../main.nf" + config "subworkflows/nf-core/utils_nextflow_pipeline/tests/nextflow.config" + workflow "UTILS_NEXTFLOW_PIPELINE" + tag 'subworkflows' + tag 'utils_nextflow_pipeline' + tag 'subworkflows/utils_nextflow_pipeline' + + test("Should run no inputs") { + + when { + workflow { + """ + print_version = false + dump_parameters = false + outdir = null + check_conda_channels = false + + input[0] = print_version + input[1] = dump_parameters + input[2] = outdir + input[3] = check_conda_channels + """ + } + } + + then { + assertAll( + { assert workflow.success } + ) + } + } + + test("Should print version") { + + when { + workflow { + """ + print_version = true + dump_parameters = false + outdir = null + check_conda_channels = false + + input[0] = print_version + input[1] = dump_parameters + input[2] = outdir + input[3] = check_conda_channels + """ + } + } + + then { + assertAll( + { assert workflow.success }, + { assert workflow.stdout.contains("nextflow_workflow v9.9.9") } + ) + } + } + + test("Should dump params") { + + when { + workflow { + """ + print_version = false + dump_parameters = true + outdir = 'results' + check_conda_channels = false + + input[0] = false + input[1] = true + input[2] = outdir + input[3] = false + """ + } + } + + then { + assertAll( + { assert workflow.success } + ) + } + } + + test("Should not create params JSON if no output directory") { + + when { + workflow { + """ + print_version = false + dump_parameters = true + outdir = null + check_conda_channels = false + + input[0] = false + input[1] = true + input[2] = outdir + input[3] = false + """ + } + } + + then { + assertAll( + { assert workflow.success } + ) + } + } +} diff --git a/subworkflows/nf-core/utils_nextflow_pipeline/tests/nextflow.config b/subworkflows/nf-core/utils_nextflow_pipeline/tests/nextflow.config new file mode 100644 index 0000000..a09572e --- /dev/null +++ b/subworkflows/nf-core/utils_nextflow_pipeline/tests/nextflow.config @@ -0,0 +1,9 @@ +manifest { + name = 'nextflow_workflow' + author = """nf-core""" + homePage = 'https://127.0.0.1' + description = """Dummy pipeline""" + 
nextflowVersion = '!>=23.04.0' + version = '9.9.9' + doi = 'https://doi.org/10.5281/zenodo.5070524' +} diff --git a/subworkflows/nf-core/utils_nextflow_pipeline/tests/tags.yml b/subworkflows/nf-core/utils_nextflow_pipeline/tests/tags.yml new file mode 100644 index 0000000..f847611 --- /dev/null +++ b/subworkflows/nf-core/utils_nextflow_pipeline/tests/tags.yml @@ -0,0 +1,2 @@ +subworkflows/utils_nextflow_pipeline: + - subworkflows/nf-core/utils_nextflow_pipeline/** diff --git a/subworkflows/nf-core/utils_nfcore_pipeline/main.nf b/subworkflows/nf-core/utils_nfcore_pipeline/main.nf new file mode 100644 index 0000000..cbd8495 --- /dev/null +++ b/subworkflows/nf-core/utils_nfcore_pipeline/main.nf @@ -0,0 +1,445 @@ +// +// Subworkflow with utility functions specific to the nf-core pipeline template +// + +/* +======================================================================================== + SUBWORKFLOW DEFINITION +======================================================================================== +*/ + +workflow UTILS_NFCORE_PIPELINE { + + take: + nextflow_cli_args + + main: + valid_config = checkConfigProvided() + checkProfileProvided(nextflow_cli_args) + + emit: + valid_config +} + +/* +======================================================================================== + FUNCTIONS +======================================================================================== +*/ + +// +// Warn if a -profile or Nextflow config has not been provided to run the pipeline +// +def checkConfigProvided() { + def valid_config = true as Boolean + if (workflow.profile == 'standard' && workflow.configFiles.size() <= 1) { + log.warn "[$workflow.manifest.name] You are attempting to run the pipeline without any custom configuration!\n\n" + + "This will be dependent on your local compute environment but can be achieved via one or more of the following:\n" + + " (1) Using an existing pipeline profile e.g. `-profile docker` or `-profile singularity`\n" + + " (2) Using an existing nf-core/configs for your Institution e.g. `-profile crick` or `-profile uppmax`\n" + + " (3) Using your own local custom config e.g. `-c /path/to/your/custom.config`\n\n" + + "Please refer to the quick start section and usage docs for the pipeline.\n " + valid_config = false + } + return valid_config +} + +// +// Exit pipeline if --profile contains spaces +// +def checkProfileProvided(nextflow_cli_args) { + if (workflow.profile.endsWith(',')) { + error "The `-profile` option cannot end with a trailing comma, please remove it and re-run the pipeline!\n" + + "HINT: A common mistake is to provide multiple values separated by spaces e.g. `-profile test, docker`.\n" + } + if (nextflow_cli_args[0]) { + log.warn "nf-core pipelines do not accept positional arguments. The positional argument `${nextflow_cli_args[0]}` has been detected.\n" + + "HINT: A common mistake is to provide multiple values separated by spaces e.g. 
`-profile test, docker`.\n" + } +} + +// +// Citation string for pipeline +// +def workflowCitation() { + def temp_doi_ref = "" + def manifest_doi = workflow.manifest.doi.tokenize(",") + // Using a loop to handle multiple DOIs + // Removing `https://doi.org/` to handle pipelines using DOIs vs DOI resolvers + // Removing ` ` since the manifest.doi is a string and not a proper list + manifest_doi.each { doi_ref -> + temp_doi_ref += " https://doi.org/${doi_ref.replace('https://doi.org/', '').replace(' ', '')}\n" + } + return "If you use ${workflow.manifest.name} for your analysis please cite:\n\n" + + "* The pipeline\n" + + temp_doi_ref + "\n" + + "* The nf-core framework\n" + + " https://doi.org/10.1038/s41587-020-0439-x\n\n" + + "* Software dependencies\n" + + " https://github.com/${workflow.manifest.name}/blob/master/CITATIONS.md" +} + +// +// Generate workflow version string +// +def getWorkflowVersion() { + def version_string = "" as String + if (workflow.manifest.version) { + def prefix_v = workflow.manifest.version[0] != 'v' ? 'v' : '' + version_string += "${prefix_v}${workflow.manifest.version}" + } + + if (workflow.commitId) { + def git_shortsha = workflow.commitId.substring(0, 7) + version_string += "-g${git_shortsha}" + } + + return version_string +} + +// +// Get software versions for pipeline +// +def processVersionsFromYAML(yaml_file) { + def yaml = new org.yaml.snakeyaml.Yaml() + def versions = yaml.load(yaml_file).collectEntries { k, v -> [ k.tokenize(':')[-1], v ] } + return yaml.dumpAsMap(versions).trim() +} + +// +// Get workflow version for pipeline +// +def workflowVersionToYAML() { + return """ + Workflow: + $workflow.manifest.name: ${getWorkflowVersion()} + Nextflow: $workflow.nextflow.version + """.stripIndent().trim() +} + +// +// Get channel of software versions used in pipeline in YAML format +// +def softwareVersionsToYAML(ch_versions) { + return ch_versions + .unique() + .map { version -> processVersionsFromYAML(version) } + .unique() + .mix(Channel.of(workflowVersionToYAML())) +} + +// +// Get workflow summary for MultiQC +// +def paramsSummaryMultiqc(summary_params) { + def summary_section = '' + summary_params.keySet().each { group -> + def group_params = summary_params.get(group) // This gets the parameters of that particular group + if (group_params) { + summary_section += "

<p style=\"font-size:110%\"><b>$group</b></p>\n" + summary_section += "    <dl class=\"dl-horizontal\">\n" + group_params.keySet().sort().each { param -> + summary_section += "        <dt>$param</dt><dd><samp>${group_params.get(param) ?: '<span style=\"color:#999999;\">N/A</a>'}</samp></dd>\n" + } + summary_section += "    </dl>
\n" + } + } + + def yaml_file_text = "id: '${workflow.manifest.name.replace('/','-')}-summary'\n" as String + yaml_file_text += "description: ' - this information is collected when the pipeline is started.'\n" + yaml_file_text += "section_name: '${workflow.manifest.name} Workflow Summary'\n" + yaml_file_text += "section_href: 'https://github.com/${workflow.manifest.name}'\n" + yaml_file_text += "plot_type: 'html'\n" + yaml_file_text += "data: |\n" + yaml_file_text += "${summary_section}" + + return yaml_file_text +} + +// +// nf-core logo +// +def nfCoreLogo(monochrome_logs=true) { + def colors = logColours(monochrome_logs) as Map + String.format( + """\n + ${dashedLine(monochrome_logs)} + ${colors.green},--.${colors.black}/${colors.green},-.${colors.reset} + ${colors.blue} ___ __ __ __ ___ ${colors.green}/,-._.--~\'${colors.reset} + ${colors.blue} |\\ | |__ __ / ` / \\ |__) |__ ${colors.yellow}} {${colors.reset} + ${colors.blue} | \\| | \\__, \\__/ | \\ |___ ${colors.green}\\`-._,-`-,${colors.reset} + ${colors.green}`._,._,\'${colors.reset} + ${colors.purple} ${workflow.manifest.name} ${getWorkflowVersion()}${colors.reset} + ${dashedLine(monochrome_logs)} + """.stripIndent() + ) +} + +// +// Return dashed line +// +def dashedLine(monochrome_logs=true) { + def colors = logColours(monochrome_logs) as Map + return "-${colors.dim}----------------------------------------------------${colors.reset}-" +} + +// +// ANSII colours used for terminal logging +// +def logColours(monochrome_logs=true) { + def colorcodes = [:] as Map + + // Reset / Meta + colorcodes['reset'] = monochrome_logs ? '' : "\033[0m" + colorcodes['bold'] = monochrome_logs ? '' : "\033[1m" + colorcodes['dim'] = monochrome_logs ? '' : "\033[2m" + colorcodes['underlined'] = monochrome_logs ? '' : "\033[4m" + colorcodes['blink'] = monochrome_logs ? '' : "\033[5m" + colorcodes['reverse'] = monochrome_logs ? '' : "\033[7m" + colorcodes['hidden'] = monochrome_logs ? '' : "\033[8m" + + // Regular Colors + colorcodes['black'] = monochrome_logs ? '' : "\033[0;30m" + colorcodes['red'] = monochrome_logs ? '' : "\033[0;31m" + colorcodes['green'] = monochrome_logs ? '' : "\033[0;32m" + colorcodes['yellow'] = monochrome_logs ? '' : "\033[0;33m" + colorcodes['blue'] = monochrome_logs ? '' : "\033[0;34m" + colorcodes['purple'] = monochrome_logs ? '' : "\033[0;35m" + colorcodes['cyan'] = monochrome_logs ? '' : "\033[0;36m" + colorcodes['white'] = monochrome_logs ? '' : "\033[0;37m" + + // Bold + colorcodes['bblack'] = monochrome_logs ? '' : "\033[1;30m" + colorcodes['bred'] = monochrome_logs ? '' : "\033[1;31m" + colorcodes['bgreen'] = monochrome_logs ? '' : "\033[1;32m" + colorcodes['byellow'] = monochrome_logs ? '' : "\033[1;33m" + colorcodes['bblue'] = monochrome_logs ? '' : "\033[1;34m" + colorcodes['bpurple'] = monochrome_logs ? '' : "\033[1;35m" + colorcodes['bcyan'] = monochrome_logs ? '' : "\033[1;36m" + colorcodes['bwhite'] = monochrome_logs ? '' : "\033[1;37m" + + // Underline + colorcodes['ublack'] = monochrome_logs ? '' : "\033[4;30m" + colorcodes['ured'] = monochrome_logs ? '' : "\033[4;31m" + colorcodes['ugreen'] = monochrome_logs ? '' : "\033[4;32m" + colorcodes['uyellow'] = monochrome_logs ? '' : "\033[4;33m" + colorcodes['ublue'] = monochrome_logs ? '' : "\033[4;34m" + colorcodes['upurple'] = monochrome_logs ? '' : "\033[4;35m" + colorcodes['ucyan'] = monochrome_logs ? '' : "\033[4;36m" + colorcodes['uwhite'] = monochrome_logs ? '' : "\033[4;37m" + + // High Intensity + colorcodes['iblack'] = monochrome_logs ? 
'' : "\033[0;90m" + colorcodes['ired'] = monochrome_logs ? '' : "\033[0;91m" + colorcodes['igreen'] = monochrome_logs ? '' : "\033[0;92m" + colorcodes['iyellow'] = monochrome_logs ? '' : "\033[0;93m" + colorcodes['iblue'] = monochrome_logs ? '' : "\033[0;94m" + colorcodes['ipurple'] = monochrome_logs ? '' : "\033[0;95m" + colorcodes['icyan'] = monochrome_logs ? '' : "\033[0;96m" + colorcodes['iwhite'] = monochrome_logs ? '' : "\033[0;97m" + + // Bold High Intensity + colorcodes['biblack'] = monochrome_logs ? '' : "\033[1;90m" + colorcodes['bired'] = monochrome_logs ? '' : "\033[1;91m" + colorcodes['bigreen'] = monochrome_logs ? '' : "\033[1;92m" + colorcodes['biyellow'] = monochrome_logs ? '' : "\033[1;93m" + colorcodes['biblue'] = monochrome_logs ? '' : "\033[1;94m" + colorcodes['bipurple'] = monochrome_logs ? '' : "\033[1;95m" + colorcodes['bicyan'] = monochrome_logs ? '' : "\033[1;96m" + colorcodes['biwhite'] = monochrome_logs ? '' : "\033[1;97m" + + return colorcodes +} + +// +// Attach the multiqc report to email +// +def attachMultiqcReport(multiqc_report) { + def mqc_report = null + try { + if (workflow.success) { + mqc_report = multiqc_report.getVal() + if (mqc_report.getClass() == ArrayList && mqc_report.size() >= 1) { + if (mqc_report.size() > 1) { + log.warn "[$workflow.manifest.name] Found multiple reports from process 'MULTIQC', will use only one" + } + mqc_report = mqc_report[0] + } + } + } catch (all) { + if (multiqc_report) { + log.warn "[$workflow.manifest.name] Could not attach MultiQC report to summary email" + } + } + return mqc_report +} + +// +// Construct and send completion email +// +def completionEmail(summary_params, email, email_on_fail, plaintext_email, outdir, monochrome_logs=true, multiqc_report=null) { + + // Set up the e-mail variables + def subject = "[$workflow.manifest.name] Successful: $workflow.runName" + if (!workflow.success) { + subject = "[$workflow.manifest.name] FAILED: $workflow.runName" + } + + def summary = [:] + summary_params.keySet().sort().each { group -> + summary << summary_params[group] + } + + def misc_fields = [:] + misc_fields['Date Started'] = workflow.start + misc_fields['Date Completed'] = workflow.complete + misc_fields['Pipeline script file path'] = workflow.scriptFile + misc_fields['Pipeline script hash ID'] = workflow.scriptId + if (workflow.repository) misc_fields['Pipeline repository Git URL'] = workflow.repository + if (workflow.commitId) misc_fields['Pipeline repository Git Commit'] = workflow.commitId + if (workflow.revision) misc_fields['Pipeline Git branch/tag'] = workflow.revision + misc_fields['Nextflow Version'] = workflow.nextflow.version + misc_fields['Nextflow Build'] = workflow.nextflow.build + misc_fields['Nextflow Compile Timestamp'] = workflow.nextflow.timestamp + + def email_fields = [:] + email_fields['version'] = getWorkflowVersion() + email_fields['runName'] = workflow.runName + email_fields['success'] = workflow.success + email_fields['dateComplete'] = workflow.complete + email_fields['duration'] = workflow.duration + email_fields['exitStatus'] = workflow.exitStatus + email_fields['errorMessage'] = (workflow.errorMessage ?: 'None') + email_fields['errorReport'] = (workflow.errorReport ?: 'None') + email_fields['commandLine'] = workflow.commandLine + email_fields['projectDir'] = workflow.projectDir + email_fields['summary'] = summary << misc_fields + + // On success try attach the multiqc report + def mqc_report = attachMultiqcReport(multiqc_report) + + // Check if we are only sending emails on failure + 
def email_address = email + if (!email && email_on_fail && !workflow.success) { + email_address = email_on_fail + } + + // Render the TXT template + def engine = new groovy.text.GStringTemplateEngine() + def tf = new File("${workflow.projectDir}/assets/email_template.txt") + def txt_template = engine.createTemplate(tf).make(email_fields) + def email_txt = txt_template.toString() + + // Render the HTML template + def hf = new File("${workflow.projectDir}/assets/email_template.html") + def html_template = engine.createTemplate(hf).make(email_fields) + def email_html = html_template.toString() + + // Render the sendmail template + def max_multiqc_email_size = (params.containsKey('max_multiqc_email_size') ? params.max_multiqc_email_size : 0) as nextflow.util.MemoryUnit + def smail_fields = [ email: email_address, subject: subject, email_txt: email_txt, email_html: email_html, projectDir: "${workflow.projectDir}", mqcFile: mqc_report, mqcMaxSize: max_multiqc_email_size.toBytes() ] + def sf = new File("${workflow.projectDir}/assets/sendmail_template.txt") + def sendmail_template = engine.createTemplate(sf).make(smail_fields) + def sendmail_html = sendmail_template.toString() + + // Send the HTML e-mail + def colors = logColours(monochrome_logs) as Map + if (email_address) { + try { + if (plaintext_email) { throw new org.codehaus.groovy.GroovyException('Send plaintext e-mail, not HTML') } + // Try to send HTML e-mail using sendmail + def sendmail_tf = new File(workflow.launchDir.toString(), ".sendmail_tmp.html") + sendmail_tf.withWriter { w -> w << sendmail_html } + [ 'sendmail', '-t' ].execute() << sendmail_html + log.info "-${colors.purple}[$workflow.manifest.name]${colors.green} Sent summary e-mail to $email_address (sendmail)-" + } catch (all) { + // Catch failures and try with plaintext + def mail_cmd = [ 'mail', '-s', subject, '--content-type=text/html', email_address ] + mail_cmd.execute() << email_html + log.info "-${colors.purple}[$workflow.manifest.name]${colors.green} Sent summary e-mail to $email_address (mail)-" + } + } + + // Write summary e-mail HTML to a file + def output_hf = new File(workflow.launchDir.toString(), ".pipeline_report.html") + output_hf.withWriter { w -> w << email_html } + nextflow.extension.FilesEx.copyTo(output_hf.toPath(), "${outdir}/pipeline_info/pipeline_report.html"); + output_hf.delete() + + // Write summary e-mail TXT to a file + def output_tf = new File(workflow.launchDir.toString(), ".pipeline_report.txt") + output_tf.withWriter { w -> w << email_txt } + nextflow.extension.FilesEx.copyTo(output_tf.toPath(), "${outdir}/pipeline_info/pipeline_report.txt"); + output_tf.delete() +} + +// +// Print pipeline summary on completion +// +def completionSummary(monochrome_logs=true) { + def colors = logColours(monochrome_logs) as Map + if (workflow.success) { + if (workflow.stats.ignoredCount == 0) { + log.info "-${colors.purple}[$workflow.manifest.name]${colors.green} Pipeline completed successfully${colors.reset}-" + } else { + log.info "-${colors.purple}[$workflow.manifest.name]${colors.yellow} Pipeline completed successfully, but with errored process(es) ${colors.reset}-" + } + } else { + log.info "-${colors.purple}[$workflow.manifest.name]${colors.red} Pipeline completed with errors${colors.reset}-" + } +} + +// +// Construct and send a notification to a web server as JSON e.g. 
Microsoft Teams and Slack +// +def imNotification(summary_params, hook_url) { + def summary = [:] + summary_params.keySet().sort().each { group -> + summary << summary_params[group] + } + + def misc_fields = [:] + misc_fields['start'] = workflow.start + misc_fields['complete'] = workflow.complete + misc_fields['scriptfile'] = workflow.scriptFile + misc_fields['scriptid'] = workflow.scriptId + if (workflow.repository) misc_fields['repository'] = workflow.repository + if (workflow.commitId) misc_fields['commitid'] = workflow.commitId + if (workflow.revision) misc_fields['revision'] = workflow.revision + misc_fields['nxf_version'] = workflow.nextflow.version + misc_fields['nxf_build'] = workflow.nextflow.build + misc_fields['nxf_timestamp'] = workflow.nextflow.timestamp + + def msg_fields = [:] + msg_fields['version'] = getWorkflowVersion() + msg_fields['runName'] = workflow.runName + msg_fields['success'] = workflow.success + msg_fields['dateComplete'] = workflow.complete + msg_fields['duration'] = workflow.duration + msg_fields['exitStatus'] = workflow.exitStatus + msg_fields['errorMessage'] = (workflow.errorMessage ?: 'None') + msg_fields['errorReport'] = (workflow.errorReport ?: 'None') + msg_fields['commandLine'] = workflow.commandLine.replaceFirst(/ +--hook_url +[^ ]+/, "") + msg_fields['projectDir'] = workflow.projectDir + msg_fields['summary'] = summary << misc_fields + + // Render the JSON template + def engine = new groovy.text.GStringTemplateEngine() + // Different JSON depending on the service provider + // Defaults to "Adaptive Cards" (https://adaptivecards.io), except Slack which has its own format + def json_path = hook_url.contains("hooks.slack.com") ? "slackreport.json" : "adaptivecard.json" + def hf = new File("${workflow.projectDir}/assets/${json_path}") + def json_template = engine.createTemplate(hf).make(msg_fields) + def json_message = json_template.toString() + + // POST + def post = new URL(hook_url).openConnection(); + post.setRequestMethod("POST") + post.setDoOutput(true) + post.setRequestProperty("Content-Type", "application/json") + post.getOutputStream().write(json_message.getBytes("UTF-8")); + def postRC = post.getResponseCode(); + if (! 
postRC.equals(200)) { + log.warn(post.getErrorStream().getText()); + } +} diff --git a/subworkflows/nf-core/utils_nfcore_pipeline/meta.yml b/subworkflows/nf-core/utils_nfcore_pipeline/meta.yml new file mode 100644 index 0000000..d08d243 --- /dev/null +++ b/subworkflows/nf-core/utils_nfcore_pipeline/meta.yml @@ -0,0 +1,24 @@ +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/subworkflows/yaml-schema.json +name: "UTILS_NFCORE_PIPELINE" +description: Subworkflow with utility functions specific to the nf-core pipeline template +keywords: + - utility + - pipeline + - initialise + - version +components: [] +input: + - nextflow_cli_args: + type: list + description: | + Nextflow CLI positional arguments +output: + - success: + type: boolean + description: | + Dummy output to indicate success +authors: + - "@adamrtalbot" +maintainers: + - "@adamrtalbot" + - "@maxulysse" diff --git a/subworkflows/nf-core/utils_nfcore_pipeline/tests/main.function.nf.test b/subworkflows/nf-core/utils_nfcore_pipeline/tests/main.function.nf.test new file mode 100644 index 0000000..1dc317f --- /dev/null +++ b/subworkflows/nf-core/utils_nfcore_pipeline/tests/main.function.nf.test @@ -0,0 +1,134 @@ + +nextflow_function { + + name "Test Functions" + script "../main.nf" + config "subworkflows/nf-core/utils_nfcore_pipeline/tests/nextflow.config" + tag "subworkflows" + tag "subworkflows_nfcore" + tag "utils_nfcore_pipeline" + tag "subworkflows/utils_nfcore_pipeline" + + test("Test Function checkConfigProvided") { + + function "checkConfigProvided" + + then { + assertAll( + { assert function.success }, + { assert snapshot(function.result).match() } + ) + } + } + + test("Test Function checkProfileProvided") { + + function "checkProfileProvided" + + when { + function { + """ + input[0] = [] + """ + } + } + + then { + assertAll( + { assert function.success }, + { assert snapshot(function.result).match() } + ) + } + } + + test("Test Function workflowCitation") { + + function "workflowCitation" + + then { + assertAll( + { assert function.success }, + { assert snapshot(function.result).match() } + ) + } + } + + test("Test Function nfCoreLogo") { + + function "nfCoreLogo" + + when { + function { + """ + input[0] = false + """ + } + } + + then { + assertAll( + { assert function.success }, + { assert snapshot(function.result).match() } + ) + } + } + + test("Test Function dashedLine") { + + function "dashedLine" + + when { + function { + """ + input[0] = false + """ + } + } + + then { + assertAll( + { assert function.success }, + { assert snapshot(function.result).match() } + ) + } + } + + test("Test Function without logColours") { + + function "logColours" + + when { + function { + """ + input[0] = true + """ + } + } + + then { + assertAll( + { assert function.success }, + { assert snapshot(function.result).match() } + ) + } + } + + test("Test Function with logColours") { + function "logColours" + + when { + function { + """ + input[0] = false + """ + } + } + + then { + assertAll( + { assert function.success }, + { assert snapshot(function.result).match() } + ) + } + } +} diff --git a/subworkflows/nf-core/utils_nfcore_pipeline/tests/main.function.nf.test.snap b/subworkflows/nf-core/utils_nfcore_pipeline/tests/main.function.nf.test.snap new file mode 100644 index 0000000..1037232 --- /dev/null +++ b/subworkflows/nf-core/utils_nfcore_pipeline/tests/main.function.nf.test.snap @@ -0,0 +1,166 @@ +{ + "Test Function checkProfileProvided": { + "content": null, + "meta": { + "nf-test": "0.8.4", + 
"nextflow": "23.10.1" + }, + "timestamp": "2024-02-28T12:03:03.360873" + }, + "Test Function checkConfigProvided": { + "content": [ + true + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-28T12:02:59.729647" + }, + "Test Function nfCoreLogo": { + "content": [ + "\n\n-\u001b[2m----------------------------------------------------\u001b[0m-\n \u001b[0;32m,--.\u001b[0;30m/\u001b[0;32m,-.\u001b[0m\n\u001b[0;34m ___ __ __ __ ___ \u001b[0;32m/,-._.--~'\u001b[0m\n\u001b[0;34m |\\ | |__ __ / ` / \\ |__) |__ \u001b[0;33m} {\u001b[0m\n\u001b[0;34m | \\| | \\__, \\__/ | \\ |___ \u001b[0;32m\\`-._,-`-,\u001b[0m\n \u001b[0;32m`._,._,'\u001b[0m\n\u001b[0;35m nextflow_workflow v9.9.9\u001b[0m\n-\u001b[2m----------------------------------------------------\u001b[0m-\n" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-28T12:03:10.562934" + }, + "Test Function workflowCitation": { + "content": [ + "If you use nextflow_workflow for your analysis please cite:\n\n* The pipeline\n https://doi.org/10.5281/zenodo.5070524\n\n* The nf-core framework\n https://doi.org/10.1038/s41587-020-0439-x\n\n* Software dependencies\n https://github.com/nextflow_workflow/blob/master/CITATIONS.md" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-28T12:03:07.019761" + }, + "Test Function without logColours": { + "content": [ + { + "reset": "", + "bold": "", + "dim": "", + "underlined": "", + "blink": "", + "reverse": "", + "hidden": "", + "black": "", + "red": "", + "green": "", + "yellow": "", + "blue": "", + "purple": "", + "cyan": "", + "white": "", + "bblack": "", + "bred": "", + "bgreen": "", + "byellow": "", + "bblue": "", + "bpurple": "", + "bcyan": "", + "bwhite": "", + "ublack": "", + "ured": "", + "ugreen": "", + "uyellow": "", + "ublue": "", + "upurple": "", + "ucyan": "", + "uwhite": "", + "iblack": "", + "ired": "", + "igreen": "", + "iyellow": "", + "iblue": "", + "ipurple": "", + "icyan": "", + "iwhite": "", + "biblack": "", + "bired": "", + "bigreen": "", + "biyellow": "", + "biblue": "", + "bipurple": "", + "bicyan": "", + "biwhite": "" + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-28T12:03:17.969323" + }, + "Test Function dashedLine": { + "content": [ + "-\u001b[2m----------------------------------------------------\u001b[0m-" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-28T12:03:14.366181" + }, + "Test Function with logColours": { + "content": [ + { + "reset": "\u001b[0m", + "bold": "\u001b[1m", + "dim": "\u001b[2m", + "underlined": "\u001b[4m", + "blink": "\u001b[5m", + "reverse": "\u001b[7m", + "hidden": "\u001b[8m", + "black": "\u001b[0;30m", + "red": "\u001b[0;31m", + "green": "\u001b[0;32m", + "yellow": "\u001b[0;33m", + "blue": "\u001b[0;34m", + "purple": "\u001b[0;35m", + "cyan": "\u001b[0;36m", + "white": "\u001b[0;37m", + "bblack": "\u001b[1;30m", + "bred": "\u001b[1;31m", + "bgreen": "\u001b[1;32m", + "byellow": "\u001b[1;33m", + "bblue": "\u001b[1;34m", + "bpurple": "\u001b[1;35m", + "bcyan": "\u001b[1;36m", + "bwhite": "\u001b[1;37m", + "ublack": "\u001b[4;30m", + "ured": "\u001b[4;31m", + "ugreen": "\u001b[4;32m", + "uyellow": "\u001b[4;33m", + "ublue": "\u001b[4;34m", + "upurple": "\u001b[4;35m", + "ucyan": "\u001b[4;36m", + "uwhite": "\u001b[4;37m", + "iblack": "\u001b[0;90m", + "ired": "\u001b[0;91m", + "igreen": "\u001b[0;92m", + "iyellow": "\u001b[0;93m", + "iblue": 
"\u001b[0;94m", + "ipurple": "\u001b[0;95m", + "icyan": "\u001b[0;96m", + "iwhite": "\u001b[0;97m", + "biblack": "\u001b[1;90m", + "bired": "\u001b[1;91m", + "bigreen": "\u001b[1;92m", + "biyellow": "\u001b[1;93m", + "biblue": "\u001b[1;94m", + "bipurple": "\u001b[1;95m", + "bicyan": "\u001b[1;96m", + "biwhite": "\u001b[1;97m" + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-28T12:03:21.714424" + } +} \ No newline at end of file diff --git a/subworkflows/nf-core/utils_nfcore_pipeline/tests/main.workflow.nf.test b/subworkflows/nf-core/utils_nfcore_pipeline/tests/main.workflow.nf.test new file mode 100644 index 0000000..8940d32 --- /dev/null +++ b/subworkflows/nf-core/utils_nfcore_pipeline/tests/main.workflow.nf.test @@ -0,0 +1,29 @@ +nextflow_workflow { + + name "Test Workflow UTILS_NFCORE_PIPELINE" + script "../main.nf" + config "subworkflows/nf-core/utils_nfcore_pipeline/tests/nextflow.config" + workflow "UTILS_NFCORE_PIPELINE" + tag "subworkflows" + tag "subworkflows_nfcore" + tag "utils_nfcore_pipeline" + tag "subworkflows/utils_nfcore_pipeline" + + test("Should run without failures") { + + when { + workflow { + """ + input[0] = [] + """ + } + } + + then { + assertAll( + { assert workflow.success }, + { assert snapshot(workflow.out).match() } + ) + } + } +} diff --git a/subworkflows/nf-core/utils_nfcore_pipeline/tests/main.workflow.nf.test.snap b/subworkflows/nf-core/utils_nfcore_pipeline/tests/main.workflow.nf.test.snap new file mode 100644 index 0000000..859d103 --- /dev/null +++ b/subworkflows/nf-core/utils_nfcore_pipeline/tests/main.workflow.nf.test.snap @@ -0,0 +1,19 @@ +{ + "Should run without failures": { + "content": [ + { + "0": [ + true + ], + "valid_config": [ + true + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-28T12:03:25.726491" + } +} \ No newline at end of file diff --git a/subworkflows/nf-core/utils_nfcore_pipeline/tests/nextflow.config b/subworkflows/nf-core/utils_nfcore_pipeline/tests/nextflow.config new file mode 100644 index 0000000..d0a926b --- /dev/null +++ b/subworkflows/nf-core/utils_nfcore_pipeline/tests/nextflow.config @@ -0,0 +1,9 @@ +manifest { + name = 'nextflow_workflow' + author = """nf-core""" + homePage = 'https://127.0.0.1' + description = """Dummy pipeline""" + nextflowVersion = '!>=23.04.0' + version = '9.9.9' + doi = 'https://doi.org/10.5281/zenodo.5070524' +} diff --git a/subworkflows/nf-core/utils_nfcore_pipeline/tests/tags.yml b/subworkflows/nf-core/utils_nfcore_pipeline/tests/tags.yml new file mode 100644 index 0000000..ac8523c --- /dev/null +++ b/subworkflows/nf-core/utils_nfcore_pipeline/tests/tags.yml @@ -0,0 +1,2 @@ +subworkflows/utils_nfcore_pipeline: + - subworkflows/nf-core/utils_nfcore_pipeline/** diff --git a/subworkflows/nf-core/utils_nfvalidation_plugin/main.nf b/subworkflows/nf-core/utils_nfvalidation_plugin/main.nf new file mode 100644 index 0000000..2585b65 --- /dev/null +++ b/subworkflows/nf-core/utils_nfvalidation_plugin/main.nf @@ -0,0 +1,62 @@ +// +// Subworkflow that uses the nf-validation plugin to render help text and parameter summary +// + +/* +======================================================================================== + IMPORT NF-VALIDATION PLUGIN +======================================================================================== +*/ + +include { paramsHelp } from 'plugin/nf-validation' +include { paramsSummaryLog } from 'plugin/nf-validation' +include { validateParameters } from 'plugin/nf-validation' + 
+/* +======================================================================================== + SUBWORKFLOW DEFINITION +======================================================================================== +*/ + +workflow UTILS_NFVALIDATION_PLUGIN { + + take: + print_help // boolean: print help + workflow_command // string: default commmand used to run pipeline + pre_help_text // string: string to be printed before help text and summary log + post_help_text // string: string to be printed after help text and summary log + validate_params // boolean: validate parameters + schema_filename // path: JSON schema file, null to use default value + + main: + + log.debug "Using schema file: ${schema_filename}" + + // Default values for strings + pre_help_text = pre_help_text ?: '' + post_help_text = post_help_text ?: '' + workflow_command = workflow_command ?: '' + + // + // Print help message if needed + // + if (print_help) { + log.info pre_help_text + paramsHelp(workflow_command, parameters_schema: schema_filename) + post_help_text + System.exit(0) + } + + // + // Print parameter summary to stdout + // + log.info pre_help_text + paramsSummaryLog(workflow, parameters_schema: schema_filename) + post_help_text + + // + // Validate parameters relative to the parameter JSON schema + // + if (validate_params){ + validateParameters(parameters_schema: schema_filename) + } + + emit: + dummy_emit = true +} diff --git a/subworkflows/nf-core/utils_nfvalidation_plugin/meta.yml b/subworkflows/nf-core/utils_nfvalidation_plugin/meta.yml new file mode 100644 index 0000000..3d4a6b0 --- /dev/null +++ b/subworkflows/nf-core/utils_nfvalidation_plugin/meta.yml @@ -0,0 +1,44 @@ +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/subworkflows/yaml-schema.json +name: "UTILS_NFVALIDATION_PLUGIN" +description: Use nf-validation to initiate and validate a pipeline +keywords: + - utility + - pipeline + - initialise + - validation +components: [] +input: + - print_help: + type: boolean + description: | + Print help message and exit + - workflow_command: + type: string + description: | + The command to run the workflow e.g. "nextflow run main.nf" + - pre_help_text: + type: string + description: | + Text to print before the help message + - post_help_text: + type: string + description: | + Text to print after the help message + - validate_params: + type: boolean + description: | + Validate the parameters and error if invalid. + - schema_filename: + type: string + description: | + The filename of the schema to validate against. 
+output: + - dummy_emit: + type: boolean + description: | + Dummy emit to make nf-core subworkflows lint happy +authors: + - "@adamrtalbot" +maintainers: + - "@adamrtalbot" + - "@maxulysse" diff --git a/subworkflows/nf-core/utils_nfvalidation_plugin/tests/main.nf.test b/subworkflows/nf-core/utils_nfvalidation_plugin/tests/main.nf.test new file mode 100644 index 0000000..5784a33 --- /dev/null +++ b/subworkflows/nf-core/utils_nfvalidation_plugin/tests/main.nf.test @@ -0,0 +1,200 @@ +nextflow_workflow { + + name "Test Workflow UTILS_NFVALIDATION_PLUGIN" + script "../main.nf" + workflow "UTILS_NFVALIDATION_PLUGIN" + tag "subworkflows" + tag "subworkflows_nfcore" + tag "plugin/nf-validation" + tag "'plugin/nf-validation'" + tag "utils_nfvalidation_plugin" + tag "subworkflows/utils_nfvalidation_plugin" + + test("Should run nothing") { + + when { + + params { + monochrome_logs = true + test_data = '' + } + + workflow { + """ + help = false + workflow_command = null + pre_help_text = null + post_help_text = null + validate_params = false + schema_filename = "$moduleTestDir/nextflow_schema.json" + + input[0] = help + input[1] = workflow_command + input[2] = pre_help_text + input[3] = post_help_text + input[4] = validate_params + input[5] = schema_filename + """ + } + } + + then { + assertAll( + { assert workflow.success } + ) + } + } + + test("Should run help") { + + + when { + + params { + monochrome_logs = true + test_data = '' + } + workflow { + """ + help = true + workflow_command = null + pre_help_text = null + post_help_text = null + validate_params = false + schema_filename = "$moduleTestDir/nextflow_schema.json" + + input[0] = help + input[1] = workflow_command + input[2] = pre_help_text + input[3] = post_help_text + input[4] = validate_params + input[5] = schema_filename + """ + } + } + + then { + assertAll( + { assert workflow.success }, + { assert workflow.exitStatus == 0 }, + { assert workflow.stdout.any { it.contains('Input/output options') } }, + { assert workflow.stdout.any { it.contains('--outdir') } } + ) + } + } + + test("Should run help with command") { + + when { + + params { + monochrome_logs = true + test_data = '' + } + workflow { + """ + help = true + workflow_command = "nextflow run noorg/doesntexist" + pre_help_text = null + post_help_text = null + validate_params = false + schema_filename = "$moduleTestDir/nextflow_schema.json" + + input[0] = help + input[1] = workflow_command + input[2] = pre_help_text + input[3] = post_help_text + input[4] = validate_params + input[5] = schema_filename + """ + } + } + + then { + assertAll( + { assert workflow.success }, + { assert workflow.exitStatus == 0 }, + { assert workflow.stdout.any { it.contains('nextflow run noorg/doesntexist') } }, + { assert workflow.stdout.any { it.contains('Input/output options') } }, + { assert workflow.stdout.any { it.contains('--outdir') } } + ) + } + } + + test("Should run help with extra text") { + + + when { + + params { + monochrome_logs = true + test_data = '' + } + workflow { + """ + help = true + workflow_command = "nextflow run noorg/doesntexist" + pre_help_text = "pre-help-text" + post_help_text = "post-help-text" + validate_params = false + schema_filename = "$moduleTestDir/nextflow_schema.json" + + input[0] = help + input[1] = workflow_command + input[2] = pre_help_text + input[3] = post_help_text + input[4] = validate_params + input[5] = schema_filename + """ + } + } + + then { + assertAll( + { assert workflow.success }, + { assert workflow.exitStatus == 0 }, + { assert workflow.stdout.any { 
it.contains('pre-help-text') } }, + { assert workflow.stdout.any { it.contains('nextflow run noorg/doesntexist') } }, + { assert workflow.stdout.any { it.contains('Input/output options') } }, + { assert workflow.stdout.any { it.contains('--outdir') } }, + { assert workflow.stdout.any { it.contains('post-help-text') } } + ) + } + } + + test("Should validate params") { + + when { + + params { + monochrome_logs = true + test_data = '' + outdir = 1 + } + workflow { + """ + help = false + workflow_command = null + pre_help_text = null + post_help_text = null + validate_params = true + schema_filename = "$moduleTestDir/nextflow_schema.json" + + input[0] = help + input[1] = workflow_command + input[2] = pre_help_text + input[3] = post_help_text + input[4] = validate_params + input[5] = schema_filename + """ + } + } + + then { + assertAll( + { assert workflow.failed }, + { assert workflow.stdout.any { it.contains('ERROR ~ ERROR: Validation of pipeline parameters failed!') } } + ) + } + } +} diff --git a/subworkflows/nf-core/utils_nfvalidation_plugin/tests/nextflow_schema.json b/subworkflows/nf-core/utils_nfvalidation_plugin/tests/nextflow_schema.json new file mode 100644 index 0000000..7626c1c --- /dev/null +++ b/subworkflows/nf-core/utils_nfvalidation_plugin/tests/nextflow_schema.json @@ -0,0 +1,96 @@ +{ + "$schema": "http://json-schema.org/draft-07/schema", + "$id": "https://raw.githubusercontent.com/./master/nextflow_schema.json", + "title": ". pipeline parameters", + "description": "", + "type": "object", + "definitions": { + "input_output_options": { + "title": "Input/output options", + "type": "object", + "fa_icon": "fas fa-terminal", + "description": "Define where the pipeline should find input data and save output data.", + "required": ["outdir"], + "properties": { + "validate_params": { + "type": "boolean", + "description": "Validate parameters?", + "default": true, + "hidden": true + }, + "outdir": { + "type": "string", + "format": "directory-path", + "description": "The output directory where the results will be saved. 
You have to use absolute paths to storage on Cloud infrastructure.", + "fa_icon": "fas fa-folder-open" + }, + "test_data_base": { + "type": "string", + "default": "https://raw.githubusercontent.com/nf-core/test-datasets/modules", + "description": "Base for test data directory", + "hidden": true + }, + "test_data": { + "type": "string", + "description": "Fake test data param", + "hidden": true + } + } + }, + "generic_options": { + "title": "Generic options", + "type": "object", + "fa_icon": "fas fa-file-import", + "description": "Less common options for the pipeline, typically set in a config file.", + "help_text": "These options are common to all nf-core pipelines and allow you to customise some of the core preferences for how the pipeline runs.\n\nTypically these options would be set in a Nextflow config file loaded for all pipeline runs, such as `~/.nextflow/config`.", + "properties": { + "help": { + "type": "boolean", + "description": "Display help text.", + "fa_icon": "fas fa-question-circle", + "hidden": true + }, + "version": { + "type": "boolean", + "description": "Display version and exit.", + "fa_icon": "fas fa-question-circle", + "hidden": true + }, + "logo": { + "type": "boolean", + "default": true, + "description": "Display nf-core logo in console output.", + "fa_icon": "fas fa-image", + "hidden": true + }, + "singularity_pull_docker_container": { + "type": "boolean", + "description": "Pull Singularity container from Docker?", + "hidden": true + }, + "publish_dir_mode": { + "type": "string", + "default": "copy", + "description": "Method used to save pipeline results to output directory.", + "help_text": "The Nextflow `publishDir` option specifies which intermediate files should be saved to the output directory. This option tells the pipeline what method should be used to move these files. 
See [Nextflow docs](https://www.nextflow.io/docs/latest/process.html#publishdir) for details.", + "fa_icon": "fas fa-copy", + "enum": ["symlink", "rellink", "link", "copy", "copyNoFollow", "move"], + "hidden": true + }, + "monochrome_logs": { + "type": "boolean", + "description": "Use monochrome_logs", + "hidden": true + } + } + } + }, + "allOf": [ + { + "$ref": "#/definitions/input_output_options" + }, + { + "$ref": "#/definitions/generic_options" + } + ] +} diff --git a/subworkflows/nf-core/utils_nfvalidation_plugin/tests/tags.yml b/subworkflows/nf-core/utils_nfvalidation_plugin/tests/tags.yml new file mode 100644 index 0000000..60b1cff --- /dev/null +++ b/subworkflows/nf-core/utils_nfvalidation_plugin/tests/tags.yml @@ -0,0 +1,2 @@ +subworkflows/utils_nfvalidation_plugin: + - subworkflows/nf-core/utils_nfvalidation_plugin/** diff --git a/subworkflows/pfr/fasta_edta_lai/main.nf b/subworkflows/pfr/fasta_edta_lai/main.nf deleted file mode 100644 index 628e255..0000000 --- a/subworkflows/pfr/fasta_edta_lai/main.nf +++ /dev/null @@ -1,131 +0,0 @@ -include { CUSTOM_SHORTENFASTAIDS } from '../../../modules/pfr/custom/shortenfastaids/main' -include { EDTA_EDTA } from '../../../modules/pfr/edta/edta/main' -include { LTRRETRIEVER_LAI } from '../../../modules/pfr/ltrretriever/lai/main' -include { CUSTOM_RESTOREGFFIDS } from '../../../modules/pfr/custom/restoregffids/main' - -workflow FASTA_EDTA_LAI { - - take: - ch_fasta // channel: [ val(meta), fasta ] - ch_monoploid_seqs // channel: [ val(meta), txt ]; Optional: Set to [] if not needed - skip_lai // val; true|false - - main: - ch_versions = Channel.empty() - - // MOUDLE: CUSTOM_SHORTENFASTAIDS - CUSTOM_SHORTENFASTAIDS ( ch_fasta ) - - ch_short_ids_fasta = ch_fasta - | join(CUSTOM_SHORTENFASTAIDS.out.short_ids_fasta, by:0, remainder:true) - | map { meta, fasta, short_ids_fasta -> - if ( fasta ) { [ meta, short_ids_fasta ?: fasta ] } - } - - ch_short_ids_tsv = CUSTOM_SHORTENFASTAIDS.out.short_ids_tsv - ch_short_monoploid_seqs = ch_short_ids_tsv - | join( - ch_monoploid_seqs ?: Channel.empty() - ) - | map { meta, short_ids_tsv, monoploid_seqs -> - map_monoploid_seqs_to_new_ids(meta, short_ids_tsv, monoploid_seqs) - } - | collectFile(newLine:true) - | map { seqs -> - def id = seqs.name.split('.mapped.monoploid.seqs.txt')[0] - - [ [ id: id ], seqs ] - } - ch_versions = ch_versions.mix(CUSTOM_SHORTENFASTAIDS.out.versions.first()) - - // MODULE: EDTA_EDTA - EDTA_EDTA( - ch_short_ids_fasta, - [], - [], - [], - [] - ) - - ch_te_lib_fasta = EDTA_EDTA.out.te_lib_fasta - ch_pass_list = EDTA_EDTA.out.pass_list - ch_out_file = EDTA_EDTA.out.out_file - ch_te_anno_gff3 = EDTA_EDTA.out.te_anno_gff3 - ch_versions = ch_versions.mix(EDTA_EDTA.out.versions.first()) - - // MODULE: LAI - ch_lai_inputs = skip_lai - ? 
Channel.empty() - : ch_short_ids_fasta - | join(ch_pass_list) - | join(ch_out_file) - | map { meta, fasta, pass, out -> - [ meta.id, meta, fasta, pass, out ] - } - | join( - ch_short_monoploid_seqs - | map { meta, mono -> [ meta.id, mono ] }, - by:0, - remainder: true - ) - | map { id, meta, fasta, pass, out, mono -> - [ meta, fasta, pass, out, mono ?: [] ] - } - LTRRETRIEVER_LAI( - ch_lai_inputs.map { meta, fasta, pass, out, mono -> [ meta, fasta ] }, - ch_lai_inputs.map { meta, fasta, pass, out, mono -> pass }, - ch_lai_inputs.map { meta, fasta, pass, out, mono -> out }, - ch_lai_inputs.map { meta, fasta, pass, out, mono -> mono } - ) - - ch_lai_log = LTRRETRIEVER_LAI.out.log - ch_lai_out = LTRRETRIEVER_LAI.out.lai_out - ch_versions = ch_versions.mix(LTRRETRIEVER_LAI.out.versions.first()) - - // MODULE: CUSTOM_RESTOREGFFIDS - ch_restorable_gff_tsv = ch_te_anno_gff3.join(ch_short_ids_tsv) - - CUSTOM_RESTOREGFFIDS ( - ch_restorable_gff_tsv.map { meta, gff, tsv -> [ meta, gff ] }, - ch_restorable_gff_tsv.map { meta, gff, tsv -> tsv } - ) - - ch_restored_gff = ch_te_anno_gff3 - | join(CUSTOM_RESTOREGFFIDS.out.restored_ids_gff3, by:0, remainder:true) - | map { meta, gff, restored_gff -> [ meta, restored_gff ?: gff ] } - ch_versions = ch_versions.mix(CUSTOM_RESTOREGFFIDS.out.versions.first()) - - emit: - te_lib_fasta = ch_te_lib_fasta // channel: [ val(meta), fasta ] - te_anno_gff3 = ch_restored_gff // channel: [ val(meta), gff ] - lai_log = ch_lai_log // channel: [ val(meta), log ] - lai_out = ch_lai_out // channel: [ val(meta), out ] - versions = ch_versions // channel: [ versions.yml ] -} - -def map_monoploid_seqs_to_new_ids(meta, short_ids_tsv, monoploid_seqs) { - - def short_ids_head = short_ids_tsv.text.split('\n')[0] - - if (short_ids_head == "IDs have acceptable length and character. No change required.") { - return [ "${meta.id}.mapped.monoploid.seqs.txt" ] + monoploid_seqs.text.split('\n') - } - - def orig_to_new_ids = [:] - short_ids_tsv.text.eachLine { line -> - def (original_id, renamed_id) = line.split('\t') - orig_to_new_ids[original_id] = renamed_id - } - - def mapped_ids = [] - monoploid_seqs.text.eachLine { original_id -> - if (!orig_to_new_ids[original_id]) { - error "Faild to find $original_id in ${monoploid_seqs}" + - "The monoploid_seqs file is malformed!" 
- } - - mapped_ids.add(orig_to_new_ids[original_id]) - } - - return [ "${meta.id}.mapped.monoploid.seqs.txt" ] + mapped_ids -} diff --git a/subworkflows/pfr/fasta_edta_lai/tests/tags.yml b/subworkflows/pfr/fasta_edta_lai/tests/tags.yml deleted file mode 100644 index b114c58..0000000 --- a/subworkflows/pfr/fasta_edta_lai/tests/tags.yml +++ /dev/null @@ -1,2 +0,0 @@ -subworkflows/fasta_edta_lai: - - subworkflows/pfr/fasta_edta_lai/** diff --git a/tests/README.md b/tests/README.md new file mode 100644 index 0000000..9d62d34 --- /dev/null +++ b/tests/README.md @@ -0,0 +1,27 @@ +# **plant-food-research-open/genepal** Tests + +## Minimal Testing + +If [Nextflow](https://www.nextflow.io/docs/latest/install.html#install-nextflow) and [Docker](https://docs.docker.com/install) are installed on the system, the pipeline can be minimally tested with the following command: + +```bash +nextflow run plant-food-research-open/genepal -r main -profile docker,test --outdir results +``` + +Or using [singularity](https://docs.sylabs.io/guides/3.0/user-guide/installation.html): + +```bash +nextflow run plant-food-research-open/genepal -r main -profile singularity,test --outdir results +``` + +## Local Testing + +The test sets included in this directory can be executed by first downloading the pipeline from GitHub and then executing the following command: + +```bash +./main.nf -profile docker -params-file tests/minimal/params.json --max_cpus 8 --max_memory '32.GB' --outdir results +``` + +## Continuous Integration (CI) + +The GitHub [CI action](../.github/workflows/ci.yml) included with the pipeline continuously tests the pipeline with the various test sets listed in this directory. diff --git a/tests/minimal/assemblysheet.csv b/tests/minimal/assemblysheet.csv new file mode 100644 index 0000000..a77d36a --- /dev/null +++ b/tests/minimal/assemblysheet.csv @@ -0,0 +1,2 @@ +tag,fasta,is_masked +a_thaliana,https://raw.githubusercontent.com/Gaius-Augustus/BRAKER/f58479fe5bb13a9e51c3ca09cb9e137cab3b8471/example/genome.fa,yes diff --git a/tests/minimal/params.json b/tests/minimal/params.json new file mode 100644 index 0000000..c3e9566 --- /dev/null +++ b/tests/minimal/params.json @@ -0,0 +1,6 @@ +{ + "input": "tests/minimal/assemblysheet.csv", + "protein_evidence": "https://raw.githubusercontent.com/Gaius-Augustus/BRAKER/f58479fe5bb13a9e51c3ca09cb9e137cab3b8471/example/proteins.fa", + "braker_extra_args": "--gm_max_intergenic 10000 --skipOptimize", + "busco_lineage_datasets": "eudicots_odb10" +} diff --git a/tests/nonmasked/assemblysheet.csv b/tests/nonmasked/assemblysheet.csv new file mode 100644 index 0000000..9cf5f15 --- /dev/null +++ b/tests/nonmasked/assemblysheet.csv @@ -0,0 +1,2 @@ +tag,fasta,is_masked +a_chinensis,https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/eukaryotes/actinidia_chinensis/genome/chr1/genome.fasta.gz,no diff --git a/tests/nonmasked/params.json b/tests/nonmasked/params.json new file mode 100644 index 0000000..0c7a4bf --- /dev/null +++ b/tests/nonmasked/params.json @@ -0,0 +1,5 @@ +{ + "input": "tests/nonmasked/assemblysheet.csv", + "protein_evidence": "https://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/003/024/255/GCA_003024255.1_Red5_PS1_1.69.0/GCA_003024255.1_Red5_PS1_1.69.0_protein.faa.gz", + "busco_lineage_datasets": "eudicots_odb10" +} diff --git a/tests/short/assemblysheet.csv b/tests/short/assemblysheet.csv new file mode 100644 index 0000000..3b09d44 --- /dev/null +++ b/tests/short/assemblysheet.csv @@ -0,0 +1,2 @@ +tag,fasta,is_masked 
+sarscov2,https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/sarscov2/genome/genome.fasta.gz,no diff --git a/tests/short/params.json b/tests/short/params.json new file mode 100644 index 0000000..b808cab --- /dev/null +++ b/tests/short/params.json @@ -0,0 +1,5 @@ +{ + "input": "tests/short/assemblysheet.csv", + "protein_evidence": "https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/sarscov2/genome/proteome.fasta.gz", + "busco_skip": true +} diff --git a/tests/stub/target/donghong.chr1.fsa.gz b/tests/stub/assembly/donghong.chr1.fsa.gz similarity index 100% rename from tests/stub/target/donghong.chr1.fsa.gz rename to tests/stub/assembly/donghong.chr1.fsa.gz diff --git a/tests/stub/target/red5_v2p1_chr1.fasta.gz b/tests/stub/assembly/red5_v2p1_chr1.fasta.gz similarity index 100% rename from tests/stub/target/red5_v2p1_chr1.fasta.gz rename to tests/stub/assembly/red5_v2p1_chr1.fasta.gz diff --git a/tests/stub/target/red5_v3_chr1.fasta b/tests/stub/assembly/red5_v3_chr1.fasta similarity index 100% rename from tests/stub/target/red5_v3_chr1.fasta rename to tests/stub/assembly/red5_v3_chr1.fasta diff --git a/tests/stub/assembly/red7_v5_chr3.fasta b/tests/stub/assembly/red7_v5_chr3.fasta new file mode 100644 index 0000000..e69de29 diff --git a/tests/stub/assemblysheet.csv b/tests/stub/assemblysheet.csv index 7fdff29..07e072b 100644 --- a/tests/stub/assemblysheet.csv +++ b/tests/stub/assemblysheet.csv @@ -1,4 +1,5 @@ -tag,fasta,is_masked,te_lib,braker_gff3,braker_hints -red5_v2p1,tests/stub/target/red5_v2p1_chr1.fasta.gz,no,,tests/stub/braker/red5_v2p1.gff3.gz,tests/stub/braker/red5_v2p1.hints.gff.gz -donghong,tests/stub/target/donghong.chr1.fsa.gz,no,tests/stub/te_lib/donghong.TElib.fa.gz,tests/stub/braker/red5_v2p1.gff3.gz,tests/stub/braker/red5_v2p1.hints.gff.gz -red5_v3,tests/stub/target/red5_v3_chr1.fasta,yes +tag,fasta,is_masked,te_lib,braker_gff3,braker_hints,benchmark +red5_v2p1,tests/stub/assembly/red5_v2p1_chr1.fasta.gz,no,,tests/stub/braker/red5_v2p1.gff3.gz,tests/stub/braker/red5_v2p1.hints.gff.gz, +donghong,tests/stub/assembly/donghong.chr1.fsa.gz,no,tests/stub/te_lib/donghong.TElib.fa.gz,tests/stub/braker/red5_v2p1.gff3.gz,tests/stub/braker/red5_v2p1.hints.gff.gz +red5_v3,tests/stub/assembly/red5_v3_chr1.fasta,yes,,,,tests/stub/benchmark/red5_v3.benchmark.gff3 +red7_v5,tests/stub/assembly/red7_v5_chr3.fasta,no diff --git a/tests/stub/bam/1505KHS-0090_Root3.red5_v3.bam b/tests/stub/bam/1505KHS-0090_Root3.red5_v3.bam new file mode 100644 index 0000000..e69de29 diff --git a/tests/stub/bam/1505KHS-0092_Root3.red5_v3.bam b/tests/stub/bam/1505KHS-0092_Root3.red5_v3.bam new file mode 100644 index 0000000..e69de29 diff --git a/tests/stub/benchmark/red5_v3.benchmark.gff3 b/tests/stub/benchmark/red5_v3.benchmark.gff3 new file mode 100644 index 0000000..e69de29 diff --git a/tests/stub/orthofinderannotations.csv b/tests/stub/orthofinderannotations.csv new file mode 100644 index 0000000..0443c1c --- /dev/null +++ b/tests/stub/orthofinderannotations.csv @@ -0,0 +1,2 @@ +tag,fasta +arguta,tests/stub/proteins/arguta_proteins.faa diff --git a/tests/stub/params.json b/tests/stub/params.json index db97d3b..59730d3 100644 --- a/tests/stub/params.json +++ b/tests/stub/params.json @@ -1,10 +1,11 @@ { "input": "tests/stub/assemblysheet.csv", - "external_protein_fastas": "tests/stub/external-protein-fastas.txt", + "protein_evidence": "tests/stub/external-protein-fastas.txt", "eggnogmapper_db_dir": "tests/stub/emapperdb/5.0.2", "eggnogmapper_tax_scope": 
33090, - "fastq": "tests/stub/fastqsheet.csv", + "rna_evidence": "tests/stub/rnasheet.csv", "liftoff_annotations": "tests/stub/liftoffannotations.csv", + "orthofinder_annotations": "tests/stub/orthofinderannotations.csv", "max_cpus": 2, "max_memory": "3.GB" } diff --git a/tests/stub/proteins/arguta_proteins.faa b/tests/stub/proteins/arguta_proteins.faa new file mode 100644 index 0000000..e69de29 diff --git a/tests/stub/fastqsheet.csv b/tests/stub/rnasheet.csv similarity index 58% rename from tests/stub/fastqsheet.csv rename to tests/stub/rnasheet.csv index 0b7e223..610bfbc 100644 --- a/tests/stub/fastqsheet.csv +++ b/tests/stub/rnasheet.csv @@ -1,4 +1,6 @@ -sample,fastq_1,fastq_2,target_assemblies -Root1,tests/stub/fq/1505KHS-0090_Root1_162bp_C728RACXX_Lane1_R1.1k.fastq.gz,tests/stub/fq/1505KHS-0090_Root1_162bp_C728RACXX_Lane1_R2.1k.fastq.gz,red5_v2p1 -Root1,tests/stub/fq/1505KHS-0090_Root2_156bp_C728RACXX_Lane1_R1.1k.fastq.gz,tests/stub/fq/1505KHS-0090_Root2_156bp_C728RACXX_Lane1_R2.1k.fastq.gz,red5_v2p1 -cane3,tests/stub/fq/1505KHS-0090_cane3_165bp_C728RACXX_Lane1_R1.1k.fastq.gz,tests/stub/fq/1505KHS-0090_cane3_165bp_C728RACXX_Lane1_R2.1k.fastq.gz,red5_v2p1;donghong +sample,file_1,file_2,target_assemblies +Root1,tests/stub/fq/1505KHS-0090_Root1_162bp_C728RACXX_Lane1_R1.1k.fastq.gz,tests/stub/fq/1505KHS-0090_Root1_162bp_C728RACXX_Lane1_R2.1k.fastq.gz,red5_v2p1;red5_v3 +Root1,tests/stub/fq/1505KHS-0090_Root2_156bp_C728RACXX_Lane1_R1.1k.fastq.gz,tests/stub/fq/1505KHS-0090_Root2_156bp_C728RACXX_Lane1_R2.1k.fastq.gz,red5_v2p1;red5_v3 +Root3,tests/stub/bam/1505KHS-0090_Root3.red5_v3.bam,,red5_v3 +Root3,tests/stub/bam/1505KHS-0092_Root3.red5_v3.bam,,red5_v3 +cane3,tests/stub/fq/1505KHS-0090_cane3_165bp_C728RACXX_Lane1_R1.1k.fastq.gz,tests/stub/fq/1505KHS-0090_cane3_165bp_C728RACXX_Lane1_R2.1k.fastq.gz,red5_v2p1;donghong;red5_v3 diff --git a/workflows/genepal.nf b/workflows/genepal.nf new file mode 100644 index 0000000..18e4131 --- /dev/null +++ b/workflows/genepal.nf @@ -0,0 +1,263 @@ +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + IMPORT MODULES / SUBWORKFLOWS / FUNCTIONS +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +*/ + +include { PREPARE_ASSEMBLY } from '../subworkflows/local/prepare_assembly' +include { PREPROCESS_RNASEQ } from '../subworkflows/local/preprocess_rnaseq' +include { ALIGN_RNASEQ } from '../subworkflows/local/align_rnaseq' +include { PREPARE_EXT_PROTS } from '../subworkflows/local/prepare_ext_prots' +include { FASTA_BRAKER3 } from '../subworkflows/local/fasta_braker3' +include { FASTA_LIFTOFF } from '../subworkflows/local/fasta_liftoff' +include { PURGE_BRAKER_MODELS } from '../subworkflows/local/purge_braker_models' +include { GFF_MERGE_CLEANUP } from '../subworkflows/local/gff_merge_cleanup' +include { GFF_EGGNOGMAPPER } from '../subworkflows/local/gff_eggnogmapper' +include { PURGE_NOHIT_MODELS } from '../subworkflows/local/purge_nohit_models' +include { GFF_STORE } from '../subworkflows/local/gff_store' +include { FASTA_ORTHOFINDER } from '../subworkflows/local/fasta_orthofinder' +include { FASTA_GXF_BUSCO_PLOT } from '../subworkflows/gallvp/fasta_gxf_busco_plot/main' +include { CAT_CAT as SAVE_MARKED_GFF3 } from '../modules/nf-core/cat/cat/main' +include { softwareVersionsToYAML } from '../subworkflows/nf-core/utils_nfcore_pipeline' +include { GFFCOMPARE as BENCHMARK } from '../modules/nf-core/gffcompare/main' + +include { GXF_FASTA_AGAT_SPADDINTRONS_SPEXTRACTSEQUENCES } from 
'../subworkflows/gallvp/gxf_fasta_agat_spaddintrons_spextractsequences/main' + +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + RUN MAIN WORKFLOW +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +*/ + +workflow GENEPAL { + + take: + ch_target_assembly + ch_tar_assm_str + ch_is_masked + ch_te_library + ch_braker_annotation + ch_braker_ex_asm_str + ch_benchmark_gff + ch_rna_fq + ch_rna_bam + ch_rna_bam_by_assembly + ch_sortmerna_fastas + ch_ext_prot_fastas + ch_liftoff_fasta + ch_liftoff_gff + ch_tsebra_config + ch_orthofinder_pep + + + main: + // Versions channel + ch_versions = Channel.empty() + + // SUBWORKFLOW: PREPARE_ASSEMBLY + PREPARE_ASSEMBLY( + ch_target_assembly, + ch_te_library, + params.repeat_annotator, + params.repeatmasker_save_outputs, + ch_braker_ex_asm_str, + ch_is_masked + ) + + ch_valid_target_assembly = PREPARE_ASSEMBLY.out.target_assemby + ch_masked_target_assembly = PREPARE_ASSEMBLY.out.masked_target_assembly + ch_target_assemby_index = PREPARE_ASSEMBLY.out.target_assemby_index + ch_versions = ch_versions.mix(PREPARE_ASSEMBLY.out.versions) + + // SUBWORKFLOW: PREPROCESS_RNASEQ + PREPROCESS_RNASEQ( + ch_rna_fq, + ch_tar_assm_str, + ch_braker_ex_asm_str, + params.skip_fastqc, + params.skip_fastp, + params.save_trimmed, + params.min_trimmed_reads, + params.remove_ribo_rna, + ch_sortmerna_fastas + ) + + ch_trim_reads = PREPROCESS_RNASEQ.out.trim_reads + ch_reads_target = PREPROCESS_RNASEQ.out.reads_target + ch_versions = ch_versions.mix(PREPROCESS_RNASEQ.out.versions) + + // SUBWORKFLOW: ALIGN_RNASEQ + ALIGN_RNASEQ( + ch_reads_target, + ch_trim_reads, + ch_rna_bam_by_assembly, + ch_target_assemby_index, + ) + + ch_rnaseq_bam = ALIGN_RNASEQ.out.bam + ch_versions = ch_versions.mix(ALIGN_RNASEQ.out.versions) + + // MODULE: PREPARE_EXT_PROTS + PREPARE_EXT_PROTS( + ch_ext_prot_fastas + ) + + ch_ext_prots_fasta = PREPARE_EXT_PROTS.out.ext_prots_fasta + ch_versions = ch_versions.mix(PREPARE_EXT_PROTS.out.versions) + + // SUBWORKFLOW: FASTA_BRAKER3 + FASTA_BRAKER3( + ch_masked_target_assembly, + ch_braker_ex_asm_str, + ch_rnaseq_bam, + ch_ext_prots_fasta, + ch_braker_annotation + ) + + ch_braker_gff3 = FASTA_BRAKER3.out.braker_gff3 + ch_braker_hints = FASTA_BRAKER3.out.braker_hints + ch_versions = ch_versions.mix(FASTA_BRAKER3.out.versions) + + // SUBWORKFLOW: FASTA_LIFTOFF + FASTA_LIFTOFF( + ch_valid_target_assembly, + ch_liftoff_fasta, + ch_liftoff_gff, + params.filter_liftoff_by_hints, + ch_braker_hints, + ch_tsebra_config, + params.allow_isoforms + ) + + ch_liftoff_gff3 = FASTA_LIFTOFF.out.gff3 + ch_versions = ch_versions.mix(FASTA_LIFTOFF.out.versions) + + // SUBWORKFLOW: PURGE_BRAKER_MODELS + PURGE_BRAKER_MODELS( + ch_braker_gff3, + ch_braker_hints, + ch_liftoff_gff3, + ch_tsebra_config, + params.allow_isoforms + ) + + ch_braker_purged_gff = PURGE_BRAKER_MODELS.out.braker_purged_gff + ch_versions = ch_versions.mix(PURGE_BRAKER_MODELS.out.versions) + + // SUBWORKFLOW: GFF_MERGE_CLEANUP + GFF_MERGE_CLEANUP( + ch_braker_purged_gff, + ch_liftoff_gff3 + ) + + ch_merged_gff = GFF_MERGE_CLEANUP.out.gff + ch_versions = ch_versions.mix(GFF_MERGE_CLEANUP.out.versions) + + // SUBWORKFLOW: GFF_EGGNOGMAPPER + GFF_EGGNOGMAPPER( + ch_merged_gff, + ch_valid_target_assembly, + params.eggnogmapper_db_dir, + ) + + ch_eggnogmapper_hits = GFF_EGGNOGMAPPER.out.eggnogmapper_hits + ch_eggnogmapper_annotations = GFF_EGGNOGMAPPER.out.eggnogmapper_annotations + ch_versions = 
ch_versions.mix(GFF_EGGNOGMAPPER.out.versions) + + // SUBWORKFLOW: PURGE_NOHIT_MODELS + PURGE_NOHIT_MODELS( + ch_merged_gff, + ch_eggnogmapper_hits, + params.eggnogmapper_purge_nohits && params.eggnogmapper_db_dir + ) + + ch_purged_gff = PURGE_NOHIT_MODELS.out.purged_gff + ch_versions = ch_versions.mix(PURGE_NOHIT_MODELS.out.versions) + + // SUBWORKFLOW: GFF_STORE + GFF_STORE( + ch_purged_gff, + ch_eggnogmapper_annotations, + ch_valid_target_assembly, + params.eggnogmapper_db_dir + ) + + ch_final_gff = GFF_STORE.out.final_gff + ch_final_proteins = GFF_STORE.out.final_proteins + ch_versions = ch_versions.mix(GFF_STORE.out.versions) + + // SUBWORKFLOW: FASTA_ORTHOFINDER + FASTA_ORTHOFINDER( + ch_final_proteins, + ch_orthofinder_pep + ) + + ch_versions = ch_versions.mix(FASTA_ORTHOFINDER.out.versions) + + // SUBWORKFLOW: FASTA_GXF_BUSCO_PLOT + ch_busco_fasta = params.busco_skip + ? Channel.empty() + : ch_valid_target_assembly + + ch_busco_gff = params.busco_skip + ? Channel.empty() + : ch_final_gff + + FASTA_GXF_BUSCO_PLOT( + ch_busco_fasta, + ch_busco_gff, + 'genome', + params.busco_lineage_datasets?.tokenize(' '), + [], // val_busco_lineages_path + [] // val_busco_config + ) + + ch_versions = ch_versions.mix(FASTA_GXF_BUSCO_PLOT.out.versions) + + // SUBWORKFLOW: GXF_FASTA_AGAT_SPADDINTRONS_SPEXTRACTSEQUENCES + GXF_FASTA_AGAT_SPADDINTRONS_SPEXTRACTSEQUENCES( + ch_final_gff, + ch_valid_target_assembly + ) + + ch_splicing_marked_gff3 = GXF_FASTA_AGAT_SPADDINTRONS_SPEXTRACTSEQUENCES.out.marked_gff3 + ch_versions = ch_versions.mix(GXF_FASTA_AGAT_SPADDINTRONS_SPEXTRACTSEQUENCES.out.versions) + + // MODULE: CAT_CAT as SAVE_MARKED_GFF3 + SAVE_MARKED_GFF3 ( ch_splicing_marked_gff3 ) + + // MODULE: GFFCOMPARE as BENCHMARK + ch_benchmark_inputs = ch_final_gff + | join ( ch_valid_target_assembly ) + | join ( ch_benchmark_gff ) + + BENCHMARK ( + ch_benchmark_inputs.map { meta, gff, fasta, ref_gff -> [ meta, gff ] }, + ch_benchmark_inputs.map { meta, gff, fasta, ref_gff -> [ meta, fasta, [] ] }, + ch_benchmark_inputs.map { meta, gff, fasta, ref_gff -> [ meta, ref_gff ] } + ) + + ch_versions = ch_versions.mix(BENCHMARK.out.versions.first()) + + // Collate and save software versions + ch_versions = ch_versions + | unique + | map { yml -> + if ( yml ) { yml } + } + + ch_versions_yml = softwareVersionsToYAML(ch_versions) + | collectFile( + storeDir: "${params.outdir}/pipeline_info", + name: 'software_versions.yml', + sort: true, + newLine: true, + cache: false + ) +} + +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + THE END +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +*/ diff --git a/workflows/pangene.nf b/workflows/pangene.nf deleted file mode 100644 index b11a014..0000000 --- a/workflows/pangene.nf +++ /dev/null @@ -1,266 +0,0 @@ -include { fromSamplesheet; paramsSummaryLog } from 'plugin/nf-validation' -include { idFromFileName; validateFastqMetadata } from '../modules/local/utils' -include { PREPARE_ASSEMBLY } from '../subworkflows/local/prepare_assembly' -include { PREPROCESS_RNASEQ } from '../subworkflows/local/preprocess_rnaseq' -include { ALIGN_RNASEQ } from '../subworkflows/local/align_rnaseq' -include { PREPARE_EXT_PROTS } from '../subworkflows/local/prepare_ext_prots' -include { FASTA_BRAKER3 } from '../subworkflows/local/fasta_braker3' -include { FASTA_LIFTOFF } from '../subworkflows/local/fasta_liftoff' -include { PURGE_BREAKER_MODELS } from '../subworkflows/local/purge_breaker_models' -include { 
GFF_MERGE_CLEANUP } from '../subworkflows/local/gff_merge_cleanup' -include { GFF_EGGNOGMAPPER } from '../subworkflows/local/gff_eggnogmapper' -include { PURGE_NOHIT_MODELS } from '../subworkflows/local/purge_nohit_models' -include { GFF_STORE } from '../subworkflows/local/gff_store' -include { CUSTOM_DUMPSOFTWAREVERSIONS } from '../modules/nf-core/custom/dumpsoftwareversions' - -log.info paramsSummaryLog(workflow) - -workflow PANGENE { - - // Versions channel - ch_versions = Channel.empty() - - // Input channels - ch_input = Channel.fromSamplesheet('input') - - ch_target_assembly = ch_input - | map { it -> - def tag = it[0] - def fasta = it[1] - - [ [ id: tag ], file(fasta, checkIfExists: true) ] - } - - ch_tar_assm_str = ch_input - | map { it -> - def tag = it[0].strip() - - tag - } - | collect - | map { it -> - it.join(",") - } - - ch_is_masked = ch_input - | map { it -> - def tag = it[0] - def is_masked = it[2] - - [ [ id: tag ], is_masked == "yes" ] - } - - ch_te_library = ch_input - | map { it -> - def tag = it[0] - def te_fasta = it[3] - - if ( te_fasta ) { - [ [ id:tag ], file(te_fasta, checkIfExists: true) ] - } - } - - ch_braker_annotation = ch_input - | map { it -> - def tag = it[0] - def braker_gff3 = it[4] - def hints_gff = it[5] - - if ( braker_gff3 ) { - [ - [ id: tag ], - file(braker_gff3, checkIfExists: true), - file(hints_gff, checkIfExists: true) - ] - } - } - - ch_braker_ex_asm_str = ch_braker_annotation - | map { meta, braker_gff3, hints_gff -> meta.id } - | collect - | map { it.join(",") } - | ifEmpty( "" ) - - ch_reads = ! params.fastq - ? Channel.empty() - : Channel.fromSamplesheet('fastq') - | map { meta, fq1, fq2 -> - fq2 - ? [ meta + [ single_end: false ], [ file(fq1, checkIfExists:true), file(fq2, checkIfExists:true) ] ] - : [ meta + [ single_end: true ], [ file(fq1, checkIfExists:true) ] ] - } - | map { meta, fqs -> - [ meta.id, meta + [ target_assemblies: meta.target_assemblies.split(';').sort() ], fqs ] - } - | groupTuple - | combine(ch_tar_assm_str) - | map { id, metas, fqs, tar_assm_str -> - validateFastqMetadata(metas, fqs, tar_assm_str) - } - - ch_ribo_db = params.remove_ribo_rna - ? file(params.ribo_database_manifest, checkIfExists: true) - : null - - ch_sortmerna_fastas = ch_ribo_db - ? Channel.from(ch_ribo_db ? ch_ribo_db.readLines() : null) - | map { row -> file(row, checkIfExists: true) } - | collect - : Channel.empty() - - ch_ext_prot_fastas = ! params.external_protein_fastas - ? Channel.empty() - : Channel.fromPath(params.external_protein_fastas) - | splitText - | map { file_path -> - def file_handle = file(file_path.strip(), checkIfExists: true) - [ [ id: idFromFileName( file_handle.baseName ) ], file_handle ] - } - - ch_liftoff_mm = ! params.liftoff_annotations - ? Channel.empty() - : Channel.fromSamplesheet('liftoff_annotations') - | multiMap { fasta, gff -> - def fastaFile = file(fasta, checkIfExists:true) - - fasta: [ [ id: idFromFileName( fastaFile.baseName ) ], fastaFile ] - gff: [ [ id: idFromFileName( fastaFile.baseName ) ], file(gff, checkIfExists:true) ] - } - - ch_liftoff_fasta = params.liftoff_annotations - ? ch_liftoff_mm.fasta - : Channel.empty() - - ch_liftoff_gff = params.liftoff_annotations - ? ch_liftoff_mm.gff - : Channel.empty() - - val_tsebra_config = params.braker_allow_isoforms - ? 
"${projectDir}/assets/tsebra-default.cfg" - : "${projectDir}/assets/tsebra-1form.cfg" - - // SUBWORKFLOW: PREPARE_ASSEMBLY - PREPARE_ASSEMBLY( - ch_target_assembly, - ch_te_library, - params.repeat_annotator, - ch_braker_ex_asm_str, - ch_is_masked - ) - - ch_valid_target_assembly = PREPARE_ASSEMBLY.out.target_assemby - ch_masked_target_assembly = PREPARE_ASSEMBLY.out.masked_target_assembly - ch_target_assemby_index = PREPARE_ASSEMBLY.out.target_assemby_index - ch_versions = ch_versions.mix(PREPARE_ASSEMBLY.out.versions) - - // SUBWORKFLOW: PREPROCESS_RNASEQ - PREPROCESS_RNASEQ( - ch_reads, - ch_tar_assm_str, - ch_braker_ex_asm_str, - params.skip_fastqc, - params.skip_fastp, - params.save_trimmed, - params.min_trimmed_reads, - params.remove_ribo_rna, - ch_sortmerna_fastas - ) - - ch_trim_reads = PREPROCESS_RNASEQ.out.trim_reads - ch_reads_target = PREPROCESS_RNASEQ.out.reads_target - ch_versions = ch_versions.mix(PREPROCESS_RNASEQ.out.versions) - - // SUBWORKFLOW: ALIGN_RNASEQ - ALIGN_RNASEQ( - ch_reads_target, - ch_trim_reads, - ch_target_assemby_index, - ) - - ch_rnaseq_bam = ALIGN_RNASEQ.out.bam - ch_versions = ch_versions.mix(ALIGN_RNASEQ.out.versions) - - // MODULE: PREPARE_EXT_PROTS - PREPARE_EXT_PROTS( - ch_ext_prot_fastas - ) - - ch_ext_prots_fasta = PREPARE_EXT_PROTS.out.ext_prots_fasta - ch_versions = ch_versions.mix(PREPARE_EXT_PROTS.out.versions) - - // SUBWORKFLOW: FASTA_BRAKER3 - FASTA_BRAKER3( - ch_masked_target_assembly, - ch_braker_ex_asm_str, - ch_rnaseq_bam, - ch_ext_prots_fasta, - ch_braker_annotation - ) - - ch_braker_gff3 = FASTA_BRAKER3.out.braker_gff3 - ch_braker_hints = FASTA_BRAKER3.out.braker_hints - ch_versions = ch_versions.mix(FASTA_BRAKER3.out.versions) - - // SUBWORKFLOW: FASTA_LIFTOFF - FASTA_LIFTOFF( - ch_valid_target_assembly, - ch_liftoff_fasta, - ch_liftoff_gff - ) - - ch_liftoff_gff3 = FASTA_LIFTOFF.out.gff3 - ch_versions = ch_versions.mix(FASTA_LIFTOFF.out.versions) - - // SUBWORKFLOW: PURGE_BREAKER_MODELS - PURGE_BREAKER_MODELS( - ch_braker_gff3, - ch_braker_hints, - ch_liftoff_gff3, - val_tsebra_config, - params.braker_allow_isoforms - ) - - ch_braker_purged_gff = PURGE_BREAKER_MODELS.out.braker_purged_gff - ch_versions = ch_versions.mix(PURGE_BREAKER_MODELS.out.versions) - - // SUBWORKFLOW: GFF_MERGE_CLEANUP - GFF_MERGE_CLEANUP( - ch_braker_purged_gff, - ch_liftoff_gff3 - ) - - ch_merged_gff = GFF_MERGE_CLEANUP.out.gff - ch_versions = ch_versions.mix(GFF_MERGE_CLEANUP.out.versions) - - // SUBWORKFLOW: GFF_EGGNOGMAPPER - GFF_EGGNOGMAPPER( - ch_merged_gff, - ch_valid_target_assembly, - params.eggnogmapper_db_dir, - ) - - ch_eggnogmapper_hits = GFF_EGGNOGMAPPER.out.eggnogmapper_hits - ch_eggnogmapper_annotations = GFF_EGGNOGMAPPER.out.eggnogmapper_annotations - ch_versions = ch_versions.mix(GFF_EGGNOGMAPPER.out.versions) - - // SUBWORKFLOW: PURGE_NOHIT_MODELS - PURGE_NOHIT_MODELS( - ch_merged_gff, - ch_eggnogmapper_hits, - params.eggnogmapper_purge_nohits - ) - - ch_purged_gff = PURGE_NOHIT_MODELS.out.purged_gff - ch_versions = ch_versions.mix(PURGE_NOHIT_MODELS.out.versions) - - // SUBWORKFLOW: GFF_STORE - GFF_STORE( - ch_purged_gff, - ch_eggnogmapper_annotations - ) - - // MODULE: CUSTOM_DUMPSOFTWAREVERSIONS - CUSTOM_DUMPSOFTWAREVERSIONS ( - ch_versions.unique().collectFile(name: 'collated_versions.yml') - ) -}
[garbled hunk: only fragments of what appears to be the removed custom/dumpsoftwareversions test data survive — a versions table listing Process Name / Software Version, CUSTOM_DUMPSOFTWAREVERSIONS (python 3.11.7, yaml 5.4.1), TOOL1 tool1 0.11.9, TOOL2 tool2 1.9, Workflow Nextflow]
WorkflowNextflow12.922000 K (92.984097%)", - "single end (151 cycles)" ] - def log_text = [ "Q20 bases: 12922(92.9841%)", - "reads passed filter: 99" ] - def read_lines = ["@ERR5069949.2151832 NS500628:121:HK3MMAFX2:2:21208:10793:15304/1", - "TCATAAACCAAAGCACTCACAGTGTCAACAATTTCAGCAGGACAACGCCGACAAGTTCCGAGGAACATGTCTGGACCTATAGTTTTCATAAGTCTACACACTGAATTGAAATATTCTGGTTCTAGTGTGCCCTTAGTTAGCAATGTGCGT", - "AAAAAAEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEAAEEEEAEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEAAEEEEE - { assert path(process.out.reads.get(0).get(1)).linesGzip.contains(read_line) } - } - }, - { html_text.each { html_part -> - { assert path(process.out.html.get(0).get(1)).getText().contains(html_part) } - } - }, - { assert snapshot(process.out.json).match("test_fastp_single_end_json") }, - { log_text.each { log_part -> - { assert path(process.out.log.get(0).get(1)).getText().contains(log_part) } - } - }, - { - assert snapshot( - ( - [process.out.reads[0][0].toString()] + // meta - process.out.reads.collect { file(it[1]).getName() } + - process.out.json.collect { file(it[1]).getName() } + - process.out.html.collect { file(it[1]).getName() } + - process.out.log.collect { file(it[1]).getName() } + - process.out.reads_fail.collect { file(it[1]).getName() } + - process.out.reads_merged.collect { file(it[1]).getName() } - ).sort() - ).match("test_fastp_single_end-_match") - }, - { assert snapshot(process.out.versions).match("versions_single_end") } + { assert path(process.out.html.get(0).get(1)).getText().contains("single end (151 cycles)") }, + { assert path(process.out.log.get(0).get(1)).getText().contains("reads passed filter: 99") }, + { assert snapshot( + process.out.json, + process.out.reads, + process.out.reads_fail, + process.out.reads_merged, + process.out.versions).match() } ) } } - test("test_fastp_single_end-stub") { - - options '-stub' + test("test_fastp_paired_end") { when { - params { - outdir = "$outputDir" - } + process { """ adapter_fasta = [] + save_trimmed_pass = true save_trimmed_fail = false save_merged = false input[0] = Channel.of([ - [ id:'test', single_end:true ], - [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true) ] + [ id:'test', single_end:false ], // meta map + [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true) ] ]) - input[1] = adapter_fasta - input[2] = save_trimmed_fail - input[3] = save_merged + input[1] = [] + input[2] = false + input[3] = false + input[4] = false """ } } then { + assertAll( + { assert process.success }, + { assert path(process.out.html.get(0).get(1)).getText().contains("The input has little adapter percentage (~0.000000%), probably it's trimmed before.") }, + { assert path(process.out.log.get(0).get(1)).getText().contains("Q30 bases: 12281(88.3716%)") }, + { assert snapshot( + process.out.json, + process.out.reads, + process.out.reads_fail, + process.out.reads_merged, + process.out.versions).match() } + ) + } + } + test("fastp test_fastp_interleaved") { + + config './nextflow.interleaved.config' + when { + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:true ], // meta map + [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_interleaved.fastq.gz', checkIfExists: true) ] + ]) + input[1] = [] + input[2] = false + input[3] = false + input[4] = 
false + """ + } + } + + then { assertAll( { assert process.success }, - { - assert snapshot( - ( - [process.out.reads[0][0].toString()] + // meta - process.out.reads.collect { file(it[1]).getName() } + - process.out.json.collect { file(it[1]).getName() } + - process.out.html.collect { file(it[1]).getName() } + - process.out.log.collect { file(it[1]).getName() } + - process.out.reads_fail.collect { file(it[1]).getName() } + - process.out.reads_merged.collect { file(it[1]).getName() } - ).sort() - ).match("test_fastp_single_end-for_stub_match") - }, - { assert snapshot(process.out.versions).match("versions_single_end_stub") } + { assert path(process.out.html.get(0).get(1)).getText().contains("paired end (151 cycles + 151 cycles)") }, + { assert path(process.out.log.get(0).get(1)).getText().contains("reads passed filter: 162") }, + { assert process.out.reads_fail == [] }, + { assert process.out.reads_merged == [] }, + { assert snapshot( + process.out.reads, + process.out.json, + process.out.versions).match() } ) } } - test("test_fastp_paired_end") { + test("test_fastp_single_end_trim_fail") { when { - params { - outdir = "$outputDir" + + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:true ], // meta map + [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true) ] + ]) + input[1] = [] + input[2] = false + input[3] = true + input[4] = false + """ } + } + + then { + assertAll( + { assert process.success }, + { assert path(process.out.html.get(0).get(1)).getText().contains("single end (151 cycles)") }, + { assert path(process.out.log.get(0).get(1)).getText().contains("reads passed filter: 99") }, + { assert snapshot( + process.out.json, + process.out.reads, + process.out.reads_fail, + process.out.reads_merged, + process.out.versions).match() } + ) + } + } + + test("test_fastp_paired_end_trim_fail") { + + config './nextflow.save_failed.config' + when { process { """ - adapter_fasta = [] - save_trimmed_fail = false - save_merged = false + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true)] + ]) + input[1] = [] + input[2] = false + input[3] = true + input[4] = false + """ + } + } + then { + assertAll( + { assert process.success }, + { assert path(process.out.html.get(0).get(1)).getText().contains("The input has little adapter percentage (~0.000000%), probably it's trimmed before.") }, + { assert path(process.out.log.get(0).get(1)).getText().contains("reads passed filter: 162") }, + { assert snapshot( + process.out.reads, + process.out.reads_fail, + process.out.reads_merged, + process.out.json, + process.out.versions).match() } + ) + } + } + + test("test_fastp_paired_end_merged") { + + when { + process { + """ input[0] = Channel.of([ [ id:'test', single_end:false ], // meta map [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true), file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true) ] ]) - input[1] = adapter_fasta - input[2] = save_trimmed_fail - input[3] = save_merged + input[1] = [] + input[2] = false + input[3] = false + input[4] = true """ } } then { - def html_text = [ "Q20 bases:25.719000 K (93.033098%)", - "The input has little adapter 
percentage (~0.000000%), probably it's trimmed before."] - def log_text = [ "No adapter detected for read1", - "Q30 bases: 12281(88.3716%)"] - def json_text = ['"passed_filter_reads": 198'] - def read1_lines = ["@ERR5069949.2151832 NS500628:121:HK3MMAFX2:2:21208:10793:15304/1", - "TCATAAACCAAAGCACTCACAGTGTCAACAATTTCAGCAGGACAACGCCGACAAGTTCCGAGGAACATGTCTGGACCTATAGTTTTCATAAGTCTACACACTGAATTGAAATATTCTGGTTCTAGTGTGCCCTTAGTTAGCAATGTGCGT", - "AAAAAAEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEAAEEEEAEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEAAEEEEE - { assert path(process.out.reads.get(0).get(1).get(0)).linesGzip.contains(read1_line) } - } - }, - { read2_lines.each { read2_line -> - { assert path(process.out.reads.get(0).get(1).get(1)).linesGzip.contains(read2_line) } - } - }, - { html_text.each { html_part -> - { assert path(process.out.html.get(0).get(1)).getText().contains(html_part) } - } - }, - { json_text.each { json_part -> - { assert path(process.out.json.get(0).get(1)).getText().contains(json_part) } - } - }, - { log_text.each { log_part -> - { assert path(process.out.log.get(0).get(1)).getText().contains(log_part) } - } - }, - { - assert snapshot( - ( - [process.out.reads[0][0].toString()] + // meta - process.out.reads.collect { it[1].collect { item -> file(item).getName() } } + - process.out.json.collect { file(it[1]).getName() } + - process.out.html.collect { file(it[1]).getName() } + - process.out.log.collect { file(it[1]).getName() } + - process.out.reads_fail.collect { file(it[1]).getName() } + - process.out.reads_merged.collect { file(it[1]).getName() } - ).sort() - ).match("test_fastp_paired_end_match") - }, - { assert snapshot(process.out.versions).match("versions_paired_end") } + { assert path(process.out.html.get(0).get(1)).getText().contains("The input has little adapter percentage (~0.000000%), probably it's trimmed before.") }, + { assert path(process.out.log.get(0).get(1)).getText().contains("total reads: 75") }, + { assert snapshot( + process.out.json, + process.out.reads, + process.out.reads_fail, + process.out.reads_merged, + process.out.versions).match() }, ) } } - test("test_fastp_paired_end-stub") { - - options '-stub' + test("test_fastp_paired_end_merged_adapterlist") { when { - params { - outdir = "$outputDir" + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true) ] + ]) + input[1] = Channel.of([ file(params.modules_testdata_base_path + 'delete_me/fastp/adapters.fasta', checkIfExists: true) ]) + input[2] = false + input[3] = false + input[4] = true + """ } + } + + then { + assertAll( + { assert process.success }, + { assert path(process.out.html.get(0).get(1)).getText().contains("
") }, + { assert path(process.out.log.get(0).get(1)).getText().contains("total bases: 13683") }, + { assert snapshot( + process.out.json, + process.out.reads, + process.out.reads_fail, + process.out.reads_merged, + process.out.versions).match() } + ) + } + } + + test("test_fastp_single_end_qc_only") { + + when { process { """ - adapter_fasta = [] - save_trimmed_fail = false - save_merged = false + input[0] = Channel.of([ + [ id:'test', single_end:true ], + [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true) ] + ]) + input[1] = [] + input[2] = true + input[3] = false + input[4] = false + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert path(process.out.html.get(0).get(1)).getText().contains("single end (151 cycles)") }, + { assert path(process.out.log.get(0).get(1)).getText().contains("reads passed filter: 99") }, + { assert snapshot( + process.out.json, + process.out.reads, + process.out.reads, + process.out.reads_fail, + process.out.reads_fail, + process.out.reads_merged, + process.out.reads_merged, + process.out.versions).match() } + ) + } + } + test("test_fastp_paired_end_qc_only") { + + when { + process { + """ input[0] = Channel.of([ [ id:'test', single_end:false ], // meta map [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true), file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true) ] ]) - input[1] = adapter_fasta - input[2] = save_trimmed_fail - input[3] = save_merged + input[1] = [] + input[2] = true + input[3] = false + input[4] = false """ } } @@ -232,114 +301,99 @@ nextflow_process { then { assertAll( { assert process.success }, - { - assert snapshot( - ( - [process.out.reads[0][0].toString()] + // meta - process.out.reads.collect { it[1].collect { item -> file(item).getName() } } + - process.out.json.collect { file(it[1]).getName() } + - process.out.html.collect { file(it[1]).getName() } + - process.out.log.collect { file(it[1]).getName() } + - process.out.reads_fail.collect { file(it[1]).getName() } + - process.out.reads_merged.collect { file(it[1]).getName() } - ).sort() - ).match("test_fastp_paired_end-for_stub_match") - }, - { assert snapshot(process.out.versions).match("versions_paired_end-stub") } + { assert path(process.out.html.get(0).get(1)).getText().contains("The input has little adapter percentage (~0.000000%), probably it's trimmed before.") }, + { assert path(process.out.log.get(0).get(1)).getText().contains("Q30 bases: 12281(88.3716%)") }, + { assert snapshot( + process.out.json, + process.out.reads, + process.out.reads, + process.out.reads_fail, + process.out.reads_fail, + process.out.reads_merged, + process.out.reads_merged, + process.out.versions).match() } ) } } - test("fastp test_fastp_interleaved") { + test("test_fastp_single_end - stub") { + + options "-stub" - config './nextflow.interleaved.config' when { - params { - outdir = "$outputDir" + + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:true ], + [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true) ] + ]) + input[1] = [] + input[2] = false + input[3] = false + input[4] = false + """ } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + + test("test_fastp_paired_end - stub") { + + options "-stub" + + when { + process { """ adapter_fasta = [] + 
save_trimmed_pass = true save_trimmed_fail = false save_merged = false input[0] = Channel.of([ - [ id:'test', single_end:true ], // meta map - [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_interleaved.fastq.gz', checkIfExists: true) ] + [ id:'test', single_end:false ], // meta map + [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true) ] ]) - input[1] = adapter_fasta - input[2] = save_trimmed_fail - input[3] = save_merged + input[1] = [] + input[2] = false + input[3] = false + input[4] = false """ } } then { - def html_text = [ "Q20 bases:
25.719000 K (93.033098%)", - "paired end (151 cycles + 151 cycles)"] - def log_text = [ "Q20 bases: 12922(92.9841%)", - "reads passed filter: 162"] - def read_lines = [ "@ERR5069949.2151832 NS500628:121:HK3MMAFX2:2:21208:10793:15304/1", - "TCATAAACCAAAGCACTCACAGTGTCAACAATTTCAGCAGGACAACGCCGACAAGTTCCGAGGAACATGTCTGGACCTATAGTTTTCATAAGTCTACACACTGAATTGAAATATTCTGGTTCTAGTGTGCCCTTAGTTAGCAATGTGCGT", - "AAAAAAEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEAAEEEEAEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEAAEEEEE - { assert path(process.out.reads.get(0).get(1)).linesGzip.contains(read_line) } - } - }, - { html_text.each { html_part -> - { assert path(process.out.html.get(0).get(1)).getText().contains(html_part) } - } - }, - { assert snapshot(process.out.json).match("fastp test_fastp_interleaved_json") }, - { log_text.each { log_part -> - { assert path(process.out.log.get(0).get(1)).getText().contains(log_part) } - } - }, - { - assert snapshot( - ( - [process.out.reads[0][0].toString()] + // meta - process.out.reads.collect { file(it[1]).getName() } + - process.out.json.collect { file(it[1]).getName() } + - process.out.html.collect { file(it[1]).getName() } + - process.out.log.collect { file(it[1]).getName() } + - process.out.reads_fail.collect { file(it[1]).getName() } + - process.out.reads_merged.collect { file(it[1]).getName() } - ).sort() - ).match("test_fastp_interleaved-_match") - }, - { assert snapshot(process.out.versions).match("versions_interleaved") } + { assert snapshot(process.out).match() } ) } } - test("fastp test_fastp_interleaved-stub") { + test("fastp - stub test_fastp_interleaved") { - options '-stub' + options "-stub" config './nextflow.interleaved.config' when { - params { - outdir = "$outputDir" - } process { """ - adapter_fasta = [] - save_trimmed_fail = false - save_merged = false - input[0] = Channel.of([ [ id:'test', single_end:true ], // meta map [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_interleaved.fastq.gz', checkIfExists: true) ] ]) - input[1] = adapter_fasta - input[2] = save_trimmed_fail - input[3] = save_merged + input[1] = [] + input[2] = false + input[3] = false + input[4] = false """ } } @@ -347,277 +401,112 @@ nextflow_process { then { assertAll( { assert process.success }, - { - assert snapshot( - ( - [process.out.reads[0][0].toString()] + // meta - process.out.reads.collect { file(it[1]).getName() } + - process.out.json.collect { file(it[1]).getName() } + - process.out.html.collect { file(it[1]).getName() } + - process.out.log.collect { file(it[1]).getName() } + - process.out.reads_fail.collect { file(it[1]).getName() } + - process.out.reads_merged.collect { file(it[1]).getName() } - ).sort() - ).match("test_fastp_interleaved-for_stub_match") - }, - { assert snapshot(process.out.versions).match("versions_interleaved-stub") } + { assert snapshot(process.out).match() } ) } } - test("test_fastp_single_end_trim_fail") { + test("test_fastp_single_end_trim_fail - stub") { + + options "-stub" when { - params { - outdir = "$outputDir" - } + process { """ - adapter_fasta = [] - save_trimmed_fail = true - save_merged = false - input[0] = Channel.of([ [ id:'test', single_end:true ], // meta map [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true) ] ]) - input[1] = adapter_fasta - input[2] = save_trimmed_fail - input[3] = save_merged + input[1] = [] + input[2] = false + input[3] = true + input[4] = false """ } } then { - def html_text = [ "Q20 bases:12.922000 K 
(92.984097%)", - "single end (151 cycles)"] - def log_text = [ "Q20 bases: 12922(92.9841%)", - "reads passed filter: 99" ] - def read_lines = [ "@ERR5069949.2151832 NS500628:121:HK3MMAFX2:2:21208:10793:15304/1", - "TCATAAACCAAAGCACTCACAGTGTCAACAATTTCAGCAGGACAACGCCGACAAGTTCCGAGGAACATGTCTGGACCTATAGTTTTCATAAGTCTACACACTGAATTGAAATATTCTGGTTCTAGTGTGCCCTTAGTTAGCAATGTGCGT", - "AAAAAAEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEAAEEEEAEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEAAEEEEE - { assert path(process.out.reads.get(0).get(1)).linesGzip.contains(read_line) } - } - }, - { failed_read_lines.each { failed_read_line -> - { assert path(process.out.reads_fail.get(0).get(1)).linesGzip.contains(failed_read_line) } - } - }, - { html_text.each { html_part -> - { assert path(process.out.html.get(0).get(1)).getText().contains(html_part) } - } - }, - { assert snapshot(process.out.json).match("test_fastp_single_end_trim_fail_json") }, - { log_text.each { log_part -> - { assert path(process.out.log.get(0).get(1)).getText().contains(log_part) } - } - }, - { assert snapshot(process.out.versions).match("versions_single_end_trim_fail") } + { assert snapshot(process.out).match() } ) } } - test("test_fastp_paired_end_trim_fail") { + test("test_fastp_paired_end_trim_fail - stub") { + + options "-stub" config './nextflow.save_failed.config' when { - params { - outdir = "$outputDir" - } process { """ - adapter_fasta = [] - save_trimmed_fail = true - save_merged = false - input[0] = Channel.of([ [ id:'test', single_end:false ], // meta map [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true), file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true)] ]) - input[1] = adapter_fasta - input[2] = save_trimmed_fail - input[3] = save_merged + input[1] = [] + input[2] = false + input[3] = true + input[4] = false """ } } then { - def html_text = [ "Q20 bases:25.719000 K (93.033098%)", - "The input has little adapter percentage (~0.000000%), probably it's trimmed before."] - def log_text = [ "No adapter detected for read1", - "Q30 bases: 12281(88.3716%)"] - def json_text = ['"passed_filter_reads": 162'] - def read1_lines = ["@ERR5069949.2151832 NS500628:121:HK3MMAFX2:2:21208:10793:15304/1", - "TCATAAACCAAAGCACTCACAGTGTCAACAATTTCAGCAGGACAACGCCGACAAGTTCCGAGGAACATGTCTGGACCTATAGTTTTCATAAGTCTACACACTGAATTGAAATATTCTGGTTCTAGTGTGCCCTTAGTTAGCAATGTGCGT", - "AAAAAAEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEAAEEEEAEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEAAEEEEE - { assert path(process.out.reads.get(0).get(1).get(0)).linesGzip.contains(read1_line) } - } - }, - { read2_lines.each { read2_line -> - { assert path(process.out.reads.get(0).get(1).get(1)).linesGzip.contains(read2_line) } - } - }, - { failed_read2_lines.each { failed_read2_line -> - { assert path(process.out.reads_fail.get(0).get(1).get(2)).linesGzip.contains(failed_read2_line) } - } - }, - { html_text.each { html_part -> - { assert path(process.out.html.get(0).get(1)).getText().contains(html_part) } - } - }, - { json_text.each { json_part -> - { assert path(process.out.json.get(0).get(1)).getText().contains(json_part) } - } - }, - { log_text.each { log_part -> - { assert path(process.out.log.get(0).get(1)).getText().contains(log_part) } - } - }, - { assert snapshot(process.out.versions).match("versions_paired_end_trim_fail") } + { assert snapshot(process.out).match() } ) } } - test("test_fastp_paired_end_merged") { + 
test("test_fastp_paired_end_merged - stub") { + + options "-stub" when { - params { - outdir = "$outputDir" - } process { """ - adapter_fasta = [] - save_trimmed_fail = false - save_merged = true input[0] = Channel.of([ [ id:'test', single_end:false ], // meta map [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true), file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true) ] ]) - input[1] = adapter_fasta - input[2] = save_trimmed_fail - input[3] = save_merged + input[1] = [] + input[2] = false + input[3] = false + input[4] = true """ } } then { - def html_text = [ "
"] - def log_text = [ "Merged and filtered:", - "total reads: 75", - "total bases: 13683"] - def json_text = ['"merged_and_filtered": {', '"total_reads": 75', '"total_bases": 13683'] - def read1_lines = [ "@ERR5069949.1066259 NS500628:121:HK3MMAFX2:1:11312:18369:8333/1", - "CCTTATGACAGCAAGAACTGTGTATGATGATGGTGCTAGGAGAGTGTGGACACTTATGAATGTCTTGACACTCGTTTATAAAGTTTATTATGGTAATGCTTTAGATCAAGCCATTTCCATGTGGGCTCTTATAATCTCTGTTACTTC", - "AAAAAEAEEAEEEEEEEEEEEEEEEEAEEEEAEEEEEEEEAEEEEEEEEEEEEEEEEE/EAEEEEEE/6EEEEEEEEEEAEEAEEE/EE/AEEAEEEEEAEEEA/EEAAEAE - { assert path(process.out.reads.get(0).get(1).get(0)).linesGzip.contains(read1_line) } - } - }, - { read2_lines.each { read2_line -> - { assert path(process.out.reads.get(0).get(1).get(1)).linesGzip.contains(read2_line) } - } - }, - { read_merged_lines.each { read_merged_line -> - { assert path(process.out.reads_merged.get(0).get(1)).linesGzip.contains(read_merged_line) } - } - }, - { html_text.each { html_part -> - { assert path(process.out.html.get(0).get(1)).getText().contains(html_part) } - } - }, - { json_text.each { json_part -> - { assert path(process.out.json.get(0).get(1)).getText().contains(json_part) } - } - }, - { log_text.each { log_part -> - { assert path(process.out.log.get(0).get(1)).getText().contains(log_part) } - } - }, - { - assert snapshot( - ( - [process.out.reads[0][0].toString()] + // meta - process.out.reads.collect { it[1].collect { item -> file(item).getName() } } + - process.out.json.collect { file(it[1]).getName() } + - process.out.html.collect { file(it[1]).getName() } + - process.out.log.collect { file(it[1]).getName() } + - process.out.reads_fail.collect { file(it[1]).getName() } + - process.out.reads_merged.collect { file(it[1]).getName() } - ).sort() - ).match("test_fastp_paired_end_merged_match") - }, - { assert snapshot(process.out.versions).match("versions_paired_end_merged") } + { assert snapshot(process.out).match() } ) } } - test("test_fastp_paired_end_merged-stub") { + test("test_fastp_paired_end_merged_adapterlist - stub") { - options '-stub' + options "-stub" when { - params { - outdir = "$outputDir" - } process { """ - adapter_fasta = [] - save_trimmed_fail = false - save_merged = true - input[0] = Channel.of([ [ id:'test', single_end:false ], // meta map [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true), file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true) ] ]) - input[1] = adapter_fasta - input[2] = save_trimmed_fail - input[3] = save_merged + input[1] = Channel.of([ file(params.modules_testdata_base_path + 'delete_me/fastp/adapters.fasta', checkIfExists: true) ]) + input[2] = false + input[3] = false + input[4] = true """ } } @@ -625,101 +514,63 @@ nextflow_process { then { assertAll( { assert process.success }, - { - assert snapshot( - ( - [process.out.reads[0][0].toString()] + // meta - process.out.reads.collect { it[1].collect { item -> file(item).getName() } } + - process.out.json.collect { file(it[1]).getName() } + - process.out.html.collect { file(it[1]).getName() } + - process.out.log.collect { file(it[1]).getName() } + - process.out.reads_fail.collect { file(it[1]).getName() } + - process.out.reads_merged.collect { file(it[1]).getName() } - ).sort() - ).match("test_fastp_paired_end_merged-for_stub_match") - }, - { assert snapshot(process.out.versions).match("versions_paired_end_merged_stub") } + { assert snapshot(process.out).match() } ) } } - 
test("test_fastp_paired_end_merged_adapterlist") { + test("test_fastp_single_end_qc_only - stub") { + + options "-stub" when { - params { - outdir = "$outputDir" - } process { """ - adapter_fasta = Channel.of([ file(params.modules_testdata_base_path + 'delete_me/fastp/adapters.fasta', checkIfExists: true) ]) - save_trimmed_fail = false - save_merged = true + input[0] = Channel.of([ + [ id:'test', single_end:true ], + [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true) ] + ]) + input[1] = [] + input[2] = true + input[3] = false + input[4] = false + """ + } + } + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + + test("test_fastp_paired_end_qc_only - stub") { + + options "-stub" + + when { + process { + """ input[0] = Channel.of([ [ id:'test', single_end:false ], // meta map [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true), file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true) ] ]) - input[1] = adapter_fasta - input[2] = save_trimmed_fail - input[3] = save_merged + input[1] = [] + input[2] = true + input[3] = false + input[4] = false """ } } then { - def html_text = [ "
"] - def log_text = [ "Merged and filtered:", - "total reads: 75", - "total bases: 13683"] - def json_text = ['"merged_and_filtered": {', '"total_reads": 75', '"total_bases": 13683',"--adapter_fasta"] - def read1_lines = ["@ERR5069949.1066259 NS500628:121:HK3MMAFX2:1:11312:18369:8333/1", - "CCTTATGACAGCAAGAACTGTGTATGATGATGGTGCTAGGAGAGTGTGGACACTTATGAATGTCTTGACACTCGTTTATAAAGTTTATTATGGTAATGCTTTAGATCAAGCCATTTCCATGTGGGCTCTTATAATCTCTGTTACTTC", - "AAAAAEAEEAEEEEEEEEEEEEEEEEAEEEEAEEEEEEEEAEEEEEEEEEEEEEEEEE/EAEEEEEE/6EEEEEEEEEEAEEAEEE/EE/AEEAEEEEEAEEEA/EEAAEAE - { assert path(process.out.reads.get(0).get(1).get(0)).linesGzip.contains(read1_line) } - } - }, - { read2_lines.each { read2_line -> - { assert path(process.out.reads.get(0).get(1).get(1)).linesGzip.contains(read2_line) } - } - }, - { read_merged_lines.each { read_merged_line -> - { assert path(process.out.reads_merged.get(0).get(1)).linesGzip.contains(read_merged_line) } - } - }, - { html_text.each { html_part -> - { assert path(process.out.html.get(0).get(1)).getText().contains(html_part) } - } - }, - { json_text.each { json_part -> - { assert path(process.out.json.get(0).get(1)).getText().contains(json_part) } - } - }, - { log_text.each { log_part -> - { assert path(process.out.log.get(0).get(1)).getText().contains(log_part) } - } - }, - { assert snapshot(process.out.versions).match("versions_paired_end_merged_adapterlist") } + { assert snapshot(process.out).match() } ) } } -} +} \ No newline at end of file diff --git a/modules/nf-core/fastp/tests/main.nf.test.snap b/modules/nf-core/fastp/tests/main.nf.test.snap index 3e87628..54be7e4 100644 --- a/modules/nf-core/fastp/tests/main.nf.test.snap +++ b/modules/nf-core/fastp/tests/main.nf.test.snap @@ -1,55 +1,178 @@ { - "fastp test_fastp_interleaved_json": { + "test_fastp_single_end_qc_only - stub": { "content": [ - [ - [ - { - "id": "test", - "single_end": true - }, - "test.fastp.json:md5,b24e0624df5cc0b11cd5ba21b726fb22" + { + "0": [ + + ], + "1": [ + [ + { + "id": "test", + "single_end": true + }, + "test.fastp.json:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "2": [ + [ + { + "id": "test", + "single_end": true + }, + "test.fastp.html:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "3": [ + [ + { + "id": "test", + "single_end": true + }, + "test.fastp.log:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "4": [ + + ], + "5": [ + + ], + "6": [ + "versions.yml:md5,48ffc994212fb1fc9f83a74fa69c9f02" + ], + "html": [ + [ + { + "id": "test", + "single_end": true + }, + "test.fastp.html:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "json": [ + [ + { + "id": "test", + "single_end": true + }, + "test.fastp.json:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "log": [ + [ + { + "id": "test", + "single_end": true + }, + "test.fastp.log:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "reads": [ + + ], + "reads_fail": [ + + ], + "reads_merged": [ + + ], + "versions": [ + "versions.yml:md5,48ffc994212fb1fc9f83a74fa69c9f02" ] - ] + } ], "meta": { "nf-test": "0.8.4", - "nextflow": "23.10.1" + "nextflow": "24.04.2" }, - "timestamp": "2024-03-18T16:19:15.063001" + "timestamp": "2024-07-05T14:31:10.841098" }, - "test_fastp_paired_end_merged-for_stub_match": { + "test_fastp_paired_end": { "content": [ [ [ - "test_1.fastp.fastq.gz", - "test_2.fastp.fastq.gz" - ], - "test.fastp.html", - "test.fastp.json", - "test.fastp.log", - "test.merged.fastq.gz", - "{id=test, single_end=false}" + { + "id": "test", + "single_end": false + }, + "test.fastp.json:md5,1e0f8e27e71728e2b63fc64086be95cd" + ] + ], + [ + [ 
+ { + "id": "test", + "single_end": false + }, + [ + "test_1.fastp.fastq.gz:md5,67b2bbae47f073e05a97a9c2edce23c7", + "test_2.fastp.fastq.gz:md5,25cbdca08e2083dbd4f0502de6b62f39" + ] + ] + ], + [ + + ], + [ + + ], + [ + "versions.yml:md5,48ffc994212fb1fc9f83a74fa69c9f02" ] ], "meta": { "nf-test": "0.8.4", - "nextflow": "23.10.1" + "nextflow": "24.04.2" }, - "timestamp": "2024-01-17T18:10:13.467574" + "timestamp": "2024-07-05T13:43:28.665779" }, - "versions_interleaved": { + "test_fastp_paired_end_merged_adapterlist": { "content": [ + [ + [ + { + "id": "test", + "single_end": false + }, + "test.fastp.json:md5,5914ca3f21ce162123a824e33e8564f6" + ] + ], + [ + [ + { + "id": "test", + "single_end": false + }, + [ + "test_1.fastp.fastq.gz:md5,54b726a55e992a869fd3fa778afe1672", + "test_2.fastp.fastq.gz:md5,29d3b33b869f7b63417b8ff07bb128ba" + ] + ] + ], + [ + + ], + [ + [ + { + "id": "test", + "single_end": false + }, + "test.merged.fastq.gz:md5,c873bb1ab3fa859dcc47306465e749d5" + ] + ], [ "versions.yml:md5,48ffc994212fb1fc9f83a74fa69c9f02" ] ], "meta": { "nf-test": "0.8.4", - "nextflow": "23.10.1" + "nextflow": "24.04.2" }, - "timestamp": "2024-02-01T11:56:24.615634793" + "timestamp": "2024-07-05T13:44:18.210375" }, - "test_fastp_single_end_json": { + "test_fastp_single_end_qc_only": { "content": [ [ [ @@ -57,274 +180,1152 @@ "id": "test", "single_end": true }, - "test.fastp.json:md5,c852d7a6dba5819e4ac8d9673bedcacc" + "test.fastp.json:md5,5cc5f01e449309e0e689ed6f51a2294a" ] - ] - ], - "meta": { - "nf-test": "0.8.4", - "nextflow": "23.10.1" - }, - "timestamp": "2024-03-18T16:18:43.526412" - }, - "versions_paired_end": { - "content": [ + ], + [ + + ], + [ + + ], + [ + + ], + [ + + ], + [ + + ], + [ + + ], [ "versions.yml:md5,48ffc994212fb1fc9f83a74fa69c9f02" ] ], "meta": { "nf-test": "0.8.4", - "nextflow": "23.10.1" + "nextflow": "24.04.2" }, - "timestamp": "2024-02-01T11:55:42.333545689" + "timestamp": "2024-07-05T13:44:27.380974" }, - "test_fastp_paired_end_match": { + "test_fastp_paired_end_trim_fail": { "content": [ [ [ - "test_1.fastp.fastq.gz", - "test_2.fastp.fastq.gz" - ], - "test.fastp.html", - "test.fastp.json", - "test.fastp.log", - "{id=test, single_end=false}" - ] - ], - "meta": { - "nf-test": "0.8.4", - "nextflow": "23.10.1" - }, - "timestamp": "2024-02-01T12:03:06.431833729" - }, - "test_fastp_interleaved-_match": { - "content": [ + { + "id": "test", + "single_end": false + }, + [ + "test_1.fastp.fastq.gz:md5,6ff32a64c5188b9a9192be1398c262c7", + "test_2.fastp.fastq.gz:md5,db0cb7c9977e94ac2b4b446ebd017a8a" + ] + ] + ], [ - "test.fastp.fastq.gz", - "test.fastp.html", - "test.fastp.json", - "test.fastp.log", - "{id=test, single_end=true}" - ] - ], - "meta": { - "nf-test": "0.8.4", - "nextflow": "23.10.1" - }, - "timestamp": "2024-03-18T16:19:15.111894" - }, - "test_fastp_paired_end_merged_match": { - "content": [ + [ + { + "id": "test", + "single_end": false + }, + [ + "test.paired.fail.fastq.gz:md5,409b687c734cedd7a1fec14d316e1366", + "test_1.fail.fastq.gz:md5,4f273cf3159c13f79e8ffae12f5661f6", + "test_2.fail.fastq.gz:md5,f97b9edefb5649aab661fbc9e71fc995" + ] + ] + ], + [ + + ], [ [ - "test_1.fastp.fastq.gz", - "test_2.fastp.fastq.gz" - ], - "test.fastp.html", - "test.fastp.json", - "test.fastp.log", - "test.merged.fastq.gz", - "{id=test, single_end=false}" - ] - ], - "meta": { - "nf-test": "0.8.4", - "nextflow": "23.10.1" - }, - "timestamp": "2024-02-01T12:08:44.496251446" - }, - "versions_single_end_stub": { - "content": [ + { + "id": "test", + "single_end": false + }, + 
"test.fastp.json:md5,4c3268ddb50ea5b33125984776aa3519" + ] + ], [ "versions.yml:md5,48ffc994212fb1fc9f83a74fa69c9f02" ] ], "meta": { "nf-test": "0.8.4", - "nextflow": "23.10.1" + "nextflow": "24.04.2" }, - "timestamp": "2024-02-01T11:55:27.354051299" + "timestamp": "2024-07-05T13:43:58.749589" }, - "versions_interleaved-stub": { + "fastp - stub test_fastp_interleaved": { "content": [ - [ - "versions.yml:md5,48ffc994212fb1fc9f83a74fa69c9f02" - ] + { + "0": [ + [ + { + "id": "test", + "single_end": true + }, + "test.fastp.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "1": [ + [ + { + "id": "test", + "single_end": true + }, + "test.fastp.json:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "2": [ + [ + { + "id": "test", + "single_end": true + }, + "test.fastp.html:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "3": [ + [ + { + "id": "test", + "single_end": true + }, + "test.fastp.log:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "4": [ + + ], + "5": [ + + ], + "6": [ + "versions.yml:md5,48ffc994212fb1fc9f83a74fa69c9f02" + ], + "html": [ + [ + { + "id": "test", + "single_end": true + }, + "test.fastp.html:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "json": [ + [ + { + "id": "test", + "single_end": true + }, + "test.fastp.json:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "log": [ + [ + { + "id": "test", + "single_end": true + }, + "test.fastp.log:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "reads": [ + [ + { + "id": "test", + "single_end": true + }, + "test.fastp.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "reads_fail": [ + + ], + "reads_merged": [ + + ], + "versions": [ + "versions.yml:md5,48ffc994212fb1fc9f83a74fa69c9f02" + ] + } ], "meta": { "nf-test": "0.8.4", - "nextflow": "23.10.1" + "nextflow": "24.04.2" }, - "timestamp": "2024-02-01T11:56:46.535528418" + "timestamp": "2024-07-05T13:50:00.270029" }, - "versions_single_end_trim_fail": { + "test_fastp_single_end - stub": { "content": [ - [ - "versions.yml:md5,48ffc994212fb1fc9f83a74fa69c9f02" - ] + { + "0": [ + [ + { + "id": "test", + "single_end": true + }, + "test.fastp.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "1": [ + [ + { + "id": "test", + "single_end": true + }, + "test.fastp.json:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "2": [ + [ + { + "id": "test", + "single_end": true + }, + "test.fastp.html:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "3": [ + [ + { + "id": "test", + "single_end": true + }, + "test.fastp.log:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "4": [ + + ], + "5": [ + + ], + "6": [ + "versions.yml:md5,48ffc994212fb1fc9f83a74fa69c9f02" + ], + "html": [ + [ + { + "id": "test", + "single_end": true + }, + "test.fastp.html:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "json": [ + [ + { + "id": "test", + "single_end": true + }, + "test.fastp.json:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "log": [ + [ + { + "id": "test", + "single_end": true + }, + "test.fastp.log:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "reads": [ + [ + { + "id": "test", + "single_end": true + }, + "test.fastp.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "reads_fail": [ + + ], + "reads_merged": [ + + ], + "versions": [ + "versions.yml:md5,48ffc994212fb1fc9f83a74fa69c9f02" + ] + } ], "meta": { "nf-test": "0.8.4", - "nextflow": "23.10.1" + "nextflow": "24.04.2" }, - "timestamp": "2024-02-01T11:59:03.724591407" + "timestamp": "2024-07-05T13:49:42.502789" }, - "test_fastp_paired_end-for_stub_match": { + "test_fastp_paired_end_merged_adapterlist - stub": 
{ "content": [ - [ - [ - "test_1.fastp.fastq.gz", - "test_2.fastp.fastq.gz" + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + [ + "test_1.fastp.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940", + "test_2.fastp.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ] ], - "test.fastp.html", - "test.fastp.json", - "test.fastp.log", - "{id=test, single_end=false}" - ] + "1": [ + [ + { + "id": "test", + "single_end": false + }, + "test.fastp.json:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "2": [ + [ + { + "id": "test", + "single_end": false + }, + "test.fastp.html:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "3": [ + [ + { + "id": "test", + "single_end": false + }, + "test.fastp.log:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "4": [ + + ], + "5": [ + [ + { + "id": "test", + "single_end": false + }, + "test.merged.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "6": [ + "versions.yml:md5,48ffc994212fb1fc9f83a74fa69c9f02" + ], + "html": [ + [ + { + "id": "test", + "single_end": false + }, + "test.fastp.html:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "json": [ + [ + { + "id": "test", + "single_end": false + }, + "test.fastp.json:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "log": [ + [ + { + "id": "test", + "single_end": false + }, + "test.fastp.log:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "reads": [ + [ + { + "id": "test", + "single_end": false + }, + [ + "test_1.fastp.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940", + "test_2.fastp.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ] + ], + "reads_fail": [ + + ], + "reads_merged": [ + [ + { + "id": "test", + "single_end": false + }, + "test.merged.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "versions": [ + "versions.yml:md5,48ffc994212fb1fc9f83a74fa69c9f02" + ] + } ], "meta": { "nf-test": "0.8.4", - "nextflow": "23.10.1" + "nextflow": "24.04.2" }, - "timestamp": "2024-01-17T18:07:15.398827" + "timestamp": "2024-07-05T13:54:53.458252" }, - "versions_paired_end-stub": { + "test_fastp_paired_end_merged - stub": { "content": [ - [ - "versions.yml:md5,48ffc994212fb1fc9f83a74fa69c9f02" - ] + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + [ + "test_1.fastp.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940", + "test_2.fastp.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ] + ], + "1": [ + [ + { + "id": "test", + "single_end": false + }, + "test.fastp.json:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "2": [ + [ + { + "id": "test", + "single_end": false + }, + "test.fastp.html:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "3": [ + [ + { + "id": "test", + "single_end": false + }, + "test.fastp.log:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "4": [ + + ], + "5": [ + [ + { + "id": "test", + "single_end": false + }, + "test.merged.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "6": [ + "versions.yml:md5,48ffc994212fb1fc9f83a74fa69c9f02" + ], + "html": [ + [ + { + "id": "test", + "single_end": false + }, + "test.fastp.html:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "json": [ + [ + { + "id": "test", + "single_end": false + }, + "test.fastp.json:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "log": [ + [ + { + "id": "test", + "single_end": false + }, + "test.fastp.log:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "reads": [ + [ + { + "id": "test", + "single_end": false + }, + [ + "test_1.fastp.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940", + "test_2.fastp.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ] + ], + "reads_fail": [ + + ], + 
"reads_merged": [ + [ + { + "id": "test", + "single_end": false + }, + "test.merged.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "versions": [ + "versions.yml:md5,48ffc994212fb1fc9f83a74fa69c9f02" + ] + } ], "meta": { "nf-test": "0.8.4", - "nextflow": "23.10.1" + "nextflow": "24.04.2" }, - "timestamp": "2024-02-01T11:56:06.50017282" + "timestamp": "2024-07-05T13:50:27.689379" }, - "versions_single_end": { + "test_fastp_paired_end_merged": { "content": [ [ - "versions.yml:md5,48ffc994212fb1fc9f83a74fa69c9f02" - ] - ], - "meta": { - "nf-test": "0.8.4", - "nextflow": "23.10.1" - }, - "timestamp": "2024-02-01T11:55:07.67921647" - }, - "versions_paired_end_merged_stub": { - "content": [ + [ + { + "id": "test", + "single_end": false + }, + "test.fastp.json:md5,b712fd68ed0322f4bec49ff2a5237fcc" + ] + ], + [ + [ + { + "id": "test", + "single_end": false + }, + [ + "test_1.fastp.fastq.gz:md5,54b726a55e992a869fd3fa778afe1672", + "test_2.fastp.fastq.gz:md5,29d3b33b869f7b63417b8ff07bb128ba" + ] + ] + ], + [ + + ], + [ + [ + { + "id": "test", + "single_end": false + }, + "test.merged.fastq.gz:md5,c873bb1ab3fa859dcc47306465e749d5" + ] + ], [ "versions.yml:md5,48ffc994212fb1fc9f83a74fa69c9f02" ] ], "meta": { "nf-test": "0.8.4", - "nextflow": "23.10.1" + "nextflow": "24.04.2" }, - "timestamp": "2024-02-01T11:59:47.350653154" + "timestamp": "2024-07-05T13:44:08.68476" }, - "test_fastp_interleaved-for_stub_match": { + "test_fastp_paired_end - stub": { "content": [ - [ - "test.fastp.fastq.gz", - "test.fastp.html", - "test.fastp.json", - "test.fastp.log", - "{id=test, single_end=true}" - ] + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + [ + "test_1.fastp.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940", + "test_2.fastp.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ] + ], + "1": [ + [ + { + "id": "test", + "single_end": false + }, + "test.fastp.json:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "2": [ + [ + { + "id": "test", + "single_end": false + }, + "test.fastp.html:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "3": [ + [ + { + "id": "test", + "single_end": false + }, + "test.fastp.log:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "4": [ + + ], + "5": [ + + ], + "6": [ + "versions.yml:md5,48ffc994212fb1fc9f83a74fa69c9f02" + ], + "html": [ + [ + { + "id": "test", + "single_end": false + }, + "test.fastp.html:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "json": [ + [ + { + "id": "test", + "single_end": false + }, + "test.fastp.json:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "log": [ + [ + { + "id": "test", + "single_end": false + }, + "test.fastp.log:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "reads": [ + [ + { + "id": "test", + "single_end": false + }, + [ + "test_1.fastp.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940", + "test_2.fastp.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ] + ], + "reads_fail": [ + + ], + "reads_merged": [ + + ], + "versions": [ + "versions.yml:md5,48ffc994212fb1fc9f83a74fa69c9f02" + ] + } ], "meta": { "nf-test": "0.8.4", - "nextflow": "23.10.1" + "nextflow": "24.04.2" }, - "timestamp": "2024-01-17T18:08:06.127974" + "timestamp": "2024-07-05T13:49:51.679221" }, - "versions_paired_end_trim_fail": { + "test_fastp_single_end": { "content": [ + [ + [ + { + "id": "test", + "single_end": true + }, + "test.fastp.json:md5,c852d7a6dba5819e4ac8d9673bedcacc" + ] + ], + [ + [ + { + "id": "test", + "single_end": true + }, + "test.fastp.fastq.gz:md5,67b2bbae47f073e05a97a9c2edce23c7" + ] + ], + [ + + ], + [ + + ], [ 
"versions.yml:md5,48ffc994212fb1fc9f83a74fa69c9f02" ] ], "meta": { "nf-test": "0.8.4", - "nextflow": "23.10.1" + "nextflow": "24.04.2" }, - "timestamp": "2024-02-01T11:59:18.140484878" + "timestamp": "2024-07-05T13:43:18.834322" }, - "test_fastp_single_end-for_stub_match": { + "test_fastp_single_end_trim_fail - stub": { "content": [ - [ - "test.fastp.fastq.gz", - "test.fastp.html", - "test.fastp.json", - "test.fastp.log", - "{id=test, single_end=true}" - ] + { + "0": [ + [ + { + "id": "test", + "single_end": true + }, + "test.fastp.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "1": [ + [ + { + "id": "test", + "single_end": true + }, + "test.fastp.json:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "2": [ + [ + { + "id": "test", + "single_end": true + }, + "test.fastp.html:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "3": [ + [ + { + "id": "test", + "single_end": true + }, + "test.fastp.log:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "4": [ + [ + { + "id": "test", + "single_end": true + }, + "test.fail.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "5": [ + + ], + "6": [ + "versions.yml:md5,48ffc994212fb1fc9f83a74fa69c9f02" + ], + "html": [ + [ + { + "id": "test", + "single_end": true + }, + "test.fastp.html:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "json": [ + [ + { + "id": "test", + "single_end": true + }, + "test.fastp.json:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "log": [ + [ + { + "id": "test", + "single_end": true + }, + "test.fastp.log:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "reads": [ + [ + { + "id": "test", + "single_end": true + }, + "test.fastp.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "reads_fail": [ + [ + { + "id": "test", + "single_end": true + }, + "test.fail.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "reads_merged": [ + + ], + "versions": [ + "versions.yml:md5,48ffc994212fb1fc9f83a74fa69c9f02" + ] + } ], "meta": { "nf-test": "0.8.4", - "nextflow": "23.10.1" + "nextflow": "24.04.2" }, - "timestamp": "2024-01-17T18:06:00.244202" + "timestamp": "2024-07-05T14:05:36.898142" }, - "test_fastp_single_end-_match": { + "test_fastp_paired_end_trim_fail - stub": { "content": [ - [ - "test.fastp.fastq.gz", - "test.fastp.html", - "test.fastp.json", - "test.fastp.log", - "{id=test, single_end=true}" - ] + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + [ + "test_1.fastp.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940", + "test_2.fastp.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ] + ], + "1": [ + [ + { + "id": "test", + "single_end": false + }, + "test.fastp.json:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "2": [ + [ + { + "id": "test", + "single_end": false + }, + "test.fastp.html:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "3": [ + [ + { + "id": "test", + "single_end": false + }, + "test.fastp.log:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "4": [ + [ + { + "id": "test", + "single_end": false + }, + [ + "test.paired.fail.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940", + "test_1.fail.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940", + "test_2.fail.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ] + ], + "5": [ + + ], + "6": [ + "versions.yml:md5,48ffc994212fb1fc9f83a74fa69c9f02" + ], + "html": [ + [ + { + "id": "test", + "single_end": false + }, + "test.fastp.html:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "json": [ + [ + { + "id": "test", + "single_end": false + }, + "test.fastp.json:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "log": [ + [ + { + 
"id": "test", + "single_end": false + }, + "test.fastp.log:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "reads": [ + [ + { + "id": "test", + "single_end": false + }, + [ + "test_1.fastp.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940", + "test_2.fastp.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ] + ], + "reads_fail": [ + [ + { + "id": "test", + "single_end": false + }, + [ + "test.paired.fail.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940", + "test_1.fail.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940", + "test_2.fail.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ] + ], + "reads_merged": [ + + ], + "versions": [ + "versions.yml:md5,48ffc994212fb1fc9f83a74fa69c9f02" + ] + } ], "meta": { "nf-test": "0.8.4", - "nextflow": "23.10.1" + "nextflow": "24.04.2" }, - "timestamp": "2024-03-18T16:18:43.580336" + "timestamp": "2024-07-05T14:05:49.212847" }, - "versions_paired_end_merged_adapterlist": { + "fastp test_fastp_interleaved": { "content": [ + [ + [ + { + "id": "test", + "single_end": true + }, + "test.fastp.fastq.gz:md5,217d62dc13a23e92513a1bd8e1bcea39" + ] + ], + [ + [ + { + "id": "test", + "single_end": true + }, + "test.fastp.json:md5,b24e0624df5cc0b11cd5ba21b726fb22" + ] + ], [ "versions.yml:md5,48ffc994212fb1fc9f83a74fa69c9f02" ] ], "meta": { "nf-test": "0.8.4", - "nextflow": "23.10.1" + "nextflow": "24.04.2" }, - "timestamp": "2024-02-01T12:05:37.845370554" + "timestamp": "2024-07-05T13:43:38.910832" }, - "versions_paired_end_merged": { + "test_fastp_single_end_trim_fail": { "content": [ + [ + [ + { + "id": "test", + "single_end": true + }, + "test.fastp.json:md5,9a7ee180f000e8d00c7fb67f06293eb5" + ] + ], + [ + [ + { + "id": "test", + "single_end": true + }, + "test.fastp.fastq.gz:md5,67b2bbae47f073e05a97a9c2edce23c7" + ] + ], + [ + [ + { + "id": "test", + "single_end": true + }, + "test.fail.fastq.gz:md5,3e4aaadb66a5b8fc9b881bf39c227abd" + ] + ], + [ + + ], [ "versions.yml:md5,48ffc994212fb1fc9f83a74fa69c9f02" ] ], "meta": { "nf-test": "0.8.4", - "nextflow": "23.10.1" + "nextflow": "24.04.2" }, - "timestamp": "2024-02-01T11:59:32.860543858" + "timestamp": "2024-07-05T13:43:48.22378" }, - "test_fastp_single_end_trim_fail_json": { + "test_fastp_paired_end_qc_only": { "content": [ [ [ { "id": "test", - "single_end": true + "single_end": false }, - "test.fastp.json:md5,9a7ee180f000e8d00c7fb67f06293eb5" + "test.fastp.json:md5,623064a45912dac6f2b64e3f2e9901df" ] + ], + [ + + ], + [ + + ], + [ + + ], + [ + + ], + [ + + ], + [ + + ], + [ + "versions.yml:md5,48ffc994212fb1fc9f83a74fa69c9f02" ] ], "meta": { "nf-test": "0.8.4", - "nextflow": "23.10.1" + "nextflow": "24.04.2" + }, + "timestamp": "2024-07-05T13:44:36.334938" + }, + "test_fastp_paired_end_qc_only - stub": { + "content": [ + { + "0": [ + + ], + "1": [ + [ + { + "id": "test", + "single_end": false + }, + "test.fastp.json:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "2": [ + [ + { + "id": "test", + "single_end": false + }, + "test.fastp.html:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "3": [ + [ + { + "id": "test", + "single_end": false + }, + "test.fastp.log:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "4": [ + + ], + "5": [ + + ], + "6": [ + "versions.yml:md5,48ffc994212fb1fc9f83a74fa69c9f02" + ], + "html": [ + [ + { + "id": "test", + "single_end": false + }, + "test.fastp.html:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "json": [ + [ + { + "id": "test", + "single_end": false + }, + "test.fastp.json:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "log": [ + [ + { + "id": "test", + "single_end": 
false + }, + "test.fastp.log:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "reads": [ + + ], + "reads_fail": [ + + ], + "reads_merged": [ + + ], + "versions": [ + "versions.yml:md5,48ffc994212fb1fc9f83a74fa69c9f02" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" }, - "timestamp": "2024-01-17T18:08:41.942317" + "timestamp": "2024-07-05T14:31:27.096468" } } \ No newline at end of file diff --git a/modules/nf-core/fastqc/environment.yml b/modules/nf-core/fastqc/environment.yml index 1787b38..691d4c7 100644 --- a/modules/nf-core/fastqc/environment.yml +++ b/modules/nf-core/fastqc/environment.yml @@ -1,7 +1,5 @@ -name: fastqc channels: - conda-forge - bioconda - - defaults dependencies: - bioconda::fastqc=0.12.1 diff --git a/modules/nf-core/fastqc/main.nf b/modules/nf-core/fastqc/main.nf index 1fd7ac4..d8989f4 100644 --- a/modules/nf-core/fastqc/main.nf +++ b/modules/nf-core/fastqc/main.nf @@ -26,7 +26,10 @@ process FASTQC { def rename_to = old_new_pairs*.join(' ').join(' ') def renamed_files = old_new_pairs.collect{ old_name, new_name -> new_name }.join(' ') - def memory_in_mb = MemoryUnit.of("${task.memory}").toUnit('MB') + // The total amount of allocated RAM by FastQC is equal to the number of threads defined (--threads) time the amount of RAM defined (--memory) + // https://github.com/s-andrews/FastQC/blob/1faeea0412093224d7f6a07f777fad60a5650795/fastqc#L211-L222 + // Dividing the task.memory by task.cpu allows to stick to requested amount of RAM in the label + def memory_in_mb = MemoryUnit.of("${task.memory}").toUnit('MB') / task.cpus // FastQC memory value allowed range (100 - 10000) def fastqc_memory = memory_in_mb > 10000 ? 10000 : (memory_in_mb < 100 ? 100 : memory_in_mb) @@ -38,6 +41,7 @@ process FASTQC { fastqc \\ $args \\ --threads $task.cpus \\ + --memory $fastqc_memory \\ $renamed_files cat <<-END_VERSIONS > versions.yml diff --git a/modules/nf-core/fastqc/meta.yml b/modules/nf-core/fastqc/meta.yml index ee5507e..4827da7 100644 --- a/modules/nf-core/fastqc/meta.yml +++ b/modules/nf-core/fastqc/meta.yml @@ -16,35 +16,44 @@ tools: homepage: https://www.bioinformatics.babraham.ac.uk/projects/fastqc/ documentation: https://www.bioinformatics.babraham.ac.uk/projects/fastqc/Help/ licence: ["GPL-2.0-only"] + identifier: biotools:fastqc input: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test', single_end:false ] - - reads: - type: file - description: | - List of input FastQ files of size 1 and 2 for single-end and paired-end data, - respectively. + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - reads: + type: file + description: | + List of input FastQ files of size 1 and 2 for single-end and paired-end data, + respectively. output: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test', single_end:false ] - html: - type: file - description: FastQC report - pattern: "*_{fastqc.html}" + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.html": + type: file + description: FastQC report + pattern: "*_{fastqc.html}" - zip: - type: file - description: FastQC report archive - pattern: "*_{fastqc.zip}" + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - "*.zip": + type: file + description: FastQC report archive + pattern: "*_{fastqc.zip}" - versions: - type: file - description: File containing software versions - pattern: "versions.yml" + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" authors: - "@drpatelh" - "@grst" diff --git a/modules/nf-core/fastqc/tests/main.nf.test b/modules/nf-core/fastqc/tests/main.nf.test index 70edae4..e9d79a0 100644 --- a/modules/nf-core/fastqc/tests/main.nf.test +++ b/modules/nf-core/fastqc/tests/main.nf.test @@ -23,17 +23,14 @@ nextflow_process { then { assertAll ( - { assert process.success }, - - // NOTE The report contains the date inside it, which means that the md5sum is stable per day, but not longer than that. So you can't md5sum it. - // looks like this:
<div id="header_filename">Mon 2 Oct 2023<br/>test.gz</div>
- // https://github.com/nf-core/modules/pull/3903#issuecomment-1743620039 - - { assert process.out.html[0][1] ==~ ".*/test_fastqc.html" }, - { assert process.out.zip[0][1] ==~ ".*/test_fastqc.zip" }, - { assert path(process.out.html[0][1]).text.contains("
<tr><td>File type</td><td>Conventional base calls</td></tr>") },
[remainder of the modules/nf-core/fastqc/tests/main.nf.test diff garbled: only the repeated assertion text "File type / Conventional base calls" from the updated per-test HTML report checks survives]