Skip to content

Commit 0d7fdf6

Browse files
authored
Merge pull request #6 from cokelaer/dev
refactorise to remove modules, include resources
2 parents 4d0ff06 + c1bf859 commit 0d7fdf6

File tree

7 files changed

+122
-60
lines changed

7 files changed

+122
-60
lines changed

.github/workflows/main.yml

Lines changed: 7 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -3,17 +3,20 @@ name: Tests
33
on:
44
push:
55
branches:
6-
- master
6+
- main
7+
- dev
78
pull_request:
89
branches-ignore: []
10+
schedule:
11+
- cron: '0 0 * * SUN'
912

1013
jobs:
1114
build-linux:
1215
runs-on: ubuntu-latest
1316
strategy:
1417
max-parallel: 5
1518
matrix:
16-
python: [3.7,3.8]
19+
python: [3.7,3.8, 3.9]
1720
fail-fast: false
1821

1922

@@ -35,12 +38,11 @@ jobs:
3538
run: |
3639
# $CONDA is an environment variable pointing to the root of the miniconda directory
3740
echo $CONDA/bin >> $GITHUB_PATH
38-
conda update ruamel_yaml
41+
#conda update ruamel_yaml
3942
4043
- name: conda
4144
run: |
42-
conda install -c conda-forge mamba --quiet
43-
mamba install -c bioconda -c conda-forge --quiet -y fastqc falco graphviz
45+
conda install -c conda-forge -c bioconda --quiet -y python=${{ matrix.python }} fastqc falco graphviz
4446
4547
- name: Install dependencies
4648
run: |

.github/workflows/pypi.yml

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -8,9 +8,9 @@ on:
88
jobs:
99
build-n-publish:
1010
name: Build and publish to PyPI and TestPyPI
11-
runs-on: ubuntu-18.04
11+
runs-on: ubuntu-20.04
1212
steps:
13-
- uses: actions/checkout@master
13+
- uses: actions/checkout@main
1414
- name: Set up Python 3.7
1515
uses: actions/setup-python@v1
1616
with:
@@ -26,14 +26,14 @@ jobs:
2626
python setup.py sdist
2727
2828
- name: Publish distribution to Test PyPI
29-
uses: pypa/gh-action-pypi-publish@master
29+
uses: pypa/gh-action-pypi-publish@release/v1
3030
with:
3131
user: __token__
3232
password: ${{ secrets.TEST_PYPI_API_TOKEN }}
3333
repository_url: https://test.pypi.org/legacy/
3434
- name: Publish distribution to PyPI
3535
if: startsWith(github.ref, 'refs/tags')
36-
uses: pypa/gh-action-pypi-publish@master
36+
uses: pypa/gh-action-pypi-publish@release/v1
3737
with:
3838
user: __token__
3939
password: ${{ secrets.PYPI_API_TOKEN }}

sequana_pipelines/fastqc/config.yaml

Lines changed: 38 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,34 @@
1111
input_directory: '.'
1212
input_pattern: '*fastq.gz'
1313

14+
################################################################################
15+
# extra_prefixes_to_strip
16+
#
17+
# In most cases, the input_directory and input_pattern (and input_read_tag for paired analysis)
18+
# allow us to retrieve unique sample names. We automatically remove common prefixes for you.
19+
#
20+
# Sometimes, some prefixes are not common to all samples but still need to be removed
21+
# to get unique sample names or better output. You can provide extra prefixes to be removed
22+
# by uncommenting and filling the field extra_prefixes_to_strip.
23+
#
24+
# For instance, if you have files called prefix.mess.A.fastq.gz and prefix.B.fastq.gz
25+
# 'prefix.' will be removed automatically because it is common, but not 'mess'.
26+
# Use those prefixes in left-to-right order: ['prefix', 'mess'] or ['prefix.mess']
27+
#
28+
# extra_prefixes_to_strip: []
29+
30+
31+
################################################################################
32+
# sample_pattern
33+
#
34+
# You may have trailing words that are in sample names but not wanted. Consider
35+
# the filename A_mess.fastq.gz, you can get rid of _mess if it appears in all
36+
# samples using a pattern as follows: '{sample}_mess.fastq.gz'
37+
#
38+
# uncomment and fill to use this option. Be aware that prefixes are not removed
39+
# if you use sample_pattern
40+
#
41+
# sample_pattern: '{sample}_mess.fastq.gz'
1442

1543
##############################################################################
1644
# general section
@@ -28,10 +56,13 @@ general:
2856
fastqc:
2957
options: ''
3058
threads: 4
31-
59+
resources:
60+
mem: 8G
3261
falco:
3362
options: ''
3463
threads: 4
64+
resources:
65+
mem: 8G
3566

3667
##############################################################################
3768
#
@@ -45,6 +76,11 @@ multiqc:
4576
options: -p -f
4677
input_directory: "."
4778
modules: fastqc # falco is not set; the fastqc module works for falco
48-
config_file:
79+
config_file:
80+
resources:
81+
mem: 8G
4982

83+
plotting_and_stats:
84+
resources:
85+
mem: 8G
5086

sequana_pipelines/fastqc/fastqc.rules

Lines changed: 50 additions & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -19,35 +19,28 @@ import pandas as pd
1919
from sequana.utils.datatables_js import DataTable
2020
from sequana.utils.tree import HTMLDirectory
2121

22-
from sequana_pipetools import PipelineManagerGeneric
22+
from sequana_pipetools import PipelineManager
2323
from sequana_pipetools import snaketools as sm
2424

2525
# This must be defined before the include
2626
configfile: "config.yaml"
2727

28-
# A convenient manager
29-
def func(filename):
30-
return filename.split("/")[-1].split('.', 1)[0]
28+
manager = PipelineManager("fastqc", config)
3129

32-
manager = PipelineManagerGeneric("fastqc", config, sample_func=func)
33-
34-
# the manager can figure out whether data is paired or not but with input
35-
# sam/bam it is not for sure, so we need some additional simple code here:
36-
# we can try to figure out whether input data is paired.
37-
PAIRED = False
38-
39-
# do we have illumina paired data with tag _R1_
30+
# This is just for information. Not used in the pipeline but only for the HTML report
31+
# do we have illumina paired data with tag _R1_ ?
4032
R1 = [1 for x in manager.samples.values() if "_R1_" in x.split("/")[-1]]
4133
R2 = [1 for x in manager.samples.values() if "_R2_" in x.split("/")[-1]]
4234

35+
PAIRED = False
4336
if len(R1) == len(R2) and len(R1) != 0:
4437
PAIRED = True
4538
else:
4639
R1 = [1 for x in manager.samples.values() if "_1." in x.split("/")[-1]]
4740
R2 = [1 for x in manager.samples.values() if "_2." in x.split("/")[-1]]
4841
if len(R1) == len(R2) and len(R1) != 0:
4942
PAIRED = True
50-
manager.paired = PAIRED
43+
manager._paired = PAIRED
5144

5245

5346
# Some sanity checks
@@ -73,19 +66,40 @@ if 'general' in config and 'method_choice' in config['general'] and \
7366

7467
METHOD = "falco"
7568

76-
__falco__input = manager.getrawdata()
77-
__falco__ouptut = "samples/{sample}/summary.txt"
78-
include: sm.modules["falco"]
79-
__qc_done__ = expand(__falco__ouptut, sample=manager.samples)
69+
rule falco:
70+
input: manager.getrawdata()
71+
output: "samples/{sample}/summary.txt"
72+
log:
73+
"samples/{sample}/falco.log"
74+
threads:
75+
config['falco']['threads']
76+
params:
77+
options=config['falco']['options'],
78+
working_directory="samples/{sample}"
79+
resources:
80+
**config['falco']['resources']
81+
wrapper:
82+
"main/wrappers/falco"
83+
__multiqc__input = expand("samples/{sample}/summary.txt", sample=manager.samples)
8084

8185
else:
8286
METHOD = "fastqc"
83-
__fastqc__input = manager.getrawdata()
84-
__fastqc__output = "samples/{sample}/fastqc.done"
85-
__fastqc__log = "samples/{sample}/fastqc.log"
86-
__fastqc__wkdir = "samples/{sample}"
87-
include: sm.modules["fastqc"]
88-
__qc_done__ = expand(__fastqc__output, sample=manager.samples)
87+
88+
rule fastqc:
89+
input: manager.getrawdata()
90+
output: "samples/{sample}/fastqc.done"
91+
log:
92+
"samples/{sample}/fastqc.log"
93+
threads:
94+
config['fastqc']['threads']
95+
params:
96+
options=config['fastqc']['options'],
97+
working_directory="samples/{sample}"
98+
resources:
99+
**config['fastqc']['resources']
100+
wrapper:
101+
"main/wrappers/fastqc"
102+
__multiqc__input = expand("samples/{sample}/fastqc.done", sample=manager.samples)
89103

90104

91105
# define a list of files for the md5sum
@@ -130,43 +144,40 @@ comments += f"""<br><b><a href="https://github.com/sequana/sequana_pipetools">Se
130144
# Multiqc rule
131145
if config['multiqc']['do']:
132146

133-
if METHOD == "falco":
134-
__multiqc__input = expand("samples/{sample}/summary.txt", sample=manager.samples)
135-
else:
136-
__multiqc__input = expand(__qc_done__, sample=manager.samples)
137147
# do not specify fastqc itself alone, otherwise it fails (feb 2020)
138-
139148
config['multiqc']['options'] = config["multiqc"]["options"] + f" --comment '{comments}'"
140149

141150

142-
__multiqc__output = "multiqc/multiqc_report.html"
143151
rule multiqc:
144-
input:
152+
input:
145153
__multiqc__input
146-
output:
147-
__multiqc__output
148-
params:
154+
output:
155+
"multiqc/multiqc_report.html"
156+
params:
149157
options=config['multiqc']['options'],
150158
input_directory=config['multiqc']['input_directory'],
151159
config_file=config['multiqc']['config_file'],
152160
modules=config['multiqc']['modules']
153-
log:
161+
log:
154162
"multiqc/multiqc.log"
155-
wrapper:
163+
resources:
164+
**config["multiqc"]["resources"]
165+
wrapper:
156166
"main/wrappers/multiqc"
157167

158168

159169
# ====================================================================== rulegraph
160170
sequana_rulegraph_mapper = {}
161171
if config['multiqc']['do']:
162-
sequana_rulegraph_mapper["multiqc"] = f"../{__multiqc__output}"
172+
sequana_rulegraph_mapper["multiqc"] = "../multiqc/multiqc_report.html"
163173
include: sm.modules['rulegraph']
164174

165175

166-
167176
rule plotting_and_stats:
168-
input: __qc_done__
177+
input: expand("samples/{sample}/" + f"{METHOD}.done", sample=manager.samples)
169178
output: "outputs/summary.png", "outputs/summary.json"
179+
resources:
180+
**config["multiqc"]["resources"]
170181
run:
171182
import glob
172183
from sequana.fastqc import FastQC
@@ -229,7 +240,7 @@ onsuccess:
229240
manager.teardown()
230241

231242
if config['multiqc']['do']:
232-
manager.clean_multiqc(__multiqc__output)
243+
manager.clean_multiqc("multiqc/multiqc_report.html")
233244

234245
# Now, the main HTML report
235246

sequana_pipelines/fastqc/schema.yaml

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,9 @@ mapping:
2020
"threads":
2121
type: int
2222
required: True
23+
"resources":
24+
type: any
25+
required: true
2326

2427
"multiqc":
2528
type: map
@@ -34,6 +37,9 @@ mapping:
3437
type: str
3538
"input_directory":
3639
type: str
40+
"resources":
41+
type: any
42+
required: true
3743
"general":
3844
type: map
3945
mapping:
@@ -51,3 +57,13 @@ mapping:
5157
"threads":
5258
type: int
5359
required: True
60+
"resources":
61+
type: any
62+
required: true
63+
64+
"plotting_and_stats":
65+
type: map
66+
mapping:
67+
"resources":
68+
type: any
69+
required: true

setup.py

Lines changed: 4 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -6,8 +6,8 @@
66
import subprocess
77

88
_MAJOR = 1
9-
_MINOR = 4
10-
_MICRO = 2
9+
_MINOR = 5
10+
_MICRO = 0
1111
version = '%d.%d.%d' % (_MAJOR, _MINOR, _MICRO)
1212
release = '%d.%d' % (_MAJOR, _MINOR)
1313

@@ -68,22 +68,19 @@ def run(self):
6868
classifiers = metainfo['classifiers'],
6969

7070
# package installation
71-
packages = ["sequana_pipelines.fastqc",
72-
'sequana_pipelines.fastqc.data' ],
71+
packages = ["sequana_pipelines.fastqc"],
7372

7473
install_requires = open("requirements.txt").read(),
7574

7675
# This is recursive include of data files
7776
exclude_package_data = {"": ["__pycache__"]},
7877
package_data = {
79-
'': ['*.yaml', "*.rules", "*.json", "requirements.txt", "*png"],
80-
'sequana_pipelines.fastqc.data' : ['*.*'],
78+
'': ['*.yaml', "*.rules", "*.json", "requirements.txt", "*png", "*yml", "*smk"]
8179
},
8280

8381
zip_safe=False,
8482

8583
entry_points = {'console_scripts':[
86-
'sequana_pipelines_fastqc=sequana_pipelines.fastqc.main:main',
8784
'sequana_fastqc=sequana_pipelines.fastqc.main:main']
8885
}
8986

test/test_main.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@
1212
#
1313
def test_standalone_subprocess():
1414
directory = tempfile.TemporaryDirectory()
15-
cmd = "sequana_pipelines_fastqc --input-directory {} "
15+
cmd = "sequana_fastqc --input-directory {} "
1616
cmd += "--working-directory {} --run-mode local --force"
1717
cmd = cmd.format(sharedir, directory.name)
1818
subprocess.call(cmd.split())
@@ -31,7 +31,7 @@ def test_full():
3131
with tempfile.TemporaryDirectory() as directory:
3232
wk = directory
3333

34-
cmd = "sequana_pipelines_fastqc --input-directory {} "
34+
cmd = "sequana_fastqc --input-directory {} "
3535
cmd += "--working-directory {} --run-mode local --force"
3636
cmd = cmd.format(sharedir, wk)
3737
subprocess.call(cmd.split())
@@ -46,5 +46,5 @@ def test_full():
4646
assert os.path.exists(wk + "/multiqc/multiqc_report.html")
4747

4848
def test_version():
49-
cmd = "sequana_pipelines_fastqc --version"
49+
cmd = "sequana_fastqc --version"
5050
subprocess.call(cmd.split())

0 commit comments

Comments
 (0)