-
Notifications
You must be signed in to change notification settings - Fork 4
minor fixes 0525 #133
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
minor fixes 0525 #133
Changes from all commits
7022d1b
f6c1e92
d52d4c7
3f9a444
d43d9bd
2487fd3
6916948
6fc79f9
1a0bc62
5f432e2
efe2fbd
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -133,3 +133,4 @@ tests/data/sample-sheets/*/*/*.csv | |
|
||
# test output | ||
tests/data/output_dir/ | ||
tests/data/211021_A00000_0000_SAMPLE/ |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -19,6 +19,7 @@ class WorkflowFactory(): | |
ST_TO_IN_MAP = {PROTOCOL_NAME_ILLUMINA: ['standard_metag', | ||
'standard_metat', | ||
'absquant_metag', | ||
'abs_quant_metag', | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Given that this module already depends on […] Also, there still is no […] |
||
'absquant_metat'], | ||
PROTOCOL_NAME_TELLSEQ: ['tellseq_metag', | ||
'tellseq_absquant']} | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -28,7 +28,7 @@ | |
class InstrumentUtils(): | ||
types = {'A': 'NovaSeq 6000', 'D': 'HiSeq 2500', 'FS': 'iSeq', | ||
'K': 'HiSeq 4000', 'LH': 'NovaSeq X Plus', 'M': 'MiSeq', | ||
'MN': 'MiniSeq', | ||
'MN': 'MiniSeq', 'SH': 'MiSeq i100', | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I am worried about this one, not for this code, but for […] |
||
# SN – RapidRun which is HiSeq 2500 | ||
'SN': 'RapidRun'} | ||
|
||
|
@@ -911,8 +911,8 @@ def is_sample_sheet(sample_sheet_path): | |
|
||
return False | ||
|
||
def _generate_dummy_sample_sheet(self, first_read, last_read, | ||
indexed_reads, dummy_sample_id): | ||
def _generate_dummy_sample_sheet(self, index_cycles, non_index_cycles, | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. FWIW, the code expected (as this is how old RunInfo.xml files are formed) that the reads would be ordered like index1, read1, read2, index2; however, this is not the case in the new versions. Thus, I changed the method to actually be aware of which reads are index reads and which are non-index reads. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I am 100% in favor of anything that makes the nature of the inputs more explicit! However, I don't really understand what the inputs should be here. Is it possible to add a docstring for this function, maybe with an example input for each param? |
||
len_index, dummy_sample_id): | ||
# create object and initialize header | ||
sheet = AmpliconSampleSheet() | ||
sheet.Header['IEMFileVersion'] = '4' | ||
|
@@ -924,13 +924,13 @@ def _generate_dummy_sample_sheet(self, first_read, last_read, | |
sheet.Header['Chemistry'] = 'Amplicon' | ||
|
||
# generate override_cycles string | ||
tmp = [f"N{x['NumCycles']}" for x in indexed_reads] | ||
tmp = [f"N{index_cycles}" for i in range(len_index)] | ||
tmp = ';'.join(tmp) | ||
override_cycles = f"Y{first_read};{tmp};Y{last_read}" | ||
override_cycles = f"Y{non_index_cycles};{tmp};Y{non_index_cycles}" | ||
|
||
# set Reads and Settings according to input values | ||
# we'll get this from the code on the server | ||
sheet.Reads = [first_read, last_read] | ||
sheet.Reads = [non_index_cycles, non_index_cycles] | ||
sheet.Settings['OverrideCycles'] = override_cycles | ||
sheet.Settings['MaskShortReads'] = '1' | ||
sheet.Settings['CreateFastqForIndexReads'] = '1' | ||
|
@@ -973,32 +973,21 @@ def generate_dummy_sample_sheet(self, run_dir, output_fp): | |
else: | ||
raise ValueError("run_dir %s not found." % run_dir) | ||
|
||
# assumptions are first and last reads are non-indexed and there | ||
# are always two. Between them there is either 1 or 2 indexed | ||
# reads. If this is not true, raise an Error. | ||
|
||
if len(reads) < 3 or len(reads) > 4: | ||
# there must be a first and last read w/a minimum of one read | ||
# in the middle and maximum two in the middle. | ||
raise ValueError("RunInfo.xml contains abnormal reads.") | ||
|
||
first_read = reads.pop(0) | ||
last_read = reads.pop() | ||
|
||
if (first_read['IsIndexedRead'] is True or | ||
last_read['IsIndexedRead'] is True): | ||
# the assumptions are: if we have 3 reads we should only have 1 | ||
# index; and if we have 4 reads, 2 should be index | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. +1! |
||
index_reads = [r for r in reads if r['IsIndexedRead']] | ||
non_index_reads = [r for r in reads if not r['IsIndexedRead']] | ||
len_index_reads = len(index_reads) | ||
len_non_index_reads = len(non_index_reads) | ||
if len_non_index_reads != 2 or (len_index_reads != 1 and | ||
len_index_reads != 2): | ||
raise ValueError("RunInfo.xml contains abnormal reads.") | ||
|
||
# confirm the interior read(s) are indexed ones. | ||
for read in reads: | ||
if read['IsIndexedRead'] is False: | ||
raise ValueError("RunInfo.xml contains abnormal reads.") | ||
|
||
dummy_sample_id = basename(run_dir) + '_SMPL1' | ||
|
||
sheet = self._generate_dummy_sample_sheet(first_read['NumCycles'], | ||
last_read['NumCycles'], | ||
reads, dummy_sample_id) | ||
sheet = self._generate_dummy_sample_sheet( | ||
index_reads[0]['NumCycles'], | ||
non_index_reads[0]['NumCycles'], | ||
len_index_reads, dummy_sample_id) | ||
|
||
with open(output_fp, 'w') as f: | ||
sheet.write(f, 1) | ||
|
@@ -1009,17 +998,17 @@ def process_reads(reads): | |
# the contents of each Read element are highly regular. | ||
# for now, process w/out installing xml2dict or other | ||
# library into Qiita env. | ||
found = findall('<Read (.+?) />', reads) | ||
found = findall('<Read (.+?)/>', reads) | ||
|
||
results = [] | ||
for item in found: | ||
attributes = item.split(' ') | ||
attributes = item.strip().split(' ') | ||
d = {} | ||
for attribute in attributes: | ||
k, v = attribute.split('=') | ||
if k in ['NumCycles', 'Number']: | ||
v = int(v.strip('"')) | ||
elif k in ['IsIndexedRead']: | ||
elif k in ['IsIndexedRead', 'IsReverseComplement']: | ||
v = v.strip('"') | ||
v = False if v == 'N' else True | ||
else: | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,26 @@ | ||
<?xml version="1.0"?> | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Is this one from a NovaSeqXPlus? |
||
<RunInfo Version="6"> | ||
<Run Id="20250418_SH00252_0069_ASC2107005-SC3" Number="69"> | ||
<Flowcell>BWR98012-2217</Flowcell> | ||
<Instrument>SH00252</Instrument> | ||
<Date>2025-04-18T20:11:37Z</Date> | ||
<Reads> | ||
<Read Number="1" NumCycles="12" IsIndexedRead="Y" IsReverseComplement="N"/> | ||
<Read Number="2" NumCycles="151" IsIndexedRead="N" IsReverseComplement="N"/> | ||
<Read Number="3" NumCycles="151" IsIndexedRead="N" IsReverseComplement="N"/> | ||
</Reads> | ||
<FlowcellLayout LaneCount="1" SurfaceCount="1" SwathCount="9" TileCount="10"> | ||
<TileSet TileNamingConvention="FourDigit"> | ||
<Tiles> | ||
<Tile>1_1101</Tile> | ||
</Tiles> | ||
</TileSet> | ||
</FlowcellLayout> | ||
<ImageDimensions Width="858" Height="512"/> | ||
<ImageChannels> | ||
<Name>green</Name> | ||
<Name>blue</Name> | ||
</ImageChannels> | ||
</Run> | ||
</RunInfo> | ||
|
Uh oh!
There was an error while loading. Please reload this page.