Skip to content

Commit

Permalink
trim random primer and store sequence in rS tag #54.
Browse files Browse the repository at this point in the history
  • Loading branch information
BuysDB committed Oct 17, 2019
1 parent 9af9b83 commit 676e747
Show file tree
Hide file tree
Showing 5 changed files with 45 additions and 14 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -325,6 +325,7 @@ def __init__(self,
barcodeRead=0, barcodeStart = 6, barcodeLength=8,
barcodeFileParser=None, barcodeFileAlias=None, indexFileParser=None,
indexFileAlias = 'illumina_merged_ThruPlex48S_RP',
random_primer_read = None, random_primer_length = 6,
**kwargs ):
self.description=''
self.barcodeFileAlias = barcodeFileAlias
Expand All @@ -340,7 +341,10 @@ def __init__(self,
self.barcodeLength = barcodeLength
self.autoDetectable = False

self.sequenceCapture = [slice(None) , slice(None) ] # ranges
self.random_primer_read = random_primer_read
self.random_primer_length = random_primer_length

self.sequenceCapture = [slice(None) , slice(None) ] # ranges to capture for read 1 and read 2
if umiLength==0:
# Barcode only
if barcodeStart!=0:
Expand All @@ -353,6 +357,16 @@ def __init__(self,
raise NotImplementedError('Complicated slice where we need to capture around a region')
self.sequenceCapture[barcodeRead] = slice( barcodeLength+umiLength, None)

if random_primer_read!=None:
if self.sequenceCapture[random_primer_read].stop is not None:
raise NotImplementedError()
self.sequenceCapture[random_primer_read] = slice(
self.sequenceCapture[random_primer_read].start,
-random_primer_length,
self.sequenceCapture[random_primer_read].step
)
self.random_primer_slice = slice(-random_primer_length,None,None)

def __repr__(self):
    """Return a one-line summary of this demultiplexing method.

    The summary lists the long name, the barcode as ``start:length``,
    the UMI as ``start:length`` and finally the free-text description.
    """
    barcode_span = f'{self.barcodeStart}:{self.barcodeLength}'
    umi_span = f'{self.umiStart}:{self.umiLength}'
    return f'{self.longName} bc: {barcode_span}, umi: {umi_span} {self.description}'

Expand All @@ -376,6 +390,9 @@ def demultiplex(self, records, **kwargs):
if barcodeIdentifier is None:
raise NonMultiplexable(f'bc:{rawBarcode}_not_matching_{self.barcodeFileAlias}')

random_primer = None
if self.random_primer_read!=None:
random_primer = records[self.random_primer_read].sequence[self.random_primer_slice]
if self.umiLength!=0:
umi = records[self.umiRead].sequence[self.umiStart:self.umiStart+self.umiLength]
umiQual = records[self.umiRead].qual[self.umiStart:self.umiStart+self.umiLength]
Expand Down Expand Up @@ -406,7 +423,11 @@ def demultiplex(self, records, **kwargs):
'BC':barcode
})
#tr.addTagByTag('hd', hammingDistance, isPhred=False)

if random_primer is not None:
tr.addTagByTag('rP',
random_primer,
isPhred=False,
make_safe=False)

tr.addTagByTag('QT', barcodeQual, isPhred=True)

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,9 @@ def __init__(self, barcodeFileParser, **kwargs ):
UmiBarcodeDemuxMethod.__init__(self,
umiRead=0, umiStart = 8, umiLength=4,
barcodeRead=0, barcodeStart = 0, barcodeLength=8,
random_primer_read=1, random_primer_length=6,
barcodeFileAlias = self.barcodeFileAlias ,barcodeFileParser=barcodeFileParser, **kwargs )
self.shortName = 'CS1C8U4'
self.longName = 'CELSeq 1, CB: 8bp, UMI: 4bp'
self.autoDetectable = True
self.description = 'R1 starts with a 8bp cell barcode followed by a 4bp UMI'
self.description = 'R1 starts with a 8bp cell barcode followed by a 4bp UMI. R2 ends with a 6bp random primer'
Original file line number Diff line number Diff line change
Expand Up @@ -7,11 +7,12 @@ def __init__(self, barcodeFileParser, **kwargs ):
UmiBarcodeDemuxMethod.__init__(self,
umiRead=0, umiStart = 0, umiLength=6,
barcodeRead=0, barcodeStart = 6, barcodeLength=8,
random_primer_read=1, random_primer_length=6,
barcodeFileAlias = self.barcodeFileAlias ,barcodeFileParser=barcodeFileParser, **kwargs )
self.shortName = 'CS2C8U6'
self.longName = 'CELSeq 2, CB: 8bp, UMI: 6bp'
self.autoDetectable = True
self.description = 'R1 starts with a 6bp UMI followed by a 8bp cell barcode'
self.description = 'R1 starts with a 6bp UMI followed by a 8bp cell barcode. R2 ends with a 6bp random primer'

# Reversed case:
class CELSeq2_c8_u6_swapped_reads(UmiBarcodeDemuxMethod):
Expand All @@ -20,11 +21,12 @@ def __init__(self, barcodeFileParser, **kwargs ):
UmiBarcodeDemuxMethod.__init__(self,
umiRead=1, umiStart = 0, umiLength=6,
barcodeRead=1, barcodeStart = 6, barcodeLength=8,
random_primer_read=0, random_primer_length=6,
barcodeFileAlias = self.barcodeFileAlias ,barcodeFileParser=barcodeFileParser, **kwargs )
self.shortName = 'CS2C8U6S'
self.longName = 'CELSeq 2, CB: 8bp, UMI: 6bp'
self.longName = 'CELSeq 2, CB: 8bp, UMI: 6bp, RP: 6bp'
self.autoDetectable = True
self.description = 'R2 starts with a 6bp UMI followed by a 8bp cell barcode'
self.description = 'R2 starts with a 6bp UMI followed by a 8bp cell barcode. R1 ends with a 6bp random primer'


# Cell seq 2 with 8bp UMI
Expand All @@ -33,12 +35,13 @@ def __init__(self, barcodeFileParser, **kwargs ):
self.barcodeFileAlias = 'celseq2'
UmiBarcodeDemuxMethod.__init__(self,
umiRead=0, umiStart = 0, umiLength=8,
random_primer_read=1, random_primer_length=6,
barcodeRead=0, barcodeStart = 8, barcodeLength=8,
barcodeFileAlias = self.barcodeFileAlias ,barcodeFileParser=barcodeFileParser, **kwargs )
self.shortName = 'CS2C8U8'
self.longName = 'CELSeq 2, CB: 8bp, UMI: 8bp'
self.autoDetectable = True
self.description = 'R1 starts with a longer 8bp UMI followed by a 8bp cell barcode'
self.description = 'R1 starts with a longer 8bp UMI followed by a 8bp cell barcode. R2 ends with a 6bp random primer'

# Reversed case:
class CELSeq2_c8_u8(UmiBarcodeDemuxMethod):
Expand All @@ -47,18 +50,20 @@ def __init__(self, barcodeFileParser, **kwargs ):
UmiBarcodeDemuxMethod.__init__(self,
umiRead=1, umiStart = 0, umiLength=8,
barcodeRead=1, barcodeStart = 8, barcodeLength=8,
random_primer_read=0, random_primer_length=6,
barcodeFileAlias = self.barcodeFileAlias ,barcodeFileParser=barcodeFileParser, **kwargs )
self.shortName = 'CS2C8U8S'
self.longName = 'CELSeq 2, CB: 8bp, UMI: 8bp'
self.autoDetectable = True
self.description = 'R2 starts with a longer 8bp UMI followed by a 8bp cell barcode'
self.description = 'R2 starts with a longer 8bp UMI followed by a 8bp cell barcode. R1 ends with a 6bp primer'

class CELSeq2_c8_u8_NNLAIII(UmiBarcodeDemuxMethod):
def __init__(self, barcodeFileParser, **kwargs ):
self.barcodeFileAlias = 'celseq2_noNla'
UmiBarcodeDemuxMethod.__init__(self,
umiRead=0, umiStart = 0, umiLength=8,
barcodeRead=0, barcodeStart = 8, barcodeLength=8,
random_primer_read=1, random_primer_length=6,
barcodeFileAlias = self.barcodeFileAlias ,barcodeFileParser=barcodeFileParser, **kwargs )
self.shortName = 'CS2C8U8NNLA'
self.longName = 'CELSeq 2, CB: 8bp, UMI: 8bp, NLAIII free'
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,12 +6,14 @@ def __init__(self, barcodeFileParser, **kwargs ):
self.barcodeFileAlias = 'lennart96NLA'
UmiBarcodeDemuxMethod.__init__(self,
umiRead=0, umiStart = 0, umiLength=3,
random_primer_read=1, random_primer_length=6,
barcodeRead=0, barcodeStart = 3, barcodeLength=8,

barcodeFileAlias = self.barcodeFileAlias ,barcodeFileParser=barcodeFileParser, **kwargs )
self.shortName = 'NLAIII96C8U3'
self.longName = 'NLAIII, 96well CB: 8bp UMI: 3bp'
self.longName = 'NLAIII, 96well CB: 8bp UMI: 3bp RP: 6bp'
self.autoDetectable = True
self.description = '96 well format. 3bp umi followed by 8bp barcode'
self.description = '96 well format. 3bp umi followed by 8bp barcode. R2 ends with a 6bp random primer'



Expand All @@ -22,11 +24,12 @@ def __init__(self, barcodeFileParser, **kwargs ):
UmiBarcodeDemuxMethod.__init__(self,
umiRead=0, umiStart = 0, umiLength=3,
barcodeRead=0, barcodeStart = 3, barcodeLength=8,
random_primer_read=1, random_primer_length=6,
barcodeFileAlias = self.barcodeFileAlias ,barcodeFileParser=barcodeFileParser, **kwargs )
self.shortName = 'NLAIII384C8U3'
self.longName = 'NLAIII, 384well CB: 8bp UMI: 3bp'
self.longName = 'NLAIII, 384well CB: 8bp UMI: 3bp RP:6bp'
self.autoDetectable = True
self.description = '384 well format. 3bp umi followed by 8bp barcode'
self.description = '384 well format. 3bp umi followed by 8bp barcode. R2 ends with a 6bp random primer'

def demultiplex(self, records, **kwargs):
if kwargs.get('probe') and records[0].sequence[self.barcodeLength+ self.umiLength : self.barcodeLength+ self.umiLength+4]!='CATG':
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,11 +8,12 @@ def __init__(self, barcodeFileParser, **kwargs ):
UmiBarcodeDemuxMethod.__init__(self,
umiRead=0, umiStart = 0, umiLength=3,
barcodeRead=0, barcodeStart = 3, barcodeLength=8,
random_primer_read=1, random_primer_length=6,
barcodeFileAlias = self.barcodeFileAlias ,barcodeFileParser=barcodeFileParser, **kwargs )
self.shortName = 'scCHIC384C8U3'
self.longName = 'Single cell CHIC, 384well CB: 8bp UMI: 3bp'
self.longName = 'Single cell CHIC, 384well CB: 8bp UMI: 3bp, RP: 6BP'
self.autoDetectable = True
self.description = '384 well format. 3bp umi followed by 8bp barcode and a single A'
self.description = '384 well format. 3bp umi followed by 8bp barcode and a single A. R2 ends with a 6bp random primer'

self.sequenceCapture[0] = slice( self.barcodeLength+ self.umiLength + 1, None) # dont capture the first base

Expand Down

0 comments on commit 676e747

Please sign in to comment.