Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

support RNA sequence in gb file #5

Merged
merged 7 commits into from
Jul 26, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 5 additions & 1 deletion packages/bio-parsers/src/genbankToJson.js
Original file line number Diff line number Diff line change
Expand Up @@ -338,7 +338,7 @@ function genbankToJson(string, options = {}) {

if (
j === 4 &&
(item.match(/ds-dna/i) || item.match(/ss-dna/i) || item.match(/dna/i))
(item.match(/ds-dna/i) || item.match(/ss-dna/i) || item.match(/dna/i) || item.match(/rna/i))
) {
if (options.isProtein === undefined) {
options.isProtein = false;
Expand All @@ -347,6 +347,9 @@ function genbankToJson(string, options = {}) {
if (item.match(/ss-dna/i)) {
options.isSingleStrandedDNA = true;
}
if (item.match(/rna/i) && !item.match(/ss-rna/i)) {
options.isDoubleStrandedRNA = true;
}
}

// Division
Expand All @@ -368,6 +371,7 @@ function genbankToJson(string, options = {}) {
result.parsedSequence.gbDivision = gbDivision;
result.parsedSequence.sequenceTypeFromLocus = options.sequenceTypeFromLocus;
result.parsedSequence.isSingleStrandedDNA = options.isSingleStrandedDNA;
result.parsedSequence.isDoubleStrandedRNA = options.isDoubleStrandedRNA;
result.parsedSequence.date = date;
result.parsedSequence.circular = circular;
}
Expand Down
4 changes: 2 additions & 2 deletions packages/bio-parsers/src/jsonToGenbank.js
Original file line number Diff line number Diff line change
Expand Up @@ -205,9 +205,9 @@ function createGenbankLocus(serSeq, options) {
if (serSeq.isProtein) {
dnaType = "";
} else if (serSeq.type === "RNA") {
dnaType = "RNA";
dnaType = serSeq?.doubleStranded ? 'RNA' : serSeq?.sequenceTypeFromLocus ?? "ss-RNA";
} else {
dnaType = serSeq?.sequenceTypeFromLocus ?? "DNA";
dnaType = serSeq?.doubleStranded ? 'DNA' : serSeq?.sequenceTypeFromLocus ?? "DNA";
}
const date = getCurrentDateString();

Expand Down
64 changes: 64 additions & 0 deletions packages/bio-parsers/test/genbankToJson.test.js
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,70 @@ ORIGIN
},
]);
});

// Verifies that the strandedness flags exposed on parsedSequence follow the
// molecule type written in the LOCUS line (ss-DNA, DNA, ss-RNA, RNA).
it(`correctly handles the single-stranded/double-stranded RNA/DNA in LOCUS line`, () => {
  // Parses a GenBank string and returns the parsedSequence of its first record.
  const parseFirstSequence = (genbankText) =>
    genbankToJson(genbankText)[0].parsedSequence;

  const singleStrandedDna = `LOCUS Tt2-PstI-SphI-rev(dna) 20 bp ss-DNA circular
04-FEB-2021
DEFINITION [Heavy] lalalal
more description here
and still more
ACCESSION Tt2-PstI-SphI-rev
VERSION Tt2-PstI-SphI-rev.0
KEYWORDS .
SOURCE Homo sapiens
ORGANISM Homo sapiens
.
COMMENT Chain:Heavy
Numbering:Kabat
AnnotationCategory:VREGION
Plasmid: pAETEST
ClonedAnnotationCategory:VREGION
ORIGIN
1 tcgcgcgttt cggtgatgac
//`;

  const singleStrandedRna = `LOCUS Tt2-PstI-SphI-rev(rna) 20 bp ss-RNA circular
04-FEB-2021
DEFINITION [Heavy] lalalal
more description here
and still more
ACCESSION Tt2-PstI-SphI-rev
VERSION Tt2-PstI-SphI-rev.0
KEYWORDS .
SOURCE Homo sapiens
ORGANISM Homo sapiens
.
COMMENT Chain:Heavy
Numbering:Kabat
AnnotationCategory:VREGION
Plasmid: pAETEST
ClonedAnnotationCategory:VREGION
ORIGIN
1 ucgcgcguuu cggugaugac
//`;

  // The double-stranded fixtures are the same records with the "ss-" prefix
  // dropped from the molecule type.
  const doubleStrandedDna = singleStrandedDna.replace('ss-DNA', 'DNA');
  const doubleStrandedRna = singleStrandedRna.replace('ss-RNA', 'RNA');

  // DNA: only the ss-DNA record sets isSingleStrandedDNA.
  parseFirstSequence(singleStrandedDna).isSingleStrandedDNA.should.equal(true);
  // Boolean(...) lets the check pass whether the flag is false or left unset.
  Boolean(
    parseFirstSequence(doubleStrandedDna).isSingleStrandedDNA
  ).should.equal(false);

  // RNA: only the plain RNA record sets isDoubleStrandedRNA.
  Boolean(
    parseFirstSequence(singleStrandedRna).isDoubleStrandedRNA
  ).should.equal(false);
  parseFirstSequence(doubleStrandedRna).isDoubleStrandedRNA.should.equal(true);
});



it(`correctly handles a multi-line DEFINITION converting it to description`, () => {
const string = `LOCUS Tt2-PstI-SphI-rev(dna) 7628 bp DNA circular
04-FEB-2021
Expand Down
48 changes: 48 additions & 0 deletions packages/bio-parsers/test/jsonToGenbank.test.js
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,54 @@ describe("genbank exporter/parser conversion", function () {
result[0].parsedSequence.features[0].start.should.equal(3);
result[0].parsedSequence.features[0].end.should.equal(29);
});

// Checks which molecule type jsonToGenbank writes for each combination of
// `type`, `doubleStranded` and `sequenceTypeFromLocus`. Note that the
// substring checks run against the whole exported string.
it(`should has ss-RNA/RNA/ss-DNA/DNA in the LOCUS line`, () => {
  const dnaSequence = 'agctttgggttt';
  const rnaSequence = 'agcuuuggguuu';

  // Wrapper so Array#map's extra (index, array) arguments are not forwarded
  // to jsonToGenbank.
  const exportToGenbank = (serSeq) => jsonToGenbank(serSeq);

  const [dsDnaByDefault, dsDnaByFlag, ssDnaByLocus] = [
    { sequence: dnaSequence, type: 'DNA' },
    { sequence: dnaSequence, doubleStranded: true, type: 'DNA' },
    { sequence: dnaSequence, sequenceTypeFromLocus: 'ss-DNA', type: 'DNA' },
  ].map(exportToGenbank);

  assert(dsDnaByDefault.includes('DNA'));
  assert(!dsDnaByDefault.includes('ss-DNA'));
  assert(dsDnaByFlag.includes('DNA'));
  assert(!dsDnaByFlag.includes('ss-DNA'));
  assert(ssDnaByLocus.includes('ss-DNA'));

  const [dsRnaByFlag, dsRnaByLocus, ssRnaByLocus] = [
    { sequence: rnaSequence, doubleStranded: true, type: 'RNA' },
    { sequence: rnaSequence, sequenceTypeFromLocus: 'RNA', type: 'RNA' },
    { sequence: rnaSequence, sequenceTypeFromLocus: 'ss-RNA', type: 'RNA' },
  ].map(exportToGenbank);

  assert(dsRnaByFlag.includes('RNA'));
  assert(!dsRnaByFlag.includes('ss-RNA'));
  assert(dsRnaByLocus.includes('RNA'));
  assert(!dsRnaByLocus.includes('ss-RNA'));
  assert(ssRnaByLocus.includes('ss-RNA'));
});

it(`should have a space at the 68 position in the genbank locus `, () => {
const string = jsonToGenbank({
sequence: "agagagagagag",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ export default function getComplementSequenceAndAnnotations(
options
);
const newSeqObj = Object.assign({}, seqObj, {
sequence: getComplementSequenceString(seqObj.sequence)
sequence: getComplementSequenceString(seqObj.sequence, seqObj.isRna)
});
return tidyUpSequenceData(newSeqObj, options);
};
6 changes: 4 additions & 2 deletions packages/sequence-utils/src/getComplementSequenceString.js
Original file line number Diff line number Diff line change
@@ -1,12 +1,14 @@
import DNAComplementMap from "./DNAComplementMap";
import { merge } from "lodash";


// ac.throw([ac.string,ac.bool],arguments);
export default function getComplementSequenceString(sequence) {
export default function getComplementSequenceString(sequence, isRna) {
// ac.throw([ac.string],arguments);
let complementSeqString = "";
const complementMap = merge(DNAComplementMap, isRna ? { a: 'u', A: 'U'} : {a: 't', A: 'T'});
for (let i = 0; i < sequence.length; i++) {
let complementChar = DNAComplementMap[sequence[i]];
let complementChar = complementMap[sequence[i]];
if (!complementChar) {
complementChar = sequence[i];
// throw new Error('trying to get the reverse compelement of an invalid base');
Expand Down
13 changes: 13 additions & 0 deletions packages/sequence-utils/src/getComplementSequenceString.test.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
import getComplementSequenceString from "./getComplementSequenceString";

import assert from "assert";

// Unit tests for getComplementSequenceString covering the RNA path
// (second argument `true`) and the default DNA path (second argument omitted).
describe('complement base should be shown correctly', () => {
  it('complement base should be shown correctly for RNA sequence', () => {
    // assert.strictEqual takes (actual, expected); keeping that order makes
    // failure messages report the right side as "expected".
    assert.strictEqual(getComplementSequenceString('AAU', true), 'UUA');
  });

  it('complement base should be shown correctly for DNA sequence', () => {
    // This case runs after the RNA case, so it also checks that the DNA
    // result is still produced following an RNA call.
    assert.strictEqual(getComplementSequenceString('AAT'), 'TTA');
  });
});
Loading