Skip to content

Commit

Permalink
Merge pull request #1185 from nextstrain/v3
Browse files Browse the repository at this point in the history
  • Loading branch information
ivan-aksamentov authored Aug 30, 2023
2 parents 242d56f + eef91b1 commit 049883f
Show file tree
Hide file tree
Showing 560 changed files with 944,951 additions and 14,871 deletions.
9 changes: 9 additions & 0 deletions .cargo/config.toml
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,11 @@ codegen-units = 4096
incremental = true
lto = "off"

[profile.profiling]
inherits = "release"
debug = 1
strip = false

# Optimize dependencies even in debug mode
[profile.dev.package."*"]
opt-level = 2
Expand Down Expand Up @@ -189,6 +194,7 @@ rustflags = [
"-Aclippy::cognitive_complexity",
"-Aclippy::comparison-chain",
"-Aclippy::default_numeric_fallback",
"-Aclippy::deref_by_slicing",
"-Aclippy::doc_markdown",
"-Aclippy::else_if_without_else",
"-Aclippy::exhaustive_enums",
Expand Down Expand Up @@ -217,7 +223,9 @@ rustflags = [
"-Aclippy::mod_module_files",
"-Aclippy::module_inception",
"-Aclippy::module_name_repetitions",
"-Aclippy::modulo_arithmetic",
"-Aclippy::must_use_candidate",
"-Aclippy::needless_for_each",
"-Aclippy::new_without_default",
"-Aclippy::non_ascii_literal",
"-Aclippy::option_if_let_else",
Expand All @@ -243,6 +251,7 @@ rustflags = [
"-Aclippy::suboptimal_flops",
"-Aclippy::too_many_arguments",
"-Aclippy::too_many_lines",
"-Aclippy::type_repetition_in_bounds",
"-Aclippy::unnecessary_wraps",
"-Aclippy::unreachable",
"-Aclippy::unreadable_literal",
Expand Down
2 changes: 1 addition & 1 deletion .env.example
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ SYNC_DESTINATION=123.456.789.123:~/nextclade

# URL of Nextclade datasets server. See: https://github.com/neherlab/nextclade_data
# Replace this with `http://localhost:27722` to use local data server instead
DATA_FULL_DOMAIN=https://data.master.clades.nextstrain.org
DATA_FULL_DOMAIN=https://data.master.clades.nextstrain.org/v3
# DATA_FULL_DOMAIN=http://localhost:27722

# Directory path (relative to the root of the project) from which local data server takes the data.
Expand Down
12 changes: 5 additions & 7 deletions .github/workflows/cli.yml
Original file line number Diff line number Diff line change
Expand Up @@ -57,17 +57,17 @@ jobs:
- name: "Setup environment (release)"
if: endsWith(github.ref, '/release-cli')
run: |
echo "DATA_FULL_DOMAIN=https://data.clades.nextstrain.org" >> $GITHUB_ENV
echo "DATA_FULL_DOMAIN=https://data.clades.nextstrain.org/v3" >> $GITHUB_ENV
- name: "Setup environment (staging)"
if: endsWith(github.ref, '/staging-cli')
run: |
echo "DATA_FULL_DOMAIN=https://data.staging.clades.nextstrain.org" >> $GITHUB_ENV
echo "DATA_FULL_DOMAIN=https://data.staging.clades.nextstrain.org/v3" >> $GITHUB_ENV
- name: "Setup environment (master)"
if: ${{ !endsWith(github.ref, '/staging-cli') && !endsWith(github.ref, '/release-cli') }}
run: |
echo "DATA_FULL_DOMAIN=https://data.master.clades.nextstrain.org" >> $GITHUB_ENV
echo "DATA_FULL_DOMAIN=https://data.master.clades.nextstrain.org/v3" >> $GITHUB_ENV
- name: "Checkout code"
uses: actions/checkout@v3
Expand Down Expand Up @@ -106,7 +106,7 @@ jobs:
run: |
cp .env.example .env
sed -i -e "s|OSXCROSS_URL=http://example.com/osxcross/osxcross.tar.xz|OSXCROSS_URL=${{ secrets.OSXCROSS_URL }}|g" .env
sed -i -e "s|DATA_FULL_DOMAIN=https://data.master.clades.nextstrain.org|DATA_FULL_DOMAIN=${DATA_FULL_DOMAIN}|g" .env
sed -i -e "s|DATA_FULL_DOMAIN=https://data.master.clades.nextstrain.org/v3|DATA_FULL_DOMAIN=${DATA_FULL_DOMAIN}|g" .env
- name: "Login to Docker Hub"
uses: docker/login-action@v2
Expand Down Expand Up @@ -170,7 +170,6 @@ jobs:
run: |
cp .env.example .env
sed -i -e "s|OSXCROSS_URL=http://example.com/osxcross/osxcross.tar.xz|OSXCROSS_URL=${{ secrets.OSXCROSS_URL }}|g" .env
sed -i -e "s|DATA_FULL_DOMAIN=https://data.master.clades.nextstrain.org|DATA_FULL_DOMAIN=https://data.master.clades.nextstrain.org|g" .env
- name: "Run unit tests"
run: |
Expand Down Expand Up @@ -217,7 +216,6 @@ jobs:
run: |
cp .env.example .env
sed -i -e "s|OSXCROSS_URL=http://example.com/osxcross/osxcross.tar.xz|OSXCROSS_URL=${{ secrets.OSXCROSS_URL }}|g" .env
sed -i -e "s|DATA_FULL_DOMAIN=https://data.master.clades.nextstrain.org|DATA_FULL_DOMAIN=https://data.master.clades.nextstrain.org|g" .env
- name: "Run lints"
run: |
Expand Down Expand Up @@ -251,7 +249,7 @@ jobs:
- name: "Run smoke tests (linux)"
run: |
chmod +x ./.out/*
./tests/run-smoke-tests ./.out/nextclade-x86_64-unknown-linux-gnu
JOBS=2 ./tests/run-smoke-tests ./.out/nextclade-x86_64-unknown-linux-gnu
# run-smoke-tests-mac:
# name: "Run smoke tests (mac)"
Expand Down
6 changes: 3 additions & 3 deletions .github/workflows/web.yml
Original file line number Diff line number Diff line change
Expand Up @@ -37,23 +37,23 @@ jobs:
run: |
echo "ENV_NAME=release" >> $GITHUB_ENV
echo "FULL_DOMAIN=https://clades.nextstrain.org" >> $GITHUB_ENV
echo "DATA_FULL_DOMAIN=https://data.clades.nextstrain.org" >> $GITHUB_ENV
echo "DATA_FULL_DOMAIN=https://data.clades.nextstrain.org/v3" >> $GITHUB_ENV
echo "PLAUSIBLE_IO_DOMAIN=clades.nextstrain.org" >> $GITHUB_ENV
- name: "Setup environment (staging)"
if: endsWith(github.ref, '/staging')
run: |
echo "ENV_NAME=staging" >> $GITHUB_ENV
echo "FULL_DOMAIN=https://staging.clades.nextstrain.org" >> $GITHUB_ENV
echo "DATA_FULL_DOMAIN=https://data.staging.clades.nextstrain.org" >> $GITHUB_ENV
echo "DATA_FULL_DOMAIN=https://data.staging.clades.nextstrain.org/v3" >> $GITHUB_ENV
echo "PLAUSIBLE_IO_DOMAIN=staging.clades.nextstrain.org" >> $GITHUB_ENV
- name: "Setup environment (master)"
if: ${{ !endsWith(github.ref, '/staging') && !endsWith(github.ref, '/release') }}
run: |
echo "ENV_NAME=master" >> $GITHUB_ENV
echo "FULL_DOMAIN=https://master.clades.nextstrain.org" >> $GITHUB_ENV
echo "DATA_FULL_DOMAIN=https://data.master.clades.nextstrain.org" >> $GITHUB_ENV
echo "DATA_FULL_DOMAIN=https://data.master.clades.nextstrain.org/v3" >> $GITHUB_ENV
echo "PLAUSIBLE_IO_DOMAIN=master.clades.nextstrain.org" >> $GITHUB_ENV
- name: "Checkout code"
Expand Down
157 changes: 157 additions & 0 deletions 01cds.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,157 @@
from Bio import Seq
import numpy as np

# 012345678901234567890123456789012345678901234567890123
ref = 'TGATGCACAATCGTTTTTAAACGGGTTTGCGGTGTAAGTGCAGCCCGTCTTACA'

ref_aln = 'TGATGCACA---ATCGTTTTTAAACGGGTTTGCGGTGTAAGTGCAGCCCGTCTTACA'
qry_aln = '-GATGCACACGCATC---TTTAAACGGGTTTGCGGTGTCAGT---GCCCGTCTTACA'



cds_1 = [{'start':4, 'end':37}]
cds_2 = [
{'start':4, 'end':21},
{'start':20, 'end':39}, # slippage at position 20 -- 20 is read twice
{'start':45, 'end':51}, # another cds segment
]



def extract_cds_sequence(cds_annotation, seq):
'''extract a cds from a raw sequence'''
cds = ''
for segment in cds_annotation:
cds += seq[segment['start']:segment['end']]

return cds

def extract_cds_alignment(cds_annotation, seq_aln, coord_map):
cds_aln = ''
cds_to_aln = []
rta = coord_map['ref_to_aln']
for si,segment in enumerate(cds_annotation):
start = rta[segment['start']]
end = rta[segment['end']]
cds_to_aln.append({"global":np.arange(start, end), 'start':len(cds_aln), "len": end-start})
cds_aln += seq_aln[start:end]

return cds_aln, cds_to_aln

def get_aln_to_seq(aln):
aln_array = np.array(list(aln))
aln_to_seq = np.cumsum(aln_array!='-') - 1
aln_to_seq = np.concatenate([aln_to_seq, [aln_to_seq[-1]+1]])
seq_to_aln = np.arange(len(aln))[aln_array!='-']
return aln_to_seq, seq_to_aln


def make_coord_map(ref_aln, qry_aln):

aln_to_ref, ref_to_aln = get_aln_to_seq(ref_aln)
aln_to_qry, qry_to_aln = get_aln_to_seq(qry_aln)

return {'aln_to_ref':aln_to_ref, 'aln_to_qry': aln_to_qry, 'ref_to_aln': ref_to_aln}

def cds_to_global_aln_position(pos, cds_to_aln_map):
'''
map a position in the extracted alignment of the CDS to the global alignment.
returns a result for each CDS segment, but a single position can only be in ONE CDS segment
'''
result = []
for segment in cds_to_aln_map:
res = {}
pos_in_segment = pos - segment['start']
if pos_in_segment<0:
res['status'] = 'before'
res['pos'] = np.nan
elif pos_in_segment>=segment['len']:
res['status'] = 'after'
res['pos'] = np.nan
else:
res['status'] = 'inside'
res['pos'] = segment["global"][pos_in_segment]
result.append(res)

return result

def cds_to_global_ref_position(pos, cds_to_aln_map, coord_map):
'''
map a position in the extracted alignment of the CDS to the reference sequence.
returns a result for each CDS segment, but a single position can only be in ONE CDS segment
'''
cds_to_aln_res = cds_to_global_aln_position(pos, cds_to_aln_map)
result = []
for segment in cds_to_aln_res:
if segment['status']=='inside':
result.append({'status':'inside', 'pos':coord_map['aln_to_ref'][segment['pos']]})
else:
result.append(segment)
return result

def cds_to_global_aln_range(start, end, cds_to_aln_map):
'''
map a range in the extracted alignment of the CDS to the global alignment.
returns a result for each CDS segment, as a range can span multiple CDS-segments
'''
cds_to_aln_start = cds_to_global_aln_position(start, cds_to_aln_map)
# need to map end position -1 to correspond to the last included position
cds_to_aln_end = cds_to_global_aln_position(end-1, cds_to_aln_map)
result = []
for seg_start, seg_end, seg_map in zip(cds_to_aln_start, cds_to_aln_end, cds_to_aln_map):
if seg_end['status']=="before":
result.append({"status":"before", "start":np.nan, "end":np.nan})
continue
if seg_start['status']=="after":
result.append({"status":"after", "start":np.nan, "end":np.nan})
continue

if seg_start['status']=="before":
start_pos = seg_map["global"][0]
elif seg_start['status']=='inside':
start_pos = seg_start["pos"]

# map end and increment by one to correspond to open interval
if seg_end['status']=="after":
end_pos = seg_map["global"][-1]+1
elif seg_end['status']=='inside':
end_pos = seg_end["pos"]+1

result.append({"status":"covered", "start":start_pos, "end":end_pos})
return result

def codon_to_global_aln_range(codon, cds_map):
'''
expands a codon in the extracted alignment to a range in the global alignment
'''
start_pos = codon*3
end_pos = codon*3 + 3
return cds_to_global_aln_range(start_pos, end_pos, cds_map)

print('Translation of reference sequence:')
print(Seq.translate(extract_cds_sequence(cds_1, ref)))
print(Seq.translate(extract_cds_sequence(cds_2, ref)))

coord_map = make_coord_map(ref_aln, qry_aln)
print('\nGlobal coordinate map:')
print(coord_map)

cds_aln, cds_map = extract_cds_alignment(cds_2, ref_aln, coord_map)

print('\nExtracted CDS from the alignment:')
print(extract_cds_alignment(cds_2, ref_aln, coord_map)[0])
print(extract_cds_alignment(cds_2, qry_aln, coord_map)[0])


print('\nPosition in the aligned CDS mapped to global alignment:')
print(cds_to_global_aln_position(10,cds_map))
print('\nPosition in the aligned CDS mapped to reference sequence:')
print(cds_to_global_ref_position(10,cds_map, coord_map))

print('\nRange in the aligned CDS mapped to global alignment:')
print(cds_to_global_aln_range(10,15,cds_map))

print('\nCodon in the aligned CDS mapped to global alignment:')
print("5:",codon_to_global_aln_range(5,cds_map))
print("6:",codon_to_global_aln_range(6,cds_map))
print("7:",codon_to_global_aln_range(7,cds_map))
Loading

1 comment on commit 049883f

@vercel
Copy link

@vercel vercel bot commented on 049883f Aug 30, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Successfully deployed to the following URLs:

nextclade – ./

nextclade-git-master-nextstrain.vercel.app
nextclade.vercel.app
nextclade-nextstrain.vercel.app

Please sign in to comment.