Skip to content

Commit

Permalink
#43 MultipleAlignment with correct mouseover sequence
Browse files Browse the repository at this point in the history
  • Loading branch information
josiahseaman committed Oct 4, 2019
1 parent 64ef73a commit e9b6aee
Show file tree
Hide file tree
Showing 2 changed files with 56 additions and 43 deletions.
97 changes: 55 additions & 42 deletions DDV/MultipleAlignmentLayout.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,10 +4,12 @@
import os
import traceback
from datetime import datetime

from DNASkittleUtils.Contigs import read_contigs
from PIL import Image, ImageDraw

import math
from DDV.TileLayout import hex_to_rgb, TileLayout
from DDV.TileLayout import hex_to_rgb, TileLayout, is_protein_sequence
from natsort import natsorted

from DDV.DDVUtils import make_output_directory
Expand Down Expand Up @@ -121,14 +123,7 @@ def draw_nucleotides(self, verbose=False):
"""Layout a whole set of different repeat types with different widths. Column height is fixed,
but column width varies constantly. Wrapping to the next row is determined by hitting the
edge of the allocated image."""
if not self.single_file:
super(MultipleAlignmentLayout, self).draw_nucleotides(verbose)
else:
all_contigs = self.contigs
for i, contig in enumerate(all_contigs):
self.i_layout = i
self.contigs = [contig]
super(MultipleAlignmentLayout, self).draw_nucleotides(verbose)
super(MultipleAlignmentLayout, self).draw_nucleotides(verbose)


def calc_all_padding(self):
Expand Down Expand Up @@ -169,7 +164,7 @@ def guess_image_dimensions(self):
areas = []
for source in self.all_contents.values():
areas.append((source[-1].consensus_width + self.x_pad) * (len(source) + self.y_pad))
area = sum(areas)
area = sum(areas) if not self.single_file else len(self.contigs[0].seq) * len(self.all_contents) *1.2
self.image_length = int(area * 1.2)
square_dim = int(math.sqrt(self.image_length))
desired_width = 5 * square_dim // 3
Expand All @@ -191,7 +186,7 @@ def calculate_mixed_layout(self):
image_wh = self.guess_image_dimensions()

#unsorted, largest height per row, tends to be less dense
self.each_layout = [] # delete old defaul layout
self.each_layout = [] # delete old default layout
for filename in self.fasta_sources:
source = self.all_contents[filename]
height = len(source) + self.title_height_px
Expand All @@ -201,7 +196,6 @@ def calculate_mixed_layout(self):

adjusted_height = self.next_origin[1] + self.current_column_height + self.y_pad #could extend image
if self.single_file:
self.spread_large_MSA_source()
adjusted_height = image_wh[1]
self.prepare_image(0, image_wh[0], adjusted_height)

Expand All @@ -211,34 +205,25 @@ def preview_all_files(self, input_fasta_folder):
"""Populates fasta_sources with files from a directory"""
files = fastas_in_folder(input_fasta_folder)
self.single_file = len(files) == 1
for single_MSA in files:
self.read_contigs_and_calc_padding(single_MSA, None)
fasta_name = os.path.basename(single_MSA)
self.fasta_sources.append(fasta_name)
self.all_contents[fasta_name] = self.contigs # store contigs so the can be wiped
if self.sort_contigs: # do this before self.each_layout is created in order
heights = [(len(self.all_contents[fasta_name]), fasta_name) for fasta_name in self.fasta_sources]
heights.sort(key=lambda pair: -pair[0]) # largest number of sequences first
self.fasta_sources = [pair[1] for pair in heights] # override old ordering
if self.single_file:
self.spread_large_MSA_source(files[0])
else:
for single_MSA in files:
self.read_contigs_and_calc_padding(single_MSA, None)
fasta_name = os.path.basename(single_MSA)
self.fasta_sources.append(fasta_name)
self.all_contents[fasta_name] = self.contigs # store contigs so the can be wiped
if self.sort_contigs: # do this before self.each_layout is created in order
heights = [(len(self.all_contents[fasta_name]), fasta_name) for fasta_name in self.fasta_sources]
heights.sort(key=lambda pair: -pair[0]) # largest number of sequences first
self.fasta_sources = [pair[1] for pair in heights] # override old ordering


def layout_based_on_repeat_size(self, width, height, max_width, contigs):
"""change layout to match dimensions of the repeat
"""
usable_width = max_width - (self.border_width * 2)
if width > usable_width: # Case with one massively wide MSA
# TODO more than one large MSA
height = len(contigs) # number of individuals
padding_between_mega_rows = 1
n_rows = math.ceil(len(contigs[0].seq) / usable_width)
modulos = [usable_width, 1, 1, n_rows]
padding = [0, height + padding_between_mega_rows, 0, height + padding_between_mega_rows]
for row in contigs: # one layout for mouse over of each individual
self.next_origin[0] = self.border_width
self.next_origin[1] += 1
self.each_layout.append(level_layout_factory(modulos, padding, self.next_origin))
self.next_origin[1] += n_rows * \
(height + padding_between_mega_rows)
if self.single_file:
self.layout_phased_file(width, height, max_width)
else: # Typical case with many small MSA
# skip to next mega row
if self.next_origin[0] + width + 1 >= max_width:
Expand All @@ -254,6 +239,25 @@ def layout_based_on_repeat_size(self, width, height, max_width, contigs):
self.i_layout = len(self.each_layout) - 1 # select current layout


def layout_phased_file(self, width, height, max_width):
    """Rebuild ``self.each_layout`` with one layout per entry of ``self.fasta_sources``.

    :param width: requested row width; capped at ``max_width`` minus both borders.
    :param height: ignored -- it is replaced by the number of fasta sources.
    :param max_width: full image width, borders included.

    Side effects: resets ``self.each_layout``, moves ``self.next_origin`` and
    points ``self.i_layout`` at the last layout that was created.
    """
    self.each_layout = []
    border = self.border_width
    usable_width = min(width, max_width - (border * 2))
    # TODO more than one large MSA
    height = len(self.fasta_sources)  # number of individuals
    # every mega row holds one line per individual plus one line of padding
    mega_row_step = height + 1
    n_rows = math.ceil(len(self.contigs[0].seq) / usable_width)
    modulos = [usable_width, 1, 1, n_rows]
    padding = [0, mega_row_step, 0, mega_row_step]
    # one layout for mouse over of each individual
    for y in range(height):
        self.next_origin[0] = border
        # NOTE(review): 30 is an unexplained vertical offset (presumably room
        # for a title) -- confirm before changing it.
        self.next_origin[1] = 30 + y
        self.each_layout.append(level_layout_factory(modulos, padding, self.next_origin))
    # move origin to bottom of image
    # NOTE(review): assumed to run once, after the loop -- confirm against the file.
    self.next_origin[1] += n_rows * mega_row_step
    self.i_layout = len(self.each_layout) - 1  # select current layout


def draw_titles(self):
"""Draw one title for each file (MSA Block) that includes many contigs.
We use a fake contig with no sequence and the name of file, instead of the name of the first
Expand All @@ -268,10 +272,19 @@ def draw_titles(self):
self.write_title(contig_name, self.levels.base_width, self.title_height_px, font_size,
title_lines, title_width, upper_left, False, self.image)

def spread_large_MSA_source(self):
filename = self.fasta_sources[0]
actual_contigs = self.all_contents[filename]
n_individuals = len(actual_contigs)
self.fasta_sources = [filename] * n_individuals
for filename in self.fasta_sources:
source = self.all_contents[filename]
def spread_large_MSA_source(self, fasta_path):
    """Register every sequence of one alignment file as its own source.

    Reads the contigs at ``fasta_path`` and stores them in ``self.contigs``.
    ``self.fasta_sources`` gets one name per contig (the file's base name
    followed by the contig's index), and ``self.all_contents`` maps each of
    those names to a one-element list holding the matching contig.
    ``self.protein_palette`` is set from the first contig.  Finally all
    padding/offset attributes of every contig are zeroed and its
    ``consensus_width`` is set to the length of its sequence.

    :param fasta_path: path of the alignment file to read.
    """
    individuals = read_contigs(fasta_path)
    self.contigs = individuals
    base_name = os.path.basename(fasta_path)
    self.fasta_sources = [base_name + str(index) for index, _ in enumerate(individuals)]
    self.all_contents = {
        source: [contig] for source, contig in zip(self.fasta_sources, individuals)
    }
    self.protein_palette = is_protein_sequence(self.contigs[0])

    # Zero padding
    zeroed_fields = (
        'reset_padding',
        'title_padding',
        'tail_padding',
        'nuc_title_start',
        'nuc_seq_start',
    )
    for (contig,) in self.all_contents.values():
        for field in zeroed_fields:
            setattr(contig, field, 0)
        contig.consensus_width = len(contig.seq)
2 changes: 1 addition & 1 deletion DDV/html_template/nucleotideNumber.js
Original file line number Diff line number Diff line change
Expand Up @@ -143,7 +143,7 @@ function peano_mouse_position(nucNumX, nucNumY, layout_levels, source_index) {
var next_axis = axis_flipped[(part + 1) %2];
axis_flipped[(part + 1) %2] = this_level_flipped? !next_axis : next_axis; // XOR

if (xy_remaining[part] >= level.thickness - level.padding && xy_remaining[part] < level.thickness) {
if (i != 1 && xy_remaining[part] >= level.thickness - level.padding && xy_remaining[part] < level.thickness) {
return "";//check for invalid coordinate (margins)
}
}
Expand Down

0 comments on commit e9b6aee

Please sign in to comment.