From ec7ae6d58ea51aef7307808f91f80fa68e63bd13 Mon Sep 17 00:00:00 2001 From: josiahseaman Date: Thu, 26 Sep 2019 16:59:11 +0100 Subject: [PATCH] #81 Major: moved www-data/dnadata/ to /results/. --- .gitignore | 3 +- DDV/AnnotatedAlignment.py | 2 +- DDV/DDVUtils.py | 2 +- DDV/Ideogram.py | 14 +-- DDV/Sequenaut.py | 210 -------------------------------------- README.md | 11 +- setup.py | 6 +- 7 files changed, 14 insertions(+), 234 deletions(-) delete mode 100644 DDV/Sequenaut.py diff --git a/.gitignore b/.gitignore index 08b3a49..fc36a7c 100644 --- a/.gitignore +++ b/.gitignore @@ -7,8 +7,7 @@ env/ env365/ install_test/ release/ -www-data/to_process -www-data/dnadata +DDV/results/ release.bat scraps/ env_fresh/ diff --git a/DDV/AnnotatedAlignment.py b/DDV/AnnotatedAlignment.py index c792264..8848604 100644 --- a/DDV/AnnotatedAlignment.py +++ b/DDV/AnnotatedAlignment.py @@ -160,7 +160,7 @@ def markup_annotation_differences(self): if __name__ == '__main__': output_name = 'hg38_panTro4_annotated_' - base_path = os.path.join('.', 'www-data', 'dnadata', output_name) + base_path = os.path.join('.', 'results', output_name) chimp_annotation = r'data\PanTro_refseq2.1.4_genes.gtf' human_anno = r'data\Hg38_genes.gtf' aligner = AnnotatedAlignment('hg38ToPanTro4.over.chain', 'hg38.fa', human_anno, 'panTro4.fa', chimp_annotation, base_path) diff --git a/DDV/DDVUtils.py b/DDV/DDVUtils.py index e177be3..39653bb 100644 --- a/DDV/DDVUtils.py +++ b/DDV/DDVUtils.py @@ -104,7 +104,7 @@ def execution_dir(): def base_directories(output_name): BASE_DIR = execution_dir() - SERVER_HOME = os.path.join(BASE_DIR, 'www-data', 'dnadata') + SERVER_HOME = os.path.join(BASE_DIR, 'results') base_path = os.path.join(SERVER_HOME, output_name) if output_name else SERVER_HOME return SERVER_HOME, base_path diff --git a/DDV/Ideogram.py b/DDV/Ideogram.py index d3ac6b5..c01d450 100644 --- a/DDV/Ideogram.py +++ b/DDV/Ideogram.py @@ -214,7 +214,7 @@ def process_file(self, input_file_path, output_folder, output_file_name, # self.palette['T'] = hex_to_rgb('A19E3D') # light green # self.palette['A'] = hex_to_rgb('6D772F') # Dark Green - def draw_nucleotides(self): + def draw_nucleotides(self, verbose=True): # points_file_name = os.path.join(self.final_output_location, "test_ideogram_points.txt") # points_file = None # open(points_file_name, 'w') # if points_file: @@ -293,18 +293,6 @@ def increment(digits, radices, place): if __name__ == "__main__": - # layout = Ideogram([3,3,3,63], [5,5,3,3,21]) - # layout.process_file("example_data/hg38_chr19_sample.fa", 'www-data/dnadata/test ideogram', 'ideogram-padding2') - - # layout = Ideogram([3,3,3,63], [5,5,3,3,21], 2, 2) - # layout.process_file("example_data/hg38_chr19_sample.fa", 'www-data/dnadata/test ideogram', 'ideogram-sparse') - # layout = Ideogram(([5,5,5,5,11], # thick, local - # [5,5,5,5,5 ,53], 1, 1)) - ## thin layout layout = Ideogram([3,3,3,3,3,27], [3,3,3,3,3,3 ,53], 1, 1) - # ([5,5,5,5,5,27], [3,3,3,3,3,3 ,53], 1, 1) - # 3*3*3*3*3*27* - # 3*3*3*3*3 = 1,594,323 bp per fiber row - radix_settings = eval(sys.argv[2]) assert len(radix_settings) == 4 and \ type(radix_settings[0]) == type(radix_settings[1]) == type([]) and \ diff --git a/DDV/Sequenaut.py b/DDV/Sequenaut.py deleted file mode 100644 index fed1875..0000000 --- a/DDV/Sequenaut.py +++ /dev/null @@ -1,210 +0,0 @@ -from __future__ import print_function, division, absolute_import, \ - with_statement, generators, nested_scopes - -import argparse -import math -import sys -from collections import defaultdict - -from DDV.TileLayout import TileLayout -from DDV.fluentdna import create_tile_layout_viz_from_fasta -from DDV.DDVUtils import make_output_directory, base_directories, interpolate - - -def hasDepth(listLike): - try: - return len(listLike) > 0 and not isinstance(listLike, (str, dict, tuple, type(u"unicode string"))) and hasattr( - listLike[0], "__getitem__") - except: - return False - - -def countNucleotides(seq, oligomerSize): - if hasDepth(seq): - return [countNucleotides(x, oligomerSize) for x in seq if x != '' and x != [] and x != {}] - if not seq: - return {} - counts = defaultdict(lambda: 0) - for endIndex in range(oligomerSize, len(seq) + 1, 1): - c = seq[endIndex - oligomerSize: endIndex] - counts[c] += 1 # defaults to 0 - return counts - - -def get_line(start, end): - """Bresenham's Line Algorithm - Produces a list of tuples from start and end - Copied from http://www.roguebasin.com/index.php?title=Bresenham%27s_Line_Algorithm#Python - """ - # Setup initial conditions - x1, y1 = start - x2, y2 = end - dx = x2 - x1 - dy = y2 - y1 - - # Determine how steep the line is - is_steep = abs(dy) > abs(dx) - - # Rotate line - if is_steep: - x1, y1 = y1, x1 - x2, y2 = y2, x2 - - # Swap start and end points if necessary and store swap state - swapped = False - if x1 > x2: - x1, x2 = x2, x1 - y1, y2 = y2, y1 - swapped = True - - # Recalculate differentials - dx = x2 - x1 - dy = y2 - y1 - - # Calculate error - error = int(dx / 2.0) - ystep = 1 if y1 < y2 else -1 - - # Iterate over bounding box generating points between start and end - y = y1 - points = [] - for x in range(x1, x2 + 1): - coord = (y, x) if is_steep else (x, y) - points.append(coord) - error -= abs(dy) - if error < 0: - y += ystep - error += dx - - # Reverse the list if the coordinates were swapped - if swapped: - points.reverse() - return points - - -class Sequenaut(TileLayout): # TODO: make an abstract class parent - def __init__(self, layout='triangle_slide', oligomer_size=100, peak=10.0, baseline=1.0, log_scale=True): - super(Sequenaut, self).__init__() - self.oligomer_size = oligomer_size - self.layout = layout - self.peak = peak - self.baseline = baseline - midpoint = self.oligomer_size // 2 - self.weight_matrix = list(linspace(baseline, self.peak, midpoint)) + list(linspace(self.peak, baseline, self.oligomer_size - midpoint)) - self.image_length = self.max_dimensions(oligomer_size, verbose=True)[0] - self.hits = [[0] * self.image_length for i in range(self.image_length)] - self.log_scale = log_scale - - @staticmethod - def coordinate(olig, weight_matrix): - """Turns an Olig into an (x,y) tuple coordinate. - axes = {'A': Point(0,1), 'G': Point(0,0), 'C': Point(1,0), 'T': Point(1,1)}""" - x = 0.0 - y = 0.0 - for c, weight in zip(olig, weight_matrix): - if c == 'T': - x += weight - y += weight - elif c == 'A': - y += weight - elif c == 'C': - x += weight - return int(x), int(y) - - - def max_dimensions(self, ignored, verbose=False): - resolution = self.coordinate('T' * self.oligomer_size, self.weight_matrix)[0] + 1 - if verbose: - print("Image will be %i x %i: %s pixels" % (resolution, resolution, "{:,}".format(resolution**2))) - return [resolution, resolution] - - - def weighted_sequenaut(self, seq): - counts = countNucleotides(seq, self.oligomer_size) - for olig, count in counts.items(): - point = self.coordinate(olig, self.weight_matrix) - self.hits[point[1]][point[0]] += count - - - def connected_sequenaut(self, seq): - prev_coord = self.coordinate(seq[: self.oligomer_size], self.weight_matrix) - for i in range(len(seq) - self.oligomer_size + 1): - olig = seq[i: i + self.oligomer_size] - point = self.coordinate(olig, self.weight_matrix) - line = get_line(prev_coord, point) - for x, y in line: - self.hits[y][x] += 1 - prev_coord = point - - - def draw_nucleotides(self): - for contig in self.contigs: - if self.layout == 'connected': - self.connected_sequenaut(contig.seq) - else: - self.weighted_sequenaut(contig.seq) - leader = max([max(column) for column in self.hits]) - if self.log_scale: - leader = math.log(leader) - try: - middle = self.max_dimensions(self.oligomer_size)[0] // 2 - print("Leader", leader, int(math.log(self.hits[middle][middle]) / leader * 235 + 20)) - except: pass # just talking - - for y in range(len(self.hits)): - for x in range(len(self.hits[y])): - if self.hits[y][x]: - val = math.log(self.hits[y][x]) if self.log_scale else self.hits[y][x] - grey = 255 - int(val / leader * 235 + 20) - self.pixels[x, y] = (grey, grey, grey) - - def draw_titles(self): - pass # There are no titles in Sequenaut - - - def process_file(self, input_file_path, output_folder, output_file_name): - # use the parent process_file() but override methods in child - super(Sequenaut, self).process_file(input_file_path, output_folder, output_file_name) - # TODO: support --no_webpage - self.generate_html(output_folder, output_file_name) - - -def run_sequenaut(args): - SERVER_HOME, base_path = base_directories(args.output_name) - # TODO: allow batch of tiling layout by chromosome - output_dir = make_output_directory(base_path) - renderer = Sequenaut(layout=args.layout, oligomer_size=args.oligomer_size, peak=args.peak, baseline=args.baseline, log_scale=not args.linear_scale) - create_tile_layout_viz_from_fasta(args, args.fasta, args.output_name, renderer) - sys.exit(0) - - -if __name__ == "__main__": - parser = argparse.ArgumentParser(usage="%(prog)s [options]", - description="Creates visualizations of FASTA formatted DNA nucleotide data.", - add_help=True) - - parser = argparse.ArgumentParser(prog='Sequenaut.exe') - - parser.add_argument("-f", "--fasta", - type=str, - help="Path to main FASTA file to process into new visualization.", - dest="fasta") - parser.add_argument("-o", "--output_name", - type=str, - help="What to name the output folder (not a path). Defaults to name of the fasta file.", - dest="output_name") - parser.add_argument("-l", "--layout", - type=str, - help="The layout algorithm.", - choices=["triangle_slide", "connected"],) - parser.add_argument('--linear_scale', action='store_true', help='Use linear color scaling (defaults to logarithmic).',) - parser.add_argument('--oligomer_size', type=int, default=100, help='Size of the sliding window in nucleotides.',) - parser.add_argument('--peak', type=float, default=3.0, help='Highest scaling factor for the triangular weight matrix.',) - parser.add_argument('--baseline', type=float, default=1.0, help='Lowest scaling factor for the triangular weight matrix.',) - args = parser.parse_args() - - if not args.layout: - args.layout = "triangle_slide" # default - args.output_name = '_'.join([args.output_name, str(args.oligomer_size), str(int(args.peak)), str(int(args.baseline))]) - - run_sequenaut(args) diff --git a/README.md b/README.md index 65f0a10..4b2b9fe 100644 --- a/README.md +++ b/README.md @@ -15,9 +15,12 @@ You can start using FluentDNA: 1. Downloading and unzipping the [Latest Release](https://github.com/josiahseaman/FluentDNA/releases) (Mac and Windows only). 2. Open a terminal (command line) in the same folder you unzipped FluentDNA. 3. Run the command `./fluentdna --fasta="DDV/example_data/Human selenoproteins.fa" --runserver` - 4. Once your private server has started, all your results available at [http://127.0.0.1:8000](http://127.0.0.1:8000). Note that this server is not actually using the internet at all, it's just using your browser as a file viewer. - -To use FluentDNA as a python module (required for Linux), follow the [pip install instructions](https://github.com/josiahseaman/FluentDNA/blob/python-master/docs/installation.md). + 4. Your result files will be placed in the FluentDNA directory `DDV/results/`. Once your private server has started, + all your results are viewable at [http://localhost:8000](http://127.0.0.1:8000). + +To use FluentDNA as a python module (required for Linux), follow the [pip install instructions](https://github.com/josiahseaman/FluentDNA/blob/python-master/docs/installation.md). +**Locating Results:** You will need to be using the same computer the server is running on. The server will not be visible +over network or internet unless your administrator opens the port. *** @@ -45,7 +48,7 @@ This generates an image pyramid with the standard legend (insert image of legend * FA File: [DDV/example_data/hg38_chr19_sample.fa](https://github.com/josiahseaman/FluentDNA/blob/python-master/DDV/example_data/hg38_chr19_sample.fa) **Result:** [Hg38 chr19 sample](https://dnaskittle.com/ddvresults/dnadata/Test%20Simple/) -![Example FluentDNA output of Human Chr19 2MBp](https://dnaskittle.com/ddvresults/dnadata/Test%20Simple/Test%20Simple.png) +![Example FluentDNA output of Human Chr19 2MBp](https://dnaskittle.com/ddvresults/dnadata/Test%20Simple/sources/Test%20Simple.png) It is also possible to generate an image file only that can be accessed with an image viewer using `--no_webpage`. diff --git a/setup.py b/setup.py index 8dcb1e3..cf6fa48 100644 --- a/setup.py +++ b/setup.py @@ -5,11 +5,11 @@ setup( name='DDV', version=VERSION, - description='Visualization tool for fasta files. Supports whole genome alignment and multiple sequence alignment.', + description='Visualization tool for bare fasta files. Supports whole genome alignment and multiple sequence alignment.', author='Josiah Seaman, Bryan Hurst', author_email='josiah.seaman@gmail.com', license='BSD', - packages=find_packages(exclude=('build', 'obj', 'www-data')), + packages=find_packages(exclude=('build', 'obj', 'results')), include_package_data=True, package_data={'DDV': ['html_template/*', 'example_data/*', 'html_template/img/*', 'example_data/alignments/*',]}, @@ -31,7 +31,7 @@ zip_safe=False, url='https://github.com/josiahseaman/FluentDNA', download_url='https://github.com/josiahseaman/FluentDNA', # TODO: post a tarball - keywords=['bioinformatics', 'dna', 'fasta', 'chain', 'alignment'], + keywords=['bioinformatics', 'dna', 'fasta', 'chain', 'alignment', 'species diversity'], classifiers=[ 'Development Status :: 4 - Beta', # 5 - Production/Stable 'Intended Audience :: Developers',