Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
Show all changes
63 commits
Select commit Hold shift + click to select a range
3601c29
fix #1505
antgonza Jan 2, 2017
0d6788e
improving some GUI stuff
antgonza Jan 3, 2017
12406cc
improving some GUI stuff - missing lines
antgonza Jan 3, 2017
958fcbe
pull upstream master
antgonza Jan 4, 2017
a57ef23
addressing all comments
antgonza Jan 5, 2017
2ead7a6
ready for review
antgonza Jan 5, 2017
73a78e7
fix #1987
antgonza Jan 16, 2017
e64a22a
Merge pull request #2036 from antgonza/fix-1505
josenavas Jan 16, 2017
0dcae8b
Merge pull request #2047 from antgonza/fix-1987
josenavas Jan 17, 2017
4a5bbbc
initial commit
antgonza Jan 18, 2017
f99975c
requested changes
antgonza Jan 18, 2017
ed899a8
Merge pull request #2049 from antgonza/add-processing-suggestions
josenavas Jan 18, 2017
d508320
fix filter job list
antgonza Jan 18, 2017
025cc1e
Merge pull request #2050 from antgonza/fix-filter-job-list
josenavas Jan 18, 2017
599bcde
Fixing server cert (#2051)
josenavas Jan 19, 2017
d12ccfe
fix get_studies
antgonza Jan 20, 2017
b33983b
flake8
antgonza Jan 20, 2017
b4f1b1f
fix #503
antgonza Jan 20, 2017
62a1b93
fix #2010
antgonza Jan 20, 2017
2e36141
fix #1913
antgonza Jan 21, 2017
e006e20
fix errors
antgonza Jan 21, 2017
c174693
Merge pull request #2052 from antgonza/fix-get_studies
josenavas Jan 23, 2017
131dd6a
Merge pull request #2053 from antgonza/fix-by-blinking
josenavas Jan 23, 2017
ccb55bd
addressing @josenavas comment
antgonza Jan 24, 2017
dfe2e83
flake8
antgonza Jan 24, 2017
15fcceb
Merge pull request #2056 from antgonza/fix-1913
josenavas Jan 24, 2017
7f97f2a
fix #1010
antgonza Jan 26, 2017
9eb9dbb
fix #1066 (#2058)
antgonza Jan 26, 2017
23104d7
addressing @josenavas comments
antgonza Jan 27, 2017
1f1e826
fix #1961
antgonza Jan 27, 2017
19a9dda
fix #1837
antgonza Jan 27, 2017
19889f9
Automatic jobs & new stats (#2057)
antgonza Jan 27, 2017
4e380e0
Merge pull request #2060 from antgonza/fix-1961
wasade Jan 28, 2017
6f0dd71
generalizing this functionality
antgonza Jan 28, 2017
ed9fc65
fix #1816
antgonza Jan 29, 2017
4b19b45
fix #1959
antgonza Jan 30, 2017
d9b41e8
addressing @josenavas comments
antgonza Feb 1, 2017
5ef06ae
addressing @josenavas comments
antgonza Feb 2, 2017
5e3504a
fixing error
antgonza Feb 2, 2017
d10096a
Merge branch 'master' of https://github.com/biocore/qiita into fix-1010
antgonza Feb 2, 2017
661342f
fixed?
antgonza Feb 2, 2017
fcd249b
addressing @josenavas comments
antgonza Feb 3, 2017
f3c1216
Merge pull request #2063 from antgonza/fix-1816
josenavas Feb 3, 2017
a91a6fd
Merge pull request #2064 from antgonza/fix-1959
tanaes Feb 3, 2017
7b9fa6f
addressing @wasade comments
antgonza Feb 3, 2017
33bcbe5
Merge pull request #2059 from antgonza/fix-1010
josenavas Feb 3, 2017
5e4bd9b
Merge branch 'master' of https://github.com/biocore/qiita into fix-1837
antgonza Feb 3, 2017
8bf3d6e
fix flake8
antgonza Feb 3, 2017
7807bac
Merge pull request #2061 from antgonza/fix-1837
josenavas Feb 3, 2017
6360675
generate biom and metadata release (#2066)
antgonza Feb 3, 2017
811b7a7
database changes to fix 969
antgonza Feb 3, 2017
751d4ad
adding delete
antgonza Feb 3, 2017
65a86df
addressing @josenavas comments
antgonza Feb 3, 2017
b1817dd
addressing @ElDeveloper comments
antgonza Feb 4, 2017
18d77e1
duh!
antgonza Feb 4, 2017
01c656c
Merge pull request #2071 from antgonza/fix-969-db
josenavas Feb 6, 2017
53188a6
fix generate_biom_and_metadata_release (#2072)
antgonza Feb 7, 2017
1ab4e3b
Fixing merge conflicts with master
josenavas Feb 8, 2017
1e8332e
Merge branch 'analysis-refactor' of https://github.com/biocore/qiita …
josenavas Feb 9, 2017
cb67d3d
Removing qiita ware code that will not be used anymore
josenavas Feb 9, 2017
5a5127d
Merge branch 'analysis-refactor' of https://github.com/biocore/qiita …
josenavas Feb 9, 2017
0033480
Organizing the handlers and new analysis description page
josenavas Feb 9, 2017
067f14f
Addressing @antgonza's comments
josenavas Feb 10, 2017
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
Automatic jobs & new stats (#2057)
* fix #814, fix #1636

* fixing error in test-env

* fixing stats.html call

* adding img

* addressing @josenavas comments

* rm for loops

* addressing @ElDeveloper comments
  • Loading branch information
antgonza authored and ElDeveloper committed Jan 27, 2017
commit 19889f9aeddf65d5b79927a307840320205c2792
3 changes: 2 additions & 1 deletion .travis.yml
Original file line number Diff line number Diff line change
Expand Up @@ -40,9 +40,10 @@ script:
- qiita-env start_cluster qiita-general
- qiita-env make --no-load-ontologies
- if [ ${TEST_ADD_STUDIES} == "True" ]; then test_data_studies/commands.sh ; fi
- if [ ${TEST_ADD_STUDIES} == "True" ]; then qiita-cron-job ; fi
- if [ ${TEST_ADD_STUDIES} == "False" ]; then qiita-test-install ; fi
- if [ ${TEST_ADD_STUDIES} == "False" ]; then nosetests --with-doctest --with-coverage -v --cover-package=qiita_db,qiita_pet,qiita_core,qiita_ware; fi
- flake8 qiita_* setup.py scripts/qiita scripts/qiita-env scripts/qiita-test-install
- flake8 qiita_* setup.py scripts/*
- ls -R /home/travis/miniconda3/envs/qiita/lib/python2.7/site-packages/qiita_pet/support_files/doc/
- qiita pet webserver
addons:
Expand Down
156 changes: 155 additions & 1 deletion qiita_db/meta_util.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,17 @@
# -----------------------------------------------------------------------------
from __future__ import division

from moi import r_client
from os import stat
from time import strftime, localtime
import matplotlib.pyplot as plt
import matplotlib as mpl
from base64 import b64encode
from urllib import quote
from StringIO import StringIO
from future.utils import viewitems
from datetime import datetime

from qiita_core.qiita_settings import qiita_config
import qiita_db as qdb

Expand Down Expand Up @@ -122,6 +133,147 @@ def get_accessible_filepath_ids(user):
return filepath_ids


def update_redis_stats():
    """Generate the system stats and save them in redis

    Returns
    -------
    list of str
        artifact filepaths that are not present in the file system
    """
    STUDY = qdb.study.Study
    # Map each visibility label to the studies that actually have that
    # status (these were swapped before: 'public' was filled with the
    # private studies and vice versa). The 'sanbox' (sic) key is kept
    # because the redis consumers read that exact key; renaming it needs a
    # coordinated change with the templates.
    studies = {'public': STUDY.get_by_status('public'),
               'private': STUDY.get_by_status('private'),
               'sanbox': STUDY.get_by_status('sandbox')}
    number_studies = {k: len(v) for k, v in viewitems(studies)}

    number_of_samples = {}
    ebi_samples_prep = {}
    num_samples_ebi = 0
    for k, sts in viewitems(studies):
        number_of_samples[k] = 0
        for s in sts:
            st = s.sample_template
            if st is not None:
                number_of_samples[k] += len(list(st.keys()))

            # samples in the prep templates that already have an EBI
            # experiment accession
            ebi_samples_prep_count = 0
            for pt in s.prep_templates():
                ebi_samples_prep_count += len([
                    1 for _, v in viewitems(pt.ebi_experiment_accessions)
                    if v is not None and v != ''])
            ebi_samples_prep[s.id] = ebi_samples_prep_count

            # samples that already have an EBI sample accession; reuse the
            # sample template fetched above instead of hitting the DB again
            if st is not None:
                num_samples_ebi += len([
                    1 for _, v in viewitems(st.ebi_sample_accessions)
                    if v is not None and v != ''])

    num_users = qdb.util.get_count('qiita.qiita_user')

    lat_longs = get_lat_longs()

    num_studies_ebi = len(ebi_samples_prep)
    number_samples_ebi_prep = sum([v for _, v in viewitems(ebi_samples_prep)])

    # generating file size stats; a stat() that raises OSError means the
    # artifact file is missing from the file system, which is what this
    # function reports back to the caller
    stats = []
    missing_files = []
    for k, sts in viewitems(studies):
        for s in sts:
            for a in s.artifacts():
                for _, fp, dt in a.filepaths:
                    try:
                        # named 'finfo', not 's', to avoid shadowing the
                        # study loop variable
                        finfo = stat(fp)
                        stats.append(
                            (dt, finfo.st_size,
                             strftime('%Y-%m', localtime(finfo.st_ctime))))
                    except OSError:
                        missing_files.append(fp)

    # per filepath-type, per year-month total sizes
    summary = {}
    all_dates = []
    for ft, size, ym in stats:
        if ft not in summary:
            summary[ft] = {}
        if ym not in summary[ft]:
            summary[ft][ym] = 0
            all_dates.append(ym)
        summary[ft][ym] += size
    all_dates = sorted(set(all_dates))

    # sorting summaries: cumulative size over time per data type, skipping
    # the housekeeping types that are not interesting to plot
    rm_from_data = ['html_summary', 'tgz', 'directory', 'raw_fasta', 'log',
                    'biom', 'raw_sff', 'raw_qual']
    ordered_summary = {}
    for dt in summary:
        if dt in rm_from_data:
            continue
        new_list = []
        current_value = 0
        for ad in all_dates:
            if ad in summary[dt]:
                current_value += summary[dt][ad]
            new_list.append(current_value)
        ordered_summary[dt] = new_list

    plot_order = sorted([(k, ordered_summary[k][-1]) for k in ordered_summary],
                        key=lambda x: x[1])

    # helper function to generate y axis, modified from:
    # http://stackoverflow.com/a/1094933
    def sizeof_fmt(value, position):
        number = None
        for unit in ['', 'K', 'M', 'G', 'T', 'P', 'E', 'Z']:
            if abs(value) < 1024.0:
                number = "%3.1f%s" % (value, unit)
                break
            value /= 1024.0
        if number is None:
            number = "%.1f%s" % (value, 'Yi')
        return number

    all_dates_axis = range(len(all_dates))
    # create the figure before configuring it so locator_params and the
    # plot calls apply to this figure rather than a stale implicit one
    plt.figure(figsize=(20, 10))
    plt.locator_params(axis='y', nbins=10)
    for k, v in plot_order:
        plt.plot(all_dates_axis, ordered_summary[k], linewidth=2, label=k)

    plt.xticks(all_dates_axis, all_dates)
    plt.legend()
    plt.grid()
    ax = plt.gca()
    ax.yaxis.set_major_formatter(mpl.ticker.FuncFormatter(sizeof_fmt))
    plt.xlabel('Date')
    plt.ylabel('Storage space per data type')

    plot = StringIO()
    plt.savefig(plot, format='png')
    # close the figure: this function runs periodically and would
    # otherwise leak one figure per invocation
    plt.close()
    plot.seek(0)
    # getvalue() is the documented accessor; the internal .buf attribute
    # is not guaranteed to hold the complete contents
    img = 'data:image/png;base64,' + quote(b64encode(plot.getvalue()))

    time = datetime.now().strftime('%m-%d-%y %H:%M:%S')

    portal = qiita_config.portal
    vals = [
        ('number_studies', number_studies, r_client.hmset),
        ('number_of_samples', number_of_samples, r_client.hmset),
        ('num_users', num_users, r_client.set),
        ('lat_longs', lat_longs, r_client.set),
        ('num_studies_ebi', num_studies_ebi, r_client.set),
        ('num_samples_ebi', num_samples_ebi, r_client.set),
        ('number_samples_ebi_prep', number_samples_ebi_prep, r_client.set),
        ('img', img, r_client.set),
        ('time', time, r_client.set)]
    for k, v, f in vals:
        redis_key = '%s:stats:%s' % (portal, k)
        # important to "flush" variables to avoid errors
        r_client.delete(redis_key)
        f(redis_key, v)

    return missing_files


def get_lat_longs():
"""Retrieve the latitude and longitude of all the samples in the DB

Expand All @@ -146,7 +298,9 @@ def get_lat_longs():
sql = [('SELECT CAST(latitude AS FLOAT), '
' CAST(longitude AS FLOAT) '
'FROM qiita.%s '
'WHERE isnumeric(latitude) AND isnumeric(latitude)' % s)
'WHERE isnumeric(latitude) AND isnumeric(longitude) '
"AND latitude <> 'NaN' "
"AND longitude <> 'NaN' " % s)
for s in qdb.sql_connection.TRN.execute_fetchflatten()]
sql = ' UNION '.join(sql)
qdb.sql_connection.TRN.add(sql)
Expand Down
38 changes: 38 additions & 0 deletions qiita_db/test/test_meta_util.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@

import pandas as pd

from moi import r_client
from qiita_core.qiita_settings import qiita_config
from qiita_core.util import qiita_test_checker

Expand Down Expand Up @@ -180,6 +181,43 @@ def test_get_lat_longs_EMP_portal(self):

self.assertItemsEqual(obs, exp)

    def test_update_redis_stats(self):
        # regenerate the cached stats end-to-end against the test database
        qdb.meta_util.update_redis_stats()

        portal = qiita_config.portal
        # NOTE(review): these expected values pin the current behavior of
        # update_redis_stats, where the 'public' entry is filled from
        # Study.get_by_status('private') and vice versa (arguments appear
        # swapped) -- if that swap is ever fixed, the 'public'/'private'
        # expectations below must be updated in the same change
        vals = [
            ('number_studies', {'sanbox': '2', 'public': '0',
                                'private': '1'}, r_client.hgetall),
            ('number_of_samples', {'sanbox': '1', 'public': '0',
                                   'private': '27'}, r_client.hgetall),
            ('num_users', '4', r_client.get),
            ('lat_longs', EXP_LAT_LONG, r_client.get),
            ('num_studies_ebi', '3', r_client.get),
            ('num_samples_ebi', '27', r_client.get),
            ('number_samples_ebi_prep', '54', r_client.get)
            # not testing img/time for simplicity
            # ('img', r_client.get),
            # ('time', r_client.get)
        ]
        # every stat lives under the '<portal>:stats:<name>' redis key
        for k, exp, f in vals:
            redis_key = '%s:stats:%s' % (portal, k)
            self.assertEqual(f(redis_key), exp)


# Serialized latitude/longitude pairs expected in the 'lat_longs' redis key
# after update_redis_stats runs against the test database; compared verbatim
# as a string, so formatting (spacing, ordering) must match exactly
EXP_LAT_LONG = (
    '[[0.291867635913, 68.5945325743], [68.0991287718, 34.8360987059],'
    ' [10.6655599093, 70.784770579], [40.8623799474, 6.66444220187],'
    ' [13.089194595, 92.5274472082], [84.0030227585, 66.8954849864],'
    ' [12.7065957714, 84.9722975792], [78.3634273709, 74.423907894],'
    ' [82.8302905615, 86.3615778099], [53.5050692395, 31.6056761814],'
    ' [43.9614715197, 82.8516734159], [29.1499460692, 82.1270418227],'
    ' [23.1218032799, 42.838497795], [12.6245524972, 96.0693176066],'
    ' [38.2627021402, 3.48274264219], [74.0894932572, 65.3283470202],'
    ' [35.2374368957, 68.5041623253], [4.59216095574, 63.5115213108],'
    ' [95.2060749748, 27.3592668624], [68.51099627, 2.35063674718],'
    ' [85.4121476399, 15.6526750776], [60.1102854322, 74.7123248382],'
    ' [3.21190859967, 26.8138925876], [57.571893782, 32.5563076447],'
    ' [44.9725384282, 66.1920014699], [42.42, 41.41]]')

if __name__ == '__main__':
    main()
16 changes: 15 additions & 1 deletion qiita_db/test/test_util.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@

from unittest import TestCase, main
from tempfile import mkstemp
from os import close, remove
from os import close, remove, mkdir
from os.path import join, exists, basename
from shutil import rmtree
from datetime import datetime
Expand Down Expand Up @@ -365,6 +365,20 @@ def _common_purge_filpeaths_test(self):
def test_purge_filepaths(self):
self._common_purge_filpeaths_test()

def test_empty_trash_upload_folder(self):
# creating file to delete so we know it actually works
study_id = '1'
uploads_fp = join(qdb.util.get_mountpoint("uploads")[0][1], study_id)
trash = join(uploads_fp, 'trash')
if not exists(trash):
mkdir(trash)
fp = join(trash, 'my_file_to_delete.txt')
open(fp, 'w').close()

self.assertTrue(exists(fp))
qdb.util.empty_trash_upload_folder()
self.assertFalse(exists(fp))

def test_purge_filepaths_null_cols(self):
# For more details about the source of the issue that motivates this
# test: http://www.depesz.com/2008/08/13/nulls-vs-not-in/
Expand Down
79 changes: 61 additions & 18 deletions qiita_db/util.py
Original file line number Diff line number Diff line change
Expand Up @@ -714,9 +714,24 @@ def path_builder(db_dir, filepath, mountpoint, subdirectory, obj_id):
for fpid, fp, fp_type_, m, s in results]


def purge_filepaths():
def _rm_files(TRN, fp):
    """Schedule the removal of `fp` to run after `TRN` commits.

    Parameters
    ----------
    TRN : transaction object
        transaction whose post-commit hooks will perform the deletion
    fp : str
        path to the file or directory to remove
    """
    # nothing on disk means nothing to schedule
    if not exists(fp):
        return
    # directories need a recursive delete, plain files a simple unlink
    remover = rmtree if isdir(fp) else remove
    TRN.add_post_commit_func(remover, fp)


def purge_filepaths(delete_files=True):
r"""Goes over the filepath table and remove all the filepaths that are not
used in any place

Parameters
----------
delete_files : bool
if True it will actually delete the files, if False print
"""
with qdb.sql_connection.TRN:
# Get all the (table, column) pairs that reference to the filepath
Expand All @@ -739,30 +754,58 @@ def purge_filepaths():
union_str = " UNION ".join(
["SELECT %s FROM qiita.%s WHERE %s IS NOT NULL" % (col, table, col)
for table, col in qdb.sql_connection.TRN.execute_fetchindex()])
# Get all the filepaths from the filepath table that are not
# referenced from any place in the database
sql = """SELECT filepath_id, filepath, filepath_type, data_directory_id
FROM qiita.filepath FP JOIN qiita.filepath_type FPT
ON FP.filepath_type_id = FPT.filepath_type_id
WHERE filepath_id NOT IN (%s)""" % union_str
qdb.sql_connection.TRN.add(sql)
if union_str:
# Get all the filepaths from the filepath table that are not
# referenced from any place in the database
sql = """SELECT filepath_id, filepath, filepath_type, data_directory_id
FROM qiita.filepath FP JOIN qiita.filepath_type FPT
ON FP.filepath_type_id = FPT.filepath_type_id
WHERE filepath_id NOT IN (%s)""" % union_str
qdb.sql_connection.TRN.add(sql)

# We can now go over and remove all the filepaths
sql = "DELETE FROM qiita.filepath WHERE filepath_id=%s"
db_results = qdb.sql_connection.TRN.execute_fetchindex()
for fp_id, fp, fp_type, dd_id in db_results:
qdb.sql_connection.TRN.add(sql, [fp_id])
if delete_files:
qdb.sql_connection.TRN.add(sql, [fp_id])
fp = join(get_mountpoint_path_by_id(dd_id), fp)
_rm_files(qdb.sql_connection.TRN, fp)
else:
print fp, fp_type

# Remove the data
fp = join(get_mountpoint_path_by_id(dd_id), fp)
if exists(fp):
if fp_type is 'directory':
func = rmtree
else:
func = remove
qdb.sql_connection.TRN.add_post_commit_func(func, fp)
if delete_files:
qdb.sql_connection.TRN.execute()

qdb.sql_connection.TRN.execute()

def empty_trash_upload_folder(delete_files=True):
    r"""Delete all files in the trash folder inside each of the upload
    folders

    Parameters
    ----------
    delete_files : bool
        if True it will actually delete the files, if False print
    """
    # helper that turns a mountpoint into an absolute path under the
    # configured base data directory
    gfp = partial(join, get_db_files_base_dir())
    with qdb.sql_connection.TRN:
        # every 'uploads' data directory may contain per-study 'trash'
        # subfolders created when users discard uploaded files
        sql = """SELECT mountpoint
                 FROM qiita.data_directory
                 WHERE data_type = 'uploads'"""
        qdb.sql_connection.TRN.add(sql)

        for mp in qdb.sql_connection.TRN.execute_fetchflatten():
            # walk the whole upload tree and act only on trash folders
            # NOTE(review): the '/trash' suffix check is POSIX-specific --
            # presumably fine for the deployment targets, confirm
            for path, dirs, files in walk(gfp(mp)):
                if path.endswith('/trash'):
                    if delete_files:
                        for f in files:
                            fp = join(path, f)
                            # deletions are deferred to post-commit hooks
                            # so they only happen if the transaction lands
                            _rm_files(qdb.sql_connection.TRN, fp)
                    else:
                        # dry-run mode: report what would be deleted
                        print files

        if delete_files:
            # execute the transaction so the post-commit deletions run
            qdb.sql_connection.TRN.execute()


def move_filepaths_to_upload_folder(study_id, filepaths):
Expand Down
Loading