Skip to content

Commit c723359

Browse files
antgonzaElDeveloper
authored andcommitted
Scp (#2660)
* fixes #2629 changed button text from Select None to Unselect All * adding ssh backend support * added paramiko * install scp package for python * testing scp directly in travis * testing scp directly in travis * testing scp directly in travis * testing scp directly in travis * testing scp directly in travis * testing scp directly in travis * addressing @ElDeveloper comments * fix _get_valid_files * glob.glob -> glob * fix error * fix error * bringing back download_remote * improving tests * rm docstrings as requested by @ElDeveloper * fixing tests - remote_files->local_files
1 parent 1bd63f8 commit c723359

File tree

12 files changed

+479
-9
lines changed

12 files changed

+479
-9
lines changed

.gitignore

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -46,7 +46,7 @@ config.py
4646
*~
4747

4848
# don't show the data on git status
49-
qiita_db/support_files/test_data
49+
qiita_db/support_files/test_data/
5050

5151
# ignoring redis files
5252
dump.rdb
@@ -60,3 +60,6 @@ qiita_pet/static/doc/
6060

6161
# webdis log
6262
webdis.log
63+
64+
# test keys should be generated in travis
65+
qiita_ware/test/test_data/test_key

.travis.yml

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,11 @@ install:
5050
- export REDBIOM_HOST=http://127.0.0.1:7379
5151
- cp $PWD/qiita_core/support_files/BIOM\ type_2.1.4.conf ~/.qiita_plugins/BIOM\ type_2.1.4\ -\ Qiime2.conf
5252
- touch ~/.bash_profile
53+
- ssh-keygen -t rsa -C "ssh test key" -f $PWD/qiita_ware/test/test_data/test_key -P ""
54+
- mv $PWD/qiita_ware/test/test_data/test_key.pub ~/.ssh/
55+
- ssh-keyscan localhost >> ~/.ssh/known_hosts
56+
- cat ~/.ssh/test_key.pub >> ~/.ssh/authorized_keys
57+
- scp -i $PWD/qiita_ware/test/test_data/test_key localhost:$PWD/qiita_ware/test/test_data/random_key $PWD/qiita_ware/test/test_data/random_key_copy
5358
# Install the biom plugin so we can run the analysis tests
5459
- wget https://data.qiime2.org/distro/core/qiime2-2017.12-py35-linux-conda.yml
5560
- travis_retry conda env create -q -n qtp-biom --file qiime2-2017.12-py35-linux-conda.yml

qiita_db/support_files/patches/66.sql

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,10 @@
1+
-- August 6, 2018
2+
3+
SELECT 42;
4+
15
-- August 22, 2018
26
-- add specimen_id_column to study table (needed to plate samples in labman)
37

48
ALTER TABLE qiita.study ADD specimen_id_column varchar(256);
59

610
COMMENT ON COLUMN qiita.study.specimen_id_column IS 'The name of the column that describes the specimen identifiers (such as what is written on the tubes).';
7-
Lines changed: 166 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,166 @@
1+
# August 6, 2018
2+
# Create parameters for the ssh/scp remote file upload commands
3+
4+
5+
from json import loads, dumps
6+
7+
from qiita_db.sql_connection import TRN
8+
from qiita_db.software import Software, Command
9+
from qiita_db.exceptions import (QiitaDBError, QiitaDBDuplicateError)
10+
from qiita_db.util import convert_to_id
11+
12+
13+
# Copied from patch 58.py; it cannot be imported because of how the patching system works
14+
def create_command(software, name, description, parameters, outputs=None,
15+
analysis_only=False):
16+
r"""Replicates the Command.create code at the time the patch was written"""
17+
# Perform some sanity checks in the parameters dictionary
18+
if not parameters:
19+
raise QiitaDBError(
20+
"Error creating command %s. At least one parameter should "
21+
"be provided." % name)
22+
sql_param_values = []
23+
sql_artifact_params = []
24+
for pname, vals in parameters.items():
25+
if len(vals) != 2:
26+
raise QiitaDBError(
27+
"Malformed parameters dictionary, the format should be "
28+
"{param_name: [parameter_type, default]}. Found: "
29+
"%s for parameter name %s" % (vals, pname))
30+
31+
ptype, dflt = vals
32+
# Check that the type is one of the supported types
33+
supported_types = ['string', 'integer', 'float', 'reference',
34+
'boolean', 'prep_template', 'analysis']
35+
if ptype not in supported_types and not ptype.startswith(
36+
('choice', 'mchoice', 'artifact')):
37+
supported_types.extend(['choice', 'mchoice', 'artifact'])
38+
raise QiitaDBError(
39+
"Unsupported parameters type '%s' for parameter %s. "
40+
"Supported types are: %s"
41+
% (ptype, pname, ', '.join(supported_types)))
42+
43+
if ptype.startswith(('choice', 'mchoice')) and dflt is not None:
44+
choices = set(loads(ptype.split(':')[1]))
45+
dflt_val = dflt
46+
if ptype.startswith('choice'):
47+
# In the choice case, the dflt value is a single string,
48+
# create a list with it the string on it to use the
49+
# issuperset call below
50+
dflt_val = [dflt_val]
51+
else:
52+
# jsonize the list to store it in the DB
53+
dflt = dumps(dflt)
54+
if not choices.issuperset(dflt_val):
55+
raise QiitaDBError(
56+
"The default value '%s' for the parameter %s is not "
57+
"listed in the available choices: %s"
58+
% (dflt, pname, ', '.join(choices)))
59+
60+
if ptype.startswith('artifact'):
61+
atypes = loads(ptype.split(':')[1])
62+
sql_artifact_params.append(
63+
[pname, 'artifact', atypes])
64+
else:
65+
if dflt is not None:
66+
sql_param_values.append([pname, ptype, False, dflt])
67+
else:
68+
sql_param_values.append([pname, ptype, True, None])
69+
70+
with TRN:
71+
sql = """SELECT EXISTS(SELECT *
72+
FROM qiita.software_command
73+
WHERE software_id = %s AND name = %s)"""
74+
TRN.add(sql, [software.id, name])
75+
if TRN.execute_fetchlast():
76+
raise QiitaDBDuplicateError(
77+
"command", "software: %d, name: %s"
78+
% (software.id, name))
79+
# Add the command to the DB
80+
sql = """INSERT INTO qiita.software_command
81+
(name, software_id, description, is_analysis)
82+
VALUES (%s, %s, %s, %s)
83+
RETURNING command_id"""
84+
sql_params = [name, software.id, description, analysis_only]
85+
TRN.add(sql, sql_params)
86+
c_id = TRN.execute_fetchlast()
87+
88+
# Add the parameters to the DB
89+
sql = """INSERT INTO qiita.command_parameter
90+
(command_id, parameter_name, parameter_type, required,
91+
default_value)
92+
VALUES (%s, %s, %s, %s, %s)
93+
RETURNING command_parameter_id"""
94+
sql_params = [[c_id, pname, p_type, reqd, default]
95+
for pname, p_type, reqd, default in sql_param_values]
96+
TRN.add(sql, sql_params, many=True)
97+
TRN.execute()
98+
99+
# Add the artifact parameters
100+
sql_type = """INSERT INTO qiita.parameter_artifact_type
101+
(command_parameter_id, artifact_type_id)
102+
VALUES (%s, %s)"""
103+
supported_types = []
104+
for pname, p_type, atypes in sql_artifact_params:
105+
sql_params = [c_id, pname, p_type, True, None]
106+
TRN.add(sql, sql_params)
107+
pid = TRN.execute_fetchlast()
108+
sql_params = [[pid, convert_to_id(at, 'artifact_type')]
109+
for at in atypes]
110+
TRN.add(sql_type, sql_params, many=True)
111+
supported_types.extend([atid for _, atid in sql_params])
112+
113+
# If the software type is 'artifact definition', there are a couple
114+
# of extra steps
115+
if software.type == 'artifact definition':
116+
# If supported types is not empty, link the software with these
117+
# types
118+
if supported_types:
119+
sql = """INSERT INTO qiita.software_artifact_type
120+
(software_id, artifact_type_id)
121+
VALUES (%s, %s)"""
122+
sql_params = [[software.id, atid]
123+
for atid in supported_types]
124+
TRN.add(sql, sql_params, many=True)
125+
# If this is the validate command, we need to add the
126+
# provenance and name parameters. These are used internally,
127+
# that's why we are adding them here
128+
if name == 'Validate':
129+
sql = """INSERT INTO qiita.command_parameter
130+
(command_id, parameter_name, parameter_type,
131+
required, default_value)
132+
VALUES (%s, 'name', 'string', 'False',
133+
'dflt_name'),
134+
(%s, 'provenance', 'string', 'False', NULL)
135+
"""
136+
TRN.add(sql, [c_id, c_id])
137+
138+
# Add the outputs to the command
139+
if outputs:
140+
sql = """INSERT INTO qiita.command_output
141+
(name, command_id, artifact_type_id)
142+
VALUES (%s, %s, %s)"""
143+
sql_args = [[pname, c_id, convert_to_id(at, 'artifact_type')]
144+
for pname, at in outputs.items()]
145+
TRN.add(sql, sql_args, many=True)
146+
TRN.execute()
147+
148+
return Command(c_id)
149+
150+
151+
with TRN:
    qiita_plugin = Software.from_name_and_version('Qiita', 'alpha')

    # (command name, description, parameters) for every remote-file command
    # registered by this patch. Each parameter is [type, default]; a None
    # default makes create_command store the parameter as required.
    remote_commands = (
        # Create the 'list_remote_files' command
        ("list_remote_files",
         "retrieves list of valid study files from remote dir",
         {'url': ['string', None],
          'private_key': ['string', None]}),
        # Create the 'download_remote_files' command
        ("download_remote_files",
         "downloads valid study files from remote dir",
         {'url': ['string', None],
          'destination': ['string', None],
          'private_key': ['string', None]}),
    )

    for cmd_name, cmd_description, parameters in remote_commands:
        create_command(qiita_plugin, cmd_name, cmd_description, parameters)

qiita_ware/commands.py

Lines changed: 144 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,12 +6,16 @@
66
# The full license is in the file LICENSE, distributed with this software.
77
# -----------------------------------------------------------------------------
88

9-
from os.path import join, isdir
9+
from os.path import basename, isdir, join
1010
from shutil import rmtree
1111
from tarfile import open as taropen
1212
from tempfile import mkdtemp
1313
from os import environ
1414
from traceback import format_exc
15+
from paramiko import AutoAddPolicy, RSAKey, SSHClient
16+
from scp import SCPClient
17+
from urlparse import urlparse
18+
from functools import partial
1519

1620
from qiita_db.artifact import Artifact
1721
from qiita_db.logger import LogEntry
@@ -21,6 +25,145 @@
2125
from qiita_ware.exceptions import ComputeError, EBISubmissionError
2226

2327

28+
def _ssh_session(p_url, private_key):
29+
"""Initializes an SSH session
30+
31+
Parameters
32+
----------
33+
p_url : urlparse object
34+
a parsed url
35+
private_key : str
36+
Path to the private key used to authenticate connection
37+
38+
Returns
39+
-------
40+
paramiko.SSHClient
41+
the SSH session
42+
"""
43+
scheme = p_url.scheme
44+
hostname = p_url.hostname
45+
# if port is '' Python 2.7.6 will raise an error
46+
try:
47+
port = p_url.port
48+
except Exception:
49+
port = 22
50+
username = p_url.username
51+
52+
if scheme == 'scp' or scheme == 'sftp':
53+
54+
# if port not specified, use default 22 as port
55+
if port is None:
56+
port = 22
57+
58+
# step 1: both schemes require an SSH connection
59+
ssh = SSHClient()
60+
ssh.set_missing_host_key_policy(AutoAddPolicy)
61+
62+
# step 2: connect to fileserver
63+
key = RSAKey.from_private_key_file(private_key)
64+
ssh.connect(hostname, port=port, username=username,
65+
pkey=key, look_for_keys=False)
66+
return ssh
67+
else:
68+
raise ValueError(
69+
'Not valid scheme. Valid options are ssh and scp.')
70+
71+
72+
def _list_valid_files(ssh, directory):
    """Gets a list of valid study files from ssh session

    Parameters
    ----------
    ssh : paramiko.SSHClient
        An initialized ssh session
    directory : str
        the directory to search for files

    Returns
    -------
    list of str
        list of valid study files (basenames), i.e. the entries of
        `directory` whose name ends with one of the extensions in
        ``qiita_config.valid_upload_extension``
    """
    valid_file_extensions = tuple(qiita_config.valid_upload_extension)

    sftp = ssh.open_sftp()
    try:
        files = sftp.listdir(directory)
    finally:
        # always release the SFTP channel, even if the listing fails
        # (e.g. the remote directory does not exist)
        sftp.close()

    # str.endswith accepts a tuple: a file is valid if any extension matches
    return [f for f in files if f.endswith(valid_file_extensions)]
95+
96+
97+
def list_remote(URL, private_key):
    """Retrieve valid study files from a remote directory

    Parameters
    ----------
    URL : str
        The url to the remote directory
    private_key : str
        Path to the private key used to authenticate connection

    Returns
    -------
    list of str
        list of files that are valid study files

    Raises
    ------
    ValueError
        If the scheme of the URL is not supported by `_ssh_session`

    Notes
    -----
    Only the allowed extensions described by the config file
    will be listed.
    """
    p_url = urlparse(URL)
    ssh = _ssh_session(p_url, private_key)
    try:
        # the path component of the URL is the remote directory to list
        return _list_valid_files(ssh, p_url.path)
    finally:
        # close the session even when the listing fails
        ssh.close()
123+
124+
125+
def download_remote(URL, private_key, destination):
    """Add study files by specifying a remote directory to download from

    Parameters
    ----------
    URL : str
        The url to the remote directory
    private_key : str
        Path to the private key used to authenticate connection
    destination : str
        The path to the study upload folder

    Raises
    ------
    ValueError
        If the scheme of the URL is not supported by `_ssh_session`

    Notes
    -----
    Only the files accepted by `_list_valid_files` are downloaded; each one
    is stored in `destination` under its basename.
    """
    # step 1: initialize connection and list valid files
    p_url = urlparse(URL)
    ssh = _ssh_session(p_url, private_key)

    try:
        directory = p_url.path
        valid_files = _list_valid_files(ssh, directory)
        file_paths = [join(directory, f) for f in valid_files]

        # step 2: download files
        scheme = p_url.scheme
        # note that scp/sftp's code seems similar but the keyword for the
        # local target differs: local_path (scp) vs localpath (sftp)
        if scheme == 'scp':
            scp = SCPClient(ssh.get_transport())
            for f in file_paths:
                # basename keeps the download inside `destination`
                scp.get(f, local_path=join(destination, basename(f)))
        elif scheme == 'sftp':
            sftp = ssh.open_sftp()
            for f in file_paths:
                sftp.get(f, localpath=join(destination, basename(f)))
    finally:
        # step 3: close the connection, also when listing or a transfer
        # fails; closing the client tears down its transport
        ssh.close()
165+
166+
24167
def submit_EBI(artifact_id, action, send, test=False):
25168
"""Submit an artifact to EBI
26169

0 commit comments

Comments
 (0)