-
Couldn't load subscription status.
- Fork 79
Separate plugin file system from BASA_DATA_DIR
#3480
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 100 commits
aab4750
49b0448
2840601
f1c9149
2eb6d08
48ca02a
dc4bd20
e1f3c13
48f09a5
baf40df
670a55a
091ffc6
1feefc0
29ce7dd
2ca5bb8
1b787cb
88319b2
02d9af0
b1e1b6b
a7d3b84
125835a
5f28092
19b4d7b
33f2879
c9d413a
6bfafcb
9a5e7cc
b2fc279
5cc0896
949084d
c3b040b
d96bbae
a491870
b1baece
81fdcbf
e0c4002
bb9c685
79e794a
c9aacec
a5deb83
7693c5e
b4ab605
baa7230
52e57ca
4061373
51307d1
7a0ec9f
0c365a1
e993a99
ca5f7f6
4d5c6a2
fd6d15e
9c8b824
a654e48
27f6d35
85bf1fa
8a504cc
ef05eed
a5270a0
2efb70f
c8b1198
3d6f718
3957030
648f2f9
73f92b9
0dc243d
9b81163
bb03167
a76288b
8cc718f
89cab41
5c3fa7b
769e382
1a61930
1b55c47
fa26cff
d377393
5e34bab
931bc4d
b0de5c7
6c1fd0e
5499e04
44d4315
2b70f7c
dd622c6
85d30f4
9858b42
2f43e89
3f77f85
ad06f27
28cbb98
3edd07e
5bad769
d5f00d7
e9028a8
7efc716
91bbbe4
2342304
3fce312
be38c10
6cdbbe1
53adaf0
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,9 @@ | ||
| from .file_transfer_handlers import (FetchFileFromCentralHandler, | ||
| PushFileToCentralHandler) | ||
|
|
||
| __all__ = ['FetchFileFromCentralHandler'] | ||
|
|
||
| ENDPOINTS = [ | ||
| (r"/cloud/fetch_file_from_central/(.*)", FetchFileFromCentralHandler), | ||
| (r"/cloud/push_file_to_central/", PushFileToCentralHandler) | ||
| ] |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,94 @@ | ||
| import os | ||
|
|
||
| from tornado.web import HTTPError, RequestHandler | ||
| from tornado.gen import coroutine | ||
|
|
||
| from qiita_core.util import execute_as_transaction | ||
| from qiita_db.handlers.oauth2 import authenticate_oauth | ||
| from qiita_core.qiita_settings import qiita_config | ||
|
|
||
|
|
||
| class FetchFileFromCentralHandler(RequestHandler): | ||
| @authenticate_oauth | ||
| @coroutine | ||
| @execute_as_transaction | ||
| def get(self, requested_filepath): | ||
| # ensure we have an absolute path, i.e. starting at / | ||
| filepath = os.path.join(os.path.sep, requested_filepath) | ||
| # use a canonic version of the filepath | ||
| filepath = os.path.abspath(filepath) | ||
|
|
||
| # canonic version of base_data_dir | ||
| basedatadir = os.path.abspath(qiita_config.base_data_dir) | ||
|
|
||
| # TODO: can we somehow check, if the requesting client (which should be | ||
| # one of the plugins) was started from a user that actually has | ||
| # access to the requested file? | ||
|
Comment on lines
+24
to
+26
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. In theory, a user can only start a job if they have access to that artifact. However, I don't see why this shouldn't be added. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I am also undecided at the moment. I think of multiple "runner" locations for plugins, e.g. UCSD, JLU Gießen, .... Do we really want to trust these runners to be nice and access only files of the central filesystem necessary for their job?! |
||
|
|
||
| if not filepath.startswith(basedatadir): | ||
| # attempt to access files outside of the BASE_DATA_DIR | ||
| # intentionally NOT reporting the actual location to avoid exposing | ||
| # instance internal information | ||
| raise HTTPError(403, reason=( | ||
| "You cannot access files outside of " | ||
| "the BASE_DATA_DIR of Qiita!")) | ||
|
|
||
| if not os.path.exists(filepath): | ||
| raise HTTPError(403, reason=( | ||
| "The requested file is not present in Qiita's BASE_DATA_DIR!")) | ||
|
|
||
| # delivery of the file via nginx requires replacing the basedatadir | ||
| # with the prefix defined in the nginx configuration for the | ||
| # base_data_dir, '/protected/' by default | ||
| protected_filepath = filepath.replace(basedatadir, '/protected') | ||
|
|
||
| self.set_header('Content-Type', 'application/octet-stream') | ||
| self.set_header('Content-Transfer-Encoding', 'binary') | ||
| self.set_header('X-Accel-Redirect', protected_filepath) | ||
| self.set_header('Content-Description', 'File Transfer') | ||
| self.set_header('Expires', '0') | ||
| self.set_header('Cache-Control', 'no-cache') | ||
| self.set_header('Content-Disposition', | ||
| 'attachment; filename=%s' % os.path.basename( | ||
| protected_filepath)) | ||
| self.finish() | ||
|
|
||
|
|
||
| class PushFileToCentralHandler(RequestHandler): | ||
| @authenticate_oauth | ||
| @coroutine | ||
| @execute_as_transaction | ||
| def post(self): | ||
|
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Wondering if this is a web blocking operation? If yes, I guess we should add a new block to the nginx/supervisord files so they have their own workers. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. hm, shouldn't the @coroutine decorator spawn a new "thread" / "worker"? But this might only be true for python/tornado, not for the nginx itself. But doesn't come nginx with load balancing? |
||
| if not self.request.files: | ||
| raise HTTPError(400, reason='No files to upload defined!') | ||
|
|
||
| # canonic version of base_data_dir | ||
| basedatadir = os.path.abspath(qiita_config.base_data_dir) | ||
| stored_files = [] | ||
|
|
||
| for filespath, filelist in self.request.files.items(): | ||
| if filespath.startswith(basedatadir): | ||
| filespath = filespath[len(basedatadir):] | ||
|
|
||
| for file in filelist: | ||
| filepath = os.path.join(filespath, file['filename']) | ||
| # remove leading / | ||
| if filepath.startswith(os.sep): | ||
| filepath = filepath[len(os.sep):] | ||
| filepath = os.path.abspath(os.path.join(basedatadir, filepath)) | ||
|
|
||
| if os.path.exists(filepath): | ||
| raise HTTPError(403, reason=( | ||
| "The requested file is already " | ||
| "present in Qiita's BASE_DATA_DIR!")) | ||
|
|
||
| os.makedirs(os.path.dirname(filepath), exist_ok=True) | ||
| with open(filepath, "wb") as f: | ||
| f.write(file['body']) | ||
| stored_files.append(filepath) | ||
|
|
||
| self.write("Stored %i files into BASE_DATA_DIR of Qiita:\n%s\n" % ( | ||
| len(stored_files), | ||
| '\n'.join(map(lambda x: ' - %s' % x, stored_files)))) | ||
|
|
||
| self.finish() | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,93 @@ | ||
| from unittest import main | ||
| from os.path import exists, basename | ||
| from os import remove | ||
| import filecmp | ||
|
|
||
| from qiita_db.handlers.tests.oauthbase import OauthTestingBase | ||
| import qiita_db as qdb | ||
|
|
||
|
|
||
| class FetchFileFromCentralHandlerTests(OauthTestingBase): | ||
| def setUp(self): | ||
| super(FetchFileFromCentralHandlerTests, self).setUp() | ||
|
|
||
| def test_get(self): | ||
| endpoint = '/cloud/fetch_file_from_central/' | ||
| base_data_dir = qdb.util.get_db_files_base_dir() | ||
|
|
||
| obs = self.get_authed(endpoint + 'nonexistingfile') | ||
| self.assertEqual(obs.status_code, 403) | ||
| self.assertIn('outside of the BASE_DATA_DIR', obs.reason) | ||
|
|
||
| obs = self.get_authed( | ||
| endpoint + base_data_dir[1:] + '/nonexistingfile') | ||
| self.assertEqual(obs.status_code, 403) | ||
| self.assertIn('The requested file is not present', obs.reason) | ||
|
|
||
| obs = self.get_authed( | ||
| endpoint + base_data_dir[1:] + | ||
| '/raw_data/FASTA_QUAL_preprocessing.fna') | ||
| self.assertEqual(obs.status_code, 200) | ||
| self.assertIn('FLP3FBN01ELBSX length=250 xy=1766_01', str(obs.content)) | ||
|
|
||
|
|
||
| class PushFileToCentralHandlerTests(OauthTestingBase): | ||
| def setUp(self): | ||
| super(PushFileToCentralHandlerTests, self).setUp() | ||
|
|
||
| def test_post(self): | ||
| endpoint = '/cloud/push_file_to_central/' | ||
| base_data_dir = qdb.util.get_db_files_base_dir() | ||
|
|
||
| # create a test file "locally", i.e. in current working directory | ||
| fp_source = 'foo.bar' | ||
| with open(fp_source, 'w') as f: | ||
| f.write("this is a test\n") | ||
| self._files_to_remove.append(fp_source) | ||
|
|
||
| # if successful, expected location of the file in BASE_DATA_DIR | ||
| fp_target = base_data_dir + '/bar/' + basename(fp_source) | ||
| self._files_to_remove.append(fp_target) | ||
|
|
||
| # create a second test file | ||
| fp_source2 = 'foo_two.bar' | ||
| with open(fp_source2, 'w') as f: | ||
| f.write("this is another test\n") | ||
| self._files_to_remove.append(fp_source2) | ||
| fp_target2 = base_data_dir + '/barr/' + basename(fp_source2) | ||
| self._files_to_remove.append(fp_target2) | ||
|
|
||
| # test raise error if no file is given | ||
| obs = self.post_authed(endpoint) | ||
| self.assertEqual(obs.reason, "No files to upload defined!") | ||
|
|
||
| # test correct mechanism | ||
| with open(fp_source, 'rb') as fh: | ||
| obs = self.post_authed(endpoint, files={'bar/': fh}) | ||
| self.assertIn('Stored 1 files into BASE_DATA_DIR of Qiita', | ||
| str(obs.content)) | ||
| self.assertTrue(filecmp.cmp(fp_source, fp_target, shallow=False)) | ||
|
|
||
| # check if error is raised, if file already exists | ||
| with open(fp_source, 'rb') as fh: | ||
| obs = self.post_authed(endpoint, files={'bar/': fh}) | ||
| self.assertIn("already present in Qiita's BASE_DATA_DIR!", | ||
| obs.reason) | ||
|
|
||
| # test transfer of multiple files | ||
| if exists(fp_target): | ||
| remove(fp_target) | ||
| with open(fp_source, 'rb') as fh1: | ||
| with open(fp_source2, 'rb') as fh2: | ||
| obs = self.post_authed( | ||
| endpoint, files={'bar/': fh1, 'barr/': fh2}) | ||
| self.assertIn('Stored 2 files into BASE_DATA_DIR of Qiita', | ||
| str(obs.content)) | ||
| self.assertTrue(filecmp.cmp(fp_source, fp_target, | ||
| shallow=False)) | ||
| self.assertTrue(filecmp.cmp(fp_source2, fp_target2, | ||
| shallow=False)) | ||
|
|
||
|
|
||
| if __name__ == "__main__": | ||
| main() |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This should be part of the configuration file, right?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
These are credentials necessary to be present in qiitaDB to obtain a token ... finally on the plugin side. But here it is within the db side, just for testing.
For real plugins, client_id and client_secret get initialized with random values and the "plugin registry" process is when qiita admins "trust" this plugin and add their credentials into the DB.
So yes, these are values stored in the plugin configuration file
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Opened an issue about this: #3481