forked from galaxyproject/galaxy
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathdump_library_paths.py
158 lines (132 loc) · 4.65 KB
/
dump_library_paths.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
"""Dump paths to library dataset files (e.g. for backups). Caveats:
- Does not perform any permissions checks on library *contents* (but you can restrict to "public" libraries).
- Only the latest version of undeleted library datasets are dumped.
To use with rsync:
$ python dump_library_paths.py [options] -o libfiles.txt --relative /srv/galaxy/datasets
$ rsync -arvPR --files-from=libfiles.txt /srv/galaxy/datasets backup@backuphost:/backup/galaxy/datasets
Or all in one:
$ python dump_library_paths.py [options] --relative /srv/galaxy/datasets | rsync -arvPR \
--files-from=- /srv/galaxy/datasets backuphost:/backup/galaxy/datasets
"""
import logging
import os
import sys
from sqlalchemy import (
false,
not_,
)
sys.path.insert(1, os.path.abspath(os.path.join(os.path.dirname(__file__), os.pardir, "lib")))
import galaxy.config
import galaxy.model.mapping
from galaxy.objectstore import build_object_store_from_config
from galaxy.util.script import main_factory
# One-line description used by main_factory for the CLI help text.
DESCRIPTION = "Locate all datasets in libraries."
# argparse argument specs consumed by galaxy.util.script.main_factory:
# each entry is (flag tuple, kwargs for ArgumentParser.add_argument).
ARGUMENTS = (
    (
        ("-v", "--verbose"),
        dict(
            action="store_true",
            default=False,
            help="Verbose logging output",
        ),
    ),
    (
        ("-o", "--output"),
        dict(
            # The literal string "stdout" selects sys.stdout (see _open_output).
            default="stdout",
            help="Write output to file",
        ),
    ),
    (
        ("-p", "--public"),
        dict(action="store_true", default=False, help='Only dump files in "public" libraries'),
    ),
    (
        ("--relative",),
        dict(
            default=None,
            help="Write paths relative to the given directory",
        ),
    ),
    (
        ("--exists",),
        dict(
            action="store_true",
            default=False,
            help="Check for dataset existence, warn if it does not exist",
        ),
    ),
)
# Log to stderr so log output never mixes with the path listing on stdout.
logging.basicConfig(stream=sys.stderr)
log = logging.getLogger(__name__)
def _config_logging(args):
    """Raise the module logger to DEBUG when -v/--verbose was given."""
    if not args.verbose:
        return
    log.setLevel(logging.DEBUG)
def _get_libraries(args, model):
    """Return a query over undeleted libraries, ordered by name.

    With --public, libraries that carry any LIBRARY_ACCESS permission
    (i.e. restricted libraries) are excluded from the result.
    """
    log.debug("Setting up query")
    access_action = model.security_agent.permitted_actions.LIBRARY_ACCESS.action
    libraries = model.context.query(model.Library).filter(model.Library.table.c.deleted == false())
    if args.public:
        permissions = (
            model.context.query(model.LibraryPermissions)
            .filter(model.LibraryPermissions.table.c.action == access_action)
            .distinct()
        )
        restricted_ids = {permission.library_id for permission in permissions}
        if restricted_ids:
            libraries = libraries.filter(not_(model.Library.table.c.id.in_(restricted_ids)))
    return libraries.order_by(model.Library.table.c.name)
def _walk_library(folder):
datasets = set()
for f in folder.folders:
datasets.update(_walk_library(f))
for ld in folder.active_datasets:
datasets.add(ld.library_dataset_dataset_association.dataset)
return datasets
def _walk_libraries(args, model):
    """Yield (library, dataset) pairs for every dataset in every matching library."""
    for lib in _get_libraries(args, model):
        for ds in _walk_library(lib.root_folder):
            yield lib, ds
def _open_output(args):
if args.output == "stdout":
return sys.stdout
else:
return open(args.output, "w")
def _path(path, args):
if args.relative is not None:
return os.path.relpath(path, args.relative)
else:
return path
def _get_library_dataset_paths(args, kwargs):
    """Walk all matching libraries and write the path of each dataset file.

    Paths are written one per line to the stream selected by -o/--output,
    optionally relativized (--relative) and existence-checked (--exists).
    Composite datasets additionally get their extra-files directory listed.
    """
    _config_logging(args)
    config = galaxy.config.Configuration(**kwargs)
    object_store = build_object_store_from_config(config)
    model = galaxy.model.mapping.init("/tmp/", kwargs.get("database_connection"), object_store=object_store)
    output = _open_output(args)
    try:
        last_library = None
        log.debug("Beginning library walk")
        for library, dataset in _walk_libraries(args, model):
            if library != last_library:
                log.info("Library: %s", library.name)
                last_library = library
            filename = object_store.get_filename(dataset)
            files_dir = dataset.get_extra_files_path()
            # With --exists, only emit datasets whose primary file is present;
            # otherwise emit unconditionally.
            if not args.exists or object_store.exists(dataset):
                output.write("%s\n" % _path(filename, args))
            else:
                log.warning("Missing %s", filename)
            # Composite datasets also have an extra-files directory to back up.
            if files_dir and os.path.exists(files_dir):
                output.write("%s\n" % _path(files_dir, args))
    finally:
        # Only close streams we opened ourselves -- closing sys.stdout would
        # break the documented "pipe straight into rsync" usage.
        if output is not sys.stdout:
            output.close()
# Map action names (selectable on the command line) to their implementations.
ACTIONS = {
    "get_library_dataset_paths": _get_library_dataset_paths,
}
if __name__ == "__main__":
    # main_factory builds an argparse-based CLI from DESCRIPTION/ARGUMENTS and
    # dispatches to the selected entry in ACTIONS.
    main = main_factory(
        description=DESCRIPTION, actions=ACTIONS, arguments=ARGUMENTS, default_action="get_library_dataset_paths"
    )
    main()