forked from yugabyte/yugabyte-db
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathlibrary_packager.py
402 lines (332 loc) · 16.7 KB
/
library_packager.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
#!/usr/bin/env python2
# Copyright (c) YugaByte, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
# in compliance with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software distributed under the License
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
# or implied. See the License for the specific language governing permissions and limitations
# under the License.
#
"""
Copyright (c) YugaByte, Inc.
Finds all Linux dynamic libraries that have to be packaged with the YugaByte distribution tarball by
starting from a small set of executables and walking the dependency graph. Creates a self-sufficient
distribution directory.
Run doctest tests as follows:
python -m doctest python/yb/library_packager.py
"""
import argparse
import collections
import glob
import json
import logging
import os
import re
import shutil
import subprocess
import sys
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
from yb.command_util import run_program, mkdir_p # nopep8
from yb.linuxbrew import get_linuxbrew_dir # nopep8
from yb.common_util import get_thirdparty_dir, YB_SRC_ROOT, sorted_grouped_by, \
safe_path_join # nopep8
# A resolved shared library dependency shown by ldd.
# Example (split across two lines):
# libmaster.so => /home/mbautin/code/yugabyte/build/debug-gcc-dynamic/lib/libmaster.so
# (0x00007f941fa5f000)
RESOLVED_DEP_RE = re.compile(r'^\s*(\S+)\s+=>\s+(\S.*\S)\s+[(]')
SYSTEM_LIBRARY_PATH_RE = re.compile(r'^/(usr|lib|lib64)/.*')
SYSTEM_LIBRARY_PATHS = ['/usr/lib', '/usr/lib64', '/lib', '/lib64',
# This is used on Ubuntu
'/usr/lib/x86_64-linux-gnu',
'/lib/x86_64-linux-gnu']
HOME_DIR = os.path.expanduser('~')
LINUXBREW_HOME = get_linuxbrew_dir()
LINUXBREW_CELLAR_GLIBC_DIR = safe_path_join(LINUXBREW_HOME, 'Cellar', 'glibc')
ADDITIONAL_LIB_NAME_GLOBS = ['libnss_*', 'libresolv*']
LINUXBREW_LDD_PATH = safe_path_join(LINUXBREW_HOME, 'bin', 'ldd')
YB_SCRIPT_BIN_DIR = os.path.join(YB_SRC_ROOT, 'bin')
YB_BUILD_SUPPORT_DIR = os.path.join(YB_SRC_ROOT, 'build-support')
PATCHELF_NOT_AN_ELF_EXECUTABLE = 'not an ELF executable'
PATCHELF_PATH = safe_path_join(LINUXBREW_HOME, 'bin', 'patchelf')
LIBRARY_PATH_RE = re.compile('^(.*[.]so)(?:$|[.].*$)')
LIBRARY_CATEGORIES = ['system', 'yb', 'yb-thirdparty', 'linuxbrew']
# This is an alternative to global variables, bundling a few commonly used things.
DistributionContext = collections.namedtuple(
'DistributionContext',
['build_dir',
'dest_dir',
'verbose_mode'])
class Dependency:
"""
Describes a dependency of an executable or a shared library on another shared library.
@param name: the name of the library as requested by the original executable/shared library
@param target: target file pointed to by the dependency
"""
def __init__(self, name, target, origin, context):
self.name = name
self.target = target
self.category = None
self.context = context
self.origin = origin
def __hash__(self):
return hash(self.name) ^ hash(self.target)
def __eq__(self, other):
return self.name == other.name and \
self.target == other.target
def __str__(self):
return "Dependency(name='{}', target='{}', origin='{}')".format(
self.name, self.target, self.origin)
def __repr__(self):
return str(self)
def __cmp__(self, other):
return (self.name, self.target) < (other.name, other.target)
def get_category(self):
"""
Categorizes binaries into a few buckets:
- yb -- YugaByte product itself
- yb-thirdparty -- built with YugaByte
- linuxbrew -- built using Linuxbrew
- system -- grabbed from a system-wide library directory
"""
if self.category:
return self.category
if self.target.startswith(LINUXBREW_HOME + '/'):
self.category = 'linuxbrew'
elif self.target.startswith(get_thirdparty_dir() + '/'):
self.category = 'yb-thirdparty'
elif self.target.startswith(self.context.build_dir + '/'):
self.category = 'yb'
elif (self.target.startswith(YB_SCRIPT_BIN_DIR + '/') or
self.target.startswith(YB_BUILD_SUPPORT_DIR + '/')):
self.category = 'yb-scripts'
if not self.category:
for system_library_path in SYSTEM_LIBRARY_PATHS:
if self.target.startswith(system_library_path + '/'):
self.category = 'system'
break
if self.category:
if self.category not in LIBRARY_CATEGORIES:
raise RuntimeError(
("Internal error: library category computed as '{}', must be one of: {}. " +
"Dependency: {}").format(self.category, LIBRARY_CATEGORIES, self))
return self.category
raise RuntimeError(
("Could not determine the category of this binary "
"(yugabyte / yb-thirdparty / linuxbrew / system): '{}'. "
"Does not reside in the Linuxbrew directory ('{}'), "
"YB third-party directory ('{}'), "
"YB build directory ('{}'), "
"YB general-purpose script directory ('{}'), "
"YB build support script directory ('{}'), "
"and does not appear to be a system library (does not start with any of {})."
).format(self.target, LINUXBREW_HOME, get_thirdparty_dir(), self.context.build_dir,
YB_SCRIPT_BIN_DIR, YB_BUILD_SUPPORT_DIR, SYSTEM_LIBRARY_PATHS))
def add_common_arguments(parser):
"""
Add command-line arguments common between library_packager_old.py invoked as a script, and
the yb_release.py script.
"""
parser.add_argument('--verbose',
help='Enable verbose output.',
action='store_true')
def run_patchelf(*args):
patchelf_result = run_program([PATCHELF_PATH] + list(args), error_ok=True)
if patchelf_result.returncode != 0 and patchelf_result.stderr not in [
'cannot find section .interp',
'cannot find section .dynamic',
PATCHELF_NOT_AN_ELF_EXECUTABLE]:
raise RuntimeError(patchelf_result.error_msg)
return patchelf_result
def symlink(source, link_path):
if os.path.exists(link_path):
if not source.startswith('/') or os.path.realpath(link_path) != os.path.realpath(source):
raise RuntimeError(
"Trying to create symlink '{}' -> '{}' but it already points to '{}'".format(
link_path, source, os.readlink(link_path)))
# In the case source is an absolute path and the link already points at it there will be
# no error.
else:
os.symlink(source, link_path)
class LibraryPackager:
"""
A utility for starting with a set of 'seed' executables, and walking the dependency tree of
libraries to find all libraries that need to be packaged with the product.
"""
def __init__(self,
build_dir,
seed_executable_patterns,
dest_dir,
verbose_mode=False):
build_dir = os.path.realpath(build_dir)
if not os.path.exists(build_dir):
raise IOError("Build directory '{}' does not exist".format(build_dir))
self.seed_executable_patterns = seed_executable_patterns
self.dest_dir = dest_dir
logging.debug(
"Traversing the dependency graph of executables/libraries, starting "
"with seed executable patterns: {}".format(", ".join(seed_executable_patterns)))
self.context = DistributionContext(
dest_dir=dest_dir,
build_dir=build_dir,
verbose_mode=verbose_mode)
self.installed_dyn_linked_binaries = []
self.main_dest_bin_dir = os.path.join(self.dest_dir, 'bin')
self.postgres_dest_bin_dir = os.path.join(self.dest_dir, 'postgres', 'bin')
def install_dyn_linked_binary(self, src_path, dest_dir):
if not os.path.isdir(dest_dir):
raise RuntimeError("Not a directory: '{}'".format(dest_dir))
shutil.copy(src_path, dest_dir)
installed_binary_path = os.path.join(dest_dir, os.path.basename(src_path))
self.installed_dyn_linked_binaries.append(installed_binary_path)
return installed_binary_path
def find_elf_dependencies(self, elf_file_path):
"""
Run ldd on the given ELF file and find libraries that it depends on. Also run patchelf and
get the dynamic linker used by the file.
@param elf_file_path: ELF file (executable/library) path
"""
elf_file_path = os.path.realpath(elf_file_path)
if SYSTEM_LIBRARY_PATH_RE.match(elf_file_path):
ldd_path = '/usr/bin/ldd'
else:
ldd_path = LINUXBREW_LDD_PATH
ldd_result = run_program([ldd_path, elf_file_path], error_ok=True)
dependencies = set()
if ldd_result.returncode != 0:
# Interestingly, the below error message is printed to stdout, not stderr.
if ldd_result.stdout == 'not a dynamic executable':
logging.debug(
"Not a dynamic executable: {}, ignoring dependency tracking".format(
elf_file_path))
return dependencies
raise RuntimeError(ldd_result.error_msg)
for ldd_output_line in ldd_result.stdout.split("\n"):
resolved_dep_match = RESOLVED_DEP_RE.match(ldd_output_line)
if resolved_dep_match:
lib_name = resolved_dep_match.group(1)
lib_resolved_path = os.path.realpath(resolved_dep_match.group(2))
dependencies.add(Dependency(lib_name, lib_resolved_path, elf_file_path,
self.context))
tokens = ldd_output_line.split()
if len(tokens) >= 4 and tokens[1:4] == ['=>', 'not', 'found']:
missing_lib_name = tokens[0]
raise RuntimeError("Library not found for '{}': {}".format(
elf_file_path, missing_lib_name))
# If we matched neither RESOLVED_DEP_RE or the "not found" case, that is still fine,
# e.g. there could be a line of the following form in the ldd output:
# linux-vdso.so.1 => (0x00007ffc0f9d2000)
return dependencies
@staticmethod
def is_postgres_binary(file_path):
return os.path.dirname(file_path).endswith('/postgres/bin')
def get_dest_bin_dir_for_executable(self, file_path):
if self.is_postgres_binary(file_path):
dest_bin_dir = self.postgres_dest_bin_dir
else:
dest_bin_dir = self.main_dest_bin_dir
return dest_bin_dir
@staticmethod
def join_binary_names_for_bash(binary_names):
return ' '.join(['"{}"'.format(name) for name in binary_names])
def package_binaries(self):
"""
The main entry point to this class. Arranges binaries (executables and shared libraries),
starting with the given set of "seed executables", in the destination directory so that
the executables can find all of their dependencies.
"""
all_deps = []
dest_lib_dir = os.path.join(self.dest_dir, 'lib')
mkdir_p(dest_lib_dir)
mkdir_p(self.main_dest_bin_dir)
mkdir_p(self.postgres_dest_bin_dir)
main_elf_names_to_patch = []
postgres_elf_names_to_patch = []
for seed_executable_glob in self.seed_executable_patterns:
glob_results = glob.glob(seed_executable_glob)
if not glob_results:
raise RuntimeError("No files found matching the pattern '{}'".format(
seed_executable_glob))
for executable in glob_results:
deps = self.find_elf_dependencies(executable)
all_deps += deps
dest_bin_dir = self.get_dest_bin_dir_for_executable(executable)
if deps:
self.install_dyn_linked_binary(executable, dest_bin_dir)
executable_basename = os.path.basename(executable)
if self.is_postgres_binary(executable):
postgres_elf_names_to_patch.append(executable_basename)
else:
main_elf_names_to_patch.append(executable_basename)
else:
# This is probably a script.
shutil.copy(executable, dest_bin_dir)
# Not using the install_dyn_linked_binary method for copying patchelf and ld.so as we won't
# need to do any post-processing on these two later.
shutil.copy(PATCHELF_PATH, self.main_dest_bin_dir)
ld_path = os.path.join(LINUXBREW_HOME, 'lib', 'ld.so')
shutil.copy(ld_path, dest_lib_dir)
all_deps = sorted(set(all_deps))
for dep_name, deps in sorted_grouped_by(all_deps, lambda dep: dep.name):
targets = sorted(set([dep.target for dep in deps]))
if len(targets) > 1:
raise RuntimeError(
"Multiple dependencies with the same name {} but different targets: {}".format(
dep_name, deps
))
for category, deps_in_category in sorted_grouped_by(all_deps,
lambda dep: dep.get_category()):
logging.info("Found {} dependencies in category '{}':".format(
len(deps_in_category), category))
max_name_len = max([len(dep.name) for dep in deps_in_category])
for dep in sorted(deps_in_category, key=lambda dep: dep.target):
logging.info(" {} -> {}".format(
dep.name + ' ' * (max_name_len - len(dep.name)), dep.target))
category_dest_dir = os.path.join(dest_lib_dir, category)
mkdir_p(category_dest_dir)
for dep in deps_in_category:
self.install_dyn_linked_binary(dep.target, category_dest_dir)
if os.path.basename(dep.target) != dep.name:
symlink(os.path.basename(dep.target),
os.path.join(category_dest_dir, dep.name))
linuxbrew_lib_dest_dir = os.path.join(dest_lib_dir, 'linuxbrew')
# Add libresolv and libnss_* libraries explicitly because they are loaded by glibc at
# runtime and will not be discovered automatically using ldd.
for additional_lib_name_glob in ADDITIONAL_LIB_NAME_GLOBS:
for lib_path in glob.glob(os.path.join(LINUXBREW_CELLAR_GLIBC_DIR, '*', 'lib',
additional_lib_name_glob)):
lib_basename = os.path.basename(lib_path)
if lib_basename.endswith('.a'):
continue
if os.path.isfile(lib_path):
self.install_dyn_linked_binary(lib_path, linuxbrew_lib_dest_dir)
elif os.path.islink(lib_path):
link_target_basename = os.path.basename(os.readlink(lib_path))
symlink(link_target_basename,
os.path.join(linuxbrew_lib_dest_dir, lib_basename))
else:
raise RuntimeError(
"Expected '{}' to be a file or a symlink".format(lib_path))
for installed_binary in self.installed_dyn_linked_binaries:
# Sometimes files that we copy from other locations are not even writable by user!
subprocess.check_call(['chmod', 'u+w', installed_binary])
# Remove rpath (we will set it appropriately in post_install.sh).
run_patchelf('--remove-rpath', installed_binary)
post_install_path = os.path.join(self.main_dest_bin_dir, 'post_install.sh')
with open(post_install_path) as post_install_script_input:
post_install_script = post_install_script_input.read()
new_post_install_script = post_install_script
for macro_var_name, list_of_binary_names in [
("main_elf_names_to_patch", main_elf_names_to_patch),
("postgres_elf_names_to_patch", postgres_elf_names_to_patch)
]:
new_post_install_script = new_post_install_script.replace(
'${%s}' % macro_var_name,
self.join_binary_names_for_bash(list_of_binary_names)
)
with open(post_install_path, 'w') as post_install_script_output:
post_install_script_output.write(new_post_install_script)