Skip to content

Commit 919d2b1

Browse files
anakryikoAlexei Starovoitov
authored andcommitted
libbpf: Allow modification of BTF and add btf__add_str API
Allow internal BTF representation to switch from default read-only mode, in which raw BTF data is a single non-modifiable block of memory with BTF header, types, and strings laid out sequentially and contiguously in memory, into a writable representation with types and strings data split out into separate memory regions, that can be dynamically expanded. Such writable internal representation is transparent to users of libbpf APIs, but allows appending new types and strings at the end of BTF, which is a typical use case when generating BTF programmatically. All the basic guarantees of BTF types and strings layout are preserved, i.e., user can get `struct btf_type *` pointer and read it directly. Such btf_type pointers might be invalidated if BTF is modified, so some care is required in such mixed read/write scenarios. Switch from read-only to writable configuration happens automatically the first time the user attempts to modify BTF by either adding a new type or a new string. It is still possible to get raw BTF data, which is a single piece of memory that can be persisted in ELF section or into a file as raw BTF. Such raw data memory is also still owned by BTF and will be freed either when BTF object is freed or if another modification to BTF happens, as any modification invalidates BTF raw representation. This patch adds the first two BTF manipulation APIs: btf__add_str(), which allows adding arbitrary strings to BTF string section, and btf__find_str(), which allows finding the offset of an existing string, but does not add the string if it's missing. All the added strings are automatically deduplicated. This is achieved by maintaining an additional string lookup index for all unique strings. Such index is built when BTF is switched to modifiable mode. If at that time BTF strings section contained duplicate strings, they are not de-duplicated. 
This is done specifically to not modify the existing content of BTF (types, their string offsets, etc), which can cause confusion and is an especially important property if there is a struct btf_ext associated with struct btf. By following this "imperfect deduplication" process, btf_ext is kept consistent and correct. If deduplication of strings is necessary, it can be forced by doing BTF deduplication, at which point all the strings will be eagerly deduplicated and all string offsets both in struct btf and struct btf_ext will be updated. Signed-off-by: Andrii Nakryiko <andriin@fb.com> Signed-off-by: Alexei Starovoitov <ast@kernel.org> Acked-by: John Fastabend <john.fastabend@gmail.com> Link: https://lore.kernel.org/bpf/20200926011357.2366158-6-andriin@fb.com
1 parent 7d9c71e commit 919d2b1

File tree

3 files changed

+258
-8
lines changed

3 files changed

+258
-8
lines changed

tools/lib/bpf/btf.c

Lines changed: 252 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -44,16 +44,46 @@ struct btf {
4444
* hdr | |
4545
* types_data-+ |
4646
* strs_data------------+
47+
*
48+
* If BTF data is later modified, e.g., due to types added or
49+
* removed, BTF deduplication performed, etc, this contiguous
50+
* representation is broken up into three independently allocated
51+
* memory regions to be able to modify them independently.
52+
* raw_data is nulled out at that point, but can be later allocated
53+
* and cached again if user calls btf__get_raw_data(), at which point
54+
* raw_data will contain a contiguous copy of header, types, and
55+
* strings:
56+
*
57+
* +----------+ +---------+ +-----------+
58+
* | Header | | Types | | Strings |
59+
* +----------+ +---------+ +-----------+
60+
* ^ ^ ^
61+
* | | |
62+
* hdr | |
63+
* types_data----+ |
64+
* strs_data------------------+
65+
*
66+
* +----------+---------+-----------+
67+
* | Header | Types | Strings |
68+
* raw_data----->+----------+---------+-----------+
4769
*/
4870
struct btf_header *hdr;
71+
4972
void *types_data;
50-
void *strs_data;
73+
size_t types_data_cap; /* used size stored in hdr->type_len */
5174

5275
/* type ID to `struct btf_type *` lookup index */
5376
__u32 *type_offs;
5477
size_t type_offs_cap;
5578
__u32 nr_types;
5679

80+
void *strs_data;
81+
size_t strs_data_cap; /* used size stored in hdr->str_len */
82+
83+
/* lookup index for each unique string in strings section */
84+
struct hashmap *strs_hash;
85+
/* whether strings are already deduplicated */
86+
bool strs_deduped;
5787
/* BTF object FD, if loaded into kernel */
5888
int fd;
5989

@@ -506,6 +536,11 @@ __s32 btf__find_by_name_kind(const struct btf *btf, const char *type_name,
506536
return -ENOENT;
507537
}
508538

539+
static bool btf_is_modifiable(const struct btf *btf)
540+
{
541+
return (void *)btf->hdr != btf->raw_data;
542+
}
543+
509544
void btf__free(struct btf *btf)
510545
{
511546
if (IS_ERR_OR_NULL(btf))
@@ -514,6 +549,17 @@ void btf__free(struct btf *btf)
514549
if (btf->fd >= 0)
515550
close(btf->fd);
516551

552+
if (btf_is_modifiable(btf)) {
553+
/* if BTF was modified after loading, it will have a split
554+
* in-memory representation for header, types, and strings
555+
* sections, so we need to free all of them individually. It
556+
* might still have a cached contiguous raw data present,
557+
* which will be unconditionally freed below.
558+
*/
559+
free(btf->hdr);
560+
free(btf->types_data);
561+
free(btf->strs_data);
562+
}
517563
free(btf->raw_data);
518564
free(btf->type_offs);
519565
free(btf);
@@ -922,8 +968,29 @@ void btf__set_fd(struct btf *btf, int fd)
922968
btf->fd = fd;
923969
}
924970

925-
const void *btf__get_raw_data(const struct btf *btf, __u32 *size)
971+
/* Return the contiguous raw BTF image (header, then types, then strings) and
 * store its size in *size.
 *
 * When no cached image exists (raw_data is NULL, e.g. because a modification
 * invalidated it), a fresh one is assembled from the separately kept header,
 * types and strings regions and cached inside the BTF object; that caching is
 * why constness of the argument is cast away. Returns NULL, leaving *size
 * untouched, if the image cannot be allocated.
 */
const void *btf__get_raw_data(const struct btf *btf_ro, __u32 *size)
{
	struct btf *btf = (struct btf *)btf_ro;

	if (!btf->raw_data) {
		struct btf_header *hdr = btf->hdr;
		void *image;

		btf->raw_size = hdr->hdr_len + hdr->type_len + hdr->str_len;
		image = calloc(1, btf->raw_size);
		btf->raw_data = image;
		if (!image)
			return NULL;

		/* lay the three regions out back to back */
		memcpy(image, hdr, hdr->hdr_len);
		memcpy(image + hdr->hdr_len, btf->types_data, hdr->type_len);
		memcpy(image + hdr->hdr_len + hdr->type_len, btf->strs_data,
		       hdr->str_len);
	}

	*size = btf->raw_size;
	return btf->raw_data;
}
@@ -1071,6 +1138,181 @@ int btf__get_map_kv_tids(const struct btf *btf, const char *map_name,
10711138
return 0;
10721139
}
10731140

1141+
static size_t strs_hash_fn(const void *key, void *ctx)
1142+
{
1143+
struct btf *btf = ctx;
1144+
const char *str = btf->strs_data + (long)key;
1145+
1146+
return str_hash(str);
1147+
}
1148+
1149+
static bool strs_hash_equal_fn(const void *key1, const void *key2, void *ctx)
1150+
{
1151+
struct btf *btf = ctx;
1152+
const char *str1 = btf->strs_data + (long)key1;
1153+
const char *str2 = btf->strs_data + (long)key2;
1154+
1155+
return strcmp(str1, str2) == 0;
1156+
}
1157+
1158+
/* Ensure BTF is ready to be modified (by splitting into a three memory
1159+
* regions for header, types, and strings). Also invalidate cached
1160+
* raw_data, if any.
1161+
*/
1162+
static int btf_ensure_modifiable(struct btf *btf)
1163+
{
1164+
void *hdr, *types, *strs, *strs_end, *s;
1165+
struct hashmap *hash = NULL;
1166+
long off;
1167+
int err;
1168+
1169+
if (btf_is_modifiable(btf)) {
1170+
/* any BTF modification invalidates raw_data */
1171+
if (btf->raw_data) {
1172+
free(btf->raw_data);
1173+
btf->raw_data = NULL;
1174+
}
1175+
return 0;
1176+
}
1177+
1178+
/* split raw data into three memory regions */
1179+
hdr = malloc(btf->hdr->hdr_len);
1180+
types = malloc(btf->hdr->type_len);
1181+
strs = malloc(btf->hdr->str_len);
1182+
if (!hdr || !types || !strs)
1183+
goto err_out;
1184+
1185+
memcpy(hdr, btf->hdr, btf->hdr->hdr_len);
1186+
memcpy(types, btf->types_data, btf->hdr->type_len);
1187+
memcpy(strs, btf->strs_data, btf->hdr->str_len);
1188+
1189+
/* build lookup index for all strings */
1190+
hash = hashmap__new(strs_hash_fn, strs_hash_equal_fn, btf);
1191+
if (IS_ERR(hash)) {
1192+
err = PTR_ERR(hash);
1193+
hash = NULL;
1194+
goto err_out;
1195+
}
1196+
1197+
strs_end = strs + btf->hdr->str_len;
1198+
for (off = 0, s = strs; s < strs_end; off += strlen(s) + 1, s = strs + off) {
1199+
/* hashmap__add() returns EEXIST if string with the same
1200+
* content already is in the hash map
1201+
*/
1202+
err = hashmap__add(hash, (void *)off, (void *)off);
1203+
if (err == -EEXIST)
1204+
continue; /* duplicate */
1205+
if (err)
1206+
goto err_out;
1207+
}
1208+
1209+
/* only when everything was successful, update internal state */
1210+
btf->hdr = hdr;
1211+
btf->types_data = types;
1212+
btf->types_data_cap = btf->hdr->type_len;
1213+
btf->strs_data = strs;
1214+
btf->strs_data_cap = btf->hdr->str_len;
1215+
btf->strs_hash = hash;
1216+
/* if BTF was created from scratch, all strings are guaranteed to be
1217+
* unique and deduplicated
1218+
*/
1219+
btf->strs_deduped = btf->hdr->str_len <= 1;
1220+
1221+
/* invalidate raw_data representation */
1222+
free(btf->raw_data);
1223+
btf->raw_data = NULL;
1224+
1225+
return 0;
1226+
1227+
err_out:
1228+
hashmap__free(hash);
1229+
free(hdr);
1230+
free(types);
1231+
free(strs);
1232+
return -ENOMEM;
1233+
}
1234+
1235+
static void *btf_add_str_mem(struct btf *btf, size_t add_sz)
1236+
{
1237+
return btf_add_mem(&btf->strs_data, &btf->strs_data_cap, 1,
1238+
btf->hdr->str_len, BTF_MAX_STR_OFFSET, add_sz);
1239+
}
1240+
1241+
/* Find an offset in BTF string section that corresponds to a given string *s*.
1242+
* Returns:
1243+
* - >0 offset into string section, if string is found;
1244+
* - -ENOENT, if string is not in the string section;
1245+
* - <0, on any other error.
1246+
*/
1247+
int btf__find_str(struct btf *btf, const char *s)
1248+
{
1249+
long old_off, new_off, len;
1250+
void *p;
1251+
1252+
/* BTF needs to be in a modifiable state to build string lookup index */
1253+
if (btf_ensure_modifiable(btf))
1254+
return -ENOMEM;
1255+
1256+
/* see btf__add_str() for why we do this */
1257+
len = strlen(s) + 1;
1258+
p = btf_add_str_mem(btf, len);
1259+
if (!p)
1260+
return -ENOMEM;
1261+
1262+
new_off = btf->hdr->str_len;
1263+
memcpy(p, s, len);
1264+
1265+
if (hashmap__find(btf->strs_hash, (void *)new_off, (void **)&old_off))
1266+
return old_off;
1267+
1268+
return -ENOENT;
1269+
}
1270+
1271+
/* Add a string s to the BTF string section.
1272+
* Returns:
1273+
* - > 0 offset into string section, on success;
1274+
* - < 0, on error.
1275+
*/
1276+
int btf__add_str(struct btf *btf, const char *s)
1277+
{
1278+
long old_off, new_off, len;
1279+
void *p;
1280+
int err;
1281+
1282+
if (btf_ensure_modifiable(btf))
1283+
return -ENOMEM;
1284+
1285+
/* Hashmap keys are always offsets within btf->strs_data, so to even
1286+
* look up some string from the "outside", we need to first append it
1287+
* at the end, so that it can be addressed with an offset. Luckily,
1288+
* until btf->hdr->str_len is incremented, that string is just a piece
1289+
* of garbage for the rest of BTF code, so no harm, no foul. On the
1290+
* other hand, if the string is unique, it's already appended and
1291+
* ready to be used, only a simple btf->hdr->str_len increment away.
1292+
*/
1293+
len = strlen(s) + 1;
1294+
p = btf_add_str_mem(btf, len);
1295+
if (!p)
1296+
return -ENOMEM;
1297+
1298+
new_off = btf->hdr->str_len;
1299+
memcpy(p, s, len);
1300+
1301+
/* Now attempt to add the string, but only if the string with the same
1302+
* contents doesn't exist already (HASHMAP_ADD strategy). If such
1303+
* string exists, we'll get its offset in old_off (that's old_key).
1304+
*/
1305+
err = hashmap__insert(btf->strs_hash, (void *)new_off, (void *)new_off,
1306+
HASHMAP_ADD, (const void **)&old_off, NULL);
1307+
if (err == -EEXIST)
1308+
return old_off; /* duplicated string, return existing offset */
1309+
if (err)
1310+
return err;
1311+
1312+
btf->hdr->str_len += len; /* new unique string, adjust data length */
1313+
return new_off;
1314+
}
1315+
10741316
struct btf_ext_sec_setup_param {
10751317
__u32 off;
10761318
__u32 len;
@@ -1537,6 +1779,9 @@ int btf__dedup(struct btf *btf, struct btf_ext *btf_ext,
15371779
return -EINVAL;
15381780
}
15391781

1782+
if (btf_ensure_modifiable(btf))
1783+
return -ENOMEM;
1784+
15401785
err = btf_dedup_strings(d);
15411786
if (err < 0) {
15421787
pr_debug("btf_dedup_strings failed:%d\n", err);
@@ -1926,6 +2171,9 @@ static int btf_dedup_strings(struct btf_dedup *d)
19262171
int i, j, err = 0, grp_idx;
19272172
bool grp_used;
19282173

2174+
if (d->btf->strs_deduped)
2175+
return 0;
2176+
19292177
/* build index of all strings */
19302178
while (p < end) {
19312179
if (strs.cnt + 1 > strs.cap) {
@@ -2018,6 +2266,7 @@ static int btf_dedup_strings(struct btf_dedup *d)
20182266
goto done;
20192267

20202268
d->btf->hdr->str_len = end - start;
2269+
d->btf->strs_deduped = true;
20212270

20222271
done:
20232272
free(tmp_strs);
@@ -3021,12 +3270,7 @@ static int btf_dedup_compact_types(struct btf_dedup *d)
30213270
if (!new_offs)
30223271
return -ENOMEM;
30233272
d->btf->type_offs = new_offs;
3024-
3025-
/* make sure string section follows type information without gaps */
3026-
d->btf->hdr->str_off = p - d->btf->types_data;
3027-
memmove(p, d->btf->strs_data, d->btf->hdr->str_len);
3028-
d->btf->strs_data = p;
3029-
3273+
d->btf->hdr->str_off = d->btf->hdr->type_len;
30303274
d->btf->raw_size = d->btf->hdr->hdr_len + d->btf->hdr->type_len + d->btf->hdr->str_len;
30313275
return 0;
30323276
}

tools/lib/bpf/btf.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
#define __LIBBPF_BTF_H
66

77
#include <stdarg.h>
8+
#include <stdbool.h>
89
#include <linux/btf.h>
910
#include <linux/types.h>
1011

@@ -72,6 +73,9 @@ LIBBPF_API __u32 btf_ext__line_info_rec_size(const struct btf_ext *btf_ext);
7273

7374
LIBBPF_API struct btf *libbpf_find_kernel_btf(void);
7475

76+
LIBBPF_API int btf__find_str(struct btf *btf, const char *s);
77+
LIBBPF_API int btf__add_str(struct btf *btf, const char *s);
78+
7579
struct btf_dedup_opts {
7680
unsigned int dedup_table_size;
7781
bool dont_resolve_fwds;

tools/lib/bpf/libbpf.map

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -305,6 +305,8 @@ LIBBPF_0.2.0 {
305305
bpf_prog_bind_map;
306306
bpf_prog_test_run_opts;
307307
bpf_program__section_name;
308+
btf__add_str;
309+
btf__find_str;
308310
perf_buffer__buffer_cnt;
309311
perf_buffer__buffer_fd;
310312
perf_buffer__epoll_fd;

0 commit comments

Comments
 (0)