Skip to content

Commit 1620726

Browse files
committed
Introspection: emit messages inline in the binaries instead of introducing a ptr+len indirection
Significantly simplifies the parsing I dropped the old code path, we can add it back if we want to maintain backward compatibility
1 parent 50a2786 commit 1620726

File tree

5 files changed

+38
-74
lines changed

5 files changed

+38
-74
lines changed

.github/workflows/ci.yml

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -697,8 +697,7 @@ jobs:
697697
with:
698698
save-if: ${{ github.ref == 'refs/heads/main' || contains(github.event.pull_request.labels.*.name, 'CI-save-pr-cache') }}
699699
- run: python -m pip install --upgrade pip && pip install nox[uv]
700-
# TODO test will be fixed in https://github.com/PyO3/pyo3/pull/5450
701-
# - run: nox -s test-introspection
700+
- run: nox -s test-introspection
702701
env:
703702
CARGO_BUILD_TARGET: ${{ matrix.platform.rust-target }}
704703

newsfragments/5450.changed.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
Introspection: change the way introspection data is emitted in the binaries to avoid a pointer indirection and simplify parsing.

pyo3-introspection/src/introspection.rs

Lines changed: 13 additions & 60 deletions
Original file line numberDiff line numberDiff line change
@@ -10,10 +10,11 @@ use goblin::mach::{Mach, MachO, SingleArch};
1010
use goblin::pe::PE;
1111
use goblin::Object;
1212
use serde::Deserialize;
13+
use serde_json::Deserializer;
1314
use std::cmp::Ordering;
1415
use std::collections::HashMap;
15-
use std::fs;
1616
use std::path::Path;
17+
use std::{fs, str};
1718

1819
/// Introspect a cdylib built with PyO3 and returns the definition of a Python module.
1920
///
@@ -272,11 +273,8 @@ fn find_introspection_chunks_in_elf(elf: &Elf<'_>, library_content: &[u8]) -> Re
272273
ensure!(u32::try_from(sym.st_shndx)? != SHN_XINDEX, "Section names length is greater than SHN_LORESERVE in ELF, this is not supported by PyO3 yet");
273274
let section_header = &elf.section_headers[sym.st_shndx];
274275
let data_offset = sym.st_value + section_header.sh_offset - section_header.sh_addr;
275-
chunks.push(read_symbol_value_with_ptr_and_len(
276+
chunks.push(deserialize_prefix(
276277
&library_content[usize::try_from(data_offset).context("File offset overflow")?..],
277-
0,
278-
library_content,
279-
elf.is_64,
280278
)?);
281279
}
282280
}
@@ -313,85 +311,40 @@ fn find_introspection_chunks_in_macho(
313311
{
314312
let section = &sections[nlist.n_sect - 1]; // Sections are counted from 1
315313
let data_offset = nlist.n_value + u64::from(section.offset) - section.addr;
316-
chunks.push(read_symbol_value_with_ptr_and_len(
314+
chunks.push(deserialize_prefix(
317315
&library_content[usize::try_from(data_offset).context("File offset overflow")?..],
318-
0,
319-
library_content,
320-
macho.is_64,
321316
)?);
322317
}
323318
}
324319
Ok(chunks)
325320
}
326321

327322
fn find_introspection_chunks_in_pe(pe: &PE<'_>, library_content: &[u8]) -> Result<Vec<Chunk>> {
328-
let rdata_data_section = pe
329-
.sections
330-
.iter()
331-
.find(|section| section.name().unwrap_or_default() == ".rdata")
332-
.context("No .rdata section found")?;
333-
let rdata_shift = usize::try_from(pe.image_base).context("image_base overflow")?
334-
+ usize::try_from(rdata_data_section.virtual_address)
335-
.context(".rdata virtual_address overflow")?
336-
- usize::try_from(rdata_data_section.pointer_to_raw_data)
337-
.context(".rdata pointer_to_raw_data overflow")?;
338-
339323
let mut chunks = Vec::new();
340324
for export in &pe.exports {
341325
if is_introspection_symbol(export.name.unwrap_or_default()) {
342-
chunks.push(read_symbol_value_with_ptr_and_len(
326+
chunks.push(deserialize_prefix(
343327
&library_content[export.offset.context("No symbol offset")?..],
344-
rdata_shift,
345-
library_content,
346-
pe.is_64,
347328
)?);
348329
}
349330
}
350331
Ok(chunks)
351332
}
352333

353-
fn read_symbol_value_with_ptr_and_len(
354-
value_slice: &[u8],
355-
shift: usize,
356-
full_library_content: &[u8],
357-
is_64: bool,
358-
) -> Result<Chunk> {
359-
let (ptr, len) = if is_64 {
360-
let (ptr, len) = value_slice[..16].split_at(8);
361-
let ptr = usize::try_from(u64::from_le_bytes(
362-
ptr.try_into().context("Too short symbol value")?,
363-
))
364-
.context("Pointer overflow")?;
365-
let len = usize::try_from(u64::from_le_bytes(
366-
len.try_into().context("Too short symbol value")?,
367-
))
368-
.context("Length overflow")?;
369-
(ptr, len)
370-
} else {
371-
let (ptr, len) = value_slice[..8].split_at(4);
372-
let ptr = usize::try_from(u32::from_le_bytes(
373-
ptr.try_into().context("Too short symbol value")?,
374-
))
375-
.context("Pointer overflow")?;
376-
let len = usize::try_from(u32::from_le_bytes(
377-
len.try_into().context("Too short symbol value")?,
378-
))
379-
.context("Length overflow")?;
380-
(ptr, len)
381-
};
382-
let chunk = &full_library_content[ptr - shift..ptr - shift + len];
383-
serde_json::from_slice(chunk).with_context(|| {
384-
format!(
385-
"Failed to parse introspection chunk: '{}'",
386-
String::from_utf8_lossy(chunk)
387-
)
334+
fn deserialize_prefix(chunk: &[u8]) -> Result<Chunk> {
335+
Chunk::deserialize(&mut Deserializer::from_slice(chunk)).with_context(|| {
336+
// We take the first valid utf8 bytes, it's quite likely to be the actual chunk
337+
// We use a 4096 upper bound for security
338+
let chunk = str::from_utf8(&chunk[..4096])
339+
.unwrap_or_else(|e| str::from_utf8(&chunk[..e.valid_up_to()]).unwrap_or_default());
340+
format!("Failed to parse introspection chunk: '{chunk}'")
388341
})
389342
}
390343

391344
fn is_introspection_symbol(name: &str) -> bool {
392345
name.strip_prefix('_')
393346
.unwrap_or(name)
394-
.starts_with("PYO3_INTROSPECTION_0_")
347+
.starts_with("PYO3_INTROSPECTION_1_")
395348
}
396349

397350
#[derive(Deserialize)]

pyo3-macros-backend/src/introspection.rs

Lines changed: 22 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -353,17 +353,10 @@ impl IntrospectionNode<'_> {
353353
fn emit(self, pyo3_crate_path: &PyO3CratePath) -> TokenStream {
354354
let mut content = ConcatenationBuilder::default();
355355
self.add_to_serialization(&mut content, pyo3_crate_path);
356-
let content = content.into_token_stream(pyo3_crate_path);
357-
358-
let static_name = format_ident!("PYO3_INTROSPECTION_0_{}", unique_element_id());
359-
// #[no_mangle] is required to make sure some linkers like Linux ones do not mangle the section name too.
360-
quote! {
361-
const _: () = {
362-
#[used]
363-
#[no_mangle]
364-
static #static_name: &'static [u8] = #content;
365-
};
366-
}
356+
content.into_static(
357+
pyo3_crate_path,
358+
format_ident!("PYO3_INTROSPECTION_1_{}", unique_element_id()),
359+
)
367360
}
368361

369362
fn add_to_serialization(
@@ -530,6 +523,24 @@ impl ConcatenationBuilder {
530523
}
531524
}
532525
}
526+
527+
fn into_static(self, pyo3_crate_path: &PyO3CratePath, ident: Ident) -> TokenStream {
528+
let mut elements = self.elements;
529+
if !self.current_string.is_empty() {
530+
elements.push(ConcatenationBuilderElement::String(self.current_string));
531+
}
532+
533+
// #[no_mangle] is required to make sure some linkers like Linux ones do not mangle the section name too.
534+
quote! {
535+
const _: () = {
536+
const PIECES: &[&[u8]] = &[#(#elements , )*];
537+
const PIECES_LEN: usize = #pyo3_crate_path::impl_::concat::combined_len(PIECES);
538+
#[used]
539+
#[no_mangle]
540+
static #ident: [u8; PIECES_LEN] = #pyo3_crate_path::impl_::concat::combine_to_array::<PIECES_LEN>(PIECES);
541+
};
542+
}
543+
}
533544
}
534545

535546
enum ConcatenationBuilderElement {

tests/test_compile_error.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -69,7 +69,7 @@ fn test_compile_errors() {
6969
t.compile_fail("tests/ui/invalid_pymodule_glob.rs");
7070
t.compile_fail("tests/ui/invalid_pymodule_trait.rs");
7171
t.compile_fail("tests/ui/invalid_pymodule_two_pymodule_init.rs");
72-
#[cfg(feature = "experimental-async")]
72+
#[cfg(all(feature = "experimental-async", not(feature = "experimental-inspect")))]
7373
#[cfg(any(not(Py_LIMITED_API), Py_3_10))] // to avoid PyFunctionArgument for &str
7474
t.compile_fail("tests/ui/invalid_cancel_handle.rs");
7575
t.pass("tests/ui/pymodule_missing_docs.rs");

0 commit comments

Comments
 (0)