Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 1 addition & 2 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -697,8 +697,7 @@ jobs:
with:
save-if: ${{ github.ref == 'refs/heads/main' || contains(github.event.pull_request.labels.*.name, 'CI-save-pr-cache') }}
- run: python -m pip install --upgrade pip && pip install nox[uv]
# TODO test will be fixed in https://github.com/PyO3/pyo3/pull/5450
# - run: nox -s test-introspection
- run: nox -s test-introspection
env:
CARGO_BUILD_TARGET: ${{ matrix.platform.rust-target }}

Expand Down
1 change: 1 addition & 0 deletions newsfragments/5450.changed.md
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Introspection: change the way introspection data is emitted in the binaries to avoid a pointer indirection and simplify parsing.
80 changes: 27 additions & 53 deletions pyo3-introspection/src/introspection.rs
Original file line number Diff line number Diff line change
@@ -1,7 +1,8 @@
use crate::model::{
Argument, Arguments, Attribute, Class, Function, Module, VariableLengthArgument,
};
use anyhow::{bail, ensure, Context, Result};
use anyhow::{anyhow, bail, ensure, Context, Result};
use goblin::elf::section_header::SHN_XINDEX;
use goblin::elf::Elf;
use goblin::mach::load_command::CommandVariant;
use goblin::mach::symbols::{NO_SECT, N_SECT};
Expand All @@ -11,8 +12,8 @@ use goblin::Object;
use serde::Deserialize;
use std::cmp::Ordering;
use std::collections::HashMap;
use std::fs;
use std::path::Path;
use std::{fs, str};

/// Introspect a cdylib built with PyO3 and returns the definition of a Python module.
///
Expand Down Expand Up @@ -268,13 +269,12 @@ fn find_introspection_chunks_in_elf(elf: &Elf<'_>, library_content: &[u8]) -> Re
let mut chunks = Vec::new();
for sym in &elf.syms {
if is_introspection_symbol(elf.strtab.get_at(sym.st_name).unwrap_or_default()) {
ensure!(u32::try_from(sym.st_shndx)? != SHN_XINDEX, "Section names length is greater than SHN_LORESERVE in ELF, this is not supported by PyO3 yet");
let section_header = &elf.section_headers[sym.st_shndx];
let data_offset = sym.st_value + section_header.sh_offset - section_header.sh_addr;
chunks.push(read_symbol_value_with_ptr_and_len(
chunks.push(deserialize_chunk(
&library_content[usize::try_from(data_offset).context("File offset overflow")?..],
0,
library_content,
elf.is_64,
elf.little_endian,
)?);
}
}
Expand Down Expand Up @@ -311,73 +311,47 @@ fn find_introspection_chunks_in_macho(
{
let section = &sections[nlist.n_sect - 1]; // Sections are counted from 1
let data_offset = nlist.n_value + u64::from(section.offset) - section.addr;
chunks.push(read_symbol_value_with_ptr_and_len(
chunks.push(deserialize_chunk(
&library_content[usize::try_from(data_offset).context("File offset overflow")?..],
0,
library_content,
macho.is_64,
macho.little_endian,
)?);
}
}
Ok(chunks)
}

fn find_introspection_chunks_in_pe(pe: &PE<'_>, library_content: &[u8]) -> Result<Vec<Chunk>> {
let rdata_data_section = pe
.sections
.iter()
.find(|section| section.name().unwrap_or_default() == ".rdata")
.context("No .rdata section found")?;
let rdata_shift = usize::try_from(pe.image_base).context("image_base overflow")?
+ usize::try_from(rdata_data_section.virtual_address)
.context(".rdata virtual_address overflow")?
- usize::try_from(rdata_data_section.pointer_to_raw_data)
.context(".rdata pointer_to_raw_data overflow")?;

let mut chunks = Vec::new();
for export in &pe.exports {
if is_introspection_symbol(export.name.unwrap_or_default()) {
chunks.push(read_symbol_value_with_ptr_and_len(
chunks.push(deserialize_chunk(
&library_content[export.offset.context("No symbol offset")?..],
rdata_shift,
library_content,
pe.is_64,
true,
)?);
}
}
Ok(chunks)
}

fn read_symbol_value_with_ptr_and_len(
value_slice: &[u8],
shift: usize,
full_library_content: &[u8],
is_64: bool,
fn deserialize_chunk(
content_with_chunk_at_the_beginning: &[u8],
is_little_endian: bool,
) -> Result<Chunk> {
let (ptr, len) = if is_64 {
let (ptr, len) = value_slice[..16].split_at(8);
let ptr = usize::try_from(u64::from_le_bytes(
ptr.try_into().context("Too short symbol value")?,
))
.context("Pointer overflow")?;
let len = usize::try_from(u64::from_le_bytes(
len.try_into().context("Too short symbol value")?,
))
.context("Length overflow")?;
(ptr, len)
let length = content_with_chunk_at_the_beginning
.split_at(4)
.0
.try_into()
.context("The introspection chunk must contain a length")?;
let length = if is_little_endian {
u32::from_le_bytes(length)
} else {
let (ptr, len) = value_slice[..8].split_at(4);
let ptr = usize::try_from(u32::from_le_bytes(
ptr.try_into().context("Too short symbol value")?,
))
.context("Pointer overflow")?;
let len = usize::try_from(u32::from_le_bytes(
len.try_into().context("Too short symbol value")?,
))
.context("Length overflow")?;
(ptr, len)
u32::from_be_bytes(length)
};
let chunk = &full_library_content[ptr - shift..ptr - shift + len];
let chunk = content_with_chunk_at_the_beginning
.get(4..4 + length as usize)
.ok_or_else(|| {
anyhow!("The introspection chunk length {length} is greater that the binary size")
})?;
serde_json::from_slice(chunk).with_context(|| {
format!(
"Failed to parse introspection chunk: '{}'",
Expand All @@ -389,7 +363,7 @@ fn read_symbol_value_with_ptr_and_len(
fn is_introspection_symbol(name: &str) -> bool {
name.strip_prefix('_')
.unwrap_or(name)
.starts_with("PYO3_INTROSPECTION_0_")
.starts_with("PYO3_INTROSPECTION_1_")
}

#[derive(Deserialize)]
Expand Down
36 changes: 25 additions & 11 deletions pyo3-macros-backend/src/introspection.rs
Original file line number Diff line number Diff line change
Expand Up @@ -353,17 +353,10 @@ impl IntrospectionNode<'_> {
fn emit(self, pyo3_crate_path: &PyO3CratePath) -> TokenStream {
let mut content = ConcatenationBuilder::default();
self.add_to_serialization(&mut content, pyo3_crate_path);
let content = content.into_token_stream(pyo3_crate_path);

let static_name = format_ident!("PYO3_INTROSPECTION_0_{}", unique_element_id());
// #[no_mangle] is required to make sure some linkers like Linux ones do not mangle the section name too.
quote! {
const _: () = {
#[used]
#[no_mangle]
static #static_name: &'static [u8] = #content;
};
}
content.into_static(
pyo3_crate_path,
format_ident!("PYO3_INTROSPECTION_1_{}", unique_element_id()),
)
}

fn add_to_serialization(
Expand Down Expand Up @@ -530,6 +523,27 @@ impl ConcatenationBuilder {
}
}
}

/// Consumes the builder and generates the tokens of an anonymous `const _` block that declares
/// a `#[used]` `#[no_mangle]` static named `ident`.
///
/// The static has type `SerializedIntrospectionFragment<PIECES_LEN>`: its `length` field is set to
/// `PIECES_LEN as u32` and its `fragment` field to the result of `combine_to_array` over all pieces.
///
/// `pyo3_crate_path` is interpolated in front of every `impl_::...` path in the generated code.
fn into_static(self, pyo3_crate_path: &PyO3CratePath, ident: Ident) -> TokenStream {
let mut elements = self.elements;
// Flush the string that is still being accumulated so that it becomes the last piece.
if !self.current_string.is_empty() {
elements.push(ConcatenationBuilderElement::String(self.current_string));
}

// `PIECES` lists every element as a byte slice; `PIECES_LEN` is computed from it by `combined_len`
// (presumably the summed length of all pieces, confirm in `impl_::concat`) and is used both as the
// const generic parameter of the fragment type and, cast to `u32`, as the stored `length`.
// #[no_mangle] is required to make sure some linkers like Linux ones do not mangle the section name too.
quote! {
const _: () = {
const PIECES: &[&[u8]] = &[#(#elements , )*];
const PIECES_LEN: usize = #pyo3_crate_path::impl_::concat::combined_len(PIECES);
#[used]
#[no_mangle]
static #ident: #pyo3_crate_path::impl_::introspection::SerializedIntrospectionFragment<PIECES_LEN> = #pyo3_crate_path::impl_::introspection::SerializedIntrospectionFragment {
length: PIECES_LEN as u32,
fragment: #pyo3_crate_path::impl_::concat::combine_to_array::<PIECES_LEN>(PIECES)
};
};
}
}
}

enum ConcatenationBuilderElement {
Expand Down
2 changes: 1 addition & 1 deletion pytests/stubs/pyfunctions.pyi
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ def many_keyword_arguments(
newt: typing.Any | None = None,
owl: typing.Any | None = None,
penguin: typing.Any | None = None,
) -> typing.Any: ...
) -> None: ...
def none() -> None: ...
def positional_only(a: typing.Any, /, b: typing.Any) -> typing.Any: ...
def simple(
Expand Down
6 changes: 6 additions & 0 deletions src/impl_/introspection.rs
Original file line number Diff line number Diff line change
Expand Up @@ -15,3 +15,9 @@ impl<'a, T: IntoPyObject<'a>> PyReturnType for T {
impl<T: PyReturnType, E> PyReturnType for Result<T, E> {
const OUTPUT_TYPE: &'static str = T::OUTPUT_TYPE;
}

/// One introspection fragment as it is stored in a `static`: a `u32` length followed by
/// `LEN` bytes of data.
///
/// `#[repr(C)]` lays the fields out in declaration order, so `length` comes before `fragment`
/// in the emitted data.
#[repr(C)]
pub struct SerializedIntrospectionFragment<const LEN: usize> {
/// Byte length of `fragment`; the macro-generated statics set this to `LEN as u32`.
pub length: u32,
/// The fragment bytes themselves.
pub fragment: [u8; LEN],
}
2 changes: 1 addition & 1 deletion tests/test_compile_error.rs
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,7 @@ fn test_compile_errors() {
t.compile_fail("tests/ui/invalid_pymodule_glob.rs");
t.compile_fail("tests/ui/invalid_pymodule_trait.rs");
t.compile_fail("tests/ui/invalid_pymodule_two_pymodule_init.rs");
#[cfg(feature = "experimental-async")]
#[cfg(all(feature = "experimental-async", not(feature = "experimental-inspect")))]
#[cfg(any(not(Py_LIMITED_API), Py_3_10))] // to avoid PyFunctionArgument for &str
t.compile_fail("tests/ui/invalid_cancel_handle.rs");
t.pass("tests/ui/pymodule_missing_docs.rs");
Expand Down
Loading