@@ -10,10 +10,11 @@ use goblin::mach::{Mach, MachO, SingleArch};
1010use goblin:: pe:: PE ;
1111use goblin:: Object ;
1212use serde:: Deserialize ;
13+ use serde_json:: Deserializer ;
1314use std:: cmp:: Ordering ;
1415use std:: collections:: HashMap ;
15- use std:: fs;
1616use std:: path:: Path ;
17+ use std:: { fs, str} ;
1718
1819/// Introspects a cdylib built with PyO3 and returns the definition of a Python module.
1920///
@@ -272,11 +273,8 @@ fn find_introspection_chunks_in_elf(elf: &Elf<'_>, library_content: &[u8]) -> Re
272273 ensure ! ( u32 :: try_from( sym. st_shndx) ? != SHN_XINDEX , "Section names length is greater than SHN_LORESERVE in ELF, this is not supported by PyO3 yet" ) ;
273274 let section_header = & elf. section_headers [ sym. st_shndx ] ;
274275 let data_offset = sym. st_value + section_header. sh_offset - section_header. sh_addr ;
275- chunks. push ( read_symbol_value_with_ptr_and_len (
276+ chunks. push ( deserialize_prefix (
276277 & library_content[ usize:: try_from ( data_offset) . context ( "File offset overflow" ) ?..] ,
277- 0 ,
278- library_content,
279- elf. is_64 ,
280278 ) ?) ;
281279 }
282280 }
@@ -313,85 +311,40 @@ fn find_introspection_chunks_in_macho(
313311 {
314312 let section = & sections[ nlist. n_sect - 1 ] ; // Sections are counted from 1
315313 let data_offset = nlist. n_value + u64:: from ( section. offset ) - section. addr ;
316- chunks. push ( read_symbol_value_with_ptr_and_len (
314+ chunks. push ( deserialize_prefix (
317315 & library_content[ usize:: try_from ( data_offset) . context ( "File offset overflow" ) ?..] ,
318- 0 ,
319- library_content,
320- macho. is_64 ,
321316 ) ?) ;
322317 }
323318 }
324319 Ok ( chunks)
325320}
326321
327322fn find_introspection_chunks_in_pe ( pe : & PE < ' _ > , library_content : & [ u8 ] ) -> Result < Vec < Chunk > > {
328- let rdata_data_section = pe
329- . sections
330- . iter ( )
331- . find ( |section| section. name ( ) . unwrap_or_default ( ) == ".rdata" )
332- . context ( "No .rdata section found" ) ?;
333- let rdata_shift = usize:: try_from ( pe. image_base ) . context ( "image_base overflow" ) ?
334- + usize:: try_from ( rdata_data_section. virtual_address )
335- . context ( ".rdata virtual_address overflow" ) ?
336- - usize:: try_from ( rdata_data_section. pointer_to_raw_data )
337- . context ( ".rdata pointer_to_raw_data overflow" ) ?;
338-
339323 let mut chunks = Vec :: new ( ) ;
340324 for export in & pe. exports {
341325 if is_introspection_symbol ( export. name . unwrap_or_default ( ) ) {
342- chunks. push ( read_symbol_value_with_ptr_and_len (
326+ chunks. push ( deserialize_prefix (
343327 & library_content[ export. offset . context ( "No symbol offset" ) ?..] ,
344- rdata_shift,
345- library_content,
346- pe. is_64 ,
347328 ) ?) ;
348329 }
349330 }
350331 Ok ( chunks)
351332}
352333
353- fn read_symbol_value_with_ptr_and_len (
354- value_slice : & [ u8 ] ,
355- shift : usize ,
356- full_library_content : & [ u8 ] ,
357- is_64 : bool ,
358- ) -> Result < Chunk > {
359- let ( ptr, len) = if is_64 {
360- let ( ptr, len) = value_slice[ ..16 ] . split_at ( 8 ) ;
361- let ptr = usize:: try_from ( u64:: from_le_bytes (
362- ptr. try_into ( ) . context ( "Too short symbol value" ) ?,
363- ) )
364- . context ( "Pointer overflow" ) ?;
365- let len = usize:: try_from ( u64:: from_le_bytes (
366- len. try_into ( ) . context ( "Too short symbol value" ) ?,
367- ) )
368- . context ( "Length overflow" ) ?;
369- ( ptr, len)
370- } else {
371- let ( ptr, len) = value_slice[ ..8 ] . split_at ( 4 ) ;
372- let ptr = usize:: try_from ( u32:: from_le_bytes (
373- ptr. try_into ( ) . context ( "Too short symbol value" ) ?,
374- ) )
375- . context ( "Pointer overflow" ) ?;
376- let len = usize:: try_from ( u32:: from_le_bytes (
377- len. try_into ( ) . context ( "Too short symbol value" ) ?,
378- ) )
379- . context ( "Length overflow" ) ?;
380- ( ptr, len)
381- } ;
382- let chunk = & full_library_content[ ptr - shift..ptr - shift + len] ;
383- serde_json:: from_slice ( chunk) . with_context ( || {
384- format ! (
385- "Failed to parse introspection chunk: '{}'" ,
386- String :: from_utf8_lossy( chunk)
387- )
334+ fn deserialize_prefix ( chunk : & [ u8 ] ) -> Result < Chunk > {
335+ Chunk :: deserialize ( & mut Deserializer :: from_slice ( chunk) ) . with_context ( || {
336+ // We take the first valid utf8 bytes, it's quite likely to be the actual chunk
337+ // We use a 4096 upper bound for security
338+ let chunk = str:: from_utf8 ( & chunk[ ..4096 ] )
339+ . unwrap_or_else ( |e| str:: from_utf8 ( & chunk[ ..e. valid_up_to ( ) ] ) . unwrap_or_default ( ) ) ;
340+ format ! ( "Failed to parse introspection chunk: '{chunk}'" )
388341 } )
389342}
390343
/// Tells whether `name` is the name of a symbol carrying a PyO3 introspection chunk.
///
/// At most one leading underscore is ignored before the check (presumably because some
/// object formats prefix exported symbol names with `_` — confirm against the callers).
/// The remainder must start with the `PYO3_INTROSPECTION_1_` prefix.
fn is_introspection_symbol(name: &str) -> bool {
    name.strip_prefix('_')
        .unwrap_or(name)
        .starts_with("PYO3_INTROSPECTION_1_")
}
396349
397350#[ derive( Deserialize ) ]
0 commit comments