Skip to content

Commit

Permalink
PyPy3.7 argv: Minimal Patch
Browse files Browse the repository at this point in the history
  • Loading branch information
ax3l committed Jan 18, 2022
1 parent 1ce72c6 commit 9110a67
Showing 1 changed file with 25 additions and 226 deletions.
251 changes: 25 additions & 226 deletions recipe/2726.patch
Original file line number Diff line number Diff line change
@@ -1,252 +1,51 @@
From b12cb7abb8b197a27c65bec7be53cd6fb5a35bba Mon Sep 17 00:00:00 2001
From fda182a61c49051d6f0fd0fccda8798cdadc8140 Mon Sep 17 00:00:00 2001
From: Axel Huebl <axel.huebl@plasma.ninja>
Date: Fri, 7 Jan 2022 19:31:39 +0100
Subject: [PATCH] argvC: fix off-by-one error.
Date: Mon, 17 Jan 2022 20:48:30 -0800
Subject: [PATCH] Python: Fix UB in Inputs Passing

Python Bindings: utf->ascii
Trying to fix the macOS PyPy 3.7 error seen in https://github.com/conda-forge/warpx-feedstock/issues/37
Testing in https://github.com/conda-forge/warpx-feedstock/pull/38

We forward to char strings in C/C++.
After googling for a while, the original implementation was likely based on https://code.activestate.com/lists/python-list/704158, which contains bugs.

Python: Improve Argv Handling
1) Bug: `create_string_buffer`

- put correct process name in argv[0]
- allow to append more AMReX-ish arguments from the command line
Allocating new, null-terminated char arrays with `ctypes.create_string_buffer` does lead to scrambled arrays in pypy3.7.
As far as I can see, this [should have also worked](https://docs.python.org/3/library/ctypes.html), but maybe there is a bug in the upstream implementation or the original code created some kind of use-after-free on a temporary while the new implementation just shares the existing byte address.

Debug C++ side of strings
This leads to errors such as the ones here:
https://github.com/conda-forge/warpx-feedstock/pull/38#issuecomment-1010160519

replace: `create_string_buffer` w/ `c_char_p`
The call `self.argvC[i] = ctypes.c_char_p(enc_arg)` is equivalent in creating a `NULL`-terminated char array.

Fixes scrambled up text in PyPy3.7
2) Bug: Last Argv Argument

Add back +1 element with explicit NULL
The last argument in the array of char arrays `argv` in ANSII C needs to be a plain `NULL` ptr.
Before this PR, this has been allocated but never initialized, leading to a undefined behavior (read as: crashes).

Fun fact: this is required by ANSII C:
- https://stackoverflow.com/a/39096006/2719194
- https://code.activestate.com/lists/python-list/704158

and we did not yet initialize it properly.
Reference: https://stackoverflow.com/a/39096006/2719194
---
Python/pywarpx/_libwarpx.py | 69 +++++++++++++++++++++++----------
Source/Python/WarpXWrappers.cpp | 10 +++++
2 files changed, 58 insertions(+), 21 deletions(-)
Python/pywarpx/_libwarpx.py | 7 ++++++-
1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/Python/pywarpx/_libwarpx.py b/Python/pywarpx/_libwarpx.py
index 52bebf46a..08acceb91 100755
index 9de8227397..78f731c9a1 100755
--- a/Python/pywarpx/_libwarpx.py
+++ b/Python/pywarpx/_libwarpx.py
@@ -38,7 +38,8 @@ else:
_path_libc = _find_library('c')
_libc = ctypes.CDLL(_path_libc)
_LP_c_int = ctypes.POINTER(ctypes.c_int)
-_LP_c_char = ctypes.POINTER(ctypes.c_char)
+_LP_c_char = ctypes.c_char_p
+_LP_LP_c_char = ctypes.POINTER(_LP_c_char)


class LibWarpX():
@@ -54,6 +55,9 @@ class LibWarpX():
self.initialized = False
atexit.register(self.finalize)

+ # store the argv arguments in C encoding persistently
+ self.argvC = None
+
def __getattr__(self, attribute):
if attribute == 'libwarpx_so':
# If the 'libwarpx_so' is referenced, load it.
@@ -159,7 +163,6 @@ class LibWarpX():
_LP_LP_c_real = ctypes.POINTER(_LP_c_real)
_LP_c_particlereal = ctypes.POINTER(c_particlereal)
_LP_LP_c_particlereal = ctypes.POINTER(_LP_c_particlereal)
- _LP_LP_c_char = ctypes.POINTER(_LP_c_char)

# set the arg and return types of the wrapped functions
self.libwarpx_so.amrex_init.argtypes = (ctypes.c_int, _LP_LP_c_char)
@@ -394,22 +397,36 @@ class LibWarpX():

'''
return self.libwarpx_so.warpx_nCompsSpecies(
- ctypes.c_char_p(species_name.encode('utf-8')))
+ ctypes.c_char_p(species_name.encode('ascii')))

@@ -397,10 +397,15 @@ def get_nattr_species(self, species_name):
def amrex_init(self, argv, mpi_comm=None):
# --- Construct the ctype list of strings to pass in
argc = len(argv)
- argvC = (_LP_c_char * (argc+1))()
+
+ # note: +1 since there is an extra char-string array element,
+ # that ANSII C requires to be a simple NULL entry
+ # https://stackoverflow.com/a/39096006/2719194
+ # https://code.activestate.com/lists/python-list/704158
+ self.argvC = (_LP_c_char * (argc + 1))()
+ print("amrex_init argv=", argv)
+ print("amrex_init len(argv)=", len(argv))
argvC = (_LP_c_char * (argc+1))()
for i, arg in enumerate(argv):
enc_arg = arg.encode('utf-8')
- argvC[i] = ctypes.create_string_buffer(enc_arg)
+ print(enc_arg, len(enc_arg), repr(enc_arg), type(enc_arg))
+ #self.argvC[i] = ctypes.create_string_buffer(enc_arg)
+ self.argvC[i] = ctypes.c_char_p(enc_arg)
+ self.argvC[argc] = ctypes.c_char_p(b"\0")
+
+ print("len(argvC)=", len(self.argvC))
+ self.argvC = ctypes.cast(self.argvC, _LP_LP_c_char)
+ print("argc=", argc)
+ print("argvC=", self.argvC)
+ argvC[i] = ctypes.c_char_p(enc_arg)
+ argvC[argc] = ctypes.c_char_p(b"\0") # +1 element must be NULL

if mpi_comm is None or MPI is None:
- self.libwarpx_so.amrex_init(argc, argvC)
+ self.libwarpx_so.amrex_init(argc, self.argvC)
else:
comm_ptr = MPI._addressof(mpi_comm)
comm_val = _MPI_Comm_type.from_address(comm_ptr)
- self.libwarpx_so.amrex_init_with_inited_mpi(argc, argvC, comm_val)
+ self.libwarpx_so.amrex_init_with_inited_mpi(argc, self.argvC, comm_val)

def initialize(self, argv=None, mpi_comm=None):
'''
@@ -418,8 +435,18 @@ class LibWarpX():
doing anything else.

'''
+ # no arguments passed? Take the ones from Python
if argv is None:
argv = sys.argv
+ # some arguments were passed
+ else:
+ # overwrite argv[0] with actual name
+ argv[0] = sys.argv[0]
+ # append extra arguments from Python command line
+ if len(sys.argv) > 1:
+ argv += sys.argv[1:]
+
+ # call initialization routines
self.amrex_init(argv, mpi_comm)
self.libwarpx_so.warpx_ConvertLabParamsToBoost()
self.libwarpx_so.warpx_ReadBCParams()
@@ -622,7 +649,7 @@ class LibWarpX():
attr[:,self.get_particle_comp_index(species_name, key)-3] = vals

self.libwarpx_so.warpx_addNParticles(
- ctypes.c_char_p(species_name.encode('utf-8')), x.size,
+ ctypes.c_char_p(species_name.encode('ascii')), x.size,
x, y, z, ux, uy, uz, nattr, attr, unique_particles
)

@@ -644,7 +671,7 @@ class LibWarpX():

'''
return self.libwarpx_so.warpx_getNumParticles(
- ctypes.c_char_p(species_name.encode('utf-8'))
+ ctypes.c_char_p(species_name.encode('ascii'))
)

def get_particle_structs(self, species_name, level):
@@ -672,7 +699,7 @@ class LibWarpX():
particles_per_tile = _LP_c_int()
num_tiles = ctypes.c_int(0)
data = self.libwarpx_so.warpx_getParticleStructs(
- ctypes.c_char_p(species_name.encode('utf-8')), level,
+ ctypes.c_char_p(species_name.encode('ascii')), level,
ctypes.byref(num_tiles), ctypes.byref(particles_per_tile)
)

@@ -712,8 +739,8 @@ class LibWarpX():
particles_per_tile = _LP_c_int()
num_tiles = ctypes.c_int(0)
data = self.libwarpx_so.warpx_getParticleArrays(
- ctypes.c_char_p(species_name.encode('utf-8')),
- ctypes.c_char_p(comp_name.encode('utf-8')),
+ ctypes.c_char_p(species_name.encode('ascii')),
+ ctypes.c_char_p(comp_name.encode('ascii')),
level, ctypes.byref(num_tiles), ctypes.byref(particles_per_tile)
)

@@ -885,8 +912,8 @@ class LibWarpX():

'''
return self.libwarpx_so.warpx_getParticleCompIndex(
- ctypes.c_char_p(species_name.encode('utf-8')),
- ctypes.c_char_p(pid_name.encode('utf-8'))
+ ctypes.c_char_p(species_name.encode('ascii')),
+ ctypes.c_char_p(pid_name.encode('ascii'))
)

def add_real_comp(self, species_name, pid_name, comm=True):
@@ -903,8 +930,8 @@ class LibWarpX():

'''
self.libwarpx_so.warpx_addRealComp(
- ctypes.c_char_p(species_name.encode('utf-8')),
- ctypes.c_char_p(pid_name.encode('utf-8')), comm
+ ctypes.c_char_p(species_name.encode('ascii')),
+ ctypes.c_char_p(pid_name.encode('ascii')), comm
)

def get_particle_boundary_buffer_size(self, species_name, boundary):
@@ -927,7 +954,7 @@ class LibWarpX():

'''
return self.libwarpx_so.warpx_getParticleBoundaryBufferSize(
- ctypes.c_char_p(species_name.encode('utf-8')),
+ ctypes.c_char_p(species_name.encode('ascii')),
self.get_boundary_number(boundary)
)

@@ -960,7 +987,7 @@ class LibWarpX():
particles_per_tile = _LP_c_int()
num_tiles = ctypes.c_int(0)
data = self.libwarpx_so.warpx_getParticleBoundaryBufferStructs(
- ctypes.c_char_p(species_name.encode('utf-8')),
+ ctypes.c_char_p(species_name.encode('ascii')),
self.get_boundary_number(boundary), level,
ctypes.byref(num_tiles), ctypes.byref(particles_per_tile)
)
@@ -1007,16 +1034,16 @@ class LibWarpX():
num_tiles = ctypes.c_int(0)
if comp_name == 'step_scraped':
data = self.libwarpx_so.warpx_getParticleBoundaryBufferScrapedSteps(
- ctypes.c_char_p(species_name.encode('utf-8')),
+ ctypes.c_char_p(species_name.encode('ascii')),
self.get_boundary_number(boundary), level,
ctypes.byref(num_tiles), ctypes.byref(particles_per_tile)
)
else:
data = self.libwarpx_so.warpx_getParticleBoundaryBuffer(
- ctypes.c_char_p(species_name.encode('utf-8')),
+ ctypes.c_char_p(species_name.encode('ascii')),
self.get_boundary_number(boundary), level,
ctypes.byref(num_tiles), ctypes.byref(particles_per_tile),
- ctypes.c_char_p(comp_name.encode('utf-8'))
+ ctypes.c_char_p(comp_name.encode('ascii'))
)

particle_data = []
diff --git a/Source/Python/WarpXWrappers.cpp b/Source/Python/WarpXWrappers.cpp
index d4589ec8a..08e259463 100644
--- a/Source/Python/WarpXWrappers.cpp
+++ b/Source/Python/WarpXWrappers.cpp
@@ -147,11 +147,21 @@ namespace

void amrex_init (int argc, char* argv[])
{
+ std::cout << "C++ amrex_init: argc=" << argc << std::endl;
+ for(int i = 0; i < argc; ++i)
+ {
+ std::cout << i << " " << argv[i] << std::endl;
+ }
warpx_amrex_init(argc, argv);
}

void amrex_init_with_inited_mpi (int argc, char* argv[], MPI_Comm mpicomm)
{
+ std::cout << "C++ amrex_init_with_inited_mpi: argc=" << argc << std::endl;
+ for(int i = 0; i < argc; ++i)
+ {
+ std::cout << i << " " << argv[i] << std::endl;
+ }
warpx_amrex_init(argc, argv, true, mpicomm);
}

--
2.25.1

self.libwarpx_so.amrex_init(argc, argvC)

0 comments on commit 9110a67

Please sign in to comment.