28
28
29
29
from pyspark .serializers import read_int
30
30
31
- _gateway_connection = None
32
31
33
32
def launch_gateway ():
34
- global _gateway_connection
35
33
SPARK_HOME = os .environ ["SPARK_HOME" ]
36
34
37
35
if "PYSPARK_GATEWAY_PORT" in os .environ :
@@ -55,6 +53,8 @@ def launch_gateway():
55
53
env ['PYSPARK_DRIVER_CALLBACK_HOST' ] = callback_host
56
54
env ['PYSPARK_DRIVER_CALLBACK_PORT' ] = str (callback_port )
57
55
56
+ # Launch the Java gateway.
57
+ # We open a pipe to stdin so that the Java gateway can die when the pipe is broken
58
58
if not on_windows :
59
59
# Don't send ctrl-c / SIGINT to the Java gateway:
60
60
def preexec_func ():
@@ -65,9 +65,11 @@ def preexec_func():
65
65
# preexec_fn not supported on Windows
66
66
proc = Popen (command , stdout = PIPE , stdin = PIPE , env = env )
67
67
68
- _gateway_connection = callback_socket .accept ()[0 ]
68
+ gateway_connection = callback_socket .accept ()[0 ]
69
69
# Determine which ephemeral port the server started on:
70
- gateway_port = read_int (_gateway_connection .makefile ())
70
+ gateway_port = read_int (gateway_connection .makefile ())
71
+ gateway_connection .close ()
72
+ callback_socket .close ()
71
73
72
74
# On Windows, ensure the Java child processes do not linger after Python has exited.
73
75
# On UNIX-based systems, the child process can kill itself on broken pipe (i.e. when
0 commit comments