Skip to content

Commit 290203f

Browse files
committed
fix: improve EOFError exception when remote nvim crashes
Problem: When the remote Nvim instance is aborted or terminates, the pynvim client will face an exception `OSError: EOF`. It is not very clear what is wrong and why EOF is received. Solution: This happens when the remote the nvim process terminates unexpectedly (or a tcp/file socket is broken or gets disconnected). We can provide a bit more detailed information about why the asyncio session is stopping, through asyncio Protocol. An `EOFError` will be raised instead of OSError. For example, during pynvim's unit tests we may see: ``` EOFError: process_exited: pid = 40000, return_code = -6 ``` which means that the Nvim subprocess (pid = 40000) exited unexpectedly after getting SIGABRT (signal 6), i.e. segfault.
1 parent 9c5ec67 commit 290203f

File tree

2 files changed

+50
-19
lines changed

2 files changed

+50
-19
lines changed

pynvim/msgpack_rpc/event_loop/asyncio.py

+38-17
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@
1818
from pynvim.msgpack_rpc.event_loop.base import BaseEventLoop, TTransportType
1919

2020
logger = logging.getLogger(__name__)
21-
debug, info, warn = (logger.debug, logger.info, logger.warning,)
21+
debug, info, warn = (logger.debug, logger.info, logger.warning)
2222

2323
loop_cls = asyncio.SelectorEventLoop
2424

@@ -47,13 +47,14 @@ def __init__(self, on_data, on_error):
4747
@override
4848
def connection_made(self, transport):
4949
"""Used to signal `asyncio.Protocol` of a successful connection."""
50-
del transport # no-op
50+
self._transport = transport
5151

5252
@override
5353
def connection_lost(self, exc: Optional[Exception]) -> None:
5454
"""Used to signal `asyncio.Protocol` of a lost connection."""
55-
debug(f"connection_lost: exc = {exc}")
56-
self._on_error(exc if exc else EOFError())
55+
warn(f"connection_lost: exc = {exc}")
56+
57+
self._on_error(exc if exc else EOFError("connection_lost"))
5758

5859
@override
5960
def data_received(self, data: bytes) -> None:
@@ -63,11 +64,19 @@ def data_received(self, data: bytes) -> None:
6364
@override
6465
def pipe_connection_lost(self, fd: int, exc: Optional[Exception]) -> None:
6566
"""Used to signal `asyncio.SubprocessProtocol` of a lost connection."""
66-
debug("pipe_connection_lost: fd = %s, exc = %s", fd, exc)
67+
68+
assert isinstance(self._transport, asyncio.SubprocessTransport)
69+
debug_info = {'fd': fd, 'exc': exc, 'pid': self._transport.get_pid()}
70+
warn(f"pipe_connection_lost {debug_info}")
71+
6772
if os.name == 'nt' and fd == 2: # stderr
6873
# On windows, ignore piped stderr being closed immediately (#505)
6974
return
70-
self._on_error(exc if exc else EOFError())
75+
76+
# pipe_connection_lost() *may* be called before process_exited() is
77+
# called, when a Nvim subprocess crashes (SIGABRT). Do not handle
78+
# errors here, as errors will be handled somewhere else
79+
# self._on_error(exc if exc else EOFError("pipe_connection_lost"))
7180

7281
@override
7382
def pipe_data_received(self, fd, data):
@@ -81,8 +90,13 @@ def pipe_data_received(self, fd, data):
8190
@override
8291
def process_exited(self) -> None:
8392
"""Used to signal `asyncio.SubprocessProtocol` when the child exits."""
84-
debug("process_exited")
85-
self._on_error(EOFError())
93+
assert isinstance(self._transport, asyncio.SubprocessTransport)
94+
pid = self._transport.get_pid()
95+
return_code = self._transport.get_returncode()
96+
97+
warn("process_exited, pid = %s, return_code = %s", pid, return_code)
98+
err = EOFError(f"process_exited: pid = {pid}, return_code = {return_code}")
99+
self._on_error(err)
86100

87101

88102
class AsyncioEventLoop(BaseEventLoop):
@@ -131,7 +145,8 @@ def _on_data(data: bytes) -> None:
131145
def _connect_tcp(self, address: str, port: int) -> None:
132146
async def connect_tcp():
133147
transport, protocol = await self._loop.create_connection(
134-
self._protocol_factory, address, port)
148+
self._protocol_factory, address, port
149+
)
135150
debug(f"tcp connection successful: {address}:{port}")
136151
self._transport = transport
137152
self._protocol = protocol
@@ -146,8 +161,7 @@ async def connect_socket():
146161
else:
147162
_create_connection = self._loop.create_unix_connection
148163

149-
transport, protocol = await _create_connection(
150-
self._protocol_factory, path)
164+
transport, protocol = await _create_connection(self._protocol_factory, path)
151165
debug("socket connection successful: %s", self._transport)
152166
self._transport = transport
153167
self._protocol = protocol
@@ -162,10 +176,12 @@ async def connect_stdin():
162176
else:
163177
pipe = sys.stdin
164178
transport, protocol = await self._loop.connect_read_pipe(
165-
self._protocol_factory, pipe)
179+
self._protocol_factory, pipe
180+
)
166181
debug("native stdin connection successful")
167182
self._to_close.append(transport)
168183
del protocol
184+
169185
self._loop.run_until_complete(connect_stdin())
170186

171187
# Make sure subprocesses don't clobber stdout,
@@ -177,26 +193,29 @@ async def connect_stdout():
177193
if os.name == 'nt':
178194
pipe = PipeHandle(msvcrt.get_osfhandle(rename_stdout))
179195
else:
180-
pipe = os.fdopen(rename_stdout, 'wb')
196+
pipe = os.fdopen(rename_stdout, "wb")
181197

182198
transport, protocol = await self._loop.connect_write_pipe(
183-
self._protocol_factory, pipe)
199+
self._protocol_factory, pipe
200+
)
184201
debug("native stdout connection successful")
185202
self._transport = transport
186203
self._protocol = protocol
204+
187205
self._loop.run_until_complete(connect_stdout())
188206

189207
@override
190208
def _connect_child(self, argv: List[str]) -> None:
191-
if os.name != 'nt':
209+
if os.name != "nt":
192210
# see #238, #241
193211
self._child_watcher = asyncio.get_child_watcher()
194212
self._child_watcher.attach_loop(self._loop)
195213

196214
async def create_subprocess():
197215
transport: asyncio.SubprocessTransport # type: ignore
198216
transport, protocol = await self._loop.subprocess_exec(
199-
self._protocol_factory, *argv)
217+
self._protocol_factory, *argv
218+
)
200219
pid = transport.get_pid()
201220
debug("child subprocess_exec successful, PID = %s", pid)
202221

@@ -250,11 +269,13 @@ def _close_transport(transport):
250269
# Windows: for ProactorBasePipeTransport, close() doesn't take in
251270
# effect immediately (closing happens asynchronously inside the
252271
# event loop), need to wait a bit for completing graceful shutdown.
253-
if os.name == 'nt' and hasattr(transport, '_sock'):
272+
if os.name == 'nt' and hasattr(transport, "_sock"):
273+
254274
async def wait_until_closed():
255275
# pylint: disable-next=protected-access
256276
while transport._sock is not None:
257277
await asyncio.sleep(0.01)
278+
258279
self._loop.run_until_complete(wait_until_closed())
259280

260281
if self._transport:

pynvim/msgpack_rpc/event_loop/base.py

+12-2
Original file line numberDiff line numberDiff line change
@@ -194,6 +194,13 @@ def run(self, data_cb: Callable[[bytes], None]) -> None:
194194
signal.signal(signal.SIGINT, default_int_handler)
195195
self._on_data = None
196196

197+
# eventloop was stopped due to an error, re-raise it
198+
# (e.g. connection lost when subprocess nvim dies)
199+
if self._error:
200+
# Note: traceback is not preserved and attached for some reason,
201+
# should be somewhere from msgpack_rpc.event_loop.asyncio.Protocol
202+
raise self._error
203+
197204
@abstractmethod
198205
def _run(self) -> None:
199206
raise NotImplementedError()
@@ -234,8 +241,11 @@ def _on_signal(self, signum: signal.Signals) -> None:
234241
self.stop()
235242

236243
def _on_error(self, exc: Exception) -> None:
237-
debug(str(exc))
238-
self._error = exc
244+
warn('on_error: %s', repr(exc))
245+
if self._error is None:
246+
# ignore subsequent exceptions, it's enough to raise only
247+
# the first exception arrived
248+
self._error = exc
239249
self.stop()
240250

241251
def _on_interrupt(self) -> None:

0 commit comments

Comments
 (0)