From 14e276c6f192674937cf42db8c71226e69da88bc Mon Sep 17 00:00:00 2001 From: Min RK Date: Fri, 20 May 2016 10:38:19 +0200 Subject: [PATCH 1/5] allow explicit reconnect rather than reconnect being a no-op if it appears connected this allows an escape hatch if the connection should be forcefully reestablished. --- notebook/static/services/kernels/kernel.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/notebook/static/services/kernels/kernel.js b/notebook/static/services/kernels/kernel.js index 9e3d957ac..995199c7f 100644 --- a/notebook/static/services/kernels/kernel.js +++ b/notebook/static/services/kernels/kernel.js @@ -338,7 +338,7 @@ define([ * @function reconnect */ if (this.is_connected()) { - return; + this.stop_channels(); } this._reconnect_attempt = this._reconnect_attempt + 1; this.events.trigger('kernel_reconnecting.Kernel', { From e11091449542001b4476d069ce228afe523ac22f Mon Sep 17 00:00:00 2001 From: Min RK Date: Fri, 20 May 2016 10:38:49 +0200 Subject: [PATCH 2/5] register callbacks before sending message just in case of infinitely fast replies --- notebook/static/services/kernels/kernel.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/notebook/static/services/kernels/kernel.js b/notebook/static/services/kernels/kernel.js index 995199c7f..d71139065 100644 --- a/notebook/static/services/kernels/kernel.js +++ b/notebook/static/services/kernels/kernel.js @@ -638,8 +638,8 @@ define([ */ var msg = this._get_msg(msg_type, content, metadata, buffers); msg.channel = 'shell'; - this._send(serialize.serialize(msg)); this.set_callbacks_for_msg(msg.header.msg_id, callbacks); + this._send(serialize.serialize(msg)); return msg.header.msg_id; }; From 4fe10b8142e3b4a8b87287c2a57e0c4d9b6874ee Mon Sep 17 00:00:00 2001 From: Min RK Date: Fri, 20 May 2016 10:44:12 +0200 Subject: [PATCH 3/5] Prevent session_id collisions Keep a registry of open sessions, and refuse to open duplicates. --- notebook/services/kernels/handlers.py | 21 ++++++++++++++++++++- 1 file changed, 20 insertions(+), 1 deletion(-) diff --git a/notebook/services/kernels/handlers.py b/notebook/services/kernels/handlers.py index 3b50a10c2..c64a9853c 100644 --- a/notebook/services/kernels/handlers.py +++ b/notebook/services/kernels/handlers.py @@ -16,7 +16,7 @@ from jupyter_client.jsonutil import date_default from ipython_genutils.py3compat import cast_unicode from notebook.utils import url_path_join, url_escape -from ...base.handlers import IPythonHandler, APIHandler, json_errors +from ...base.handlers import APIHandler, json_errors from ...base.zmqhandlers import AuthenticatedZMQStreamHandler, deserialize_binary_message from jupyter_client import protocol_version as client_protocol_version @@ -96,6 +96,12 @@ class KernelActionHandler(APIHandler): class ZMQChannelsHandler(AuthenticatedZMQStreamHandler): + + # class-level registry of open sessions + # allows checking for conflict on session-id, + # which is used as a zmq identity and must be unique. + session_key = '' + _open_sessions = {} @property def kernel_info_timeout(self): @@ -209,6 +215,8 @@ class ZMQChannelsHandler(AuthenticatedZMQStreamHandler): def pre_get(self): # authenticate first super(ZMQChannelsHandler, self).pre_get() + # check session collision: + self._register_session() # then request kernel info, waiting up to a certain time before giving up. # We don't want to wait forever, because browsers don't take it well when # servers never respond to websocket connection requests. @@ -232,6 +240,13 @@ class ZMQChannelsHandler(AuthenticatedZMQStreamHandler): self.kernel_id = cast_unicode(kernel_id, 'ascii') yield super(ZMQChannelsHandler, self).get(kernel_id=kernel_id) + def _register_session(self): + """Ensure we aren't creating a duplicate session""" + self.session_key = '%s:%s' % (self.kernel_id, self.session.session) + if self.session_key in self._open_sessions: + raise web.HTTPError(400, "Session %s already open." % self.session_key) + self._open_sessions[self.session_key] = self + def open(self, kernel_id): super(ZMQChannelsHandler, self).open() try: @@ -350,6 +365,10 @@ class ZMQChannelsHandler(AuthenticatedZMQStreamHandler): def on_close(self): + self.log.debug("Websocket closed %s", self.session_key) + # unregister myself as an open session (only if it's really me) + if self._open_sessions.get(self.session_key) is self: + self._open_sessions.pop(self.session_key) km = self.kernel_manager if self.kernel_id in km: km.remove_restart_callback( From 6c4687bc8e7870bce1af70d632eefd3263631aaa Mon Sep 17 00:00:00 2001 From: Min RK Date: Fri, 20 May 2016 10:45:39 +0200 Subject: [PATCH 4/5] remove outdated "you will NOT be able to run code" message from connection-failed dialog. This is a reference to ignored execute requests, which are fixed by the pending-message queue. --- notebook/static/notebook/js/notificationarea.js | 3 +-- notebook/static/services/kernels/kernel.js | 9 +++++++-- 2 files changed, 8 insertions(+), 4 deletions(-) diff --git a/notebook/static/notebook/js/notificationarea.js b/notebook/static/notebook/js/notificationarea.js index 94ab4b116..33b9ab237 100644 --- a/notebook/static/notebook/js/notificationarea.js +++ b/notebook/static/notebook/js/notificationarea.js @@ -138,8 +138,7 @@ define([ if (info.attempt === 1) { var msg = "A connection to the notebook server could not be established." + - " The notebook will continue trying to reconnect, but" + - " until it does, you will NOT be able to run code. Check your" + + " The notebook will continue trying to reconnect. Check your" + " network connection or notebook server configuration."; dialog.kernel_modal({ diff --git a/notebook/static/services/kernels/kernel.js b/notebook/static/services/kernels/kernel.js index d71139065..95f51398c 100644 --- a/notebook/static/services/kernels/kernel.js +++ b/notebook/static/services/kernels/kernel.js @@ -534,8 +534,13 @@ define([ this.events.trigger('kernel_disconnected.Kernel', {kernel: this}); if (error) { - console.log('WebSocket connection failed: ', ws_url); - this.events.trigger('kernel_connection_failed.Kernel', {kernel: this, ws_url: ws_url, attempt: this._reconnect_attempt}); + console.log('WebSocket connection failed: ', ws_url, error); + this.events.trigger('kernel_connection_failed.Kernel', { + kernel: this, + ws_url: ws_url, + attempt: this._reconnect_attempt, + error: error, + }); } this._schedule_reconnect(); }; From b6e8a3732e670a5815cd5b062c8d7a22c36fe9b3 Mon Sep 17 00:00:00 2001 From: Min RK Date: Fri, 20 May 2016 10:57:17 +0200 Subject: [PATCH 5/5] Allow replacing stale websocket connections instead of refusing duplecate connections, since reconnect on flaky network is more likely. --- notebook/services/kernels/handlers.py | 23 ++++++++++++++++++----- 1 file changed, 18 insertions(+), 5 deletions(-) diff --git a/notebook/services/kernels/handlers.py b/notebook/services/kernels/handlers.py index c64a9853c..1087aad18 100644 --- a/notebook/services/kernels/handlers.py +++ b/notebook/services/kernels/handlers.py @@ -100,7 +100,6 @@ class ZMQChannelsHandler(AuthenticatedZMQStreamHandler): # class-level registry of open sessions # allows checking for conflict on session-id, # which is used as a zmq identity and must be unique. - session_key = '' _open_sessions = {} @property @@ -200,6 +199,8 @@ class ZMQChannelsHandler(AuthenticatedZMQStreamHandler): self.kernel_id = None self.kernel_info_channel = None self._kernel_info_future = Future() + self._close_future = Future() + self.session_key = '' # Rate limiting code self._iopub_window_msg_count = 0 @@ -216,7 +217,7 @@ class ZMQChannelsHandler(AuthenticatedZMQStreamHandler): # authenticate first super(ZMQChannelsHandler, self).pre_get() # check session collision: - self._register_session() + yield self._register_session() # then request kernel info, waiting up to a certain time before giving up. # We don't want to wait forever, because browsers don't take it well when # servers never respond to websocket connection requests. @@ -240,11 +241,19 @@ class ZMQChannelsHandler(AuthenticatedZMQStreamHandler): self.kernel_id = cast_unicode(kernel_id, 'ascii') yield super(ZMQChannelsHandler, self).get(kernel_id=kernel_id) + @gen.coroutine def _register_session(self): - """Ensure we aren't creating a duplicate session""" + """Ensure we aren't creating a duplicate session. + + If a previous identical session is still open, close it to avoid collisions. + This is likely due to a client reconnecting from a lost network connection, + where the socket on our side has not been cleaned up yet. + """ self.session_key = '%s:%s' % (self.kernel_id, self.session.session) - if self.session_key in self._open_sessions: - raise web.HTTPError(400, "Session %s already open." % self.session_key) + stale_handler = self._open_sessions.get(self.session_key) + if stale_handler: + self.log.warning("Replacing stale connection: %s", self.session_key) + yield stale_handler.close() self._open_sessions[self.session_key] = self def open(self, kernel_id): @@ -363,6 +372,9 @@ class ZMQChannelsHandler(AuthenticatedZMQStreamHandler): return super(ZMQChannelsHandler, self)._on_zmq_reply(stream, msg) + def close(self): + super(ZMQChannelsHandler, self).close() + return self._close_future def on_close(self): self.log.debug("Websocket closed %s", self.session_key) @@ -389,6 +401,7 @@ class ZMQChannelsHandler(AuthenticatedZMQStreamHandler): socket.close() self.channels = {} + self._close_future.set_result(None) def _send_status_message(self, status): msg = self.session.msg("status",