From 4000838321c90e0a03905dac6989598912548c7a Mon Sep 17 00:00:00 2001 From: Kevin Bates Date: Wed, 22 Feb 2017 11:13:34 -0800 Subject: [PATCH 1/4] Cull idle kernels after specified period --- notebook/services/kernels/kernelmanager.py | 70 +++++++++++++++++++++- 1 file changed, 68 insertions(+), 2 deletions(-) diff --git a/notebook/services/kernels/kernelmanager.py b/notebook/services/kernels/kernelmanager.py index b5c4c9f5d..a3b8d5121 100644 --- a/notebook/services/kernels/kernelmanager.py +++ b/notebook/services/kernels/kernelmanager.py @@ -11,15 +11,17 @@ import os from tornado import gen, web from tornado.concurrent import Future -from tornado.ioloop import IOLoop +from tornado.ioloop import IOLoop, PeriodicCallback from jupyter_client.multikernelmanager import MultiKernelManager -from traitlets import Dict, List, Unicode, TraitError, default, validate +from traitlets import Dict, List, Unicode, TraitError, Integer, default, validate from notebook.utils import to_os_path from notebook._tz import utcnow, isoformat from ipython_genutils.py3compat import getcwd +from datetime import datetime, timedelta + class MappingKernelManager(MultiKernelManager): """A KernelManager that handles notebook mapping and HTTP error handling""" @@ -34,6 +36,10 @@ class MappingKernelManager(MultiKernelManager): _kernel_connections = Dict() + _culler_callback = None + + _initialized_culler = False + @default('root_dir') def _default_root_dir(self): try: @@ -52,6 +58,26 @@ class MappingKernelManager(MultiKernelManager): raise TraitError("kernel root dir %r is not a directory" % value) return value + cull_kernels_after_minutes_env = 'CULL_KERNELS_AFTER_MINUTES' + cull_kernels_after_minutes_default = 0 + cull_kernels_after_minutes = Integer(cull_kernels_after_minutes_default, config=True, + help="""Duration (minutes) in which a kernel must remain idle before it can be culled. Culling is disabled (0) by default.""" + ) + + @default('cull_kernels_after_minutes') + def cull_kernels_after_minutes_value(self): + return int(os.getenv(self.cull_kernels_after_minutes_env, self.cull_kernels_after_minutes_default)) + + kernel_culling_interval_seconds_env = 'KERNEL_CULLING_INTERVAL_SECONDS' + kernel_culling_interval_seconds_default = 300 # 5 minutes + kernel_culling_interval_seconds = Integer(kernel_culling_interval_seconds_default, config=True, + help="""The interval (seconds) in which kernels are culled if exceeding the idle duration.""" + ) + + @default('kernel_culling_interval_seconds') + def kernel_culling_interval_seconds_value(self): + return int(os.getenv(self.kernel_culling_interval_seconds_env, self.kernel_culling_interval_seconds_default)) + #------------------------------------------------------------------------- # Methods for managing kernels and sessions #------------------------------------------------------------------------- @@ -105,6 +131,11 @@ class MappingKernelManager(MultiKernelManager): else: self._check_kernel_id(kernel_id) self.log.info("Using existing kernel: %s" % kernel_id) + + # Initialize culling if not already + if not self._initialized_culler: + self.initialize_culler() + # py2-compat raise gen.Return(kernel_id) @@ -225,3 +256,38 @@ class MappingKernelManager(MultiKernelManager): kernel._activity_stream.on_recv(record_activity) + def initialize_culler(self): + """Start idle culler if 'cull_kernels_after_minutes' is greater than zero. + + Regardless of that value, set flag that we've been here. + """ + if not self._initialized_culler and self.cull_kernels_after_minutes > 0: + if self._culler_callback is None: + loop = IOLoop.current() + self._culler_callback = PeriodicCallback( + self.cull_kernels, 1000*self.kernel_culling_interval_seconds, loop) + self.log.info("Culling kernels with idle durations > %s minutes at %s second intervals ...", + self.cull_kernels_after_minutes, self.kernel_culling_interval_seconds) + self._culler_callback.start() + + self._initialized_culler = True + + def cull_kernels(self): + self.log.debug("Polling every %s seconds for kernels idle > %s minutes...", + self.kernel_culling_interval_seconds, self.cull_kernels_after_minutes) + for kId, kernel in self._kernels.items(): + self.cull_kernel(kId, kernel) + + def cull_kernel(self, kId, kernel): + activity = kernel.last_activity + name = kernel.kernel_name + self.log.debug("kId=%s, name=%s, last_activity=%s", kId, name, activity) + if activity is not None: + dtNow = utcnow() + #dtActivity = datetime.strptime(activity,'%Y-%m-%dT%H:%M:%S.%f') + dtIdle = dtNow - activity + if dtIdle > timedelta(minutes=self.cull_kernels_after_minutes): # can be culled + idleDuration = int(dtIdle.total_seconds()/60.0) + self.log.warn("Culling kernel '%s' (%s) due to %s minutes of inactivity.", name, kId, idleDuration) + self.shutdown_kernel(kId) + From 2f787088d5d3d009edc7086a90e8b7283763e04f Mon Sep 17 00:00:00 2001 From: Kevin Bates Date: Wed, 22 Feb 2017 11:59:34 -0800 Subject: [PATCH 2/4] Validate culling interval. --- notebook/services/kernels/kernelmanager.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/notebook/services/kernels/kernelmanager.py b/notebook/services/kernels/kernelmanager.py index a3b8d5121..71f265683 100644 --- a/notebook/services/kernels/kernelmanager.py +++ b/notebook/services/kernels/kernelmanager.py @@ -264,6 +264,10 @@ class MappingKernelManager(MultiKernelManager): if not self._initialized_culler and self.cull_kernels_after_minutes > 0: if self._culler_callback is None: loop = IOLoop.current() + if self.kernel_culling_interval_seconds <= 0: #handle case where user set invalid value + self.log.warn("Invalid value for 'kernel_culling_interval_seconds' detected (%s) - using default value (%s).", + self.kernel_culling_interval_seconds, self.kernel_culling_interval_seconds_default) + self.kernel_culling_interval_seconds = self.kernel_culling_interval_seconds_default self._culler_callback = PeriodicCallback( self.cull_kernels, 1000*self.kernel_culling_interval_seconds, loop) self.log.info("Culling kernels with idle durations > %s minutes at %s second intervals ...", From c3f753faf87cb33ce5de7327d47998424965fb21 Mon Sep 17 00:00:00 2001 From: Kevin Bates Date: Thu, 23 Feb 2017 08:36:00 -0800 Subject: [PATCH 3/4] incorporate review recommendations --- notebook/services/kernels/kernelmanager.py | 71 +++++++++------------- 1 file changed, 29 insertions(+), 42 deletions(-) diff --git a/notebook/services/kernels/kernelmanager.py b/notebook/services/kernels/kernelmanager.py index 71f265683..9bf6d8ab9 100644 --- a/notebook/services/kernels/kernelmanager.py +++ b/notebook/services/kernels/kernelmanager.py @@ -58,26 +58,15 @@ class MappingKernelManager(MultiKernelManager): raise TraitError("kernel root dir %r is not a directory" % value) return value - cull_kernels_after_minutes_env = 'CULL_KERNELS_AFTER_MINUTES' - cull_kernels_after_minutes_default = 0 - cull_kernels_after_minutes = Integer(cull_kernels_after_minutes_default, config=True, - help="""Duration (minutes) in which a kernel must remain idle before it can be culled. Culling is disabled (0) by default.""" + cull_idle_timeout = Integer(0, config=True, + help="""Timeout (in seconds) after which a kernel is considered idle and ready to be culled.""" ) - @default('cull_kernels_after_minutes') - def cull_kernels_after_minutes_value(self): - return int(os.getenv(self.cull_kernels_after_minutes_env, self.cull_kernels_after_minutes_default)) - - kernel_culling_interval_seconds_env = 'KERNEL_CULLING_INTERVAL_SECONDS' - kernel_culling_interval_seconds_default = 300 # 5 minutes - kernel_culling_interval_seconds = Integer(kernel_culling_interval_seconds_default, config=True, - help="""The interval (seconds) in which kernels are culled if exceeding the idle duration.""" + cull_interval_default = 300 # 5 minutes + cull_interval = Integer(cull_interval_default, config=True, + help="""The interval (in seconds) on which to check for idle kernels exceeding the cull timeout value.""" ) - @default('kernel_culling_interval_seconds') - def kernel_culling_interval_seconds_value(self): - return int(os.getenv(self.kernel_culling_interval_seconds_env, self.kernel_culling_interval_seconds_default)) - #------------------------------------------------------------------------- # Methods for managing kernels and sessions #------------------------------------------------------------------------- @@ -257,41 +246,39 @@ class MappingKernelManager(MultiKernelManager): kernel._activity_stream.on_recv(record_activity) def initialize_culler(self): - """Start idle culler if 'cull_kernels_after_minutes' is greater than zero. + """Start idle culler if 'cull_idle_timeout' is greater than zero. Regardless of that value, set flag that we've been here. """ - if not self._initialized_culler and self.cull_kernels_after_minutes > 0: + if not self._initialized_culler and self.cull_idle_timeout > 0: if self._culler_callback is None: loop = IOLoop.current() - if self.kernel_culling_interval_seconds <= 0: #handle case where user set invalid value - self.log.warn("Invalid value for 'kernel_culling_interval_seconds' detected (%s) - using default value (%s).", - self.kernel_culling_interval_seconds, self.kernel_culling_interval_seconds_default) - self.kernel_culling_interval_seconds = self.kernel_culling_interval_seconds_default + if self.cull_interval <= 0: #handle case where user set invalid value + self.log.warning("Invalid value for 'cull_interval' detected (%s) - using default value (%s).", + self.cull_interval, self.cull_interval_default) + self.cull_interval = self.cull_interval_default self._culler_callback = PeriodicCallback( - self.cull_kernels, 1000*self.kernel_culling_interval_seconds, loop) - self.log.info("Culling kernels with idle durations > %s minutes at %s second intervals ...", - self.cull_kernels_after_minutes, self.kernel_culling_interval_seconds) + self.cull_kernels, 1000*self.cull_interval, loop) + self.log.info("Culling kernels with idle durations > %s seconds at %s second intervals ...", + self.cull_idle_timeout, self.cull_interval) self._culler_callback.start() self._initialized_culler = True def cull_kernels(self): - self.log.debug("Polling every %s seconds for kernels idle > %s minutes...", - self.kernel_culling_interval_seconds, self.cull_kernels_after_minutes) - for kId, kernel in self._kernels.items(): - self.cull_kernel(kId, kernel) - - def cull_kernel(self, kId, kernel): - activity = kernel.last_activity - name = kernel.kernel_name - self.log.debug("kId=%s, name=%s, last_activity=%s", kId, name, activity) - if activity is not None: - dtNow = utcnow() - #dtActivity = datetime.strptime(activity,'%Y-%m-%dT%H:%M:%S.%f') - dtIdle = dtNow - activity - if dtIdle > timedelta(minutes=self.cull_kernels_after_minutes): # can be culled - idleDuration = int(dtIdle.total_seconds()/60.0) - self.log.warn("Culling kernel '%s' (%s) due to %s minutes of inactivity.", name, kId, idleDuration) - self.shutdown_kernel(kId) + self.log.debug("Polling every %s seconds for kernels idle > %s seconds...", + self.cull_interval, self.cull_idle_timeout) + for kernel_id in list(self._kernels): + self.cull_kernel_if_idle(kernel_id) + + def cull_kernel_if_idle(self, kernel_id): + kernel = self._kernels[kernel_id] + self.log.debug("kernel_id=%s, kernel_name=%s, last_activity=%s", kernel_id, kernel.kernel_name, kernel.last_activity) + if kernel.last_activity is not None: + dt_now = utcnow() + dt_idle = dt_now - kernel.last_activity + if dt_idle > timedelta(seconds=self.cull_idle_timeout): # exceeds timeout, can be culled + idle_duration = int(dt_idle.total_seconds()) + self.log.warning("Culling kernel '%s' (%s) due to %s seconds of inactivity.", kernel.kernel_name, kernel_id, idle_duration) + self.shutdown_kernel(kernel_id) From ff19c4cd7c862912822bbfbe1c1b23c0a9a3ee21 Mon Sep 17 00:00:00 2001 From: Kevin Bates Date: Fri, 24 Feb 2017 10:03:30 -0800 Subject: [PATCH 4/4] enforce minimum timeout, ensure exceptions don't prevent culling of others --- notebook/services/kernels/kernelmanager.py | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/notebook/services/kernels/kernelmanager.py b/notebook/services/kernels/kernelmanager.py index 9bf6d8ab9..292cb6239 100644 --- a/notebook/services/kernels/kernelmanager.py +++ b/notebook/services/kernels/kernelmanager.py @@ -58,8 +58,11 @@ class MappingKernelManager(MultiKernelManager): raise TraitError("kernel root dir %r is not a directory" % value) return value + cull_idle_timeout_minimum = 300 # 5 minutes cull_idle_timeout = Integer(0, config=True, - help="""Timeout (in seconds) after which a kernel is considered idle and ready to be culled.""" + help="""Timeout (in seconds) after which a kernel is considered idle and ready to be culled. Values of 0 or + lower disable culling. The minimum timeout is 300 seconds (5 minutes). Positive values less than the minimum value + will be set to the minimum.""" ) cull_interval_default = 300 # 5 minutes @@ -252,6 +255,10 @@ class MappingKernelManager(MultiKernelManager): """ if not self._initialized_culler and self.cull_idle_timeout > 0: if self._culler_callback is None: + if self.cull_idle_timeout < self.cull_idle_timeout_minimum: + self.log.warning("'cull_idle_timeout' (%s) is less than the minimum value (%s) and has been set to the minimum.", + self.cull_idle_timeout, self.cull_idle_timeout_minimum) + self.cull_idle_timeout = self.cull_idle_timeout_minimum loop = IOLoop.current() if self.cull_interval <= 0: #handle case where user set invalid value self.log.warning("Invalid value for 'cull_interval' detected (%s) - using default value (%s).", @@ -268,8 +275,13 @@ class MappingKernelManager(MultiKernelManager): def cull_kernels(self): self.log.debug("Polling every %s seconds for kernels idle > %s seconds...", self.cull_interval, self.cull_idle_timeout) + """Create a separate list of kernels to avoid conflicting updates while iterating""" for kernel_id in list(self._kernels): - self.cull_kernel_if_idle(kernel_id) + try: + self.cull_kernel_if_idle(kernel_id) + except Exception as e: + self.log.exception("The following exception was encountered while checking the idle duration of kernel %s: %s", + kernel_id, e) def cull_kernel_if_idle(self, kernel_id): kernel = self._kernels[kernel_id]