Merge pull request #5372 from kevin-bates/cull-terminals
Add ability to cull terminals and track last activitypull/5462/head
commit
3ec00ad2ad
@ -0,0 +1,151 @@
|
||||
"""A MultiTerminalManager for use in the notebook webserver
|
||||
- raises HTTPErrors
|
||||
- creates REST API models
|
||||
"""
|
||||
|
||||
# Copyright (c) Jupyter Development Team.
|
||||
# Distributed under the terms of the Modified BSD License.
|
||||
|
||||
import warnings
|
||||
|
||||
from datetime import timedelta
|
||||
from notebook._tz import utcnow, isoformat
|
||||
from terminado import NamedTermManager
|
||||
from tornado import web
|
||||
from tornado.ioloop import IOLoop, PeriodicCallback
|
||||
from traitlets import Integer, validate
|
||||
from traitlets.config import LoggingConfigurable
|
||||
from ..prometheus.metrics import TERMINAL_CURRENTLY_RUNNING_TOTAL
|
||||
|
||||
|
||||
class TerminalManager(LoggingConfigurable, NamedTermManager):
|
||||
""" """
|
||||
|
||||
_culler_callback = None
|
||||
|
||||
_initialized_culler = False
|
||||
|
||||
cull_inactive_timeout = Integer(0, config=True,
|
||||
help="""Timeout (in seconds) in which a terminal has been inactive and ready to be culled.
|
||||
Values of 0 or lower disable culling."""
|
||||
)
|
||||
|
||||
cull_interval_default = 300 # 5 minutes
|
||||
cull_interval = Integer(cull_interval_default, config=True,
|
||||
help="""The interval (in seconds) on which to check for terminals exceeding the inactive timeout value."""
|
||||
)
|
||||
|
||||
# -------------------------------------------------------------------------
|
||||
# Methods for managing terminals
|
||||
# -------------------------------------------------------------------------
|
||||
def __init__(self, *args, **kwargs):
|
||||
super(TerminalManager, self).__init__(*args, **kwargs)
|
||||
|
||||
def create(self):
|
||||
"""Create a new terminal."""
|
||||
name, term = self.new_named_terminal()
|
||||
# Monkey-patch last-activity, similar to kernels. Should we need
|
||||
# more functionality per terminal, we can look into possible sub-
|
||||
# classing or containment then.
|
||||
term.last_activity = utcnow()
|
||||
model = self.get_terminal_model(name)
|
||||
# Increase the metric by one because a new terminal was created
|
||||
TERMINAL_CURRENTLY_RUNNING_TOTAL.inc()
|
||||
# Ensure culler is initialized
|
||||
self._initialize_culler()
|
||||
return model
|
||||
|
||||
def get(self, name):
|
||||
"""Get terminal 'name'."""
|
||||
model = self.get_terminal_model(name)
|
||||
return model
|
||||
|
||||
def list(self):
|
||||
"""Get a list of all running terminals."""
|
||||
models = [self.get_terminal_model(name) for name in self.terminals]
|
||||
|
||||
# Update the metric below to the length of the list 'terms'
|
||||
TERMINAL_CURRENTLY_RUNNING_TOTAL.set(
|
||||
len(models)
|
||||
)
|
||||
return models
|
||||
|
||||
async def terminate(self, name, force=False):
|
||||
"""Terminate terminal 'name'."""
|
||||
self._check_terminal(name)
|
||||
await super(TerminalManager, self).terminate(name, force=force)
|
||||
|
||||
# Decrease the metric below by one
|
||||
# because a terminal has been shutdown
|
||||
TERMINAL_CURRENTLY_RUNNING_TOTAL.dec()
|
||||
|
||||
async def terminate_all(self):
|
||||
"""Terminate all terminals."""
|
||||
terms = [name for name in self.terminals]
|
||||
for term in terms:
|
||||
await self.terminate(term, force=True)
|
||||
|
||||
def get_terminal_model(self, name):
|
||||
"""Return a JSON-safe dict representing a terminal.
|
||||
For use in representing terminals in the JSON APIs.
|
||||
"""
|
||||
self._check_terminal(name)
|
||||
term = self.terminals[name]
|
||||
model = {
|
||||
"name": name,
|
||||
"last_activity": isoformat(term.last_activity),
|
||||
}
|
||||
return model
|
||||
|
||||
def _check_terminal(self, name):
|
||||
"""Check a that terminal 'name' exists and raise 404 if not."""
|
||||
if name not in self.terminals:
|
||||
raise web.HTTPError(404, u'Terminal not found: %s' % name)
|
||||
|
||||
def _initialize_culler(self):
|
||||
"""Start culler if 'cull_inactive_timeout' is greater than zero.
|
||||
Regardless of that value, set flag that we've been here.
|
||||
"""
|
||||
if not self._initialized_culler and self.cull_inactive_timeout > 0:
|
||||
if self._culler_callback is None:
|
||||
loop = IOLoop.current()
|
||||
if self.cull_interval <= 0: # handle case where user set invalid value
|
||||
self.log.warning("Invalid value for 'cull_interval' detected (%s) - using default value (%s).",
|
||||
self.cull_interval, self.cull_interval_default)
|
||||
self.cull_interval = self.cull_interval_default
|
||||
self._culler_callback = PeriodicCallback(
|
||||
self._cull_terminals, 1000 * self.cull_interval)
|
||||
self.log.info("Culling terminals with inactivity > %s seconds at %s second intervals ...",
|
||||
self.cull_inactive_timeout, self.cull_interval)
|
||||
self._culler_callback.start()
|
||||
|
||||
self._initialized_culler = True
|
||||
|
||||
async def _cull_terminals(self):
|
||||
self.log.debug("Polling every %s seconds for terminals inactive for > %s seconds...",
|
||||
self.cull_interval, self.cull_inactive_timeout)
|
||||
# Create a separate list of terminals to avoid conflicting updates while iterating
|
||||
for name in list(self.terminals):
|
||||
try:
|
||||
await self._cull_inactive_terminal(name)
|
||||
except Exception as e:
|
||||
self.log.exception("The following exception was encountered while checking the "
|
||||
"activity of terminal {}: {}".format(name, e))
|
||||
|
||||
async def _cull_inactive_terminal(self, name):
|
||||
try:
|
||||
term = self.terminals[name]
|
||||
except KeyError:
|
||||
return # KeyErrors are somewhat expected since the terminal can be terminated as the culling check is made.
|
||||
|
||||
self.log.debug("name=%s, last_activity=%s", name, term.last_activity)
|
||||
if hasattr(term, 'last_activity'):
|
||||
dt_now = utcnow()
|
||||
dt_inactive = dt_now - term.last_activity
|
||||
# Compute idle properties
|
||||
is_time = dt_inactive > timedelta(seconds=self.cull_inactive_timeout)
|
||||
# Cull the kernel if all three criteria are met
|
||||
if (is_time):
|
||||
inactivity = int(dt_inactive.total_seconds())
|
||||
self.log.warning("Culling terminal '%s' due to %s seconds of inactivity.", name, inactivity)
|
||||
await self.terminate(name, force=True)
|
||||
@ -0,0 +1,169 @@
|
||||
"""Test the terminal service API."""
|
||||
|
||||
import time
|
||||
|
||||
from requests import HTTPError
|
||||
from traitlets.config import Config
|
||||
|
||||
from notebook.utils import url_path_join
|
||||
from notebook.tests.launchnotebook import NotebookTestBase, assert_http_error
|
||||
|
||||
|
||||
class TerminalAPI(object):
|
||||
"""Wrapper for terminal REST API requests"""
|
||||
def __init__(self, request, base_url, headers):
|
||||
self.request = request
|
||||
self.base_url = base_url
|
||||
self.headers = headers
|
||||
|
||||
def _req(self, verb, path, body=None):
|
||||
response = self.request(verb, path, data=body)
|
||||
|
||||
if 400 <= response.status_code < 600:
|
||||
try:
|
||||
response.reason = response.json()['message']
|
||||
except:
|
||||
pass
|
||||
response.raise_for_status()
|
||||
|
||||
return response
|
||||
|
||||
def list(self):
|
||||
return self._req('GET', 'api/terminals')
|
||||
|
||||
def get(self, name):
|
||||
return self._req('GET', url_path_join('api/terminals', name))
|
||||
|
||||
def start(self):
|
||||
return self._req('POST', 'api/terminals')
|
||||
|
||||
def shutdown(self, name):
|
||||
return self._req('DELETE', url_path_join('api/terminals', name))
|
||||
|
||||
|
||||
class TerminalAPITest(NotebookTestBase):
|
||||
"""Test the terminals web service API"""
|
||||
def setUp(self):
|
||||
self.term_api = TerminalAPI(self.request,
|
||||
base_url=self.base_url(),
|
||||
headers=self.auth_headers(),
|
||||
)
|
||||
|
||||
def tearDown(self):
|
||||
for k in self.term_api.list().json():
|
||||
self.term_api.shutdown(k['name'])
|
||||
|
||||
def test_no_terminals(self):
|
||||
# Make sure there are no terminals running at the start
|
||||
terminals = self.term_api.list().json()
|
||||
self.assertEqual(terminals, [])
|
||||
|
||||
def test_create_terminal(self):
|
||||
# POST request
|
||||
r = self.term_api._req('POST', 'api/terminals')
|
||||
term1 = r.json()
|
||||
self.assertEqual(r.status_code, 200)
|
||||
self.assertIsInstance(term1, dict)
|
||||
|
||||
def test_terminal_root_handler(self):
|
||||
# POST request
|
||||
r = self.term_api.start()
|
||||
term1 = r.json()
|
||||
self.assertEqual(r.status_code, 200)
|
||||
self.assertIsInstance(term1, dict)
|
||||
|
||||
# GET request
|
||||
r = self.term_api.list()
|
||||
self.assertEqual(r.status_code, 200)
|
||||
assert isinstance(r.json(), list)
|
||||
self.assertEqual(r.json()[0]['name'], term1['name'])
|
||||
|
||||
# create another terminal and check that they both are added to the
|
||||
# list of terminals from a GET request
|
||||
term2 = self.term_api.start().json()
|
||||
assert isinstance(term2, dict)
|
||||
r = self.term_api.list()
|
||||
terminals = r.json()
|
||||
self.assertEqual(r.status_code, 200)
|
||||
assert isinstance(terminals, list)
|
||||
self.assertEqual(len(terminals), 2)
|
||||
|
||||
def test_terminal_handler(self):
|
||||
# GET terminal with given name
|
||||
term = self.term_api.start().json()['name']
|
||||
r = self.term_api.get(term)
|
||||
term1 = r.json()
|
||||
self.assertEqual(r.status_code, 200)
|
||||
assert isinstance(term1, dict)
|
||||
self.assertIn('name', term1)
|
||||
self.assertEqual(term1['name'], term)
|
||||
|
||||
# Request a bad terminal id and check that a JSON
|
||||
# message is returned!
|
||||
bad_term = 'nonExistentTerm'
|
||||
with assert_http_error(404, 'Terminal not found: ' + bad_term):
|
||||
self.term_api.get(bad_term)
|
||||
|
||||
# DELETE terminal with name
|
||||
r = self.term_api.shutdown(term)
|
||||
self.assertEqual(r.status_code, 204)
|
||||
terminals = self.term_api.list().json()
|
||||
self.assertEqual(terminals, [])
|
||||
|
||||
# Request to delete a non-existent terminal name
|
||||
bad_term = 'nonExistentTerm'
|
||||
with assert_http_error(404, 'Terminal not found: ' + bad_term):
|
||||
self.term_api.shutdown(bad_term)
|
||||
|
||||
|
||||
class TerminalCullingTest(NotebookTestBase):
|
||||
|
||||
# Configure culling
|
||||
config = Config({
|
||||
'NotebookApp': {
|
||||
'TerminalManager': {
|
||||
'cull_interval': 3,
|
||||
'cull_inactive_timeout': 2
|
||||
}
|
||||
}
|
||||
})
|
||||
|
||||
def setUp(self):
|
||||
self.term_api = TerminalAPI(self.request,
|
||||
base_url=self.base_url(),
|
||||
headers=self.auth_headers(),
|
||||
)
|
||||
|
||||
def tearDown(self):
|
||||
for k in self.term_api.list().json():
|
||||
self.term_api.shutdown(k['name'])
|
||||
|
||||
# Sanity check verifying that the configurable was properly set.
|
||||
def test_config(self):
|
||||
self.assertEqual(self.config.NotebookApp.TerminalManager.cull_inactive_timeout, 2)
|
||||
self.assertEqual(self.config.NotebookApp.TerminalManager.cull_interval, 3)
|
||||
terminal_mgr = self.notebook.web_app.settings['terminal_manager']
|
||||
self.assertEqual(terminal_mgr.cull_inactive_timeout, 2)
|
||||
self.assertEqual(terminal_mgr.cull_interval, 3)
|
||||
|
||||
def test_culling(self):
|
||||
# POST request
|
||||
r = self.term_api.start()
|
||||
self.assertEqual(r.status_code, 200)
|
||||
body = r.json()
|
||||
term1 = body['name']
|
||||
last_activity = body['last_activity']
|
||||
|
||||
culled = False
|
||||
for i in range(10): # Culling should occur in a few seconds
|
||||
try:
|
||||
r = self.term_api.get(term1)
|
||||
except HTTPError as e:
|
||||
self.assertEqual(e.response.status_code, 404)
|
||||
culled = True
|
||||
break
|
||||
else:
|
||||
self.assertEqual(r.status_code, 200)
|
||||
time.sleep(1)
|
||||
|
||||
self.assertTrue(culled)
|
||||
Loading…
Reference in new issue