Merge pull request #4036 from Hyaxia/metrics_terminal

Added metrics for currently running terminals and labeled by type kernels
pull/4063/head
Steven Silvester 8 years ago committed by GitHub
commit dfcea5b3f7
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

@ -17,7 +17,10 @@ install:
- cmd: conda config --set show_channel_urls true
- cmd: conda config --add channels conda-forge
#- cmd: conda update --yes --quiet conda
- cmd: conda install -y pyzmq tornado jupyter_client nbformat nbconvert ipykernel pip nodejs nose
- cmd: conda install -y pyzmq tornado jupyter_client nbformat ipykernel pip nodejs nose
# not using `conda install -y` for the nbconvert package because there is
# currently a bug in the version that Anaconda installs, so we just install it with pip
- cmd: pip install nbconvert
- cmd: python setup.py build
- cmd: pip install .[test]

@ -7,7 +7,8 @@
import json
from tornado.log import access_log
from .metrics import prometheus_log_method
from .prometheus.log_functions import prometheus_log_method
def log_request(handler):
"""log a bit more information about each request than tornado's default

@ -0,0 +1,4 @@
"""
A package containing all the functionality and
configuration connected to the prometheus metrics
"""

@ -1,18 +1,5 @@
"""
Prometheus metrics exported by Jupyter Notebook Server
from ..prometheus.metrics import HTTP_REQUEST_DURATION_SECONDS
Read https://prometheus.io/docs/practices/naming/ for naming
conventions for metrics & labels.
"""
from prometheus_client import Histogram
# This is a fairly standard name for HTTP duration latency reporting
HTTP_REQUEST_DURATION_SECONDS = Histogram(
'http_request_duration_seconds',
'duration in seconds for all HTTP requests',
['method', 'handler', 'status_code'],
)
def prometheus_log_method(handler):
"""

@ -0,0 +1,27 @@
"""
Prometheus metrics exported by Jupyter Notebook Server
Read https://prometheus.io/docs/practices/naming/ for naming
conventions for metrics & labels.
"""
from prometheus_client import Histogram, Gauge
# Histogram of HTTP request durations in seconds, with one series per
# (method, handler, status_code) label combination.
# This is a fairly standard name for HTTP duration latency reporting.
HTTP_REQUEST_DURATION_SECONDS = Histogram(
'http_request_duration_seconds',
'duration in seconds for all HTTP requests',
['method', 'handler', 'status_code'],
)
# Unlabeled gauge holding the number of terminals currently running.
# NOTE(review): the help text below says "counter", but this metric is a
# Gauge (it is set, incremented and decremented by its users). The `_total`
# suffix is also usually reserved for counters in the Prometheus naming
# guide -- confirm whether existing dashboards depend on the name/help text
# before changing either.
TERMINAL_CURRENTLY_RUNNING_TOTAL = Gauge(
'terminal_currently_running_total',
'counter for how many terminals are running',
)
# Gauge holding the number of kernels currently running, with one series
# per value of the `type` label.
# NOTE(review): as with the terminal metric, the help text says "counter"
# although the metric type is Gauge.
KERNEL_CURRENTLY_RUNNING_TOTAL = Gauge(
'kernel_currently_running_total',
'counter for how many kernels are running labeled by type',
['type']
)

@ -26,6 +26,8 @@ from notebook.utils import to_os_path, exists
from notebook._tz import utcnow, isoformat
from ipython_genutils.py3compat import getcwd
from notebook.prometheus.metrics import KERNEL_CURRENTLY_RUNNING_TOTAL
class MappingKernelManager(MultiKernelManager):
"""A KernelManager that handles notebook mapping and HTTP error handling"""
@ -168,6 +170,13 @@ class MappingKernelManager(MultiKernelManager):
lambda : self._handle_kernel_died(kernel_id),
'dead',
)
# Increase the metric of number of kernels running
# for the relevant kernel type by 1
KERNEL_CURRENTLY_RUNNING_TOTAL.labels(
type=self._kernels[kernel_id].kernel_name
).inc()
else:
self._check_kernel_id(kernel_id)
self.log.info("Using existing kernel: %s" % kernel_id)
@ -278,6 +287,13 @@ class MappingKernelManager(MultiKernelManager):
self.stop_buffering(kernel_id)
self._kernel_connections.pop(kernel_id, None)
self.last_kernel_activity = utcnow()
# Decrease the metric of number of kernels
# running for the relevant kernel type by 1
KERNEL_CURRENTLY_RUNNING_TOTAL.labels(
type=self._kernels[kernel_id].kernel_name
).dec()
return super(MappingKernelManager, self).shutdown_kernel(kernel_id, now=now)
def restart_kernel(self, kernel_id):

@ -1,7 +1,8 @@
import json
from tornado import web, gen
from ..base.handlers import APIHandler
from ..utils import url_path_join
from ..prometheus.metrics import TERMINAL_CURRENTLY_RUNNING_TOTAL
class TerminalRootHandler(APIHandler):
@web.authenticated
@ -10,12 +11,20 @@ class TerminalRootHandler(APIHandler):
terms = [{'name': name} for name in tm.terminals]
self.finish(json.dumps(terms))
# Update the metric below to the length of the list 'terms'
TERMINAL_CURRENTLY_RUNNING_TOTAL.set(
len(terms)
)
@web.authenticated
def post(self):
    """POST /terminals: create a new named terminal and reply with its name.

    The response body is a JSON object of the form ``{"name": <terminal name>}``.
    After the response is finished, the running-terminals gauge is
    incremented by one.
    """
    # new_named_terminal() yields a pair; only the name is needed here.
    term_name, _ = self.terminal_manager.new_named_terminal()
    payload = json.dumps({'name': term_name})
    self.finish(payload)
    # A terminal was just created, so bump the running-terminals gauge.
    TERMINAL_CURRENTLY_RUNNING_TOTAL.inc()
class TerminalHandler(APIHandler):
SUPPORTED_METHODS = ('GET', 'DELETE')
@ -36,5 +45,10 @@ class TerminalHandler(APIHandler):
yield tm.terminate(name, force=True)
self.set_status(204)
self.finish()
# Decrease the metric below by one
# because a terminal has been shutdown
TERMINAL_CURRENTLY_RUNNING_TOTAL.dec()
else:
raise web.HTTPError(404, "Terminal not found: %r" % name)

Loading…
Cancel
Save