From 8aa22d62f8eb947523cd7f1dad85df0c07b28069 Mon Sep 17 00:00:00 2001
From: yuvipanda
Date: Mon, 2 Apr 2018 12:03:16 -0700
Subject: [PATCH] Log HTTP request codes & timings to Prometheus

Code adapted from JupyterHub
---
 notebook/log.py     |  3 ++-
 notebook/metrics.py | 39 +++++++++++++++++++++++++++++++++++++++
 2 files changed, 41 insertions(+), 1 deletion(-)
 create mode 100644 notebook/metrics.py

diff --git a/notebook/log.py b/notebook/log.py
index dab330cb4..64b35d811 100644
--- a/notebook/log.py
+++ b/notebook/log.py
@@ -7,6 +7,7 @@
 
 import json
 from tornado.log import access_log
+from .metrics import prometheus_log_method
 
 def log_request(handler):
     """log a bit more information about each request than tornado's default
@@ -45,4 +46,4 @@ def log_request(handler):
         # log all headers if it caused an error
         log_method(json.dumps(dict(request.headers), indent=2))
     log_method(msg.format(**ns))
-    
+    prometheus_log_method(handler)
diff --git a/notebook/metrics.py b/notebook/metrics.py
new file mode 100644
index 000000000..cb73bc437
--- /dev/null
+++ b/notebook/metrics.py
@@ -0,0 +1,39 @@
+"""
+Prometheus metrics exported by Jupyter Notebook Server
+
+Read https://prometheus.io/docs/practices/naming/ for naming
+conventions for metrics & labels. We generally prefer naming them
+`<noun>_<verb>_<type_suffix>`. So a histogram that's tracking
+the duration (in seconds) of servers spawning would be called
+SERVER_SPAWN_DURATION_SECONDS.
+"""
+
+from prometheus_client import Histogram
+
+REQUEST_DURATION_SECONDS = Histogram(
+    'request_duration_seconds',
+    'request duration for all HTTP requests',
+    ['method', 'handler', 'code'],
+)
+
+def prometheus_log_method(handler):
+    """
+    Tornado log handler for recording RED metrics.
+
+    We record the following metrics:
+       Rate - the number of requests, per second, your services are serving.
+       Errors - the number of failed requests per second.
+       Duration - The amount of time each request takes expressed as a time interval.
+
+    We use a fully qualified name of the handler as a label,
+    rather than every url path to reduce cardinality.
+
+    This function should be either the value of or called from a function
+    that is the 'log_function' tornado setting. This makes it get called
+    at the end of every request, allowing us to record the metrics we need.
+    """
+    REQUEST_DURATION_SECONDS.labels(
+        method=handler.request.method,
+        handler='{0.__module__}.{0.__name__}'.format(type(handler)),
+        code=handler.get_status(),
+    ).observe(handler.request.request_time())