element-synapse/synapse/metrics/_reactor_metrics.py
Eric Eastwood 6363d77ba2
Warn about skipping reactor metrics when using unknown reactor type (#19383)
Spawning from not seeing any reactor metrics in the Grafana dashboard in
some load tests, noticing `python_twisted_reactor_tick_time_bucket` is
`0` in Prometheus, following it back to Synapse and seeing that we don't
warn about skipping reactor metrics in all cases (when using an unknown
reactor type).

A follow-up to this would be to actually figure out how to instrument
the `ProxiedReactor` or why `ProxiedReactor` is being chosen in the
first place and see if we can get it to use a more normal type
🤔


### Reproduction instructions

1. Using the Complement scripts **with workers**: `WORKERS=1
./scripts-dev/complement.sh ./tests/csapi`
 1. `docker logs complement_csapi_dirty_hs1 2>&1 | grep -i "reactor"`
1. With these changes, notice `Skipping configuring
ReactorLastSeenMetric: unexpected reactor type: <__main__.ProxiedReactor
object at 0x7fc0adaaea50>` and `Twisted reactor: ProxiedReactor`
 1. Cleanup:
- `docker stop $(docker ps --all --filter "label=complement_context"
--quiet)`
- `docker rm $(docker ps --all --filter "label=complement_context"
--quiet)`

I'm unable to reproduce with the normal Synapse images or
`complement-synapse` without workers. They all use `Twisted reactor:
EPollReactor`

<details>
<summary>Checking <code>docker/Dockerfile-workers</code></summary>

1. Build the Docker image for Synapse: `docker build -t
matrixdotorg/synapse -f docker/Dockerfile . && docker build -t
matrixdotorg/synapse-workers -f docker/Dockerfile-workers .`
([docs](7a24fafbc3/docker/README-testing.md (building-and-running-the-images-manually)))
 1. Start Synapse:
     ```
    docker run -d --name synapse \
        --mount type=volume,src=synapse-data,dst=/data \
        -e SYNAPSE_SERVER_NAME=my.docker.synapse.server \
        -e SYNAPSE_REPORT_STATS=no \
        -e SYNAPSE_ENABLE_METRICS=1 \
        -p 8008:8008 \
        -p 9469:9469 \
        matrixdotorg/synapse-workers:latest
    ```
 1. `docker logs synapse 2>&1 | grep -i "reactor"`
 1. Says `Twisted reactor: EPollReactor`
 
 </details>
2026-01-15 15:49:10 -06:00

176 lines
6 KiB
Python

#
# This file is licensed under the Affero General Public License (AGPL) version 3.
#
# Copyright 2022 The Matrix.org Foundation C.I.C.
# Copyright (C) 2023 New Vector, Ltd
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as
# published by the Free Software Foundation, either version 3 of the
# License, or (at your option) any later version.
#
# See the GNU Affero General Public License for more details:
# <https://www.gnu.org/licenses/agpl-3.0.html>.
#
# Originally licensed under the Apache License, Version 2.0:
# <http://www.apache.org/licenses/LICENSE-2.0>.
#
# [This file includes modifications made by New Vector Limited]
#
#
import logging
import time
from selectors import SelectSelector, _PollLikeSelector # type: ignore[attr-defined]
from typing import Any, Callable, Iterable
from prometheus_client import Histogram, Metric
from prometheus_client.core import REGISTRY, GaugeMetricFamily
from twisted.internet import reactor, selectreactor
from twisted.internet.asyncioreactor import AsyncioSelectorReactor
from synapse.metrics._types import Collector
try:
from selectors import KqueueSelector # type: ignore[attr-defined]
except ImportError:
class KqueueSelector: # type: ignore[no-redef]
pass
try:
from twisted.internet.epollreactor import EPollReactor
except ImportError:
class EPollReactor: # type: ignore[no-redef]
pass
try:
from twisted.internet.pollreactor import PollReactor
except ImportError:
class PollReactor: # type: ignore[no-redef]
pass
logger = logging.getLogger(__name__)
#
# Twisted reactor metrics
#
# This is a process-level metric, so it does not have the `SERVER_NAME_LABEL`.
tick_time = Histogram( # type: ignore[missing-server-name-label]
"python_twisted_reactor_tick_time",
"Tick time of the Twisted reactor (sec)",
buckets=[0.001, 0.002, 0.005, 0.01, 0.025, 0.05, 0.1, 0.2, 0.5, 1, 2, 5],
)
class CallWrapper:
"""A wrapper for a callable which records the time between calls"""
def __init__(self, wrapped: Callable[..., Any]):
self.last_polled = time.time()
self._wrapped = wrapped
def __call__(self, *args, **kwargs) -> Any: # type: ignore[no-untyped-def]
# record the time since this was last called. This gives a good proxy for
# how long it takes to run everything in the reactor - ie, how long anything
# waiting for the next tick will have to wait.
tick_time.observe(time.time() - self.last_polled)
ret = self._wrapped(*args, **kwargs)
self.last_polled = time.time()
return ret
class ObjWrapper:
"""A wrapper for an object which wraps a specified method in CallWrapper.
Other methods/attributes are passed to the original object.
This is necessary when the wrapped object does not allow the attribute to be
overwritten.
"""
def __init__(self, wrapped: Any, method_name: str):
self._wrapped = wrapped
self._method_name = method_name
self._wrapped_method = CallWrapper(getattr(wrapped, method_name))
def __getattr__(self, item: str) -> Any:
if item == self._method_name:
return self._wrapped_method
return getattr(self._wrapped, item)
class ReactorLastSeenMetric(Collector):
def __init__(self, call_wrapper: CallWrapper):
self._call_wrapper = call_wrapper
def collect(self) -> Iterable[Metric]:
# This is a process-level metric, so it does not have the `SERVER_NAME_LABEL`.
cm = GaugeMetricFamily( # type: ignore[missing-server-name-label]
"python_twisted_reactor_last_seen",
"Seconds since the Twisted reactor was last seen",
)
cm.add_metric([], time.time() - self._call_wrapper.last_polled)
yield cm
# Twisted has already select a reasonable reactor for us, so assumptions can be
# made about the shape.
wrapper = None
try:
if isinstance(reactor, (PollReactor, EPollReactor)):
reactor._poller = ObjWrapper(reactor._poller, "poll") # type: ignore[attr-defined]
wrapper = reactor._poller._wrapped_method # type: ignore[attr-defined]
elif isinstance(reactor, selectreactor.SelectReactor):
# Twisted uses a module-level _select function.
wrapper = selectreactor._select = CallWrapper(selectreactor._select)
elif isinstance(reactor, AsyncioSelectorReactor):
# For asyncio look at the underlying asyncio event loop.
asyncio_loop = reactor._asyncioEventloop # A sub-class of BaseEventLoop,
# A sub-class of BaseSelector.
selector = asyncio_loop._selector # type: ignore[attr-defined]
if isinstance(selector, SelectSelector):
wrapper = selector._select = CallWrapper(selector._select) # type: ignore[attr-defined]
# poll, epoll, and /dev/poll.
elif isinstance(selector, _PollLikeSelector):
selector._selector = ObjWrapper(selector._selector, "poll") # type: ignore[attr-defined]
wrapper = selector._selector._wrapped_method # type: ignore[attr-defined]
elif isinstance(selector, KqueueSelector):
selector._selector = ObjWrapper(selector._selector, "control") # type: ignore[attr-defined]
wrapper = selector._selector._wrapped_method # type: ignore[attr-defined]
else:
# E.g. this does not support the (Windows-only) ProactorEventLoop.
logger.warning(
"Skipping configuring reactor metrics: unexpected asyncio loop selector: %r via %r",
selector,
asyncio_loop,
)
else:
logger.warning(
"Skipping configuring reactor metrics: unexpected reactor type: %r",
reactor,
)
except Exception as e:
logger.warning("Configuring reactor metrics failed: %r", e)
if wrapper:
# This is a process-level metric, so it does not have the `SERVER_NAME_LABEL`.
REGISTRY.register(ReactorLastSeenMetric(wrapper)) # type: ignore[missing-server-name-label]