element-synapse/synapse/storage/controllers/purge_events.py
#
# This file is licensed under the Affero General Public License (AGPL) version 3.
#
# Copyright 2019 The Matrix.org Foundation C.I.C.
# Copyright (C) 2023 New Vector, Ltd
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as
# published by the Free Software Foundation, either version 3 of the
# License, or (at your option) any later version.
#
# See the GNU Affero General Public License for more details:
# <https://www.gnu.org/licenses/agpl-3.0.html>.
#
# Originally licensed under the Apache License, Version 2.0:
# <http://www.apache.org/licenses/LICENSE-2.0>.
#
# [This file includes modifications made by New Vector Limited]
#
#
import itertools
import logging
from typing import (
    TYPE_CHECKING,
    Collection,
    Mapping,
)

from synapse.logging.context import nested_logging_context
from synapse.metrics.background_process_metrics import wrap_as_background_process
from synapse.storage.database import LoggingTransaction
from synapse.storage.databases import Databases
from synapse.types.storage import _BackgroundUpdates
from synapse.util.duration import Duration
from synapse.util.stringutils import shortstr

if TYPE_CHECKING:
    from synapse.server import HomeServer

logger = logging.getLogger(__name__)

class PurgeEventsStorageController:
    """High level interface for purging rooms and event history."""

    def __init__(self, hs: "HomeServer", stores: Databases):
        self.hs = hs  # nb must be called this for @wrap_as_background_process
        self.server_name = hs.hostname
        self.stores = stores

        if hs.config.worker.run_background_tasks:
            self._delete_state_loop_call = hs.get_clock().looping_call(
                self._delete_state_groups_loop, Duration(minutes=1)
            )
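
        # Register the background update that scans for unreferenced state
        # groups and marks them for deletion; the work is done in
        # _background_delete_unrefereneced_state_groups below.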
        self.stores.state.db_pool.updates.register_background_update_handler(
            _BackgroundUpdates.MARK_UNREFERENCED_STATE_GROUPS_FOR_DELETION_BG_UPDATE,
            self._background_delete_unrefereneced_state_groups,
        )

    async def purge_room(self, room_id: str) -> None:
        """Deletes all records of a room"""

        with nested_logging_context(room_id):
            await self.stores.main.purge_room(room_id)
            await self.stores.state.purge_room_state(room_id)

    async def purge_history(
        self, room_id: str, token: str, delete_local_events: bool
    ) -> None:
        """Deletes room history before a certain point

        Args:
            room_id: The room ID

            token: A topological token to delete events before

            delete_local_events:
                if True, we will delete local events as well as remote ones
                (instead of just marking them as outliers and deleting their
                state groups).
        """
        with nested_logging_context(room_id):
            state_groups = await self.stores.main.purge_history(
                room_id, token, delete_local_events
            )

            logger.info("[purge] finding state groups that can be deleted")
            sg_to_delete = await self._find_unreferenced_groups(state_groups)

            # Mark these state groups as pending deletion; they will actually
            # get deleted automatically later.
            await self.stores.state_deletion.mark_state_groups_as_pending_deletion(
                sg_to_delete
            )

    async def _find_unreferenced_groups(
        self,
        state_groups: Collection[int],
    ) -> set[int]:
        """Used when purging history to figure out which state groups can be
        deleted.

        Args:
            state_groups: Set of state groups referenced by events
                that are going to be deleted.

        Returns:
            The set of state groups that can be deleted.
        """
        # Set of state groups that we have found to be referenced by events
        referenced_groups = set()

        # Set of state groups we've already seen
        state_groups_seen = set(state_groups)

        # Set of state groups to handle next.
        next_to_search = set(state_groups)
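
        # Iteratively walk the state group graph: starting from the groups
        # attached to the events being purged, follow both prev and next
        # edges, recording which groups are still referenced by events.
        # (Illustrative example: if SG_3's prev group is SG_2 and SG_2's prev
        # is SG_1, starting from {SG_3} we also visit SG_2 and SG_1; any of
        # them still referenced by an event is kept.)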
        while next_to_search:
            # We bound the number of groups we look up at once, to stop the
            # SQL query getting too big
            if len(next_to_search) < 100:
                current_search = next_to_search
                next_to_search = set()
            else:
                current_search = set(itertools.islice(next_to_search, 100))
                next_to_search -= current_search

            referenced = await self.stores.main.get_referenced_state_groups(
                current_search
            )
            referenced_groups |= referenced

            # We don't continue iterating up the state group graphs for state
            # groups that are referenced.
            current_search -= referenced

            edges = await self.stores.state.get_previous_state_groups(current_search)

            prevs = set(edges.values())
            # We don't bother re-handling groups we've already seen
            prevs -= state_groups_seen
            next_to_search |= prevs
            state_groups_seen |= prevs

            # We also check whether anything referencing these state groups is
            # itself unreferenced. This helps ensure that we delete unreferenced
            # state groups: if we don't, we will de-delta them when we delete
            # the other state groups, leading to increased DB usage.
            next_edges = await self.stores.state.get_next_state_groups(current_search)
            nexts = set(next_edges.keys())
            nexts -= state_groups_seen
            next_to_search |= nexts
            state_groups_seen |= nexts

        to_delete = state_groups_seen - referenced_groups

        return to_delete

    @wrap_as_background_process("_delete_state_groups_loop")
    async def _delete_state_groups_loop(self) -> None:
        """Background task that deletes any state groups that may be pending
        deletion."""

        while True:
            next_to_delete = await self.stores.state_deletion.get_next_state_group_collection_to_delete()
            if next_to_delete is None:
                break

            (room_id, groups_to_sequences) = next_to_delete
            logger.info(
                "[purge] deleting state groups for room %s: %s",
                room_id,
                shortstr(groups_to_sequences.keys(), maxitems=10),
            )
            made_progress = await self._delete_state_groups(
                room_id, groups_to_sequences
            )

            # If no progress was made in deleting the state groups, then we
            # break to allow a pause before trying again next time we get
            # called.
            if not made_progress:
                break

    async def _delete_state_groups(
        self, room_id: str, groups_to_sequences: Mapping[int, int]
    ) -> bool:
        """Tries to delete the given state groups.

        Returns:
            Whether we made progress in deleting the state groups (or marking
            them as referenced).
        """

        # We double check if any of the state groups have become referenced.
        # This shouldn't happen, as any usages should cause the state group to
        # be removed as pending deletion.
        referenced_state_groups = await self.stores.main.get_referenced_state_groups(
            groups_to_sequences
        )
        if referenced_state_groups:
            # We mark any state groups that have become referenced as being
            # used.
            await self.stores.state_deletion.mark_state_groups_as_used(
                referenced_state_groups
            )

            # Update list of state groups to remove referenced ones
            groups_to_sequences = {
                state_group: sequence_number
                for state_group, sequence_number in groups_to_sequences.items()
                if state_group not in referenced_state_groups
            }

        if not groups_to_sequences:
            # We made progress here as long as we marked some state groups as
            # now referenced.
            return len(referenced_state_groups) > 0

        return await self.stores.state.purge_unreferenced_state_groups(
            room_id,
            groups_to_sequences,
        )

    async def _background_delete_unrefereneced_state_groups(
        self, progress: dict, batch_size: int
    ) -> int:
        """This background update will slowly delete any unreferenced state groups"""

        last_checked_state_group = progress.get("last_checked_state_group")

        if last_checked_state_group is None:
            # This is the first run.
            last_checked_state_group = (
                await self.stores.state.db_pool.simple_select_one_onecol(
                    table="state_groups",
                    keyvalues={},
                    retcol="MAX(id)",
                    allow_none=True,
                    desc="get_max_state_group",
                )
            )
            if last_checked_state_group is None:
                # There are no state groups so the background process is finished.
                await self.stores.state.db_pool.updates._end_background_update(
                    _BackgroundUpdates.MARK_UNREFERENCED_STATE_GROUPS_FOR_DELETION_BG_UPDATE
                )
                return batch_size
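            # Start one above the current maximum id so that the batch query's
            # `WHERE id < ?` bound (see the SQL further down) still covers the
            # newest state group.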
            last_checked_state_group += 1

        (
            last_checked_state_group,
            final_batch,
        ) = await self._delete_unreferenced_state_groups_batch(
            last_checked_state_group,
            batch_size,
        )

        if not final_batch:
            # There are more state groups to check.
            progress = {
                "last_checked_state_group": last_checked_state_group,
            }
            await self.stores.state.db_pool.updates._background_update_progress(
                _BackgroundUpdates.MARK_UNREFERENCED_STATE_GROUPS_FOR_DELETION_BG_UPDATE,
                progress,
            )
        else:
            # This background process is finished.
            await self.stores.state.db_pool.updates._end_background_update(
                _BackgroundUpdates.MARK_UNREFERENCED_STATE_GROUPS_FOR_DELETION_BG_UPDATE
            )

        return batch_size

    async def _delete_unreferenced_state_groups_batch(
        self,
        last_checked_state_group: int,
        batch_size: int,
    ) -> tuple[int, bool]:
        """Looks for unreferenced state groups starting from the last state group
        checked and marks them for deletion.

        Args:
            last_checked_state_group: The last state group that was checked.
            batch_size: How many state groups to process in this iteration.

        Returns:
            (last_checked_state_group, final_batch)
        """

        # Find all state groups that can be deleted if any of the original set are deleted.
        (
            to_delete,
            last_checked_state_group,
            final_batch,
        ) = await self._find_unreferenced_groups_for_background_deletion(
            last_checked_state_group, batch_size
        )

        if len(to_delete) == 0:
            return last_checked_state_group, final_batch

        await self.stores.state_deletion.mark_state_groups_as_pending_deletion(
            to_delete
        )

        return last_checked_state_group, final_batch

    async def _find_unreferenced_groups_for_background_deletion(
        self,
        last_checked_state_group: int,
        batch_size: int,
    ) -> tuple[set[int], int, bool]:
        """Used when deleting unreferenced state groups in the background to figure out
        which state groups can be deleted.

        To avoid increased DB usage due to de-deltaing state groups, this returns only
        state groups which are free standing (ie. no shared edges with referenced groups) or
        state groups which do not share edges which result in a future referenced group.

        The following scenarios outline the possibilities based on state group data in
        the DB.

        ie. Free standing -> state groups 1-N would be returned:
        SG_1
        |
        ...
        |
        SG_N

        ie. Previous reference -> state groups 2-N would be returned:
        SG_1 <- referenced by event
        |
        SG_2
        |
        ...
        |
        SG_N

        ie. Future reference -> none of the following state groups would be returned:
        SG_1
        |
        SG_2
        |
        ...
        |
        SG_N <- referenced by event

        Args:
            last_checked_state_group: The last state group that was checked.
            batch_size: How many state groups to process in this iteration.

        Returns:
            (to_delete, last_checked_state_group, final_batch)
        """

        # If a state group's next edge is not pending deletion then we don't delete the state group.
        # If there is no next edge or the next edges are all marked for deletion, then delete
        # the state group.
        # This holds since we walk backwards from the latest state groups, ensuring that
        # we've already checked newer state groups for event references along the way.
        def get_next_state_groups_marked_for_deletion_txn(
            txn: LoggingTransaction,
        ) -> tuple[dict[int, bool], dict[int, int]]:
            state_group_sql = """
                SELECT s.id, e.state_group, d.state_group
                FROM (
                    SELECT id FROM state_groups
                    WHERE id < ? ORDER BY id DESC LIMIT ?
                ) as s
                LEFT JOIN state_group_edges AS e ON (s.id = e.prev_state_group)
                LEFT JOIN state_groups_pending_deletion AS d ON (e.state_group = d.state_group)
            """
            txn.execute(state_group_sql, (last_checked_state_group, batch_size))
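
            # Each row is: (a state group in this batch, a state group that has
            # it as its prev_state_group, and that next group's entry in
            # state_groups_pending_deletion, or NULL if it is not pending
            # deletion). A group with no next edge yields a single row with
            # NULLs in the last two columns.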

            # Mapping from state group to whether we should delete it.
            state_groups_to_deletion: dict[int, bool] = {}

            # Mapping from state group to prev state group.
            state_groups_to_prev: dict[int, int] = {}

            for row in txn:
                state_group = row[0]
                next_edge = row[1]
                pending_deletion = row[2]

                if next_edge is not None:
                    state_groups_to_prev[next_edge] = state_group

                if next_edge is not None and not pending_deletion:
                    # We have found an edge not marked for deletion.
                    # Check previous results to see if this group is part of a chain
                    # within this batch that qualifies for deletion.
                    # ie. batch contains:
                    # SG_1 -> SG_2 -> SG_3
                    # If SG_3 is a candidate for deletion, then SG_2 & SG_1 should also
                    # be, even though they have edges which may not be marked for
                    # deletion.
                    # This relies on SQL results being sorted in DESC order to work.
                    next_is_deletion_candidate = state_groups_to_deletion.get(next_edge)
                    if (
                        next_is_deletion_candidate is None
                        or not next_is_deletion_candidate
                    ):
                        state_groups_to_deletion[state_group] = False
                    else:
                        state_groups_to_deletion.setdefault(state_group, True)
                else:
                    # This state group may be a candidate for deletion
                    state_groups_to_deletion.setdefault(state_group, True)

            return state_groups_to_deletion, state_groups_to_prev

        (
            state_groups_to_deletion,
            state_group_edges,
        ) = await self.stores.state.db_pool.runInteraction(
            "get_next_state_groups_marked_for_deletion",
            get_next_state_groups_marked_for_deletion_txn,
        )
        deletion_candidates = {
            state_group
            for state_group, deletion in state_groups_to_deletion.items()
            if deletion
        }

        final_batch = False
        state_groups = state_groups_to_deletion.keys()
        if len(state_groups) < batch_size:
            final_batch = True
        else:
            last_checked_state_group = min(state_groups)

        if len(state_groups) == 0:
            return set(), last_checked_state_group, final_batch

        # Determine if any of the remaining state groups are directly referenced.
        referenced = await self.stores.main.get_referenced_state_groups(
            deletion_candidates
        )

        # Remove state groups from deletion_candidates which are directly referenced or share a
        # future edge with a referenced state group within this batch.
        def filter_reference_chains(group: int | None) -> None:
            while group is not None:
                deletion_candidates.discard(group)
                group = state_group_edges.get(group)
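
        # For example: if SG_5 is referenced and, within this batch, SG_4 is
        # its previous group and SG_3 is SG_4's previous group, then SG_5,
        # SG_4 and SG_3 are all removed from the candidates, since deleting
        # SG_3 or SG_4 would force SG_5 to be de-deltaed.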
        for referenced_group in referenced:
            filter_reference_chains(referenced_group)

        return deletion_candidates, last_checked_state_group, final_batch