bgpd: Optimize BGP path lookup using typesafe hash for efficient lookup

Problem:
--------
BGP path lookup currently uses O(N) linear search through the list
(dest->info) for every incoming route update. In high-ECMP scenarios,
each update requires iterating through the entire path list to check
if a path from that peer already exists.

This becomes a severe performance bottleneck in data center environments
with high ECMP during large-scale route updates churn.

Solution:
---------
Implement a path_info hash per table using typesafe hash.
Use the same in bgp_update() for efficient lookup.

This hash lookup improves CPU overhead upto ~60%

Signed-off-by: Krishnasamy R <krishnasamyr@nvidia.com>
Signed-off-by: Krishnasamy <krishnasamyr@nvidia.com>
This commit is contained in:
Krishnasamy 2025-12-26 07:15:06 +00:00
parent b2cc85bdc1
commit b45c53b4f5
5 changed files with 100 additions and 7 deletions

View file

@ -31,6 +31,7 @@
#include "zclient.h"
#include "frrdistance.h"
#include "frregex_real.h"
#include "jhash.h"
#include "bgpd/bgpd.h"
#include "bgpd/bgp_nhc.h"
@ -127,6 +128,52 @@ static const struct message bgp_pmsi_tnltype_str[] = {
static int clear_batch_rib_helper(struct bgp_clearing_info *cinfo);
static void bgp_gr_start_tier2_timer_if_required(struct bgp *bgp, afi_t afi, safi_t safi);
/*
* Typesafe Hash Functions for bgp_path_info
* hash key: prefix, peer, type, sub_type, addpath_rx_id
*/
int bgp_pi_hash_cmp(const struct bgp_path_info *p1, const struct bgp_path_info *p2)
{
int ret;
/* Get the prefix from pi->net and compare */
const struct prefix *pfx1 = bgp_dest_get_prefix(p1->net);
const struct prefix *pfx2 = bgp_dest_get_prefix(p2->net);
ret = prefix_cmp(pfx1, pfx2);
if (ret != 0)
return ret;
if (p1->peer != p2->peer)
return (p1->peer < p2->peer) ? -1 : 1;
if (p1->type != p2->type)
return (p1->type < p2->type) ? -1 : 1;
if (p1->sub_type != p2->sub_type)
return (p1->sub_type < p2->sub_type) ? -1 : 1;
if (p1->addpath_rx_id != p2->addpath_rx_id)
return (p1->addpath_rx_id < p2->addpath_rx_id) ? -1 : 1;
/* All fields are equal */
return 0;
}
uint32_t bgp_pi_hash_hashfn(const struct bgp_path_info *pi)
{
uint32_t h = 0;
const struct prefix *pfx = bgp_dest_get_prefix(pi->net);
h = prefix_hash_key(pfx);
h = jhash_1word((uint32_t)(uintptr_t)pi->peer, h);
h = jhash_1word(pi->addpath_rx_id, h);
h = jhash_1word((uint32_t)pi->type, h);
h = jhash_1word((uint32_t)pi->sub_type, h);
return h;
}
static inline char *bgp_route_dump_path_info_flags(struct bgp_path_info *pi,
char *buf, size_t len)
{
@ -544,6 +591,7 @@ void bgp_path_info_add_with_caller(const char *name, struct bgp_dest *dest,
{
frrtrace(3, frr_bgp, bgp_path_info_add, dest, pi, name);
struct bgp_path_info *top;
struct bgp_table *table;
top = bgp_dest_get_bgp_path_info(dest);
@ -553,6 +601,11 @@ void bgp_path_info_add_with_caller(const char *name, struct bgp_dest *dest,
top->prev = pi;
bgp_dest_set_bgp_path_info(dest, pi);
/* Add this path info to global hash (per AFI/SAFI table) */
table = bgp_dest_table(dest);
if (table)
bgp_pi_hash_add(&table->pi_hash, pi);
SET_FLAG(pi->flags, BGP_PATH_UNSORTED);
bgp_path_info_lock(pi);
bgp_dest_lock_node(dest);
@ -568,6 +621,8 @@ void bgp_path_info_add_with_caller(const char *name, struct bgp_dest *dest,
struct bgp_dest *bgp_path_info_reap(struct bgp_dest *dest,
struct bgp_path_info *pi)
{
struct bgp_table *table;
if (pi->next)
pi->next->prev = pi->prev;
if (pi->prev)
@ -578,6 +633,11 @@ struct bgp_dest *bgp_path_info_reap(struct bgp_dest *dest,
pi->next = NULL;
pi->prev = NULL;
/* Remove this path from global hash (per AFI/SAFI table) */
table = bgp_dest_table(dest);
if (table)
bgp_pi_hash_del(&table->pi_hash, pi);
if (pi->peer)
pi->peer->stat_pfx_loc_rib--;
hook_call(bgp_snmp_update_stats, dest, pi, false);
@ -589,9 +649,16 @@ struct bgp_dest *bgp_path_info_reap(struct bgp_dest *dest,
static struct bgp_dest *bgp_path_info_reap_unsorted(struct bgp_dest *dest,
struct bgp_path_info *pi)
{
struct bgp_table *table;
pi->next = NULL;
pi->prev = NULL;
/* Remove this path from global hash (per AFI/SAFI table) */
table = bgp_dest_table(dest);
if (table)
bgp_pi_hash_del(&table->pi_hash, pi);
if (pi->peer)
pi->peer->stat_pfx_loc_rib--;
hook_call(bgp_snmp_update_stats, dest, pi, false);
@ -5547,6 +5614,7 @@ void bgp_update(struct peer *peer, const struct prefix *p, uint32_t addpath_id,
{
int ret;
struct bgp_dest *dest;
struct bgp_table *rib_table;
struct bgp *bgp;
struct attr new_attr = {};
struct attr *attr_new;
@ -5580,6 +5648,7 @@ void bgp_update(struct peer *peer, const struct prefix *p, uint32_t addpath_id,
bgp = peer->bgp;
dest = bgp_afi_node_get(bgp->rib[afi][safi], afi, safi, p, prd);
rib_table = bgp_dest_table(dest);
if (num_labels &&
((afi == AFI_L2VPN && safi == SAFI_EVPN) || bgp_is_valid_label(&label[0]))) {
@ -5606,12 +5675,14 @@ void bgp_update(struct peer *peer, const struct prefix *p, uint32_t addpath_id,
bgp_adj_in_set(dest, peer, attr, addpath_id, &bgp_labels);
}
/* Check previously received route. */
for (pi = bgp_dest_get_bgp_path_info(dest); pi; pi = pi->next)
if (pi->peer == peer && pi->type == type
&& pi->sub_type == sub_type
&& pi->addpath_rx_id == addpath_id)
break;
/* Check previously received route using pi_hash */
struct bgp_path_info pi_lookup = { .net = dest,
.peer = peer,
.type = type,
.sub_type = sub_type,
.addpath_rx_id = addpath_id };
pi = bgp_pi_hash_find(&rib_table->pi_hash, &pi_lookup);
/* AS path local-as loop check. */
if (peer->change_local_as) {

View file

@ -11,6 +11,7 @@
#include "hook.h"
#include "queue.h"
#include "nexthop.h"
#include "typesafe.h"
#include "bgp_table.h"
#include "bgp_addpath_types.h"
#include "bgp_rpki.h"
@ -275,6 +276,9 @@ struct bgp_path_info {
struct bgp_path_info *next;
struct bgp_path_info *prev;
/* Hash linkage for pi_hash in bgp_table */
struct bgp_pi_hash_item pi_hash_link;
/* For nexthop linked list */
LIST_ENTRY(bgp_path_info) nh_thread;
@ -739,6 +743,11 @@ DECLARE_HOOK(bgp_route_update,
struct bgp_path_info *old_route, struct bgp_path_info *new_route),
(bgp, afi, safi, bn, old_route, new_route));
extern int bgp_pi_hash_cmp(const struct bgp_path_info *p1, const struct bgp_path_info *p2);
extern uint32_t bgp_pi_hash_hashfn(const struct bgp_path_info *pi);
DECLARE_HASH(bgp_pi_hash, struct bgp_path_info, pi_hash_link, bgp_pi_hash_cmp, bgp_pi_hash_hashfn);
/* BGP show options */
#define BGP_SHOW_OPT_JSON (1 << 0)
#define BGP_SHOW_OPT_WIDE (1 << 1)

View file

@ -33,6 +33,9 @@ void bgp_table_unlock(struct bgp_table *rt)
return;
}
/* Cleanup pi_hash in bgp_table */
bgp_pi_hash_fini(&rt->pi_hash);
route_table_finish(rt->route_table);
rt->route_table = NULL;
@ -84,6 +87,8 @@ inline struct bgp_dest *bgp_dest_unlock_node(struct bgp_dest *dest)
if (rn->lock == 1) {
struct bgp_table *rt = bgp_dest_table(dest);
if (rt->bgp) {
bgp_addpath_free_node_data(&rt->bgp->tx_addpath,
&dest->tx_addpath, rt->afi,
@ -165,6 +170,7 @@ struct bgp_table *bgp_table_init(struct bgp *bgp, afi_t afi, safi_t safi)
bgp_table_lock(rt);
rt->afi = afi;
rt->safi = safi;
bgp_pi_hash_init(&rt->pi_hash);
return rt;
}

View file

@ -10,10 +10,14 @@
#include "table.h"
#include "queue.h"
#include "linklist.h"
#include "typesafe.h"
#include "bgpd.h"
#include "bgp_advertise.h"
#include "bgp_attr_srv6.h"
/* Typesafe hash for bgp_path_info lookup */
PREDECL_HASH(bgp_pi_hash);
struct bgp_table {
/* table belongs to this instance */
struct bgp *bgp;
@ -24,6 +28,9 @@ struct bgp_table {
int lock;
/* Hash for bgp_path_info lookups across all prefixes in this table */
struct bgp_pi_hash_head pi_hash;
/* soft_reconfig_table in progress */
bool soft_reconfig_init;
struct event *soft_reconfig_thread;

View file

@ -1008,7 +1008,7 @@ void add_vnc_route(struct rfapi_descriptor *rfd, /* cookie, VPN UN addr, peer */
}
}
new = info_make(type, sub_type, 0, rfd->peer, new_attr, NULL);
new = info_make(type, sub_type, 0, rfd->peer, new_attr, bn);
SET_FLAG(new->flags, BGP_PATH_VALID);
/* save backref to rfapi handle */