From b45c53b4f592424b48eeded3f4df030ab8d49e2c Mon Sep 17 00:00:00 2001 From: Krishnasamy Date: Fri, 26 Dec 2025 07:15:06 +0000 Subject: [PATCH] bgpd: Optimize BGP path lookup using typesafe hash for efficient lookup Problem: -------- BGP path lookup currently uses O(N) linear search through the list (dest->info) for every incoming route update. In high-ECMP scenarios, each update requires iterating through the entire path list to check if a path from that peer already exists. This becomes a severe performance bottleneck in data center environments with high ECMP during large-scale route updates churn. Solution: --------- Implement a path_info hash per table using typesafe hash. Use the same in bgp_update() for efficient lookup. This hash lookup improves CPU overhead upto ~60% Signed-off-by: Krishnasamy R Signed-off-by: Krishnasamy --- bgpd/bgp_route.c | 83 ++++++++++++++++++++++++++++++++++++++++++---- bgpd/bgp_route.h | 9 +++++ bgpd/bgp_table.c | 6 ++++ bgpd/bgp_table.h | 7 ++++ bgpd/rfapi/rfapi.c | 2 +- 5 files changed, 100 insertions(+), 7 deletions(-) diff --git a/bgpd/bgp_route.c b/bgpd/bgp_route.c index 78ddec3429..7dbea67167 100644 --- a/bgpd/bgp_route.c +++ b/bgpd/bgp_route.c @@ -31,6 +31,7 @@ #include "zclient.h" #include "frrdistance.h" #include "frregex_real.h" +#include "jhash.h" #include "bgpd/bgpd.h" #include "bgpd/bgp_nhc.h" @@ -127,6 +128,52 @@ static const struct message bgp_pmsi_tnltype_str[] = { static int clear_batch_rib_helper(struct bgp_clearing_info *cinfo); static void bgp_gr_start_tier2_timer_if_required(struct bgp *bgp, afi_t afi, safi_t safi); +/* + * Typesafe Hash Functions for bgp_path_info + * hash key: prefix, peer, type, sub_type, addpath_rx_id + */ +int bgp_pi_hash_cmp(const struct bgp_path_info *p1, const struct bgp_path_info *p2) +{ + int ret; + + /* Get the prefix from pi->net and compare */ + const struct prefix *pfx1 = bgp_dest_get_prefix(p1->net); + const struct prefix *pfx2 = bgp_dest_get_prefix(p2->net); + + ret = prefix_cmp(pfx1, pfx2); + if (ret != 0) + return ret; + + if (p1->peer != p2->peer) + return (p1->peer < p2->peer) ? -1 : 1; + + if (p1->type != p2->type) + return (p1->type < p2->type) ? -1 : 1; + + if (p1->sub_type != p2->sub_type) + return (p1->sub_type < p2->sub_type) ? -1 : 1; + + if (p1->addpath_rx_id != p2->addpath_rx_id) + return (p1->addpath_rx_id < p2->addpath_rx_id) ? -1 : 1; + + /* All fields are equal */ + return 0; +} + +uint32_t bgp_pi_hash_hashfn(const struct bgp_path_info *pi) +{ + uint32_t h = 0; + const struct prefix *pfx = bgp_dest_get_prefix(pi->net); + + h = prefix_hash_key(pfx); + h = jhash_1word((uint32_t)(uintptr_t)pi->peer, h); + h = jhash_1word(pi->addpath_rx_id, h); + h = jhash_1word((uint32_t)pi->type, h); + h = jhash_1word((uint32_t)pi->sub_type, h); + + return h; +} + static inline char *bgp_route_dump_path_info_flags(struct bgp_path_info *pi, char *buf, size_t len) { @@ -544,6 +591,7 @@ void bgp_path_info_add_with_caller(const char *name, struct bgp_dest *dest, { frrtrace(3, frr_bgp, bgp_path_info_add, dest, pi, name); struct bgp_path_info *top; + struct bgp_table *table; top = bgp_dest_get_bgp_path_info(dest); @@ -553,6 +601,11 @@ void bgp_path_info_add_with_caller(const char *name, struct bgp_dest *dest, top->prev = pi; bgp_dest_set_bgp_path_info(dest, pi); + /* Add this path info to global hash (per AFI/SAFI table) */ + table = bgp_dest_table(dest); + if (table) + bgp_pi_hash_add(&table->pi_hash, pi); + SET_FLAG(pi->flags, BGP_PATH_UNSORTED); bgp_path_info_lock(pi); bgp_dest_lock_node(dest); @@ -568,6 +621,8 @@ void bgp_path_info_add_with_caller(const char *name, struct bgp_dest *dest, struct bgp_dest *bgp_path_info_reap(struct bgp_dest *dest, struct bgp_path_info *pi) { + struct bgp_table *table; + if (pi->next) pi->next->prev = pi->prev; if (pi->prev) @@ -578,6 +633,11 @@ struct bgp_dest *bgp_path_info_reap(struct bgp_dest *dest, pi->next = NULL; pi->prev = NULL; + /* Remove this path from global hash (per AFI/SAFI table) */ + table = bgp_dest_table(dest); + if (table) + bgp_pi_hash_del(&table->pi_hash, pi); + if (pi->peer) pi->peer->stat_pfx_loc_rib--; hook_call(bgp_snmp_update_stats, dest, pi, false); @@ -589,9 +649,16 @@ struct bgp_dest *bgp_path_info_reap(struct bgp_dest *dest, static struct bgp_dest *bgp_path_info_reap_unsorted(struct bgp_dest *dest, struct bgp_path_info *pi) { + struct bgp_table *table; + pi->next = NULL; pi->prev = NULL; + /* Remove this path from global hash (per AFI/SAFI table) */ + table = bgp_dest_table(dest); + if (table) + bgp_pi_hash_del(&table->pi_hash, pi); + if (pi->peer) pi->peer->stat_pfx_loc_rib--; hook_call(bgp_snmp_update_stats, dest, pi, false); @@ -5547,6 +5614,7 @@ void bgp_update(struct peer *peer, const struct prefix *p, uint32_t addpath_id, { int ret; struct bgp_dest *dest; + struct bgp_table *rib_table; struct bgp *bgp; struct attr new_attr = {}; struct attr *attr_new; @@ -5580,6 +5648,7 @@ void bgp_update(struct peer *peer, const struct prefix *p, uint32_t addpath_id, bgp = peer->bgp; dest = bgp_afi_node_get(bgp->rib[afi][safi], afi, safi, p, prd); + rib_table = bgp_dest_table(dest); if (num_labels && ((afi == AFI_L2VPN && safi == SAFI_EVPN) || bgp_is_valid_label(&label[0]))) { @@ -5606,12 +5675,14 @@ void bgp_update(struct peer *peer, const struct prefix *p, uint32_t addpath_id, bgp_adj_in_set(dest, peer, attr, addpath_id, &bgp_labels); } - /* Check previously received route. */ - for (pi = bgp_dest_get_bgp_path_info(dest); pi; pi = pi->next) - if (pi->peer == peer && pi->type == type - && pi->sub_type == sub_type - && pi->addpath_rx_id == addpath_id) - break; + /* Check previously received route using pi_hash */ + struct bgp_path_info pi_lookup = { .net = dest, + .peer = peer, + .type = type, + .sub_type = sub_type, + .addpath_rx_id = addpath_id }; + + pi = bgp_pi_hash_find(&rib_table->pi_hash, &pi_lookup); /* AS path local-as loop check. */ if (peer->change_local_as) { diff --git a/bgpd/bgp_route.h b/bgpd/bgp_route.h index f05f2bfea4..42c31c36db 100644 --- a/bgpd/bgp_route.h +++ b/bgpd/bgp_route.h @@ -11,6 +11,7 @@ #include "hook.h" #include "queue.h" #include "nexthop.h" +#include "typesafe.h" #include "bgp_table.h" #include "bgp_addpath_types.h" #include "bgp_rpki.h" @@ -275,6 +276,9 @@ struct bgp_path_info { struct bgp_path_info *next; struct bgp_path_info *prev; + /* Hash linkage for pi_hash in bgp_table */ + struct bgp_pi_hash_item pi_hash_link; + /* For nexthop linked list */ LIST_ENTRY(bgp_path_info) nh_thread; @@ -739,6 +743,11 @@ DECLARE_HOOK(bgp_route_update, struct bgp_path_info *old_route, struct bgp_path_info *new_route), (bgp, afi, safi, bn, old_route, new_route)); +extern int bgp_pi_hash_cmp(const struct bgp_path_info *p1, const struct bgp_path_info *p2); +extern uint32_t bgp_pi_hash_hashfn(const struct bgp_path_info *pi); + +DECLARE_HASH(bgp_pi_hash, struct bgp_path_info, pi_hash_link, bgp_pi_hash_cmp, bgp_pi_hash_hashfn); + /* BGP show options */ #define BGP_SHOW_OPT_JSON (1 << 0) #define BGP_SHOW_OPT_WIDE (1 << 1) diff --git a/bgpd/bgp_table.c b/bgpd/bgp_table.c index 5d529f9e1d..17b417895f 100644 --- a/bgpd/bgp_table.c +++ b/bgpd/bgp_table.c @@ -33,6 +33,9 @@ void bgp_table_unlock(struct bgp_table *rt) return; } + /* Cleanup pi_hash in bgp_table */ + bgp_pi_hash_fini(&rt->pi_hash); + route_table_finish(rt->route_table); rt->route_table = NULL; @@ -84,6 +87,8 @@ inline struct bgp_dest *bgp_dest_unlock_node(struct bgp_dest *dest) if (rn->lock == 1) { struct bgp_table *rt = bgp_dest_table(dest); + + if (rt->bgp) { bgp_addpath_free_node_data(&rt->bgp->tx_addpath, &dest->tx_addpath, rt->afi, @@ -165,6 +170,7 @@ struct bgp_table *bgp_table_init(struct bgp *bgp, afi_t afi, safi_t safi) bgp_table_lock(rt); rt->afi = afi; rt->safi = safi; + bgp_pi_hash_init(&rt->pi_hash); return rt; } diff --git a/bgpd/bgp_table.h b/bgpd/bgp_table.h index 94ff7587cd..153be63df6 100644 --- a/bgpd/bgp_table.h +++ b/bgpd/bgp_table.h @@ -10,10 +10,14 @@ #include "table.h" #include "queue.h" #include "linklist.h" +#include "typesafe.h" #include "bgpd.h" #include "bgp_advertise.h" #include "bgp_attr_srv6.h" +/* Typesafe hash for bgp_path_info lookup */ +PREDECL_HASH(bgp_pi_hash); + struct bgp_table { /* table belongs to this instance */ struct bgp *bgp; @@ -24,6 +28,9 @@ struct bgp_table { int lock; + /* Hash for bgp_path_info lookups across all prefixes in this table */ + struct bgp_pi_hash_head pi_hash; + /* soft_reconfig_table in progress */ bool soft_reconfig_init; struct event *soft_reconfig_thread; diff --git a/bgpd/rfapi/rfapi.c b/bgpd/rfapi/rfapi.c index fbd1edfbed..f6a7a91ffb 100644 --- a/bgpd/rfapi/rfapi.c +++ b/bgpd/rfapi/rfapi.c @@ -1008,7 +1008,7 @@ void add_vnc_route(struct rfapi_descriptor *rfd, /* cookie, VPN UN addr, peer */ } } - new = info_make(type, sub_type, 0, rfd->peer, new_attr, NULL); + new = info_make(type, sub_type, 0, rfd->peer, new_attr, bn); SET_FLAG(new->flags, BGP_PATH_VALID); /* save backref to rfapi handle */