mirror of
https://github.com/FRRouting/frr.git
synced 2026-01-11 20:07:27 +00:00
Merge pull request #20210 from sougatahitcs/bfd_actual_tx_timeout_with_applied_jitter
bfd: store actual timeout information in bfd
This commit is contained in:
commit
ca8077f2ee
8 changed files with 832 additions and 12 deletions
32
bfdd/bfd.c
32
bfdd/bfd.c
|
|
@ -232,10 +232,16 @@ void bfd_session_apply(struct bfd_session *bs)
|
|||
bfd_set_log_session_changes(bs, bs->peer_profile.log_session_changes);
|
||||
|
||||
/* If session interval changed negotiate new timers. */
|
||||
if (bs->ses_state == PTM_BFD_UP
|
||||
&& (bs->timers.desired_min_tx != min_tx
|
||||
|| bs->timers.required_min_rx != min_rx))
|
||||
if (bs->ses_state == PTM_BFD_UP &&
|
||||
(bs->timers.desired_min_tx != min_tx || bs->timers.required_min_rx != min_rx)) {
|
||||
if (bglobal.debug_peer_event) {
|
||||
zlog_debug("[%s] Timer changed while UP: old_tx=%u new_tx=%u, old_rx=%u new_rx=%u",
|
||||
bs_to_string(bs), min_tx, bs->timers.desired_min_tx, min_rx,
|
||||
bs->timers.required_min_rx);
|
||||
zlog_debug("Starting poll sequence to negotiate new timers");
|
||||
}
|
||||
bfd_set_polling(bs);
|
||||
}
|
||||
|
||||
/* Send updated information to data plane. */
|
||||
bfd_dplane_update_session(bs);
|
||||
|
|
@ -1157,6 +1163,9 @@ void bfd_set_polling(struct bfd_session *bs)
|
|||
*
|
||||
* RFC 5880, Section 6.8.3.
|
||||
*/
|
||||
if (bglobal.debug_peer_event)
|
||||
zlog_debug("[%s] Setting polling=1 to negotiate timer change", bs_to_string(bs));
|
||||
|
||||
bs->polling = 1;
|
||||
}
|
||||
|
||||
|
|
@ -1482,13 +1491,24 @@ void bs_final_handler(struct bfd_session *bs)
|
|||
bs->xmt_TO = bs->timers.desired_min_tx;
|
||||
else
|
||||
bs->xmt_TO = bs->remote_timers.required_min_rx;
|
||||
|
||||
|
||||
if (bglobal.debug_peer_event)
|
||||
zlog_debug(" calculated xmt_TO=%" PRIu64 " (using slowest rate)", bs->xmt_TO);
|
||||
|
||||
/* Only apply increased transmission interval after Poll Sequence */
|
||||
if (bs->ses_state == PTM_BFD_UP && bs->xmt_TO > old_xmt_TO) {
|
||||
bs->xmt_TO = old_xmt_TO; /* Keep old timing until Poll Sequence done */
|
||||
if (bs->ses_state == PTM_BFD_UP && bs->xmt_TO > old_xmt_TO && bs->polling) {
|
||||
if (bglobal.debug_peer_event) {
|
||||
zlog_debug(" REJECTED increase: xmt_TO=%" PRIu64 " > old=%" PRIu64
|
||||
" (polling=%d)",
|
||||
bs->xmt_TO, old_xmt_TO, bs->polling);
|
||||
}
|
||||
bs->xmt_TO = old_xmt_TO; /* Keep old timing DURING Poll Sequence */
|
||||
return;
|
||||
}
|
||||
|
||||
if (bglobal.debug_peer_event)
|
||||
zlog_debug(" APPLYING new xmt_TO=%" PRIu64, bs->xmt_TO);
|
||||
|
||||
/* Apply new transmission timer immediately. */
|
||||
ptm_bfd_start_xmt_timer(bs, false);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -357,7 +357,9 @@ struct bfd_session {
|
|||
struct event *echo_recvtimer_ev;
|
||||
struct event *recvtimer_ev;
|
||||
uint64_t xmt_TO;
|
||||
uint64_t xmt_TO_actual; /* Actual transmit timeout with jitter applied */
|
||||
uint64_t echo_xmt_TO;
|
||||
uint64_t echo_xmt_TO_actual; /* Actual echo transmit timeout with jitter applied */
|
||||
struct event *xmttimer_ev;
|
||||
struct event *echo_xmttimer_ev;
|
||||
uint64_t echo_detect_TO;
|
||||
|
|
|
|||
|
|
@ -216,15 +216,26 @@ static void _display_peer(struct vty *vty, struct bfd_session *bs)
|
|||
bs->timers.required_min_rx / 1000);
|
||||
vty_out(vty, "\t\t\tTransmission interval: %ums\n",
|
||||
bs->timers.desired_min_tx / 1000);
|
||||
if (bs->xmt_TO_actual > 0)
|
||||
vty_out(vty, "\t\t\tTransmission interval (actual with jitter): %" PRIu64 "ms\n",
|
||||
bs->xmt_TO_actual / 1000);
|
||||
if (bs->detect_TO > 0)
|
||||
vty_out(vty, "\t\t\tDetection timeout: %" PRIu64 "ms\n",
|
||||
bs->detect_TO / 1000);
|
||||
if (bs->timers.required_min_echo_rx != 0)
|
||||
vty_out(vty, "\t\t\tEcho receive interval: %ums\n",
|
||||
bs->timers.required_min_echo_rx / 1000);
|
||||
else
|
||||
vty_out(vty, "\t\t\tEcho receive interval: disabled\n");
|
||||
if (CHECK_FLAG(bs->flags, BFD_SESS_FLAG_ECHO) || bs->bfd_mode == BFD_MODE_TYPE_SBFD_ECHO)
|
||||
if (CHECK_FLAG(bs->flags, BFD_SESS_FLAG_ECHO) || bs->bfd_mode == BFD_MODE_TYPE_SBFD_ECHO) {
|
||||
vty_out(vty, "\t\t\tEcho transmission interval: %ums\n",
|
||||
bs->timers.desired_min_echo_tx / 1000);
|
||||
else
|
||||
if (bs->echo_xmt_TO_actual > 0)
|
||||
vty_out(vty,
|
||||
"\t\t\tEcho transmission interval (actual with jitter): %" PRIu64
|
||||
"ms\n",
|
||||
bs->echo_xmt_TO_actual / 1000);
|
||||
} else
|
||||
vty_out(vty, "\t\t\tEcho transmission interval: disabled\n");
|
||||
|
||||
|
||||
|
|
@ -333,6 +344,12 @@ static struct json_object *__display_peer_json(struct bfd_session *bs)
|
|||
bs->timers.required_min_rx / 1000);
|
||||
json_object_int_add(jo, "transmit-interval",
|
||||
bs->timers.desired_min_tx / 1000);
|
||||
if (bs->xmt_TO_actual > 0)
|
||||
json_object_int_add(jo, "transmit-interval-actual",
|
||||
bs->xmt_TO_actual / 1000);
|
||||
if (bs->detect_TO > 0)
|
||||
json_object_int_add(jo, "detection-timeout",
|
||||
bs->detect_TO / 1000);
|
||||
json_object_int_add(jo, "echo-receive-interval",
|
||||
bs->timers.required_min_echo_rx / 1000);
|
||||
if (bs->bfd_mode == BFD_MODE_TYPE_SBFD_INIT || bs->bfd_mode == BFD_MODE_TYPE_SBFD_ECHO) {
|
||||
|
|
|
|||
16
bfdd/event.c
16
bfdd/event.c
|
|
@ -131,13 +131,17 @@ void bfd_xmttimer_update(struct bfd_session *bs, uint64_t jitter)
|
|||
{
|
||||
struct timeval tv = {.tv_sec = 0, .tv_usec = jitter};
|
||||
|
||||
/* Store the actual transmit timeout with jitter applied */
|
||||
bs->xmt_TO_actual = jitter;
|
||||
|
||||
/* Remove previous schedule if any. */
|
||||
bfd_xmttimer_delete(bs);
|
||||
|
||||
/* Don't add event if peer is deactivated. */
|
||||
if (CHECK_FLAG(bs->flags, BFD_SESS_FLAG_SHUTDOWN) ||
|
||||
bs->sock == -1)
|
||||
if (CHECK_FLAG(bs->flags, BFD_SESS_FLAG_SHUTDOWN) || bs->sock == -1) {
|
||||
bs->xmt_TO_actual = 0; /* Timer not scheduled */
|
||||
return;
|
||||
}
|
||||
|
||||
tv_normalize(&tv);
|
||||
|
||||
|
|
@ -149,13 +153,17 @@ void bfd_echo_xmttimer_update(struct bfd_session *bs, uint64_t jitter)
|
|||
{
|
||||
struct timeval tv = {.tv_sec = 0, .tv_usec = jitter};
|
||||
|
||||
/* Store the actual echo transmit timeout with jitter applied */
|
||||
bs->echo_xmt_TO_actual = jitter;
|
||||
|
||||
/* Remove previous schedule if any. */
|
||||
bfd_echo_xmttimer_delete(bs);
|
||||
|
||||
/* Don't add event if peer is deactivated. */
|
||||
if (CHECK_FLAG(bs->flags, BFD_SESS_FLAG_SHUTDOWN) ||
|
||||
bs->sock == -1)
|
||||
if (CHECK_FLAG(bs->flags, BFD_SESS_FLAG_SHUTDOWN) || bs->sock == -1) {
|
||||
bs->echo_xmt_TO_actual = 0; /* Timer not scheduled */
|
||||
return;
|
||||
}
|
||||
|
||||
tv_normalize(&tv);
|
||||
|
||||
|
|
|
|||
2
tests/topotests/bfd_jitter_verification/__init__.py
Normal file
2
tests/topotests/bfd_jitter_verification/__init__.py
Normal file
|
|
@ -0,0 +1,2 @@
|
|||
# SPDX-License-Identifier: ISC
|
||||
|
||||
13
tests/topotests/bfd_jitter_verification/r1/frr.conf
Normal file
13
tests/topotests/bfd_jitter_verification/r1/frr.conf
Normal file
|
|
@ -0,0 +1,13 @@
|
|||
interface lo
|
||||
ip address 172.16.255.1/32
|
||||
!
|
||||
interface r1-eth0
|
||||
ip address 192.168.1.1/24
|
||||
!
|
||||
bfd
|
||||
peer 192.168.1.2 interface r1-eth0
|
||||
detect-multiplier 3
|
||||
transmit-interval 1000
|
||||
receive-interval 1000
|
||||
!
|
||||
!
|
||||
13
tests/topotests/bfd_jitter_verification/r2/frr.conf
Normal file
13
tests/topotests/bfd_jitter_verification/r2/frr.conf
Normal file
|
|
@ -0,0 +1,13 @@
|
|||
interface lo
|
||||
ip address 172.16.255.2/32
|
||||
!
|
||||
interface r2-eth0
|
||||
ip address 192.168.1.2/24
|
||||
!
|
||||
bfd
|
||||
peer 192.168.1.1 interface r2-eth0
|
||||
detect-multiplier 3
|
||||
transmit-interval 1000
|
||||
receive-interval 1000
|
||||
!
|
||||
!
|
||||
745
tests/topotests/bfd_jitter_verification/test_bfd_jitter.py
Normal file
745
tests/topotests/bfd_jitter_verification/test_bfd_jitter.py
Normal file
|
|
@ -0,0 +1,745 @@
|
|||
#!/usr/bin/env python
|
||||
# SPDX-License-Identifier: ISC
|
||||
|
||||
#
|
||||
# test_bfd_jitter.py
|
||||
#
|
||||
# Copyright (c) 2024 by
|
||||
# Sougata Barik
|
||||
#
|
||||
|
||||
"""
|
||||
Test BFD jitter calculation and application.
|
||||
|
||||
Active tests verify:
|
||||
1. BFD sessions establish successfully
|
||||
2. The new xmt_TO_actual field is populated correctly
|
||||
3. Jitter values are within RFC 5880 Section 6.5.2 ranges:
|
||||
- 75-100% for detect_multiplier > 1
|
||||
- 75-90% for detect_multiplier == 1
|
||||
4. Jitter formula calculations are correct
|
||||
|
||||
Note: Packet capture based tests are disabled due to tcpdump parsing
|
||||
complexity in test environments (BCM-LI-SHIM wrappers).
|
||||
"""
|
||||
|
||||
import os
|
||||
import sys
|
||||
import json
|
||||
import pytest
|
||||
import functools
|
||||
import time
|
||||
import re
|
||||
|
||||
CWD = os.path.dirname(os.path.realpath(__file__))
|
||||
sys.path.append(os.path.join(CWD, "../"))
|
||||
|
||||
# pylint: disable=C0413
|
||||
from lib import topotest
|
||||
from lib.topogen import Topogen, get_topogen
|
||||
from lib.common_config import step
|
||||
|
||||
pytestmark = [pytest.mark.bfdd]
|
||||
|
||||
|
||||
def build_topo(tgen):
|
||||
for routern in range(1, 3):
|
||||
tgen.add_router("r{}".format(routern))
|
||||
|
||||
switch = tgen.add_switch("s1")
|
||||
switch.add_link(tgen.gears["r1"])
|
||||
switch.add_link(tgen.gears["r2"])
|
||||
|
||||
|
||||
def setup_module(mod):
|
||||
tgen = Topogen(build_topo, mod.__name__)
|
||||
tgen.start_topology()
|
||||
|
||||
router_list = tgen.routers()
|
||||
|
||||
for rname, router in router_list.items():
|
||||
router.load_frr_config(os.path.join(CWD, "{}/frr.conf".format(rname)))
|
||||
|
||||
tgen.start_router()
|
||||
|
||||
|
||||
def teardown_module(mod):
|
||||
tgen = get_topogen()
|
||||
tgen.stop_topology()
|
||||
|
||||
|
||||
def capture_bfd_packets(router, interface, duration=10):
|
||||
"""
|
||||
Capture BFD control packets (not echo) for a duration.
|
||||
|
||||
Returns list of tuples: [(timestamp, src_ip, dst_ip), ...]
|
||||
"""
|
||||
import subprocess
|
||||
|
||||
# Use tcpdump to capture BFD packets with verbose output to distinguish packet types
|
||||
# -vv shows BFD packet details (Control vs Echo)
|
||||
# We only want Control packets for measuring jitter
|
||||
cmd = "timeout {} tcpdump -i {} -nn -tt -vv udp port 3784 2>&1".format(
|
||||
duration, interface
|
||||
)
|
||||
|
||||
output = router.run(cmd)
|
||||
packets = []
|
||||
|
||||
# Strip ANSI color codes from output (common in test logs)
|
||||
ansi_escape = re.compile(r'\x1B(?:[@-Z\\-_]|\[[0-?]*[ -/]*[@-~])')
|
||||
output = ansi_escape.sub('', output)
|
||||
|
||||
# Parse tcpdump output - handle multi-line packet format
|
||||
# With -vv, tcpdump outputs:
|
||||
# Line 1: timestamp IP (header info)
|
||||
# Line 2: src.port > dst.port: [checksum]
|
||||
# Line 3: BCM-LI-SHIM or BFDv1 details
|
||||
lines = output.split('\n')
|
||||
i = 0
|
||||
|
||||
while i < len(lines):
|
||||
line = lines[i].strip()
|
||||
|
||||
# Look for timestamp line
|
||||
timestamp_match = re.match(r'^(\d+\.\d+)\s+IP\s', line)
|
||||
if timestamp_match:
|
||||
timestamp = float(timestamp_match.group(1))
|
||||
|
||||
# Next line should have src/dst addresses
|
||||
if i + 1 < len(lines):
|
||||
next_line = lines[i + 1].strip()
|
||||
addr_match = re.search(r'(\d+\.\d+\.\d+\.\d+)\.\d+\s*>\s*(\d+\.\d+\.\d+\.\d+)\.3784', next_line)
|
||||
if addr_match:
|
||||
src_ip = addr_match.group(1)
|
||||
dst_ip = addr_match.group(2)
|
||||
|
||||
# Check next few lines for Echo keyword
|
||||
is_echo = False
|
||||
for j in range(i, min(i + 5, len(lines))):
|
||||
if 'Echo' in lines[j]:
|
||||
is_echo = True
|
||||
break
|
||||
|
||||
# Accept all non-Echo BFD packets
|
||||
if not is_echo:
|
||||
packets.append((timestamp, src_ip, dst_ip))
|
||||
|
||||
i += 1
|
||||
|
||||
return packets
|
||||
|
||||
|
||||
def calculate_intervals(packets, src_ip):
|
||||
"""
|
||||
Calculate intervals between consecutive packets from src_ip.
|
||||
|
||||
Returns list of intervals in milliseconds.
|
||||
Filters out invalid intervals (negative, too large, duplicates, etc.)
|
||||
"""
|
||||
src_packets = [p[0] for p in packets if p[1] == src_ip]
|
||||
|
||||
# Sort by timestamp to handle any out-of-order packets
|
||||
src_packets.sort()
|
||||
|
||||
# Remove duplicate timestamps (same packet captured twice)
|
||||
unique_packets = []
|
||||
for ts in src_packets:
|
||||
if not unique_packets or abs(ts - unique_packets[-1]) > 0.001: # >1ms apart
|
||||
unique_packets.append(ts)
|
||||
|
||||
intervals = []
|
||||
|
||||
for i in range(1, len(unique_packets)):
|
||||
interval_ms = (unique_packets[i] - unique_packets[i-1]) * 1000
|
||||
|
||||
# Filter out invalid intervals
|
||||
# Valid intervals should be > 100ms (for safety) and < 60 seconds
|
||||
if 100 < interval_ms < 60000:
|
||||
intervals.append(interval_ms)
|
||||
|
||||
return intervals
|
||||
|
||||
|
||||
def test_bfd_session_up():
|
||||
"""Verify BFD sessions come up successfully."""
|
||||
tgen = get_topogen()
|
||||
|
||||
if tgen.routers_have_failure():
|
||||
pytest.skip(tgen.errors)
|
||||
|
||||
r1 = tgen.gears["r1"]
|
||||
r2 = tgen.gears["r2"]
|
||||
|
||||
def _check_bfd_up(router, peer_ip):
|
||||
output = json.loads(router.vtysh_cmd("show bfd peers json"))
|
||||
for peer in output:
|
||||
if peer.get("peer") == peer_ip and peer.get("status") == "up":
|
||||
return None
|
||||
return "BFD peer {} not up".format(peer_ip)
|
||||
|
||||
step("Waiting for BFD sessions to come up")
|
||||
|
||||
# Check R1's session to R2
|
||||
test_func = functools.partial(_check_bfd_up, r1, "192.168.1.2")
|
||||
_, result = topotest.run_and_expect(test_func, None, count=60, wait=1)
|
||||
assert result is None, "BFD session not up on R1"
|
||||
|
||||
# Check R2's session to R1
|
||||
test_func = functools.partial(_check_bfd_up, r2, "192.168.1.1")
|
||||
_, result = topotest.run_and_expect(test_func, None, count=60, wait=1)
|
||||
assert result is None, "BFD session not up on R2"
|
||||
|
||||
|
||||
def _DISABLED_test_bfd_jitter_default_multiplier():
|
||||
"""
|
||||
DISABLED: Packet capture based test.
|
||||
|
||||
Would test jitter with detect-multiplier = 3 (default).
|
||||
Expected jitter range: 75% - 100% of nominal interval.
|
||||
|
||||
Disabled due to tcpdump parsing issues with BCM-LI-SHIM in test environment.
|
||||
"""
|
||||
tgen = get_topogen()
|
||||
|
||||
if tgen.routers_have_failure():
|
||||
pytest.skip(tgen.errors)
|
||||
|
||||
r1 = tgen.gears["r1"]
|
||||
r2 = tgen.gears["r2"]
|
||||
|
||||
step("Verify jitter range with detect-multiplier = 3")
|
||||
|
||||
# Get BFD peer info from R1
|
||||
output = json.loads(r1.vtysh_cmd("show bfd peers json"))
|
||||
peer_info = None
|
||||
for peer in output:
|
||||
if peer.get("peer") == "192.168.1.2":
|
||||
peer_info = peer
|
||||
break
|
||||
|
||||
assert peer_info is not None, "BFD peer not found"
|
||||
|
||||
# Check configured values
|
||||
tx_interval = peer_info.get("transmit-interval")
|
||||
detect_mult = peer_info.get("detect-multiplier")
|
||||
|
||||
assert tx_interval == 1000, "Expected TX interval 1000ms, got {}".format(tx_interval)
|
||||
assert detect_mult == 3, "Expected detect multiplier 3, got {}".format(detect_mult)
|
||||
|
||||
# Capture packets and measure actual intervals
|
||||
step("Capturing BFD packets to measure jitter")
|
||||
packets = capture_bfd_packets(r1, "r1-eth0", duration=15)
|
||||
|
||||
# Debug: Show captured packets
|
||||
if len(packets) == 0:
|
||||
pytest.skip("No BFD packets captured - tcpdump may not be working")
|
||||
|
||||
# Calculate intervals from R1 to R2
|
||||
intervals = calculate_intervals(packets, "192.168.1.1")
|
||||
|
||||
assert len(intervals) >= 10, \
|
||||
"Not enough intervals calculated (got {}). Total packets: {}, R1 packets: {}".format(
|
||||
len(intervals), len(packets),
|
||||
len([p for p in packets if p[1] == "192.168.1.1"]))
|
||||
|
||||
# Verify jitter range: 75% - 100% for detect_mult > 1
|
||||
# Add tolerance for processing/network delays and session startup
|
||||
# Lower bound: 65% (to catch slow start transitions)
|
||||
# Upper bound: 105% (100% + 5% tolerance)
|
||||
min_expected = 650 # 65% of 1000ms (with startup tolerance)
|
||||
max_expected = 1050 # 105% of 1000ms
|
||||
|
||||
# Filter out any anomalous intervals that might be from session flaps or duplicates
|
||||
# Keep only intervals close to expected range (500-1200ms with wide margin)
|
||||
valid_intervals = [i for i in intervals if 500 <= i <= 1200]
|
||||
|
||||
if len(valid_intervals) < len(intervals):
|
||||
step("Filtered out {} anomalous intervals (possible duplicates/session flap)".format(
|
||||
len(intervals) - len(valid_intervals)))
|
||||
|
||||
step("Valid intervals: {}".format([round(i, 1) for i in valid_intervals[:10]]))
|
||||
|
||||
assert len(valid_intervals) >= 10, \
|
||||
"Not enough valid intervals after filtering (got {})".format(len(valid_intervals))
|
||||
|
||||
for interval in valid_intervals:
|
||||
assert min_expected <= interval <= max_expected, \
|
||||
"Interval {}ms outside jitter range [{}, {}] (with 5% tolerance)".format(
|
||||
interval, min_expected, max_expected)
|
||||
|
||||
# Verify randomness - intervals should not all be the same
|
||||
unique_intervals = set([round(i, 0) for i in valid_intervals])
|
||||
assert len(unique_intervals) > 1, "No jitter variation detected - all intervals the same"
|
||||
|
||||
step("Jitter range verified: {} valid intervals in range [{}, {}]ms".format(
|
||||
len(valid_intervals), min_expected, max_expected))
|
||||
|
||||
|
||||
def _DISABLED_test_bfd_jitter_detect_mult_one():
|
||||
"""
|
||||
DISABLED: Packet capture based test.
|
||||
|
||||
Would test jitter with detect-multiplier = 1 (narrower range).
|
||||
Expected jitter range: 75% - 90% of nominal interval.
|
||||
|
||||
Disabled due to tcpdump parsing issues with BCM-LI-SHIM in test environment.
|
||||
"""
|
||||
tgen = get_topogen()
|
||||
|
||||
if tgen.routers_have_failure():
|
||||
pytest.skip(tgen.errors)
|
||||
|
||||
r1 = tgen.gears["r1"]
|
||||
r2 = tgen.gears["r2"]
|
||||
|
||||
step("Change detect-multiplier to 1 on R1")
|
||||
|
||||
r1.vtysh_cmd("""
|
||||
configure terminal
|
||||
bfd
|
||||
peer 192.168.1.2 interface r1-eth0
|
||||
detect-multiplier 1
|
||||
exit
|
||||
exit
|
||||
exit
|
||||
""")
|
||||
|
||||
# Wait for configuration to apply and session to stabilize
|
||||
# Changing detect_mult may cause session reset
|
||||
time.sleep(10)
|
||||
|
||||
# Verify session is back up
|
||||
def _check_bfd_up():
|
||||
output = json.loads(r1.vtysh_cmd("show bfd peers json"))
|
||||
for peer in output:
|
||||
if peer.get("peer") == "192.168.1.2" and peer.get("status") == "up":
|
||||
return None
|
||||
return "BFD not up after detect_mult change"
|
||||
|
||||
test_func = functools.partial(_check_bfd_up)
|
||||
_, result = topotest.run_and_expect(test_func, None, count=30, wait=1)
|
||||
assert result is None, "BFD session did not come back up after changing detect-multiplier"
|
||||
|
||||
# Verify configuration
|
||||
output = json.loads(r1.vtysh_cmd("show bfd peers json"))
|
||||
peer_info = None
|
||||
for peer in output:
|
||||
if peer.get("peer") == "192.168.1.2":
|
||||
peer_info = peer
|
||||
break
|
||||
|
||||
assert peer_info is not None, "BFD peer not found"
|
||||
detect_mult = peer_info.get("detect-multiplier")
|
||||
assert detect_mult == 1, "Expected detect multiplier 1, got {}".format(detect_mult)
|
||||
|
||||
# Capture packets and measure actual intervals
|
||||
step("Capturing BFD packets to measure jitter with detect-multiplier=1")
|
||||
packets = capture_bfd_packets(r1, "r1-eth0", duration=15)
|
||||
|
||||
if len(packets) == 0:
|
||||
pytest.skip("No BFD packets captured")
|
||||
|
||||
# Calculate intervals from R1 to R2
|
||||
intervals = calculate_intervals(packets, "192.168.1.1")
|
||||
|
||||
# Debug output
|
||||
r1_packets = [p for p in packets if p[1] == "192.168.1.1"]
|
||||
step("Debug: Total packets={}, R1->R2 packets={}, Valid intervals={}".format(
|
||||
len(packets), len(r1_packets), len(intervals)))
|
||||
if len(intervals) > 0:
|
||||
step("Debug: First 10 intervals (ms): {}".format(
|
||||
[round(i, 1) for i in intervals[:10]]))
|
||||
|
||||
assert len(intervals) >= 10, \
|
||||
"Not enough intervals calculated (got {}). Total packets: {}, R1 packets: {}".format(
|
||||
len(intervals), len(packets), len(r1_packets))
|
||||
|
||||
# Verify jitter range: 75% - 90% for detect_mult == 1
|
||||
# Add tolerance for processing/network delays and session startup
|
||||
# Lower bound: 65% (to catch slow start after session reset)
|
||||
# Upper bound: 95% (90% + 5% tolerance)
|
||||
min_expected = 650 # 65% of 1000ms (with startup tolerance)
|
||||
max_expected = 950 # 95% of 1000ms (90% + 5% tolerance)
|
||||
|
||||
# Filter out any anomalous intervals that might be from session flaps
|
||||
# Keep only intervals close to expected range (500-1200ms with wide margin)
|
||||
valid_intervals = [i for i in intervals if 500 <= i <= 1200]
|
||||
|
||||
if len(valid_intervals) < len(intervals):
|
||||
step("Filtered out {} anomalous intervals (possible session flap)".format(
|
||||
len(intervals) - len(valid_intervals)))
|
||||
|
||||
# Debug output
|
||||
step("Valid intervals measured (detect_mult=1): {}".format(
|
||||
[round(i, 1) for i in valid_intervals[:10]])) # Show first 10
|
||||
|
||||
assert len(valid_intervals) >= 5, \
|
||||
"Not enough valid intervals after filtering (got {})".format(len(valid_intervals))
|
||||
|
||||
for interval in valid_intervals:
|
||||
assert min_expected <= interval <= max_expected, \
|
||||
"Interval {}ms outside jitter range [{}, {}] for detect_mult=1 (with 5% tolerance)".format(
|
||||
interval, min_expected, max_expected)
|
||||
|
||||
# Verify randomness
|
||||
unique_intervals = set([round(i, 0) for i in valid_intervals])
|
||||
assert len(unique_intervals) > 1, "No jitter variation detected"
|
||||
|
||||
step("Jitter range verified for detect_mult=1: {} valid intervals".format(len(valid_intervals)))
|
||||
|
||||
|
||||
def test_bfd_xmt_TO_actual_field():
|
||||
"""
|
||||
Verify the new xmt_TO_actual field is populated and displayed correctly.
|
||||
|
||||
Tests that:
|
||||
1. xmt_TO_actual field is present in show output
|
||||
2. xmt_TO_actual value is non-zero when session is up
|
||||
3. xmt_TO_actual is within valid jitter range (65-105% with tolerance)
|
||||
|
||||
This validates the new feature added to struct bfd_session and show command.
|
||||
"""
|
||||
tgen = get_topogen()
|
||||
|
||||
if tgen.routers_have_failure():
|
||||
pytest.skip(tgen.errors)
|
||||
|
||||
r1 = tgen.gears["r1"]
|
||||
|
||||
step("Reset detect-multiplier to 3 for final test")
|
||||
|
||||
r1.vtysh_cmd("""
|
||||
configure terminal
|
||||
bfd
|
||||
peer 192.168.1.2 interface r1-eth0
|
||||
detect-multiplier 3
|
||||
exit
|
||||
exit
|
||||
exit
|
||||
""")
|
||||
|
||||
time.sleep(5)
|
||||
|
||||
step("Verify xmt_TO_actual field via show command")
|
||||
|
||||
# Note: show bfd peers output should now include actual jittered value
|
||||
output = r1.vtysh_cmd("show bfd peers")
|
||||
|
||||
# Check that output contains the jittered interval
|
||||
assert "Transmission interval (actual with jitter)" in output, \
|
||||
"xmt_TO_actual not displayed in show output"
|
||||
|
||||
# Extract the actual jittered value
|
||||
match = re.search(r'Transmission interval \(actual with jitter\):\s+(\d+)ms', output)
|
||||
assert match is not None, "Could not parse xmt_TO_actual from output"
|
||||
|
||||
actual_interval = int(match.group(1))
|
||||
|
||||
# Verify it's within jitter range (75-100% of nominal + 5% tolerance)
|
||||
# Get current configured nominal interval (could be 1000ms from initial config)
|
||||
output_json = json.loads(r1.vtysh_cmd("show bfd peers json"))
|
||||
nominal = 1000 # Default from initial bfdd.conf
|
||||
for peer in output_json:
|
||||
if peer.get("peer") == "192.168.1.2":
|
||||
nominal = peer.get("transmit-interval", 1000)
|
||||
break
|
||||
|
||||
min_expected = int(nominal * 0.75) # 75% per RFC 5880
|
||||
max_expected = int(nominal * 1.00) # 100% per RFC 5880
|
||||
|
||||
assert min_expected <= actual_interval <= max_expected, \
|
||||
"xmt_TO_actual {}ms outside RFC 5880 jitter range [{}, {}]".format(
|
||||
actual_interval, min_expected, max_expected)
|
||||
|
||||
step("xmt_TO_actual field verified: {}ms (within 750-1000ms range)".format(actual_interval))
|
||||
|
||||
|
||||
def _DISABLED_test_bfd_jitter_large_interval():
|
||||
"""
|
||||
DISABLED: Packet capture based test.
|
||||
|
||||
Would test jitter with larger interval (5000ms) to verify percentage scaling.
|
||||
Expected range: 3750ms - 5000ms (75% - 100%)
|
||||
|
||||
Disabled due to tcpdump parsing issues with BCM-LI-SHIM in test environment.
|
||||
"""
|
||||
tgen = get_topogen()
|
||||
|
||||
if tgen.routers_have_failure():
|
||||
pytest.skip(tgen.errors)
|
||||
|
||||
r1 = tgen.gears["r1"]
|
||||
r2 = tgen.gears["r2"]
|
||||
|
||||
step("Configure 5000ms transmit interval on both routers")
|
||||
|
||||
for router in [r1, r2]:
|
||||
router.vtysh_cmd("""
|
||||
configure terminal
|
||||
bfd
|
||||
peer 192.168.1.{} interface {}-eth0
|
||||
transmit-interval 5000
|
||||
receive-interval 5000
|
||||
exit
|
||||
exit
|
||||
exit
|
||||
""".format("2" if router == r1 else "1", router.name))
|
||||
|
||||
step("Waiting for poll sequence to complete and new interval to be applied")
|
||||
|
||||
# The code at lines 1486-1490 prevents slowing down during poll sequence.
|
||||
# We need to wait for:
|
||||
# 1. Configuration to be set (timers.desired_min_tx = 5000)
|
||||
# 2. Poll sequence to trigger (polling = 1)
|
||||
# 3. Poll sequence to complete (polling = 0)
|
||||
# 4. New interval to be applied (xmt_TO_actual ~= 3750-5000ms)
|
||||
|
||||
def _check_interval_applied():
|
||||
output = r1.vtysh_cmd("show bfd peers")
|
||||
|
||||
# Check if session is up
|
||||
if "Status: up" not in output:
|
||||
return "BFD session not up"
|
||||
|
||||
# Check configured interval is 5000ms
|
||||
if "Transmission interval: 5000ms" not in output:
|
||||
return "Configured interval not 5000ms"
|
||||
|
||||
# Check actual interval with jitter is in expected range (3750-5250ms)
|
||||
match = re.search(r'Transmission interval \(actual with jitter\):\s+(\d+)ms', output)
|
||||
if not match:
|
||||
return "xmt_TO_actual not found in output"
|
||||
|
||||
actual = int(match.group(1))
|
||||
# If actual is still around 750-1050ms, old interval is still in use
|
||||
if 700 <= actual <= 1100:
|
||||
return "Still using old interval (actual={}ms), poll not complete".format(actual)
|
||||
|
||||
# If actual is in new range (3750-5250ms), new interval is applied!
|
||||
if 3700 <= actual <= 5300:
|
||||
return None # Success!
|
||||
|
||||
return "Unexpected actual interval: {}ms".format(actual)
|
||||
|
||||
test_func = functools.partial(_check_interval_applied)
|
||||
# With the fix to line 1512 (added && bs->polling), interval should apply
|
||||
# within 10-30 seconds after poll sequence completes
|
||||
_, result = topotest.run_and_expect(test_func, None, count=60, wait=1)
|
||||
assert result is None, "New 5000ms interval not applied after 60 seconds: {}".format(result)
|
||||
|
||||
step("New 5000ms interval confirmed applied (poll sequence completed)")
|
||||
|
||||
# Capture packets - need longer duration for 5-second intervals
|
||||
step("Capturing BFD packets with 5000ms interval")
|
||||
packets = capture_bfd_packets(r1, "r1-eth0", duration=60)
|
||||
|
||||
if len(packets) == 0:
|
||||
pytest.skip("No BFD packets captured")
|
||||
|
||||
# Calculate intervals from R1 to R2
|
||||
intervals = calculate_intervals(packets, "192.168.1.1")
|
||||
|
||||
assert len(intervals) >= 5, \
|
||||
"Not enough intervals calculated (got {}) for 5s interval. Total packets: {}, R1 packets: {}".format(
|
||||
len(intervals), len(packets),
|
||||
len([p for p in packets if p[1] == "192.168.1.1"]))
|
||||
|
||||
# Verify jitter range: 75% - 100% of 5000ms = 3750ms - 5000ms
|
||||
# Add 5% tolerance for processing/network delays
|
||||
min_expected = 3750 # 75% of 5000ms
|
||||
max_expected = 5250 # 100% of 5000ms + 5% tolerance
|
||||
|
||||
for interval in intervals:
|
||||
assert min_expected <= interval <= max_expected, \
|
||||
"Interval {}ms outside jitter range [{}, {}] (with 5% tolerance)".format(
|
||||
interval, min_expected, max_expected)
|
||||
|
||||
# Verify randomness
|
||||
unique_intervals = set([round(i, -1) for i in intervals]) # Round to 10ms
|
||||
assert len(unique_intervals) > 1, "No jitter variation detected at 5s interval"
|
||||
|
||||
# Calculate average - should be around 87.5% (midpoint of 75-100%)
|
||||
avg_interval = sum(intervals) / len(intervals)
|
||||
avg_percentage = (avg_interval / 5000) * 100
|
||||
|
||||
step("Large interval jitter verified: avg={}ms ({}%)".format(
|
||||
round(avg_interval), round(avg_percentage, 1)))
|
||||
|
||||
# Average should be roughly between 80-95%
|
||||
assert 80 <= avg_percentage <= 95, \
|
||||
"Average jitter percentage {}% outside expected range [80%, 95%]".format(avg_percentage)
|
||||
|
||||
|
||||
def test_bfd_jitter_formula_validation():
|
||||
"""
|
||||
Validate the jitter formula by sampling xmt_TO_actual field values.
|
||||
|
||||
Uses the new xmt_TO_actual field (added to struct bfd_session) to verify
|
||||
that jitter is being applied correctly according to the formula:
|
||||
|
||||
Formula: jitter = (xmt_TO * (75 + random(0-25))) / 100
|
||||
For detect_mult == 1: random(0-15) instead of random(0-25)
|
||||
|
||||
This test works with whatever interval is currently configured (typically 1000ms).
|
||||
"""
|
||||
tgen = get_topogen()
|
||||
|
||||
if tgen.routers_have_failure():
|
||||
pytest.skip(tgen.errors)
|
||||
|
||||
r1 = tgen.gears["r1"]
|
||||
|
||||
step("Sampling multiple xmt_TO_actual values to verify formula")
|
||||
|
||||
# First, get the current configured interval
|
||||
output_json = json.loads(r1.vtysh_cmd("show bfd peers json"))
|
||||
nominal = 1000 # Default
|
||||
for peer in output_json:
|
||||
if peer.get("peer") == "192.168.1.2":
|
||||
nominal = peer.get("transmit-interval", 1000)
|
||||
break
|
||||
|
||||
step("Current nominal interval: {}ms".format(nominal))
|
||||
|
||||
# Sample xmt_TO_actual multiple times by checking show output repeatedly
|
||||
actual_values = []
|
||||
|
||||
for i in range(20):
|
||||
output = r1.vtysh_cmd("show bfd peers")
|
||||
match = re.search(r'Transmission interval \(actual with jitter\):\s+(\d+)ms', output)
|
||||
if match:
|
||||
actual_values.append(int(match.group(1)))
|
||||
time.sleep(0.3) # Small delay between samples
|
||||
|
||||
assert len(actual_values) > 0, "Could not capture xmt_TO_actual values"
|
||||
|
||||
# Calculate expected range based on current nominal interval
|
||||
# detect_mult=3: 75-100% per RFC 5880 Section 6.5.2
|
||||
min_jitter = int(nominal * 0.75) # 75% (RFC minimum)
|
||||
max_jitter = int(nominal * 1.00) # 100% (RFC maximum)
|
||||
|
||||
# All values should be in RFC 5880 compliant range
|
||||
for val in actual_values:
|
||||
assert min_jitter <= val <= max_jitter, \
|
||||
"xmt_TO_actual {}ms outside RFC 5880 range [{}, {}] for {}ms nominal".format(
|
||||
val, min_jitter, max_jitter, nominal)
|
||||
|
||||
# Calculate jitter percentages
|
||||
jitter_percentages = [(v / nominal) * 100 for v in actual_values]
|
||||
avg_pct = sum(jitter_percentages) / len(jitter_percentages)
|
||||
min_pct = min(jitter_percentages)
|
||||
max_pct = max(jitter_percentages)
|
||||
|
||||
step("Jitter formula validated: min={}% avg={}% max={}%".format(
|
||||
round(min_pct, 1), round(avg_pct, 1), round(max_pct, 1)))
|
||||
|
||||
# Statistical check: average should be around 87.5% (midpoint of 75-100%)
|
||||
assert 80 <= avg_pct <= 95, "Average jitter {}% outside reasonable range".format(avg_pct)
|
||||
|
||||
|
||||
def _DISABLED_test_bfd_timer_change_with_jitter():
|
||||
"""
|
||||
DISABLED: Packet capture based test.
|
||||
|
||||
Would test that changing timer intervals properly recalculates jitter.
|
||||
|
||||
Disabled due to tcpdump parsing issues with BCM-LI-SHIM in test environment.
|
||||
"""
|
||||
tgen = get_topogen()
|
||||
|
||||
if tgen.routers_have_failure():
|
||||
pytest.skip(tgen.errors)
|
||||
|
||||
r1 = tgen.gears["r1"]
|
||||
r2 = tgen.gears["r2"]
|
||||
|
||||
step("Change transmit interval from 5000ms to 2000ms (speeding up)")
|
||||
|
||||
# Changing from 5000ms -> 2000ms is FASTER, so it should apply immediately
|
||||
# No session reset needed for speed increases
|
||||
for router in [r1, r2]:
|
||||
router.vtysh_cmd("""
|
||||
configure terminal
|
||||
bfd
|
||||
peer 192.168.1.{} interface {}-eth0
|
||||
transmit-interval 2000
|
||||
receive-interval 2000
|
||||
exit
|
||||
exit
|
||||
exit
|
||||
""".format("2" if router == r1 else "1", router.name))
|
||||
|
||||
step("Waiting for new 2000ms interval to be applied")
|
||||
|
||||
# Check that xmt_TO_actual reflects the new interval
|
||||
def _check_2s_interval_applied():
|
||||
output = r1.vtysh_cmd("show bfd peers")
|
||||
|
||||
if "Status: up" not in output:
|
||||
return "BFD session not up"
|
||||
|
||||
if "Transmission interval: 2000ms" not in output:
|
||||
return "Configured interval not 2000ms"
|
||||
|
||||
# Check actual interval is in 2000ms range (1500-2100ms)
|
||||
match = re.search(r'Transmission interval \(actual with jitter\):\s+(\d+)ms', output)
|
||||
if not match:
|
||||
return "xmt_TO_actual not found"
|
||||
|
||||
actual = int(match.group(1))
|
||||
# Should be in new range now (speeding up is allowed immediately)
|
||||
if 1400 <= actual <= 2200:
|
||||
return None # Success!
|
||||
|
||||
# Still at old 5s interval?
|
||||
if 3700 <= actual <= 5300:
|
||||
return "Still using 5s interval (actual={}ms)".format(actual)
|
||||
|
||||
return "Unexpected actual interval: {}ms".format(actual)
|
||||
|
||||
test_func = functools.partial(_check_2s_interval_applied)
|
||||
_, result = topotest.run_and_expect(test_func, None, count=60, wait=1)
|
||||
assert result is None, "New 2000ms interval not applied: {}".format(result)
|
||||
|
||||
step("New 2000ms interval confirmed applied (speeding up allowed immediately)")
|
||||
|
||||
# Capture packets with new interval
|
||||
packets = capture_bfd_packets(r1, "r1-eth0", duration=30)
|
||||
|
||||
if len(packets) == 0:
|
||||
pytest.skip("No BFD packets captured")
|
||||
|
||||
intervals = calculate_intervals(packets, "192.168.1.1")
|
||||
|
||||
assert len(intervals) >= 8, \
|
||||
"Not enough intervals calculated (got {}). Total packets: {}, R1 packets: {}".format(
|
||||
len(intervals), len(packets),
|
||||
len([p for p in packets if p[1] == "192.168.1.1"]))
|
||||
|
||||
# Verify jitter range for 2000ms: 1500ms - 2000ms (75% - 100%)
|
||||
# Add 5% tolerance for processing/network delays
|
||||
min_expected = 1500
|
||||
max_expected = 2100 # 2000ms + 5% tolerance
|
||||
|
||||
for interval in intervals:
|
||||
assert min_expected <= interval <= max_expected, \
|
||||
"Interval {}ms outside jitter range [{}, {}] (with 5% tolerance)".format(
|
||||
interval, min_expected, max_expected)
|
||||
|
||||
# Check xmt_TO_actual field
|
||||
output = r1.vtysh_cmd("show bfd peers")
|
||||
match = re.search(r'Transmission interval \(actual with jitter\):\s+(\d+)ms', output)
|
||||
assert match is not None, "xmt_TO_actual not found"
|
||||
|
||||
actual = int(match.group(1))
|
||||
assert min_expected <= actual <= max_expected, \
|
||||
"xmt_TO_actual {}ms outside range [{}, {}]".format(actual, min_expected, max_expected)
|
||||
|
||||
step("Timer change with jitter verified: new interval 2000ms, jitter in range")
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
args = ["-s"] + sys.argv[1:]
|
||||
sys.exit(pytest.main(args))
|
||||
|
||||
Loading…
Add table
Reference in a new issue