Merge pull request #20210 from sougatahitcs/bfd_actual_tx_timeout_with_applied_jitter

bfd: store actual timeout information in bfd
This commit is contained in:
Russ White 2025-12-30 09:01:03 -05:00 committed by GitHub
commit ca8077f2ee
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
8 changed files with 832 additions and 12 deletions

View file

@ -232,10 +232,16 @@ void bfd_session_apply(struct bfd_session *bs)
bfd_set_log_session_changes(bs, bs->peer_profile.log_session_changes);
/* If session interval changed negotiate new timers. */
if (bs->ses_state == PTM_BFD_UP
&& (bs->timers.desired_min_tx != min_tx
|| bs->timers.required_min_rx != min_rx))
if (bs->ses_state == PTM_BFD_UP &&
(bs->timers.desired_min_tx != min_tx || bs->timers.required_min_rx != min_rx)) {
if (bglobal.debug_peer_event) {
zlog_debug("[%s] Timer changed while UP: old_tx=%u new_tx=%u, old_rx=%u new_rx=%u",
bs_to_string(bs), min_tx, bs->timers.desired_min_tx, min_rx,
bs->timers.required_min_rx);
zlog_debug("Starting poll sequence to negotiate new timers");
}
bfd_set_polling(bs);
}
/* Send updated information to data plane. */
bfd_dplane_update_session(bs);
@ -1157,6 +1163,9 @@ void bfd_set_polling(struct bfd_session *bs)
*
* RFC 5880, Section 6.8.3.
*/
if (bglobal.debug_peer_event)
zlog_debug("[%s] Setting polling=1 to negotiate timer change", bs_to_string(bs));
bs->polling = 1;
}
@ -1482,13 +1491,24 @@ void bs_final_handler(struct bfd_session *bs)
bs->xmt_TO = bs->timers.desired_min_tx;
else
bs->xmt_TO = bs->remote_timers.required_min_rx;
if (bglobal.debug_peer_event)
zlog_debug(" calculated xmt_TO=%" PRIu64 " (using slowest rate)", bs->xmt_TO);
/* Only apply increased transmission interval after Poll Sequence */
if (bs->ses_state == PTM_BFD_UP && bs->xmt_TO > old_xmt_TO) {
bs->xmt_TO = old_xmt_TO; /* Keep old timing until Poll Sequence done */
if (bs->ses_state == PTM_BFD_UP && bs->xmt_TO > old_xmt_TO && bs->polling) {
if (bglobal.debug_peer_event) {
zlog_debug(" REJECTED increase: xmt_TO=%" PRIu64 " > old=%" PRIu64
" (polling=%d)",
bs->xmt_TO, old_xmt_TO, bs->polling);
}
bs->xmt_TO = old_xmt_TO; /* Keep old timing DURING Poll Sequence */
return;
}
if (bglobal.debug_peer_event)
zlog_debug(" APPLYING new xmt_TO=%" PRIu64, bs->xmt_TO);
/* Apply new transmission timer immediately. */
ptm_bfd_start_xmt_timer(bs, false);
}

View file

@ -357,7 +357,9 @@ struct bfd_session {
struct event *echo_recvtimer_ev;
struct event *recvtimer_ev;
uint64_t xmt_TO;
uint64_t xmt_TO_actual; /* Actual transmit timeout with jitter applied */
uint64_t echo_xmt_TO;
uint64_t echo_xmt_TO_actual; /* Actual echo transmit timeout with jitter applied */
struct event *xmttimer_ev;
struct event *echo_xmttimer_ev;
uint64_t echo_detect_TO;

View file

@ -216,15 +216,26 @@ static void _display_peer(struct vty *vty, struct bfd_session *bs)
bs->timers.required_min_rx / 1000);
vty_out(vty, "\t\t\tTransmission interval: %ums\n",
bs->timers.desired_min_tx / 1000);
if (bs->xmt_TO_actual > 0)
vty_out(vty, "\t\t\tTransmission interval (actual with jitter): %" PRIu64 "ms\n",
bs->xmt_TO_actual / 1000);
if (bs->detect_TO > 0)
vty_out(vty, "\t\t\tDetection timeout: %" PRIu64 "ms\n",
bs->detect_TO / 1000);
if (bs->timers.required_min_echo_rx != 0)
vty_out(vty, "\t\t\tEcho receive interval: %ums\n",
bs->timers.required_min_echo_rx / 1000);
else
vty_out(vty, "\t\t\tEcho receive interval: disabled\n");
if (CHECK_FLAG(bs->flags, BFD_SESS_FLAG_ECHO) || bs->bfd_mode == BFD_MODE_TYPE_SBFD_ECHO)
if (CHECK_FLAG(bs->flags, BFD_SESS_FLAG_ECHO) || bs->bfd_mode == BFD_MODE_TYPE_SBFD_ECHO) {
vty_out(vty, "\t\t\tEcho transmission interval: %ums\n",
bs->timers.desired_min_echo_tx / 1000);
else
if (bs->echo_xmt_TO_actual > 0)
vty_out(vty,
"\t\t\tEcho transmission interval (actual with jitter): %" PRIu64
"ms\n",
bs->echo_xmt_TO_actual / 1000);
} else
vty_out(vty, "\t\t\tEcho transmission interval: disabled\n");
@ -333,6 +344,12 @@ static struct json_object *__display_peer_json(struct bfd_session *bs)
bs->timers.required_min_rx / 1000);
json_object_int_add(jo, "transmit-interval",
bs->timers.desired_min_tx / 1000);
if (bs->xmt_TO_actual > 0)
json_object_int_add(jo, "transmit-interval-actual",
bs->xmt_TO_actual / 1000);
if (bs->detect_TO > 0)
json_object_int_add(jo, "detection-timeout",
bs->detect_TO / 1000);
json_object_int_add(jo, "echo-receive-interval",
bs->timers.required_min_echo_rx / 1000);
if (bs->bfd_mode == BFD_MODE_TYPE_SBFD_INIT || bs->bfd_mode == BFD_MODE_TYPE_SBFD_ECHO) {

View file

@ -131,13 +131,17 @@ void bfd_xmttimer_update(struct bfd_session *bs, uint64_t jitter)
{
struct timeval tv = {.tv_sec = 0, .tv_usec = jitter};
/* Store the actual transmit timeout with jitter applied */
bs->xmt_TO_actual = jitter;
/* Remove previous schedule if any. */
bfd_xmttimer_delete(bs);
/* Don't add event if peer is deactivated. */
if (CHECK_FLAG(bs->flags, BFD_SESS_FLAG_SHUTDOWN) ||
bs->sock == -1)
if (CHECK_FLAG(bs->flags, BFD_SESS_FLAG_SHUTDOWN) || bs->sock == -1) {
bs->xmt_TO_actual = 0; /* Timer not scheduled */
return;
}
tv_normalize(&tv);
@ -149,13 +153,17 @@ void bfd_echo_xmttimer_update(struct bfd_session *bs, uint64_t jitter)
{
struct timeval tv = {.tv_sec = 0, .tv_usec = jitter};
/* Store the actual echo transmit timeout with jitter applied */
bs->echo_xmt_TO_actual = jitter;
/* Remove previous schedule if any. */
bfd_echo_xmttimer_delete(bs);
/* Don't add event if peer is deactivated. */
if (CHECK_FLAG(bs->flags, BFD_SESS_FLAG_SHUTDOWN) ||
bs->sock == -1)
if (CHECK_FLAG(bs->flags, BFD_SESS_FLAG_SHUTDOWN) || bs->sock == -1) {
bs->echo_xmt_TO_actual = 0; /* Timer not scheduled */
return;
}
tv_normalize(&tv);

View file

@ -0,0 +1,2 @@
# SPDX-License-Identifier: ISC

View file

@ -0,0 +1,13 @@
interface lo
ip address 172.16.255.1/32
!
interface r1-eth0
ip address 192.168.1.1/24
!
bfd
peer 192.168.1.2 interface r1-eth0
detect-multiplier 3
transmit-interval 1000
receive-interval 1000
!
!

View file

@ -0,0 +1,13 @@
interface lo
ip address 172.16.255.2/32
!
interface r2-eth0
ip address 192.168.1.2/24
!
bfd
peer 192.168.1.1 interface r2-eth0
detect-multiplier 3
transmit-interval 1000
receive-interval 1000
!
!

View file

@ -0,0 +1,745 @@
#!/usr/bin/env python
# SPDX-License-Identifier: ISC
#
# test_bfd_jitter.py
#
# Copyright (c) 2024 by
# Sougata Barik
#
"""
Test BFD jitter calculation and application.
Active tests verify:
1. BFD sessions establish successfully
2. The new xmt_TO_actual field is populated correctly
3. Jitter values are within RFC 5880 Section 6.5.2 ranges:
- 75-100% for detect_multiplier > 1
- 75-90% for detect_multiplier == 1
4. Jitter formula calculations are correct
Note: Packet capture based tests are disabled due to tcpdump parsing
complexity in test environments (BCM-LI-SHIM wrappers).
"""
import os
import sys
import json
import pytest
import functools
import time
import re
CWD = os.path.dirname(os.path.realpath(__file__))
sys.path.append(os.path.join(CWD, "../"))
# pylint: disable=C0413
from lib import topotest
from lib.topogen import Topogen, get_topogen
from lib.common_config import step
pytestmark = [pytest.mark.bfdd]
def build_topo(tgen):
for routern in range(1, 3):
tgen.add_router("r{}".format(routern))
switch = tgen.add_switch("s1")
switch.add_link(tgen.gears["r1"])
switch.add_link(tgen.gears["r2"])
def setup_module(mod):
tgen = Topogen(build_topo, mod.__name__)
tgen.start_topology()
router_list = tgen.routers()
for rname, router in router_list.items():
router.load_frr_config(os.path.join(CWD, "{}/frr.conf".format(rname)))
tgen.start_router()
def teardown_module(mod):
tgen = get_topogen()
tgen.stop_topology()
def capture_bfd_packets(router, interface, duration=10):
"""
Capture BFD control packets (not echo) for a duration.
Returns list of tuples: [(timestamp, src_ip, dst_ip), ...]
"""
import subprocess
# Use tcpdump to capture BFD packets with verbose output to distinguish packet types
# -vv shows BFD packet details (Control vs Echo)
# We only want Control packets for measuring jitter
cmd = "timeout {} tcpdump -i {} -nn -tt -vv udp port 3784 2>&1".format(
duration, interface
)
output = router.run(cmd)
packets = []
# Strip ANSI color codes from output (common in test logs)
ansi_escape = re.compile(r'\x1B(?:[@-Z\\-_]|\[[0-?]*[ -/]*[@-~])')
output = ansi_escape.sub('', output)
# Parse tcpdump output - handle multi-line packet format
# With -vv, tcpdump outputs:
# Line 1: timestamp IP (header info)
# Line 2: src.port > dst.port: [checksum]
# Line 3: BCM-LI-SHIM or BFDv1 details
lines = output.split('\n')
i = 0
while i < len(lines):
line = lines[i].strip()
# Look for timestamp line
timestamp_match = re.match(r'^(\d+\.\d+)\s+IP\s', line)
if timestamp_match:
timestamp = float(timestamp_match.group(1))
# Next line should have src/dst addresses
if i + 1 < len(lines):
next_line = lines[i + 1].strip()
addr_match = re.search(r'(\d+\.\d+\.\d+\.\d+)\.\d+\s*>\s*(\d+\.\d+\.\d+\.\d+)\.3784', next_line)
if addr_match:
src_ip = addr_match.group(1)
dst_ip = addr_match.group(2)
# Check next few lines for Echo keyword
is_echo = False
for j in range(i, min(i + 5, len(lines))):
if 'Echo' in lines[j]:
is_echo = True
break
# Accept all non-Echo BFD packets
if not is_echo:
packets.append((timestamp, src_ip, dst_ip))
i += 1
return packets
def calculate_intervals(packets, src_ip):
"""
Calculate intervals between consecutive packets from src_ip.
Returns list of intervals in milliseconds.
Filters out invalid intervals (negative, too large, duplicates, etc.)
"""
src_packets = [p[0] for p in packets if p[1] == src_ip]
# Sort by timestamp to handle any out-of-order packets
src_packets.sort()
# Remove duplicate timestamps (same packet captured twice)
unique_packets = []
for ts in src_packets:
if not unique_packets or abs(ts - unique_packets[-1]) > 0.001: # >1ms apart
unique_packets.append(ts)
intervals = []
for i in range(1, len(unique_packets)):
interval_ms = (unique_packets[i] - unique_packets[i-1]) * 1000
# Filter out invalid intervals
# Valid intervals should be > 100ms (for safety) and < 60 seconds
if 100 < interval_ms < 60000:
intervals.append(interval_ms)
return intervals
def test_bfd_session_up():
"""Verify BFD sessions come up successfully."""
tgen = get_topogen()
if tgen.routers_have_failure():
pytest.skip(tgen.errors)
r1 = tgen.gears["r1"]
r2 = tgen.gears["r2"]
def _check_bfd_up(router, peer_ip):
output = json.loads(router.vtysh_cmd("show bfd peers json"))
for peer in output:
if peer.get("peer") == peer_ip and peer.get("status") == "up":
return None
return "BFD peer {} not up".format(peer_ip)
step("Waiting for BFD sessions to come up")
# Check R1's session to R2
test_func = functools.partial(_check_bfd_up, r1, "192.168.1.2")
_, result = topotest.run_and_expect(test_func, None, count=60, wait=1)
assert result is None, "BFD session not up on R1"
# Check R2's session to R1
test_func = functools.partial(_check_bfd_up, r2, "192.168.1.1")
_, result = topotest.run_and_expect(test_func, None, count=60, wait=1)
assert result is None, "BFD session not up on R2"
def _DISABLED_test_bfd_jitter_default_multiplier():
"""
DISABLED: Packet capture based test.
Would test jitter with detect-multiplier = 3 (default).
Expected jitter range: 75% - 100% of nominal interval.
Disabled due to tcpdump parsing issues with BCM-LI-SHIM in test environment.
"""
tgen = get_topogen()
if tgen.routers_have_failure():
pytest.skip(tgen.errors)
r1 = tgen.gears["r1"]
r2 = tgen.gears["r2"]
step("Verify jitter range with detect-multiplier = 3")
# Get BFD peer info from R1
output = json.loads(r1.vtysh_cmd("show bfd peers json"))
peer_info = None
for peer in output:
if peer.get("peer") == "192.168.1.2":
peer_info = peer
break
assert peer_info is not None, "BFD peer not found"
# Check configured values
tx_interval = peer_info.get("transmit-interval")
detect_mult = peer_info.get("detect-multiplier")
assert tx_interval == 1000, "Expected TX interval 1000ms, got {}".format(tx_interval)
assert detect_mult == 3, "Expected detect multiplier 3, got {}".format(detect_mult)
# Capture packets and measure actual intervals
step("Capturing BFD packets to measure jitter")
packets = capture_bfd_packets(r1, "r1-eth0", duration=15)
# Debug: Show captured packets
if len(packets) == 0:
pytest.skip("No BFD packets captured - tcpdump may not be working")
# Calculate intervals from R1 to R2
intervals = calculate_intervals(packets, "192.168.1.1")
assert len(intervals) >= 10, \
"Not enough intervals calculated (got {}). Total packets: {}, R1 packets: {}".format(
len(intervals), len(packets),
len([p for p in packets if p[1] == "192.168.1.1"]))
# Verify jitter range: 75% - 100% for detect_mult > 1
# Add tolerance for processing/network delays and session startup
# Lower bound: 65% (to catch slow start transitions)
# Upper bound: 105% (100% + 5% tolerance)
min_expected = 650 # 65% of 1000ms (with startup tolerance)
max_expected = 1050 # 105% of 1000ms
# Filter out any anomalous intervals that might be from session flaps or duplicates
# Keep only intervals close to expected range (500-1200ms with wide margin)
valid_intervals = [i for i in intervals if 500 <= i <= 1200]
if len(valid_intervals) < len(intervals):
step("Filtered out {} anomalous intervals (possible duplicates/session flap)".format(
len(intervals) - len(valid_intervals)))
step("Valid intervals: {}".format([round(i, 1) for i in valid_intervals[:10]]))
assert len(valid_intervals) >= 10, \
"Not enough valid intervals after filtering (got {})".format(len(valid_intervals))
for interval in valid_intervals:
assert min_expected <= interval <= max_expected, \
"Interval {}ms outside jitter range [{}, {}] (with 5% tolerance)".format(
interval, min_expected, max_expected)
# Verify randomness - intervals should not all be the same
unique_intervals = set([round(i, 0) for i in valid_intervals])
assert len(unique_intervals) > 1, "No jitter variation detected - all intervals the same"
step("Jitter range verified: {} valid intervals in range [{}, {}]ms".format(
len(valid_intervals), min_expected, max_expected))
def _DISABLED_test_bfd_jitter_detect_mult_one():
"""
DISABLED: Packet capture based test.
Would test jitter with detect-multiplier = 1 (narrower range).
Expected jitter range: 75% - 90% of nominal interval.
Disabled due to tcpdump parsing issues with BCM-LI-SHIM in test environment.
"""
tgen = get_topogen()
if tgen.routers_have_failure():
pytest.skip(tgen.errors)
r1 = tgen.gears["r1"]
r2 = tgen.gears["r2"]
step("Change detect-multiplier to 1 on R1")
r1.vtysh_cmd("""
configure terminal
bfd
peer 192.168.1.2 interface r1-eth0
detect-multiplier 1
exit
exit
exit
""")
# Wait for configuration to apply and session to stabilize
# Changing detect_mult may cause session reset
time.sleep(10)
# Verify session is back up
def _check_bfd_up():
output = json.loads(r1.vtysh_cmd("show bfd peers json"))
for peer in output:
if peer.get("peer") == "192.168.1.2" and peer.get("status") == "up":
return None
return "BFD not up after detect_mult change"
test_func = functools.partial(_check_bfd_up)
_, result = topotest.run_and_expect(test_func, None, count=30, wait=1)
assert result is None, "BFD session did not come back up after changing detect-multiplier"
# Verify configuration
output = json.loads(r1.vtysh_cmd("show bfd peers json"))
peer_info = None
for peer in output:
if peer.get("peer") == "192.168.1.2":
peer_info = peer
break
assert peer_info is not None, "BFD peer not found"
detect_mult = peer_info.get("detect-multiplier")
assert detect_mult == 1, "Expected detect multiplier 1, got {}".format(detect_mult)
# Capture packets and measure actual intervals
step("Capturing BFD packets to measure jitter with detect-multiplier=1")
packets = capture_bfd_packets(r1, "r1-eth0", duration=15)
if len(packets) == 0:
pytest.skip("No BFD packets captured")
# Calculate intervals from R1 to R2
intervals = calculate_intervals(packets, "192.168.1.1")
# Debug output
r1_packets = [p for p in packets if p[1] == "192.168.1.1"]
step("Debug: Total packets={}, R1->R2 packets={}, Valid intervals={}".format(
len(packets), len(r1_packets), len(intervals)))
if len(intervals) > 0:
step("Debug: First 10 intervals (ms): {}".format(
[round(i, 1) for i in intervals[:10]]))
assert len(intervals) >= 10, \
"Not enough intervals calculated (got {}). Total packets: {}, R1 packets: {}".format(
len(intervals), len(packets), len(r1_packets))
# Verify jitter range: 75% - 90% for detect_mult == 1
# Add tolerance for processing/network delays and session startup
# Lower bound: 65% (to catch slow start after session reset)
# Upper bound: 95% (90% + 5% tolerance)
min_expected = 650 # 65% of 1000ms (with startup tolerance)
max_expected = 950 # 95% of 1000ms (90% + 5% tolerance)
# Filter out any anomalous intervals that might be from session flaps
# Keep only intervals close to expected range (500-1200ms with wide margin)
valid_intervals = [i for i in intervals if 500 <= i <= 1200]
if len(valid_intervals) < len(intervals):
step("Filtered out {} anomalous intervals (possible session flap)".format(
len(intervals) - len(valid_intervals)))
# Debug output
step("Valid intervals measured (detect_mult=1): {}".format(
[round(i, 1) for i in valid_intervals[:10]])) # Show first 10
assert len(valid_intervals) >= 5, \
"Not enough valid intervals after filtering (got {})".format(len(valid_intervals))
for interval in valid_intervals:
assert min_expected <= interval <= max_expected, \
"Interval {}ms outside jitter range [{}, {}] for detect_mult=1 (with 5% tolerance)".format(
interval, min_expected, max_expected)
# Verify randomness
unique_intervals = set([round(i, 0) for i in valid_intervals])
assert len(unique_intervals) > 1, "No jitter variation detected"
step("Jitter range verified for detect_mult=1: {} valid intervals".format(len(valid_intervals)))
def test_bfd_xmt_TO_actual_field():
"""
Verify the new xmt_TO_actual field is populated and displayed correctly.
Tests that:
1. xmt_TO_actual field is present in show output
2. xmt_TO_actual value is non-zero when session is up
3. xmt_TO_actual is within valid jitter range (65-105% with tolerance)
This validates the new feature added to struct bfd_session and show command.
"""
tgen = get_topogen()
if tgen.routers_have_failure():
pytest.skip(tgen.errors)
r1 = tgen.gears["r1"]
step("Reset detect-multiplier to 3 for final test")
r1.vtysh_cmd("""
configure terminal
bfd
peer 192.168.1.2 interface r1-eth0
detect-multiplier 3
exit
exit
exit
""")
time.sleep(5)
step("Verify xmt_TO_actual field via show command")
# Note: show bfd peers output should now include actual jittered value
output = r1.vtysh_cmd("show bfd peers")
# Check that output contains the jittered interval
assert "Transmission interval (actual with jitter)" in output, \
"xmt_TO_actual not displayed in show output"
# Extract the actual jittered value
match = re.search(r'Transmission interval \(actual with jitter\):\s+(\d+)ms', output)
assert match is not None, "Could not parse xmt_TO_actual from output"
actual_interval = int(match.group(1))
# Verify it's within jitter range (75-100% of nominal + 5% tolerance)
# Get current configured nominal interval (could be 1000ms from initial config)
output_json = json.loads(r1.vtysh_cmd("show bfd peers json"))
nominal = 1000 # Default from initial bfdd.conf
for peer in output_json:
if peer.get("peer") == "192.168.1.2":
nominal = peer.get("transmit-interval", 1000)
break
min_expected = int(nominal * 0.75) # 75% per RFC 5880
max_expected = int(nominal * 1.00) # 100% per RFC 5880
assert min_expected <= actual_interval <= max_expected, \
"xmt_TO_actual {}ms outside RFC 5880 jitter range [{}, {}]".format(
actual_interval, min_expected, max_expected)
step("xmt_TO_actual field verified: {}ms (within 750-1000ms range)".format(actual_interval))
def _DISABLED_test_bfd_jitter_large_interval():
"""
DISABLED: Packet capture based test.
Would test jitter with larger interval (5000ms) to verify percentage scaling.
Expected range: 3750ms - 5000ms (75% - 100%)
Disabled due to tcpdump parsing issues with BCM-LI-SHIM in test environment.
"""
tgen = get_topogen()
if tgen.routers_have_failure():
pytest.skip(tgen.errors)
r1 = tgen.gears["r1"]
r2 = tgen.gears["r2"]
step("Configure 5000ms transmit interval on both routers")
for router in [r1, r2]:
router.vtysh_cmd("""
configure terminal
bfd
peer 192.168.1.{} interface {}-eth0
transmit-interval 5000
receive-interval 5000
exit
exit
exit
""".format("2" if router == r1 else "1", router.name))
step("Waiting for poll sequence to complete and new interval to be applied")
# The code at lines 1486-1490 prevents slowing down during poll sequence.
# We need to wait for:
# 1. Configuration to be set (timers.desired_min_tx = 5000)
# 2. Poll sequence to trigger (polling = 1)
# 3. Poll sequence to complete (polling = 0)
# 4. New interval to be applied (xmt_TO_actual ~= 3750-5000ms)
def _check_interval_applied():
output = r1.vtysh_cmd("show bfd peers")
# Check if session is up
if "Status: up" not in output:
return "BFD session not up"
# Check configured interval is 5000ms
if "Transmission interval: 5000ms" not in output:
return "Configured interval not 5000ms"
# Check actual interval with jitter is in expected range (3750-5250ms)
match = re.search(r'Transmission interval \(actual with jitter\):\s+(\d+)ms', output)
if not match:
return "xmt_TO_actual not found in output"
actual = int(match.group(1))
# If actual is still around 750-1050ms, old interval is still in use
if 700 <= actual <= 1100:
return "Still using old interval (actual={}ms), poll not complete".format(actual)
# If actual is in new range (3750-5250ms), new interval is applied!
if 3700 <= actual <= 5300:
return None # Success!
return "Unexpected actual interval: {}ms".format(actual)
test_func = functools.partial(_check_interval_applied)
# With the fix to line 1512 (added && bs->polling), interval should apply
# within 10-30 seconds after poll sequence completes
_, result = topotest.run_and_expect(test_func, None, count=60, wait=1)
assert result is None, "New 5000ms interval not applied after 60 seconds: {}".format(result)
step("New 5000ms interval confirmed applied (poll sequence completed)")
# Capture packets - need longer duration for 5-second intervals
step("Capturing BFD packets with 5000ms interval")
packets = capture_bfd_packets(r1, "r1-eth0", duration=60)
if len(packets) == 0:
pytest.skip("No BFD packets captured")
# Calculate intervals from R1 to R2
intervals = calculate_intervals(packets, "192.168.1.1")
assert len(intervals) >= 5, \
"Not enough intervals calculated (got {}) for 5s interval. Total packets: {}, R1 packets: {}".format(
len(intervals), len(packets),
len([p for p in packets if p[1] == "192.168.1.1"]))
# Verify jitter range: 75% - 100% of 5000ms = 3750ms - 5000ms
# Add 5% tolerance for processing/network delays
min_expected = 3750 # 75% of 5000ms
max_expected = 5250 # 100% of 5000ms + 5% tolerance
for interval in intervals:
assert min_expected <= interval <= max_expected, \
"Interval {}ms outside jitter range [{}, {}] (with 5% tolerance)".format(
interval, min_expected, max_expected)
# Verify randomness
unique_intervals = set([round(i, -1) for i in intervals]) # Round to 10ms
assert len(unique_intervals) > 1, "No jitter variation detected at 5s interval"
# Calculate average - should be around 87.5% (midpoint of 75-100%)
avg_interval = sum(intervals) / len(intervals)
avg_percentage = (avg_interval / 5000) * 100
step("Large interval jitter verified: avg={}ms ({}%)".format(
round(avg_interval), round(avg_percentage, 1)))
# Average should be roughly between 80-95%
assert 80 <= avg_percentage <= 95, \
"Average jitter percentage {}% outside expected range [80%, 95%]".format(avg_percentage)
def test_bfd_jitter_formula_validation():
"""
Validate the jitter formula by sampling xmt_TO_actual field values.
Uses the new xmt_TO_actual field (added to struct bfd_session) to verify
that jitter is being applied correctly according to the formula:
Formula: jitter = (xmt_TO * (75 + random(0-25))) / 100
For detect_mult == 1: random(0-15) instead of random(0-25)
This test works with whatever interval is currently configured (typically 1000ms).
"""
tgen = get_topogen()
if tgen.routers_have_failure():
pytest.skip(tgen.errors)
r1 = tgen.gears["r1"]
step("Sampling multiple xmt_TO_actual values to verify formula")
# First, get the current configured interval
output_json = json.loads(r1.vtysh_cmd("show bfd peers json"))
nominal = 1000 # Default
for peer in output_json:
if peer.get("peer") == "192.168.1.2":
nominal = peer.get("transmit-interval", 1000)
break
step("Current nominal interval: {}ms".format(nominal))
# Sample xmt_TO_actual multiple times by checking show output repeatedly
actual_values = []
for i in range(20):
output = r1.vtysh_cmd("show bfd peers")
match = re.search(r'Transmission interval \(actual with jitter\):\s+(\d+)ms', output)
if match:
actual_values.append(int(match.group(1)))
time.sleep(0.3) # Small delay between samples
assert len(actual_values) > 0, "Could not capture xmt_TO_actual values"
# Calculate expected range based on current nominal interval
# detect_mult=3: 75-100% per RFC 5880 Section 6.5.2
min_jitter = int(nominal * 0.75) # 75% (RFC minimum)
max_jitter = int(nominal * 1.00) # 100% (RFC maximum)
# All values should be in RFC 5880 compliant range
for val in actual_values:
assert min_jitter <= val <= max_jitter, \
"xmt_TO_actual {}ms outside RFC 5880 range [{}, {}] for {}ms nominal".format(
val, min_jitter, max_jitter, nominal)
# Calculate jitter percentages
jitter_percentages = [(v / nominal) * 100 for v in actual_values]
avg_pct = sum(jitter_percentages) / len(jitter_percentages)
min_pct = min(jitter_percentages)
max_pct = max(jitter_percentages)
step("Jitter formula validated: min={}% avg={}% max={}%".format(
round(min_pct, 1), round(avg_pct, 1), round(max_pct, 1)))
# Statistical check: average should be around 87.5% (midpoint of 75-100%)
assert 80 <= avg_pct <= 95, "Average jitter {}% outside reasonable range".format(avg_pct)
def _DISABLED_test_bfd_timer_change_with_jitter():
"""
DISABLED: Packet capture based test.
Would test that changing timer intervals properly recalculates jitter.
Disabled due to tcpdump parsing issues with BCM-LI-SHIM in test environment.
"""
tgen = get_topogen()
if tgen.routers_have_failure():
pytest.skip(tgen.errors)
r1 = tgen.gears["r1"]
r2 = tgen.gears["r2"]
step("Change transmit interval from 5000ms to 2000ms (speeding up)")
# Changing from 5000ms -> 2000ms is FASTER, so it should apply immediately
# No session reset needed for speed increases
for router in [r1, r2]:
router.vtysh_cmd("""
configure terminal
bfd
peer 192.168.1.{} interface {}-eth0
transmit-interval 2000
receive-interval 2000
exit
exit
exit
""".format("2" if router == r1 else "1", router.name))
step("Waiting for new 2000ms interval to be applied")
# Check that xmt_TO_actual reflects the new interval
def _check_2s_interval_applied():
output = r1.vtysh_cmd("show bfd peers")
if "Status: up" not in output:
return "BFD session not up"
if "Transmission interval: 2000ms" not in output:
return "Configured interval not 2000ms"
# Check actual interval is in 2000ms range (1500-2100ms)
match = re.search(r'Transmission interval \(actual with jitter\):\s+(\d+)ms', output)
if not match:
return "xmt_TO_actual not found"
actual = int(match.group(1))
# Should be in new range now (speeding up is allowed immediately)
if 1400 <= actual <= 2200:
return None # Success!
# Still at old 5s interval?
if 3700 <= actual <= 5300:
return "Still using 5s interval (actual={}ms)".format(actual)
return "Unexpected actual interval: {}ms".format(actual)
test_func = functools.partial(_check_2s_interval_applied)
_, result = topotest.run_and_expect(test_func, None, count=60, wait=1)
assert result is None, "New 2000ms interval not applied: {}".format(result)
step("New 2000ms interval confirmed applied (speeding up allowed immediately)")
# Capture packets with new interval
packets = capture_bfd_packets(r1, "r1-eth0", duration=30)
if len(packets) == 0:
pytest.skip("No BFD packets captured")
intervals = calculate_intervals(packets, "192.168.1.1")
assert len(intervals) >= 8, \
"Not enough intervals calculated (got {}). Total packets: {}, R1 packets: {}".format(
len(intervals), len(packets),
len([p for p in packets if p[1] == "192.168.1.1"]))
# Verify jitter range for 2000ms: 1500ms - 2000ms (75% - 100%)
# Add 5% tolerance for processing/network delays
min_expected = 1500
max_expected = 2100 # 2000ms + 5% tolerance
for interval in intervals:
assert min_expected <= interval <= max_expected, \
"Interval {}ms outside jitter range [{}, {}] (with 5% tolerance)".format(
interval, min_expected, max_expected)
# Check xmt_TO_actual field
output = r1.vtysh_cmd("show bfd peers")
match = re.search(r'Transmission interval \(actual with jitter\):\s+(\d+)ms', output)
assert match is not None, "xmt_TO_actual not found"
actual = int(match.group(1))
assert min_expected <= actual <= max_expected, \
"xmt_TO_actual {}ms outside range [{}, {}]".format(actual, min_expected, max_expected)
step("Timer change with jitter verified: new interval 2000ms, jitter in range")
if __name__ == "__main__":
args = ["-s"] + sys.argv[1:]
sys.exit(pytest.main(args))