Commit 9fd8a0c

tcp: add accessors to read/set tp->snd_cwnd
We had various bugs over the years with code
breaking the assumption that tp->snd_cwnd is greater
than zero.

Lately, syzbot reported that the WARN_ON_ONCE(!tp->prior_cwnd) added
in commit 8b8a321 ("tcp: fix zero cwnd in tcp_cwnd_reduction")
can trigger, and without a repro we would have to spend
considerable time finding the bug.

Instead of complaining too late, we want to catch where
and when tp->snd_cwnd is set to an illegal value.

Signed-off-by: Eric Dumazet <[email protected]>
Suggested-by: Yuchung Cheng <[email protected]>
Cc: Neal Cardwell <[email protected]>
Acked-by: Yuchung Cheng <[email protected]>
Link: https://lore.kernel.org/r/[email protected]
Signed-off-by: Jakub Kicinski <[email protected]>
Signed-off-by: Juhyung Park <[email protected]>
Eric Dumazet authored and bachnxuan committed Feb 28, 2025
1 parent a9c89e6 commit 9fd8a0c
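
For context while reading the diff below: the patch is mechanical except for the two new helpers in include/net/tcp.h. The following stand-alone user-space sketch (illustrative only; the struct and warning plumbing are simplified stand-ins for the kernel's struct tcp_sock and WARN_ON_ONCE) shows why the setter checks (int)val <= 0 rather than val == 0: the cast to int also flags huge values produced by unsigned underflow, such as a buggy cwnd - acked that wraps past zero.

/* Illustrative sketch of the accessor pattern -- NOT kernel code. */
#include <stdio.h>
#include <stdint.h>

struct tp_sketch {
	uint32_t snd_cwnd;	/* congestion window in packets; must stay > 0 */
};

static uint32_t tcp_snd_cwnd(const struct tp_sketch *tp)
{
	return tp->snd_cwnd;
}

static void tcp_snd_cwnd_set(struct tp_sketch *tp, uint32_t val)
{
	/* One-shot warning, standing in for the kernel's WARN_ON_ONCE().
	 * The (int) cast makes both val == 0 and underflow-wrapped values
	 * (top bit set, hence negative as int) trip the check. */
	static int warned;

	if ((int)val <= 0 && !warned) {
		warned = 1;
		fprintf(stderr, "WARN: illegal snd_cwnd %u\n", val);
	}
	tp->snd_cwnd = val;
}

int main(void)
{
	struct tp_sketch tp = { .snd_cwnd = 10 };

	tcp_snd_cwnd_set(&tp, tcp_snd_cwnd(&tp) + 1);	/* fine: cwnd = 11 */
	tcp_snd_cwnd_set(&tp, tcp_snd_cwnd(&tp) - 20);	/* bug: wraps, warns */
	printf("cwnd is now %u\n", tcp_snd_cwnd(&tp));
	return 0;
}

In the kernel, the WARN_ON_ONCE in tcp_snd_cwnd_set() additionally dumps a stack trace, so the first illegal write is pinpointed at its call site instead of surfacing later as an inconsistency elsewhere.
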
Showing 27 changed files with 207 additions and 191 deletions.
19 changes: 15 additions & 4 deletions include/net/tcp.h
@@ -1212,9 +1212,20 @@ static inline unsigned int tcp_packets_in_flight(const struct tcp_sock *tp)
 
 #define TCP_INFINITE_SSTHRESH	0x7fffffff
 
+static inline u32 tcp_snd_cwnd(const struct tcp_sock *tp)
+{
+	return tp->snd_cwnd;
+}
+
+static inline void tcp_snd_cwnd_set(struct tcp_sock *tp, u32 val)
+{
+	WARN_ON_ONCE((int)val <= 0);
+	tp->snd_cwnd = val;
+}
+
 static inline bool tcp_in_slow_start(const struct tcp_sock *tp)
 {
-	return tp->snd_cwnd < tp->snd_ssthresh;
+	return tcp_snd_cwnd(tp) < tp->snd_ssthresh;
 }
 
 static inline bool tcp_in_initial_slowstart(const struct tcp_sock *tp)
@@ -1240,8 +1251,8 @@ static inline __u32 tcp_current_ssthresh(const struct sock *sk)
 		return tp->snd_ssthresh;
 	else
 		return max(tp->snd_ssthresh,
-			   ((tp->snd_cwnd >> 1) +
-			    (tp->snd_cwnd >> 2)));
+			   ((tcp_snd_cwnd(tp) >> 1) +
+			    (tcp_snd_cwnd(tp) >> 2)));
 }
 
 /* Use define here intentionally to get WARN_ON location shown at the caller */
@@ -1286,7 +1297,7 @@ static inline bool tcp_is_cwnd_limited(const struct sock *sk)
 
 	/* If in slow start, ensure cwnd grows to twice what was ACKed. */
 	if (tcp_in_slow_start(tp))
-		return tp->snd_cwnd < 2 * tp->max_packets_out;
+		return tcp_snd_cwnd(tp) < 2 * tp->max_packets_out;
 
 	return false;
 }
2 changes: 1 addition & 1 deletion include/trace/events/tcp.h
@@ -276,7 +276,7 @@ TRACE_EVENT(tcp_probe,
 		__entry->data_len = skb->len - __tcp_hdrlen(th);
 		__entry->snd_nxt = tp->snd_nxt;
 		__entry->snd_una = tp->snd_una;
-		__entry->snd_cwnd = tp->snd_cwnd;
+		__entry->snd_cwnd = tcp_snd_cwnd(tp);
 		__entry->snd_wnd = tp->snd_wnd;
 		__entry->rcv_wnd = tp->rcv_wnd;
 		__entry->ssthresh = tcp_current_ssthresh(sk);
2 changes: 1 addition & 1 deletion net/core/filter.c
@@ -4892,7 +4892,7 @@ static int _bpf_setsockopt(struct sock *sk, int level, int optname,
 			if (val <= 0 || tp->data_segs_out > tp->syn_data)
 				ret = -EINVAL;
 			else
-				tp->snd_cwnd = val;
+				tcp_snd_cwnd_set(tp, val);
 			break;
 		case TCP_BPF_SNDCWND_CLAMP:
 			if (val <= 0) {
8 changes: 4 additions & 4 deletions net/ipv4/tcp.c
@@ -430,7 +430,7 @@ void tcp_init_sock(struct sock *sk)
 	 * algorithms that we must have the following bandaid to talk
 	 * efficiently to them. -DaveM
 	 */
-	tp->snd_cwnd = TCP_INIT_CWND;
+	tcp_snd_cwnd_set(tp, TCP_INIT_CWND);
 
 	/* There's a bubble in the pipe until at least the first ACK. */
 	tp->app_limited = ~0U;
@@ -2816,7 +2816,7 @@ int tcp_disconnect(struct sock *sk, int flags)
 	icsk->icsk_rto_min = TCP_RTO_MIN;
 	icsk->icsk_delack_max = TCP_DELACK_MAX;
 	tp->snd_ssthresh = TCP_INFINITE_SSTHRESH;
-	tp->snd_cwnd = TCP_INIT_CWND;
+	tcp_snd_cwnd_set(tp, TCP_INIT_CWND);
 	tp->snd_cwnd_cnt = 0;
 	tp->is_cwnd_limited = 0;
 	tp->max_packets_out = 0;
@@ -3527,7 +3527,7 @@ void tcp_get_info(struct sock *sk, struct tcp_info *info)
 	info->tcpi_max_pacing_rate = rate64;
 
 	info->tcpi_reordering = tp->reordering;
-	info->tcpi_snd_cwnd = tp->snd_cwnd;
+	info->tcpi_snd_cwnd = tcp_snd_cwnd(tp);
 
 	if (info->tcpi_state == TCP_LISTEN) {
 		/* listeners aliased fields :
@@ -3686,7 +3686,7 @@ struct sk_buff *tcp_get_timestamping_opt_stats(const struct sock *sk,
 	rate64 = tcp_compute_delivery_rate(tp);
 	nla_put_u64_64bit(stats, TCP_NLA_DELIVERY_RATE, rate64, TCP_NLA_PAD);
 
-	nla_put_u32(stats, TCP_NLA_SND_CWND, tp->snd_cwnd);
+	nla_put_u32(stats, TCP_NLA_SND_CWND, tcp_snd_cwnd(tp));
 	nla_put_u32(stats, TCP_NLA_REORDERING, tp->reordering);
 	nla_put_u32(stats, TCP_NLA_MIN_RTT, tcp_min_rtt(tp));

20 changes: 10 additions & 10 deletions net/ipv4/tcp_bbr.c
@@ -274,7 +274,7 @@ static void bbr_init_pacing_rate_from_rtt(struct sock *sk)
 	} else {			/* no RTT sample yet */
 		rtt_us = USEC_PER_MSEC;	/* use nominal default RTT */
 	}
-	bw = (u64)tp->snd_cwnd * BW_UNIT;
+	bw = (u64)tcp_snd_cwnd(tp) * BW_UNIT;
 	do_div(bw, rtt_us);
 	sk->sk_pacing_rate = bbr_bw_to_pacing_rate(sk, bw, bbr_high_gain);
 }
@@ -321,9 +321,9 @@ static void bbr_save_cwnd(struct sock *sk)
 	struct bbr *bbr = inet_csk_ca(sk);
 
 	if (bbr->prev_ca_state < TCP_CA_Recovery && bbr->mode != BBR_PROBE_RTT)
-		bbr->prior_cwnd = tp->snd_cwnd;  /* this cwnd is good enough */
+		bbr->prior_cwnd = tcp_snd_cwnd(tp);  /* this cwnd is good enough */
 	else  /* loss recovery or BBR_PROBE_RTT have temporarily cut cwnd */
-		bbr->prior_cwnd = max(bbr->prior_cwnd, tp->snd_cwnd);
+		bbr->prior_cwnd = max(bbr->prior_cwnd, tcp_snd_cwnd(tp));
 }
 
 static void bbr_cwnd_event(struct sock *sk, enum tcp_ca_event event)
@@ -480,7 +480,7 @@ static bool bbr_set_cwnd_to_recover_or_restore(
 	struct tcp_sock *tp = tcp_sk(sk);
 	struct bbr *bbr = inet_csk_ca(sk);
 	u8 prev_state = bbr->prev_ca_state, state = inet_csk(sk)->icsk_ca_state;
-	u32 cwnd = tp->snd_cwnd;
+	u32 cwnd = tcp_snd_cwnd(tp);
 
 	/* An ACK for P pkts should release at most 2*P packets. We do this
 	 * in two steps. First, here we deduct the number of lost packets.
@@ -518,7 +518,7 @@ static void bbr_set_cwnd(struct sock *sk, const struct rate_sample *rs,
 {
 	struct tcp_sock *tp = tcp_sk(sk);
 	struct bbr *bbr = inet_csk_ca(sk);
-	u32 cwnd = tp->snd_cwnd, target_cwnd = 0;
+	u32 cwnd = tcp_snd_cwnd(tp), target_cwnd = 0;
 
 	if (!acked)
 		goto done;  /* no packet fully ACKed; just apply caps */
@@ -542,9 +542,9 @@ static void bbr_set_cwnd(struct sock *sk, const struct rate_sample *rs,
 	cwnd = max(cwnd, bbr_cwnd_min_target);
 
 done:
-	tp->snd_cwnd = min(cwnd, tp->snd_cwnd_clamp);	/* apply global cap */
+	tcp_snd_cwnd_set(tp, min(cwnd, tp->snd_cwnd_clamp));	/* apply global cap */
 	if (bbr->mode == BBR_PROBE_RTT)  /* drain queue, refresh min_rtt */
-		tp->snd_cwnd = min(tp->snd_cwnd, bbr_cwnd_min_target);
+		tcp_snd_cwnd_set(tp, min(tcp_snd_cwnd(tp), bbr_cwnd_min_target));
 }
 
 /* End cycle phase if it's time and/or we hit the phase's in-flight target. */
@@ -854,7 +854,7 @@ static void bbr_update_ack_aggregation(struct sock *sk,
 	bbr->ack_epoch_acked = min_t(u32, 0xFFFFF,
 				     bbr->ack_epoch_acked + rs->acked_sacked);
 	extra_acked = bbr->ack_epoch_acked - expected_acked;
-	extra_acked = min(extra_acked, tp->snd_cwnd);
+	extra_acked = min(extra_acked, tcp_snd_cwnd(tp));
 	if (extra_acked > bbr->extra_acked[bbr->extra_acked_win_idx])
 		bbr->extra_acked[bbr->extra_acked_win_idx] = extra_acked;
 }
@@ -912,7 +912,7 @@ static void bbr_check_probe_rtt_done(struct sock *sk)
 		return;
 
 	bbr->min_rtt_stamp = tcp_jiffies32;  /* wait a while until PROBE_RTT */
-	tp->snd_cwnd = max(tp->snd_cwnd, bbr->prior_cwnd);
+	tcp_snd_cwnd_set(tp, max(tcp_snd_cwnd(tp), bbr->prior_cwnd));
 	bbr_reset_mode(sk);
 }

@@ -1091,7 +1091,7 @@ static u32 bbr_undo_cwnd(struct sock *sk)
 	bbr->full_bw = 0;   /* spurious slow-down; reset full pipe detection */
 	bbr->full_bw_cnt = 0;
 	bbr_reset_lt_bw_sampling(sk);
-	return tcp_sk(sk)->snd_cwnd;
+	return tcp_snd_cwnd(tcp_sk(sk));
 }
 
 /* Entering loss recovery, so save cwnd for when we exit or undo recovery. */
14 changes: 7 additions & 7 deletions net/ipv4/tcp_bic.c
@@ -150,7 +150,7 @@ static void bictcp_cong_avoid(struct sock *sk, u32 ack, u32 acked)
 		if (!acked)
 			return;
 	}
-	bictcp_update(ca, tp->snd_cwnd);
+	bictcp_update(ca, tcp_snd_cwnd(tp));
 	tcp_cong_avoid_ai(tp, ca->cnt, acked);
 }

@@ -166,16 +166,16 @@ static u32 bictcp_recalc_ssthresh(struct sock *sk)
 	ca->epoch_start = 0;	/* end of epoch */
 
 	/* Wmax and fast convergence */
-	if (tp->snd_cwnd < ca->last_max_cwnd && fast_convergence)
-		ca->last_max_cwnd = (tp->snd_cwnd * (BICTCP_BETA_SCALE + beta))
+	if (tcp_snd_cwnd(tp) < ca->last_max_cwnd && fast_convergence)
+		ca->last_max_cwnd = (tcp_snd_cwnd(tp) * (BICTCP_BETA_SCALE + beta))
 			/ (2 * BICTCP_BETA_SCALE);
 	else
-		ca->last_max_cwnd = tp->snd_cwnd;
+		ca->last_max_cwnd = tcp_snd_cwnd(tp);
 
-	if (tp->snd_cwnd <= low_window)
-		return max(tp->snd_cwnd >> 1U, 2U);
+	if (tcp_snd_cwnd(tp) <= low_window)
+		return max(tcp_snd_cwnd(tp) >> 1U, 2U);
 	else
-		return max((tp->snd_cwnd * beta) / BICTCP_BETA_SCALE, 2U);
+		return max((tcp_snd_cwnd(tp) * beta) / BICTCP_BETA_SCALE, 2U);
 }
 
 static void bictcp_state(struct sock *sk, u8 new_state)
30 changes: 15 additions & 15 deletions net/ipv4/tcp_cdg.c
@@ -161,8 +161,8 @@ static void tcp_cdg_hystart_update(struct sock *sk)
 					      LINUX_MIB_TCPHYSTARTTRAINDETECT);
 				NET_ADD_STATS(sock_net(sk),
 					      LINUX_MIB_TCPHYSTARTTRAINCWND,
-					      tp->snd_cwnd);
-				tp->snd_ssthresh = tp->snd_cwnd;
+					      tcp_snd_cwnd(tp));
+				tp->snd_ssthresh = tcp_snd_cwnd(tp);
 				return;
 			}
 		}
@@ -180,8 +180,8 @@ static void tcp_cdg_hystart_update(struct sock *sk)
 					      LINUX_MIB_TCPHYSTARTDELAYDETECT);
 				NET_ADD_STATS(sock_net(sk),
 					      LINUX_MIB_TCPHYSTARTDELAYCWND,
-					      tp->snd_cwnd);
-				tp->snd_ssthresh = tp->snd_cwnd;
+					      tcp_snd_cwnd(tp));
+				tp->snd_ssthresh = tcp_snd_cwnd(tp);
 			}
 		}
 	}
@@ -252,7 +252,7 @@ static bool tcp_cdg_backoff(struct sock *sk, u32 grad)
 			return false;
 	}
 
-	ca->shadow_wnd = max(ca->shadow_wnd, tp->snd_cwnd);
+	ca->shadow_wnd = max(ca->shadow_wnd, tcp_snd_cwnd(tp));
 	ca->state = CDG_BACKOFF;
 	tcp_enter_cwr(sk);
 	return true;
@@ -285,14 +285,14 @@ static void tcp_cdg_cong_avoid(struct sock *sk, u32 ack, u32 acked)
 	}
 
 	if (!tcp_is_cwnd_limited(sk)) {
-		ca->shadow_wnd = min(ca->shadow_wnd, tp->snd_cwnd);
+		ca->shadow_wnd = min(ca->shadow_wnd, tcp_snd_cwnd(tp));
 		return;
 	}
 
-	prior_snd_cwnd = tp->snd_cwnd;
+	prior_snd_cwnd = tcp_snd_cwnd(tp);
 	tcp_reno_cong_avoid(sk, ack, acked);
 
-	incr = tp->snd_cwnd - prior_snd_cwnd;
+	incr = tcp_snd_cwnd(tp) - prior_snd_cwnd;
 	ca->shadow_wnd = max(ca->shadow_wnd, ca->shadow_wnd + incr);
 }

@@ -331,15 +331,15 @@ static u32 tcp_cdg_ssthresh(struct sock *sk)
 	struct tcp_sock *tp = tcp_sk(sk);
 
 	if (ca->state == CDG_BACKOFF)
-		return max(2U, (tp->snd_cwnd * min(1024U, backoff_beta)) >> 10);
+		return max(2U, (tcp_snd_cwnd(tp) * min(1024U, backoff_beta)) >> 10);
 
 	if (ca->state == CDG_NONFULL && use_tolerance)
-		return tp->snd_cwnd;
+		return tcp_snd_cwnd(tp);
 
-	ca->shadow_wnd = min(ca->shadow_wnd >> 1, tp->snd_cwnd);
+	ca->shadow_wnd = min(ca->shadow_wnd >> 1, tcp_snd_cwnd(tp));
 	if (use_shadow)
-		return max3(2U, ca->shadow_wnd, tp->snd_cwnd >> 1);
-	return max(2U, tp->snd_cwnd >> 1);
+		return max3(2U, ca->shadow_wnd, tcp_snd_cwnd(tp) >> 1);
+	return max(2U, tcp_snd_cwnd(tp) >> 1);
 }
 
 static void tcp_cdg_cwnd_event(struct sock *sk, const enum tcp_ca_event ev)
@@ -357,7 +357,7 @@ static void tcp_cdg_cwnd_event(struct sock *sk, const enum tcp_ca_event ev)
 
 		ca->gradients = gradients;
 		ca->rtt_seq = tp->snd_nxt;
-		ca->shadow_wnd = tp->snd_cwnd;
+		ca->shadow_wnd = tcp_snd_cwnd(tp);
 		break;
 	case CA_EVENT_COMPLETE_CWR:
 		ca->state = CDG_UNKNOWN;
@@ -381,7 +381,7 @@ static void tcp_cdg_init(struct sock *sk)
 	ca->gradients = kcalloc(window, sizeof(ca->gradients[0]),
 				GFP_NOWAIT | __GFP_NOWARN);
 	ca->rtt_seq = tp->snd_nxt;
-	ca->shadow_wnd = tp->snd_cwnd;
+	ca->shadow_wnd = tcp_snd_cwnd(tp);
 }
 
 static void tcp_cdg_release(struct sock *sk)
18 changes: 9 additions & 9 deletions net/ipv4/tcp_cong.c
@@ -395,10 +395,10 @@ int tcp_set_congestion_control(struct sock *sk, const char *name, bool load,
  */
 u32 tcp_slow_start(struct tcp_sock *tp, u32 acked)
 {
-	u32 cwnd = min(tp->snd_cwnd + acked, tp->snd_ssthresh);
+	u32 cwnd = min(tcp_snd_cwnd(tp) + acked, tp->snd_ssthresh);
 
-	acked -= cwnd - tp->snd_cwnd;
-	tp->snd_cwnd = min(cwnd, tp->snd_cwnd_clamp);
+	acked -= cwnd - tcp_snd_cwnd(tp);
+	tcp_snd_cwnd_set(tp, min(cwnd, tp->snd_cwnd_clamp));
 
 	return acked;
 }
@@ -412,17 +412,17 @@ void tcp_cong_avoid_ai(struct tcp_sock *tp, u32 w, u32 acked)
 	/* If credits accumulated at a higher w, apply them gently now. */
 	if (tp->snd_cwnd_cnt >= w) {
 		tp->snd_cwnd_cnt = 0;
-		tp->snd_cwnd++;
+		tcp_snd_cwnd_set(tp, tcp_snd_cwnd(tp) + 1);
 	}
 
 	tp->snd_cwnd_cnt += acked;
 	if (tp->snd_cwnd_cnt >= w) {
 		u32 delta = tp->snd_cwnd_cnt / w;
 
 		tp->snd_cwnd_cnt -= delta * w;
-		tp->snd_cwnd += delta;
+		tcp_snd_cwnd_set(tp, tcp_snd_cwnd(tp) + delta);
 	}
-	tp->snd_cwnd = min(tp->snd_cwnd, tp->snd_cwnd_clamp);
+	tcp_snd_cwnd_set(tp, min(tcp_snd_cwnd(tp), tp->snd_cwnd_clamp));
 }
 EXPORT_SYMBOL_GPL(tcp_cong_avoid_ai);

@@ -447,7 +447,7 @@ void tcp_reno_cong_avoid(struct sock *sk, u32 ack, u32 acked)
 		return;
 	}
 	/* In dangerous area, increase slowly. */
-	tcp_cong_avoid_ai(tp, tp->snd_cwnd, acked);
+	tcp_cong_avoid_ai(tp, tcp_snd_cwnd(tp), acked);
 }
 EXPORT_SYMBOL_GPL(tcp_reno_cong_avoid);

@@ -456,15 +456,15 @@ u32 tcp_reno_ssthresh(struct sock *sk)
 {
 	const struct tcp_sock *tp = tcp_sk(sk);
 
-	return max(tp->snd_cwnd >> 1U, 2U);
+	return max(tcp_snd_cwnd(tp) >> 1U, 2U);
 }
 EXPORT_SYMBOL_GPL(tcp_reno_ssthresh);
 
 u32 tcp_reno_undo_cwnd(struct sock *sk)
 {
 	const struct tcp_sock *tp = tcp_sk(sk);
 
-	return max(tp->snd_cwnd, tp->prior_cwnd);
+	return max(tcp_snd_cwnd(tp), tp->prior_cwnd);
 }
 EXPORT_SYMBOL_GPL(tcp_reno_undo_cwnd);

(Diff truncated: the remaining 19 of the 27 changed files are not shown here.)
