@@ -172,16 +172,17 @@ enum ovn_stage {
172
172
PIPELINE_STAGE (ROUTER , IN , DEFRAG , 4 , "lr_in_defrag ") \
173
173
PIPELINE_STAGE (ROUTER , IN , UNSNAT , 5 , "lr_in_unsnat ") \
174
174
PIPELINE_STAGE (ROUTER , IN , DNAT , 6 , "lr_in_dnat ") \
175
- PIPELINE_STAGE (ROUTER , IN , ND_RA_OPTIONS , 7 , "lr_in_nd_ra_options ") \
176
- PIPELINE_STAGE (ROUTER , IN , ND_RA_RESPONSE , 8 , "lr_in_nd_ra_response ") \
177
- PIPELINE_STAGE (ROUTER , IN , IP_ROUTING , 9 , "lr_in_ip_routing ") \
178
- PIPELINE_STAGE (ROUTER , IN , IP_ROUTING_ECMP , 10 , "lr_in_ip_routing_ecmp ") \
179
- PIPELINE_STAGE (ROUTER , IN , POLICY , 11 , "lr_in_policy ") \
180
- PIPELINE_STAGE (ROUTER , IN , ARP_RESOLVE , 12 , "lr_in_arp_resolve ") \
181
- PIPELINE_STAGE (ROUTER , IN , CHK_PKT_LEN , 13 , "lr_in_chk_pkt_len ") \
182
- PIPELINE_STAGE (ROUTER , IN , LARGER_PKTS , 14 ,"lr_in_larger_pkts ") \
183
- PIPELINE_STAGE (ROUTER , IN , GW_REDIRECT , 15 , "lr_in_gw_redirect ") \
184
- PIPELINE_STAGE (ROUTER , IN , ARP_REQUEST , 16 , "lr_in_arp_request ") \
175
+ PIPELINE_STAGE (ROUTER , IN , ECMP_STATEFUL , 7 , "lr_in_ecmp_stateful ") \
176
+ PIPELINE_STAGE (ROUTER , IN , ND_RA_OPTIONS , 8 , "lr_in_nd_ra_options ") \
177
+ PIPELINE_STAGE (ROUTER , IN , ND_RA_RESPONSE , 9 , "lr_in_nd_ra_response ") \
178
+ PIPELINE_STAGE (ROUTER , IN , IP_ROUTING , 10 , "lr_in_ip_routing ") \
179
+ PIPELINE_STAGE (ROUTER , IN , IP_ROUTING_ECMP , 11 , "lr_in_ip_routing_ecmp ") \
180
+ PIPELINE_STAGE (ROUTER , IN , POLICY , 12 , "lr_in_policy ") \
181
+ PIPELINE_STAGE (ROUTER , IN , ARP_RESOLVE , 13 , "lr_in_arp_resolve ") \
182
+ PIPELINE_STAGE (ROUTER , IN , CHK_PKT_LEN , 14 , "lr_in_chk_pkt_len ") \
183
+ PIPELINE_STAGE (ROUTER , IN , LARGER_PKTS , 15 ,"lr_in_larger_pkts ") \
184
+ PIPELINE_STAGE (ROUTER , IN , GW_REDIRECT , 16 , "lr_in_gw_redirect ") \
185
+ PIPELINE_STAGE (ROUTER , IN , ARP_REQUEST , 17 , "lr_in_arp_request ") \
185
186
\
186
187
/* Logical router egress stages. */ \
187
188
PIPELINE_STAGE (ROUTER , OUT , UNDNAT , 0 , "lr_out_undnat ") \
@@ -7418,6 +7419,7 @@ struct parsed_route {
7418
7419
bool is_src_route ;
7419
7420
uint32_t hash ;
7420
7421
const struct nbrec_logical_router_static_route * route ;
7422
+ bool ecmp_symmetric_reply ;
7421
7423
};
7422
7424
7423
7425
static uint32_t
@@ -7479,6 +7481,8 @@ parsed_routes_add(struct ovs_list *routes,
7479
7481
"src-ip" ));
7480
7482
pr -> hash = route_hash (pr );
7481
7483
pr -> route = route ;
7484
+ pr -> ecmp_symmetric_reply = smap_get_bool (& route -> options ,
7485
+ "ecmp_symmetric_reply" , false);
7482
7486
ovs_list_insert (routes , & pr -> list_node );
7483
7487
return pr ;
7484
7488
}
@@ -7727,26 +7731,102 @@ find_static_route_outport(struct ovn_datapath *od, struct hmap *ports,
7727
7731
return true;
7728
7732
}
7729
7733
7734
+ static void
7735
+ add_ecmp_symmetric_reply_flows (struct hmap * lflows ,
7736
+ struct ovn_datapath * od ,
7737
+ const char * port_ip ,
7738
+ struct ovn_port * out_port ,
7739
+ const struct parsed_route * route ,
7740
+ struct ds * route_match )
7741
+ {
7742
+ const struct nbrec_logical_router_static_route * st_route = route -> route ;
7743
+ struct ds match = DS_EMPTY_INITIALIZER ;
7744
+ struct ds actions = DS_EMPTY_INITIALIZER ;
7745
+ struct ds ecmp_reply = DS_EMPTY_INITIALIZER ;
7746
+ char * cidr = normalize_v46_prefix (& route -> prefix , route -> plen );
7747
+
7748
+ /* If symmetric ECMP replies are enabled, then packets that arrive over
7749
+ * an ECMP route need to go through conntrack.
7750
+ */
7751
+ ds_put_format (& match , "inport == %s && ip%s.%s == %s" ,
7752
+ out_port -> json_key ,
7753
+ route -> prefix .family == AF_INET ? "4" : "6" ,
7754
+ route -> is_src_route ? "dst" : "src" ,
7755
+ cidr );
7756
+ ovn_lflow_add_with_hint (lflows , od , S_ROUTER_IN_DEFRAG , 100 ,
7757
+ ds_cstr (& match ), "ct_next;" ,
7758
+ & st_route -> header_ );
7759
+
7760
+ /* And packets that go out over an ECMP route need conntrack */
7761
+ ovn_lflow_add_with_hint (lflows , od , S_ROUTER_IN_DEFRAG , 100 ,
7762
+ ds_cstr (route_match ), "ct_next;" ,
7763
+ & st_route -> header_ );
7764
+
7765
+ /* Save src eth and inport in ct_label for packets that arrive over
7766
+ * an ECMP route.
7767
+ *
7768
+ * NOTE: we purposely are not clearing match before this
7769
+ * ds_put_cstr() call. The previous contents are needed.
7770
+ */
7771
+ ds_put_cstr (& match , " && (ct.new && !ct.est)" );
7772
+
7773
+ ds_put_format (& actions , "ct_commit { ct_label.ecmp_reply_eth = eth.src;"
7774
+ " ct_label.ecmp_reply_port = %" PRId64 ";}; next;" ,
7775
+ out_port -> sb -> tunnel_key );
7776
+ ovn_lflow_add_with_hint (lflows , od , S_ROUTER_IN_ECMP_STATEFUL , 100 ,
7777
+ ds_cstr (& match ), ds_cstr (& actions ),
7778
+ & st_route -> header_ );
7779
+
7780
+ /* Bypass ECMP selection if we already have ct_label information
7781
+ * for where to route the packet.
7782
+ */
7783
+ ds_put_format (& ecmp_reply , "ct.rpl && ct_label.ecmp_reply_port == %"
7784
+ PRId64 , out_port -> sb -> tunnel_key );
7785
+ ds_clear (& match );
7786
+ ds_put_format (& match , "%s && %s" , ds_cstr (& ecmp_reply ),
7787
+ ds_cstr (route_match ));
7788
+ ds_clear (& actions );
7789
+ ds_put_format (& actions , "ip.ttl--; flags.loopback = 1; "
7790
+ "eth.src = %s; %sreg1 = %s; outport = %s; next;" ,
7791
+ out_port -> lrp_networks .ea_s ,
7792
+ route -> prefix .family == AF_INET ? "" : "xx" ,
7793
+ port_ip , out_port -> json_key );
7794
+ ovn_lflow_add_with_hint (lflows , od , S_ROUTER_IN_IP_ROUTING , 100 ,
7795
+ ds_cstr (& match ), ds_cstr (& actions ),
7796
+ & st_route -> header_ );
7797
+
7798
+ /* Egress reply traffic for symmetric ECMP routes skips router policies. */
7799
+ ovn_lflow_add_with_hint (lflows , od , S_ROUTER_IN_POLICY , 65535 ,
7800
+ ds_cstr (& ecmp_reply ), "next;" ,
7801
+ & st_route -> header_ );
7802
+
7803
+ ds_clear (& actions );
7804
+ ds_put_cstr (& actions , "eth.dst = ct_label.ecmp_reply_eth; next;" );
7805
+ ovn_lflow_add_with_hint (lflows , od , S_ROUTER_IN_ARP_RESOLVE ,
7806
+ 200 , ds_cstr (& ecmp_reply ),
7807
+ ds_cstr (& actions ), & st_route -> header_ );
7808
+ }
7809
+
7730
7810
static void
7731
7811
build_ecmp_route_flow (struct hmap * lflows , struct ovn_datapath * od ,
7732
7812
struct hmap * ports , struct ecmp_groups_node * eg )
7733
7813
7734
7814
{
7735
7815
bool is_ipv4 = (eg -> prefix .family == AF_INET );
7736
- struct ds match = DS_EMPTY_INITIALIZER ;
7737
7816
uint16_t priority ;
7817
+ struct ecmp_route_list_node * er ;
7818
+ struct ds route_match = DS_EMPTY_INITIALIZER ;
7738
7819
7739
7820
char * prefix_s = build_route_prefix_s (& eg -> prefix , eg -> plen );
7740
7821
build_route_match (NULL , prefix_s , eg -> plen , eg -> is_src_route , is_ipv4 ,
7741
- & match , & priority );
7822
+ & route_match , & priority );
7742
7823
free (prefix_s );
7743
7824
7744
7825
struct ds actions = DS_EMPTY_INITIALIZER ;
7745
7826
ds_put_format (& actions , "ip.ttl--; flags.loopback = 1; %s = %" PRIu16
7746
7827
"; %s = select(" , REG_ECMP_GROUP_ID , eg -> id ,
7747
7828
REG_ECMP_MEMBER_ID );
7748
7829
7749
- struct ecmp_route_list_node * er ;
7750
7830
bool is_first = true;
7751
7831
LIST_FOR_EACH (er , list_node , & eg -> route_list ) {
7752
7832
if (is_first ) {
@@ -7760,11 +7840,12 @@ build_ecmp_route_flow(struct hmap *lflows, struct ovn_datapath *od,
7760
7840
ds_put_cstr (& actions , ");" );
7761
7841
7762
7842
ovn_lflow_add (lflows , od , S_ROUTER_IN_IP_ROUTING , priority ,
7763
- ds_cstr (& match ), ds_cstr (& actions ));
7843
+ ds_cstr (& route_match ), ds_cstr (& actions ));
7764
7844
7765
7845
/* Add per member flow */
7846
+ struct ds match = DS_EMPTY_INITIALIZER ;
7847
+ struct sset visited_ports = SSET_INITIALIZER (& visited_ports );
7766
7848
LIST_FOR_EACH (er , list_node , & eg -> route_list ) {
7767
-
7768
7849
const struct parsed_route * route_ = er -> route ;
7769
7850
const struct nbrec_logical_router_static_route * route = route_ -> route ;
7770
7851
/* Find the outgoing port. */
@@ -7774,6 +7855,15 @@ build_ecmp_route_flow(struct hmap *lflows, struct ovn_datapath *od,
7774
7855
& out_port )) {
7775
7856
continue ;
7776
7857
}
7858
+ /* Symmetric ECMP reply is only usable on gateway routers.
7859
+ * It is NOT usable on distributed routers with a gateway port.
7860
+ */
7861
+ if (smap_get (& od -> nbr -> options , "chassis" ) &&
7862
+ route_ -> ecmp_symmetric_reply && sset_add (& visited_ports ,
7863
+ out_port -> key )) {
7864
+ add_ecmp_symmetric_reply_flows (lflows , od , lrp_addr_s , out_port ,
7865
+ route_ , & route_match );
7866
+ }
7777
7867
ds_clear (& match );
7778
7868
ds_put_format (& match , REG_ECMP_GROUP_ID " == %" PRIu16 " && "
7779
7869
REG_ECMP_MEMBER_ID " == %" PRIu16 ,
@@ -7794,7 +7884,9 @@ build_ecmp_route_flow(struct hmap *lflows, struct ovn_datapath *od,
7794
7884
ds_cstr (& match ), ds_cstr (& actions ),
7795
7885
& route -> header_ );
7796
7886
}
7887
+ sset_destroy (& visited_ports );
7797
7888
ds_destroy (& match );
7889
+ ds_destroy (& route_match );
7798
7890
ds_destroy (& actions );
7799
7891
}
7800
7892
@@ -9078,6 +9170,7 @@ build_lrouter_flows(struct hmap *datapaths, struct hmap *ports,
9078
9170
ovn_lflow_add (lflows , od , S_ROUTER_IN_DNAT , 0 , "1" , "next;" );
9079
9171
ovn_lflow_add (lflows , od , S_ROUTER_OUT_UNDNAT , 0 , "1" , "next;" );
9080
9172
ovn_lflow_add (lflows , od , S_ROUTER_OUT_EGR_LOOP , 0 , "1" , "next;" );
9173
+ ovn_lflow_add (lflows , od , S_ROUTER_IN_ECMP_STATEFUL , 0 , "1" , "next;" );
9081
9174
9082
9175
/* Send the IPv6 NS packets to next table. When ovn-controller
9083
9176
* generates IPv6 NS (for the action - nd_ns{}), the injected
0 commit comments