2727
2828#include "nf_internals.h"
2929
/* Upper bound on the number of candidate ports probed per round when
 * searching for a unique tuple; the attempt count is clamped to this.
 */
30+ #define NF_NAT_MAX_ATTEMPTS 128
/* Once this many attempts (a quarter of the maximum) or fewer remain,
 * nf_nat_used_tuple_harder() switches to its eviction-based lookup.
 */
31+ #define NF_NAT_HARDER_THRESH (NF_NAT_MAX_ATTEMPTS / 4)
32+
3033static spinlock_t nf_nat_locks [CONNTRACK_LOCKS ];
3134
3235static DEFINE_MUTEX (nf_nat_proto_mutex );
@@ -197,6 +200,88 @@ nf_nat_used_tuple(const struct nf_conntrack_tuple *tuple,
197200 return nf_conntrack_tuple_taken (& reply , ignored_conntrack );
198201}
199202
203+ static bool nf_nat_may_kill (struct nf_conn * ct , unsigned long flags )
204+ {
205+ static const unsigned long flags_refuse = IPS_FIXED_TIMEOUT |
206+ IPS_DYING ;
207+ static const unsigned long flags_needed = IPS_SRC_NAT ;
208+ enum tcp_conntrack old_state ;
209+
210+ old_state = READ_ONCE (ct -> proto .tcp .state );
211+ if (old_state < TCP_CONNTRACK_TIME_WAIT )
212+ return false;
213+
214+ if (flags & flags_refuse )
215+ return false;
216+
217+ return (flags & flags_needed ) == flags_needed ;
218+ }
219+
220+ /* reverse direction will send packets to new source, so
221+ * make sure such packets are invalid.
222+ */
223+ static bool nf_seq_has_advanced (const struct nf_conn * old , const struct nf_conn * new )
224+ {
225+ return (__s32 )(new -> proto .tcp .seen [0 ].td_end -
226+ old -> proto .tcp .seen [0 ].td_end ) > 0 ;
227+ }
228+
229+ static int
230+ nf_nat_used_tuple_harder (const struct nf_conntrack_tuple * tuple ,
231+ const struct nf_conn * ignored_conntrack ,
232+ unsigned int attempts_left )
233+ {
234+ static const unsigned long flags_offload = IPS_OFFLOAD | IPS_HW_OFFLOAD ;
235+ struct nf_conntrack_tuple_hash * thash ;
236+ const struct nf_conntrack_zone * zone ;
237+ struct nf_conntrack_tuple reply ;
238+ unsigned long flags ;
239+ struct nf_conn * ct ;
240+ bool taken = true;
241+ struct net * net ;
242+
243+ nf_ct_invert_tuple (& reply , tuple );
244+
245+ if (attempts_left > NF_NAT_HARDER_THRESH ||
246+ tuple -> dst .protonum != IPPROTO_TCP ||
247+ ignored_conntrack -> proto .tcp .state != TCP_CONNTRACK_SYN_SENT )
248+ return nf_conntrack_tuple_taken (& reply , ignored_conntrack );
249+
250+ /* :ast few attempts to find a free tcp port. Destructive
251+ * action: evict colliding if its in timewait state and the
252+ * tcp sequence number has advanced past the one used by the
253+ * old entry.
254+ */
255+ net = nf_ct_net (ignored_conntrack );
256+ zone = nf_ct_zone (ignored_conntrack );
257+
258+ thash = nf_conntrack_find_get (net , zone , & reply );
259+ if (!thash )
260+ return false;
261+
262+ ct = nf_ct_tuplehash_to_ctrack (thash );
263+
264+ if (thash -> tuple .dst .dir == IP_CT_DIR_ORIGINAL )
265+ goto out ;
266+
267+ if (WARN_ON_ONCE (ct == ignored_conntrack ))
268+ goto out ;
269+
270+ flags = READ_ONCE (ct -> status );
271+ if (!nf_nat_may_kill (ct , flags ))
272+ goto out ;
273+
274+ if (!nf_seq_has_advanced (ct , ignored_conntrack ))
275+ goto out ;
276+
277+ /* Even if we can evict do not reuse if entry is offloaded. */
278+ if (nf_ct_kill (ct ))
279+ taken = flags & flags_offload ;
280+ out :
281+ nf_ct_put (ct );
282+ return taken ;
283+ }
284+
200285static bool nf_nat_inet_in_range (const struct nf_conntrack_tuple * t ,
201286 const struct nf_nat_range2 * range )
202287{
@@ -385,7 +470,6 @@ static void nf_nat_l4proto_unique_tuple(struct nf_conntrack_tuple *tuple,
385470 unsigned int range_size , min , max , i , attempts ;
386471 __be16 * keyptr ;
387472 u16 off ;
388- static const unsigned int max_attempts = 128 ;
389473
390474 switch (tuple -> dst .protonum ) {
391475 case IPPROTO_ICMP :
@@ -471,8 +555,8 @@ static void nf_nat_l4proto_unique_tuple(struct nf_conntrack_tuple *tuple,
471555 off = get_random_u16 ();
472556
473557 attempts = range_size ;
474- if (attempts > max_attempts )
475- attempts = max_attempts ;
558+ if (attempts > NF_NAT_MAX_ATTEMPTS )
559+ attempts = NF_NAT_MAX_ATTEMPTS ;
476560
477561 /* We are in softirq; doing a search of the entire range risks
478562 * soft lockup when all tuples are already used.
@@ -483,7 +567,7 @@ static void nf_nat_l4proto_unique_tuple(struct nf_conntrack_tuple *tuple,
483567another_round :
484568 for (i = 0 ; i < attempts ; i ++ , off ++ ) {
485569 * keyptr = htons (min + off % range_size );
486- if (!nf_nat_used_tuple (tuple , ct ))
570+ if (!nf_nat_used_tuple_harder (tuple , ct , attempts - i ))
487571 return ;
488572 }
489573
0 commit comments