xref: /third_party/lwip/src/core/tcp_in.c (revision 195972f6)
1/**
2 * @file
3 * Transmission Control Protocol, incoming traffic
4 *
5 * The input processing functions of the TCP layer.
6 *
7 * These functions are generally called in the order (ip_input() ->)
8 * tcp_input() -> * tcp_process() -> tcp_receive() (-> application).
9 *
10 */
11
12/*
13 * Copyright (c) 2001-2004 Swedish Institute of Computer Science.
14 * All rights reserved.
15 *
16 * Redistribution and use in source and binary forms, with or without modification,
17 * are permitted provided that the following conditions are met:
18 *
19 * 1. Redistributions of source code must retain the above copyright notice,
20 *    this list of conditions and the following disclaimer.
21 * 2. Redistributions in binary form must reproduce the above copyright notice,
22 *    this list of conditions and the following disclaimer in the documentation
23 *    and/or other materials provided with the distribution.
24 * 3. The name of the author may not be used to endorse or promote products
25 *    derived from this software without specific prior written permission.
26 *
27 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR IMPLIED
28 * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
29 * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT
30 * SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
31 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT
32 * OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
33 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
34 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
35 * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY
36 * OF SUCH DAMAGE.
37 *
38 * This file is part of the lwIP TCP/IP stack.
39 *
40 * Author: Adam Dunkels <adam@sics.se>
41 *
42 */
43
44#include "lwip/opt.h"
45
46#if LWIP_TCP /* don't build if not configured for use in lwipopts.h */
47
48#include "lwip/priv/tcp_priv.h"
49#include "lwip/def.h"
50#include "lwip/ip_addr.h"
51#include "lwip/netif.h"
52#include "lwip/mem.h"
53#include "lwip/memp.h"
54#include "lwip/inet_chksum.h"
55#include "lwip/stats.h"
56#include "lwip/ip6.h"
57#include "lwip/ip6_addr.h"
58#if LWIP_ND6_TCP_REACHABILITY_HINTS
59#include "lwip/nd6.h"
60#endif /* LWIP_ND6_TCP_REACHABILITY_HINTS */
61
62#include <string.h>
63
64#ifdef LWIP_HOOK_FILENAME
65#include LWIP_HOOK_FILENAME
66#endif
67
68/** Initial congestion window for a connection with the given MSS:
 *   min(4 * mss, max(2 * mss, 4380)), converted to tcpwnd_size_t.
 * The file attributes this calculation to RFC 2581.
 * NOTE(review): the formula looks like the initial window of RFC 3390 -
 * confirm which RFC should be cited here.
 * The argument is expanded more than once, so it must be free of side effects. */
69#define LWIP_TCP_CALC_INITIAL_CWND(mss) ((tcpwnd_size_t)LWIP_MIN((4U * (mss)), LWIP_MAX((2U * (mss)), 4380U)))
70
71/* These variables are global to all functions involved in the input
72   processing of TCP segments. They are set by the tcp_input()
73   function and describe the one segment currently being processed. */
74static struct tcp_seg inseg; /* segment wrapper around the incoming pbuf; filled in by tcp_input() once a pcb matched */
75static struct tcp_hdr *tcphdr; /* TCP header of the incoming segment (start of the original pbuf payload) */
76static u16_t tcphdr_optlen; /* total length of the TCP options: header length minus TCP_HLEN */
77static u16_t tcphdr_opt1len; /* number of option bytes located in the first pbuf */
78static u8_t *tcphdr_opt2; /* option bytes continuing in the second pbuf, NULL if all options are in the first */
79static u16_t tcp_optidx; /* NOTE(review): presumably the read offset used while parsing options; not written in the code visible here - confirm */
80static u32_t seqno, ackno; /* sequence/acknowledgment numbers of the segment in host byte order */
81static tcpwnd_size_t recv_acked; /* reset to 0 per segment; a non-zero value is reported through TCP_EVENT_SENT */
82static u16_t tcplen; /* p->tot_len after header removal, plus one if SYN or FIN is set */
83static u8_t flags; /* TCP header flags of the segment (TCPH_FLAGS) */
84
85static u8_t recv_flags; /* reset to 0 per segment; tcp_input() checks it for TF_RESET, TF_CLOSED and TF_GOT_FIN */
86static struct pbuf *recv_data; /* reset to NULL per segment; non-NULL data is handed to the application via TCP_EVENT_RECV */
87
88struct tcp_pcb *tcp_input_pcb; /* pcb currently inside tcp_process()/the callbacks of tcp_input(), NULL otherwise */
89
90/* Forward declarations. */
91static err_t tcp_process(struct tcp_pcb *pcb);
92static void tcp_receive(struct tcp_pcb *pcb);
93static void tcp_parseopt(struct tcp_pcb *pcb);
94
95static void tcp_listen_input(struct tcp_pcb_listen *pcb);
96static void tcp_timewait_input(struct tcp_pcb *pcb);
97
98static int tcp_input_delayed_close(struct tcp_pcb *pcb);
99
100#if LWIP_TCP_SACK_OUT
101static void tcp_add_sack(struct tcp_pcb *pcb, u32_t left, u32_t right);
102static void tcp_remove_sacks_lt(struct tcp_pcb *pcb, u32_t seq);
103#if defined(TCP_OOSEQ_BYTES_LIMIT) || defined(TCP_OOSEQ_PBUFS_LIMIT)
104static void tcp_remove_sacks_gt(struct tcp_pcb *pcb, u32_t seq);
105#endif /* TCP_OOSEQ_BYTES_LIMIT || TCP_OOSEQ_PBUFS_LIMIT */
106#endif /* LWIP_TCP_SACK_OUT */
107
108/**
109 * The initial input processing of TCP. It verifies the TCP header, demultiplexes
110 * the segment between the PCBs and passes it on to tcp_process(), which implements
111 * the TCP finite state machine. This function is called by the IP layer (in
112 * ip_input()).
113 *
114 * @param p received TCP segment to process (p->payload pointing to the TCP header)
115 * @param inp network interface on which this segment was received
116 */
117void
118tcp_input(struct pbuf *p, struct netif *inp)
119{
120  struct tcp_pcb *pcb, *prev;
121  struct tcp_pcb_listen *lpcb;
122#ifdef LOSCFG_NET_CONTAINER
123  struct net_group *group = get_net_group_from_netif(inp);
124#endif
125#if SO_REUSE
126  struct tcp_pcb *lpcb_prev = NULL;
127  struct tcp_pcb_listen *lpcb_any = NULL;
128#endif /* SO_REUSE */
129  u8_t hdrlen_bytes;
130  err_t err;
131
132  LWIP_UNUSED_ARG(inp);
133  LWIP_ASSERT_CORE_LOCKED();
134  LWIP_ASSERT("tcp_input: invalid pbuf", p != NULL);
135
136  PERF_START;
137
138  TCP_STATS_INC(tcp.recv);
139  MIB2_STATS_INC(mib2.tcpinsegs);
140
141  tcphdr = (struct tcp_hdr *)p->payload;
142
143#if TCP_INPUT_DEBUG
144  tcp_debug_print(tcphdr);
145#endif
146
147  /* Check that TCP header fits in payload */
148  if (p->len < TCP_HLEN) {
149    /* drop short packets */
150    LWIP_DEBUGF(TCP_INPUT_DEBUG, ("tcp_input: short packet (%"U16_F" bytes) discarded\n", p->tot_len));
151    TCP_STATS_INC(tcp.lenerr);
152    goto dropped;
153  }
154
155  /* Don't even process incoming broadcasts/multicasts. */
156  if (ip_addr_isbroadcast(ip_current_dest_addr(), ip_current_netif()) ||
157      ip_addr_ismulticast(ip_current_dest_addr())) {
158    TCP_STATS_INC(tcp.proterr);
159    goto dropped;
160  }
161
162#if CHECKSUM_CHECK_TCP
163  IF__NETIF_CHECKSUM_ENABLED(inp, NETIF_CHECKSUM_CHECK_TCP) {
164    /* Verify TCP checksum. */
165    u16_t chksum = ip_chksum_pseudo(p, IP_PROTO_TCP, p->tot_len,
166                                    ip_current_src_addr(), ip_current_dest_addr());
167    if (chksum != 0) {
168      LWIP_DEBUGF(TCP_INPUT_DEBUG, ("tcp_input: packet discarded due to failing checksum 0x%04"X16_F"\n",
169                                    chksum));
170      tcp_debug_print(tcphdr);
171      TCP_STATS_INC(tcp.chkerr);
172      goto dropped;
173    }
174  }
175#endif /* CHECKSUM_CHECK_TCP */
176
177  /* sanity-check header length */
178  hdrlen_bytes = TCPH_HDRLEN_BYTES(tcphdr);
179  if ((hdrlen_bytes < TCP_HLEN) || (hdrlen_bytes > p->tot_len)) {
180    LWIP_DEBUGF(TCP_INPUT_DEBUG, ("tcp_input: invalid header length (%"U16_F")\n", (u16_t)hdrlen_bytes));
181    TCP_STATS_INC(tcp.lenerr);
182    goto dropped;
183  }
184
185  /* Move the payload pointer in the pbuf so that it points to the
186     TCP data instead of the TCP header. */
187  tcphdr_optlen = (u16_t)(hdrlen_bytes - TCP_HLEN);
188  tcphdr_opt2 = NULL;
189  if (p->len >= hdrlen_bytes) {
190    /* all options are in the first pbuf */
191    tcphdr_opt1len = tcphdr_optlen;
192    pbuf_remove_header(p, hdrlen_bytes); /* cannot fail */
193  } else {
194    u16_t opt2len;
195    /* TCP header fits into first pbuf, options don't - data is in the next pbuf */
196    /* there must be a next pbuf, due to hdrlen_bytes sanity check above */
197    LWIP_ASSERT("p->next != NULL", p->next != NULL);
198
199    /* advance over the TCP header (cannot fail) */
200    pbuf_remove_header(p, TCP_HLEN);
201
202    /* determine how long the first and second parts of the options are */
203    tcphdr_opt1len = p->len;
204    opt2len = (u16_t)(tcphdr_optlen - tcphdr_opt1len);
205
206    /* options continue in the next pbuf: set p to zero length and hide the
207        options in the next pbuf (adjusting p->tot_len) */
208    pbuf_remove_header(p, tcphdr_opt1len);
209
210    /* check that the options fit in the second pbuf */
211    if (opt2len > p->next->len) {
212      /* drop short packets */
213      LWIP_DEBUGF(TCP_INPUT_DEBUG, ("tcp_input: options overflow second pbuf (%"U16_F" bytes)\n", p->next->len));
214      TCP_STATS_INC(tcp.lenerr);
215      goto dropped;
216    }
217
218    /* remember the pointer to the second part of the options */
219    tcphdr_opt2 = (u8_t *)p->next->payload;
220
221    /* advance p->next to point after the options, and manually
222        adjust p->tot_len to keep it consistent with the changed p->next */
223    pbuf_remove_header(p->next, opt2len);
224    p->tot_len = (u16_t)(p->tot_len - opt2len);
225
226    LWIP_ASSERT("p->len == 0", p->len == 0);
227    LWIP_ASSERT("p->tot_len == p->next->tot_len", p->tot_len == p->next->tot_len);
228  }
229
230  /* Convert fields in TCP header to host byte order. */
231  tcphdr->src = lwip_ntohs(tcphdr->src);
232  tcphdr->dest = lwip_ntohs(tcphdr->dest);
233  seqno = tcphdr->seqno = lwip_ntohl(tcphdr->seqno);
234  ackno = tcphdr->ackno = lwip_ntohl(tcphdr->ackno);
235  tcphdr->wnd = lwip_ntohs(tcphdr->wnd);
236
237  flags = TCPH_FLAGS(tcphdr);
238  tcplen = p->tot_len;
239  if (flags & (TCP_FIN | TCP_SYN)) {
240    tcplen++;
241    if (tcplen < p->tot_len) {
242      /* u16_t overflow, cannot handle this */
243      LWIP_DEBUGF(TCP_INPUT_DEBUG, ("tcp_input: length u16_t overflow, cannot handle this\n"));
244      TCP_STATS_INC(tcp.lenerr);
245      goto dropped;
246    }
247  }
248
249  /* Demultiplex an incoming segment. First, we check if it is destined
250     for an active connection. */
251  prev = NULL;
252
253  for (pcb = tcp_active_pcbs; pcb != NULL; pcb = pcb->next) {
254    LWIP_ASSERT("tcp_input: active pcb->state != CLOSED", pcb->state != CLOSED);
255    LWIP_ASSERT("tcp_input: active pcb->state != TIME-WAIT", pcb->state != TIME_WAIT);
256    LWIP_ASSERT("tcp_input: active pcb->state != LISTEN", pcb->state != LISTEN);
257
258    /* check if PCB is bound to specific netif */
259    if ((pcb->netif_idx != NETIF_NO_INDEX) &&
260        (pcb->netif_idx != netif_get_index(ip_data.current_input_netif))) {
261      prev = pcb;
262      continue;
263    }
264
265#ifdef LOSCFG_NET_CONTAINER
266    if (group == get_net_group_from_tcp_pcb(pcb) &&
267        pcb->remote_port == tcphdr->src &&
268#else
269    if (pcb->remote_port == tcphdr->src &&
270#endif
271        pcb->local_port == tcphdr->dest &&
272        ip_addr_cmp(&pcb->remote_ip, ip_current_src_addr()) &&
273        ip_addr_cmp(&pcb->local_ip, ip_current_dest_addr())) {
274      /* Move this PCB to the front of the list so that subsequent
275         lookups will be faster (we exploit locality in TCP segment
276         arrivals). */
277      LWIP_ASSERT("tcp_input: pcb->next != pcb (before cache)", pcb->next != pcb);
278      if (prev != NULL) {
279        prev->next = pcb->next;
280        pcb->next = tcp_active_pcbs;
281        tcp_active_pcbs = pcb;
282      } else {
283        TCP_STATS_INC(tcp.cachehit);
284      }
285      LWIP_ASSERT("tcp_input: pcb->next != pcb (after cache)", pcb->next != pcb);
286      break;
287    }
288    prev = pcb;
289  }
290
291  if (pcb == NULL) {
292    /* If it did not go to an active connection, we check the connections
293       in the TIME-WAIT state. */
294    for (pcb = tcp_tw_pcbs; pcb != NULL; pcb = pcb->next) {
295      LWIP_ASSERT("tcp_input: TIME-WAIT pcb->state == TIME-WAIT", pcb->state == TIME_WAIT);
296
297      /* check if PCB is bound to specific netif */
298      if ((pcb->netif_idx != NETIF_NO_INDEX) &&
299          (pcb->netif_idx != netif_get_index(ip_data.current_input_netif))) {
300        continue;
301      }
302
303#ifdef LOSCFG_NET_CONTAINER
304      if (group == get_net_group_from_tcp_pcb(pcb) &&
305          pcb->remote_port == tcphdr->src &&
306#else
307      if (pcb->remote_port == tcphdr->src &&
308#endif
309          pcb->local_port == tcphdr->dest &&
310          ip_addr_cmp(&pcb->remote_ip, ip_current_src_addr()) &&
311          ip_addr_cmp(&pcb->local_ip, ip_current_dest_addr())) {
312        /* We don't really care enough to move this PCB to the front
313           of the list since we are not very likely to receive that
314           many segments for connections in TIME-WAIT. */
315        LWIP_DEBUGF(TCP_INPUT_DEBUG, ("tcp_input: packed for TIME_WAITing connection.\n"));
316#ifdef LWIP_HOOK_TCP_INPACKET_PCB
317        if (LWIP_HOOK_TCP_INPACKET_PCB(pcb, tcphdr, tcphdr_optlen, tcphdr_opt1len,
318                                       tcphdr_opt2, p) == ERR_OK)
319#endif
320        {
321          tcp_timewait_input(pcb);
322        }
323        pbuf_free(p);
324        return;
325      }
326    }
327
328    /* Finally, if we still did not get a match, we check all PCBs that
329       are LISTENing for incoming connections. */
330    prev = NULL;
331    for (lpcb = tcp_listen_pcbs.listen_pcbs; lpcb != NULL; lpcb = lpcb->next) {
332      /* check if PCB is bound to specific netif */
333      if ((lpcb->netif_idx != NETIF_NO_INDEX) &&
334          (lpcb->netif_idx != netif_get_index(ip_data.current_input_netif))) {
335        prev = (struct tcp_pcb *)lpcb;
336        continue;
337      }
338
339#ifdef LOSCFG_NET_CONTAINER
340      if (group == get_net_group_from_tcp_pcb((struct tcp_pcb *)lpcb) && lpcb->local_port == tcphdr->dest) {
341#else
342      if (lpcb->local_port == tcphdr->dest) {
343#endif
344        if (IP_IS_ANY_TYPE_VAL(lpcb->local_ip)) {
345          /* found an ANY TYPE (IPv4/IPv6) match */
346#if SO_REUSE
347          lpcb_any = lpcb;
348          lpcb_prev = prev;
349#else /* SO_REUSE */
350          break;
351#endif /* SO_REUSE */
352        } else if (IP_ADDR_PCB_VERSION_MATCH_EXACT(lpcb, ip_current_dest_addr())) {
353          if (ip_addr_cmp(&lpcb->local_ip, ip_current_dest_addr())) {
354            /* found an exact match */
355            break;
356          } else if (ip_addr_isany(&lpcb->local_ip)) {
357            /* found an ANY-match */
358#if SO_REUSE
359            lpcb_any = lpcb;
360            lpcb_prev = prev;
361#else /* SO_REUSE */
362            break;
363#endif /* SO_REUSE */
364          }
365        }
366      }
367      prev = (struct tcp_pcb *)lpcb;
368    }
369#if SO_REUSE
370    /* first try specific local IP */
371    if (lpcb == NULL) {
372      /* only pass to ANY if no specific local IP has been found */
373      lpcb = lpcb_any;
374      prev = lpcb_prev;
375    }
376#endif /* SO_REUSE */
377    if (lpcb != NULL) {
378      /* Move this PCB to the front of the list so that subsequent
379         lookups will be faster (we exploit locality in TCP segment
380         arrivals). */
381      if (prev != NULL) {
382        ((struct tcp_pcb_listen *)prev)->next = lpcb->next;
383        /* our successor is the remainder of the listening list */
384        lpcb->next = tcp_listen_pcbs.listen_pcbs;
385        /* put this listening pcb at the head of the listening list */
386        tcp_listen_pcbs.listen_pcbs = lpcb;
387      } else {
388        TCP_STATS_INC(tcp.cachehit);
389      }
390
391      LWIP_DEBUGF(TCP_INPUT_DEBUG, ("tcp_input: packed for LISTENing connection.\n"));
392#ifdef LWIP_HOOK_TCP_INPACKET_PCB
393      if (LWIP_HOOK_TCP_INPACKET_PCB((struct tcp_pcb *)lpcb, tcphdr, tcphdr_optlen,
394                                     tcphdr_opt1len, tcphdr_opt2, p) == ERR_OK)
395#endif
396      {
397        tcp_listen_input(lpcb);
398      }
399      pbuf_free(p);
400      return;
401    }
402  }
403
404#if TCP_INPUT_DEBUG
405  LWIP_DEBUGF(TCP_INPUT_DEBUG, ("+-+-+-+-+-+-+-+-+-+-+-+-+-+- tcp_input: flags "));
406  tcp_debug_print_flags(TCPH_FLAGS(tcphdr));
407  LWIP_DEBUGF(TCP_INPUT_DEBUG, ("-+-+-+-+-+-+-+-+-+-+-+-+-+-+\n"));
408#endif /* TCP_INPUT_DEBUG */
409
410
411#ifdef LWIP_HOOK_TCP_INPACKET_PCB
412  if ((pcb != NULL) && LWIP_HOOK_TCP_INPACKET_PCB(pcb, tcphdr, tcphdr_optlen,
413      tcphdr_opt1len, tcphdr_opt2, p) != ERR_OK) {
414    pbuf_free(p);
415    return;
416  }
417#endif
418  if (pcb != NULL) {
419    /* The incoming segment belongs to a connection. */
420#if TCP_INPUT_DEBUG
421    tcp_debug_print_state(pcb->state);
422#endif /* TCP_INPUT_DEBUG */
423
424    /* Set up a tcp_seg structure. */
425    inseg.next = NULL;
426    inseg.len = p->tot_len;
427    inseg.p = p;
428    inseg.tcphdr = tcphdr;
429
430    recv_data = NULL;
431    recv_flags = 0;
432    recv_acked = 0;
433
434    if (flags & TCP_PSH) {
435      p->flags |= PBUF_FLAG_PUSH;
436    }
437
438    /* If there is data which was previously "refused" by upper layer */
439    if (pcb->refused_data != NULL) {
440      if ((tcp_process_refused_data(pcb) == ERR_ABRT) ||
441          ((pcb->refused_data != NULL) && (tcplen > 0))) {
442        /* pcb has been aborted or refused data is still refused and the new
443           segment contains data */
444        if (pcb->rcv_ann_wnd == 0) {
445          /* this is a zero-window probe, we respond to it with current RCV.NXT
446          and drop the data segment */
447          tcp_send_empty_ack(pcb);
448        }
449        TCP_STATS_INC(tcp.drop);
450        MIB2_STATS_INC(mib2.tcpinerrs);
451        goto aborted;
452      }
453    }
454    tcp_input_pcb = pcb;
455    err = tcp_process(pcb);
456    /* A return value of ERR_ABRT means that tcp_abort() was called
457       and that the pcb has been freed. If so, we don't do anything. */
458    if (err != ERR_ABRT) {
459      if (recv_flags & TF_RESET) {
460        /* TF_RESET means that the connection was reset by the other
461           end. We then call the error callback to inform the
462           application that the connection is dead before we
463           deallocate the PCB. */
464        TCP_EVENT_ERR(pcb->state, pcb->errf, pcb->callback_arg, ERR_RST);
465        tcp_pcb_remove(&tcp_active_pcbs, pcb);
466        tcp_free(pcb);
467      } else {
468        err = ERR_OK;
469        /* If the application has registered a "sent" function to be
470           called when new send buffer space is available, we call it
471           now. */
472        if (recv_acked > 0) {
473          u16_t acked16;
474#if LWIP_WND_SCALE
475          /* recv_acked is u32_t but the sent callback only takes a u16_t,
476             so we might have to call it multiple times. */
477          u32_t acked = recv_acked;
478          while (acked > 0) {
479            acked16 = (u16_t)LWIP_MIN(acked, 0xffffu);
480            acked -= acked16;
481#else
482          {
483            acked16 = recv_acked;
484#endif
485            TCP_EVENT_SENT(pcb, (u16_t)acked16, err);
486            if (err == ERR_ABRT) {
487              goto aborted;
488            }
489          }
490          recv_acked = 0;
491        }
492        if (tcp_input_delayed_close(pcb)) {
493          goto aborted;
494        }
495#if TCP_QUEUE_OOSEQ && LWIP_WND_SCALE
496        while (recv_data != NULL) {
497          struct pbuf *rest = NULL;
498          pbuf_split_64k(recv_data, &rest);
499#else /* TCP_QUEUE_OOSEQ && LWIP_WND_SCALE */
500        if (recv_data != NULL) {
501#endif /* TCP_QUEUE_OOSEQ && LWIP_WND_SCALE */
502
503          LWIP_ASSERT("pcb->refused_data == NULL", pcb->refused_data == NULL);
504          if (pcb->flags & TF_RXCLOSED) {
505            /* received data although already closed -> abort (send RST) to
506               notify the remote host that not all data has been processed */
507            pbuf_free(recv_data);
508#if TCP_QUEUE_OOSEQ && LWIP_WND_SCALE
509            if (rest != NULL) {
510              pbuf_free(rest);
511            }
512#endif /* TCP_QUEUE_OOSEQ && LWIP_WND_SCALE */
513            tcp_abort(pcb);
514            goto aborted;
515          }
516
517          /* Notify application that data has been received. */
518          TCP_EVENT_RECV(pcb, recv_data, ERR_OK, err);
519          if (err == ERR_ABRT) {
520#if TCP_QUEUE_OOSEQ && LWIP_WND_SCALE
521            if (rest != NULL) {
522              pbuf_free(rest);
523            }
524#endif /* TCP_QUEUE_OOSEQ && LWIP_WND_SCALE */
525            goto aborted;
526          }
527
528          /* If the upper layer can't receive this data, store it */
529          if (err != ERR_OK) {
530#if TCP_QUEUE_OOSEQ && LWIP_WND_SCALE
531            if (rest != NULL) {
532              pbuf_cat(recv_data, rest);
533            }
534#endif /* TCP_QUEUE_OOSEQ && LWIP_WND_SCALE */
535            pcb->refused_data = recv_data;
536            LWIP_DEBUGF(TCP_INPUT_DEBUG, ("tcp_input: keep incoming packet, because pcb is \"full\"\n"));
537#if TCP_QUEUE_OOSEQ && LWIP_WND_SCALE
538            break;
539          } else {
540            /* Upper layer received the data, go on with the rest if > 64K */
541            recv_data = rest;
542#endif /* TCP_QUEUE_OOSEQ && LWIP_WND_SCALE */
543          }
544        }
545
546        /* If a FIN segment was received, we call the callback
547           function with a NULL buffer to indicate EOF. */
548        if (recv_flags & TF_GOT_FIN) {
549          if (pcb->refused_data != NULL) {
550            /* Delay this if we have refused data. */
551            pcb->refused_data->flags |= PBUF_FLAG_TCP_FIN;
552          } else {
553            /* correct rcv_wnd as the application won't call tcp_recved()
554               for the FIN's seqno */
555            if (pcb->rcv_wnd != TCP_WND_MAX(pcb)) {
556              pcb->rcv_wnd++;
557            }
558            TCP_EVENT_CLOSED(pcb, err);
559            if (err == ERR_ABRT) {
560              goto aborted;
561            }
562          }
563        }
564
565        tcp_input_pcb = NULL;
566        if (tcp_input_delayed_close(pcb)) {
567          goto aborted;
568        }
569        /* Try to send something out. */
570        tcp_output(pcb);
571#if TCP_INPUT_DEBUG
572#if TCP_DEBUG
573        tcp_debug_print_state(pcb->state);
574#endif /* TCP_DEBUG */
575#endif /* TCP_INPUT_DEBUG */
576      }
577    }
578    /* Jump target if pcb has been aborted in a callback (by calling tcp_abort()).
579       Below this line, 'pcb' may not be dereferenced! */
580aborted:
581    tcp_input_pcb = NULL;
582    recv_data = NULL;
583
584    /* give up our reference to inseg.p */
585    if (inseg.p != NULL) {
586      pbuf_free(inseg.p);
587      inseg.p = NULL;
588    }
589  } else {
590    /* If no matching PCB was found, send a TCP RST (reset) to the
591       sender. */
592    LWIP_DEBUGF(TCP_RST_DEBUG, ("tcp_input: no PCB match found, resetting.\n"));
593    if (!(TCPH_FLAGS(tcphdr) & TCP_RST)) {
594      TCP_STATS_INC(tcp.proterr);
595      TCP_STATS_INC(tcp.drop);
596      tcp_rst(NULL, ackno, seqno + tcplen, ip_current_dest_addr(),
597              ip_current_src_addr(), tcphdr->dest, tcphdr->src);
598    }
599    pbuf_free(p);
600  }
601
602  LWIP_ASSERT("tcp_input: tcp_pcbs_sane()", tcp_pcbs_sane());
603  PERF_STOP("tcp_input");
604  return;
605dropped:
606  TCP_STATS_INC(tcp.drop);
607  MIB2_STATS_INC(mib2.tcpinerrs);
608  pbuf_free(p);
609}
610
611/** Called from tcp_input to check for TF_CLOSED flag. This results in closing
612 * and deallocating a pcb at the correct place to ensure noone references it
613 * any more.
614 * @returns 1 if the pcb has been closed and deallocated, 0 otherwise
615 */
616static int
617tcp_input_delayed_close(struct tcp_pcb *pcb)
618{
619  LWIP_ASSERT("tcp_input_delayed_close: invalid pcb", pcb != NULL);
620
621  if (recv_flags & TF_CLOSED) {
622    /* The connection has been closed and we will deallocate the
623        PCB. */
624    if (!(pcb->flags & TF_RXCLOSED)) {
625      /* Connection closed although the application has only shut down the
626          tx side: call the PCB's err callback and indicate the closure to
627          ensure the application doesn't continue using the PCB. */
628      TCP_EVENT_ERR(pcb->state, pcb->errf, pcb->callback_arg, ERR_CLSD);
629    }
630    tcp_pcb_remove(&tcp_active_pcbs, pcb);
631    tcp_free(pcb);
632    return 1;
633  }
634  return 0;
635}
636
637/**
638 * Called by tcp_input() when a segment arrives for a listening
639 * connection (from tcp_input()).
640 *
641 * @param pcb the tcp_pcb_listen for which a segment arrived
642 *
643 * @note the segment which arrived is saved in global variables, therefore only the pcb
644 *       involved is passed as a parameter to this function
645 */
646static void
647tcp_listen_input(struct tcp_pcb_listen *pcb)
648{
649  struct tcp_pcb *npcb;
650  u32_t iss;
651  err_t rc;
652
653  if (flags & TCP_RST) {
654    /* An incoming RST should be ignored. Return. */
655    return;
656  }
657
658  LWIP_ASSERT("tcp_listen_input: invalid pcb", pcb != NULL);
659
660#ifdef LOSCFG_NET_CONTAINER
661  struct net_group *group = get_net_group_from_tcp_pcb((struct tcp_pcb *)pcb);
662#endif
663  /* In the LISTEN state, we check for incoming SYN segments,
664     creates a new PCB, and responds with a SYN|ACK. */
665  if (flags & TCP_ACK) {
666    /* For incoming segments with the ACK flag set, respond with a
667       RST. */
668    LWIP_DEBUGF(TCP_RST_DEBUG, ("tcp_listen_input: ACK in LISTEN, sending reset\n"));
669    tcp_rst((const struct tcp_pcb *)pcb, ackno, seqno + tcplen, ip_current_dest_addr(),
670            ip_current_src_addr(), tcphdr->dest, tcphdr->src);
671  } else if (flags & TCP_SYN) {
672    LWIP_DEBUGF(TCP_DEBUG, ("TCP connection request %"U16_F" -> %"U16_F".\n", tcphdr->src, tcphdr->dest));
673#if TCP_LISTEN_BACKLOG
674    if (pcb->accepts_pending >= pcb->backlog) {
675      LWIP_DEBUGF(TCP_DEBUG, ("tcp_listen_input: listen backlog exceeded for port %"U16_F"\n", tcphdr->dest));
676      return;
677    }
678#endif /* TCP_LISTEN_BACKLOG */
679    npcb = tcp_alloc(pcb->prio);
680    /* If a new PCB could not be created (probably due to lack of memory),
681       we don't do anything, but rely on the sender will retransmit the
682       SYN at a time when we have more memory available. */
683    if (npcb == NULL) {
684      err_t err;
685      LWIP_DEBUGF(TCP_DEBUG, ("tcp_listen_input: could not allocate PCB\n"));
686      TCP_STATS_INC(tcp.memerr);
687      TCP_EVENT_ACCEPT(pcb, NULL, pcb->callback_arg, ERR_MEM, err);
688      LWIP_UNUSED_ARG(err); /* err not useful here */
689      return;
690    }
691#ifdef LOSCFG_NET_CONTAINER
692    set_tcp_pcb_net_group(npcb, group);
693#endif
694#if TCP_LISTEN_BACKLOG
695    pcb->accepts_pending++;
696    tcp_set_flags(npcb, TF_BACKLOGPEND);
697#endif /* TCP_LISTEN_BACKLOG */
698    /* Set up the new PCB. */
699    ip_addr_copy(npcb->local_ip, *ip_current_dest_addr());
700    ip_addr_copy(npcb->remote_ip, *ip_current_src_addr());
701    npcb->local_port = pcb->local_port;
702    npcb->remote_port = tcphdr->src;
703    npcb->state = SYN_RCVD;
704    npcb->rcv_nxt = seqno + 1;
705    npcb->rcv_ann_right_edge = npcb->rcv_nxt;
706    iss = tcp_next_iss(npcb);
707    npcb->snd_wl2 = iss;
708    npcb->snd_nxt = iss;
709    npcb->lastack = iss;
710    npcb->snd_lbb = iss;
711    npcb->snd_wl1 = seqno - 1;/* initialise to seqno-1 to force window update */
712    npcb->callback_arg = pcb->callback_arg;
713#if LWIP_CALLBACK_API || TCP_LISTEN_BACKLOG
714    npcb->listener = pcb;
715#endif /* LWIP_CALLBACK_API || TCP_LISTEN_BACKLOG */
716    /* inherit socket options */
717    npcb->so_options = pcb->so_options & SOF_INHERITED;
718    npcb->netif_idx = pcb->netif_idx;
719    /* Register the new PCB so that we can begin receiving segments
720       for it. */
721    TCP_REG_ACTIVE(npcb);
722
723    /* Parse any options in the SYN. */
724    tcp_parseopt(npcb);
725    npcb->snd_wnd = tcphdr->wnd;
726    npcb->snd_wnd_max = npcb->snd_wnd;
727
728#if TCP_CALCULATE_EFF_SEND_MSS
729#ifdef LOSCFG_NET_CONTAINER
730    npcb->mss = tcp_eff_send_mss(npcb->mss, &npcb->local_ip, &npcb->remote_ip, group);
731#else
732    npcb->mss = tcp_eff_send_mss(npcb->mss, &npcb->local_ip, &npcb->remote_ip);
733#endif
734#endif /* TCP_CALCULATE_EFF_SEND_MSS */
735
736    MIB2_STATS_INC(mib2.tcppassiveopens);
737
738#if LWIP_TCP_PCB_NUM_EXT_ARGS
739    if (tcp_ext_arg_invoke_callbacks_passive_open(pcb, npcb) != ERR_OK) {
740      tcp_abandon(npcb, 0);
741      return;
742    }
743#endif
744
745    /* Send a SYN|ACK together with the MSS option. */
746    rc = tcp_enqueue_flags(npcb, TCP_SYN | TCP_ACK);
747    if (rc != ERR_OK) {
748      tcp_abandon(npcb, 0);
749      return;
750    }
751    tcp_output(npcb);
752  }
753  return;
754}
755
756/**
757 * Called by tcp_input() when a segment arrives for a connection in
758 * TIME_WAIT.
759 *
760 * @param pcb the tcp_pcb for which a segment arrived
761 *
762 * @note the segment which arrived is saved in global variables, therefore only the pcb
763 *       involved is passed as a parameter to this function
764 */
765static void
766tcp_timewait_input(struct tcp_pcb *pcb)
767{
768  /* RFC 1337: in TIME_WAIT, ignore RST and ACK FINs + any 'acceptable' segments */
769  /* RFC 793 3.9 Event Processing - Segment Arrives:
770   * - first check sequence number - we skip that one in TIME_WAIT (always
771   *   acceptable since we only send ACKs)
772   * - second check the RST bit (... return) */
773  if (flags & TCP_RST) {
774    return;
775  }
776
777  LWIP_ASSERT("tcp_timewait_input: invalid pcb", pcb != NULL);
778
779  /* - fourth, check the SYN bit, */
780  if (flags & TCP_SYN) {
781    /* If an incoming segment is not acceptable, an acknowledgment
782       should be sent in reply */
783    if (TCP_SEQ_BETWEEN(seqno, pcb->rcv_nxt, pcb->rcv_nxt + pcb->rcv_wnd)) {
784      /* If the SYN is in the window it is an error, send a reset */
785      tcp_rst(pcb, ackno, seqno + tcplen, ip_current_dest_addr(),
786              ip_current_src_addr(), tcphdr->dest, tcphdr->src);
787      return;
788    }
789  } else if (flags & TCP_FIN) {
790    /* - eighth, check the FIN bit: Remain in the TIME-WAIT state.
791         Restart the 2 MSL time-wait timeout.*/
792    pcb->tmr = tcp_ticks;
793  }
794
795  if ((tcplen > 0)) {
796    /* Acknowledge data, FIN or out-of-window SYN */
797    tcp_ack_now(pcb);
798    tcp_output(pcb);
799  }
800  return;
801}
802
803/**
804 * Implements the TCP state machine. Called by tcp_input. In some
805 * states tcp_receive() is called to receive data. The tcp_seg
806 * argument will be freed by the caller (tcp_input()) unless the
807 * recv_data pointer in the pcb is set.
808 *
809 * @param pcb the tcp_pcb for which a segment arrived
810 *
811 * @note the segment which arrived is saved in global variables, therefore only the pcb
812 *       involved is passed as a parameter to this function
813 */
814static err_t
815tcp_process(struct tcp_pcb *pcb)
816{
817  struct tcp_seg *rseg;
818  u8_t acceptable = 0;
819  err_t err;
820
821  err = ERR_OK;
822#ifdef LOSCFG_NET_CONTAINER
823  struct net_group *group = get_net_group_from_tcp_pcb(pcb);
824#endif
825
826  LWIP_ASSERT("tcp_process: invalid pcb", pcb != NULL);
827
828  /* Process incoming RST segments. */
829  if (flags & TCP_RST) {
830    /* First, determine if the reset is acceptable. */
831    if (pcb->state == SYN_SENT) {
832      /* "In the SYN-SENT state (a RST received in response to an initial SYN),
833          the RST is acceptable if the ACK field acknowledges the SYN." */
834      if (ackno == pcb->snd_nxt) {
835        acceptable = 1;
836      }
837    } else {
838      /* "In all states except SYN-SENT, all reset (RST) segments are validated
839          by checking their SEQ-fields." */
840      if (seqno == pcb->rcv_nxt) {
841        acceptable = 1;
842      } else  if (TCP_SEQ_BETWEEN(seqno, pcb->rcv_nxt,
843                                  pcb->rcv_nxt + pcb->rcv_wnd)) {
844        /* If the sequence number is inside the window, we send a challenge ACK
845           and wait for a re-send with matching sequence number.
846           This follows RFC 5961 section 3.2 and addresses CVE-2004-0230
847           (RST spoofing attack), which is present in RFC 793 RST handling. */
848        tcp_ack_now(pcb);
849      }
850    }
851
852    if (acceptable) {
853      LWIP_DEBUGF(TCP_INPUT_DEBUG, ("tcp_process: Connection RESET\n"));
854      LWIP_ASSERT("tcp_input: pcb->state != CLOSED", pcb->state != CLOSED);
855      recv_flags |= TF_RESET;
856      tcp_clear_flags(pcb, TF_ACK_DELAY);
857      return ERR_RST;
858    } else {
859      LWIP_DEBUGF(TCP_INPUT_DEBUG, ("tcp_process: unacceptable reset seqno %"U32_F" rcv_nxt %"U32_F"\n",
860                                    seqno, pcb->rcv_nxt));
861      LWIP_DEBUGF(TCP_DEBUG, ("tcp_process: unacceptable reset seqno %"U32_F" rcv_nxt %"U32_F"\n",
862                              seqno, pcb->rcv_nxt));
863      return ERR_OK;
864    }
865  }
866
867  if ((flags & TCP_SYN) && (pcb->state != SYN_SENT && pcb->state != SYN_RCVD)) {
868    /* Cope with new connection attempt after remote end crashed */
869    tcp_ack_now(pcb);
870    return ERR_OK;
871  }
872
873  if ((pcb->flags & TF_RXCLOSED) == 0) {
874    /* Update the PCB (in)activity timer unless rx is closed (see tcp_shutdown) */
875    pcb->tmr = tcp_ticks;
876  }
877  pcb->keep_cnt_sent = 0;
878  pcb->persist_probe = 0;
879
880  tcp_parseopt(pcb);
881
882  /* Do different things depending on the TCP state. */
883  switch (pcb->state) {
884    case SYN_SENT:
885      LWIP_DEBUGF(TCP_INPUT_DEBUG, ("SYN-SENT: ackno %"U32_F" pcb->snd_nxt %"U32_F" unacked %s %"U32_F"\n",
886                                    ackno, pcb->snd_nxt, pcb->unacked ? "" : " empty:",
887                                    pcb->unacked ? lwip_ntohl(pcb->unacked->tcphdr->seqno) : 0));
888      /* received SYN ACK with expected sequence number? */
889      if ((flags & TCP_ACK) && (flags & TCP_SYN)
890          && (ackno == pcb->lastack + 1)) {
891        pcb->rcv_nxt = seqno + 1;
892        pcb->rcv_ann_right_edge = pcb->rcv_nxt;
893        pcb->lastack = ackno;
894        pcb->snd_wnd = tcphdr->wnd;
895        pcb->snd_wnd_max = pcb->snd_wnd;
896        pcb->snd_wl1 = seqno - 1; /* initialise to seqno - 1 to force window update */
897        pcb->state = ESTABLISHED;
898
899#if TCP_CALCULATE_EFF_SEND_MSS
900#ifdef LOSCFG_NET_CONTAINER
901        pcb->mss = tcp_eff_send_mss(pcb->mss, &pcb->local_ip, &pcb->remote_ip, group);
902#else
903        pcb->mss = tcp_eff_send_mss(pcb->mss, &pcb->local_ip, &pcb->remote_ip);
904#endif
905#endif /* TCP_CALCULATE_EFF_SEND_MSS */
906
907        pcb->cwnd = LWIP_TCP_CALC_INITIAL_CWND(pcb->mss);
908        LWIP_DEBUGF(TCP_CWND_DEBUG, ("tcp_process (SENT): cwnd %"TCPWNDSIZE_F
909                                     " ssthresh %"TCPWNDSIZE_F"\n",
910                                     pcb->cwnd, pcb->ssthresh));
911        LWIP_ASSERT("pcb->snd_queuelen > 0", (pcb->snd_queuelen > 0));
912        --pcb->snd_queuelen;
913        LWIP_DEBUGF(TCP_QLEN_DEBUG, ("tcp_process: SYN-SENT --queuelen %"TCPWNDSIZE_F"\n", (tcpwnd_size_t)pcb->snd_queuelen));
914        rseg = pcb->unacked;
915        if (rseg == NULL) {
916          /* might happen if tcp_output fails in tcp_rexmit_rto()
917             in which case the segment is on the unsent list */
918          rseg = pcb->unsent;
919          LWIP_ASSERT("no segment to free", rseg != NULL);
920          pcb->unsent = rseg->next;
921        } else {
922          pcb->unacked = rseg->next;
923        }
924        tcp_seg_free(rseg);
925
926        /* If there's nothing left to acknowledge, stop the retransmit
927           timer, otherwise reset it to start again */
928        if (pcb->unacked == NULL) {
929          pcb->rtime = -1;
930        } else {
931          pcb->rtime = 0;
932          pcb->nrtx = 0;
933        }
934
935        /* Call the user specified function to call when successfully
936         * connected. */
937        TCP_EVENT_CONNECTED(pcb, ERR_OK, err);
938        if (err == ERR_ABRT) {
939          return ERR_ABRT;
940        }
941        tcp_ack_now(pcb);
942      }
943      /* received ACK? possibly a half-open connection */
944      else if (flags & TCP_ACK) {
945        /* send a RST to bring the other side in a non-synchronized state. */
946        tcp_rst(pcb, ackno, seqno + tcplen, ip_current_dest_addr(),
947                ip_current_src_addr(), tcphdr->dest, tcphdr->src);
948        /* Resend SYN immediately (don't wait for rto timeout) to establish
949          connection faster, but do not send more SYNs than we otherwise would
950          have, or we might get caught in a loop on loopback interfaces. */
951        if (pcb->nrtx < TCP_SYNMAXRTX) {
952          pcb->rtime = 0;
953          tcp_rexmit_rto(pcb);
954        }
955      }
956      break;
957    case SYN_RCVD:
958      if (flags & TCP_ACK) {
959        /* expected ACK number? */
960        if (TCP_SEQ_BETWEEN(ackno, pcb->lastack + 1, pcb->snd_nxt)) {
961          pcb->state = ESTABLISHED;
962          LWIP_DEBUGF(TCP_DEBUG, ("TCP connection established %"U16_F" -> %"U16_F".\n", inseg.tcphdr->src, inseg.tcphdr->dest));
963#if LWIP_CALLBACK_API || TCP_LISTEN_BACKLOG
964          if (pcb->listener == NULL) {
965            /* listen pcb might be closed by now */
966            err = ERR_VAL;
967          } else
968#endif /* LWIP_CALLBACK_API || TCP_LISTEN_BACKLOG */
969          {
970#if LWIP_CALLBACK_API
971            LWIP_ASSERT("pcb->listener->accept != NULL", pcb->listener->accept != NULL);
972#endif
973            tcp_backlog_accepted(pcb);
974            /* Call the accept function. */
975            TCP_EVENT_ACCEPT(pcb->listener, pcb, pcb->callback_arg, ERR_OK, err);
976          }
977          if (err != ERR_OK) {
978            /* If the accept function returns with an error, we abort
979             * the connection. */
980            /* Already aborted? */
981            if (err != ERR_ABRT) {
982              tcp_abort(pcb);
983            }
984            return ERR_ABRT;
985          }
986          /* If there was any data contained within this ACK,
987           * we'd better pass it on to the application as well. */
988          tcp_receive(pcb);
989
990          /* Prevent ACK for SYN to generate a sent event */
991          if (recv_acked != 0) {
992            recv_acked--;
993          }
994
995          pcb->cwnd = LWIP_TCP_CALC_INITIAL_CWND(pcb->mss);
996          LWIP_DEBUGF(TCP_CWND_DEBUG, ("tcp_process (SYN_RCVD): cwnd %"TCPWNDSIZE_F
997                                       " ssthresh %"TCPWNDSIZE_F"\n",
998                                       pcb->cwnd, pcb->ssthresh));
999
1000          if (recv_flags & TF_GOT_FIN) {
1001            tcp_ack_now(pcb);
1002            pcb->state = CLOSE_WAIT;
1003          }
1004        } else {
1005          /* incorrect ACK number, send RST */
1006          tcp_rst(pcb, ackno, seqno + tcplen, ip_current_dest_addr(),
1007                  ip_current_src_addr(), tcphdr->dest, tcphdr->src);
1008        }
1009      } else if ((flags & TCP_SYN) && (seqno == pcb->rcv_nxt - 1)) {
1010        /* Looks like another copy of the SYN - retransmit our SYN-ACK */
1011        tcp_rexmit(pcb);
1012      }
1013      break;
1014    case CLOSE_WAIT:
1015    /* FALLTHROUGH */
1016    case ESTABLISHED:
1017      tcp_receive(pcb);
1018      if (recv_flags & TF_GOT_FIN) { /* passive close */
1019        tcp_ack_now(pcb);
1020        pcb->state = CLOSE_WAIT;
1021      }
1022      break;
1023    case FIN_WAIT_1:
1024      tcp_receive(pcb);
1025      if (recv_flags & TF_GOT_FIN) {
1026        if ((flags & TCP_ACK) && (ackno == pcb->snd_nxt) &&
1027            pcb->unsent == NULL) {
1028          LWIP_DEBUGF(TCP_DEBUG,
1029                      ("TCP connection closed: FIN_WAIT_1 %"U16_F" -> %"U16_F".\n", inseg.tcphdr->src, inseg.tcphdr->dest));
1030          tcp_ack_now(pcb);
1031          tcp_pcb_purge(pcb);
1032          TCP_RMV_ACTIVE(pcb);
1033          pcb->state = TIME_WAIT;
1034          TCP_REG(&tcp_tw_pcbs, pcb);
1035        } else {
1036          tcp_ack_now(pcb);
1037          pcb->state = CLOSING;
1038        }
1039      } else if ((flags & TCP_ACK) && (ackno == pcb->snd_nxt) &&
1040                 pcb->unsent == NULL) {
1041        pcb->state = FIN_WAIT_2;
1042      }
1043      break;
1044    case FIN_WAIT_2:
1045      tcp_receive(pcb);
1046      if (recv_flags & TF_GOT_FIN) {
1047        LWIP_DEBUGF(TCP_DEBUG, ("TCP connection closed: FIN_WAIT_2 %"U16_F" -> %"U16_F".\n", inseg.tcphdr->src, inseg.tcphdr->dest));
1048        tcp_ack_now(pcb);
1049        tcp_pcb_purge(pcb);
1050        TCP_RMV_ACTIVE(pcb);
1051        pcb->state = TIME_WAIT;
1052        TCP_REG(&tcp_tw_pcbs, pcb);
1053      }
1054      break;
1055    case CLOSING:
1056      tcp_receive(pcb);
1057      if ((flags & TCP_ACK) && ackno == pcb->snd_nxt && pcb->unsent == NULL) {
1058        LWIP_DEBUGF(TCP_DEBUG, ("TCP connection closed: CLOSING %"U16_F" -> %"U16_F".\n", inseg.tcphdr->src, inseg.tcphdr->dest));
1059        tcp_pcb_purge(pcb);
1060        TCP_RMV_ACTIVE(pcb);
1061        pcb->state = TIME_WAIT;
1062        TCP_REG(&tcp_tw_pcbs, pcb);
1063      }
1064      break;
1065    case LAST_ACK:
1066      tcp_receive(pcb);
1067      if ((flags & TCP_ACK) && ackno == pcb->snd_nxt && pcb->unsent == NULL) {
1068        LWIP_DEBUGF(TCP_DEBUG, ("TCP connection closed: LAST_ACK %"U16_F" -> %"U16_F".\n", inseg.tcphdr->src, inseg.tcphdr->dest));
1069        /* bugfix #21699: don't set pcb->state to CLOSED here or we risk leaking segments */
1070        recv_flags |= TF_CLOSED;
1071      }
1072      break;
1073    default:
1074      break;
1075  }
1076  return ERR_OK;
1077}
1078
#if TCP_QUEUE_OOSEQ
/**
 * Link a segment in front of the out-of-sequence segments starting at next,
 * removing whatever the new segment already covers.
 *
 * Queued segments that end at or before the end of cseg are freed. If the
 * first remaining segment still overlaps, cseg is shortened so that it ends
 * where that segment begins. The start of cseg is taken from the global
 * seqno of the segment currently being processed.
 *
 * Called from tcp_receive()
 *
 * @param cseg the segment being inserted (must not be NULL)
 * @param next first queued segment that follows cseg, or NULL
 */
static void
tcp_oos_insert_segment(struct tcp_seg *cseg, struct tcp_seg *next)
{
  struct tcp_seg *tail = next;

  LWIP_ASSERT("tcp_oos_insert_segment: invalid cseg", cseg != NULL);

  if ((TCPH_FLAGS(cseg->tcphdr) & TCP_FIN) != 0) {
    /* a segment carrying FIN overlaps everything queued behind it */
    tcp_segs_free(tail);
    cseg->next = NULL;
    return;
  }

  /* Free the queued segments that cseg covers completely.
     The oos queue may contain a segment with the FIN flag set. */
  while ((tail != NULL) &&
         TCP_SEQ_GEQ((seqno + cseg->len),
                     (tail->tcphdr->seqno + tail->len))) {
    struct tcp_seg *covered = tail;

    if ((TCPH_FLAGS(covered->tcphdr) & TCP_FIN) != 0) {
      /* carry the FIN over to the segment that replaces this one */
      TCPH_SET_FLAG(cseg->tcphdr, TCP_FIN);
    }
    tail = covered->next;
    tcp_seg_free(covered);
  }

  if ((tail != NULL) &&
      TCP_SEQ_GT(seqno + cseg->len, tail->tcphdr->seqno)) {
    /* Partial overlap with the first remaining segment: trim cseg. */
    cseg->len = (u16_t)(tail->tcphdr->seqno - seqno);
    pbuf_realloc(cseg->p, cseg->len);
  }

  cseg->next = tail;
}
#endif /* TCP_QUEUE_OOSEQ */
1120
1121/** Remove segments from a list if the incoming ACK acknowledges them */
1122static struct tcp_seg *
1123tcp_free_acked_segments(struct tcp_pcb *pcb, struct tcp_seg *seg_list, const char *dbg_list_name,
1124                        struct tcp_seg *dbg_other_seg_list)
1125{
1126  struct tcp_seg *next;
1127  u16_t clen;
1128
1129  LWIP_UNUSED_ARG(dbg_list_name);
1130  LWIP_UNUSED_ARG(dbg_other_seg_list);
1131
1132  while (seg_list != NULL &&
1133         TCP_SEQ_LEQ(lwip_ntohl(seg_list->tcphdr->seqno) +
1134                     TCP_TCPLEN(seg_list), ackno)) {
1135    LWIP_DEBUGF(TCP_INPUT_DEBUG, ("tcp_receive: removing %"U32_F":%"U32_F" from pcb->%s\n",
1136                                  lwip_ntohl(seg_list->tcphdr->seqno),
1137                                  lwip_ntohl(seg_list->tcphdr->seqno) + TCP_TCPLEN(seg_list),
1138                                  dbg_list_name));
1139
1140    next = seg_list;
1141    seg_list = seg_list->next;
1142
1143    clen = pbuf_clen(next->p);
1144    LWIP_DEBUGF(TCP_QLEN_DEBUG, ("tcp_receive: queuelen %"TCPWNDSIZE_F" ... ",
1145                                 (tcpwnd_size_t)pcb->snd_queuelen));
1146    LWIP_ASSERT("pcb->snd_queuelen >= pbuf_clen(next->p)", (pcb->snd_queuelen >= clen));
1147
1148    pcb->snd_queuelen = (u16_t)(pcb->snd_queuelen - clen);
1149    recv_acked = (tcpwnd_size_t)(recv_acked + next->len);
1150    tcp_seg_free(next);
1151
1152    LWIP_DEBUGF(TCP_QLEN_DEBUG, ("%"TCPWNDSIZE_F" (after freeing %s)\n",
1153                                 (tcpwnd_size_t)pcb->snd_queuelen,
1154                                 dbg_list_name));
1155    if (pcb->snd_queuelen != 0) {
1156      LWIP_ASSERT("tcp_receive: valid queue length",
1157                  seg_list != NULL || dbg_other_seg_list != NULL);
1158    }
1159  }
1160  return seg_list;
1161}
1162
1163/**
1164 * Called by tcp_process. Checks if the given segment is an ACK for outstanding
1165 * data, and if so frees the memory of the buffered data. Next, it places the
1166 * segment on any of the receive queues (pcb->recved or pcb->ooseq). If the segment
1167 * is buffered, the pbuf is referenced by pbuf_ref so that it will not be freed until
1168 * it has been removed from the buffer.
1169 *
1170 * If the incoming segment constitutes an ACK for a segment that was used for RTT
1171 * estimation, the RTT is estimated here as well.
1172 *
1173 * Called from tcp_process().
1174 */
1175static void
1176tcp_receive(struct tcp_pcb *pcb)
1177{
1178  s16_t m;
1179  u32_t right_wnd_edge;
1180  int found_dupack = 0;
1181
1182  LWIP_ASSERT("tcp_receive: invalid pcb", pcb != NULL);
1183  LWIP_ASSERT("tcp_receive: wrong state", pcb->state >= ESTABLISHED);
1184
1185  if (flags & TCP_ACK) {
1186    right_wnd_edge = pcb->snd_wnd + pcb->snd_wl2;
1187
1188    /* Update window. */
1189    if (TCP_SEQ_LT(pcb->snd_wl1, seqno) ||
1190        (pcb->snd_wl1 == seqno && TCP_SEQ_LT(pcb->snd_wl2, ackno)) ||
1191        (pcb->snd_wl2 == ackno && (u32_t)SND_WND_SCALE(pcb, tcphdr->wnd) > pcb->snd_wnd)) {
1192      pcb->snd_wnd = SND_WND_SCALE(pcb, tcphdr->wnd);
1193      /* keep track of the biggest window announced by the remote host to calculate
1194         the maximum segment size */
1195      if (pcb->snd_wnd_max < pcb->snd_wnd) {
1196        pcb->snd_wnd_max = pcb->snd_wnd;
1197      }
1198      pcb->snd_wl1 = seqno;
1199      pcb->snd_wl2 = ackno;
1200      LWIP_DEBUGF(TCP_WND_DEBUG, ("tcp_receive: window update %"TCPWNDSIZE_F"\n", pcb->snd_wnd));
1201#if TCP_WND_DEBUG
1202    } else {
1203      if (pcb->snd_wnd != (tcpwnd_size_t)SND_WND_SCALE(pcb, tcphdr->wnd)) {
1204        LWIP_DEBUGF(TCP_WND_DEBUG,
1205                    ("tcp_receive: no window update lastack %"U32_F" ackno %"
1206                     U32_F" wl1 %"U32_F" seqno %"U32_F" wl2 %"U32_F"\n",
1207                     pcb->lastack, ackno, pcb->snd_wl1, seqno, pcb->snd_wl2));
1208      }
1209#endif /* TCP_WND_DEBUG */
1210    }
1211
1212    /* (From Stevens TCP/IP Illustrated Vol II, p970.) Its only a
1213     * duplicate ack if:
1214     * 1) It doesn't ACK new data
1215     * 2) length of received packet is zero (i.e. no payload)
1216     * 3) the advertised window hasn't changed
1217     * 4) There is outstanding unacknowledged data (retransmission timer running)
1218     * 5) The ACK is == biggest ACK sequence number so far seen (snd_una)
1219     *
1220     * If it passes all five, should process as a dupack:
1221     * a) dupacks < 3: do nothing
1222     * b) dupacks == 3: fast retransmit
1223     * c) dupacks > 3: increase cwnd
1224     *
1225     * If it only passes 1-3, should reset dupack counter (and add to
1226     * stats, which we don't do in lwIP)
1227     *
1228     * If it only passes 1, should reset dupack counter
1229     *
1230     */
1231
1232    /* Clause 1 */
1233    if (TCP_SEQ_LEQ(ackno, pcb->lastack)) {
1234      /* Clause 2 */
1235      if (tcplen == 0) {
1236        /* Clause 3 */
1237        if (pcb->snd_wl2 + pcb->snd_wnd == right_wnd_edge) {
1238          /* Clause 4 */
1239          if (pcb->rtime >= 0) {
1240            /* Clause 5 */
1241            if (pcb->lastack == ackno) {
1242              found_dupack = 1;
1243              if ((u8_t)(pcb->dupacks + 1) > pcb->dupacks) {
1244                ++pcb->dupacks;
1245              }
1246              if (pcb->dupacks > 3) {
1247                /* Inflate the congestion window */
1248                TCP_WND_INC(pcb->cwnd, pcb->mss);
1249              }
1250              if (pcb->dupacks >= 3) {
1251                /* Do fast retransmit (checked via TF_INFR, not via dupacks count) */
1252                tcp_rexmit_fast(pcb);
1253              }
1254            }
1255          }
1256        }
1257      }
1258      /* If Clause (1) or more is true, but not a duplicate ack, reset
1259       * count of consecutive duplicate acks */
1260      if (!found_dupack) {
1261        pcb->dupacks = 0;
1262      }
1263    } else if (TCP_SEQ_BETWEEN(ackno, pcb->lastack + 1, pcb->snd_nxt)) {
1264      /* We come here when the ACK acknowledges new data. */
1265      tcpwnd_size_t acked;
1266
1267      /* Reset the "IN Fast Retransmit" flag, since we are no longer
1268         in fast retransmit. Also reset the congestion window to the
1269         slow start threshold. */
1270      if (pcb->flags & TF_INFR) {
1271        tcp_clear_flags(pcb, TF_INFR);
1272        pcb->cwnd = pcb->ssthresh;
1273        pcb->bytes_acked = 0;
1274      }
1275
1276      /* Reset the number of retransmissions. */
1277      pcb->nrtx = 0;
1278
1279      /* Reset the retransmission time-out. */
1280      pcb->rto = (s16_t)((pcb->sa >> 3) + pcb->sv);
1281
1282      /* Record how much data this ACK acks */
1283      acked = (tcpwnd_size_t)(ackno - pcb->lastack);
1284
1285      /* Reset the fast retransmit variables. */
1286      pcb->dupacks = 0;
1287      pcb->lastack = ackno;
1288
1289      /* Update the congestion control variables (cwnd and
1290         ssthresh). */
1291      if (pcb->state >= ESTABLISHED) {
1292        if (pcb->cwnd < pcb->ssthresh) {
1293          tcpwnd_size_t increase;
1294          /* limit to 1 SMSS segment during period following RTO */
1295          u8_t num_seg = (pcb->flags & TF_RTO) ? 1 : 2;
1296          /* RFC 3465, section 2.2 Slow Start */
1297          increase = LWIP_MIN(acked, (tcpwnd_size_t)(num_seg * pcb->mss));
1298          TCP_WND_INC(pcb->cwnd, increase);
1299          LWIP_DEBUGF(TCP_CWND_DEBUG, ("tcp_receive: slow start cwnd %"TCPWNDSIZE_F"\n", pcb->cwnd));
1300        } else {
1301          /* RFC 3465, section 2.1 Congestion Avoidance */
1302          TCP_WND_INC(pcb->bytes_acked, acked);
1303          if (pcb->bytes_acked >= pcb->cwnd) {
1304            pcb->bytes_acked = (tcpwnd_size_t)(pcb->bytes_acked - pcb->cwnd);
1305            TCP_WND_INC(pcb->cwnd, pcb->mss);
1306          }
1307          LWIP_DEBUGF(TCP_CWND_DEBUG, ("tcp_receive: congestion avoidance cwnd %"TCPWNDSIZE_F"\n", pcb->cwnd));
1308        }
1309      }
1310      LWIP_DEBUGF(TCP_INPUT_DEBUG, ("tcp_receive: ACK for %"U32_F", unacked->seqno %"U32_F":%"U32_F"\n",
1311                                    ackno,
1312                                    pcb->unacked != NULL ?
1313                                    lwip_ntohl(pcb->unacked->tcphdr->seqno) : 0,
1314                                    pcb->unacked != NULL ?
1315                                    lwip_ntohl(pcb->unacked->tcphdr->seqno) + TCP_TCPLEN(pcb->unacked) : 0));
1316
1317      /* Remove segment from the unacknowledged list if the incoming
1318         ACK acknowledges them. */
1319      pcb->unacked = tcp_free_acked_segments(pcb, pcb->unacked, "unacked", pcb->unsent);
1320      /* We go through the ->unsent list to see if any of the segments
1321         on the list are acknowledged by the ACK. This may seem
1322         strange since an "unsent" segment shouldn't be acked. The
1323         rationale is that lwIP puts all outstanding segments on the
1324         ->unsent list after a retransmission, so these segments may
1325         in fact have been sent once. */
1326      pcb->unsent = tcp_free_acked_segments(pcb, pcb->unsent, "unsent", pcb->unacked);
1327
1328      /* If there's nothing left to acknowledge, stop the retransmit
1329         timer, otherwise reset it to start again */
1330      if (pcb->unacked == NULL) {
1331        pcb->rtime = -1;
1332      } else {
1333        pcb->rtime = 0;
1334      }
1335
1336      pcb->polltmr = 0;
1337
1338#if TCP_OVERSIZE
1339      if (pcb->unsent == NULL) {
1340        pcb->unsent_oversize = 0;
1341      }
1342#endif /* TCP_OVERSIZE */
1343
1344#if LWIP_IPV6 && LWIP_ND6_TCP_REACHABILITY_HINTS
1345      if (ip_current_is_v6()) {
1346        /* Inform neighbor reachability of forward progress. */
1347        nd6_reachability_hint(ip6_current_src_addr());
1348      }
1349#endif /* LWIP_IPV6 && LWIP_ND6_TCP_REACHABILITY_HINTS*/
1350
1351      pcb->snd_buf = (tcpwnd_size_t)(pcb->snd_buf + recv_acked);
1352      /* check if this ACK ends our retransmission of in-flight data */
1353      if (pcb->flags & TF_RTO) {
1354        /* RTO is done if
1355            1) both queues are empty or
1356            2) unacked is empty and unsent head contains data not part of RTO or
1357            3) unacked head contains data not part of RTO */
1358        if (pcb->unacked == NULL) {
1359          if ((pcb->unsent == NULL) ||
1360              (TCP_SEQ_LEQ(pcb->rto_end, lwip_ntohl(pcb->unsent->tcphdr->seqno)))) {
1361            tcp_clear_flags(pcb, TF_RTO);
1362          }
1363        } else if (TCP_SEQ_LEQ(pcb->rto_end, lwip_ntohl(pcb->unacked->tcphdr->seqno))) {
1364          tcp_clear_flags(pcb, TF_RTO);
1365        }
1366      }
1367      /* End of ACK for new data processing. */
1368    } else {
1369      /* Out of sequence ACK, didn't really ack anything */
1370      tcp_send_empty_ack(pcb);
1371    }
1372
1373    LWIP_DEBUGF(TCP_RTO_DEBUG, ("tcp_receive: pcb->rttest %"U32_F" rtseq %"U32_F" ackno %"U32_F"\n",
1374                                pcb->rttest, pcb->rtseq, ackno));
1375
1376    /* RTT estimation calculations. This is done by checking if the
1377       incoming segment acknowledges the segment we use to take a
1378       round-trip time measurement. */
1379    if (pcb->rttest && TCP_SEQ_LT(pcb->rtseq, ackno)) {
1380      /* diff between this shouldn't exceed 32K since this are tcp timer ticks
1381         and a round-trip shouldn't be that long... */
1382      m = (s16_t)(tcp_ticks - pcb->rttest);
1383
1384      LWIP_DEBUGF(TCP_RTO_DEBUG, ("tcp_receive: experienced rtt %"U16_F" ticks (%"U16_F" msec).\n",
1385                                  m, (u16_t)(m * TCP_SLOW_INTERVAL)));
1386
1387      /* This is taken directly from VJs original code in his paper */
1388      m = (s16_t)(m - (pcb->sa >> 3));
1389      pcb->sa = (s16_t)(pcb->sa + m);
1390      if (m < 0) {
1391        m = (s16_t) - m;
1392      }
1393      m = (s16_t)(m - (pcb->sv >> 2));
1394      pcb->sv = (s16_t)(pcb->sv + m);
1395      pcb->rto = (s16_t)((pcb->sa >> 3) + pcb->sv);
1396
1397      LWIP_DEBUGF(TCP_RTO_DEBUG, ("tcp_receive: RTO %"U16_F" (%"U16_F" milliseconds)\n",
1398                                  pcb->rto, (u16_t)(pcb->rto * TCP_SLOW_INTERVAL)));
1399
1400      pcb->rttest = 0;
1401    }
1402  }
1403
1404  /* If the incoming segment contains data, we must process it
1405     further unless the pcb already received a FIN.
1406     (RFC 793, chapter 3.9, "SEGMENT ARRIVES" in states CLOSE-WAIT, CLOSING,
1407     LAST-ACK and TIME-WAIT: "Ignore the segment text.") */
1408  if ((tcplen > 0) && (pcb->state < CLOSE_WAIT)) {
1409    /* This code basically does three things:
1410
1411    +) If the incoming segment contains data that is the next
1412    in-sequence data, this data is passed to the application. This
1413    might involve trimming the first edge of the data. The rcv_nxt
1414    variable and the advertised window are adjusted.
1415
1416    +) If the incoming segment has data that is above the next
1417    sequence number expected (->rcv_nxt), the segment is placed on
1418    the ->ooseq queue. This is done by finding the appropriate
1419    place in the ->ooseq queue (which is ordered by sequence
1420    number) and trim the segment in both ends if needed. An
1421    immediate ACK is sent to indicate that we received an
1422    out-of-sequence segment.
1423
1424    +) Finally, we check if the first segment on the ->ooseq queue
1425    now is in sequence (i.e., if rcv_nxt >= ooseq->seqno). If
1426    rcv_nxt > ooseq->seqno, we must trim the first edge of the
1427    segment on ->ooseq before we adjust rcv_nxt. The data in the
1428    segments that are now on sequence are chained onto the
1429    incoming segment so that we only need to call the application
1430    once.
1431    */
1432
1433    /* First, we check if we must trim the first edge. We have to do
1434       this if the sequence number of the incoming segment is less
1435       than rcv_nxt, and the sequence number plus the length of the
1436       segment is larger than rcv_nxt. */
1437    /*    if (TCP_SEQ_LT(seqno, pcb->rcv_nxt)) {
1438          if (TCP_SEQ_LT(pcb->rcv_nxt, seqno + tcplen)) {*/
1439    if (TCP_SEQ_BETWEEN(pcb->rcv_nxt, seqno + 1, seqno + tcplen - 1)) {
1440      /* Trimming the first edge is done by pushing the payload
1441         pointer in the pbuf downwards. This is somewhat tricky since
1442         we do not want to discard the full contents of the pbuf up to
1443         the new starting point of the data since we have to keep the
1444         TCP header which is present in the first pbuf in the chain.
1445
1446         What is done is really quite a nasty hack: the first pbuf in
1447         the pbuf chain is pointed to by inseg.p. Since we need to be
1448         able to deallocate the whole pbuf, we cannot change this
1449         inseg.p pointer to point to any of the later pbufs in the
1450         chain. Instead, we point the ->payload pointer in the first
1451         pbuf to data in one of the later pbufs. We also set the
1452         inseg.data pointer to point to the right place. This way, the
1453         ->p pointer will still point to the first pbuf, but the
1454         ->p->payload pointer will point to data in another pbuf.
1455
1456         After we are done with adjusting the pbuf pointers we must
1457         adjust the ->data pointer in the seg and the segment
1458         length.*/
1459
1460      struct pbuf *p = inseg.p;
1461      u32_t off32 = pcb->rcv_nxt - seqno;
1462      u16_t new_tot_len, off;
1463      LWIP_ASSERT("inseg.p != NULL", inseg.p);
1464      LWIP_ASSERT("insane offset!", (off32 < 0xffff));
1465      off = (u16_t)off32;
1466      LWIP_ASSERT("pbuf too short!", (((s32_t)inseg.p->tot_len) >= off));
1467      inseg.len -= off;
1468      new_tot_len = (u16_t)(inseg.p->tot_len - off);
1469      while (p->len < off) {
1470        off -= p->len;
1471        /* all pbufs up to and including this one have len==0, so tot_len is equal */
1472        p->tot_len = new_tot_len;
1473        p->len = 0;
1474        p = p->next;
1475      }
1476      /* cannot fail... */
1477      pbuf_remove_header(p, off);
1478      inseg.tcphdr->seqno = seqno = pcb->rcv_nxt;
1479    } else {
1480      if (TCP_SEQ_LT(seqno, pcb->rcv_nxt)) {
1481        /* the whole segment is < rcv_nxt */
1482        /* must be a duplicate of a packet that has already been correctly handled */
1483
1484        LWIP_DEBUGF(TCP_INPUT_DEBUG, ("tcp_receive: duplicate seqno %"U32_F"\n", seqno));
1485        tcp_ack_now(pcb);
1486      }
1487    }
1488
1489    /* The sequence number must be within the window (above rcv_nxt
1490       and below rcv_nxt + rcv_wnd) in order to be further
1491       processed. */
1492    if (TCP_SEQ_BETWEEN(seqno, pcb->rcv_nxt,
1493                        pcb->rcv_nxt + pcb->rcv_wnd - 1)) {
1494      if (pcb->rcv_nxt == seqno) {
1495        /* The incoming segment is the next in sequence. We check if
1496           we have to trim the end of the segment and update rcv_nxt
1497           and pass the data to the application. */
1498        tcplen = TCP_TCPLEN(&inseg);
1499
1500        if (tcplen > pcb->rcv_wnd) {
1501          LWIP_DEBUGF(TCP_INPUT_DEBUG,
1502                      ("tcp_receive: other end overran receive window"
1503                       "seqno %"U32_F" len %"U16_F" right edge %"U32_F"\n",
1504                       seqno, tcplen, pcb->rcv_nxt + pcb->rcv_wnd));
1505          if (TCPH_FLAGS(inseg.tcphdr) & TCP_FIN) {
1506            /* Must remove the FIN from the header as we're trimming
1507             * that byte of sequence-space from the packet */
1508            TCPH_FLAGS_SET(inseg.tcphdr, TCPH_FLAGS(inseg.tcphdr) & ~(unsigned int)TCP_FIN);
1509          }
1510          /* Adjust length of segment to fit in the window. */
1511          TCPWND_CHECK16(pcb->rcv_wnd);
1512          inseg.len = (u16_t)pcb->rcv_wnd;
1513          if (TCPH_FLAGS(inseg.tcphdr) & TCP_SYN) {
1514            inseg.len -= 1;
1515          }
1516          pbuf_realloc(inseg.p, inseg.len);
1517          tcplen = TCP_TCPLEN(&inseg);
1518          LWIP_ASSERT("tcp_receive: segment not trimmed correctly to rcv_wnd\n",
1519                      (seqno + tcplen) == (pcb->rcv_nxt + pcb->rcv_wnd));
1520        }
1521#if TCP_QUEUE_OOSEQ
1522        /* Received in-sequence data, adjust ooseq data if:
1523           - FIN has been received or
1524           - inseq overlaps with ooseq */
1525        if (pcb->ooseq != NULL) {
1526          if (TCPH_FLAGS(inseg.tcphdr) & TCP_FIN) {
1527            LWIP_DEBUGF(TCP_INPUT_DEBUG,
1528                        ("tcp_receive: received in-order FIN, binning ooseq queue\n"));
1529            /* Received in-order FIN means anything that was received
1530             * out of order must now have been received in-order, so
1531             * bin the ooseq queue */
1532            while (pcb->ooseq != NULL) {
1533              struct tcp_seg *old_ooseq = pcb->ooseq;
1534              pcb->ooseq = pcb->ooseq->next;
1535              tcp_seg_free(old_ooseq);
1536            }
1537          } else {
1538            struct tcp_seg *next = pcb->ooseq;
1539            /* Remove all segments on ooseq that are covered by inseg already.
1540             * FIN is copied from ooseq to inseg if present. */
1541            while (next &&
1542                   TCP_SEQ_GEQ(seqno + tcplen,
1543                               next->tcphdr->seqno + next->len)) {
1544              struct tcp_seg *tmp;
1545              /* inseg cannot have FIN here (already processed above) */
1546              if ((TCPH_FLAGS(next->tcphdr) & TCP_FIN) != 0 &&
1547                  (TCPH_FLAGS(inseg.tcphdr) & TCP_SYN) == 0) {
1548                TCPH_SET_FLAG(inseg.tcphdr, TCP_FIN);
1549                tcplen = TCP_TCPLEN(&inseg);
1550              }
1551              tmp = next;
1552              next = next->next;
1553              tcp_seg_free(tmp);
1554            }
1555            /* Now trim right side of inseg if it overlaps with the first
1556             * segment on ooseq */
1557            if (next &&
1558                TCP_SEQ_GT(seqno + tcplen,
1559                           next->tcphdr->seqno)) {
1560              /* inseg cannot have FIN here (already processed above) */
1561              inseg.len = (u16_t)(next->tcphdr->seqno - seqno);
1562              if (TCPH_FLAGS(inseg.tcphdr) & TCP_SYN) {
1563                inseg.len -= 1;
1564              }
1565              pbuf_realloc(inseg.p, inseg.len);
1566              tcplen = TCP_TCPLEN(&inseg);
1567              LWIP_ASSERT("tcp_receive: segment not trimmed correctly to ooseq queue\n",
1568                          (seqno + tcplen) == next->tcphdr->seqno);
1569            }
1570            pcb->ooseq = next;
1571          }
1572        }
1573#endif /* TCP_QUEUE_OOSEQ */
1574
1575        pcb->rcv_nxt = seqno + tcplen;
1576
1577        /* Update the receiver's (our) window. */
1578        LWIP_ASSERT("tcp_receive: tcplen > rcv_wnd\n", pcb->rcv_wnd >= tcplen);
1579        pcb->rcv_wnd -= tcplen;
1580
1581        tcp_update_rcv_ann_wnd(pcb);
1582
1583        /* If there is data in the segment, we make preparations to
1584           pass this up to the application. The ->recv_data variable
1585           is used for holding the pbuf that goes to the
1586           application. The code for reassembling out-of-sequence data
1587           chains its data on this pbuf as well.
1588
1589           If the segment was a FIN, we set the TF_GOT_FIN flag that will
1590           be used to indicate to the application that the remote side has
1591           closed its end of the connection. */
1592        if (inseg.p->tot_len > 0) {
1593          recv_data = inseg.p;
1594          /* Since this pbuf now is the responsibility of the
1595             application, we delete our reference to it so that we won't
1596             (mistakingly) deallocate it. */
1597          inseg.p = NULL;
1598        }
1599        if (TCPH_FLAGS(inseg.tcphdr) & TCP_FIN) {
1600          LWIP_DEBUGF(TCP_INPUT_DEBUG, ("tcp_receive: received FIN.\n"));
1601          recv_flags |= TF_GOT_FIN;
1602        }
1603
1604#if TCP_QUEUE_OOSEQ
1605        /* We now check if we have segments on the ->ooseq queue that
1606           are now in sequence. */
1607        while (pcb->ooseq != NULL &&
1608               pcb->ooseq->tcphdr->seqno == pcb->rcv_nxt) {
1609
1610          struct tcp_seg *cseg = pcb->ooseq;
1611          seqno = pcb->ooseq->tcphdr->seqno;
1612
1613          pcb->rcv_nxt += TCP_TCPLEN(cseg);
1614          LWIP_ASSERT("tcp_receive: ooseq tcplen > rcv_wnd\n",
1615                      pcb->rcv_wnd >= TCP_TCPLEN(cseg));
1616          pcb->rcv_wnd -= TCP_TCPLEN(cseg);
1617
1618          tcp_update_rcv_ann_wnd(pcb);
1619
1620          if (cseg->p->tot_len > 0) {
1621            /* Chain this pbuf onto the pbuf that we will pass to
1622               the application. */
1623            /* With window scaling, this can overflow recv_data->tot_len, but
1624               that's not a problem since we explicitly fix that before passing
1625               recv_data to the application. */
1626            if (recv_data) {
1627              pbuf_cat(recv_data, cseg->p);
1628            } else {
1629              recv_data = cseg->p;
1630            }
1631            cseg->p = NULL;
1632          }
1633          if (TCPH_FLAGS(cseg->tcphdr) & TCP_FIN) {
1634            LWIP_DEBUGF(TCP_INPUT_DEBUG, ("tcp_receive: dequeued FIN.\n"));
1635            recv_flags |= TF_GOT_FIN;
1636            if (pcb->state == ESTABLISHED) { /* force passive close or we can move to active close */
1637              pcb->state = CLOSE_WAIT;
1638            }
1639          }
1640
1641          pcb->ooseq = cseg->next;
1642          tcp_seg_free(cseg);
1643        }
1644#if LWIP_TCP_SACK_OUT
1645        if (pcb->flags & TF_SACK) {
1646          if (pcb->ooseq != NULL) {
1647            /* Some segments may have been removed from ooseq, let's remove all SACKs that
1648               describe anything before the new beginning of that list. */
1649            tcp_remove_sacks_lt(pcb, pcb->ooseq->tcphdr->seqno);
1650          } else if (LWIP_TCP_SACK_VALID(pcb, 0)) {
1651            /* ooseq has been cleared. Nothing to SACK */
1652            memset(pcb->rcv_sacks, 0, sizeof(pcb->rcv_sacks));
1653          }
1654        }
1655#endif /* LWIP_TCP_SACK_OUT */
1656#endif /* TCP_QUEUE_OOSEQ */
1657
1658
1659        /* Acknowledge the segment(s). */
1660        tcp_ack(pcb);
1661
1662#if LWIP_TCP_SACK_OUT
1663        if (LWIP_TCP_SACK_VALID(pcb, 0)) {
1664          /* Normally the ACK for the data received could be piggy-backed on a data packet,
1665             but lwIP currently does not support including SACKs in data packets. So we force
1666             it to respond with an empty ACK packet (only if there is at least one SACK to be sent).
1667             NOTE: tcp_send_empty_ack() on success clears the ACK flags (set by tcp_ack()) */
1668          tcp_send_empty_ack(pcb);
1669        }
1670#endif /* LWIP_TCP_SACK_OUT */
1671
1672#if LWIP_IPV6 && LWIP_ND6_TCP_REACHABILITY_HINTS
1673        if (ip_current_is_v6()) {
1674          /* Inform neighbor reachability of forward progress. */
1675          nd6_reachability_hint(ip6_current_src_addr());
1676        }
1677#endif /* LWIP_IPV6 && LWIP_ND6_TCP_REACHABILITY_HINTS*/
1678
1679      } else {
1680        /* We get here if the incoming segment is out-of-sequence. */
1681
1682#if TCP_QUEUE_OOSEQ
1683        /* We queue the segment on the ->ooseq queue. */
1684        if (pcb->ooseq == NULL) {
1685          pcb->ooseq = tcp_seg_copy(&inseg);
1686#if LWIP_TCP_SACK_OUT
1687          if (pcb->flags & TF_SACK) {
1688            /* All the SACKs should be invalid, so we can simply store the most recent one: */
1689            pcb->rcv_sacks[0].left = seqno;
1690            pcb->rcv_sacks[0].right = seqno + inseg.len;
1691          }
1692#endif /* LWIP_TCP_SACK_OUT */
1693        } else {
1694          /* If the queue is not empty, we walk through the queue and
1695             try to find a place where the sequence number of the
1696             incoming segment is between the sequence numbers of the
1697             previous and the next segment on the ->ooseq queue. That is
1698             the place where we put the incoming segment. If needed, we
1699             trim the second edges of the previous and the incoming
1700             segment so that it will fit into the sequence.
1701
1702             If the incoming segment has the same sequence number as a
1703             segment on the ->ooseq queue, we discard the segment that
1704             contains less data. */
1705
1706#if LWIP_TCP_SACK_OUT
1707          /* This is the left edge of the lowest possible SACK range.
1708             It may start before the newly received segment (possibly adjusted below). */
1709          u32_t sackbeg = TCP_SEQ_LT(seqno, pcb->ooseq->tcphdr->seqno) ? seqno : pcb->ooseq->tcphdr->seqno;
1710#endif /* LWIP_TCP_SACK_OUT */
1711          struct tcp_seg *next, *prev = NULL;
1712          for (next = pcb->ooseq; next != NULL; next = next->next) {
1713            if (seqno == next->tcphdr->seqno) {
1714              /* The sequence number of the incoming segment is the
1715                 same as the sequence number of the segment on
1716                 ->ooseq. We check the lengths to see which one to
1717                 discard. */
1718              if (inseg.len > next->len) {
1719                /* The incoming segment is larger than the old
1720                   segment. We replace some segments with the new
1721                   one. */
1722                struct tcp_seg *cseg = tcp_seg_copy(&inseg);
1723                if (cseg != NULL) {
1724                  if (prev != NULL) {
1725                    prev->next = cseg;
1726                  } else {
1727                    pcb->ooseq = cseg;
1728                  }
1729                  tcp_oos_insert_segment(cseg, next);
1730                }
1731                break;
1732              } else {
1733                /* Either the lengths are the same or the incoming
1734                   segment was smaller than the old one; in either
1735                   case, we ditch the incoming segment. */
1736                break;
1737              }
1738            } else {
1739              if (prev == NULL) {
1740                if (TCP_SEQ_LT(seqno, next->tcphdr->seqno)) {
1741                  /* The sequence number of the incoming segment is lower
1742                     than the sequence number of the first segment on the
1743                     queue. We put the incoming segment first on the
1744                     queue. */
1745                  struct tcp_seg *cseg = tcp_seg_copy(&inseg);
1746                  if (cseg != NULL) {
1747                    pcb->ooseq = cseg;
1748                    tcp_oos_insert_segment(cseg, next);
1749                  }
1750                  break;
1751                }
1752              } else {
1753                /*if (TCP_SEQ_LT(prev->tcphdr->seqno, seqno) &&
1754                  TCP_SEQ_LT(seqno, next->tcphdr->seqno)) {*/
1755                if (TCP_SEQ_BETWEEN(seqno, prev->tcphdr->seqno + 1, next->tcphdr->seqno - 1)) {
1756                  /* The sequence number of the incoming segment is in
1757                     between the sequence numbers of the previous and
1758                     the next segment on ->ooseq. We trim trim the previous
1759                     segment, delete next segments that included in received segment
1760                     and trim received, if needed. */
1761                  struct tcp_seg *cseg = tcp_seg_copy(&inseg);
1762                  if (cseg != NULL) {
1763                    if (TCP_SEQ_GT(prev->tcphdr->seqno + prev->len, seqno)) {
1764                      /* We need to trim the prev segment. */
1765                      prev->len = (u16_t)(seqno - prev->tcphdr->seqno);
1766                      pbuf_realloc(prev->p, prev->len);
1767                    }
1768                    prev->next = cseg;
1769                    tcp_oos_insert_segment(cseg, next);
1770                  }
1771                  break;
1772                }
1773              }
1774
1775#if LWIP_TCP_SACK_OUT
1776              /* The new segment goes after the 'next' one. If there is a "hole" in sequence numbers
1777                 between 'prev' and the beginning of 'next', we want to move sackbeg. */
1778              if (prev != NULL && prev->tcphdr->seqno + prev->len != next->tcphdr->seqno) {
1779                sackbeg = next->tcphdr->seqno;
1780              }
1781#endif /* LWIP_TCP_SACK_OUT */
1782
1783              /* We don't use 'prev' below, so let's set it to current 'next'.
1784                 This way even if we break the loop below, 'prev' will be pointing
1785                 at the segment right in front of the newly added one. */
1786              prev = next;
1787
1788              /* If the "next" segment is the last segment on the
1789                 ooseq queue, we add the incoming segment to the end
1790                 of the list. */
1791              if (next->next == NULL &&
1792                  TCP_SEQ_GT(seqno, next->tcphdr->seqno)) {
1793                if (TCPH_FLAGS(next->tcphdr) & TCP_FIN) {
1794                  /* segment "next" already contains all data */
1795                  break;
1796                }
1797                next->next = tcp_seg_copy(&inseg);
1798                if (next->next != NULL) {
1799                  if (TCP_SEQ_GT(next->tcphdr->seqno + next->len, seqno)) {
1800                    /* We need to trim the last segment. */
1801                    next->len = (u16_t)(seqno - next->tcphdr->seqno);
1802                    pbuf_realloc(next->p, next->len);
1803                  }
1804                  /* check if the remote side overruns our receive window */
1805                  if (TCP_SEQ_GT((u32_t)tcplen + seqno, pcb->rcv_nxt + (u32_t)pcb->rcv_wnd)) {
1806                    LWIP_DEBUGF(TCP_INPUT_DEBUG,
1807                                ("tcp_receive: other end overran receive window"
1808                                 "seqno %"U32_F" len %"U16_F" right edge %"U32_F"\n",
1809                                 seqno, tcplen, pcb->rcv_nxt + pcb->rcv_wnd));
1810                    if (TCPH_FLAGS(next->next->tcphdr) & TCP_FIN) {
1811                      /* Must remove the FIN from the header as we're trimming
1812                       * that byte of sequence-space from the packet */
1813                      TCPH_FLAGS_SET(next->next->tcphdr, TCPH_FLAGS(next->next->tcphdr) & ~TCP_FIN);
1814                    }
1815                    /* Adjust length of segment to fit in the window. */
1816                    next->next->len = (u16_t)(pcb->rcv_nxt + pcb->rcv_wnd - seqno);
1817                    pbuf_realloc(next->next->p, next->next->len);
1818                    tcplen = TCP_TCPLEN(next->next);
1819                    LWIP_ASSERT("tcp_receive: segment not trimmed correctly to rcv_wnd\n",
1820                                (seqno + tcplen) == (pcb->rcv_nxt + pcb->rcv_wnd));
1821                  }
1822                }
1823                break;
1824              }
1825            }
1826          }
1827
1828#if LWIP_TCP_SACK_OUT
1829          if (pcb->flags & TF_SACK) {
1830            if (prev == NULL) {
1831              /* The new segment is at the beginning. sackbeg should already be set properly.
1832                 We need to find the right edge. */
1833              next = pcb->ooseq;
1834            } else if (prev->next != NULL) {
1835              /* The new segment was added after 'prev'. If there is a "hole" between 'prev' and 'prev->next',
1836                 we need to move sackbeg. After that we should find the right edge. */
1837              next = prev->next;
1838              if (prev->tcphdr->seqno + prev->len != next->tcphdr->seqno) {
1839                sackbeg = next->tcphdr->seqno;
1840              }
1841            } else {
1842              next = NULL;
1843            }
1844            if (next != NULL) {
1845              u32_t sackend = next->tcphdr->seqno;
1846              for ( ; (next != NULL) && (sackend == next->tcphdr->seqno); next = next->next) {
1847                sackend += next->len;
1848              }
1849              tcp_add_sack(pcb, sackbeg, sackend);
1850            }
1851          }
1852#endif /* LWIP_TCP_SACK_OUT */
1853        }
1854#if defined(TCP_OOSEQ_BYTES_LIMIT) || defined(TCP_OOSEQ_PBUFS_LIMIT)
1855        {
1856          /* Check that the data on ooseq doesn't exceed one of the limits
1857             and throw away everything above that limit. */
1858#ifdef TCP_OOSEQ_BYTES_LIMIT
1859          const u32_t ooseq_max_blen = TCP_OOSEQ_BYTES_LIMIT(pcb);
1860          u32_t ooseq_blen = 0;
1861#endif
1862#ifdef TCP_OOSEQ_PBUFS_LIMIT
1863          const u16_t ooseq_max_qlen = TCP_OOSEQ_PBUFS_LIMIT(pcb);
1864          u16_t ooseq_qlen = 0;
1865#endif
1866          struct tcp_seg *next, *prev = NULL;
1867          for (next = pcb->ooseq; next != NULL; prev = next, next = next->next) {
1868            struct pbuf *p = next->p;
1869            int stop_here = 0;
1870#ifdef TCP_OOSEQ_BYTES_LIMIT
1871            ooseq_blen += p->tot_len;
1872            if (ooseq_blen > ooseq_max_blen) {
1873              stop_here = 1;
1874            }
1875#endif
1876#ifdef TCP_OOSEQ_PBUFS_LIMIT
1877            ooseq_qlen += pbuf_clen(p);
1878            if (ooseq_qlen > ooseq_max_qlen) {
1879              stop_here = 1;
1880            }
1881#endif
1882            if (stop_here) {
1883#if LWIP_TCP_SACK_OUT
1884              if (pcb->flags & TF_SACK) {
1885                /* Let's remove all SACKs from next's seqno up. */
1886                tcp_remove_sacks_gt(pcb, next->tcphdr->seqno);
1887              }
1888#endif /* LWIP_TCP_SACK_OUT */
1889              /* too much ooseq data, dump this and everything after it */
1890              tcp_segs_free(next);
1891              if (prev == NULL) {
1892                /* first ooseq segment is too much, dump the whole queue */
1893                pcb->ooseq = NULL;
1894              } else {
1895                /* just dump 'next' and everything after it */
1896                prev->next = NULL;
1897              }
1898              break;
1899            }
1900          }
1901        }
1902#endif /* TCP_OOSEQ_BYTES_LIMIT || TCP_OOSEQ_PBUFS_LIMIT */
1903#endif /* TCP_QUEUE_OOSEQ */
1904
1905        /* We send the ACK packet after we've (potentially) dealt with SACKs,
1906           so they can be included in the acknowledgment. */
1907        tcp_send_empty_ack(pcb);
1908      }
1909    } else {
1910      /* The incoming segment is not within the window. */
1911      tcp_send_empty_ack(pcb);
1912    }
1913  } else {
1914    /* Segments with length 0 is taken care of here. Segments that
1915       fall out of the window are ACKed. */
1916    if (!TCP_SEQ_BETWEEN(seqno, pcb->rcv_nxt, pcb->rcv_nxt + pcb->rcv_wnd - 1)) {
1917      tcp_ack_now(pcb);
1918    }
1919  }
1920}
1921
1922static u8_t
1923tcp_get_next_optbyte(void)
1924{
1925  u16_t optidx = tcp_optidx++;
1926  if ((tcphdr_opt2 == NULL) || (optidx < tcphdr_opt1len)) {
1927    u8_t *opts = (u8_t *)tcphdr + TCP_HLEN;
1928    return opts[optidx];
1929  } else {
1930    u8_t idx = (u8_t)(optidx - tcphdr_opt1len);
1931    return tcphdr_opt2[idx];
1932  }
1933}
1934
1935/**
1936 * Parses the options contained in the incoming segment.
1937 *
1938 * Called from tcp_listen_input() and tcp_process().
1939 * Currently, only the MSS option is supported!
1940 *
1941 * @param pcb the tcp_pcb for which a segment arrived
1942 */
1943static void
1944tcp_parseopt(struct tcp_pcb *pcb)
1945{
1946  u8_t data;
1947  u16_t mss;
1948#if LWIP_TCP_TIMESTAMPS
1949  u32_t tsval;
1950#endif
1951
1952  LWIP_ASSERT("tcp_parseopt: invalid pcb", pcb != NULL);
1953
1954  /* Parse the TCP MSS option, if present. */
1955  if (tcphdr_optlen != 0) {
1956    for (tcp_optidx = 0; tcp_optidx < tcphdr_optlen; ) {
1957      u8_t opt = tcp_get_next_optbyte();
1958      switch (opt) {
1959        case LWIP_TCP_OPT_EOL:
1960          /* End of options. */
1961          LWIP_DEBUGF(TCP_INPUT_DEBUG, ("tcp_parseopt: EOL\n"));
1962          return;
1963        case LWIP_TCP_OPT_NOP:
1964          /* NOP option. */
1965          LWIP_DEBUGF(TCP_INPUT_DEBUG, ("tcp_parseopt: NOP\n"));
1966          break;
1967        case LWIP_TCP_OPT_MSS:
1968          LWIP_DEBUGF(TCP_INPUT_DEBUG, ("tcp_parseopt: MSS\n"));
1969          if (tcp_get_next_optbyte() != LWIP_TCP_OPT_LEN_MSS || (tcp_optidx - 2 + LWIP_TCP_OPT_LEN_MSS) > tcphdr_optlen) {
1970            /* Bad length */
1971            LWIP_DEBUGF(TCP_INPUT_DEBUG, ("tcp_parseopt: bad length\n"));
1972            return;
1973          }
1974          /* An MSS option with the right option length. */
1975          mss = (u16_t)(tcp_get_next_optbyte() << 8);
1976          mss |= tcp_get_next_optbyte();
1977          /* Limit the mss to the configured TCP_MSS and prevent division by zero */
1978          pcb->mss = ((mss > TCP_MSS) || (mss == 0)) ? TCP_MSS : mss;
1979          break;
1980#if LWIP_WND_SCALE
1981        case LWIP_TCP_OPT_WS:
1982          LWIP_DEBUGF(TCP_INPUT_DEBUG, ("tcp_parseopt: WND_SCALE\n"));
1983          if (tcp_get_next_optbyte() != LWIP_TCP_OPT_LEN_WS || (tcp_optidx - 2 + LWIP_TCP_OPT_LEN_WS) > tcphdr_optlen) {
1984            /* Bad length */
1985            LWIP_DEBUGF(TCP_INPUT_DEBUG, ("tcp_parseopt: bad length\n"));
1986            return;
1987          }
1988          /* An WND_SCALE option with the right option length. */
1989          data = tcp_get_next_optbyte();
1990          /* If syn was received with wnd scale option,
1991             activate wnd scale opt, but only if this is not a retransmission */
1992          if ((flags & TCP_SYN) && !(pcb->flags & TF_WND_SCALE)) {
1993            pcb->snd_scale = data;
1994            if (pcb->snd_scale > 14U) {
1995              pcb->snd_scale = 14U;
1996            }
1997            pcb->rcv_scale = TCP_RCV_SCALE;
1998            tcp_set_flags(pcb, TF_WND_SCALE);
1999            /* window scaling is enabled, we can use the full receive window */
2000            LWIP_ASSERT("window not at default value", pcb->rcv_wnd == TCPWND_MIN16(TCP_WND));
2001            LWIP_ASSERT("window not at default value", pcb->rcv_ann_wnd == TCPWND_MIN16(TCP_WND));
2002            pcb->rcv_wnd = pcb->rcv_ann_wnd = TCP_WND;
2003          }
2004          break;
2005#endif /* LWIP_WND_SCALE */
2006#if LWIP_TCP_TIMESTAMPS
2007        case LWIP_TCP_OPT_TS:
2008          LWIP_DEBUGF(TCP_INPUT_DEBUG, ("tcp_parseopt: TS\n"));
2009          if (tcp_get_next_optbyte() != LWIP_TCP_OPT_LEN_TS || (tcp_optidx - 2 + LWIP_TCP_OPT_LEN_TS) > tcphdr_optlen) {
2010            /* Bad length */
2011            LWIP_DEBUGF(TCP_INPUT_DEBUG, ("tcp_parseopt: bad length\n"));
2012            return;
2013          }
2014          /* TCP timestamp option with valid length */
2015          tsval = tcp_get_next_optbyte();
2016          tsval |= (tcp_get_next_optbyte() << 8);
2017          tsval |= (tcp_get_next_optbyte() << 16);
2018          tsval |= (tcp_get_next_optbyte() << 24);
2019          if (flags & TCP_SYN) {
2020            pcb->ts_recent = lwip_ntohl(tsval);
2021            /* Enable sending timestamps in every segment now that we know
2022               the remote host supports it. */
2023            tcp_set_flags(pcb, TF_TIMESTAMP);
2024          } else if (TCP_SEQ_BETWEEN(pcb->ts_lastacksent, seqno, seqno + tcplen)) {
2025            pcb->ts_recent = lwip_ntohl(tsval);
2026          }
2027          /* Advance to next option (6 bytes already read) */
2028          tcp_optidx += LWIP_TCP_OPT_LEN_TS - 6;
2029          break;
2030#endif /* LWIP_TCP_TIMESTAMPS */
2031#if LWIP_TCP_SACK_OUT
2032        case LWIP_TCP_OPT_SACK_PERM:
2033          LWIP_DEBUGF(TCP_INPUT_DEBUG, ("tcp_parseopt: SACK_PERM\n"));
2034          if (tcp_get_next_optbyte() != LWIP_TCP_OPT_LEN_SACK_PERM || (tcp_optidx - 2 + LWIP_TCP_OPT_LEN_SACK_PERM) > tcphdr_optlen) {
2035            /* Bad length */
2036            LWIP_DEBUGF(TCP_INPUT_DEBUG, ("tcp_parseopt: bad length\n"));
2037            return;
2038          }
2039          /* TCP SACK_PERM option with valid length */
2040          if (flags & TCP_SYN) {
2041            /* We only set it if we receive it in a SYN (or SYN+ACK) packet */
2042            tcp_set_flags(pcb, TF_SACK);
2043          }
2044          break;
2045#endif /* LWIP_TCP_SACK_OUT */
2046        default:
2047          LWIP_DEBUGF(TCP_INPUT_DEBUG, ("tcp_parseopt: other\n"));
2048          data = tcp_get_next_optbyte();
2049          if (data < 2) {
2050            LWIP_DEBUGF(TCP_INPUT_DEBUG, ("tcp_parseopt: bad length\n"));
2051            /* If the length field is zero, the options are malformed
2052               and we don't process them further. */
2053            return;
2054          }
2055          /* All other options have a length field, so that we easily
2056             can skip past them. */
2057          tcp_optidx += data - 2;
2058      }
2059    }
2060  }
2061}
2062
2063void
2064tcp_trigger_input_pcb_close(void)
2065{
2066  recv_flags |= TF_CLOSED;
2067}
2068
2069#if LWIP_TCP_SACK_OUT
2070/**
2071 * Called by tcp_receive() to add new SACK entry.
2072 *
2073 * The new SACK entry will be placed at the beginning of rcv_sacks[], as the newest one.
2074 * Existing SACK entries will be "pushed back", to preserve their order.
2075 * This is the behavior described in RFC 2018, section 4.
2076 *
2077 * @param pcb the tcp_pcb for which a segment arrived
2078 * @param left the left side of the SACK (the first sequence number)
2079 * @param right the right side of the SACK (the first sequence number past this SACK)
2080 */
2081static void
2082tcp_add_sack(struct tcp_pcb *pcb, u32_t left, u32_t right)
2083{
2084  u8_t i;
2085  u8_t unused_idx;
2086
2087  if ((pcb->flags & TF_SACK) == 0 || !TCP_SEQ_LT(left, right)) {
2088    return;
2089  }
2090
2091  /* First, let's remove all SACKs that are no longer needed (because they overlap with the newest one),
2092     while moving all other SACKs forward.
2093     We run this loop for all entries, until we find the first invalid one.
2094     There is no point checking after that. */
2095  for (i = unused_idx = 0; (i < LWIP_TCP_MAX_SACK_NUM) && LWIP_TCP_SACK_VALID(pcb, i); ++i) {
2096    /* We only want to use SACK at [i] if it doesn't overlap with left:right range.
2097       It does not overlap if its right side is before the newly added SACK,
2098       or if its left side is after the newly added SACK.
2099       NOTE: The equality should not really happen, but it doesn't hurt. */
2100    if (TCP_SEQ_LEQ(pcb->rcv_sacks[i].right, left) || TCP_SEQ_LEQ(right, pcb->rcv_sacks[i].left)) {
2101      if (unused_idx != i) {
2102        /* We don't need to copy if it's already in the right spot */
2103        pcb->rcv_sacks[unused_idx] = pcb->rcv_sacks[i];
2104      }
2105      ++unused_idx;
2106    }
2107  }
2108
2109  /* Now 'unused_idx' is the index of the first invalid SACK entry,
2110     anywhere between 0 (no valid entries) and LWIP_TCP_MAX_SACK_NUM (all entries are valid).
2111     We want to clear this and all following SACKs.
2112     However, we will be adding another one in the front (and shifting everything else back).
2113     So let's just iterate from the back, and set each entry to the one to the left if it's valid,
2114     or to 0 if it is not. */
2115  for (i = LWIP_TCP_MAX_SACK_NUM - 1; i > 0; --i) {
2116    /* [i] is the index we are setting, and the value should be at index [i-1],
2117       or 0 if that index is unused (>= unused_idx). */
2118    if (i - 1 >= unused_idx) {
2119      /* [i-1] is unused. Let's clear [i]. */
2120      pcb->rcv_sacks[i].left = pcb->rcv_sacks[i].right = 0;
2121    } else {
2122      pcb->rcv_sacks[i] = pcb->rcv_sacks[i - 1];
2123    }
2124  }
2125
2126  /* And now we can store the newest SACK */
2127  pcb->rcv_sacks[0].left = left;
2128  pcb->rcv_sacks[0].right = right;
2129}
2130
2131/**
2132 * Called to remove a range of SACKs.
2133 *
2134 * SACK entries will be removed or adjusted to not acknowledge any sequence
2135 * numbers that are less than 'seq' passed. It not only invalidates entries,
2136 * but also moves all entries that are still valid to the beginning.
2137 *
2138 * @param pcb the tcp_pcb to modify
2139 * @param seq the lowest sequence number to keep in SACK entries
2140 */
2141static void
2142tcp_remove_sacks_lt(struct tcp_pcb *pcb, u32_t seq)
2143{
2144  u8_t i;
2145  u8_t unused_idx;
2146
2147  /* We run this loop for all entries, until we find the first invalid one.
2148     There is no point checking after that. */
2149  for (i = unused_idx = 0; (i < LWIP_TCP_MAX_SACK_NUM) && LWIP_TCP_SACK_VALID(pcb, i); ++i) {
2150    /* We only want to use SACK at index [i] if its right side is > 'seq'. */
2151    if (TCP_SEQ_GT(pcb->rcv_sacks[i].right, seq)) {
2152      if (unused_idx != i) {
2153        /* We only copy it if it's not in the right spot already. */
2154        pcb->rcv_sacks[unused_idx] = pcb->rcv_sacks[i];
2155      }
2156      /* NOTE: It is possible that its left side is < 'seq', in which case we should adjust it. */
2157      if (TCP_SEQ_LT(pcb->rcv_sacks[unused_idx].left, seq)) {
2158        pcb->rcv_sacks[unused_idx].left = seq;
2159      }
2160      ++unused_idx;
2161    }
2162  }
2163
2164  /* We also need to invalidate everything from 'unused_idx' till the end */
2165  for (i = unused_idx; i < LWIP_TCP_MAX_SACK_NUM; ++i) {
2166    pcb->rcv_sacks[i].left = pcb->rcv_sacks[i].right = 0;
2167  }
2168}
2169
2170#if defined(TCP_OOSEQ_BYTES_LIMIT) || defined(TCP_OOSEQ_PBUFS_LIMIT)
2171/**
2172 * Called to remove a range of SACKs.
2173 *
2174 * SACK entries will be removed or adjusted to not acknowledge any sequence
2175 * numbers that are greater than (or equal to) 'seq' passed. It not only invalidates entries,
2176 * but also moves all entries that are still valid to the beginning.
2177 *
2178 * @param pcb the tcp_pcb to modify
2179 * @param seq the highest sequence number to keep in SACK entries
2180 */
2181static void
2182tcp_remove_sacks_gt(struct tcp_pcb *pcb, u32_t seq)
2183{
2184  u8_t i;
2185  u8_t unused_idx;
2186
2187  /* We run this loop for all entries, until we find the first invalid one.
2188     There is no point checking after that. */
2189  for (i = unused_idx = 0; (i < LWIP_TCP_MAX_SACK_NUM) && LWIP_TCP_SACK_VALID(pcb, i); ++i) {
2190    /* We only want to use SACK at index [i] if its left side is < 'seq'. */
2191    if (TCP_SEQ_LT(pcb->rcv_sacks[i].left, seq)) {
2192      if (unused_idx != i) {
2193        /* We only copy it if it's not in the right spot already. */
2194        pcb->rcv_sacks[unused_idx] = pcb->rcv_sacks[i];
2195      }
2196      /* NOTE: It is possible that its right side is > 'seq', in which case we should adjust it. */
2197      if (TCP_SEQ_GT(pcb->rcv_sacks[unused_idx].right, seq)) {
2198        pcb->rcv_sacks[unused_idx].right = seq;
2199      }
2200      ++unused_idx;
2201    }
2202  }
2203
2204  /* We also need to invalidate everything from 'unused_idx' till the end */
2205  for (i = unused_idx; i < LWIP_TCP_MAX_SACK_NUM; ++i) {
2206    pcb->rcv_sacks[i].left = pcb->rcv_sacks[i].right = 0;
2207  }
2208}
2209#endif /* TCP_OOSEQ_BYTES_LIMIT || TCP_OOSEQ_PBUFS_LIMIT */
2210
2211#endif /* LWIP_TCP_SACK_OUT */
2212
2213#endif /* LWIP_TCP */
2214