/* 
 * Copyright (c)  1996, 2001 Portland State University
 * All rights reserved.
 *   
 *  Redistribution and use in source and binary forms, with or without
 *  modification, are permitted provided that the following conditions
 *  are met:
 *  1. Redistributions of source code must retain the above copyright
 *     notice, this list of conditions and the following disclaimer as
 *     the first lines of this file unmodified.
 *  2. Redistributions in binary form must reproduce the above copyright
 *     notice, this list of conditions and the following disclaimer in the
 *     documentation and/or other materials provided with the distribution.
 *  
 *  THIS SOFTWARE IS PROVIDED BY THE AUTHOR/S ``AS IS'' AND ANY EXPRESS OR
 *  IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 *  OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 *  IN NO EVENT SHALL Portland State University or the authors BE
 *  LIABLE FOR ANY DIRECT, INDIRECT, 
 *  INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 *  NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 *  DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 *  THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 *  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 *  THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *            
 *  SACK and FACK implementation in FreeBSD Release-4.3.
 *
 *              Dr. Suresh Singh, Shiv Saxena and Harkirat Singh
 *              Portland State University
 *              Computer Science Dept. - Aug 16, 2001
 *
 *
 *              email: {singh, saxenas, harkirat}@cs.pdx.edu
 *              project page: http://www.cs.pdx.edu/~singh/pacman.html
 *
 *
 */
 
diff -c -r ORIG-SRC-FreeBSD4.3/tcp.h SACK-SRC-FreeBSD4.3/tcp.h
*** ORIG-SRC-FreeBSD4.3/tcp.h	Thu Aug 16 13:16:23 2001
--- SACK-SRC-FreeBSD4.3/tcp.h	Thu Aug 16 13:20:55 2001
***************
*** 85,90 ****
--- 85,91 ----
  #define TCPOPT_SACK_PERMITTED	4		/* Experimental */
  #define    TCPOLEN_SACK_PERMITTED	2
  #define TCPOPT_SACK		5		/* Experimental */
+ #define    TCPOLEN_SACK                 8   /*2*sizeof(tcp_seq):len of sack blk */    
  #define TCPOPT_TIMESTAMP	8
  #define    TCPOLEN_TIMESTAMP		10
  #define    TCPOLEN_TSTAMP_APPA		(TCPOLEN_TIMESTAMP+2) /* appendix A */
***************
*** 91,96 ****
--- 92,106 ----
  #define    TCPOPT_TSTAMP_HDR		\
      (TCPOPT_NOP<<24|TCPOPT_NOP<<16|TCPOPT_TIMESTAMP<<8|TCPOLEN_TIMESTAMP)
  
+ #define TCPOPT_SACK_PERMIT_HDR  \
+  (TCPOPT_NOP<<24|TCPOPT_NOP<<16|TCPOPT_SACK_PERMITTED<<8|TCPOLEN_SACK_PERMITTED)
+ #define TCPOPT_SACK_HDR          (TCPOPT_NOP<<24|TCPOPT_NOP<<16|TCPOPT_SACK<<8)
+ 
+ /* Miscellaneous constants */
+ #define MAX_SACK_BLKS	6	/* Max # SACK blocks stored at sender side */
+ #define TCP_MAX_SACK	3	/* MAX # SACKs sent in any segment */
+ 
+ 
  #define	TCPOPT_CC		11		/* CC options: RFC-1644 */
  #define TCPOPT_CCNEW		12
  #define TCPOPT_CCECHO		13
***************
*** 133,137 ****
--- 143,148 ----
  #define	TCP_MAXSEG	0x02	/* set maximum segment size */
  #define TCP_NOPUSH	0x04	/* don't push last block of write */
  #define TCP_NOOPT	0x08	/* don't use TCP options */
+ #define	TCP_SACK_DISABLE 0x300  /* disable SACKs (if enabled by def.) */
  
  #endif
diff -c -r ORIG-SRC-FreeBSD4.3/tcp_input.c SACK-SRC-FreeBSD4.3/tcp_input.c
*** ORIG-SRC-FreeBSD4.3/tcp_input.c	Thu Aug 16 13:16:24 2001
--- SACK-SRC-FreeBSD4.3/tcp_input.c	Thu Aug 16 13:26:23 2001
***************
*** 100,106 ****
  
  MALLOC_DEFINE(M_TSEGQ, "tseg_qent", "TCP segment queue entry");
  
! static int	tcprexmtthresh = 3;
  tcp_seq	tcp_iss;
  tcp_cc	tcp_ccgen;
  
--- 100,106 ----
  
  MALLOC_DEFINE(M_TSEGQ, "tseg_qent", "TCP segment queue entry");
  
! int	tcprexmtthresh = 3;
  tcp_seq	tcp_iss;
  tcp_cc	tcp_ccgen;
  
***************
*** 847,852 ****
--- 847,856 ----
  	tp->t_rcvtime = ticks;
  	if (TCPS_HAVEESTABLISHED(tp->t_state))
  		callout_reset(tp->tt_keep, tcp_keepidle, tcp_timer_keep, tp);
+ #ifdef TCP_SACK
+ 	if (!tp->sack_disable)
+ 		tcp_del_sackholes(tp, th); /* Delete stale SACK holes */
+ #endif /* TCP_SACK */
  
  	/*
  	 * Process options if not in LISTEN state,
***************
*** 855,860 ****
--- 859,870 ----
  	if (tp->t_state != TCPS_LISTEN)
  		tcp_dooptions(tp, optp, optlen, th, &to);
  
+ #ifdef TCP_SACK
+ 	if (!tp->sack_disable) {
+ 		tp->rcv_laststart = th->th_seq; /* last rec'vd segment*/
+ 		tp->rcv_lastend = th->th_seq + tlen;
+ 	}
+ #endif /* TCP_SACK */
  	/*
  	 * Header prediction: check for the two common cases
  	 * of a uni-directional data xfer.  If the packet has
***************
*** 931,936 ****
--- 941,959 ----
  				tcpstat.tcps_rcvackbyte += acked;
  				sbdrop(&so->so_snd, acked);
  				tp->snd_una = th->th_ack;
+ 
+ #if defined(TCP_SACK)
+ 				/* 
+ 				 * We want snd_last to track snd_una so
+ 				 * as to avoid sequence wraparound problems
+ 				 * for very large transfers.
+ 				 */
+ 				tp->snd_last = tp->snd_una;
+ #endif /* TCP_SACK */
+ #if defined(TCP_SACK) && defined(TCP_FACK)
+ 				tp->snd_fack = tp->snd_una;
+ 				tp->retran_data = 0;
+ #endif /* TCP_FACK */
  				m_freem(m);
  				ND6_HINT(tp); /* some progress has been done */
  
***************
*** 963,968 ****
--- 986,997 ----
  			 * with nothing on the reassembly queue and
  			 * we have enough buffer space to take it.
  			 */
+ 
+ #ifdef TCP_SACK
+ 			/* Clean receiver SACK report if present */
+ 			if (!tp->sack_disable && tp->rcv_numsacks)
+ 				tcp_clean_sackreport(tp);
+ #endif /* TCP_SACK */
  			++tcpstat.tcps_preddat;
  			tp->rcv_nxt += tlen;
  			tcpstat.tcps_rcvpack++;
***************
*** 1115,1120 ****
--- 1144,1160 ----
  			bzero(taop, sizeof(*taop));
  		}
  		tcp_dooptions(tp, optp, optlen, th, &to);
+ 
+ #ifdef TCP_SACK
+ 		/*
+ 		 * If peer did not send a SACK_PERMITTED option (i.e., if
+ 		 * tcp_dooptions() did not set TF_SACK_PERMIT), set 
+                  * sack_disable to 1 if it is currently 0.
+                  */
+                 if (!tp->sack_disable)
+                         if ((tp->t_flags & TF_SACK_PERMIT) == 0) 
+                                 tp->sack_disable = 1;
+ #endif
  		if (iss)
  			tp->iss = iss;
  		else {
***************
*** 1127,1132 ****
--- 1167,1180 ----
   		}
  		tp->irs = th->th_seq;
  		tcp_sendseqinit(tp);
+ #if defined (TCP_SACK)
+ 		tp->snd_last = tp->snd_una;
+ #endif /* TCP_SACK */
+ #if defined(TCP_SACK) && defined(TCP_FACK)
+ 		tp->snd_fack = tp->snd_una;
+ 		tp->retran_data = 0;
+ 		tp->snd_awnd = 0;
+ #endif /* TCP_FACK */
  		tcp_rcvseqinit(tp);
  		tp->snd_recover = tp->snd_una;
  		/*
***************
*** 1278,1284 ****
  
  		tp->irs = th->th_seq;
  		tcp_rcvseqinit(tp);
! 		if (thflags & TH_ACK) {
  			/*
  			 * Our SYN was acked.  If segment contains CC.ECHO
  			 * option, check it to make sure this segment really
--- 1326,1332 ----
  
  		tp->irs = th->th_seq;
  		tcp_rcvseqinit(tp);
! 		if (thflags & TH_ACK) { 
  			/*
  			 * Our SYN was acked.  If segment contains CC.ECHO
  			 * option, check it to make sure this segment really
***************
*** 1298,1303 ****
--- 1346,1362 ----
  				}
  			} else
  				tp->t_flags &= ~TF_RCVD_CC;
+ #ifdef TCP_SACK
+                 /*
+                  * If we've sent a SACK_PERMITTED option, and the peer
+                  * also replied with one, then TF_SACK_PERMIT should have
+                  * been set in tcp_dooptions().  If it was not, disable SACKs.
+                  */
+                 if (!tp->sack_disable)
+                         if ((tp->t_flags & TF_SACK_PERMIT) == 0) 
+                                 tp->sack_disable = 1;
+ #endif
+ 	
  			tcpstat.tcps_connects++;
  			soisconnected(so);
  			/* Do window scaling on this connection? */
***************
*** 1813,1888 ****
  				 * to keep a constant cwnd packets in the
  				 * network.
  				 */
  				if (!callout_active(tp->tt_rexmt) ||
  				    th->th_ack != tp->snd_una)
! 					tp->t_dupacks = 0;
  				else if (++tp->t_dupacks == tcprexmtthresh) {
! 					tcp_seq onxt = tp->snd_nxt;
! 					u_int win =
! 					    min(tp->snd_wnd, tp->snd_cwnd) / 2 /
! 						tp->t_maxseg;
! 					if (tcp_do_newreno && SEQ_LT(th->th_ack,
! 					    tp->snd_recover)) {
! 						/* False retransmit, should not
! 						 * cut window
! 						 */
! 						tp->snd_cwnd += tp->t_maxseg;
! 						tp->t_dupacks = 0;
! 						(void) tcp_output(tp);
! 						goto drop;
! 					}
! 					if (win < 2)
! 						win = 2;
! 					tp->snd_ssthresh = win * tp->t_maxseg;
! 					tp->snd_recover = tp->snd_max;
! 					callout_stop(tp->tt_rexmt);
! 					tp->t_rtttime = 0;
! 					tp->snd_nxt = th->th_ack;
! 					tp->snd_cwnd = tp->t_maxseg;
! 					(void) tcp_output(tp);
! 					tp->snd_cwnd = tp->snd_ssthresh +
! 					       tp->t_maxseg * tp->t_dupacks;
! 					if (SEQ_GT(onxt, tp->snd_nxt))
! 						tp->snd_nxt = onxt;
! 					goto drop;
  				} else if (tp->t_dupacks > tcprexmtthresh) {
! 					tp->snd_cwnd += tp->t_maxseg;
! 					(void) tcp_output(tp);
! 					goto drop;
  				}
! 			} else
! 				tp->t_dupacks = 0;
! 			break;
! 		}
! 		/*
! 		 * If the congestion window was inflated to account
! 		 * for the other side's cached packets, retract it.
! 		 */
! 		if (tcp_do_newreno == 0) {
!                         if (tp->t_dupacks >= tcprexmtthresh &&
!                                 tp->snd_cwnd > tp->snd_ssthresh)
!                                 tp->snd_cwnd = tp->snd_ssthresh;
!                         tp->t_dupacks = 0;
!                 } else if (tp->t_dupacks >= tcprexmtthresh &&
! 		    !tcp_newreno(tp, th)) {
!                         /*
!                          * Window inflation should have left us with approx.
!                          * snd_ssthresh outstanding data.  But in case we
!                          * would be inclined to send a burst, better to do
!                          * it via the slow start mechanism.
!                          */
! 			if (SEQ_GT(th->th_ack + tp->snd_ssthresh, tp->snd_max))
!                                 tp->snd_cwnd =
! 				    tp->snd_max - th->th_ack + tp->t_maxseg;
! 			else
!                         	tp->snd_cwnd = tp->snd_ssthresh;
!                         tp->t_dupacks = 0;
!                 }
! 		if (SEQ_GT(th->th_ack, tp->snd_max)) {
! 			tcpstat.tcps_rcvacktoomuch++;
! 			goto dropafterack;
! 		}
! 		/*
  		 *  If we reach this point, ACK is not a duplicate,
  		 *     i.e., it ACKs something we sent.
  		 */
--- 1872,2059 ----
  				 * to keep a constant cwnd packets in the
  				 * network.
  				 */
+ 
  				if (!callout_active(tp->tt_rexmt) ||
  				    th->th_ack != tp->snd_una)
! 				  tp->t_dupacks = 0;
! #if defined(TCP_SACK) && defined(TCP_FACK)
! 				/* 
! 				 * In FACK, can enter fast rec. if the receiver
! 				 * reports a reass. queue longer than 3 segs.
! 				 */
! 				else if (++tp->t_dupacks == tcprexmtthresh ||
! 					 ((SEQ_GT(tp->snd_fack, tcprexmtthresh * 
! 						  tp->t_maxseg + tp->snd_una)) &&
! 					  SEQ_GT(tp->snd_una, tp->snd_last))) {
! #else
  				else if (++tp->t_dupacks == tcprexmtthresh) {
! #endif /* TCP_FACK */
! 				  tcp_seq onxt = tp->snd_nxt;
! 				  u_int win =
! 				    min(tp->snd_wnd, tp->snd_cwnd) / 2 /
! 				    tp->t_maxseg;
! #if defined(TCP_SACK)
! 				  if (SEQ_LT(th->th_ack, tp->snd_last)){
! 				    /* 
! 				     * False fast retx after 
! 				     * timeout.  Do not cut window.
! 				     */
! 				    tp->t_dupacks = 0;
! 				    goto drop;
! 				  }
! #else
! 				  if (tcp_do_newreno && SEQ_LT(th->th_ack,
! 							       tp->snd_recover)) {
! 				    /* False retransmit, should not
! 				     * cut window
! 				     */
! 				    tp->snd_cwnd += tp->t_maxseg;
! 				    tp->t_dupacks = 0;
! 				    (void) tcp_output(tp);
! 				    goto drop;
! 				  }
! #endif
! 				  if (win < 2)
! 				    win = 2;
! 				  tp->snd_ssthresh = win * tp->t_maxseg;
! #if defined(TCP_SACK)
! 				  tp->snd_last = tp->snd_max;	/* recovery ends here */
! #else
! 				  tp->snd_recover = tp->snd_max;	/* ';' was missing */
! #endif
! #ifdef TCP_SACK
! 				    if (!tp->sack_disable) {
! 				      callout_stop(tp->tt_rexmt);
! 				      tp->t_rtttime = 0;
! #if defined(TCP_SACK) && defined(TCP_FACK) 
! 				      tp->t_dupacks = tcprexmtthresh;
! 				      (void) tcp_output(tp);
! 				      /*
! 				       * During FR, snd_cwnd is held
! 				       * constant for FACK.
! 				       */
! 				      tp->snd_cwnd = tp->snd_ssthresh;
! #else
! 				      /* 
! 				       * tcp_output() will send
! 				       * oldest SACK-eligible rtx.
! 				       */
! 				      (void) tcp_output(tp);
! 				      tp->snd_cwnd = tp->snd_ssthresh+
! 				      tp->t_maxseg * tp->t_dupacks;
! #endif /* TCP_FACK */
! 				      goto drop;
! 				    }
! #endif /* TCP_SACK */
! 				  callout_stop(tp->tt_rexmt);
! 				  tp->t_rtttime = 0;
! 				  tp->snd_nxt = th->th_ack;
! 				  tp->snd_cwnd = tp->t_maxseg;
! 				  (void) tcp_output(tp);
! 				  
! 				  tp->snd_cwnd = tp->snd_ssthresh +
! 				    tp->t_maxseg * tp->t_dupacks;
! 				  if (SEQ_GT(onxt, tp->snd_nxt))
! 				    tp->snd_nxt = onxt;
! 				  goto drop;
  				} else if (tp->t_dupacks > tcprexmtthresh) {
! #if defined(TCP_SACK) && defined(TCP_FACK)
! 				  /* 
! 				   * while (awnd < cwnd) 
! 				   *         sendsomething(); 
! 				   */
! 				  if (!tp->sack_disable) {
! 				    if (tp->snd_awnd < tp->snd_cwnd)
! 				      tcp_output(tp);
! 				    goto drop;
! 				  }
! #endif /* TCP_FACK */
! 				  tp->snd_cwnd += tp->t_maxseg;
! 				  (void) tcp_output(tp);
! 				  goto drop;
  				}
! 				} else
! 				  tp->t_dupacks = 0;
! 				break;
! 			}
! 			
! 			/*
! 			 * If the congestion window was inflated to account
! 			 * for the other side's cached packets, retract it.
! 			 * With SACK enabled, a partial ACK leaves t_dupacks
! 			 * untouched (see tcp_sack_partialack()).
! 			 */
! #if defined(TCP_SACK)
! 			if (!tp->sack_disable) {
! 			  if (tp->t_dupacks >= tcprexmtthresh) {
! 				/* Check for a partial ACK */
! 			    if (tcp_sack_partialack(tp, th)) {
! #if defined(TCP_SACK) && defined(TCP_FACK)
! 			      /* Force call to tcp_output */
! 			      if (tp->snd_awnd < tp->snd_cwnd) 
! 				needoutput = 1;
! #else
! 			      tp->snd_cwnd += tp->t_maxseg;
! 			      needoutput = 1;
! #endif /* TCP_FACK */
! 			    } else {
! 			      /* Out of fast recovery */
! 			      tp->snd_cwnd = tp->snd_ssthresh;
! 			      if (tcp_seq_subtract(tp->snd_max, 
! 						   th->th_ack) < tp->snd_ssthresh)
! 				tp->snd_cwnd = 
! 				  tcp_seq_subtract(tp->snd_max,
! 					           th->th_ack);
! 			      tp->t_dupacks = 0;
! #if defined(TCP_SACK) && defined(TCP_FACK)
! 			      if (SEQ_GT(th->th_ack, tp->snd_fack))
! 				tp->snd_fack = th->th_ack;
! #endif /* TCP_FACK */
! 			    }
! 			  } 
! 			} else {
! 			  if (tp->t_dupacks >= tcprexmtthresh && 
! 			      !tcp_newreno(tp, th)) {
! 				/* Out of fast recovery */
! 			    tp->snd_cwnd = tp->snd_ssthresh;
! 			    if (tcp_seq_subtract(tp->snd_max, th->th_ack) <
! 				tp->snd_ssthresh)
! 			      tp->snd_cwnd = 
! 				tcp_seq_subtract(tp->snd_max,
! 						 th->th_ack);
! 			    tp->t_dupacks = 0;
! 			  }
! 			}
! 			if (tp->t_dupacks < tcprexmtthresh)
! 			  tp->t_dupacks = 0;
! #else /* else no TCP_SACK */
! 			if (tcp_do_newreno == 0) {
! 			  if (tp->t_dupacks >= tcprexmtthresh &&
! 			      tp->snd_cwnd > tp->snd_ssthresh)
! 			    tp->snd_cwnd = tp->snd_ssthresh;
! 			  tp->t_dupacks = 0;
! 			} else if (tp->t_dupacks >= tcprexmtthresh &&
! 				   !tcp_newreno(tp, th)) {
! 			  /*
! 			   * Window inflation should have left us with approx.
! 			   * snd_ssthresh outstanding data.  But in case we
! 			   * would be inclined to send a burst, better to do
! 			   * it via the slow start mechanism.
! 			   */
! 			  if (SEQ_GT(th->th_ack + tp->snd_ssthresh, tp->snd_max))
! 			    tp->snd_cwnd =
! 			      tp->snd_max - th->th_ack + tp->t_maxseg;
! 			  else
! 			    tp->snd_cwnd = tp->snd_ssthresh;
! 			  tp->t_dupacks = 0;
! 			}
! #endif
! 			
! 			if (SEQ_GT(th->th_ack, tp->snd_max)) {
! 			  tcpstat.tcps_rcvacktoomuch++;
! 			  goto dropafterack;
! 			}
! 			/*
  		 *  If we reach this point, ACK is not a duplicate,
  		 *     i.e., it ACKs something we sent.
  		 */
***************
*** 1975,1982 ****
  		 * in NewReno fast recovery mode, so we leave the congestion
  		 * window alone.
  		 */
  		if (tcp_do_newreno == 0 || tp->t_dupacks == 0)
! 			tp->snd_cwnd = min(cw + incr,TCP_MAXWIN<<tp->snd_scale);
  		}
  		if (acked > so->so_snd.sb_cc) {
  			tp->snd_wnd -= so->so_snd.sb_cc;
--- 2146,2159 ----
  		 * in NewReno fast recovery mode, so we leave the congestion
  		 * window alone.
  		 */
+ 
+ #if defined (TCP_SACK)
+ 		if (tp->t_dupacks < tcprexmtthresh)
+ 		  tp->snd_cwnd = min(cw + incr,TCP_MAXWIN<<tp->snd_scale);
+ #else	
  		if (tcp_do_newreno == 0 || tp->t_dupacks == 0)
! 		  tp->snd_cwnd = min(cw + incr,TCP_MAXWIN<<tp->snd_scale);
! #endif
  		}
  		if (acked > so->so_snd.sb_cc) {
  			tp->snd_wnd -= so->so_snd.sb_cc;
***************
*** 1990,1997 ****
  		sowwakeup(so);
  		tp->snd_una = th->th_ack;
  		if (SEQ_LT(tp->snd_nxt, tp->snd_una))
! 			tp->snd_nxt = tp->snd_una;
! 
  		switch (tp->t_state) {
  
  		/*
--- 2167,2184 ----
  		sowwakeup(so);
  		tp->snd_una = th->th_ack;
  		if (SEQ_LT(tp->snd_nxt, tp->snd_una))
! 		  tp->snd_nxt = tp->snd_una;
! #if defined (TCP_SACK) && defined (TCP_FACK)
! 		if (SEQ_GT(tp->snd_una, tp->snd_fack)) {
! 		  tp->snd_fack = tp->snd_una;
! 		  /* Update snd_awnd for partial ACK
! 		   * without any SACK blocks.
! 		   */
! 		  tp->snd_awnd = tcp_seq_subtract(tp->snd_nxt,
! 						  tp->snd_fack) + tp->retran_data;
! 		}
! #endif
! 		
  		switch (tp->t_state) {
  
  		/*
***************
*** 2161,2166 ****
--- 2348,2357 ----
  	    TCPS_HAVERCVDFIN(tp->t_state) == 0) {
  		m_adj(m, drop_hdrlen);	/* delayed header drop */
  		TCP_REASS(tp, th, &tlen, m, so, thflags);
+ #ifdef TCP_SACK
+ 		if (!tp->sack_disable)
+ 			tcp_update_sack_list(tp); 
+ #endif 
  		/*
  		 * Note the amount of data that peer has sent into
  		 * our window, in order to estimate the sender's
***************
*** 2172,2177 ****
--- 2363,2369 ----
  		thflags &= ~TH_FIN;
  	}
  
+ 
  	/*
  	 * If FIN is received ACK the FIN and let the user know
  	 * that the connection is closing.
***************
*** 2468,2473 ****
--- 2660,2678 ----
  			    (char *)&to->to_ccecho, sizeof(to->to_ccecho));
  			NTOHL(to->to_ccecho);
  			break;
+ #ifdef TCP_SACK 
+ 		case TCPOPT_SACK_PERMITTED:
+ 			if (tp->sack_disable || optlen!=TCPOLEN_SACK_PERMITTED)
+ 				continue;
+ 			if (th->th_flags & TH_SYN)
+ 				/* MUST only be set on SYN */
+ 				tp->t_flags |= TF_SACK_PERMIT;
+ 			break;
+ 		case TCPOPT_SACK:
+ 			if (tcp_sack_option(tp, th, cp, optlen))
+ 				continue;
+ 			break;
+ #endif     
  		}
  	}
  	if (th->th_flags & TH_SYN)
***************
*** 2474,2480 ****
--- 2679,3098 ----
  		tcp_mss(tp, mss);	/* sets t_maxseg */
  }
  
+ #if defined(TCP_SACK)
+ /* Return a - b modulo 2^32 (tcp_seq arithmetic), whatever sizeof(long) is. */
+ u_long
+ tcp_seq_subtract(a, b)
+ 	u_long a, b;
+ {
+ 	/* Truncate through tcp_seq so wraparound works when long is 64 bits. */
+ 	return ((u_long)(tcp_seq)(a - b));
+ }
+ #endif /* TCP_SACK */
+ 
+ #ifdef TCP_SACK 
  /*
+  * Called after new valid data has been received (not on the header
+  * prediction path); rebuilds tp->sackblks, the ordered list of sacks.
+  */
+ void 
+ tcp_update_sack_list(tp)
+ 	struct tcpcb *tp; 
+ {    
+ 	/* 
+ 	 * First reported block MUST be the most recent one.  Subsequent
+ 	 * blocks SHOULD be in the order in which they arrived at the
+ 	 * receiver.  These two conditions make the implementation fully
+ 	 * compliant with RFC 2018.
+ 	 */     
+ 	int i, j = 0, count = 0, lastpos = -1;
+ 	struct sackblk sack, firstsack, temp[MAX_SACK_BLKS];
+     
+ 	/* Drop empty slots and blocks at or below rcv_nxt; keep rest in temp */
+ 	for (i = 0; i < tp->rcv_numsacks; i++) {
+ 		sack = tp->sackblks[i];
+ 		if (sack.start == 0 && sack.end == 0) {
+ 			count++; /* count = number of blocks to be discarded */
+ 			continue;
+ 		}
+ 		if (SEQ_LEQ(sack.end, tp->rcv_nxt)) {
+ 			tp->sackblks[i].start = tp->sackblks[i].end = 0;
+ 			count++;
+ 		} else { 
+ 			temp[j].start = tp->sackblks[i].start;
+ 			temp[j++].end = tp->sackblks[i].end;
+ 		}
+ 	}   
+ 	tp->rcv_numsacks -= count;
+ 	if (tp->rcv_numsacks == 0) { /* no sack blocks currently (fast path) */
+ 		tcp_clean_sackreport(tp);
+ 		if (SEQ_LT(tp->rcv_nxt, tp->rcv_laststart)) {
+ 			/* last segment lies above rcv_nxt: report it as a block */
+ 			tp->sackblks[0].start = tp->rcv_laststart;
+ 			tp->sackblks[0].end = tp->rcv_lastend;
+ 			tp->rcv_numsacks = 1;
+ 		}
+ 		return;
+ 	}
+ 	/* Otherwise, sack blocks are already present. */
+ 	for (i = 0; i < tp->rcv_numsacks; i++)
+ 		tp->sackblks[i] = temp[i]; /* first copy back sack list */
+ 	if (SEQ_GEQ(tp->rcv_nxt, tp->rcv_lastend)) 
+ 		return;     /* sack list remains unchanged */
+ 	/* 
+ 	 * From here, segment just received should be (part of) the 1st sack.
+ 	 * Go through list, possibly coalescing sack block entries.
+ 	 */
+ 	firstsack.start = tp->rcv_laststart;
+ 	firstsack.end = tp->rcv_lastend;
+ 	for (i = 0; i < tp->rcv_numsacks; i++) {
+ 		sack = tp->sackblks[i];
+ 		if (SEQ_LT(sack.end, firstsack.start) ||
+ 		    SEQ_GT(sack.start, firstsack.end))
+ 			continue; /* no overlap */
+ 		if (sack.start == firstsack.start && sack.end == firstsack.end){
+ 			/* 
+ 			 * identical block; delete it here since we will
+ 			 * move it to the front of the list.
+ 			 */
+ 			tp->sackblks[i].start = tp->sackblks[i].end = 0;
+ 			lastpos = i;    /* last posn with a zero entry */
+ 			continue;
+ 		}
+ 		if (SEQ_LEQ(sack.start, firstsack.start))
+ 			firstsack.start = sack.start; /* merge blocks */
+ 		if (SEQ_GEQ(sack.end, firstsack.end))
+ 			firstsack.end = sack.end;     /* merge blocks */
+ 		tp->sackblks[i].start = tp->sackblks[i].end = 0;
+ 		lastpos = i;    /* last posn with a zero entry */
+ 	}
+ 	if (lastpos != -1) {    /* at least one block merged or deleted */
+ 		for (i = 0, j = 1; i < tp->rcv_numsacks; i++) {
+ 			sack = tp->sackblks[i];
+ 			if (sack.start == 0 && sack.end == 0)
+ 				continue;
+ 			temp[j++] = sack; /* slot 0 is reserved for firstsack */
+ 		}
+ 		tp->rcv_numsacks = j; /* including first blk (added later) */
+ 		for (i = 1; i < tp->rcv_numsacks; i++) /* now copy back */
+ 			tp->sackblks[i] = temp[i];
+ 	} else {        /* no merges -- shift sacks by 1 */
+ 		if (tp->rcv_numsacks < MAX_SACK_BLKS)
+ 			tp->rcv_numsacks++; /* when full, the last block is lost */
+ 		for (i = tp->rcv_numsacks-1; i > 0; i--)
+ 			tp->sackblks[i] = tp->sackblks[i-1];
+ 	}
+ 	tp->sackblks[0] = firstsack;
+ 	return;
+ }  
+ 
+ /*
+  * Process a received TCP SACK option (cp, optlen) and update tp->snd_holes,
+  * an ordered list of holes (oldest to newest in sequence space).  Returns 1
+  * if the option was ignored (SACK disabled or bad length), 0 if processed.
+  */
+ int
+ tcp_sack_option(tp, th, cp, optlen)
+ 	struct tcpcb *tp;
+ 	struct tcphdr *th;
+ 	u_char *cp;
+ 	int    optlen;
+ {
+ 	int tmp_olen;
+ 	u_char *tmp_cp;
+ 	struct sackhole *cur, *p, *temp;
+ 
+ 	if (tp->sack_disable)
+ 		return 1;
+ 
+ 	/* Note: TCPOLEN_SACK must be 2*sizeof(tcp_seq) */
+ 	if (optlen <= 2 || (optlen - 2) % TCPOLEN_SACK != 0)
+ 		return 1;
+ 	tmp_cp = cp + 2;
+ 	tmp_olen = optlen - 2;
+ 	if (tp->snd_numholes < 0)
+ 		tp->snd_numholes = 0;
+ 	if (tp->t_maxseg == 0)
+ 		panic("tcp_sack_option"); /* Should never happen */
+ 	while (tmp_olen > 0) {
+ 		struct sackblk sack;
+ 
+ 		bcopy((char *) tmp_cp, (char *) &(sack.start), sizeof(tcp_seq));
+ 		NTOHL(sack.start);
+ 		bcopy((char *) tmp_cp + sizeof(tcp_seq),
+ 		    (char *) &(sack.end), sizeof(tcp_seq));
+ 		NTOHL(sack.end);
+ 		tmp_olen -= TCPOLEN_SACK;
+ 		tmp_cp += TCPOLEN_SACK;
+ 		if (SEQ_LEQ(sack.end, sack.start))
+ 			continue; /* bad SACK fields */
+ 		if (SEQ_LEQ(sack.end, tp->snd_una))
+ 			continue; /* old block */
+ #if defined(TCP_SACK) && defined(TCP_FACK)
+ 		/* Updates snd_fack.  */
+ 		if (SEQ_GEQ(sack.end, tp->snd_fack))
+ 			tp->snd_fack = sack.end;
+ #endif /* TCP_FACK */
+ 		if (SEQ_GT(th->th_ack, tp->snd_una)) {
+ 			if (SEQ_LT(sack.start, th->th_ack))
+ 				continue;
+ 		} else {
+ 			if (SEQ_LT(sack.start, tp->snd_una))
+ 				continue;
+ 		}
+ 		if (SEQ_GT(sack.end, tp->snd_max))
+ 			continue;
+ 		if (tp->snd_holes == 0) { /* first hole */
+ 			tp->snd_holes = (struct sackhole *)
+ 			    malloc(sizeof(struct sackhole), M_PCB, M_NOWAIT);
+ 			if (tp->snd_holes == NULL) {
+ 				/* ENOBUFS, so ignore SACKed block for now*/
+ 				continue;
+ 			}
+ 			cur = tp->snd_holes;
+ 			cur->start = th->th_ack;
+ 			cur->end = sack.start;
+ 			cur->rxmit = cur->start;
+ 			cur->next = 0;
+ 			tp->snd_numholes = 1;
+ 			tp->rcv_lastsack = sack.end;
+ 			/*
+ 			 * dups is at least one.  If more data has been
+ 			 * SACKed, it can be greater than one.
+ 			 */
+ 			cur->dups = min(tcprexmtthresh,
+ 			    ((sack.end - cur->end)/tp->t_maxseg));
+ 			if (cur->dups < 1)
+ 				cur->dups = 1;
+ 			continue; /* with next sack block */
+ 		}
+ 		/* Go thru list of holes:  p = previous,  cur = current */
+ 		p = cur = tp->snd_holes;
+ 		while (cur) {
+ 			if (SEQ_LEQ(sack.end, cur->start))
+ 				/* SACKs data before the current hole */
+ 				break; /* no use going through more holes */
+ 			if (SEQ_GEQ(sack.start, cur->end)) {
+ 				/* SACKs data beyond the current hole */
+ 				cur->dups++;
+ 				if ( ((sack.end - cur->end)/tp->t_maxseg) >=
+ 					tcprexmtthresh)
+ 					cur->dups = tcprexmtthresh;
+ 				p = cur;
+ 				cur = cur->next;
+ 				continue;
+ 			}
+ 			if (SEQ_LEQ(sack.start, cur->start)) {
+ 				/* Data acks at least the beginning of hole */
+ #if defined(TCP_SACK) && defined(TCP_FACK)
+ 				if (SEQ_GT(sack.end, cur->rxmit))
+ 					tp->retran_data -=
+ 					    tcp_seq_subtract(cur->rxmit,
+ 					    cur->start);
+ 				else
+ 					tp->retran_data -=
+ 					    tcp_seq_subtract(sack.end,
+ 					    cur->start);
+ #endif /* TCP_FACK */
+ 				if (SEQ_GEQ(sack.end,cur->end)){
+ 					/* Acks entire hole, so delete hole */
+ 					if (p != cur) {
+ 						p->next = cur->next;
+ 						free(cur, M_PCB);
+ 						cur = p->next;
+ 					} else {
+ 						cur=cur->next;
+ 						free(p, M_PCB);
+ 						p = cur;
+ 						tp->snd_holes = p;
+ 					}
+ 					tp->snd_numholes--;
+ 					continue;
+ 				}
+ 				/* otherwise, move start of hole forward */
+ 				cur->start = sack.end;
+ 				cur->rxmit = max (cur->rxmit, cur->start);
+ 				p = cur;
+ 				cur = cur->next;
+ 				continue;
+ 			}
+ 			/* move end of hole backward */
+ 			if (SEQ_GEQ(sack.end, cur->end)) {
+ #if defined(TCP_SACK) && defined(TCP_FACK)
+ 				if (SEQ_GT(cur->rxmit, sack.start))
+ 					tp->retran_data -=
+ 					    tcp_seq_subtract(cur->rxmit,
+ 					    sack.start);
+ #endif /* TCP_FACK */
+ 				cur->end = sack.start;
+ 				cur->rxmit = min (cur->rxmit, cur->end);
+ 				cur->dups++;
+ 				if ( ((sack.end - cur->end)/tp->t_maxseg) >=
+ 					tcprexmtthresh)
+ 					cur->dups = tcprexmtthresh;
+ 				p = cur;
+ 				cur = cur->next;
+ 				continue;
+ 			}
+ 			if (SEQ_LT(cur->start, sack.start) &&
+ 			    SEQ_GT(cur->end, sack.end)) {
+ 				/*
+ 				 * ACKs some data in middle of a hole; need to
+ 				 * split current hole
+ 				 */
+ 				temp = (struct sackhole *)malloc(sizeof(*temp),
+ 				    M_PCB,M_NOWAIT);
+ 				if (temp == NULL)
+ 					break; /* ENOBUFS; "continue" would spin */
+ #if defined(TCP_SACK) && defined(TCP_FACK)
+ 				if (SEQ_GT(cur->rxmit, sack.end))
+ 					tp->retran_data -=
+ 					    tcp_seq_subtract(sack.end,
+ 					    sack.start);
+ 				else if (SEQ_GT(cur->rxmit, sack.start))
+ 					tp->retran_data -=
+ 					    tcp_seq_subtract(cur->rxmit,
+ 					    sack.start);
+ #endif /* TCP_FACK */
+ 				temp->next = cur->next;
+ 				temp->start = sack.end;
+ 				temp->end = cur->end;
+ 				temp->dups = cur->dups;
+ 				temp->rxmit = max (cur->rxmit, temp->start);
+ 				cur->end = sack.start;
+ 				cur->rxmit = min (cur->rxmit, cur->end);
+ 				cur->dups++;
+ 				if ( ((sack.end - cur->end)/tp->t_maxseg) >=
+ 					tcprexmtthresh)
+ 					cur->dups = tcprexmtthresh;
+ 				cur->next = temp;
+ 				p = temp;
+ 				cur = p->next;
+ 				tp->snd_numholes++;
+ 			}
+ 		}
+ 		/* If the whole list was walked, p is now the last hole */
+ 		if (SEQ_LT(tp->rcv_lastsack, sack.start)) {
+ 			/*
+ 			 * Need to append new hole at end.
+ 			 * Last hole is p (and it's not NULL).
+ 			 */
+ 			temp = (struct sackhole *) malloc(sizeof(*temp),
+ 			    M_PCB, M_NOWAIT);
+ 			if (temp == NULL)
+ 				continue; /* ENOBUFS */
+ 			temp->start = tp->rcv_lastsack;
+ 			temp->end = sack.start;
+ 			temp->dups = min(tcprexmtthresh,
+ 			    ((sack.end - sack.start)/tp->t_maxseg));
+ 			if (temp->dups < 1)
+ 				temp->dups = 1;
+ 			temp->rxmit = temp->start;
+ 			temp->next = 0;
+ 			p->next = temp;
+ 			tp->rcv_lastsack = sack.end;
+ 			tp->snd_numholes++;
+ 		}
+ 	}
+ #if defined(TCP_SACK) && defined(TCP_FACK)
+ 	/*
+ 	 * Update retran_data and snd_awnd.  Go through the list of
+ 	 * holes.   Increment retran_data by (hole->rxmit - hole->start).
+ 	 */
+ 	tp->retran_data = 0;
+ 	cur = tp->snd_holes;
+ 	while (cur) {
+ 		tp->retran_data += cur->rxmit - cur->start;
+ 		cur = cur->next;
+ 	}
+ 	tp->snd_awnd = tcp_seq_subtract(tp->snd_nxt, tp->snd_fack) +
+ 	    tp->retran_data;
+ #endif /* TCP_FACK */
+ 
+ 	return 0;
+ }
+ 
+ /*
+  * Free holes fully covered by the cumulative ACK and pull the start (and
+  * rxmit, if behind) of a partly covered first hole up to the ACK point.
+  */
+ void
+ tcp_del_sackholes(tp, th)
+ 	struct tcpcb *tp;
+ 	struct tcphdr *th;
+ {
+ 	struct sackhole *hole, *dead;
+ 	tcp_seq lastack;
+ 
+ 	if (tp->sack_disable || tp->t_state == TCPS_LISTEN)
+ 		return;
+ 	/* th_ack may be an older ACK that just arrived: use the larger one */
+ 	lastack = tp->snd_una;
+ 	if (SEQ_GT(th->th_ack, lastack))
+ 		lastack = th->th_ack;
+ 	hole = tp->snd_holes;
+ 	while (hole != NULL && SEQ_LEQ(hole->end, lastack)) {
+ 		dead = hole;
+ 		hole = hole->next;
+ 		free(dead, M_PCB);
+ 		tp->snd_numholes--;
+ 	}
+ 	if (hole != NULL && SEQ_LT(hole->start, lastack)) {
+ 		hole->start = lastack;
+ 		if (SEQ_LT(hole->rxmit, hole->start))
+ 			hole->rxmit = hole->start;
+ 	}
+ 	tp->snd_holes = hole;
+ }
+ 
+ /*
+  * Reset the receiver's SACK report: zero the block count and every slot.
+  */
+ void
+ tcp_clean_sackreport(tp)
+ 	struct tcpcb *tp;
+ {
+ 	struct sackblk *blk = tp->sackblks;
+ 	struct sackblk *const last = blk + MAX_SACK_BLKS;
+ 
+ 	tp->rcv_numsacks = 0;
+ 	for (; blk < last; blk++)
+ 		blk->start = blk->end = 0;
+ }
+ 
+ /*
+  * Checks for partial ack.  If partial ack arrives, turn off retransmission
+  * timer, deflate the window (unless TCP_FACK), do not clear tp->t_dupacks,
+  * and return 1.  If the ack advances at least to tp->snd_last, return 0.
+  */
+ int
+ tcp_sack_partialack(tp, th)
+ 	struct tcpcb *tp;
+ 	struct tcphdr *th;
+ {
+ 	if (SEQ_LT(th->th_ack, tp->snd_last)) {
+ 		/* Turn off retx. timer (will start again next segment) */
+ 		callout_stop(tp->tt_rexmt);
+ 		tp->t_rtttime = 0;
+ #ifndef TCP_FACK
+ 		/*
+ 		 * Partial window deflation.  This statement relies on the
+ 		 * fact that tp->snd_una has not been updated yet.  In FACK
+ 		 * hold snd_cwnd constant during fast recovery.
+ 		 */
+ 		if (tp->snd_cwnd > (th->th_ack - tp->snd_una)) {
+ 			tp->snd_cwnd -= th->th_ack - tp->snd_una;
+ 			tp->snd_cwnd += tp->t_maxseg;
+ 		} else
+ 			tp->snd_cwnd = tp->t_maxseg;
+ #endif
+ 		return 1;
+ 	}
+ 	return 0;
+ }
+ #endif /* TCP_SACK */
+ 
+ /*
   * Pull out of band byte out of a segment so
   * it doesn't appear in the user's data queue.
   * It is still reflected in the segment length for
***************
*** 2879,2885 ****
--- 3497,3508 ----
  	struct tcpcb *tp;
  	struct tcphdr *th;
  {
+ 
+ #if defined (TCP_SACK)
+ if (SEQ_LT(th->th_ack, tp->snd_last)) {
+ #else
  	if (SEQ_LT(th->th_ack, tp->snd_recover)) {
+ #endif
  		tcp_seq onxt = tp->snd_nxt;
  		u_long  ocwnd = tp->snd_cwnd;
  
diff -c -r ORIG-SRC-FreeBSD4.3/tcp_output.c SACK-SRC-FreeBSD4.3/tcp_output.c
*** ORIG-SRC-FreeBSD4.3/tcp_output.c	Thu Aug 16 13:16:25 2001
--- SACK-SRC-FreeBSD4.3/tcp_output.c	Thu Aug 16 13:27:36 2001
***************
*** 96,102 ****
--- 96,202 ----
  int     tcp_do_newreno = 1;
  SYSCTL_INT(_net_inet_tcp, OID_AUTO, newreno, CTLFLAG_RW, &tcp_do_newreno,
          0, "Enable NewReno Algorithms");
+ 
+ #ifdef TCP_SACK
+ extern int tcprexmtthresh;
+ #endif
+ 
+ #ifdef TCP_SACK
+ #ifdef TCP_SACK_DEBUG
+ /* Debug aid: print every hole on tp->snd_holes (silent if list is empty). */
+ void
+ tcp_print_holes(tp)
+ struct tcpcb *tp;
+ {
+ 	struct sackhole *p = tp->snd_holes;
+ 
+ 	if (p == 0)
+ 		return;
+ 	printf("Hole report: start--end dups rxmit\n");
+ 	/* %u for the tcp_seq fields (assumed unsigned 32-bit); dups is int */
+ 	for (; p; p = p->next)
+ 		printf("%u--%u d %d r %u\n", p->start, p->end, p->dups, p->rxmit);
+ 	printf("\n");
+ }
+ #endif /* TCP_SACK_DEBUG */
+ 
  /*
+  * Returns pointer to a sackhole if there are any pending retransmissions;
+  * NULL otherwise.
+  */
+ struct sackhole *
+ tcp_sack_output(tp)
+ register struct tcpcb *tp;
+ {
+ 	struct sackhole *p;
+ 	if (tp->sack_disable)
+ 		return 0;	/* SACK is off for this connection */
+ 	p = tp->snd_holes;
+ 	while (p) {
+ #ifndef TCP_FACK
+ 		if (p->dups >= tcprexmtthresh && SEQ_LT(p->rxmit, p->end)) {
+ #else
+ 		/* In FACK, if p->dups is less than tcprexmtthresh, but
+ 		 * snd_fack advances more than tcprexmtthresh * tp->t_maxseg,
+ 		 * tcp_input() will try fast retransmit. This forces output.
+ 		 */
+ 		if ((p->dups >= tcprexmtthresh ||
+ 		     tp->t_dupacks == tcprexmtthresh) &&
+ 		    SEQ_LT(p->rxmit, p->end)) {
+ #endif /* TCP_FACK */
+ 			if (SEQ_LT(p->rxmit, tp->snd_una)) {/* old hole: rxmit below snd_una */
+ 				p = p->next;
+ 				continue;
+ 			}
+ #ifdef TCP_SACK_DEBUG
+ 			if (p)	/* always true here: p passed the loop test */
+ 				tcp_print_holes(tp);
+ #endif
+ 			return p;	/* first eligible hole with rxmit < end */
+ 		}
+         	p = p->next;
+ 	}
+ 	return 0;	/* no hole is eligible for retransmission */
+ }
+ 
+ /*
+  * After a timeout, the SACK list may be rebuilt.  This SACK information
+  * should be used to avoid retransmitting SACKed data.  This function
+  * traverses the SACK list to see if snd_nxt should be moved forward.
+  */
+ void
+ tcp_sack_adjust(tp)
+ 	struct tcpcb *tp;
+ {
+ 	struct sackhole *hole, *succ;
+ 
+ 	/* Nothing to do without holes, or once past every SACKed block. */
+ 	hole = tp->snd_holes;
+ 	if (hole == 0 || SEQ_GEQ(tp->snd_nxt, tp->rcv_lastsack))
+ 		return;
+ 
+ 	/*
+ 	 * snd_nxt is moved forward in two situations:
+ 	 * i) it lies between the end of one hole and the start of the next
+ 	 * ii) it lies between the end of the last hole and rcv_lastsack
+ 	 * While it is still below the end of a hole it is left alone.
+ 	 */
+ 	for (; (succ = hole->next) != 0; hole = succ) {
+ 		if (SEQ_LT(tp->snd_nxt, hole->end))
+ 			return;
+ 		if (SEQ_LT(tp->snd_nxt, succ->start)) {
+ 			tp->snd_nxt = succ->start;
+ 			return;
+ 		}
+ 	}
+ 
+ 	if (SEQ_GEQ(tp->snd_nxt, hole->end))
+ 		tp->snd_nxt = tp->rcv_lastsack;
+ }
+ #endif /* TCP_SACK */
+ 
+ 
+ /*
   * Tcp output routine: figure out what should be sent and send it.
   */
  int
***************
*** 116,121 ****
--- 216,225 ----
  	u_char opt[TCP_MAXOLEN];
  	unsigned ipoptlen, optlen, hdrlen;
  	int idle, sendalot;
+ #ifdef TCP_SACK
+ 	int i, sack_rxmit = 0;
+ 	struct sackhole *p;
+ #endif
  	int maxburst = TCP_MAXBURST;
  	struct rmxp_tao *taop;
  	struct rmxp_tao tao_noncached;
***************
*** 159,177 ****
  	}
  again:
  	sendalot = 0;
  	off = tp->snd_nxt - tp->snd_una;
! 	win = min(tp->snd_wnd, tp->snd_cwnd);
! 
  	flags = tcp_outflags[tp->t_state];
  	/*
  	 * Get standard flags, and add SYN or FIN if requested by 'hidden'
  	 * state flags.
  	 */
  	if (tp->t_flags & TF_NEEDFIN)
! 		flags |= TH_FIN;
  	if (tp->t_flags & TF_NEEDSYN)
! 		flags |= TH_SYN;
! 
  	/*
  	 * If in persist timeout with window of 0, send 1 byte.
  	 * Otherwise, if window is small but nonzero
--- 263,327 ----
  	}
  again:
  	sendalot = 0;
+ #ifdef TCP_SACK
+ 	/*
+ 	 * If we've recently taken a timeout, snd_max will be greater than
+ 	 * snd_nxt.  There may be SACK information that allows us to avoid
+ 	 * resending already delivered data.  Adjust snd_nxt accordingly.
+ 	 */
+ 	if (!tp->sack_disable && SEQ_LT(tp->snd_nxt, tp->snd_max))
+ 		tcp_sack_adjust(tp);
+ #endif
  	off = tp->snd_nxt - tp->snd_una;
! #if defined(TCP_SACK) && defined(TCP_FACK)
! 	/* Normally, sendable data is limited by off < tp->snd_cwnd.
! 	 * But in FACK, sendable data is limited by snd_awnd < snd_cwnd,
! 	 * regardless of offset.
! 	 */
! 	if (!tp->sack_disable && (tp->t_dupacks > tcprexmtthresh))
! 	  win = tp->snd_wnd;
! 	else
! #endif
! 	 win = min(tp->snd_wnd, tp->snd_cwnd);
! 	
  	flags = tcp_outflags[tp->t_state];
+ 	
+ 	
  	/*
  	 * Get standard flags, and add SYN or FIN if requested by 'hidden'
  	 * state flags.
  	 */
  	if (tp->t_flags & TF_NEEDFIN)
! 	  flags |= TH_FIN;
  	if (tp->t_flags & TF_NEEDSYN)
! 	  flags |= TH_SYN;
! #ifdef TCP_SACK
! 	/* 
! 	 * Send any SACK-generated retransmissions.  If we're explicitly trying
! 	 * to send out new data (when sendalot is 1), bypass this function.
! 	 * If we retransmit in fast recovery mode, decrement snd_cwnd, since
! 	 * we're replacing a (future) new transmission with a retransmission 
! 	 * now, and we previously incremented snd_cwnd in tcp_input().
! 	 */
! 	if (!tp->sack_disable && !sendalot) {
! 	  if (tp->t_dupacks >= tcprexmtthresh &&
! 	      (p = tcp_sack_output(tp))) {
! 	    off = p->rxmit - tp->snd_una;
! 	    sack_rxmit = 1;
! #if 0
! 	    /* Coalesce holes into a single retransmission */
! #endif
! 	    len = min(tp->t_maxseg, p->end - p->rxmit);
! #ifndef TCP_FACK
! 	    /* in FACK, hold snd_cwnd constant during recovery */
! 	    if (SEQ_LT(tp->snd_una, tp->snd_last))
! 	      tp->snd_cwnd -= tp->t_maxseg;
! #endif
! 	  }
! 	}
! #endif /* TCP_SACK */
! 	
! 	sendalot = 0;
  	/*
  	 * If in persist timeout with window of 0, send 1 byte.
  	 * Otherwise, if window is small but nonzero
***************
*** 205,212 ****
--- 355,380 ----
  		}
  	}
  
+ #ifdef TCP_SACK
+ 	if (!sack_rxmit) {
+ #endif
  	len = (long)ulmin(so->so_snd.sb_cc, win) - off;
  
+ #if defined(TCP_SACK) && defined(TCP_FACK)
+ 	/* 
+ 	 * If we're in fast recovery (SEQ_GT(tp->snd_last, tp->snd_una)), and 
+ 	 * amount of outstanding data (snd_awnd) is >= snd_cwnd, then
+ 	 * do not send data (like zero window conditions)
+ 	 */
+ 	if (!tp->sack_disable && len && SEQ_GT(tp->snd_last, tp->snd_una) && 
+ 	    (tp->snd_awnd >= tp->snd_cwnd)) 
+ 		len = 0;
+ #endif /* TCP_FACK */
+ #ifdef TCP_SACK
+ 	}
+ #endif
+ 
+ 
  	if ((taop = tcp_gettaocache(tp->t_inpcb)) == NULL) {
  		taop = &tao_noncached;
  		bzero(taop, sizeof(*taop));
***************
*** 291,296 ****
--- 459,468 ----
  			goto send;
  		if (SEQ_LT(tp->snd_nxt, tp->snd_max))
  			goto send;
+ #ifdef TCP_SACK
+ 		if (sack_rxmit)
+ 		  goto send;
+ #endif
  	}
  
  	/*
***************
*** 333,338 ****
--- 505,524 ----
  	if (flags & TH_FIN &&
  	    ((tp->t_flags & TF_SENTFIN) == 0 || tp->snd_nxt == tp->snd_una))
  		goto send;
+ #ifdef TCP_SACK
+ 	/*
+ 	 * In SACK, it is possible for tcp_output to fail to send a segment 
+ 	 * after the retransmission timer has been turned off.  Make sure
+ 	 * that the retransmission timer is set.
+ 	 */
+ 	if (SEQ_GT(tp->snd_max, tp->snd_una) &&
+ 	    !callout_active(tp->tt_rexmt) &&
+ 	    !callout_active(tp->tt_persist)){
+ 	  callout_reset(tp->tt_rexmt, tp->t_rxtcur,
+ 			tcp_timer_rexmt, tp);
+ 	  return (0);
+ 	}
+ #endif /* TCP_SACK */
  
  	/*
  	 * TCP window updates are not reliable, rather a polling protocol
***************
*** 393,399 ****
--- 579,600 ----
  			mss = htons((u_short) tcp_mssopt(tp));
  			(void)memcpy(opt + 2, &mss, sizeof(mss));
  			optlen = TCPOLEN_MAXSEG;
+ #ifdef TCP_SACK
+ 			/* 
+ 			 * If this is the first SYN of connection (not a SYN 
+ 			 * ACK), include SACK_PERMIT_HDR option.  If this is a 
+ 			 * SYN ACK, include SACK_PERMIT_HDR option if peer has 
+ 			 * already done so.
+ 			 */
+ 			if (!tp->sack_disable && ((flags & TH_ACK) == 0 ||
+ 			    (tp->t_flags & TF_SACK_PERMIT))) {
+ 				*((u_int32_t *) (opt + optlen)) =
+ 				    htonl(TCPOPT_SACK_PERMIT_HDR);
+ 				optlen += 4;
+ 			}
+ #endif
  
+ 
  			if ((tp->t_flags & TF_REQ_SCALE) &&
  			    ((flags & TH_ACK) == 0 ||
  			    (tp->t_flags & TF_RCVD_SCALE))) {
***************
*** 424,430 ****
--- 625,658 ----
   		*lp   = htonl(tp->ts_recent);
   		optlen += TCPOLEN_TSTAMP_APPA;
   	}
+ #ifdef TCP_SACK
+ 	/*
+ 	 * Send SACKs if necessary.  This should be the last option processed.
+ 	 * Only as many SACKs are sent as are permitted by the maximum options
+ 	 * size.  No more than three SACKs are sent.
+ 	 */
+ 	if (!tp->sack_disable && tp->t_state == TCPS_ESTABLISHED &&
+ 	    (tp->t_flags & (TF_SACK_PERMIT|TF_NOOPT)) == TF_SACK_PERMIT &&
+ 	    tp->rcv_numsacks) {
+ 		u_int32_t *lp = (u_int32_t *)(opt + optlen);
+ 		u_int32_t *olp = lp++;
+ 		int count = 0;  /* actual number of SACKs inserted */
+ 		int maxsack = (TCP_MAXOLEN - (optlen + 4))/TCPOLEN_SACK;
  
+ 		maxsack = min(maxsack, TCP_MAX_SACK);
+ 		for (i = 0; (i < tp->rcv_numsacks && count < maxsack); i++) {
+ 			struct sackblk sack = tp->sackblks[i];
+ 			if (sack.start == 0 && sack.end == 0)
+ 				continue;
+ 			*lp++ = htonl(sack.start);
+ 			*lp++ = htonl(sack.end);
+ 			count++;
+ 		}
+ 		*olp = htonl(TCPOPT_SACK_HDR|(TCPOLEN_SACK*count+2));
+ 		optlen += TCPOLEN_SACK*count + 4; /* including leading NOPs */
+ 	}
+ #endif /* TCP_SACK */
+ 
   	/*
  	 * Send `CC-family' options if our side wants to use them (TF_REQ_CC),
  	 * options are allowed (!TF_NOOPT) and it's not a RST.
***************
*** 679,685 ****
  		th->th_seq = htonl(tp->snd_nxt);
  	else
  		th->th_seq = htonl(tp->snd_max);
! 	th->th_ack = htonl(tp->rcv_nxt);
  	if (optlen) {
  		bcopy(opt, th + 1, optlen);
  		th->th_off = (sizeof (struct tcphdr) + optlen) >> 2;
--- 907,930 ----
  		th->th_seq = htonl(tp->snd_nxt);
  	else
  		th->th_seq = htonl(tp->snd_max);
! #ifdef TCP_SACK
! 	if (sack_rxmit) {
! 	  /* 
! 	   * If sendalot was turned on (due to option stuffing), turn it 
! 	   * off. Properly set th_seq field.  Advance the ret'x pointer 
! 	   * by len.  
! 	   */
! 	  if (sendalot)
! 	    sendalot = 0;
! 	  th->th_seq = htonl(p->rxmit);
! 	  p->rxmit += len;
! #if defined(TCP_SACK) && defined(TCP_FACK)
! 	  tp->retran_data += len;
! #endif /* TCP_FACK */
! 	}
! #endif /* TCP_SACK */
! 
! 	  th->th_ack = htonl(tp->rcv_nxt);
  	if (optlen) {
  		bcopy(opt, th + 1, optlen);
  		th->th_off = (sizeof (struct tcphdr) + optlen) >> 2;
***************
*** 753,758 ****
--- 998,1011 ----
  				tp->t_flags |= TF_SENTFIN;
  			}
  		}
+ #ifdef TCP_SACK
+ 		if (!tp->sack_disable) {
+ 			if (sack_rxmit && (p->rxmit != tp->snd_nxt)) {
+ 				goto timer;
+ 			}
+ 		}
+ #endif
+ 
  		tp->snd_nxt += len;
  		if (SEQ_GT(tp->snd_nxt, tp->snd_max)) {
  			tp->snd_max = tp->snd_nxt;
***************
*** 775,781 ****
  		 * Initialize shift counter which is used for backoff
  		 * of retransmit time.
  		 */
! 		if (!callout_active(tp->tt_rexmt) &&
  		    tp->snd_nxt != tp->snd_una) {
  			if (callout_active(tp->tt_persist)) {
  				callout_stop(tp->tt_persist);
--- 1028,1037 ----
  		 * Initialize shift counter which is used for backoff
  		 * of retransmit time.
  		 */
! #ifdef TCP_SACK
!  timer:
! 		if (!tp->sack_disable && sack_rxmit &&
! 		    !callout_active(tp->tt_rexmt) &&
  		    tp->snd_nxt != tp->snd_una) {
  			if (callout_active(tp->tt_persist)) {
  				callout_stop(tp->tt_persist);
***************
*** 784,789 ****
--- 1040,1055 ----
  			callout_reset(tp->tt_rexmt, tp->t_rxtcur,
  				      tcp_timer_rexmt, tp);
  		}
+ #endif
+ 		if (!callout_active(tp->tt_rexmt) &&
+ 		    tp->snd_nxt != tp->snd_una) {
+ 		  if (callout_active(tp->tt_persist)) {
+ 		    callout_stop(tp->tt_persist);
+ 		    tp->t_rxtshift = 0;
+ 		  }
+ 		  callout_reset(tp->tt_rexmt, tp->t_rxtcur,
+ 				tcp_timer_rexmt, tp);
+ 		}
  	} else
  		if (SEQ_GT(tp->snd_nxt + len, tp->snd_max))
  			tp->snd_max = tp->snd_nxt + len;
***************
*** 861,866 ****
--- 1127,1138 ----
  	error = ip_output(m, tp->t_inpcb->inp_options, &tp->t_inpcb->inp_route,
  	    (so->so_options & SO_DONTROUTE), 0);
      }
+ 	
+ #if defined(TCP_SACK) && defined(TCP_FACK)
+ 	/* Update snd_awnd to reflect the new data that was sent.  */
+ 	tp->snd_awnd = tcp_seq_subtract(tp->snd_max, tp->snd_fack) +
+ 	  tp->retran_data;                
+ #endif /* defined(TCP_SACK) && defined(TCP_FACK) */
  	if (error) {
  
  		/*
Only in ORIG-SRC-FreeBSD4.3: tcp_seq.h
diff -c -r ORIG-SRC-FreeBSD4.3/tcp_subr.c SACK-SRC-FreeBSD4.3/tcp_subr.c
*** ORIG-SRC-FreeBSD4.3/tcp_subr.c	Thu Aug 16 13:16:26 2001
--- SACK-SRC-FreeBSD4.3/tcp_subr.c	Thu Aug 16 13:28:24 2001
***************
*** 139,144 ****
--- 139,150 ----
  SYSCTL_INT(_net_inet_tcp, OID_AUTO, icmp_may_rst, CTLFLAG_RW, &icmp_may_rst, 0, 
      "Certain ICMP unreachable messages may abort connections in SYN_SENT");
  
+ #ifdef TCP_SACK
+ int	tcp_do_sack = 1;	/* nonzero: new tcpcbs get sack_disable = 0 */
+ SYSCTL_INT(_net_inet_tcp, OID_AUTO, tcp_do_sack, CTLFLAG_RW, &tcp_do_sack, 0, 
+     "Experimental Sack");
+ #endif /* TCP_SACK; not static: tcp_var.h declares it extern */
+ 
  static void	tcp_cleartaocache __P((void));
  static void	tcp_notify __P((struct inpcb *, int));
  
***************
*** 152,157 ****
--- 158,171 ----
  #define TCBHASHSIZE	512
  #endif
  
+ #ifndef TCP_DO_SACK	/* an earlier definition takes precedence */
+ #ifdef TCP_SACK
+ #define TCP_DO_SACK	1	/* default when built with TCP_SACK */
+ #else /* !TCP_SACK */
+ #define TCP_DO_SACK	0
+ #endif /* TCP_SACK */
+ #endif /* !TCP_DO_SACK */
+ 
  /*
   * This is the actual shape of what we allocate using the zone
   * allocator.  Doing it this way allows us to protect both structures
***************
*** 503,508 ****
--- 517,525 ----
  	callout_init(tp->tt_2msl = &it->inp_tp_2msl);
  	callout_init(tp->tt_delack = &it->inp_tp_delack);
  
+ #ifdef TCP_SACK
+ 	tp->sack_disable = tcp_do_sack ? 0 : 1;
+ #endif
  	if (tcp_do_rfc1323)
  		tp->t_flags = (TF_REQ_SCALE|TF_REQ_TSTMP);
  	if (tcp_do_rfc1644)
***************
*** 567,572 ****
--- 584,592 ----
  	register struct tseg_qent *q;
  	struct inpcb *inp = tp->t_inpcb;
  	struct socket *so = inp->inp_socket;
+ #ifdef TCP_SACK
+ 	struct sackhole *p, *q_sack;
+ #endif
  #ifdef INET6
  	int isipv6 = (inp->inp_vflag & INP_IPV6) != 0;
  #endif /* INET6 */
***************
*** 705,710 ****
--- 725,740 ----
  		m_freem(q->tqe_m);
  		FREE(q, M_TSEGQ);
  	}
+ 
+ #ifdef TCP_SACK
+ 	/* Free SACK holes. */
+ 	q_sack = p = tp->snd_holes;
+ 	while (p != 0) {
+ 		q_sack = p->next;
+ 		free(p, M_PCB);
+ 		p = q_sack;
+ 	}
+ #endif
  	if (tp->t_template)
  		(void) m_free(dtom(tp->t_template));
  	inp->inp_ppcb = NULL;
diff -c -r ORIG-SRC-FreeBSD4.3/tcp_timer.c SACK-SRC-FreeBSD4.3/tcp_timer.c
*** ORIG-SRC-FreeBSD4.3/tcp_timer.c	Thu Aug 16 13:16:26 2001
--- SACK-SRC-FreeBSD4.3/tcp_timer.c	Thu Aug 16 13:21:06 2001
***************
*** 40,45 ****
--- 40,47 ----
  
  #include <sys/param.h>
  #include <sys/systm.h>
+ #include <sys/malloc.h>
+ #include <sys/mbuf.h>
  #include <sys/kernel.h>
  #include <sys/sysctl.h>
  #include <sys/socket.h>
***************
*** 190,195 ****
--- 192,200 ----
  {
  	struct tcpcb *tp = xtp;
  	int s;
+ #ifdef TCP_SACK
+ 	struct sackhole *p, *q;
+ #endif
  #ifdef TCPDEBUG
  	int ostate;
  
***************
*** 201,207 ****
--- 206,230 ----
  		return;
  	}
  	callout_deactivate(tp->tt_2msl);
+ 	
+ #ifdef TCP_SACK
  	/*
+ 	 * Free SACK holes for 2MSL and REXMT timers.
+ 	 */
+ 	q = p = tp->snd_holes;
+ 	while (p != 0) {
+ 	  q = p->next;
+ 	  free(p, M_PCB);
+ 	  p = q;
+ 	}
+ 	tp->snd_holes = 0;
+ #if defined(TCP_SACK) && defined(TCP_FACK)
+ 	tp->snd_fack = tp->snd_una;
+ 	tp->retran_data = 0;
+ 	tp->snd_awnd = 0;
+ #endif /* TCP_FACK */
+ #endif /* TCP_SACK */	
+ 	/*
  	 * 2 MSL timeout in shutdown went off.  If we're closed but
  	 * still waiting for peer to close and connection has been idle
  	 * too long, or if 2MSL time is up from TIME_WAIT, delete connection
***************
*** 358,363 ****
--- 381,389 ----
  	struct tcpcb *tp = xtp;
  	int s;
  	int rexmt;
+ #ifdef TCP_SACK
+ 	struct sackhole *p, *q;
+ #endif
  #ifdef TCPDEBUG
  	int ostate;
  
***************
*** 369,375 ****
--- 395,419 ----
  		return;
  	}
  	callout_deactivate(tp->tt_rexmt);
+ #ifdef TCP_SACK
  	/*
+ 	 * Free SACK holes for 2MSL and REXMT timers.
+ 	 */
+ 	q = p = tp->snd_holes;
+ 	while (p != 0) {
+ 	  q = p->next;
+ 	  free(p, M_PCB);
+ 	  p = q;
+ 	}
+ 	tp->snd_holes = 0;
+ #if defined(TCP_SACK) && defined(TCP_FACK)
+ 	tp->snd_fack = tp->snd_una;
+ 	tp->retran_data = 0;
+ 	tp->snd_awnd = 0;
+ #endif /* TCP_FACK */
+ #endif /* TCP_SACK */
+ 
+ 	/*
  	 * Retransmission timer went off.  Message has not
  	 * been acked within retransmit interval.  Back off
  	 * to a longer retransmit interval and retransmit one segment.
***************
*** 421,431 ****
--- 465,484 ----
  		tp->t_srtt = 0;
  	}
  	tp->snd_nxt = tp->snd_una;
+ #if defined(TCP_SACK)
  	/*
+ 	 * Note:  We overload snd_last to function also as the
+ 	 * snd_last variable described in RFC 2582
+ 	 */
+ 	tp->snd_last = tp->snd_max;
+ #else
+ 	/*
  	 * Note:  We overload snd_recover to function also as the
  	 * snd_last variable described in RFC 2582
  	 */
+ 
  	tp->snd_recover = tp->snd_max;
+ #endif /* TCP_SACK */
  	/*
  	 * Force a segment to be sent.
  	 */
diff -c -r ORIG-SRC-FreeBSD4.3/tcp_usrreq.c SACK-SRC-FreeBSD4.3/tcp_usrreq.c
*** ORIG-SRC-FreeBSD4.3/tcp_usrreq.c	Thu Aug 16 13:16:33 2001
--- SACK-SRC-FreeBSD4.3/tcp_usrreq.c	Thu Aug 16 13:29:16 2001
***************
*** 767,772 ****
--- 767,782 ----
  #endif /* !TCP_COMPAT_42 */
  	tcp_sendseqinit(tp);
  
+ #if defined(TCP_SACK)
+ 		tp->snd_last = tp->snd_una;
+ #endif
+ #if defined(TCP_SACK) && defined(TCP_FACK)
+ 		tp->snd_fack = tp->snd_una;
+ 		tp->retran_data = 0;
+ 		tp->snd_awnd = 0;
+ #endif
+ 
+ 
  	/*
  	 * Generate a CC value for this connection and
  	 * check whether CC or CCnew should be used.
***************
*** 984,989 ****
--- 994,1004 ----
  		case TCP_NOPUSH:
  			optval = tp->t_flags & TF_NOPUSH;
  			break;
+ #ifdef TCP_SACK
+ 		case TCP_SACK_DISABLE:
+ 			optval = tp->sack_disable;
+ 			break;
+ #endif			
  		default:
  			error = ENOPROTOOPT;
  			break;
diff -c -r ORIG-SRC-FreeBSD4.3/tcp_var.h SACK-SRC-FreeBSD4.3/tcp_var.h
*** ORIG-SRC-FreeBSD4.3/tcp_var.h	Thu Aug 16 13:16:34 2001
--- SACK-SRC-FreeBSD4.3/tcp_var.h	Thu Aug 16 13:32:13 2001
***************
*** 36,41 ****
--- 36,56 ----
  
  #ifndef _NETINET_TCP_VAR_H_
  #define _NETINET_TCP_VAR_H_
+ 
+ struct sackblk {	/* start == end == 0: slot skipped when building SACKs */
+ 	tcp_seq start;		/* start seq no. of sack block */
+ 	tcp_seq end; 		/* end seq no. of sack block */
+ };  
+ 
+ struct sackhole {
+ 	tcp_seq start;		/* start seq no. of hole */
+ 	tcp_seq end;		/* end seq no.; rxmit stops once it reaches end */
+ 	int	dups;		/* number of dup(s)acks for this hole */
+ 	tcp_seq rxmit;		/* next seq. no. in hole to be retransmitted */
+ 	struct sackhole *next;	/* next hole in list; 0 at the tail */
+ };
+ 
+ 
  /*
   * Kernel variables for tcp.
   */
***************
*** 114,119 ****
--- 129,159 ----
  	u_long	rcv_wnd;		/* receive window */
  	tcp_seq	rcv_up;			/* receive urgent pointer */
  
+ #ifdef TCP_SACK
+ 	int	sack_disable;		/* disable SACK for this connection */
+ 	int	snd_numholes;		/* number of holes seen by sender */
+ 	struct sackhole *snd_holes;	/* linked list of holes (sorted) */
+ #if defined(TCP_SACK) && defined(TCP_FACK)
+ 	tcp_seq snd_fack;		/* for FACK congestion control */
+ 	u_long	snd_awnd;		/* snd_nxt - snd_fack + */
+ 					/* retransmitted data */
+ 	int retran_data;		/* amount of outstanding retx. data  */
+ #endif /* TCP_FACK */
+ #endif /* TCP_SACK */
+ #if defined(TCP_SACK)
+ 	tcp_seq snd_last;		/* for use in fast recovery */
+ #endif
+ 
+ #ifdef TCP_SACK
+ 	tcp_seq rcv_laststart;		/* start of last segment recd. */
+ 	tcp_seq rcv_lastend;		/* end of ... */
+ 	tcp_seq rcv_lastsack;		/* last seq number(+1) sack'd by rcv'r*/
+ 	int	rcv_numsacks;		/* # distinct sack blks present */
+ 	struct sackblk sackblks[MAX_SACK_BLKS]; /* seq nos. of sack blocks */
+ #endif
+ 
+ 
+ 
  	u_long	snd_wnd;		/* send window */
  	u_long	snd_cwnd;		/* congestion-controlled window */
  	u_long	snd_ssthresh;		/* snd_cwnd size threshold for
***************
*** 338,345 ****
  #define	TCPCTL_PCBLIST		11	/* list of all outstanding PCBs */
  #define	TCPCTL_DELACKTIME	12	/* time before sending delayed ACK */
  #define	TCPCTL_V6MSSDFLT	13	/* MSS default for IPv6 */
  #define	TCPCTL_MAXID		14
! 
  #define TCPCTL_NAMES { \
  	{ 0, 0 }, \
  	{ "rfc1323", CTLTYPE_INT }, \
--- 378,389 ----
  #define	TCPCTL_PCBLIST		11	/* list of all outstanding PCBs */
  #define	TCPCTL_DELACKTIME	12	/* time before sending delayed ACK */
  #define	TCPCTL_V6MSSDFLT	13	/* MSS default for IPv6 */
+ #ifdef  TCP_SACK
+ #define TCPCTL_SACK             14      /* XXX id needs revisiting */
+ #define	TCPCTL_MAXID		15
+ #else
  #define	TCPCTL_MAXID		14
! #endif
  #define TCPCTL_NAMES { \
  	{ 0, 0 }, \
  	{ "rfc1323", CTLTYPE_INT }, \
***************
*** 355,363 ****
  	{ "pcblist", CTLTYPE_STRUCT }, \
  	{ "delacktime", CTLTYPE_INT }, \
  	{ "v6mssdflt", CTLTYPE_INT }, \
  }
  
- 
  #ifdef _KERNEL
  #ifdef SYSCTL_DECL
  SYSCTL_DECL(_net_inet_tcp);
--- 399,410 ----
  	{ "pcblist", CTLTYPE_STRUCT }, \
  	{ "delacktime", CTLTYPE_INT }, \
  	{ "v6mssdflt", CTLTYPE_INT }, \
+         { "sack",      CTLTYPE_INT}, \
  }
+ /*
+ #define TCP_SACK_DEBUG
+ */
  
  #ifdef _KERNEL
  #ifdef SYSCTL_DECL
  SYSCTL_DECL(_net_inet_tcp);
***************
*** 367,372 ****
--- 414,422 ----
  extern	struct inpcbinfo tcbinfo;
  extern	struct tcpstat tcpstat;	/* tcp statistics */
  extern	int tcp_mssdflt;	/* XXX */
+ #ifdef TCP_SACK
+ extern	int tcp_do_sack;	/* SACK enabled/disabled */
+ #endif
  extern	int tcp_delack_enabled;
  extern	int tcp_do_newreno;
  extern	int ss_fltsz;
***************
*** 405,411 ****
--- 455,474 ----
  	 tcp_timers __P((struct tcpcb *, int));
  void	 tcp_trace __P((int, int, struct tcpcb *, void *, struct tcphdr *,
  			int));
+ #ifdef TCP_SACK
+ int	 tcp_sack_option __P((struct tcpcb *,struct tcphdr *,u_char *,int));
+ void	 tcp_update_sack_list __P((struct tcpcb *tp));
+ void	 tcp_del_sackholes __P((struct tcpcb *, struct tcphdr *));
+ void	 tcp_clean_sackreport __P((struct tcpcb *tp));
+ void	 tcp_sack_adjust __P((struct tcpcb *tp));
+ struct sackhole *
+ 	 tcp_sack_output __P((struct tcpcb *tp));
+ int	 tcp_sack_partialack __P((struct tcpcb *, struct tcphdr *));
  
+ #endif /* TCP_SACK */
+ #if defined(TCP_SACK)
+ u_long	 tcp_seq_subtract  __P((u_long, u_long )); 
+ #endif /* TCP_SACK */
  extern	struct pr_usrreqs tcp_usrreqs;
  extern	u_long tcp_sendspace;
  extern	u_long tcp_recvspace;
