2004-01-13 Ralf Corsepius <corsepiu@faw.uni-ulm.de>

PR/549 networking

	* netinet/in_cksum_powerpc.h: New (copy of in_cksum_powerpc.c).
	* netinet/in_cksum_powerpc.c: Remove.
	* netinet/in_cksum_i386.h: New (copy of in_cksum_i386.c).
	* netinet/in_cksum_i386.c: Remove.
	* netinet/in_cksum_arm.h: New (copy of in_cksum_arm.c).
	* netinet/in_cksum_arm.c: Remove.
	* netinet/in_cksum_m68k.h: New (copy of in_cksum_m68k.c).
	* netinet/in_cksum_m68k.c: Remove.
	* netinet/Makefile.am: Reflect changes above.
This commit is contained in:
Ralf Corsepius
2004-01-14 00:10:20 +00:00
parent a2df3eaa42
commit ae60724872
6 changed files with 16 additions and 882 deletions

View File

@@ -1,3 +1,17 @@
2004-01-13 Ralf Corsepius <corsepiu@faw.uni-ulm.de>
PR/549 networking
* netinet/in_cksum_powerpc.h: New (copy of in_cksum_powerpc.c).
* netinet/in_cksum_powerpc.c: Remove.
* netinet/in_cksum_i386.h: New (copy of in_cksum_i386.c).
* netinet/in_cksum_i386.c: Remove.
* netinet/in_cksum_arm.h: New (copy of in_cksum_arm.c).
* netinet/in_cksum_arm.c: Remove.
* netinet/in_cksum_m68k.h: New (copy of in_cksum_m68k.c).
* netinet/in_cksum_m68k.c: Remove.
* netinet/Makefile.am: Reflect changes above.
2004-01-11 Ralf Corsepius <corsepiu@faw.uni-ulm.de> 2004-01-11 Ralf Corsepius <corsepiu@faw.uni-ulm.de>
* Makefile.am: Include compile.am, again. * Makefile.am: Include compile.am, again.

View File

@@ -91,12 +91,8 @@ libnetworking_a_SOURCES += netinet/if_ether.c netinet/igmp.c netinet/in.c \
netinet/ip_mroute.c netinet/ip_output.c netinet/raw_ip.c \ netinet/ip_mroute.c netinet/ip_output.c netinet/raw_ip.c \
netinet/tcp_debug.c netinet/tcp_input.c netinet/tcp_output.c \ netinet/tcp_debug.c netinet/tcp_input.c netinet/tcp_output.c \
netinet/tcp_subr.c netinet/tcp_timer.c netinet/tcp_usrreq.c \ netinet/tcp_subr.c netinet/tcp_timer.c netinet/tcp_usrreq.c \
netinet/udp_usrreq.c netinet/udp_usrreq.c netinet/in_cksum_arm.h netinet/in_cksum_i386.h \
netinet/in_cksum_m68k.h netinet/in_cksum_powerpc.h
## FIXME: these files should be renamed into *.h and put into
## libnetworking_a_SOURCES
EXTRA_DIST += netinet/in_cksum_arm.c netinet/in_cksum_i386.c \
netinet/in_cksum_m68k.c netinet/in_cksum_powerpc.c
## nfs ## nfs

View File

@@ -1,276 +0,0 @@
/* $NetBSD: in_cksum_arm.c,v 1.3 2001/12/08 21:18:50 chris Exp $ */
/*
* ARM version:
*
* Copyright (c) 1997 Mark Brinicome
* Copyright (c) 1997 Causality Limited
*
* Based on the sparc version.
*/
/*
* Sparc version:
*
* Copyright (c) 1995 Zubin Dittia.
* Copyright (c) 1995 Matthew R. Green.
* Copyright (c) 1994 Charles M. Hannum.
* Copyright (c) 1992, 1993
* The Regents of the University of California. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. All advertising materials mentioning features or use of this software
* must display the following acknowledgement:
* This product includes software developed by the University of
* California, Berkeley and its contributors.
* 4. Neither the name of the University nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* @(#)in_cksum.c 8.1 (Berkeley) 6/11/93
*/
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/mbuf.h>
#include <netinet/in.h>
#include <netinet/in_systm.h>
#include <netinet/ip.h>
#include <netinet/ip_var.h>
/*
* Checksum routine for Internet Protocol family headers.
*
* This routine is very heavily used in the network
* code and should be modified for each CPU to be as fast as possible.
*
* ARM version.
*/
/*
 * Helper macros for the ARM checksum routines.
 *
 * Every macro below works on variables that must exist, under exactly
 * these names, in the scope where the macro is expanded:
 *   w             pointer to the next unsummed byte
 *   sum           unsigned 32-bit running accumulator
 *   tmp1..tmp4    scratch registers for the ADDn macros
 *   mlen          bytes left in the current mbuf
 *   byte_swapped  toggled whenever a single odd byte is consumed
 *
 * ADDn loads n bytes through w with post-increment (so w is advanced by
 * the asm itself), adds them as 32-bit words with carry and finally
 * folds the last carry back in with "adcs ...,#0".
 *
 * The asm reads the memory w points at, which is not one of the listed
 * operands.  "memory" is therefore part of the clobber list; without it
 * the compiler is free to assume the buffer is never read and may delay
 * or discard earlier stores to it (for example the pseudo header that
 * in4_cksum() builds on its stack).
 */
#define ADD64 __asm __volatile(" \n\
	ldmia %0!, {%2, %3, %4, %5} \n\
	adds %1,%7,%2; adcs %1,%1,%3 \n\
	adcs %1,%1,%4; adcs %1,%1,%5 \n\
	ldmia %0!, {%2, %3, %4, %5} \n\
	adcs %1,%1,%2; adcs %1,%1,%3 \n\
	adcs %1,%1,%4; adcs %1,%1,%5 \n\
	ldmia %0!, {%2, %3, %4, %5} \n\
	adcs %1,%1,%2; adcs %1,%1,%3 \n\
	adcs %1,%1,%4; adcs %1,%1,%5 \n\
	ldmia %0!, {%2, %3, %4, %5} \n\
	adcs %1,%1,%2; adcs %1,%1,%3 \n\
	adcs %1,%1,%4; adcs %1,%1,%5 \n\
	adcs %1,%1,#0\n" \
	: "=r" (w), "=r" (sum), "=&r" (tmp1), "=&r" (tmp2), "=&r" (tmp3), "=&r" (tmp4) \
	: "0" (w), "r" (sum) \
	: "cc", "memory")
#define ADD32 __asm __volatile(" \n\
	ldmia %0!, {%2, %3, %4, %5} \n\
	adds %1,%7,%2; adcs %1,%1,%3 \n\
	adcs %1,%1,%4; adcs %1,%1,%5 \n\
	ldmia %0!, {%2, %3, %4, %5} \n\
	adcs %1,%1,%2; adcs %1,%1,%3 \n\
	adcs %1,%1,%4; adcs %1,%1,%5 \n\
	adcs %1,%1,#0\n" \
	: "=r" (w), "=r" (sum), "=&r" (tmp1), "=&r" (tmp2), "=&r" (tmp3), "=&r" (tmp4) \
	: "0" (w), "r" (sum) \
	: "cc", "memory")
#define ADD16 __asm __volatile(" \n\
	ldmia %0!, {%2, %3, %4, %5} \n\
	adds %1,%7,%2; adcs %1,%1,%3 \n\
	adcs %1,%1,%4; adcs %1,%1,%5 \n\
	adcs %1,%1,#0\n" \
	: "=r" (w), "=r" (sum), "=&r" (tmp1), "=&r" (tmp2), "=&r" (tmp3), "=&r" (tmp4) \
	: "0" (w), "r" (sum) \
	: "cc", "memory")
#define ADD8 __asm __volatile(" \n\
	ldmia %0!, {%2, %3} \n\
	adds %1,%5,%2; adcs %1,%1,%3 \n\
	adcs %1,%1,#0\n" \
	: "=r" (w), "=r" (sum), "=&r" (tmp1), "=&r" (tmp2) \
	: "0" (w), "r" (sum) \
	: "cc", "memory")
#define ADD4 __asm __volatile(" \n\
	ldr %2,[%0],#4 \n\
	adds %1,%4,%2 \n\
	adcs %1,%1,#0\n" \
	: "=r" (w), "=r" (sum), "=&r" (tmp1) \
	: "0" (w), "r" (sum) \
	: "cc", "memory")
/*
 * Fold the upper 16 bits of sum into the lower 16 bits.  The result may
 * still be one carry above 0xffff; ADDCARRY removes that at the end.
 *
 * This used to be an asm sequence that built the 0xffff mask in tmp1
 * while declaring tmp1 as an input-only operand, i.e. it modified an
 * operand the compiler was told would not change.  The plain C form
 * computes the same value and has no operand constraints to get wrong.
 */
#define REDUCE {sum = (sum & 0xffff) + (sum >> 16);}
/* End-around carry: bring a 17-bit value back into 16 bits. */
#define ADDCARRY {if (sum > 0xffff) sum -= 0xffff;}
/* Shift the accumulator one byte left. */
#define ROL {sum = sum << 8;} /* depends on recent REDUCE */
/* Consume one odd byte: shift sum, add the byte in bits 8..15, flip parity. */
#define ADDBYTE {ROL; sum += (*w << 8); byte_swapped ^= 1;}
/* Add the 16-bit word at w (w itself is not advanced). */
#define ADDSHORT {sum += *(u_short *)w;}
/* Step both the data pointer and the remaining length. */
#define ADVANCE(n) {w += n; mlen -= n;}
/* Step the remaining length only; used after ADDn, which already moved w. */
#define ADVANCEML(n) {mlen -= n;}
/*
 * Accumulate the checksum of `len` bytes of the mbuf chain `m`.
 *
 * m    head of the mbuf chain; zero-length mbufs are skipped
 * off  byte offset applied to the first non-empty mbuf only (it is reset
 *      to 0 after that mbuf has been set up)
 * len  number of bytes to sum; the loop also stops when the chain ends
 * sum  initial accumulator value (e.g. a pseudo-header sum)
 *
 * Returns the accumulator folded to 16 bits and XORed with 0xffff.
 *
 * The locals w, mlen, byte_swapped and tmp1..tmp4 are referenced by name
 * from the ADDn / REDUCE / ADDBYTE / ADDSHORT / ADVANCE macros and must
 * keep exactly these names.
 */
static __inline__ int
in_cksum_internal(struct mbuf *m, int off, int len, u_int sum)
{
u_char *w;
int mlen = 0;
int byte_swapped = 0;
/*
* Declare four temporary registers for use by the asm code. We
* allow the compiler to pick which specific machine registers to
* use, instead of hard-coding this in the asm code above.
*/
register u_int tmp1=0, tmp2, tmp3, tmp4;
for (; m && len; m = m->m_next) {
if (m->m_len == 0)
continue;
/* Set up the data pointer and byte count for this mbuf. */
w = mtod(m, u_char *) + off;
mlen = m->m_len - off;
off = 0;
/* Never consume more than the caller asked for. */
if (len < mlen)
mlen = len;
len -= mlen;
/*
* Ensure that we're aligned on a word boundary here so
* that we can do 32 bit operations below.
*/
if ((3 & (long)w) != 0) {
REDUCE;
/* Odd address: consume a single byte (toggles byte_swapped). */
if ((1 & (long)w) != 0 && mlen >= 1) {
ADDBYTE;
ADVANCE(1);
}
/* Halfword-aligned but not word-aligned: consume one 16-bit word. */
if ((2 & (long)w) != 0 && mlen >= 2) {
ADDSHORT;
ADVANCE(2);
}
}
/*
* Do as many 32 bit operations as possible using the
* 64/32/16/8/4 macros above, using as many as possible of
* these.  The ADDn macros advance w themselves, so only mlen
* is adjusted afterwards (ADVANCEML).
*/
while (mlen >= 64) {
ADD64;
ADVANCEML(64);
}
if (mlen >= 32) {
ADD32;
ADVANCEML(32);
}
if (mlen >= 16) {
ADD16;
ADVANCEML(16);
}
if (mlen >= 8) {
ADD8;
ADVANCEML(8);
}
if (mlen >= 4) {
ADD4;
ADVANCEML(4)
}
if (mlen == 0)
continue;
/* 1 to 3 trailing bytes remain in this mbuf. */
REDUCE;
if (mlen >= 2) {
ADDSHORT;
ADVANCE(2);
}
if (mlen == 1) {
ADDBYTE;
}
}
/* An odd number of single-byte steps left the sum shifted by one byte. */
if (byte_swapped) {
REDUCE;
ROL;
}
/* Fold to 16 bits and return the complement. */
REDUCE;
ADDCARRY;
return (0xffff ^ sum);
}
/*
 * Checksum the first `len` bytes of the mbuf chain `m`.
 *
 * Plain front end to in_cksum_internal(): the sum starts at offset 0 of
 * the chain with a zero initial accumulator.
 */
int
in_cksum(m, len)
struct mbuf *m;
int len;
{
	return (in_cksum_internal(m, 0, len, 0));
}
/*
 * Checksum `len` bytes of the mbuf chain `m` starting `off` bytes into
 * the chain, optionally preceded by an IPv4 pseudo header.
 *
 * m    mbuf chain; when nxt != 0 its first mbuf must hold the IP header,
 *      from which the source and destination addresses are taken
 * nxt  protocol number for the pseudo header; 0 means "no pseudo header"
 * off  offset of the data to be summed from the start of the chain
 * len  number of bytes to sum (also stored in the pseudo header)
 *
 * Returns the value produced by in_cksum_internal().
 * Calls panic() when nxt != 0 and either `off` is smaller than the
 * overlay header or the first mbuf is shorter than an IP header.
 */
int
in4_cksum(m, nxt, off, len)
struct mbuf *m;
u_int8_t nxt;
int off, len;
{
	u_int sum = 0;

	if (nxt != 0) {
		/* names required by the ADD macros */
		register u_int tmp1, tmp2, tmp3, tmp4;
		u_char *w;
		struct ipovly ipov;

		/*
		 * Compare as signed values: against the unsigned result
		 * of sizeof a negative offset would be converted to a
		 * huge number and pass the check unnoticed.
		 */
		if (off < (int)sizeof(struct ipovly))
			panic("in4_cksum: offset too short");
		if (m->m_len < (int)sizeof(struct ip))
			panic("in4_cksum: bad mbuf chain");

		/* build the pseudo header */
		bzero(&ipov, sizeof(ipov));
		ipov.ih_len = htons(len);
		ipov.ih_pr = nxt;
		ipov.ih_src = mtod(m, struct ip *)->ip_src;
		ipov.ih_dst = mtod(m, struct ip *)->ip_dst;
		w = (u_char *)&ipov;

		/* assumes sizeof(ipov) == 20: 16 bytes + 4 bytes */
		ADD16;
		ADD4;
	}

	/* skip the mbufs that lie entirely before the offset */
	while (m && off > 0) {
		if (m->m_len > off)
			break;
		off -= m->m_len;
		m = m->m_next;
	}

	return (in_cksum_internal(m, off, len, sum));
}

View File

@@ -1,204 +0,0 @@
/*
* Checksum routine for Internet Protocol family headers.
*
* This routine is very heavily used in the network
* code and should be modified for each CPU to be as fast as possible.
*
* This implementation is 386 version.
*
* $Id$
*/
#include <stdio.h> /* for puts */
/*
 * End-around carry: bring a 17-bit value back into 16 bits.
 * Wrapped in do { } while (0) so that the macro is a single statement and
 * cannot capture a following "else" when used in an if/else.
 */
#undef ADDCARRY
#define ADDCARRY(x) do { if ((x) > 0xffff) (x) -= 0xffff; } while (0)
/* Fold the 32-bit accumulator `sum` to 16 bits. */
#define REDUCE {sum = (sum & 0xffff) + (sum >> 16); ADDCARRY(sum);}
/*
 * Thanks to gcc we don't have to guess
 * which registers contain sum & w.
 *
 * All macros below use the variables `sum` and `w` (and `junk` for LOAD)
 * of the expanding scope; n is a byte offset from w.
 *   ADD(n)   sum += 32-bit word at w+n            (addl, starts a carry chain)
 *   ADDC(n)  sum += 32-bit word at w+n + carry    (adcl, continues the chain)
 *   MOP      sum += carry                         (adcl $0, ends the chain)
 *   LOAD(n)  read the byte at w+n into junk; the value is not used
 * Each macro is a separate asm statement, so a chain only works when the
 * carry flag survives from one statement to the next.
 * NOTE(review): the compiler is not told about that dependency.
 */
#define ADD(n) __asm__ volatile \
("addl " #n "(%2), %0" : "=r" (sum) : "0" (sum), "r" (w))
#define ADDC(n) __asm__ volatile \
("adcl " #n "(%2), %0" : "=r" (sum) : "0" (sum), "r" (w))
#define LOAD(n) __asm__ volatile \
("movb " #n "(%1), %0" : "=r" (junk) : "r" (w))
#define MOP __asm__ volatile \
("adcl $0, %0" : "=r" (sum) : "0" (sum))
/*
 * Compute the 16-bit Internet checksum of the first `len` bytes of the
 * mbuf chain `m` (i386 version).
 *
 * Zero-length mbufs are skipped.  If the chain ends before `len` bytes
 * have been consumed, "cksum: out of data" is printed with puts() and the
 * checksum of the data that was present is returned.
 *
 * Returns the one's complement of the folded sum, masked to 16 bits.
 *
 * State carried between mbufs:
 *   mlen == -1     the previous mbuf ended in the middle of a 16-bit word;
 *                  its last byte is parked in su.c[0]
 *   byte_swapped   set while the accumulator is shifted left by one byte
 *                  because an odd leading byte was peeled off
 *
 * NOTE(review): pointers are cast to int for the alignment tests, which
 * presumes pointers fit in an int on this target.
 */
int
in_cksum(m, len)
register struct mbuf *m;
register int len;
{
register u_short *w;
register unsigned sum = 0;
register int mlen = 0;
int byte_swapped = 0;
union { char c[2]; u_short s; } su;
for (;m && len; m = m->m_next) {
if (m->m_len == 0)
continue;
w = mtod(m, u_short *);
if (mlen == -1) {
/*
* The first byte of this mbuf is the continuation
* of a word spanning between this mbuf and the
* last mbuf.
*/
/* su.c[0] is already saved when scanning previous
* mbuf. sum was REDUCEd when we found mlen == -1
*/
su.c[1] = *(u_char *)w;
sum += su.s;
w = (u_short *)((char *)w + 1);
mlen = m->m_len - 1;
len--;
} else
mlen = m->m_len;
/* Never consume more than the caller asked for. */
if (len < mlen)
mlen = len;
len -= mlen;
/*
* Force to long boundary so we do longword aligned
* memory operations
*/
if (3 & (int) w) {
REDUCE;
/* Odd address: park the byte in su.c[0] and shift the sum. */
if ((1 & (int) w) && (mlen > 0)) {
sum <<= 8;
su.c[0] = *(char *)w;
w = (u_short *)((char *)w + 1);
mlen--;
byte_swapped = 1;
}
/* Halfword-aligned only: add one 16-bit word. */
if ((2 & (int) w) && (mlen >= 2)) {
sum += *w++;
mlen -= 2;
}
}
/*
* Advance to a 486 cache line boundary.
*/
if (4 & (int) w && mlen >= 4) {
ADD(0);
MOP;
w += 2;
mlen -= 4;
}
if (8 & (int) w && mlen >= 8) {
ADD(0);
ADDC(4);
MOP;
w += 4;
mlen -= 8;
}
/*
* Do as much of the checksum as possible 32 bits at a time.
* In fact, this loop is unrolled to make overhead from
* branches &c small.
*/
/* Bias mlen by one so the loop below needs at least 33 bytes. */
mlen -= 1;
while ((mlen -= 32) >= 0) {
u_char junk;
/*
* Add with carry 16 words and fold in the last
* carry by adding a 0 with carry.
*
* The early ADD(16) and the LOAD(32) are to load
* the next 2 cache lines in advance on 486's. The
* 486 has a penalty of 2 clock cycles for loading
* a cache line, plus whatever time the external
* memory takes to load the first word(s) addressed.
* These penalties are unavoidable. Subsequent
* accesses to a cache line being loaded (and to
* other external memory?) are delayed until the
* whole load finishes. These penalties are mostly
* avoided by not accessing external memory for
* 8 cycles after the ADD(16) and 12 cycles after
* the LOAD(32). The loop terminates when mlen
* is initially 33 (not 32) to guarantee that
* the LOAD(32) is within bounds.
*/
ADD(16);
ADDC(0);
ADDC(4);
ADDC(8);
ADDC(12);
LOAD(32);
ADDC(20);
ADDC(24);
ADDC(28);
MOP;
w += 16;
}
/* Undo the last loop decrement and the one-byte bias. */
mlen += 32 + 1;
/* Exactly one more 32-byte block may remain; sum it without LOAD(32). */
if (mlen >= 32) {
ADD(16);
ADDC(0);
ADDC(4);
ADDC(8);
ADDC(12);
ADDC(20);
ADDC(24);
ADDC(28);
MOP;
w += 16;
mlen -= 32;
}
if (mlen >= 16) {
ADD(0);
ADDC(4);
ADDC(8);
ADDC(12);
MOP;
w += 8;
mlen -= 16;
}
if (mlen >= 8) {
ADD(0);
ADDC(4);
MOP;
w += 4;
mlen -= 8;
}
if (mlen == 0 && byte_swapped == 0)
continue; /* worth 1% maybe ?? */
REDUCE;
/* Remaining 16-bit words; mlen ends at -1 (one byte left) or -2. */
while ((mlen -= 2) >= 0) {
sum += *w++;
}
if (byte_swapped) {
/* Undo the one-byte shift applied at the odd start address. */
sum <<= 8;
byte_swapped = 0;
if (mlen == -1) {
/* The trailing byte completes the word begun by the parked byte. */
su.c[1] = *(char *)w;
sum += su.s;
mlen = 0;
} else
/* The parked byte is still pending; the next mbuf completes it. */
mlen = -1;
} else if (mlen == -1)
/*
* This mbuf has odd number of bytes.
* There could be a word split between
* this mbuf and the next mbuf.
* Save the last byte (to prepend to next mbuf).
*/
su.c[0] = *(char *)w;
}
/* The chain ended before `len` bytes were seen. */
if (len)
puts("cksum: out of data");
if (mlen == -1) {
/* The last mbuf has odd # of bytes. Follow the
standard (the odd byte is shifted left by 8 bits) */
su.c[1] = 0;
sum += su.s;
}
REDUCE;
return (~sum & 0xffff);
}

View File

@@ -1,223 +0,0 @@
/*
* Copyright (c) 1988, 1992, 1993
* The Regents of the University of California. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. All advertising materials mentioning features or use of this software
* must display the following acknowledgement:
* This product includes software developed by the University of
* California, Berkeley and its contributors.
* 4. Neither the name of the University nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* @(#)in_cksum.c 8.1 (Berkeley) 6/10/93
* $Id$
*/
#include <sys/param.h>
#include <sys/mbuf.h>
/* IS_COLDFIRE is 1 when building for a ColdFire (__mcf5200__), else 0. */
#if defined(__mcf5200__)
# define IS_COLDFIRE 1
#else
# define IS_COLDFIRE 0
#endif
/*
 * Fold the accumulator `sum` of the expanding scope to 16 bits, applying
 * the end-around carry if the fold itself overflows 16 bits.
 */
#define REDUCE { \
	sum = (sum >> 16) + (sum & 0xFFFF); \
	if (sum > 0xFFFF) \
		sum -= 0xFFFF; \
}
/*
* Motorola 68k version of Internet Protocol Checksum routine
* W. Eric Norum
* Saskatchewan Accelerator Laboratory
* August, 1998
*
* Computes the 16-bit Internet checksum of the first `len` bytes of the
* mbuf chain `m`.  Zero-length mbufs are skipped.  If the chain ends
* before `len` bytes have been consumed, the accumulator is replaced by
* the marker value 0xDEAD before the final fold.
*
* Returns the one's complement of the folded sum, masked to 16 bits.
*
* State carried between mbufs:
*   mlen == -1     the previous mbuf ended in the middle of a 16-bit word;
*                  its last byte is parked in s_util.c[0]
*   byte_swapped   set while the accumulator is shifted left by one byte
*                  because an odd leading byte was peeled off
*
* NOTE(review): the asm uses the fixed labels lcsum1_lbl / lcsum2_lbl, so
* it presumably must be emitted only once per object file - confirm
* before inlining or duplicating this function.
*/
int
in_cksum(m, len)
struct mbuf *m;
int len;
{
unsigned short *w;
unsigned long sum = 0;
int mlen = 0;
int byte_swapped = 0;
union {
char c[2];
u_short s;
} s_util;
for ( ; m && len ; m = m->m_next) {
if (m->m_len == 0)
continue;
w = mtod(m, u_short *);
if (mlen == -1) {
/*
* The first byte of this mbuf is the continuation
* of a word spanning between this mbuf and the
* last mbuf.
*
* s_util.c[0] is already saved when scanning previous
* mbuf.
*/
s_util.c[1] = *(char *)w;
sum += s_util.s;
w = (u_short *)((char *)w + 1);
mlen = m->m_len - 1;
len--;
} else
mlen = m->m_len;
/* Never consume more than the caller asked for. */
if (len < mlen)
mlen = len;
len -= mlen;
/*
* Force to longword boundary.
*/
if (3 & (int)w) {
REDUCE;
/* Odd address: park the byte in s_util.c[0] and shift the sum. */
if ((1 & (int) w) && (mlen > 0)) {
sum <<= 8;
s_util.c[0] = *(u_char *)w;
w = (u_short *)((char *)w + 1);
mlen--;
byte_swapped = 1;
}
/* Halfword-aligned only: add one 16-bit word. */
if ((2 & (int) w) && (mlen >= 2)) {
sum += *w++;
mlen -= 2;
}
}
/*
* Sum all the longwords in the buffer.
* See RFC 1071 -- Computing the Internet Checksum.
* It should work for all 68k family members.
*
* Operands: %0 = sum, %1 = w (post-incremented by the loads),
* %2 = tcnt (byte count in, loop counter inside), %3 = t1 (scratch).
* The loop body is unrolled 16 times (64 bytes); the initial jmp
* enters it part-way so that the leftover longwords are summed
* first.  The sum is folded to 16 bits before the asm ends.
*/
{
unsigned long tcnt = mlen, t1;
__asm__ volatile (
"movel %2,%3\n\t"
"lsrl #6,%2 | count/64 = # loop traversals\n\t"
"andl #0x3c,%3 | Then find fractions of a chunk\n\t"
"negl %3\n\t | Each long uses 4 instruction bytes\n\t"
#if IS_COLDFIRE
"addql #1,%2 | Clear X (extended carry flag)\n\t"
"subql #1,%2 | \n\t"
#else
"andi #0xf,%%cc | Clear X (extended carry flag)\n\t"
#endif
"jmp %%pc@(lcsum2_lbl-.-2:b,%3) | Jump into loop\n"
"lcsum1_lbl: | Begin inner loop...\n\t"
"movel %1@+,%3 | 0: Fetch 32-bit word\n\t"
"addxl %3,%0 | Add word + previous carry\n\t"
"movel %1@+,%3 | 1: Fetch 32-bit word\n\t"
"addxl %3,%0 | Add word + previous carry\n\t"
"movel %1@+,%3 | 2: Fetch 32-bit word\n\t"
"addxl %3,%0 | Add word + previous carry\n\t"
"movel %1@+,%3 | 3: Fetch 32-bit word\n\t"
"addxl %3,%0 | Add word + previous carry\n\t"
"movel %1@+,%3 | 4: Fetch 32-bit word\n\t"
"addxl %3,%0 | Add word + previous carry\n\t"
"movel %1@+,%3 | 5: Fetch 32-bit word\n\t"
"addxl %3,%0 | Add word + previous carry\n\t"
"movel %1@+,%3 | 6: Fetch 32-bit word\n\t"
"addxl %3,%0 | Add word + previous carry\n\t"
"movel %1@+,%3 | 7: Fetch 32-bit word\n\t"
"addxl %3,%0 | Add word + previous carry\n\t"
"movel %1@+,%3 | 8: Fetch 32-bit word\n\t"
"addxl %3,%0 | Add word + previous carry\n\t"
"movel %1@+,%3 | 9: Fetch 32-bit word\n\t"
"addxl %3,%0 | Add word + previous carry\n\t"
"movel %1@+,%3 | A: Fetch 32-bit word\n\t"
"addxl %3,%0 | Add word + previous carry\n\t"
"movel %1@+,%3 | B: Fetch 32-bit word\n\t"
"addxl %3,%0 | Add word + previous carry\n\t"
"movel %1@+,%3 | C: Fetch 32-bit word\n\t"
"addxl %3,%0 | Add word + previous carry\n\t"
"movel %1@+,%3 | D: Fetch 32-bit word\n\t"
"addxl %3,%0 | Add word + previous carry\n\t"
"movel %1@+,%3 | E: Fetch 32-bit word\n\t"
"addxl %3,%0 | Add word + previous carry\n\t"
"movel %1@+,%3 | F: Fetch 32-bit word\n\t"
"addxl %3,%0 | Add word + previous carry\n"
"lcsum2_lbl: | End of unrolled loop\n\t"
#if IS_COLDFIRE
"moveq #0,%3 | Add in last carry\n\t"
"addxl %3,%0 |\n\t"
"subql #1,%2 | Update loop count\n\t"
"bplb lcsum1_lbl | Loop (with X clear) if not done\n\t"
"movel #0xffff,%2 | Get word mask\n\t"
"movel %0,%3 | Fold 32 bit sum to 16 bits\n\t"
"swap %3 |\n\t"
"andl %2,%0 | Mask to 16-bit sum\n\t"
"andl %2,%3 | Mask to 16-bit sum\n\t"
"addl %3,%0 |\n\t"
"movel %0,%3 | Add in last carry\n\t"
"swap %3 |\n\t"
"addl %3,%0 |\n\t"
"andl %2,%0 | Mask to 16-bit sum\n\t"
#else
"dbf %2,lcsum1_lbl | (NB- dbf doesn't affect X)\n\t"
"movel %0,%3 | Fold 32 bit sum to 16 bits\n\t"
"swap %3 | (NB- swap doesn't affect X)\n\t"
"addxw %3,%0 |\n\t"
"moveq #0,%3 | Add in last carry\n\t"
"addxw %3,%0 |\n\t"
"andl #0xffff,%0 | Mask to 16-bit sum\n"
#endif
:
"=d" (sum), "=a" (w), "=d" (tcnt) , "=d" (t1) :
"0" (sum), "1" (w), "2" (tcnt) :
"cc", "memory");
}
/* The asm consumed every whole longword; 0..3 bytes are left. */
mlen &= 3;
/*
* Soak up the last 1, 2 or 3 bytes
*/
while ((mlen -= 2) >= 0)
sum += *w++;
if (byte_swapped) {
/* Undo the one-byte shift applied at the odd start address. */
REDUCE;
sum <<= 8;
byte_swapped = 0;
if (mlen == -1) {
/* The trailing byte completes the word begun by the parked byte. */
s_util.c[1] = *(char *)w;
sum += s_util.s;
mlen = 0;
} else
/* The parked byte is still pending; the next mbuf completes it. */
mlen = -1;
} else if (mlen == -1)
/* Odd byte count: park the last byte for the next mbuf. */
s_util.c[0] = *(char *)w;
}
/* The chain ended before `len` bytes were seen: use a marker value. */
if (len)
sum = 0xDEAD;
if (mlen == -1) {
/* The last mbuf has odd # of bytes. Follow the
standard (the odd byte may be shifted left by 8 bits
or not as determined by endian-ness of the machine) */
s_util.c[1] = 0;
sum += s_util.s;
}
REDUCE;
return (~sum & 0xffff);
}

View File

@@ -1,173 +0,0 @@
/*
* Checksum routine for Internet Protocol family headers.
*
* This routine is very heavily used in the network
* code and should be modified for each CPU to be as fast as possible.
*
* This implementation is the PowerPC version.
*
* $Id$
*/
#include <stdio.h> /* for puts */
/*
 * End-around carry: bring a 17-bit value back into 16 bits.
 * Wrapped in do { } while (0) so that the macro is a single statement and
 * cannot capture a following "else" when used in an if/else.
 */
#undef ADDCARRY
#define ADDCARRY(x) do { if ((x) > 0xffff) (x) -= 0xffff; } while (0)
/* Fold the 32-bit accumulator `sum` to 16 bits. */
#define REDUCE {sum = (sum & 0xffff) + (sum >> 16); ADDCARRY(sum);}
/*
 * Thanks to gcc we don't have to guess
 * which registers contain sum & w.
 *
 * The macros below use the variables `sum`, `tmp`, `w` and `junk` of the
 * expanding scope; n is a byte offset from w.
 *   LDTMP(n)  tmp = 32-bit word at w+n
 *   ADD(n)    sum += word at w+n          (addc, starts a carry chain)
 *   ADDC(n)   sum += word at w+n + carry  (adde, continues the chain)
 *   MOP       sum += carry                (adde with 0, ends the chain)
 *   LOAD(n)   read the byte at w+n into junk; the value is not used
 * ADD, ADDC and MOP each consist of two statements and are therefore
 * wrapped in do { } while (0) so they behave as one statement.
 * Each asm is a separate statement, so a chain only works when the carry
 * survives from one statement to the next.
 * NOTE(review): the compiler is not told about that dependency.
 */
#define LDTMP(n) tmp = *((u_int *)((u_char *)w + (n)))
#define ADD(n) do { \
	LDTMP(n); \
	__asm__ volatile("addc %0,%0,%2" : "=r" (sum) : "0" (sum), "r" (tmp)); \
} while (0)
#define ADDC(n) do { \
	LDTMP(n); \
	__asm__ volatile("adde %0,%0,%2" : "=r" (sum) : "0" (sum), "r" (tmp)); \
} while (0)
#define MOP do { \
	tmp = 0; \
	__asm__ volatile("adde %0,%0,%2" : "=r" (sum) : "0" (sum), "r" (tmp)); \
} while (0)
#define LOAD(n) junk = (u_char) *((volatile u_char *) w + (n))
/*
 * Compute the 16-bit Internet checksum of the first `len` bytes of the
 * mbuf chain `m` (PowerPC version).
 *
 * Zero-length mbufs are skipped.  If the chain ends before `len` bytes
 * have been consumed, "cksum: out of data" is printed with puts() and the
 * checksum of the data that was present is returned.
 *
 * Returns the one's complement of the folded sum, masked to 16 bits.
 *
 * State carried between mbufs:
 *   mlen == -1     the previous mbuf ended in the middle of a 16-bit word;
 *                  its last byte is parked in su.c[0]
 *   byte_swapped   set while the accumulator is shifted left by one byte
 *                  because an odd leading byte was peeled off
 *
 * The locals sum, tmp, w and junk are referenced by name from the
 * ADD / ADDC / MOP / LOAD / REDUCE macros.
 */
int
in_cksum(m, len)
	register struct mbuf *m;
	register int len;
{
	u_char junk;
	register u_short *w;
	register unsigned sum = 0;
	register unsigned tmp;
	register int mlen = 0;
	int byte_swapped = 0;
	union { char c[2]; u_short s; } su;

	for (;m && len; m = m->m_next) {
		if (m->m_len == 0)
			continue;
		w = mtod(m, u_short *);
		if (mlen == -1) {
			/*
			 * The first byte of this mbuf is the continuation
			 * of a word spanning between this mbuf and the
			 * last mbuf.
			 *
			 * su.c[0] is already saved when scanning previous
			 * mbuf.  sum was REDUCEd when we found mlen == -1
			 */
			su.c[1] = *(u_char *)w;
			sum += su.s;
			w = (u_short *)((char *)w + 1);
			mlen = m->m_len - 1;
			len--;
		} else
			mlen = m->m_len;
		/* Never consume more than the caller asked for. */
		if (len < mlen)
			mlen = len;
		len -= mlen;
		/*
		 * Force to long boundary so we do longword aligned
		 * memory operations
		 */
		if (3 & (int) w) {
			REDUCE;
			/* Odd address: park the byte and shift the sum. */
			if ((1 & (int) w) && (mlen > 0)) {
				sum <<= 8;
				su.c[0] = *(char *)w;
				w = (u_short *)((char *)w + 1);
				mlen--;
				byte_swapped = 1;
			}
			/* Halfword-aligned only: add one 16-bit word. */
			if ((2 & (int) w) && (mlen >= 2)) {
				sum += *w++;
				mlen -= 2;
			}
		}
		/*
		 * Do as much of the checksum as possible 32 bits at a time.
		 * In fact, this loop is unrolled to keep overhead from
		 * branches small.
		 *
		 * The loop requires MORE than 32 remaining bytes because
		 * LOAD(32) touches the byte at w + 32; with exactly 32
		 * bytes left that byte would lie past the end of the data.
		 */
		while (mlen > 32) {
			/*
			 * Add with carry 16 words and fold in the last
			 * carry by adding a 0 with carry.
			 *
			 * The early ADD(16) and the LOAD(32) are intended
			 * to help get the data into the cache.
			 */
			ADD(16);
			ADDC(0);
			ADDC(4);
			ADDC(8);
			ADDC(12);
			LOAD(32);
			ADDC(20);
			ADDC(24);
			ADDC(28);
			MOP;
			w += 16;
			mlen -= 32;
		}
		/* A final block of exactly 32 bytes: same sum, no look-ahead. */
		if (mlen >= 32) {
			ADD(16);
			ADDC(0);
			ADDC(4);
			ADDC(8);
			ADDC(12);
			ADDC(20);
			ADDC(24);
			ADDC(28);
			MOP;
			w += 16;
			mlen -= 32;
		}
		if (mlen >= 16) {
			ADD(0);
			ADDC(4);
			ADDC(8);
			ADDC(12);
			MOP;
			w += 8;
			mlen -= 16;
		}
		if (mlen >= 8) {
			ADD(0);
			ADDC(4);
			MOP;
			w += 4;
			mlen -= 8;
		}
		if (mlen == 0 && byte_swapped == 0)
			continue; /* worth 1% maybe ?? */
		REDUCE;
		/* Remaining 16-bit words; mlen ends at -1 (one byte left) or -2. */
		while ((mlen -= 2) >= 0) {
			sum += *w++;
		}
		if (byte_swapped) {
			/* Undo the one-byte shift applied at the odd start. */
			sum <<= 8;
			byte_swapped = 0;
			if (mlen == -1) {
				/* Trailing byte completes the parked byte's word. */
				su.c[1] = *(char *)w;
				sum += su.s;
				mlen = 0;
			} else
				/* Parked byte still pending for the next mbuf. */
				mlen = -1;
		} else if (mlen == -1)
			/*
			 * This mbuf has odd number of bytes.
			 * There could be a word split between
			 * this mbuf and the next mbuf.
			 * Save the last byte (to prepend to next mbuf).
			 */
			su.c[0] = *(char *)w;
	}
	/* The chain ended before `len` bytes were seen. */
	if (len)
		puts("cksum: out of data");
	if (mlen == -1) {
		/* The last mbuf has odd # of bytes. Follow the
		   standard (the odd byte is shifted left by 8 bits) */
		su.c[1] = 0;
		sum += su.s;
	}
	REDUCE;
	return (~sum & 0xffff);
}