[rtems commit] nios2: Add optimized IP checksum support

Sebastian Huber sebh at rtems.org
Mon Mar 11 16:54:34 UTC 2013


Module:    rtems
Branch:    master
Commit:    d8e44ecaf82b9063a7e556f2e040069d446c3ac6
Changeset: http://git.rtems.org/rtems/commit/?id=d8e44ecaf82b9063a7e556f2e040069d446c3ac6

Author:    Jeffrey O. Hill <hill at wombat.lanl.gov>
Date:      Tue Feb  5 18:03:30 2013 +0100

nios2: Add optimized IP checksum support

---

 cpukit/libnetworking/machine/in_cksum.h       |   69 ++++++
 cpukit/libnetworking/netinet/in_cksum.c       |    4 +
 cpukit/libnetworking/netinet/in_cksum_nios2.h |  292 +++++++++++++++++++++++++
 3 files changed, 365 insertions(+), 0 deletions(-)

diff --git a/cpukit/libnetworking/machine/in_cksum.h b/cpukit/libnetworking/machine/in_cksum.h
index e9a4ea4..e625726 100644
--- a/cpukit/libnetworking/machine/in_cksum.h
+++ b/cpukit/libnetworking/machine/in_cksum.h
@@ -1,4 +1,16 @@
 /*
+ * Nios II version by Jeffrey O. Hill
+ *
+ * Copyright 2012. Los Alamos National Security, LLC.
+ * The Nios II specific part was produced under U.S. Government contract
+ * DE-AC52-06NA25396 for Los Alamos National Laboratory (LANL),
+ * which is operated by Los Alamos National Security, LLC for
+ * the U.S. Department of Energy. The U.S. Government has rights
+ * to use, reproduce, and distribute this software.  NEITHER THE
+ * GOVERNMENT NOR LOS ALAMOS NATIONAL SECURITY, LLC MAKES ANY
+ * WARRANTY, EXPRESS OR IMPLIED, OR ASSUMES ANY LIABILITY FOR
+ * THE USE OF THIS SOFTWARE.
+ *
  * Copyright (c) 1990 The Regents of the University of California.
  * All rights reserved.
  *
@@ -208,6 +220,63 @@ in_cksum_hdr(const struct ip *ip)
 		__tmpsum = (int)ntohs(ip->ip_sum) + 256; \
 		ip->ip_sum = htons(__tmpsum + (__tmpsum >> 16)); \
 	} while(0)
+
+/*
+ *  Optimized version for the Altera Nios II softcore
+ */
+#elif defined ( __GNUC__ ) && defined ( __nios2__ )
+
+static inline uint32_t _NIOS2_Add_ones_complement ( const uint32_t a,
+                                                    const uint32_t b )
+{ /* Returns a + b with the carry out of bit 31 added back in (end-around). */
+  uint32_t sum;
+  uint32_t C; /* scratch register receiving the carry flag, 0 or 1 */
+  __asm__ __volatile__ (
+              "   add     %0, %2, %3  \n" /* sum <= a + b */
+              "   cmpltu  %1, %0, %2  \n" /* C <= carryBit32, i.e. (sum < a) */
+              "   add     %0, %1, %0  \n" /* sum <= sum + C */
+              : "=&r"(sum), "=&r"(C) /* early clobber: sum is written before a is last read */
+              : "r"(a), "r"(b)
+  );
+  return sum;
+}
+
+static inline uint16_t _NIOS2_Add_ones_complement_word_halves
+                                                ( const uint32_t a )
+{ /* Folds a to 16 bits: upper half + lower half with end-around carry. */
+  uint16_t sum;
+  uint32_t tmp;
+  __asm__ __volatile__ (
+              "   roli    %1, %2, 16   \n"  /* tmp <= a rotate left 16 */
+              "   add     %1, %2, %1   \n"  /* tmp <= a + tmp; upper half = hi + lo + carryBit16 */
+              "   srli    %0, %1, 16   \n"  /* sum <= tmp shift right 16, keeps the upper half */
+              : "=&r"(sum),"=&r"(tmp)
+              : "r"(a)
+  );
+  return sum;
+}
+
+static __inline u_int in_cksum_hdr ( const struct ip * pHdrIP )
+{ /* Returns the inverted 16-bit folded sum of the five 32-bit words at pHdrIP. */
+  const uint32_t * const pWd = ( const uint32_t * ) pHdrIP; /* assumes 32-bit alignment - TODO confirm */
+  uint32_t sum = pWd[0];
+  sum = _NIOS2_Add_ones_complement ( sum, pWd[1] );
+  sum = _NIOS2_Add_ones_complement ( sum, pWd[2] );
+  sum = _NIOS2_Add_ones_complement ( sum, pWd[3] );
+  sum = _NIOS2_Add_ones_complement ( sum, pWd[4] ); /* 5 words = 20 bytes; nothing beyond is read */
+  sum = _NIOS2_Add_ones_complement_word_halves ( sum ); /* fold 32 -> 16 bits */
+  sum ^= 0xffff; /* invert the 16-bit result */
+  return sum;
+}
+
+static __inline void in_cksum_update ( struct ip * pHdrIP )
+{ /* Adds 256 to ip_sum in host byte order, wraps the carry, stores it back. */
+  uint32_t __tmpsum = ntohs ( pHdrIP->ip_sum );
+  __tmpsum += 256u; /* NOTE(review): presumably compensates a TTL decrement - confirm with callers */
+  __tmpsum += __tmpsum >> 16u; /* add an overflow out of bit 15 back into the low half */
+  pHdrIP->ip_sum = htons ( ( uint16_t ) __tmpsum ); /* cast drops bit 16 */
+}
+
 /*
  *  Here is the generic, portable, inefficient algorithm.
  */
diff --git a/cpukit/libnetworking/netinet/in_cksum.c b/cpukit/libnetworking/netinet/in_cksum.c
index dfe39b1..ec7e49c 100644
--- a/cpukit/libnetworking/netinet/in_cksum.c
+++ b/cpukit/libnetworking/netinet/in_cksum.c
@@ -57,6 +57,10 @@
 
 #include "in_cksum_powerpc.h"
 
+#elif (defined(__GNUC__) && defined(__nios2__))
+
+#include "in_cksum_nios2.h"
+
 #else
 
 #include <stdio.h> /* for puts */
diff --git a/cpukit/libnetworking/netinet/in_cksum_nios2.h b/cpukit/libnetworking/netinet/in_cksum_nios2.h
new file mode 100644
index 0000000..8382f11
--- /dev/null
+++ b/cpukit/libnetworking/netinet/in_cksum_nios2.h
@@ -0,0 +1,292 @@
+
+/*
+ * Altera Nios2 Internet (IP) checksum computation
+ *
+ * Author: Jeffrey O. Hill
+ *
+ * Copyright 2012. Los Alamos National Security, LLC.
+ * This material was produced under U.S. Government contract
+ * DE-AC52-06NA25396 for Los Alamos National Laboratory (LANL),
+ * which is operated by Los Alamos National Security, LLC for
+ * the U.S. Department of Energy. The U.S. Government has rights
+ * to use, reproduce, and distribute this software.  NEITHER THE
+ * GOVERNMENT NOR LOS ALAMOS NATIONAL SECURITY, LLC MAKES ANY
+ * WARRANTY, EXPRESS OR IMPLIED, OR ASSUMES ANY LIABILITY FOR
+ * THE USE OF THIS SOFTWARE.
+ *
+ * COPYRIGHT (c) 1989-2012.
+ * On-Line Applications Research Corporation (OAR).
+ *
+ * Copyright (c) 1997 Mark Brinicome
+ * Copyright (c) 1997 Causality Limited
+ *
+ * Copyright (c) 1995 Zubin Dittia.
+ * Copyright (c) 1995 Matthew R. Green.
+ * Copyright (c) 1994 Charles M. Hannum.
+ * Copyright (c) 1992, 1993
+ * The Regents of the University of California.  All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * Based on the arm / sparc version, but using instead
+ * mostly inline functions in place of naaasty macros.
+ *
+ * It would be a great idea to somehow detect at runtime
+ * that the Nios2 has a user defined instruction that
+ * computes the checksum and invoke it here (we could call
+ * a function in the BSP).
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/mbuf.h>
+#include <netinet/in.h>
+#include <netinet/in_systm.h>
+#include <netinet/ip.h>
+#include <netinet/ip_var.h>
+#include <machine/in_cksum.h>
+
+/*
+ * Checksum routine for Internet Protocol family headers.
+ *
+ * This routine is very heavily used in the network
+ * code and should be modified for each CPU to be as fast as possible.
+ */
+static inline uint32_t _NIOS2_Add_ones_complement_64
+( uint32_t sum, const uint32_t * const pWd )
+{ /* Adds the 16 words (64 bytes) at pWd to sum, each with end-around carry. */
+  sum = _NIOS2_Add_ones_complement ( sum, pWd[0] );
+  sum = _NIOS2_Add_ones_complement ( sum, pWd[1] );
+  sum = _NIOS2_Add_ones_complement ( sum, pWd[2] );
+  sum = _NIOS2_Add_ones_complement ( sum, pWd[3] );
+  sum = _NIOS2_Add_ones_complement ( sum, pWd[4] );
+  sum = _NIOS2_Add_ones_complement ( sum, pWd[5] );
+  sum = _NIOS2_Add_ones_complement ( sum, pWd[6] );
+  sum = _NIOS2_Add_ones_complement ( sum, pWd[7] );
+  sum = _NIOS2_Add_ones_complement ( sum, pWd[8] );
+  sum = _NIOS2_Add_ones_complement ( sum, pWd[9] );
+  sum = _NIOS2_Add_ones_complement ( sum, pWd[10] );
+  sum = _NIOS2_Add_ones_complement ( sum, pWd[11] );
+  sum = _NIOS2_Add_ones_complement ( sum, pWd[12] );
+  sum = _NIOS2_Add_ones_complement ( sum, pWd[13] );
+  sum = _NIOS2_Add_ones_complement ( sum, pWd[14] );
+  return _NIOS2_Add_ones_complement ( sum, pWd[15] ); /* returns the updated 32-bit sum */
+}
+
+static inline uint32_t _NIOS2_Add_ones_complement_32
+( uint32_t sum, const uint32_t * const pWd )
+{ /* Adds the 8 words (32 bytes) at pWd to sum, each with end-around carry. */
+  sum = _NIOS2_Add_ones_complement ( sum, pWd[0] );
+  sum = _NIOS2_Add_ones_complement ( sum, pWd[1] );
+  sum = _NIOS2_Add_ones_complement ( sum, pWd[2] );
+  sum = _NIOS2_Add_ones_complement ( sum, pWd[3] );
+  sum = _NIOS2_Add_ones_complement ( sum, pWd[4] );
+  sum = _NIOS2_Add_ones_complement ( sum, pWd[5] );
+  sum = _NIOS2_Add_ones_complement ( sum, pWd[6] );
+  return _NIOS2_Add_ones_complement ( sum, pWd[7] ); /* returns the updated 32-bit sum */
+}
+
+static inline uint32_t _NIOS2_Add_ones_complement_16
+( uint32_t sum, const uint32_t * const pWd )
+{ /* Adds the 4 words (16 bytes) at pWd to sum, each with end-around carry. */
+  sum = _NIOS2_Add_ones_complement ( sum, pWd[0] );
+  sum = _NIOS2_Add_ones_complement ( sum, pWd[1] );
+  sum = _NIOS2_Add_ones_complement ( sum, pWd[2] );
+  return _NIOS2_Add_ones_complement ( sum, pWd[3] ); /* returns the updated 32-bit sum */
+}
+
+static inline uint32_t _NIOS2_Add_ones_complement_8
+( uint32_t sum, const uint32_t * const pWd )
+{ /* Adds the 2 words (8 bytes) at pWd to sum, each with end-around carry. */
+  sum = _NIOS2_Add_ones_complement ( sum, pWd[0] );
+  return _NIOS2_Add_ones_complement ( sum, pWd[1] ); /* returns the updated 32-bit sum */
+}
+
+static inline uint32_t _NIOS2_Add_ones_complement_4
+( uint32_t sum, const uint32_t * const pWd )
+{ /* Adds the single word (4 bytes) at pWd to sum with end-around carry. */
+  return _NIOS2_Add_ones_complement ( sum, pWd[0] );
+}
+
+static inline uint32_t _NIOS2_Reduce_checksum ( uint32_t a )
+{ /* Returns (a >> 16) + (a & 0xffff); the result can still need 17 bits. */
+  uint32_t tmp;
+  __asm__ __volatile__ (
+    "   srli    %1, %0, 16      \n" /* tmp = a >> 16 */
+    "   andi    %0, %0, 0xffff  \n" /* a = a & 0xffff */
+    "   add     %0, %0, %1      \n" /* a = a + tmp */
+  : "+&r"(a), "=&r"(tmp) /* a is both input and output, tmp is scratch */
+  );
+  return a;
+}
+
+#define combineTokens( A, B ) A ## B /* pastes A and B into a single token */
+
+#define ADD_AND_ADVANCE( N ) \
+if ( mlen >= N ) { \
+  sum = combineTokens ( _NIOS2_Add_ones_complement_,  N ) \
+                      ( sum, ( uint32_t * ) w ); \
+  mlen -= N; \
+  w += N; \
+} /* relies on sum, mlen and w being in scope where the macro is expanded */
+
+static int
+in_cksum_internal(struct mbuf *m, int off, int len, u_int sum)
+{ /* Sums up to len bytes of chain m, starting off bytes in, seeded with sum. */
+  const uint8_t * w;
+  int mlen = 0;
+  int byte_swapped = 0; /* toggled on every lone byte; nonzero means one more << 8 is due */
+
+  for (; m && len; m = m->m_next)
+  {
+    if (m->m_len == 0)
+      continue;
+    w = mtod(m, u_char *) + off;
+    mlen = m->m_len - off;
+    off = 0; /* the offset is only applied to the first non-empty mbuf */
+    if (len < mlen)
+      mlen = len; /* never consume more than the requested length */
+    len -= mlen;
+
+    /*
+     * Ensure that we're aligned on a word boundary here so
+     * that we can do 32 bit operations below.
+     */
+    if ((3 & (uint32_t)w) != 0)
+    {
+      sum = _NIOS2_Reduce_checksum ( sum ); /* at most 17 bits now, so << 8 cannot overflow */
+      if ((1 & (uint32_t)w) != 0 && mlen >= 1) /* odd address: consume one byte */
+      {
+        sum <<= 8u;
+        sum += *w << 8u;
+        byte_swapped ^= 1;
+        w += 1;
+        mlen -= 1;
+      }
+      if ((2 & (uint32_t)w) != 0 && mlen >= 2) /* halfword but not word aligned */
+      {
+        sum += *(uint16_t *)w;
+        w += 2;
+        mlen -= 2;
+      }
+    }
+
+    /*
+     * unrolled chunks: 64 bytes per pass, then at most one each of 32/16/8/4
+     */
+    while ( mlen >= 64 )
+    {
+      sum = _NIOS2_Add_ones_complement_64
+            ( sum, ( uint32_t * ) w );
+      mlen -= 64;
+      w += 64;
+    }
+    ADD_AND_ADVANCE ( 32 );
+    ADD_AND_ADVANCE ( 16 );
+    ADD_AND_ADVANCE (  8 );
+    ADD_AND_ADVANCE (  4 );
+
+    if ( mlen > 0 ) /* fewer than 4 bytes remain in this mbuf */
+    {
+      sum = _NIOS2_Reduce_checksum ( sum ); /* make room for the plain adds and the shift below */
+      if ( mlen >= 2 )
+      {
+        sum += *(uint16_t *)w;
+        w += 2;
+        mlen -= 2;
+      }
+      if ( mlen == 1 ) /* trailing lone byte: same shift-and-add as the odd-address case */
+      {
+        sum <<= 8u;
+        sum += *w << 8u;
+        byte_swapped ^= 1;
+      }
+    }
+  }
+  if ( byte_swapped ) /* an odd number of lone bytes was added: shift once more */
+  {
+    sum = _NIOS2_Reduce_checksum ( sum );
+    sum <<= 8u;
+  }
+  sum = _NIOS2_Add_ones_complement_word_halves ( sum ); /* fold 32 -> 16 bits */
+  sum ^= 0xffff; /* invert the 16-bit result */
+  return sum;
+}
+
+int
+in_cksum (
+  struct mbuf *m,
+  int len )
+{ /* Checksums the first len bytes of chain m: offset 0, initial sum 0. */
+  return in_cksum_internal ( m, 0, len, 0 );
+}
+
+int
+in4_cksum (
+  struct mbuf *m,
+  u_int8_t nxt,
+  int off,
+  int len )
+{ /* Checksums len bytes at offset off in m; nxt != 0 adds a pseudo header. */
+  u_int sum = 0;
+
+  if ( nxt != 0 )
+  {
+    struct ipovly ipov;
+    /* pseudo header, built from the IP header at the start of the first mbuf */
+    if (off < sizeof(struct ipovly)) /* NOTE(review): int vs. unsigned compare; a negative off looks like it passes - confirm */
+      panic("in4_cksum: offset too short");
+    if (m->m_len < sizeof(struct ip))
+      panic("in4_cksum: bad mbuf chain");
+
+    bzero(&ipov, sizeof(ipov)); /* fields not set below stay zero */
+    ipov.ih_len = htons(len);
+    ipov.ih_pr = nxt;
+    ipov.ih_src = mtod(m, struct ip *)->ip_src;
+    ipov.ih_dst = mtod(m, struct ip *)->ip_dst;
+    u_char * w = (u_char *)&ipov;
+
+    if ( sizeof(ipov) != 20 ) /* the two adds below cover exactly 16 + 4 bytes */
+      panic( "in4_cksum: sizeof(ipov) != 20" );
+    sum = _NIOS2_Add_ones_complement_16 ( sum, (uint32_t *) w );
+    w += 16;
+    sum = _NIOS2_Add_ones_complement_4 ( sum, (uint32_t *) w );
+  }
+  /* skip unnecessary part: whole mbufs that lie entirely before off */
+  while (m && off > 0)
+  {
+    if (m->m_len > off)
+      break; /* off now points inside this mbuf */
+    off -= m->m_len;
+    m = m->m_next;
+  }
+  return (in_cksum_internal(m, off, len, sum)); /* the pseudo header sum, if any, is the seed */
+}
+
+




More information about the vc mailing list