Most recent kernel where this bug did *NOT* occur: unknown
Distribution: FC6
Hardware Environment: x86_64
Software Environment: FC6-based, using stock 2.6.19-rc6
Problem Description: eth0: hw csum failure.

Steps to reproduce: just use the network.

Sorry, I couldn't quite work out the mechanism to convert the stack output into
line numbers despite having the kernel source and compiled objects to hand 8/
(one possible approach is sketched after the source excerpts below). The way
I've gotten around this is to simply use

        ethtool -K eth0 rx off tx off

which only fixes the symptom, not the problem. Going by the trace it seems to be
on the rx side, though that's no guarantee it doesn't exist in the tx path.

Phil
=--=

eth0: hw csum failure.

Call Trace:
 [<ffffffff80268741>] show_trace+0x34/0x47
 [<ffffffff80268766>] dump_stack+0x12/0x17
 [<ffffffff8024aa75>] __skb_checksum_complete+0x4a/0x62
 [<ffffffff802268dd>] tcp_v4_rcv+0x17f/0x9b7
 [<ffffffff80233621>] ip_local_deliver+0x19b/0x25f
 [<ffffffff802345dd>] ip_rcv+0x499/0x4e0
 [<ffffffff8021ff8f>] netif_receive_skb+0x350/0x3db
 [<ffffffff8819cb95>] :sky2:sky2_poll+0x7f0/0xa86
 [<ffffffff8020bf8c>] net_rx_action+0xa4/0x1a7
 [<ffffffff80211924>] __do_softirq+0x55/0xc3
 [<ffffffff8025c24c>] call_softirq+0x1c/0x30
DWARF2 unwinder stuck at call_softirq+0x1c/0x30
Leftover inexact backtrace:
 <IRQ>
 [<ffffffff802697ab>] do_softirq+0x2c/0x7d
 [<ffffffff8026992c>] do_IRQ+0x130/0x151
 [<ffffffff802546db>] mwait_idle+0x0/0x20
 [<ffffffff8025b641>] ret_from_intr+0x0/0xa
 <EOI>
 [<ffffffff80267ead>] mwait_idle_with_hints+0x44/0x45
 [<ffffffff802546e7>] mwait_idle+0xc/0x20
 [<ffffffff80246ab7>] cpu_idle+0x8b/0xae
 [<ffffffff80895697>] start_kernel+0x22a/0x22f
 [<ffffffff8089515c>] _sinittext+0x15c/0x160

**** relevant routines and locations (based on 2.6.19-rc6)

--------------------------------------------------
__skb_checksum_complete        net/core/datagram.c +414
--------------------------------------------------
unsigned int __skb_checksum_complete(struct sk_buff *skb)
{
        unsigned int sum;

        sum = (u16)csum_fold(skb_checksum(skb, 0, skb->len, skb->csum));
        if (likely(!sum)) {
                if (unlikely(skb->ip_summed == CHECKSUM_COMPLETE))
                        netdev_rx_csum_fault(skb->dev);
                skb->ip_summed = CHECKSUM_UNNECESSARY;
        }
        return sum;
}
EXPORT_SYMBOL(__skb_checksum_complete);

--------------------------------------------------
tcp_v4_rcv        net/ipv4/tcp_ipv4.c +1053
--------------------------------------------------
int tcp_v4_rcv(struct sk_buff *skb)
{
        struct tcphdr *th;
        struct sock *sk;
        int ret;

        if (skb->pkt_type != PACKET_HOST)
                goto discard_it;

        /* Count it even if it's bad */
        TCP_INC_STATS_BH(TCP_MIB_INSEGS);

        if (!pskb_may_pull(skb, sizeof(struct tcphdr)))
                goto discard_it;

        th = skb->h.th;

        if (th->doff < sizeof(struct tcphdr) / 4)
                goto bad_packet;
        if (!pskb_may_pull(skb, th->doff * 4))
                goto discard_it;

        /* An explanation is required here, I think.
         * Packet length and doff are validated by header prediction,
         * provided case of th->doff==0 is eliminated.
         * So, we defer the checks.
         */
        if ((skb->ip_summed != CHECKSUM_UNNECESSARY &&
             tcp_v4_checksum_init(skb)))
                goto bad_packet;

        th = skb->h.th;
        TCP_SKB_CB(skb)->seq = ntohl(th->seq);
        TCP_SKB_CB(skb)->end_seq = (TCP_SKB_CB(skb)->seq + th->syn + th->fin +
                                    skb->len - th->doff * 4);
        TCP_SKB_CB(skb)->ack_seq = ntohl(th->ack_seq);
        TCP_SKB_CB(skb)->when = 0;
        TCP_SKB_CB(skb)->flags = skb->nh.iph->tos;
        TCP_SKB_CB(skb)->sacked = 0;

        sk = __inet_lookup(&tcp_hashinfo, skb->nh.iph->saddr, th->source,
                           skb->nh.iph->daddr, th->dest, inet_iif(skb));

        if (!sk)
                goto no_tcp_socket;

process:
        if (sk->sk_state == TCP_TIME_WAIT)
                goto do_time_wait;

        if (!xfrm4_policy_check(sk, XFRM_POLICY_IN, skb))
                goto discard_and_relse;
        nf_reset(skb);

        if (sk_filter(sk, skb))
                goto discard_and_relse;

        skb->dev = NULL;

        bh_lock_sock_nested(sk);
        ret = 0;
        if (!sock_owned_by_user(sk)) {
#ifdef CONFIG_NET_DMA
                struct tcp_sock *tp = tcp_sk(sk);
                if (!tp->ucopy.dma_chan && tp->ucopy.pinned_list)
                        tp->ucopy.dma_chan = get_softnet_dma();
                if (tp->ucopy.dma_chan)
                        ret = tcp_v4_do_rcv(sk, skb);
                else
#endif
                {
                        if (!tcp_prequeue(sk, skb))
                                ret = tcp_v4_do_rcv(sk, skb);
                }
        } else
                sk_add_backlog(sk, skb);
        bh_unlock_sock(sk);

        sock_put(sk);

        return ret;

no_tcp_socket:
        if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb))
                goto discard_it;

        if (skb->len < (th->doff << 2) || tcp_checksum_complete(skb)) {
bad_packet:
                TCP_INC_STATS_BH(TCP_MIB_INERRS);
        } else {
                tcp_v4_send_reset(skb);
        }

discard_it:
        /* Discard frame. */
        kfree_skb(skb);
        return 0;

discard_and_relse:
        sock_put(sk);
        goto discard_it;

do_time_wait:
        if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) {
                inet_twsk_put(inet_twsk(sk));
                goto discard_it;
        }

        if (skb->len < (th->doff << 2) || tcp_checksum_complete(skb)) {
                TCP_INC_STATS_BH(TCP_MIB_INERRS);
                inet_twsk_put(inet_twsk(sk));
                goto discard_it;
        }
        switch (tcp_timewait_state_process(inet_twsk(sk), skb, th)) {
        case TCP_TW_SYN: {
                struct sock *sk2 = inet_lookup_listener(&tcp_hashinfo,
                                                        skb->nh.iph->daddr,
                                                        th->dest,
                                                        inet_iif(skb));
                if (sk2) {
                        inet_twsk_deschedule(inet_twsk(sk), &tcp_death_row);
                        inet_twsk_put(inet_twsk(sk));
                        sk = sk2;
                        goto process;
                }
                /* Fall through to ACK */
        }
        case TCP_TW_ACK:
                tcp_v4_timewait_ack(sk, skb);
                break;
        case TCP_TW_RST:
                goto no_tcp_socket;
        case TCP_TW_SUCCESS:;
        }
        goto discard_it;
}

--------------------------------------------------
ip_local_deliver        net/ipv4/ip_input.c +263
--------------------------------------------------
int ip_local_deliver(struct sk_buff *skb)
{
        /*
         * Reassemble IP fragments.
         */

        if (skb->nh.iph->frag_off & htons(IP_MF|IP_OFFSET)) {
                skb = ip_defrag(skb, IP_DEFRAG_LOCAL_DELIVER);
                if (!skb)
                        return 0;
        }

        return NF_HOOK(PF_INET, NF_IP_LOCAL_IN, skb, skb->dev, NULL,
                       ip_local_deliver_finish);
}

--------------------------------------------------
ip_rcv        net/ipv4/ip_input.c +373
--------------------------------------------------
int ip_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt,
           struct net_device *orig_dev)
{
        struct iphdr *iph;
        u32 len;

        /* When the interface is in promisc. mode, drop all the crap
         * that it receives, do not try to analyse it.
         */
        if (skb->pkt_type == PACKET_OTHERHOST)
                goto drop;

        IP_INC_STATS_BH(IPSTATS_MIB_INRECEIVES);

        if ((skb = skb_share_check(skb, GFP_ATOMIC)) == NULL) {
                IP_INC_STATS_BH(IPSTATS_MIB_INDISCARDS);
                goto out;
        }

        if (!pskb_may_pull(skb, sizeof(struct iphdr)))
                goto inhdr_error;

        iph = skb->nh.iph;

        /*
         * RFC1122: 3.1.2.2 MUST silently discard any IP frame that fails the checksum.
         *
         * Is the datagram acceptable?
         *
         * 1. Length at least the size of an ip header
         * 2. Version of 4
         * 3. Checksums correctly.
         *    [Speed optimisation for later, skip loopback checksums]
         * 4. Doesn't have a bogus length
         */

        if (iph->ihl < 5 || iph->version != 4)
                goto inhdr_error;

        if (!pskb_may_pull(skb, iph->ihl*4))
                goto inhdr_error;

        iph = skb->nh.iph;

        if (unlikely(ip_fast_csum((u8 *)iph, iph->ihl)))
                goto inhdr_error;

        len = ntohs(iph->tot_len);
        if (skb->len < len || len < (iph->ihl*4))
                goto inhdr_error;

        /* Our transport medium may have padded the buffer out. Now we know it
         * is IP we can trim to the true length of the frame.
         * Note this now means skb->len holds ntohs(iph->tot_len).
         */
        if (pskb_trim_rcsum(skb, len)) {
                IP_INC_STATS_BH(IPSTATS_MIB_INDISCARDS);
                goto drop;
        }

        /* Remove any debris in the socket control block */
        memset(IPCB(skb), 0, sizeof(struct inet_skb_parm));

        return NF_HOOK(PF_INET, NF_IP_PRE_ROUTING, skb, dev, NULL,
                       ip_rcv_finish);

inhdr_error:
        IP_INC_STATS_BH(IPSTATS_MIB_INHDRERRORS);
drop:
        kfree_skb(skb);
out:
        return NET_RX_DROP;
}

--------------------------------------------------
netif_receive_skb        net/core/dev.c +1765
--------------------------------------------------
int netif_receive_skb(struct sk_buff *skb)
{
        struct packet_type *ptype, *pt_prev;
        struct net_device *orig_dev;
        int ret = NET_RX_DROP;
        unsigned short type;

        /* if we've gotten here through NAPI, check netpoll */
        if (skb->dev->poll && netpoll_rx(skb))
                return NET_RX_DROP;

        if (!skb->tstamp.off_sec)
                net_timestamp(skb);

        if (!skb->input_dev)
                skb->input_dev = skb->dev;

        orig_dev = skb_bond(skb);

        if (!orig_dev)
                return NET_RX_DROP;

        __get_cpu_var(netdev_rx_stat).total++;

        skb->h.raw = skb->nh.raw = skb->data;
        skb->mac_len = skb->nh.raw - skb->mac.raw;

        pt_prev = NULL;

        rcu_read_lock();

#ifdef CONFIG_NET_CLS_ACT
        if (skb->tc_verd & TC_NCLS) {
                skb->tc_verd = CLR_TC_NCLS(skb->tc_verd);
                goto ncls;
        }
#endif

        list_for_each_entry_rcu(ptype, &ptype_all, list) {
                if (!ptype->dev || ptype->dev == skb->dev) {
                        if (pt_prev)
                                ret = deliver_skb(skb, pt_prev, orig_dev);
                        pt_prev = ptype;
                }
        }

#ifdef CONFIG_NET_CLS_ACT
        if (pt_prev) {
                ret = deliver_skb(skb, pt_prev, orig_dev);
                pt_prev = NULL; /* noone else should process this after*/
        } else {
                skb->tc_verd = SET_TC_OK2MUNGE(skb->tc_verd);
        }

        ret = ing_filter(skb);

        if (ret == TC_ACT_SHOT || (ret == TC_ACT_STOLEN)) {
                kfree_skb(skb);
                goto out;
        }

        skb->tc_verd = 0;
ncls:
#endif

        handle_diverter(skb);

        if (handle_bridge(&skb, &pt_prev, &ret, orig_dev))
                goto out;

        type = skb->protocol;
        list_for_each_entry_rcu(ptype, &ptype_base[ntohs(type)&15], list) {
                if (ptype->type == type &&
                    (!ptype->dev || ptype->dev == skb->dev)) {
                        if (pt_prev)
                                ret = deliver_skb(skb, pt_prev, orig_dev);
                        pt_prev = ptype;
                }
        }

        if (pt_prev) {
                ret = pt_prev->func(skb, skb->dev, pt_prev, orig_dev);
        } else {
                kfree_skb(skb);
                /* Jamal, now you will not able to escape explaining
                 * me how you were going to use this.
                 * :-)
                 */
                ret = NET_RX_DROP;
        }

out:
        rcu_read_unlock();
        return ret;
}

--------------------------------------------------
sky2_poll        drivers/net/sky2.c +2304
--------------------------------------------------
static int sky2_poll(struct net_device *dev0, int *budget)
{
        struct sky2_hw *hw = ((struct sky2_port *) netdev_priv(dev0))->hw;
        int work_limit = min(dev0->quota, *budget);
        int work_done = 0;
        u32 status = sky2_read32(hw, B0_Y2_SP_EISR);

        if (status & Y2_IS_HW_ERR)
                sky2_hw_intr(hw);

        if (status & Y2_IS_IRQ_PHY1)
                sky2_phy_intr(hw, 0);

        if (status & Y2_IS_IRQ_PHY2)
                sky2_phy_intr(hw, 1);

        if (status & Y2_IS_IRQ_MAC1)
                sky2_mac_intr(hw, 0);

        if (status & Y2_IS_IRQ_MAC2)
                sky2_mac_intr(hw, 1);

        if (status & Y2_IS_CHK_RX1)
                sky2_descriptor_error(hw, 0, "receive", Y2_IS_CHK_RX1);

        if (status & Y2_IS_CHK_RX2)
                sky2_descriptor_error(hw, 1, "receive", Y2_IS_CHK_RX2);

        if (status & Y2_IS_CHK_TXA1)
                sky2_descriptor_error(hw, 0, "transmit", Y2_IS_CHK_TXA1);

        if (status & Y2_IS_CHK_TXA2)
                sky2_descriptor_error(hw, 1, "transmit", Y2_IS_CHK_TXA2);

        work_done = sky2_status_intr(hw, work_limit);
        if (work_done < work_limit) {
                netif_rx_complete(dev0);

                sky2_read32(hw, B0_Y2_SP_LISR);
                return 0;
        } else {
                *budget -= work_done;
                dev0->quota -= work_done;
                return 1;
        }
}

--------------------------------------------------
net_rx_action        net/core/dev.c +1907
--------------------------------------------------
static void net_rx_action(struct softirq_action *h)
{
        struct softnet_data *queue = &__get_cpu_var(softnet_data);
        unsigned long start_time = jiffies;
        int budget = netdev_budget;
        void *have;

        local_irq_disable();

        while (!list_empty(&queue->poll_list)) {
                struct net_device *dev;

                if (budget <= 0 || jiffies - start_time > 1)
                        goto softnet_break;

                local_irq_enable();

                dev = list_entry(queue->poll_list.next,
                                 struct net_device, poll_list);
                have = netpoll_poll_lock(dev);

                if (dev->quota <= 0 || dev->poll(dev, &budget)) {
                        netpoll_poll_unlock(have);
                        local_irq_disable();
                        list_move_tail(&dev->poll_list, &queue->poll_list);
                        if (dev->quota < 0)
                                dev->quota += dev->weight;
                        else
                                dev->quota = dev->weight;
                } else {
                        netpoll_poll_unlock(have);
                        dev_put(dev);
                        local_irq_disable();
                }
        }
out:
#ifdef CONFIG_NET_DMA
        /*
         * There may not be any more sk_buffs coming right now, so push
         * any pending DMA copies to hardware
         */
        if (net_dma_client) {
                struct dma_chan *chan;
                rcu_read_lock();
                list_for_each_entry_rcu(chan, &net_dma_client->channels,
                                        client_node)
                        dma_async_memcpy_issue_pending(chan);
                rcu_read_unlock();
        }
#endif
        local_irq_enable();
        return;

softnet_break:
        __get_cpu_var(netdev_rx_stat).time_squeeze++;
        __raise_softirq_irqoff(NET_RX_SOFTIRQ);
        goto out;
}
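One way the address-to-line conversion mentioned at the top could be done
(untested here, and assuming the vmlinux and drivers/net/sky2.ko from this exact
2.6.19-rc6 build were compiled with CONFIG_DEBUG_INFO; the addresses are just
the ones from the trace above):

        # core kernel symbols resolve against the uncompressed vmlinux
        addr2line -f -e vmlinux ffffffff8024aa75    # __skb_checksum_complete+0x4a

        # or interactively with gdb
        gdb vmlinux
        (gdb) list *0xffffffff8024aa75

        # module symbols such as :sky2:sky2_poll+0x7f0 are easier to resolve
        # against the module object itself, since its load address changes per boot
        gdb drivers/net/sky2.ko
        (gdb) list *(sky2_poll+0x7f0)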
Need to know hardware information about the chip version etc. This can be found with 'lspci -v' and 'dmesg | grep sky2'.
okies...

lspci -v:
-------------------------
03:00.0 Ethernet controller: Marvell Technology Group Ltd. 88E8062 PCI-E IPMI Gigabit Ethernet Controller (rev 14)
        Subsystem: ASUSTeK Computer Inc. Unknown device 81da
        Flags: bus master, fast devsel, latency 0, IRQ 8411
        Memory at f97fc000 (64-bit, non-prefetchable) [size=16K]
        I/O ports at b800 [size=256]
        Expansion ROM at f97c0000 [disabled] [size=128K]
        Capabilities: [48] Power Management version 2
        Capabilities: [50] Vital Product Data
        Capabilities: [5c] Message Signalled Interrupts: 64bit+ Queue=0/1 Enable+
        Capabilities: [e0] Express Legacy Endpoint IRQ 0

dmesg | grep sky2:
-------------------------
[root@emerald-x64 ~]# dmesg | grep sky2
sky2 v1.10 addr 0xf97fc000 irq 16 Yukon-XL (0xb3) rev 3
sky2 eth0: addr 00:17:31:97:ef:2d
sky2 eth1: addr 00:17:31:97:ef:2e
sky2 eth0: enabling interface
sky2 eth0: Link is up at 100 Mbps, full duplex, flow control both
 [<ffffffff88249b95>] :sky2:sky2_poll+0x7f0/0xa86
 [<ffffffff88249b95>] :sky2:sky2_poll+0x7f0/0xa86
 [<ffffffff88249b95>] :sky2:sky2_poll+0x7f0/0xa86

Phil
=--=
mm, one other thing that kinda puzzles me:

[root@emerald-x64 ~]# dmesg | grep sky2
sky2 v1.10 addr 0xf97fc000 irq 16 Yukon-XL (0xb3) rev 3
[root@emerald-x64 ~]# cat /proc/interrupts
            CPU0       CPU1
...
8411:    3609006          0   PCI-MSI-edge  eth0

err, irq 16 becomes 8411 ???

Phil
=--=
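A guess at the 16-vs-8411 discrepancy (an assumption on my part, nothing in this
report confirms it): the probe message shows the legacy INTx line taken from PCI
config space, and the driver then switches the device over to MSI, which is
given a freshly allocated vector number of its own. The "PCI-MSI-edge" tag above
and the "Message Signalled Interrupts ... Enable+" capability in the earlier
lspci output both point that way. A quick check that MSI really is what's in
service (03:00.0 being the device address from that lspci output):

        lspci -s 03:00.0 -v | grep -i 'message signalled'
        grep eth0 /proc/interrupts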
Probably not surprising: it's still there in 2.6.19 (release).

eth0: hw csum failure.

Call Trace:
 [<ffffffff80268752>] show_trace+0x34/0x47
 [<ffffffff80268777>] dump_stack+0x12/0x17
 [<ffffffff8024ab49>] __skb_checksum_complete+0x4a/0x62
 [<ffffffff80226965>] tcp_v4_rcv+0x17f/0x9b7
 [<ffffffff802336a9>] ip_local_deliver+0x19b/0x25f
 [<ffffffff80234665>] ip_rcv+0x499/0x4e0
 [<ffffffff80220017>] netif_receive_skb+0x350/0x3db
 [<ffffffff88259b95>] :sky2:sky2_poll+0x7f0/0xa86
 [<ffffffff8020bf8c>] net_rx_action+0xa4/0x1a7
 [<ffffffff80211924>] __do_softirq+0x55/0xc3

Phil
=--=
Looks like the same (unsolved) problem as bug 7579.

*** This bug has been marked as a duplicate of 7579 ***