Created attachment 274715 [details] system log Use openwrt LEDE, (gcc version 5.5.0 (OpenWrt GCC 5.5.0 r5932-9c4fe10) ). When use pptp, make WAN(eth1) down and then make WAN(eth1) up, pptp will redial itself. Do this in a loop. After a while, kernel print "recursion detected" and "net_ratelimit: 49422 callbacks suppressed", print this many times, then system reboot. When add more printk in driver "driver/net/ppp/ppp_generic.c", fond it fall in loop in "static void __ppp_xmit_process(struct ppp *ppp)", which called by "static void __ppp_channel_push(struct channel *pch)". __ppp_channel_push() locked ppp->xmit_recursion, and loop for long time( while (!ppp->xmit_pending && (skb = skb_dequeue(&ppp->file.xq))) ppp_send_frame(ppp, skb); ). Some other thread want to lock ppp->xmit_recursion, but failed. static void __ppp_channel_push(struct channel *pch) { struct sk_buff *skb; struct ppp *ppp; spin_lock_bh(&pch->downl); if (pch->chan) { while (!skb_queue_empty(&pch->file.xq)) { skb = skb_dequeue(&pch->file.xq); if (!pch->chan->ops->start_xmit(pch->chan, skb)) { /* put the packet back and try again later */ skb_queue_head(&pch->file.xq, skb); break; } } } else { /* channel got deregistered */ skb_queue_purge(&pch->file.xq); } spin_unlock_bh(&pch->downl); /* see if there is anything from the attached unit to be sent */ if (skb_queue_empty(&pch->file.xq)) { ppp = pch->ppp; if (ppp) __ppp_xmit_process(ppp); } }
root@OpenWrt:/# route -n Kernel IP routing table Destination Gateway Genmask Flags Metric Ref Use Iface 0.0.0.0 10.1.1.1 0.0.0.0 UG 0 0 0 pptp-pptp_test 10.1.1.1 0.0.0.0 255.255.255.255 UH 0 0 0 pptp-pptp_test 10.128.65.220 0.0.0.0 255.255.255.252 U 0 0 0 eth1 192.168.1.0 0.0.0.0 255.255.255.0 U 0 0 0 br-lan 218.108.76.135 10.128.65.221 255.255.255.255 UGH 0 0 0 eth1 root@OpenWrt:/#
Add printk in static void __ppp_xmit_process(struct ppp *ppp), like this: /* * Transmit-side routines. */ /* Called to do any work queued up on the transmit side that can now be done */ static void __ppp_xmit_process(struct ppp *ppp) { int i=0; struct sk_buff *skb; ppp_xmit_lock(ppp); if (!ppp->closing) { ppp_push(ppp); while (!ppp->xmit_pending && (skb = skb_dequeue(&ppp->file.xq))){ if(0!=i){ printk("%s:%d loop [%d], per-cpu ppp->xmit_recursion[%d]\n", __FUNCTION__,__LINE__,i,*this_cpu_ptr(ppp->xmit_recursion)); } ppp_send_frame(ppp, skb); i++; } /* If there's no work left to do, tell the core net code that we can accept some more. */ if (!ppp->xmit_pending && !skb_peek(&ppp->file.xq)) netif_wake_queue(ppp->dev); else netif_stop_queue(ppp->dev); } ppp_xmit_unlock(ppp); } when kernel reboot itself, the last print is: [44237.690049] pptp-pptp_test: renamed from ppp0 [44266.985247] pptp-pptp_test: recursion detected [44266.989970] pptp-pptp_test: recursion detected [44266.994511] __ppp_xmit_process:1429 loop [1], per-cpu ppp->xmit_recursion[1] [44267.001813] pptp-pptp_test: recursion detected [44267.006328] __ppp_xmit_process:1429 loop [2], per-cpu ppp->xmit_recursion[1] [44267.013631] pptp-pptp_test: recursion detected [44267.018182] __ppp_xmit_process:1429 loop [3], per-cpu ppp->xmit_recursion[1] [44267.025450] pptp-pptp_test: recursion detected [44267.030001] __ppp_xmit_process:1429 loop [4], per-cpu ppp->xmit_recursion[1] [44267.037287] pptp-pptp_test: recursion detected [44267.041800] __ppp_xmit_process:1429 loop [5], per-cpu ppp->xmit_recursion[1] [44267.049119] pptp-pptp_test: recursion detected [44267.053636] __ppp_xmit_process:1429 loop [6], per-cpu ppp->xmit_recursion[1] [44267.060947] pptp-pptp_test: recursion detected [44267.065462] __ppp_xmit_process:1429 loop [7], per-cpu ppp->xmit_recursion[1] [44267.072765] pptp-pptp_test: recursion detected [44267.077317] __ppp_xmit_process:1429 loop [8], per-cpu ppp->xmit_recursion[1] [44267.084594] pptp-pptp_test: recursion detected [44267.089149] __ppp_xmit_process:1429 loop [9], per-cpu ppp->xmit_recursion[1] . . . [44293.215320] __ppp_xmit_process:1429 loop [3434], per-cpu ppp->xmit_recursion[1] [44293.222972] __ppp_xmit_process:1429 loop [3435], per-cpu ppp->xmit_recursion[1] [44293.230547] __ppp_xmit_process:1429 loop [3436], per-cpu ppp->xmit_recursion[1] [44293.238120] __ppp_xmit_process:1429 loop [3437], per-cpu ppp->xmit_recursion[1] [44293.245642] __ppp_xmit_process:1429 loop [3438], per-cpu ppp->xmit_recursion[1] [44293.253219] __ppp_xmit_process:1429 loop [3439], per-cpu ppp->xmit_recursion[1] [44293.260814] __ppp_xmit_process:1429 loop [3440], per-cpu ppp->xmit_recursion[1] [44293.268402] __ppp_xmit_process:1429 loop [3441], per-cpu ppp->xmit_recursion[1] [44293.276011] __ppp_xmit_process:1429 loop [3442], per-cpu ppp->xmit_recursion[1] [44293.283596] __ppp_xmit_process:1429 loop [3443], per-cpu ppp->xmit_recursion[1] [44293.291176] __ppp_xmit_process:1429 loop [3444], per-cpu ppp->xmit_recursion[1] [44293.298756] __ppp_xmit_process:1429 loop [3445], per-cpu ppp->xmit_recursion[1] [44293.306335] __ppp_xmit_process:1429 loop [3446], per-cpu ppp->xmit_recursion[1] [44293.313866] __ppp_xmit_process:1429 loop [3447], per-cpu ppp->xmit_recursion[1] [44293.321448] __ppp_xmit_process:1429 loop [3448], per-cp U-Boot 1.1.4 (Oct 29 2015 - 14:36:21) DB120 DRAM: sri Wasp 1.3 wasp_ddr_initial_config(249): (32bit) ddr2 init wasp_ddr_initial_config(426): Wasp ddr init done Tap value selected = 0xe [0x0 - 0x1d] 128 MB Top of RAM usable for U-Boot at: 88000000 Reserving 206k for U-Boot at: 87fcc000 Reserving 192k for malloc() at: 87f9c000 Reserving 44 Bytes for Board Info at: 87f9bfd4 Reserving 36 Bytes for Global Data at: 87f9bfb0 Reserving 128k for boot params() at: 87f7bfb0 Stack Pointer at: 87f7bf98 Now running in RAM - U-Boot at: 87fcc000 Flash Manuf Id 0xef, DeviceId0 0x40, DeviceId1 0x18 flash size 16MB, sector count = 256 Flash: 16 MB
For testing, in __ppp_channel_push(), disable sending anything from the attached unit, just disable __ppp_xmit_process(ppp) in __ppp_channel_push(). In my opinion, __ppp_xmit_process() should only called by ppp_xmit_process(), because of ppp->xmit_recursion __percpu. And, as a result of the test, the "recursion detected" not happened for 82519 seconds. I will test this for a long time. 1935| /* Try to send data out on a channel */ 1936| static void __ppp_channel_push(struct channel *pch) 1937| { 1938| struct sk_buff *skb; 1939| struct ppp *ppp; 1940| 1941| spin_lock_bh(&pch->downl); 1942| if (pch->chan) { 1943| while (!skb_queue_empty(&pch->file.xq)) { 1944| skb = skb_dequeue(&pch->file.xq); 1945| if (!pch->chan->ops->start_xmit(pch->chan, skb)) { 1946| /* put the packet back and try again later */ 1947| skb_queue_head(&pch->file.xq, skb); 1948| break; 1949| } 1950| } 1951| } else { 1952| /* channel got deregistered */ 1953| skb_queue_purge(&pch->file.xq); 1954| } 1955| spin_unlock_bh(&pch->downl); 1956| #if 0 1957| /* see if there is anything from the attached unit to be sent */ 1958| if (skb_queue_empty(&pch->file.xq)) { 1959| ppp = pch->ppp; 1960| if (ppp) 1961| __ppp_xmit_process(ppp); 1962| } 1963| #endif 1964| }
Reported-by: xu heng <xuheng333@zoho.com> Fixes: 55454a565836 ("ppp: avoid dealock on recursive xmit") Signed-off-by: Guillaume Nault <g.nault@alphalink.fr> --- drivers/net/ppp/ppp_generic.c | 26 ++++++++++++++------------ 1 file changed, 14 insertions(+), 12 deletions(-) diff --git a/drivers/net/ppp/ppp_generic.c b/drivers/net/ppp/ppp_generic.c index fa2a9bdd1866..da1937832c99 100644 --- a/drivers/net/ppp/ppp_generic.c +++ b/drivers/net/ppp/ppp_generic.c @@ -257,7 +257,7 @@ struct ppp_net { /* Prototypes. */ static int ppp_unattached_ioctl(struct net *net, struct ppp_file *pf, struct file *file, unsigned int cmd, unsigned long arg); -static void ppp_xmit_process(struct ppp *ppp); +static void ppp_xmit_process(struct ppp *ppp, struct sk_buff *skb); static void ppp_send_frame(struct ppp *ppp, struct sk_buff *skb); static void ppp_push(struct ppp *ppp); static void ppp_channel_push(struct channel *pch); @@ -513,13 +513,12 @@ static ssize_t ppp_write(struct file *file, const char __user *buf, goto out; } - skb_queue_tail(&pf->xq, skb); - switch (pf->kind) { case INTERFACE: - ppp_xmit_process(PF_TO_PPP(pf)); + ppp_xmit_process(PF_TO_PPP(pf), skb); break; case CHANNEL: + skb_queue_tail(&pf->xq, skb); ppp_channel_push(PF_TO_CHANNEL(pf)); break; } @@ -1267,8 +1266,8 @@ ppp_start_xmit(struct sk_buff *skb, struct net_device *dev) put_unaligned_be16(proto, pp); skb_scrub_packet(skb, !net_eq(ppp->ppp_net, dev_net(dev))); - skb_queue_tail(&ppp->file.xq, skb); - ppp_xmit_process(ppp); + ppp_xmit_process(ppp, skb); + return NETDEV_TX_OK; outf: @@ -1420,13 +1419,14 @@ static void ppp_setup(struct net_device *dev) */ /* Called to do any work queued up on the transmit side that can now be done */ -static void __ppp_xmit_process(struct ppp *ppp) +static void __ppp_xmit_process(struct ppp *ppp, struct sk_buff *skb) { - struct sk_buff *skb; - ppp_xmit_lock(ppp); if (!ppp->closing) { ppp_push(ppp); + + if (skb) + skb_queue_tail(&ppp->file.xq, skb); while (!ppp->xmit_pending && (skb = skb_dequeue(&ppp->file.xq))) ppp_send_frame(ppp, skb); @@ -1440,7 +1440,7 @@ static void __ppp_xmit_process(struct ppp *ppp) ppp_xmit_unlock(ppp); } -static void ppp_xmit_process(struct ppp *ppp) +static void ppp_xmit_process(struct ppp *ppp, struct sk_buff *skb) { local_bh_disable(); @@ -1448,7 +1448,7 @@ static void ppp_xmit_process(struct ppp *ppp) goto err; (*this_cpu_ptr(ppp->xmit_recursion))++; - __ppp_xmit_process(ppp); + __ppp_xmit_process(ppp, skb); (*this_cpu_ptr(ppp->xmit_recursion))--; local_bh_enable(); @@ -1458,6 +1458,8 @@ static void ppp_xmit_process(struct ppp *ppp) err: local_bh_enable(); + kfree_skb(skb); + if (net_ratelimit()) netdev_err(ppp->dev, "recursion detected\n"); } @@ -1942,7 +1944,7 @@ static void __ppp_channel_push(struct channel *pch) if (skb_queue_empty(&pch->file.xq)) { ppp = pch->ppp; if (ppp) - __ppp_xmit_process(ppp); + __ppp_xmit_process(ppp, NULL); } }