|
Description
|
During Clearview IPMP stress testing, we hit the following assertion
failure in ip_wput_local():
panic[cpu0]/thread=30005032b00: assertion failed: CLASSD(ipha->ipha_dst),
file: ../../common/inet/ip/ip.c, line: 24853
assfail+0x7e(fffffffff7b90528, fffffffff7b94cb0, 6115)
ip_wput_local+0x875(ffffff0233fdb808, ffffff023069a0e8, ...
ip_multicast_loopback+0x2cf(ffffff0233fdb808, ffffff023069a0e8, ...
igmp_sendpkt+0x1ca(ffffff02366a2510, 12, 0)
igmp_joingroup+0x156(ffffff02366a2510)
...
However, it turns out this same panic exists in Nevada, and likely goes
back to the introduction of IPMP in Solaris 8. Specifically, the immediate
issue is that we've passed a multicast packet into ip_wput_local() that
does not have a multicast destination. In fact, from the stack, we can
see that this packet was actually created locally via igmp_sendpkt(), via
igmp_joingroup(). Looking at igmp_sendpkt(), we find:
static void
igmp_sendpkt(ilm_t *ilm, uchar_t type, ipaddr_t addr)
{
/* ... */
igmpa = (igmpa_t *)&(rtralert[RTRALERT_LEN]);
igmpa->igmpa_type = type;
igmpa->igmpa_code = 0;
igmpa->igmpa_group = ilm->ilm_addr;
igmpa->igmpa_cksum = 0;
igmpa->igmpa_cksum = IP_CSUM(mp, hdrlen, 0);
/* ... */
--> ipha->ipha_dst = addr ? addr : igmpa->igmpa_group;
That is, unless an address has been passed into this function (which it
has not been), ipha_dst comes from the ILM. In this case, the ILM's
address was apparently INADDR_ANY (zero). This is normal for ILMs
created for the multicast router code via MRT_ADD_VIF, or (more recently)
via IP observability. Indeed, looking at ip_addmulti(), it's careful
*not* to call igmp_joingroup() for INADDR_ANY ILMs:
--> if (group == INADDR_ANY) {
/*
* Check how many ipif's have members in this group -
* if more then one we should not tell the driver to join
* this time
*/
if (ilm_numentries_v6(ill, &v6group) > 1)
return (0);
ret = ill_join_allmulti(ill);
if (ret != 0)
ilm_delete(ilm);
--> return (ret);
}
if (!IS_LOOPBACK(ill))
--> igmp_joingroup(ilm);
However, no similar check exists when ilm_send_multicast_reqs()
(ilm_recover_multicast() in the Clearview IPMP bits under test)
adjusts multicast memberships as part of failover/failback,
hence the issue.
|
|
Comments
|
For the record, here's the full stack for the panic on Nevada:
panic[cpu0]/thread=30005032b00: assertion failed: CLASSD(ipha->ipha_dst), file: ../../common/inet/ip/ip.c, line: 25417
000002a100774a10 genunix:assfail+74 (7bb59d18, 7bb55210, 6349, 1854c00, 12f4c00, 0)
%l0-3: 00000000baddc800 0000000000000000 00000300095fdbf0 ffffffffffffffff
%l4-7: 000000007bab6b70 0000000000000000 00000000018a3800 0000000000000000
000002a100774ac0 ip:ip_wput_local+54 (30016916aa0, 300258a4e28, 3001cf96470, 30012bbaf60, 0, 0)
%l0-3: 000000007bb6aa20 0000000000000000 00000000e0000000 0000030003f00000
%l4-7: 000000007bb59c00 0000000000006000 00000300095fdbf0 00000000f0000000
000002a100774c00 ip:ip_multicast_loopback+28c (30016916aa0, 300258a4e28, 30012bbaf60, 0, 0, 300095f7d00)
%l0-3: 0000030003f00000 00000000018530a0 0000030012baf600 0000000000000000
%l4-7: 0000000000000018 0000000000000000 0000000000000218 000003001cf96470
000002a100774d10 ip:igmp_sendpkt+1ac (30015cebd98, 7bb54800, 0, 0, 30003f00000, 300079c3188)
%l0-3: 0000000000000000 00000300258a4e28 0000030012bdbbe0 0000030012baf688
%l4-7: 0000030012baf690 0000030012baf670 000000000a0839da 0000030003d81bc0
000002a100774dc0 ip:igmp_joingroup+e4 (30015cebd98, 30015cebe00, e0000009, 0, 30003f00000, 7bb543b0)
%l0-3: 00000000e0000000 0000000000000000 0000000000000000 000000000192d400
%l4-7: 00000300258a4e28 0000030005032b00 00000300258a5020 000000007bb54000
000002a100774e70 ip:ilm_send_multicast_reqs+130 (30003d7a3e8, 300258a4e28, 8000002, 20000000, 300258a5020, 1000)
%l0-3: 0000000002000000 0000030015cebd98 000000000192d588 0000000000000001
%l4-7: 0000000000000000 00000307c021dc48 0000000000000080 0000000000000003
000002a100774f20 ip:ip_sioctl_move+98c (12, 30003d7a3e8, 300050983b0, 30009605a40, 0, 16)
%l0-3: 0000030003d7a3e8 000003000960fae8 00000300258a4e28 0000030003d7a3e8
%l4-7: 00000300258a4e28 ffffffffbfffffff 00000300258a4e28 00000300258a4e28
000002a100775010 ip:ip_process_ioctl+3b8 (300050983b0, 30009605a40, 30003d7a3e8, 1916890, 300079c2508, 30003d7a3e8)
%l0-3: 0000030009dd2680 0000000000000003 000000007ba7dd04 0000000000000001
%l4-7: 0000000080786999 00000300079c2508 000000008078696e 0000000080786800
000002a100775110 ip:ip_output_options+13cc (30003d86fc0, 7bb59068, 30003d86fc0, 2, 7bb59000, 0)
%l0-3: 000000007bb55ac0 000000007bb59058 0000030003f00000 0000000000005000
%l4-7: 0000000080786999 0000000004000000 000000007bb56c00 000000000000008d
000002a100775210 unix:putnext+390 (30005098658, 11092b8, 300050984a8, 30009605a40, 2a1007752c0, 0)
%l0-3: 0000000000000001 00000300050983b0 0000000001919ee0 0000000000000000
%l4-7: 0000000000000000 000000000181a000 000000007bacfdf8 0000000000000100
000002a1007752c0 genunix:strdoioctl+830 (0, c50, 30009605a40, 1800, 1, 3a98)
%l0-3: 0000030003c7e1a0 0000030003c7e22a 0000030003c7e228 00000000012fdca0
%l4-7: 0000030003c7e220 0000000000000001 0000000000000000 0000000000000000
000002a1007753b0 genunix:strioctl+1370 (18df018, ffffffff80786999, 30003c7e1a0, 30003c7e220, 1, fc00)
%l0-3: 000002a100775adc 0000000000006b00 ffffffffffffffff 0000000000001000
%l4-7: 0000030005098560 00000000018e40d8 0000000000006c00 0000000000007400
000002a100775760 genunix:ldi_ioctl+f0 (3001b9f54b8, ffffffff80786999, ffbff7f0, 100003, 3000309ae60, 2a100775adc)
%l0-3: 0000000000000001 000002a100775adc 0000000000000000 0000000000005490
%l4-7: 0000000000000001 00000000800469a9 0000000080046800 ffffffff80781b99
000002a100775820 sockfs:so_ioctl+11c (30003d88af8, ffffffff80786999, ffbff7f0, 100003, 3000309ae60, 2a100775adc)
%l0-3: 0000000080000000 0000000000000000 0000030003d88b10 ffffffffffffffff
%l4-7: ffffffffffffffff 0000000000000000 ffffffffffffffff 000000007bb1ffc4
000002a1007758d0 genunix:fop_ioctl+58 (3001b9f0a00, ffffffff80786999, ffbff7f0, 100003, 0, 2a100775adc)
%l0-3: 000003000cfca000 000000007beee490 0000030003067500 00000000012f1678
%l4-7: 0000030005096278 0000000000000000 00000000018db400 0000000000000001
000002a100775990 genunix:ioctl+164 (0, 80786999, ffbff7f0, 681b8, 30003c66508, 80808080)
%l0-3: 000000000180c000 0000000000100003 0000000000000000 0000000000000003
%l4-7: 0000000000000008 0000000000000009 0000000000000000 0000000000000000
The cause of this bug is actually split over two different integrations:
Fire Engine introduced the IPv6 issue in s10_41, and the fix for 6220619
introduced the IPv4 issue. I've set the "Introduced in Release" and
"Introduced in Build" fields to the earlier of the two.
|