|
Description
|
So while trying to test xvm 3.1 pre-integration bits i hit a hang during boot
in the new (as of snv_83) clearview code. here's where we are getting stuck:
---8<---
[0]> d3dd3230::ps -f
S PID PPID PGID SID UID FLAGS ADDR NAME
R 50 30 7 7 0 0x4a004000 d3dd3230 /sbin/ifconfig rtls0 plumb
[0]> d3dd3230::walk thread | ::findstack
stack pointer for thread d4ed0800: d4ea95fc
...
[0]> $c2
mod_hash_find+0x25(d47df7c0, d48b67d0)
softmac`softmac_hold_device+0xfa(2140001, d48b698c)
stubs_common_code+0x3b(d48b6b30, d48b69c0)
dls`dls_devnet_open+0x1f(d48b6b30, d48b6a8c)
dev`devnet_create_rvp+0x19(d48b6b30, d48b6a28)
dev`devnet_lookup+0xe8(d35e8180, d48b6b30)
fop_lookup+0xac(d35e8180, d48b6b30)
lookuppnvp+0x313(d48b6d10, 0)
lookuppnat+0xec(d48b6d10, 0)
lookupnameat+0x54(80476bc, 0)
vn_openat+0x19c(80476bc, 0)
copen+0x2f1(ffd19553, 80476bc)
open+0x1b()
sys_sysenter+0x106()
---8<---
and here's the corresponding code:
---8<---
int
softmac_hold_device(dev_t dev, dls_dev_handle_t *ddhp)
{
<... snip ...>
/*
* First try to hold this device instance to force the MAC
* to be registered.
*/
if ((dip = ddi_hold_devi_by_instance(getmajor(dev), ppa, 0)) == NULL)
return (ENOENT);
<... snip ...>
/*
* This is a network device; wait for its softmac to be registered.
*/
(void) snprintf(devname, MAXNAMELEN, "%s%d", ddi_driver_name(dip), ppa);
again:
rw_enter(&softmac_hash_lock, RW_READER);
if (mod_hash_find(softmac_hash, (mod_hash_key_t)devname,
(mod_hash_val_t *)&softmac) != 0) {
/*
* This is rare but possible. It could happen when pre-detach
* routine of the device succeeds. But the softmac will then
* be recreated when device fails to detach (as this device
* is held).
*/
rw_exit(&softmac_hash_lock);
goto again;
}
---8<---
so we're trying to plumb up rtls0, and we're getting stuck in an infinite
with the "goto again" statement above. the problem seems to stem from the
fact that we're trying to plumb ppa/instance 0, the call to
ddi_hold_devi_by_instance() in softmac_hold_device() returned a dip that
points to rtls1. let's take a look at all the path instances information
for the rtls driver.
---8<---
[0]> 2140001::dev2major
0x85 (0t133)
[0]> 2140001::dev2major | ::major2name
rtls
[0]> ::sizeof struct devnames
sizeof (struct devnames) = 0x38
[0]> *devnamesp + (0x85 * 0x38)::print struct devnames dn_inlist | ::list in_drv_t ind_next | ::print -a in_drv_t ind_driver_name ind_instance
d24683a0 ind_driver_name = 0xd24683b8 "rtls"
d24683a4 ind_instance = 0
d2468380 ind_driver_name = 0xd2468398 "rtls"
d2468384 ind_instance = 0x1
[0]> d24683a0::print in_drv_t ind_node | ::list in_node_t in_parent | ::print -a in_node_t in_node_name in_unit_addr
d244bb70 in_node_name = 0xd244bb88 "pci10ec,0"
d244bb74 in_unit_addr = 0xd244bb92 "4"
d2468580 in_node_name = 0xd2468598 "pci"
d2468584 in_unit_addr = 0xd246859c "0,0"
d2468b00 in_node_name = 0
d2468b04 in_unit_addr = 0xd2468b18 ""
[0]> d2468380::print in_drv_t ind_node | ::list in_node_t in_parent | ::print -a in_node_t in_node_name in_unit_addr
d244baa8 in_node_name = 0xd244bac0 "pci5853,1"
d244baac in_unit_addr = 0xd244baca "4"
d2468580 in_node_name = 0xd2468598 "pci"
d2468584 in_unit_addr = 0xd246859c "0,0"
d2468b00 in_node_name = 0
d2468b04 in_unit_addr = 0xd2468b18 ""
---8<---
so there are two rtls path instances:
/pci@0,0/pci10ec,0@4 0
/pci@0,0/pci5853,1@4 1
so what seems to be happening is that softmac_hold_device() is calling
ddi_hold_devi_by_instance() to find the first instance, but the
dip for the second instance is getting returned. the problem is
with the call to find_child_by_addr() in devi_config_one(). here's
the call tree:
---8<---
[0]> $c
ddi_initchild+9(d31eca48, d31ec008)
find_sibling+0xe8(d31ec900, 0, d50609ca, 14, 0)
find_child_by_addr+0x4a(d31eca48, d50609ca)
devi_config_one+0x19c(d31eca48, d50609c0, d48ac740, 4080, 0)
ndi_devi_config_one+0x87(d31eca48, d50609c0, d48ac740, 4080)
resolve_pathname+0x120(d4896dc0, d48ac770, 0, 0)
e_ddi_hold_devi_by_path+0x19(d4896dc0, 0)
hold_devi+0xe3(85, 0, 0)
ddi_hold_devi_by_instance+0x14(85, 0, 0)
softmac`softmac_hold_device+0x42(2140001, d48ac98c, 2, 0, d38f6da0, 736c7472)
stubs_common_code+0x3b(d48acb30, d48ac9c0, 0)
dls`dls_devnet_open+0x1f(d48acb30, d48aca8c, d48ac9fc)
dev`devnet_create_rvp+0x19(d48acb30, d48aca28, d48aca8c)
dev`devnet_lookup+0xe8(d35e8180, d48acb30, d48acc94, d48acd10, 0, d35e8a80)
fop_lookup+0xac(d35e8180, d48acb30, d48acc94, d48acd10, 0, d35e8a80)
lookuppnvp+0x313(d48acd10, 0, 1, 0, d48ace78, d35e8a80)
lookuppnat+0xec(d48acd10, 0, 1, 0, d48ace78, 0)
lookupnameat+0x54(80476bc, 0, 1, 0, d48ace78, 0)
vn_openat+0x19c(80476bc, 0, 3, ac8, d48acf58, 0)
copen+0x2f1(ffd19553, 80476bc, 3, 8070ac8)
open+0x1b()
sys_sysenter+0x106()
---8<---
and here's the code that is causing the problem:
---8<---
/*
* Find a child of a given address, invoking initchild to name
* unnamed children. cname is the node name.
*
* NOTE: This function is only used during boot. One would hope that
* unique sibling unit-addresses on hardware branches of the tree would
* be a requirement to avoid two drivers trying to control the same
* piece of hardware. Unfortunately there are some cases where this
* situation exists (/ssm@0,0/pci@1c,700000 /ssm@0,0/sghsc@1c,700000).
* Until unit-address uniqueness of siblings is guaranteed, use of this
* interface for purposes other than boot should be avoided.
*/
static dev_info_t *
find_child_by_addr(dev_info_t *pdip, char *caddr)
<... snip ...>
static int
devi_config_one(dev_info_t *pdip, char *devnm, dev_info_t **cdipp,
uint_t flags, clock_t timeout)
{
<... snip ...>
/*
* We may have a genericname on a system that creates drivername
* nodes (from .conf files). Find the drivername by nodeid. If we
* can't find a node with devnm as the node name then we search by
* drivername. This allows an implementation to supply a genericly
* named boot path (disk) and locate drivename nodes (sd). The
* NDI_PROMNAME flag does not apply to /devices/pseudo paths.
*/
if ((flags & NDI_PROMNAME) && (pdip != pseudo_dip)) {
drivername = child_path_to_driver(pdip, name, addr);
find_by_addr = 1;
}
<... snip ...>
/*
* Search for child by name, if not found then search
* for a node bound to the drivername driver with the
* specified "@addr". Break out of for(;;) loop if
* child found. To support path-oriented aliases
* binding on boot-device, we do a search_by_addr too.
*/
again: (void) i_ndi_make_spec_children(pdip, flags);
cdip = find_child_by_name(pdip, name, addr);
if ((cdip == NULL) && drivername)
cdip = find_child_by_driver(pdip,
drivername, addr);
if ((cdip == NULL) && find_by_addr)
cdip = find_child_by_addr(pdip, addr);
if (cdip)
break;
---8<---
the find_child_by_addr() function was added by the following putback:
PSARC 2007/176 path-oriented driver alias
6536151 path-oriented driver alias
so in this case we're falling back to just matching the node address.
here's the node that we end up matching after calling ddi_initchild()
in find_sibling():
---8<---
[0]> d31e8e28::prtconf
DEVINFO NAME
d31ece20 i86pc (driver name: rootnex)
d31eca48 pci, instance #0 (driver name: pci)
d31e8e28 pci10ec,8139, instance #1 (driver not attached)
[0]> d31e8e28::print struct dev_info devi_binding_name devi_node_name devi_addr
devi_binding_name = 0xd31edcfd "pci10ec,8139"
devi_node_name = 0xd31e9ee0 "pci5853,1"
devi_addr = 0xd505b600 "4"
[0]> d31e8e28::devinfo
d31e8e28 pci10ec,8139, instance #1 (driver not attached)
Hardware properties at d31eb258:
name='assigned-addresses' type=int items=10
value=81002010.00000000.0000c200.00000000.00000100.82002014.0000
0000.f4000000.00000000.00000100
name='reg' type=int items=15
value=00002000.00000000.00000000.00000000.00000000.01002010.0000
0000.00000000.00000000.00000100.02002014.00000000.00000000.0000
0000.00000100
name='compatible' type=string items=7
value='pci10ec,8139.5853.1.20' + 'pci10ec,8139.5853.1' + '
pci5853,1' + 'pci10ec,8139.20' + 'pci10ec,8139' + '
pciclass,020000' + 'pciclass,0200'
name='model' type=string items=1
value='Ethernet controller'
name='power-consumption' type=int items=2
value=00000001.00000001
name='devsel-speed' type=int items=1
value=00000000
name='interrupts' type=int items=1
value=00000001
name='max-latency' type=int items=1
value=00000000
name='min-grant' type=int items=1
value=00000000
name='subsystem-vendor-id' type=int items=1
value=00005853
name='subsystem-id' type=int items=1
value=00000001
name='unit-address' type=string items=1
value='4'
name='class-code' type=int items=1
value=00020000
name='revision-id' type=int items=1
value=00000020
name='vendor-id' type=int items=1
value=000010ec
name='device-id' type=int items=1
value=00008139
Global properties at d31e45b8:
name='ForceSpeedDuplex' type=int items=6
value=00000005.00000005.00000005.00000005.00000005.00000005
---8<---
it's probably worth mentioning that in xen 3.0, the rtls node was named
pci10ec,0 but that on xen 3.1 the name has changed to pci5853,1. this
is because in xen 3.0 we had the following subsystem ids:
name='subsystem-vendor-id' type=int items=1
value=000010ec
name='subsystem-id' type=int items=1
value=00000000
but xen 3.1 changed these to be:
name='subsystem-vendor-id' type=int items=1
value=00005853
name='subsystem-id' type=int items=1
value=00000001
this node name assignment happens in pci_autoconfig`process_devfunc().
just to be clear, this is a devfs problem and not a clearview issue.
clearview just got bitten by this problem.
the underlying problem as described above is that we're calling
find_child_by_addr() in devi_config_one() to match children by
address when we shouldn't be. of course i don't know if it's
actually possible to fix this issue there.
a much simpler fix (proposed by chris horne) would be to change
ddi_hold_devi_by_instance() so that before it returns, it check
to see if the instance number of the dip matches the instance
number that was requested. here was his recommendation:
---8<---
If the following code at the end of hold_devi()
/* reconstruct the path and drive attach by path through devfs. */
path = kmem_alloc(MAXPATHLEN, KM_SLEEP);
if (e_ddi_majorinstance_to_path(major, instance, path) == 0)
dip = e_ddi_hold_devi_by_path(path, flags);
checked instance of dip
if (e_ddi_majorinstance_to_path(major, instance, path) == 0) {
dip = e_ddi_hold_devi_by_path(path, flags);
if (DEVI(dip)->devi_instance != instance) {
ndi_rele_devi(dip);
dip = NULL;
}
}
---8<---
|