|
Description
|
Tim Chen reported the following panic on his system:
BAD TRAP: type=e (#pf Page fault) rp=ffffff0004404580 addr=0 occurred in module
"unix" due to a NULL pointer dereference
dump content: kernel pages only
cmt_pad_disable+0x6d(7)
cpupm_set_policy+0xae(1)
pm_ioctl+0x3300(8300000001, 17, 0, 100003, ffffff017be28108, ffffff0004404dd4)
cdev_ioctl+0x45(8300000001, 17, 0, 100003, ffffff017be28108, ffffff0004404dd4)
spec_ioctl+0x83(ffffff016db8c900, 17, 0, 100003, ffffff017be28108,
ffffff0004404dd4)
fop_ioctl+0x7b(ffffff016db8c900, 17, 0, 100003, ffffff017be28108,
ffffff0004404dd4)
ioctl+0x18e(3, 17, 0)
_sys_sysenter_post_swapgs+0x23c()
%rax = 0xffffff014c64fa38 %r9 = 0xffffff014e9607f8
%rbx = 0x0000000000000007 %r10 = 0x0000000000000020
%rcx = 0x0000000000000001 %r11 = 0x0000000000000001
%rdx = 0x0000000000000000 %r12 = 0xffffff014e9607d0
%rsi = 0xffffff000440467c %r13 = 0xffffff014c64fa38
%rdi = 0xffffff014e9607d0 %r14 = 0xffffff000440467c
%r8 = 0x0000000000000000 %r15 = 0x0000000000100003
%rip = 0xfffffffffb8684c5 cmt_pad_disable+0x6d
%rbp = 0xffffff00044046b0
%rsp = 0xffffff0004404670
%rflags = 0x00010282
id=0 vip=0 vif=0 ac=0 vm=0 rf=1 nt=0 iopl=0x0
status=<of,df,IF,tf,SF,zf,af,pf,cf>
%cs = 0x0030 %ds = 0x004b %es = 0x004b
%trapno = 0xe %fs = 0x0000 %gs = 0x01c3
%err = 0x0
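The faulting instruction, cmt_pad_disable+0x6d, corresponds to the following source (the arrow marks the faulting line):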
1228 /*
1229 * If the power domain has an only child that implements
1230 * policy other than load balancing, promote the child
1231 * above the power domain to ensure it's policy dominates.
1232 */
1233 if (GROUP_SIZE(pg->cmt_children) == 1) { <---------------
1234 child = GROUP_ACCESS(pg->cmt_children, 0);
1235 if ((child->cmt_policy & CMT_BALANCE) == 0) {
1236 cmt_hier_promote(child);
1237 }
1238 }
Looking at the PG:
{
cmt_pg = {
pghw_pg = {
pg_id = 0x1
pg_relation = 1 (PGR_PHYSICAL)
pg_class = 0xffffff014ca3de90
pg_cpus = {
grp_size = 0x2
grp_capacity = 0x2
grp_set = 0xffffff014e960820
}
pg_cb = {
thread_swtch = cmt_ev_thread_swtch_pwr
thread_remain = cmt_ev_thread_remain_pwr
}
}
pghw_hw = 7 (PGHW_POW_ACTIVE)
pghw_instance = 0
pghw_handle = 0xffffff014c7678f0
pghw_kstat = 0xffffff014d45b8d0
}
cmt_siblings = 0xffffff014e960898
cmt_parent = 0xffffff014c64faf0
cmt_children = 0
cmt_policy = 0x1
cmt_utilization = 0x2
cmt_nchildren = 0
cmt_hint = 0
cmt_cpus_actv = {
grp_size = 0x2
grp_capacity = 0x2
grp_set = 0xffffff014e960848
}
cmt_cpus_actv_set = {
bs_set = 0xffffff014ca36d80
bs_words = 0x1
}
}
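The problem is clear: on this system the power domains are the leaves of the PG hierarchy, so cmt_children is NULL (and cmt_nchildren is 0). GROUP_SIZE(pg->cmt_children) therefore dereferences a NULL pointer, which matches the fault address of 0. cmt_pad_disable() should check that cmt_children is non-NULL before trying to determine the number of children.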
|