|
Description
|
Category
kernel
Sub-Category
driver-sd
Description
Concurrent unaligned writes to an optical media panics a snv_b29 debug kernel.
Root cause:
===========
- let's assume we have 3 concurrent I/O requests (unaligned writes to optical
media)
1. blocks b1 .. b2 (active)
2. blocks b0 .. b1
3. blocks b2 .. b3
Note the overlap with block b1 (request 1. & 2.) and b2 (request 1. & 3.)
- sd_range_lock() for requests 2. and 3. block because of the overlap with
request 1. Requests 2. & 3. wait with
"cv_wait(&sl_wmp->wm_avail, SD_MUTEX(un));" for request 1. to complete.
- when request 1. completes "cv_broadcast(&wm->wm_avail);" in sd_range_unlock()
wakes up both requests 2. and 3.
two different threads (for requests 2. and 3.) could try to reuse
the sl_wmp entry from request 1. for their new sd_w_map entry; the
locking data structure becomes corrupted:
29358 sl_wmp->wm_wanted_count++;
29359 cv_wait(&sl_wmp->wm_avail, SD_MUTEX(un));
29360 sl_wmp->wm_wanted_count--;
29361 if (!(sl_wmp->wm_flags & SD_WM_BUSY)) {
29362 if (wmp != NULL)
29363 CHK_N_FREEWMP(un, wmp);
29364 wmp = sl_wmp;
29365 }
when request 2. completes it frees the data structure, when request 3.
completes it tries to access the freed data structure and the kernel
panics due to the "deadbeef" fill pattern used with freed memory
Frequency
Always
Regression
No
Steps to Reproduce
- create zfs filesystem on an usb dvd-ram device
zpool create dvdram c2t0d0p0
- Try to unpack opensolaris sources on the new zfs filesystem
cd /dvdram
bunzip2 < ~/src/opensolaris/opensolaris-closed-bins-20051219.i386.tar.bz2 | tar xf -
- kernel panics
Alternate way to reproduce the issue: Compile the included test program,
and run it, using an argument /dev/rdsk/cXt0d0p0 (the dvd-ram drive's device
path - with a blank dvd-ram media inserted into the drive).
Expected Result
write()s that are not aligned to 2kbyte sector boundaries can be used.
Actual Result
The kernel panics, like this:
# mdb -k 0
Loading modules: [ unix krtld genunix specfs dtrace nfs ip sctp usba uhci fctl nca lofs audiosup cpc fcip random zfs logindmux ptm ufs sppp ]
>> ::status
debugging crash dump vmcore.0 (32-bit) from max
operating system: 5.11 wos_b29 (i86pc)
panic message: assertion failed: cp->cv_waiters <= 1024, file: ../../common/os/condvar.c, line: 503
dump content: kernel pages only
>> $c
vpanic(fea41e94, fea1f8bc, fea1f8a0, 1f7)
assfail+0x5c(fea1f8bc, fea1f8a0, 1f7)
cv_broadcast+0x84(c9d3cfb4)
sd_range_unlock+0x70(c7c8e100, c9d3cf98)
sd_mapblocksize_iodone+0x80(b, c7c8e100, ccba8e88)
sd_return_command+0x1b2(c7c8e100, ccba8e88)
sdintr+0x5a3()
scsa2usb_pkt_completion+0xe6(c7c8d8c0, c878c840)
scsa2usb_transport_request+0x149(c7c8d8c0, 0)
scsa2usb_work_thread+0xbd()
taskq_d_thread+0xa6(c9919ba8, 0)
thread_start+8()
>> c9d3cfb4::whatis
c9d3cfb4 is c9d3cf98+1c, bufctl c9bfece0 freed from sd1_cache
>> c9bfece0$<bufctl
ADDR BUFADDR TIMESTAMP THREAD CALLER
c9bfece0 c9d3cf98 67cc2bf14a c9999de0 sd_free_inlist_wmap+0x4e
>> c9bfece0$<bufctl_audit
ADDR BUFADDR TIMESTAMP THREAD
CACHE LASTLOG CONTENTS
c9bfece0 c9d3cf98 67cc2bf14a c9999de0
c95c46f0 ce059428 cfbe4070
kmem_cache_free_debug+0xf6
kmem_cache_free+0x2f
sd_free_inlist_wmap+0x4e
sd_range_lock+0x13f
sd_mapblocksize_iostart+0x1ba
sd_mapblockaddr_iostart+0x1a1
sd_xbuf_strategy+0x4e
xbuf_iostart+0x118
ddi_xbuf_qstrategy+0x6c
sdstrategy+0x10a
bdev_strategy+0x4d
ldi_strategy+0x40
vdev_disk_io_start+0x1c7
vdev_io_start+0x18
zio_vdev_io_start+0xd6
>> c9d3cfb4::print kcondvar_t
{
_opaque = 0xbeef
}
>>
>> $c
vpanic(fea41e94, fea1f8bc, fea1f8a0, 1f7)
assfail+0x5c(fea1f8bc, fea1f8a0, 1f7)
cv_broadcast+0x84(c9d3cfb4)
sd_range_unlock+0x70(c7c8e100, c9d3cf98)
sd_mapblocksize_iodone+0x80(b, c7c8e100, ccba8e88)
sd_return_command+0x1b2(c7c8e100, ccba8e88)
sdintr+0x5a3()
scsa2usb_pkt_completion+0xe6(c7c8d8c0, c878c840)
scsa2usb_transport_request+0x149(c7c8d8c0, 0)
scsa2usb_work_thread+0xbd()
taskq_d_thread+0xa6(c9919ba8, 0)
thread_start+8()
>> c9d3cf98::whatis
c9d3cf98 is c9d3cf98+0, bufctl c9bfece0 freed from sd1_cache
>> c9d3cf98::print sd_w_map
mdb: failed to look up type sd_w_map: no symbol corresponds to address
>> c9d3cf98::print struct sd_w_map
{
wm_start = 0xdeadbeef
wm_end = 0xdeadbeef
wm_flags = 0
wm_wanted_count = 0xdead
wm_private = 0xdeadbeef
wm_bufp = 0xdeadbeef
wm_next = 0xdeadbeef
wm_prev = 0xdeadbeef
wm_avail = {
_opaque = 0xbeef
}
}
>> c9d3cf98::whatis
c9d3cf98 is c9d3cf98+0, bufctl c9bfece0 freed from sd1_cache
>> c9bfece0$<bufctl_audit
ADDR BUFADDR TIMESTAMP THREAD
CACHE LASTLOG CONTENTS
c9bfece0 c9d3cf98 67cc2bf14a c9999de0
c95c46f0 ce059428 cfbe4070
kmem_cache_free_debug+0xf6
kmem_cache_free+0x2f
sd_free_inlist_wmap+0x4e
sd_range_lock+0x13f
sd_mapblocksize_iostart+0x1ba
sd_mapblockaddr_iostart+0x1a1
sd_xbuf_strategy+0x4e
xbuf_iostart+0x118
ddi_xbuf_qstrategy+0x6c
sdstrategy+0x10a
bdev_strategy+0x4d
ldi_strategy+0x40
vdev_disk_io_start+0x1c7
vdev_io_start+0x18
zio_vdev_io_start+0xd6
>>
>> c7c8e100::whatis
c7c8e100 is c7c8e100+0, bufctl c7d8dc90 allocated from kmem_alloc_2048
>> c7c8e100::print struct sd_lun
{
un_sd = 0xc836a548
un_rqs_bp = 0xc836c9a8
un_rqs_pktp = 0xc8833140
un_sense_isbusy = 0
un_buf_chain_type = 0x3
un_uscsi_chain_type = 0x8
un_direct_chain_type = 0x8
un_priority_chain_type = 0x9
un_waitq_headp = 0
un_waitq_tailp = 0
un_retry_bp = 0
un_retry_statp = 0
un_xbuf_attr = 0xc881da38
un_sys_blocksize = 0x200
un_tgt_blocksize = 0x800
un_blockcount = 0x888480
un_ctype = 0
un_node_type = 0xc813ac98 "ddi_block:cdrom:channel"
un_interconnect_type = 0
un_notready_retry_count = 0xa
un_busy_retry_count = 0x5
un_retry_count = 0x5
un_victim_retry_count = 0xa
un_reset_retry_count = 0x2
un_reserve_release_time = 0x5
un_reservation_type = 0
un_max_xfer_size = 0x40000
un_mincdb = 0x1
un_maxcdb = 0x2
un_status_len = 0x20
un_pkt_flags = 0x40000
un_cmd_timeout = 0x3c
un_uscsi_timeout = 0x78
un_state = 0
un_last_state = 0
un_last_pkt_reason = 0
un_tagflags = 0
un_resvd_status = 0
un_detach_count = 0
un_layer_count = 0
un_opens_in_progress = 0
un_semoclose = {
_opaque = [ 0, 1 ]
}
un_ncmds_in_driver = 0x4
un_ncmds_in_transport = 0
un_throttle = 0x3
un_saved_throttle = 0x3
un_busy_throttle = 0
un_min_throttle = 0x8
un_reset_throttle_timeid = 0
un_mhd_token = 0
un_resvd_timeid = 0
un_insert_event = 0
un_insert_cb_id = 0
un_remove_event = 0
un_remove_cb_id = 0
un_start_stop_cycle_page = 0
un_dcvb_timeid = 0
un_ocmap = {
chkd = [ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ... ]
rinfo = {
lyr_open = [ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 ]
reg_open = [ 0x10000, 0, 0, 0 ]
}
}
un_map = [
{
dkl_cylno = 0
dkl_nblk = 0x888480
}
{
dkl_cylno = 0
dkl_nblk = 0
}
{
dkl_cylno = 0
dkl_nblk = 0x888480
}
{
dkl_cylno = 0
dkl_nblk = 0
}
{
dkl_cylno = 0
dkl_nblk = 0
}
{
dkl_cylno = 0
dkl_nblk = 0
}
{
dkl_cylno = 0
dkl_nblk = 0
}
{
dkl_cylno = 0
dkl_nblk = 0
}
{
dkl_cylno = 0
dkl_nblk = 0
}
{
dkl_cylno = 0
dkl_nblk = 0
}
{
dkl_cylno = 0
dkl_nblk = 0
}
{
dkl_cylno = 0
dkl_nblk = 0
}
{
dkl_cylno = 0
dkl_nblk = 0
}
{
dkl_cylno = 0
dkl_nblk = 0
}
{
dkl_cylno = 0
dkl_nblk = 0
}
{
dkl_cylno = 0
dkl_nblk = 0
}
{
dkl_cylno = 0
dkl_nblk = 0x888480
}
{
dkl_cylno = 0xffffffff
dkl_nblk = 0
}
{
dkl_cylno = 0xffffffff
dkl_nblk = 0
}
{
dkl_cylno = 0xffffffff
dkl_nblk = 0
}
{
dkl_cylno = 0xffffffff
dkl_nblk = 0
}
{
dkl_cylno = 0
dkl_nblk = 0
}
]
un_offset = [ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 ]
un_pstats = [ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 ]
un_vtoc = {
v_bootinfo = [ 0, 0, 0 ]
v_sanity = 0x600ddeee
v_version = 0x1
v_volume = [ '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0' ]
v_sectorsz = 0
v_nparts = 0x10
v_reserved = [ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 ]
v_part = [
{
p_tag = 0x5
p_flag = 0x1
p_start = 0
p_size = 0x888480
}
{
p_tag = 0
p_flag = 0
p_start = 0
p_size = 0
}
{
p_tag = 0x5
p_flag = 0x1
p_start = 0
p_size = 0x888480
}
{
p_tag = 0
p_flag = 0
p_start = 0
p_size = 0
}
{
p_tag = 0
p_flag = 0
p_start = 0
p_size = 0
}
{
p_tag = 0
p_flag = 0
p_start = 0
p_size = 0
}
{
p_tag = 0
p_flag = 0
p_start = 0
p_size = 0
}
{
p_tag = 0
p_flag = 0
p_start = 0
p_size = 0
}
{
p_tag = 0
p_flag = 0
p_start = 0
p_size = 0
}
{
p_tag = 0
p_flag = 0
p_start = 0
p_size = 0
}
{
p_tag = 0
p_flag = 0
p_start = 0
p_size = 0
}
{
p_tag = 0
p_flag = 0
p_start = 0
p_size = 0
}
{
p_tag = 0
p_flag = 0
p_start = 0
p_size = 0
}
{
p_tag = 0
p_flag = 0
p_start = 0
p_size = 0
}
{
p_tag = 0
p_flag = 0
p_start = 0
p_size = 0
}
{
p_tag = 0
p_flag = 0
p_start = 0
p_size = 0
}
]
timestamp = [ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 ]
v_asciilabel = [ "DEFAULT cyl 4366 alt 2 hd 1 sec 1" ]
}
un_g = {
dkg_ncyl = 0x110e
dkg_acyl = 0x2
dkg_bcyl = 0
dkg_nhead = 0x1
dkg_obs1 = 0
dkg_nsect = 0x1
dkg_intrlv = 0x1
dkg_obs2 = 0
dkg_obs3 = 0
dkg_apc = 0
dkg_rpm = 0xc7c8
dkg_pcyl = 0x1110
dkg_write_reinstruct = 0
dkg_read_reinstruct = 0
dkg_extra = [ 0, 0, 0, 0, 0, 0, 0 ]
}
un_stats = 0xc7fd9aa0
un_errstats = 0xc7fd9bc0
un_exclopen = 0
un_devid = 0
un_vpd_page_mask = 0
un_asciilabel = [ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...
]
un_solaris_size = 0x888480
un_solaris_offset = 0
un_dkg_skew = 0
un_fmap = [
{
fmap_start = 0
fmap_nblk = 0
}
{
fmap_start = 0
fmap_nblk = 0
}
{
fmap_start = 0
fmap_nblk = 0
}
{
fmap_start = 0
fmap_nblk = 0
}
]
un_lgeom = {
g_ncyl = 0x1110
g_acyl = 0
g_nhead = 0x40
g_nsect = 0x20
g_secsize = 0x200
g_capacity = 0x888480
g_intrlv = 0
g_rpm = 0
}
un_pgeom = {
g_ncyl = 0x1110
g_acyl = 0
g_nhead = 0x1
g_nsect = 0x800
g_secsize = 0x800
g_capacity = 0x888480
g_intrlv = 0xe100
g_rpm = 0xc7c8
}
un_f_arq_enabled = 0x1
un_f_blockcount_is_valid = 0x1
un_f_tgt_blocksize_is_valid = 0x1
un_f_geometry_is_valid = 0x1
un_f_allow_bus_device_reset = 0x1
un_f_is_fibre = 0
un_f_sync_cache_unsupported = 0
un_f_format_in_progress = 0
un_f_opt_queueing = 0x1
un_f_opt_fab_devid = 0
un_f_opt_disable_cache = 0
un_f_cfg_is_atapi = 0
un_f_write_cache_enabled = 0x1
un_f_cfg_playmsf_bcd = 0
un_f_cfg_readsub_bcd = 0
un_f_cfg_read_toc_trk_bcd = 0
un_f_cfg_read_toc_addr_bcd = 0
un_f_cfg_no_read_header = 0
un_f_cfg_read_cd_xd4 = 0
un_f_mmc_cap = 0x1
un_f_mmc_writable_media = 0x1
un_f_dvdram_writable_device = 0x1
un_f_cfg_cdda = 0x1
un_f_cfg_tur_check = 0
un_f_use_adaptive_throttle = 0
un_f_pm_is_enabled = 0x1
un_f_watcht_stopped = 0
un_f_pkstats_enabled = 0x1
un_f_disksort_disabled = 0
un_f_lun_reset_enabled = 0
un_f_doorlock_supported = 0x1
un_f_start_stop_supported = 0x1
un_additional_codes = sd_additional_codes
un_pm_mutex = {
_opaque = [ 0, 0 ]
}
un_pm_count = 0
un_pm_timeid = 0
un_pm_busy = 0
un_pm_busy_cv = {
_opaque = 0
}
un_pm_capable_prop = 0xffffffff
un_power_level = 0x1
un_save_state = 0
un_suspend_cv = {
_opaque = 0
}
un_disk_busy_cv = {
_opaque = 0
}
un_state_cv = {
_opaque = 0
}
un_mediastate = 2 (DKIO_INSERTED)
un_specified_mediastate = 2 (DKIO_INSERTED)
un_swr_token = 0
un_wm_cache = 0xc95c46f0
un_rmw_count = 0x1
un_wm = 0xc9d3cfc8
un_startstop_timeid = 0
un_direct_priority_timeid = 0
un_tran_fatal_count = 0
un_retry_timeid = 0
un_pm_idle_time = 2005 Dec 23 17:06:45
un_pm_idle_timeid = 0x3fe12c28
un_sonoma_failure_count = 0
un_reserved = 0xffffffff
un_failfast_bp = 0
un_failfast_headp = 0
un_failfast_tailp = 0
un_failfast_state = 0
un_in_callback = 0x1
un_f_cfg_is_lsi = 0
un_f_pad_uint32 = 0
}
Error Message(s)
kernel panic:
assertion failed: cp->cv_waiters <= 1024, file: ../../common/os/condvar.c, line: 503
Test Case
/*
* cc -o twr twr.c -lpthread -lrt
*/
#include <stdio.h>
#include <stdlib.h>
#include <fcntl.h>
#include <pthread.h>
#define NUM_WRITES 30
struct thread_arg {
int fd;
int delay;
int offs;
};
static void *
write_func(void *_arg)
{
struct thread_arg *arg = _arg;
struct timespec ts;
char data[16 * 2048];
int rval;
memset(data, arg->offs, sizeof(data));
if (arg->delay > 0) {
ts.tv_sec = 0;
ts.tv_nsec = arg->delay * 10000000;
nanosleep(&ts, NULL);
}
/* unaligned write */
rval = pwrite(arg->fd, data, sizeof(data), arg->offs * sizeof(data) + 1024);
if (rval < 0) {
perror("pwrite");
exit(1);
}
if (rval != sizeof(data)) {
fprintf(stderr, "incomplete write %d != %d\n", rval, sizeof(data));
}
return NULL;
}
int
main(int argc, char **argv)
{
int fd;
char *filename = argv[1];
pthread_t tid[NUM_WRITES];
struct thread_arg args[NUM_WRITES];
int i;
int err;
fd = open(filename, O_RDWR);
if (fd < 0) {
perror(filename);
exit(1);
}
for (i = 0; i < NUM_WRITES; i++) {
args[i].fd = fd;
args[i].delay = 0;
args[i].offs = i;
err = pthread_create(tid+i, NULL, write_func, args+i);
if (err != 0) {
fprintf(stderr, "create thead failed: %s\n",
strerror(err));
exit(1);
}
}
for (i = 0; i < NUM_WRITES; i++) {
void *status;
pthread_join(tid[i], &status);
}
close(fd);
return 0;
}
Workaround
Fix:
====
In the sd.c driver, change line 29361, for example to:
if (!(sl_wmp->wm_flags & SD_WM_BUSY) && sl_wmp->wm_wanted_count == 0)
so that only one thread can reuse the "struct sd_w_map" entry from the
completed I/O request, the other thread must allocate a new entry from
the "un->un_wm_cache" object cache.
Patch:
======
--- usr/src/uts/common/io/scsi/targets/sd-orig.c 2005-12-17 06:14:59.000000000 +0100
+++ usr/src/uts/common/io/scsi/targets/sd.c 2005-12-26 11:46:37.522858000 +0100
@@ -29358,7 +29358,13 @@
sl_wmp->wm_wanted_count++;
cv_wait(&sl_wmp->wm_avail, SD_MUTEX(un));
sl_wmp->wm_wanted_count--;
- if (!(sl_wmp->wm_flags & SD_WM_BUSY)) {
+ /*
+ * We can reuse the memory from the completed sl_wmp
+ * lock range for our new lock, but only if noone is
+ * waiting for it.
+ */
+ ASSERT(!(sl_wmp->wm_flags & SD_WM_BUSY));
+ if (sl_wmp->wm_wanted_count == 0) {
if (wmp != NULL)
CHK_N_FREEWMP(un, wmp);
wmp = sl_wmp;
Submitter wants to work on bug
Yes
Additional configuration information
snv_28, bfu'ed to snv_29 (debug kernel)
USB2.0 LG "HL-DT-ST DVDRAM GSA-4082B" DVD writer device (supports DVD-RAM media)
|