|
@@ -1,212 +0,0 @@
|
|
|
-From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
|
|
|
-From: Brian Behlendorf <[email protected]>
|
|
|
-Date: Fri, 17 Dec 2021 09:52:13 -0800
|
|
|
-Subject: [PATCH] Fix zvol_open() lock inversion
|
|
|
-
|
|
|
-When restructuring the zvol_open() logic for the Linux 5.13 kernel
|
|
|
-a lock inversion was accidentally introduced. In the updated code
|
|
|
-the spa_namespace_lock is now taken before the zv_suspend_lock
|
|
|
-allowing the following scenario to occur:
|
|
|
-
|
|
|
- down_read <=== waiting for zv_suspend_lock
|
|
|
- zvol_open <=== holds spa_namespace_lock
|
|
|
- __blkdev_get
|
|
|
- blkdev_get_by_dev
|
|
|
- blkdev_open
|
|
|
- ...
|
|
|
-
|
|
|
- mutex_lock <== waiting for spa_namespace_lock
|
|
|
- spa_open_common
|
|
|
- spa_open
|
|
|
- dsl_pool_hold
|
|
|
- dmu_objset_hold_flags
|
|
|
- dmu_objset_hold
|
|
|
- dsl_prop_get
|
|
|
- dsl_prop_get_integer
|
|
|
- zvol_create_minor
|
|
|
- dmu_recv_end
|
|
|
- zfs_ioc_recv_impl <=== holds zv_suspend_lock via zvol_suspend()
|
|
|
- zfs_ioc_recv
|
|
|
- ...
|
|
|
-
|
|
|
-This commit resolves the issue by moving the acquisition of the
|
|
|
-spa_namespace_lock back to after the zv_suspend_lock which restores
|
|
|
-the original ordering.
|
|
|
-
|
|
|
-Additionally, as part of this change the error exit paths were
|
|
|
-simplified where possible.
|
|
|
-
|
|
|
-Reviewed-by: Tony Hutter <[email protected]>
|
|
|
-Reviewed-by: Rich Ercolani <[email protected]>
|
|
|
-Signed-off-by: Brian Behlendorf <[email protected]>
|
|
|
-Closes #12863
|
|
|
-(cherry picked from commit 8a02d01e85556bbe3a1c6947bc11b8ef028d4023)
|
|
|
-Signed-off-by: Stoiko Ivanov <[email protected]>
|
|
|
----
|
|
|
- module/os/linux/zfs/zvol_os.c | 121 ++++++++++++++++------------------
|
|
|
- 1 file changed, 58 insertions(+), 63 deletions(-)
|
|
|
-
|
|
|
-diff --git a/module/os/linux/zfs/zvol_os.c b/module/os/linux/zfs/zvol_os.c
|
|
|
-index 44caadd58..69479b3f7 100644
|
|
|
---- a/module/os/linux/zfs/zvol_os.c
|
|
|
-+++ b/module/os/linux/zfs/zvol_os.c
|
|
|
-@@ -496,8 +496,7 @@ zvol_open(struct block_device *bdev, fmode_t flag)
|
|
|
- {
|
|
|
- zvol_state_t *zv;
|
|
|
- int error = 0;
|
|
|
-- boolean_t drop_suspend = B_TRUE;
|
|
|
-- boolean_t drop_namespace = B_FALSE;
|
|
|
-+ boolean_t drop_suspend = B_FALSE;
|
|
|
- #ifndef HAVE_BLKDEV_GET_ERESTARTSYS
|
|
|
- hrtime_t timeout = MSEC2NSEC(zvol_open_timeout_ms);
|
|
|
- hrtime_t start = gethrtime();
|
|
|
-@@ -517,7 +516,36 @@ retry:
|
|
|
- return (SET_ERROR(-ENXIO));
|
|
|
- }
|
|
|
-
|
|
|
-- if (zv->zv_open_count == 0 && !mutex_owned(&spa_namespace_lock)) {
|
|
|
-+ mutex_enter(&zv->zv_state_lock);
|
|
|
-+ /*
|
|
|
-+ * Make sure zvol is not suspended during first open
|
|
|
-+ * (hold zv_suspend_lock) and respect proper lock acquisition
|
|
|
-+ * ordering - zv_suspend_lock before zv_state_lock
|
|
|
-+ */
|
|
|
-+ if (zv->zv_open_count == 0) {
|
|
|
-+ if (!rw_tryenter(&zv->zv_suspend_lock, RW_READER)) {
|
|
|
-+ mutex_exit(&zv->zv_state_lock);
|
|
|
-+ rw_enter(&zv->zv_suspend_lock, RW_READER);
|
|
|
-+ mutex_enter(&zv->zv_state_lock);
|
|
|
-+ /* check to see if zv_suspend_lock is needed */
|
|
|
-+ if (zv->zv_open_count != 0) {
|
|
|
-+ rw_exit(&zv->zv_suspend_lock);
|
|
|
-+ } else {
|
|
|
-+ drop_suspend = B_TRUE;
|
|
|
-+ }
|
|
|
-+ } else {
|
|
|
-+ drop_suspend = B_TRUE;
|
|
|
-+ }
|
|
|
-+ }
|
|
|
-+ rw_exit(&zvol_state_lock);
|
|
|
-+
|
|
|
-+ ASSERT(MUTEX_HELD(&zv->zv_state_lock));
|
|
|
-+
|
|
|
-+ if (zv->zv_open_count == 0) {
|
|
|
-+ boolean_t drop_namespace = B_FALSE;
|
|
|
-+
|
|
|
-+ ASSERT(RW_READ_HELD(&zv->zv_suspend_lock));
|
|
|
-+
|
|
|
- /*
|
|
|
- * In all other call paths the spa_namespace_lock is taken
|
|
|
- * before the bdev->bd_mutex lock. However, on open(2)
|
|
|
-@@ -542,84 +570,51 @@ retry:
|
|
|
- * the kernel so the only option is to return the error for
|
|
|
- * the caller to handle it.
|
|
|
- */
|
|
|
-- if (!mutex_tryenter(&spa_namespace_lock)) {
|
|
|
-- rw_exit(&zvol_state_lock);
|
|
|
-+ if (!mutex_owned(&spa_namespace_lock)) {
|
|
|
-+ if (!mutex_tryenter(&spa_namespace_lock)) {
|
|
|
-+ mutex_exit(&zv->zv_state_lock);
|
|
|
-+ rw_exit(&zv->zv_suspend_lock);
|
|
|
-
|
|
|
- #ifdef HAVE_BLKDEV_GET_ERESTARTSYS
|
|
|
-- schedule();
|
|
|
-- return (SET_ERROR(-ERESTARTSYS));
|
|
|
--#else
|
|
|
-- if ((gethrtime() - start) > timeout)
|
|
|
-+ schedule();
|
|
|
- return (SET_ERROR(-ERESTARTSYS));
|
|
|
-+#else
|
|
|
-+ if ((gethrtime() - start) > timeout)
|
|
|
-+ return (SET_ERROR(-ERESTARTSYS));
|
|
|
-
|
|
|
-- schedule_timeout(MSEC_TO_TICK(10));
|
|
|
-- goto retry;
|
|
|
-+ schedule_timeout(MSEC_TO_TICK(10));
|
|
|
-+ goto retry;
|
|
|
- #endif
|
|
|
-- } else {
|
|
|
-- drop_namespace = B_TRUE;
|
|
|
-- }
|
|
|
-- }
|
|
|
--
|
|
|
-- mutex_enter(&zv->zv_state_lock);
|
|
|
-- /*
|
|
|
-- * make sure zvol is not suspended during first open
|
|
|
-- * (hold zv_suspend_lock) and respect proper lock acquisition
|
|
|
-- * ordering - zv_suspend_lock before zv_state_lock
|
|
|
-- */
|
|
|
-- if (zv->zv_open_count == 0) {
|
|
|
-- if (!rw_tryenter(&zv->zv_suspend_lock, RW_READER)) {
|
|
|
-- mutex_exit(&zv->zv_state_lock);
|
|
|
-- rw_enter(&zv->zv_suspend_lock, RW_READER);
|
|
|
-- mutex_enter(&zv->zv_state_lock);
|
|
|
-- /* check to see if zv_suspend_lock is needed */
|
|
|
-- if (zv->zv_open_count != 0) {
|
|
|
-- rw_exit(&zv->zv_suspend_lock);
|
|
|
-- drop_suspend = B_FALSE;
|
|
|
-+ } else {
|
|
|
-+ drop_namespace = B_TRUE;
|
|
|
- }
|
|
|
- }
|
|
|
-- } else {
|
|
|
-- drop_suspend = B_FALSE;
|
|
|
-- }
|
|
|
-- rw_exit(&zvol_state_lock);
|
|
|
--
|
|
|
-- ASSERT(MUTEX_HELD(&zv->zv_state_lock));
|
|
|
-
|
|
|
-- if (zv->zv_open_count == 0) {
|
|
|
-- ASSERT(RW_READ_HELD(&zv->zv_suspend_lock));
|
|
|
- error = -zvol_first_open(zv, !(flag & FMODE_WRITE));
|
|
|
-- if (error)
|
|
|
-- goto out_mutex;
|
|
|
-- }
|
|
|
-
|
|
|
-- if ((flag & FMODE_WRITE) && (zv->zv_flags & ZVOL_RDONLY)) {
|
|
|
-- error = -EROFS;
|
|
|
-- goto out_open_count;
|
|
|
-+ if (drop_namespace)
|
|
|
-+ mutex_exit(&spa_namespace_lock);
|
|
|
- }
|
|
|
-
|
|
|
-- zv->zv_open_count++;
|
|
|
--
|
|
|
-- mutex_exit(&zv->zv_state_lock);
|
|
|
-- if (drop_namespace)
|
|
|
-- mutex_exit(&spa_namespace_lock);
|
|
|
-- if (drop_suspend)
|
|
|
-- rw_exit(&zv->zv_suspend_lock);
|
|
|
--
|
|
|
-- zfs_check_media_change(bdev);
|
|
|
--
|
|
|
-- return (0);
|
|
|
-+ if (error == 0) {
|
|
|
-+ if ((flag & FMODE_WRITE) && (zv->zv_flags & ZVOL_RDONLY)) {
|
|
|
-+ if (zv->zv_open_count == 0)
|
|
|
-+ zvol_last_close(zv);
|
|
|
-
|
|
|
--out_open_count:
|
|
|
-- if (zv->zv_open_count == 0)
|
|
|
-- zvol_last_close(zv);
|
|
|
-+ error = SET_ERROR(-EROFS);
|
|
|
-+ } else {
|
|
|
-+ zv->zv_open_count++;
|
|
|
-+ }
|
|
|
-+ }
|
|
|
-
|
|
|
--out_mutex:
|
|
|
- mutex_exit(&zv->zv_state_lock);
|
|
|
-- if (drop_namespace)
|
|
|
-- mutex_exit(&spa_namespace_lock);
|
|
|
- if (drop_suspend)
|
|
|
- rw_exit(&zv->zv_suspend_lock);
|
|
|
-
|
|
|
-- return (SET_ERROR(error));
|
|
|
-+ if (error == 0)
|
|
|
-+ zfs_check_media_change(bdev);
|
|
|
-+
|
|
|
-+ return (error);
|
|
|
- }
|
|
|
-
|
|
|
- static void
|