@@ -114,6 +114,8 @@ static int atomic_dec_return_safe(atomic_t *v)
114114
115115#define RBD_OBJ_PREFIX_LEN_MAX 64
116116
117+ #define RBD_RETRY_DELAY msecs_to_jiffies(1000)
118+
117119/* Feature bits */
118120
119121#define RBD_FEATURE_LAYERING (1<<0)
@@ -319,6 +321,12 @@ struct rbd_img_request {
319321#define for_each_obj_request_safe (ireq , oreq , n ) \
320322 list_for_each_entry_safe_reverse(oreq, n, &(ireq)->obj_requests, links)
321323
324+ enum rbd_watch_state {
325+ RBD_WATCH_STATE_UNREGISTERED ,
326+ RBD_WATCH_STATE_REGISTERED ,
327+ RBD_WATCH_STATE_ERROR ,
328+ };
329+
322330struct rbd_mapping {
323331 u64 size ;
324332 u64 features ;
@@ -352,7 +360,11 @@ struct rbd_device {
352360
353361 struct ceph_file_layout layout ; /* used for all rbd requests */
354362
363+ struct mutex watch_mutex ;
364+ enum rbd_watch_state watch_state ;
355365 struct ceph_osd_linger_request * watch_handle ;
366+ u64 watch_cookie ;
367+ struct delayed_work watch_dwork ;
356368
357369 struct workqueue_struct * task_wq ;
358370
@@ -3083,9 +3095,6 @@ static void rbd_img_parent_read(struct rbd_obj_request *obj_request)
30833095 obj_request_done_set (obj_request );
30843096}
30853097
3086- static int rbd_dev_header_watch_sync (struct rbd_device * rbd_dev );
3087- static void __rbd_dev_header_unwatch_sync (struct rbd_device * rbd_dev );
3088-
30893098static void rbd_watch_cb (void * arg , u64 notify_id , u64 cookie ,
30903099 u64 notifier_id , void * data , size_t data_len )
30913100{
@@ -3113,35 +3122,34 @@ static void rbd_watch_cb(void *arg, u64 notify_id, u64 cookie,
31133122 rbd_warn (rbd_dev , "notify_ack ret %d" , ret );
31143123}
31153124
3125+ static void __rbd_unregister_watch (struct rbd_device * rbd_dev );
3126+
31163127static void rbd_watch_errcb (void * arg , u64 cookie , int err )
31173128{
31183129 struct rbd_device * rbd_dev = arg ;
3119- int ret ;
31203130
31213131 rbd_warn (rbd_dev , "encountered watch error: %d" , err );
31223132
3123- __rbd_dev_header_unwatch_sync (rbd_dev );
3133+ mutex_lock (& rbd_dev -> watch_mutex );
3134+ if (rbd_dev -> watch_state == RBD_WATCH_STATE_REGISTERED ) {
3135+ __rbd_unregister_watch (rbd_dev );
3136+ rbd_dev -> watch_state = RBD_WATCH_STATE_ERROR ;
31243137
3125- ret = rbd_dev_header_watch_sync (rbd_dev );
3126- if (ret ) {
3127- rbd_warn (rbd_dev , "failed to reregister watch: %d" , ret );
3128- return ;
3138+ queue_delayed_work (rbd_dev -> task_wq , & rbd_dev -> watch_dwork , 0 );
31293139 }
3130-
3131- ret = rbd_dev_refresh (rbd_dev );
3132- if (ret )
3133- rbd_warn (rbd_dev , "reregisteration refresh failed: %d" , ret );
3140+ mutex_unlock (& rbd_dev -> watch_mutex );
31343141}
31353142
31363143/*
3137- * Initiate a watch request, synchronously.
3144+ * watch_mutex must be locked
31383145 */
3139- static int rbd_dev_header_watch_sync (struct rbd_device * rbd_dev )
3146+ static int __rbd_register_watch (struct rbd_device * rbd_dev )
31403147{
31413148 struct ceph_osd_client * osdc = & rbd_dev -> rbd_client -> client -> osdc ;
31423149 struct ceph_osd_linger_request * handle ;
31433150
31443151 rbd_assert (!rbd_dev -> watch_handle );
3152+ dout ("%s rbd_dev %p\n" , __func__ , rbd_dev );
31453153
31463154 handle = ceph_osdc_watch (osdc , & rbd_dev -> header_oid ,
31473155 & rbd_dev -> header_oloc , rbd_watch_cb ,
@@ -3153,13 +3161,16 @@ static int rbd_dev_header_watch_sync(struct rbd_device *rbd_dev)
31533161 return 0 ;
31543162}
31553163
3156- static void __rbd_dev_header_unwatch_sync (struct rbd_device * rbd_dev )
3164+ /*
3165+ * watch_mutex must be locked
3166+ */
3167+ static void __rbd_unregister_watch (struct rbd_device * rbd_dev )
31573168{
31583169 struct ceph_osd_client * osdc = & rbd_dev -> rbd_client -> client -> osdc ;
31593170 int ret ;
31603171
3161- if (! rbd_dev -> watch_handle )
3162- return ;
3172+ rbd_assert ( rbd_dev -> watch_handle );
3173+ dout ( "%s rbd_dev %p\n" , __func__ , rbd_dev ) ;
31633174
31643175 ret = ceph_osdc_unwatch (osdc , rbd_dev -> watch_handle );
31653176 if (ret )
@@ -3168,17 +3179,80 @@ static void __rbd_dev_header_unwatch_sync(struct rbd_device *rbd_dev)
31683179 rbd_dev -> watch_handle = NULL ;
31693180}
31703181
3171- /*
3172- * Tear down a watch request, synchronously.
3173- */
3174- static void rbd_dev_header_unwatch_sync (struct rbd_device * rbd_dev )
3182+ static int rbd_register_watch (struct rbd_device * rbd_dev )
3183+ {
3184+ int ret ;
3185+
3186+ mutex_lock (& rbd_dev -> watch_mutex );
3187+ rbd_assert (rbd_dev -> watch_state == RBD_WATCH_STATE_UNREGISTERED );
3188+ ret = __rbd_register_watch (rbd_dev );
3189+ if (ret )
3190+ goto out ;
3191+
3192+ rbd_dev -> watch_state = RBD_WATCH_STATE_REGISTERED ;
3193+ rbd_dev -> watch_cookie = rbd_dev -> watch_handle -> linger_id ;
3194+
3195+ out :
3196+ mutex_unlock (& rbd_dev -> watch_mutex );
3197+ return ret ;
3198+ }
3199+
3200+ static void cancel_tasks_sync (struct rbd_device * rbd_dev )
31753201{
3176- __rbd_dev_header_unwatch_sync (rbd_dev );
3202+ dout ("%s rbd_dev %p\n" , __func__ , rbd_dev );
3203+
3204+ cancel_delayed_work_sync (& rbd_dev -> watch_dwork );
3205+ }
3206+
3207+ static void rbd_unregister_watch (struct rbd_device * rbd_dev )
3208+ {
3209+ cancel_tasks_sync (rbd_dev );
3210+
3211+ mutex_lock (& rbd_dev -> watch_mutex );
3212+ if (rbd_dev -> watch_state == RBD_WATCH_STATE_REGISTERED )
3213+ __rbd_unregister_watch (rbd_dev );
3214+ rbd_dev -> watch_state = RBD_WATCH_STATE_UNREGISTERED ;
3215+ mutex_unlock (& rbd_dev -> watch_mutex );
31773216
3178- dout ("%s flushing notifies\n" , __func__ );
31793217 ceph_osdc_flush_notifies (& rbd_dev -> rbd_client -> client -> osdc );
31803218}
31813219
3220+ static void rbd_reregister_watch (struct work_struct * work )
3221+ {
3222+ struct rbd_device * rbd_dev = container_of (to_delayed_work (work ),
3223+ struct rbd_device , watch_dwork );
3224+ int ret ;
3225+
3226+ dout ("%s rbd_dev %p\n" , __func__ , rbd_dev );
3227+
3228+ mutex_lock (& rbd_dev -> watch_mutex );
3229+ if (rbd_dev -> watch_state != RBD_WATCH_STATE_ERROR )
3230+ goto fail_unlock ;
3231+
3232+ ret = __rbd_register_watch (rbd_dev );
3233+ if (ret ) {
3234+ rbd_warn (rbd_dev , "failed to reregister watch: %d" , ret );
3235+ if (ret != - EBLACKLISTED )
3236+ queue_delayed_work (rbd_dev -> task_wq ,
3237+ & rbd_dev -> watch_dwork ,
3238+ RBD_RETRY_DELAY );
3239+ goto fail_unlock ;
3240+ }
3241+
3242+ rbd_dev -> watch_state = RBD_WATCH_STATE_REGISTERED ;
3243+ rbd_dev -> watch_cookie = rbd_dev -> watch_handle -> linger_id ;
3244+ mutex_unlock (& rbd_dev -> watch_mutex );
3245+
3246+ ret = rbd_dev_refresh (rbd_dev );
3247+ if (ret )
3248+ rbd_warn (rbd_dev , "reregisteration refresh failed: %d" , ret );
3249+
3250+ return ;
3251+
3252+ fail_unlock :
3253+ mutex_unlock (& rbd_dev -> watch_mutex );
3254+ }
3255+
31823256/*
31833257 * Synchronous osd object method call. Returns the number of bytes
31843258 * returned in the outbound buffer, or a negative error code.
@@ -3945,6 +4019,8 @@ static void rbd_spec_free(struct kref *kref)
39454019
39464020static void rbd_dev_free (struct rbd_device * rbd_dev )
39474021{
4022+ WARN_ON (rbd_dev -> watch_state != RBD_WATCH_STATE_UNREGISTERED );
4023+
39484024 ceph_oid_destroy (& rbd_dev -> header_oid );
39494025 ceph_oloc_destroy (& rbd_dev -> header_oloc );
39504026
@@ -3991,6 +4067,10 @@ static struct rbd_device *__rbd_dev_create(struct rbd_client *rbdc,
39914067 ceph_oid_init (& rbd_dev -> header_oid );
39924068 ceph_oloc_init (& rbd_dev -> header_oloc );
39934069
4070+ mutex_init (& rbd_dev -> watch_mutex );
4071+ rbd_dev -> watch_state = RBD_WATCH_STATE_UNREGISTERED ;
4072+ INIT_DELAYED_WORK (& rbd_dev -> watch_dwork , rbd_reregister_watch );
4073+
39944074 rbd_dev -> dev .bus = & rbd_bus_type ;
39954075 rbd_dev -> dev .type = & rbd_device_type ;
39964076 rbd_dev -> dev .parent = & rbd_root_dev ;
@@ -5222,7 +5302,7 @@ static int rbd_dev_image_probe(struct rbd_device *rbd_dev, int depth)
52225302 goto err_out_format ;
52235303
52245304 if (!depth ) {
5225- ret = rbd_dev_header_watch_sync (rbd_dev );
5305+ ret = rbd_register_watch (rbd_dev );
52265306 if (ret ) {
52275307 if (ret == - ENOENT )
52285308 pr_info ("image %s/%s does not exist\n" ,
@@ -5281,7 +5361,7 @@ static int rbd_dev_image_probe(struct rbd_device *rbd_dev, int depth)
52815361 rbd_dev_unprobe (rbd_dev );
52825362err_out_watch :
52835363 if (!depth )
5284- rbd_dev_header_unwatch_sync (rbd_dev );
5364+ rbd_unregister_watch (rbd_dev );
52855365err_out_format :
52865366 rbd_dev -> image_format = 0 ;
52875367 kfree (rbd_dev -> spec -> image_id );
@@ -5348,11 +5428,11 @@ static ssize_t do_rbd_add(struct bus_type *bus,
53485428 rc = rbd_dev_device_setup (rbd_dev );
53495429 if (rc ) {
53505430 /*
5351- * rbd_dev_header_unwatch_sync () can't be moved into
5431+ * rbd_unregister_watch () can't be moved into
53525432 * rbd_dev_image_release() without refactoring, see
53535433 * commit 1f3ef78861ac.
53545434 */
5355- rbd_dev_header_unwatch_sync (rbd_dev );
5435+ rbd_unregister_watch (rbd_dev );
53565436 rbd_dev_image_release (rbd_dev );
53575437 goto out ;
53585438 }
@@ -5473,7 +5553,7 @@ static ssize_t do_rbd_remove(struct bus_type *bus,
54735553 if (ret < 0 || already )
54745554 return ret ;
54755555
5476- rbd_dev_header_unwatch_sync (rbd_dev );
5556+ rbd_unregister_watch (rbd_dev );
54775557
54785558 /*
54795559 * Don't free anything from rbd_dev->disk until after all
0 commit comments