1// SPDX-License-Identifier: GPL-2.0-only 2/* 3 * Copyright (C) 2005-2008 Red Hat, Inc. All rights reserved. 4 */ 5 6#include <linux/fs.h> 7#include <linux/miscdevice.h> 8#include <linux/poll.h> 9#include <linux/dlm.h> 10#include <linux/dlm_plock.h> 11#include <linux/slab.h> 12 13#include "dlm_internal.h" 14#include "lockspace.h" 15 16static spinlock_t ops_lock; 17static struct list_head send_list; 18static struct list_head recv_list; 19static wait_queue_head_t send_wq; 20static wait_queue_head_t recv_wq; 21 22struct plock_async_data { 23 void *fl; 24 void *file; 25 struct file_lock flc; 26 int (*callback)(struct file_lock *fl, int result); 27}; 28 29struct plock_op { 30 struct list_head list; 31 int done; 32 struct dlm_plock_info info; 33 /* if set indicates async handling */ 34 struct plock_async_data *data; 35}; 36 37static inline void set_version(struct dlm_plock_info *info) 38{ 39 info->version[0] = DLM_PLOCK_VERSION_MAJOR; 40 info->version[1] = DLM_PLOCK_VERSION_MINOR; 41 info->version[2] = DLM_PLOCK_VERSION_PATCH; 42} 43 44static int check_version(struct dlm_plock_info *info) 45{ 46 if ((DLM_PLOCK_VERSION_MAJOR != info->version[0]) || 47 (DLM_PLOCK_VERSION_MINOR < info->version[1])) { 48 log_print("plock device version mismatch: " 49 "kernel (%u.%u.%u), user (%u.%u.%u)", 50 DLM_PLOCK_VERSION_MAJOR, 51 DLM_PLOCK_VERSION_MINOR, 52 DLM_PLOCK_VERSION_PATCH, 53 info->version[0], 54 info->version[1], 55 info->version[2]); 56 return -EINVAL; 57 } 58 return 0; 59} 60 61static void dlm_release_plock_op(struct plock_op *op) 62{ 63 kfree(op->data); 64 kfree(op); 65} 66 67static void send_op(struct plock_op *op) 68{ 69 set_version(&op->info); 70 INIT_LIST_HEAD(&op->list); 71 spin_lock(&ops_lock); 72 list_add_tail(&op->list, &send_list); 73 spin_unlock(&ops_lock); 74 wake_up(&send_wq); 75} 76 77/* If a process was killed while waiting for the only plock on a file, 78 locks_remove_posix will not see any lock on the file so it won't 79 send an unlock-close to us to pass on to userspace to clean up the 80 abandoned waiter. So, we have to insert the unlock-close when the 81 lock call is interrupted. */ 82 83static void do_unlock_close(const struct dlm_plock_info *info) 84{ 85 struct plock_op *op; 86 87 op = kzalloc(sizeof(*op), GFP_NOFS); 88 if (!op) 89 return; 90 91 op->info.optype = DLM_PLOCK_OP_UNLOCK; 92 op->info.pid = info->pid; 93 op->info.fsid = info->fsid; 94 op->info.number = info->number; 95 op->info.start = 0; 96 op->info.end = OFFSET_MAX; 97 op->info.owner = info->owner; 98 99 op->info.flags |= DLM_PLOCK_FL_CLOSE; 100 send_op(op); 101} 102 103int dlm_posix_lock(dlm_lockspace_t *lockspace, u64 number, struct file *file, 104 int cmd, struct file_lock *fl) 105{ 106 struct plock_async_data *op_data; 107 struct dlm_ls *ls; 108 struct plock_op *op; 109 int rv; 110 111 ls = dlm_find_lockspace_local(lockspace); 112 if (!ls) 113 return -EINVAL; 114 115 op = kzalloc(sizeof(*op), GFP_NOFS); 116 if (!op) { 117 rv = -ENOMEM; 118 goto out; 119 } 120 121 op->info.optype = DLM_PLOCK_OP_LOCK; 122 op->info.pid = fl->fl_pid; 123 op->info.ex = (fl->fl_type == F_WRLCK); 124 op->info.wait = IS_SETLKW(cmd); 125 op->info.fsid = ls->ls_global_id; 126 op->info.number = number; 127 op->info.start = fl->fl_start; 128 op->info.end = fl->fl_end; 129 /* async handling */ 130 if (fl->fl_lmops && fl->fl_lmops->lm_grant) { 131 op_data = kzalloc(sizeof(*op_data), GFP_NOFS); 132 if (!op_data) { 133 dlm_release_plock_op(op); 134 rv = -ENOMEM; 135 goto out; 136 } 137 138 /* fl_owner is lockd which doesn't distinguish 139 processes on the nfs client */ 140 op->info.owner = (__u64) fl->fl_pid; 141 op_data->callback = fl->fl_lmops->lm_grant; 142 locks_init_lock(&op_data->flc); 143 locks_copy_lock(&op_data->flc, fl); 144 op_data->fl = fl; 145 op_data->file = file; 146 147 op->data = op_data; 148 149 send_op(op); 150 rv = FILE_LOCK_DEFERRED; 151 goto out; 152 } else { 153 op->info.owner = (__u64)(long) fl->fl_owner; 154 } 155 156 send_op(op); 157 158 rv = wait_event_killable(recv_wq, (op->done != 0)); 159 if (rv == -ERESTARTSYS) { 160 spin_lock(&ops_lock); 161 list_del(&op->list); 162 spin_unlock(&ops_lock); 163 log_debug(ls, "%s: wait interrupted %x %llx pid %d", 164 __func__, ls->ls_global_id, 165 (unsigned long long)number, op->info.pid); 166 dlm_release_plock_op(op); 167 do_unlock_close(&op->info); 168 goto out; 169 } 170 171 spin_lock(&ops_lock); 172 if (!list_empty(&op->list)) { 173 log_error(ls, "dlm_posix_lock: op on list %llx", 174 (unsigned long long)number); 175 list_del(&op->list); 176 } 177 spin_unlock(&ops_lock); 178 179 rv = op->info.rv; 180 181 if (!rv) { 182 if (locks_lock_file_wait(file, fl) < 0) 183 log_error(ls, "dlm_posix_lock: vfs lock error %llx", 184 (unsigned long long)number); 185 } 186 187 dlm_release_plock_op(op); 188out: 189 dlm_put_lockspace(ls); 190 return rv; 191} 192EXPORT_SYMBOL_GPL(dlm_posix_lock); 193 194/* Returns failure iff a successful lock operation should be canceled */ 195static int dlm_plock_callback(struct plock_op *op) 196{ 197 struct plock_async_data *op_data = op->data; 198 struct file *file; 199 struct file_lock *fl; 200 struct file_lock *flc; 201 int (*notify)(struct file_lock *fl, int result) = NULL; 202 int rv = 0; 203 204 spin_lock(&ops_lock); 205 if (!list_empty(&op->list)) { 206 log_print("dlm_plock_callback: op on list %llx", 207 (unsigned long long)op->info.number); 208 list_del(&op->list); 209 } 210 spin_unlock(&ops_lock); 211 212 /* check if the following 2 are still valid or make a copy */ 213 file = op_data->file; 214 flc = &op_data->flc; 215 fl = op_data->fl; 216 notify = op_data->callback; 217 218 if (op->info.rv) { 219 notify(fl, op->info.rv); 220 goto out; 221 } 222 223 /* got fs lock; bookkeep locally as well: */ 224 flc->fl_flags &= ~FL_SLEEP; 225 if (posix_lock_file(file, flc, NULL)) { 226 /* 227 * This can only happen in the case of kmalloc() failure. 228 * The filesystem's own lock is the authoritative lock, 229 * so a failure to get the lock locally is not a disaster. 230 * As long as the fs cannot reliably cancel locks (especially 231 * in a low-memory situation), we're better off ignoring 232 * this failure than trying to recover. 233 */ 234 log_print("dlm_plock_callback: vfs lock error %llx file %p fl %p", 235 (unsigned long long)op->info.number, file, fl); 236 } 237 238 rv = notify(fl, 0); 239 if (rv) { 240 /* XXX: We need to cancel the fs lock here: */ 241 log_print("dlm_plock_callback: lock granted after lock request " 242 "failed; dangling lock!\n"); 243 goto out; 244 } 245 246out: 247 dlm_release_plock_op(op); 248 return rv; 249} 250 251int dlm_posix_unlock(dlm_lockspace_t *lockspace, u64 number, struct file *file, 252 struct file_lock *fl) 253{ 254 struct dlm_ls *ls; 255 struct plock_op *op; 256 int rv; 257 unsigned char fl_flags = fl->fl_flags; 258 259 ls = dlm_find_lockspace_local(lockspace); 260 if (!ls) 261 return -EINVAL; 262 263 op = kzalloc(sizeof(*op), GFP_NOFS); 264 if (!op) { 265 rv = -ENOMEM; 266 goto out; 267 } 268 269 /* cause the vfs unlock to return ENOENT if lock is not found */ 270 fl->fl_flags |= FL_EXISTS; 271 272 rv = locks_lock_file_wait(file, fl); 273 if (rv == -ENOENT) { 274 rv = 0; 275 goto out_free; 276 } 277 if (rv < 0) { 278 log_error(ls, "dlm_posix_unlock: vfs unlock error %d %llx", 279 rv, (unsigned long long)number); 280 } 281 282 op->info.optype = DLM_PLOCK_OP_UNLOCK; 283 op->info.pid = fl->fl_pid; 284 op->info.fsid = ls->ls_global_id; 285 op->info.number = number; 286 op->info.start = fl->fl_start; 287 op->info.end = fl->fl_end; 288 if (fl->fl_lmops && fl->fl_lmops->lm_grant) 289 op->info.owner = (__u64) fl->fl_pid; 290 else 291 op->info.owner = (__u64)(long) fl->fl_owner; 292 293 if (fl->fl_flags & FL_CLOSE) { 294 op->info.flags |= DLM_PLOCK_FL_CLOSE; 295 send_op(op); 296 rv = 0; 297 goto out; 298 } 299 300 send_op(op); 301 wait_event(recv_wq, (op->done != 0)); 302 303 spin_lock(&ops_lock); 304 if (!list_empty(&op->list)) { 305 log_error(ls, "dlm_posix_unlock: op on list %llx", 306 (unsigned long long)number); 307 list_del(&op->list); 308 } 309 spin_unlock(&ops_lock); 310 311 rv = op->info.rv; 312 313 if (rv == -ENOENT) 314 rv = 0; 315 316out_free: 317 dlm_release_plock_op(op); 318out: 319 dlm_put_lockspace(ls); 320 fl->fl_flags = fl_flags; 321 return rv; 322} 323EXPORT_SYMBOL_GPL(dlm_posix_unlock); 324 325int dlm_posix_get(dlm_lockspace_t *lockspace, u64 number, struct file *file, 326 struct file_lock *fl) 327{ 328 struct dlm_ls *ls; 329 struct plock_op *op; 330 int rv; 331 332 ls = dlm_find_lockspace_local(lockspace); 333 if (!ls) 334 return -EINVAL; 335 336 op = kzalloc(sizeof(*op), GFP_NOFS); 337 if (!op) { 338 rv = -ENOMEM; 339 goto out; 340 } 341 342 op->info.optype = DLM_PLOCK_OP_GET; 343 op->info.pid = fl->fl_pid; 344 op->info.ex = (fl->fl_type == F_WRLCK); 345 op->info.fsid = ls->ls_global_id; 346 op->info.number = number; 347 op->info.start = fl->fl_start; 348 op->info.end = fl->fl_end; 349 if (fl->fl_lmops && fl->fl_lmops->lm_grant) 350 op->info.owner = (__u64) fl->fl_pid; 351 else 352 op->info.owner = (__u64)(long) fl->fl_owner; 353 354 send_op(op); 355 wait_event(recv_wq, (op->done != 0)); 356 357 spin_lock(&ops_lock); 358 if (!list_empty(&op->list)) { 359 log_error(ls, "dlm_posix_get: op on list %llx", 360 (unsigned long long)number); 361 list_del(&op->list); 362 } 363 spin_unlock(&ops_lock); 364 365 /* info.rv from userspace is 1 for conflict, 0 for no-conflict, 366 -ENOENT if there are no locks on the file */ 367 368 rv = op->info.rv; 369 370 fl->fl_type = F_UNLCK; 371 if (rv == -ENOENT) 372 rv = 0; 373 else if (rv > 0) { 374 locks_init_lock(fl); 375 fl->fl_type = (op->info.ex) ? F_WRLCK : F_RDLCK; 376 fl->fl_flags = FL_POSIX; 377 fl->fl_pid = op->info.pid; 378 if (op->info.nodeid != dlm_our_nodeid()) 379 fl->fl_pid = -fl->fl_pid; 380 fl->fl_start = op->info.start; 381 fl->fl_end = op->info.end; 382 rv = 0; 383 } 384 385 dlm_release_plock_op(op); 386out: 387 dlm_put_lockspace(ls); 388 return rv; 389} 390EXPORT_SYMBOL_GPL(dlm_posix_get); 391 392/* a read copies out one plock request from the send list */ 393static ssize_t dev_read(struct file *file, char __user *u, size_t count, 394 loff_t *ppos) 395{ 396 struct dlm_plock_info info; 397 struct plock_op *op = NULL; 398 399 if (count < sizeof(info)) 400 return -EINVAL; 401 402 spin_lock(&ops_lock); 403 if (!list_empty(&send_list)) { 404 op = list_entry(send_list.next, struct plock_op, list); 405 if (op->info.flags & DLM_PLOCK_FL_CLOSE) 406 list_del(&op->list); 407 else 408 list_move_tail(&op->list, &recv_list); 409 memcpy(&info, &op->info, sizeof(info)); 410 } 411 spin_unlock(&ops_lock); 412 413 if (!op) 414 return -EAGAIN; 415 416 /* there is no need to get a reply from userspace for unlocks 417 that were generated by the vfs cleaning up for a close 418 (the process did not make an unlock call). */ 419 420 if (op->info.flags & DLM_PLOCK_FL_CLOSE) 421 dlm_release_plock_op(op); 422 423 if (copy_to_user(u, &info, sizeof(info))) 424 return -EFAULT; 425 return sizeof(info); 426} 427 428/* a write copies in one plock result that should match a plock_op 429 on the recv list */ 430static ssize_t dev_write(struct file *file, const char __user *u, size_t count, 431 loff_t *ppos) 432{ 433 struct plock_op *op = NULL, *iter; 434 struct dlm_plock_info info; 435 int do_callback = 0; 436 437 if (count != sizeof(info)) 438 return -EINVAL; 439 440 if (copy_from_user(&info, u, sizeof(info))) 441 return -EFAULT; 442 443 if (check_version(&info)) 444 return -EINVAL; 445 446 /* 447 * The results for waiting ops (SETLKW) can be returned in any 448 * order, so match all fields to find the op. The results for 449 * non-waiting ops are returned in the order that they were sent 450 * to userspace, so match the result with the first non-waiting op. 451 */ 452 spin_lock(&ops_lock); 453 if (info.wait) { 454 list_for_each_entry(iter, &recv_list, list) { 455 if (iter->info.fsid == info.fsid && 456 iter->info.number == info.number && 457 iter->info.owner == info.owner && 458 iter->info.pid == info.pid && 459 iter->info.start == info.start && 460 iter->info.end == info.end && 461 iter->info.ex == info.ex && 462 iter->info.wait) { 463 op = iter; 464 break; 465 } 466 } 467 } else { 468 list_for_each_entry(iter, &recv_list, list) { 469 if (!iter->info.wait && 470 iter->info.fsid == info.fsid) { 471 op = iter; 472 break; 473 } 474 } 475 } 476 477 if (op) { 478 /* Sanity check that op and info match. */ 479 if (info.wait) 480 WARN_ON(op->info.optype != DLM_PLOCK_OP_LOCK); 481 else 482 WARN_ON(op->info.number != info.number || 483 op->info.owner != info.owner || 484 op->info.optype != info.optype); 485 486 list_del_init(&op->list); 487 memcpy(&op->info, &info, sizeof(info)); 488 if (op->data) 489 do_callback = 1; 490 else 491 op->done = 1; 492 } 493 spin_unlock(&ops_lock); 494 495 if (op) { 496 if (do_callback) 497 dlm_plock_callback(op); 498 else 499 wake_up(&recv_wq); 500 } else 501 log_print("%s: no op %x %llx", __func__, 502 info.fsid, (unsigned long long)info.number); 503 return count; 504} 505 506static __poll_t dev_poll(struct file *file, poll_table *wait) 507{ 508 __poll_t mask = 0; 509 510 poll_wait(file, &send_wq, wait); 511 512 spin_lock(&ops_lock); 513 if (!list_empty(&send_list)) 514 mask = EPOLLIN | EPOLLRDNORM; 515 spin_unlock(&ops_lock); 516 517 return mask; 518} 519 520static const struct file_operations dev_fops = { 521 .read = dev_read, 522 .write = dev_write, 523 .poll = dev_poll, 524 .owner = THIS_MODULE, 525 .llseek = noop_llseek, 526}; 527 528static struct miscdevice plock_dev_misc = { 529 .minor = MISC_DYNAMIC_MINOR, 530 .name = DLM_PLOCK_MISC_NAME, 531 .fops = &dev_fops 532}; 533 534int dlm_plock_init(void) 535{ 536 int rv; 537 538 spin_lock_init(&ops_lock); 539 INIT_LIST_HEAD(&send_list); 540 INIT_LIST_HEAD(&recv_list); 541 init_waitqueue_head(&send_wq); 542 init_waitqueue_head(&recv_wq); 543 544 rv = misc_register(&plock_dev_misc); 545 if (rv) 546 log_print("dlm_plock_init: misc_register failed %d", rv); 547 return rv; 548} 549 550void dlm_plock_exit(void) 551{ 552 misc_deregister(&plock_dev_misc); 553} 554 555