// SPDX-License-Identifier: GPL-2.0-or-later
/*
 *   Copyright (C) International Business Machines Corp., 2000-2005
 *   Portions Copyright (C) Christoph Hellwig, 2001-2002
 */

/*
 *	jfs_txnmgr.c: transaction manager
 *
 * notes:
 * transaction starts with txBegin() and ends with txCommit()
 * or txAbort().
 *
 * tlock is acquired at the time of update;
 * (obviate scan at commit time for xtree and dtree)
 * tlock and mp point to each other;
 * (no hashlist for mp -> tlock).
 *
 * special cases:
 * tlock on in-memory inode:
 * in-place tlock in the in-memory inode itself;
 * converted to page lock by iWrite() at commit time.
 *
 * tlock during write()/mmap() under anonymous transaction (tid = 0):
 * transferred (?) to transaction at commit time.
 *
 * use the page itself to update allocation maps
 * (obviate intermediate replication of allocation/deallocation data)
 * hold on to mp+lock thru update of maps
 */

#include <linux/fs.h>
#include <linux/vmalloc.h>
#include <linux/completion.h>
#include <linux/freezer.h>
#include <linux/module.h>
#include <linux/moduleparam.h>
#include <linux/kthread.h>
#include <linux/seq_file.h>
#include "jfs_incore.h"
#include "jfs_inode.h"
#include "jfs_filsys.h"
#include "jfs_metapage.h"
#include "jfs_dinode.h"
#include "jfs_imap.h"
#include "jfs_dmap.h"
#include "jfs_superblock.h"
#include "jfs_debug.h"

/*
 *	transaction management structures
 */
static struct {
	int freetid;		/* index of a free tid structure */
	int freelock;		/* index of the first free lock word */
	wait_queue_head_t freewait;	/* eventlist of free tblock */
	wait_queue_head_t freelockwait;	/* eventlist of free tlock */
	wait_queue_head_t lowlockwait;	/* eventlist of ample tlocks */
	int tlocksInUse;	/* Number of tlocks in use */
	spinlock_t LazyLock;	/* synchronize sync_queue & unlock_queue */
/*	struct tblock *sync_queue; * Transactions waiting for data sync */
	struct list_head unlock_queue;	/* Txns waiting to be released */
	struct list_head anon_list;	/* inodes having anonymous txns */
	struct list_head anon_list2;	/* inodes having anonymous txns
					   that couldn't be sync'ed */
} TxAnchor;

int jfs_tlocks_low;		/* Indicates low number of available tlocks */

#ifdef CONFIG_JFS_STATISTICS
static struct {
	uint txBegin;
	uint txBegin_barrier;
	uint txBegin_lockslow;
	uint txBegin_freetid;
	uint txBeginAnon;
	uint txBeginAnon_barrier;
	uint txBeginAnon_lockslow;
	uint txLockAlloc;
	uint txLockAlloc_freelock;
} TxStat;
#endif

static int nTxBlock = -1;	/* number of transaction blocks */
module_param(nTxBlock, int, 0);
MODULE_PARM_DESC(nTxBlock,
		 "Number of transaction blocks (max:65536)");

static int nTxLock = -1;	/* number of transaction locks */
module_param(nTxLock, int, 0);
MODULE_PARM_DESC(nTxLock,
		 "Number of transaction locks (max:65536)");

struct tblock *TxBlock;	/* transaction block table */
static int TxLockLWM;	/* Low water mark for number of txLocks used */
static int TxLockHWM;	/* High water mark for number of txLocks used */
static int TxLockVHWM;	/* Very High water mark */
struct tlock *TxLock;	/* transaction lock table */

/*
 *	transaction management lock
 */
static DEFINE_SPINLOCK(jfsTxnLock);

#define TXN_LOCK()		spin_lock(&jfsTxnLock)
#define TXN_UNLOCK()		spin_unlock(&jfsTxnLock)

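/*
 * Locking note (editor's summary of the structures above): jfsTxnLock
 * (TXN_LOCK/TXN_UNLOCK) guards the TxAnchor freelists and counters, while
 * TxAnchor.LazyLock (LAZY_LOCK/LAZY_UNLOCK below) guards the unlock_queue
 * handed to the lazy commit thread.  TXN_SLEEP_DROP_LOCK() drops jfsTxnLock
 * before sleeping, so callers revalidate state after waking (hence the
 * retry loops throughout this file).
 */
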
#define LAZY_LOCK_INIT() spin_lock_init(&TxAnchor.LazyLock);
#define LAZY_LOCK(flags)	spin_lock_irqsave(&TxAnchor.LazyLock, flags)
#define LAZY_UNLOCK(flags) spin_unlock_irqrestore(&TxAnchor.LazyLock, flags)

static DECLARE_WAIT_QUEUE_HEAD(jfs_commit_thread_wait);
static int jfs_commit_thread_waking;

/*
 * Retry logic exists outside these macros to protect against spurious wakeups.
 */
static inline void TXN_SLEEP_DROP_LOCK(wait_queue_head_t * event)
{
	DECLARE_WAITQUEUE(wait, current);

	add_wait_queue(event, &wait);
	set_current_state(TASK_UNINTERRUPTIBLE);
	TXN_UNLOCK();
	io_schedule();
	remove_wait_queue(event, &wait);
}

#define TXN_SLEEP(event)\
{\
	TXN_SLEEP_DROP_LOCK(event);\
	TXN_LOCK();\
}

#define TXN_WAKEUP(event) wake_up_all(event)

/*
 *	statistics
 */
static struct {
	tid_t maxtid;		/* 4: biggest tid ever used */
	lid_t maxlid;		/* 4: biggest lid ever used */
	int ntid;		/* 4: # of transactions performed */
	int nlid;		/* 4: # of tlocks acquired */
	int waitlock;		/* 4: # of tlock wait */
} stattx;

/*
 * forward references
 */
static int diLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd,
		 struct tlock * tlck, struct commit * cd);
static int dataLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd,
		   struct tlock * tlck);
static void dtLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd,
		  struct tlock * tlck);
static void mapLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd,
		   struct tlock * tlck);
static void txAllocPMap(struct inode *ip, struct maplock * maplock,
			struct tblock * tblk);
static void txForce(struct tblock * tblk);
static int txLog(struct jfs_log * log, struct tblock * tblk,
		 struct commit * cd);
static void txUpdateMap(struct tblock * tblk);
static void txRelease(struct tblock * tblk);
static void xtLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd,
		  struct tlock * tlck);
static void LogSyncRelease(struct metapage * mp);

/*
 *	transaction block/lock management
 *	---------------------------------
 */

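/*
 * Free tblocks and tlocks are kept on index-threaded freelists: entry k
 * links to the next free entry through its .next field, with index 0
 * reserved as the list terminator (tid 0 and lid 0 are never handed out).
 * TxLockLWM/TxLockHWM/TxLockVHWM are fractions of nTxLock (set in txInit())
 * used to throttle allocation and to nudge the sync thread when tlocks run
 * low.  (Editor's summary of the code below.)
 */
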
/*
 * Get a transaction lock from the free list.  If the number in use is
 * greater than the high water mark, wake up the sync daemon.  This should
 * free some anonymous transaction locks.  (TXN_LOCK must be held.)
 */
static lid_t txLockAlloc(void)
{
	lid_t lid;

	INCREMENT(TxStat.txLockAlloc);
	if (!TxAnchor.freelock) {
		INCREMENT(TxStat.txLockAlloc_freelock);
	}

	while (!(lid = TxAnchor.freelock))
		TXN_SLEEP(&TxAnchor.freelockwait);
	TxAnchor.freelock = TxLock[lid].next;
	HIGHWATERMARK(stattx.maxlid, lid);
	if ((++TxAnchor.tlocksInUse > TxLockHWM) && (jfs_tlocks_low == 0)) {
		jfs_info("txLockAlloc tlocks low");
		jfs_tlocks_low = 1;
		wake_up_process(jfsSyncThread);
	}

	return lid;
}

static void txLockFree(lid_t lid)
{
	TxLock[lid].tid = 0;
	TxLock[lid].next = TxAnchor.freelock;
	TxAnchor.freelock = lid;
	TxAnchor.tlocksInUse--;
	if (jfs_tlocks_low && (TxAnchor.tlocksInUse < TxLockLWM)) {
		jfs_info("txLockFree jfs_tlocks_low no more");
		jfs_tlocks_low = 0;
		TXN_WAKEUP(&TxAnchor.lowlockwait);
	}
	TXN_WAKEUP(&TxAnchor.freelockwait);
}

/*
 * NAME:	txInit()
 *
 * FUNCTION:	initialize transaction management structures
 *
 * RETURN:
 *
 * serialization: single thread at jfs_init()
 */
int txInit(void)
{
	int k, size;
	struct sysinfo si;

	/* Set defaults for nTxLock and nTxBlock if unset */

	if (nTxLock == -1) {
		if (nTxBlock == -1) {
			/* Base default on memory size */
			si_meminfo(&si);
			if (si.totalram > (256 * 1024)) /* 1 GB */
				nTxLock = 64 * 1024;
			else
				nTxLock = si.totalram >> 2;
		} else if (nTxBlock > (8 * 1024))
			nTxLock = 64 * 1024;
		else
			nTxLock = nTxBlock << 3;
	}
	if (nTxBlock == -1)
		nTxBlock = nTxLock >> 3;

	/* Verify tunable parameters */
	if (nTxBlock < 16)
		nTxBlock = 16;	/* No one should set it this low */
	if (nTxBlock > 65536)
		nTxBlock = 65536;
	if (nTxLock < 256)
		nTxLock = 256;	/* No one should set it this low */
	if (nTxLock > 65536)
		nTxLock = 65536;

	printk(KERN_INFO "JFS: nTxBlock = %d, nTxLock = %d\n",
	       nTxBlock, nTxLock);
	/*
	 * initialize transaction block (tblock) table
	 *
	 * transaction id (tid) = tblock index
	 * tid = 0 is reserved.
	 */
	TxLockLWM = (nTxLock * 4) / 10;
	TxLockHWM = (nTxLock * 7) / 10;
	TxLockVHWM = (nTxLock * 8) / 10;

	size = sizeof(struct tblock) * nTxBlock;
	TxBlock = vmalloc(size);
	if (TxBlock == NULL)
		return -ENOMEM;

	for (k = 1; k < nTxBlock - 1; k++) {
		TxBlock[k].next = k + 1;
		init_waitqueue_head(&TxBlock[k].gcwait);
		init_waitqueue_head(&TxBlock[k].waitor);
	}
	TxBlock[k].next = 0;
	init_waitqueue_head(&TxBlock[k].gcwait);
	init_waitqueue_head(&TxBlock[k].waitor);

	TxAnchor.freetid = 1;
	init_waitqueue_head(&TxAnchor.freewait);

	stattx.maxtid = 1;	/* statistics */

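	/*
	 * Worked example of the defaulting logic above (illustrative only,
	 * assuming 4 KB pages): with 512 MB of RAM, si.totalram is about
	 * 128 * 1024 pages, which is not above the 256 * 1024 ("1 GB")
	 * threshold, so nTxLock defaults to totalram >> 2 = 32768 and
	 * nTxBlock to nTxLock >> 3 = 4096.  With more than 1 GB, nTxLock
	 * is set to 64 * 1024 = 65536 and nTxBlock to 8192.
	 */
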
	/*
	 * initialize transaction lock (tlock) table
	 *
	 * transaction lock id = tlock index
	 * tlock id = 0 is reserved.
	 */
	size = sizeof(struct tlock) * nTxLock;
	TxLock = vmalloc(size);
	if (TxLock == NULL) {
		vfree(TxBlock);
		return -ENOMEM;
	}

	/* initialize tlock table */
	for (k = 1; k < nTxLock - 1; k++)
		TxLock[k].next = k + 1;
	TxLock[k].next = 0;
	init_waitqueue_head(&TxAnchor.freelockwait);
	init_waitqueue_head(&TxAnchor.lowlockwait);

	TxAnchor.freelock = 1;
	TxAnchor.tlocksInUse = 0;
	INIT_LIST_HEAD(&TxAnchor.anon_list);
	INIT_LIST_HEAD(&TxAnchor.anon_list2);

	LAZY_LOCK_INIT();
	INIT_LIST_HEAD(&TxAnchor.unlock_queue);

	stattx.maxlid = 1;	/* statistics */

	return 0;
}

/*
 * NAME:	txExit()
 *
 * FUNCTION:	clean up when module is unloaded
 */
void txExit(void)
{
	vfree(TxLock);
	TxLock = NULL;
	vfree(TxBlock);
	TxBlock = NULL;
}

/*
 * NAME:	txBegin()
 *
 * FUNCTION:	start a transaction.
 *
 * PARAMETER:	sb	- superblock
 *		flag	- force for nested tx;
 *
 * RETURN:	tid	- transaction id
 *
 * note: the force flag allows starting a tx for a nested tx,
 * to prevent deadlock on the logsync barrier;
 */
tid_t txBegin(struct super_block *sb, int flag)
{
	tid_t t;
	struct tblock *tblk;
	struct jfs_log *log;

	jfs_info("txBegin: flag = 0x%x", flag);
	log = JFS_SBI(sb)->log;

	if (!log) {
		jfs_error(sb, "read-only filesystem\n");
		return 0;
	}

	TXN_LOCK();

	INCREMENT(TxStat.txBegin);

      retry:
	if (!(flag & COMMIT_FORCE)) {
		/*
		 * synchronize with logsync barrier
		 */
		if (test_bit(log_SYNCBARRIER, &log->flag) ||
		    test_bit(log_QUIESCE, &log->flag)) {
			INCREMENT(TxStat.txBegin_barrier);
			TXN_SLEEP(&log->syncwait);
			goto retry;
		}
	}
	if (flag == 0) {
		/*
		 * Don't begin transaction if we're getting starved for tlocks
		 * unless COMMIT_FORCE or COMMIT_INODE (which may ultimately
		 * free tlocks)
		 */
		if (TxAnchor.tlocksInUse > TxLockVHWM) {
			INCREMENT(TxStat.txBegin_lockslow);
			TXN_SLEEP(&TxAnchor.lowlockwait);
			goto retry;
		}
	}

	/*
	 * allocate transaction id/block
	 */
	if ((t = TxAnchor.freetid) == 0) {
		jfs_info("txBegin: waiting for free tid");
		INCREMENT(TxStat.txBegin_freetid);
		TXN_SLEEP(&TxAnchor.freewait);
		goto retry;
	}

	tblk = tid_to_tblock(t);

	if ((tblk->next == 0) && !(flag & COMMIT_FORCE)) {
		/* Don't let a non-forced transaction take the last tblk */
		jfs_info("txBegin: waiting for free tid");
		INCREMENT(TxStat.txBegin_freetid);
		TXN_SLEEP(&TxAnchor.freewait);
		goto retry;
	}

	TxAnchor.freetid = tblk->next;

	/*
	 * initialize transaction
	 */

	/*
	 * We can't zero the whole thing or we screw up another thread being
	 * awakened after sleeping on tblk->waitor
	 *
	 * memset(tblk, 0, sizeof(struct tblock));
	 */
	tblk->next = tblk->last = tblk->xflag = tblk->flag = tblk->lsn = 0;

	tblk->sb = sb;
	++log->logtid;
	tblk->logtid = log->logtid;

	++log->active;

	HIGHWATERMARK(stattx.maxtid, t);	/* statistics */
	INCREMENT(stattx.ntid);	/* statistics */

	TXN_UNLOCK();

	jfs_info("txBegin: returning tid = %d", t);

	return t;
}

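/*
 * Typical caller sequence (simplified sketch; per-inode locking and error
 * handling omitted).  The tid returned by txBegin() is passed to txLock()/
 * txMaplock() for each metadata update, then to txCommit(), and finally
 * released with txEnd():
 *
 *	tid = txBegin(sb, 0);
 *	... modify metadata under txLock(tid, ip, mp, type) ...
 *	rc = txCommit(tid, 1, &ip, 0);
 *	txEnd(tid);
 *
 * txCommit() itself calls txAbort() if writing the log records fails.
 */
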
/*
 * NAME:	txBeginAnon()
 *
 * FUNCTION:	start an anonymous transaction.
 *		Blocks if a logsync barrier is in effect or if available
 *		tlocks are low, to prevent anonymous tlocks from depleting
 *		the supply.
 *
 * PARAMETER:	sb	- superblock
 *
 * RETURN:	none
 */
void txBeginAnon(struct super_block *sb)
{
	struct jfs_log *log;

	log = JFS_SBI(sb)->log;

	TXN_LOCK();
	INCREMENT(TxStat.txBeginAnon);

      retry:
	/*
	 * synchronize with logsync barrier
	 */
	if (test_bit(log_SYNCBARRIER, &log->flag) ||
	    test_bit(log_QUIESCE, &log->flag)) {
		INCREMENT(TxStat.txBeginAnon_barrier);
		TXN_SLEEP(&log->syncwait);
		goto retry;
	}

	/*
	 * Don't begin transaction if we're getting starved for tlocks
	 */
	if (TxAnchor.tlocksInUse > TxLockVHWM) {
		INCREMENT(TxStat.txBeginAnon_lockslow);
		TXN_SLEEP(&TxAnchor.lowlockwait);
		goto retry;
	}
	TXN_UNLOCK();
}

/*
 *	txEnd()
 *
 * function: free specified transaction block.
 *
 *	logsync barrier processing:
 *
 * serialization:
 */
void txEnd(tid_t tid)
{
	struct tblock *tblk = tid_to_tblock(tid);
	struct jfs_log *log;

	jfs_info("txEnd: tid = %d", tid);
	TXN_LOCK();

	/*
	 * wakeup transactions waiting on the page locked
	 * by the current transaction
	 */
	TXN_WAKEUP(&tblk->waitor);

	log = JFS_SBI(tblk->sb)->log;

	/*
	 * Lazy commit thread can't free this guy until we mark it UNLOCKED;
	 * otherwise we would be left with a transaction that may have been
	 * reused.
	 *
	 * Lazy commit thread will turn off tblkGC_LAZY before calling this
	 * routine.
	 */
	if (tblk->flag & tblkGC_LAZY) {
		jfs_info("txEnd called w/lazy tid: %d, tblk = 0x%p", tid, tblk);
		TXN_UNLOCK();

		spin_lock_irq(&log->gclock);	// LOGGC_LOCK
		tblk->flag |= tblkGC_UNLOCKED;
		spin_unlock_irq(&log->gclock);	// LOGGC_UNLOCK
		return;
	}

	jfs_info("txEnd: tid: %d, tblk = 0x%p", tid, tblk);

	assert(tblk->next == 0);

	/*
	 * insert tblock back on freelist
	 */
	tblk->next = TxAnchor.freetid;
	TxAnchor.freetid = tid;

	/*
	 * mark the tblock not active
	 */
	if (--log->active == 0) {
		clear_bit(log_FLUSH, &log->flag);

		/*
		 * synchronize with logsync barrier
		 */
		if (test_bit(log_SYNCBARRIER, &log->flag)) {
			TXN_UNLOCK();

			/* write dirty metadata & forward log syncpt */
			jfs_syncpt(log, 1);

			jfs_info("log barrier off: 0x%x", log->lsn);

			/* enable new transactions to start */
			clear_bit(log_SYNCBARRIER, &log->flag);

			/* wake up all waiters for the logsync barrier */
			TXN_WAKEUP(&log->syncwait);

			goto wakeup;
		}
	}

	TXN_UNLOCK();
wakeup:
	/*
	 * wake up all waiters for a free tblock
	 */
	TXN_WAKEUP(&TxAnchor.freewait);
}

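/*
 * Note on the waitLock path in txLock() below: when the requested page is
 * already tlocked by a different transaction (expected only for ipimap or
 * ipaimap pages), txLock() releases the metapage, sleeps on the locking
 * transaction's waitor queue and returns NULL, so that the caller can
 * re-acquire the page and retry once the locker finishes.
 */
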
/*
 *	txLock()
 *
 * function: acquire a transaction lock on the specified <mp>
 *
 * parameter:
 *
 * return:	transaction lock id
 *
 * serialization:
 */
struct tlock *txLock(tid_t tid, struct inode *ip, struct metapage * mp,
		     int type)
{
	struct jfs_inode_info *jfs_ip = JFS_IP(ip);
	int dir_xtree = 0;
	lid_t lid;
	tid_t xtid;
	struct tlock *tlck;
	struct xtlock *xtlck;
	struct linelock *linelock;
	xtpage_t *p;
	struct tblock *tblk;

	TXN_LOCK();

	if (S_ISDIR(ip->i_mode) && (type & tlckXTREE) &&
	    !(mp->xflag & COMMIT_PAGE)) {
		/*
		 * Directory inode is special.  It can have both an xtree tlock
		 * and a dtree tlock associated with it.
		 */
		dir_xtree = 1;
		lid = jfs_ip->xtlid;
	} else
		lid = mp->lid;

	/* is page not locked by a transaction ? */
	if (lid == 0)
		goto allocateLock;

	jfs_info("txLock: tid:%d ip:0x%p mp:0x%p lid:%d", tid, ip, mp, lid);

	/* is page locked by the requester transaction ? */
	tlck = lid_to_tlock(lid);
	if ((xtid = tlck->tid) == tid) {
		TXN_UNLOCK();
		goto grantLock;
	}

	/*
	 * is page locked by anonymous transaction/lock ?
	 *
	 * (page update without transaction (i.e., file write) is
	 * locked under anonymous transaction tid = 0:
	 * anonymous tlocks maintained on anonymous tlock list of
	 * the inode of the page and available to all anonymous
	 * transactions until txCommit() time at which point
	 * they are transferred to the transaction tlock list of
	 * the committing transaction of the inode)
	 */
	if (xtid == 0) {
		tlck->tid = tid;
		TXN_UNLOCK();
		tblk = tid_to_tblock(tid);
		/*
		 * The order of the tlocks in the transaction is important
		 * (during truncate, child xtree pages must be freed before
		 * parent's tlocks change the working map).
		 * Take tlock off anonymous list and add to tail of
		 * transaction list
		 *
		 * Note:  We really need to get rid of the tid & lid and
		 * use list_head's.  This code is getting UGLY!
		 */
		if (jfs_ip->atlhead == lid) {
			if (jfs_ip->atltail == lid) {
				/* only anonymous txn.
				 * Remove from anon_list
				 */
				TXN_LOCK();
				list_del_init(&jfs_ip->anon_inode_list);
				TXN_UNLOCK();
			}
			jfs_ip->atlhead = tlck->next;
		} else {
			lid_t last;
			for (last = jfs_ip->atlhead;
			     lid_to_tlock(last)->next != lid;
			     last = lid_to_tlock(last)->next) {
				assert(last);
			}
			lid_to_tlock(last)->next = tlck->next;
			if (jfs_ip->atltail == lid)
				jfs_ip->atltail = last;
		}

		/* insert the tlock at tail of transaction tlock list */

		if (tblk->next)
			lid_to_tlock(tblk->last)->next = lid;
		else
			tblk->next = lid;
		tlck->next = 0;
		tblk->last = lid;

		goto grantLock;
	}

	goto waitLock;

	/*
	 * allocate a tlock
	 */
      allocateLock:
	lid = txLockAlloc();
	tlck = lid_to_tlock(lid);

	/*
	 * initialize tlock
	 */
	tlck->tid = tid;

	TXN_UNLOCK();

	/* mark tlock for meta-data page */
	if (mp->xflag & COMMIT_PAGE) {

		tlck->flag = tlckPAGELOCK;

		/* mark the page dirty and nohomeok */
		metapage_nohomeok(mp);

		jfs_info("locking mp = 0x%p, nohomeok = %d tid = %d tlck = 0x%p",
			 mp, mp->nohomeok, tid, tlck);

		/* if anonymous transaction, and buffer is on the group
		 * commit synclist, mark inode to show this.  This will
		 * prevent the buffer from being marked nohomeok for too
		 * long a time.
		 */
		if ((tid == 0) && mp->lsn)
			set_cflag(COMMIT_Synclist, ip);
	}
	/* mark tlock for in-memory inode */
	else
		tlck->flag = tlckINODELOCK;

	if (S_ISDIR(ip->i_mode))
		tlck->flag |= tlckDIRECTORY;

	tlck->type = 0;

	/* bind the tlock and the page */
	tlck->ip = ip;
	tlck->mp = mp;
	if (dir_xtree)
		jfs_ip->xtlid = lid;
	else
		mp->lid = lid;

	/*
	 * enqueue transaction lock to transaction/inode
	 */
	/* insert the tlock at tail of transaction tlock list */
	if (tid) {
		tblk = tid_to_tblock(tid);
		if (tblk->next)
			lid_to_tlock(tblk->last)->next = lid;
		else
			tblk->next = lid;
		tlck->next = 0;
		tblk->last = lid;
	}
	/* anonymous transaction:
	 * insert the tlock at head of inode anonymous tlock list
	 */
	else {
		tlck->next = jfs_ip->atlhead;
		jfs_ip->atlhead = lid;
		if (tlck->next == 0) {
			/* This inode's first anonymous transaction */
			jfs_ip->atltail = lid;
			TXN_LOCK();
			list_add_tail(&jfs_ip->anon_inode_list,
				      &TxAnchor.anon_list);
			TXN_UNLOCK();
		}
	}

	/* initialize type dependent area for linelock */
	linelock = (struct linelock *) & tlck->lock;
	linelock->next = 0;
	linelock->flag = tlckLINELOCK;
	linelock->maxcnt = TLOCKSHORT;
	linelock->index = 0;

	switch (type & tlckTYPE) {
	case tlckDTREE:
		linelock->l2linesize = L2DTSLOTSIZE;
		break;

	case tlckXTREE:
		linelock->l2linesize = L2XTSLOTSIZE;

		xtlck = (struct xtlock *) linelock;
		xtlck->header.offset = 0;
		xtlck->header.length = 2;

		if (type & tlckNEW) {
			xtlck->lwm.offset = XTENTRYSTART;
		} else {
			if (mp->xflag & COMMIT_PAGE)
				p = (xtpage_t *) mp->data;
			else
				p = &jfs_ip->i_xtroot;
			xtlck->lwm.offset =
			    le16_to_cpu(p->header.nextindex);
		}
		xtlck->lwm.length = 0;	/* ! */
		xtlck->twm.offset = 0;
		xtlck->hwm.offset = 0;

		xtlck->index = 2;
		break;

	case tlckINODE:
		linelock->l2linesize = L2INODESLOTSIZE;
		break;

	case tlckDATA:
		linelock->l2linesize = L2DATASLOTSIZE;
		break;

	default:
		jfs_err("UFO tlock:0x%p", tlck);
	}

	/*
	 * update tlock vector
	 */
      grantLock:
	tlck->type |= type;

	return tlck;

	/*
	 * page is being locked by another transaction:
	 */
      waitLock:
	/* Only locks on ipimap or ipaimap should reach here */
	/* assert(jfs_ip->fileset == AGGREGATE_I); */
	if (jfs_ip->fileset != AGGREGATE_I) {
		printk(KERN_ERR "txLock: trying to lock locked page!");
		print_hex_dump(KERN_ERR, "ip: ", DUMP_PREFIX_ADDRESS, 16, 4,
			       ip, sizeof(*ip), 0);
		print_hex_dump(KERN_ERR, "mp: ", DUMP_PREFIX_ADDRESS, 16, 4,
			       mp, sizeof(*mp), 0);
		print_hex_dump(KERN_ERR, "Locker's tblock: ",
			       DUMP_PREFIX_ADDRESS, 16, 4, tid_to_tblock(tid),
			       sizeof(struct tblock), 0);
		print_hex_dump(KERN_ERR, "Tlock: ", DUMP_PREFIX_ADDRESS, 16, 4,
			       tlck, sizeof(*tlck), 0);
		BUG();
	}
	INCREMENT(stattx.waitlock);	/* statistics */
	TXN_UNLOCK();
	release_metapage(mp);
	TXN_LOCK();
	xtid = tlck->tid;	/* reacquire after dropping TXN_LOCK */

	jfs_info("txLock: in waitLock, tid = %d, xtid = %d, lid = %d",
		 tid, xtid, lid);

	/* Recheck everything since dropping TXN_LOCK */
	if (xtid && (tlck->mp == mp) && (mp->lid == lid))
		TXN_SLEEP_DROP_LOCK(&tid_to_tblock(xtid)->waitor);
	else
		TXN_UNLOCK();
	jfs_info("txLock: awakened tid = %d, lid = %d", tid, lid);

	return NULL;
}

/*
 * NAME:	txRelease()
 *
 * FUNCTION:	Release buffers associated with transaction locks, but don't
 *		mark homeok yet.  This allows other transactions to modify
 *		buffers, but won't let them go to disk until the commit record
 *		actually gets written.
 *
 * PARAMETER:
 *		tblk	-
 *
 * RETURN:	Errors from subroutines.
 */
static void txRelease(struct tblock * tblk)
{
	struct metapage *mp;
	lid_t lid;
	struct tlock *tlck;

	TXN_LOCK();

	for (lid = tblk->next; lid; lid = tlck->next) {
		tlck = lid_to_tlock(lid);
		if ((mp = tlck->mp) != NULL &&
		    (tlck->type & tlckBTROOT) == 0) {
			assert(mp->xflag & COMMIT_PAGE);
			mp->lid = 0;
		}
	}

	/*
	 * wakeup transactions waiting on a page locked
	 * by the current transaction
	 */
	TXN_WAKEUP(&tblk->waitor);

	TXN_UNLOCK();
}

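/*
 * Commit-side pipeline for a tblock (editor's summary): txLog() writes the
 * log records for each tlock, txRelease() (above) unbinds the pages so
 * other transactions may lock them, and txUnlock() (below) marks the pages
 * homeok and returns the tlocks to the freelist, either directly from
 * txCommit() or from the lazy commit thread when the transaction was
 * committed with COMMIT_LAZY.
 */
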
/*
 * NAME:	txUnlock()
 *
 * FUNCTION:	Initiates pageout of pages modified by tid in journalled
 *		objects and frees their lockwords.
 */
static void txUnlock(struct tblock * tblk)
{
	struct tlock *tlck;
	struct linelock *linelock;
	lid_t lid, next, llid, k;
	struct metapage *mp;
	struct jfs_log *log;
	int difft, diffp;
	unsigned long flags;

	jfs_info("txUnlock: tblk = 0x%p", tblk);
	log = JFS_SBI(tblk->sb)->log;

	/*
	 * mark page under tlock homeok (its log has been written):
	 */
	for (lid = tblk->next; lid; lid = next) {
		tlck = lid_to_tlock(lid);
		next = tlck->next;

		jfs_info("unlocking lid = %d, tlck = 0x%p", lid, tlck);

		/* unbind page from tlock */
		if ((mp = tlck->mp) != NULL &&
		    (tlck->type & tlckBTROOT) == 0) {
			assert(mp->xflag & COMMIT_PAGE);

			/* hold buffer
			 */
			hold_metapage(mp);

			assert(mp->nohomeok > 0);
			_metapage_homeok(mp);

			/* inherit younger/larger clsn */
			LOGSYNC_LOCK(log, flags);
			if (mp->clsn) {
				logdiff(difft, tblk->clsn, log);
				logdiff(diffp, mp->clsn, log);
				if (difft > diffp)
					mp->clsn = tblk->clsn;
			} else
				mp->clsn = tblk->clsn;
			LOGSYNC_UNLOCK(log, flags);

			assert(!(tlck->flag & tlckFREEPAGE));

			put_metapage(mp);
		}

		/* insert tlock, and linelock(s) of the tlock if any,
		 * at head of freelist
		 */
		TXN_LOCK();

		llid = ((struct linelock *) & tlck->lock)->next;
		while (llid) {
			linelock = (struct linelock *) lid_to_tlock(llid);
			k = linelock->next;
			txLockFree(llid);
			llid = k;
		}
		txLockFree(lid);

		TXN_UNLOCK();
	}
	tblk->next = tblk->last = 0;

	/*
	 * remove tblock from logsynclist
	 * (allocation map pages inherited the lsn of tblk and
	 * have been inserted in the logsync list at txUpdateMap())
	 */
	if (tblk->lsn) {
		LOGSYNC_LOCK(log, flags);
		log->count--;
		list_del(&tblk->synclist);
		LOGSYNC_UNLOCK(log, flags);
	}
}

/*
 *	txMaplock()
 *
 * function: allocate a transaction lock for freed page/entry;
 *	for freed page, maplock is used as xtlock/dtlock type;
 */
struct tlock *txMaplock(tid_t tid, struct inode *ip, int type)
{
	struct jfs_inode_info *jfs_ip = JFS_IP(ip);
	lid_t lid;
	struct tblock *tblk;
	struct tlock *tlck;
	struct maplock *maplock;

	TXN_LOCK();

	/*
	 * allocate a tlock
	 */
	lid = txLockAlloc();
	tlck = lid_to_tlock(lid);

	/*
	 * initialize tlock
	 */
	tlck->tid = tid;

	/* bind the tlock and the object */
	tlck->flag = tlckINODELOCK;
	if (S_ISDIR(ip->i_mode))
		tlck->flag |= tlckDIRECTORY;
	tlck->ip = ip;
	tlck->mp = NULL;

	tlck->type = type;

	/*
	 * enqueue transaction lock to transaction/inode
	 */
	/* insert the tlock at tail of transaction tlock list */
	if (tid) {
		tblk = tid_to_tblock(tid);
		if (tblk->next)
			lid_to_tlock(tblk->last)->next = lid;
		else
			tblk->next = lid;
		tlck->next = 0;
		tblk->last = lid;
	}
	/* anonymous transaction:
	 * insert the tlock at head of inode anonymous tlock list
	 */
	else {
		tlck->next = jfs_ip->atlhead;
		jfs_ip->atlhead = lid;
		if (tlck->next == 0) {
			/* This inode's first anonymous transaction */
			jfs_ip->atltail = lid;
			list_add_tail(&jfs_ip->anon_inode_list,
				      &TxAnchor.anon_list);
		}
	}

	TXN_UNLOCK();

	/* initialize type dependent area for maplock */
	maplock = (struct maplock *) & tlck->lock;
	maplock->next = 0;
	maplock->maxcnt = 0;
	maplock->index = 0;

	return tlck;
}

/*
 *	txLinelock()
 *
 * function: allocate a transaction lock for log vector list
 */
struct linelock *txLinelock(struct linelock * tlock)
{
	lid_t lid;
	struct tlock *tlck;
	struct linelock *linelock;

	TXN_LOCK();

	/* allocate a TxLock structure */
	lid = txLockAlloc();
	tlck = lid_to_tlock(lid);

	TXN_UNLOCK();

	/* initialize linelock */
	linelock = (struct linelock *) tlck;
	linelock->next = 0;
	linelock->flag = tlckLINELOCK;
	linelock->maxcnt = TLOCKLONG;
	linelock->index = 0;
	if (tlck->flag & tlckDIRECTORY)
		linelock->flag |= tlckDIRECTORY;

	/* append linelock after tlock */
	linelock->next = tlock->next;
	tlock->next = lid;

	return linelock;
}

/*
 *	transaction commit management
 *	-----------------------------
 */

/*
 * NAME:	txCommit()
 *
 * FUNCTION:	commit the changes to the objects specified in
 *		clist.  For journalled segments only the
 *		changes of the caller are committed, i.e., by tid.
 *		For non-journalled segments the data are flushed to
 *		disk and then the change to the disk inode and indirect
 *		blocks committed (so blocks newly allocated to the
 *		segment will be made a part of the segment atomically).
 *
 *		all of the segments specified in clist must be in
 *		one file system.  no more than 6 segments are needed
 *		to handle all unix svcs.
 *
 *		if the i_nlink field (i.e. disk inode link count)
 *		is zero, and the type of inode is a regular file or
 *		directory, or symbolic link, the inode is truncated
 *		to zero length.  the truncation is committed but the
 *		VM resources are unaffected until it is closed (see
 *		iput and iclose).
 *
 * PARAMETER:
 *
 * RETURN:
 *
 * serialization:
 *		on entry the inode lock on each segment is assumed
 *		to be held.
 *
 * i/o error:
 */
int txCommit(tid_t tid,		/* transaction identifier */
	     int nip,		/* number of inodes to commit */
	     struct inode **iplist,	/* list of inodes to commit */
	     int flag)
{
	int rc = 0;
	struct commit cd;
	struct jfs_log *log;
	struct tblock *tblk;
	struct lrd *lrd;
	struct inode *ip;
	struct jfs_inode_info *jfs_ip;
	int k, n;
	ino_t top;
	struct super_block *sb;

	jfs_info("txCommit, tid = %d, flag = %d", tid, flag);
	/* is read-only file system ? */
	if (isReadOnly(iplist[0])) {
		rc = -EROFS;
		goto TheEnd;
	}

	sb = cd.sb = iplist[0]->i_sb;
	cd.tid = tid;

	if (tid == 0)
		tid = txBegin(sb, 0);
	tblk = tid_to_tblock(tid);

	/*
	 * initialize commit structure
	 */
	log = JFS_SBI(sb)->log;
	cd.log = log;

	/* initialize log record descriptor in commit */
	lrd = &cd.lrd;
	lrd->logtid = cpu_to_le32(tblk->logtid);
	lrd->backchain = 0;

	tblk->xflag |= flag;

	if ((flag & (COMMIT_FORCE | COMMIT_SYNC)) == 0)
		tblk->xflag |= COMMIT_LAZY;
	/*
	 * prepare non-journaled objects for commit
	 *
	 * flush data pages of non-journaled file
	 * to prevent the file from getting uninitialized disk blocks
	 * in case of a crash.
	 * (new blocks - )
	 */
	cd.iplist = iplist;
	cd.nip = nip;

	/*
	 * acquire transaction lock on (on-disk) inodes
	 *
	 * update on-disk inode from in-memory inode
	 * acquiring transaction locks for AFTER records
	 * on the on-disk inode of file object
	 *
	 * sort the inodes array by inode number in descending order
	 * to prevent deadlock when acquiring transaction lock
	 * of on-disk inodes on multiple on-disk inode pages by
	 * multiple concurrent transactions
	 */
	for (k = 0; k < cd.nip; k++) {
		top = (cd.iplist[k])->i_ino;
		for (n = k + 1; n < cd.nip; n++) {
			ip = cd.iplist[n];
			if (ip->i_ino > top) {
				top = ip->i_ino;
				cd.iplist[n] = cd.iplist[k];
				cd.iplist[k] = ip;
			}
		}

		ip = cd.iplist[k];
		jfs_ip = JFS_IP(ip);

		/*
		 * BUGBUG - This code has temporarily been removed.  The
		 * intent is to ensure that any file data is written before
		 * the metadata is committed to the journal.  This prevents
		 * uninitialized data from appearing in a file after the
		 * journal has been replayed.  (The uninitialized data
		 * could be sensitive data removed by another user.)
		 *
		 * The problem now is that we are holding the IWRITELOCK
		 * on the inode, and calling filemap_fdatawrite on an
		 * unmapped page will cause a deadlock in jfs_get_block.
		 *
		 * The long term solution is to pare down the use of
		 * IWRITELOCK.  We are currently holding it too long.
		 * We could also be smarter about which data pages need
		 * to be written before the transaction is committed and
		 * when we don't need to worry about it at all.
		 *
		 * if ((!S_ISDIR(ip->i_mode))
		 *    && (tblk->flag & COMMIT_DELETE) == 0)
		 *	filemap_write_and_wait(ip->i_mapping);
		 */

		/*
		 * Mark inode as not dirty.  It will still be on the dirty
		 * inode list, but we'll know not to commit it again unless
		 * it gets marked dirty again
		 */
		clear_cflag(COMMIT_Dirty, ip);

		/* inherit anonymous tlock(s) of inode */
		if (jfs_ip->atlhead) {
			lid_to_tlock(jfs_ip->atltail)->next = tblk->next;
			tblk->next = jfs_ip->atlhead;
			if (!tblk->last)
				tblk->last = jfs_ip->atltail;
			jfs_ip->atlhead = jfs_ip->atltail = 0;
			TXN_LOCK();
			list_del_init(&jfs_ip->anon_inode_list);
			TXN_UNLOCK();
		}

		/*
		 * acquire transaction lock on on-disk inode page
		 * (become first tlock of the tblk's tlock list)
		 */
		if ((rc = diWrite(tid, ip)))
			goto out;
	}

	/*
	 * write log records from transaction locks
	 *
	 * txUpdateMap() resets XAD_NEW in XAD.
	 */
	if ((rc = txLog(log, tblk, &cd)))
		goto TheEnd;

	/*
	 * Ensure that inode isn't reused before
	 * lazy commit thread finishes processing
	 */
	if (tblk->xflag & COMMIT_DELETE) {
		ihold(tblk->u.ip);
		/*
		 * Avoid a rare deadlock
		 *
		 * If the inode is locked, we may be blocked in
		 * jfs_commit_inode.  If so, we don't want the
		 * lazy_commit thread doing the last iput() on the inode
		 * since that may block on the locked inode.  Instead,
		 * commit the transaction synchronously, so the last iput
		 * will be done by the calling thread (or later)
		 */
		/*
		 * I believe this code is no longer needed.  Splitting I_LOCK
		 * into two bits, I_NEW and I_SYNC should prevent this
		 * deadlock as well.
		 * But since I don't have a JFS testload to verify this,
		 * only a trivial s/I_LOCK/I_SYNC/ was done.
		 * Joern
		 */
		if (tblk->u.ip->i_state & I_SYNC)
			tblk->xflag &= ~COMMIT_LAZY;
	}

	ASSERT((!(tblk->xflag & COMMIT_DELETE)) ||
	       ((tblk->u.ip->i_nlink == 0) &&
		!test_cflag(COMMIT_Nolink, tblk->u.ip)));

	/*
	 * write COMMIT log record
	 */
	lrd->type = cpu_to_le16(LOG_COMMIT);
	lrd->length = 0;
	lmLog(log, tblk, lrd, NULL);

	lmGroupCommit(log, tblk);

	/*
	 *	- transaction is now committed -
	 */

	/*
	 * force pages in careful update
	 * (imap addressing structure update)
	 */
	if (flag & COMMIT_FORCE)
		txForce(tblk);

	/*
	 * update allocation map.
	 *
	 * update inode allocation map and inode:
	 * free pager lock on memory object of inode if any.
	 * update block allocation map.
	 *
	 * txUpdateMap() resets XAD_NEW in XAD.
	 */
	if (tblk->xflag & COMMIT_FORCE)
		txUpdateMap(tblk);

	/*
	 * free transaction locks and pageout/free pages
	 */
	txRelease(tblk);

	if ((tblk->flag & tblkGC_LAZY) == 0)
		txUnlock(tblk);

	/*
	 * reset in-memory object state
	 */
	for (k = 0; k < cd.nip; k++) {
		ip = cd.iplist[k];
		jfs_ip = JFS_IP(ip);

		/*
		 * reset in-memory inode state
		 */
		jfs_ip->bxflag = 0;
		jfs_ip->blid = 0;
	}

      out:
	if (rc != 0)
		txAbort(tid, 1);

      TheEnd:
	jfs_info("txCommit: tid = %d, returning %d", tid, rc);
	return rc;
}

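/*
 * Log record chaining (editor's summary): each tblock's records are linked
 * through lrd->backchain.  Every lmLog() call returns the lsn of the record
 * just written, which is stored as the backchain of the next record, and
 * the LOG_COMMIT record written at the end of txCommit() closes the chain
 * before lmGroupCommit() pushes the group to disk.
 */
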
/*
 * NAME:	txLog()
 *
 * FUNCTION:	Writes AFTER log records for all lines modified
 *		by tid for segments specified by inodes in comdata.
 *		Code assumes only WRITELOCKS are recorded in lockwords.
 *
 * PARAMETERS:
 *
 * RETURN:
 */
static int txLog(struct jfs_log * log, struct tblock * tblk, struct commit * cd)
{
	int rc = 0;
	struct inode *ip;
	lid_t lid;
	struct tlock *tlck;
	struct lrd *lrd = &cd->lrd;

	/*
	 * write log record(s) for each tlock of transaction,
	 */
	for (lid = tblk->next; lid; lid = tlck->next) {
		tlck = lid_to_tlock(lid);

		tlck->flag |= tlckLOG;

		/* initialize lrd common */
		ip = tlck->ip;
		lrd->aggregate = cpu_to_le32(JFS_SBI(ip->i_sb)->aggregate);
		lrd->log.redopage.fileset = cpu_to_le32(JFS_IP(ip)->fileset);
		lrd->log.redopage.inode = cpu_to_le32(ip->i_ino);

		/* write log record of page from the tlock */
		switch (tlck->type & tlckTYPE) {
		case tlckXTREE:
			xtLog(log, tblk, lrd, tlck);
			break;

		case tlckDTREE:
			dtLog(log, tblk, lrd, tlck);
			break;

		case tlckINODE:
			diLog(log, tblk, lrd, tlck, cd);
			break;

		case tlckMAP:
			mapLog(log, tblk, lrd, tlck);
			break;

		case tlckDATA:
			dataLog(log, tblk, lrd, tlck);
			break;

		default:
			jfs_err("UFO tlock:0x%p", tlck);
		}
	}

	return rc;
}

/*
 *	diLog()
 *
 * function:	log inode tlock and format maplock to update bmap;
 */
static int diLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd,
		 struct tlock * tlck, struct commit * cd)
{
	int rc = 0;
	struct metapage *mp;
	pxd_t *pxd;
	struct pxd_lock *pxdlock;

	mp = tlck->mp;

	/* initialize as REDOPAGE record format */
	lrd->log.redopage.type = cpu_to_le16(LOG_INODE);
	lrd->log.redopage.l2linesize = cpu_to_le16(L2INODESLOTSIZE);

	pxd = &lrd->log.redopage.pxd;

	/*
	 *	inode after image
	 */
	if (tlck->type & tlckENTRY) {
		/* log after-image for logredo(): */
		lrd->type = cpu_to_le16(LOG_REDOPAGE);
		PXDaddress(pxd, mp->index);
		PXDlength(pxd,
			  mp->logical_size >> tblk->sb->s_blocksize_bits);
		lrd->backchain = cpu_to_le32(lmLog(log, tblk, lrd, tlck));

		/* mark page as homeward bound */
		tlck->flag |= tlckWRITEPAGE;
	} else if (tlck->type & tlckFREE) {
		/*
		 *	free inode extent
		 *
		 * (pages of the freed inode extent have been invalidated and
		 * a maplock for free of the extent has been formatted at
		 * txLock() time);
		 *
		 * the tlock had been acquired on the inode allocation map page
		 * (iag) that specifies the freed extent, even though the map
		 * page is not itself logged, to prevent pageout of the map
		 * page before the log;
		 */

		/* log LOG_NOREDOINOEXT of the freed inode extent for
		 * logredo() to start NoRedoPage filters, and to update
		 * imap and bmap for free of the extent;
		 */
		lrd->type = cpu_to_le16(LOG_NOREDOINOEXT);
		/*
		 * For the LOG_NOREDOINOEXT record, we need
		 * to pass the IAG number and inode extent
		 * index (within that IAG) from which the
		 * extent is being released.  These have been
		 * passed to us in iplist[1] and iplist[2].
		 */
		lrd->log.noredoinoext.iagnum =
		    cpu_to_le32((u32) (size_t) cd->iplist[1]);
		lrd->log.noredoinoext.inoext_idx =
		    cpu_to_le32((u32) (size_t) cd->iplist[2]);

		pxdlock = (struct pxd_lock *) & tlck->lock;
		*pxd = pxdlock->pxd;
		lrd->backchain = cpu_to_le32(lmLog(log, tblk, lrd, NULL));

		/* update bmap */
		tlck->flag |= tlckUPDATEMAP;

		/* mark page as homeward bound */
		tlck->flag |= tlckWRITEPAGE;
	} else
		jfs_err("diLog: UFO type tlck:0x%p", tlck);
#ifdef _JFS_WIP
	/*
	 *	alloc/free external EA extent
	 *
	 * a maplock for txUpdateMap() to update bPWMAP for alloc/free
	 * of the extent has been formatted at txLock() time;
	 */
	else {
		assert(tlck->type & tlckEA);

		/* log LOG_UPDATEMAP for logredo() to update bmap for
		 * alloc of new (and free of old) external EA extent;
		 */
		lrd->type = cpu_to_le16(LOG_UPDATEMAP);
		pxdlock = (struct pxd_lock *) & tlck->lock;
		nlock = pxdlock->index;
		for (i = 0; i < nlock; i++, pxdlock++) {
			if (pxdlock->flag & mlckALLOCPXD)
				lrd->log.updatemap.type =
				    cpu_to_le16(LOG_ALLOCPXD);
			else
				lrd->log.updatemap.type =
				    cpu_to_le16(LOG_FREEPXD);
			lrd->log.updatemap.nxd = cpu_to_le16(1);
			lrd->log.updatemap.pxd = pxdlock->pxd;
			lrd->backchain =
			    cpu_to_le32(lmLog(log, tblk, lrd, NULL));
		}

		/* update bmap */
		tlck->flag |= tlckUPDATEMAP;
	}
#endif /* _JFS_WIP */

	return rc;
}

/*
 *	dataLog()
 *
 * function:	log data tlock
 */
static int dataLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd,
		   struct tlock * tlck)
{
	struct metapage *mp;
	pxd_t *pxd;

	mp = tlck->mp;

	/* initialize as REDOPAGE record format */
	lrd->log.redopage.type = cpu_to_le16(LOG_DATA);
	lrd->log.redopage.l2linesize = cpu_to_le16(L2DATASLOTSIZE);

	pxd = &lrd->log.redopage.pxd;

	/* log after-image for logredo(): */
	lrd->type = cpu_to_le16(LOG_REDOPAGE);

	if (jfs_dirtable_inline(tlck->ip)) {
		/*
		 * The table has been truncated; we must have deleted
		 * the last entry, so don't bother logging this
		 */
		mp->lid = 0;
		grab_metapage(mp);
		metapage_homeok(mp);
		discard_metapage(mp);
		tlck->mp = NULL;
		return 0;
	}

	PXDaddress(pxd, mp->index);
	PXDlength(pxd, mp->logical_size >> tblk->sb->s_blocksize_bits);

	lrd->backchain = cpu_to_le32(lmLog(log, tblk, lrd, tlck));

	/* mark page as homeward bound */
	tlck->flag |= tlckWRITEPAGE;

	return 0;
}

/*
 *	dtLog()
 *
 * function:	log dtree tlock and format maplock to update bmap;
 */
static void dtLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd,
		  struct tlock * tlck)
{
	struct metapage *mp;
	struct pxd_lock *pxdlock;
	pxd_t *pxd;

	mp = tlck->mp;

	/* initialize as REDOPAGE/NOREDOPAGE record format */
	lrd->log.redopage.type = cpu_to_le16(LOG_DTREE);
	lrd->log.redopage.l2linesize = cpu_to_le16(L2DTSLOTSIZE);

	pxd = &lrd->log.redopage.pxd;

	if (tlck->type & tlckBTROOT)
		lrd->log.redopage.type |= cpu_to_le16(LOG_BTROOT);

	/*
	 *	page extension via relocation: entry insertion;
	 *	page extension in-place: entry insertion;
	 *	new right page from page split, reinitialized in-line
	 *	root from root page split: entry insertion;
	 */
	if (tlck->type & (tlckNEW | tlckEXTEND)) {
		/* log after-image of the new page for logredo():
		 * mark log (LOG_NEW) for logredo() to initialize
		 * freelist and update bmap for alloc of the new page;
		 */
		lrd->type = cpu_to_le16(LOG_REDOPAGE);
		if (tlck->type & tlckEXTEND)
			lrd->log.redopage.type |= cpu_to_le16(LOG_EXTEND);
		else
			lrd->log.redopage.type |= cpu_to_le16(LOG_NEW);
		PXDaddress(pxd, mp->index);
		PXDlength(pxd,
			  mp->logical_size >> tblk->sb->s_blocksize_bits);
		lrd->backchain = cpu_to_le32(lmLog(log, tblk, lrd, tlck));

		/* format a maplock for txUpdateMap() to update bPMAP for
		 * alloc of the new page;
		 */
		if (tlck->type & tlckBTROOT)
			return;
		tlck->flag |= tlckUPDATEMAP;
		pxdlock = (struct pxd_lock *) & tlck->lock;
		pxdlock->flag = mlckALLOCPXD;
		pxdlock->pxd = *pxd;

		pxdlock->index = 1;

		/* mark page as homeward bound */
		tlck->flag |= tlckWRITEPAGE;
		return;
	}

	/*
	 *	entry insertion/deletion,
	 *	sibling page link update (old right page before split);
	 */
	if (tlck->type & (tlckENTRY | tlckRELINK)) {
		/* log after-image for logredo(): */
		lrd->type = cpu_to_le16(LOG_REDOPAGE);
		PXDaddress(pxd, mp->index);
		PXDlength(pxd,
			  mp->logical_size >> tblk->sb->s_blocksize_bits);
		lrd->backchain = cpu_to_le32(lmLog(log, tblk, lrd, tlck));

		/* mark page as homeward bound */
		tlck->flag |= tlckWRITEPAGE;
		return;
	}

	/*
	 *	page deletion: page has been invalidated
	 *	page relocation: source extent
	 *
	 *	a maplock for free of the page has been formatted
	 *	at txLock() time);
	 */
	if (tlck->type & (tlckFREE | tlckRELOCATE)) {
		/* log LOG_NOREDOPAGE of the deleted page for logredo()
		 * to start NoRedoPage filter and to update bmap for free
		 * of the deleted page
		 */
		lrd->type = cpu_to_le16(LOG_NOREDOPAGE);
		pxdlock = (struct pxd_lock *) & tlck->lock;
		*pxd = pxdlock->pxd;
		lrd->backchain = cpu_to_le32(lmLog(log, tblk, lrd, NULL));

		/* a maplock for txUpdateMap() for free of the page
		 * has been formatted at txLock() time;
		 */
		tlck->flag |= tlckUPDATEMAP;
	}
	return;
}

/*
 *	xtLog()
 *
 * function:	log xtree tlock and format maplock to update bmap;
 */
static void xtLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd,
		  struct tlock * tlck)
{
	struct inode *ip;
	struct metapage *mp;
	xtpage_t *p;
	struct xtlock *xtlck;
	struct maplock *maplock;
	struct xdlistlock *xadlock;
	struct pxd_lock *pxdlock;
	pxd_t *page_pxd;
	int next, lwm, hwm;

	ip = tlck->ip;
	mp = tlck->mp;

	/* initialize as REDOPAGE/NOREDOPAGE record format */
	lrd->log.redopage.type = cpu_to_le16(LOG_XTREE);
	lrd->log.redopage.l2linesize = cpu_to_le16(L2XTSLOTSIZE);

	page_pxd = &lrd->log.redopage.pxd;

	if (tlck->type & tlckBTROOT) {
		lrd->log.redopage.type |= cpu_to_le16(LOG_BTROOT);
		p = &JFS_IP(ip)->i_xtroot;
		if (S_ISDIR(ip->i_mode))
			lrd->log.redopage.type |=
			    cpu_to_le16(LOG_DIR_XTREE);
	} else
		p = (xtpage_t *) mp->data;
	next = le16_to_cpu(p->header.nextindex);

	xtlck = (struct xtlock *) & tlck->lock;

	maplock = (struct maplock *) & tlck->lock;
	xadlock = (struct xdlistlock *) maplock;

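	/*
	 * In the cases below, "next" is the page's nextindex at commit time,
	 * and xtlck->lwm/xtlck->hwm are, roughly, the lowest and highest xad
	 * slots touched under this tlock, so XAD[lwm:next) covers the newly
	 * allocated/extended entries and XAD[next:hwm] the entries being
	 * freed (see the truncation diagram further down).  (Editor's note.)
	 */
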
	/*
	 *	entry insertion/extension;
	 *	sibling page link update (old right page before split);
	 */
	if (tlck->type & (tlckNEW | tlckGROW | tlckRELINK)) {
		/* log after-image for logredo():
		 * logredo() will update bmap for alloc of new/extended
		 * extents (XAD_NEW|XAD_EXTEND) of XAD[lwm:next) from
		 * after-image of XADlist;
		 * logredo() resets (XAD_NEW|XAD_EXTEND) flag when
		 * applying the after-image to the meta-data page.
		 */
		lrd->type = cpu_to_le16(LOG_REDOPAGE);
		PXDaddress(page_pxd, mp->index);
		PXDlength(page_pxd,
			  mp->logical_size >> tblk->sb->s_blocksize_bits);
		lrd->backchain = cpu_to_le32(lmLog(log, tblk, lrd, tlck));

		/* format a maplock for txUpdateMap() to update bPMAP
		 * for alloc of new/extended extents of XAD[lwm:next)
		 * from the page itself;
		 * txUpdateMap() resets (XAD_NEW|XAD_EXTEND) flag.
		 */
		lwm = xtlck->lwm.offset;
		if (lwm == 0)
			lwm = XTPAGEMAXSLOT;

		if (lwm == next)
			goto out;
		if (lwm > next) {
			jfs_err("xtLog: lwm > next");
			goto out;
		}
		tlck->flag |= tlckUPDATEMAP;
		xadlock->flag = mlckALLOCXADLIST;
		xadlock->count = next - lwm;
		if ((xadlock->count <= 4) && (tblk->xflag & COMMIT_LAZY)) {
			int i;
			pxd_t *pxd;
			/*
			 * Lazy commit may allow xtree to be modified before
			 * txUpdateMap runs.  Copy xad into linelock to
			 * preserve correct data.
			 *
			 * We can fit twice as many pxd's as xads in the lock
			 */
			xadlock->flag = mlckALLOCPXDLIST;
			pxd = xadlock->xdlist = &xtlck->pxdlock;
			for (i = 0; i < xadlock->count; i++) {
				PXDaddress(pxd, addressXAD(&p->xad[lwm + i]));
				PXDlength(pxd, lengthXAD(&p->xad[lwm + i]));
				p->xad[lwm + i].flag &=
				    ~(XAD_NEW | XAD_EXTENDED);
				pxd++;
			}
		} else {
			/*
			 * xdlist will point into the inode's xtree; ensure
			 * that the transaction is not committed lazily.
			 */
			xadlock->flag = mlckALLOCXADLIST;
			xadlock->xdlist = &p->xad[lwm];
			tblk->xflag &= ~COMMIT_LAZY;
		}
		jfs_info("xtLog: alloc ip:0x%p mp:0x%p tlck:0x%p lwm:%d count:%d",
			 tlck->ip, mp, tlck, lwm, xadlock->count);

		maplock->index = 1;

	      out:
		/* mark page as homeward bound */
		tlck->flag |= tlckWRITEPAGE;

		return;
	}

	/*
	 *	page deletion: file deletion/truncation (ref. xtTruncate())
	 *
	 * (page will be invalidated after log is written and bmap
	 * is updated from the page);
	 */
	if (tlck->type & tlckFREE) {
		/* LOG_NOREDOPAGE log for NoRedoPage filter:
		 * if page free from file delete, NoRedoFile filter from
		 * inode image of zero link count will subsume NoRedoPage
		 * filters for each page;
		 * if page free from file truncation, write NoRedoPage
		 * filter;
		 *
		 * update of block allocation map for the page itself:
		 * if page free from deletion and truncation, LOG_UPDATEMAP
		 * log for the page itself is generated from processing
		 * its parent page xad entries;
		 */
		/* if page free from file truncation, log LOG_NOREDOPAGE
		 * of the deleted page for logredo() to start NoRedoPage
		 * filter for the page;
		 */
		if (tblk->xflag & COMMIT_TRUNCATE) {
			/* write NOREDOPAGE for the page */
			lrd->type = cpu_to_le16(LOG_NOREDOPAGE);
			PXDaddress(page_pxd, mp->index);
			PXDlength(page_pxd,
				  mp->logical_size >> tblk->sb->
				  s_blocksize_bits);
			lrd->backchain =
			    cpu_to_le32(lmLog(log, tblk, lrd, NULL));

			if (tlck->type & tlckBTROOT) {
				/* Empty xtree must be logged */
				lrd->type = cpu_to_le16(LOG_REDOPAGE);
				lrd->backchain =
				    cpu_to_le32(lmLog(log, tblk, lrd, tlck));
			}
		}

		/* init LOG_UPDATEMAP of the freed extents
		 * XAD[XTENTRYSTART:hwm) from the deleted page itself
		 * for logredo() to update bmap;
		 */
		lrd->type = cpu_to_le16(LOG_UPDATEMAP);
		lrd->log.updatemap.type = cpu_to_le16(LOG_FREEXADLIST);
		xtlck = (struct xtlock *) & tlck->lock;
		hwm = xtlck->hwm.offset;
		lrd->log.updatemap.nxd =
		    cpu_to_le16(hwm - XTENTRYSTART + 1);
		/* reformat linelock for lmLog() */
		xtlck->header.offset = XTENTRYSTART;
		xtlck->header.length = hwm - XTENTRYSTART + 1;
		xtlck->index = 1;
		lrd->backchain = cpu_to_le32(lmLog(log, tblk, lrd, tlck));

		/* format a maplock for txUpdateMap() to update bmap
		 * to free extents of XAD[XTENTRYSTART:hwm) from the
		 * deleted page itself;
		 */
		tlck->flag |= tlckUPDATEMAP;
		xadlock->count = hwm - XTENTRYSTART + 1;
		if ((xadlock->count <= 4) && (tblk->xflag & COMMIT_LAZY)) {
			int i;
			pxd_t *pxd;
			/*
			 * Lazy commit may allow xtree to be modified before
			 * txUpdateMap runs.  Copy xad into linelock to
			 * preserve correct data.
			 *
			 * We can fit twice as many pxd's as xads in the lock
			 */
			xadlock->flag = mlckFREEPXDLIST;
			pxd = xadlock->xdlist = &xtlck->pxdlock;
			for (i = 0; i < xadlock->count; i++) {
				PXDaddress(pxd,
					   addressXAD(&p->xad[XTENTRYSTART + i]));
				PXDlength(pxd,
					  lengthXAD(&p->xad[XTENTRYSTART + i]));
				pxd++;
			}
		} else {
			/*
			 * xdlist will point into the inode's xtree; ensure
			 * that the transaction is not committed lazily.
			 */
			xadlock->flag = mlckFREEXADLIST;
			xadlock->xdlist = &p->xad[XTENTRYSTART];
			tblk->xflag &= ~COMMIT_LAZY;
		}
		jfs_info("xtLog: free ip:0x%p mp:0x%p count:%d lwm:2",
			 tlck->ip, mp, xadlock->count);

		maplock->index = 1;

		/* mark page as invalid */
		if (((tblk->xflag & COMMIT_PWMAP) || S_ISDIR(ip->i_mode))
		    && !(tlck->type & tlckBTROOT))
			tlck->flag |= tlckFREEPAGE;
		/*
		   else (tblk->xflag & COMMIT_PMAP)
		   ? release the page;
		 */
		return;
	}

	/*
	 *	page/entry truncation: file truncation (ref. xtTruncate())
	 *
	 *	|----------+------+------+---------------|
	 *	|          |      |
	 *	|          |     hwm - hwm before truncation
	 *	|          next - truncation point
	 *	lwm - lwm before truncation
	 *	header ?
	 */
	if (tlck->type & tlckTRUNCATE) {
		pxd_t pxd;	/* truncated extent of xad */
		int twm;

		/*
		 * For truncation the entire linelock may be used, so it would
		 * be difficult to store xad list in linelock itself.
		 * Therefore, we'll just force transaction to be committed
		 * synchronously, so that xtree pages won't be changed before
		 * txUpdateMap runs.
		 */
		tblk->xflag &= ~COMMIT_LAZY;
		lwm = xtlck->lwm.offset;
		if (lwm == 0)
			lwm = XTPAGEMAXSLOT;
		hwm = xtlck->hwm.offset;
		twm = xtlck->twm.offset;

		/*
		 *	write log records
		 */
		/* log after-image for logredo():
		 *
		 * logredo() will update bmap for alloc of new/extended
		 * extents (XAD_NEW|XAD_EXTEND) of XAD[lwm:next) from
		 * after-image of XADlist;
		 * logredo() resets (XAD_NEW|XAD_EXTEND) flag when
		 * applying the after-image to the meta-data page.
		 */
		lrd->type = cpu_to_le16(LOG_REDOPAGE);
		PXDaddress(page_pxd, mp->index);
		PXDlength(page_pxd,
			  mp->logical_size >> tblk->sb->s_blocksize_bits);
		lrd->backchain = cpu_to_le32(lmLog(log, tblk, lrd, tlck));

		/*
		 * truncate entry XAD[twm == next - 1]:
		 */
		if (twm == next - 1) {
			/* init LOG_UPDATEMAP for logredo() to update bmap for
			 * free of truncated delta extent of the truncated
			 * entry XAD[next - 1]:
			 * (xtlck->pxdlock = truncated delta extent);
			 */
			pxdlock = (struct pxd_lock *) & xtlck->pxdlock;
			/* assert(pxdlock->type & tlckTRUNCATE); */
			lrd->type = cpu_to_le16(LOG_UPDATEMAP);
			lrd->log.updatemap.type = cpu_to_le16(LOG_FREEPXD);
			lrd->log.updatemap.nxd = cpu_to_le16(1);
			lrd->log.updatemap.pxd = pxdlock->pxd;
			pxd = pxdlock->pxd;	/* save to format maplock */
			lrd->backchain =
			    cpu_to_le32(lmLog(log, tblk, lrd, NULL));
		}

		/*
		 * free entries XAD[next:hwm]:
		 */
		if (hwm >= next) {
			/* init LOG_UPDATEMAP of the freed extents
			 * XAD[next:hwm] from the deleted page itself
			 * for logredo() to update bmap;
			 */
			lrd->type = cpu_to_le16(LOG_UPDATEMAP);
			lrd->log.updatemap.type =
			    cpu_to_le16(LOG_FREEXADLIST);
			xtlck = (struct xtlock *) & tlck->lock;
			hwm = xtlck->hwm.offset;
			lrd->log.updatemap.nxd =
			    cpu_to_le16(hwm - next + 1);
			/* reformat linelock for lmLog() */
			xtlck->header.offset = next;
			xtlck->header.length = hwm - next + 1;
			xtlck->index = 1;
			lrd->backchain =
			    cpu_to_le32(lmLog(log, tblk, lrd, tlck));
		}

		/*
		 * format maplock(s) for txUpdateMap() to update bmap
		 */
		maplock->index = 0;

		/*
		 * allocate entries XAD[lwm:next):
		 */
		if (lwm < next) {
			/* format a maplock for txUpdateMap() to update bPMAP
			 * for alloc of new/extended extents of XAD[lwm:next)
			 * from the page itself;
			 * txUpdateMap() resets (XAD_NEW|XAD_EXTEND) flag.
			 */
			tlck->flag |= tlckUPDATEMAP;
			xadlock->flag = mlckALLOCXADLIST;
			xadlock->count = next - lwm;
			xadlock->xdlist = &p->xad[lwm];

			jfs_info("xtLog: alloc ip:0x%p mp:0x%p count:%d lwm:%d next:%d",
				 tlck->ip, mp, xadlock->count, lwm, next);
			maplock->index++;
			xadlock++;
		}

		/*
		 * truncate entry XAD[twm == next - 1]:
		 */
		if (twm == next - 1) {
			/* format a maplock for txUpdateMap() to update bmap
			 * to free truncated delta extent of the truncated
			 * entry XAD[next - 1];
			 * (xtlck->pxdlock = truncated delta extent);
			 */
			tlck->flag |= tlckUPDATEMAP;
			pxdlock = (struct pxd_lock *) xadlock;
			pxdlock->flag = mlckFREEPXD;
			pxdlock->count = 1;
			pxdlock->pxd = pxd;

			jfs_info("xtLog: truncate ip:0x%p mp:0x%p count:%d hwm:%d",
				 ip, mp, pxdlock->count, hwm);
			maplock->index++;
			xadlock++;
		}

		/*
		 * free entries XAD[next:hwm]:
		 */
		if (hwm >= next) {
			/* format a maplock for txUpdateMap() to update bmap
			 * to free extents of XAD[next:hwm] from the deleted
			 * page itself;
			 */
			tlck->flag |= tlckUPDATEMAP;
			xadlock->flag = mlckFREEXADLIST;
			xadlock->count = hwm - next + 1;
			xadlock->xdlist = &p->xad[next];

			jfs_info("xtLog: free ip:0x%p mp:0x%p count:%d next:%d hwm:%d",
				 tlck->ip, mp, xadlock->count, next, hwm);
			maplock->index++;
		}

		/* mark page as homeward bound */
		tlck->flag |= tlckWRITEPAGE;
	}
	return;
}

/*
 *	mapLog()
 *
 * function:	log from maplock of freed data extents;
 */
static void mapLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd,
		   struct tlock * tlck)
{
	struct pxd_lock *pxdlock;
	int i, nlock;
	pxd_t *pxd;

	/*
	 *	page relocation: free the source page extent
	 *
	 * a maplock for txUpdateMap() for free of the page
	 * has been formatted at txLock() time saving the src
	 * relocated page address;
	 */
	if (tlck->type & tlckRELOCATE) {
		/* log LOG_NOREDOPAGE of the old relocated page
		 * for logredo() to start NoRedoPage filter;
		 */
		lrd->type = cpu_to_le16(LOG_NOREDOPAGE);
		pxdlock = (struct pxd_lock *) & tlck->lock;
		pxd = &lrd->log.redopage.pxd;
		*pxd = pxdlock->pxd;
		lrd->backchain = cpu_to_le32(lmLog(log, tblk, lrd, NULL));

currently, logredo() does NOT update bmap 2100 * for free of the page itself for (LOG_XTREE|LOG_NOREDOPAGE); 2101 * if page free from relocation, LOG_UPDATEMAP log is 2102 * specifically generated now for logredo() 2103 * to update bmap for free of src relocated page; 2104 * (new flag LOG_RELOCATE may be introduced which will 2105 * inform logredo() to start NORedoPage filter and also 2106 * update block allocation map at the same time, thus 2107 * avoiding an extra log write); 2108 */ 2109 lrd->type = cpu_to_le16(LOG_UPDATEMAP); 2110 lrd->log.updatemap.type = cpu_to_le16(LOG_FREEPXD); 2111 lrd->log.updatemap.nxd = cpu_to_le16(1); 2112 lrd->log.updatemap.pxd = pxdlock->pxd; 2113 lrd->backchain = cpu_to_le32(lmLog(log, tblk, lrd, NULL)); 2114 2115 /* a maplock for txUpdateMap() for free of the page 2116 * has been formatted at txLock() time; 2117 */ 2118 tlck->flag |= tlckUPDATEMAP; 2119 return; 2120 } 2121 /* 2122 2123 * Otherwise it's not a relocate request 2124 * 2125 */ 2126 else { 2127 /* log LOG_UPDATEMAP for logredo() to update bmap for 2128 * free of truncated/relocated delta extent of the data; 2129 * e.g.: external EA extent, relocated/truncated extent 2130 * from xtTailgate(); 2131 */ 2132 lrd->type = cpu_to_le16(LOG_UPDATEMAP); 2133 pxdlock = (struct pxd_lock *) & tlck->lock; 2134 nlock = pxdlock->index; 2135 for (i = 0; i < nlock; i++, pxdlock++) { 2136 if (pxdlock->flag & mlckALLOCPXD) 2137 lrd->log.updatemap.type = 2138 cpu_to_le16(LOG_ALLOCPXD); 2139 else 2140 lrd->log.updatemap.type = 2141 cpu_to_le16(LOG_FREEPXD); 2142 lrd->log.updatemap.nxd = cpu_to_le16(1); 2143 lrd->log.updatemap.pxd = pxdlock->pxd; 2144 lrd->backchain = 2145 cpu_to_le32(lmLog(log, tblk, lrd, NULL)); 2146 jfs_info("mapLog: xaddr:0x%lx xlen:0x%x", 2147 (ulong) addressPXD(&pxdlock->pxd), 2148 lengthPXD(&pxdlock->pxd)); 2149 } 2150 2151 /* update bmap */ 2152 tlck->flag |= tlckUPDATEMAP; 2153 } 2154} 2155 2156/* 2157 * txEA() 2158 * 2159 * function: acquire maplock for EA/ACL extents or 2160 * set COMMIT_INLINE flag; 2161 */ 2162void txEA(tid_t tid, struct inode *ip, dxd_t * oldea, dxd_t * newea) 2163{ 2164 struct tlock *tlck = NULL; 2165 struct pxd_lock *maplock = NULL, *pxdlock = NULL; 2166 2167 /* 2168 * format maplock for alloc of new EA extent 2169 */ 2170 if (newea) { 2171 /* Since the newea could be a completely zeroed entry we need to 2172 * check for the two flags which indicate we should actually 2173 * commit new EA data 2174 */ 2175 if (newea->flag & DXD_EXTENT) { 2176 tlck = txMaplock(tid, ip, tlckMAP); 2177 maplock = (struct pxd_lock *) & tlck->lock; 2178 pxdlock = (struct pxd_lock *) maplock; 2179 pxdlock->flag = mlckALLOCPXD; 2180 PXDaddress(&pxdlock->pxd, addressDXD(newea)); 2181 PXDlength(&pxdlock->pxd, lengthDXD(newea)); 2182 pxdlock++; 2183 maplock->index = 1; 2184 } else if (newea->flag & DXD_INLINE) { 2185 tlck = NULL; 2186 2187 set_cflag(COMMIT_Inlineea, ip); 2188 } 2189 } 2190 2191 /* 2192 * format maplock for free of old EA extent 2193 */ 2194 if (!test_cflag(COMMIT_Nolink, ip) && oldea->flag & DXD_EXTENT) { 2195 if (tlck == NULL) { 2196 tlck = txMaplock(tid, ip, tlckMAP); 2197 maplock = (struct pxd_lock *) & tlck->lock; 2198 pxdlock = (struct pxd_lock *) maplock; 2199 maplock->index = 0; 2200 } 2201 pxdlock->flag = mlckFREEPXD; 2202 PXDaddress(&pxdlock->pxd, addressDXD(oldea)); 2203 PXDlength(&pxdlock->pxd, lengthDXD(oldea)); 2204 maplock->index++; 2205 } 2206} 2207 2208/* 2209 * txForce() 2210 * 2211 * function: synchronously write pages locked by transaction 2212 * after txLog() 
but before txUpdateMap(); 2213 */ 2214static void txForce(struct tblock * tblk) 2215{ 2216 struct tlock *tlck; 2217 lid_t lid, next; 2218 struct metapage *mp; 2219 2220 /* 2221 * reverse the order of transaction tlocks in 2222 * careful update order of address index pages 2223 * (right to left, bottom up) 2224 */ 2225 tlck = lid_to_tlock(tblk->next); 2226 lid = tlck->next; 2227 tlck->next = 0; 2228 while (lid) { 2229 tlck = lid_to_tlock(lid); 2230 next = tlck->next; 2231 tlck->next = tblk->next; 2232 tblk->next = lid; 2233 lid = next; 2234 } 2235 2236 /* 2237 * synchronously write the page, and 2238 * hold the page for txUpdateMap(); 2239 */ 2240 for (lid = tblk->next; lid; lid = next) { 2241 tlck = lid_to_tlock(lid); 2242 next = tlck->next; 2243 2244 if ((mp = tlck->mp) != NULL && 2245 (tlck->type & tlckBTROOT) == 0) { 2246 assert(mp->xflag & COMMIT_PAGE); 2247 2248 if (tlck->flag & tlckWRITEPAGE) { 2249 tlck->flag &= ~tlckWRITEPAGE; 2250 2251 /* do not release page to freelist */ 2252 force_metapage(mp); 2253#if 0 2254 /* 2255 * The "right" thing to do here is to 2256 * synchronously write the metadata. 2257 * With the current implementation this 2258 * is hard since write_metapage requires 2259 * us to kunmap & remap the page. If we 2260 * have tlocks pointing into the metadata 2261 * pages, we don't want to do this. I think 2262 * we can get by with synchronously writing 2263 * the pages when they are released. 2264 */ 2265 assert(mp->nohomeok); 2266 set_bit(META_dirty, &mp->flag); 2267 set_bit(META_sync, &mp->flag); 2268#endif 2269 } 2270 } 2271 } 2272} 2273 2274/* 2275 * txUpdateMap() 2276 * 2277 * function: update persistent allocation map (and working map 2278 * if appropriate); 2279 * 2280 * parameter: 2281 */ 2282static void txUpdateMap(struct tblock * tblk) 2283{ 2284 struct inode *ip; 2285 struct inode *ipimap; 2286 lid_t lid; 2287 struct tlock *tlck; 2288 struct maplock *maplock; 2289 struct pxd_lock pxdlock; 2290 int maptype; 2291 int k, nlock; 2292 struct metapage *mp = NULL; 2293 2294 ipimap = JFS_SBI(tblk->sb)->ipimap; 2295 2296 maptype = (tblk->xflag & COMMIT_PMAP) ? COMMIT_PMAP : COMMIT_PWMAP; 2297 2298 2299 /* 2300 * update block allocation map 2301 * 2302 * update allocation state in pmap (and wmap) and 2303 * update lsn of the pmap page; 2304 */ 2305 /* 2306 * scan each tlock/page of transaction for block allocation/free: 2307 * 2308 * for each tlock/page of transaction, update map. 2309 * ? are there tlock for pmap and pwmap at the same time ? 2310 */ 2311 for (lid = tblk->next; lid; lid = tlck->next) { 2312 tlck = lid_to_tlock(lid); 2313 2314 if ((tlck->flag & tlckUPDATEMAP) == 0) 2315 continue; 2316 2317 if (tlck->flag & tlckFREEPAGE) { 2318 /* 2319 * Another thread may attempt to reuse freed space 2320 * immediately, so we want to get rid of the metapage 2321 * before anyone else has a chance to get it. 2322 * Lock metapage, update maps, then invalidate 2323 * the metapage. 2324 */ 2325 mp = tlck->mp; 2326 ASSERT(mp->xflag & COMMIT_PAGE); 2327 grab_metapage(mp); 2328 } 2329 2330 /* 2331 * extent list: 2332 * . in-line PXD list: 2333 * . 
out-of-line XAD list:
2334		 */
2335		maplock = (struct maplock *) & tlck->lock;
2336		nlock = maplock->index;
2337
2338		for (k = 0; k < nlock; k++, maplock++) {
2339			/*
2340			 * allocate blocks in persistent map:
2341			 *
2342			 * blocks have been allocated from wmap at alloc time;
2343			 */
2344			if (maplock->flag & mlckALLOC) {
2345				txAllocPMap(ipimap, maplock, tblk);
2346			}
2347			/*
2348			 * free blocks in persistent and working map:
2349			 * blocks will be freed in pmap and then in wmap;
2350			 *
2351			 * ? tblock specifies the PMAP/PWMAP based upon
2352			 * transaction
2353			 *
2354			 * free blocks in persistent map:
2355			 * blocks will be freed from wmap at last reference
2356			 * release of the object for regular files;
2357			 *
2358			 * Always free blocks from both persistent & working
2359			 * maps for directories
2360			 */
2361			else {	/* (maplock->flag & mlckFREE) */
2362
2363				if (tlck->flag & tlckDIRECTORY)
2364					txFreeMap(ipimap, maplock,
2365						  tblk, COMMIT_PWMAP);
2366				else
2367					txFreeMap(ipimap, maplock,
2368						  tblk, maptype);
2369			}
2370		}
2371		if (tlck->flag & tlckFREEPAGE) {
2372			if (!(tblk->flag & tblkGC_LAZY)) {
2373				/* This is equivalent to txRelease */
2374				ASSERT(mp->lid == lid);
2375				tlck->mp->lid = 0;
2376			}
2377			assert(mp->nohomeok == 1);
2378			metapage_homeok(mp);
2379			discard_metapage(mp);
2380			tlck->mp = NULL;
2381		}
2382	}
2383	/*
2384	 * update inode allocation map
2385	 *
2386	 * update allocation state in pmap and
2387	 * update lsn of the pmap page;
2388	 * update in-memory inode flag/state
2389	 *
2390	 * unlock mapper/write lock
2391	 */
2392	if (tblk->xflag & COMMIT_CREATE) {
2393		diUpdatePMap(ipimap, tblk->ino, false, tblk);
2394		/* update persistent block allocation map
2395		 * for the allocation of inode extent;
2396		 */
2397		pxdlock.flag = mlckALLOCPXD;
2398		pxdlock.pxd = tblk->u.ixpxd;
2399		pxdlock.index = 1;
2400		txAllocPMap(ipimap, (struct maplock *) & pxdlock, tblk);
2401	} else if (tblk->xflag & COMMIT_DELETE) {
2402		ip = tblk->u.ip;
2403		diUpdatePMap(ipimap, ip->i_ino, true, tblk);
2404		iput(ip);
2405	}
2406}
2407
2408/*
2409 *	txAllocPMap()
2410 *
2411 * function: allocate from persistent map;
2412 *
2413 * parameter:
2414 *	ipbmap	-
2415 *	maplock	-
2416 *		xad list:
2417 *		pxd:
2418 *
2419 *	maptype -
2420 *		allocate from persistent map;
2421 *		free from persistent map;
2422 *		(e.g., tmp file - free from working map at release
2423 *		 of last reference);
2424 *		free from persistent and working map;
2425 *
2426 *	lsn	- log sequence number;
2427 */
2428static void txAllocPMap(struct inode *ip, struct maplock * maplock,
2429			struct tblock * tblk)
2430{
2431	struct inode *ipbmap = JFS_SBI(ip->i_sb)->ipbmap;
2432	struct xdlistlock *xadlistlock;
2433	xad_t *xad;
2434	s64 xaddr;
2435	int xlen;
2436	struct pxd_lock *pxdlock;
2437	struct xdlistlock *pxdlistlock;
2438	pxd_t *pxd;
2439	int n;
2440
2441	/*
2442	 * allocate from persistent map;
2443	 */
2444	if (maplock->flag & mlckALLOCXADLIST) {
2445		xadlistlock = (struct xdlistlock *) maplock;
2446		xad = xadlistlock->xdlist;
2447		for (n = 0; n < xadlistlock->count; n++, xad++) {
2448			if (xad->flag & (XAD_NEW | XAD_EXTENDED)) {
2449				xaddr = addressXAD(xad);
2450				xlen = lengthXAD(xad);
2451				dbUpdatePMap(ipbmap, false, xaddr,
2452					     (s64) xlen, tblk);
2453				xad->flag &= ~(XAD_NEW | XAD_EXTENDED);
2454				jfs_info("allocPMap: xaddr:0x%lx xlen:%d",
2455					 (ulong) xaddr, xlen);
2456			}
2457		}
2458	} else if (maplock->flag & mlckALLOCPXD) {
2459		pxdlock = (struct pxd_lock *) maplock;
2460		xaddr = addressPXD(&pxdlock->pxd);
2461		xlen = lengthPXD(&pxdlock->pxd);
2462		dbUpdatePMap(ipbmap, false, xaddr, (s64) xlen, tblk);
2463		jfs_info("allocPMap: xaddr:0x%lx xlen:%d", (ulong) xaddr, xlen);
2464	} else {		/* (maplock->flag & mlckALLOCPXDLIST) */
2465
2466		pxdlistlock = (struct xdlistlock *) maplock;
2467		pxd = pxdlistlock->xdlist;
2468		for (n = 0; n < pxdlistlock->count; n++, pxd++) {
2469			xaddr = addressPXD(pxd);
2470			xlen = lengthPXD(pxd);
2471			dbUpdatePMap(ipbmap, false, xaddr, (s64) xlen,
2472				     tblk);
2473			jfs_info("allocPMap: xaddr:0x%lx xlen:%d",
2474				 (ulong) xaddr, xlen);
2475		}
2476	}
2477}
2478
2479/*
2480 *	txFreeMap()
2481 *
2482 * function:	free from persistent and/or working map;
2483 *
2484 * todo: optimization
2485 */
2486void txFreeMap(struct inode *ip,
2487	       struct maplock * maplock, struct tblock * tblk, int maptype)
2488{
2489	struct inode *ipbmap = JFS_SBI(ip->i_sb)->ipbmap;
2490	struct xdlistlock *xadlistlock;
2491	xad_t *xad;
2492	s64 xaddr;
2493	int xlen;
2494	struct pxd_lock *pxdlock;
2495	struct xdlistlock *pxdlistlock;
2496	pxd_t *pxd;
2497	int n;
2498
2499	jfs_info("txFreeMap: tblk:0x%p maplock:0x%p maptype:0x%x",
2500		 tblk, maplock, maptype);
2501
2502	/*
2503	 * free from persistent map;
2504	 */
2505	if (maptype == COMMIT_PMAP || maptype == COMMIT_PWMAP) {
2506		if (maplock->flag & mlckFREEXADLIST) {
2507			xadlistlock = (struct xdlistlock *) maplock;
2508			xad = xadlistlock->xdlist;
2509			for (n = 0; n < xadlistlock->count; n++, xad++) {
2510				if (!(xad->flag & XAD_NEW)) {
2511					xaddr = addressXAD(xad);
2512					xlen = lengthXAD(xad);
2513					dbUpdatePMap(ipbmap, true, xaddr,
2514						     (s64) xlen, tblk);
2515					jfs_info("freePMap: xaddr:0x%lx xlen:%d",
2516						 (ulong) xaddr, xlen);
2517				}
2518			}
2519		} else if (maplock->flag & mlckFREEPXD) {
2520			pxdlock = (struct pxd_lock *) maplock;
2521			xaddr = addressPXD(&pxdlock->pxd);
2522			xlen = lengthPXD(&pxdlock->pxd);
2523			dbUpdatePMap(ipbmap, true, xaddr, (s64) xlen,
2524				     tblk);
2525			jfs_info("freePMap: xaddr:0x%lx xlen:%d",
2526				 (ulong) xaddr, xlen);
2527		} else {	/* (maplock->flag & mlckFREEPXDLIST) */
2528
2529			pxdlistlock = (struct xdlistlock *) maplock;
2530			pxd = pxdlistlock->xdlist;
2531			for (n = 0; n < pxdlistlock->count; n++, pxd++) {
2532				xaddr = addressPXD(pxd);
2533				xlen = lengthPXD(pxd);
2534				dbUpdatePMap(ipbmap, true, xaddr,
2535					     (s64) xlen, tblk);
2536				jfs_info("freePMap: xaddr:0x%lx xlen:%d",
2537					 (ulong) xaddr, xlen);
2538			}
2539		}
2540	}
2541
2542	/*
2543	 * free from working map;
2544	 */
2545	if (maptype == COMMIT_PWMAP || maptype == COMMIT_WMAP) {
2546		if (maplock->flag & mlckFREEXADLIST) {
2547			xadlistlock = (struct xdlistlock *) maplock;
2548			xad = xadlistlock->xdlist;
2549			for (n = 0; n < xadlistlock->count; n++, xad++) {
2550				xaddr = addressXAD(xad);
2551				xlen = lengthXAD(xad);
2552				dbFree(ip, xaddr, (s64) xlen);
2553				xad->flag = 0;
2554				jfs_info("freeWMap: xaddr:0x%lx xlen:%d",
2555					 (ulong) xaddr, xlen);
2556			}
2557		} else if (maplock->flag & mlckFREEPXD) {
2558			pxdlock = (struct pxd_lock *) maplock;
2559			xaddr = addressPXD(&pxdlock->pxd);
2560			xlen = lengthPXD(&pxdlock->pxd);
2561			dbFree(ip, xaddr, (s64) xlen);
2562			jfs_info("freeWMap: xaddr:0x%lx xlen:%d",
2563				 (ulong) xaddr, xlen);
2564		} else {	/* (maplock->flag & mlckFREEPXDLIST) */
2565
2566			pxdlistlock = (struct xdlistlock *) maplock;
2567			pxd = pxdlistlock->xdlist;
2568			for (n = 0; n < pxdlistlock->count; n++, pxd++) {
2569				xaddr = addressPXD(pxd);
2570				xlen = lengthPXD(pxd);
2571				dbFree(ip, xaddr, (s64) xlen);
2572				jfs_info("freeWMap: xaddr:0x%lx xlen:%d",
2573					 (ulong) xaddr, xlen);
2574			}
2575		}
2576	}
2577}
2578
2579/*
2580 *	txFreelock()
2581 *
2582 * function:	remove tlock from inode anonymous locklist
2583 */
2584void txFreelock(struct inode *ip)
2585{
2586	struct jfs_inode_info *jfs_ip = JFS_IP(ip);
2587	struct tlock *xtlck, *tlck;
2588	lid_t xlid = 0, lid;
2589
2590	if (!jfs_ip->atlhead)
2591		return;
2592
2593	TXN_LOCK();
2594	xtlck = (struct tlock *) &jfs_ip->atlhead;
2595
2596	while ((lid = xtlck->next) != 0) {
2597		tlck = lid_to_tlock(lid);
2598		if (tlck->flag & tlckFREELOCK) {
2599			xtlck->next = tlck->next;
2600			txLockFree(lid);
2601		} else {
2602			xtlck = tlck;
2603			xlid = lid;
2604		}
2605	}
2606
2607	if (jfs_ip->atlhead)
2608		jfs_ip->atltail = xlid;
2609	else {
2610		jfs_ip->atltail = 0;
2611		/*
2612		 * If inode was on anon_list, remove it
2613		 */
2614		list_del_init(&jfs_ip->anon_inode_list);
2615	}
2616	TXN_UNLOCK();
2617}
2618
2619/*
2620 *	txAbort()
2621 *
2622 * function: abort a transaction before commit;
2623 *
2624 * frees the line-locks and segment locks for all
2625 * segments in the comdata structure.
2626 * Optionally sets the file-system state to FM_DIRTY in the super-block.
2627 * The log ages of in-memory page frames held by the caller
2628 * are reset to 0 (to avoid logwrap).
2629 */
2630void txAbort(tid_t tid, int dirty)
2631{
2632	lid_t lid, next;
2633	struct metapage *mp;
2634	struct tblock *tblk = tid_to_tblock(tid);
2635	struct tlock *tlck;
2636
2637	/*
2638	 * free tlocks of the transaction
2639	 */
2640	for (lid = tblk->next; lid; lid = next) {
2641		tlck = lid_to_tlock(lid);
2642		next = tlck->next;
2643		mp = tlck->mp;
2644		JFS_IP(tlck->ip)->xtlid = 0;
2645
2646		if (mp) {
2647			mp->lid = 0;
2648
2649			/*
2650			 * reset lsn of page to avoid logwrap:
2651			 *
2652			 * (page may have been previously committed by another
2653			 * transaction(s) but has not been paged, i.e.,
2654			 * it may be on logsync list even though it has not
2655			 * been logged for the current tx.)
2656			 */
2657			if (mp->xflag & COMMIT_PAGE && mp->lsn)
2658				LogSyncRelease(mp);
2659		}
2660		/* insert tlock at head of freelist */
2661		TXN_LOCK();
2662		txLockFree(lid);
2663		TXN_UNLOCK();
2664	}
2665
2666	/* caller will free the transaction block */
2667
2668	tblk->next = tblk->last = 0;
2669
2670	/*
2671	 * mark filesystem dirty
2672	 */
2673	if (dirty)
2674		jfs_error(tblk->sb, "\n");
2675
2676	return;
2677}
2678
2679/*
2680 *	txLazyCommit()
2681 *
2682 *	All transactions except those changing ipimap (COMMIT_FORCE) are
2683 *	processed by this routine.  This ensures that the inode and block
2684 *	allocation maps are updated in order.  For synchronous transactions,
2685 *	let the user thread finish processing after txUpdateMap() is called.
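 *
 *	(Per-volume ordering is preserved by jfs_lazycommit(): it walks
 *	TxAnchor.unlock_queue in order and skips any tblock whose superblock
 *	already has IN_LAZYCOMMIT set in commit_state.)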
2686 */ 2687static void txLazyCommit(struct tblock * tblk) 2688{ 2689 struct jfs_log *log; 2690 2691 while (((tblk->flag & tblkGC_READY) == 0) && 2692 ((tblk->flag & tblkGC_UNLOCKED) == 0)) { 2693 /* We must have gotten ahead of the user thread 2694 */ 2695 jfs_info("jfs_lazycommit: tblk 0x%p not unlocked", tblk); 2696 yield(); 2697 } 2698 2699 jfs_info("txLazyCommit: processing tblk 0x%p", tblk); 2700 2701 txUpdateMap(tblk); 2702 2703 log = (struct jfs_log *) JFS_SBI(tblk->sb)->log; 2704 2705 spin_lock_irq(&log->gclock); // LOGGC_LOCK 2706 2707 tblk->flag |= tblkGC_COMMITTED; 2708 2709 if (tblk->flag & tblkGC_READY) 2710 log->gcrtc--; 2711 2712 wake_up_all(&tblk->gcwait); // LOGGC_WAKEUP 2713 2714 /* 2715 * Can't release log->gclock until we've tested tblk->flag 2716 */ 2717 if (tblk->flag & tblkGC_LAZY) { 2718 spin_unlock_irq(&log->gclock); // LOGGC_UNLOCK 2719 txUnlock(tblk); 2720 tblk->flag &= ~tblkGC_LAZY; 2721 txEnd(tblk - TxBlock); /* Convert back to tid */ 2722 } else 2723 spin_unlock_irq(&log->gclock); // LOGGC_UNLOCK 2724 2725 jfs_info("txLazyCommit: done: tblk = 0x%p", tblk); 2726} 2727 2728/* 2729 * jfs_lazycommit(void) 2730 * 2731 * To be run as a kernel daemon. If lbmIODone is called in an interrupt 2732 * context, or where blocking is not wanted, this routine will process 2733 * committed transactions from the unlock queue. 2734 */ 2735int jfs_lazycommit(void *arg) 2736{ 2737 int WorkDone; 2738 struct tblock *tblk; 2739 unsigned long flags; 2740 struct jfs_sb_info *sbi; 2741 2742 do { 2743 LAZY_LOCK(flags); 2744 jfs_commit_thread_waking = 0; /* OK to wake another thread */ 2745 while (!list_empty(&TxAnchor.unlock_queue)) { 2746 WorkDone = 0; 2747 list_for_each_entry(tblk, &TxAnchor.unlock_queue, 2748 cqueue) { 2749 2750 sbi = JFS_SBI(tblk->sb); 2751 /* 2752 * For each volume, the transactions must be 2753 * handled in order. If another commit thread 2754 * is handling a tblk for this superblock, 2755 * skip it 2756 */ 2757 if (sbi->commit_state & IN_LAZYCOMMIT) 2758 continue; 2759 2760 sbi->commit_state |= IN_LAZYCOMMIT; 2761 WorkDone = 1; 2762 2763 /* 2764 * Remove transaction from queue 2765 */ 2766 list_del(&tblk->cqueue); 2767 2768 LAZY_UNLOCK(flags); 2769 txLazyCommit(tblk); 2770 LAZY_LOCK(flags); 2771 2772 sbi->commit_state &= ~IN_LAZYCOMMIT; 2773 /* 2774 * Don't continue in the for loop. (We can't 2775 * anyway, it's unsafe!) We want to go back to 2776 * the beginning of the list. 
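				 * (list_for_each_entry() cannot safely
				 * continue once the entry has been removed
				 * with list_del() and LAZY_LOCK has been
				 * dropped, so the scan restarts from the
				 * head of unlock_queue.)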
2777 */ 2778 break; 2779 } 2780 2781 /* If there was nothing to do, don't continue */ 2782 if (!WorkDone) 2783 break; 2784 } 2785 /* In case a wakeup came while all threads were active */ 2786 jfs_commit_thread_waking = 0; 2787 2788 if (freezing(current)) { 2789 LAZY_UNLOCK(flags); 2790 try_to_freeze(); 2791 } else { 2792 DECLARE_WAITQUEUE(wq, current); 2793 2794 add_wait_queue(&jfs_commit_thread_wait, &wq); 2795 set_current_state(TASK_INTERRUPTIBLE); 2796 LAZY_UNLOCK(flags); 2797 schedule(); 2798 remove_wait_queue(&jfs_commit_thread_wait, &wq); 2799 } 2800 } while (!kthread_should_stop()); 2801 2802 if (!list_empty(&TxAnchor.unlock_queue)) 2803 jfs_err("jfs_lazycommit being killed w/pending transactions!"); 2804 else 2805 jfs_info("jfs_lazycommit being killed"); 2806 return 0; 2807} 2808 2809void txLazyUnlock(struct tblock * tblk) 2810{ 2811 unsigned long flags; 2812 2813 LAZY_LOCK(flags); 2814 2815 list_add_tail(&tblk->cqueue, &TxAnchor.unlock_queue); 2816 /* 2817 * Don't wake up a commit thread if there is already one servicing 2818 * this superblock, or if the last one we woke up hasn't started yet. 2819 */ 2820 if (!(JFS_SBI(tblk->sb)->commit_state & IN_LAZYCOMMIT) && 2821 !jfs_commit_thread_waking) { 2822 jfs_commit_thread_waking = 1; 2823 wake_up(&jfs_commit_thread_wait); 2824 } 2825 LAZY_UNLOCK(flags); 2826} 2827 2828static void LogSyncRelease(struct metapage * mp) 2829{ 2830 struct jfs_log *log = mp->log; 2831 2832 assert(mp->nohomeok); 2833 assert(log); 2834 metapage_homeok(mp); 2835} 2836 2837/* 2838 * txQuiesce 2839 * 2840 * Block all new transactions and push anonymous transactions to 2841 * completion 2842 * 2843 * This does almost the same thing as jfs_sync below. We don't 2844 * worry about deadlocking when jfs_tlocks_low is set, since we would 2845 * expect jfs_sync to get us out of that jam. 2846 */ 2847void txQuiesce(struct super_block *sb) 2848{ 2849 struct inode *ip; 2850 struct jfs_inode_info *jfs_ip; 2851 struct jfs_log *log = JFS_SBI(sb)->log; 2852 tid_t tid; 2853 2854 set_bit(log_QUIESCE, &log->flag); 2855 2856 TXN_LOCK(); 2857restart: 2858 while (!list_empty(&TxAnchor.anon_list)) { 2859 jfs_ip = list_entry(TxAnchor.anon_list.next, 2860 struct jfs_inode_info, 2861 anon_inode_list); 2862 ip = &jfs_ip->vfs_inode; 2863 2864 /* 2865 * inode will be removed from anonymous list 2866 * when it is committed 2867 */ 2868 TXN_UNLOCK(); 2869 tid = txBegin(ip->i_sb, COMMIT_INODE | COMMIT_FORCE); 2870 mutex_lock(&jfs_ip->commit_mutex); 2871 txCommit(tid, 1, &ip, 0); 2872 txEnd(tid); 2873 mutex_unlock(&jfs_ip->commit_mutex); 2874 /* 2875 * Just to be safe. I don't know how 2876 * long we can run without blocking 2877 */ 2878 cond_resched(); 2879 TXN_LOCK(); 2880 } 2881 2882 /* 2883 * If jfs_sync is running in parallel, there could be some inodes 2884 * on anon_list2. Let's check. 2885 */ 2886 if (!list_empty(&TxAnchor.anon_list2)) { 2887 list_splice_init(&TxAnchor.anon_list2, &TxAnchor.anon_list); 2888 goto restart; 2889 } 2890 TXN_UNLOCK(); 2891 2892 /* 2893 * We may need to kick off the group commit 2894 */ 2895 jfs_flush_journal(log, 0); 2896} 2897 2898/* 2899 * txResume() 2900 * 2901 * Allows transactions to start again following txQuiesce 2902 */ 2903void txResume(struct super_block *sb) 2904{ 2905 struct jfs_log *log = JFS_SBI(sb)->log; 2906 2907 clear_bit(log_QUIESCE, &log->flag); 2908 TXN_WAKEUP(&log->syncwait); 2909} 2910 2911/* 2912 * jfs_sync(void) 2913 * 2914 * To be run as a kernel daemon. This is awakened when tlocks run low. 
2915 * We write any inodes that have anonymous tlocks so they will become 2916 * available. 2917 */ 2918int jfs_sync(void *arg) 2919{ 2920 struct inode *ip; 2921 struct jfs_inode_info *jfs_ip; 2922 tid_t tid; 2923 2924 do { 2925 /* 2926 * write each inode on the anonymous inode list 2927 */ 2928 TXN_LOCK(); 2929 while (jfs_tlocks_low && !list_empty(&TxAnchor.anon_list)) { 2930 jfs_ip = list_entry(TxAnchor.anon_list.next, 2931 struct jfs_inode_info, 2932 anon_inode_list); 2933 ip = &jfs_ip->vfs_inode; 2934 2935 if (! igrab(ip)) { 2936 /* 2937 * Inode is being freed 2938 */ 2939 list_del_init(&jfs_ip->anon_inode_list); 2940 } else if (mutex_trylock(&jfs_ip->commit_mutex)) { 2941 /* 2942 * inode will be removed from anonymous list 2943 * when it is committed 2944 */ 2945 TXN_UNLOCK(); 2946 tid = txBegin(ip->i_sb, COMMIT_INODE); 2947 txCommit(tid, 1, &ip, 0); 2948 txEnd(tid); 2949 mutex_unlock(&jfs_ip->commit_mutex); 2950 2951 iput(ip); 2952 /* 2953 * Just to be safe. I don't know how 2954 * long we can run without blocking 2955 */ 2956 cond_resched(); 2957 TXN_LOCK(); 2958 } else { 2959 /* We can't get the commit mutex. It may 2960 * be held by a thread waiting for tlock's 2961 * so let's not block here. Save it to 2962 * put back on the anon_list. 2963 */ 2964 2965 /* Move from anon_list to anon_list2 */ 2966 list_move(&jfs_ip->anon_inode_list, 2967 &TxAnchor.anon_list2); 2968 2969 TXN_UNLOCK(); 2970 iput(ip); 2971 TXN_LOCK(); 2972 } 2973 } 2974 /* Add anon_list2 back to anon_list */ 2975 list_splice_init(&TxAnchor.anon_list2, &TxAnchor.anon_list); 2976 2977 if (freezing(current)) { 2978 TXN_UNLOCK(); 2979 try_to_freeze(); 2980 } else { 2981 set_current_state(TASK_INTERRUPTIBLE); 2982 TXN_UNLOCK(); 2983 schedule(); 2984 } 2985 } while (!kthread_should_stop()); 2986 2987 jfs_info("jfs_sync being killed"); 2988 return 0; 2989} 2990 2991#if defined(CONFIG_PROC_FS) && defined(CONFIG_JFS_DEBUG) 2992int jfs_txanchor_proc_show(struct seq_file *m, void *v) 2993{ 2994 char *freewait; 2995 char *freelockwait; 2996 char *lowlockwait; 2997 2998 freewait = 2999 waitqueue_active(&TxAnchor.freewait) ? "active" : "empty"; 3000 freelockwait = 3001 waitqueue_active(&TxAnchor.freelockwait) ? "active" : "empty"; 3002 lowlockwait = 3003 waitqueue_active(&TxAnchor.lowlockwait) ? "active" : "empty"; 3004 3005 seq_printf(m, 3006 "JFS TxAnchor\n" 3007 "============\n" 3008 "freetid = %d\n" 3009 "freewait = %s\n" 3010 "freelock = %d\n" 3011 "freelockwait = %s\n" 3012 "lowlockwait = %s\n" 3013 "tlocksInUse = %d\n" 3014 "jfs_tlocks_low = %d\n" 3015 "unlock_queue is %sempty\n", 3016 TxAnchor.freetid, 3017 freewait, 3018 TxAnchor.freelock, 3019 freelockwait, 3020 lowlockwait, 3021 TxAnchor.tlocksInUse, 3022 jfs_tlocks_low, 3023 list_empty(&TxAnchor.unlock_queue) ? 
"" : "not "); 3024 return 0; 3025} 3026#endif 3027 3028#if defined(CONFIG_PROC_FS) && defined(CONFIG_JFS_STATISTICS) 3029int jfs_txstats_proc_show(struct seq_file *m, void *v) 3030{ 3031 seq_printf(m, 3032 "JFS TxStats\n" 3033 "===========\n" 3034 "calls to txBegin = %d\n" 3035 "txBegin blocked by sync barrier = %d\n" 3036 "txBegin blocked by tlocks low = %d\n" 3037 "txBegin blocked by no free tid = %d\n" 3038 "calls to txBeginAnon = %d\n" 3039 "txBeginAnon blocked by sync barrier = %d\n" 3040 "txBeginAnon blocked by tlocks low = %d\n" 3041 "calls to txLockAlloc = %d\n" 3042 "tLockAlloc blocked by no free lock = %d\n", 3043 TxStat.txBegin, 3044 TxStat.txBegin_barrier, 3045 TxStat.txBegin_lockslow, 3046 TxStat.txBegin_freetid, 3047 TxStat.txBeginAnon, 3048 TxStat.txBeginAnon_barrier, 3049 TxStat.txBeginAnon_lockslow, 3050 TxStat.txLockAlloc, 3051 TxStat.txLockAlloc_freelock); 3052 return 0; 3053} 3054#endif 3055