// SPDX-License-Identifier: GPL-2.0-or-later
/*
 *   Copyright (C) International Business Machines Corp., 2000-2004
 */

/*
 *	jfs_imap.c: inode allocation map manager
 *
 * Serialization:
 *   Each AG has a simple lock which is used to control the serialization of
 *	the AG level lists.  This lock should be taken first whenever an AG
 *	level list will be modified or accessed.
 *
 *   Each IAG is locked by obtaining the buffer for the IAG page.
 *
 *   There is also an inode lock for the inode map inode.  A read lock needs
 *	to be taken whenever an IAG is read from the map or the global level
 *	information is read.  A write lock needs to be taken whenever the global
 *	level information is modified or an atomic operation needs to be used.
 *
 *	If more than one IAG is read at one time, the read lock may not
 *	be given up until all of the IAGs are read.  Otherwise, a deadlock
 *	may occur when trying to obtain the read lock while another thread
 *	holding the read lock is waiting on the IAG already being held.
 *
 *   The control page of the inode map is read into memory by diMount().
 *	Thereafter it should only be modified in memory and then it will be
 *	written out when the filesystem is unmounted by diUnmount().
29 */ 30 31#include <linux/fs.h> 32#include <linux/buffer_head.h> 33#include <linux/pagemap.h> 34#include <linux/quotaops.h> 35#include <linux/slab.h> 36 37#include "jfs_incore.h" 38#include "jfs_inode.h" 39#include "jfs_filsys.h" 40#include "jfs_dinode.h" 41#include "jfs_dmap.h" 42#include "jfs_imap.h" 43#include "jfs_metapage.h" 44#include "jfs_superblock.h" 45#include "jfs_debug.h" 46 47/* 48 * imap locks 49 */ 50/* iag free list lock */ 51#define IAGFREE_LOCK_INIT(imap) mutex_init(&imap->im_freelock) 52#define IAGFREE_LOCK(imap) mutex_lock(&imap->im_freelock) 53#define IAGFREE_UNLOCK(imap) mutex_unlock(&imap->im_freelock) 54 55/* per ag iag list locks */ 56#define AG_LOCK_INIT(imap,index) mutex_init(&(imap->im_aglock[index])) 57#define AG_LOCK(imap,agno) mutex_lock(&imap->im_aglock[agno]) 58#define AG_UNLOCK(imap,agno) mutex_unlock(&imap->im_aglock[agno]) 59 60/* 61 * forward references 62 */ 63static int diAllocAG(struct inomap *, int, bool, struct inode *); 64static int diAllocAny(struct inomap *, int, bool, struct inode *); 65static int diAllocBit(struct inomap *, struct iag *, int); 66static int diAllocExt(struct inomap *, int, struct inode *); 67static int diAllocIno(struct inomap *, int, struct inode *); 68static int diFindFree(u32, int); 69static int diNewExt(struct inomap *, struct iag *, int); 70static int diNewIAG(struct inomap *, int *, int, struct metapage **); 71static void duplicateIXtree(struct super_block *, s64, int, s64 *); 72 73static int diIAGRead(struct inomap * imap, int, struct metapage **); 74static int copy_from_dinode(struct dinode *, struct inode *); 75static void copy_to_dinode(struct dinode *, struct inode *); 76 77/* 78 * NAME: diMount() 79 * 80 * FUNCTION: initialize the incore inode map control structures for 81 * a fileset or aggregate init time. 82 * 83 * the inode map's control structure (dinomap) is 84 * brought in from disk and placed in virtual memory. 
85 * 86 * PARAMETERS: 87 * ipimap - pointer to inode map inode for the aggregate or fileset. 88 * 89 * RETURN VALUES: 90 * 0 - success 91 * -ENOMEM - insufficient free virtual memory. 92 * -EIO - i/o error. 93 */ 94int diMount(struct inode *ipimap) 95{ 96 struct inomap *imap; 97 struct metapage *mp; 98 int index; 99 struct dinomap_disk *dinom_le; 100 101 /* 102 * allocate/initialize the in-memory inode map control structure 103 */ 104 /* allocate the in-memory inode map control structure. */ 105 imap = kmalloc(sizeof(struct inomap), GFP_KERNEL); 106 if (imap == NULL) { 107 jfs_err("diMount: kmalloc returned NULL!"); 108 return -ENOMEM; 109 } 110 111 /* read the on-disk inode map control structure. */ 112 113 mp = read_metapage(ipimap, 114 IMAPBLKNO << JFS_SBI(ipimap->i_sb)->l2nbperpage, 115 PSIZE, 0); 116 if (mp == NULL) { 117 kfree(imap); 118 return -EIO; 119 } 120 121 /* copy the on-disk version to the in-memory version. */ 122 dinom_le = (struct dinomap_disk *) mp->data; 123 imap->im_freeiag = le32_to_cpu(dinom_le->in_freeiag); 124 imap->im_nextiag = le32_to_cpu(dinom_le->in_nextiag); 125 atomic_set(&imap->im_numinos, le32_to_cpu(dinom_le->in_numinos)); 126 atomic_set(&imap->im_numfree, le32_to_cpu(dinom_le->in_numfree)); 127 imap->im_nbperiext = le32_to_cpu(dinom_le->in_nbperiext); 128 imap->im_l2nbperiext = le32_to_cpu(dinom_le->in_l2nbperiext); 129 for (index = 0; index < MAXAG; index++) { 130 imap->im_agctl[index].inofree = 131 le32_to_cpu(dinom_le->in_agctl[index].inofree); 132 imap->im_agctl[index].extfree = 133 le32_to_cpu(dinom_le->in_agctl[index].extfree); 134 imap->im_agctl[index].numinos = 135 le32_to_cpu(dinom_le->in_agctl[index].numinos); 136 imap->im_agctl[index].numfree = 137 le32_to_cpu(dinom_le->in_agctl[index].numfree); 138 } 139 140 /* release the buffer. 
*/ 141 release_metapage(mp); 142 143 /* 144 * allocate/initialize inode allocation map locks 145 */ 146 /* allocate and init iag free list lock */ 147 IAGFREE_LOCK_INIT(imap); 148 149 /* allocate and init ag list locks */ 150 for (index = 0; index < MAXAG; index++) { 151 AG_LOCK_INIT(imap, index); 152 } 153 154 /* bind the inode map inode and inode map control structure 155 * to each other. 156 */ 157 imap->im_ipimap = ipimap; 158 JFS_IP(ipimap)->i_imap = imap; 159 160 return (0); 161} 162 163 164/* 165 * NAME: diUnmount() 166 * 167 * FUNCTION: write to disk the incore inode map control structures for 168 * a fileset or aggregate at unmount time. 169 * 170 * PARAMETERS: 171 * ipimap - pointer to inode map inode for the aggregate or fileset. 172 * 173 * RETURN VALUES: 174 * 0 - success 175 * -ENOMEM - insufficient free virtual memory. 176 * -EIO - i/o error. 177 */ 178int diUnmount(struct inode *ipimap, int mounterror) 179{ 180 struct inomap *imap = JFS_IP(ipimap)->i_imap; 181 182 /* 183 * update the on-disk inode map control structure 184 */ 185 186 if (!(mounterror || isReadOnly(ipimap))) 187 diSync(ipimap); 188 189 /* 190 * Invalidate the page cache buffers 191 */ 192 truncate_inode_pages(ipimap->i_mapping, 0); 193 194 /* 195 * free in-memory control structure 196 */ 197 kfree(imap); 198 JFS_IP(ipimap)->i_imap = NULL; 199 200 return (0); 201} 202 203 204/* 205 * diSync() 206 */ 207int diSync(struct inode *ipimap) 208{ 209 struct dinomap_disk *dinom_le; 210 struct inomap *imp = JFS_IP(ipimap)->i_imap; 211 struct metapage *mp; 212 int index; 213 214 /* 215 * write imap global conrol page 216 */ 217 /* read the on-disk inode map control structure */ 218 mp = get_metapage(ipimap, 219 IMAPBLKNO << JFS_SBI(ipimap->i_sb)->l2nbperpage, 220 PSIZE, 0); 221 if (mp == NULL) { 222 jfs_err("diSync: get_metapage failed!"); 223 return -EIO; 224 } 225 226 /* copy the in-memory version to the on-disk version */ 227 dinom_le = (struct dinomap_disk *) mp->data; 228 
dinom_le->in_freeiag = cpu_to_le32(imp->im_freeiag); 229 dinom_le->in_nextiag = cpu_to_le32(imp->im_nextiag); 230 dinom_le->in_numinos = cpu_to_le32(atomic_read(&imp->im_numinos)); 231 dinom_le->in_numfree = cpu_to_le32(atomic_read(&imp->im_numfree)); 232 dinom_le->in_nbperiext = cpu_to_le32(imp->im_nbperiext); 233 dinom_le->in_l2nbperiext = cpu_to_le32(imp->im_l2nbperiext); 234 for (index = 0; index < MAXAG; index++) { 235 dinom_le->in_agctl[index].inofree = 236 cpu_to_le32(imp->im_agctl[index].inofree); 237 dinom_le->in_agctl[index].extfree = 238 cpu_to_le32(imp->im_agctl[index].extfree); 239 dinom_le->in_agctl[index].numinos = 240 cpu_to_le32(imp->im_agctl[index].numinos); 241 dinom_le->in_agctl[index].numfree = 242 cpu_to_le32(imp->im_agctl[index].numfree); 243 } 244 245 /* write out the control structure */ 246 write_metapage(mp); 247 248 /* 249 * write out dirty pages of imap 250 */ 251 filemap_write_and_wait(ipimap->i_mapping); 252 253 diWriteSpecial(ipimap, 0); 254 255 return (0); 256} 257 258 259/* 260 * NAME: diRead() 261 * 262 * FUNCTION: initialize an incore inode from disk. 263 * 264 * on entry, the specifed incore inode should itself 265 * specify the disk inode number corresponding to the 266 * incore inode (i.e. i_number should be initialized). 267 * 268 * this routine handles incore inode initialization for 269 * both "special" and "regular" inodes. special inodes 270 * are those required early in the mount process and 271 * require special handling since much of the file system 272 * is not yet initialized. these "special" inodes are 273 * identified by a NULL inode map inode pointer and are 274 * actually initialized by a call to diReadSpecial(). 275 * 276 * for regular inodes, the iag describing the disk inode 277 * is read from disk to determine the inode extent address 278 * for the disk inode. 
with the inode extent address in 279 * hand, the page of the extent that contains the disk 280 * inode is read and the disk inode is copied to the 281 * incore inode. 282 * 283 * PARAMETERS: 284 * ip - pointer to incore inode to be initialized from disk. 285 * 286 * RETURN VALUES: 287 * 0 - success 288 * -EIO - i/o error. 289 * -ENOMEM - insufficient memory 290 * 291 */ 292int diRead(struct inode *ip) 293{ 294 struct jfs_sb_info *sbi = JFS_SBI(ip->i_sb); 295 int iagno, ino, extno, rc; 296 struct inode *ipimap; 297 struct dinode *dp; 298 struct iag *iagp; 299 struct metapage *mp; 300 s64 blkno, agstart; 301 struct inomap *imap; 302 int block_offset; 303 int inodes_left; 304 unsigned long pageno; 305 int rel_inode; 306 307 jfs_info("diRead: ino = %ld", ip->i_ino); 308 309 ipimap = sbi->ipimap; 310 JFS_IP(ip)->ipimap = ipimap; 311 312 /* determine the iag number for this inode (number) */ 313 iagno = INOTOIAG(ip->i_ino); 314 315 /* read the iag */ 316 imap = JFS_IP(ipimap)->i_imap; 317 IREAD_LOCK(ipimap, RDWRLOCK_IMAP); 318 rc = diIAGRead(imap, iagno, &mp); 319 IREAD_UNLOCK(ipimap); 320 if (rc) { 321 jfs_err("diRead: diIAGRead returned %d", rc); 322 return (rc); 323 } 324 325 iagp = (struct iag *) mp->data; 326 327 /* determine inode extent that holds the disk inode */ 328 ino = ip->i_ino & (INOSPERIAG - 1); 329 extno = ino >> L2INOSPEREXT; 330 331 if ((lengthPXD(&iagp->inoext[extno]) != imap->im_nbperiext) || 332 (addressPXD(&iagp->inoext[extno]) == 0)) { 333 release_metapage(mp); 334 return -ESTALE; 335 } 336 337 /* get disk block number of the page within the inode extent 338 * that holds the disk inode. 
339 */ 340 blkno = INOPBLK(&iagp->inoext[extno], ino, sbi->l2nbperpage); 341 342 /* get the ag for the iag */ 343 agstart = le64_to_cpu(iagp->agstart); 344 345 release_metapage(mp); 346 347 rel_inode = (ino & (INOSPERPAGE - 1)); 348 pageno = blkno >> sbi->l2nbperpage; 349 350 if ((block_offset = ((u32) blkno & (sbi->nbperpage - 1)))) { 351 /* 352 * OS/2 didn't always align inode extents on page boundaries 353 */ 354 inodes_left = 355 (sbi->nbperpage - block_offset) << sbi->l2niperblk; 356 357 if (rel_inode < inodes_left) 358 rel_inode += block_offset << sbi->l2niperblk; 359 else { 360 pageno += 1; 361 rel_inode -= inodes_left; 362 } 363 } 364 365 /* read the page of disk inode */ 366 mp = read_metapage(ipimap, pageno << sbi->l2nbperpage, PSIZE, 1); 367 if (!mp) { 368 jfs_err("diRead: read_metapage failed"); 369 return -EIO; 370 } 371 372 /* locate the disk inode requested */ 373 dp = (struct dinode *) mp->data; 374 dp += rel_inode; 375 376 if (ip->i_ino != le32_to_cpu(dp->di_number)) { 377 jfs_error(ip->i_sb, "i_ino != di_number\n"); 378 rc = -EIO; 379 } else if (le32_to_cpu(dp->di_nlink) == 0) 380 rc = -ESTALE; 381 else 382 /* copy the disk inode to the in-memory inode */ 383 rc = copy_from_dinode(dp, ip); 384 385 release_metapage(mp); 386 387 /* set the ag for the inode */ 388 JFS_IP(ip)->agstart = agstart; 389 JFS_IP(ip)->active_ag = -1; 390 391 return (rc); 392} 393 394 395/* 396 * NAME: diReadSpecial() 397 * 398 * FUNCTION: initialize a 'special' inode from disk. 399 * 400 * this routines handles aggregate level inodes. The 401 * inode cache cannot differentiate between the 402 * aggregate inodes and the filesystem inodes, so we 403 * handle these here. We don't actually use the aggregate 404 * inode map, since these inodes are at a fixed location 405 * and in some cases the aggregate inode map isn't initialized 406 * yet. 
407 * 408 * PARAMETERS: 409 * sb - filesystem superblock 410 * inum - aggregate inode number 411 * secondary - 1 if secondary aggregate inode table 412 * 413 * RETURN VALUES: 414 * new inode - success 415 * NULL - i/o error. 416 */ 417struct inode *diReadSpecial(struct super_block *sb, ino_t inum, int secondary) 418{ 419 struct jfs_sb_info *sbi = JFS_SBI(sb); 420 uint address; 421 struct dinode *dp; 422 struct inode *ip; 423 struct metapage *mp; 424 425 ip = new_inode(sb); 426 if (ip == NULL) { 427 jfs_err("diReadSpecial: new_inode returned NULL!"); 428 return ip; 429 } 430 431 if (secondary) { 432 address = addressPXD(&sbi->ait2) >> sbi->l2nbperpage; 433 JFS_IP(ip)->ipimap = sbi->ipaimap2; 434 } else { 435 address = AITBL_OFF >> L2PSIZE; 436 JFS_IP(ip)->ipimap = sbi->ipaimap; 437 } 438 439 ASSERT(inum < INOSPEREXT); 440 441 ip->i_ino = inum; 442 443 address += inum >> 3; /* 8 inodes per 4K page */ 444 445 /* read the page of fixed disk inode (AIT) in raw mode */ 446 mp = read_metapage(ip, address << sbi->l2nbperpage, PSIZE, 1); 447 if (mp == NULL) { 448 set_nlink(ip, 1); /* Don't want iput() deleting it */ 449 iput(ip); 450 return (NULL); 451 } 452 453 /* get the pointer to the disk inode of interest */ 454 dp = (struct dinode *) (mp->data); 455 dp += inum % 8; /* 8 inodes per 4K page */ 456 457 /* copy on-disk inode to in-memory inode */ 458 if ((copy_from_dinode(dp, ip)) != 0) { 459 /* handle bad return by returning NULL for ip */ 460 set_nlink(ip, 1); /* Don't want iput() deleting it */ 461 iput(ip); 462 /* release the page */ 463 release_metapage(mp); 464 return (NULL); 465 466 } 467 468 ip->i_mapping->a_ops = &jfs_metapage_aops; 469 mapping_set_gfp_mask(ip->i_mapping, GFP_NOFS); 470 471 /* Allocations to metadata inodes should not affect quotas */ 472 ip->i_flags |= S_NOQUOTA; 473 474 if ((inum == FILESYSTEM_I) && (JFS_IP(ip)->ipimap == sbi->ipaimap)) { 475 sbi->gengen = le32_to_cpu(dp->di_gengen); 476 sbi->inostamp = le32_to_cpu(dp->di_inostamp); 477 } 478 
479 /* release the page */ 480 release_metapage(mp); 481 482 inode_fake_hash(ip); 483 484 return (ip); 485} 486 487/* 488 * NAME: diWriteSpecial() 489 * 490 * FUNCTION: Write the special inode to disk 491 * 492 * PARAMETERS: 493 * ip - special inode 494 * secondary - 1 if secondary aggregate inode table 495 * 496 * RETURN VALUES: none 497 */ 498 499void diWriteSpecial(struct inode *ip, int secondary) 500{ 501 struct jfs_sb_info *sbi = JFS_SBI(ip->i_sb); 502 uint address; 503 struct dinode *dp; 504 ino_t inum = ip->i_ino; 505 struct metapage *mp; 506 507 if (secondary) 508 address = addressPXD(&sbi->ait2) >> sbi->l2nbperpage; 509 else 510 address = AITBL_OFF >> L2PSIZE; 511 512 ASSERT(inum < INOSPEREXT); 513 514 address += inum >> 3; /* 8 inodes per 4K page */ 515 516 /* read the page of fixed disk inode (AIT) in raw mode */ 517 mp = read_metapage(ip, address << sbi->l2nbperpage, PSIZE, 1); 518 if (mp == NULL) { 519 jfs_err("diWriteSpecial: failed to read aggregate inode extent!"); 520 return; 521 } 522 523 /* get the pointer to the disk inode of interest */ 524 dp = (struct dinode *) (mp->data); 525 dp += inum % 8; /* 8 inodes per 4K page */ 526 527 /* copy on-disk inode to in-memory inode */ 528 copy_to_dinode(dp, ip); 529 memcpy(&dp->di_xtroot, &JFS_IP(ip)->i_xtroot, 288); 530 531 if (inum == FILESYSTEM_I) 532 dp->di_gengen = cpu_to_le32(sbi->gengen); 533 534 /* write the page */ 535 write_metapage(mp); 536} 537 538/* 539 * NAME: diFreeSpecial() 540 * 541 * FUNCTION: Free allocated space for special inode 542 */ 543void diFreeSpecial(struct inode *ip) 544{ 545 if (ip == NULL) { 546 jfs_err("diFreeSpecial called with NULL ip!"); 547 return; 548 } 549 filemap_write_and_wait(ip->i_mapping); 550 truncate_inode_pages(ip->i_mapping, 0); 551 iput(ip); 552} 553 554 555 556/* 557 * NAME: diWrite() 558 * 559 * FUNCTION: write the on-disk inode portion of the in-memory inode 560 * to its corresponding on-disk inode. 
*
 *		on entry, the specified incore inode should itself
 *		specify the disk inode number corresponding to the
 *		incore inode (i.e. i_number should be initialized).
 *
 *		the inode contains the inode extent address for the disk
 *		inode.  with the inode extent address in hand, the
 *		page of the extent that contains the disk inode is
 *		read and the disk inode portion of the incore inode
 *		is copied to the disk inode.
 *
 *		the regions of the disk inode that are updated are also
 *		recorded as line-lock vectors on the transaction lock
 *		taken on the inode page.
 *
 * PARAMETERS:
 *	tid -  transaction id
 *	ip  -  pointer to incore inode to be written to the inode extent.
 *
 * RETURN VALUES:
 *	0	- success
 *	-EIO	- i/o error, or the inode's cached extent descriptor
 *		  (ixpxd) is invalid.
 */
int diWrite(tid_t tid, struct inode *ip)
{
	struct jfs_sb_info *sbi = JFS_SBI(ip->i_sb);
	struct jfs_inode_info *jfs_ip = JFS_IP(ip);
	/* rc is never reassigned below; the normal exit returns 0 */
	int rc = 0;
	s32 ino;
	struct dinode *dp;
	s64 blkno;
	int block_offset;
	int inodes_left;
	struct metapage *mp;
	unsigned long pageno;
	int rel_inode;
	int dioffset;
	struct inode *ipimap;
	uint type;
	lid_t lid;
	struct tlock *ditlck, *tlck;
	struct linelock *dilinelock, *ilinelock;
	struct lv *lv;
	int n;

	ipimap = jfs_ip->ipimap;

	/* index of the inode within its iag */
	ino = ip->i_ino & (INOSPERIAG - 1);

	/* the cached extent descriptor must have a non-zero address and
	 * the extent length recorded in the inode map
	 */
	if (!addressPXD(&(jfs_ip->ixpxd)) ||
	    (lengthPXD(&(jfs_ip->ixpxd)) !=
	     JFS_IP(ipimap)->i_imap->im_nbperiext)) {
		jfs_error(ip->i_sb, "ixpxd invalid\n");
		return -EIO;
	}

	/*
	 * read the page of disk inode containing the specified inode:
	 */
	/* compute the block address of the page */
	blkno = INOPBLK(&(jfs_ip->ixpxd), ino, sbi->l2nbperpage);

	rel_inode = (ino & (INOSPERPAGE - 1));
	pageno = blkno >> sbi->l2nbperpage;

	if ((block_offset = ((u32) blkno & (sbi->nbperpage - 1)))) {
		/*
		 * OS/2 didn't always align inode extents on page boundaries
		 *
		 * inodes_left is the number of inodes of the extent page
		 * that fit in what remains of the current page; the inode
		 * is either further into this page or on the next one.
		 */
		inodes_left =
		    (sbi->nbperpage - block_offset) << sbi->l2niperblk;

		if (rel_inode < inodes_left)
			rel_inode += block_offset << sbi->l2niperblk;
		else {
			pageno += 1;
			rel_inode -= inodes_left;
		}
	}
	/* read the page of disk inode */
      retry:
	mp = read_metapage(ipimap, pageno << sbi->l2nbperpage, PSIZE, 1);
	if (!mp)
		return -EIO;

	/* get the pointer to the disk inode */
	dp = (struct dinode *) mp->data;
	dp += rel_inode;

	/*
	 * byte offset of the disk inode used for the line-lock vectors.
	 * NOTE(review): this is derived from ino, not from the
	 * alignment-adjusted rel_inode used for dp above - confirm that
	 * this is intended for unaligned inode extents.
	 */
	dioffset = (ino & (INOSPERPAGE - 1)) << L2DISIZE;

	/*
	 * acquire transaction lock on the on-disk inode;
	 * N.B. tlock is acquired on ipimap not ip;
	 *
	 * a NULL return restarts from the page read.
	 * NOTE(review): presumably txLock() has already given up mp in
	 * that case - confirm against the transaction manager.
	 */
	if ((ditlck =
	     txLock(tid, ipimap, mp, tlckINODE | tlckENTRY)) == NULL)
		goto retry;
	dilinelock = (struct linelock *) & ditlck->lock;

	/*
	 * copy btree root from in-memory inode to on-disk inode
	 *
	 * (tlock is taken from inline B+-tree root in in-memory
	 * inode when the B+-tree root is updated, which is pointed
	 * by jfs_ip->blid as well as being on tx tlock list)
	 *
	 * further processing of btree root is based on the copy
	 * in in-memory inode, where txLog() will log from, and,
	 * for xtree root, txUpdateMap() will update map and reset
	 * XAD_NEW bit;
	 */

	if (S_ISDIR(ip->i_mode) && (lid = jfs_ip->xtlid)) {
		/*
		 * This is the special xtree inside the directory for storing
		 * the directory table
		 */
		xtpage_t *p, *xp;
		xad_t *xad;

		/* the lock id is consumed here */
		jfs_ip->xtlid = 0;
		tlck = lid_to_tlock(lid);
		assert(tlck->type & tlckXTREE);
		tlck->type |= tlckBTROOT;
		tlck->mp = mp;
		ilinelock = (struct linelock *) & tlck->lock;

		/*
		 * copy xtree root from inode to dinode:
		 * only the slot ranges recorded in the line lock are copied
		 */
		p = &jfs_ip->i_xtroot;
		xp = (xtpage_t *) &dp->di_dirtable;
		lv = ilinelock->lv;
		for (n = 0; n < ilinelock->index; n++, lv++) {
			memcpy(&xp->xad[lv->offset], &p->xad[lv->offset],
			       lv->length << L2XTSLOTSIZE);
		}

		/* reset on-disk (metadata page) xtree XAD_NEW bit */
		xad = &xp->xad[XTENTRYSTART];
		for (n = XTENTRYSTART;
		     n < le16_to_cpu(xp->header.nextindex); n++, xad++)
			if (xad->flag & (XAD_NEW | XAD_EXTENDED))
				xad->flag &= ~(XAD_NEW | XAD_EXTENDED);
	}

	/* no tlock on the inline btree root: nothing to copy for it */
	if ((lid = jfs_ip->blid) == 0)
		goto inlineData;
	jfs_ip->blid = 0;

	tlck = lid_to_tlock(lid);
	/* remember the type as it was before tlckBTROOT is or'ed in */
	type = tlck->type;
	tlck->type |= tlckBTROOT;
	tlck->mp = mp;
	ilinelock = (struct linelock *) & tlck->lock;

	/*
	 *	regular file: 16 byte (XAD slot) granularity
	 */
	if (type & tlckXTREE) {
		xtpage_t *p, *xp;
		xad_t *xad;

		/*
		 * copy xtree root from inode to dinode:
		 */
		p = &jfs_ip->i_xtroot;
		xp = &dp->di_xtroot;
		lv = ilinelock->lv;
		for (n = 0; n < ilinelock->index; n++, lv++) {
			memcpy(&xp->xad[lv->offset], &p->xad[lv->offset],
			       lv->length << L2XTSLOTSIZE);
		}

		/* reset on-disk (metadata page) xtree XAD_NEW bit */
		xad = &xp->xad[XTENTRYSTART];
		for (n = XTENTRYSTART;
		     n < le16_to_cpu(xp->header.nextindex); n++, xad++)
			if (xad->flag & (XAD_NEW | XAD_EXTENDED))
				xad->flag &= ~(XAD_NEW | XAD_EXTENDED);
	}
	/*
	 *	directory: 32 byte (directory entry slot) granularity
	 */
	else if (type & tlckDTREE) {
		dtpage_t *p, *xp;

		/*
		 * copy dtree root from inode to dinode:
		 */
		p = (dtpage_t *) &jfs_ip->i_dtroot;
		xp = (dtpage_t *) & dp->di_dtroot;
		lv = ilinelock->lv;
		for (n = 0; n < ilinelock->index; n++, lv++) {
			memcpy(&xp->slot[lv->offset], &p->slot[lv->offset],
			       lv->length << L2DTSLOTSIZE);
		}
	} else {
		/* neither xtree nor dtree: logged, and processing continues */
		jfs_err("diWrite: UFO tlock");
	}

      inlineData:
	/*
	 * copy inline symlink from in-memory inode to on-disk inode
	 * (2 slots, starting 2 * 128 bytes into the disk inode)
	 */
	if (S_ISLNK(ip->i_mode) && ip->i_size < IDATASIZE) {
		lv = & dilinelock->lv[dilinelock->index];
		lv->offset = (dioffset + 2 * 128) >> L2INODESLOTSIZE;
		lv->length = 2;
		memcpy(&dp->di_fastsymlink, jfs_ip->i_inline, IDATASIZE);
		dilinelock->index++;
	}
	/*
	 * copy inline data from in-memory inode to on-disk inode:
	 * 128 byte slot granularity
	 * (1 slot, starting 3 * 128 bytes into the disk inode)
	 */
	if (test_cflag(COMMIT_Inlineea, ip)) {
		lv = & dilinelock->lv[dilinelock->index];
		lv->offset = (dioffset + 3 * 128) >> L2INODESLOTSIZE;
		lv->length = 1;
		memcpy(&dp->di_inlineea, jfs_ip->i_inline_ea, INODESLOTSIZE);
		dilinelock->index++;

		clear_cflag(COMMIT_Inlineea, ip);
	}

	/*
	 *	lock/copy inode base: 128 byte slot granularity
	 *
	 * one slot normally; two when COMMIT_Dirtable was set, in which
	 * case the 96-byte directory table is copied as well.
	 */
	lv = & dilinelock->lv[dilinelock->index];
	lv->offset = dioffset >> L2INODESLOTSIZE;
	copy_to_dinode(dp, ip);
	if (test_and_clear_cflag(COMMIT_Dirtable, ip)) {
		lv->length = 2;
		memcpy(&dp->di_dirtable, &jfs_ip->i_dirtable, 96);
	} else
		lv->length = 1;
	dilinelock->index++;

	/* release the buffer holding the updated on-disk inode.
	 * the buffer will be later written by commit processing.
	 */
	write_metapage(mp);

	return (rc);
}


/*
 * NAME:	diFree(ip)
 *
 * FUNCTION:	free a specified inode from the inode working map
 *		for a fileset or aggregate.
 *
 *		if the inode to be freed represents the first (only)
 *		free inode within the iag, the iag will be placed on
 *		the ag free inode list.
 *
 *		freeing the inode will cause the inode extent to be
 *		freed if the inode is the only allocated inode within
 *		the extent.  in this case all the disk resource backing
 *		up the inode extent will be freed. in addition, the iag
 *		will be placed on the ag extent free list if the extent
 *		is the first free extent in the iag.  if freeing the
 *		extent also means that no free inodes will exist for
 *		the iag, the iag will also be removed from the ag free
 *		inode list.
 *
 *		the iag describing the inode will be freed if the extent
 *		is to be freed and it is the only backed extent within
 *		the iag.  in this case, the iag will be removed from the
 *		ag free extent list and ag free inode list and placed on
 *		the inode map's free iag list.
831 * 832 * a careful update approach is used to provide consistency 833 * in the face of updates to multiple buffers. under this 834 * approach, all required buffers are obtained before making 835 * any updates and are held until all updates are complete. 836 * 837 * PARAMETERS: 838 * ip - inode to be freed. 839 * 840 * RETURN VALUES: 841 * 0 - success 842 * -EIO - i/o error. 843 */ 844int diFree(struct inode *ip) 845{ 846 int rc; 847 ino_t inum = ip->i_ino; 848 struct iag *iagp, *aiagp, *biagp, *ciagp, *diagp; 849 struct metapage *mp, *amp, *bmp, *cmp, *dmp; 850 int iagno, ino, extno, bitno, sword, agno; 851 int back, fwd; 852 u32 bitmap, mask; 853 struct inode *ipimap = JFS_SBI(ip->i_sb)->ipimap; 854 struct inomap *imap = JFS_IP(ipimap)->i_imap; 855 pxd_t freepxd; 856 tid_t tid; 857 struct inode *iplist[3]; 858 struct tlock *tlck; 859 struct pxd_lock *pxdlock; 860 861 /* 862 * This is just to suppress compiler warnings. The same logic that 863 * references these variables is used to initialize them. 864 */ 865 aiagp = biagp = ciagp = diagp = NULL; 866 867 /* get the iag number containing the inode. 868 */ 869 iagno = INOTOIAG(inum); 870 871 /* make sure that the iag is contained within 872 * the map. 873 */ 874 if (iagno >= imap->im_nextiag) { 875 print_hex_dump(KERN_ERR, "imap: ", DUMP_PREFIX_ADDRESS, 16, 4, 876 imap, 32, 0); 877 jfs_error(ip->i_sb, "inum = %d, iagno = %d, nextiag = %d\n", 878 (uint) inum, iagno, imap->im_nextiag); 879 return -EIO; 880 } 881 882 /* get the allocation group for this ino. 883 */ 884 agno = BLKTOAG(JFS_IP(ip)->agstart, JFS_SBI(ip->i_sb)); 885 886 /* Lock the AG specific inode map information 887 */ 888 AG_LOCK(imap, agno); 889 890 /* Obtain read lock in imap inode. Don't release it until we have 891 * read all of the IAG's that we are going to. 892 */ 893 IREAD_LOCK(ipimap, RDWRLOCK_IMAP); 894 895 /* read the iag. 
896 */ 897 if ((rc = diIAGRead(imap, iagno, &mp))) { 898 IREAD_UNLOCK(ipimap); 899 AG_UNLOCK(imap, agno); 900 return (rc); 901 } 902 iagp = (struct iag *) mp->data; 903 904 /* get the inode number and extent number of the inode within 905 * the iag and the inode number within the extent. 906 */ 907 ino = inum & (INOSPERIAG - 1); 908 extno = ino >> L2INOSPEREXT; 909 bitno = ino & (INOSPEREXT - 1); 910 mask = HIGHORDER >> bitno; 911 912 if (!(le32_to_cpu(iagp->wmap[extno]) & mask)) { 913 jfs_error(ip->i_sb, "wmap shows inode already free\n"); 914 } 915 916 if (!addressPXD(&iagp->inoext[extno])) { 917 release_metapage(mp); 918 IREAD_UNLOCK(ipimap); 919 AG_UNLOCK(imap, agno); 920 jfs_error(ip->i_sb, "invalid inoext\n"); 921 return -EIO; 922 } 923 924 /* compute the bitmap for the extent reflecting the freed inode. 925 */ 926 bitmap = le32_to_cpu(iagp->wmap[extno]) & ~mask; 927 928 if (imap->im_agctl[agno].numfree > imap->im_agctl[agno].numinos) { 929 release_metapage(mp); 930 IREAD_UNLOCK(ipimap); 931 AG_UNLOCK(imap, agno); 932 jfs_error(ip->i_sb, "numfree > numinos\n"); 933 return -EIO; 934 } 935 /* 936 * inode extent still has some inodes or below low water mark: 937 * keep the inode extent; 938 */ 939 if (bitmap || 940 imap->im_agctl[agno].numfree < 96 || 941 (imap->im_agctl[agno].numfree < 288 && 942 (((imap->im_agctl[agno].numfree * 100) / 943 imap->im_agctl[agno].numinos) <= 25))) { 944 /* if the iag currently has no free inodes (i.e., 945 * the inode being freed is the first free inode of iag), 946 * insert the iag at head of the inode free list for the ag. 947 */ 948 if (iagp->nfreeinos == 0) { 949 /* check if there are any iags on the ag inode 950 * free list. if so, read the first one so that 951 * we can link the current iag onto the list at 952 * the head. 953 */ 954 if ((fwd = imap->im_agctl[agno].inofree) >= 0) { 955 /* read the iag that currently is the head 956 * of the list. 
957 */ 958 if ((rc = diIAGRead(imap, fwd, &))) { 959 IREAD_UNLOCK(ipimap); 960 AG_UNLOCK(imap, agno); 961 release_metapage(mp); 962 return (rc); 963 } 964 aiagp = (struct iag *) amp->data; 965 966 /* make current head point back to the iag. 967 */ 968 aiagp->inofreeback = cpu_to_le32(iagno); 969 970 write_metapage(amp); 971 } 972 973 /* iag points forward to current head and iag 974 * becomes the new head of the list. 975 */ 976 iagp->inofreefwd = 977 cpu_to_le32(imap->im_agctl[agno].inofree); 978 iagp->inofreeback = cpu_to_le32(-1); 979 imap->im_agctl[agno].inofree = iagno; 980 } 981 IREAD_UNLOCK(ipimap); 982 983 /* update the free inode summary map for the extent if 984 * freeing the inode means the extent will now have free 985 * inodes (i.e., the inode being freed is the first free 986 * inode of extent), 987 */ 988 if (iagp->wmap[extno] == cpu_to_le32(ONES)) { 989 sword = extno >> L2EXTSPERSUM; 990 bitno = extno & (EXTSPERSUM - 1); 991 iagp->inosmap[sword] &= 992 cpu_to_le32(~(HIGHORDER >> bitno)); 993 } 994 995 /* update the bitmap. 996 */ 997 iagp->wmap[extno] = cpu_to_le32(bitmap); 998 999 /* update the free inode counts at the iag, ag and 1000 * map level. 1001 */ 1002 le32_add_cpu(&iagp->nfreeinos, 1); 1003 imap->im_agctl[agno].numfree += 1; 1004 atomic_inc(&imap->im_numfree); 1005 1006 /* release the AG inode map lock 1007 */ 1008 AG_UNLOCK(imap, agno); 1009 1010 /* write the iag */ 1011 write_metapage(mp); 1012 1013 return (0); 1014 } 1015 1016 1017 /* 1018 * inode extent has become free and above low water mark: 1019 * free the inode extent; 1020 */ 1021 1022 /* 1023 * prepare to update iag list(s) (careful update step 1) 1024 */ 1025 amp = bmp = cmp = dmp = NULL; 1026 fwd = back = -1; 1027 1028 /* check if the iag currently has no free extents. if so, 1029 * it will be placed on the head of the ag extent free list. 1030 */ 1031 if (iagp->nfreeexts == 0) { 1032 /* check if the ag extent free list has any iags. 
1033 * if so, read the iag at the head of the list now. 1034 * this (head) iag will be updated later to reflect 1035 * the addition of the current iag at the head of 1036 * the list. 1037 */ 1038 if ((fwd = imap->im_agctl[agno].extfree) >= 0) { 1039 if ((rc = diIAGRead(imap, fwd, &))) 1040 goto error_out; 1041 aiagp = (struct iag *) amp->data; 1042 } 1043 } else { 1044 /* iag has free extents. check if the addition of a free 1045 * extent will cause all extents to be free within this 1046 * iag. if so, the iag will be removed from the ag extent 1047 * free list and placed on the inode map's free iag list. 1048 */ 1049 if (iagp->nfreeexts == cpu_to_le32(EXTSPERIAG - 1)) { 1050 /* in preparation for removing the iag from the 1051 * ag extent free list, read the iags preceding 1052 * and following the iag on the ag extent free 1053 * list. 1054 */ 1055 if ((fwd = le32_to_cpu(iagp->extfreefwd)) >= 0) { 1056 if ((rc = diIAGRead(imap, fwd, &))) 1057 goto error_out; 1058 aiagp = (struct iag *) amp->data; 1059 } 1060 1061 if ((back = le32_to_cpu(iagp->extfreeback)) >= 0) { 1062 if ((rc = diIAGRead(imap, back, &bmp))) 1063 goto error_out; 1064 biagp = (struct iag *) bmp->data; 1065 } 1066 } 1067 } 1068 1069 /* remove the iag from the ag inode free list if freeing 1070 * this extent cause the iag to have no free inodes. 1071 */ 1072 if (iagp->nfreeinos == cpu_to_le32(INOSPEREXT - 1)) { 1073 int inofreeback = le32_to_cpu(iagp->inofreeback); 1074 int inofreefwd = le32_to_cpu(iagp->inofreefwd); 1075 1076 /* in preparation for removing the iag from the 1077 * ag inode free list, read the iags preceding 1078 * and following the iag on the ag inode free 1079 * list. before reading these iags, we must make 1080 * sure that we already don't have them in hand 1081 * from up above, since re-reading an iag (buffer) 1082 * we are currently holding would cause a deadlock. 
1083 */ 1084 if (inofreefwd >= 0) { 1085 1086 if (inofreefwd == fwd) 1087 ciagp = (struct iag *) amp->data; 1088 else if (inofreefwd == back) 1089 ciagp = (struct iag *) bmp->data; 1090 else { 1091 if ((rc = 1092 diIAGRead(imap, inofreefwd, &cmp))) 1093 goto error_out; 1094 ciagp = (struct iag *) cmp->data; 1095 } 1096 assert(ciagp != NULL); 1097 } 1098 1099 if (inofreeback >= 0) { 1100 if (inofreeback == fwd) 1101 diagp = (struct iag *) amp->data; 1102 else if (inofreeback == back) 1103 diagp = (struct iag *) bmp->data; 1104 else { 1105 if ((rc = 1106 diIAGRead(imap, inofreeback, &dmp))) 1107 goto error_out; 1108 diagp = (struct iag *) dmp->data; 1109 } 1110 assert(diagp != NULL); 1111 } 1112 } 1113 1114 IREAD_UNLOCK(ipimap); 1115 1116 /* 1117 * invalidate any page of the inode extent freed from buffer cache; 1118 */ 1119 freepxd = iagp->inoext[extno]; 1120 invalidate_pxd_metapages(ip, freepxd); 1121 1122 /* 1123 * update iag list(s) (careful update step 2) 1124 */ 1125 /* add the iag to the ag extent free list if this is the 1126 * first free extent for the iag. 1127 */ 1128 if (iagp->nfreeexts == 0) { 1129 if (fwd >= 0) 1130 aiagp->extfreeback = cpu_to_le32(iagno); 1131 1132 iagp->extfreefwd = 1133 cpu_to_le32(imap->im_agctl[agno].extfree); 1134 iagp->extfreeback = cpu_to_le32(-1); 1135 imap->im_agctl[agno].extfree = iagno; 1136 } else { 1137 /* remove the iag from the ag extent list if all extents 1138 * are now free and place it on the inode map iag free list. 
1139 */ 1140 if (iagp->nfreeexts == cpu_to_le32(EXTSPERIAG - 1)) { 1141 if (fwd >= 0) 1142 aiagp->extfreeback = iagp->extfreeback; 1143 1144 if (back >= 0) 1145 biagp->extfreefwd = iagp->extfreefwd; 1146 else 1147 imap->im_agctl[agno].extfree = 1148 le32_to_cpu(iagp->extfreefwd); 1149 1150 iagp->extfreefwd = iagp->extfreeback = cpu_to_le32(-1); 1151 1152 IAGFREE_LOCK(imap); 1153 iagp->iagfree = cpu_to_le32(imap->im_freeiag); 1154 imap->im_freeiag = iagno; 1155 IAGFREE_UNLOCK(imap); 1156 } 1157 } 1158 1159 /* remove the iag from the ag inode free list if freeing 1160 * this extent causes the iag to have no free inodes. 1161 */ 1162 if (iagp->nfreeinos == cpu_to_le32(INOSPEREXT - 1)) { 1163 if ((int) le32_to_cpu(iagp->inofreefwd) >= 0) 1164 ciagp->inofreeback = iagp->inofreeback; 1165 1166 if ((int) le32_to_cpu(iagp->inofreeback) >= 0) 1167 diagp->inofreefwd = iagp->inofreefwd; 1168 else 1169 imap->im_agctl[agno].inofree = 1170 le32_to_cpu(iagp->inofreefwd); 1171 1172 iagp->inofreefwd = iagp->inofreeback = cpu_to_le32(-1); 1173 } 1174 1175 /* update the inode extent address and working map 1176 * to reflect the free extent. 1177 * the permanent map should have been updated already 1178 * for the inode being freed. 1179 */ 1180 if (iagp->pmap[extno] != 0) { 1181 jfs_error(ip->i_sb, "the pmap does not show inode free\n"); 1182 } 1183 iagp->wmap[extno] = 0; 1184 PXDlength(&iagp->inoext[extno], 0); 1185 PXDaddress(&iagp->inoext[extno], 0); 1186 1187 /* update the free extent and free inode summary maps 1188 * to reflect the freed extent. 1189 * the inode summary map is marked to indicate no inodes 1190 * available for the freed extent. 1191 */ 1192 sword = extno >> L2EXTSPERSUM; 1193 bitno = extno & (EXTSPERSUM - 1); 1194 mask = HIGHORDER >> bitno; 1195 iagp->inosmap[sword] |= cpu_to_le32(mask); 1196 iagp->extsmap[sword] &= cpu_to_le32(~mask); 1197 1198 /* update the number of free inodes and number of free extents 1199 * for the iag. 
1200 */ 1201 le32_add_cpu(&iagp->nfreeinos, -(INOSPEREXT - 1)); 1202 le32_add_cpu(&iagp->nfreeexts, 1); 1203 1204 /* update the number of free inodes and backed inodes 1205 * at the ag and inode map level. 1206 */ 1207 imap->im_agctl[agno].numfree -= (INOSPEREXT - 1); 1208 imap->im_agctl[agno].numinos -= INOSPEREXT; 1209 atomic_sub(INOSPEREXT - 1, &imap->im_numfree); 1210 atomic_sub(INOSPEREXT, &imap->im_numinos); 1211 1212 if (amp) 1213 write_metapage(amp); 1214 if (bmp) 1215 write_metapage(bmp); 1216 if (cmp) 1217 write_metapage(cmp); 1218 if (dmp) 1219 write_metapage(dmp); 1220 1221 /* 1222 * start transaction to update block allocation map 1223 * for the inode extent freed; 1224 * 1225 * N.B. AG_LOCK is released and iag will be released below, and 1226 * other thread may allocate inode from/reusing the ixad freed 1227 * BUT with new/different backing inode extent from the extent 1228 * to be freed by the transaction; 1229 */ 1230 tid = txBegin(ipimap->i_sb, COMMIT_FORCE); 1231 mutex_lock(&JFS_IP(ipimap)->commit_mutex); 1232 1233 /* acquire tlock of the iag page of the freed ixad 1234 * to force the page NOHOMEOK (even though no data is 1235 * logged from the iag page) until NOREDOPAGE|FREEXTENT log 1236 * for the free of the extent is committed; 1237 * write FREEXTENT|NOREDOPAGE log record 1238 * N.B. linelock is overlaid as freed extent descriptor; 1239 */ 1240 tlck = txLock(tid, ipimap, mp, tlckINODE | tlckFREE); 1241 pxdlock = (struct pxd_lock *) & tlck->lock; 1242 pxdlock->flag = mlckFREEPXD; 1243 pxdlock->pxd = freepxd; 1244 pxdlock->index = 1; 1245 1246 write_metapage(mp); 1247 1248 iplist[0] = ipimap; 1249 1250 /* 1251 * logredo needs the IAG number and IAG extent index in order 1252 * to ensure that the IMap is consistent. The least disruptive 1253 * way to pass these values through to the transaction manager 1254 * is in the iplist array. 1255 * 1256 * It's not pretty, but it works. 
1257 */ 1258 iplist[1] = (struct inode *) (size_t)iagno; 1259 iplist[2] = (struct inode *) (size_t)extno; 1260 1261 rc = txCommit(tid, 1, &iplist[0], COMMIT_FORCE); 1262 1263 txEnd(tid); 1264 mutex_unlock(&JFS_IP(ipimap)->commit_mutex); 1265 1266 /* unlock the AG inode map information */ 1267 AG_UNLOCK(imap, agno); 1268 1269 return (0); 1270 1271 error_out: 1272 IREAD_UNLOCK(ipimap); 1273 1274 if (amp) 1275 release_metapage(amp); 1276 if (bmp) 1277 release_metapage(bmp); 1278 if (cmp) 1279 release_metapage(cmp); 1280 if (dmp) 1281 release_metapage(dmp); 1282 1283 AG_UNLOCK(imap, agno); 1284 1285 release_metapage(mp); 1286 1287 return (rc); 1288} 1289 1290/* 1291 * There are several places in the diAlloc* routines where we initialize 1292 * the inode. 1293 */ 1294static inline void 1295diInitInode(struct inode *ip, int iagno, int ino, int extno, struct iag * iagp) 1296{ 1297 struct jfs_inode_info *jfs_ip = JFS_IP(ip); 1298 1299 ip->i_ino = (iagno << L2INOSPERIAG) + ino; 1300 jfs_ip->ixpxd = iagp->inoext[extno]; 1301 jfs_ip->agstart = le64_to_cpu(iagp->agstart); 1302 jfs_ip->active_ag = -1; 1303} 1304 1305 1306/* 1307 * NAME: diAlloc(pip,dir,ip) 1308 * 1309 * FUNCTION: allocate a disk inode from the inode working map 1310 * for a fileset or aggregate. 1311 * 1312 * PARAMETERS: 1313 * pip - pointer to incore inode for the parent inode. 1314 * dir - 'true' if the new disk inode is for a directory. 1315 * ip - pointer to a new inode 1316 * 1317 * RETURN VALUES: 1318 * 0 - success. 1319 * -ENOSPC - insufficient disk resources. 1320 * -EIO - i/o error. 
1321 */ 1322int diAlloc(struct inode *pip, bool dir, struct inode *ip) 1323{ 1324 int rc, ino, iagno, addext, extno, bitno, sword; 1325 int nwords, rem, i, agno, dn_numag; 1326 u32 mask, inosmap, extsmap; 1327 struct inode *ipimap; 1328 struct metapage *mp; 1329 ino_t inum; 1330 struct iag *iagp; 1331 struct inomap *imap; 1332 1333 /* get the pointers to the inode map inode and the 1334 * corresponding imap control structure. 1335 */ 1336 ipimap = JFS_SBI(pip->i_sb)->ipimap; 1337 imap = JFS_IP(ipimap)->i_imap; 1338 JFS_IP(ip)->ipimap = ipimap; 1339 JFS_IP(ip)->fileset = FILESYSTEM_I; 1340 1341 /* for a directory, the allocation policy is to start 1342 * at the ag level using the preferred ag. 1343 */ 1344 if (dir) { 1345 agno = dbNextAG(JFS_SBI(pip->i_sb)->ipbmap); 1346 AG_LOCK(imap, agno); 1347 goto tryag; 1348 } 1349 1350 /* for files, the policy starts off by trying to allocate from 1351 * the same iag containing the parent disk inode: 1352 * try to allocate the new disk inode close to the parent disk 1353 * inode, using parent disk inode number + 1 as the allocation 1354 * hint. (we use a left-to-right policy to attempt to avoid 1355 * moving backward on the disk.) compute the hint within the 1356 * file system and the iag. 1357 */ 1358 1359 /* get the ag number of this iag */ 1360 agno = BLKTOAG(JFS_IP(pip)->agstart, JFS_SBI(pip->i_sb)); 1361 dn_numag = JFS_SBI(pip->i_sb)->bmap->db_numag; 1362 if (agno < 0 || agno > dn_numag) 1363 return -EIO; 1364 1365 if (atomic_read(&JFS_SBI(pip->i_sb)->bmap->db_active[agno])) { 1366 /* 1367 * There is an open file actively growing. We want to 1368 * allocate new inodes from a different ag to avoid 1369 * fragmentation problems. 
1370 */ 1371 agno = dbNextAG(JFS_SBI(pip->i_sb)->ipbmap); 1372 AG_LOCK(imap, agno); 1373 goto tryag; 1374 } 1375 1376 inum = pip->i_ino + 1; 1377 ino = inum & (INOSPERIAG - 1); 1378 1379 /* back off the hint if it is outside of the iag */ 1380 if (ino == 0) 1381 inum = pip->i_ino; 1382 1383 /* lock the AG inode map information */ 1384 AG_LOCK(imap, agno); 1385 1386 /* Get read lock on imap inode */ 1387 IREAD_LOCK(ipimap, RDWRLOCK_IMAP); 1388 1389 /* get the iag number and read the iag */ 1390 iagno = INOTOIAG(inum); 1391 if ((rc = diIAGRead(imap, iagno, &mp))) { 1392 IREAD_UNLOCK(ipimap); 1393 AG_UNLOCK(imap, agno); 1394 return (rc); 1395 } 1396 iagp = (struct iag *) mp->data; 1397 1398 /* determine if new inode extent is allowed to be added to the iag. 1399 * new inode extent can be added to the iag if the ag 1400 * has less than 32 free disk inodes and the iag has free extents. 1401 */ 1402 addext = (imap->im_agctl[agno].numfree < 32 && iagp->nfreeexts); 1403 1404 /* 1405 * try to allocate from the IAG 1406 */ 1407 /* check if the inode may be allocated from the iag 1408 * (i.e. the inode has free inodes or new extent can be added). 1409 */ 1410 if (iagp->nfreeinos || addext) { 1411 /* determine the extent number of the hint. 1412 */ 1413 extno = ino >> L2INOSPEREXT; 1414 1415 /* check if the extent containing the hint has backed 1416 * inodes. if so, try to allocate within this extent. 1417 */ 1418 if (addressPXD(&iagp->inoext[extno])) { 1419 bitno = ino & (INOSPEREXT - 1); 1420 if ((bitno = 1421 diFindFree(le32_to_cpu(iagp->wmap[extno]), 1422 bitno)) 1423 < INOSPEREXT) { 1424 ino = (extno << L2INOSPEREXT) + bitno; 1425 1426 /* a free inode (bit) was found within this 1427 * extent, so allocate it. 1428 */ 1429 rc = diAllocBit(imap, iagp, ino); 1430 IREAD_UNLOCK(ipimap); 1431 if (rc) { 1432 assert(rc == -EIO); 1433 } else { 1434 /* set the results of the allocation 1435 * and write the iag. 
1436 */ 1437 diInitInode(ip, iagno, ino, extno, 1438 iagp); 1439 mark_metapage_dirty(mp); 1440 } 1441 release_metapage(mp); 1442 1443 /* free the AG lock and return. 1444 */ 1445 AG_UNLOCK(imap, agno); 1446 return (rc); 1447 } 1448 1449 if (!addext) 1450 extno = 1451 (extno == 1452 EXTSPERIAG - 1) ? 0 : extno + 1; 1453 } 1454 1455 /* 1456 * no free inodes within the extent containing the hint. 1457 * 1458 * try to allocate from the backed extents following 1459 * hint or, if appropriate (i.e. addext is true), allocate 1460 * an extent of free inodes at or following the extent 1461 * containing the hint. 1462 * 1463 * the free inode and free extent summary maps are used 1464 * here, so determine the starting summary map position 1465 * and the number of words we'll have to examine. again, 1466 * the approach is to allocate following the hint, so we 1467 * might have to initially ignore prior bits of the summary 1468 * map that represent extents prior to the extent containing 1469 * the hint and later revisit these bits. 1470 */ 1471 bitno = extno & (EXTSPERSUM - 1); 1472 nwords = (bitno == 0) ? SMAPSZ : SMAPSZ + 1; 1473 sword = extno >> L2EXTSPERSUM; 1474 1475 /* mask any prior bits for the starting words of the 1476 * summary map. 1477 */ 1478 mask = (bitno == 0) ? 0 : (ONES << (EXTSPERSUM - bitno)); 1479 inosmap = le32_to_cpu(iagp->inosmap[sword]) | mask; 1480 extsmap = le32_to_cpu(iagp->extsmap[sword]) | mask; 1481 1482 /* scan the free inode and free extent summary maps for 1483 * free resources. 1484 */ 1485 for (i = 0; i < nwords; i++) { 1486 /* check if this word of the free inode summary 1487 * map describes an extent with free inodes. 1488 */ 1489 if (~inosmap) { 1490 /* an extent with free inodes has been 1491 * found. determine the extent number 1492 * and the inode number within the extent. 
1493 */ 1494 rem = diFindFree(inosmap, 0); 1495 extno = (sword << L2EXTSPERSUM) + rem; 1496 rem = diFindFree(le32_to_cpu(iagp->wmap[extno]), 1497 0); 1498 if (rem >= INOSPEREXT) { 1499 IREAD_UNLOCK(ipimap); 1500 release_metapage(mp); 1501 AG_UNLOCK(imap, agno); 1502 jfs_error(ip->i_sb, 1503 "can't find free bit in wmap\n"); 1504 return -EIO; 1505 } 1506 1507 /* determine the inode number within the 1508 * iag and allocate the inode from the 1509 * map. 1510 */ 1511 ino = (extno << L2INOSPEREXT) + rem; 1512 rc = diAllocBit(imap, iagp, ino); 1513 IREAD_UNLOCK(ipimap); 1514 if (rc) 1515 assert(rc == -EIO); 1516 else { 1517 /* set the results of the allocation 1518 * and write the iag. 1519 */ 1520 diInitInode(ip, iagno, ino, extno, 1521 iagp); 1522 mark_metapage_dirty(mp); 1523 } 1524 release_metapage(mp); 1525 1526 /* free the AG lock and return. 1527 */ 1528 AG_UNLOCK(imap, agno); 1529 return (rc); 1530 1531 } 1532 1533 /* check if we may allocate an extent of free 1534 * inodes and whether this word of the free 1535 * extents summary map describes a free extent. 1536 */ 1537 if (addext && ~extsmap) { 1538 /* a free extent has been found. determine 1539 * the extent number. 1540 */ 1541 rem = diFindFree(extsmap, 0); 1542 extno = (sword << L2EXTSPERSUM) + rem; 1543 1544 /* allocate an extent of free inodes. 1545 */ 1546 if ((rc = diNewExt(imap, iagp, extno))) { 1547 /* if there is no disk space for a 1548 * new extent, try to allocate the 1549 * disk inode from somewhere else. 1550 */ 1551 if (rc == -ENOSPC) 1552 break; 1553 1554 assert(rc == -EIO); 1555 } else { 1556 /* set the results of the allocation 1557 * and write the iag. 1558 */ 1559 diInitInode(ip, iagno, 1560 extno << L2INOSPEREXT, 1561 extno, iagp); 1562 mark_metapage_dirty(mp); 1563 } 1564 release_metapage(mp); 1565 /* free the imap inode & the AG lock & return. 
1566 */ 1567 IREAD_UNLOCK(ipimap); 1568 AG_UNLOCK(imap, agno); 1569 return (rc); 1570 } 1571 1572 /* move on to the next set of summary map words. 1573 */ 1574 sword = (sword == SMAPSZ - 1) ? 0 : sword + 1; 1575 inosmap = le32_to_cpu(iagp->inosmap[sword]); 1576 extsmap = le32_to_cpu(iagp->extsmap[sword]); 1577 } 1578 } 1579 /* unlock imap inode */ 1580 IREAD_UNLOCK(ipimap); 1581 1582 /* nothing doing in this iag, so release it. */ 1583 release_metapage(mp); 1584 1585 tryag: 1586 /* 1587 * try to allocate anywhere within the same AG as the parent inode. 1588 */ 1589 rc = diAllocAG(imap, agno, dir, ip); 1590 1591 AG_UNLOCK(imap, agno); 1592 1593 if (rc != -ENOSPC) 1594 return (rc); 1595 1596 /* 1597 * try to allocate in any AG. 1598 */ 1599 return (diAllocAny(imap, agno, dir, ip)); 1600} 1601 1602 1603/* 1604 * NAME: diAllocAG(imap,agno,dir,ip) 1605 * 1606 * FUNCTION: allocate a disk inode from the allocation group. 1607 * 1608 * this routine first determines if a new extent of free 1609 * inodes should be added for the allocation group, with 1610 * the current request satisfied from this extent. if this 1611 * is the case, an attempt will be made to do just that. if 1612 * this attempt fails or it has been determined that a new 1613 * extent should not be added, an attempt is made to satisfy 1614 * the request by allocating an existing (backed) free inode 1615 * from the allocation group. 1616 * 1617 * PRE CONDITION: Already have the AG lock for this AG. 1618 * 1619 * PARAMETERS: 1620 * imap - pointer to inode map control structure. 1621 * agno - allocation group to allocate from. 1622 * dir - 'true' if the new disk inode is for a directory. 1623 * ip - pointer to the new inode to be filled in on successful return 1624 * with the disk inode number allocated, its extent address 1625 * and the start of the ag. 1626 * 1627 * RETURN VALUES: 1628 * 0 - success. 1629 * -ENOSPC - insufficient disk resources. 1630 * -EIO - i/o error. 
1631 */ 1632static int 1633diAllocAG(struct inomap * imap, int agno, bool dir, struct inode *ip) 1634{ 1635 int rc, addext, numfree, numinos; 1636 1637 /* get the number of free and the number of backed disk 1638 * inodes currently within the ag. 1639 */ 1640 numfree = imap->im_agctl[agno].numfree; 1641 numinos = imap->im_agctl[agno].numinos; 1642 1643 if (numfree > numinos) { 1644 jfs_error(ip->i_sb, "numfree > numinos\n"); 1645 return -EIO; 1646 } 1647 1648 /* determine if we should allocate a new extent of free inodes 1649 * within the ag: for directory inodes, add a new extent 1650 * if there are a small number of free inodes or number of free 1651 * inodes is a small percentage of the number of backed inodes. 1652 */ 1653 if (dir) 1654 addext = (numfree < 64 || 1655 (numfree < 256 1656 && ((numfree * 100) / numinos) <= 20)); 1657 else 1658 addext = (numfree == 0); 1659 1660 /* 1661 * try to allocate a new extent of free inodes. 1662 */ 1663 if (addext) { 1664 /* if free space is not available for this new extent, try 1665 * below to allocate a free and existing (already backed) 1666 * inode from the ag. 1667 */ 1668 if ((rc = diAllocExt(imap, agno, ip)) != -ENOSPC) 1669 return (rc); 1670 } 1671 1672 /* 1673 * try to allocate an existing free inode from the ag. 1674 */ 1675 return (diAllocIno(imap, agno, ip)); 1676} 1677 1678 1679/* 1680 * NAME: diAllocAny(imap,agno,dir,iap) 1681 * 1682 * FUNCTION: allocate a disk inode from any other allocation group. 1683 * 1684 * this routine is called when an allocation attempt within 1685 * the primary allocation group has failed. if attempts to 1686 * allocate an inode from any allocation group other than the 1687 * specified primary group. 1688 * 1689 * PARAMETERS: 1690 * imap - pointer to inode map control structure. 1691 * agno - primary allocation group (to avoid). 1692 * dir - 'true' if the new disk inode is for a directory. 
1693 * ip - pointer to a new inode to be filled in on successful return 1694 * with the disk inode number allocated, its extent address 1695 * and the start of the ag. 1696 * 1697 * RETURN VALUES: 1698 * 0 - success. 1699 * -ENOSPC - insufficient disk resources. 1700 * -EIO - i/o error. 1701 */ 1702static int 1703diAllocAny(struct inomap * imap, int agno, bool dir, struct inode *ip) 1704{ 1705 int ag, rc; 1706 int maxag = JFS_SBI(imap->im_ipimap->i_sb)->bmap->db_maxag; 1707 1708 1709 /* try to allocate from the ags following agno up to 1710 * the maximum ag number. 1711 */ 1712 for (ag = agno + 1; ag <= maxag; ag++) { 1713 AG_LOCK(imap, ag); 1714 1715 rc = diAllocAG(imap, ag, dir, ip); 1716 1717 AG_UNLOCK(imap, ag); 1718 1719 if (rc != -ENOSPC) 1720 return (rc); 1721 } 1722 1723 /* try to allocate from the ags in front of agno. 1724 */ 1725 for (ag = 0; ag < agno; ag++) { 1726 AG_LOCK(imap, ag); 1727 1728 rc = diAllocAG(imap, ag, dir, ip); 1729 1730 AG_UNLOCK(imap, ag); 1731 1732 if (rc != -ENOSPC) 1733 return (rc); 1734 } 1735 1736 /* no free disk inodes. 1737 */ 1738 return -ENOSPC; 1739} 1740 1741 1742/* 1743 * NAME: diAllocIno(imap,agno,ip) 1744 * 1745 * FUNCTION: allocate a disk inode from the allocation group's free 1746 * inode list, returning an error if this free list is 1747 * empty (i.e. no iags on the list). 1748 * 1749 * allocation occurs from the first iag on the list using 1750 * the iag's free inode summary map to find the leftmost 1751 * free inode in the iag. 1752 * 1753 * PRE CONDITION: Already have AG lock for this AG. 1754 * 1755 * PARAMETERS: 1756 * imap - pointer to inode map control structure. 1757 * agno - allocation group. 1758 * ip - pointer to new inode to be filled in on successful return 1759 * with the disk inode number allocated, its extent address 1760 * and the start of the ag. 1761 * 1762 * RETURN VALUES: 1763 * 0 - success. 1764 * -ENOSPC - insufficient disk resources. 1765 * -EIO - i/o error. 
1766 */ 1767static int diAllocIno(struct inomap * imap, int agno, struct inode *ip) 1768{ 1769 int iagno, ino, rc, rem, extno, sword; 1770 struct metapage *mp; 1771 struct iag *iagp; 1772 1773 /* check if there are iags on the ag's free inode list. 1774 */ 1775 if ((iagno = imap->im_agctl[agno].inofree) < 0) 1776 return -ENOSPC; 1777 1778 /* obtain read lock on imap inode */ 1779 IREAD_LOCK(imap->im_ipimap, RDWRLOCK_IMAP); 1780 1781 /* read the iag at the head of the list. 1782 */ 1783 if ((rc = diIAGRead(imap, iagno, &mp))) { 1784 IREAD_UNLOCK(imap->im_ipimap); 1785 return (rc); 1786 } 1787 iagp = (struct iag *) mp->data; 1788 1789 /* better be free inodes in this iag if it is on the 1790 * list. 1791 */ 1792 if (!iagp->nfreeinos) { 1793 IREAD_UNLOCK(imap->im_ipimap); 1794 release_metapage(mp); 1795 jfs_error(ip->i_sb, "nfreeinos = 0, but iag on freelist\n"); 1796 return -EIO; 1797 } 1798 1799 /* scan the free inode summary map to find an extent 1800 * with free inodes. 1801 */ 1802 for (sword = 0;; sword++) { 1803 if (sword >= SMAPSZ) { 1804 IREAD_UNLOCK(imap->im_ipimap); 1805 release_metapage(mp); 1806 jfs_error(ip->i_sb, 1807 "free inode not found in summary map\n"); 1808 return -EIO; 1809 } 1810 1811 if (~iagp->inosmap[sword]) 1812 break; 1813 } 1814 1815 /* found a extent with free inodes. determine 1816 * the extent number. 1817 */ 1818 rem = diFindFree(le32_to_cpu(iagp->inosmap[sword]), 0); 1819 if (rem >= EXTSPERSUM) { 1820 IREAD_UNLOCK(imap->im_ipimap); 1821 release_metapage(mp); 1822 jfs_error(ip->i_sb, "no free extent found\n"); 1823 return -EIO; 1824 } 1825 extno = (sword << L2EXTSPERSUM) + rem; 1826 1827 /* find the first free inode in the extent. 1828 */ 1829 rem = diFindFree(le32_to_cpu(iagp->wmap[extno]), 0); 1830 if (rem >= INOSPEREXT) { 1831 IREAD_UNLOCK(imap->im_ipimap); 1832 release_metapage(mp); 1833 jfs_error(ip->i_sb, "free inode not found\n"); 1834 return -EIO; 1835 } 1836 1837 /* compute the inode number within the iag. 
1838 */ 1839 ino = (extno << L2INOSPEREXT) + rem; 1840 1841 /* allocate the inode. 1842 */ 1843 rc = diAllocBit(imap, iagp, ino); 1844 IREAD_UNLOCK(imap->im_ipimap); 1845 if (rc) { 1846 release_metapage(mp); 1847 return (rc); 1848 } 1849 1850 /* set the results of the allocation and write the iag. 1851 */ 1852 diInitInode(ip, iagno, ino, extno, iagp); 1853 write_metapage(mp); 1854 1855 return (0); 1856} 1857 1858 1859/* 1860 * NAME: diAllocExt(imap,agno,ip) 1861 * 1862 * FUNCTION: add a new extent of free inodes to an iag, allocating 1863 * an inode from this extent to satisfy the current allocation 1864 * request. 1865 * 1866 * this routine first tries to find an existing iag with free 1867 * extents through the ag free extent list. if list is not 1868 * empty, the head of the list will be selected as the home 1869 * of the new extent of free inodes. otherwise (the list is 1870 * empty), a new iag will be allocated for the ag to contain 1871 * the extent. 1872 * 1873 * once an iag has been selected, the free extent summary map 1874 * is used to locate a free extent within the iag and diNewExt() 1875 * is called to initialize the extent, with initialization 1876 * including the allocation of the first inode of the extent 1877 * for the purpose of satisfying this request. 1878 * 1879 * PARAMETERS: 1880 * imap - pointer to inode map control structure. 1881 * agno - allocation group number. 1882 * ip - pointer to new inode to be filled in on successful return 1883 * with the disk inode number allocated, its extent address 1884 * and the start of the ag. 1885 * 1886 * RETURN VALUES: 1887 * 0 - success. 1888 * -ENOSPC - insufficient disk resources. 1889 * -EIO - i/o error. 1890 */ 1891static int diAllocExt(struct inomap * imap, int agno, struct inode *ip) 1892{ 1893 int rem, iagno, sword, extno, rc; 1894 struct metapage *mp; 1895 struct iag *iagp; 1896 1897 /* check if the ag has any iags with free extents. if not, 1898 * allocate a new iag for the ag. 
1899 */ 1900 if ((iagno = imap->im_agctl[agno].extfree) < 0) { 1901 /* If successful, diNewIAG will obtain the read lock on the 1902 * imap inode. 1903 */ 1904 if ((rc = diNewIAG(imap, &iagno, agno, &mp))) { 1905 return (rc); 1906 } 1907 iagp = (struct iag *) mp->data; 1908 1909 /* set the ag number if this a brand new iag 1910 */ 1911 iagp->agstart = 1912 cpu_to_le64(AGTOBLK(agno, imap->im_ipimap)); 1913 } else { 1914 /* read the iag. 1915 */ 1916 IREAD_LOCK(imap->im_ipimap, RDWRLOCK_IMAP); 1917 if ((rc = diIAGRead(imap, iagno, &mp))) { 1918 IREAD_UNLOCK(imap->im_ipimap); 1919 jfs_error(ip->i_sb, "error reading iag\n"); 1920 return rc; 1921 } 1922 iagp = (struct iag *) mp->data; 1923 } 1924 1925 /* using the free extent summary map, find a free extent. 1926 */ 1927 for (sword = 0;; sword++) { 1928 if (sword >= SMAPSZ) { 1929 release_metapage(mp); 1930 IREAD_UNLOCK(imap->im_ipimap); 1931 jfs_error(ip->i_sb, "free ext summary map not found\n"); 1932 return -EIO; 1933 } 1934 if (~iagp->extsmap[sword]) 1935 break; 1936 } 1937 1938 /* determine the extent number of the free extent. 1939 */ 1940 rem = diFindFree(le32_to_cpu(iagp->extsmap[sword]), 0); 1941 if (rem >= EXTSPERSUM) { 1942 release_metapage(mp); 1943 IREAD_UNLOCK(imap->im_ipimap); 1944 jfs_error(ip->i_sb, "free extent not found\n"); 1945 return -EIO; 1946 } 1947 extno = (sword << L2EXTSPERSUM) + rem; 1948 1949 /* initialize the new extent. 1950 */ 1951 rc = diNewExt(imap, iagp, extno); 1952 IREAD_UNLOCK(imap->im_ipimap); 1953 if (rc) { 1954 /* something bad happened. if a new iag was allocated, 1955 * place it back on the inode map's iag free list, and 1956 * clear the ag number information. 1957 */ 1958 if (iagp->nfreeexts == cpu_to_le32(EXTSPERIAG)) { 1959 IAGFREE_LOCK(imap); 1960 iagp->iagfree = cpu_to_le32(imap->im_freeiag); 1961 imap->im_freeiag = iagno; 1962 IAGFREE_UNLOCK(imap); 1963 } 1964 write_metapage(mp); 1965 return (rc); 1966 } 1967 1968 /* set the results of the allocation and write the iag. 
1969 */ 1970 diInitInode(ip, iagno, extno << L2INOSPEREXT, extno, iagp); 1971 1972 write_metapage(mp); 1973 1974 return (0); 1975} 1976 1977 1978/* 1979 * NAME: diAllocBit(imap,iagp,ino) 1980 * 1981 * FUNCTION: allocate a backed inode from an iag. 1982 * 1983 * this routine performs the mechanics of allocating a 1984 * specified inode from a backed extent. 1985 * 1986 * if the inode to be allocated represents the last free 1987 * inode within the iag, the iag will be removed from the 1988 * ag free inode list. 1989 * 1990 * a careful update approach is used to provide consistency 1991 * in the face of updates to multiple buffers. under this 1992 * approach, all required buffers are obtained before making 1993 * any updates and are held all are updates are complete. 1994 * 1995 * PRE CONDITION: Already have buffer lock on iagp. Already have AG lock on 1996 * this AG. Must have read lock on imap inode. 1997 * 1998 * PARAMETERS: 1999 * imap - pointer to inode map control structure. 2000 * iagp - pointer to iag. 2001 * ino - inode number to be allocated within the iag. 2002 * 2003 * RETURN VALUES: 2004 * 0 - success. 2005 * -ENOSPC - insufficient disk resources. 2006 * -EIO - i/o error. 2007 */ 2008static int diAllocBit(struct inomap * imap, struct iag * iagp, int ino) 2009{ 2010 int extno, bitno, agno, sword, rc; 2011 struct metapage *amp = NULL, *bmp = NULL; 2012 struct iag *aiagp = NULL, *biagp = NULL; 2013 u32 mask; 2014 2015 /* check if this is the last free inode within the iag. 2016 * if so, it will have to be removed from the ag free 2017 * inode list, so get the iags preceding and following 2018 * it on the list. 
2019 */ 2020 if (iagp->nfreeinos == cpu_to_le32(1)) { 2021 if ((int) le32_to_cpu(iagp->inofreefwd) >= 0) { 2022 if ((rc = 2023 diIAGRead(imap, le32_to_cpu(iagp->inofreefwd), 2024 &))) 2025 return (rc); 2026 aiagp = (struct iag *) amp->data; 2027 } 2028 2029 if ((int) le32_to_cpu(iagp->inofreeback) >= 0) { 2030 if ((rc = 2031 diIAGRead(imap, 2032 le32_to_cpu(iagp->inofreeback), 2033 &bmp))) { 2034 if (amp) 2035 release_metapage(amp); 2036 return (rc); 2037 } 2038 biagp = (struct iag *) bmp->data; 2039 } 2040 } 2041 2042 /* get the ag number, extent number, inode number within 2043 * the extent. 2044 */ 2045 agno = BLKTOAG(le64_to_cpu(iagp->agstart), JFS_SBI(imap->im_ipimap->i_sb)); 2046 extno = ino >> L2INOSPEREXT; 2047 bitno = ino & (INOSPEREXT - 1); 2048 2049 /* compute the mask for setting the map. 2050 */ 2051 mask = HIGHORDER >> bitno; 2052 2053 /* the inode should be free and backed. 2054 */ 2055 if (((le32_to_cpu(iagp->pmap[extno]) & mask) != 0) || 2056 ((le32_to_cpu(iagp->wmap[extno]) & mask) != 0) || 2057 (addressPXD(&iagp->inoext[extno]) == 0)) { 2058 if (amp) 2059 release_metapage(amp); 2060 if (bmp) 2061 release_metapage(bmp); 2062 2063 jfs_error(imap->im_ipimap->i_sb, "iag inconsistent\n"); 2064 return -EIO; 2065 } 2066 2067 /* mark the inode as allocated in the working map. 2068 */ 2069 iagp->wmap[extno] |= cpu_to_le32(mask); 2070 2071 /* check if all inodes within the extent are now 2072 * allocated. if so, update the free inode summary 2073 * map to reflect this. 2074 */ 2075 if (iagp->wmap[extno] == cpu_to_le32(ONES)) { 2076 sword = extno >> L2EXTSPERSUM; 2077 bitno = extno & (EXTSPERSUM - 1); 2078 iagp->inosmap[sword] |= cpu_to_le32(HIGHORDER >> bitno); 2079 } 2080 2081 /* if this was the last free inode in the iag, remove the 2082 * iag from the ag free inode list. 
2083 */ 2084 if (iagp->nfreeinos == cpu_to_le32(1)) { 2085 if (amp) { 2086 aiagp->inofreeback = iagp->inofreeback; 2087 write_metapage(amp); 2088 } 2089 2090 if (bmp) { 2091 biagp->inofreefwd = iagp->inofreefwd; 2092 write_metapage(bmp); 2093 } else { 2094 imap->im_agctl[agno].inofree = 2095 le32_to_cpu(iagp->inofreefwd); 2096 } 2097 iagp->inofreefwd = iagp->inofreeback = cpu_to_le32(-1); 2098 } 2099 2100 /* update the free inode count at the iag, ag, inode 2101 * map levels. 2102 */ 2103 le32_add_cpu(&iagp->nfreeinos, -1); 2104 imap->im_agctl[agno].numfree -= 1; 2105 atomic_dec(&imap->im_numfree); 2106 2107 return (0); 2108} 2109 2110 2111/* 2112 * NAME: diNewExt(imap,iagp,extno) 2113 * 2114 * FUNCTION: initialize a new extent of inodes for an iag, allocating 2115 * the first inode of the extent for use for the current 2116 * allocation request. 2117 * 2118 * disk resources are allocated for the new extent of inodes 2119 * and the inodes themselves are initialized to reflect their 2120 * existence within the extent (i.e. their inode numbers and 2121 * inode extent addresses are set) and their initial state 2122 * (mode and link count are set to zero). 2123 * 2124 * if the iag is new, it is not yet on an ag extent free list 2125 * but will now be placed on this list. 2126 * 2127 * if the allocation of the new extent causes the iag to 2128 * have no free extent, the iag will be removed from the 2129 * ag extent free list. 2130 * 2131 * if the iag has no free backed inodes, it will be placed 2132 * on the ag free inode list, since the addition of the new 2133 * extent will now cause it to have free inodes. 2134 * 2135 * a careful update approach is used to provide consistency 2136 * (i.e. list consistency) in the face of updates to multiple 2137 * buffers. under this approach, all required buffers are 2138 * obtained before making any updates and are held until all 2139 * updates are complete. 2140 * 2141 * PRE CONDITION: Already have buffer lock on iagp. 
Already have AG lock on 2142 * this AG. Must have read lock on imap inode. 2143 * 2144 * PARAMETERS: 2145 * imap - pointer to inode map control structure. 2146 * iagp - pointer to iag. 2147 * extno - extent number. 2148 * 2149 * RETURN VALUES: 2150 * 0 - success. 2151 * -ENOSPC - insufficient disk resources. 2152 * -EIO - i/o error. 2153 */ 2154static int diNewExt(struct inomap * imap, struct iag * iagp, int extno) 2155{ 2156 int agno, iagno, fwd, back, freei = 0, sword, rc; 2157 struct iag *aiagp = NULL, *biagp = NULL, *ciagp = NULL; 2158 struct metapage *amp, *bmp, *cmp, *dmp; 2159 struct inode *ipimap; 2160 s64 blkno, hint; 2161 int i, j; 2162 u32 mask; 2163 ino_t ino; 2164 struct dinode *dp; 2165 struct jfs_sb_info *sbi; 2166 2167 /* better have free extents. 2168 */ 2169 if (!iagp->nfreeexts) { 2170 jfs_error(imap->im_ipimap->i_sb, "no free extents\n"); 2171 return -EIO; 2172 } 2173 2174 /* get the inode map inode. 2175 */ 2176 ipimap = imap->im_ipimap; 2177 sbi = JFS_SBI(ipimap->i_sb); 2178 2179 amp = bmp = cmp = NULL; 2180 2181 /* get the ag and iag numbers for this iag. 2182 */ 2183 agno = BLKTOAG(le64_to_cpu(iagp->agstart), sbi); 2184 if (agno >= MAXAG || agno < 0) 2185 return -EIO; 2186 2187 iagno = le32_to_cpu(iagp->iagnum); 2188 2189 /* check if this is the last free extent within the 2190 * iag. if so, the iag must be removed from the ag 2191 * free extent list, so get the iags preceding and 2192 * following the iag on this list. 2193 */ 2194 if (iagp->nfreeexts == cpu_to_le32(1)) { 2195 if ((fwd = le32_to_cpu(iagp->extfreefwd)) >= 0) { 2196 if ((rc = diIAGRead(imap, fwd, &))) 2197 return (rc); 2198 aiagp = (struct iag *) amp->data; 2199 } 2200 2201 if ((back = le32_to_cpu(iagp->extfreeback)) >= 0) { 2202 if ((rc = diIAGRead(imap, back, &bmp))) 2203 goto error_out; 2204 biagp = (struct iag *) bmp->data; 2205 } 2206 } else { 2207 /* the iag has free extents. 
if all extents are free 2208 * (as is the case for a newly allocated iag), the iag 2209 * must be added to the ag free extent list, so get 2210 * the iag at the head of the list in preparation for 2211 * adding this iag to this list. 2212 */ 2213 fwd = back = -1; 2214 if (iagp->nfreeexts == cpu_to_le32(EXTSPERIAG)) { 2215 if ((fwd = imap->im_agctl[agno].extfree) >= 0) { 2216 if ((rc = diIAGRead(imap, fwd, &))) 2217 goto error_out; 2218 aiagp = (struct iag *) amp->data; 2219 } 2220 } 2221 } 2222 2223 /* check if the iag has no free inodes. if so, the iag 2224 * will have to be added to the ag free inode list, so get 2225 * the iag at the head of the list in preparation for 2226 * adding this iag to this list. in doing this, we must 2227 * check if we already have the iag at the head of 2228 * the list in hand. 2229 */ 2230 if (iagp->nfreeinos == 0) { 2231 freei = imap->im_agctl[agno].inofree; 2232 2233 if (freei >= 0) { 2234 if (freei == fwd) { 2235 ciagp = aiagp; 2236 } else if (freei == back) { 2237 ciagp = biagp; 2238 } else { 2239 if ((rc = diIAGRead(imap, freei, &cmp))) 2240 goto error_out; 2241 ciagp = (struct iag *) cmp->data; 2242 } 2243 if (ciagp == NULL) { 2244 jfs_error(imap->im_ipimap->i_sb, 2245 "ciagp == NULL\n"); 2246 rc = -EIO; 2247 goto error_out; 2248 } 2249 } 2250 } 2251 2252 /* allocate disk space for the inode extent. 2253 */ 2254 if ((extno == 0) || (addressPXD(&iagp->inoext[extno - 1]) == 0)) 2255 hint = ((s64) agno << sbi->bmap->db_agl2size) - 1; 2256 else 2257 hint = addressPXD(&iagp->inoext[extno - 1]) + 2258 lengthPXD(&iagp->inoext[extno - 1]) - 1; 2259 2260 if ((rc = dbAlloc(ipimap, hint, (s64) imap->im_nbperiext, &blkno))) 2261 goto error_out; 2262 2263 /* compute the inode number of the first inode within the 2264 * extent. 2265 */ 2266 ino = (iagno << L2INOSPERIAG) + (extno << L2INOSPEREXT); 2267 2268 /* initialize the inodes within the newly allocated extent a 2269 * page at a time. 
2270 */ 2271 for (i = 0; i < imap->im_nbperiext; i += sbi->nbperpage) { 2272 /* get a buffer for this page of disk inodes. 2273 */ 2274 dmp = get_metapage(ipimap, blkno + i, PSIZE, 1); 2275 if (dmp == NULL) { 2276 rc = -EIO; 2277 goto error_out; 2278 } 2279 dp = (struct dinode *) dmp->data; 2280 2281 /* initialize the inode number, mode, link count and 2282 * inode extent address. 2283 */ 2284 for (j = 0; j < INOSPERPAGE; j++, dp++, ino++) { 2285 dp->di_inostamp = cpu_to_le32(sbi->inostamp); 2286 dp->di_number = cpu_to_le32(ino); 2287 dp->di_fileset = cpu_to_le32(FILESYSTEM_I); 2288 dp->di_mode = 0; 2289 dp->di_nlink = 0; 2290 PXDaddress(&(dp->di_ixpxd), blkno); 2291 PXDlength(&(dp->di_ixpxd), imap->im_nbperiext); 2292 } 2293 write_metapage(dmp); 2294 } 2295 2296 /* if this is the last free extent within the iag, remove the 2297 * iag from the ag free extent list. 2298 */ 2299 if (iagp->nfreeexts == cpu_to_le32(1)) { 2300 if (fwd >= 0) 2301 aiagp->extfreeback = iagp->extfreeback; 2302 2303 if (back >= 0) 2304 biagp->extfreefwd = iagp->extfreefwd; 2305 else 2306 imap->im_agctl[agno].extfree = 2307 le32_to_cpu(iagp->extfreefwd); 2308 2309 iagp->extfreefwd = iagp->extfreeback = cpu_to_le32(-1); 2310 } else { 2311 /* if the iag has all free extents (newly allocated iag), 2312 * add the iag to the ag free extent list. 2313 */ 2314 if (iagp->nfreeexts == cpu_to_le32(EXTSPERIAG)) { 2315 if (fwd >= 0) 2316 aiagp->extfreeback = cpu_to_le32(iagno); 2317 2318 iagp->extfreefwd = cpu_to_le32(fwd); 2319 iagp->extfreeback = cpu_to_le32(-1); 2320 imap->im_agctl[agno].extfree = iagno; 2321 } 2322 } 2323 2324 /* if the iag has no free inodes, add the iag to the 2325 * ag free inode list. 
2326 */ 2327 if (iagp->nfreeinos == 0) { 2328 if (freei >= 0) 2329 ciagp->inofreeback = cpu_to_le32(iagno); 2330 2331 iagp->inofreefwd = 2332 cpu_to_le32(imap->im_agctl[agno].inofree); 2333 iagp->inofreeback = cpu_to_le32(-1); 2334 imap->im_agctl[agno].inofree = iagno; 2335 } 2336 2337 /* initialize the extent descriptor of the extent. */ 2338 PXDlength(&iagp->inoext[extno], imap->im_nbperiext); 2339 PXDaddress(&iagp->inoext[extno], blkno); 2340 2341 /* initialize the working and persistent map of the extent. 2342 * the working map will be initialized such that 2343 * it indicates the first inode of the extent is allocated. 2344 */ 2345 iagp->wmap[extno] = cpu_to_le32(HIGHORDER); 2346 iagp->pmap[extno] = 0; 2347 2348 /* update the free inode and free extent summary maps 2349 * for the extent to indicate the extent has free inodes 2350 * and no longer represents a free extent. 2351 */ 2352 sword = extno >> L2EXTSPERSUM; 2353 mask = HIGHORDER >> (extno & (EXTSPERSUM - 1)); 2354 iagp->extsmap[sword] |= cpu_to_le32(mask); 2355 iagp->inosmap[sword] &= cpu_to_le32(~mask); 2356 2357 /* update the free inode and free extent counts for the 2358 * iag. 2359 */ 2360 le32_add_cpu(&iagp->nfreeinos, (INOSPEREXT - 1)); 2361 le32_add_cpu(&iagp->nfreeexts, -1); 2362 2363 /* update the free and backed inode counts for the ag. 2364 */ 2365 imap->im_agctl[agno].numfree += (INOSPEREXT - 1); 2366 imap->im_agctl[agno].numinos += INOSPEREXT; 2367 2368 /* update the free and backed inode counts for the inode map. 2369 */ 2370 atomic_add(INOSPEREXT - 1, &imap->im_numfree); 2371 atomic_add(INOSPEREXT, &imap->im_numinos); 2372 2373 /* write the iags. 2374 */ 2375 if (amp) 2376 write_metapage(amp); 2377 if (bmp) 2378 write_metapage(bmp); 2379 if (cmp) 2380 write_metapage(cmp); 2381 2382 return (0); 2383 2384 error_out: 2385 2386 /* release the iags. 
2387 */ 2388 if (amp) 2389 release_metapage(amp); 2390 if (bmp) 2391 release_metapage(bmp); 2392 if (cmp) 2393 release_metapage(cmp); 2394 2395 return (rc); 2396} 2397 2398 2399/* 2400 * NAME: diNewIAG(imap,iagnop,agno) 2401 * 2402 * FUNCTION: allocate a new iag for an allocation group. 2403 * 2404 * first tries to allocate the iag from the inode map 2405 * iagfree list: 2406 * if the list has free iags, the head of the list is removed 2407 * and returned to satisfy the request. 2408 * if the inode map's iag free list is empty, the inode map 2409 * is extended to hold a new iag. this new iag is initialized 2410 * and returned to satisfy the request. 2411 * 2412 * PARAMETERS: 2413 * imap - pointer to inode map control structure. 2414 * iagnop - pointer to an iag number set with the number of the 2415 * newly allocated iag upon successful return. 2416 * agno - allocation group number. 2417 * bpp - Buffer pointer to be filled in with new IAG's buffer 2418 * 2419 * RETURN VALUES: 2420 * 0 - success. 2421 * -ENOSPC - insufficient disk resources. 2422 * -EIO - i/o error. 2423 * 2424 * serialization: 2425 * AG lock held on entry/exit; 2426 * write lock on the map is held inside; 2427 * read lock on the map is held on successful completion; 2428 * 2429 * note: new iag transaction: 2430 * . synchronously write iag; 2431 * . write log of xtree and inode of imap; 2432 * . commit; 2433 * . synchronous write of xtree (right to left, bottom to top); 2434 * . at start of logredo(): init in-memory imap with one additional iag page; 2435 * . 
at end of logredo(): re-read imap inode to determine 2436 * new imap size; 2437 */ 2438static int 2439diNewIAG(struct inomap * imap, int *iagnop, int agno, struct metapage ** mpp) 2440{ 2441 int rc; 2442 int iagno, i, xlen; 2443 struct inode *ipimap; 2444 struct super_block *sb; 2445 struct jfs_sb_info *sbi; 2446 struct metapage *mp; 2447 struct iag *iagp; 2448 s64 xaddr = 0; 2449 s64 blkno; 2450 tid_t tid; 2451 struct inode *iplist[1]; 2452 2453 /* pick up pointers to the inode map and mount inodes */ 2454 ipimap = imap->im_ipimap; 2455 sb = ipimap->i_sb; 2456 sbi = JFS_SBI(sb); 2457 2458 /* acquire the free iag lock */ 2459 IAGFREE_LOCK(imap); 2460 2461 /* if there are any iags on the inode map free iag list, 2462 * allocate the iag from the head of the list. 2463 */ 2464 if (imap->im_freeiag >= 0) { 2465 /* pick up the iag number at the head of the list */ 2466 iagno = imap->im_freeiag; 2467 2468 /* determine the logical block number of the iag */ 2469 blkno = IAGTOLBLK(iagno, sbi->l2nbperpage); 2470 } else { 2471 /* no free iags. the inode map will have to be extented 2472 * to include a new iag. 2473 */ 2474 2475 /* acquire inode map lock */ 2476 IWRITE_LOCK(ipimap, RDWRLOCK_IMAP); 2477 2478 if (ipimap->i_size >> L2PSIZE != imap->im_nextiag + 1) { 2479 IWRITE_UNLOCK(ipimap); 2480 IAGFREE_UNLOCK(imap); 2481 jfs_error(imap->im_ipimap->i_sb, 2482 "ipimap->i_size is wrong\n"); 2483 return -EIO; 2484 } 2485 2486 2487 /* get the next available iag number */ 2488 iagno = imap->im_nextiag; 2489 2490 /* make sure that we have not exceeded the maximum inode 2491 * number limit. 2492 */ 2493 if (iagno > (MAXIAGS - 1)) { 2494 /* release the inode map lock */ 2495 IWRITE_UNLOCK(ipimap); 2496 2497 rc = -ENOSPC; 2498 goto out; 2499 } 2500 2501 /* 2502 * synchronously append new iag page. 
2503 */ 2504 /* determine the logical address of iag page to append */ 2505 blkno = IAGTOLBLK(iagno, sbi->l2nbperpage); 2506 2507 /* Allocate extent for new iag page */ 2508 xlen = sbi->nbperpage; 2509 if ((rc = dbAlloc(ipimap, 0, (s64) xlen, &xaddr))) { 2510 /* release the inode map lock */ 2511 IWRITE_UNLOCK(ipimap); 2512 2513 goto out; 2514 } 2515 2516 /* 2517 * start transaction of update of the inode map 2518 * addressing structure pointing to the new iag page; 2519 */ 2520 tid = txBegin(sb, COMMIT_FORCE); 2521 mutex_lock(&JFS_IP(ipimap)->commit_mutex); 2522 2523 /* update the inode map addressing structure to point to it */ 2524 if ((rc = 2525 xtInsert(tid, ipimap, 0, blkno, xlen, &xaddr, 0))) { 2526 txEnd(tid); 2527 mutex_unlock(&JFS_IP(ipimap)->commit_mutex); 2528 /* Free the blocks allocated for the iag since it was 2529 * not successfully added to the inode map 2530 */ 2531 dbFree(ipimap, xaddr, (s64) xlen); 2532 2533 /* release the inode map lock */ 2534 IWRITE_UNLOCK(ipimap); 2535 2536 goto out; 2537 } 2538 2539 /* update the inode map's inode to reflect the extension */ 2540 ipimap->i_size += PSIZE; 2541 inode_add_bytes(ipimap, PSIZE); 2542 2543 /* assign a buffer for the page */ 2544 mp = get_metapage(ipimap, blkno, PSIZE, 0); 2545 if (!mp) { 2546 /* 2547 * This is very unlikely since we just created the 2548 * extent, but let's try to handle it correctly 2549 */ 2550 xtTruncate(tid, ipimap, ipimap->i_size - PSIZE, 2551 COMMIT_PWMAP); 2552 2553 txAbort(tid, 0); 2554 txEnd(tid); 2555 mutex_unlock(&JFS_IP(ipimap)->commit_mutex); 2556 2557 /* release the inode map lock */ 2558 IWRITE_UNLOCK(ipimap); 2559 2560 rc = -EIO; 2561 goto out; 2562 } 2563 iagp = (struct iag *) mp->data; 2564 2565 /* init the iag */ 2566 memset(iagp, 0, sizeof(struct iag)); 2567 iagp->iagnum = cpu_to_le32(iagno); 2568 iagp->inofreefwd = iagp->inofreeback = cpu_to_le32(-1); 2569 iagp->extfreefwd = iagp->extfreeback = cpu_to_le32(-1); 2570 iagp->iagfree = cpu_to_le32(-1); 2571 
iagp->nfreeinos = 0; 2572 iagp->nfreeexts = cpu_to_le32(EXTSPERIAG); 2573 2574 /* initialize the free inode summary map (free extent 2575 * summary map initialization handled by bzero). 2576 */ 2577 for (i = 0; i < SMAPSZ; i++) 2578 iagp->inosmap[i] = cpu_to_le32(ONES); 2579 2580 /* 2581 * Write and sync the metapage 2582 */ 2583 flush_metapage(mp); 2584 2585 /* 2586 * txCommit(COMMIT_FORCE) will synchronously write address 2587 * index pages and inode after commit in careful update order 2588 * of address index pages (right to left, bottom up); 2589 */ 2590 iplist[0] = ipimap; 2591 rc = txCommit(tid, 1, &iplist[0], COMMIT_FORCE); 2592 2593 txEnd(tid); 2594 mutex_unlock(&JFS_IP(ipimap)->commit_mutex); 2595 2596 duplicateIXtree(sb, blkno, xlen, &xaddr); 2597 2598 /* update the next available iag number */ 2599 imap->im_nextiag += 1; 2600 2601 /* Add the iag to the iag free list so we don't lose the iag 2602 * if a failure happens now. 2603 */ 2604 imap->im_freeiag = iagno; 2605 2606 /* Until we have logredo working, we want the imap inode & 2607 * control page to be up to date. 2608 */ 2609 diSync(ipimap); 2610 2611 /* release the inode map lock */ 2612 IWRITE_UNLOCK(ipimap); 2613 } 2614 2615 /* obtain read lock on map */ 2616 IREAD_LOCK(ipimap, RDWRLOCK_IMAP); 2617 2618 /* read the iag */ 2619 if ((rc = diIAGRead(imap, iagno, &mp))) { 2620 IREAD_UNLOCK(ipimap); 2621 rc = -EIO; 2622 goto out; 2623 } 2624 iagp = (struct iag *) mp->data; 2625 2626 /* remove the iag from the iag free list */ 2627 imap->im_freeiag = le32_to_cpu(iagp->iagfree); 2628 iagp->iagfree = cpu_to_le32(-1); 2629 2630 /* set the return iag number and buffer pointer */ 2631 *iagnop = iagno; 2632 *mpp = mp; 2633 2634 out: 2635 /* release the iag free lock */ 2636 IAGFREE_UNLOCK(imap); 2637 2638 return (rc); 2639} 2640 2641/* 2642 * NAME: diIAGRead() 2643 * 2644 * FUNCTION: get the buffer for the specified iag within a fileset 2645 * or aggregate inode map. 
2646 * 2647 * PARAMETERS: 2648 * imap - pointer to inode map control structure. 2649 * iagno - iag number. 2650 * bpp - point to buffer pointer to be filled in on successful 2651 * exit. 2652 * 2653 * SERIALIZATION: 2654 * must have read lock on imap inode 2655 * (When called by diExtendFS, the filesystem is quiesced, therefore 2656 * the read lock is unnecessary.) 2657 * 2658 * RETURN VALUES: 2659 * 0 - success. 2660 * -EIO - i/o error. 2661 */ 2662static int diIAGRead(struct inomap * imap, int iagno, struct metapage ** mpp) 2663{ 2664 struct inode *ipimap = imap->im_ipimap; 2665 s64 blkno; 2666 2667 /* compute the logical block number of the iag. */ 2668 blkno = IAGTOLBLK(iagno, JFS_SBI(ipimap->i_sb)->l2nbperpage); 2669 2670 /* read the iag. */ 2671 *mpp = read_metapage(ipimap, blkno, PSIZE, 0); 2672 if (*mpp == NULL) { 2673 return -EIO; 2674 } 2675 2676 return (0); 2677} 2678 2679/* 2680 * NAME: diFindFree() 2681 * 2682 * FUNCTION: find the first free bit in a word starting at 2683 * the specified bit position. 2684 * 2685 * PARAMETERS: 2686 * word - word to be examined. 2687 * start - starting bit position. 2688 * 2689 * RETURN VALUES: 2690 * bit position of first free bit in the word or 32 if 2691 * no free bits were found. 2692 */ 2693static int diFindFree(u32 word, int start) 2694{ 2695 int bitno; 2696 assert(start < 32); 2697 /* scan the word for the first free bit. */ 2698 for (word <<= start, bitno = start; bitno < 32; 2699 bitno++, word <<= 1) { 2700 if ((word & HIGHORDER) == 0) 2701 break; 2702 } 2703 return (bitno); 2704} 2705 2706/* 2707 * NAME: diUpdatePMap() 2708 * 2709 * FUNCTION: Update the persistent map in an IAG for the allocation or 2710 * freeing of the specified inode. 2711 * 2712 * PRE CONDITIONS: Working map has already been updated for allocate. 
2713 * 2714 * PARAMETERS: 2715 * ipimap - Incore inode map inode 2716 * inum - Number of inode to mark in permanent map 2717 * is_free - If 'true' indicates inode should be marked freed, otherwise 2718 * indicates inode should be marked allocated. 2719 * 2720 * RETURN VALUES: 2721 * 0 for success 2722 */ 2723int 2724diUpdatePMap(struct inode *ipimap, 2725 unsigned long inum, bool is_free, struct tblock * tblk) 2726{ 2727 int rc; 2728 struct iag *iagp; 2729 struct metapage *mp; 2730 int iagno, ino, extno, bitno; 2731 struct inomap *imap; 2732 u32 mask; 2733 struct jfs_log *log; 2734 int lsn, difft, diffp; 2735 unsigned long flags; 2736 2737 imap = JFS_IP(ipimap)->i_imap; 2738 /* get the iag number containing the inode */ 2739 iagno = INOTOIAG(inum); 2740 /* make sure that the iag is contained within the map */ 2741 if (iagno >= imap->im_nextiag) { 2742 jfs_error(ipimap->i_sb, "the iag is outside the map\n"); 2743 return -EIO; 2744 } 2745 /* read the iag */ 2746 IREAD_LOCK(ipimap, RDWRLOCK_IMAP); 2747 rc = diIAGRead(imap, iagno, &mp); 2748 IREAD_UNLOCK(ipimap); 2749 if (rc) 2750 return (rc); 2751 metapage_wait_for_io(mp); 2752 iagp = (struct iag *) mp->data; 2753 /* get the inode number and extent number of the inode within 2754 * the iag and the inode number within the extent. 
2755 */ 2756 ino = inum & (INOSPERIAG - 1); 2757 extno = ino >> L2INOSPEREXT; 2758 bitno = ino & (INOSPEREXT - 1); 2759 mask = HIGHORDER >> bitno; 2760 /* 2761 * mark the inode free in persistent map: 2762 */ 2763 if (is_free) { 2764 /* The inode should have been allocated both in working 2765 * map and in persistent map; 2766 * the inode will be freed from working map at the release 2767 * of last reference release; 2768 */ 2769 if (!(le32_to_cpu(iagp->wmap[extno]) & mask)) { 2770 jfs_error(ipimap->i_sb, 2771 "inode %ld not marked as allocated in wmap!\n", 2772 inum); 2773 } 2774 if (!(le32_to_cpu(iagp->pmap[extno]) & mask)) { 2775 jfs_error(ipimap->i_sb, 2776 "inode %ld not marked as allocated in pmap!\n", 2777 inum); 2778 } 2779 /* update the bitmap for the extent of the freed inode */ 2780 iagp->pmap[extno] &= cpu_to_le32(~mask); 2781 } 2782 /* 2783 * mark the inode allocated in persistent map: 2784 */ 2785 else { 2786 /* The inode should be already allocated in the working map 2787 * and should be free in persistent map; 2788 */ 2789 if (!(le32_to_cpu(iagp->wmap[extno]) & mask)) { 2790 release_metapage(mp); 2791 jfs_error(ipimap->i_sb, 2792 "the inode is not allocated in the working map\n"); 2793 return -EIO; 2794 } 2795 if ((le32_to_cpu(iagp->pmap[extno]) & mask) != 0) { 2796 release_metapage(mp); 2797 jfs_error(ipimap->i_sb, 2798 "the inode is not free in the persistent map\n"); 2799 return -EIO; 2800 } 2801 /* update the bitmap for the extent of the allocated inode */ 2802 iagp->pmap[extno] |= cpu_to_le32(mask); 2803 } 2804 /* 2805 * update iag lsn 2806 */ 2807 lsn = tblk->lsn; 2808 log = JFS_SBI(tblk->sb)->log; 2809 LOGSYNC_LOCK(log, flags); 2810 if (mp->lsn != 0) { 2811 /* inherit older/smaller lsn */ 2812 logdiff(difft, lsn, log); 2813 logdiff(diffp, mp->lsn, log); 2814 if (difft < diffp) { 2815 mp->lsn = lsn; 2816 /* move mp after tblock in logsync list */ 2817 list_move(&mp->synclist, &tblk->synclist); 2818 } 2819 /* inherit younger/larger clsn */ 2820 
assert(mp->clsn); 2821 logdiff(difft, tblk->clsn, log); 2822 logdiff(diffp, mp->clsn, log); 2823 if (difft > diffp) 2824 mp->clsn = tblk->clsn; 2825 } else { 2826 mp->log = log; 2827 mp->lsn = lsn; 2828 /* insert mp after tblock in logsync list */ 2829 log->count++; 2830 list_add(&mp->synclist, &tblk->synclist); 2831 mp->clsn = tblk->clsn; 2832 } 2833 LOGSYNC_UNLOCK(log, flags); 2834 write_metapage(mp); 2835 return (0); 2836} 2837 2838/* 2839 * diExtendFS() 2840 * 2841 * function: update imap for extendfs(); 2842 * 2843 * note: AG size has been increased s.t. each k old contiguous AGs are 2844 * coalesced into a new AG; 2845 */ 2846int diExtendFS(struct inode *ipimap, struct inode *ipbmap) 2847{ 2848 int rc, rcx = 0; 2849 struct inomap *imap = JFS_IP(ipimap)->i_imap; 2850 struct iag *iagp = NULL, *hiagp = NULL; 2851 struct bmap *mp = JFS_SBI(ipbmap->i_sb)->bmap; 2852 struct metapage *bp, *hbp; 2853 int i, n, head; 2854 int numinos, xnuminos = 0, xnumfree = 0; 2855 s64 agstart; 2856 2857 jfs_info("diExtendFS: nextiag:%d numinos:%d numfree:%d", 2858 imap->im_nextiag, atomic_read(&imap->im_numinos), 2859 atomic_read(&imap->im_numfree)); 2860 2861 /* 2862 * reconstruct imap 2863 * 2864 * coalesce contiguous k (newAGSize/oldAGSize) AGs; 2865 * i.e., (AGi, ..., AGj) where i = k*n and j = k*(n+1) - 1 to AGn; 2866 * note: new AG size = old AG size * (2**x). 2867 */ 2868 2869 /* init per AG control information im_agctl[] */ 2870 for (i = 0; i < MAXAG; i++) { 2871 imap->im_agctl[i].inofree = -1; 2872 imap->im_agctl[i].extfree = -1; 2873 imap->im_agctl[i].numinos = 0; /* number of backed inodes */ 2874 imap->im_agctl[i].numfree = 0; /* number of free backed inodes */ 2875 } 2876 2877 /* 2878 * process each iag page of the map. 
2879 * 2880 * rebuild AG Free Inode List, AG Free Inode Extent List; 2881 */ 2882 for (i = 0; i < imap->im_nextiag; i++) { 2883 if ((rc = diIAGRead(imap, i, &bp))) { 2884 rcx = rc; 2885 continue; 2886 } 2887 iagp = (struct iag *) bp->data; 2888 if (le32_to_cpu(iagp->iagnum) != i) { 2889 release_metapage(bp); 2890 jfs_error(ipimap->i_sb, "unexpected value of iagnum\n"); 2891 return -EIO; 2892 } 2893 2894 /* leave free iag in the free iag list */ 2895 if (iagp->nfreeexts == cpu_to_le32(EXTSPERIAG)) { 2896 release_metapage(bp); 2897 continue; 2898 } 2899 2900 agstart = le64_to_cpu(iagp->agstart); 2901 n = agstart >> mp->db_agl2size; 2902 iagp->agstart = cpu_to_le64((s64)n << mp->db_agl2size); 2903 2904 /* compute backed inodes */ 2905 numinos = (EXTSPERIAG - le32_to_cpu(iagp->nfreeexts)) 2906 << L2INOSPEREXT; 2907 if (numinos > 0) { 2908 /* merge AG backed inodes */ 2909 imap->im_agctl[n].numinos += numinos; 2910 xnuminos += numinos; 2911 } 2912 2913 /* if any backed free inodes, insert at AG free inode list */ 2914 if ((int) le32_to_cpu(iagp->nfreeinos) > 0) { 2915 if ((head = imap->im_agctl[n].inofree) == -1) { 2916 iagp->inofreefwd = cpu_to_le32(-1); 2917 iagp->inofreeback = cpu_to_le32(-1); 2918 } else { 2919 if ((rc = diIAGRead(imap, head, &hbp))) { 2920 rcx = rc; 2921 goto nextiag; 2922 } 2923 hiagp = (struct iag *) hbp->data; 2924 hiagp->inofreeback = iagp->iagnum; 2925 iagp->inofreefwd = cpu_to_le32(head); 2926 iagp->inofreeback = cpu_to_le32(-1); 2927 write_metapage(hbp); 2928 } 2929 2930 imap->im_agctl[n].inofree = 2931 le32_to_cpu(iagp->iagnum); 2932 2933 /* merge AG backed free inodes */ 2934 imap->im_agctl[n].numfree += 2935 le32_to_cpu(iagp->nfreeinos); 2936 xnumfree += le32_to_cpu(iagp->nfreeinos); 2937 } 2938 2939 /* if any free extents, insert at AG free extent list */ 2940 if (le32_to_cpu(iagp->nfreeexts) > 0) { 2941 if ((head = imap->im_agctl[n].extfree) == -1) { 2942 iagp->extfreefwd = cpu_to_le32(-1); 2943 iagp->extfreeback = cpu_to_le32(-1); 2944 
} else { 2945 if ((rc = diIAGRead(imap, head, &hbp))) { 2946 rcx = rc; 2947 goto nextiag; 2948 } 2949 hiagp = (struct iag *) hbp->data; 2950 hiagp->extfreeback = iagp->iagnum; 2951 iagp->extfreefwd = cpu_to_le32(head); 2952 iagp->extfreeback = cpu_to_le32(-1); 2953 write_metapage(hbp); 2954 } 2955 2956 imap->im_agctl[n].extfree = 2957 le32_to_cpu(iagp->iagnum); 2958 } 2959 2960 nextiag: 2961 write_metapage(bp); 2962 } 2963 2964 if (xnuminos != atomic_read(&imap->im_numinos) || 2965 xnumfree != atomic_read(&imap->im_numfree)) { 2966 jfs_error(ipimap->i_sb, "numinos or numfree incorrect\n"); 2967 return -EIO; 2968 } 2969 2970 return rcx; 2971} 2972 2973 2974/* 2975 * duplicateIXtree() 2976 * 2977 * serialization: IWRITE_LOCK held on entry/exit 2978 * 2979 * note: shadow page with regular inode (rel.2); 2980 */ 2981static void duplicateIXtree(struct super_block *sb, s64 blkno, 2982 int xlen, s64 *xaddr) 2983{ 2984 struct jfs_superblock *j_sb; 2985 struct buffer_head *bh; 2986 struct inode *ip; 2987 tid_t tid; 2988 2989 /* if AIT2 ipmap2 is bad, do not try to update it */ 2990 if (JFS_SBI(sb)->mntflag & JFS_BAD_SAIT) /* s_flag */ 2991 return; 2992 ip = diReadSpecial(sb, FILESYSTEM_I, 1); 2993 if (ip == NULL) { 2994 JFS_SBI(sb)->mntflag |= JFS_BAD_SAIT; 2995 if (readSuper(sb, &bh)) 2996 return; 2997 j_sb = (struct jfs_superblock *)bh->b_data; 2998 j_sb->s_flag |= cpu_to_le32(JFS_BAD_SAIT); 2999 3000 mark_buffer_dirty(bh); 3001 sync_dirty_buffer(bh); 3002 brelse(bh); 3003 return; 3004 } 3005 3006 /* start transaction */ 3007 tid = txBegin(sb, COMMIT_FORCE); 3008 /* update the inode map addressing structure to point to it */ 3009 if (xtInsert(tid, ip, 0, blkno, xlen, xaddr, 0)) { 3010 JFS_SBI(sb)->mntflag |= JFS_BAD_SAIT; 3011 txAbort(tid, 1); 3012 goto cleanup; 3013 3014 } 3015 /* update the inode map's inode to reflect the extension */ 3016 ip->i_size += PSIZE; 3017 inode_add_bytes(ip, PSIZE); 3018 txCommit(tid, 1, &ip, COMMIT_FORCE); 3019 cleanup: 3020 txEnd(tid); 3021 
diFreeSpecial(ip); 3022} 3023 3024/* 3025 * NAME: copy_from_dinode() 3026 * 3027 * FUNCTION: Copies inode info from disk inode to in-memory inode 3028 * 3029 * RETURN VALUES: 3030 * 0 - success 3031 * -ENOMEM - insufficient memory 3032 */ 3033static int copy_from_dinode(struct dinode * dip, struct inode *ip) 3034{ 3035 struct jfs_inode_info *jfs_ip = JFS_IP(ip); 3036 struct jfs_sb_info *sbi = JFS_SBI(ip->i_sb); 3037 3038 jfs_ip->fileset = le32_to_cpu(dip->di_fileset); 3039 jfs_ip->mode2 = le32_to_cpu(dip->di_mode); 3040 jfs_set_inode_flags(ip); 3041 3042 ip->i_mode = le32_to_cpu(dip->di_mode) & 0xffff; 3043 if (sbi->umask != -1) { 3044 ip->i_mode = (ip->i_mode & ~0777) | (0777 & ~sbi->umask); 3045 /* For directories, add x permission if r is allowed by umask */ 3046 if (S_ISDIR(ip->i_mode)) { 3047 if (ip->i_mode & 0400) 3048 ip->i_mode |= 0100; 3049 if (ip->i_mode & 0040) 3050 ip->i_mode |= 0010; 3051 if (ip->i_mode & 0004) 3052 ip->i_mode |= 0001; 3053 } 3054 } 3055 set_nlink(ip, le32_to_cpu(dip->di_nlink)); 3056 3057 jfs_ip->saved_uid = make_kuid(&init_user_ns, le32_to_cpu(dip->di_uid)); 3058 if (!uid_valid(sbi->uid)) 3059 ip->i_uid = jfs_ip->saved_uid; 3060 else { 3061 ip->i_uid = sbi->uid; 3062 } 3063 3064 jfs_ip->saved_gid = make_kgid(&init_user_ns, le32_to_cpu(dip->di_gid)); 3065 if (!gid_valid(sbi->gid)) 3066 ip->i_gid = jfs_ip->saved_gid; 3067 else { 3068 ip->i_gid = sbi->gid; 3069 } 3070 3071 ip->i_size = le64_to_cpu(dip->di_size); 3072 ip->i_atime.tv_sec = le32_to_cpu(dip->di_atime.tv_sec); 3073 ip->i_atime.tv_nsec = le32_to_cpu(dip->di_atime.tv_nsec); 3074 ip->i_mtime.tv_sec = le32_to_cpu(dip->di_mtime.tv_sec); 3075 ip->i_mtime.tv_nsec = le32_to_cpu(dip->di_mtime.tv_nsec); 3076 ip->i_ctime.tv_sec = le32_to_cpu(dip->di_ctime.tv_sec); 3077 ip->i_ctime.tv_nsec = le32_to_cpu(dip->di_ctime.tv_nsec); 3078 ip->i_blocks = LBLK2PBLK(ip->i_sb, le64_to_cpu(dip->di_nblocks)); 3079 ip->i_generation = le32_to_cpu(dip->di_gen); 3080 3081 jfs_ip->ixpxd = dip->di_ixpxd; 
/* in-memory pxd's are little-endian */ 3082 jfs_ip->acl = dip->di_acl; /* as are dxd's */ 3083 jfs_ip->ea = dip->di_ea; 3084 jfs_ip->next_index = le32_to_cpu(dip->di_next_index); 3085 jfs_ip->otime = le32_to_cpu(dip->di_otime.tv_sec); 3086 jfs_ip->acltype = le32_to_cpu(dip->di_acltype); 3087 3088 if (S_ISCHR(ip->i_mode) || S_ISBLK(ip->i_mode)) { 3089 jfs_ip->dev = le32_to_cpu(dip->di_rdev); 3090 ip->i_rdev = new_decode_dev(jfs_ip->dev); 3091 } 3092 3093 if (S_ISDIR(ip->i_mode)) { 3094 memcpy(&jfs_ip->i_dirtable, &dip->di_dirtable, 384); 3095 } else if (S_ISREG(ip->i_mode) || S_ISLNK(ip->i_mode)) { 3096 memcpy(&jfs_ip->i_xtroot, &dip->di_xtroot, 288); 3097 } else 3098 memcpy(&jfs_ip->i_inline_ea, &dip->di_inlineea, 128); 3099 3100 /* Zero the in-memory-only stuff */ 3101 jfs_ip->cflag = 0; 3102 jfs_ip->btindex = 0; 3103 jfs_ip->btorder = 0; 3104 jfs_ip->bxflag = 0; 3105 jfs_ip->blid = 0; 3106 jfs_ip->atlhead = 0; 3107 jfs_ip->atltail = 0; 3108 jfs_ip->xtlid = 0; 3109 return (0); 3110} 3111 3112/* 3113 * NAME: copy_to_dinode() 3114 * 3115 * FUNCTION: Copies inode info from in-memory inode to disk inode 3116 */ 3117static void copy_to_dinode(struct dinode * dip, struct inode *ip) 3118{ 3119 struct jfs_inode_info *jfs_ip = JFS_IP(ip); 3120 struct jfs_sb_info *sbi = JFS_SBI(ip->i_sb); 3121 3122 dip->di_fileset = cpu_to_le32(jfs_ip->fileset); 3123 dip->di_inostamp = cpu_to_le32(sbi->inostamp); 3124 dip->di_number = cpu_to_le32(ip->i_ino); 3125 dip->di_gen = cpu_to_le32(ip->i_generation); 3126 dip->di_size = cpu_to_le64(ip->i_size); 3127 dip->di_nblocks = cpu_to_le64(PBLK2LBLK(ip->i_sb, ip->i_blocks)); 3128 dip->di_nlink = cpu_to_le32(ip->i_nlink); 3129 if (!uid_valid(sbi->uid)) 3130 dip->di_uid = cpu_to_le32(i_uid_read(ip)); 3131 else 3132 dip->di_uid =cpu_to_le32(from_kuid(&init_user_ns, 3133 jfs_ip->saved_uid)); 3134 if (!gid_valid(sbi->gid)) 3135 dip->di_gid = cpu_to_le32(i_gid_read(ip)); 3136 else 3137 dip->di_gid = cpu_to_le32(from_kgid(&init_user_ns, 3138 
jfs_ip->saved_gid)); 3139 /* 3140 * mode2 is only needed for storing the higher order bits. 3141 * Trust i_mode for the lower order ones 3142 */ 3143 if (sbi->umask == -1) 3144 dip->di_mode = cpu_to_le32((jfs_ip->mode2 & 0xffff0000) | 3145 ip->i_mode); 3146 else /* Leave the original permissions alone */ 3147 dip->di_mode = cpu_to_le32(jfs_ip->mode2); 3148 3149 dip->di_atime.tv_sec = cpu_to_le32(ip->i_atime.tv_sec); 3150 dip->di_atime.tv_nsec = cpu_to_le32(ip->i_atime.tv_nsec); 3151 dip->di_ctime.tv_sec = cpu_to_le32(ip->i_ctime.tv_sec); 3152 dip->di_ctime.tv_nsec = cpu_to_le32(ip->i_ctime.tv_nsec); 3153 dip->di_mtime.tv_sec = cpu_to_le32(ip->i_mtime.tv_sec); 3154 dip->di_mtime.tv_nsec = cpu_to_le32(ip->i_mtime.tv_nsec); 3155 dip->di_ixpxd = jfs_ip->ixpxd; /* in-memory pxd's are little-endian */ 3156 dip->di_acl = jfs_ip->acl; /* as are dxd's */ 3157 dip->di_ea = jfs_ip->ea; 3158 dip->di_next_index = cpu_to_le32(jfs_ip->next_index); 3159 dip->di_otime.tv_sec = cpu_to_le32(jfs_ip->otime); 3160 dip->di_otime.tv_nsec = 0; 3161 dip->di_acltype = cpu_to_le32(jfs_ip->acltype); 3162 if (S_ISCHR(ip->i_mode) || S_ISBLK(ip->i_mode)) 3163 dip->di_rdev = cpu_to_le32(jfs_ip->dev); 3164} 3165