xref: /kernel/linux/linux-5.10/fs/jfs/jfs_logmgr.c (revision 8c2ecf20)
1// SPDX-License-Identifier: GPL-2.0-or-later
2/*
3 *   Copyright (C) International Business Machines Corp., 2000-2004
4 *   Portions Copyright (C) Christoph Hellwig, 2001-2002
5 */
6
7/*
8 *	jfs_logmgr.c: log manager
9 *
10 * for related information, see transaction manager (jfs_txnmgr.c), and
11 * recovery manager (jfs_logredo.c).
12 *
13 * note: for detail, RTFS.
14 *
15 *	log buffer manager:
16 * special purpose buffer manager supporting log i/o requirements.
17 * per log serial pageout of logpage
18 * queuing i/o requests and redriving i/o at iodone
19 * maintain current logpage buffer
20 * no caching since append only
21 * appropriate jfs buffer cache buffers as needed
22 *
23 *	group commit:
24 * transactions which wrote COMMIT records in the same in-memory
25 * log page during the pageout of previous/current log page(s) are
26 * committed together by the pageout of the page.
27 *
28 *	TBD lazy commit:
29 * transactions are committed asynchronously when the log page
30 * containing their COMMIT records is paged out as it becomes full;
31 *
32 *	serialization:
33 * . a per log lock serializes log write.
34 * . a per log lock serializes group commit.
35 * . a per log lock serializes log open/close;
36 *
37 *	TBD log integrity:
38 * careful-write (ping-pong) of last logpage to recover from crash
39 * in overwrite.
40 * detection of split (out-of-order) write of physical sectors
41 * of last logpage via timestamp at end of each sector
42 * (with its mirror data array at the trailer).
43 *
44 *	alternatives:
45 * lsn - 64-bit monotonically increasing integer vs
46 * 32-bit lspn and page eor.
47 */
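/*
 * For illustration, the 32-bit scheme in use encodes an lsn as a log
 * page number plus a byte offset within that page (cf.
 * "lsn = (log->page << L2LOGPSIZE) + dstoffset" in lmWriteRecord()):
 *
 *	lsn    = (pn << L2LOGPSIZE) + offset;
 *	pn     = lsn >> L2LOGPSIZE;
 *	offset = lsn & (LOGPSIZE - 1);
 */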
48
49#include <linux/fs.h>
50#include <linux/blkdev.h>
51#include <linux/interrupt.h>
52#include <linux/completion.h>
53#include <linux/kthread.h>
54#include <linux/buffer_head.h>		/* for sync_blockdev() */
55#include <linux/bio.h>
56#include <linux/freezer.h>
57#include <linux/export.h>
58#include <linux/delay.h>
59#include <linux/mutex.h>
60#include <linux/seq_file.h>
61#include <linux/slab.h>
62#include "jfs_incore.h"
63#include "jfs_filsys.h"
64#include "jfs_metapage.h"
65#include "jfs_superblock.h"
66#include "jfs_txnmgr.h"
67#include "jfs_debug.h"
68
69
70/*
71 * lbuf's ready to be redriven.  Protected by log_redrive_lock (jfsIO thread)
72 */
73static struct lbuf *log_redrive_list;
74static DEFINE_SPINLOCK(log_redrive_lock);
75
76
77/*
78 *	log read/write serialization (per log)
79 */
80#define LOG_LOCK_INIT(log)	mutex_init(&(log)->loglock)
81#define LOG_LOCK(log)		mutex_lock(&((log)->loglock))
82#define LOG_UNLOCK(log)		mutex_unlock(&((log)->loglock))
83
84
85/*
86 *	log group commit serialization (per log)
87 */
88
89#define LOGGC_LOCK_INIT(log)	spin_lock_init(&(log)->gclock)
90#define LOGGC_LOCK(log)		spin_lock_irq(&(log)->gclock)
91#define LOGGC_UNLOCK(log)	spin_unlock_irq(&(log)->gclock)
92#define LOGGC_WAKEUP(tblk)	wake_up_all(&(tblk)->gcwait)
93
94/*
95 *	log sync serialization (per log)
96 */
97#define	LOGSYNC_DELTA(logsize)		min((logsize)/8, 128*LOGPSIZE)
98#define	LOGSYNC_BARRIER(logsize)	((logsize)/4)
99/*
100#define	LOGSYNC_DELTA(logsize)		min((logsize)/4, 256*LOGPSIZE)
101#define	LOGSYNC_BARRIER(logsize)	((logsize)/2)
102*/
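/*
 * worked example, assuming LOGPSIZE == 4096: for a 32 MiB log,
 * LOGSYNC_DELTA() = min(32 MiB / 8, 128 * 4096) = 512 KiB between
 * syncpt triggers, and LOGSYNC_BARRIER() = 8 MiB written past the
 * last syncpt before the syncbarrier stops new transactions.
 */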
103
104
105/*
106 *	log buffer cache synchronization
107 */
108static DEFINE_SPINLOCK(jfsLCacheLock);
109
110#define	LCACHE_LOCK(flags)	spin_lock_irqsave(&jfsLCacheLock, flags)
111#define	LCACHE_UNLOCK(flags)	spin_unlock_irqrestore(&jfsLCacheLock, flags)
112
113/*
114 * See __SLEEP_COND in jfs_locks.h
115 */
116#define LCACHE_SLEEP_COND(wq, cond, flags)	\
117do {						\
118	if (cond)				\
119		break;				\
120	__SLEEP_COND(wq, cond, LCACHE_LOCK(flags), LCACHE_UNLOCK(flags)); \
121} while (0)
122
123#define	LCACHE_WAKEUP(event)	wake_up(event)
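/*
 * for reference, __SLEEP_COND (jfs_locks.h) is roughly:
 *
 *	DECLARE_WAITQUEUE(__wait, current);
 *	add_wait_queue(&wq, &__wait);
 *	for (;;) {
 *		set_current_state(TASK_UNINTERRUPTIBLE);
 *		if (cond)
 *			break;
 *		unlock_cmd;
 *		io_schedule();
 *		lock_cmd;
 *	}
 *	__set_current_state(TASK_RUNNING);
 *	remove_wait_queue(&wq, &__wait);
 *
 * i.e. the waiter retests cond with the lock re-held, so a bare
 * LCACHE_WAKEUP()/wake_up() on the other side is sufficient.
 */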
124
125
126/*
127 *	lbuf buffer cache (lCache) control
128 */
129/* log buffer manager pageout control (cumulative, inclusive) */
130#define	lbmREAD		0x0001
131#define	lbmWRITE	0x0002	/* enqueue at tail of write queue;
132				 * init pageout if at head of queue;
133				 */
134#define	lbmRELEASE	0x0004	/* remove from write queue
135				 * at completion of pageout;
136				 * do not free/recycle it yet:
137				 * caller will free it;
138				 */
139#define	lbmSYNC		0x0008	/* do not return to freelist
140				 * when removed from write queue;
141				 */
142#define lbmFREE		0x0010	/* return to freelist
143				 * at completion of pageout;
144				 * the buffer may be recycled;
145				 */
146#define	lbmDONE		0x0020
147#define	lbmERROR	0x0040
148#define lbmGC		0x0080	/* lbmIODone to perform post-GC processing
149				 * of log page
150				 */
151#define lbmDIRECT	0x0100
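/*
 * combinations used below:
 *	lbmWRITE | lbmRELEASE | lbmFREE	- full log page: page it out,
 *					  dequeue and recycle the buffer
 *	lbmWRITE | lbmGC		- group commit page: lbmIODone()
 *					  runs lmPostGC() on completion
 *	lbmWRITE | lbmRELEASE | lbmSYNC	- sidestream (log superblock)
 *					  write; the caller reclaims the
 *					  buffer via lbmIOWait(bp, lbmFREE)
 */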
152
153/*
154 * Global list of active external journals
155 */
156static LIST_HEAD(jfs_external_logs);
157static struct jfs_log *dummy_log;
158static DEFINE_MUTEX(jfs_log_mutex);
159
160/*
161 * forward references
162 */
163static int lmWriteRecord(struct jfs_log * log, struct tblock * tblk,
164			 struct lrd * lrd, struct tlock * tlck);
165
166static int lmNextPage(struct jfs_log * log);
167static int lmLogFileSystem(struct jfs_log * log, struct jfs_sb_info *sbi,
168			   int activate);
169
170static int open_inline_log(struct super_block *sb);
171static int open_dummy_log(struct super_block *sb);
172static int lbmLogInit(struct jfs_log * log);
173static void lbmLogShutdown(struct jfs_log * log);
174static struct lbuf *lbmAllocate(struct jfs_log * log, int);
175static void lbmFree(struct lbuf * bp);
176static void lbmfree(struct lbuf * bp);
177static int lbmRead(struct jfs_log * log, int pn, struct lbuf ** bpp);
178static void lbmWrite(struct jfs_log * log, struct lbuf * bp, int flag, int cant_block);
179static void lbmDirectWrite(struct jfs_log * log, struct lbuf * bp, int flag);
180static int lbmIOWait(struct lbuf * bp, int flag);
181static bio_end_io_t lbmIODone;
182static void lbmStartIO(struct lbuf * bp);
183static void lmGCwrite(struct jfs_log * log, int cant_block);
184static int lmLogSync(struct jfs_log * log, int hard_sync);
185
186
187
188/*
189 *	statistics
190 */
191#ifdef CONFIG_JFS_STATISTICS
192static struct lmStat {
193	uint commit;		/* # of commit */
194	uint pagedone;		/* # of page written */
195	uint submitted;		/* # of pages submitted */
196	uint full_page;		/* # of full pages submitted */
197	uint partial_page;	/* # of partial pages submitted */
198} lmStat;
199#endif
200
201static void write_special_inodes(struct jfs_log *log,
202				 int (*writer)(struct address_space *))
203{
204	struct jfs_sb_info *sbi;
205
206	list_for_each_entry(sbi, &log->sb_list, log_list) {
207		writer(sbi->ipbmap->i_mapping);
208		writer(sbi->ipimap->i_mapping);
209		writer(sbi->direct_inode->i_mapping);
210	}
211}
212
213/*
214 * NAME:	lmLog()
215 *
216 * FUNCTION:	write a log record;
217 *
218 * PARAMETER:
219 *
220 * RETURN:	lsn - offset to the next log record to write (end-of-log);
221 *		-1  - error;
222 *
223 * note: todo: log error handler
224 */
225int lmLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd,
226	  struct tlock * tlck)
227{
228	int lsn;
229	int diffp, difft;
230	struct metapage *mp = NULL;
231	unsigned long flags;
232
233	jfs_info("lmLog: log:0x%p tblk:0x%p, lrd:0x%p tlck:0x%p",
234		 log, tblk, lrd, tlck);
235
236	LOG_LOCK(log);
237
238	/* log by (out-of-transaction) JFS ? */
239	if (tblk == NULL)
240		goto writeRecord;
241
242	/* log from page ? */
243	if (tlck == NULL ||
244	    tlck->type & tlckBTROOT || (mp = tlck->mp) == NULL)
245		goto writeRecord;
246
247	/*
248	 *	initialize/update page/transaction recovery lsn
249	 */
250	lsn = log->lsn;
251
252	LOGSYNC_LOCK(log, flags);
253
254	/*
255	 * initialize page lsn if first log write of the page
256	 */
257	if (mp->lsn == 0) {
258		mp->log = log;
259		mp->lsn = lsn;
260		log->count++;
261
262		/* insert page at tail of logsynclist */
263		list_add_tail(&mp->synclist, &log->synclist);
264	}
265
266	/*
267	 *	initialize/update lsn of tblock of the page
268	 *
269	 * transaction inherits oldest lsn of pages associated
270	 * with allocation/deallocation of resources (their
271	 * log records are used to reconstruct allocation map
272	 * at recovery time: inode for inode allocation map,
273	 * B+-tree index of extent descriptors for block
274	 * allocation map);
275	 * allocation map pages inherit transaction lsn at
276	 * commit time to allow forwarding log syncpt past log
277	 * records associated with allocation/deallocation of
278	 * resources only after persistent map of these map pages
279	 * have been updated and propagated to home.
280	 */
281	/*
282	 * initialize transaction lsn:
283	 */
284	if (tblk->lsn == 0) {
285		/* inherit lsn of its first page logged */
286		tblk->lsn = mp->lsn;
287		log->count++;
288
289		/* insert tblock after the page on logsynclist */
290		list_add(&tblk->synclist, &mp->synclist);
291	}
292	/*
293	 * update transaction lsn:
294	 */
295	else {
296		/* inherit oldest/smallest lsn of page */
297		logdiff(diffp, mp->lsn, log);
298		logdiff(difft, tblk->lsn, log);
299		if (diffp < difft) {
300			/* update tblock lsn with page lsn */
301			tblk->lsn = mp->lsn;
302
303			/* move tblock after page on logsynclist */
304			list_move(&tblk->synclist, &mp->synclist);
305		}
306	}
307
308	LOGSYNC_UNLOCK(log, flags);
309
310	/*
311	 *	write the log record
312	 */
313      writeRecord:
314	lsn = lmWriteRecord(log, tblk, lrd, tlck);
315
316	/*
317	 * forward log syncpt if log reached next syncpt trigger
318	 */
319	logdiff(diffp, lsn, log);
320	if (diffp >= log->nextsync)
321		lsn = lmLogSync(log, 0);
322
323	/* update end-of-log lsn */
324	log->lsn = lsn;
325
326	LOG_UNLOCK(log);
327
328	/* return end-of-log address */
329	return lsn;
330}
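/*
 * logdiff(), used above to measure the distance written past the last
 * syncpt around the circular log, is defined in jfs_logmgr.h roughly as:
 *
 *	#define logdiff(diff, lsn, log)			\
 *	{						\
 *		(diff) = (lsn) - (log)->syncpt;		\
 *		if ((diff) < 0)				\
 *			(diff) += (log)->logsize;	\
 *	}
 */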
331
332/*
333 * NAME:	lmWriteRecord()
334 *
335 * FUNCTION:	move the log record to current log page
336 *
337 * PARAMETER:	log, tblk, lrd, tlck	- as passed in from lmLog()
338 *
339 * RETURN:	end-of-log address
340 *
341 * serialization: LOG_LOCK() held on entry/exit
342 */
343static int
344lmWriteRecord(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd,
345	      struct tlock * tlck)
346{
347	int lsn = 0;		/* end-of-log address */
348	struct lbuf *bp;	/* dst log page buffer */
349	struct logpage *lp;	/* dst log page */
350	caddr_t dst;		/* destination address in log page */
351	int dstoffset;		/* end-of-log offset in log page */
352	int freespace;		/* free space in log page */
353	caddr_t p;		/* src meta-data page */
354	caddr_t src;
355	int srclen;
356	int nbytes;		/* number of bytes to move */
357	int i;
358	int len;
359	struct linelock *linelock;
360	struct lv *lv;
361	struct lvd *lvd;
362	int l2linesize;
363
364	len = 0;
365
366	/* retrieve destination log page to write */
367	bp = (struct lbuf *) log->bp;
368	lp = (struct logpage *) bp->l_ldata;
369	dstoffset = log->eor;
370
371	/* any log data to write ? */
372	if (tlck == NULL)
373		goto moveLrd;
374
375	/*
376	 *	move log record data
377	 */
378	/* retrieve source meta-data page to log */
379	if (tlck->flag & tlckPAGELOCK) {
380		p = (caddr_t) (tlck->mp->data);
381		linelock = (struct linelock *) & tlck->lock;
382	}
383	/* retrieve source in-memory inode to log */
384	else if (tlck->flag & tlckINODELOCK) {
385		if (tlck->type & tlckDTREE)
386			p = (caddr_t) &JFS_IP(tlck->ip)->i_dtroot;
387		else
388			p = (caddr_t) &JFS_IP(tlck->ip)->i_xtroot;
389		linelock = (struct linelock *) & tlck->lock;
390	}
391#ifdef	_JFS_WIP
392	else if (tlck->flag & tlckINLINELOCK) {
393
394		inlinelock = (struct inlinelock *) & tlck;
395		p = (caddr_t) & inlinelock->pxd;
396		linelock = (struct linelock *) & tlck;
397	}
398#endif				/* _JFS_WIP */
399	else {
400		jfs_err("lmWriteRecord: UFO tlck:0x%p", tlck);
401		return 0;	/* Probably should trap */
402	}
403	l2linesize = linelock->l2linesize;
404
405      moveData:
406	ASSERT(linelock->index <= linelock->maxcnt);
407
408	lv = linelock->lv;
409	for (i = 0; i < linelock->index; i++, lv++) {
410		if (lv->length == 0)
411			continue;
412
413		/* is page full ? */
414		if (dstoffset >= LOGPSIZE - LOGPTLRSIZE) {
415			/* page became full: move on to next page */
416			lmNextPage(log);
417
418			bp = log->bp;
419			lp = (struct logpage *) bp->l_ldata;
420			dstoffset = LOGPHDRSIZE;
421		}
422
423		/*
424		 * move log vector data
425		 */
426		src = (u8 *) p + (lv->offset << l2linesize);
427		srclen = lv->length << l2linesize;
428		len += srclen;
429		while (srclen > 0) {
430			freespace = (LOGPSIZE - LOGPTLRSIZE) - dstoffset;
431			nbytes = min(freespace, srclen);
432			dst = (caddr_t) lp + dstoffset;
433			memcpy(dst, src, nbytes);
434			dstoffset += nbytes;
435
436			/* is page not full ? */
437			if (dstoffset < LOGPSIZE - LOGPTLRSIZE)
438				break;
439
440			/* page became full: move on to next page */
441			lmNextPage(log);
442
443			bp = (struct lbuf *) log->bp;
444			lp = (struct logpage *) bp->l_ldata;
445			dstoffset = LOGPHDRSIZE;
446
447			srclen -= nbytes;
448			src += nbytes;
449		}
450
451		/*
452		 * move log vector descriptor
453		 */
454		len += 4;
455		lvd = (struct lvd *) ((caddr_t) lp + dstoffset);
456		lvd->offset = cpu_to_le16(lv->offset);
457		lvd->length = cpu_to_le16(lv->length);
458		dstoffset += 4;
459		jfs_info("lmWriteRecord: lv offset:%d length:%d",
460			 lv->offset, lv->length);
461	}
462
463	if ((i = linelock->next)) {
464		linelock = (struct linelock *) lid_to_tlock(i);
465		goto moveData;
466	}
467
468	/*
469	 *	move log record descriptor
470	 */
471      moveLrd:
472	lrd->length = cpu_to_le16(len);
473
474	src = (caddr_t) lrd;
475	srclen = LOGRDSIZE;
476
477	while (srclen > 0) {
478		freespace = (LOGPSIZE - LOGPTLRSIZE) - dstoffset;
479		nbytes = min(freespace, srclen);
480		dst = (caddr_t) lp + dstoffset;
481		memcpy(dst, src, nbytes);
482
483		dstoffset += nbytes;
484		srclen -= nbytes;
485
486		/* is there more to move than the free space of the page ? */
487		if (srclen)
488			goto pageFull;
489
490		/*
491		 * end of log record descriptor
492		 */
493
494		/* update last log record eor */
495		log->eor = dstoffset;
496		bp->l_eor = dstoffset;
497		lsn = (log->page << L2LOGPSIZE) + dstoffset;
498
499		if (lrd->type & cpu_to_le16(LOG_COMMIT)) {
500			tblk->clsn = lsn;
501			jfs_info("wr: tclsn:0x%x, beor:0x%x", tblk->clsn,
502				 bp->l_eor);
503
504			INCREMENT(lmStat.commit);	/* # of commit */
505
506			/*
507			 * enqueue tblock for group commit:
508			 *
509			 * enqueue tblock of non-trivial/synchronous COMMIT
510			 * at tail of group commit queue
511			 * (trivial/asynchronous COMMITs are ignored by
512			 * group commit.)
513			 */
514			LOGGC_LOCK(log);
515
516			/* init tblock gc state */
517			tblk->flag = tblkGC_QUEUE;
518			tblk->bp = log->bp;
519			tblk->pn = log->page;
520			tblk->eor = log->eor;
521
522			/* enqueue transaction to commit queue */
523			list_add_tail(&tblk->cqueue, &log->cqueue);
524
525			LOGGC_UNLOCK(log);
526		}
527
528		jfs_info("lmWriteRecord: lrd:0x%04x bp:0x%p pn:%d eor:0x%x",
529			le16_to_cpu(lrd->type), log->bp, log->page, dstoffset);
530
531		/* page not full ? */
532		if (dstoffset < LOGPSIZE - LOGPTLRSIZE)
533			return lsn;
534
535	      pageFull:
536		/* page became full: move on to next page */
537		lmNextPage(log);
538
539		bp = (struct lbuf *) log->bp;
540		lp = (struct logpage *) bp->l_ldata;
541		dstoffset = LOGPHDRSIZE;
542		src += nbytes;
543	}
544
545	return lsn;
546}
547
548
549/*
550 * NAME:	lmNextPage()
551 *
552 * FUNCTION:	write current page and allocate next page.
553 *
554 * PARAMETER:	log
555 *
556 * RETURN:	0
557 *
558 * serialization: LOG_LOCK() held on entry/exit
559 */
560static int lmNextPage(struct jfs_log * log)
561{
562	struct logpage *lp;
563	int lspn;		/* log sequence page number */
564	int pn;			/* current page number */
565	struct lbuf *bp;
566	struct lbuf *nextbp;
567	struct tblock *tblk;
568
569	/* get current log page number and log sequence page number */
570	pn = log->page;
571	bp = log->bp;
572	lp = (struct logpage *) bp->l_ldata;
573	lspn = le32_to_cpu(lp->h.page);
574
575	LOGGC_LOCK(log);
576
577	/*
578	 *	write or queue the full page at the tail of write queue
579	 */
580	/* get the tail tblk on commit queue */
581	if (list_empty(&log->cqueue))
582		tblk = NULL;
583	else
584		tblk = list_entry(log->cqueue.prev, struct tblock, cqueue);
585
586	/* every tblk that has a COMMIT record on the current page,
587	 * and has not been committed, must be on the commit queue,
588	 * since a tblk is queued on the commit queue at the time
589	 * its COMMIT record is written to the page, before the
590	 * page becomes full (even though the tblk thread that
591	 * wrote the COMMIT record may currently be
592	 * suspended);
593	 */
594
595	/* is page bound with outstanding tail tblk ? */
596	if (tblk && tblk->pn == pn) {
597		/* mark tblk for end-of-page */
598		tblk->flag |= tblkGC_EOP;
599
600		if (log->cflag & logGC_PAGEOUT) {
601			/* if page is not already on write queue,
602			 * just enqueue (no lbmWRITE to prevent redrive)
603			 * buffer to wqueue to ensure correct serial order
604			 * of the pages since log pages will be added
605			 * continuously
606			 */
607			if (bp->l_wqnext == NULL)
608				lbmWrite(log, bp, 0, 0);
609		} else {
610			/*
611			 * No current GC leader, initiate group commit
612			 */
613			log->cflag |= logGC_PAGEOUT;
614			lmGCwrite(log, 0);
615		}
616	}
617	/* page is not bound with outstanding tblk:
618	 * init write or mark it to be redriven (lbmWRITE)
619	 */
620	else {
621		/* finalize the page */
622		bp->l_ceor = bp->l_eor;
623		lp->h.eor = lp->t.eor = cpu_to_le16(bp->l_ceor);
624		lbmWrite(log, bp, lbmWRITE | lbmRELEASE | lbmFREE, 0);
625	}
626	LOGGC_UNLOCK(log);
627
628	/*
629	 *	allocate/initialize next page
630	 */
631	/* if log wraps, the first data page of log is 2
632	 * (0 never used, 1 is superblock).
633	 */
634	log->page = (pn == log->size - 1) ? 2 : pn + 1;
635	log->eor = LOGPHDRSIZE;	/* ? valid page empty/full at logRedo() */
636
637	/* allocate/initialize next log page buffer */
638	nextbp = lbmAllocate(log, log->page);
639	nextbp->l_eor = log->eor;
640	log->bp = nextbp;
641
642	/* initialize next log page */
643	lp = (struct logpage *) nextbp->l_ldata;
644	lp->h.page = lp->t.page = cpu_to_le32(lspn + 1);
645	lp->h.eor = lp->t.eor = cpu_to_le16(LOGPHDRSIZE);
646
647	return 0;
648}
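/*
 * worked example: with log->size == 1024 pages, data pages run from
 * pn 2 through 1023 (page 0 unused, page 1 log superblock), so the
 * step above wraps 1023 -> 2 while lspn keeps increasing; logredo()
 * can thus tell freshly written pages from stale ones after a wrap.
 */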
649
650
651/*
652 * NAME:	lmGroupCommit()
653 *
654 * FUNCTION:	group commit
655 *	initiate pageout of the pages with COMMIT in the order of
656 *	page number - redrive pageout of the page at the head of
657 *	pageout queue until full page has been written.
658 *
659 * RETURN:
660 *
661 * NOTE:
662 *	LOGGC_LOCK serializes log group commit queue, and
663 *	transaction blocks on the commit queue.
664 *	N.B. LOG_LOCK is NOT held during lmGroupCommit().
665 */
666int lmGroupCommit(struct jfs_log * log, struct tblock * tblk)
667{
668	int rc = 0;
669
670	LOGGC_LOCK(log);
671
672	/* group committed already ? */
673	if (tblk->flag & tblkGC_COMMITTED) {
674		if (tblk->flag & tblkGC_ERROR)
675			rc = -EIO;
676
677		LOGGC_UNLOCK(log);
678		return rc;
679	}
680	jfs_info("lmGroup Commit: tblk = 0x%p, gcrtc = %d", tblk, log->gcrtc);
681
682	if (tblk->xflag & COMMIT_LAZY)
683		tblk->flag |= tblkGC_LAZY;
684
685	if ((!(log->cflag & logGC_PAGEOUT)) && (!list_empty(&log->cqueue)) &&
686	    (!(tblk->xflag & COMMIT_LAZY) || test_bit(log_FLUSH, &log->flag)
687	     || jfs_tlocks_low)) {
688		/*
689		 * No pageout in progress
690		 *
691		 * start group commit as its group leader.
692		 */
693		log->cflag |= logGC_PAGEOUT;
694
695		lmGCwrite(log, 0);
696	}
697
698	if (tblk->xflag & COMMIT_LAZY) {
699		/*
700		 * Lazy transactions can leave now
701		 */
702		LOGGC_UNLOCK(log);
703		return 0;
704	}
705
706	/* lmGCwrite gives up LOGGC_LOCK, check again */
707
708	if (tblk->flag & tblkGC_COMMITTED) {
709		if (tblk->flag & tblkGC_ERROR)
710			rc = -EIO;
711
712		LOGGC_UNLOCK(log);
713		return rc;
714	}
715
716	/* upcount transaction waiting for completion
717	 */
718	log->gcrtc++;
719	tblk->flag |= tblkGC_READY;
720
721	__SLEEP_COND(tblk->gcwait, (tblk->flag & tblkGC_COMMITTED),
722		     LOGGC_LOCK(log), LOGGC_UNLOCK(log));
723
724	/* removed from commit queue */
725	if (tblk->flag & tblkGC_ERROR)
726		rc = -EIO;
727
728	LOGGC_UNLOCK(log);
729	return rc;
730}
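/*
 * in outline, the transaction manager (jfs_txnmgr.c) drives the two
 * halves of a synchronous commit roughly as:
 *
 *	lrd->type = cpu_to_le16(LOG_COMMIT);
 *	lmLog(log, tblk, lrd, NULL);	// queues tblk on log->cqueue
 *	rc = lmGroupCommit(log, tblk);	// sleeps unless COMMIT_LAZY
 *
 * so a synchronous committer blocks in __SLEEP_COND() above until a
 * group leader's pageout marks its tblk tblkGC_COMMITTED.
 */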
731
732/*
733 * NAME:	lmGCwrite()
734 *
735 * FUNCTION:	group commit write
736 *	initiate write of log page, building a group of all transactions
737 *	with commit records on that page.
738 *
739 * RETURN:	None
740 *
741 * NOTE:
742 *	LOGGC_LOCK must be held by caller.
743 *	N.B. LOG_LOCK is NOT held during lmGroupCommit().
744 */
745static void lmGCwrite(struct jfs_log * log, int cant_write)
746{
747	struct lbuf *bp;
748	struct logpage *lp;
749	int gcpn;		/* group commit page number */
750	struct tblock *tblk;
751	struct tblock *xtblk = NULL;
752
753	/*
754	 * build the commit group of a log page
755	 *
756	 * scan commit queue and make a commit group of all
757	 * transactions with COMMIT records on the same log page.
758	 */
759	/* get the head tblk on the commit queue */
760	gcpn = list_entry(log->cqueue.next, struct tblock, cqueue)->pn;
761
762	list_for_each_entry(tblk, &log->cqueue, cqueue) {
763		if (tblk->pn != gcpn)
764			break;
765
766		xtblk = tblk;
767
768		/* state transition: (QUEUE, READY) -> COMMIT */
769		tblk->flag |= tblkGC_COMMIT;
770	}
771	tblk = xtblk;		/* last tblk of the page */
772
773	/*
774	 * pageout to commit transactions on the log page.
775	 */
776	bp = (struct lbuf *) tblk->bp;
777	lp = (struct logpage *) bp->l_ldata;
778	/* is page already full ? */
779	if (tblk->flag & tblkGC_EOP) {
780		/* mark page to free at end of group commit of the page */
781		tblk->flag &= ~tblkGC_EOP;
782		tblk->flag |= tblkGC_FREE;
783		bp->l_ceor = bp->l_eor;
784		lp->h.eor = lp->t.eor = cpu_to_le16(bp->l_ceor);
785		lbmWrite(log, bp, lbmWRITE | lbmRELEASE | lbmGC,
786			 cant_write);
787		INCREMENT(lmStat.full_page);
788	}
789	/* page is not yet full */
790	else {
791		bp->l_ceor = tblk->eor;	/* ? bp->l_ceor = bp->l_eor; */
792		lp->h.eor = lp->t.eor = cpu_to_le16(bp->l_ceor);
793		lbmWrite(log, bp, lbmWRITE | lbmGC, cant_write);
794		INCREMENT(lmStat.partial_page);
795	}
796}
797
798/*
799 * NAME:	lmPostGC()
800 *
801 * FUNCTION:	group commit post-processing
802 *	Processes transactions after their commit records have been written
803 *	to disk, redriving log I/O if necessary.
804 *
805 * RETURN:	None
806 *
807 * NOTE:
808 *	This routine is called at interrupt time by lbmIODone
809 */
810static void lmPostGC(struct lbuf * bp)
811{
812	unsigned long flags;
813	struct jfs_log *log = bp->l_log;
814	struct logpage *lp;
815	struct tblock *tblk, *temp;
816
817	//LOGGC_LOCK(log);
818	spin_lock_irqsave(&log->gclock, flags);
819	/*
820	 * current pageout of group commit completed.
821	 *
822	 * remove/wakeup transactions from commit queue who were
823	 * group committed with the current log page
824	 */
825	list_for_each_entry_safe(tblk, temp, &log->cqueue, cqueue) {
826		if (!(tblk->flag & tblkGC_COMMIT))
827			break;
828		/* if transaction was marked GC_COMMIT then
829		 * it has been shipped in the current pageout
830		 * and made it to disk - it is committed.
831		 */
832
833		if (bp->l_flag & lbmERROR)
834			tblk->flag |= tblkGC_ERROR;
835
836		/* remove it from the commit queue */
837		list_del(&tblk->cqueue);
838		tblk->flag &= ~tblkGC_QUEUE;
839
840		if (tblk == log->flush_tblk) {
841			/* we can stop flushing the log now */
842			clear_bit(log_FLUSH, &log->flag);
843			log->flush_tblk = NULL;
844		}
845
846		jfs_info("lmPostGC: tblk = 0x%p, flag = 0x%x", tblk,
847			 tblk->flag);
848
849		if (!(tblk->xflag & COMMIT_FORCE))
850			/*
851			 * Hand tblk over to lazy commit thread
852			 */
853			txLazyUnlock(tblk);
854		else {
855			/* state transition: COMMIT -> COMMITTED */
856			tblk->flag |= tblkGC_COMMITTED;
857
858			if (tblk->flag & tblkGC_READY)
859				log->gcrtc--;
860
861			LOGGC_WAKEUP(tblk);
862		}
863
864		/* was page full before pageout ?
865		 * (and this is the last tblk bound with the page)
866		 */
867		if (tblk->flag & tblkGC_FREE)
868			lbmFree(bp);
869		/* did page become full after pageout ?
870		 * (and this is the last tblk bound with the page)
871		 */
872		else if (tblk->flag & tblkGC_EOP) {
873			/* finalize the page */
874			lp = (struct logpage *) bp->l_ldata;
875			bp->l_ceor = bp->l_eor;
876			lp->h.eor = lp->t.eor = cpu_to_le16(bp->l_eor);
877			jfs_info("lmPostGC: calling lbmWrite");
878			lbmWrite(log, bp, lbmWRITE | lbmRELEASE | lbmFREE,
879				 1);
880		}
881
882	}
883
884	/* are there any transactions that have entered lmGroupCommit()
885	 * (whose COMMITs are after that of the last log page written)?
886	 * They are waiting for a new group commit (above at (SLEEP 1)),
887	 * or lazy transactions are on a full (queued) log page;
888	 * select the latest ready transaction as the new group leader
889	 * and wake it up to lead its group.
890	 */
891	if ((!list_empty(&log->cqueue)) &&
892	    ((log->gcrtc > 0) || (tblk->bp->l_wqnext != NULL) ||
893	     test_bit(log_FLUSH, &log->flag) || jfs_tlocks_low))
894		/*
895		 * Call lmGCwrite with new group leader
896		 */
897		lmGCwrite(log, 1);
898
899	/* no transactions are ready yet (transactions are only just
900	 * queued (GC_QUEUE) and have not entered group commit yet).
901	 * the first transaction entering group commit
902	 * will elect itself as the new group leader.
903	 */
904	else
905		log->cflag &= ~logGC_PAGEOUT;
906
907	//LOGGC_UNLOCK(log);
908	spin_unlock_irqrestore(&log->gclock, flags);
909	return;
910}
911
912/*
913 * NAME:	lmLogSync()
914 *
915 * FUNCTION:	write log SYNCPT record for specified log
916 *	if new sync address is available
917 *	(normally the case if sync() is executed by a background
918 *	process).
919 *	calculate new value of log->nextsync which determines when
920 *	this code is called again.
921 *
922 * PARAMETERS:	log	- log structure
923 *		hard_sync - 1 to force all metadata to be written
924 *
925 * RETURN:	lsn	- end-of-log address
926 *
927 * serialization: LOG_LOCK() held on entry/exit
928 */
929static int lmLogSync(struct jfs_log * log, int hard_sync)
930{
931	int logsize;
932	int written;		/* written since last syncpt */
933	int free;		/* free space left available */
934	int delta;		/* additional delta to write normally */
935	int more;		/* additional write granted */
936	struct lrd lrd;
937	int lsn;
938	struct logsyncblk *lp;
939	unsigned long flags;
940
941	/* push dirty metapages out to disk */
942	if (hard_sync)
943		write_special_inodes(log, filemap_fdatawrite);
944	else
945		write_special_inodes(log, filemap_flush);
946
947	/*
948	 *	forward syncpt
949	 */
950	/* if last sync is same as last syncpt,
951	 * invoke sync point forward processing to update sync.
952	 */
953
954	if (log->sync == log->syncpt) {
955		LOGSYNC_LOCK(log, flags);
956		if (list_empty(&log->synclist))
957			log->sync = log->lsn;
958		else {
959			lp = list_entry(log->synclist.next,
960					struct logsyncblk, synclist);
961			log->sync = lp->lsn;
962		}
963		LOGSYNC_UNLOCK(log, flags);
964
965	}
966
967	/* if sync is different from last syncpt,
968	 * write a SYNCPT record with syncpt = sync.
969	 * reset syncpt = sync
970	 */
971	if (log->sync != log->syncpt) {
972		lrd.logtid = 0;
973		lrd.backchain = 0;
974		lrd.type = cpu_to_le16(LOG_SYNCPT);
975		lrd.length = 0;
976		lrd.log.syncpt.sync = cpu_to_le32(log->sync);
977		lsn = lmWriteRecord(log, NULL, &lrd, NULL);
978
979		log->syncpt = log->sync;
980	} else
981		lsn = log->lsn;
982
983	/*
984	 *	setup next syncpt trigger (SWAG)
985	 */
986	logsize = log->logsize;
987
988	logdiff(written, lsn, log);
989	free = logsize - written;
990	delta = LOGSYNC_DELTA(logsize);
991	more = min(free / 2, delta);
992	if (more < 2 * LOGPSIZE) {
993		jfs_warn("\n ... Log Wrap ... Log Wrap ... Log Wrap ...\n");
994		/*
995		 *	log wrapping
996		 *
997 * option 1 - panic ? No!
998		 * option 2 - shutdown file systems
999		 *	      associated with log ?
1000		 * option 3 - extend log ?
1001		 * option 4 - second chance
1002		 *
1003		 * mark log wrapped, and continue.
1004		 * when all active transactions are completed,
1005		 * mark log valid for recovery.
1006		 * if crashed during invalid state, log state
1007		 * implies invalid log, forcing fsck().
1008		 */
1009		/* mark log state log wrap in log superblock */
1010		/* log->state = LOGWRAP; */
1011
1012		/* reset sync point computation */
1013		log->syncpt = log->sync = lsn;
1014		log->nextsync = delta;
1015	} else
1016		/* next syncpt trigger = written + more */
1017		log->nextsync = written + more;
1018
1019	/* if number of bytes written from last sync point is more
1020	 * than 1/4 of the log size, stop new transactions from
1021	 * starting until all current transactions are completed
1022	 * by setting syncbarrier flag.
1023	 */
1024	if (!test_bit(log_SYNCBARRIER, &log->flag) &&
1025	    (written > LOGSYNC_BARRIER(logsize)) && log->active) {
1026		set_bit(log_SYNCBARRIER, &log->flag);
1027		jfs_info("log barrier on: lsn=0x%x syncpt=0x%x", lsn,
1028			 log->syncpt);
1029		/*
1030		 * We may have to initiate group commit
1031		 */
1032		jfs_flush_journal(log, 0);
1033	}
1034
1035	return lsn;
1036}
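/*
 * worked example of the trigger math above, assuming LOGPSIZE == 4096:
 * logsize == 32 MiB and written == 1 MiB give free == 31 MiB,
 * delta == 512 KiB, more == min(free/2, delta) == 512 KiB, so
 * nextsync == 1.5 MiB; the next lmLog() whose logdiff() reaches that
 * mark calls lmLogSync() again.  only when more < 2 * LOGPSIZE (8 KiB
 * of headroom left) is the log treated as wrapped.
 */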
1037
1038/*
1039 * NAME:	jfs_syncpt
1040 *
1041 * FUNCTION:	write log SYNCPT record for specified log
1042 *
1043 * PARAMETERS:	log	  - log structure
1044 *		hard_sync - set to 1 to force metadata to be written
1045 */
1046void jfs_syncpt(struct jfs_log *log, int hard_sync)
1047{	LOG_LOCK(log);
1048	if (!test_bit(log_QUIESCE, &log->flag))
1049		lmLogSync(log, hard_sync);
1050	LOG_UNLOCK(log);
1051}
1052
1053/*
1054 * NAME:	lmLogOpen()
1055 *
1056 * FUNCTION:	open the log on first open;
1057 *	insert filesystem in the active list of the log.
1058 *
1059 * PARAMETER:	sb	- super block of the file system
1060 *			  being mounted
1061 *
1062 * RETURN:	0	- success; otherwise an errno from open/init
1063 *
1064 * serialization:
1065 */
1066int lmLogOpen(struct super_block *sb)
1067{
1068	int rc;
1069	struct block_device *bdev;
1070	struct jfs_log *log;
1071	struct jfs_sb_info *sbi = JFS_SBI(sb);
1072
1073	if (sbi->flag & JFS_NOINTEGRITY)
1074		return open_dummy_log(sb);
1075
1076	if (sbi->mntflag & JFS_INLINELOG)
1077		return open_inline_log(sb);
1078
1079	mutex_lock(&jfs_log_mutex);
1080	list_for_each_entry(log, &jfs_external_logs, journal_list) {
1081		if (log->bdev->bd_dev == sbi->logdev) {
1082			if (!uuid_equal(&log->uuid, &sbi->loguuid)) {
1083				jfs_warn("wrong uuid on JFS journal");
1084				mutex_unlock(&jfs_log_mutex);
1085				return -EINVAL;
1086			}
1087			/*
1088			 * add file system to log active file system list
1089			 */
1090			if ((rc = lmLogFileSystem(log, sbi, 1))) {
1091				mutex_unlock(&jfs_log_mutex);
1092				return rc;
1093			}
1094			goto journal_found;
1095		}
1096	}
1097
1098	if (!(log = kzalloc(sizeof(struct jfs_log), GFP_KERNEL))) {
1099		mutex_unlock(&jfs_log_mutex);
1100		return -ENOMEM;
1101	}
1102	INIT_LIST_HEAD(&log->sb_list);
1103	init_waitqueue_head(&log->syncwait);
1104
1105	/*
1106	 *	external log as separate logical volume
1107	 *
1108	 * file systems to log may have n-to-1 relationship;
1109	 */
1110
1111	bdev = blkdev_get_by_dev(sbi->logdev, FMODE_READ|FMODE_WRITE|FMODE_EXCL,
1112				 log);
1113	if (IS_ERR(bdev)) {
1114		rc = PTR_ERR(bdev);
1115		goto free;
1116	}
1117
1118	log->bdev = bdev;
1119	uuid_copy(&log->uuid, &sbi->loguuid);
1120
1121	/*
1122	 * initialize log:
1123	 */
1124	if ((rc = lmLogInit(log)))
1125		goto close;
1126
1127	list_add(&log->journal_list, &jfs_external_logs);
1128
1129	/*
1130	 * add file system to log active file system list
1131	 */
1132	if ((rc = lmLogFileSystem(log, sbi, 1)))
1133		goto shutdown;
1134
1135journal_found:
1136	LOG_LOCK(log);
1137	list_add(&sbi->log_list, &log->sb_list);
1138	sbi->log = log;
1139	LOG_UNLOCK(log);
1140
1141	mutex_unlock(&jfs_log_mutex);
1142	return 0;
1143
1144	/*
1145	 *	unwind on error
1146	 */
1147      shutdown:		/* unwind lbmLogInit() */
1148	list_del(&log->journal_list);
1149	lbmLogShutdown(log);
1150
1151      close:		/* close external log device */
1152	blkdev_put(bdev, FMODE_READ|FMODE_WRITE|FMODE_EXCL);
1153
1154      free:		/* free log descriptor */
1155	mutex_unlock(&jfs_log_mutex);
1156	kfree(log);
1157
1158	jfs_warn("lmLogOpen: exit(%d)", rc);
1159	return rc;
1160}
1161
1162static int open_inline_log(struct super_block *sb)
1163{
1164	struct jfs_log *log;
1165	int rc;
1166
1167	if (!(log = kzalloc(sizeof(struct jfs_log), GFP_KERNEL)))
1168		return -ENOMEM;
1169	INIT_LIST_HEAD(&log->sb_list);
1170	init_waitqueue_head(&log->syncwait);
1171
1172	set_bit(log_INLINELOG, &log->flag);
1173	log->bdev = sb->s_bdev;
1174	log->base = addressPXD(&JFS_SBI(sb)->logpxd);
1175	log->size = lengthPXD(&JFS_SBI(sb)->logpxd) >>
1176	    (L2LOGPSIZE - sb->s_blocksize_bits);
1177	log->l2bsize = sb->s_blocksize_bits;
1178	ASSERT(L2LOGPSIZE >= sb->s_blocksize_bits);
1179
1180	/*
1181	 * initialize log.
1182	 */
1183	if ((rc = lmLogInit(log))) {
1184		kfree(log);
1185		jfs_warn("lmLogOpen: exit(%d)", rc);
1186		return rc;
1187	}
1188
1189	list_add(&JFS_SBI(sb)->log_list, &log->sb_list);
1190	JFS_SBI(sb)->log = log;
1191
1192	return rc;
1193}
1194
1195static int open_dummy_log(struct super_block *sb)
1196{
1197	int rc;
1198
1199	mutex_lock(&jfs_log_mutex);
1200	if (!dummy_log) {
1201		dummy_log = kzalloc(sizeof(struct jfs_log), GFP_KERNEL);
1202		if (!dummy_log) {
1203			mutex_unlock(&jfs_log_mutex);
1204			return -ENOMEM;
1205		}
1206		INIT_LIST_HEAD(&dummy_log->sb_list);
1207		init_waitqueue_head(&dummy_log->syncwait);
1208		dummy_log->no_integrity = 1;
1209		/* Make up some stuff */
1210		dummy_log->base = 0;
1211		dummy_log->size = 1024;
1212		rc = lmLogInit(dummy_log);
1213		if (rc) {
1214			kfree(dummy_log);
1215			dummy_log = NULL;
1216			mutex_unlock(&jfs_log_mutex);
1217			return rc;
1218		}
1219	}
1220
1221	LOG_LOCK(dummy_log);
1222	list_add(&JFS_SBI(sb)->log_list, &dummy_log->sb_list);
1223	JFS_SBI(sb)->log = dummy_log;
1224	LOG_UNLOCK(dummy_log);
1225	mutex_unlock(&jfs_log_mutex);
1226
1227	return 0;
1228}
1229
1230/*
1231 * NAME:	lmLogInit()
1232 *
1233 * FUNCTION:	log initialization at first log open.
1234 *
1235 *	logredo() (or logformat()) should have been run previously.
1236 *	initialize the log from log superblock.
1237 *	set the log state in the superblock to LOGMOUNT and
1238 *	write SYNCPT log record.
1239 *
1240 * PARAMETER:	log	- log structure
1241 *
1242 * RETURN:	0	- if ok
1243 *		-EINVAL	- bad log magic number or superblock dirty
1244 *		error returned from logwait()
1245 *
1246 * serialization: single first open thread
1247 */
1248int lmLogInit(struct jfs_log * log)
1249{
1250	int rc = 0;
1251	struct lrd lrd;
1252	struct logsuper *logsuper;
1253	struct lbuf *bpsuper;
1254	struct lbuf *bp;
1255	struct logpage *lp;
1256	int lsn = 0;
1257
1258	jfs_info("lmLogInit: log:0x%p", log);
1259
1260	/* initialize the group commit serialization lock */
1261	LOGGC_LOCK_INIT(log);
1262
1263	/* allocate/initialize the log write serialization lock */
1264	LOG_LOCK_INIT(log);
1265
1266	LOGSYNC_LOCK_INIT(log);
1267
1268	INIT_LIST_HEAD(&log->synclist);
1269
1270	INIT_LIST_HEAD(&log->cqueue);
1271	log->flush_tblk = NULL;
1272
1273	log->count = 0;
1274
1275	/*
1276	 * initialize log i/o
1277	 */
1278	if ((rc = lbmLogInit(log)))
1279		return rc;
1280
1281	if (!test_bit(log_INLINELOG, &log->flag))
1282		log->l2bsize = L2LOGPSIZE;
1283
1284	/* check for disabled journaling to disk */
1285	if (log->no_integrity) {
1286		/*
1287		 * Journal pages will still be filled.  When the time comes
1288		 * to actually do the I/O, the write is not done, and the
1289		 * endio routine is called directly.
1290		 */
1291		bp = lbmAllocate(log , 0);
1292		log->bp = bp;
1293		bp->l_pn = bp->l_eor = 0;
1294	} else {
1295		/*
1296		 * validate log superblock
1297		 */
1298		if ((rc = lbmRead(log, 1, &bpsuper)))
1299			goto errout10;
1300
1301		logsuper = (struct logsuper *) bpsuper->l_ldata;
1302
1303		if (logsuper->magic != cpu_to_le32(LOGMAGIC)) {
1304			jfs_warn("*** Log Format Error ! ***");
1305			rc = -EINVAL;
1306			goto errout20;
1307		}
1308
1309		/* logredo() should have been run successfully. */
1310		if (logsuper->state != cpu_to_le32(LOGREDONE)) {
1311			jfs_warn("*** Log Is Dirty ! ***");
1312			rc = -EINVAL;
1313			goto errout20;
1314		}
1315
1316		/* initialize log from log superblock */
1317		if (test_bit(log_INLINELOG,&log->flag)) {
1318			if (log->size != le32_to_cpu(logsuper->size)) {
1319				rc = -EINVAL;
1320				goto errout20;
1321			}
1322			jfs_info("lmLogInit: inline log:0x%p base:0x%Lx size:0x%x",
1323				 log, (unsigned long long)log->base, log->size);
1324		} else {
1325			if (!uuid_equal(&logsuper->uuid, &log->uuid)) {
1326				jfs_warn("wrong uuid on JFS log device");
1327				rc = -EINVAL;
1328				goto errout20;
1329			}
1330			log->size = le32_to_cpu(logsuper->size);
1331			log->l2bsize = le32_to_cpu(logsuper->l2bsize);
1332			jfs_info("lmLogInit: external log:0x%p base:0x%Lx size:0x%x",
1333				 log, (unsigned long long)log->base, log->size);
1334		}
1335
1336		log->page = le32_to_cpu(logsuper->end) / LOGPSIZE;
1337		log->eor = le32_to_cpu(logsuper->end) - (LOGPSIZE * log->page);
1338
1339		/*
1340		 * initialize for log append write mode
1341		 */
1342		/* establish current/end-of-log page/buffer */
1343		if ((rc = lbmRead(log, log->page, &bp)))
1344			goto errout20;
1345
1346		lp = (struct logpage *) bp->l_ldata;
1347
1348		jfs_info("lmLogInit: lsn:0x%x page:%d eor:%d:%d",
1349			 le32_to_cpu(logsuper->end), log->page, log->eor,
1350			 le16_to_cpu(lp->h.eor));
1351
1352		log->bp = bp;
1353		bp->l_pn = log->page;
1354		bp->l_eor = log->eor;
1355
1356		/* if current page is full, move on to next page */
1357		if (log->eor >= LOGPSIZE - LOGPTLRSIZE)
1358			lmNextPage(log);
1359
1360		/*
1361		 * initialize log syncpoint
1362		 */
1363		/*
1364		 * write the first SYNCPT record with syncpoint = 0
1365		 * (i.e., log redo up to HERE !);
1366		 * remove current page from lbm write queue at end of pageout
1367		 * (to write log superblock update), but do not release to
1368		 * freelist;
1369		 */
1370		lrd.logtid = 0;
1371		lrd.backchain = 0;
1372		lrd.type = cpu_to_le16(LOG_SYNCPT);
1373		lrd.length = 0;
1374		lrd.log.syncpt.sync = 0;
1375		lsn = lmWriteRecord(log, NULL, &lrd, NULL);
1376		bp = log->bp;
1377		bp->l_ceor = bp->l_eor;
1378		lp = (struct logpage *) bp->l_ldata;
1379		lp->h.eor = lp->t.eor = cpu_to_le16(bp->l_eor);
1380		lbmWrite(log, bp, lbmWRITE | lbmSYNC, 0);
1381		if ((rc = lbmIOWait(bp, 0)))
1382			goto errout30;
1383
1384		/*
1385		 * update/write superblock
1386		 */
1387		logsuper->state = cpu_to_le32(LOGMOUNT);
1388		log->serial = le32_to_cpu(logsuper->serial) + 1;
1389		logsuper->serial = cpu_to_le32(log->serial);
1390		lbmDirectWrite(log, bpsuper, lbmWRITE | lbmRELEASE | lbmSYNC);
1391		if ((rc = lbmIOWait(bpsuper, lbmFREE)))
1392			goto errout30;
1393	}
1394
1395	/* initialize logsync parameters */
1396	log->logsize = (log->size - 2) << L2LOGPSIZE;
1397	log->lsn = lsn;
1398	log->syncpt = lsn;
1399	log->sync = log->syncpt;
1400	log->nextsync = LOGSYNC_DELTA(log->logsize);
1401
1402	jfs_info("lmLogInit: lsn:0x%x syncpt:0x%x sync:0x%x",
1403		 log->lsn, log->syncpt, log->sync);
1404
1405	/*
1406	 * initialize for lazy/group commit
1407	 */
1408	log->clsn = lsn;
1409
1410	return 0;
1411
1412	/*
1413	 *	unwind on error
1414	 */
1415      errout30:		/* release log page */
1416	log->wqueue = NULL;
1417	bp->l_wqnext = NULL;
1418	lbmFree(bp);
1419
1420      errout20:		/* release log superblock */
1421	lbmFree(bpsuper);
1422
1423      errout10:		/* unwind lbmLogInit() */
1424	lbmLogShutdown(log);
1425
1426	jfs_warn("lmLogInit: exit(%d)", rc);
1427	return rc;
1428}
1429
1430
1431/*
1432 * NAME:	lmLogClose()
1433 *
1434 * FUNCTION:	remove file system <ipmnt> from active list of log <iplog>
1435 *		and close it on last close.
1436 *
1437 * PARAMETER:	sb	- superblock
1438 *
1439 * RETURN:	errors from subroutines
1440 *
1441 * serialization:
1442 */
1443int lmLogClose(struct super_block *sb)
1444{
1445	struct jfs_sb_info *sbi = JFS_SBI(sb);
1446	struct jfs_log *log = sbi->log;
1447	struct block_device *bdev;
1448	int rc = 0;
1449
1450	jfs_info("lmLogClose: log:0x%p", log);
1451
1452	mutex_lock(&jfs_log_mutex);
1453	LOG_LOCK(log);
1454	list_del(&sbi->log_list);
1455	LOG_UNLOCK(log);
1456	sbi->log = NULL;
1457
1458	/*
1459	 * We need to make sure all of the "written" metapages
1460	 * actually make it to disk
1461	 */
1462	sync_blockdev(sb->s_bdev);
1463
1464	if (test_bit(log_INLINELOG, &log->flag)) {
1465		/*
1466		 *	in-line log in host file system
1467		 */
1468		rc = lmLogShutdown(log);
1469		kfree(log);
1470		goto out;
1471	}
1472
1473	if (!log->no_integrity)
1474		lmLogFileSystem(log, sbi, 0);
1475
1476	if (!list_empty(&log->sb_list))
1477		goto out;
1478
1479	/*
1480	 * TODO: ensure that the dummy_log is in a state to allow
1481	 * lbmLogShutdown to deallocate all the buffers and call
1482	 * kfree against dummy_log.  For now, leave dummy_log & its
1483 * buffers in memory, and reuse them if another no-integrity mount
1484	 * is requested.
1485	 */
1486	if (log->no_integrity)
1487		goto out;
1488
1489	/*
1490	 *	external log as separate logical volume
1491	 */
1492	list_del(&log->journal_list);
1493	bdev = log->bdev;
1494	rc = lmLogShutdown(log);
1495
1496	blkdev_put(bdev, FMODE_READ|FMODE_WRITE|FMODE_EXCL);
1497
1498	kfree(log);
1499
1500      out:
1501	mutex_unlock(&jfs_log_mutex);
1502	jfs_info("lmLogClose: exit(%d)", rc);
1503	return rc;
1504}
1505
1506
1507/*
1508 * NAME:	jfs_flush_journal()
1509 *
1510 * FUNCTION:	initiate write of any outstanding transactions to the journal
1511 *		and optionally wait until they are all written to disk
1512 *
1513 *		wait == 0  flush until latest txn is committed, don't wait
1514 *		wait == 1  flush until latest txn is committed, wait
1515 *		wait > 1   flush until all txn's are complete, wait
1516 */
1517void jfs_flush_journal(struct jfs_log *log, int wait)
1518{
1519	int i;
1520	struct tblock *target = NULL;
1521
1522	/* jfs_write_inode may call us during read-only mount */
1523	if (!log)
1524		return;
1525
1526	jfs_info("jfs_flush_journal: log:0x%p wait=%d", log, wait);
1527
1528	LOGGC_LOCK(log);
1529
1530	if (!list_empty(&log->cqueue)) {
1531		/*
1532		 * This ensures that we will keep writing to the journal as long
1533		 * as there are unwritten commit records
1534		 */
1535		target = list_entry(log->cqueue.prev, struct tblock, cqueue);
1536
1537		if (test_bit(log_FLUSH, &log->flag)) {
1538			/*
1539			 * We're already flushing.
1540			 * if flush_tblk is NULL, we are flushing everything,
1541			 * so leave it that way.  Otherwise, update it to the
1542			 * latest transaction
1543			 */
1544			if (log->flush_tblk)
1545				log->flush_tblk = target;
1546		} else {
1547			/* Only flush until latest transaction is committed */
1548			log->flush_tblk = target;
1549			set_bit(log_FLUSH, &log->flag);
1550
1551			/*
1552			 * Initiate I/O on outstanding transactions
1553			 */
1554			if (!(log->cflag & logGC_PAGEOUT)) {
1555				log->cflag |= logGC_PAGEOUT;
1556				lmGCwrite(log, 0);
1557			}
1558		}
1559	}
1560	if ((wait > 1) || test_bit(log_SYNCBARRIER, &log->flag)) {
1561		/* Flush until all activity complete */
1562		set_bit(log_FLUSH, &log->flag);
1563		log->flush_tblk = NULL;
1564	}
1565
1566	if (wait && target && !(target->flag & tblkGC_COMMITTED)) {
1567		DECLARE_WAITQUEUE(__wait, current);
1568
1569		add_wait_queue(&target->gcwait, &__wait);
1570		set_current_state(TASK_UNINTERRUPTIBLE);
1571		LOGGC_UNLOCK(log);
1572		schedule();
1573		LOGGC_LOCK(log);
1574		remove_wait_queue(&target->gcwait, &__wait);
1575	}
1576	LOGGC_UNLOCK(log);
1577
1578	if (wait < 2)
1579		return;
1580
1581	write_special_inodes(log, filemap_fdatawrite);
1582
1583	/*
1584	 * If there was recent activity, we may need to wait
1585	 * for the lazycommit thread to catch up
1586	 */
1587	if ((!list_empty(&log->cqueue)) || !list_empty(&log->synclist)) {
1588		for (i = 0; i < 200; i++) {	/* Too much? */
1589			msleep(250);
1590			write_special_inodes(log, filemap_fdatawrite);
1591			if (list_empty(&log->cqueue) &&
1592			    list_empty(&log->synclist))
1593				break;
1594		}
1595	}
1596	assert(list_empty(&log->cqueue));
1597
1598#ifdef CONFIG_JFS_DEBUG
1599	if (!list_empty(&log->synclist)) {
1600		struct logsyncblk *lp;
1601
1602		printk(KERN_ERR "jfs_flush_journal: synclist not empty\n");
1603		list_for_each_entry(lp, &log->synclist, synclist) {
1604			if (lp->xflag & COMMIT_PAGE) {
1605				struct metapage *mp = (struct metapage *)lp;
1606				print_hex_dump(KERN_ERR, "metapage: ",
1607					       DUMP_PREFIX_ADDRESS, 16, 4,
1608					       mp, sizeof(struct metapage), 0);
1609				print_hex_dump(KERN_ERR, "page: ",
1610					       DUMP_PREFIX_ADDRESS, 16,
1611					       sizeof(long), mp->page,
1612					       sizeof(struct page), 0);
1613			} else
1614				print_hex_dump(KERN_ERR, "tblock:",
1615					       DUMP_PREFIX_ADDRESS, 16, 4,
1616					       lp, sizeof(struct tblock), 0);
1617		}
1618	}
1619#else
1620	WARN_ON(!list_empty(&log->synclist));
1621#endif
1622	clear_bit(log_FLUSH, &log->flag);
1623}
1624
1625/*
1626 * NAME:	lmLogShutdown()
1627 *
1628 * FUNCTION:	log shutdown at last LogClose().
1629 *
1630 *		write log syncpt record.
1631 *		update super block to set redone flag to 0.
1632 *
1633 * PARAMETER:	log	- log inode
1634 *
1635 * RETURN:	0	- success
1636 *
1637 * serialization: single last close thread
1638 */
1639int lmLogShutdown(struct jfs_log * log)
1640{
1641	int rc;
1642	struct lrd lrd;
1643	int lsn;
1644	struct logsuper *logsuper;
1645	struct lbuf *bpsuper;
1646	struct lbuf *bp;
1647	struct logpage *lp;
1648
1649	jfs_info("lmLogShutdown: log:0x%p", log);
1650
1651	jfs_flush_journal(log, 2);
1652
1653	/*
1654	 * write the last SYNCPT record with syncpoint = 0
1655	 * (i.e., log redo up to HERE !)
1656	 */
1657	lrd.logtid = 0;
1658	lrd.backchain = 0;
1659	lrd.type = cpu_to_le16(LOG_SYNCPT);
1660	lrd.length = 0;
1661	lrd.log.syncpt.sync = 0;
1662
1663	lsn = lmWriteRecord(log, NULL, &lrd, NULL);
1664	bp = log->bp;
1665	lp = (struct logpage *) bp->l_ldata;
1666	lp->h.eor = lp->t.eor = cpu_to_le16(bp->l_eor);
1667	lbmWrite(log, log->bp, lbmWRITE | lbmRELEASE | lbmSYNC, 0);
1668	lbmIOWait(log->bp, lbmFREE);
1669	log->bp = NULL;
1670
1671	/*
1672	 * synchronous update log superblock
1673	 * mark log state as shutdown cleanly
1674	 * (i.e., Log does not need to be replayed).
1675	 */
1676	if ((rc = lbmRead(log, 1, &bpsuper)))
1677		goto out;
1678
1679	logsuper = (struct logsuper *) bpsuper->l_ldata;
1680	logsuper->state = cpu_to_le32(LOGREDONE);
1681	logsuper->end = cpu_to_le32(lsn);
1682	lbmDirectWrite(log, bpsuper, lbmWRITE | lbmRELEASE | lbmSYNC);
1683	rc = lbmIOWait(bpsuper, lbmFREE);
1684
1685	jfs_info("lmLogShutdown: lsn:0x%x page:%d eor:%d",
1686		 lsn, log->page, log->eor);
1687
1688      out:
1689	/*
1690	 * shutdown per log i/o
1691	 */
1692	lbmLogShutdown(log);
1693
1694	if (rc) {
1695		jfs_warn("lmLogShutdown: exit(%d)", rc);
1696	}
1697	return rc;
1698}
1699
1700
1701/*
1702 * NAME:	lmLogFileSystem()
1703 *
1704 * FUNCTION:	insert (<activate> = true)/remove (<activate> = false)
1705 *	file system into/from log active file system list.
1706 *
1707 * PARAMETER:	log	- pointer to the log structure.
1708 *		sbi	- superblock info of the file system;
1709 *			  its uuid keys the active list entry.
1710 *		activate - insert/remove device from active list.
1711 *
1712 * RETURN:	0	- success
1713 *		errors returned by lbmIOWait().
1714 */
1715static int lmLogFileSystem(struct jfs_log * log, struct jfs_sb_info *sbi,
1716			   int activate)
1717{
1718	int rc = 0;
1719	int i;
1720	struct logsuper *logsuper;
1721	struct lbuf *bpsuper;
1722	uuid_t *uuid = &sbi->uuid;
1723
1724	/*
1725	 * insert/remove file system device to log active file system list.
1726	 */
1727	if ((rc = lbmRead(log, 1, &bpsuper)))
1728		return rc;
1729
1730	logsuper = (struct logsuper *) bpsuper->l_ldata;
1731	if (activate) {
1732		for (i = 0; i < MAX_ACTIVE; i++)
1733			if (uuid_is_null(&logsuper->active[i].uuid)) {
1734				uuid_copy(&logsuper->active[i].uuid, uuid);
1735				sbi->aggregate = i;
1736				break;
1737			}
1738		if (i == MAX_ACTIVE) {
1739			jfs_warn("Too many file systems sharing journal!");
1740			lbmFree(bpsuper);
1741			return -EMFILE;	/* Is there a better rc? */
1742		}
1743	} else {
1744		for (i = 0; i < MAX_ACTIVE; i++)
1745			if (uuid_equal(&logsuper->active[i].uuid, uuid)) {
1746				uuid_copy(&logsuper->active[i].uuid,
1747					  &uuid_null);
1748				break;
1749			}
1750		if (i == MAX_ACTIVE) {
1751			jfs_warn("Somebody stomped on the journal!");
1752			lbmFree(bpsuper);
1753			return -EIO;
1754		}
1755
1756	}
1757
1758	/*
1759	 * synchronous write log superblock:
1760	 *
1761	 * write sidestream bypassing write queue:
1762	 * at file system mount, log super block is updated for
1763	 * activation of the file system before any log record
1764	 * (MOUNT record) of the file system, and at file system
1765	 * unmount, all meta data for the file system has been
1766	 * flushed before log super block is updated for deactivation
1767	 * of the file system.
1768	 */
1769	lbmDirectWrite(log, bpsuper, lbmWRITE | lbmRELEASE | lbmSYNC);
1770	rc = lbmIOWait(bpsuper, lbmFREE);
1771
1772	return rc;
1773}
1774
1775/*
1776 *		log buffer manager (lbm)
1777 *		------------------------
1778 *
1779 * special purpose buffer manager supporting log i/o requirements.
1780 *
1781 * per log write queue:
1782 * log pageout occurs in serial order via a fifo write queue,
1783 * restricted to a single i/o in progress at any one time.
1784 * a circular singly-linked list
1785 * (log->wqueue points to the tail, and buffers are linked via
1786 * the bp->l_wqnext field) maintains the log pages in pageout
1787 * or waiting for pageout, in serial pageout order.
1788 */
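/*
 * queue invariants, as maintained by lbmWrite()/lbmIODone() below
 * (tail is the previous log->wqueue):
 *
 *	log->wqueue		-> tail buffer
 *	log->wqueue->l_wqnext	-> head buffer (circular list)
 *
 *	insert at tail:	log->wqueue = bp;
 *			bp->l_wqnext = tail->l_wqnext;
 *			tail->l_wqnext = bp;
 *
 * only the buffer at the head may have i/o in progress.
 */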
1789
1790/*
1791 *	lbmLogInit()
1792 *
1793 * initialize per log I/O setup at lmLogInit()
1794 */
1795static int lbmLogInit(struct jfs_log * log)
1796{				/* log inode */
1797	int i;
1798	struct lbuf *lbuf;
1799
1800	jfs_info("lbmLogInit: log:0x%p", log);
1801
1802	/* initialize current buffer cursor */
1803	log->bp = NULL;
1804
1805	/* initialize log device write queue */
1806	log->wqueue = NULL;
1807
1808	/*
1809	 * Each log has its own buffer pages allocated to it.  These are
1810	 * not managed by the page cache.  This ensures that a transaction
1811	 * writing to the log does not block trying to allocate a page from
1812	 * the page cache (for the log).  This would be bad, since page
1813	 * allocation waits on the kswapd thread that may be committing inodes
1814	 * which would cause log activity.  Was that clear?  I'm trying to
1815	 * avoid deadlock here.
1816	 */
1817	init_waitqueue_head(&log->free_wait);
1818
1819	log->lbuf_free = NULL;
1820
1821	for (i = 0; i < LOGPAGES;) {
1822		char *buffer;
1823		uint offset;
1824		struct page *page = alloc_page(GFP_KERNEL | __GFP_ZERO);
1825
1826		if (!page)
1827			goto error;
1828		buffer = page_address(page);
1829		for (offset = 0; offset < PAGE_SIZE; offset += LOGPSIZE) {
1830			lbuf = kmalloc(sizeof(struct lbuf), GFP_KERNEL);
1831			if (lbuf == NULL) {
1832				if (offset == 0)
1833					__free_page(page);
1834				goto error;
1835			}
1836			if (offset) /* we already have one reference */
1837				get_page(page);
1838			lbuf->l_offset = offset;
1839			lbuf->l_ldata = buffer + offset;
1840			lbuf->l_page = page;
1841			lbuf->l_log = log;
1842			init_waitqueue_head(&lbuf->l_ioevent);
1843
1844			lbuf->l_freelist = log->lbuf_free;
1845			log->lbuf_free = lbuf;
1846			i++;
1847		}
1848	}
1849
1850	return (0);
1851
1852      error:
1853	lbmLogShutdown(log);
1854	return -ENOMEM;
1855}
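/*
 * example: with 4 KiB pages and LOGPSIZE == 4096, each alloc_page()
 * above yields exactly one lbuf; with 64 KiB pages it yields sixteen,
 * and the extra get_page() references keep the page alive until every
 * lbuf carved from it is released by __free_page() in lbmLogShutdown().
 */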
1856
1857
1858/*
1859 *	lbmLogShutdown()
1860 *
1861 * finalize per log I/O setup at lmLogShutdown()
1862 */
1863static void lbmLogShutdown(struct jfs_log * log)
1864{
1865	struct lbuf *lbuf;
1866
1867	jfs_info("lbmLogShutdown: log:0x%p", log);
1868
1869	lbuf = log->lbuf_free;
1870	while (lbuf) {
1871		struct lbuf *next = lbuf->l_freelist;
1872		__free_page(lbuf->l_page);
1873		kfree(lbuf);
1874		lbuf = next;
1875	}
1876}
1877
1878
1879/*
1880 *	lbmAllocate()
1881 *
1882 * allocate an empty log buffer
1883 */
1884static struct lbuf *lbmAllocate(struct jfs_log * log, int pn)
1885{
1886	struct lbuf *bp;
1887	unsigned long flags;
1888
1889	/*
1890	 * recycle from log buffer freelist if any
1891	 */
1892	LCACHE_LOCK(flags);
1893	LCACHE_SLEEP_COND(log->free_wait, (bp = log->lbuf_free), flags);
1894	log->lbuf_free = bp->l_freelist;
1895	LCACHE_UNLOCK(flags);
1896
1897	bp->l_flag = 0;
1898
1899	bp->l_wqnext = NULL;
1900	bp->l_freelist = NULL;
1901
1902	bp->l_pn = pn;
1903	bp->l_blkno = log->base + (pn << (L2LOGPSIZE - log->l2bsize));
1904	bp->l_ceor = 0;
1905
1906	return bp;
1907}
1908
1909
1910/*
1911 *	lbmFree()
1912 *
1913 * release a log buffer to freelist
1914 */
1915static void lbmFree(struct lbuf * bp)
1916{
1917	unsigned long flags;
1918
1919	LCACHE_LOCK(flags);
1920
1921	lbmfree(bp);
1922
1923	LCACHE_UNLOCK(flags);
1924}
1925
1926static void lbmfree(struct lbuf * bp)
1927{
1928	struct jfs_log *log = bp->l_log;
1929
1930	assert(bp->l_wqnext == NULL);
1931
1932	/*
1933	 * return the buffer to head of freelist
1934	 */
1935	bp->l_freelist = log->lbuf_free;
1936	log->lbuf_free = bp;
1937
1938	wake_up(&log->free_wait);
1939	return;
1940}
1941
1942
1943/*
1944 * NAME:	lbmRedrive
1945 *
1946 * FUNCTION:	add a log buffer to the log redrive list
1947 *
1948 * PARAMETER:
1949 *	bp	- log buffer
1950 *
1951 * NOTES:
1952 *	Takes log_redrive_lock.
1953 */
1954static inline void lbmRedrive(struct lbuf *bp)
1955{
1956	unsigned long flags;
1957
1958	spin_lock_irqsave(&log_redrive_lock, flags);
1959	bp->l_redrive_next = log_redrive_list;
1960	log_redrive_list = bp;
1961	spin_unlock_irqrestore(&log_redrive_lock, flags);
1962
1963	wake_up_process(jfsIOthread);
1964}
1965
1966
1967/*
1968 *	lbmRead()
1969 */
1970static int lbmRead(struct jfs_log * log, int pn, struct lbuf ** bpp)
1971{
1972	struct bio *bio;
1973	struct lbuf *bp;
1974
1975	/*
1976	 * allocate a log buffer
1977	 */
1978	*bpp = bp = lbmAllocate(log, pn);
1979	jfs_info("lbmRead: bp:0x%p pn:0x%x", bp, pn);
1980
1981	bp->l_flag |= lbmREAD;
1982
1983	bio = bio_alloc(GFP_NOFS, 1);
1984
1985	bio->bi_iter.bi_sector = bp->l_blkno << (log->l2bsize - 9);
1986	bio_set_dev(bio, log->bdev);
1987
1988	bio_add_page(bio, bp->l_page, LOGPSIZE, bp->l_offset);
1989	BUG_ON(bio->bi_iter.bi_size != LOGPSIZE);
1990
1991	bio->bi_end_io = lbmIODone;
1992	bio->bi_private = bp;
1993	bio->bi_opf = REQ_OP_READ;
1994	/*check if journaling to disk has been disabled*/
1995	if (log->no_integrity) {
1996		bio->bi_iter.bi_size = 0;
1997		lbmIODone(bio);
1998	} else {
1999		submit_bio(bio);
2000	}
2001
2002	wait_event(bp->l_ioevent, (bp->l_flag != lbmREAD));
2003
2004	return 0;
2005}
2006
2007
2008/*
2009 *	lbmWrite()
2010 *
2011 * buffer at head of pageout queue stays after completion of
2012 * partial-page pageout and is redriven by explicit initiation of
2013 * pageout by caller until full-page pageout is completed and
2014 * released.
2015 *
2016 * device driver i/o done redrives pageout of new buffer at
2017 * head of pageout queue when current buffer at head of pageout
2018 * queue is released at the completion of its full-page pageout.
2019 *
2020 * LOGGC_LOCK() serializes lbmWrite() by lmNextPage() and lmGroupCommit().
2021 * LCACHE_LOCK() serializes xflag between lbmWrite() and lbmIODone()
2022 */
2023static void lbmWrite(struct jfs_log * log, struct lbuf * bp, int flag,
2024		     int cant_block)
2025{
2026	struct lbuf *tail;
2027	unsigned long flags;
2028
2029	jfs_info("lbmWrite: bp:0x%p flag:0x%x pn:0x%x", bp, flag, bp->l_pn);
2030
2031	/* map the logical block address to physical block address */
2032	bp->l_blkno =
2033	    log->base + (bp->l_pn << (L2LOGPSIZE - log->l2bsize));
2034
2035	LCACHE_LOCK(flags);		/* disable+lock */
2036
2037	/*
2038	 * initialize buffer for device driver
2039	 */
2040	bp->l_flag = flag;
2041
2042	/*
2043	 *	insert bp at tail of write queue associated with log
2044	 *
2045	 * (request is either for bp already/currently at head of queue
2046	 * or new bp to be inserted at tail)
2047	 */
	tail = log->wqueue;

	/* is buffer not already on write queue ? */
	if (bp->l_wqnext == NULL) {
		/* insert at tail of wqueue */
		if (tail == NULL) {
			log->wqueue = bp;
			bp->l_wqnext = bp;
		} else {
			log->wqueue = bp;
			bp->l_wqnext = tail->l_wqnext;
			tail->l_wqnext = bp;
		}

		tail = bp;
	}

	/* is buffer at head of wqueue and for write ? */
	if ((bp != tail->l_wqnext) || !(flag & lbmWRITE)) {
		LCACHE_UNLOCK(flags);	/* unlock+enable */
		return;
	}

	LCACHE_UNLOCK(flags);	/* unlock+enable */

	if (cant_block)
		lbmRedrive(bp);
	else if (flag & lbmSYNC)
		lbmStartIO(bp);
	else {
		LOGGC_UNLOCK(log);
		lbmStartIO(bp);
		LOGGC_LOCK(log);
	}
}
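
/*
 * Note on the three submission paths above: cant_block defers to the
 * jfsIO thread because the caller may not sleep; lbmSYNC submits
 * directly, with the caller expected to wait in lbmIOWait(); otherwise
 * the group-commit lock is dropped around lbmStartIO(), which may
 * sleep in bio_alloc().
 */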


/*
 *	lbmDirectWrite()
 *
 * initiate pageout bypassing write queue for sidestream
 * (e.g., log superblock) write;
 */
static void lbmDirectWrite(struct jfs_log * log, struct lbuf * bp, int flag)
{
	jfs_info("lbmDirectWrite: bp:0x%p flag:0x%x pn:0x%x",
		 bp, flag, bp->l_pn);

	/*
	 * initialize buffer for device driver
	 */
	bp->l_flag = flag | lbmDIRECT;

	/* map the logical block address to physical block address */
	bp->l_blkno =
	    log->base + (bp->l_pn << (L2LOGPSIZE - log->l2bsize));

	/*
	 *	initiate pageout of the page
	 */
	lbmStartIO(bp);
}


/*
 * NAME:	lbmStartIO()
 *
 * FUNCTION:	Interface to DD strategy routine
 *
 * RETURN:	none
 *
 * serialization: LCACHE_LOCK() is NOT held during log i/o;
 */
static void lbmStartIO(struct lbuf * bp)
{
	struct bio *bio;
	struct jfs_log *log = bp->l_log;

	jfs_info("lbmStartIO");

	bio = bio_alloc(GFP_NOFS, 1);
	bio->bi_iter.bi_sector = bp->l_blkno << (log->l2bsize - 9);
	bio_set_dev(bio, log->bdev);

	bio_add_page(bio, bp->l_page, LOGPSIZE, bp->l_offset);
	BUG_ON(bio->bi_iter.bi_size != LOGPSIZE);

	bio->bi_end_io = lbmIODone;
	bio->bi_private = bp;
	bio->bi_opf = REQ_OP_WRITE | REQ_SYNC;

	/* check if journaling to disk has been disabled */
	if (log->no_integrity) {
		bio->bi_iter.bi_size = 0;
		lbmIODone(bio);
	} else {
		submit_bio(bio);
		INCREMENT(lmStat.submitted);
	}
}


/*
 *	lbmIOWait()
 */
static int lbmIOWait(struct lbuf * bp, int flag)
{
	unsigned long flags;
	int rc = 0;

	jfs_info("lbmIOWait1: bp:0x%p flag:0x%x:0x%x", bp, bp->l_flag, flag);

	LCACHE_LOCK(flags);		/* disable+lock */

	LCACHE_SLEEP_COND(bp->l_ioevent, (bp->l_flag & lbmDONE), flags);

	rc = (bp->l_flag & lbmERROR) ? -EIO : 0;

	if (flag & lbmFREE)
		lbmfree(bp);

	LCACHE_UNLOCK(flags);	/* unlock+enable */

	jfs_info("lbmIOWait2: bp:0x%p flag:0x%x:0x%x", bp, bp->l_flag, flag);
	return rc;
}

/*
 *	lbmIODone()
 *
 * executed in i/o completion (interrupt) context
 */
static void lbmIODone(struct bio *bio)
{
	struct lbuf *bp = bio->bi_private;
	struct lbuf *nextbp, *tail;
	struct jfs_log *log;
	unsigned long flags;

	/*
	 * get back jfs buffer bound to the i/o buffer
	 */
	jfs_info("lbmIODone: bp:0x%p flag:0x%x", bp, bp->l_flag);

	LCACHE_LOCK(flags);		/* disable+lock */

	bp->l_flag |= lbmDONE;

	if (bio->bi_status) {
		bp->l_flag |= lbmERROR;

		jfs_err("lbmIODone: I/O error in JFS log");
	}

	bio_put(bio);

	/*
	 *	pagein completion
	 */
	if (bp->l_flag & lbmREAD) {
		bp->l_flag &= ~lbmREAD;

		LCACHE_UNLOCK(flags);	/* unlock+enable */

		/* wakeup I/O initiator */
		LCACHE_WAKEUP(&bp->l_ioevent);

		return;
	}

	/*
	 *	pageout completion
	 *
	 * the bp at the head of the write queue has completed pageout.
	 *
	 * if single-commit/full-page pageout, remove the current buffer
	 * from the head of the pageout queue, and redrive pageout with
	 * the new buffer at the head of the pageout queue;
	 * otherwise, the partial-page pageout buffer stays at
	 * the head of the pageout queue to be redriven for pageout
	 * by lmGroupCommit() until full-page pageout is completed.
	 */
	bp->l_flag &= ~lbmWRITE;
	INCREMENT(lmStat.pagedone);

	/* update committed lsn */
	log = bp->l_log;
	log->clsn = (bp->l_pn << L2LOGPSIZE) + bp->l_ceor;
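	/*
	 * (clsn just computed encodes (page number << L2LOGPSIZE) +
	 * commit eor: the byte address within the log of the last
	 * record in this page known to be safely on disk)
	 */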

	if (bp->l_flag & lbmDIRECT) {
		LCACHE_WAKEUP(&bp->l_ioevent);
		LCACHE_UNLOCK(flags);
		return;
	}

	tail = log->wqueue;

	/* single element queue */
	if (bp == tail) {
		/* remove head buffer of full-page pageout
		 * from log device write queue
		 */
		if (bp->l_flag & lbmRELEASE) {
			log->wqueue = NULL;
			bp->l_wqnext = NULL;
		}
	}
	/* multi element queue */
	else {
		/* remove head buffer of full-page pageout
		 * from log device write queue
		 */
		if (bp->l_flag & lbmRELEASE) {
			nextbp = tail->l_wqnext = bp->l_wqnext;
			bp->l_wqnext = NULL;

			/*
			 * redrive pageout of the next page at the head of
			 * the write queue: either a page without any bound
			 * tblk (i.e., a page with no COMMIT records), or
			 * the first page of a new group commit queued
			 * after the current page (subsequent pageout is
			 * performed synchronously, except for pages without
			 * any COMMITs) by lmGroupCommit(), as indicated
			 * by the lbmWRITE flag;
			 */
			if (nextbp->l_flag & lbmWRITE) {
				/*
				 * We can't do the I/O at interrupt time.
				 * The jfsIO thread can do it
				 */
				lbmRedrive(nextbp);
			}
		}
	}

	/*
	 *	synchronous pageout:
	 *
	 * the buffer has not necessarily been removed from the write
	 * queue (e.g., synchronous write of a partial page with COMMIT):
	 * leave the buffer for the i/o initiator to dispose of
	 */
	if (bp->l_flag & lbmSYNC) {
		LCACHE_UNLOCK(flags);	/* unlock+enable */

		/* wakeup I/O initiator */
		LCACHE_WAKEUP(&bp->l_ioevent);
	}

	/*
	 *	Group Commit pageout:
	 */
	else if (bp->l_flag & lbmGC) {
		LCACHE_UNLOCK(flags);
		lmPostGC(bp);
	}

	/*
	 *	asynchronous pageout:
	 *
	 * the buffer must have been removed from the write queue:
	 * insert it at the head of the freelist where it can be recycled
	 */
	else {
		assert(bp->l_flag & lbmRELEASE);
		assert(bp->l_flag & lbmFREE);
		lbmfree(bp);

		LCACHE_UNLOCK(flags);	/* unlock+enable */
	}
}

int jfsIOWait(void *arg)
{
	struct lbuf *bp;

	do {
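		/*
		 * drain the redrive list; the spinlock is dropped around
		 * lbmStartIO() because bio_alloc(GFP_NOFS, ...) may sleep
		 */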
		spin_lock_irq(&log_redrive_lock);
		while ((bp = log_redrive_list)) {
			log_redrive_list = bp->l_redrive_next;
			bp->l_redrive_next = NULL;
			spin_unlock_irq(&log_redrive_lock);
			lbmStartIO(bp);
			spin_lock_irq(&log_redrive_lock);
		}

		if (freezing(current)) {
			spin_unlock_irq(&log_redrive_lock);
			try_to_freeze();
		} else {
			set_current_state(TASK_INTERRUPTIBLE);
			spin_unlock_irq(&log_redrive_lock);
			schedule();
		}
	} while (!kthread_should_stop());

	jfs_info("jfsIOWait being killed!");
	return 0;
}

/*
 * NAME:	lmLogFormat()/jfs_logform()
 *
 * FUNCTION:	format file system log
 *
 * PARAMETERS:
 *	log	- volume log
 *	logAddress - start address of log space in FS blocks
 *	logSize	- length of log space in FS blocks;
 *
 * RETURN:	0	- success
 *		-EIO	- i/o error
 *
 * XXX: We're synchronously writing one page at a time.  This needs to
 *	be improved by writing multiple pages at once.
 */
int lmLogFormat(struct jfs_log *log, s64 logAddress, int logSize)
{
	int rc = -EIO;
	struct jfs_sb_info *sbi;
	struct logsuper *logsuper;
	struct logpage *lp;
	int lspn;		/* log sequence page number */
	struct lrd *lrd_ptr;
	int npages = 0;
	struct lbuf *bp;

	jfs_info("lmLogFormat: logAddress:%Ld logSize:%d",
		 (long long)logAddress, logSize);

	sbi = list_entry(log->sb_list.next, struct jfs_sb_info, log_list);

	/* allocate a log buffer */
	bp = lbmAllocate(log, 1);

	npages = logSize >> sbi->l2nbperpage;
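	/*
	 * (l2nbperpage = log2(file-system blocks per 4 KiB log page);
	 * e.g., with 1 KiB blocks l2nbperpage == 2, so a 1024-block log
	 * yields npages == 256)
	 */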

	/*
	 *	log space:
	 *
	 * page 0 - reserved;
	 * page 1 - log superblock;
	 * page 2 - log data page: a SYNC log record is written
	 *	    into this page at logform time;
	 * pages 3-N - log data pages: set to empty log data pages;
	 */
	/*
	 *	init log superblock: log page 1
	 */
	logsuper = (struct logsuper *) bp->l_ldata;

	logsuper->magic = cpu_to_le32(LOGMAGIC);
	logsuper->version = cpu_to_le32(LOGVERSION);
	logsuper->state = cpu_to_le32(LOGREDONE);
	logsuper->flag = cpu_to_le32(sbi->mntflag);	/* ? */
	logsuper->size = cpu_to_le32(npages);
	logsuper->bsize = cpu_to_le32(sbi->bsize);
	logsuper->l2bsize = cpu_to_le32(sbi->l2bsize);
	logsuper->end = cpu_to_le32(2 * LOGPSIZE + LOGPHDRSIZE + LOGRDSIZE);
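	/*
	 * (end = byte offset of the first free byte in the log: pages
	 * 0-1 precede the data area (2 * LOGPSIZE), and the SYNCPT
	 * record written into page 2 below occupies LOGRDSIZE bytes
	 * after that page's LOGPHDRSIZE header)
	 */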

	bp->l_flag = lbmWRITE | lbmSYNC | lbmDIRECT;
	bp->l_blkno = logAddress + sbi->nbperpage;
	lbmStartIO(bp);
	if ((rc = lbmIOWait(bp, 0)))
		goto exit;

	/*
	 *	init pages 2 to npages-1 as log data pages:
	 *
	 * log sequence page number (lspn) initialization:
	 *
	 * pn:   0     1     2     3                 n-1
	 *       +-----+-----+=====+=====+===.....===+=====+
	 * lspn:             N-1   0     1           N-2
	 *                   <--- N page circular file ---->
	 *
	 * the N (= npages-2) data pages of the log are maintained as
	 * a circular file for the log records;
	 * lspn grows by 1 monotonically as each log page is written
	 * to the circular file of the log;
	 * and setLogpage() will not reset the page number even if
	 * the eor is equal to LOGPHDRSIZE.  In order for the binary
	 * search in the find-log-end process to still work, we have to
	 * simulate the log-wrap situation at log format time.
	 * The 1st log page written will have the highest lspn.  The
	 * succeeding log pages will then have ascending lspn values,
	 * starting from 0 through (N-2).
	 */
	lp = (struct logpage *) bp->l_ldata;
	/*
	 * initialize the 1st log page to be written: lspn = N - 1;
	 * a SYNCPT log record is written to this page
	 */
	lp->h.page = lp->t.page = cpu_to_le32(npages - 3);
	lp->h.eor = lp->t.eor = cpu_to_le16(LOGPHDRSIZE + LOGRDSIZE);

	lrd_ptr = (struct lrd *) &lp->data;
	lrd_ptr->logtid = 0;
	lrd_ptr->backchain = 0;
	lrd_ptr->type = cpu_to_le16(LOG_SYNCPT);
	lrd_ptr->length = 0;
	lrd_ptr->log.syncpt.sync = 0;

	bp->l_blkno += sbi->nbperpage;
	bp->l_flag = lbmWRITE | lbmSYNC | lbmDIRECT;
	lbmStartIO(bp);
	if ((rc = lbmIOWait(bp, 0)))
		goto exit;

	/*
	 *	initialize succeeding log pages: lspn = 0, 1, ..., (N-2)
	 */
	for (lspn = 0; lspn < npages - 3; lspn++) {
		lp->h.page = lp->t.page = cpu_to_le32(lspn);
		lp->h.eor = lp->t.eor = cpu_to_le16(LOGPHDRSIZE);

		bp->l_blkno += sbi->nbperpage;
		bp->l_flag = lbmWRITE | lbmSYNC | lbmDIRECT;
		lbmStartIO(bp);
		if ((rc = lbmIOWait(bp, 0)))
			goto exit;
	}

	rc = 0;
exit:
	/*
	 *	finalize log
	 */
	/* release the buffer */
	lbmFree(bp);

	return rc;
}

#ifdef CONFIG_JFS_STATISTICS
int jfs_lmstats_proc_show(struct seq_file *m, void *v)
{
	seq_printf(m,
		       "JFS Logmgr stats\n"
		       "================\n"
		       "commits = %d\n"
		       "writes submitted = %d\n"
		       "writes completed = %d\n"
		       "full pages submitted = %d\n"
		       "partial pages submitted = %d\n",
		       lmStat.commit,
		       lmStat.submitted,
		       lmStat.pagedone,
		       lmStat.full_page,
		       lmStat.partial_page);
	return 0;
}
#endif /* CONFIG_JFS_STATISTICS */