xref: /kernel/linux/linux-5.10/fs/gfs2/lops.c (revision 8c2ecf20)
// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (C) Sistina Software, Inc.  1997-2003 All rights reserved.
 * Copyright (C) 2004-2006 Red Hat, Inc.  All rights reserved.
 */

#include <linux/sched.h>
#include <linux/slab.h>
#include <linux/spinlock.h>
#include <linux/completion.h>
#include <linux/buffer_head.h>
#include <linux/mempool.h>
#include <linux/gfs2_ondisk.h>
#include <linux/bio.h>
#include <linux/fs.h>
#include <linux/list_sort.h>
#include <linux/blkdev.h>

#include "bmap.h"
#include "dir.h"
#include "gfs2.h"
#include "incore.h"
#include "inode.h"
#include "glock.h"
#include "glops.h"
#include "log.h"
#include "lops.h"
#include "meta_io.h"
#include "recovery.h"
#include "rgrp.h"
#include "trans.h"
#include "util.h"
#include "trace_gfs2.h"

/**
 * gfs2_pin - Pin a buffer in memory
 * @sdp: The superblock
 * @bh: The buffer to be pinned
 *
 * The log lock must be held when calling this function
 */
void gfs2_pin(struct gfs2_sbd *sdp, struct buffer_head *bh)
{
	struct gfs2_bufdata *bd;

	BUG_ON(!current->journal_info);

	clear_buffer_dirty(bh);
	if (test_set_buffer_pinned(bh))
		gfs2_assert_withdraw(sdp, 0);
	if (!buffer_uptodate(bh))
		gfs2_io_error_bh_wd(sdp, bh);
	bd = bh->b_private;
	/* If this buffer is in the AIL and it has already been written
	 * to its in-place disk block, remove it from the AIL.
	 */
	spin_lock(&sdp->sd_ail_lock);
	if (bd->bd_tr)
		list_move(&bd->bd_ail_st_list, &bd->bd_tr->tr_ail2_list);
	spin_unlock(&sdp->sd_ail_lock);
	get_bh(bh);
	atomic_inc(&sdp->sd_log_pinned);
	trace_gfs2_pin(bd, 1);
}
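
/*
 * Illustrative sketch (not part of the original source): callers such as
 * gfs2_trans_add_meta() in trans.c are expected to pin a buffer roughly
 * like this, taking the log lock first as required above:
 *
 *	gfs2_log_lock(sdp);
 *	if (!buffer_pinned(bh))
 *		gfs2_pin(sdp, bh);
 *	gfs2_log_unlock(sdp);
 *
 * The matching gfs2_unpin() below runs once the buffer's log copy has been
 * committed, transferring the buffer to the AIL for in-place writeback.
 */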

static bool buffer_is_rgrp(const struct gfs2_bufdata *bd)
{
	return bd->bd_gl->gl_name.ln_type == LM_TYPE_RGRP;
}

static void maybe_release_space(struct gfs2_bufdata *bd)
{
	struct gfs2_glock *gl = bd->bd_gl;
	struct gfs2_sbd *sdp = gl->gl_name.ln_sbd;
	struct gfs2_rgrpd *rgd = gfs2_glock2rgrp(gl);
	unsigned int index = bd->bd_bh->b_blocknr - gl->gl_name.ln_number;
	struct gfs2_bitmap *bi = rgd->rd_bits + index;

	if (bi->bi_clone == NULL)
		return;
	if (sdp->sd_args.ar_discard)
		gfs2_rgrp_send_discards(sdp, rgd->rd_data0, bd->bd_bh, bi, 1, NULL);
	memcpy(bi->bi_clone + bi->bi_offset,
	       bd->bd_bh->b_data + bi->bi_offset, bi->bi_bytes);
	clear_bit(GBF_FULL, &bi->bi_flags);
	rgd->rd_free_clone = rgd->rd_free;
	rgd->rd_extfail_pt = rgd->rd_free;
}

/**
 * gfs2_unpin - Unpin a buffer
 * @sdp: the filesystem the buffer belongs to
 * @bh: The buffer to unpin
 * @tr: The system transaction being flushed
 */

static void gfs2_unpin(struct gfs2_sbd *sdp, struct buffer_head *bh,
		       struct gfs2_trans *tr)
{
	struct gfs2_bufdata *bd = bh->b_private;

	BUG_ON(!buffer_uptodate(bh));
	BUG_ON(!buffer_pinned(bh));

	lock_buffer(bh);
	mark_buffer_dirty(bh);
	clear_buffer_pinned(bh);

	if (buffer_is_rgrp(bd))
		maybe_release_space(bd);

	spin_lock(&sdp->sd_ail_lock);
	if (bd->bd_tr) {
		list_del(&bd->bd_ail_st_list);
		brelse(bh);
	} else {
		struct gfs2_glock *gl = bd->bd_gl;
		list_add(&bd->bd_ail_gl_list, &gl->gl_ail_list);
		atomic_inc(&gl->gl_ail_count);
	}
	bd->bd_tr = tr;
	list_add(&bd->bd_ail_st_list, &tr->tr_ail1_list);
	spin_unlock(&sdp->sd_ail_lock);

	clear_bit(GLF_LFLUSH, &bd->bd_gl->gl_flags);
	trace_gfs2_pin(bd, 0);
	unlock_buffer(bh);
	atomic_dec(&sdp->sd_log_pinned);
}

void gfs2_log_incr_head(struct gfs2_sbd *sdp)
{
	BUG_ON((sdp->sd_log_flush_head == sdp->sd_log_tail) &&
	       (sdp->sd_log_flush_head != sdp->sd_log_head));

	if (++sdp->sd_log_flush_head == sdp->sd_jdesc->jd_blocks)
		sdp->sd_log_flush_head = 0;
}
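
/*
 * Illustrative note (not part of the original source): the journal is a
 * circular buffer of jd_blocks blocks, so the increment above is
 * equivalent to:
 *
 *	sdp->sd_log_flush_head =
 *		(sdp->sd_log_flush_head + 1) % sdp->sd_jdesc->jd_blocks;
 *
 * The BUG_ON fires if the flush head runs into the log tail while the log
 * is not empty, which would overwrite journal data that has not yet been
 * reclaimed.
 */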

u64 gfs2_log_bmap(struct gfs2_jdesc *jd, unsigned int lblock)
{
	struct gfs2_journal_extent *je;

	list_for_each_entry(je, &jd->extent_list, list) {
		if (lblock >= je->lblock && lblock < je->lblock + je->blocks)
			return je->dblock + lblock - je->lblock;
	}

	return -1;
}
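
/*
 * Illustrative example (not part of the original source): with a journal
 * extent of lblock = 0, blocks = 8, dblock = 1000, a lookup of logical
 * block 5 falls inside that extent and maps to device block
 * 1000 + 5 - 0 = 1005. If no extent covers lblock, the function returns
 * -1 (all ones when cast to u64).
 */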

/**
 * gfs2_end_log_write_bh - end log write of pagecache data with buffers
 * @sdp: The superblock
 * @bvec: The bio_vec
 * @error: The i/o status
 *
 * This finds the relevant buffers and unlocks them and sets the
 * error flag according to the status of the i/o request. This is
 * used when the log is writing data which has an in-place version
 * that is pinned in the pagecache.
 */

static void gfs2_end_log_write_bh(struct gfs2_sbd *sdp,
				  struct bio_vec *bvec,
				  blk_status_t error)
{
	struct buffer_head *bh, *next;
	struct page *page = bvec->bv_page;
	unsigned size;

	bh = page_buffers(page);
	size = bvec->bv_len;
	while (bh_offset(bh) < bvec->bv_offset)
		bh = bh->b_this_page;
	do {
		if (error)
			mark_buffer_write_io_error(bh);
		unlock_buffer(bh);
		next = bh->b_this_page;
		size -= bh->b_size;
		brelse(bh);
		bh = next;
	} while (bh && size);
}

/**
 * gfs2_end_log_write - end of i/o to the log
 * @bio: The bio
 *
 * Each bio_vec contains either data from the pagecache or data
 * relating to the log itself. Here we iterate over the bio_vec
 * array, processing both kinds of data.
 *
 */

static void gfs2_end_log_write(struct bio *bio)
{
	struct gfs2_sbd *sdp = bio->bi_private;
	struct bio_vec *bvec;
	struct page *page;
	struct bvec_iter_all iter_all;

	if (bio->bi_status) {
		if (!cmpxchg(&sdp->sd_log_error, 0, (int)bio->bi_status))
			fs_err(sdp, "Error %d writing to journal, jid=%u\n",
			       bio->bi_status, sdp->sd_jdesc->jd_jid);
		gfs2_withdraw_delayed(sdp);
		/* prevent more writes to the journal */
		clear_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags);
		wake_up(&sdp->sd_logd_waitq);
	}

	bio_for_each_segment_all(bvec, bio, iter_all) {
		page = bvec->bv_page;
		if (page_has_buffers(page))
			gfs2_end_log_write_bh(sdp, bvec, bio->bi_status);
		else
			mempool_free(page, gfs2_page_pool);
	}

	bio_put(bio);
	if (atomic_dec_and_test(&sdp->sd_log_in_flight))
		wake_up(&sdp->sd_log_flush_wait);
}

/**
 * gfs2_log_submit_bio - Submit any pending log bio
 * @biop: Address of the bio pointer
 * @opf: REQ_OP | op_flags
 *
 * Submit any pending part-built or full bio to the block device. If
 * there is no pending bio, then this is a no-op.
 */

void gfs2_log_submit_bio(struct bio **biop, int opf)
{
	struct bio *bio = *biop;
	if (bio) {
		struct gfs2_sbd *sdp = bio->bi_private;
		atomic_inc(&sdp->sd_log_in_flight);
		bio->bi_opf = opf;
		submit_bio(bio);
		*biop = NULL;
	}
}
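
/*
 * Illustrative usage (not part of the original source): the log code keeps
 * one partially built bio cached in sdp->sd_log_bio, and a flush point
 * would submit it with something along the lines of:
 *
 *	gfs2_log_submit_bio(&sdp->sd_log_bio, REQ_OP_WRITE);
 *
 * where additional op_flags may be OR'd in by the caller. Passing the
 * address of the pointer lets the function NULL it out, so the next log
 * write starts a fresh bio via gfs2_log_get_bio() below.
 */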

/**
 * gfs2_log_alloc_bio - Allocate a bio
 * @sdp: The super block
 * @blkno: The device block number we want to write to
 * @end_io: The bi_end_io callback
 *
 * Allocate a new bio, initialize it with the given parameters and return it.
 *
 * Returns: The newly allocated bio
 */

static struct bio *gfs2_log_alloc_bio(struct gfs2_sbd *sdp, u64 blkno,
				      bio_end_io_t *end_io)
{
	struct super_block *sb = sdp->sd_vfs;
	struct bio *bio = bio_alloc(GFP_NOIO, BIO_MAX_PAGES);

	bio->bi_iter.bi_sector = blkno << sdp->sd_fsb2bb_shift;
	bio_set_dev(bio, sb->s_bdev);
	bio->bi_end_io = end_io;
	bio->bi_private = sdp;

	return bio;
}

/**
 * gfs2_log_get_bio - Get cached log bio, or allocate a new one
 * @sdp: The super block
 * @blkno: The device block number we want to write to
 * @biop: Address of the cached bio pointer
 * @op: REQ_OP
 * @end_io: The bi_end_io callback
 * @flush: Always flush the current bio and allocate a new one?
 *
 * If there is a cached bio, then if the next block number is sequential
 * with the previous one, return it, otherwise flush the bio to the
 * device. If there is no cached bio, or we just flushed it, then
 * allocate a new one.
 *
 * Returns: The bio to use for log writes
 */

static struct bio *gfs2_log_get_bio(struct gfs2_sbd *sdp, u64 blkno,
				    struct bio **biop, int op,
				    bio_end_io_t *end_io, bool flush)
{
	struct bio *bio = *biop;

	if (bio) {
		u64 nblk;

		nblk = bio_end_sector(bio);
		nblk >>= sdp->sd_fsb2bb_shift;
		if (blkno == nblk && !flush)
			return bio;
		gfs2_log_submit_bio(biop, op);
	}

	*biop = gfs2_log_alloc_bio(sdp, blkno, end_io);
	return *biop;
}

/**
 * gfs2_log_write - write to log
 * @sdp: the filesystem
 * @page: the page to write
 * @size: the size of the data to write
 * @offset: the offset within the page
 * @blkno: block number of the log entry
 *
 * Try and add the page segment to the current bio. If that fails,
 * submit the current bio to the device and create a new one, and
 * then add the page segment to that.
 */

void gfs2_log_write(struct gfs2_sbd *sdp, struct page *page,
		    unsigned size, unsigned offset, u64 blkno)
{
	struct bio *bio;
	int ret;

	bio = gfs2_log_get_bio(sdp, blkno, &sdp->sd_log_bio, REQ_OP_WRITE,
			       gfs2_end_log_write, false);
	ret = bio_add_page(bio, page, size, offset);
	if (ret == 0) {
		bio = gfs2_log_get_bio(sdp, blkno, &sdp->sd_log_bio,
				       REQ_OP_WRITE, gfs2_end_log_write, true);
		ret = bio_add_page(bio, page, size, offset);
		WARN_ON(ret == 0);
	}
}
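
/*
 * Illustrative note (not part of the original source): bio_add_page()
 * returns the number of bytes added, or 0 once the bio is full, so the
 * retry above amounts to:
 *
 *	if (bio_add_page(bio, page, size, offset) == 0) {
 *		submit the full bio (via gfs2_log_get_bio(..., true));
 *		bio_add_page(new empty bio, page, size, offset);
 *	}
 *
 * WARN_ON() rather than error handling is used on the second attempt
 * because a single page segment always fits in an empty bio.
 */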

/**
 * gfs2_log_write_bh - write a buffer's content to the log
 * @sdp: The super block
 * @bh: The buffer pointing to the in-place location
 *
 * This writes the content of the buffer to the next available location
 * in the log. The buffer will be unlocked once the i/o to the log has
 * completed.
 */

static void gfs2_log_write_bh(struct gfs2_sbd *sdp, struct buffer_head *bh)
{
	u64 dblock;

	dblock = gfs2_log_bmap(sdp->sd_jdesc, sdp->sd_log_flush_head);
	gfs2_log_incr_head(sdp);
	gfs2_log_write(sdp, bh->b_page, bh->b_size, bh_offset(bh), dblock);
}

/**
 * gfs2_log_write_page - write one block stored in a page, into the log
 * @sdp: The superblock
 * @page: The struct page
 *
 * This writes the first block-sized part of the page into the log. Note
 * that the page must have been allocated from the gfs2_page_pool mempool
 * and that after this has been called, ownership has been transferred and
 * the page may be freed at any time.
 */

void gfs2_log_write_page(struct gfs2_sbd *sdp, struct page *page)
{
	struct super_block *sb = sdp->sd_vfs;
	u64 dblock;

	dblock = gfs2_log_bmap(sdp->sd_jdesc, sdp->sd_log_flush_head);
	gfs2_log_incr_head(sdp);
	gfs2_log_write(sdp, page, sb->s_blocksize, 0, dblock);
}

/**
 * gfs2_end_log_read - end I/O callback for reads from the log
 * @bio: The bio
 *
 * Simply unlock the pages in the bio. The main thread will wait on them and
 * process them in order as necessary.
 */

static void gfs2_end_log_read(struct bio *bio)
{
	struct page *page;
	struct bio_vec *bvec;
	struct bvec_iter_all iter_all;

	bio_for_each_segment_all(bvec, bio, iter_all) {
		page = bvec->bv_page;
		if (bio->bi_status) {
			int err = blk_status_to_errno(bio->bi_status);

			SetPageError(page);
			mapping_set_error(page->mapping, err);
		}
		unlock_page(page);
	}

	bio_put(bio);
}

/**
 * gfs2_jhead_pg_srch - Look for the journal head in a given page.
 * @jd: The journal descriptor
 * @head: The journal head to start from
 * @page: The page to look in
 *
 * Returns: true if found, false otherwise.
 */

static bool gfs2_jhead_pg_srch(struct gfs2_jdesc *jd,
			      struct gfs2_log_header_host *head,
			      struct page *page)
{
	struct gfs2_sbd *sdp = GFS2_SB(jd->jd_inode);
	struct gfs2_log_header_host lh;
	void *kaddr = kmap_atomic(page);
	unsigned int offset;
	bool ret = false;

	for (offset = 0; offset < PAGE_SIZE; offset += sdp->sd_sb.sb_bsize) {
		if (!__get_log_header(sdp, kaddr + offset, 0, &lh)) {
			if (lh.lh_sequence >= head->lh_sequence)
				*head = lh;
			else {
				ret = true;
				break;
			}
		}
	}
	kunmap_atomic(kaddr);
	return ret;
}

/**
 * gfs2_jhead_process_page - Search/cleanup a page
 * @jd: The journal descriptor
 * @index: Index of the page to look into
 * @head: The journal head to start from
 * @done: If set, perform only cleanup, else search and set if found.
 *
 * Find the page with 'index' in the journal's mapping. Search the page for
 * the journal head if requested (*done == false). Release refs on the
 * page so the page cache can reclaim it (put_page() twice). We grabbed a
 * reference on this page two times, first when we did a find_or_create_page()
 * to obtain the page to add it to the bio and second when we do a
 * find_get_page() here to get the page to wait on while I/O on it is being
 * completed.
 * This function is also used to free up a page we might've grabbed but not
 * used. Maybe we added it to a bio, but not submitted it for I/O. Or we
 * submitted the I/O, but we already found the jhead so we only need to drop
 * our references to the page.
 */

static void gfs2_jhead_process_page(struct gfs2_jdesc *jd, unsigned long index,
				    struct gfs2_log_header_host *head,
				    bool *done)
{
	struct page *page;

	page = find_get_page(jd->jd_inode->i_mapping, index);
	wait_on_page_locked(page);

	if (PageError(page))
		*done = true;

	if (!*done)
		*done = gfs2_jhead_pg_srch(jd, head, page);

	put_page(page); /* Once for find_get_page */
	put_page(page); /* Once more for find_or_create_page */
}

static struct bio *gfs2_chain_bio(struct bio *prev, unsigned int nr_iovecs)
{
	struct bio *new;

	new = bio_alloc(GFP_NOIO, nr_iovecs);
	bio_copy_dev(new, prev);
	new->bi_iter.bi_sector = bio_end_sector(prev);
	new->bi_opf = prev->bi_opf;
	new->bi_write_hint = prev->bi_write_hint;
	bio_chain(new, prev);
	submit_bio(prev);
	return new;
}
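
/*
 * Illustrative note (not part of the original source): nr_iovecs here is
 * only ever the number of blocks left in the page that straddles the two
 * bios, so the chained bio holds just the remainder of that page. With
 * bio_chain(new, prev), prev is the parent: its gfs2_end_log_read()
 * completion, which unlocks the straddling page, is deferred until the
 * chained remainder has completed as well, so a page is never unlocked
 * while part of it is still under I/O.
 */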

/**
 * gfs2_find_jhead - find the head of a log
 * @jd: The journal descriptor
 * @head: The log descriptor for the head of the log is returned here
 * @keep_cache: If set, don't truncate the journal's page cache when done
 *
 * Do a search of a journal by reading it in large chunks using bios and find
 * the valid log entry with the highest sequence number (i.e. the log head).
 *
 * Returns: 0 on success, errno otherwise
 */
int gfs2_find_jhead(struct gfs2_jdesc *jd, struct gfs2_log_header_host *head,
		    bool keep_cache)
{
	struct gfs2_sbd *sdp = GFS2_SB(jd->jd_inode);
	struct address_space *mapping = jd->jd_inode->i_mapping;
	unsigned int block = 0, blocks_submitted = 0, blocks_read = 0;
	unsigned int bsize = sdp->sd_sb.sb_bsize, off;
	unsigned int bsize_shift = sdp->sd_sb.sb_bsize_shift;
	unsigned int shift = PAGE_SHIFT - bsize_shift;
	unsigned int max_blocks = 2 * 1024 * 1024 >> bsize_shift;
	struct gfs2_journal_extent *je;
	int sz, ret = 0;
	struct bio *bio = NULL;
	struct page *page = NULL;
	bool done = false;
	errseq_t since;

	memset(head, 0, sizeof(*head));
	if (list_empty(&jd->extent_list))
		gfs2_map_journal_extents(sdp, jd);

	since = filemap_sample_wb_err(mapping);
	list_for_each_entry(je, &jd->extent_list, list) {
		u64 dblock = je->dblock;

		for (; block < je->lblock + je->blocks; block++, dblock++) {
			if (!page) {
				page = find_or_create_page(mapping,
						block >> shift, GFP_NOFS);
				if (!page) {
					ret = -ENOMEM;
					done = true;
					goto out;
				}
				off = 0;
			}

			if (bio && (off || block < blocks_submitted + max_blocks)) {
				sector_t sector = dblock << sdp->sd_fsb2bb_shift;

				if (bio_end_sector(bio) == sector) {
					sz = bio_add_page(bio, page, bsize, off);
					if (sz == bsize)
						goto block_added;
				}
				if (off) {
					unsigned int blocks =
						(PAGE_SIZE - off) >> bsize_shift;

					bio = gfs2_chain_bio(bio, blocks);
					goto add_block_to_new_bio;
				}
			}

			if (bio) {
				blocks_submitted = block;
				submit_bio(bio);
			}

			bio = gfs2_log_alloc_bio(sdp, dblock, gfs2_end_log_read);
			bio->bi_opf = REQ_OP_READ;
add_block_to_new_bio:
			sz = bio_add_page(bio, page, bsize, off);
			BUG_ON(sz != bsize);
block_added:
			off += bsize;
			if (off == PAGE_SIZE)
				page = NULL;
			if (blocks_submitted <= blocks_read + max_blocks) {
				/* Keep at least one bio in flight */
				continue;
			}

			gfs2_jhead_process_page(jd, blocks_read >> shift, head, &done);
			blocks_read += PAGE_SIZE >> bsize_shift;
			if (done)
				goto out;  /* found */
		}
	}

out:
	if (bio)
		submit_bio(bio);
	while (blocks_read < block) {
		gfs2_jhead_process_page(jd, blocks_read >> shift, head, &done);
		blocks_read += PAGE_SIZE >> bsize_shift;
	}

	if (!ret)
		ret = filemap_check_wb_err(mapping, since);

	if (!keep_cache)
		truncate_inode_pages(mapping, 0);

	return ret;
}
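
/*
 * Illustrative usage (not part of the original source): recovery and mount
 * paths locate the current head of a journal roughly as follows, then
 * check whether the log was cleanly unmounted:
 *
 *	struct gfs2_log_header_host head;
 *	int error = gfs2_find_jhead(sdp->sd_jdesc, &head, false);
 *
 *	if (!error && !(head.lh_flags & GFS2_LOG_HEAD_UNMOUNT))
 *		error = -EIO;	// dirty journal: replay needed first
 *
 * Passing keep_cache = false truncates the journal's page cache once the
 * search is done, since the pages were only needed for the scan.
 */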

static struct page *gfs2_get_log_desc(struct gfs2_sbd *sdp, u32 ld_type,
				      u32 ld_length, u32 ld_data1)
{
	struct page *page = mempool_alloc(gfs2_page_pool, GFP_NOIO);
	struct gfs2_log_descriptor *ld = page_address(page);
	clear_page(ld);
	ld->ld_header.mh_magic = cpu_to_be32(GFS2_MAGIC);
	ld->ld_header.mh_type = cpu_to_be32(GFS2_METATYPE_LD);
	ld->ld_header.mh_format = cpu_to_be32(GFS2_FORMAT_LD);
	ld->ld_type = cpu_to_be32(ld_type);
	ld->ld_length = cpu_to_be32(ld_length);
	ld->ld_data1 = cpu_to_be32(ld_data1);
	ld->ld_data2 = 0;
	return page;
}

static void gfs2_check_magic(struct buffer_head *bh)
{
	void *kaddr;
	__be32 *ptr;

	clear_buffer_escaped(bh);
	kaddr = kmap_atomic(bh->b_page);
	ptr = kaddr + bh_offset(bh);
	if (*ptr == cpu_to_be32(GFS2_MAGIC))
		set_buffer_escaped(bh);
	kunmap_atomic(kaddr);
}

static int blocknr_cmp(void *priv, const struct list_head *a,
		       const struct list_head *b)
{
	struct gfs2_bufdata *bda, *bdb;

	bda = list_entry(a, struct gfs2_bufdata, bd_list);
	bdb = list_entry(b, struct gfs2_bufdata, bd_list);

	if (bda->bd_bh->b_blocknr < bdb->bd_bh->b_blocknr)
		return -1;
	if (bda->bd_bh->b_blocknr > bdb->bd_bh->b_blocknr)
		return 1;
	return 0;
}
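
/*
 * Illustrative note (not part of the original source): this is a standard
 * list_sort() comparator, returning <0, 0 or >0, used as:
 *
 *	list_sort(NULL, blist, blocknr_cmp);	// see gfs2_before_commit()
 *
 * so that pinned buffers are logged in ascending in-place block number
 * order.
 */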

static void gfs2_before_commit(struct gfs2_sbd *sdp, unsigned int limit,
				unsigned int total, struct list_head *blist,
				bool is_databuf)
{
	struct gfs2_log_descriptor *ld;
	struct gfs2_bufdata *bd1 = NULL, *bd2;
	struct page *page;
	unsigned int num;
	unsigned n;
	__be64 *ptr;

	gfs2_log_lock(sdp);
	list_sort(NULL, blist, blocknr_cmp);
	bd1 = bd2 = list_prepare_entry(bd1, blist, bd_list);
	while (total) {
		num = total;
		if (total > limit)
			num = limit;
		gfs2_log_unlock(sdp);
		page = gfs2_get_log_desc(sdp,
					 is_databuf ? GFS2_LOG_DESC_JDATA :
					 GFS2_LOG_DESC_METADATA, num + 1, num);
		ld = page_address(page);
		gfs2_log_lock(sdp);
		ptr = (__be64 *)(ld + 1);

		n = 0;
		list_for_each_entry_continue(bd1, blist, bd_list) {
			*ptr++ = cpu_to_be64(bd1->bd_bh->b_blocknr);
			if (is_databuf) {
				gfs2_check_magic(bd1->bd_bh);
				*ptr++ = cpu_to_be64(buffer_escaped(bd1->bd_bh) ? 1 : 0);
			}
			if (++n >= num)
				break;
		}

		gfs2_log_unlock(sdp);
		gfs2_log_write_page(sdp, page);
		gfs2_log_lock(sdp);

		n = 0;
		list_for_each_entry_continue(bd2, blist, bd_list) {
			get_bh(bd2->bd_bh);
			gfs2_log_unlock(sdp);
			lock_buffer(bd2->bd_bh);

			if (buffer_escaped(bd2->bd_bh)) {
				void *kaddr;
				page = mempool_alloc(gfs2_page_pool, GFP_NOIO);
				ptr = page_address(page);
				kaddr = kmap_atomic(bd2->bd_bh->b_page);
				memcpy(ptr, kaddr + bh_offset(bd2->bd_bh),
				       bd2->bd_bh->b_size);
				kunmap_atomic(kaddr);
				*(__be32 *)ptr = 0;
				clear_buffer_escaped(bd2->bd_bh);
				unlock_buffer(bd2->bd_bh);
				brelse(bd2->bd_bh);
				gfs2_log_write_page(sdp, page);
			} else {
				gfs2_log_write_bh(sdp, bd2->bd_bh);
			}
			gfs2_log_lock(sdp);
			if (++n >= num)
				break;
		}

		BUG_ON(total < num);
		total -= num;
	}
	gfs2_log_unlock(sdp);
}
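
/*
 * Illustrative layout (not part of the original source): for a chunk of
 * num metadata buffers, the loop above writes one descriptor block
 * followed by the buffers themselves, e.g. for num = 3:
 *
 *	[ LD header | blkno0 blkno1 blkno2 ] [ buf0 ] [ buf1 ] [ buf2 ]
 *
 * For data buffers (is_databuf), each table entry is a pair of
 * (blkno, escaped-flag) values, which is why databuf_limit() is roughly
 * half of buf_limit() for a given block size.
 */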

static void buf_lo_before_commit(struct gfs2_sbd *sdp, struct gfs2_trans *tr)
{
	unsigned int limit = buf_limit(sdp); /* 503 for 4k blocks */
	unsigned int nbuf;
	if (tr == NULL)
		return;
	nbuf = tr->tr_num_buf_new - tr->tr_num_buf_rm;
	gfs2_before_commit(sdp, limit, nbuf, &tr->tr_buf, 0);
}

static void buf_lo_after_commit(struct gfs2_sbd *sdp, struct gfs2_trans *tr)
{
	struct list_head *head;
	struct gfs2_bufdata *bd;

	if (tr == NULL)
		return;

	head = &tr->tr_buf;
	while (!list_empty(head)) {
		bd = list_first_entry(head, struct gfs2_bufdata, bd_list);
		list_del_init(&bd->bd_list);
		gfs2_unpin(sdp, bd->bd_bh, tr);
	}
}

static void buf_lo_before_scan(struct gfs2_jdesc *jd,
			       struct gfs2_log_header_host *head, int pass)
{
	if (pass != 0)
		return;

	jd->jd_found_blocks = 0;
	jd->jd_replayed_blocks = 0;
}

static int buf_lo_scan_elements(struct gfs2_jdesc *jd, u32 start,
				struct gfs2_log_descriptor *ld, __be64 *ptr,
				int pass)
{
	struct gfs2_inode *ip = GFS2_I(jd->jd_inode);
	struct gfs2_sbd *sdp = GFS2_SB(jd->jd_inode);
	struct gfs2_glock *gl = ip->i_gl;
	unsigned int blks = be32_to_cpu(ld->ld_data1);
	struct buffer_head *bh_log, *bh_ip;
	u64 blkno;
	int error = 0;

	if (pass != 1 || be32_to_cpu(ld->ld_type) != GFS2_LOG_DESC_METADATA)
		return 0;

	gfs2_replay_incr_blk(jd, &start);

	for (; blks; gfs2_replay_incr_blk(jd, &start), blks--) {
		blkno = be64_to_cpu(*ptr++);

		jd->jd_found_blocks++;

		if (gfs2_revoke_check(jd, blkno, start))
			continue;

		error = gfs2_replay_read_block(jd, start, &bh_log);
		if (error)
			return error;

		bh_ip = gfs2_meta_new(gl, blkno);
		memcpy(bh_ip->b_data, bh_log->b_data, bh_log->b_size);

		if (gfs2_meta_check(sdp, bh_ip))
			error = -EIO;
		else {
			struct gfs2_meta_header *mh =
				(struct gfs2_meta_header *)bh_ip->b_data;

			if (mh->mh_type == cpu_to_be32(GFS2_METATYPE_RG)) {
				struct gfs2_rgrpd *rgd;

				rgd = gfs2_blk2rgrpd(sdp, blkno, false);
				if (rgd && rgd->rd_addr == blkno &&
				    rgd->rd_bits && rgd->rd_bits->bi_bh) {
					fs_info(sdp, "Replaying 0x%llx but we "
						"already have a bh!\n",
						(unsigned long long)blkno);
					fs_info(sdp, "busy:%d, pinned:%d\n",
						buffer_busy(rgd->rd_bits->bi_bh) ? 1 : 0,
						buffer_pinned(rgd->rd_bits->bi_bh));
					gfs2_dump_glock(NULL, rgd->rd_gl, true);
				}
			}
			mark_buffer_dirty(bh_ip);
		}
		brelse(bh_log);
		brelse(bh_ip);

		if (error)
			break;

		jd->jd_replayed_blocks++;
	}

	return error;
}

static void buf_lo_after_scan(struct gfs2_jdesc *jd, int error, int pass)
{
	struct gfs2_inode *ip = GFS2_I(jd->jd_inode);
	struct gfs2_sbd *sdp = GFS2_SB(jd->jd_inode);

	if (error) {
		gfs2_inode_metasync(ip->i_gl);
		return;
	}
	if (pass != 1)
		return;

	gfs2_inode_metasync(ip->i_gl);

	fs_info(sdp, "jid=%u: Replayed %u of %u blocks\n",
	        jd->jd_jid, jd->jd_replayed_blocks, jd->jd_found_blocks);
}

static void revoke_lo_before_commit(struct gfs2_sbd *sdp, struct gfs2_trans *tr)
{
	struct gfs2_meta_header *mh;
	unsigned int offset;
	struct list_head *head = &sdp->sd_log_revokes;
	struct gfs2_bufdata *bd;
	struct page *page;
	unsigned int length;

	gfs2_write_revokes(sdp);
	if (!sdp->sd_log_num_revoke)
		return;

	length = gfs2_struct2blk(sdp, sdp->sd_log_num_revoke);
	page = gfs2_get_log_desc(sdp, GFS2_LOG_DESC_REVOKE, length, sdp->sd_log_num_revoke);
	offset = sizeof(struct gfs2_log_descriptor);

	list_for_each_entry(bd, head, bd_list) {
		sdp->sd_log_num_revoke--;

		if (offset + sizeof(u64) > sdp->sd_sb.sb_bsize) {
			gfs2_log_write_page(sdp, page);
			page = mempool_alloc(gfs2_page_pool, GFP_NOIO);
			mh = page_address(page);
			clear_page(mh);
			mh->mh_magic = cpu_to_be32(GFS2_MAGIC);
			mh->mh_type = cpu_to_be32(GFS2_METATYPE_LB);
			mh->mh_format = cpu_to_be32(GFS2_FORMAT_LB);
			offset = sizeof(struct gfs2_meta_header);
		}

		*(__be64 *)(page_address(page) + offset) = cpu_to_be64(bd->bd_blkno);
		offset += sizeof(u64);
	}
	gfs2_assert_withdraw(sdp, !sdp->sd_log_num_revoke);

	gfs2_log_write_page(sdp, page);
}
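
/*
 * Illustrative arithmetic (not part of the original source): revoke
 * records are u64 block numbers packed after a header. With 4KiB blocks,
 * the first block holds (4096 - sizeof(struct gfs2_log_descriptor)) / 8 =
 * (4096 - 72) / 8 = 503 revokes, and each continuation block, headed only
 * by a struct gfs2_meta_header, holds (4096 - 24) / 8 = 509. This is the
 * calculation gfs2_struct2blk() performs to size the revoke section.
 */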

static void revoke_lo_after_commit(struct gfs2_sbd *sdp, struct gfs2_trans *tr)
{
	struct list_head *head = &sdp->sd_log_revokes;
	struct gfs2_bufdata *bd;
	struct gfs2_glock *gl;

	while (!list_empty(head)) {
		bd = list_first_entry(head, struct gfs2_bufdata, bd_list);
		list_del_init(&bd->bd_list);
		gl = bd->bd_gl;
		gfs2_glock_remove_revoke(gl);
		kmem_cache_free(gfs2_bufdata_cachep, bd);
	}
}

static void revoke_lo_before_scan(struct gfs2_jdesc *jd,
				  struct gfs2_log_header_host *head, int pass)
{
	if (pass != 0)
		return;

	jd->jd_found_revokes = 0;
	jd->jd_replay_tail = head->lh_tail;
}

static int revoke_lo_scan_elements(struct gfs2_jdesc *jd, u32 start,
				   struct gfs2_log_descriptor *ld, __be64 *ptr,
				   int pass)
{
	struct gfs2_sbd *sdp = GFS2_SB(jd->jd_inode);
	unsigned int blks = be32_to_cpu(ld->ld_length);
	unsigned int revokes = be32_to_cpu(ld->ld_data1);
	struct buffer_head *bh;
	unsigned int offset;
	u64 blkno;
	int first = 1;
	int error;

	if (pass != 0 || be32_to_cpu(ld->ld_type) != GFS2_LOG_DESC_REVOKE)
		return 0;

	offset = sizeof(struct gfs2_log_descriptor);

	for (; blks; gfs2_replay_incr_blk(jd, &start), blks--) {
		error = gfs2_replay_read_block(jd, start, &bh);
		if (error)
			return error;

		if (!first)
			gfs2_metatype_check(sdp, bh, GFS2_METATYPE_LB);

		while (offset + sizeof(u64) <= sdp->sd_sb.sb_bsize) {
			blkno = be64_to_cpu(*(__be64 *)(bh->b_data + offset));

			error = gfs2_revoke_add(jd, blkno, start);
			if (error < 0) {
				brelse(bh);
				return error;
			} else if (error)
				jd->jd_found_revokes++;

			if (!--revokes)
				break;
			offset += sizeof(u64);
		}

		brelse(bh);
		offset = sizeof(struct gfs2_meta_header);
		first = 0;
	}

	return 0;
}

static void revoke_lo_after_scan(struct gfs2_jdesc *jd, int error, int pass)
{
	struct gfs2_sbd *sdp = GFS2_SB(jd->jd_inode);

	if (error) {
		gfs2_revoke_clean(jd);
		return;
	}
	if (pass != 1)
		return;

	fs_info(sdp, "jid=%u: Found %u revoke tags\n",
	        jd->jd_jid, jd->jd_found_revokes);

	gfs2_revoke_clean(jd);
}

/**
 * databuf_lo_before_commit - Scan the data buffers, writing as we go
 * @sdp: The filesystem
 * @tr: The system transaction being flushed
 */

static void databuf_lo_before_commit(struct gfs2_sbd *sdp, struct gfs2_trans *tr)
{
	unsigned int limit = databuf_limit(sdp);
	unsigned int nbuf;
	if (tr == NULL)
		return;
	nbuf = tr->tr_num_databuf_new - tr->tr_num_databuf_rm;
	gfs2_before_commit(sdp, limit, nbuf, &tr->tr_databuf, 1);
}

static int databuf_lo_scan_elements(struct gfs2_jdesc *jd, u32 start,
				    struct gfs2_log_descriptor *ld,
				    __be64 *ptr, int pass)
{
	struct gfs2_inode *ip = GFS2_I(jd->jd_inode);
	struct gfs2_glock *gl = ip->i_gl;
	unsigned int blks = be32_to_cpu(ld->ld_data1);
	struct buffer_head *bh_log, *bh_ip;
	u64 blkno;
	u64 esc;
	int error = 0;

	if (pass != 1 || be32_to_cpu(ld->ld_type) != GFS2_LOG_DESC_JDATA)
		return 0;

	gfs2_replay_incr_blk(jd, &start);
	for (; blks; gfs2_replay_incr_blk(jd, &start), blks--) {
		blkno = be64_to_cpu(*ptr++);
		esc = be64_to_cpu(*ptr++);

		jd->jd_found_blocks++;

		if (gfs2_revoke_check(jd, blkno, start))
			continue;

		error = gfs2_replay_read_block(jd, start, &bh_log);
		if (error)
			return error;

		bh_ip = gfs2_meta_new(gl, blkno);
		memcpy(bh_ip->b_data, bh_log->b_data, bh_log->b_size);

		/* Unescape */
		if (esc) {
			__be32 *eptr = (__be32 *)bh_ip->b_data;
			*eptr = cpu_to_be32(GFS2_MAGIC);
		}
		mark_buffer_dirty(bh_ip);

		brelse(bh_log);
		brelse(bh_ip);

		jd->jd_replayed_blocks++;
	}

	return error;
}

/* FIXME: sort out accounting for log blocks etc. */

static void databuf_lo_after_scan(struct gfs2_jdesc *jd, int error, int pass)
{
	struct gfs2_inode *ip = GFS2_I(jd->jd_inode);
	struct gfs2_sbd *sdp = GFS2_SB(jd->jd_inode);

	if (error) {
		gfs2_inode_metasync(ip->i_gl);
		return;
	}
	if (pass != 1)
		return;

	/* data sync? */
	gfs2_inode_metasync(ip->i_gl);

	fs_info(sdp, "jid=%u: Replayed %u of %u data blocks\n",
		jd->jd_jid, jd->jd_replayed_blocks, jd->jd_found_blocks);
}

static void databuf_lo_after_commit(struct gfs2_sbd *sdp, struct gfs2_trans *tr)
{
	struct list_head *head;
	struct gfs2_bufdata *bd;

	if (tr == NULL)
		return;

	head = &tr->tr_databuf;
	while (!list_empty(head)) {
		bd = list_first_entry(head, struct gfs2_bufdata, bd_list);
		list_del_init(&bd->bd_list);
		gfs2_unpin(sdp, bd->bd_bh, tr);
	}
}


static const struct gfs2_log_operations gfs2_buf_lops = {
	.lo_before_commit = buf_lo_before_commit,
	.lo_after_commit = buf_lo_after_commit,
	.lo_before_scan = buf_lo_before_scan,
	.lo_scan_elements = buf_lo_scan_elements,
	.lo_after_scan = buf_lo_after_scan,
	.lo_name = "buf",
};

static const struct gfs2_log_operations gfs2_revoke_lops = {
	.lo_before_commit = revoke_lo_before_commit,
	.lo_after_commit = revoke_lo_after_commit,
	.lo_before_scan = revoke_lo_before_scan,
	.lo_scan_elements = revoke_lo_scan_elements,
	.lo_after_scan = revoke_lo_after_scan,
	.lo_name = "revoke",
};

static const struct gfs2_log_operations gfs2_databuf_lops = {
	.lo_before_commit = databuf_lo_before_commit,
	.lo_after_commit = databuf_lo_after_commit,
	.lo_scan_elements = databuf_lo_scan_elements,
	.lo_after_scan = databuf_lo_after_scan,
	.lo_name = "databuf",
};

const struct gfs2_log_operations *gfs2_log_ops[] = {
	&gfs2_databuf_lops,
	&gfs2_buf_lops,
	&gfs2_revoke_lops,
	NULL,
};
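
/*
 * Illustrative sketch (not part of the original source): lops.h walks this
 * NULL-terminated array with helpers along the lines of:
 *
 *	static inline void lops_before_commit(struct gfs2_sbd *sdp,
 *					      struct gfs2_trans *tr)
 *	{
 *		int x;
 *		for (x = 0; gfs2_log_ops[x]; x++)
 *			if (gfs2_log_ops[x]->lo_before_commit)
 *				gfs2_log_ops[x]->lo_before_commit(sdp, tr);
 *	}
 *
 * so each log element type gets a callback at every stage of a log flush
 * or journal replay without the callers knowing the concrete types.
 */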