162306a36Sopenharmony_ci/* SPDX-License-Identifier: GPL-2.0 */
262306a36Sopenharmony_ci/*
362306a36Sopenharmony_ci * Copyright (C) 1999-2002 Hewlett-Packard Co
462306a36Sopenharmony_ci *	Stephane Eranian <eranian@hpl.hp.com>
562306a36Sopenharmony_ci *	David Mosberger-Tang <davidm@hpl.hp.com>
662306a36Sopenharmony_ci * Copyright (C) 2002 Ken Chen <kenneth.w.chen@intel.com>
762306a36Sopenharmony_ci *
862306a36Sopenharmony_ci * 1/06/01 davidm	Tuned for Itanium.
962306a36Sopenharmony_ci * 2/12/02 kchen	Tuned for both Itanium and McKinley
1062306a36Sopenharmony_ci * 3/08/02 davidm	Some more tweaking
1162306a36Sopenharmony_ci */
1262306a36Sopenharmony_ci
1362306a36Sopenharmony_ci#include <linux/export.h>
1462306a36Sopenharmony_ci#include <asm/asmmacro.h>
1562306a36Sopenharmony_ci#include <asm/page.h>
1662306a36Sopenharmony_ci
// Build-time tuning, selected by CONFIG_ITANIUM:
//   L3_LINE_SIZE   - byte stride between successive stores through dst_fetch,
//                    and the divisor of PAGE_SIZE that gives the main-loop
//                    trip count (one loop iteration per line).
//   PREFETCH_LINES - number of lines that receive a leading store in the
//                    .fetch loop before the main loop starts; described by
//                    the original authors only as a tuned "magic number".
1762306a36Sopenharmony_ci#ifdef CONFIG_ITANIUM
1862306a36Sopenharmony_ci# define L3_LINE_SIZE	64	// Itanium L3 line size
1962306a36Sopenharmony_ci# define PREFETCH_LINES	9	// magic number
2062306a36Sopenharmony_ci#else
2162306a36Sopenharmony_ci# define L3_LINE_SIZE	128	// McKinley L3 line size
2262306a36Sopenharmony_ci# define PREFETCH_LINES	12	// magic number
2362306a36Sopenharmony_ci#endif
2462306a36Sopenharmony_ci
// Register aliases used by clear_page.
2562306a36Sopenharmony_ci#define saved_lc	r2	// caller's ar.lc, held until the final restore
2662306a36Sopenharmony_ci#define dst_fetch	r3	// run-ahead pointer: stores to offset 0 of each line
2762306a36Sopenharmony_ci#define dst1		r8	// store pointer, starts at in0 + 16
2862306a36Sopenharmony_ci#define dst2		r9	// store pointer, starts at in0 + 32
2962306a36Sopenharmony_ci#define dst3		r10	// store pointer, starts at in0 + 48
3062306a36Sopenharmony_ci#define dst4		r11	// store pointer, starts at in0 + 64 (stored through only when CONFIG_ITANIUM is not set)
3162306a36Sopenharmony_ci
3262306a36Sopenharmony_ci#define dst_last	r31	// upper bound for dst_fetch: in0 + PAGE_SIZE
3362306a36Sopenharmony_ci
/*
 * clear_page - write zeros over PAGE_SIZE bytes starting at in0.
 *
 * Input:
 *	in0	address of the first byte to clear.  Presumably page-aligned
 *		(the code does not check) -- confirm against callers.
 * Output:
 *	none; returns through rp.
 * Modifies:
 *	r16, saved_lc, dst_fetch, dst1-dst4, dst_last, p8.  ar.lc is used as
 *	the loop counter but is saved on entry and restored before return.
 *
 * Every store is "stf.spill.nta [ptr] = f0, inc": it writes the 16-byte
 * spill image of f0 (which reads as zero per the IA-64 ISA) with a
 * non-temporal locality hint, then post-increments ptr by inc.
 *
 * Strategy:
 *   1. .fetch loop: PREFETCH_LINES stores through dst_fetch, one at offset 0
 *      of each of the first PREFETCH_LINES lines (L3_LINE_SIZE bytes apart).
 *      Judging by the names, this is meant to get those lines allocated
 *      ahead of the main loop -- intent not stated in the code.
 *   2. Main loop (label 1): PAGE_SIZE/L3_LINE_SIZE iterations.  Each one
 *      fills the remaining 16-byte slots of one line through dst1..dst4 and,
 *      while dst_fetch < dst_last, writes offset 0 of the line that lies
 *      PREFETCH_LINES lines ahead.  Between the two loops every line gets
 *      its offset-0 store exactly once.
 */
3462306a36Sopenharmony_ciGLOBAL_ENTRY(clear_page)
	// Unwind prologue region: the only state preserved here is ar.lc.
3562306a36Sopenharmony_ci	.prologue
	// Register stack frame: 1 input (in0), no locals, outputs or rotating regs.
3662306a36Sopenharmony_ci	.regstk 1,0,0,0
3762306a36Sopenharmony_ci	mov r16 = PAGE_SIZE/L3_LINE_SIZE-1	// main loop count, -1=repeat/until
	// Tell the unwinder that ar.lc lives in saved_lc from here on.
3862306a36Sopenharmony_ci	.save ar.lc, saved_lc
3962306a36Sopenharmony_ci	mov saved_lc = ar.lc
4062306a36Sopenharmony_ci
4162306a36Sopenharmony_ci	.body
	// br.cloop branches while ar.lc != 0 (decrementing it), so loading
	// PREFETCH_LINES - 1 makes the .fetch body run PREFETCH_LINES times.
4262306a36Sopenharmony_ci	mov ar.lc = (PREFETCH_LINES - 1)
4362306a36Sopenharmony_ci	mov dst_fetch = in0
	// dst1/dst2/dst3/dst4 start 16 bytes apart, one per 16-byte slot.
4462306a36Sopenharmony_ci	adds dst1 = 16, in0
4562306a36Sopenharmony_ci	adds dst2 = 32, in0
4662306a36Sopenharmony_ci	;;
	// Lead-in: zero offset 0 of each of the first PREFETCH_LINES lines.
4762306a36Sopenharmony_ci.fetch:	stf.spill.nta [dst_fetch] = f0, L3_LINE_SIZE
4862306a36Sopenharmony_ci	adds dst3 = 48, in0		// executing this multiple times is harmless
4962306a36Sopenharmony_ci	br.cloop.sptk.few .fetch
5062306a36Sopenharmony_ci	;;
	// dst_fetch is now in0 + PREFETCH_LINES*L3_LINE_SIZE, so the sum below
	// yields dst_last = in0 + PAGE_SIZE (one past the last byte to clear).
5162306a36Sopenharmony_ci	addl dst_last = (PAGE_SIZE - PREFETCH_LINES*L3_LINE_SIZE), dst_fetch
5262306a36Sopenharmony_ci	mov ar.lc = r16			// one L3 line per iteration
	// dst4 is set unconditionally but only stored through in the #else arm.
5362306a36Sopenharmony_ci	adds dst4 = 64, in0
5462306a36Sopenharmony_ci	;;
5562306a36Sopenharmony_ci#ifdef CONFIG_ITANIUM
5662306a36Sopenharmony_ci	// Optimized for Itanium
	// 64-byte line: offsets 16 and 32 here, offset 48 in the shared tail.
5762306a36Sopenharmony_ci1:	stf.spill.nta [dst1] = f0, 64
5862306a36Sopenharmony_ci	stf.spill.nta [dst2] = f0, 64
	// p8 = run-ahead pointer still inside the page; consumed after the stop.
	// NOTE(review): cmp.lt is the signed compare -- assumes in0 and
	// in0 + PAGE_SIZE have the same sign; confirm.
5962306a36Sopenharmony_ci	cmp.lt p8,p0=dst_fetch, dst_last
6062306a36Sopenharmony_ci	;;
6162306a36Sopenharmony_ci#else
6262306a36Sopenharmony_ci	// Optimized for McKinley
	// 128-byte line, first group: offsets 16, 32, 48 and 64.  dst4 is used
	// once per iteration, so it steps by the full line (128); dst1-dst3 are
	// used twice per iteration and step by 64 each time.
6362306a36Sopenharmony_ci1:	stf.spill.nta [dst1] = f0, 64
6462306a36Sopenharmony_ci	stf.spill.nta [dst2] = f0, 64
6562306a36Sopenharmony_ci	stf.spill.nta [dst3] = f0, 64
6662306a36Sopenharmony_ci	stf.spill.nta [dst4] = f0, 128
	// p8 = run-ahead pointer still inside the page; consumed after the stop.
	// NOTE(review): cmp.lt is the signed compare -- assumes in0 and
	// in0 + PAGE_SIZE have the same sign; confirm.
6762306a36Sopenharmony_ci	cmp.lt p8,p0=dst_fetch, dst_last
6862306a36Sopenharmony_ci	;;
	// Second group: offsets 80 and 96 here, offset 112 in the shared tail.
6962306a36Sopenharmony_ci	stf.spill.nta [dst1] = f0, 64
7062306a36Sopenharmony_ci	stf.spill.nta [dst2] = f0, 64
7162306a36Sopenharmony_ci#endif
	// Shared tail of both variants: last slot of the current line via dst3,
	// then (only while p8 holds) offset 0 of the line PREFETCH_LINES ahead.
7262306a36Sopenharmony_ci	stf.spill.nta [dst3] = f0, 64
7362306a36Sopenharmony_ci(p8)	stf.spill.nta [dst_fetch] = f0, L3_LINE_SIZE
7462306a36Sopenharmony_ci	br.cloop.sptk.few 1b
7562306a36Sopenharmony_ci	;;
7662306a36Sopenharmony_ci	mov ar.lc = saved_lc		// restore lc
7762306a36Sopenharmony_ci	br.ret.sptk.many rp
7862306a36Sopenharmony_ciEND(clear_page)
7962306a36Sopenharmony_ciEXPORT_SYMBOL(clear_page)
80