1/*
2 * Copyright 2018 Advanced Micro Devices, Inc.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20 * OTHER DEALINGS IN THE SOFTWARE.
21 *
22 */
23
24#include "amdgpu.h"
25#include "amdgpu_discovery.h"
26#include "soc15_hw_ip.h"
27#include "discovery.h"
28
/*
 * Register offsets used by this file.  mmRCC_CONFIG_MEMSIZE is read with
 * RREG32() and the value shifted left by 20 to obtain the VRAM size in bytes.
 * NOTE(review): mmMM_INDEX, mmMM_INDEX_HI and mmMM_DATA are not referenced by
 * any code in this file - confirm whether they are still needed.
 */
#define mmRCC_CONFIG_MEMSIZE	0xde3
#define mmMM_INDEX		0x0
#define mmMM_INDEX_HI		0x6
#define mmMM_DATA		0x1
/* Number of slots in hw_id_names[], which is indexed directly by hardware ID. */
#define HW_ID_MAX		300
34
35static const char *hw_id_names[HW_ID_MAX] = {
36	[MP1_HWID]		= "MP1",
37	[MP2_HWID]		= "MP2",
38	[THM_HWID]		= "THM",
39	[SMUIO_HWID]		= "SMUIO",
40	[FUSE_HWID]		= "FUSE",
41	[CLKA_HWID]		= "CLKA",
42	[PWR_HWID]		= "PWR",
43	[GC_HWID]		= "GC",
44	[UVD_HWID]		= "UVD",
45	[AUDIO_AZ_HWID]		= "AUDIO_AZ",
46	[ACP_HWID]		= "ACP",
47	[DCI_HWID]		= "DCI",
48	[DMU_HWID]		= "DMU",
49	[DCO_HWID]		= "DCO",
50	[DIO_HWID]		= "DIO",
51	[XDMA_HWID]		= "XDMA",
52	[DCEAZ_HWID]		= "DCEAZ",
53	[DAZ_HWID]		= "DAZ",
54	[SDPMUX_HWID]		= "SDPMUX",
55	[NTB_HWID]		= "NTB",
56	[IOHC_HWID]		= "IOHC",
57	[L2IMU_HWID]		= "L2IMU",
58	[VCE_HWID]		= "VCE",
59	[MMHUB_HWID]		= "MMHUB",
60	[ATHUB_HWID]		= "ATHUB",
61	[DBGU_NBIO_HWID]	= "DBGU_NBIO",
62	[DFX_HWID]		= "DFX",
63	[DBGU0_HWID]		= "DBGU0",
64	[DBGU1_HWID]		= "DBGU1",
65	[OSSSYS_HWID]		= "OSSSYS",
66	[HDP_HWID]		= "HDP",
67	[SDMA0_HWID]		= "SDMA0",
68	[SDMA1_HWID]		= "SDMA1",
69	[ISP_HWID]		= "ISP",
70	[DBGU_IO_HWID]		= "DBGU_IO",
71	[DF_HWID]		= "DF",
72	[CLKB_HWID]		= "CLKB",
73	[FCH_HWID]		= "FCH",
74	[DFX_DAP_HWID]		= "DFX_DAP",
75	[L1IMU_PCIE_HWID]	= "L1IMU_PCIE",
76	[L1IMU_NBIF_HWID]	= "L1IMU_NBIF",
77	[L1IMU_IOAGR_HWID]	= "L1IMU_IOAGR",
78	[L1IMU3_HWID]		= "L1IMU3",
79	[L1IMU4_HWID]		= "L1IMU4",
80	[L1IMU5_HWID]		= "L1IMU5",
81	[L1IMU6_HWID]		= "L1IMU6",
82	[L1IMU7_HWID]		= "L1IMU7",
83	[L1IMU8_HWID]		= "L1IMU8",
84	[L1IMU9_HWID]		= "L1IMU9",
85	[L1IMU10_HWID]		= "L1IMU10",
86	[L1IMU11_HWID]		= "L1IMU11",
87	[L1IMU12_HWID]		= "L1IMU12",
88	[L1IMU13_HWID]		= "L1IMU13",
89	[L1IMU14_HWID]		= "L1IMU14",
90	[L1IMU15_HWID]		= "L1IMU15",
91	[WAFLC_HWID]		= "WAFLC",
92	[FCH_USB_PD_HWID]	= "FCH_USB_PD",
93	[PCIE_HWID]		= "PCIE",
94	[PCS_HWID]		= "PCS",
95	[DDCL_HWID]		= "DDCL",
96	[SST_HWID]		= "SST",
97	[IOAGR_HWID]		= "IOAGR",
98	[NBIF_HWID]		= "NBIF",
99	[IOAPIC_HWID]		= "IOAPIC",
100	[SYSTEMHUB_HWID]	= "SYSTEMHUB",
101	[NTBCCP_HWID]		= "NTBCCP",
102	[UMC_HWID]		= "UMC",
103	[SATA_HWID]		= "SATA",
104	[USB_HWID]		= "USB",
105	[CCXSEC_HWID]		= "CCXSEC",
106	[XGMI_HWID]		= "XGMI",
107	[XGBE_HWID]		= "XGBE",
108	[MP0_HWID]		= "MP0",
109};
110
111static int hw_id_map[MAX_HWIP] = {
112	[GC_HWIP]	= GC_HWID,
113	[HDP_HWIP]	= HDP_HWID,
114	[SDMA0_HWIP]	= SDMA0_HWID,
115	[SDMA1_HWIP]	= SDMA1_HWID,
116	[MMHUB_HWIP]	= MMHUB_HWID,
117	[ATHUB_HWIP]	= ATHUB_HWID,
118	[NBIO_HWIP]	= NBIF_HWID,
119	[MP0_HWIP]	= MP0_HWID,
120	[MP1_HWIP]	= MP1_HWID,
121	[UVD_HWIP]	= UVD_HWID,
122	[VCE_HWIP]	= VCE_HWID,
123	[DF_HWIP]	= DF_HWID,
124	[DCE_HWIP]	= DMU_HWID,
125	[OSSSYS_HWIP]	= OSSSYS_HWID,
126	[SMUIO_HWIP]	= SMUIO_HWID,
127	[PWR_HWIP]	= PWR_HWID,
128	[NBIF_HWIP]	= NBIF_HWID,
129	[THM_HWIP]	= THM_HWID,
130	[CLK_HWIP]	= CLKA_HWID,
131};
132
133static int amdgpu_discovery_read_binary(struct amdgpu_device *adev, uint8_t *binary)
134{
135	uint64_t vram_size = (uint64_t)RREG32(mmRCC_CONFIG_MEMSIZE) << 20;
136	uint64_t pos = vram_size - DISCOVERY_TMR_OFFSET;
137
138	amdgpu_device_vram_access(adev, pos, (uint32_t *)binary,
139				  adev->mman.discovery_tmr_size, false);
140	return 0;
141}
142
/*
 * amdgpu_discovery_calculate_checksum - byte-wise additive checksum
 * @data: buffer to sum (not modified)
 * @size: number of bytes in @data
 *
 * Returns the sum of all @size bytes, truncated to 16 bits.  A @size of 0
 * yields 0.
 */
static uint16_t amdgpu_discovery_calculate_checksum(const uint8_t *data, uint32_t size)
{
	uint16_t checksum = 0;
	uint32_t i;	/* same type as @size: no signed/unsigned mix, no overflow */

	for (i = 0; i < size; i++)
		checksum += data[i];

	return checksum;
}
153
/*
 * amdgpu_discovery_verify_checksum - check a buffer against a known checksum
 * @data: buffer to sum
 * @size: number of bytes in @data
 * @expected: checksum the buffer is supposed to have
 *
 * Returns true when the computed checksum equals @expected, false otherwise.
 */
static inline bool amdgpu_discovery_verify_checksum(uint8_t *data, uint32_t size,
						    uint16_t expected)
{
	uint16_t actual = amdgpu_discovery_calculate_checksum(data, size);

	return actual == expected;
}
159
160static int amdgpu_discovery_init(struct amdgpu_device *adev)
161{
162	struct table_info *info;
163	struct binary_header *bhdr;
164	struct ip_discovery_header *ihdr;
165	struct gpu_info_header *ghdr;
166	uint16_t offset;
167	uint16_t size;
168	uint16_t checksum;
169	int r;
170
171	adev->mman.discovery_tmr_size = DISCOVERY_TMR_SIZE;
172	adev->mman.discovery_bin = kzalloc(adev->mman.discovery_tmr_size, GFP_KERNEL);
173	if (!adev->mman.discovery_bin)
174		return -ENOMEM;
175
176	r = amdgpu_discovery_read_binary(adev, adev->mman.discovery_bin);
177	if (r) {
178		DRM_ERROR("failed to read ip discovery binary\n");
179		goto out;
180	}
181
182	bhdr = (struct binary_header *)adev->mman.discovery_bin;
183
184	if (le32_to_cpu(bhdr->binary_signature) != BINARY_SIGNATURE) {
185		DRM_ERROR("invalid ip discovery binary signature\n");
186		r = -EINVAL;
187		goto out;
188	}
189
190	offset = offsetof(struct binary_header, binary_checksum) +
191		sizeof(bhdr->binary_checksum);
192	size = bhdr->binary_size - offset;
193	checksum = bhdr->binary_checksum;
194
195	if (!amdgpu_discovery_verify_checksum(adev->mman.discovery_bin + offset,
196					      size, checksum)) {
197		DRM_ERROR("invalid ip discovery binary checksum\n");
198		r = -EINVAL;
199		goto out;
200	}
201
202	info = &bhdr->table_list[IP_DISCOVERY];
203	offset = le16_to_cpu(info->offset);
204	checksum = le16_to_cpu(info->checksum);
205	ihdr = (struct ip_discovery_header *)(adev->mman.discovery_bin + offset);
206
207	if (le32_to_cpu(ihdr->signature) != DISCOVERY_TABLE_SIGNATURE) {
208		DRM_ERROR("invalid ip discovery data table signature\n");
209		r = -EINVAL;
210		goto out;
211	}
212
213	if (!amdgpu_discovery_verify_checksum(adev->mman.discovery_bin + offset,
214					      ihdr->size, checksum)) {
215		DRM_ERROR("invalid ip discovery data table checksum\n");
216		r = -EINVAL;
217		goto out;
218	}
219
220	info = &bhdr->table_list[GC];
221	offset = le16_to_cpu(info->offset);
222	checksum = le16_to_cpu(info->checksum);
223	ghdr = (struct gpu_info_header *)(adev->mman.discovery_bin + offset);
224
225	if (!amdgpu_discovery_verify_checksum(adev->mman.discovery_bin + offset,
226				              ghdr->size, checksum)) {
227		DRM_ERROR("invalid gc data table checksum\n");
228		r = -EINVAL;
229		goto out;
230	}
231
232	return 0;
233
234out:
235	kfree(adev->mman.discovery_bin);
236	adev->mman.discovery_bin = NULL;
237
238	return r;
239}
240
241void amdgpu_discovery_fini(struct amdgpu_device *adev)
242{
243	kfree(adev->mman.discovery_bin);
244	adev->mman.discovery_bin = NULL;
245}
246
247int amdgpu_discovery_reg_base_init(struct amdgpu_device *adev)
248{
249	struct binary_header *bhdr;
250	struct ip_discovery_header *ihdr;
251	struct die_header *dhdr;
252	struct ip *ip;
253	uint16_t die_offset;
254	uint16_t ip_offset;
255	uint16_t num_dies;
256	uint16_t num_ips;
257	uint8_t num_base_address;
258	int hw_ip;
259	int i, j, k;
260	int r;
261
262	r = amdgpu_discovery_init(adev);
263	if (r) {
264		DRM_ERROR("amdgpu_discovery_init failed\n");
265		return r;
266	}
267
268	bhdr = (struct binary_header *)adev->mman.discovery_bin;
269	ihdr = (struct ip_discovery_header *)(adev->mman.discovery_bin +
270			le16_to_cpu(bhdr->table_list[IP_DISCOVERY].offset));
271	num_dies = le16_to_cpu(ihdr->num_dies);
272
273	DRM_DEBUG("number of dies: %d\n", num_dies);
274
275	for (i = 0; i < num_dies; i++) {
276		die_offset = le16_to_cpu(ihdr->die_info[i].die_offset);
277		dhdr = (struct die_header *)(adev->mman.discovery_bin + die_offset);
278		num_ips = le16_to_cpu(dhdr->num_ips);
279		ip_offset = die_offset + sizeof(*dhdr);
280
281		if (le16_to_cpu(dhdr->die_id) != i) {
282			DRM_ERROR("invalid die id %d, expected %d\n",
283					le16_to_cpu(dhdr->die_id), i);
284			return -EINVAL;
285		}
286
287		DRM_DEBUG("number of hardware IPs on die%d: %d\n",
288				le16_to_cpu(dhdr->die_id), num_ips);
289
290		for (j = 0; j < num_ips; j++) {
291			ip = (struct ip *)(adev->mman.discovery_bin + ip_offset);
292			num_base_address = ip->num_base_address;
293
294			DRM_DEBUG("%s(%d) #%d v%d.%d.%d:\n",
295				  hw_id_names[le16_to_cpu(ip->hw_id)],
296				  le16_to_cpu(ip->hw_id),
297				  ip->number_instance,
298				  ip->major, ip->minor,
299				  ip->revision);
300
301			for (k = 0; k < num_base_address; k++) {
302				/*
303				 * convert the endianness of base addresses in place,
304				 * so that we don't need to convert them when accessing adev->reg_offset.
305				 */
306				ip->base_address[k] = le32_to_cpu(ip->base_address[k]);
307				DRM_DEBUG("\t0x%08x\n", ip->base_address[k]);
308			}
309
310			for (hw_ip = 0; hw_ip < MAX_HWIP; hw_ip++) {
311				if (hw_id_map[hw_ip] == le16_to_cpu(ip->hw_id)) {
312					DRM_DEBUG("set register base offset for %s\n",
313							hw_id_names[le16_to_cpu(ip->hw_id)]);
314					adev->reg_offset[hw_ip][ip->number_instance] =
315						ip->base_address;
316				}
317
318			}
319
320			ip_offset += sizeof(*ip) + 4 * (ip->num_base_address - 1);
321		}
322	}
323
324	return 0;
325}
326
327int amdgpu_discovery_get_ip_version(struct amdgpu_device *adev, int hw_id,
328				    int *major, int *minor, int *revision)
329{
330	struct binary_header *bhdr;
331	struct ip_discovery_header *ihdr;
332	struct die_header *dhdr;
333	struct ip *ip;
334	uint16_t die_offset;
335	uint16_t ip_offset;
336	uint16_t num_dies;
337	uint16_t num_ips;
338	int i, j;
339
340	if (!adev->mman.discovery_bin) {
341		DRM_ERROR("ip discovery uninitialized\n");
342		return -EINVAL;
343	}
344
345	bhdr = (struct binary_header *)adev->mman.discovery_bin;
346	ihdr = (struct ip_discovery_header *)(adev->mman.discovery_bin +
347			le16_to_cpu(bhdr->table_list[IP_DISCOVERY].offset));
348	num_dies = le16_to_cpu(ihdr->num_dies);
349
350	for (i = 0; i < num_dies; i++) {
351		die_offset = le16_to_cpu(ihdr->die_info[i].die_offset);
352		dhdr = (struct die_header *)(adev->mman.discovery_bin + die_offset);
353		num_ips = le16_to_cpu(dhdr->num_ips);
354		ip_offset = die_offset + sizeof(*dhdr);
355
356		for (j = 0; j < num_ips; j++) {
357			ip = (struct ip *)(adev->mman.discovery_bin + ip_offset);
358
359			if (le16_to_cpu(ip->hw_id) == hw_id) {
360				if (major)
361					*major = ip->major;
362				if (minor)
363					*minor = ip->minor;
364				if (revision)
365					*revision = ip->revision;
366				return 0;
367			}
368			ip_offset += sizeof(*ip) + 4 * (ip->num_base_address - 1);
369		}
370	}
371
372	return -EINVAL;
373}
374
/*
 * Overlay of the GC info table layouts handled by
 * amdgpu_discovery_get_gfx_info().  The table's header.version_major is read
 * through the v1 member to decide which member describes the data.
 */
union gc_info {
	struct gc_info_v1_0 v1;
	struct gc_info_v2_0 v2;
};
379
380int amdgpu_discovery_get_gfx_info(struct amdgpu_device *adev)
381{
382	struct binary_header *bhdr;
383	union gc_info *gc_info;
384
385	if (!adev->mman.discovery_bin) {
386		DRM_ERROR("ip discovery uninitialized\n");
387		return -EINVAL;
388	}
389
390	bhdr = (struct binary_header *)adev->mman.discovery_bin;
391	gc_info = (union gc_info *)(adev->mman.discovery_bin +
392			le16_to_cpu(bhdr->table_list[GC].offset));
393	switch (gc_info->v1.header.version_major) {
394	case 1:
395		adev->gfx.config.max_shader_engines = le32_to_cpu(gc_info->v1.gc_num_se);
396		adev->gfx.config.max_cu_per_sh = 2 * (le32_to_cpu(gc_info->v1.gc_num_wgp0_per_sa) +
397						      le32_to_cpu(gc_info->v1.gc_num_wgp1_per_sa));
398		adev->gfx.config.max_sh_per_se = le32_to_cpu(gc_info->v1.gc_num_sa_per_se);
399		adev->gfx.config.max_backends_per_se = le32_to_cpu(gc_info->v1.gc_num_rb_per_se);
400		adev->gfx.config.max_texture_channel_caches = le32_to_cpu(gc_info->v1.gc_num_gl2c);
401		adev->gfx.config.max_gprs = le32_to_cpu(gc_info->v1.gc_num_gprs);
402		adev->gfx.config.max_gs_threads = le32_to_cpu(gc_info->v1.gc_num_max_gs_thds);
403		adev->gfx.config.gs_vgt_table_depth = le32_to_cpu(gc_info->v1.gc_gs_table_depth);
404		adev->gfx.config.gs_prim_buffer_depth = le32_to_cpu(gc_info->v1.gc_gsprim_buff_depth);
405		adev->gfx.config.double_offchip_lds_buf = le32_to_cpu(gc_info->v1.gc_double_offchip_lds_buffer);
406		adev->gfx.cu_info.wave_front_size = le32_to_cpu(gc_info->v1.gc_wave_size);
407		adev->gfx.cu_info.max_waves_per_simd = le32_to_cpu(gc_info->v1.gc_max_waves_per_simd);
408		adev->gfx.cu_info.max_scratch_slots_per_cu = le32_to_cpu(gc_info->v1.gc_max_scratch_slots_per_cu);
409		adev->gfx.cu_info.lds_size = le32_to_cpu(gc_info->v1.gc_lds_size);
410		adev->gfx.config.num_sc_per_sh = le32_to_cpu(gc_info->v1.gc_num_sc_per_se) /
411			le32_to_cpu(gc_info->v1.gc_num_sa_per_se);
412		adev->gfx.config.num_packer_per_sc = le32_to_cpu(gc_info->v1.gc_num_packer_per_sc);
413		break;
414	case 2:
415		adev->gfx.config.max_shader_engines = le32_to_cpu(gc_info->v2.gc_num_se);
416		adev->gfx.config.max_cu_per_sh = le32_to_cpu(gc_info->v2.gc_num_cu_per_sh);
417		adev->gfx.config.max_sh_per_se = le32_to_cpu(gc_info->v2.gc_num_sh_per_se);
418		adev->gfx.config.max_backends_per_se = le32_to_cpu(gc_info->v2.gc_num_rb_per_se);
419		adev->gfx.config.max_texture_channel_caches = le32_to_cpu(gc_info->v2.gc_num_tccs);
420		adev->gfx.config.max_gprs = le32_to_cpu(gc_info->v2.gc_num_gprs);
421		adev->gfx.config.max_gs_threads = le32_to_cpu(gc_info->v2.gc_num_max_gs_thds);
422		adev->gfx.config.gs_vgt_table_depth = le32_to_cpu(gc_info->v2.gc_gs_table_depth);
423		adev->gfx.config.gs_prim_buffer_depth = le32_to_cpu(gc_info->v2.gc_gsprim_buff_depth);
424		adev->gfx.config.double_offchip_lds_buf = le32_to_cpu(gc_info->v2.gc_double_offchip_lds_buffer);
425		adev->gfx.cu_info.wave_front_size = le32_to_cpu(gc_info->v2.gc_wave_size);
426		adev->gfx.cu_info.max_waves_per_simd = le32_to_cpu(gc_info->v2.gc_max_waves_per_simd);
427		adev->gfx.cu_info.max_scratch_slots_per_cu = le32_to_cpu(gc_info->v2.gc_max_scratch_slots_per_cu);
428		adev->gfx.cu_info.lds_size = le32_to_cpu(gc_info->v2.gc_lds_size);
429		adev->gfx.config.num_sc_per_sh = le32_to_cpu(gc_info->v2.gc_num_sc_per_se) /
430			le32_to_cpu(gc_info->v2.gc_num_sh_per_se);
431		adev->gfx.config.num_packer_per_sc = le32_to_cpu(gc_info->v2.gc_num_packer_per_sc);
432		break;
433	default:
434		dev_err(adev->dev,
435			"Unhandled GC info table %d.%d\n",
436			gc_info->v1.header.version_major,
437			gc_info->v1.header.version_minor);
438		return -EINVAL;
439	}
440	return 0;
441}
442