162306a36Sopenharmony_ci/* 262306a36Sopenharmony_ci * Copyright 2021 Advanced Micro Devices, Inc. 362306a36Sopenharmony_ci * 462306a36Sopenharmony_ci * Permission is hereby granted, free of charge, to any person obtaining a 562306a36Sopenharmony_ci * copy of this software and associated documentation files (the "Software"), 662306a36Sopenharmony_ci * to deal in the Software without restriction, including without limitation 762306a36Sopenharmony_ci * the rights to use, copy, modify, merge, publish, distribute, sublicense, 862306a36Sopenharmony_ci * and/or sell copies of the Software, and to permit persons to whom the 962306a36Sopenharmony_ci * Software is furnished to do so, subject to the following conditions: 1062306a36Sopenharmony_ci * 1162306a36Sopenharmony_ci * The above copyright notice and this permission notice shall be included in 1262306a36Sopenharmony_ci * all copies or substantial portions of the Software. 1362306a36Sopenharmony_ci * 1462306a36Sopenharmony_ci * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 1562306a36Sopenharmony_ci * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 1662306a36Sopenharmony_ci * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 1762306a36Sopenharmony_ci * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR 1862306a36Sopenharmony_ci * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, 1962306a36Sopenharmony_ci * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 2062306a36Sopenharmony_ci * OTHER DEALINGS IN THE SOFTWARE. 2162306a36Sopenharmony_ci * 2262306a36Sopenharmony_ci */ 2362306a36Sopenharmony_ci#include "amdgpu_ras.h" 2462306a36Sopenharmony_ci#include "amdgpu.h" 2562306a36Sopenharmony_ci#include "amdgpu_mca.h" 2662306a36Sopenharmony_ci 2762306a36Sopenharmony_ci#include "umc/umc_6_7_0_offset.h" 2862306a36Sopenharmony_ci#include "umc/umc_6_7_0_sh_mask.h" 2962306a36Sopenharmony_ci 3062306a36Sopenharmony_civoid amdgpu_mca_query_correctable_error_count(struct amdgpu_device *adev, 3162306a36Sopenharmony_ci uint64_t mc_status_addr, 3262306a36Sopenharmony_ci unsigned long *error_count) 3362306a36Sopenharmony_ci{ 3462306a36Sopenharmony_ci uint64_t mc_status = RREG64_PCIE(mc_status_addr); 3562306a36Sopenharmony_ci 3662306a36Sopenharmony_ci if (REG_GET_FIELD(mc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Val) == 1 && 3762306a36Sopenharmony_ci REG_GET_FIELD(mc_status, MCA_UMC_UMC0_MCUMC_STATUST0, CECC) == 1) 3862306a36Sopenharmony_ci *error_count += 1; 3962306a36Sopenharmony_ci} 4062306a36Sopenharmony_ci 4162306a36Sopenharmony_civoid amdgpu_mca_query_uncorrectable_error_count(struct amdgpu_device *adev, 4262306a36Sopenharmony_ci uint64_t mc_status_addr, 4362306a36Sopenharmony_ci unsigned long *error_count) 4462306a36Sopenharmony_ci{ 4562306a36Sopenharmony_ci uint64_t mc_status = RREG64_PCIE(mc_status_addr); 4662306a36Sopenharmony_ci 4762306a36Sopenharmony_ci if ((REG_GET_FIELD(mc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Val) == 1) && 4862306a36Sopenharmony_ci (REG_GET_FIELD(mc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Deferred) == 1 || 4962306a36Sopenharmony_ci REG_GET_FIELD(mc_status, MCA_UMC_UMC0_MCUMC_STATUST0, UECC) == 1 || 5062306a36Sopenharmony_ci REG_GET_FIELD(mc_status, MCA_UMC_UMC0_MCUMC_STATUST0, PCC) == 1 || 5162306a36Sopenharmony_ci REG_GET_FIELD(mc_status, MCA_UMC_UMC0_MCUMC_STATUST0, UC) == 1 || 5262306a36Sopenharmony_ci REG_GET_FIELD(mc_status, MCA_UMC_UMC0_MCUMC_STATUST0, TCC) == 1)) 5362306a36Sopenharmony_ci *error_count += 1; 5462306a36Sopenharmony_ci} 5562306a36Sopenharmony_ci 5662306a36Sopenharmony_civoid amdgpu_mca_reset_error_count(struct amdgpu_device *adev, 5762306a36Sopenharmony_ci uint64_t mc_status_addr) 5862306a36Sopenharmony_ci{ 5962306a36Sopenharmony_ci WREG64_PCIE(mc_status_addr, 0x0ULL); 6062306a36Sopenharmony_ci} 6162306a36Sopenharmony_ci 6262306a36Sopenharmony_civoid amdgpu_mca_query_ras_error_count(struct amdgpu_device *adev, 6362306a36Sopenharmony_ci uint64_t mc_status_addr, 6462306a36Sopenharmony_ci void *ras_error_status) 6562306a36Sopenharmony_ci{ 6662306a36Sopenharmony_ci struct ras_err_data *err_data = (struct ras_err_data *)ras_error_status; 6762306a36Sopenharmony_ci 6862306a36Sopenharmony_ci amdgpu_mca_query_correctable_error_count(adev, mc_status_addr, &(err_data->ce_count)); 6962306a36Sopenharmony_ci amdgpu_mca_query_uncorrectable_error_count(adev, mc_status_addr, &(err_data->ue_count)); 7062306a36Sopenharmony_ci 7162306a36Sopenharmony_ci amdgpu_mca_reset_error_count(adev, mc_status_addr); 7262306a36Sopenharmony_ci} 7362306a36Sopenharmony_ci 7462306a36Sopenharmony_ciint amdgpu_mca_mp0_ras_sw_init(struct amdgpu_device *adev) 7562306a36Sopenharmony_ci{ 7662306a36Sopenharmony_ci int err; 7762306a36Sopenharmony_ci struct amdgpu_mca_ras_block *ras; 7862306a36Sopenharmony_ci 7962306a36Sopenharmony_ci if (!adev->mca.mp0.ras) 8062306a36Sopenharmony_ci return 0; 8162306a36Sopenharmony_ci 8262306a36Sopenharmony_ci ras = adev->mca.mp0.ras; 8362306a36Sopenharmony_ci 8462306a36Sopenharmony_ci err = amdgpu_ras_register_ras_block(adev, &ras->ras_block); 8562306a36Sopenharmony_ci if (err) { 8662306a36Sopenharmony_ci dev_err(adev->dev, "Failed to register mca.mp0 ras block!\n"); 8762306a36Sopenharmony_ci return err; 8862306a36Sopenharmony_ci } 8962306a36Sopenharmony_ci 9062306a36Sopenharmony_ci strcpy(ras->ras_block.ras_comm.name, "mca.mp0"); 9162306a36Sopenharmony_ci ras->ras_block.ras_comm.block = AMDGPU_RAS_BLOCK__MCA; 9262306a36Sopenharmony_ci ras->ras_block.ras_comm.type = AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE; 9362306a36Sopenharmony_ci adev->mca.mp0.ras_if = &ras->ras_block.ras_comm; 9462306a36Sopenharmony_ci 9562306a36Sopenharmony_ci return 0; 9662306a36Sopenharmony_ci} 9762306a36Sopenharmony_ci 9862306a36Sopenharmony_ciint amdgpu_mca_mp1_ras_sw_init(struct amdgpu_device *adev) 9962306a36Sopenharmony_ci{ 10062306a36Sopenharmony_ci int err; 10162306a36Sopenharmony_ci struct amdgpu_mca_ras_block *ras; 10262306a36Sopenharmony_ci 10362306a36Sopenharmony_ci if (!adev->mca.mp1.ras) 10462306a36Sopenharmony_ci return 0; 10562306a36Sopenharmony_ci 10662306a36Sopenharmony_ci ras = adev->mca.mp1.ras; 10762306a36Sopenharmony_ci 10862306a36Sopenharmony_ci err = amdgpu_ras_register_ras_block(adev, &ras->ras_block); 10962306a36Sopenharmony_ci if (err) { 11062306a36Sopenharmony_ci dev_err(adev->dev, "Failed to register mca.mp1 ras block!\n"); 11162306a36Sopenharmony_ci return err; 11262306a36Sopenharmony_ci } 11362306a36Sopenharmony_ci 11462306a36Sopenharmony_ci strcpy(ras->ras_block.ras_comm.name, "mca.mp1"); 11562306a36Sopenharmony_ci ras->ras_block.ras_comm.block = AMDGPU_RAS_BLOCK__MCA; 11662306a36Sopenharmony_ci ras->ras_block.ras_comm.type = AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE; 11762306a36Sopenharmony_ci adev->mca.mp1.ras_if = &ras->ras_block.ras_comm; 11862306a36Sopenharmony_ci 11962306a36Sopenharmony_ci return 0; 12062306a36Sopenharmony_ci} 12162306a36Sopenharmony_ci 12262306a36Sopenharmony_ciint amdgpu_mca_mpio_ras_sw_init(struct amdgpu_device *adev) 12362306a36Sopenharmony_ci{ 12462306a36Sopenharmony_ci int err; 12562306a36Sopenharmony_ci struct amdgpu_mca_ras_block *ras; 12662306a36Sopenharmony_ci 12762306a36Sopenharmony_ci if (!adev->mca.mpio.ras) 12862306a36Sopenharmony_ci return 0; 12962306a36Sopenharmony_ci 13062306a36Sopenharmony_ci ras = adev->mca.mpio.ras; 13162306a36Sopenharmony_ci 13262306a36Sopenharmony_ci err = amdgpu_ras_register_ras_block(adev, &ras->ras_block); 13362306a36Sopenharmony_ci if (err) { 13462306a36Sopenharmony_ci dev_err(adev->dev, "Failed to register mca.mpio ras block!\n"); 13562306a36Sopenharmony_ci return err; 13662306a36Sopenharmony_ci } 13762306a36Sopenharmony_ci 13862306a36Sopenharmony_ci strcpy(ras->ras_block.ras_comm.name, "mca.mpio"); 13962306a36Sopenharmony_ci ras->ras_block.ras_comm.block = AMDGPU_RAS_BLOCK__MCA; 14062306a36Sopenharmony_ci ras->ras_block.ras_comm.type = AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE; 14162306a36Sopenharmony_ci adev->mca.mpio.ras_if = &ras->ras_block.ras_comm; 14262306a36Sopenharmony_ci 14362306a36Sopenharmony_ci return 0; 14462306a36Sopenharmony_ci} 145