162306a36Sopenharmony_ci// SPDX-License-Identifier: GPL-2.0-only 262306a36Sopenharmony_ci/* Copyright(c) 2022 Intel Corporation. */ 362306a36Sopenharmony_ci 462306a36Sopenharmony_ci#include <linux/cpu.h> 562306a36Sopenharmony_ci#include <linux/delay.h> 662306a36Sopenharmony_ci#include <linux/fs.h> 762306a36Sopenharmony_ci#include <linux/nmi.h> 862306a36Sopenharmony_ci#include <linux/slab.h> 962306a36Sopenharmony_ci#include <linux/stop_machine.h> 1062306a36Sopenharmony_ci 1162306a36Sopenharmony_ci#include "ifs.h" 1262306a36Sopenharmony_ci 1362306a36Sopenharmony_ci/* 1462306a36Sopenharmony_ci * Note all code and data in this file is protected by 1562306a36Sopenharmony_ci * ifs_sem. On HT systems all threads on a core will 1662306a36Sopenharmony_ci * execute together, but only the first thread on the 1762306a36Sopenharmony_ci * core will update results of the test. 1862306a36Sopenharmony_ci */ 1962306a36Sopenharmony_ci 2062306a36Sopenharmony_ci#define CREATE_TRACE_POINTS 2162306a36Sopenharmony_ci#include <trace/events/intel_ifs.h> 2262306a36Sopenharmony_ci 2362306a36Sopenharmony_ci/* Max retries on the same chunk */ 2462306a36Sopenharmony_ci#define MAX_IFS_RETRIES 5 2562306a36Sopenharmony_ci 2662306a36Sopenharmony_ci/* 2762306a36Sopenharmony_ci * Number of TSC cycles that a logical CPU will wait for the other 2862306a36Sopenharmony_ci * logical CPU on the core in the WRMSR(ACTIVATE_SCAN). 2962306a36Sopenharmony_ci */ 3062306a36Sopenharmony_ci#define IFS_THREAD_WAIT 100000 3162306a36Sopenharmony_ci 3262306a36Sopenharmony_cienum ifs_status_err_code { 3362306a36Sopenharmony_ci IFS_NO_ERROR = 0, 3462306a36Sopenharmony_ci IFS_OTHER_THREAD_COULD_NOT_JOIN = 1, 3562306a36Sopenharmony_ci IFS_INTERRUPTED_BEFORE_RENDEZVOUS = 2, 3662306a36Sopenharmony_ci IFS_POWER_MGMT_INADEQUATE_FOR_SCAN = 3, 3762306a36Sopenharmony_ci IFS_INVALID_CHUNK_RANGE = 4, 3862306a36Sopenharmony_ci IFS_MISMATCH_ARGUMENTS_BETWEEN_THREADS = 5, 3962306a36Sopenharmony_ci IFS_CORE_NOT_CAPABLE_CURRENTLY = 6, 4062306a36Sopenharmony_ci IFS_UNASSIGNED_ERROR_CODE = 7, 4162306a36Sopenharmony_ci IFS_EXCEED_NUMBER_OF_THREADS_CONCURRENT = 8, 4262306a36Sopenharmony_ci IFS_INTERRUPTED_DURING_EXECUTION = 9, 4362306a36Sopenharmony_ci}; 4462306a36Sopenharmony_ci 4562306a36Sopenharmony_cistatic const char * const scan_test_status[] = { 4662306a36Sopenharmony_ci [IFS_NO_ERROR] = "SCAN no error", 4762306a36Sopenharmony_ci [IFS_OTHER_THREAD_COULD_NOT_JOIN] = "Other thread could not join.", 4862306a36Sopenharmony_ci [IFS_INTERRUPTED_BEFORE_RENDEZVOUS] = "Interrupt occurred prior to SCAN coordination.", 4962306a36Sopenharmony_ci [IFS_POWER_MGMT_INADEQUATE_FOR_SCAN] = 5062306a36Sopenharmony_ci "Core Abort SCAN Response due to power management condition.", 5162306a36Sopenharmony_ci [IFS_INVALID_CHUNK_RANGE] = "Non valid chunks in the range", 5262306a36Sopenharmony_ci [IFS_MISMATCH_ARGUMENTS_BETWEEN_THREADS] = "Mismatch in arguments between threads T0/T1.", 5362306a36Sopenharmony_ci [IFS_CORE_NOT_CAPABLE_CURRENTLY] = "Core not capable of performing SCAN currently", 5462306a36Sopenharmony_ci [IFS_UNASSIGNED_ERROR_CODE] = "Unassigned error code 0x7", 5562306a36Sopenharmony_ci [IFS_EXCEED_NUMBER_OF_THREADS_CONCURRENT] = 5662306a36Sopenharmony_ci "Exceeded number of Logical Processors (LP) allowed to run Scan-At-Field concurrently", 5762306a36Sopenharmony_ci [IFS_INTERRUPTED_DURING_EXECUTION] = "Interrupt occurred prior to SCAN start", 5862306a36Sopenharmony_ci}; 5962306a36Sopenharmony_ci 6062306a36Sopenharmony_cistatic void message_not_tested(struct device *dev, int cpu, union ifs_status status) 6162306a36Sopenharmony_ci{ 6262306a36Sopenharmony_ci if (status.error_code < ARRAY_SIZE(scan_test_status)) { 6362306a36Sopenharmony_ci dev_info(dev, "CPU(s) %*pbl: SCAN operation did not start. %s\n", 6462306a36Sopenharmony_ci cpumask_pr_args(cpu_smt_mask(cpu)), 6562306a36Sopenharmony_ci scan_test_status[status.error_code]); 6662306a36Sopenharmony_ci } else if (status.error_code == IFS_SW_TIMEOUT) { 6762306a36Sopenharmony_ci dev_info(dev, "CPU(s) %*pbl: software timeout during scan\n", 6862306a36Sopenharmony_ci cpumask_pr_args(cpu_smt_mask(cpu))); 6962306a36Sopenharmony_ci } else if (status.error_code == IFS_SW_PARTIAL_COMPLETION) { 7062306a36Sopenharmony_ci dev_info(dev, "CPU(s) %*pbl: %s\n", 7162306a36Sopenharmony_ci cpumask_pr_args(cpu_smt_mask(cpu)), 7262306a36Sopenharmony_ci "Not all scan chunks were executed. Maximum forward progress retries exceeded"); 7362306a36Sopenharmony_ci } else { 7462306a36Sopenharmony_ci dev_info(dev, "CPU(s) %*pbl: SCAN unknown status %llx\n", 7562306a36Sopenharmony_ci cpumask_pr_args(cpu_smt_mask(cpu)), status.data); 7662306a36Sopenharmony_ci } 7762306a36Sopenharmony_ci} 7862306a36Sopenharmony_ci 7962306a36Sopenharmony_cistatic void message_fail(struct device *dev, int cpu, union ifs_status status) 8062306a36Sopenharmony_ci{ 8162306a36Sopenharmony_ci struct ifs_data *ifsd = ifs_get_data(dev); 8262306a36Sopenharmony_ci 8362306a36Sopenharmony_ci /* 8462306a36Sopenharmony_ci * control_error is set when the microcode runs into a problem 8562306a36Sopenharmony_ci * loading the image from the reserved BIOS memory, or it has 8662306a36Sopenharmony_ci * been corrupted. Reloading the image may fix this issue. 8762306a36Sopenharmony_ci */ 8862306a36Sopenharmony_ci if (status.control_error) { 8962306a36Sopenharmony_ci dev_err(dev, "CPU(s) %*pbl: could not execute from loaded scan image. Batch: %02x version: 0x%x\n", 9062306a36Sopenharmony_ci cpumask_pr_args(cpu_smt_mask(cpu)), ifsd->cur_batch, ifsd->loaded_version); 9162306a36Sopenharmony_ci } 9262306a36Sopenharmony_ci 9362306a36Sopenharmony_ci /* 9462306a36Sopenharmony_ci * signature_error is set when the output from the scan chains does not 9562306a36Sopenharmony_ci * match the expected signature. This might be a transient problem (e.g. 9662306a36Sopenharmony_ci * due to a bit flip from an alpha particle or neutron). If the problem 9762306a36Sopenharmony_ci * repeats on a subsequent test, then it indicates an actual problem in 9862306a36Sopenharmony_ci * the core being tested. 9962306a36Sopenharmony_ci */ 10062306a36Sopenharmony_ci if (status.signature_error) { 10162306a36Sopenharmony_ci dev_err(dev, "CPU(s) %*pbl: test signature incorrect. Batch: %02x version: 0x%x\n", 10262306a36Sopenharmony_ci cpumask_pr_args(cpu_smt_mask(cpu)), ifsd->cur_batch, ifsd->loaded_version); 10362306a36Sopenharmony_ci } 10462306a36Sopenharmony_ci} 10562306a36Sopenharmony_ci 10662306a36Sopenharmony_cistatic bool can_restart(union ifs_status status) 10762306a36Sopenharmony_ci{ 10862306a36Sopenharmony_ci enum ifs_status_err_code err_code = status.error_code; 10962306a36Sopenharmony_ci 11062306a36Sopenharmony_ci /* Signature for chunk is bad, or scan test failed */ 11162306a36Sopenharmony_ci if (status.signature_error || status.control_error) 11262306a36Sopenharmony_ci return false; 11362306a36Sopenharmony_ci 11462306a36Sopenharmony_ci switch (err_code) { 11562306a36Sopenharmony_ci case IFS_NO_ERROR: 11662306a36Sopenharmony_ci case IFS_OTHER_THREAD_COULD_NOT_JOIN: 11762306a36Sopenharmony_ci case IFS_INTERRUPTED_BEFORE_RENDEZVOUS: 11862306a36Sopenharmony_ci case IFS_POWER_MGMT_INADEQUATE_FOR_SCAN: 11962306a36Sopenharmony_ci case IFS_EXCEED_NUMBER_OF_THREADS_CONCURRENT: 12062306a36Sopenharmony_ci case IFS_INTERRUPTED_DURING_EXECUTION: 12162306a36Sopenharmony_ci return true; 12262306a36Sopenharmony_ci case IFS_INVALID_CHUNK_RANGE: 12362306a36Sopenharmony_ci case IFS_MISMATCH_ARGUMENTS_BETWEEN_THREADS: 12462306a36Sopenharmony_ci case IFS_CORE_NOT_CAPABLE_CURRENTLY: 12562306a36Sopenharmony_ci case IFS_UNASSIGNED_ERROR_CODE: 12662306a36Sopenharmony_ci break; 12762306a36Sopenharmony_ci } 12862306a36Sopenharmony_ci return false; 12962306a36Sopenharmony_ci} 13062306a36Sopenharmony_ci 13162306a36Sopenharmony_ci/* 13262306a36Sopenharmony_ci * Execute the scan. Called "simultaneously" on all threads of a core 13362306a36Sopenharmony_ci * at high priority using the stop_cpus mechanism. 13462306a36Sopenharmony_ci */ 13562306a36Sopenharmony_cistatic int doscan(void *data) 13662306a36Sopenharmony_ci{ 13762306a36Sopenharmony_ci int cpu = smp_processor_id(); 13862306a36Sopenharmony_ci u64 *msrs = data; 13962306a36Sopenharmony_ci int first; 14062306a36Sopenharmony_ci 14162306a36Sopenharmony_ci /* Only the first logical CPU on a core reports result */ 14262306a36Sopenharmony_ci first = cpumask_first(cpu_smt_mask(cpu)); 14362306a36Sopenharmony_ci 14462306a36Sopenharmony_ci /* 14562306a36Sopenharmony_ci * This WRMSR will wait for other HT threads to also write 14662306a36Sopenharmony_ci * to this MSR (at most for activate.delay cycles). Then it 14762306a36Sopenharmony_ci * starts scan of each requested chunk. The core scan happens 14862306a36Sopenharmony_ci * during the "execution" of the WRMSR. This instruction can 14962306a36Sopenharmony_ci * take up to 200 milliseconds (in the case where all chunks 15062306a36Sopenharmony_ci * are processed in a single pass) before it retires. 15162306a36Sopenharmony_ci */ 15262306a36Sopenharmony_ci wrmsrl(MSR_ACTIVATE_SCAN, msrs[0]); 15362306a36Sopenharmony_ci 15462306a36Sopenharmony_ci if (cpu == first) { 15562306a36Sopenharmony_ci /* Pass back the result of the scan */ 15662306a36Sopenharmony_ci rdmsrl(MSR_SCAN_STATUS, msrs[1]); 15762306a36Sopenharmony_ci } 15862306a36Sopenharmony_ci 15962306a36Sopenharmony_ci return 0; 16062306a36Sopenharmony_ci} 16162306a36Sopenharmony_ci 16262306a36Sopenharmony_ci/* 16362306a36Sopenharmony_ci * Use stop_core_cpuslocked() to synchronize writing to MSR_ACTIVATE_SCAN 16462306a36Sopenharmony_ci * on all threads of the core to be tested. Loop if necessary to complete 16562306a36Sopenharmony_ci * run of all chunks. Include some defensive tests to make sure forward 16662306a36Sopenharmony_ci * progress is made, and that the whole test completes in a reasonable time. 16762306a36Sopenharmony_ci */ 16862306a36Sopenharmony_cistatic void ifs_test_core(int cpu, struct device *dev) 16962306a36Sopenharmony_ci{ 17062306a36Sopenharmony_ci union ifs_scan activate; 17162306a36Sopenharmony_ci union ifs_status status; 17262306a36Sopenharmony_ci unsigned long timeout; 17362306a36Sopenharmony_ci struct ifs_data *ifsd; 17462306a36Sopenharmony_ci u64 msrvals[2]; 17562306a36Sopenharmony_ci int retries; 17662306a36Sopenharmony_ci 17762306a36Sopenharmony_ci ifsd = ifs_get_data(dev); 17862306a36Sopenharmony_ci 17962306a36Sopenharmony_ci activate.rsvd = 0; 18062306a36Sopenharmony_ci activate.delay = IFS_THREAD_WAIT; 18162306a36Sopenharmony_ci activate.sigmce = 0; 18262306a36Sopenharmony_ci activate.start = 0; 18362306a36Sopenharmony_ci activate.stop = ifsd->valid_chunks - 1; 18462306a36Sopenharmony_ci 18562306a36Sopenharmony_ci timeout = jiffies + HZ / 2; 18662306a36Sopenharmony_ci retries = MAX_IFS_RETRIES; 18762306a36Sopenharmony_ci 18862306a36Sopenharmony_ci while (activate.start <= activate.stop) { 18962306a36Sopenharmony_ci if (time_after(jiffies, timeout)) { 19062306a36Sopenharmony_ci status.error_code = IFS_SW_TIMEOUT; 19162306a36Sopenharmony_ci break; 19262306a36Sopenharmony_ci } 19362306a36Sopenharmony_ci 19462306a36Sopenharmony_ci msrvals[0] = activate.data; 19562306a36Sopenharmony_ci stop_core_cpuslocked(cpu, doscan, msrvals); 19662306a36Sopenharmony_ci 19762306a36Sopenharmony_ci status.data = msrvals[1]; 19862306a36Sopenharmony_ci 19962306a36Sopenharmony_ci trace_ifs_status(cpu, activate, status); 20062306a36Sopenharmony_ci 20162306a36Sopenharmony_ci /* Some cases can be retried, give up for others */ 20262306a36Sopenharmony_ci if (!can_restart(status)) 20362306a36Sopenharmony_ci break; 20462306a36Sopenharmony_ci 20562306a36Sopenharmony_ci if (status.chunk_num == activate.start) { 20662306a36Sopenharmony_ci /* Check for forward progress */ 20762306a36Sopenharmony_ci if (--retries == 0) { 20862306a36Sopenharmony_ci if (status.error_code == IFS_NO_ERROR) 20962306a36Sopenharmony_ci status.error_code = IFS_SW_PARTIAL_COMPLETION; 21062306a36Sopenharmony_ci break; 21162306a36Sopenharmony_ci } 21262306a36Sopenharmony_ci } else { 21362306a36Sopenharmony_ci retries = MAX_IFS_RETRIES; 21462306a36Sopenharmony_ci activate.start = status.chunk_num; 21562306a36Sopenharmony_ci } 21662306a36Sopenharmony_ci } 21762306a36Sopenharmony_ci 21862306a36Sopenharmony_ci /* Update status for this core */ 21962306a36Sopenharmony_ci ifsd->scan_details = status.data; 22062306a36Sopenharmony_ci 22162306a36Sopenharmony_ci if (status.control_error || status.signature_error) { 22262306a36Sopenharmony_ci ifsd->status = SCAN_TEST_FAIL; 22362306a36Sopenharmony_ci message_fail(dev, cpu, status); 22462306a36Sopenharmony_ci } else if (status.error_code) { 22562306a36Sopenharmony_ci ifsd->status = SCAN_NOT_TESTED; 22662306a36Sopenharmony_ci message_not_tested(dev, cpu, status); 22762306a36Sopenharmony_ci } else { 22862306a36Sopenharmony_ci ifsd->status = SCAN_TEST_PASS; 22962306a36Sopenharmony_ci } 23062306a36Sopenharmony_ci} 23162306a36Sopenharmony_ci 23262306a36Sopenharmony_ci#define SPINUNIT 100 /* 100 nsec */ 23362306a36Sopenharmony_cistatic atomic_t array_cpus_out; 23462306a36Sopenharmony_ci 23562306a36Sopenharmony_ci/* 23662306a36Sopenharmony_ci * Simplified cpu sibling rendezvous loop based on microcode loader __wait_for_cpus() 23762306a36Sopenharmony_ci */ 23862306a36Sopenharmony_cistatic void wait_for_sibling_cpu(atomic_t *t, long long timeout) 23962306a36Sopenharmony_ci{ 24062306a36Sopenharmony_ci int cpu = smp_processor_id(); 24162306a36Sopenharmony_ci const struct cpumask *smt_mask = cpu_smt_mask(cpu); 24262306a36Sopenharmony_ci int all_cpus = cpumask_weight(smt_mask); 24362306a36Sopenharmony_ci 24462306a36Sopenharmony_ci atomic_inc(t); 24562306a36Sopenharmony_ci while (atomic_read(t) < all_cpus) { 24662306a36Sopenharmony_ci if (timeout < SPINUNIT) 24762306a36Sopenharmony_ci return; 24862306a36Sopenharmony_ci ndelay(SPINUNIT); 24962306a36Sopenharmony_ci timeout -= SPINUNIT; 25062306a36Sopenharmony_ci touch_nmi_watchdog(); 25162306a36Sopenharmony_ci } 25262306a36Sopenharmony_ci} 25362306a36Sopenharmony_ci 25462306a36Sopenharmony_cistatic int do_array_test(void *data) 25562306a36Sopenharmony_ci{ 25662306a36Sopenharmony_ci union ifs_array *command = data; 25762306a36Sopenharmony_ci int cpu = smp_processor_id(); 25862306a36Sopenharmony_ci int first; 25962306a36Sopenharmony_ci 26062306a36Sopenharmony_ci /* 26162306a36Sopenharmony_ci * Only one logical CPU on a core needs to trigger the Array test via MSR write. 26262306a36Sopenharmony_ci */ 26362306a36Sopenharmony_ci first = cpumask_first(cpu_smt_mask(cpu)); 26462306a36Sopenharmony_ci 26562306a36Sopenharmony_ci if (cpu == first) { 26662306a36Sopenharmony_ci wrmsrl(MSR_ARRAY_BIST, command->data); 26762306a36Sopenharmony_ci /* Pass back the result of the test */ 26862306a36Sopenharmony_ci rdmsrl(MSR_ARRAY_BIST, command->data); 26962306a36Sopenharmony_ci } 27062306a36Sopenharmony_ci 27162306a36Sopenharmony_ci /* Tests complete faster if the sibling is spinning here */ 27262306a36Sopenharmony_ci wait_for_sibling_cpu(&array_cpus_out, NSEC_PER_SEC); 27362306a36Sopenharmony_ci 27462306a36Sopenharmony_ci return 0; 27562306a36Sopenharmony_ci} 27662306a36Sopenharmony_ci 27762306a36Sopenharmony_cistatic void ifs_array_test_core(int cpu, struct device *dev) 27862306a36Sopenharmony_ci{ 27962306a36Sopenharmony_ci union ifs_array command = {}; 28062306a36Sopenharmony_ci bool timed_out = false; 28162306a36Sopenharmony_ci struct ifs_data *ifsd; 28262306a36Sopenharmony_ci unsigned long timeout; 28362306a36Sopenharmony_ci 28462306a36Sopenharmony_ci ifsd = ifs_get_data(dev); 28562306a36Sopenharmony_ci 28662306a36Sopenharmony_ci command.array_bitmask = ~0U; 28762306a36Sopenharmony_ci timeout = jiffies + HZ / 2; 28862306a36Sopenharmony_ci 28962306a36Sopenharmony_ci do { 29062306a36Sopenharmony_ci if (time_after(jiffies, timeout)) { 29162306a36Sopenharmony_ci timed_out = true; 29262306a36Sopenharmony_ci break; 29362306a36Sopenharmony_ci } 29462306a36Sopenharmony_ci atomic_set(&array_cpus_out, 0); 29562306a36Sopenharmony_ci stop_core_cpuslocked(cpu, do_array_test, &command); 29662306a36Sopenharmony_ci 29762306a36Sopenharmony_ci if (command.ctrl_result) 29862306a36Sopenharmony_ci break; 29962306a36Sopenharmony_ci } while (command.array_bitmask); 30062306a36Sopenharmony_ci 30162306a36Sopenharmony_ci ifsd->scan_details = command.data; 30262306a36Sopenharmony_ci 30362306a36Sopenharmony_ci if (command.ctrl_result) 30462306a36Sopenharmony_ci ifsd->status = SCAN_TEST_FAIL; 30562306a36Sopenharmony_ci else if (timed_out || command.array_bitmask) 30662306a36Sopenharmony_ci ifsd->status = SCAN_NOT_TESTED; 30762306a36Sopenharmony_ci else 30862306a36Sopenharmony_ci ifsd->status = SCAN_TEST_PASS; 30962306a36Sopenharmony_ci} 31062306a36Sopenharmony_ci 31162306a36Sopenharmony_ci/* 31262306a36Sopenharmony_ci * Initiate per core test. It wakes up work queue threads on the target cpu and 31362306a36Sopenharmony_ci * its sibling cpu. Once all sibling threads wake up, the scan test gets executed and 31462306a36Sopenharmony_ci * wait for all sibling threads to finish the scan test. 31562306a36Sopenharmony_ci */ 31662306a36Sopenharmony_ciint do_core_test(int cpu, struct device *dev) 31762306a36Sopenharmony_ci{ 31862306a36Sopenharmony_ci const struct ifs_test_caps *test = ifs_get_test_caps(dev); 31962306a36Sopenharmony_ci struct ifs_data *ifsd = ifs_get_data(dev); 32062306a36Sopenharmony_ci int ret = 0; 32162306a36Sopenharmony_ci 32262306a36Sopenharmony_ci /* Prevent CPUs from being taken offline during the scan test */ 32362306a36Sopenharmony_ci cpus_read_lock(); 32462306a36Sopenharmony_ci 32562306a36Sopenharmony_ci if (!cpu_online(cpu)) { 32662306a36Sopenharmony_ci dev_info(dev, "cannot test on the offline cpu %d\n", cpu); 32762306a36Sopenharmony_ci ret = -EINVAL; 32862306a36Sopenharmony_ci goto out; 32962306a36Sopenharmony_ci } 33062306a36Sopenharmony_ci 33162306a36Sopenharmony_ci switch (test->test_num) { 33262306a36Sopenharmony_ci case IFS_TYPE_SAF: 33362306a36Sopenharmony_ci if (!ifsd->loaded) 33462306a36Sopenharmony_ci ret = -EPERM; 33562306a36Sopenharmony_ci else 33662306a36Sopenharmony_ci ifs_test_core(cpu, dev); 33762306a36Sopenharmony_ci break; 33862306a36Sopenharmony_ci case IFS_TYPE_ARRAY_BIST: 33962306a36Sopenharmony_ci ifs_array_test_core(cpu, dev); 34062306a36Sopenharmony_ci break; 34162306a36Sopenharmony_ci default: 34262306a36Sopenharmony_ci ret = -EINVAL; 34362306a36Sopenharmony_ci } 34462306a36Sopenharmony_ciout: 34562306a36Sopenharmony_ci cpus_read_unlock(); 34662306a36Sopenharmony_ci return ret; 34762306a36Sopenharmony_ci} 348