// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (C) 2020-2023 Intel Corporation
 */
562306a36Sopenharmony_ci
662306a36Sopenharmony_ci#include <linux/highmem.h>
762306a36Sopenharmony_ci#include <linux/moduleparam.h>
862306a36Sopenharmony_ci#include <linux/pci.h>
962306a36Sopenharmony_ci#include <linux/pm_runtime.h>
1062306a36Sopenharmony_ci#include <linux/reboot.h>
1162306a36Sopenharmony_ci
1262306a36Sopenharmony_ci#include "vpu_boot_api.h"
1362306a36Sopenharmony_ci#include "ivpu_drv.h"
1462306a36Sopenharmony_ci#include "ivpu_hw.h"
1562306a36Sopenharmony_ci#include "ivpu_fw.h"
1662306a36Sopenharmony_ci#include "ivpu_ipc.h"
1762306a36Sopenharmony_ci#include "ivpu_job.h"
1862306a36Sopenharmony_ci#include "ivpu_mmu.h"
1962306a36Sopenharmony_ci#include "ivpu_pm.h"
2062306a36Sopenharmony_ci
2162306a36Sopenharmony_cistatic bool ivpu_disable_recovery;
2262306a36Sopenharmony_cimodule_param_named_unsafe(disable_recovery, ivpu_disable_recovery, bool, 0644);
2362306a36Sopenharmony_ciMODULE_PARM_DESC(disable_recovery, "Disables recovery when VPU hang is detected");
2462306a36Sopenharmony_ci
2562306a36Sopenharmony_ci#define PM_RESCHEDULE_LIMIT     5
2662306a36Sopenharmony_ci
2762306a36Sopenharmony_cistatic void ivpu_pm_prepare_cold_boot(struct ivpu_device *vdev)
2862306a36Sopenharmony_ci{
2962306a36Sopenharmony_ci	struct ivpu_fw_info *fw = vdev->fw;
3062306a36Sopenharmony_ci
3162306a36Sopenharmony_ci	ivpu_cmdq_reset_all_contexts(vdev);
3262306a36Sopenharmony_ci	ivpu_ipc_reset(vdev);
3362306a36Sopenharmony_ci	ivpu_fw_load(vdev);
3462306a36Sopenharmony_ci	fw->entry_point = fw->cold_boot_entry_point;
3562306a36Sopenharmony_ci}
3662306a36Sopenharmony_ci
3762306a36Sopenharmony_cistatic void ivpu_pm_prepare_warm_boot(struct ivpu_device *vdev)
3862306a36Sopenharmony_ci{
3962306a36Sopenharmony_ci	struct ivpu_fw_info *fw = vdev->fw;
4062306a36Sopenharmony_ci	struct vpu_boot_params *bp = fw->mem->kvaddr;
4162306a36Sopenharmony_ci
4262306a36Sopenharmony_ci	if (!bp->save_restore_ret_address) {
4362306a36Sopenharmony_ci		ivpu_pm_prepare_cold_boot(vdev);
4462306a36Sopenharmony_ci		return;
4562306a36Sopenharmony_ci	}
4662306a36Sopenharmony_ci
4762306a36Sopenharmony_ci	ivpu_dbg(vdev, FW_BOOT, "Save/restore entry point %llx", bp->save_restore_ret_address);
4862306a36Sopenharmony_ci	fw->entry_point = bp->save_restore_ret_address;
4962306a36Sopenharmony_ci}
5062306a36Sopenharmony_ci
/*
 * Shut the VPU down via ivpu_shutdown().
 *
 * Returns the value returned by ivpu_shutdown(); a non-zero value is also
 * reported in the log.
 */
static int ivpu_suspend(struct ivpu_device *vdev)
{
	int err = ivpu_shutdown(vdev);

	if (err)
		ivpu_err(vdev, "Failed to shutdown VPU: %d\n", err);

	return err;
}
6362306a36Sopenharmony_ci
/*
 * Power the HW up, enable the MMU and boot the firmware.
 *
 * If booting fails and the attempt was not a cold boot, the MMU is disabled,
 * the HW is powered down, a cold boot is prepared and the whole sequence is
 * run again. A failed cold boot is not retried.
 *
 * Returns 0 on success or the error code of the step that failed.
 */
static int ivpu_resume(struct ivpu_device *vdev)
{
	int err;

	for (;;) {
		err = ivpu_hw_power_up(vdev);
		if (err) {
			ivpu_err(vdev, "Failed to power up HW: %d\n", err);
			return err;
		}

		err = ivpu_mmu_enable(vdev);
		if (err) {
			ivpu_err(vdev, "Failed to resume MMU: %d\n", err);
			ivpu_hw_power_down(vdev);
			return err;
		}

		err = ivpu_boot(vdev);
		if (!err)
			return err;

		/* Boot failed: undo the MMU enable and the power-up */
		ivpu_mmu_disable(vdev);
		ivpu_hw_power_down(vdev);

		if (ivpu_fw_is_cold_boot(vdev)) {
			ivpu_err(vdev, "Failed to resume the FW: %d\n", err);
			return err;
		}

		/* The failed attempt was not a cold boot, so try once more as a cold boot */
		ivpu_warn(vdev, "Failed to resume the FW: %d. Retrying cold boot..\n", err);
		ivpu_pm_prepare_cold_boot(vdev);
	}
}
9762306a36Sopenharmony_ci
9862306a36Sopenharmony_cistatic void ivpu_pm_recovery_work(struct work_struct *work)
9962306a36Sopenharmony_ci{
10062306a36Sopenharmony_ci	struct ivpu_pm_info *pm = container_of(work, struct ivpu_pm_info, recovery_work);
10162306a36Sopenharmony_ci	struct ivpu_device *vdev = pm->vdev;
10262306a36Sopenharmony_ci	char *evt[2] = {"IVPU_PM_EVENT=IVPU_RECOVER", NULL};
10362306a36Sopenharmony_ci	int ret;
10462306a36Sopenharmony_ci
10562306a36Sopenharmony_ciretry:
10662306a36Sopenharmony_ci	ret = pci_try_reset_function(to_pci_dev(vdev->drm.dev));
10762306a36Sopenharmony_ci	if (ret == -EAGAIN && !drm_dev_is_unplugged(&vdev->drm)) {
10862306a36Sopenharmony_ci		cond_resched();
10962306a36Sopenharmony_ci		goto retry;
11062306a36Sopenharmony_ci	}
11162306a36Sopenharmony_ci
11262306a36Sopenharmony_ci	if (ret && ret != -EAGAIN)
11362306a36Sopenharmony_ci		ivpu_err(vdev, "Failed to reset VPU: %d\n", ret);
11462306a36Sopenharmony_ci
11562306a36Sopenharmony_ci	kobject_uevent_env(&vdev->drm.dev->kobj, KOBJ_CHANGE, evt);
11662306a36Sopenharmony_ci}
11762306a36Sopenharmony_ci
11862306a36Sopenharmony_civoid ivpu_pm_schedule_recovery(struct ivpu_device *vdev)
11962306a36Sopenharmony_ci{
12062306a36Sopenharmony_ci	struct ivpu_pm_info *pm = vdev->pm;
12162306a36Sopenharmony_ci
12262306a36Sopenharmony_ci	if (ivpu_disable_recovery) {
12362306a36Sopenharmony_ci		ivpu_err(vdev, "Recovery not available when disable_recovery param is set\n");
12462306a36Sopenharmony_ci		return;
12562306a36Sopenharmony_ci	}
12662306a36Sopenharmony_ci
12762306a36Sopenharmony_ci	if (ivpu_is_fpga(vdev)) {
12862306a36Sopenharmony_ci		ivpu_err(vdev, "Recovery not available on FPGA\n");
12962306a36Sopenharmony_ci		return;
13062306a36Sopenharmony_ci	}
13162306a36Sopenharmony_ci
13262306a36Sopenharmony_ci	/* Schedule recovery if it's not in progress */
13362306a36Sopenharmony_ci	if (atomic_cmpxchg(&pm->in_reset, 0, 1) == 0) {
13462306a36Sopenharmony_ci		ivpu_hw_irq_disable(vdev);
13562306a36Sopenharmony_ci		queue_work(system_long_wq, &pm->recovery_work);
13662306a36Sopenharmony_ci	}
13762306a36Sopenharmony_ci}
13862306a36Sopenharmony_ci
13962306a36Sopenharmony_ciint ivpu_pm_suspend_cb(struct device *dev)
14062306a36Sopenharmony_ci{
14162306a36Sopenharmony_ci	struct drm_device *drm = dev_get_drvdata(dev);
14262306a36Sopenharmony_ci	struct ivpu_device *vdev = to_ivpu_device(drm);
14362306a36Sopenharmony_ci	unsigned long timeout;
14462306a36Sopenharmony_ci
14562306a36Sopenharmony_ci	ivpu_dbg(vdev, PM, "Suspend..\n");
14662306a36Sopenharmony_ci
14762306a36Sopenharmony_ci	timeout = jiffies + msecs_to_jiffies(vdev->timeout.tdr);
14862306a36Sopenharmony_ci	while (!ivpu_hw_is_idle(vdev)) {
14962306a36Sopenharmony_ci		cond_resched();
15062306a36Sopenharmony_ci		if (time_after_eq(jiffies, timeout)) {
15162306a36Sopenharmony_ci			ivpu_err(vdev, "Failed to enter idle on system suspend\n");
15262306a36Sopenharmony_ci			return -EBUSY;
15362306a36Sopenharmony_ci		}
15462306a36Sopenharmony_ci	}
15562306a36Sopenharmony_ci
15662306a36Sopenharmony_ci	ivpu_suspend(vdev);
15762306a36Sopenharmony_ci	ivpu_pm_prepare_warm_boot(vdev);
15862306a36Sopenharmony_ci
15962306a36Sopenharmony_ci	pci_save_state(to_pci_dev(dev));
16062306a36Sopenharmony_ci	pci_set_power_state(to_pci_dev(dev), PCI_D3hot);
16162306a36Sopenharmony_ci
16262306a36Sopenharmony_ci	ivpu_dbg(vdev, PM, "Suspend done.\n");
16362306a36Sopenharmony_ci
16462306a36Sopenharmony_ci	return 0;
16562306a36Sopenharmony_ci}
16662306a36Sopenharmony_ci
16762306a36Sopenharmony_ciint ivpu_pm_resume_cb(struct device *dev)
16862306a36Sopenharmony_ci{
16962306a36Sopenharmony_ci	struct drm_device *drm = dev_get_drvdata(dev);
17062306a36Sopenharmony_ci	struct ivpu_device *vdev = to_ivpu_device(drm);
17162306a36Sopenharmony_ci	int ret;
17262306a36Sopenharmony_ci
17362306a36Sopenharmony_ci	ivpu_dbg(vdev, PM, "Resume..\n");
17462306a36Sopenharmony_ci
17562306a36Sopenharmony_ci	pci_set_power_state(to_pci_dev(dev), PCI_D0);
17662306a36Sopenharmony_ci	pci_restore_state(to_pci_dev(dev));
17762306a36Sopenharmony_ci
17862306a36Sopenharmony_ci	ret = ivpu_resume(vdev);
17962306a36Sopenharmony_ci	if (ret)
18062306a36Sopenharmony_ci		ivpu_err(vdev, "Failed to resume: %d\n", ret);
18162306a36Sopenharmony_ci
18262306a36Sopenharmony_ci	ivpu_dbg(vdev, PM, "Resume done.\n");
18362306a36Sopenharmony_ci
18462306a36Sopenharmony_ci	return ret;
18562306a36Sopenharmony_ci}
18662306a36Sopenharmony_ci
18762306a36Sopenharmony_ciint ivpu_pm_runtime_suspend_cb(struct device *dev)
18862306a36Sopenharmony_ci{
18962306a36Sopenharmony_ci	struct drm_device *drm = dev_get_drvdata(dev);
19062306a36Sopenharmony_ci	struct ivpu_device *vdev = to_ivpu_device(drm);
19162306a36Sopenharmony_ci	int ret;
19262306a36Sopenharmony_ci
19362306a36Sopenharmony_ci	ivpu_dbg(vdev, PM, "Runtime suspend..\n");
19462306a36Sopenharmony_ci
19562306a36Sopenharmony_ci	if (!ivpu_hw_is_idle(vdev) && vdev->pm->suspend_reschedule_counter) {
19662306a36Sopenharmony_ci		ivpu_dbg(vdev, PM, "Failed to enter idle, rescheduling suspend, retries left %d\n",
19762306a36Sopenharmony_ci			 vdev->pm->suspend_reschedule_counter);
19862306a36Sopenharmony_ci		pm_schedule_suspend(dev, vdev->timeout.reschedule_suspend);
19962306a36Sopenharmony_ci		vdev->pm->suspend_reschedule_counter--;
20062306a36Sopenharmony_ci		return -EAGAIN;
20162306a36Sopenharmony_ci	}
20262306a36Sopenharmony_ci
20362306a36Sopenharmony_ci	ret = ivpu_suspend(vdev);
20462306a36Sopenharmony_ci	if (ret)
20562306a36Sopenharmony_ci		ivpu_err(vdev, "Failed to set suspend VPU: %d\n", ret);
20662306a36Sopenharmony_ci
20762306a36Sopenharmony_ci	if (!vdev->pm->suspend_reschedule_counter) {
20862306a36Sopenharmony_ci		ivpu_warn(vdev, "VPU failed to enter idle, force suspended.\n");
20962306a36Sopenharmony_ci		ivpu_pm_prepare_cold_boot(vdev);
21062306a36Sopenharmony_ci	} else {
21162306a36Sopenharmony_ci		ivpu_pm_prepare_warm_boot(vdev);
21262306a36Sopenharmony_ci	}
21362306a36Sopenharmony_ci
21462306a36Sopenharmony_ci	vdev->pm->suspend_reschedule_counter = PM_RESCHEDULE_LIMIT;
21562306a36Sopenharmony_ci
21662306a36Sopenharmony_ci	ivpu_dbg(vdev, PM, "Runtime suspend done.\n");
21762306a36Sopenharmony_ci
21862306a36Sopenharmony_ci	return 0;
21962306a36Sopenharmony_ci}
22062306a36Sopenharmony_ci
22162306a36Sopenharmony_ciint ivpu_pm_runtime_resume_cb(struct device *dev)
22262306a36Sopenharmony_ci{
22362306a36Sopenharmony_ci	struct drm_device *drm = dev_get_drvdata(dev);
22462306a36Sopenharmony_ci	struct ivpu_device *vdev = to_ivpu_device(drm);
22562306a36Sopenharmony_ci	int ret;
22662306a36Sopenharmony_ci
22762306a36Sopenharmony_ci	ivpu_dbg(vdev, PM, "Runtime resume..\n");
22862306a36Sopenharmony_ci
22962306a36Sopenharmony_ci	ret = ivpu_resume(vdev);
23062306a36Sopenharmony_ci	if (ret)
23162306a36Sopenharmony_ci		ivpu_err(vdev, "Failed to set RESUME state: %d\n", ret);
23262306a36Sopenharmony_ci
23362306a36Sopenharmony_ci	ivpu_dbg(vdev, PM, "Runtime resume done.\n");
23462306a36Sopenharmony_ci
23562306a36Sopenharmony_ci	return ret;
23662306a36Sopenharmony_ci}
23762306a36Sopenharmony_ci
23862306a36Sopenharmony_ciint ivpu_rpm_get(struct ivpu_device *vdev)
23962306a36Sopenharmony_ci{
24062306a36Sopenharmony_ci	int ret;
24162306a36Sopenharmony_ci
24262306a36Sopenharmony_ci	ret = pm_runtime_resume_and_get(vdev->drm.dev);
24362306a36Sopenharmony_ci	if (!drm_WARN_ON(&vdev->drm, ret < 0))
24462306a36Sopenharmony_ci		vdev->pm->suspend_reschedule_counter = PM_RESCHEDULE_LIMIT;
24562306a36Sopenharmony_ci
24662306a36Sopenharmony_ci	return ret;
24762306a36Sopenharmony_ci}
24862306a36Sopenharmony_ci
24962306a36Sopenharmony_civoid ivpu_rpm_put(struct ivpu_device *vdev)
25062306a36Sopenharmony_ci{
25162306a36Sopenharmony_ci	pm_runtime_mark_last_busy(vdev->drm.dev);
25262306a36Sopenharmony_ci	pm_runtime_put_autosuspend(vdev->drm.dev);
25362306a36Sopenharmony_ci}
25462306a36Sopenharmony_ci
25562306a36Sopenharmony_civoid ivpu_pm_reset_prepare_cb(struct pci_dev *pdev)
25662306a36Sopenharmony_ci{
25762306a36Sopenharmony_ci	struct ivpu_device *vdev = pci_get_drvdata(pdev);
25862306a36Sopenharmony_ci
25962306a36Sopenharmony_ci	pm_runtime_get_sync(vdev->drm.dev);
26062306a36Sopenharmony_ci
26162306a36Sopenharmony_ci	ivpu_dbg(vdev, PM, "Pre-reset..\n");
26262306a36Sopenharmony_ci	atomic_inc(&vdev->pm->reset_counter);
26362306a36Sopenharmony_ci	atomic_set(&vdev->pm->in_reset, 1);
26462306a36Sopenharmony_ci	ivpu_prepare_for_reset(vdev);
26562306a36Sopenharmony_ci	ivpu_hw_reset(vdev);
26662306a36Sopenharmony_ci	ivpu_pm_prepare_cold_boot(vdev);
26762306a36Sopenharmony_ci	ivpu_jobs_abort_all(vdev);
26862306a36Sopenharmony_ci	ivpu_dbg(vdev, PM, "Pre-reset done.\n");
26962306a36Sopenharmony_ci}
27062306a36Sopenharmony_ci
27162306a36Sopenharmony_civoid ivpu_pm_reset_done_cb(struct pci_dev *pdev)
27262306a36Sopenharmony_ci{
27362306a36Sopenharmony_ci	struct ivpu_device *vdev = pci_get_drvdata(pdev);
27462306a36Sopenharmony_ci	int ret;
27562306a36Sopenharmony_ci
27662306a36Sopenharmony_ci	ivpu_dbg(vdev, PM, "Post-reset..\n");
27762306a36Sopenharmony_ci	ret = ivpu_resume(vdev);
27862306a36Sopenharmony_ci	if (ret)
27962306a36Sopenharmony_ci		ivpu_err(vdev, "Failed to set RESUME state: %d\n", ret);
28062306a36Sopenharmony_ci	atomic_set(&vdev->pm->in_reset, 0);
28162306a36Sopenharmony_ci	ivpu_dbg(vdev, PM, "Post-reset done.\n");
28262306a36Sopenharmony_ci
28362306a36Sopenharmony_ci	pm_runtime_put_autosuspend(vdev->drm.dev);
28462306a36Sopenharmony_ci}
28562306a36Sopenharmony_ci
28662306a36Sopenharmony_ciint ivpu_pm_init(struct ivpu_device *vdev)
28762306a36Sopenharmony_ci{
28862306a36Sopenharmony_ci	struct device *dev = vdev->drm.dev;
28962306a36Sopenharmony_ci	struct ivpu_pm_info *pm = vdev->pm;
29062306a36Sopenharmony_ci
29162306a36Sopenharmony_ci	pm->vdev = vdev;
29262306a36Sopenharmony_ci	pm->suspend_reschedule_counter = PM_RESCHEDULE_LIMIT;
29362306a36Sopenharmony_ci
29462306a36Sopenharmony_ci	atomic_set(&pm->in_reset, 0);
29562306a36Sopenharmony_ci	INIT_WORK(&pm->recovery_work, ivpu_pm_recovery_work);
29662306a36Sopenharmony_ci
29762306a36Sopenharmony_ci	pm_runtime_use_autosuspend(dev);
29862306a36Sopenharmony_ci
29962306a36Sopenharmony_ci	if (ivpu_disable_recovery)
30062306a36Sopenharmony_ci		pm_runtime_set_autosuspend_delay(dev, -1);
30162306a36Sopenharmony_ci	else if (ivpu_is_silicon(vdev))
30262306a36Sopenharmony_ci		pm_runtime_set_autosuspend_delay(dev, 100);
30362306a36Sopenharmony_ci	else
30462306a36Sopenharmony_ci		pm_runtime_set_autosuspend_delay(dev, 60000);
30562306a36Sopenharmony_ci
30662306a36Sopenharmony_ci	return 0;
30762306a36Sopenharmony_ci}
30862306a36Sopenharmony_ci
30962306a36Sopenharmony_civoid ivpu_pm_cancel_recovery(struct ivpu_device *vdev)
31062306a36Sopenharmony_ci{
31162306a36Sopenharmony_ci	cancel_work_sync(&vdev->pm->recovery_work);
31262306a36Sopenharmony_ci}
31362306a36Sopenharmony_ci
31462306a36Sopenharmony_civoid ivpu_pm_enable(struct ivpu_device *vdev)
31562306a36Sopenharmony_ci{
31662306a36Sopenharmony_ci	struct device *dev = vdev->drm.dev;
31762306a36Sopenharmony_ci
31862306a36Sopenharmony_ci	pm_runtime_set_active(dev);
31962306a36Sopenharmony_ci	pm_runtime_allow(dev);
32062306a36Sopenharmony_ci	pm_runtime_mark_last_busy(dev);
32162306a36Sopenharmony_ci	pm_runtime_put_autosuspend(dev);
32262306a36Sopenharmony_ci}
32362306a36Sopenharmony_ci
32462306a36Sopenharmony_civoid ivpu_pm_disable(struct ivpu_device *vdev)
32562306a36Sopenharmony_ci{
32662306a36Sopenharmony_ci	pm_runtime_get_noresume(vdev->drm.dev);
32762306a36Sopenharmony_ci	pm_runtime_forbid(vdev->drm.dev);
32862306a36Sopenharmony_ci}
329