src/sys/arch/arm64/dev/aplcpu.c

457 lines
11 KiB
C

/* $OpenBSD: aplcpu.c,v 1.9 2024/09/29 09:25:37 jsg Exp $ */
/*
* Copyright (c) 2022 Mark Kettenis <kettenis@openbsd.org>
*
* Permission to use, copy, modify, and distribute this software for any
* purpose with or without fee is hereby granted, provided that the above
* copyright notice and this permission notice appear in all copies.
*
* THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
* WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
* MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
* ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
* ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
* OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
*/
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/device.h>
#include <sys/malloc.h>
#include <sys/sensors.h>
#include <sys/sysctl.h>
#include <machine/bus.h>
#include <machine/fdt.h>
#include <dev/ofw/openfirm.h>
#include <dev/ofw/fdt.h>
#define DVFS_CMD 0x0020
#define DVFS_CMD_BUSY (1U << 31)
#define DVFS_CMD_SET (1 << 25)
#define DVFS_CMD_PS2_MASK (0x1f << 12)
#define DVFS_CMD_PS2_SHIFT 12
#define DVFS_CMD_PS1_MASK (0x1f << 0)
#define DVFS_CMD_PS1_SHIFT 0
#define DVFS_STATUS 0x50
#define DVFS_T8103_STATUS_CUR_PS_MASK (0xf << 4)
#define DVFS_T8103_STATUS_CUR_PS_SHIFT 4
#define DVFS_T8112_STATUS_CUR_PS_MASK (0x1f << 5)
#define DVFS_T8112_STATUS_CUR_PS_SHIFT 5
#define APLCPU_DEEP_WFI_LATENCY 10 /* microseconds */
struct opp {
uint64_t opp_hz;
uint32_t opp_level;
};
struct opp_table {
LIST_ENTRY(opp_table) ot_list;
uint32_t ot_phandle;
struct opp *ot_opp;
u_int ot_nopp;
uint64_t ot_opp_hz_min;
uint64_t ot_opp_hz_max;
};
#define APLCPU_MAX_CLUSTERS 8
struct aplcpu_softc {
struct device sc_dev;
bus_space_tag_t sc_iot;
bus_space_handle_t sc_ioh[APLCPU_MAX_CLUSTERS];
bus_size_t sc_ios[APLCPU_MAX_CLUSTERS];
int sc_node;
u_int sc_nclusters;
int sc_perflevel;
uint32_t sc_cur_ps_mask;
u_int sc_cur_ps_shift;
LIST_HEAD(, opp_table) sc_opp_tables;
struct opp_table *sc_opp_table[APLCPU_MAX_CLUSTERS];
uint64_t sc_opp_hz_min;
uint64_t sc_opp_hz_max;
struct ksensordev sc_sensordev;
struct ksensor sc_sensor[APLCPU_MAX_CLUSTERS];
};
int aplcpu_match(struct device *, void *, void *);
void aplcpu_attach(struct device *, struct device *, void *);
const struct cfattach aplcpu_ca = {
sizeof (struct aplcpu_softc), aplcpu_match, aplcpu_attach
};
struct cfdriver aplcpu_cd = {
NULL, "aplcpu", DV_DULL
};
void aplcpu_opp_init(struct aplcpu_softc *, int);
uint32_t aplcpu_opp_level(struct aplcpu_softc *, int);
int aplcpu_clockspeed(int *);
void aplcpu_setperf(int level);
void aplcpu_refresh_sensors(void *);
void aplcpu_idle_cycle(void);
void aplcpu_deep_wfi(void);
int
aplcpu_match(struct device *parent, void *match, void *aux)
{
struct fdt_attach_args *faa = aux;
return OF_is_compatible(faa->fa_node, "apple,soc-cpufreq") ||
OF_is_compatible(faa->fa_node, "apple,cluster-cpufreq");
}
void
aplcpu_attach(struct device *parent, struct device *self, void *aux)
{
struct aplcpu_softc *sc = (struct aplcpu_softc *)self;
struct fdt_attach_args *faa = aux;
struct cpu_info *ci;
CPU_INFO_ITERATOR cii;
int i;
if (faa->fa_nreg < 1) {
printf(": no registers\n");
return;
}
if (faa->fa_nreg > APLCPU_MAX_CLUSTERS) {
printf(": too many registers\n");
return;
}
sc->sc_iot = faa->fa_iot;
for (i = 0; i < faa->fa_nreg; i++) {
if (bus_space_map(sc->sc_iot, faa->fa_reg[i].addr,
faa->fa_reg[i].size, 0, &sc->sc_ioh[i])) {
printf(": can't map registers\n");
goto unmap;
}
sc->sc_ios[i] = faa->fa_reg[i].size;
}
printf("\n");
sc->sc_node = faa->fa_node;
sc->sc_nclusters = faa->fa_nreg;
if (OF_is_compatible(sc->sc_node, "apple,t8103-soc-cpufreq") ||
OF_is_compatible(sc->sc_node, "apple,t8103-cluster-cpufreq")) {
sc->sc_cur_ps_mask = DVFS_T8103_STATUS_CUR_PS_MASK;
sc->sc_cur_ps_shift = DVFS_T8103_STATUS_CUR_PS_SHIFT;
} else if (OF_is_compatible(sc->sc_node, "apple,t8112-soc-cpufreq") ||
OF_is_compatible(sc->sc_node, "apple,t8112-cluster-cpufreq")) {
sc->sc_cur_ps_mask = DVFS_T8112_STATUS_CUR_PS_MASK;
sc->sc_cur_ps_shift = DVFS_T8112_STATUS_CUR_PS_SHIFT;
}
sc->sc_opp_hz_min = UINT64_MAX;
sc->sc_opp_hz_max = 0;
LIST_INIT(&sc->sc_opp_tables);
CPU_INFO_FOREACH(cii, ci) {
aplcpu_opp_init(sc, ci->ci_node);
}
for (i = 0; i < sc->sc_nclusters; i++) {
sc->sc_sensor[i].type = SENSOR_FREQ;
sensor_attach(&sc->sc_sensordev, &sc->sc_sensor[i]);
}
aplcpu_refresh_sensors(sc);
strlcpy(sc->sc_sensordev.xname, sc->sc_dev.dv_xname,
sizeof(sc->sc_sensordev.xname));
sensordev_install(&sc->sc_sensordev);
sensor_task_register(sc, aplcpu_refresh_sensors, 1);
cpu_idle_cycle_fcn = aplcpu_idle_cycle;
cpu_suspend_cycle_fcn = aplcpu_deep_wfi;
cpu_cpuspeed = aplcpu_clockspeed;
cpu_setperf = aplcpu_setperf;
return;
unmap:
for (i = 0; i < faa->fa_nreg; i++) {
if (sc->sc_ios[i] == 0)
continue;
bus_space_unmap(sc->sc_iot, sc->sc_ioh[i], sc->sc_ios[i]);
}
}
void
aplcpu_opp_init(struct aplcpu_softc *sc, int node)
{
struct opp_table *ot;
int count, child;
uint32_t freq_domain[2], phandle;
uint32_t opp_hz, opp_level;
int i, j;
freq_domain[0] = OF_getpropint(node, "performance-domains", 0);
freq_domain[1] = 0;
if (freq_domain[0] == 0) {
if (OF_getpropintarray(node, "apple,freq-domain", freq_domain,
sizeof(freq_domain)) != sizeof(freq_domain))
return;
if (freq_domain[1] > APLCPU_MAX_CLUSTERS)
return;
}
if (freq_domain[0] != OF_getpropint(sc->sc_node, "phandle", 0))
return;
phandle = OF_getpropint(node, "operating-points-v2", 0);
if (phandle == 0)
return;
LIST_FOREACH(ot, &sc->sc_opp_tables, ot_list) {
if (ot->ot_phandle == phandle) {
sc->sc_opp_table[freq_domain[1]] = ot;
return;
}
}
node = OF_getnodebyphandle(phandle);
if (node == 0)
return;
if (!OF_is_compatible(node, "operating-points-v2"))
return;
count = 0;
for (child = OF_child(node); child != 0; child = OF_peer(child))
count++;
if (count == 0)
return;
ot = malloc(sizeof(struct opp_table), M_DEVBUF, M_ZERO | M_WAITOK);
ot->ot_phandle = phandle;
ot->ot_opp = mallocarray(count, sizeof(struct opp),
M_DEVBUF, M_ZERO | M_WAITOK);
ot->ot_nopp = count;
count = 0;
for (child = OF_child(node); child != 0; child = OF_peer(child)) {
opp_hz = OF_getpropint64(child, "opp-hz", 0);
opp_level = OF_getpropint(child, "opp-level", 0);
/* Insert into the array, keeping things sorted. */
for (i = 0; i < count; i++) {
if (opp_hz < ot->ot_opp[i].opp_hz)
break;
}
for (j = count; j > i; j--)
ot->ot_opp[j] = ot->ot_opp[j - 1];
ot->ot_opp[i].opp_hz = opp_hz;
ot->ot_opp[i].opp_level = opp_level;
count++;
}
ot->ot_opp_hz_min = ot->ot_opp[0].opp_hz;
ot->ot_opp_hz_max = ot->ot_opp[count - 1].opp_hz;
LIST_INSERT_HEAD(&sc->sc_opp_tables, ot, ot_list);
sc->sc_opp_table[freq_domain[1]] = ot;
/* Keep track of overall min/max frequency. */
if (sc->sc_opp_hz_min > ot->ot_opp_hz_min)
sc->sc_opp_hz_min = ot->ot_opp_hz_min;
if (sc->sc_opp_hz_max < ot->ot_opp_hz_max)
sc->sc_opp_hz_max = ot->ot_opp_hz_max;
}
uint32_t
aplcpu_opp_level(struct aplcpu_softc *sc, int cluster)
{
uint32_t opp_level;
uint64_t pstate;
if (sc->sc_cur_ps_mask) {
pstate = bus_space_read_8(sc->sc_iot, sc->sc_ioh[cluster],
DVFS_STATUS);
opp_level = (pstate & sc->sc_cur_ps_mask);
opp_level >>= sc->sc_cur_ps_shift;
} else {
pstate = bus_space_read_8(sc->sc_iot, sc->sc_ioh[cluster],
DVFS_CMD);
opp_level = (pstate & DVFS_CMD_PS1_MASK);
opp_level >>= DVFS_CMD_PS1_SHIFT;
}
return opp_level;
}
int
aplcpu_clockspeed(int *freq)
{
struct aplcpu_softc *sc;
struct opp_table *ot;
uint32_t opp_hz = 0, opp_level;
int i, j, k;
/*
* Clusters can run at different frequencies. We report the
* highest frequency among all clusters.
*/
for (i = 0; i < aplcpu_cd.cd_ndevs; i++) {
sc = aplcpu_cd.cd_devs[i];
if (sc == NULL)
continue;
for (j = 0; j < sc->sc_nclusters; j++) {
if (sc->sc_opp_table[j] == NULL)
continue;
opp_level = aplcpu_opp_level(sc, j);
/* Translate P-state to frequency. */
ot = sc->sc_opp_table[j];
for (k = 0; k < ot->ot_nopp; k++) {
if (ot->ot_opp[k].opp_level != opp_level)
continue;
opp_hz = MAX(opp_hz, ot->ot_opp[k].opp_hz);
}
}
}
if (opp_hz == 0)
return EINVAL;
*freq = opp_hz / 1000000;
return 0;
}
void
aplcpu_setperf(int level)
{
struct aplcpu_softc *sc;
struct opp_table *ot;
uint64_t min, max;
uint64_t level_hz;
uint32_t opp_level;
uint64_t reg;
int i, j, k, timo;
/*
* We let the CPU performance level span the entire range
* between the lowest frequency on any of the clusters and the
* highest frequency on any of the clusters. We pick a
* frequency within that range based on the performance level
* and set all the clusters to the frequency that is closest
* to but less than that frequency. This isn't a particularly
* sensible method but it is easy to implement and it is hard
* to come up with something more sensible given the
* constraints of the hw.setperf sysctl interface.
*/
for (i = 0; i < aplcpu_cd.cd_ndevs; i++) {
sc = aplcpu_cd.cd_devs[i];
if (sc == NULL)
continue;
min = sc->sc_opp_hz_min;
max = sc->sc_opp_hz_max;
level_hz = min + (level * (max - min)) / 100;
}
for (i = 0; i < aplcpu_cd.cd_ndevs; i++) {
sc = aplcpu_cd.cd_devs[i];
if (sc == NULL)
continue;
if (sc->sc_perflevel == level)
continue;
for (j = 0; j < sc->sc_nclusters; j++) {
if (sc->sc_opp_table[j] == NULL)
continue;
/* Translate performance level to a P-state. */
ot = sc->sc_opp_table[j];
opp_level = ot->ot_opp[0].opp_level;
for (k = 0; k < ot->ot_nopp; k++) {
if (ot->ot_opp[k].opp_hz <= level_hz &&
ot->ot_opp[k].opp_level >= opp_level)
opp_level = ot->ot_opp[k].opp_level;
}
/* Wait until P-state logic isn't busy. */
for (timo = 100; timo > 0; timo--) {
reg = bus_space_read_8(sc->sc_iot,
sc->sc_ioh[j], DVFS_CMD);
if ((reg & DVFS_CMD_BUSY) == 0)
break;
delay(1);
}
if (reg & DVFS_CMD_BUSY)
continue;
/* Set desired P-state. */
reg &= ~DVFS_CMD_PS1_MASK;
reg |= (opp_level << DVFS_CMD_PS1_SHIFT);
reg |= DVFS_CMD_SET;
bus_space_write_8(sc->sc_iot, sc->sc_ioh[j],
DVFS_CMD, reg);
}
sc->sc_perflevel = level;
}
}
void
aplcpu_refresh_sensors(void *arg)
{
struct aplcpu_softc *sc = arg;
struct opp_table *ot;
uint32_t opp_level;
int i, j;
for (i = 0; i < sc->sc_nclusters; i++) {
if (sc->sc_opp_table[i] == NULL)
continue;
opp_level = aplcpu_opp_level(sc, i);
/* Translate P-state to frequency. */
ot = sc->sc_opp_table[i];
for (j = 0; j < ot->ot_nopp; j++) {
if (ot->ot_opp[j].opp_level == opp_level) {
sc->sc_sensor[i].value = ot->ot_opp[j].opp_hz;
break;
}
}
}
}
void
aplcpu_idle_cycle(void)
{
struct cpu_info *ci = curcpu();
struct timeval start, stop;
u_long itime;
microuptime(&start);
if (ci->ci_prev_sleep > 3 * APLCPU_DEEP_WFI_LATENCY)
aplcpu_deep_wfi();
else
cpu_wfi();
microuptime(&stop);
timersub(&stop, &start, &stop);
itime = stop.tv_sec * 1000000 + stop.tv_usec;
ci->ci_last_itime = itime;
itime >>= 1;
ci->ci_prev_sleep = (ci->ci_prev_sleep + (ci->ci_prev_sleep >> 1)
+ itime) >> 1;
}