summaryrefslogtreecommitdiff
path: root/arch/arm64/kernel/fpsimd.c
diff options
context:
space:
mode:
Diffstat (limited to 'arch/arm64/kernel/fpsimd.c')
-rw-r--r--arch/arm64/kernel/fpsimd.c37
1 files changed, 12 insertions, 25 deletions
diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c
index 520b681a07bb..91e44ac7150f 100644
--- a/arch/arm64/kernel/fpsimd.c
+++ b/arch/arm64/kernel/fpsimd.c
@@ -679,7 +679,7 @@ static void fpsimd_to_sve(struct task_struct *task)
void *sst = task->thread.sve_state;
struct user_fpsimd_state const *fst = &task->thread.uw.fpsimd_state;
- if (!system_supports_sve())
+ if (!system_supports_sve() && !system_supports_sme())
return;
vq = sve_vq_from_vl(thread_get_cur_vl(&task->thread));
@@ -705,7 +705,7 @@ static void sve_to_fpsimd(struct task_struct *task)
unsigned int i;
__uint128_t const *p;
- if (!system_supports_sve())
+ if (!system_supports_sve() && !system_supports_sme())
return;
vl = thread_get_cur_vl(&task->thread);
@@ -835,7 +835,8 @@ void sve_sync_from_fpsimd_zeropad(struct task_struct *task)
void *sst = task->thread.sve_state;
struct user_fpsimd_state const *fst = &task->thread.uw.fpsimd_state;
- if (!test_tsk_thread_flag(task, TIF_SVE))
+ if (!test_tsk_thread_flag(task, TIF_SVE) &&
+ !thread_sm_enabled(&task->thread))
return;
vq = sve_vq_from_vl(thread_get_cur_vl(&task->thread));
@@ -909,7 +910,7 @@ int vec_set_vector_length(struct task_struct *task, enum vec_type type,
*/
task->thread.svcr &= ~(SVCR_SM_MASK |
SVCR_ZA_MASK);
- clear_thread_flag(TIF_SME);
+ clear_tsk_thread_flag(task, TIF_SME);
free_sme = true;
}
}
@@ -1178,9 +1179,6 @@ void sve_kernel_enable(const struct arm64_cpu_capabilities *__always_unused p)
*/
u64 read_zcr_features(void)
{
- u64 zcr;
- unsigned int vq_max;
-
/*
* Set the maximum possible VL, and write zeroes to all other
* bits to see if they stick.
@@ -1188,12 +1186,8 @@ u64 read_zcr_features(void)
sve_kernel_enable(NULL);
write_sysreg_s(ZCR_ELx_LEN_MASK, SYS_ZCR_EL1);
- zcr = read_sysreg_s(SYS_ZCR_EL1);
- zcr &= ~(u64)ZCR_ELx_LEN_MASK; /* find sticky 1s outside LEN field */
- vq_max = sve_vq_from_vl(sve_get_vl());
- zcr |= vq_max - 1; /* set LEN field to maximum effective value */
-
- return zcr;
+ /* Return LEN value that would be written to get the maximum VL */
+ return sve_vq_from_vl(sve_get_vl()) - 1;
}
void __init sve_setup(void)
@@ -1284,9 +1278,9 @@ void fpsimd_release_task(struct task_struct *dead_task)
* the interest of testability and predictability, the architecture
* guarantees that when ZA is enabled it will be zeroed.
*/
-void sme_alloc(struct task_struct *task)
+void sme_alloc(struct task_struct *task, bool flush)
{
- if (task->thread.sme_state) {
+ if (task->thread.sme_state && flush) {
memset(task->thread.sme_state, 0, sme_state_size(task));
return;
}
@@ -1348,9 +1342,6 @@ void fa64_kernel_enable(const struct arm64_cpu_capabilities *__always_unused p)
*/
u64 read_smcr_features(void)
{
- u64 smcr;
- unsigned int vq_max;
-
sme_kernel_enable(NULL);
/*
@@ -1359,12 +1350,8 @@ u64 read_smcr_features(void)
write_sysreg_s(read_sysreg_s(SYS_SMCR_EL1) | SMCR_ELx_LEN_MASK,
SYS_SMCR_EL1);
- smcr = read_sysreg_s(SYS_SMCR_EL1);
- smcr &= ~(u64)SMCR_ELx_LEN_MASK; /* Only the LEN field */
- vq_max = sve_vq_from_vl(sme_get_vl());
- smcr |= vq_max - 1; /* set LEN field to maximum effective value */
-
- return smcr;
+ /* Return LEN value that would be written to get the maximum VL */
+ return sve_vq_from_vl(sme_get_vl()) - 1;
}
void __init sme_setup(void)
@@ -1514,7 +1501,7 @@ void do_sme_acc(unsigned long esr, struct pt_regs *regs)
}
sve_alloc(current, false);
- sme_alloc(current);
+ sme_alloc(current, true);
if (!current->thread.sve_state || !current->thread.sme_state) {
force_sig(SIGKILL);
return;