summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorPaolo Bonzini <pbonzini@redhat.com>2023-02-15 12:35:26 -0500
committerPaolo Bonzini <pbonzini@redhat.com>2023-02-15 12:35:26 -0500
commite4922088f8e90ea163281ba8e69b98f34a7a1b83 (patch)
tree1db574412a5365f285754dd0364d249581535ad8
parent33436335e93a1788a58443fc99c5ab320ce4b9d9 (diff)
parent5fc5b94a273655128159186c87662105db8afeb5 (diff)
downloadlinux-e4922088f8e90ea163281ba8e69b98f34a7a1b83.tar.gz
linux-e4922088f8e90ea163281ba8e69b98f34a7a1b83.tar.bz2
linux-e4922088f8e90ea163281ba8e69b98f34a7a1b83.zip
Merge tag 'kvm-s390-next-6.3-1' of https://git.kernel.org/pub/scm/linux/kernel/git/kvms390/linux into HEAD
* Two more V!=R patches * The last part of the cmpxchg patches * A few fixes
-rw-r--r--Documentation/virt/kvm/api.rst46
-rw-r--r--Documentation/virt/kvm/devices/vm.rst4
-rw-r--r--arch/s390/include/asm/asm-extable.h4
-rw-r--r--arch/s390/include/asm/cmpxchg.h109
-rw-r--r--arch/s390/include/asm/uaccess.h208
-rw-r--r--arch/s390/kvm/gaccess.c109
-rw-r--r--arch/s390/kvm/gaccess.h3
-rw-r--r--arch/s390/kvm/interrupt.c11
-rw-r--r--arch/s390/kvm/kvm-s390.c264
-rw-r--r--arch/s390/mm/extable.c9
-rw-r--r--drivers/s390/virtio/virtio_ccw.c46
-rw-r--r--include/uapi/linux/kvm.h8
-rw-r--r--tools/testing/selftests/kvm/Makefile3
-rw-r--r--tools/testing/selftests/kvm/s390x/memop.c672
14 files changed, 1170 insertions, 326 deletions
diff --git a/Documentation/virt/kvm/api.rst b/Documentation/virt/kvm/api.rst
index 1b1f721e3fd9..62de0768d6aa 100644
--- a/Documentation/virt/kvm/api.rst
+++ b/Documentation/virt/kvm/api.rst
@@ -3736,7 +3736,7 @@ The fields in each entry are defined as follows:
:Parameters: struct kvm_s390_mem_op (in)
:Returns: = 0 on success,
< 0 on generic error (e.g. -EFAULT or -ENOMEM),
- > 0 if an exception occurred while walking the page tables
+ 16 bit program exception code if the access causes such an exception
Read or write data from/to the VM's memory.
The KVM_CAP_S390_MEM_OP_EXTENSION capability specifies what functionality is
@@ -3754,6 +3754,8 @@ Parameters are specified via the following structure::
struct {
__u8 ar; /* the access register number */
__u8 key; /* access key, ignored if flag unset */
+ __u8 pad1[6]; /* ignored */
+ __u64 old_addr; /* ignored if flag unset */
};
__u32 sida_offset; /* offset into the sida */
__u8 reserved[32]; /* ignored */
@@ -3781,6 +3783,7 @@ Possible operations are:
* ``KVM_S390_MEMOP_ABSOLUTE_WRITE``
* ``KVM_S390_MEMOP_SIDA_READ``
* ``KVM_S390_MEMOP_SIDA_WRITE``
+ * ``KVM_S390_MEMOP_ABSOLUTE_CMPXCHG``
Logical read/write:
^^^^^^^^^^^^^^^^^^^
@@ -3829,7 +3832,7 @@ the checks required for storage key protection as one operation (as opposed to
user space getting the storage keys, performing the checks, and accessing
memory thereafter, which could lead to a delay between check and access).
Absolute accesses are permitted for the VM ioctl if KVM_CAP_S390_MEM_OP_EXTENSION
-is > 0.
+has the KVM_S390_MEMOP_EXTENSION_CAP_BASE bit set.
Currently absolute accesses are not permitted for VCPU ioctls.
Absolute accesses are permitted for non-protected guests only.
@@ -3837,7 +3840,26 @@ Supported flags:
* ``KVM_S390_MEMOP_F_CHECK_ONLY``
* ``KVM_S390_MEMOP_F_SKEY_PROTECTION``
-The semantics of the flags are as for logical accesses.
+The semantics of the flags common with logical accesses are as for logical
+accesses.
+
+Absolute cmpxchg:
+^^^^^^^^^^^^^^^^^
+
+Perform cmpxchg on absolute guest memory. Intended for use with the
+KVM_S390_MEMOP_F_SKEY_PROTECTION flag.
+Instead of doing an unconditional write, the access occurs only if the target
+location contains the value pointed to by "old_addr".
+This is performed as an atomic cmpxchg with the length specified by the "size"
+parameter. "size" must be a power of two up to and including 16.
+If the exchange did not take place because the target value doesn't match the
+old value, the value "old_addr" points to is replaced by the target value.
+User space can tell if an exchange took place by checking if this replacement
+occurred. The cmpxchg op is permitted for the VM ioctl if
+KVM_CAP_S390_MEM_OP_EXTENSION has flag KVM_S390_MEMOP_EXTENSION_CAP_CMPXCHG set.
+
+Supported flags:
+ * ``KVM_S390_MEMOP_F_SKEY_PROTECTION``
SIDA read/write:
^^^^^^^^^^^^^^^^
@@ -4457,6 +4479,18 @@ not holding a previously reported uncorrected error).
:Parameters: struct kvm_s390_cmma_log (in, out)
:Returns: 0 on success, a negative value on error
+Errors:
+
+ ====== =============================================================
+ ENOMEM not enough memory can be allocated to complete the task
+ ENXIO if CMMA is not enabled
+ EINVAL if KVM_S390_CMMA_PEEK is not set but migration mode was not enabled
+ EINVAL if KVM_S390_CMMA_PEEK is not set but dirty tracking has been
+ disabled (and thus migration mode was automatically disabled)
+ EFAULT if the userspace address is invalid or if no page table is
+ present for the addresses (e.g. when using hugepages).
+ ====== =============================================================
+
This ioctl is used to get the values of the CMMA bits on the s390
architecture. It is meant to be used in two scenarios:
@@ -4537,12 +4571,6 @@ mask is unused.
values points to the userspace buffer where the result will be stored.
-This ioctl can fail with -ENOMEM if not enough memory can be allocated to
-complete the task, with -ENXIO if CMMA is not enabled, with -EINVAL if
-KVM_S390_CMMA_PEEK is not set but migration mode was not enabled, with
--EFAULT if the userspace address is invalid or if no page table is
-present for the addresses (e.g. when using hugepages).
-
4.108 KVM_S390_SET_CMMA_BITS
----------------------------
diff --git a/Documentation/virt/kvm/devices/vm.rst b/Documentation/virt/kvm/devices/vm.rst
index 60acc39e0e93..147efec626e5 100644
--- a/Documentation/virt/kvm/devices/vm.rst
+++ b/Documentation/virt/kvm/devices/vm.rst
@@ -302,6 +302,10 @@ Allows userspace to start migration mode, needed for PGSTE migration.
Setting this attribute when migration mode is already active will have
no effects.
+Dirty tracking must be enabled on all memslots, else -EINVAL is returned. When
+dirty tracking is disabled on any memslot, migration mode is automatically
+stopped.
+
:Parameters: none
:Returns: -ENOMEM if there is not enough free memory to start migration mode;
-EINVAL if the state of the VM is invalid (e.g. no memory defined);
diff --git a/arch/s390/include/asm/asm-extable.h b/arch/s390/include/asm/asm-extable.h
index b74f1070ddb2..55a02a153dfc 100644
--- a/arch/s390/include/asm/asm-extable.h
+++ b/arch/s390/include/asm/asm-extable.h
@@ -12,6 +12,7 @@
#define EX_TYPE_UA_STORE 3
#define EX_TYPE_UA_LOAD_MEM 4
#define EX_TYPE_UA_LOAD_REG 5
+#define EX_TYPE_UA_LOAD_REGPAIR 6
#define EX_DATA_REG_ERR_SHIFT 0
#define EX_DATA_REG_ERR GENMASK(3, 0)
@@ -85,4 +86,7 @@
#define EX_TABLE_UA_LOAD_REG(_fault, _target, _regerr, _regzero) \
__EX_TABLE_UA(__ex_table, _fault, _target, EX_TYPE_UA_LOAD_REG, _regerr, _regzero, 0)
+#define EX_TABLE_UA_LOAD_REGPAIR(_fault, _target, _regerr, _regzero) \
+ __EX_TABLE_UA(__ex_table, _fault, _target, EX_TYPE_UA_LOAD_REGPAIR, _regerr, _regzero, 0)
+
#endif /* __ASM_EXTABLE_H */
diff --git a/arch/s390/include/asm/cmpxchg.h b/arch/s390/include/asm/cmpxchg.h
index 84c3f0d576c5..3f26416c2ad8 100644
--- a/arch/s390/include/asm/cmpxchg.h
+++ b/arch/s390/include/asm/cmpxchg.h
@@ -88,67 +88,90 @@ static __always_inline unsigned long __cmpxchg(unsigned long address,
unsigned long old,
unsigned long new, int size)
{
- unsigned long prev, tmp;
- int shift;
-
switch (size) {
- case 1:
+ case 1: {
+ unsigned int prev, shift, mask;
+
shift = (3 ^ (address & 3)) << 3;
address ^= address & 3;
+ old = (old & 0xff) << shift;
+ new = (new & 0xff) << shift;
+ mask = ~(0xff << shift);
asm volatile(
- " l %0,%2\n"
- "0: nr %0,%5\n"
- " lr %1,%0\n"
- " or %0,%3\n"
- " or %1,%4\n"
- " cs %0,%1,%2\n"
- " jnl 1f\n"
- " xr %1,%0\n"
- " nr %1,%5\n"
- " jnz 0b\n"
+ " l %[prev],%[address]\n"
+ " nr %[prev],%[mask]\n"
+ " xilf %[mask],0xffffffff\n"
+ " or %[new],%[prev]\n"
+ " or %[prev],%[tmp]\n"
+ "0: lr %[tmp],%[prev]\n"
+ " cs %[prev],%[new],%[address]\n"
+ " jnl 1f\n"
+ " xr %[tmp],%[prev]\n"
+ " xr %[new],%[tmp]\n"
+ " nr %[tmp],%[mask]\n"
+ " jz 0b\n"
"1:"
- : "=&d" (prev), "=&d" (tmp), "+Q" (*(int *) address)
- : "d" ((old & 0xff) << shift),
- "d" ((new & 0xff) << shift),
- "d" (~(0xff << shift))
- : "memory", "cc");
+ : [prev] "=&d" (prev),
+ [address] "+Q" (*(int *)address),
+ [tmp] "+&d" (old),
+ [new] "+&d" (new),
+ [mask] "+&d" (mask)
+ :: "memory", "cc");
return prev >> shift;
- case 2:
+ }
+ case 2: {
+ unsigned int prev, shift, mask;
+
shift = (2 ^ (address & 2)) << 3;
address ^= address & 2;
+ old = (old & 0xffff) << shift;
+ new = (new & 0xffff) << shift;
+ mask = ~(0xffff << shift);
asm volatile(
- " l %0,%2\n"
- "0: nr %0,%5\n"
- " lr %1,%0\n"
- " or %0,%3\n"
- " or %1,%4\n"
- " cs %0,%1,%2\n"
- " jnl 1f\n"
- " xr %1,%0\n"
- " nr %1,%5\n"
- " jnz 0b\n"
+ " l %[prev],%[address]\n"
+ " nr %[prev],%[mask]\n"
+ " xilf %[mask],0xffffffff\n"
+ " or %[new],%[prev]\n"
+ " or %[prev],%[tmp]\n"
+ "0: lr %[tmp],%[prev]\n"
+ " cs %[prev],%[new],%[address]\n"
+ " jnl 1f\n"
+ " xr %[tmp],%[prev]\n"
+ " xr %[new],%[tmp]\n"
+ " nr %[tmp],%[mask]\n"
+ " jz 0b\n"
"1:"
- : "=&d" (prev), "=&d" (tmp), "+Q" (*(int *) address)
- : "d" ((old & 0xffff) << shift),
- "d" ((new & 0xffff) << shift),
- "d" (~(0xffff << shift))
- : "memory", "cc");
+ : [prev] "=&d" (prev),
+ [address] "+Q" (*(int *)address),
+ [tmp] "+&d" (old),
+ [new] "+&d" (new),
+ [mask] "+&d" (mask)
+ :: "memory", "cc");
return prev >> shift;
- case 4:
+ }
+ case 4: {
+ unsigned int prev = old;
+
asm volatile(
- " cs %0,%3,%1\n"
- : "=&d" (prev), "+Q" (*(int *) address)
- : "0" (old), "d" (new)
+ " cs %[prev],%[new],%[address]\n"
+ : [prev] "+&d" (prev),
+ [address] "+Q" (*(int *)address)
+ : [new] "d" (new)
: "memory", "cc");
return prev;
- case 8:
+ }
+ case 8: {
+ unsigned long prev = old;
+
asm volatile(
- " csg %0,%3,%1\n"
- : "=&d" (prev), "+QS" (*(long *) address)
- : "0" (old), "d" (new)
+ " csg %[prev],%[new],%[address]\n"
+ : [prev] "+&d" (prev),
+ [address] "+QS" (*(long *)address)
+ : [new] "d" (new)
: "memory", "cc");
return prev;
}
+ }
__cmpxchg_called_with_bad_pointer();
return old;
}
diff --git a/arch/s390/include/asm/uaccess.h b/arch/s390/include/asm/uaccess.h
index f7038b800cc3..8a8c64a678c4 100644
--- a/arch/s390/include/asm/uaccess.h
+++ b/arch/s390/include/asm/uaccess.h
@@ -390,4 +390,212 @@ do { \
goto err_label; \
} while (0)
+void __cmpxchg_user_key_called_with_bad_pointer(void);
+
+#define CMPXCHG_USER_KEY_MAX_LOOPS 128
+
+static __always_inline int __cmpxchg_user_key(unsigned long address, void *uval,
+ __uint128_t old, __uint128_t new,
+ unsigned long key, int size)
+{
+ int rc = 0;
+
+ switch (size) {
+ case 1: {
+ unsigned int prev, shift, mask, _old, _new;
+ unsigned long count;
+
+ shift = (3 ^ (address & 3)) << 3;
+ address ^= address & 3;
+ _old = ((unsigned int)old & 0xff) << shift;
+ _new = ((unsigned int)new & 0xff) << shift;
+ mask = ~(0xff << shift);
+ asm volatile(
+ " spka 0(%[key])\n"
+ " sacf 256\n"
+ " llill %[count],%[max_loops]\n"
+ "0: l %[prev],%[address]\n"
+ "1: nr %[prev],%[mask]\n"
+ " xilf %[mask],0xffffffff\n"
+ " or %[new],%[prev]\n"
+ " or %[prev],%[tmp]\n"
+ "2: lr %[tmp],%[prev]\n"
+ "3: cs %[prev],%[new],%[address]\n"
+ "4: jnl 5f\n"
+ " xr %[tmp],%[prev]\n"
+ " xr %[new],%[tmp]\n"
+ " nr %[tmp],%[mask]\n"
+ " jnz 5f\n"
+ " brct %[count],2b\n"
+ "5: sacf 768\n"
+ " spka %[default_key]\n"
+ EX_TABLE_UA_LOAD_REG(0b, 5b, %[rc], %[prev])
+ EX_TABLE_UA_LOAD_REG(1b, 5b, %[rc], %[prev])
+ EX_TABLE_UA_LOAD_REG(3b, 5b, %[rc], %[prev])
+ EX_TABLE_UA_LOAD_REG(4b, 5b, %[rc], %[prev])
+ : [rc] "+&d" (rc),
+ [prev] "=&d" (prev),
+ [address] "+Q" (*(int *)address),
+ [tmp] "+&d" (_old),
+ [new] "+&d" (_new),
+ [mask] "+&d" (mask),
+ [count] "=a" (count)
+ : [key] "%[count]" (key << 4),
+ [default_key] "J" (PAGE_DEFAULT_KEY),
+ [max_loops] "J" (CMPXCHG_USER_KEY_MAX_LOOPS)
+ : "memory", "cc");
+ *(unsigned char *)uval = prev >> shift;
+ if (!count)
+ rc = -EAGAIN;
+ return rc;
+ }
+ case 2: {
+ unsigned int prev, shift, mask, _old, _new;
+ unsigned long count;
+
+ shift = (2 ^ (address & 2)) << 3;
+ address ^= address & 2;
+ _old = ((unsigned int)old & 0xffff) << shift;
+ _new = ((unsigned int)new & 0xffff) << shift;
+ mask = ~(0xffff << shift);
+ asm volatile(
+ " spka 0(%[key])\n"
+ " sacf 256\n"
+ " llill %[count],%[max_loops]\n"
+ "0: l %[prev],%[address]\n"
+ "1: nr %[prev],%[mask]\n"
+ " xilf %[mask],0xffffffff\n"
+ " or %[new],%[prev]\n"
+ " or %[prev],%[tmp]\n"
+ "2: lr %[tmp],%[prev]\n"
+ "3: cs %[prev],%[new],%[address]\n"
+ "4: jnl 5f\n"
+ " xr %[tmp],%[prev]\n"
+ " xr %[new],%[tmp]\n"
+ " nr %[tmp],%[mask]\n"
+ " jnz 5f\n"
+ " brct %[count],2b\n"
+ "5: sacf 768\n"
+ " spka %[default_key]\n"
+ EX_TABLE_UA_LOAD_REG(0b, 5b, %[rc], %[prev])
+ EX_TABLE_UA_LOAD_REG(1b, 5b, %[rc], %[prev])
+ EX_TABLE_UA_LOAD_REG(3b, 5b, %[rc], %[prev])
+ EX_TABLE_UA_LOAD_REG(4b, 5b, %[rc], %[prev])
+ : [rc] "+&d" (rc),
+ [prev] "=&d" (prev),
+ [address] "+Q" (*(int *)address),
+ [tmp] "+&d" (_old),
+ [new] "+&d" (_new),
+ [mask] "+&d" (mask),
+ [count] "=a" (count)
+ : [key] "%[count]" (key << 4),
+ [default_key] "J" (PAGE_DEFAULT_KEY),
+ [max_loops] "J" (CMPXCHG_USER_KEY_MAX_LOOPS)
+ : "memory", "cc");
+ *(unsigned short *)uval = prev >> shift;
+ if (!count)
+ rc = -EAGAIN;
+ return rc;
+ }
+ case 4: {
+ unsigned int prev = old;
+
+ asm volatile(
+ " spka 0(%[key])\n"
+ " sacf 256\n"
+ "0: cs %[prev],%[new],%[address]\n"
+ "1: sacf 768\n"
+ " spka %[default_key]\n"
+ EX_TABLE_UA_LOAD_REG(0b, 1b, %[rc], %[prev])
+ EX_TABLE_UA_LOAD_REG(1b, 1b, %[rc], %[prev])
+ : [rc] "+&d" (rc),
+ [prev] "+&d" (prev),
+ [address] "+Q" (*(int *)address)
+ : [new] "d" ((unsigned int)new),
+ [key] "a" (key << 4),
+ [default_key] "J" (PAGE_DEFAULT_KEY)
+ : "memory", "cc");
+ *(unsigned int *)uval = prev;
+ return rc;
+ }
+ case 8: {
+ unsigned long prev = old;
+
+ asm volatile(
+ " spka 0(%[key])\n"
+ " sacf 256\n"
+ "0: csg %[prev],%[new],%[address]\n"
+ "1: sacf 768\n"
+ " spka %[default_key]\n"
+ EX_TABLE_UA_LOAD_REG(0b, 1b, %[rc], %[prev])
+ EX_TABLE_UA_LOAD_REG(1b, 1b, %[rc], %[prev])
+ : [rc] "+&d" (rc),
+ [prev] "+&d" (prev),
+ [address] "+QS" (*(long *)address)
+ : [new] "d" ((unsigned long)new),
+ [key] "a" (key << 4),
+ [default_key] "J" (PAGE_DEFAULT_KEY)
+ : "memory", "cc");
+ *(unsigned long *)uval = prev;
+ return rc;
+ }
+ case 16: {
+ __uint128_t prev = old;
+
+ asm volatile(
+ " spka 0(%[key])\n"
+ " sacf 256\n"
+ "0: cdsg %[prev],%[new],%[address]\n"
+ "1: sacf 768\n"
+ " spka %[default_key]\n"
+ EX_TABLE_UA_LOAD_REGPAIR(0b, 1b, %[rc], %[prev])
+ EX_TABLE_UA_LOAD_REGPAIR(1b, 1b, %[rc], %[prev])
+ : [rc] "+&d" (rc),
+ [prev] "+&d" (prev),
+ [address] "+QS" (*(__int128_t *)address)
+ : [new] "d" (new),
+ [key] "a" (key << 4),
+ [default_key] "J" (PAGE_DEFAULT_KEY)
+ : "memory", "cc");
+ *(__uint128_t *)uval = prev;
+ return rc;
+ }
+ }
+ __cmpxchg_user_key_called_with_bad_pointer();
+ return rc;
+}
+
+/**
+ * cmpxchg_user_key() - cmpxchg with user space target, honoring storage keys
+ * @ptr: User space address of value to compare to @old and exchange with
+ * @new. Must be aligned to sizeof(*@ptr).
+ * @uval: Address where the old value of *@ptr is written to.
+ * @old: Old value. Compared to the content pointed to by @ptr in order to
+ * determine if the exchange occurs. The old value read from *@ptr is
+ * written to *@uval.
+ * @new: New value to place at *@ptr.
+ * @key: Access key to use for checking storage key protection.
+ *
+ * Perform a cmpxchg on a user space target, honoring storage key protection.
+ * @key alone determines how key checking is performed, neither
+ * storage-protection-override nor fetch-protection-override apply.
+ * The caller must compare *@uval and @old to determine if values have been
+ * exchanged. In case of an exception *@uval is set to zero.
+ *
+ * Return: 0: cmpxchg executed
+ * -EFAULT: an exception happened when trying to access *@ptr
+ * -EAGAIN: maxed out number of retries (byte and short only)
+ */
+#define cmpxchg_user_key(ptr, uval, old, new, key) \
+({ \
+ __typeof__(ptr) __ptr = (ptr); \
+ __typeof__(uval) __uval = (uval); \
+ \
+ BUILD_BUG_ON(sizeof(*(__ptr)) != sizeof(*(__uval))); \
+ might_fault(); \
+ __chk_user_ptr(__ptr); \
+ __cmpxchg_user_key((unsigned long)(__ptr), (void *)(__uval), \
+ (old), (new), (key), sizeof(*(__ptr))); \
+})
+
#endif /* __S390_UACCESS_H */
diff --git a/arch/s390/kvm/gaccess.c b/arch/s390/kvm/gaccess.c
index 0243b6e38d36..3eb85f254881 100644
--- a/arch/s390/kvm/gaccess.c
+++ b/arch/s390/kvm/gaccess.c
@@ -1162,6 +1162,115 @@ int access_guest_real(struct kvm_vcpu *vcpu, unsigned long gra,
}
/**
+ * cmpxchg_guest_abs_with_key() - Perform cmpxchg on guest absolute address.
+ * @kvm: Virtual machine instance.
+ * @gpa: Absolute guest address of the location to be changed.
+ * @len: Operand length of the cmpxchg, required: 1 <= len <= 16. Providing a
+ * non power of two will result in failure.
+ * @old_addr: Pointer to old value. If the location at @gpa contains this value,
+ * the exchange will succeed. After calling cmpxchg_guest_abs_with_key()
+ * *@old_addr contains the value at @gpa before the attempt to
+ * exchange the value.
+ * @new: The value to place at @gpa.
+ * @access_key: The access key to use for the guest access.
+ * @success: output value indicating if an exchange occurred.
+ *
+ * Atomically exchange the value at @gpa by @new, if it contains *@old.
+ * Honors storage keys.
+ *
+ * Return: * 0: successful exchange
+ * * >0: a program interruption code indicating the reason cmpxchg could
+ * not be attempted
+ * * -EINVAL: address misaligned or len not power of two
+ * * -EAGAIN: transient failure (len 1 or 2)
+ * * -EOPNOTSUPP: read-only memslot (should never occur)
+ */
+int cmpxchg_guest_abs_with_key(struct kvm *kvm, gpa_t gpa, int len,
+ __uint128_t *old_addr, __uint128_t new,
+ u8 access_key, bool *success)
+{
+ gfn_t gfn = gpa_to_gfn(gpa);
+ struct kvm_memory_slot *slot = gfn_to_memslot(kvm, gfn);
+ bool writable;
+ hva_t hva;
+ int ret;
+
+ if (!IS_ALIGNED(gpa, len))
+ return -EINVAL;
+
+ hva = gfn_to_hva_memslot_prot(slot, gfn, &writable);
+ if (kvm_is_error_hva(hva))
+ return PGM_ADDRESSING;
+ /*
+ * Check if it's a read-only memslot, even though that cannot occur
+ * since those are unsupported.
+ * Don't try to actually handle that case.
+ */
+ if (!writable)
+ return -EOPNOTSUPP;
+
+ hva += offset_in_page(gpa);
+ /*
+ * The cmpxchg_user_key macro depends on the type of "old", so we need
+ * a case for each valid length and get some code duplication as long
+ * as we don't introduce a new macro.
+ */
+ switch (len) {
+ case 1: {
+ u8 old;
+
+ ret = cmpxchg_user_key((u8 __user *)hva, &old, *old_addr, new, access_key);
+ *success = !ret && old == *old_addr;
+ *old_addr = old;
+ break;
+ }
+ case 2: {
+ u16 old;
+
+ ret = cmpxchg_user_key((u16 __user *)hva, &old, *old_addr, new, access_key);
+ *success = !ret && old == *old_addr;
+ *old_addr = old;
+ break;
+ }
+ case 4: {
+ u32 old;
+
+ ret = cmpxchg_user_key((u32 __user *)hva, &old, *old_addr, new, access_key);
+ *success = !ret && old == *old_addr;
+ *old_addr = old;
+ break;
+ }
+ case 8: {
+ u64 old;
+
+ ret = cmpxchg_user_key((u64 __user *)hva, &old, *old_addr, new, access_key);
+ *success = !ret && old == *old_addr;
+ *old_addr = old;
+ break;
+ }
+ case 16: {
+ __uint128_t old;
+
+ ret = cmpxchg_user_key((__uint128_t __user *)hva, &old, *old_addr, new, access_key);
+ *success = !ret && old == *old_addr;
+ *old_addr = old;
+ break;
+ }
+ default:
+ return -EINVAL;
+ }
+ if (*success)
+ mark_page_dirty_in_slot(kvm, slot, gfn);
+ /*
+ * Assume that the fault is caused by protection, either key protection
+ * or user page write protection.
+ */
+ if (ret == -EFAULT)
+ ret = PGM_PROTECTION;
+ return ret;
+}
+
+/**
* guest_translate_address_with_key - translate guest logical into guest absolute address
* @vcpu: virtual cpu
* @gva: Guest virtual address
diff --git a/arch/s390/kvm/gaccess.h b/arch/s390/kvm/gaccess.h
index 9408d6cc8e2c..b320d12aa049 100644
--- a/arch/s390/kvm/gaccess.h
+++ b/arch/s390/kvm/gaccess.h
@@ -206,6 +206,9 @@ int access_guest_with_key(struct kvm_vcpu *vcpu, unsigned long ga, u8 ar,
int access_guest_real(struct kvm_vcpu *vcpu, unsigned long gra,
void *data, unsigned long len, enum gacc_mode mode);
+int cmpxchg_guest_abs_with_key(struct kvm *kvm, gpa_t gpa, int len, __uint128_t *old,
+ __uint128_t new, u8 access_key, bool *success);
+
/**
* write_guest_with_key - copy data from kernel space to guest space
* @vcpu: virtual cpu
diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c
index 8edb3bee74b6..9250fde1f97d 100644
--- a/arch/s390/kvm/interrupt.c
+++ b/arch/s390/kvm/interrupt.c
@@ -3103,9 +3103,9 @@ static enum hrtimer_restart gisa_vcpu_kicker(struct hrtimer *timer)
static void process_gib_alert_list(void)
{
struct kvm_s390_gisa_interrupt *gi;
+ u32 final, gisa_phys, origin = 0UL;
struct kvm_s390_gisa *gisa;
struct kvm *kvm;
- u32 final, origin = 0UL;
do {
/*
@@ -3131,9 +3131,10 @@ static void process_gib_alert_list(void)
* interruptions asap.
*/
while (origin & GISA_ADDR_MASK) {
- gisa = (struct kvm_s390_gisa *)(u64)origin;
+ gisa_phys = origin;
+ gisa = phys_to_virt(gisa_phys);
origin = gisa->next_alert;
- gisa->next_alert = (u32)(u64)gisa;
+ gisa->next_alert = gisa_phys;
kvm = container_of(gisa, struct sie_page2, gisa)->kvm;
gi = &kvm->arch.gisa_int;
if (hrtimer_active(&gi->timer))
@@ -3417,6 +3418,7 @@ void kvm_s390_gib_destroy(void)
int __init kvm_s390_gib_init(u8 nisc)
{
+ u32 gib_origin;
int rc = 0;
if (!css_general_characteristics.aiv) {
@@ -3438,7 +3440,8 @@ int __init kvm_s390_gib_init(u8 nisc)
}
gib->nisc = nisc;
- if (chsc_sgib((u32)(u64)gib)) {
+ gib_origin = virt_to_phys(gib);
+ if (chsc_sgib(gib_origin)) {
pr_err("Associating the GIB with the AIV facility failed\n");
free_page((unsigned long)gib);
gib = NULL;
diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
index bd25076aa19b..39b36562c043 100644
--- a/arch/s390/kvm/kvm-s390.c
+++ b/arch/s390/kvm/kvm-s390.c
@@ -573,7 +573,6 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
case KVM_CAP_S390_VCPU_RESETS:
case KVM_CAP_SET_GUEST_DEBUG:
case KVM_CAP_S390_DIAG318:
- case KVM_CAP_S390_MEM_OP_EXTENSION:
r = 1;
break;
case KVM_CAP_SET_GUEST_DEBUG2:
@@ -587,6 +586,15 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
case KVM_CAP_S390_MEM_OP:
r = MEM_OP_MAX_SIZE;
break;
+ case KVM_CAP_S390_MEM_OP_EXTENSION:
+ /*
+ * Flag bits indicating which extensions are supported.
+ * If r > 0, the base extension must also be supported/indicated,
+ * in order to maintain backwards compatibility.
+ */
+ r = KVM_S390_MEMOP_EXTENSION_CAP_BASE |
+ KVM_S390_MEMOP_EXTENSION_CAP_CMPXCHG;
+ break;
case KVM_CAP_NR_VCPUS:
case KVM_CAP_MAX_VCPUS:
case KVM_CAP_MAX_VCPU_ID:
@@ -2753,41 +2761,33 @@ static int kvm_s390_handle_pv(struct kvm *kvm, struct kvm_pv_cmd *cmd)
return r;
}
-static bool access_key_invalid(u8 access_key)
-{
- return access_key > 0xf;
-}
-
-static int kvm_s390_vm_mem_op(struct kvm *kvm, struct kvm_s390_mem_op *mop)
+static int mem_op_validate_common(struct kvm_s390_mem_op *mop, u64 supported_flags)
{
- void __user *uaddr = (void __user *)mop->buf;
- u64 supported_flags;
- void *tmpbuf = NULL;
- int r, srcu_idx;
-
- supported_flags = KVM_S390_MEMOP_F_SKEY_PROTECTION
- | KVM_S390_MEMOP_F_CHECK_ONLY;
if (mop->flags & ~supported_flags || !mop->size)
return -EINVAL;
if (mop->size > MEM_OP_MAX_SIZE)
return -E2BIG;
- /*
- * This is technically a heuristic only, if the kvm->lock is not
- * taken, it is not guaranteed that the vm is/remains non-protected.
- * This is ok from a kernel perspective, wrongdoing is detected
- * on the access, -EFAULT is returned and the vm may crash the
- * next time it accesses the memory in question.
- * There is no sane usecase to do switching and a memop on two
- * different CPUs at the same time.
- */
- if (kvm_s390_pv_get_handle(kvm))
- return -EINVAL;
if (mop->flags & KVM_S390_MEMOP_F_SKEY_PROTECTION) {
- if (access_key_invalid(mop->key))
+ if (mop->key > 0xf)
return -EINVAL;
} else {
mop->key = 0;
}
+ return 0;
+}
+
+static int kvm_s390_vm_mem_op_abs(struct kvm *kvm, struct kvm_s390_mem_op *mop)
+{
+ void __user *uaddr = (void __user *)mop->buf;
+ enum gacc_mode acc_mode;
+ void *tmpbuf = NULL;
+ int r, srcu_idx;
+
+ r = mem_op_validate_common(mop, KVM_S390_MEMOP_F_SKEY_PROTECTION |
+ KVM_S390_MEMOP_F_CHECK_ONLY);
+ if (r)
+ return r;
+
if (!(mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY)) {
tmpbuf = vmalloc(mop->size);
if (!tmpbuf)
@@ -2801,35 +2801,25 @@ static int kvm_s390_vm_mem_op(struct kvm *kvm, struct kvm_s390_mem_op *mop)
goto out_unlock;
}
- switch (mop->op) {
- case KVM_S390_MEMOP_ABSOLUTE_READ: {
- if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
- r = check_gpa_range(kvm, mop->gaddr, mop->size, GACC_FETCH, mop->key);
- } else {
- r = access_guest_abs_with_key(kvm, mop->gaddr, tmpbuf,
- mop->size, GACC_FETCH, mop->key);
- if (r == 0) {
- if (copy_to_user(uaddr, tmpbuf, mop->size))
- r = -EFAULT;
- }
- }
- break;
+ acc_mode = mop->op == KVM_S390_MEMOP_ABSOLUTE_READ ? GACC_FETCH : GACC_STORE;
+ if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
+ r = check_gpa_range(kvm, mop->gaddr, mop->size, acc_mode, mop->key);
+ goto out_unlock;
}
- case KVM_S390_MEMOP_ABSOLUTE_WRITE: {
- if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
- r = check_gpa_range(kvm, mop->gaddr, mop->size, GACC_STORE, mop->key);
- } else {
- if (copy_from_user(tmpbuf, uaddr, mop->size)) {
- r = -EFAULT;
- break;
- }
- r = access_guest_abs_with_key(kvm, mop->gaddr, tmpbuf,
- mop->size, GACC_STORE, mop->key);
+ if (acc_mode == GACC_FETCH) {
+ r = access_guest_abs_with_key(kvm, mop->gaddr, tmpbuf,
+ mop->size, GACC_FETCH, mop->key);
+ if (r)
+ goto out_unlock;
+ if (copy_to_user(uaddr, tmpbuf, mop->size))
+ r = -EFAULT;
+ } else {
+ if (copy_from_user(tmpbuf, uaddr, mop->size)) {
+ r = -EFAULT;
+ goto out_unlock;
}
- break;
- }
- default:
- r = -EINVAL;
+ r = access_guest_abs_with_key(kvm, mop->gaddr, tmpbuf,
+ mop->size, GACC_STORE, mop->key);
}
out_unlock:
@@ -2839,6 +2829,75 @@ out_unlock:
return r;
}
+static int kvm_s390_vm_mem_op_cmpxchg(struct kvm *kvm, struct kvm_s390_mem_op *mop)
+{
+ void __user *uaddr = (void __user *)mop->buf;
+ void __user *old_addr = (void __user *)mop->old_addr;
+ union {
+ __uint128_t quad;
+ char raw[sizeof(__uint128_t)];
+ } old = { .quad = 0}, new = { .quad = 0 };
+ unsigned int off_in_quad = sizeof(new) - mop->size;
+ int r, srcu_idx;
+ bool success;
+
+ r = mem_op_validate_common(mop, KVM_S390_MEMOP_F_SKEY_PROTECTION);
+ if (r)
+ return r;
+ /*
+ * This validates off_in_quad. Checking that size is a power
+ * of two is not necessary, as cmpxchg_guest_abs_with_key
+ * takes care of that
+ */
+ if (mop->size > sizeof(new))
+ return -EINVAL;
+ if (copy_from_user(&new.raw[off_in_quad], uaddr, mop->size))
+ return -EFAULT;
+ if (copy_from_user(&old.raw[off_in_quad], old_addr, mop->size))
+ return -EFAULT;
+
+ srcu_idx = srcu_read_lock(&kvm->srcu);
+
+ if (kvm_is_error_gpa(kvm, mop->gaddr)) {
+ r = PGM_ADDRESSING;
+ goto out_unlock;
+ }
+
+ r = cmpxchg_guest_abs_with_key(kvm, mop->gaddr, mop->size, &old.quad,
+ new.quad, mop->key, &success);
+ if (!success && copy_to_user(old_addr, &old.raw[off_in_quad], mop->size))
+ r = -EFAULT;
+
+out_unlock:
+ srcu_read_unlock(&kvm->srcu, srcu_idx);
+ return r;
+}
+
+static int kvm_s390_vm_mem_op(struct kvm *kvm, struct kvm_s390_mem_op *mop)
+{
+ /*
+ * This is technically a heuristic only, if the kvm->lock is not
+ * taken, it is not guaranteed that the vm is/remains non-protected.
+ * This is ok from a kernel perspective, wrongdoing is detected
+ * on the access, -EFAULT is returned and the vm may crash the
+ * next time it accesses the memory in question.
+ * There is no sane usecase to do switching and a memop on two
+ * different CPUs at the same time.
+ */
+ if (kvm_s390_pv_get_handle(kvm))
+ return -EINVAL;
+
+ switch (mop->op) {
+ case KVM_S390_MEMOP_ABSOLUTE_READ:
+ case KVM_S390_MEMOP_ABSOLUTE_WRITE:
+ return kvm_s390_vm_mem_op_abs(kvm, mop);
+ case KVM_S390_MEMOP_ABSOLUTE_CMPXCHG:
+ return kvm_s390_vm_mem_op_cmpxchg(kvm, mop);
+ default:
+ return -EINVAL;
+ }
+}
+
long kvm_arch_vm_ioctl(struct file *filp,
unsigned int ioctl, unsigned long arg)
{
@@ -5238,62 +5297,54 @@ static long kvm_s390_vcpu_mem_op(struct kvm_vcpu *vcpu,
struct kvm_s390_mem_op *mop)
{
void __user *uaddr = (void __user *)mop->buf;
+ enum gacc_mode acc_mode;
void *tmpbuf = NULL;
- int r = 0;
- const u64 supported_flags = KVM_S390_MEMOP_F_INJECT_EXCEPTION
- | KVM_S390_MEMOP_F_CHECK_ONLY
- | KVM_S390_MEMOP_F_SKEY_PROTECTION;
+ int r;
- if (mop->flags & ~supported_flags || mop->ar >= NUM_ACRS || !mop->size)
+ r = mem_op_validate_common(mop, KVM_S390_MEMOP_F_INJECT_EXCEPTION |
+ KVM_S390_MEMOP_F_CHECK_ONLY |
+ KVM_S390_MEMOP_F_SKEY_PROTECTION);
+ if (r)
+ return r;
+ if (mop->ar >= NUM_ACRS)
return -EINVAL;
- if (mop->size > MEM_OP_MAX_SIZE)
- return -E2BIG;
if (kvm_s390_pv_cpu_is_protected(vcpu))
return -EINVAL;
- if (mop->flags & KVM_S390_MEMOP_F_SKEY_PROTECTION) {
- if (access_key_invalid(mop->key))
- return -EINVAL;
- } else {
- mop->key = 0;
- }
if (!(mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY)) {
tmpbuf = vmalloc(mop->size);
if (!tmpbuf)
return -ENOMEM;
}
- switch (mop->op) {