| field | value | date |
|---|---|---|
| author | Palmer Dabbelt <palmer@rivosinc.com> | 2024-03-14 09:24:37 -0700 |
| committer | Palmer Dabbelt <palmer@rivosinc.com> | 2024-03-20 08:56:13 -0700 |
| commit | eeb7a8933e71f98354536c3d849a26978539b09f (patch) | |
| tree | ed86e64f3f0630cfaad02a360fe731f80e8f1889 | |
| parent | 2b2ca354674bed0d0222ce1426d2d45b065ac1e8 (diff) | |
| parent | cd6c916ccf21bb4c16367493541192e0f3a19278 (diff) | |
Merge patch series "riscv: mm: Extend mappable memory up to hint address"
Charlie Jenkins <charlie@rivosinc.com> says:
On riscv, mmap currently returns an address from the largest address
space that can fit entirely inside of the hint address. This means the
hint address itself is almost never returned. This patch raises the
mappable area up to and including the hint address, which allows mmap
to return the hint address much more often. That avoids the cost of
searching for a valid address and makes the behavior more consistent
with other architectures.

Note that a previous patch introduced stronger semantics for riscv
mmap compared to other architectures. On riscv, mmap will not use the
upper bits of the virtual address beyond what the hint address allows.
On other architectures, a random address within the requested address
space is returned. On all architectures the hint address will be
returned if it is available. This allows riscv applications to
configure how many upper bits of the virtual address should be left
empty, which has two benefits: applications can request address spaces
smaller than the default, and they do not need to know the riscv page
table layout.
* b4-shazam-merge:
docs: riscv: Define behavior of mmap
selftests: riscv: Generalize mm selftests
riscv: mm: Use hint address in mmap if available
Link: https://lore.kernel.org/r/20240130-use_mmap_hint_address-v3-0-8a655cfa8bcb@rivosinc.com
Signed-off-by: Palmer Dabbelt <palmer@rivosinc.com>
34 files changed, 758 insertions, 325 deletions
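
To make the new hint semantics concrete, here is a minimal userspace sketch (not part of the series; the `1 << 40` hint and the 1 MiB mapping length are illustrative assumptions) showing how an application might use the mmap hint address to keep the upper virtual-address bits clear, mirroring the example in the updated vm-layout.rst hunk below.

```c
/* Illustrative sketch only, not from the patch series. */
#include <stdio.h>
#include <stdlib.h>
#include <sys/mman.h>

int main(void)
{
	/*
	 * With this series applied, a hint of 1 << 40 tells riscv mmap()
	 * never to return an address that uses bits 41-63; if no mappable
	 * address is available in that range, mmap() returns MAP_FAILED
	 * instead of falling back to a higher address.
	 */
	void *hint = (void *)(1UL << 40);
	size_t len = 1UL << 20;		/* 1 MiB, arbitrary for the example */
	void *p = mmap(hint, len, PROT_READ | PROT_WRITE,
		       MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);

	if (p == MAP_FAILED) {
		perror("mmap");
		return EXIT_FAILURE;
	}
	printf("mapped at %p\n", p);
	munmap(p, len);
	return 0;
}
```

Per the commit message, other architectures treat the hint only as a preference within the requested address space, so portable code should still check the returned address rather than rely on the upper bits being clear.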
diff --git a/Documentation/arch/riscv/vm-layout.rst b/Documentation/arch/riscv/vm-layout.rst index 69ff6da1dbf8..e476b4386bd9 100644 --- a/Documentation/arch/riscv/vm-layout.rst +++ b/Documentation/arch/riscv/vm-layout.rst @@ -144,14 +144,8 @@ passing 0 into the hint address parameter of mmap. On CPUs with an address space smaller than sv48, the CPU maximum supported address space will be the default. Software can "opt-in" to receiving VAs from another VA space by providing -a hint address to mmap. A hint address passed to mmap will cause the largest -address space that fits entirely into the hint to be used, unless there is no -space left in the address space. If there is no space available in the requested -address space, an address in the next smallest available address space will be -returned. - -For example, in order to obtain 48-bit VA space, a hint address greater than -:code:`1 << 47` must be provided. Note that this is 47 due to sv48 userspace -ending at :code:`1 << 47` and the addresses beyond this are reserved for the -kernel. Similarly, to obtain 57-bit VA space addresses, a hint address greater -than or equal to :code:`1 << 56` must be provided. +a hint address to mmap. When a hint address is passed to mmap, the returned +address will never use more bits than the hint address. For example, if a hint +address of `1 << 40` is passed to mmap, a valid returned address will never use +bits 41 through 63. If no mappable addresses are available in that range, mmap +will return `MAP_FAILED`. diff --git a/arch/riscv/configs/defconfig b/arch/riscv/configs/defconfig index 89a009a580fe..fc0ec2ee13bc 100644 --- a/arch/riscv/configs/defconfig +++ b/arch/riscv/configs/defconfig @@ -44,6 +44,7 @@ CONFIG_CPU_FREQ_GOV_USERSPACE=y CONFIG_CPU_FREQ_GOV_ONDEMAND=y CONFIG_CPU_FREQ_GOV_CONSERVATIVE=m CONFIG_CPUFREQ_DT=y +CONFIG_ACPI_CPPC_CPUFREQ=m CONFIG_VIRTUALIZATION=y CONFIG_KVM=m CONFIG_ACPI=y diff --git a/arch/riscv/crypto/Kconfig b/arch/riscv/crypto/Kconfig index 2ad44e1d464a..ad58dad9a580 100644 --- a/arch/riscv/crypto/Kconfig +++ b/arch/riscv/crypto/Kconfig @@ -3,14 +3,14 @@ menu "Accelerated Cryptographic Algorithms for CPU (riscv)" config CRYPTO_AES_RISCV64 - tristate "Ciphers: AES, modes: ECB, CBC, CTR, XTS" + tristate "Ciphers: AES, modes: ECB, CBC, CTS, CTR, XTS" depends on 64BIT && RISCV_ISA_V && TOOLCHAIN_HAS_VECTOR_CRYPTO select CRYPTO_ALGAPI select CRYPTO_LIB_AES select CRYPTO_SKCIPHER help Block cipher: AES cipher algorithms - Length-preserving ciphers: AES with ECB, CBC, CTR, XTS + Length-preserving ciphers: AES with ECB, CBC, CTS, CTR, XTS Architecture: riscv64 using: - Zvkned vector crypto extension diff --git a/arch/riscv/crypto/aes-riscv64-glue.c b/arch/riscv/crypto/aes-riscv64-glue.c index 37bc6ef0be40..f814ee048555 100644 --- a/arch/riscv/crypto/aes-riscv64-glue.c +++ b/arch/riscv/crypto/aes-riscv64-glue.c @@ -1,13 +1,15 @@ // SPDX-License-Identifier: GPL-2.0-only /* * AES using the RISC-V vector crypto extensions. Includes the bare block - * cipher and the ECB, CBC, CTR, and XTS modes. + * cipher and the ECB, CBC, CBC-CTS, CTR, and XTS modes. * * Copyright (C) 2023 VRULL GmbH * Author: Heiko Stuebner <heiko.stuebner@vrull.eu> * * Copyright (C) 2023 SiFive, Inc. 
* Author: Jerry Shih <jerry.shih@sifive.com> + * + * Copyright 2024 Google LLC */ #include <asm/simd.h> @@ -40,6 +42,10 @@ asmlinkage void aes_cbc_decrypt_zvkned(const struct crypto_aes_ctx *key, const u8 *in, u8 *out, size_t len, u8 iv[AES_BLOCK_SIZE]); +asmlinkage void aes_cbc_cts_crypt_zvkned(const struct crypto_aes_ctx *key, + const u8 *in, u8 *out, size_t len, + const u8 iv[AES_BLOCK_SIZE], bool enc); + asmlinkage void aes_ctr32_crypt_zvkned_zvkb(const struct crypto_aes_ctx *key, const u8 *in, u8 *out, size_t len, u8 iv[AES_BLOCK_SIZE]); @@ -164,7 +170,7 @@ static int riscv64_aes_ecb_decrypt(struct skcipher_request *req) /* AES-CBC */ -static inline int riscv64_aes_cbc_crypt(struct skcipher_request *req, bool enc) +static int riscv64_aes_cbc_crypt(struct skcipher_request *req, bool enc) { struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); const struct crypto_aes_ctx *ctx = crypto_skcipher_ctx(tfm); @@ -202,6 +208,70 @@ static int riscv64_aes_cbc_decrypt(struct skcipher_request *req) return riscv64_aes_cbc_crypt(req, false); } +/* AES-CBC-CTS */ + +static int riscv64_aes_cbc_cts_crypt(struct skcipher_request *req, bool enc) +{ + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); + const struct crypto_aes_ctx *ctx = crypto_skcipher_ctx(tfm); + struct scatterlist sg_src[2], sg_dst[2]; + struct skcipher_request subreq; + struct scatterlist *src, *dst; + struct skcipher_walk walk; + unsigned int cbc_len; + int err; + + if (req->cryptlen < AES_BLOCK_SIZE) + return -EINVAL; + + err = skcipher_walk_virt(&walk, req, false); + if (err) + return err; + /* + * If the full message is available in one step, decrypt it in one call + * to the CBC-CTS assembly function. This reduces overhead, especially + * on short messages. Otherwise, fall back to doing CBC up to the last + * two blocks, then invoke CTS just for the ciphertext stealing. 
+ */ + if (unlikely(walk.nbytes != req->cryptlen)) { + cbc_len = round_down(req->cryptlen - AES_BLOCK_SIZE - 1, + AES_BLOCK_SIZE); + skcipher_walk_abort(&walk); + skcipher_request_set_tfm(&subreq, tfm); + skcipher_request_set_callback(&subreq, + skcipher_request_flags(req), + NULL, NULL); + skcipher_request_set_crypt(&subreq, req->src, req->dst, + cbc_len, req->iv); + err = riscv64_aes_cbc_crypt(&subreq, enc); + if (err) + return err; + dst = src = scatterwalk_ffwd(sg_src, req->src, cbc_len); + if (req->dst != req->src) + dst = scatterwalk_ffwd(sg_dst, req->dst, cbc_len); + skcipher_request_set_crypt(&subreq, src, dst, + req->cryptlen - cbc_len, req->iv); + err = skcipher_walk_virt(&walk, &subreq, false); + if (err) + return err; + } + kernel_vector_begin(); + aes_cbc_cts_crypt_zvkned(ctx, walk.src.virt.addr, walk.dst.virt.addr, + walk.nbytes, req->iv, enc); + kernel_vector_end(); + return skcipher_walk_done(&walk, 0); +} + +static int riscv64_aes_cbc_cts_encrypt(struct skcipher_request *req) +{ + return riscv64_aes_cbc_cts_crypt(req, true); +} + +static int riscv64_aes_cbc_cts_decrypt(struct skcipher_request *req) +{ + return riscv64_aes_cbc_cts_crypt(req, false); +} + /* AES-CTR */ static int riscv64_aes_ctr_crypt(struct skcipher_request *req) @@ -434,6 +504,22 @@ static struct skcipher_alg riscv64_zvkned_aes_skcipher_algs[] = { .cra_driver_name = "cbc-aes-riscv64-zvkned", .cra_module = THIS_MODULE, }, + }, { + .setkey = riscv64_aes_setkey_skcipher, + .encrypt = riscv64_aes_cbc_cts_encrypt, + .decrypt = riscv64_aes_cbc_cts_decrypt, + .min_keysize = AES_MIN_KEY_SIZE, + .max_keysize = AES_MAX_KEY_SIZE, + .ivsize = AES_BLOCK_SIZE, + .walksize = 4 * AES_BLOCK_SIZE, /* matches LMUL=4 */ + .base = { + .cra_blocksize = AES_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct crypto_aes_ctx), + .cra_priority = 300, + .cra_name = "cts(cbc(aes))", + .cra_driver_name = "cts-cbc-aes-riscv64-zvkned", + .cra_module = THIS_MODULE, + }, } }; @@ -540,11 +626,12 @@ static void __exit riscv64_aes_mod_exit(void) module_init(riscv64_aes_mod_init); module_exit(riscv64_aes_mod_exit); -MODULE_DESCRIPTION("AES-ECB/CBC/CTR/XTS (RISC-V accelerated)"); +MODULE_DESCRIPTION("AES-ECB/CBC/CTS/CTR/XTS (RISC-V accelerated)"); MODULE_AUTHOR("Jerry Shih <jerry.shih@sifive.com>"); MODULE_LICENSE("GPL"); MODULE_ALIAS_CRYPTO("aes"); MODULE_ALIAS_CRYPTO("ecb(aes)"); MODULE_ALIAS_CRYPTO("cbc(aes)"); +MODULE_ALIAS_CRYPTO("cts(cbc(aes))"); MODULE_ALIAS_CRYPTO("ctr(aes)"); MODULE_ALIAS_CRYPTO("xts(aes)"); diff --git a/arch/riscv/crypto/aes-riscv64-zvkned.S b/arch/riscv/crypto/aes-riscv64-zvkned.S index 78d4e1186c07..23d063f94ce6 100644 --- a/arch/riscv/crypto/aes-riscv64-zvkned.S +++ b/arch/riscv/crypto/aes-riscv64-zvkned.S @@ -139,19 +139,25 @@ SYM_FUNC_END(aes_ecb_decrypt_zvkned) .endm .macro aes_cbc_decrypt keylen + srli LEN, LEN, 2 // Convert LEN from bytes to words vle32.v v16, (IVP) // Load IV 1: - vle32.v v17, (INP) // Load ciphertext block - vmv.v.v v18, v17 // Save ciphertext block - aes_decrypt v17, \keylen // Decrypt - vxor.vv v17, v17, v16 // XOR with IV or prev ciphertext block - vse32.v v17, (OUTP) // Store plaintext block - vmv.v.v v16, v18 // Next "IV" is prev ciphertext block - addi INP, INP, 16 - addi OUTP, OUTP, 16 - addi LEN, LEN, -16 + vsetvli t0, LEN, e32, m4, ta, ma + vle32.v v20, (INP) // Load ciphertext blocks + vslideup.vi v16, v20, 4 // Setup prev ciphertext blocks + addi t1, t0, -4 + vslidedown.vx v24, v20, t1 // Save last ciphertext block + aes_decrypt v20, \keylen // Decrypt the blocks + vxor.vv v20, v20, v16 
// XOR with prev ciphertext blocks + vse32.v v20, (OUTP) // Store plaintext blocks + vmv.v.v v16, v24 // Next "IV" is last ciphertext block + slli t1, t0, 2 // Words to bytes + add INP, INP, t1 + add OUTP, OUTP, t1 + sub LEN, LEN, t0 bnez LEN, 1b + vsetivli zero, 4, e32, m1, ta, ma vse32.v v16, (IVP) // Store next IV ret .endm @@ -178,3 +184,156 @@ SYM_FUNC_START(aes_cbc_decrypt_zvkned) 192: aes_cbc_decrypt 192 SYM_FUNC_END(aes_cbc_decrypt_zvkned) + +.macro aes_cbc_cts_encrypt keylen + + // CBC-encrypt all blocks except the last. But don't store the + // second-to-last block to the output buffer yet, since it will be + // handled specially in the ciphertext stealing step. Exception: if the + // message is single-block, still encrypt the last (and only) block. + li t0, 16 + j 2f +1: + vse32.v v16, (OUTP) // Store ciphertext block + addi OUTP, OUTP, 16 +2: + vle32.v v17, (INP) // Load plaintext block + vxor.vv v16, v16, v17 // XOR with IV or prev ciphertext block + aes_encrypt v16, \keylen // Encrypt + addi INP, INP, 16 + addi LEN, LEN, -16 + bgt LEN, t0, 1b // Repeat if more than one block remains + + // Special case: if the message is a single block, just do CBC. + beqz LEN, .Lcts_encrypt_done\@ + + // Encrypt the last two blocks using ciphertext stealing as follows: + // C[n-1] = Encrypt(Encrypt(P[n-1] ^ C[n-2]) ^ P[n]) + // C[n] = Encrypt(P[n-1] ^ C[n-2])[0..LEN] + // + // C[i] denotes the i'th ciphertext block, and likewise P[i] the i'th + // plaintext block. Block n, the last block, may be partial; its length + // is 1 <= LEN <= 16. If there are only 2 blocks, C[n-2] means the IV. + // + // v16 already contains Encrypt(P[n-1] ^ C[n-2]). + // INP points to P[n]. OUTP points to where C[n-1] should go. + // To support in-place encryption, load P[n] before storing C[n]. + addi t0, OUTP, 16 // Get pointer to where C[n] should go + vsetvli zero, LEN, e8, m1, tu, ma + vle8.v v17, (INP) // Load P[n] + vse8.v v16, (t0) // Store C[n] + vxor.vv v16, v16, v17 // v16 = Encrypt(P[n-1] ^ C[n-2]) ^ P[n] + vsetivli zero, 4, e32, m1, ta, ma + aes_encrypt v16, \keylen +.Lcts_encrypt_done\@: + vse32.v v16, (OUTP) // Store C[n-1] (or C[n] in single-block case) + ret +.endm + +#define LEN32 t4 // Length of remaining full blocks in 32-bit words +#define LEN_MOD16 t5 // Length of message in bytes mod 16 + +.macro aes_cbc_cts_decrypt keylen + andi LEN32, LEN, ~15 + srli LEN32, LEN32, 2 + andi LEN_MOD16, LEN, 15 + + // Save C[n-2] in v28 so that it's available later during the ciphertext + // stealing step. If there are fewer than three blocks, C[n-2] means + // the IV, otherwise it means the third-to-last ciphertext block. + vmv.v.v v28, v16 // IV + add t0, LEN, -33 + bltz t0, .Lcts_decrypt_loop\@ + andi t0, t0, ~15 + add t0, t0, INP + vle32.v v28, (t0) + + // CBC-decrypt all full blocks. For the last full block, or the last 2 + // full blocks if the message is block-aligned, this doesn't write the + // correct output blocks (unless the message is only a single block), + // because it XORs the wrong values with the raw AES plaintexts. But we + // fix this after this loop without redoing the AES decryptions. This + // approach allows more of the AES decryptions to be parallelized. 
+.Lcts_decrypt_loop\@: + vsetvli t0, LEN32, e32, m4, ta, ma + addi t1, t0, -4 + vle32.v v20, (INP) // Load next set of ciphertext blocks + vmv.v.v v24, v16 // Get IV or last ciphertext block of prev set + vslideup.vi v24, v20, 4 // Setup prev ciphertext blocks + vslidedown.vx v16, v20, t1 // Save last ciphertext block of this set + aes_decrypt v20, \keylen // Decrypt this set of blocks + vxor.vv v24, v24, v20 // XOR prev ciphertext blocks with decrypted blocks + vse32.v v24, (OUTP) // Store this set of plaintext blocks + sub LEN32, LEN32, t0 + slli t0, t0, 2 // Words to bytes + add INP, INP, t0 + add OUTP, OUTP, t0 + bnez LEN32, .Lcts_decrypt_loop\@ + + vsetivli zero, 4, e32, m4, ta, ma + vslidedown.vx v20, v20, t1 // Extract raw plaintext of last full block + addi t0, OUTP, -16 // Get pointer to last full plaintext block + bnez LEN_MOD16, .Lcts_decrypt_non_block_aligned\@ + + // Special case: if the message is a single block, just do CBC. + li t1, 16 + beq LEN, t1, .Lcts_decrypt_done\@ + + // Block-aligned message. Just fix up the last 2 blocks. We need: + // + // P[n-1] = Decrypt(C[n]) ^ C[n-2] + // P[n] = Decrypt(C[n-1]) ^ C[n] + // + // We have C[n] in v16, Decrypt(C[n]) in v20, and C[n-2] in v28. + // Together with Decrypt(C[n-1]) ^ C[n-2] from the output buffer, this + // is everything needed to fix the output without re-decrypting blocks. + addi t1, OUTP, -32 // Get pointer to where P[n-1] should go + vxor.vv v20, v20, v28 // Decrypt(C[n]) ^ C[n-2] == P[n-1] + vle32.v v24, (t1) // Decrypt(C[n-1]) ^ C[n-2] + vse32.v v20, (t1) // Store P[n-1] + vxor.vv v20, v24, v16 // Decrypt(C[n-1]) ^ C[n-2] ^ C[n] == P[n] ^ C[n-2] + j .Lcts_decrypt_finish\@ + +.Lcts_decrypt_non_block_aligned\@: + // Decrypt the last two blocks using ciphertext stealing as follows: + // + // P[n-1] = Decrypt(C[n] || Decrypt(C[n-1])[LEN_MOD16..16]) ^ C[n-2] + // P[n] = (Decrypt(C[n-1]) ^ C[n])[0..LEN_MOD16] + // + // We already have Decrypt(C[n-1]) in v20 and C[n-2] in v28. + vmv.v.v v16, v20 // v16 = Decrypt(C[n-1]) + vsetvli zero, LEN_MOD16, e8, m1, tu, ma + vle8.v v20, (INP) // v20 = C[n] || Decrypt(C[n-1])[LEN_MOD16..16] + vxor.vv v16, v16, v20 // v16 = Decrypt(C[n-1]) ^ C[n] + vse8.v v16, (OUTP) // Store P[n] + vsetivli zero, 4, e32, m1, ta, ma + aes_decrypt v20, \keylen // v20 = Decrypt(C[n] || Decrypt(C[n-1])[LEN_MOD16..16]) +.Lcts_decrypt_finish\@: + vxor.vv v20, v20, v28 // XOR with C[n-2] + vse32.v v20, (t0) // Store last full plaintext block +.Lcts_decrypt_done\@: + ret +.endm + +.macro aes_cbc_cts_crypt keylen + vle32.v v16, (IVP) // Load IV + beqz a5, .Lcts_decrypt\@ + aes_cbc_cts_encrypt \keylen +.Lcts_decrypt\@: + aes_cbc_cts_decrypt \keylen +.endm + +// void aes_cbc_cts_crypt_zvkned(const struct crypto_aes_ctx *key, +// const u8 *in, u8 *out, size_t len, +// const u8 iv[16], bool enc); +// +// Encrypts or decrypts a message with the CS3 variant of AES-CBC-CTS. +// This is the variant that unconditionally swaps the last two blocks. 
+SYM_FUNC_START(aes_cbc_cts_crypt_zvkned) + aes_begin KEYP, 128f, 192f + aes_cbc_cts_crypt 256 +128: + aes_cbc_cts_crypt 128 +192: + aes_cbc_cts_crypt 192 +SYM_FUNC_END(aes_cbc_cts_crypt_zvkned) diff --git a/arch/riscv/include/asm/atomic.h b/arch/riscv/include/asm/atomic.h index f5dfef6c2153..0e0522e588ca 100644 --- a/arch/riscv/include/asm/atomic.h +++ b/arch/riscv/include/asm/atomic.h @@ -17,7 +17,6 @@ #endif #include <asm/cmpxchg.h> -#include <asm/barrier.h> #define __atomic_acquire_fence() \ __asm__ __volatile__(RISCV_ACQUIRE_BARRIER "" ::: "memory") @@ -207,7 +206,7 @@ static __always_inline int arch_atomic_fetch_add_unless(atomic_t *v, int a, int " add %[rc], %[p], %[a]\n" " sc.w.rl %[rc], %[rc], %[c]\n" " bnez %[rc], 0b\n" - " fence rw, rw\n" + RISCV_FULL_BARRIER "1:\n" : [p]"=&r" (prev), [rc]"=&r" (rc), [c]"+A" (v->counter) : [a]"r" (a), [u]"r" (u) @@ -228,7 +227,7 @@ static __always_inline s64 arch_atomic64_fetch_add_unless(atomic64_t *v, s64 a, " add %[rc], %[p], %[a]\n" " sc.d.rl %[rc], %[rc], %[c]\n" " bnez %[rc], 0b\n" - " fence rw, rw\n" + RISCV_FULL_BARRIER "1:\n" : [p]"=&r" (prev), [rc]"=&r" (rc), [c]"+A" (v->counter) : [a]"r" (a), [u]"r" (u) @@ -248,7 +247,7 @@ static __always_inline bool arch_atomic_inc_unless_negative(atomic_t *v) " addi %[rc], %[p], 1\n" " sc.w.rl %[rc], %[rc], %[c]\n" " bnez %[rc], 0b\n" - " fence rw, rw\n" + RISCV_FULL_BARRIER "1:\n" : [p]"=&r" (prev), [rc]"=&r" (rc), [c]"+A" (v->counter) : @@ -268,7 +267,7 @@ static __always_inline bool arch_atomic_dec_unless_positive(atomic_t *v) " addi %[rc], %[p], -1\n" " sc.w.rl %[rc], %[rc], %[c]\n" " bnez %[rc], 0b\n" - " fence rw, rw\n" + RISCV_FULL_BARRIER "1:\n" : [p]"=&r" (prev), [rc]"=&r" (rc), [c]"+A" (v->counter) : @@ -288,7 +287,7 @@ static __always_inline int arch_atomic_dec_if_positive(atomic_t *v) " bltz %[rc], 1f\n" " sc.w.rl %[rc], %[rc], %[c]\n" " bnez %[rc], 0b\n" - " fence rw, rw\n" + RISCV_FULL_BARRIER "1:\n" : [p]"=&r" (prev), [rc]"=&r" (rc), [c]"+A" (v->counter) : @@ -310,7 +309,7 @@ static __always_inline bool arch_atomic64_inc_unless_negative(atomic64_t *v) " addi %[rc], %[p], 1\n" " sc.d.rl %[rc], %[rc], %[c]\n" " bnez %[rc], 0b\n" - " fence rw, rw\n" + RISCV_FULL_BARRIER "1:\n" : [p]"=&r" (prev), [rc]"=&r" (rc), [c]"+A" (v->counter) : @@ -331,7 +330,7 @@ static __always_inline bool arch_atomic64_dec_unless_positive(atomic64_t *v) " addi %[rc], %[p], -1\n" " sc.d.rl %[rc], %[rc], %[c]\n" " bnez %[rc], 0b\n" - " fence rw, rw\n" + RISCV_FULL_BARRIER "1:\n" : [p]"=&r" (prev), [rc]"=&r" (rc), [c]"+A" (v->counter) : @@ -352,7 +351,7 @@ static __always_inline s64 arch_atomic64_dec_if_positive(atomic64_t *v) " bltz %[rc], 1f\n" " sc.d.rl %[rc], %[rc], %[c]\n" " bnez %[rc], 0b\n" - " fence rw, rw\n" + RISCV_FULL_BARRIER "1:\n" : [p]"=&r" (prev), [rc]"=&r" (rc), [c]"+A" (v->counter) : diff --git a/arch/riscv/include/asm/barrier.h b/arch/riscv/include/asm/barrier.h index 110752594228..880b56d8480d 100644 --- a/arch/riscv/include/asm/barrier.h +++ b/arch/riscv/include/asm/barrier.h @@ -11,28 +11,27 @@ #define _ASM_RISCV_BARRIER_H #ifndef __ASSEMBLY__ +#include <asm/fence.h> #define nop() __asm__ __volatile__ ("nop") #define __nops(n) ".rept " #n "\nnop\n.endr\n" #define nops(n) __asm__ __volatile__ (__nops(n)) -#define RISCV_FENCE(p, s) \ - __asm__ __volatile__ ("fence " #p "," #s : : : "memory") /* These barriers need to enforce ordering on both devices or memory. 
*/ -#define mb() RISCV_FENCE(iorw,iorw) -#define rmb() RISCV_FENCE(ir,ir) -#define wmb() RISCV_FENCE(ow,ow) +#define __mb() RISCV_FENCE(iorw, iorw) +#define __rmb() RISCV_FENCE(ir, ir) +#define __wmb() RISCV_FENCE(ow, ow) /* These barriers do not need to enforce ordering on devices, just memory. */ -#define __smp_mb() RISCV_FENCE(rw,rw) -#define __smp_rmb() RISCV_FENCE(r,r) -#define __smp_wmb() RISCV_FENCE(w,w) +#define __smp_mb() RISCV_FENCE(rw, rw) +#define __smp_rmb() RISCV_FENCE(r, r) +#define __smp_wmb() RISCV_FENCE(w, w) #define __smp_store_release(p, v) \ do { \ compiletime_assert_atomic_type(*p); \ - RISCV_FENCE(rw,w); \ + RISCV_FENCE(rw, w); \ WRITE_ONCE(*p, v); \ } while (0) @@ -40,7 +39,7 @@ do { \ ({ \ typeof(*p) ___p1 = READ_ONCE(*p); \ compiletime_assert_atomic_type(*p); \ - RISCV_FENCE(r,rw); \ + RISCV_FENCE(r, rw); \ ___p1; \ }) @@ -69,7 +68,7 @@ do { \ * instances the scheduler pairs this with an mb(), so nothing is necessary on * the new hart. */ -#define smp_mb__after_spinlock() RISCV_FENCE(iorw,iorw) +#define smp_mb__after_spinlock() RISCV_FENCE(iorw, iorw) #include <asm-generic/barrier.h> diff --git a/arch/riscv/include/asm/cmpxchg.h b/arch/riscv/include/asm/cmpxchg.h index 2f4726d3cfcc..2fee65cc8443 100644 --- a/arch/riscv/include/asm/cmpxchg.h +++ b/arch/riscv/include/asm/cmpxchg.h @@ -8,7 +8,6 @@ #include <linux/bug.h> -#include <asm/barrier.h> #include <asm/fence.h> #define __xchg_relaxed(ptr, new, size) \ @@ -313,7 +312,7 @@ " bne %0, %z3, 1f\n" \ " sc.w.rl %1, %z4, %2\n" \ " bnez %1, 0b\n" \ - " fence rw, rw\n" \ + RISCV_FULL_BARRIER \ "1:\n" \ : "=&r" (__ret), "=&r" (__rc), "+A" (*__ptr) \ : "rJ" ((long)__old), "rJ" (__new) \ @@ -325,7 +324,7 @@ " bne %0, %z3, 1f\n" \ " sc.d.rl %1, %z4, %2\n" \ " bnez %1, 0b\n" \ - " fence rw, rw\n" \ + RISCV_FULL_BARRIER \ "1:\n" \ : "=&r" (__ret), "=&r" (__rc), "+A" (*__ptr) \ : "rJ" (__old), "rJ" (__new) \ diff --git a/arch/riscv/include/asm/compat.h b/arch/riscv/include/asm/compat.h index 2ac955b51148..aa103530a5c8 100644 --- a/arch/riscv/include/asm/compat.h +++ b/arch/riscv/include/asm/compat.h @@ -14,9 +14,28 @@ static inline int is_compat_task(void) { + if (!IS_ENABLED(CONFIG_COMPAT)) + return 0; + return test_thread_flag(TIF_32BIT); } +static inline int is_compat_thread(struct thread_info *thread) +{ + if (!IS_ENABLED(CONFIG_COMPAT)) + return 0; + + return test_ti_thread_flag(thread, TIF_32BIT); +} + +static inline void set_compat_task(bool is_compat) +{ + if (is_compat) + set_thread_flag(TIF_32BIT); + else + clear_thread_flag(TIF_32BIT); +} + struct compat_user_regs_struct { compat_ulong_t pc; compat_ulong_t ra; diff --git a/arch/riscv/include/asm/elf.h b/arch/riscv/include/asm/elf.h index 06c236bfab53..c7aea7886d22 100644 --- a/arch/riscv/include/asm/elf.h +++ b/arch/riscv/include/asm/elf.h @@ -53,13 +53,9 @@ extern bool compat_elf_check_arch(Elf32_Ehdr *hdr); #define ELF_ET_DYN_BASE ((DEFAULT_MAP_WINDOW / 3) * 2) #ifdef CONFIG_64BIT -#ifdef CONFIG_COMPAT -#define STACK_RND_MASK (test_thread_flag(TIF_32BIT) ? \ +#define STACK_RND_MASK (is_compat_task() ? 
\ 0x7ff >> (PAGE_SHIFT - 12) : \ 0x3ffff >> (PAGE_SHIFT - 12)) -#else -#define STACK_RND_MASK (0x3ffff >> (PAGE_SHIFT - 12)) -#endif #endif /* @@ -139,10 +135,7 @@ do { \ #ifdef CONFIG_COMPAT #define SET_PERSONALITY(ex) \ -do { if ((ex).e_ident[EI_CLASS] == ELFCLASS32) \ - set_thread_flag(TIF_32BIT); \ - else \ - clear_thread_flag(TIF_32BIT); \ +do { set_compat_task((ex).e_ident[EI_CLASS] == ELFCLASS32); \ if (personality(current->personality) != PER_LINUX32) \ set_personality(PER_LINUX | \ (current->personality & (~PER_MASK))); \ diff --git a/arch/riscv/include/asm/fence.h b/arch/riscv/include/asm/fence.h index 2b443a3a487f..6bcd80325dfc 100644 --- a/arch/riscv/include/asm/fence.h +++ b/arch/riscv/include/asm/fence.h @@ -1,12 +1,18 @@ #ifndef _ASM_RISCV_FENCE_H #define _ASM_RISCV_FENCE_H +#define RISCV_FENCE_ASM(p, s) "\tfence " #p "," #s "\n" +#define RISCV_FENCE(p, s) \ + ({ __asm__ __volatile__ (RISCV_FENCE_ASM(p, s) : : : "memory"); }) + #ifdef CONFIG_SMP -#define RISCV_ACQUIRE_BARRIER "\tfence r , rw\n" -#define RISCV_RELEASE_BARRIER "\tfence rw, w\n" +#define RISCV_ACQUIRE_BARRIER RISCV_FENCE_ASM(r, rw) +#define RISCV_RELEASE_BARRIER RISCV_FENCE_ASM(rw, w) +#define RISCV_FULL_BARRIER RISCV_FENCE_ASM(rw, rw) #else #define RISCV_ACQUIRE_BARRIER #define RISCV_RELEASE_BARRIER +#define RISCV_FULL_BARRIER #endif #endif /* _ASM_RISCV_FENCE_H */ diff --git a/arch/riscv/include/asm/io.h b/arch/riscv/include/asm/io.h index 42497d487a17..1c5c641075d2 100644 --- a/arch/riscv/include/asm/io.h +++ b/arch/riscv/include/asm/io.h @@ -47,10 +47,10 @@ * sufficient to ensure this works sanely on controllers that support I/O * writes. */ -#define __io_pbr() __asm__ __volatile__ ("fence io,i" : : : "memory"); -#define __io_par(v) __asm__ __volatile__ ("fence i,ior" : : : "memory"); -#define __io_pbw() __asm__ __volatile__ ("fence iow,o" : : : "memory"); -#define __io_paw() __asm__ __volatile__ ("fence o,io" : : : "memory"); +#define __io_pbr() RISCV_FENCE(io, i) +#define __io_par(v) RISCV_FENCE(i, ior) +#define __io_pbw() RISCV_FENCE(iow, o) +#define __io_paw() RISCV_FENCE(o, io) /* * Accesses from a single hart to a single I/O address must be ordered. This diff --git a/arch/riscv/include/asm/mmio.h b/arch/riscv/include/asm/mmio.h index 4c58ee7f95ec..06cadfd7a237 100644 --- a/arch/riscv/include/asm/mmio.h +++ b/ |
