commit 5f6e430f931d245da838db3e10e918681207029b
Merge: a6e3e6f138058ff184d8ef5064a033b3f5fee8f8 980411a4d1bb925d28cd9e8d8301dc982ece788d
Tree:  6adc54a582652ae470ce95d9205f798568339ff0
Author:    Linus Torvalds <torvalds@linux-foundation.org>  2022-12-19 07:13:33 -0600
Committer: Linus Torvalds <torvalds@linux-foundation.org>  2022-12-19 07:13:33 -0600
Merge tag 'powerpc-6.2-1' of git://git.kernel.org/pub/scm/linux/kernel/git/powerpc/linux
Pull powerpc updates from Michael Ellerman:
- Add powerpc qspinlock implementation optimised for large system
scalability and paravirt. See the merge message for more details
- Enable objtool to be built on powerpc to generate mcount locations
- Use a temporary mm for code patching with the Radix MMU, so the
  writable mapping is restricted to the patching CPU (a loose userspace
  analogy is sketched after this list)
- Add an option to build the 64-bit big-endian kernel with the ELFv2
ABI
- Sanitise user registers on interrupt entry on 64-bit Book3S
- Many other small features and fixes
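
To make the code-patching bullet concrete: the kernel maps the text page
writable only inside a temporary mm on the patching CPU. Below is a loose
userspace analogy, not the kernel's code-patching API: the same memfd page
is mapped twice, one long-lived read+exec alias (standing in for kernel
text) and one short-lived writable alias that exists only for the duration
of the patch. All names and values are made up, and a process cannot scope
the alias to one CPU the way a temporary mm can; this models only the
transient-writable-alias part.

#define _GNU_SOURCE
#include <stdio.h>
#include <sys/mman.h>
#include <unistd.h>

int main(void)
{
        long page = sysconf(_SC_PAGESIZE);
        int fd = memfd_create("fake-text", 0);

        if (fd < 0 || ftruncate(fd, page) < 0)
                return 1;

        /* long-lived mapping: readable + executable, never writable */
        unsigned char *text = mmap(NULL, page, PROT_READ | PROT_EXEC,
                                   MAP_SHARED, fd, 0);
        /* transient writable window onto the same page */
        unsigned char *patch = mmap(NULL, page, PROT_READ | PROT_WRITE,
                                    MAP_SHARED, fd, 0);

        if (text == MAP_FAILED || patch == MAP_FAILED)
                return 1;

        patch[0] = 0x60;        /* "patch" one byte (ppc nop is 0x60000000) */
        munmap(patch, page);    /* the writable alias is gone again */

        printf("patched byte now reads %#x\n", text[0]);
        munmap(text, page);
        close(fd);
        return 0;
}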
Thanks to Aboorva Devarajan, Angel Iglesias, Benjamin Gray, Bjorn
Helgaas, Bo Liu, Chen Lifu, Christoph Hellwig, Christophe JAILLET,
Christophe Leroy, Christopher M. Riedl, Colin Ian King, Deming Wang,
Disha Goel, Dmitry Torokhov, Finn Thain, Geert Uytterhoeven, Gustavo A.
R. Silva, Haowen Bai, Joel Stanley, Jordan Niethe, Julia Lawall, Kajol
Jain, Laurent Dufour, Li zeming, Miaoqian Lin, Michael Jeanson, Nathan
Lynch, Naveen N. Rao, Nayna Jain, Nicholas Miehlbradt, Nicholas Piggin,
Pali Rohár, Randy Dunlap, Rohan McLure, Russell Currey, Sathvika
Vasireddy, Shaomin Deng, Stephen Kitt, Stephen Rothwell, Thomas
Weißschuh, Tiezhu Yang, Uwe Kleine-König, Xie Shaowen, Xiu Jianfeng,
XueBing Chen, Yang Yingliang, Zhang Jiaming, ruanjinjie, Jessica Yu,
and Wolfram Sang.
* tag 'powerpc-6.2-1' of git://git.kernel.org/pub/scm/linux/kernel/git/powerpc/linux: (181 commits)
powerpc/code-patching: Fix oops with DEBUG_VM enabled
powerpc/qspinlock: Fix 32-bit build
powerpc/prom: Fix 32-bit build
powerpc/rtas: mandate RTAS syscall filtering
powerpc/rtas: define pr_fmt and convert printk call sites
powerpc/rtas: clean up includes
powerpc/rtas: clean up rtas_error_log_max initialization
powerpc/pseries/eeh: use correct API for error log size
powerpc/rtas: avoid scheduling in rtas_os_term()
powerpc/rtas: avoid device tree lookups in rtas_os_term()
powerpc/rtasd: use correct OF API for event scan rate
powerpc/rtas: document rtas_call()
powerpc/pseries: unregister VPA when hot unplugging a CPU
powerpc/pseries: reset the RCU watchdogs after a LPM
powerpc: Take in account addition CPU node when building kexec FDT
powerpc: export the CPU node count
powerpc/cpuidle: Set CPUIDLE_FLAG_POLLING for snooze state
powerpc/dts/fsl: Fix pca954x i2c-mux node names
cxl: Remove unnecessary cxl_pci_window_alignment()
selftests/powerpc: Fix resource leaks
...
Diffstat (limited to 'arch/powerpc/mm/book3s64/hash_utils.c')
 arch/powerpc/mm/book3s64/hash_utils.c | 130 ++++++++++++++++++++++++++++++++-
 1 file changed, 129 insertions(+), 1 deletion(-)
diff --git a/arch/powerpc/mm/book3s64/hash_utils.c b/arch/powerpc/mm/book3s64/hash_utils.c
index 6df4c6d38b66..80a148c57de8 100644
--- a/arch/powerpc/mm/book3s64/hash_utils.c
+++ b/arch/powerpc/mm/book3s64/hash_utils.c
@@ -471,7 +471,7 @@ int htab_remove_mapping(unsigned long vstart, unsigned long vend,
 	return ret;
 }
 
-static bool disable_1tb_segments = false;
+static bool disable_1tb_segments __ro_after_init;
 
 static int __init parse_disable_1tb_segments(char *p)
 {
@@ -480,6 +480,40 @@ static int __init parse_disable_1tb_segments(char *p)
 }
 early_param("disable_1tb_segments", parse_disable_1tb_segments);
 
+bool stress_hpt_enabled __initdata;
+
+static int __init parse_stress_hpt(char *p)
+{
+	stress_hpt_enabled = true;
+	return 0;
+}
+early_param("stress_hpt", parse_stress_hpt);
+
+__ro_after_init DEFINE_STATIC_KEY_FALSE(stress_hpt_key);
+
+/*
+ * per-CPU array allocated if we enable stress_hpt.
+ */
+#define STRESS_MAX_GROUPS 16
+struct stress_hpt_struct {
+	unsigned long last_group[STRESS_MAX_GROUPS];
+};
+
+static inline int stress_nr_groups(void)
+{
+	/*
+	 * LPAR H_REMOVE flushes TLB, so need some number > 1 of entries
+	 * to allow practical forward progress. Bare metal returns 1, which
+	 * seems to help uncover more bugs.
+	 */
+	if (firmware_has_feature(FW_FEATURE_LPAR))
+		return STRESS_MAX_GROUPS;
+	else
+		return 1;
+}
+
+static struct stress_hpt_struct *stress_hpt_struct;
+
 static int __init htab_dt_scan_seg_sizes(unsigned long node,
 					 const char *uname, int depth,
 					 void *data)
@@ -976,6 +1010,23 @@ static void __init hash_init_partition_table(phys_addr_t hash_table,
 	pr_info("Partition table %p\n", partition_tb);
 }
 
+void hpt_clear_stress(void);
+static struct timer_list stress_hpt_timer;
+void stress_hpt_timer_fn(struct timer_list *timer)
+{
+	int next_cpu;
+
+	hpt_clear_stress();
+	if (!firmware_has_feature(FW_FEATURE_LPAR))
+		tlbiel_all();
+
+	next_cpu = cpumask_next(raw_smp_processor_id(), cpu_online_mask);
+	if (next_cpu >= nr_cpu_ids)
+		next_cpu = cpumask_first(cpu_online_mask);
+	stress_hpt_timer.expires = jiffies + msecs_to_jiffies(10);
+	add_timer_on(&stress_hpt_timer, next_cpu);
+}
+
 static void __init htab_initialize(void)
 {
 	unsigned long table;
@@ -995,6 +1046,20 @@ static void __init htab_initialize(void)
 	if (stress_slb_enabled)
 		static_branch_enable(&stress_slb_key);
 
+	if (stress_hpt_enabled) {
+		unsigned long tmp;
+		static_branch_enable(&stress_hpt_key);
+		// Too early to use nr_cpu_ids, so use NR_CPUS
+		tmp = memblock_phys_alloc_range(sizeof(struct stress_hpt_struct) * NR_CPUS,
+						0, 0, MEMBLOCK_ALLOC_ANYWHERE);
+		memset((void *)tmp, 0xff, sizeof(struct stress_hpt_struct) * NR_CPUS);
+		stress_hpt_struct = __va(tmp);
+
+		timer_setup(&stress_hpt_timer, stress_hpt_timer_fn, 0);
+		stress_hpt_timer.expires = jiffies + msecs_to_jiffies(10);
+		add_timer(&stress_hpt_timer);
+	}
+
 	/*
 	 * Calculate the required size of the htab.  We want the number of
 	 * PTEGs to equal one half the number of real pages.
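
Worth noting between these two hunks: the timer added above migrates
itself. Each firing clears the stress entries on the current CPU and
re-arms on the next online CPU, so every CPU is exercised in turn. A
minimal standalone sketch of that wrap-around walk, in plain userspace C
with a made-up 8-CPU mask (NR_CPUS_MODEL, next_online() and the mask value
are hypothetical, not kernel API; the kernel uses cpumask_next() and
cpumask_first()):

#include <stdio.h>

#define NR_CPUS_MODEL 8                 /* stand-in for nr_cpu_ids */

static int next_online(unsigned int mask, int cur)
{
        /* next set bit after cur, like cpumask_next() */
        for (int cpu = cur + 1; cpu < NR_CPUS_MODEL; cpu++)
                if (mask & (1u << cpu))
                        return cpu;
        /* wrap to the first set bit, like cpumask_first() */
        for (int cpu = 0; cpu < NR_CPUS_MODEL; cpu++)
                if (mask & (1u << cpu))
                        return cpu;
        return -1;                      /* no CPU online */
}

int main(void)
{
        unsigned int online = 0x5a;     /* CPUs 1, 3, 4, 6 "online" */
        int cpu = 1;

        for (int i = 0; i < 6; i++) {
                cpu = next_online(online, cpu);
                printf("timer fires on CPU %d\n", cpu); /* 3 4 6 1 3 4 */
        }
        return 0;
}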
@@ -1980,6 +2045,69 @@ repeat:
 	return slot;
 }
 
+void hpt_clear_stress(void)
+{
+	int cpu = raw_smp_processor_id();
+	int g;
+
+	for (g = 0; g < stress_nr_groups(); g++) {
+		unsigned long last_group;
+		last_group = stress_hpt_struct[cpu].last_group[g];
+
+		if (last_group != -1UL) {
+			int i;
+			for (i = 0; i < HPTES_PER_GROUP; i++) {
+				if (mmu_hash_ops.hpte_remove(last_group) == -1)
+					break;
+			}
+			stress_hpt_struct[cpu].last_group[g] = -1;
+		}
+	}
+}
+
+void hpt_do_stress(unsigned long ea, unsigned long hpte_group)
+{
+	unsigned long last_group;
+	int cpu = raw_smp_processor_id();
+
+	last_group = stress_hpt_struct[cpu].last_group[stress_nr_groups() - 1];
+	if (hpte_group == last_group)
+		return;
+
+	if (last_group != -1UL) {
+		int i;
+		/*
+		 * Concurrent CPUs might be inserting into this group, so
+		 * give up after a number of iterations, to prevent a live
+		 * lock.
+		 */
+		for (i = 0; i < HPTES_PER_GROUP; i++) {
+			if (mmu_hash_ops.hpte_remove(last_group) == -1)
+				break;
+		}
+		stress_hpt_struct[cpu].last_group[stress_nr_groups() - 1] = -1;
+	}
+
+	if (ea >= PAGE_OFFSET) {
+		/*
+		 * We would really like to prefetch to get the TLB loaded, then
+		 * remove the PTE before returning from fault interrupt, to
+		 * increase the hash fault rate.
+		 *
+		 * Unfortunately QEMU TCG does not model the TLB in a way that
+		 * makes this possible, and systemsim (mambo) emulator does not
+		 * bring in TLBs with prefetches (although loads/stores do
+		 * work for non-CI PTEs).
+		 *
+		 * So remember this PTE and clear it on the next hash fault.
+		 */
+		memmove(&stress_hpt_struct[cpu].last_group[1],
+			&stress_hpt_struct[cpu].last_group[0],
+			(stress_nr_groups() - 1) * sizeof(unsigned long));
+		stress_hpt_struct[cpu].last_group[0] = hpte_group;
+	}
+}
+
 #if defined(CONFIG_DEBUG_PAGEALLOC) || defined(CONFIG_KFENCE)
 static DEFINE_RAW_SPINLOCK(linear_map_hash_lock);
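
The per-CPU eviction queue that hpt_do_stress() maintains can be modeled
compactly in userspace. The sketch below uses made-up names and values
(NGROUPS, remember_group(), the group "addresses"); the real code uses
stress_nr_groups() for the depth and mmu_hash_ops.hpte_remove() for the
actual removal. It shows the memmove()-based shift: new groups enter at
slot 0, older entries drift toward the tail, and the entry falling off the
tail is the one whose HPTEs get flushed:

#include <stdio.h>
#include <string.h>

#define NGROUPS 4                       /* stands in for stress_nr_groups() */

static unsigned long last_group[NGROUPS] = { -1UL, -1UL, -1UL, -1UL };

static void remember_group(unsigned long group)
{
        unsigned long victim = last_group[NGROUPS - 1];

        if (group == victim)            /* inserting into the group that is
                                           next in line: leave it alone */
                return;
        if (victim != -1UL)
                printf("evict and remove HPTEs of group %#lx\n", victim);
        /* shift toward the tail, push the new group at the head */
        memmove(&last_group[1], &last_group[0],
                (NGROUPS - 1) * sizeof(unsigned long));
        last_group[0] = group;
}

int main(void)
{
        /* arbitrary group "addresses", just to show the rotation */
        unsigned long groups[] = { 0x100, 0x200, 0x300, 0x400, 0x500, 0x600 };

        for (int i = 0; i < 6; i++)
                remember_group(groups[i]);      /* evicts 0x100, then 0x200 */
        return 0;
}

In the kernel this path is enabled by booting with stress_hpt (the
early_param added in the first hunk, hash MMU only), and stress_nr_groups()
keeps the queue deeper on LPAR, where H_REMOVE flushes the TLB, so that
practical forward progress remains possible.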
