diff options
59 files changed, 3146 insertions, 471 deletions
diff --git a/Documentation/arch/powerpc/htm.rst b/Documentation/arch/powerpc/htm.rst new file mode 100644 index 000000000000..fcb4eb6306b1 --- /dev/null +++ b/Documentation/arch/powerpc/htm.rst @@ -0,0 +1,104 @@ +.. SPDX-License-Identifier: GPL-2.0 +.. _htm: + +=================================== +HTM (Hardware Trace Macro) +=================================== + +Athira Rajeev, 2 Mar 2025 + +.. contents:: + :depth: 3 + + +Basic overview +============== + +H_HTM is used as an interface for executing Hardware Trace Macro (HTM) +functions, including setup, configuration, control and dumping of the HTM data. +For using HTM, it is required to setup HTM buffers and HTM operations can +be controlled using the H_HTM hcall. The hcall can be invoked for any core/chip +of the system from within a partition itself. To use this feature, a debugfs +folder called "htmdump" is present under /sys/kernel/debug/powerpc. + + +HTM debugfs example usage +========================= + +.. code-block:: sh + + # ls /sys/kernel/debug/powerpc/htmdump/ + coreindexonchip htmcaps htmconfigure htmflags htminfo htmsetup + htmstart htmstatus htmtype nodalchipindex nodeindex trace + +Details on each file: + +* nodeindex, nodalchipindex, coreindexonchip specifies which partition to configure the HTM for. +* htmtype: specifies the type of HTM. Supported target is hardwareTarget. +* trace: is to read the HTM data. +* htmconfigure: Configure/Deconfigure the HTM. Writing 1 to the file will configure the trace, writing 0 to the file will do deconfigure. +* htmstart: start/Stop the HTM. Writing 1 to the file will start the tracing, writing 0 to the file will stop the tracing. +* htmstatus: get the status of HTM. This is needed to understand the HTM state after each operation. +* htmsetup: set the HTM buffer size. Size of HTM buffer is in power of 2 +* htminfo: provides the system processor configuration details. This is needed to understand the appropriate values for nodeindex, nodalchipindex, coreindexonchip. +* htmcaps : provides the HTM capabilities like minimum/maximum buffer size, what kind of tracing the HTM supports etc. +* htmflags : allows to pass flags to hcall. Currently supports controlling the wrapping of HTM buffer. + +To see the system processor configuration details: + +.. code-block:: sh + + # cat /sys/kernel/debug/powerpc/htmdump/htminfo > htminfo_file + +The result can be interpreted using hexdump. + +To collect HTM traces for a partition represented by nodeindex as +zero, nodalchipindex as 1 and coreindexonchip as 12 + +.. code-block:: sh + + # cd /sys/kernel/debug/powerpc/htmdump/ + # echo 2 > htmtype + # echo 33 > htmsetup ( sets 8GB memory for HTM buffer, number is size in power of 2 ) + +This requires a CEC reboot to get the HTM buffers allocated. + +.. code-block:: sh + + # cd /sys/kernel/debug/powerpc/htmdump/ + # echo 2 > htmtype + # echo 0 > nodeindex + # echo 1 > nodalchipindex + # echo 12 > coreindexonchip + # echo 1 > htmflags # to set noWrap for HTM buffers + # echo 1 > htmconfigure # Configure the HTM + # echo 1 > htmstart # Start the HTM + # echo 0 > htmstart # Stop the HTM + # echo 0 > htmconfigure # Deconfigure the HTM + # cat htmstatus # Dump the status of HTM entries as data + +Above will set the htmtype and core details, followed by executing respective HTM operation. + +Read the HTM trace data +======================== + +After starting the trace collection, run the workload +of interest. Stop the trace collection after required period +of time, and read the trace file. + +.. code-block:: sh + + # cat /sys/kernel/debug/powerpc/htmdump/trace > trace_file + +This trace file will contain the relevant instruction traces +collected during the workload execution. And can be used as +input file for trace decoders to understand data. + +Benefits of using HTM debugfs interface +======================================= + +It is now possible to collect traces for a particular core/chip +from within any partition of the system and decode it. Through +this enablement, a small partition can be dedicated to collect the +trace data and analyze to provide important information for Performance +analysis, Software tuning, or Hardware debug. diff --git a/Documentation/arch/powerpc/kvm-nested.rst b/Documentation/arch/powerpc/kvm-nested.rst index 5defd13cc6c1..574592505604 100644 --- a/Documentation/arch/powerpc/kvm-nested.rst +++ b/Documentation/arch/powerpc/kvm-nested.rst @@ -208,13 +208,9 @@ associated values for each ID in the GSB:: flags: Bit 0: getGuestWideState: Request state of the Guest instead of an individual VCPU. - Bit 1: takeOwnershipOfVcpuState Indicate the L1 is taking - over ownership of the VCPU state and that the L0 can free - the storage holding the state. The VCPU state will need to - be returned to the Hypervisor via H_GUEST_SET_STATE prior - to H_GUEST_RUN_VCPU being called for this VCPU. The data - returned in the dataBuffer is in a Hypervisor internal - format. + Bit 1: getHostWideState: Request stats of the Host. This causes + the guestId and vcpuId parameters to be ignored and attempting + to get the VCPU/Guest state will cause an error. Bits 2-63: Reserved guestId: ID obtained from H_GUEST_CREATE vcpuId: ID of the vCPU pass to H_GUEST_CREATE_VCPU @@ -406,9 +402,10 @@ the partition like the timebase offset and partition scoped page table information. +--------+-------+----+--------+----------------------------------+ -| ID | Size | RW | Thread | Details | -| | Bytes | | Guest | | -| | | | Scope | | +| ID | Size | RW |(H)ost | Details | +| | Bytes | |(G)uest | | +| | | |(T)hread| | +| | | |Scope | | +========+=======+====+========+==================================+ | 0x0000 | | RW | TG | NOP element | +--------+-------+----+--------+----------------------------------+ @@ -434,6 +431,29 @@ table information. | | | | |- 0x8 Table size. | +--------+-------+----+--------+----------------------------------+ | 0x0007-| | | | Reserved | +| 0x07FF | | | | | ++--------+-------+----+--------+----------------------------------+ +| 0x0800 | 0x08 | R | H | Current usage in bytes of the | +| | | | | L0's Guest Management Space | +| | | | | for an L1-Lpar. | ++--------+-------+----+--------+----------------------------------+ +| 0x0801 | 0x08 | R | H | Max bytes available in the | +| | | | | L0's Guest Management Space for | +| | | | | an L1-Lpar | ++--------+-------+----+--------+----------------------------------+ +| 0x0802 | 0x08 | R | H | Current usage in bytes of the | +| | | | | L0's Guest Page Table Management | +| | | | | Space for an L1-Lpar | ++--------+-------+----+--------+----------------------------------+ +| 0x0803 | 0x08 | R | H | Max bytes available in the L0's | +| | | | | Guest Page Table Management | +| | | | | Space for an L1-Lpar | ++--------+-------+----+--------+----------------------------------+ +| 0x0804 | 0x08 | R | H | Cumulative Reclaimed bytes from | +| | | | | L0 Guest's Page Table Management | +| | | | | Space due to overcommit | ++--------+-------+----+--------+----------------------------------+ +| 0x0805-| | | | Reserved | | 0x0BFF | | | | | +--------+-------+----+--------+----------------------------------+ | 0x0C00 | 0x10 | RW | T |Run vCPU Input Buffer: | diff --git a/Documentation/userspace-api/ioctl/ioctl-number.rst b/Documentation/userspace-api/ioctl/ioctl-number.rst index 7a1409ecc238..fee5c4731501 100644 --- a/Documentation/userspace-api/ioctl/ioctl-number.rst +++ b/Documentation/userspace-api/ioctl/ioctl-number.rst @@ -366,6 +366,12 @@ Code Seq# Include File Comments <mailto:linuxppc-dev> 0xB2 01-02 arch/powerpc/include/uapi/asm/papr-sysparm.h powerpc/pseries system parameter API <mailto:linuxppc-dev> +0xB2 03-05 arch/powerpc/include/uapi/asm/papr-indices.h powerpc/pseries indices API + <mailto:linuxppc-dev> +0xB2 06-07 arch/powerpc/include/uapi/asm/papr-platform-dump.h powerpc/pseries Platform Dump API + <mailto:linuxppc-dev> +0xB2 08 powerpc/include/uapi/asm/papr-physical-attestation.h powerpc/pseries Physical Attestation API + <mailto:linuxppc-dev> 0xB3 00 linux/mmc/ioctl.h 0xB4 00-0F linux/gpio.h <mailto:linux-gpio@vger.kernel.org> 0xB5 00-0F uapi/linux/rpmsg.h <mailto:linux-remoteproc@vger.kernel.org> diff --git a/MAINTAINERS b/MAINTAINERS index d4a2c7d4d14a..4661dc6e7523 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -13665,7 +13665,6 @@ M: Madhavan Srinivasan <maddy@linux.ibm.com> M: Michael Ellerman <mpe@ellerman.id.au> R: Nicholas Piggin <npiggin@gmail.com> R: Christophe Leroy <christophe.leroy@csgroup.eu> -R: Naveen N Rao <naveen@kernel.org> L: linuxppc-dev@lists.ozlabs.org S: Supported W: https://github.com/linuxppc/wiki/wiki diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index 6722625a406a..c3e0cc83f120 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -277,6 +277,7 @@ config PPC select HAVE_PERF_EVENTS_NMI if PPC64 select HAVE_PERF_REGS select HAVE_PERF_USER_STACK_DUMP + select HAVE_PREEMPT_DYNAMIC_KEY select HAVE_RETHOOK if KPROBES select HAVE_REGS_AND_STACK_ACCESS_API select HAVE_RELIABLE_STACKTRACE @@ -894,7 +895,7 @@ config DATA_SHIFT int "Data shift" if DATA_SHIFT_BOOL default 24 if STRICT_KERNEL_RWX && PPC64 range 17 28 if (STRICT_KERNEL_RWX || DEBUG_PAGEALLOC || KFENCE) && PPC_BOOK3S_32 - range 19 23 if (STRICT_KERNEL_RWX || DEBUG_PAGEALLOC || KFENCE) && PPC_8xx + range 14 23 if (STRICT_KERNEL_RWX || DEBUG_PAGEALLOC || KFENCE) && PPC_8xx range 20 24 if (STRICT_KERNEL_RWX || DEBUG_PAGEALLOC || KFENCE) && PPC_85xx default 22 if STRICT_KERNEL_RWX && PPC_BOOK3S_32 default 18 if (DEBUG_PAGEALLOC || KFENCE) && PPC_BOOK3S_32 @@ -907,10 +908,10 @@ config DATA_SHIFT On Book3S 32 (603+), DBATs are used to map kernel text and rodata RO. Smaller is the alignment, greater is the number of necessary DBATs. - On 8xx, large pages (512kb or 8M) are used to map kernel linear - memory. Aligning to 8M reduces TLB misses as only 8M pages are used - in that case. If PIN_TLB is selected, it must be aligned to 8M as - 8M pages will be pinned. + On 8xx, large pages (16kb or 512kb or 8M) are used to map kernel + linear memory. Aligning to 8M reduces TLB misses as only 8M pages + are used in that case. If PIN_TLB is selected, it must be aligned + to 8M as 8M pages will be pinned. config ARCH_FORCE_MAX_ORDER int "Order of maximal physically contiguous allocations" diff --git a/arch/powerpc/boot/Makefile b/arch/powerpc/boot/Makefile index 184d0680e661..a7ab087d412c 100644 --- a/arch/powerpc/boot/Makefile +++ b/arch/powerpc/boot/Makefile @@ -70,6 +70,7 @@ BOOTCPPFLAGS := -nostdinc $(LINUXINCLUDE) BOOTCPPFLAGS += -isystem $(shell $(BOOTCC) -print-file-name=include) BOOTCFLAGS := $(BOOTTARGETFLAGS) \ + -std=gnu11 \ -Wall -Wundef -Wstrict-prototypes -Wno-trigraphs \ -fno-strict-aliasing -O2 \ -msoft-float -mno-altivec -mno-vsx \ diff --git a/arch/powerpc/boot/rs6000.h b/arch/powerpc/boot/rs6000.h index a9d879155ef9..16df8f3c43f1 100644 --- a/arch/powerpc/boot/rs6000.h +++ b/arch/powerpc/boot/rs6000.h @@ -1,11 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 */ /* IBM RS/6000 "XCOFF" file definitions for BFD. Copyright (C) 1990, 1991 Free Software Foundation, Inc. - FIXME: Can someone provide a transliteration of this name into ASCII? - Using the following chars caused a compiler warning on HIUX (so I replaced - them with octal escapes), and isn't useful without an understanding of what - character set it is. - Written by Mimi Ph\373\364ng-Th\345o V\365 of IBM + Written by Mimi Phuong-Thao Vo of IBM and John Gilmore of Cygnus Support. */ /********************** FILE HEADER **********************/ diff --git a/arch/powerpc/include/asm/guest-state-buffer.h b/arch/powerpc/include/asm/guest-state-buffer.h index d107abe1468f..acd61eb36d59 100644 --- a/arch/powerpc/include/asm/guest-state-buffer.h +++ b/arch/powerpc/include/asm/guest-state-buffer.h @@ -28,6 +28,21 @@ /* Process Table Info */ #define KVMPPC_GSID_PROCESS_TABLE 0x0006 +/* Guest Management Heap Size */ +#define KVMPPC_GSID_L0_GUEST_HEAP 0x0800 + +/* Guest Management Heap Max Size */ +#define KVMPPC_GSID_L0_GUEST_HEAP_MAX 0x0801 + +/* Guest Pagetable Size */ +#define KVMPPC_GSID_L0_GUEST_PGTABLE_SIZE 0x0802 + +/* Guest Pagetable Max Size */ +#define KVMPPC_GSID_L0_GUEST_PGTABLE_SIZE_MAX 0x0803 + +/* Guest Pagetable Reclaim in bytes */ +#define KVMPPC_GSID_L0_GUEST_PGTABLE_RECLAIM 0x0804 + /* H_GUEST_RUN_VCPU input buffer Info */ #define KVMPPC_GSID_RUN_INPUT 0x0C00 /* H_GUEST_RUN_VCPU output buffer Info */ @@ -106,6 +121,11 @@ #define KVMPPC_GSE_GUESTWIDE_COUNT \ (KVMPPC_GSE_GUESTWIDE_END - KVMPPC_GSE_GUESTWIDE_START + 1) +#define KVMPPC_GSE_HOSTWIDE_START KVMPPC_GSID_L0_GUEST_HEAP +#define KVMPPC_GSE_HOSTWIDE_END KVMPPC_GSID_L0_GUEST_PGTABLE_RECLAIM +#define KVMPPC_GSE_HOSTWIDE_COUNT \ + (KVMPPC_GSE_HOSTWIDE_END - KVMPPC_GSE_HOSTWIDE_START + 1) + #define KVMPPC_GSE_META_START KVMPPC_GSID_RUN_INPUT #define KVMPPC_GSE_META_END KVMPPC_GSID_VPA #define KVMPPC_GSE_META_COUNT (KVMPPC_GSE_META_END - KVMPPC_GSE_META_START + 1) @@ -130,7 +150,8 @@ (KVMPPC_GSE_INTR_REGS_END - KVMPPC_GSE_INTR_REGS_START + 1) #define KVMPPC_GSE_IDEN_COUNT \ - (KVMPPC_GSE_GUESTWIDE_COUNT + KVMPPC_GSE_META_COUNT + \ + (KVMPPC_GSE_HOSTWIDE_COUNT + \ + KVMPPC_GSE_GUESTWIDE_COUNT + KVMPPC_GSE_META_COUNT + \ KVMPPC_GSE_DW_REGS_COUNT + KVMPPC_GSE_W_REGS_COUNT + \ KVMPPC_GSE_VSRS_COUNT + KVMPPC_GSE_INTR_REGS_COUNT) @@ -139,10 +160,11 @@ */ enum { KVMPPC_GS_CLASS_GUESTWIDE = 0x01, - KVMPPC_GS_CLASS_META = 0x02, - KVMPPC_GS_CLASS_DWORD_REG = 0x04, - KVMPPC_GS_CLASS_WORD_REG = 0x08, - KVMPPC_GS_CLASS_VECTOR = 0x10, + KVMPPC_GS_CLASS_HOSTWIDE = 0x02, + KVMPPC_GS_CLASS_META = 0x04, + KVMPPC_GS_CLASS_DWORD_REG = 0x08, + KVMPPC_GS_CLASS_WORD_REG = 0x10, + KVMPPC_GS_CLASS_VECTOR = 0x18, KVMPPC_GS_CLASS_INTR = 0x20, }; @@ -164,6 +186,7 @@ enum { */ enum { KVMPPC_GS_FLAGS_WIDE = 0x01, + KVMPPC_GS_FLAGS_HOST_WIDE = 0x02, }; /** @@ -287,7 +310,7 @@ struct kvmppc_gs_msg_ops { * struct kvmppc_gs_msg - a guest state message * @bitmap: the guest state ids that should be included * @ops: modify message behavior for reading and writing to buffers - * @flags: guest wide or thread wide + * @flags: host wide, guest wide or thread wide * @data: location where buffer data will be written to or from. * * A guest state message is allows flexibility in sending in receiving data diff --git a/arch/powerpc/include/asm/hvcall.h b/arch/powerpc/include/asm/hvcall.h index eeef13db2770..6df6dbbe1e7c 100644 --- a/arch/powerpc/include/asm/hvcall.h +++ b/arch/powerpc/include/asm/hvcall.h @@ -490,14 +490,15 @@ #define H_RPTI_PAGE_ALL (-1UL) /* Flags for H_GUEST_{S,G}_STATE */ -#define H_GUEST_FLAGS_WIDE (1UL<<(63-0)) +#define H_GUEST_FLAGS_WIDE (1UL << (63 - 0)) +#define H_GUEST_FLAGS_HOST_WIDE (1UL << (63 - 1)) /* Flag values used for H_{S,G}SET_GUEST_CAPABILITIES */ -#define H_GUEST_CAP_COPY_MEM (1UL<<(63-0)) -#define H_GUEST_CAP_POWER9 (1UL<<(63-1)) -#define H_GUEST_CAP_POWER10 (1UL<<(63-2)) -#define H_GUEST_CAP_POWER11 (1UL<<(63-3)) -#define H_GUEST_CAP_BITMAP2 (1UL<<(63-63)) +#define H_GUEST_CAP_COPY_MEM (1UL << (63 - 0)) +#define H_GUEST_CAP_POWER9 (1UL << (63 - 1)) +#define H_GUEST_CAP_POWER10 (1UL << (63 - 2)) +#define H_GUEST_CAP_POWER11 (1UL << (63 - 3)) +#define H_GUEST_CAP_BITMAP2 (1UL << (63 - 63)) /* * Defines for H_HTM - Macros for hardware trace macro (HTM) function. diff --git a/arch/powerpc/include/asm/plpar_wrappers.h b/arch/powerpc/include/asm/plpar_wrappers.h index 91be7b885944..f2b6cc4341bb 100644 --- a/arch/powerpc/include/asm/plpar_wrappers.h +++ b/arch/powerpc/include/asm/plpar_wrappers.h @@ -65,6 +65,14 @@ static inline long register_dtl(unsigned long cpu, unsigned long vpa) return vpa_call(H_VPA_REG_DTL, cpu, vpa); } +/* + * Invokes H_HTM hcall with parameters passed from htm_hcall_wrapper. + * flags: Set to hardwareTarget. + * target: Specifies target using node index, nodal chip index and core index. + * operation : action to perform ie configure, start, stop, deconfigure, trace + * based on the HTM type. + * param1, param2, param3: parameters for each action. + */ static inline long htm_call(unsigned long flags, unsigned long target, unsigned long operation, unsigned long param1, unsigned long param2, unsigned long param3) @@ -73,17 +81,17 @@ static inline long htm_call(unsigned long flags, unsigned long target, param1, param2, param3); } -static inline long htm_get_dump_hardware(unsigned long nodeindex, +static inline long htm_hcall_wrapper(unsigned long flags, unsigned long nodeindex, unsigned long nodalchipindex, unsigned long coreindexonchip, - unsigned long type, unsigned long addr, unsigned long size, - unsigned long offset) + unsigned long type, unsigned long htm_op, unsigned long param1, unsigned long param2, + unsigned long param3) { - return htm_call(H_HTM_FLAGS_HARDWARE_TARGET, + return htm_call(H_HTM_FLAGS_HARDWARE_TARGET | flags, H_HTM_TARGET_NODE_INDEX(nodeindex) | H_HTM_TARGET_NODAL_CHIP_INDEX(nodalchipindex) | H_HTM_TARGET_CORE_INDEX_ON_CHIP(coreindexonchip), - H_HTM_OP(H_HTM_OP_DUMP_DATA) | H_HTM_TYPE(type), - addr, size, offset); + H_HTM_OP(htm_op) | H_HTM_TYPE(type), + param1, param2, param3); } extern void vpa_init(int cpu); diff --git a/arch/powerpc/include/asm/preempt.h b/arch/powerpc/include/asm/preempt.h new file mode 100644 index 000000000000..000e2b9681f3 --- /dev/null +++ b/arch/powerpc/include/asm/preempt.h @@ -0,0 +1,16 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __ASM_POWERPC_PREEMPT_H +#define __ASM_POWERPC_PREEMPT_H + +#include <asm-generic/preempt.h> + +#if defined(CONFIG_PREEMPT_DYNAMIC) +#include <linux/jump_label.h> +DECLARE_STATIC_KEY_TRUE(sk_dynamic_irqentry_exit_cond_resched); +#define need_irq_preemption() \ + (static_branch_unlikely(&sk_dynamic_irqentry_exit_cond_resched)) +#else +#define need_irq_preemption() (IS_ENABLED(CONFIG_PREEMPTION)) +#endif + +#endif /* __ASM_POWERPC_PREEMPT_H */ diff --git a/arch/powerpc/include/asm/rtas.h b/arch/powerpc/include/asm/rtas.h index 04406162fc5a..75fa0293c508 100644 --- a/arch/powerpc/include/asm/rtas.h +++ b/arch/powerpc/include/asm/rtas.h @@ -515,6 +515,10 @@ extern char rtas_data_buf[RTAS_DATA_BUF_SIZE]; extern unsigned long rtas_rmo_buf; extern struct mutex rtas_ibm_get_vpd_lock; +extern struct mutex rtas_ibm_get_indices_lock; +extern struct mutex rtas_ibm_set_dynamic_indicator_lock; +extern struct mutex rtas_ibm_get_dynamic_sensor_state_lock; +extern struct mutex rtas_ibm_physical_attestation_lock; #define GLOBAL_INTERRUPT_QUEUE 9005 diff --git a/arch/powerpc/include/uapi/asm/papr-indices.h b/arch/powerpc/include/uapi/asm/papr-indices.h |
