diff options
Diffstat (limited to 'arch/x86/kernel')
| -rw-r--r-- | arch/x86/kernel/cpu/Makefile | 2 | ||||
| -rw-r--r-- | arch/x86/kernel/cpu/intel_rdt.c | 375 | ||||
| -rw-r--r-- | arch/x86/kernel/cpu/intel_rdt.h | 440 | ||||
| -rw-r--r-- | arch/x86/kernel/cpu/intel_rdt_ctrlmondata.c (renamed from arch/x86/kernel/cpu/intel_rdt_schemata.c) | 67 | ||||
| -rw-r--r-- | arch/x86/kernel/cpu/intel_rdt_monitor.c | 499 | ||||
| -rw-r--r-- | arch/x86/kernel/cpu/intel_rdt_rdtgroup.c | 1117 | ||||
| -rw-r--r-- | arch/x86/kernel/process_32.c | 2 | ||||
| -rw-r--r-- | arch/x86/kernel/process_64.c | 2 |
8 files changed, 2235 insertions, 269 deletions
diff --git a/arch/x86/kernel/cpu/Makefile b/arch/x86/kernel/cpu/Makefile index cdf82492b770..e17942c131c8 100644 --- a/arch/x86/kernel/cpu/Makefile +++ b/arch/x86/kernel/cpu/Makefile @@ -33,7 +33,7 @@ obj-$(CONFIG_CPU_SUP_CENTAUR) += centaur.o obj-$(CONFIG_CPU_SUP_TRANSMETA_32) += transmeta.o obj-$(CONFIG_CPU_SUP_UMC_32) += umc.o -obj-$(CONFIG_INTEL_RDT_A) += intel_rdt.o intel_rdt_rdtgroup.o intel_rdt_schemata.o +obj-$(CONFIG_INTEL_RDT) += intel_rdt.o intel_rdt_rdtgroup.o intel_rdt_monitor.o intel_rdt_ctrlmondata.o obj-$(CONFIG_X86_MCE) += mcheck/ obj-$(CONFIG_MTRR) += mtrr/ diff --git a/arch/x86/kernel/cpu/intel_rdt.c b/arch/x86/kernel/cpu/intel_rdt.c index 5b366462f579..cd5fc61ba450 100644 --- a/arch/x86/kernel/cpu/intel_rdt.c +++ b/arch/x86/kernel/cpu/intel_rdt.c @@ -30,7 +30,8 @@ #include <linux/cpuhotplug.h> #include <asm/intel-family.h> -#include <asm/intel_rdt.h> +#include <asm/intel_rdt_sched.h> +#include "intel_rdt.h" #define MAX_MBA_BW 100u #define MBA_IS_LINEAR 0x4 @@ -38,7 +39,13 @@ /* Mutex to protect rdtgroup access. */ DEFINE_MUTEX(rdtgroup_mutex); -DEFINE_PER_CPU_READ_MOSTLY(int, cpu_closid); +/* + * The cached intel_pqr_state is strictly per CPU and can never be + * updated from a remote CPU. Functions which modify the state + * are called with interrupts disabled and no preemption, which + * is sufficient for the protection. + */ +DEFINE_PER_CPU(struct intel_pqr_state, pqr_state); /* * Used to store the max resource name width and max resource data width @@ -46,6 +53,12 @@ DEFINE_PER_CPU_READ_MOSTLY(int, cpu_closid); */ int max_name_width, max_data_width; +/* + * Global boolean for rdt_alloc which is true if any + * resource allocation is enabled. + */ +bool rdt_alloc_capable; + static void mba_wrmsr(struct rdt_domain *d, struct msr_param *m, struct rdt_resource *r); static void @@ -54,7 +67,9 @@ cat_wrmsr(struct rdt_domain *d, struct msr_param *m, struct rdt_resource *r); #define domain_init(id) LIST_HEAD_INIT(rdt_resources_all[id].domains) struct rdt_resource rdt_resources_all[] = { + [RDT_RESOURCE_L3] = { + .rid = RDT_RESOURCE_L3, .name = "L3", .domains = domain_init(RDT_RESOURCE_L3), .msr_base = IA32_L3_CBM_BASE, @@ -67,8 +82,11 @@ struct rdt_resource rdt_resources_all[] = { }, .parse_ctrlval = parse_cbm, .format_str = "%d=%0*x", + .fflags = RFTYPE_RES_CACHE, }, + [RDT_RESOURCE_L3DATA] = { + .rid = RDT_RESOURCE_L3DATA, .name = "L3DATA", .domains = domain_init(RDT_RESOURCE_L3DATA), .msr_base = IA32_L3_CBM_BASE, @@ -81,8 +99,11 @@ struct rdt_resource rdt_resources_all[] = { }, .parse_ctrlval = parse_cbm, .format_str = "%d=%0*x", + .fflags = RFTYPE_RES_CACHE, }, + [RDT_RESOURCE_L3CODE] = { + .rid = RDT_RESOURCE_L3CODE, .name = "L3CODE", .domains = domain_init(RDT_RESOURCE_L3CODE), .msr_base = IA32_L3_CBM_BASE, @@ -95,8 +116,11 @@ struct rdt_resource rdt_resources_all[] = { }, .parse_ctrlval = parse_cbm, .format_str = "%d=%0*x", + .fflags = RFTYPE_RES_CACHE, }, + [RDT_RESOURCE_L2] = { + .rid = RDT_RESOURCE_L2, .name = "L2", .domains = domain_init(RDT_RESOURCE_L2), .msr_base = IA32_L2_CBM_BASE, @@ -109,8 +133,11 @@ struct rdt_resource rdt_resources_all[] = { }, .parse_ctrlval = parse_cbm, .format_str = "%d=%0*x", + .fflags = RFTYPE_RES_CACHE, }, + [RDT_RESOURCE_MBA] = { + .rid = RDT_RESOURCE_MBA, .name = "MB", .domains = domain_init(RDT_RESOURCE_MBA), .msr_base = IA32_MBA_THRTL_BASE, @@ -118,6 +145,7 @@ struct rdt_resource rdt_resources_all[] = { .cache_level = 3, .parse_ctrlval = parse_bw, .format_str = "%d=%*d", + .fflags = RFTYPE_RES_MB, }, }; @@ -144,33 +172,28 @@ static unsigned int cbm_idx(struct rdt_resource *r, unsigned int closid) * is always 20 on hsw server parts. The minimum cache bitmask length * allowed for HSW server is always 2 bits. Hardcode all of them. */ -static inline bool cache_alloc_hsw_probe(void) +static inline void cache_alloc_hsw_probe(void) { - if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL && - boot_cpu_data.x86 == 6 && - boot_cpu_data.x86_model == INTEL_FAM6_HASWELL_X) { - struct rdt_resource *r = &rdt_resources_all[RDT_RESOURCE_L3]; - u32 l, h, max_cbm = BIT_MASK(20) - 1; - - if (wrmsr_safe(IA32_L3_CBM_BASE, max_cbm, 0)) - return false; - rdmsr(IA32_L3_CBM_BASE, l, h); + struct rdt_resource *r = &rdt_resources_all[RDT_RESOURCE_L3]; + u32 l, h, max_cbm = BIT_MASK(20) - 1; - /* If all the bits were set in MSR, return success */ - if (l != max_cbm) - return false; + if (wrmsr_safe(IA32_L3_CBM_BASE, max_cbm, 0)) + return; + rdmsr(IA32_L3_CBM_BASE, l, h); - r->num_closid = 4; - r->default_ctrl = max_cbm; - r->cache.cbm_len = 20; - r->cache.min_cbm_bits = 2; - r->capable = true; - r->enabled = true; + /* If all the bits were set in MSR, return success */ + if (l != max_cbm) + return; - return true; - } + r->num_closid = 4; + r->default_ctrl = max_cbm; + r->cache.cbm_len = 20; + r->cache.shareable_bits = 0xc0000; + r->cache.min_cbm_bits = 2; + r->alloc_capable = true; + r->alloc_enabled = true; - return false; + rdt_alloc_capable = true; } /* @@ -213,15 +236,14 @@ static bool rdt_get_mem_config(struct rdt_resource *r) return false; } r->data_width = 3; - rdt_get_mba_infofile(r); - r->capable = true; - r->enabled = true; + r->alloc_capable = true; + r->alloc_enabled = true; return true; } -static void rdt_get_cache_config(int idx, struct rdt_resource *r) +static void rdt_get_cache_alloc_cfg(int idx, struct rdt_resource *r) { union cpuid_0x10_1_eax eax; union cpuid_0x10_x_edx edx; @@ -231,10 +253,10 @@ static void rdt_get_cache_config(int idx, struct rdt_resource *r) r->num_closid = edx.split.cos_max + 1; r->cache.cbm_len = eax.split.cbm_len + 1; r->default_ctrl = BIT_MASK(eax.split.cbm_len + 1) - 1; + r->cache.shareable_bits = ebx & r->default_ctrl; r->data_width = (r->cache.cbm_len + 3) / 4; - rdt_get_cache_infofile(r); - r->capable = true; - r->enabled = true; + r->alloc_capable = true; + r->alloc_enabled = true; } static void rdt_get_cdp_l3_config(int type) @@ -246,12 +268,12 @@ static void rdt_get_cdp_l3_config(int type) r->cache.cbm_len = r_l3->cache.cbm_len; r->default_ctrl = r_l3->default_ctrl; r->data_width = (r->cache.cbm_len + 3) / 4; - r->capable = true; + r->alloc_capable = true; /* * By default, CDP is disabled. CDP can be enabled by mount parameter * "cdp" during resctrl file system mount time. */ - r->enabled = false; + r->alloc_enabled = false; } static int get_cache_id(int cpu, int level) @@ -300,6 +322,19 @@ cat_wrmsr(struct rdt_domain *d, struct msr_param *m, struct rdt_resource *r) wrmsrl(r->msr_base + cbm_idx(r, i), d->ctrl_val[i]); } +struct rdt_domain *get_domain_from_cpu(int cpu, struct rdt_resource *r) +{ + struct rdt_domain *d; + + list_for_each_entry(d, &r->domains, list) { + /* Find the domain that contains this CPU */ + if (cpumask_test_cpu(cpu, &d->cpu_mask)) + return d; + } + + return NULL; +} + void rdt_ctrl_update(void *arg) { struct msr_param *m = arg; @@ -307,12 +342,10 @@ void rdt_ctrl_update(void *arg) int cpu = smp_processor_id(); struct rdt_domain *d; - list_for_each_entry(d, &r->domains, list) { - /* Find the domain that contains this CPU */ - if (cpumask_test_cpu(cpu, &d->cpu_mask)) { - r->msr_update(d, m, r); - return; - } + d = get_domain_from_cpu(cpu, r); + if (d) { + r->msr_update(d, m, r); + return; } pr_warn_once("cpu %d not found in any domain for resource %s\n", cpu, r->name); @@ -326,8 +359,8 @@ void rdt_ctrl_update(void *arg) * caller, return the first domain whose id is bigger than the input id. * The domain list is sorted by id in ascending order. */ -static struct rdt_domain *rdt_find_domain(struct rdt_resource *r, int id, - struct list_head **pos) +struct rdt_domain *rdt_find_domain(struct rdt_resource *r, int id, + struct list_head **pos) { struct rdt_domain *d; struct list_head *l; @@ -377,6 +410,44 @@ static int domain_setup_ctrlval(struct rdt_resource *r, struct rdt_domain *d) return 0; } +static int domain_setup_mon_state(struct rdt_resource *r, struct rdt_domain *d) +{ + size_t tsize; + + if (is_llc_occupancy_enabled()) { + d->rmid_busy_llc = kcalloc(BITS_TO_LONGS(r->num_rmid), + sizeof(unsigned long), + GFP_KERNEL); + if (!d->rmid_busy_llc) + return -ENOMEM; + INIT_DELAYED_WORK(&d->cqm_limbo, cqm_handle_limbo); + } + if (is_mbm_total_enabled()) { + tsize = sizeof(*d->mbm_total); + d->mbm_total = kcalloc(r->num_rmid, tsize, GFP_KERNEL); + if (!d->mbm_total) { + kfree(d->rmid_busy_llc); + return -ENOMEM; + } + } + if (is_mbm_local_enabled()) { + tsize = sizeof(*d->mbm_local); + d->mbm_local = kcalloc(r->num_rmid, tsize, GFP_KERNEL); + if (!d->mbm_local) { + kfree(d->rmid_busy_llc); + kfree(d->mbm_total); + return -ENOMEM; + } + } + + if (is_mbm_enabled()) { + INIT_DELAYED_WORK(&d->mbm_over, mbm_handle_overflow); + mbm_setup_overflow_handler(d, MBM_OVERFLOW_INTERVAL); + } + + return 0; +} + /* * domain_add_cpu - Add a cpu to a resource's domain list. * @@ -412,14 +483,26 @@ static void domain_add_cpu(int cpu, struct rdt_resource *r) return; d->id = id; + cpumask_set_cpu(cpu, &d->cpu_mask); - if (domain_setup_ctrlval(r, d)) { + if (r->alloc_capable && domain_setup_ctrlval(r, d)) { + kfree(d); + return; + } + + if (r->mon_capable && domain_setup_mon_state(r, d)) { kfree(d); return; } - cpumask_set_cpu(cpu, &d->cpu_mask); list_add_tail(&d->list, add_pos); + + /* + * If resctrl is mounted, add + * per domain monitor data directories. + */ + if (static_branch_unlikely(&rdt_mon_enable_key)) + mkdir_mondata_subdir_allrdtgrp(r, d); } static void domain_remove_cpu(int cpu, struct rdt_resource *r) @@ -435,19 +518,58 @@ static void domain_remove_cpu(int cpu, struct rdt_resource *r) cpumask_clear_cpu(cpu, &d->cpu_mask); if (cpumask_empty(&d->cpu_mask)) { + /* + * If resctrl is mounted, remove all the + * per domain monitor data directories. + */ + if (static_branch_unlikely(&rdt_mon_enable_key)) + rmdir_mondata_subdir_allrdtgrp(r, d->id); kfree(d->ctrl_val); + kfree(d->rmid_busy_llc); + kfree(d->mbm_total); + kfree(d->mbm_local); list_del(&d->list); + if (is_mbm_enabled()) + cancel_delayed_work(&d->mbm_over); + if (is_llc_occupancy_enabled() && has_busy_rmid(r, d)) { + /* + * When a package is going down, forcefully + * decrement rmid->ebusy. There is no way to know + * that the L3 was flushed and hence may lead to + * incorrect counts in rare scenarios, but leaving + * the RMID as busy creates RMID leaks if the + * package never comes back. + */ + __check_limbo(d, true); + cancel_delayed_work(&d->cqm_limbo); + } + kfree(d); + return; + } + + if (r == &rdt_resources_all[RDT_RESOURCE_L3]) { + if (is_mbm_enabled() && cpu == d->mbm_work_cpu) { + cancel_delayed_work(&d->mbm_over); + mbm_setup_overflow_handler(d, 0); + } + if (is_llc_occupancy_enabled() && cpu == d->cqm_work_cpu && + has_busy_rmid(r, d)) { + cancel_delayed_work(&d->cqm_limbo); + cqm_setup_limbo_handler(d, 0); + } } } -static void clear_closid(int cpu) +static void clear_closid_rmid(int cpu) { struct intel_pqr_state *state = this_cpu_ptr(&pqr_state); - per_cpu(cpu_closid, cpu) = 0; - state->closid = 0; - wrmsr(MSR_IA32_PQR_ASSOC, state->rmid, 0); + state->default_closid = 0; + state->default_rmid = 0; + state->cur_closid = 0; + state->cur_rmid = 0; + wrmsr(IA32_PQR_ASSOC, 0, 0); } static int intel_rdt_online_cpu(unsigned int cpu) @@ -459,12 +581,23 @@ static int intel_rdt_online_cpu(unsigned int cpu) domain_add_cpu(cpu, r); /* The cpu is set in default rdtgroup after online. */ cpumask_set_cpu(cpu, &rdtgroup_default.cpu_mask); - clear_closid(cpu); + clear_closid_rmid(cpu); mutex_unlock(&rdtgroup_mutex); return 0; } +static void clear_childcpus(struct rdtgroup *r, unsigned int cpu) +{ + struct rdtgroup *cr; + + list_for_each_entry(cr, &r->mon.crdtgrp_list, mon.crdtgrp_list) { + if (cpumask_test_and_clear_cpu(cpu, &cr->cpu_mask)) { + break; + } + } +} + static int intel_rdt_offline_cpu(unsigned int cpu) { struct rdtgroup *rdtgrp; @@ -474,10 +607,12 @@ static int intel_rdt_offline_cpu(unsigned int cpu) for_each_capable_rdt_resource(r) domain_remove_cpu(cpu, r); list_for_each_entry(rdtgrp, &rdt_all_groups, rdtgroup_list) { - if (cpumask_test_and_clear_cpu(cpu, &rdtgrp->cpu_mask)) + if (cpumask_test_and_clear_cpu(cpu, &rdtgrp->cpu_mask)) { + clear_childcpus(rdtgrp, cpu); break; + } } - clear_closid(cpu); + clear_closid_rmid(cpu); mutex_unlock(&rdtgroup_mutex); return 0; @@ -492,7 +627,7 @@ static __init void rdt_init_padding(void) struct rdt_resource *r; int cl; - for_each_capable_rdt_resource(r) { + for_each_alloc_capable_rdt_resource(r) { cl = strlen(r->name); if (cl > max_name_width) max_name_width = cl; @@ -502,38 +637,153 @@ static __init void rdt_init_padding(void) } } -static __init bool get_rdt_resources(void) +enum { + RDT_FLAG_CMT, + RDT_FLAG_MBM_TOTAL, + RDT_FLAG_MBM_LOCAL, + RDT_FLAG_L3_CAT, + RDT_FLAG_L3_CDP, + RDT_FLAG_L2_CAT, + RDT_FLAG_MBA, +}; + +#define RDT_OPT(idx, n, f) \ +[idx] = { \ + .name = n, \ + .flag = f \ +} + +struct rdt_options { + char *name; + int flag; + bool force_off, force_on; +}; + +static struct rdt_options rdt_options[] __initdata = { + RDT_OPT(RDT_FLAG_CMT, "cmt", X86_FEATURE_CQM_OCCUP_LLC), + RDT_OPT(RDT_FLAG_MBM_TOTAL, "mbmtotal", X86_FEATURE_CQM_MBM_TOTAL), + RDT_OPT(RDT_FLAG_MBM_LOCAL, "mbmlocal", X86_FEATURE_CQM_MBM_LOCAL), + RDT_OPT(RDT_FLAG_L3_CAT, "l3cat", X86_FEATURE_CAT_L3), + RDT_OPT(RDT_FLAG_L3_CDP, "l3cdp", X86_FEATURE_CDP_L3), + RDT_OPT(RDT_FLAG_L2_CAT, "l2cat", X86_FEATURE_CAT_L2), + RDT_OPT(RDT_FLAG_MBA, "mba", X86_FEATURE_MBA), +}; +#define NUM_RDT_OPTIONS ARRAY_SIZE(rdt_options) + +static int __init set_rdt_options(char *str) +{ + struct rdt_options *o; + bool force_off; + char *tok; + + if (*str == '=') + str++; + while ((tok = strsep(&str, ",")) != NULL) { + force_off = *tok == '!'; + if (force_off) + tok++; + for (o = rdt_options; o < &rdt_options[NUM_RDT_OPTIONS]; o++) { + if (strcmp(tok, o->name) == 0) { + if (force_off) + o->force_off = true; + else + o->force_on = true; + break; + } + } + } + return 1; +} +__setup("rdt", set_rdt_options); + +static bool __init rdt_cpu_has(int flag) +{ + bool ret = boot_cpu_has(flag); + struct rdt_options *o; + + if (!ret) + return ret; + + for (o = rdt_options; o < &rdt_options[NUM_RDT_OPTIONS]; o++) { + if (flag == o->flag) { + if (o->force_off) + ret = false; + if (o->force_on) + ret = true; + break; + } + } + return ret; +} + +static __init bool get_rdt_alloc_resources(void) { bool ret = false; - if (cache_alloc_hsw_probe()) + if (rdt_alloc_capable) return true; if (!boot_cpu_has(X86_FEATURE_RDT_A)) return false; - if (boot_cpu_has(X86_FEATURE_CAT_L3)) { - rdt_get_cache_config(1, &rdt_resources_all[RDT_RESOURCE_L3]); - if (boot_cpu_has(X86_FEATURE_CDP_L3)) { + if (rdt_cpu_has(X86_FEATURE_CAT_L3)) { + rdt_get_cache_alloc_cfg(1, &rdt_resources_all[RDT_RESOURCE_L3]); + if (rdt_cpu_has(X86_FEATURE_CDP_L3)) { rdt_get_cdp_l3_config(RDT_RESOURCE_L3DATA); rdt_get_cdp_l3_config(RDT_RESOURCE_L3CODE); } ret = true; } - if (boot_cpu_has(X86_FEATURE_CAT_L2)) { + if (rdt_cpu_has(X86_FEATURE_CAT_L2)) { /* CPUID 0x10.2 fields are same format at 0x10.1 */ - rdt_get_cache_config(2, &rdt_resources_all[RDT_RESOURCE_L2]); + rdt_get_cache_alloc_cfg(2, &rdt_resources_all[RDT_RESOURCE_L2]); ret = true; } - if (boot_cpu_has(X86_FEATURE_MBA)) { + if (rdt_cpu_has(X86_FEATURE_MBA)) { if (rdt_get_mem_config(&rdt_resources_all[RDT_RESOURCE_MBA])) ret = true; } - return ret; } +static __init bool get_rdt_mon_resources(void) +{ + if (rdt_cpu_has(X86_FEATURE_CQM_OCCUP_LLC)) + rdt_mon_features |= (1 << QOS_L3_OCCUP_EVENT_ID); + if (rdt_cpu_has(X86_FEATURE_CQM_MBM_TOTAL)) + rdt_mon_features |= (1 << QOS_L3_MBM_TOTAL_EVENT_ID); + if (rdt_cpu_has(X86_FEATURE_CQM_MBM_LOCAL)) + rdt_mon_features |= (1 << QOS_L3_MBM_LOCAL_EVENT_ID); + + if (!rdt_mon_features) + return false; + + return !rdt_get_mon_l3_config(&rdt_resources_all[RDT_RESOURCE_L3]); +} + +static __init void rdt_quirks(void) +{ + switch (boot_cpu_data.x86_model) { + case INTEL_FAM6_HASWELL_X: + if (!rdt_options[RDT_FLAG_L3_CAT].force_off) + cache_alloc_hsw_probe(); + break; + case INTEL_FAM6_SKYLAKE_X: + if (boot_cpu_data.x86_mask <= 4) + set_rdt_options("!cmt,!mbmtotal,!mbmlocal,!l3cat"); + } +} + +static __init bool get_rdt_resources(void) +{ + rdt_quirks(); + rdt_alloc_capable = get_rdt_alloc_resources(); + rdt_mon_capable = get_rdt_mon_resources(); + + return (rdt_mon_capable || rdt_alloc_capable); +} + static int __init intel_rdt_late_init(void) { struct rdt_resource *r; @@ -556,9 +806,12 @@ static int __init intel_rdt_late_init(void) return ret; } - for_each_capable_rdt_resource(r) + for_each_alloc_capable_rdt_resource(r) pr_info("Intel RDT %s allocation detected\n", r->name); + for_each_mon_capable_rdt_resource(r) + pr_info("Intel RDT %s monitoring detected\n", r->name); + return 0; } diff --git a/arch/x86/kernel/cpu/intel_rdt.h b/arch/x86/kernel/cpu/intel_rdt.h new file mode 100644 index 000000000000..ebaddaeef023 --- /dev/null +++ b/arch/x86/kernel/cpu/intel_rdt.h @@ -0,0 +1,440 @@ +#ifndef _ASM_X86_INTEL_RDT_H +#define _ASM_X86_INTEL_RDT_H + +#include <linux/sched.h> +#include <linux/kernfs.h> +#include <linux/jump_label.h> + +#define IA32_L3_QOS_CFG 0xc81 +#define IA32_L3_CBM_BASE 0xc90 +#define IA32_L2_CBM_BASE 0xd10 +#define IA32_MBA_THRTL_BASE 0xd50 + +#define L3_QOS_CDP_ENABLE 0x01ULL + +/* + * Event IDs are used to program IA32_QM_EVTSEL before reading event + * counter from IA32_QM_CTR + */ +#define QOS_L3_OCCUP_EVENT_ID 0x01 +#define QOS_L3_MBM_TOTAL_EVENT_ID 0x02 +#define QOS_L3_MBM_LOCAL_EVENT_ID 0x03 + +#define CQM_LIMBOCHECK_INTERVAL 1000 + +#define MBM_CNTR_WIDTH 24 +#define MBM_OVERFLOW_INTERVAL 1000 + +#define RMID_VAL_ERROR BIT_ULL(63) +#define RMID_VAL_UNAVAIL BIT_ULL(62) + +DECLARE_STATIC_KEY_FALSE(rdt_enable_key); + +/** + * struct mon_evt - Entry in the event list of a resource + * @evtid: event id + * @name: name of the event + */ +struct mon_evt { + u32 evtid; + char *name; + struct list_head list; +}; + +/** + * struct mon_data_bits - Monitoring details for each event file + * @rid: Resource id associated with the event file. + * @evtid: Event id associated with the event file + * @domid: The domain to which the event file belongs + */ +union mon_data_bits { + void *priv; + struct { + unsigned int rid : 10; + unsigned int evtid : 8; + unsigned int domid : 14; + } u; +}; + +struct rmid_read { + struct rdtgroup *rgrp; + struct rdt_domain *d; + int evtid; + bool first; + u64 val; +}; + +extern unsigned int intel_cqm_threshold; +extern bool rdt_alloc_capable; +extern bool rdt_mon_capable; +extern unsigned int rdt_mon_features; + +enum rdt_group_type { + RDTCTRL_GROUP = 0, + RDTMON_GROUP, + RDT_NUM_GROUP, +}; + +/** + * struct mongroup - store mon group's data in resctrl fs. + * @mon_data_kn kernlfs node for the mon_data directory + * @parent: parent rdtgrp + * @crdtgrp_list: child rdtgroup node list + * @rmid: rmid for this rdtgroup + */ +struct mongroup { + struct kernfs_node *mon_data_kn; + struct rdtgroup *parent; + struct list_head crdtgrp_list; + u32 rmid; +}; + +/** + * struct rdtgroup - store rdtgroup's data in resctrl file system. + * @kn: kernfs node + * @rdtgroup_list: linked list for all rdtgroups + * @closid: closid for this rdtgroup + * @cpu_mask: CPUs assigned to this rdtgroup + * @flags: status bits + * @waitcount: how many cpus expect to find this + * group when they acquire rdtgroup_mutex + * @type: indicates type of this rdtgroup - either + * monitor only or ctrl_mon group + * @mon: mongroup related data + */ +struct rdtgroup { + struct kernfs_node *kn; + struct list_head rdtgroup_list; + u32 closid; + struct cpumask cpu_mask; + int flags; + atomic_t waitcount; + enum rdt_group_type type; + struct mongroup mon; +}; + +/* rdtgroup.flags */ +#define RDT_DELETED 1 + +/* rftype.flags */ +#define RFTYPE_FLAGS_CPUS_LIST 1 + +/* + * Define the file type flags for base and info directories. + */ +#define RFTYPE_INFO BIT(0) +#define RFTYPE_BASE BIT(1) +#define RF_CTRLSHIFT 4 +#define RF_MONSHIFT 5 +#define RFTYPE_CTRL BIT(RF_CTRLSHIFT) +#define RFTYPE_MON BIT(RF_MONSHIFT) +#define RFTYPE_RES_CACHE BIT(8) +#define RFTYPE_RES_MB BIT(9) +#define RF_CTRL_INFO (RFTYPE_INFO | RFTYPE_CTRL) +#define RF_MON_INFO (RFTYPE_INFO | RFTYPE_MON) +#define RF_CTRL_BASE (RFTYPE_BASE | RFTYPE_CTRL) + +/* List of all resource groups */ +extern struct list_head rdt_all_groups; + +extern int max_name_width, max_data_width; + +int __init rdtgroup_init(void); + +/** + * struct rftype - describe each file in the resctrl file system + * @name: File name + * @mode: Access mode + * @kf_ops: File operations + * @flags: File specific RFTYPE_FLAGS_* flags + * @fflags: File specific RF_* or RFTYPE_* flags + * @seq_show: Show content of the file + * @write: Write to the file + */ +struct rftype { + char *name; + umode_t mode; + struct kernfs_ops *kf_ops; + unsigned long flags; + unsigned long fflags; + + int (*seq_show)(struct kernfs_open_file *of, + struct seq_file *sf, void *v); + /* + * write() is the generic write callback which maps directly to + * kernfs write operation and overrides all other operations. + * Maximum write size is determined by ->max_write_len. + */ + ssize_t (*write)(struct kernfs_open_file *of, + char *buf, size_t nbytes, loff_t off); +}; + +/** + * struct mbm_state - status for each MBM counter in each domain + * @chunks: Total data moved (multiply by rdt_group.mon_scale to get bytes) + * @prev_msr Value of IA32_QM_CTR for this RMID last time we read it + */ +struct mbm_state { + u64 chunks; + u64 prev_msr; +}; + +/** + * struct rdt_domain - group of cpus sharing an RDT resource + * @list: all instances of this resource + * @id: unique id for this instance + * @cpu_mask: which cpus share this resource + * @rmid_busy_llc: + * bitmap of which limbo RMIDs are above threshold + * @mbm_total: saved state for MBM total bandwidth + * @mbm_local: saved state for MBM local bandwidth + * @mbm_over: worker to periodically read MBM h/w counters + * @cqm_limbo: worker to periodically read CQM h/w counters + * @mbm_work_cpu: + * worker cpu for MBM h/w counters + * @cqm_work_cpu: + * worker cpu for CQM h/w counters + * @ctrl_val: array of cache or mem ctrl values (indexed by CLOSID) + * @new_ctrl: new ctrl value to be loaded + * @have_new_ctrl: did user provide new_ctrl for this domain + */ +struct rdt_domain { + struct list_head list; + int id; + struct cpumask cpu_mask; + unsigned long *rmid_busy_llc; + struct mbm_state *mbm_total; + struct mbm_state *mbm_local; + struct delayed_work mbm_over; + struct delayed_work cqm_limbo; + int mbm_work_cpu; + int cqm_work_cpu; + u32 *ctrl_val; + u32 new_ctrl; + bool have_new_ctrl; +}; + +/** + * struct msr_param - set a range of MSRs from a domain + * @res: The resource to use + * @low: Beginning index from base MSR + * @high: End index + */ +struct msr_param { + struct rdt_resource *res; + int low; + int high; +}; + +/** + * struct rdt_cache - Cache allocation related data + * @cbm_len: Length of the cache bit mask + * @min_cbm_bits: Minimum number of consecutive bits to be set + * @cbm_idx_mult: Multiplier of CBM index + * @cbm_idx_offset: Offset of CBM index. CBM index is computed by: + * closid * cbm_idx_multi + cbm_idx_offset + * in a cache bit mask + * @shareable_bits: Bitmask of shareable resource with other + * executing entities + */ +struct rdt_cache { + unsigned int cbm_len; + unsigned int min_cbm_bits; + unsigned int cbm_idx_mult; + unsigned int cbm_idx_offset; + unsigned int shareable_bits; +}; + +/** + * struct rdt_membw - Memory bandwidth allocation related data + * @max_delay: Max throttle delay. Delay is the hardware + * representation for memory bandwidth. + * @min_bw: Minimum memory bandwidth percentage user can request + * @bw_gran: Granularity at which the memory bandwidth is allocated + * @delay_linear: True if memory B/W delay is in linear scale + * @mb_map: Mapping of memory B/W percentage to memory B/W delay + */ +struct rdt_membw { + u32 max_delay; + u32 min_bw; + u32 bw_gran; + u32 delay_linear; + u32 *mb_map; +}; + +static inline bool is_llc_occupancy_enabled(void) +{ + return (rdt_mon_features & (1 << QOS_L3_OCCUP_EVENT_ID)); +} + +static inline bool is_mbm_total_enabled(void) +{ + return (rdt_mon_features & (1 << QOS_L3_MBM_TOTAL_EVENT_ID)); +} + +static inline bool is_mbm_local_enabled(void) +{ + return (rdt_mon_features & (1 << QOS_L3_MBM_LOCAL_EVENT_ID)); +} + +static inline bool is_mbm_enabled(void) +{ + return (is_mbm_total_enabled() || is_mbm_local_enabled()); +} + +static inline bool is_mbm_event(int e) +{ + return (e >= QOS_L3_MBM_TOTAL_EVENT_ID && + e <= QOS_L3_MBM_LOCAL_EVENT_ID); +} + +/** + * struct rdt_resource - attributes of an RDT resource + * @rid: The index of the resource + * @alloc_enabled: Is allocation enabled on this machine + * @mon_enabled: Is monitoring enabled for this feature + * @alloc_capable: Is allocation available on this machine + * @mon_capable: Is monitor feature available on this machine + * @name: Name to use in "schemata" file + * @num_closid: Number of CLOSIDs available + * @cache_level: Which cache level defines scope of this resource + * @default_ctrl: Specifies default cache cbm or memory B/W percent. + * @msr_base: Base MSR address for CBMs + * @msr_update: Function pointer to update QOS MSRs + * @data_width: Character width of data when displaying + * @domains: All domains for this resource + * @cache: Cache allocation related data + * @format_str: Per resource format string to show domain value + * @parse_ctrlval: Per resource function pointer to parse control values + * @evt_list: List of monitoring events + * @num_rmid: Number of RMIDs available + * @mon_scale: cqm counter * mon_scale = occupancy in bytes + * @fflags: flags to choose base and info files + */ +struct rdt_resource { + int rid; + bool alloc_enabled; + bool mon_enabled; + bool alloc_capable; + bool mon_capable; + char *name; + int num_closid; + int cache_level; + u32 default_ctrl; + unsigned int msr_base; + void (*msr_update) (struct rdt_domain *d, struct msr_param *m, + struct rdt_resource *r); + int data_width; + struct list_head domains; + struct rdt_cache cache; + struct rdt_membw membw; + const char *format_str; + int (*parse_ctrlval) (char *buf, struct rdt_resource *r, + struct rdt_domain *d); + struct list_head evt_list; + int num_rmid; + unsigned int mon_scale; + unsigned long fflags; +}; + +int parse_cbm(char *buf, struct rdt_resource *r, struct rdt_domain *d); +int parse_bw(char *buf, struct rdt_resource *r, struct rdt_domain *d); + +extern struct mutex rdtgroup_mutex; + +extern struct rdt_resource rdt_resources_all[]; +extern struct rdtgroup rdtgroup_default; +DECLARE_STATIC_KEY_FALSE(rdt_alloc_enable_key); + +int __init rdtgroup_init(void); + +enum { + RDT_RESOURCE_L3, + RDT_RESOURCE_L3DATA, + RDT_RESOURCE_L3CODE, + RDT_RESOURCE_L2, + RDT_RESOURCE_MBA, + + /* Must be the last */ + RDT_NUM_RESOURCES, +}; + +#define for_each_capable_rdt_resource(r) \ + for (r = rdt_resources_all; r < rdt_resources_all + RDT_NUM_RESOURCES;\ + r++) \ + if (r->alloc_capable || r->mon_capable) + +#define for_each_alloc_capable_rdt_resource(r) \ + for (r = rdt_resources_all; r < rdt_resources_all + RDT_NUM_RESOURCES;\ + r++) \ + if (r->alloc_capable) + +#define for_each_mon_capable_rdt_resource(r) \ + for (r = rdt_resources_all; r < rdt_resources_all + RDT_NUM_RESOURCES;\ + r++) \ + if (r->mon_capable) + +#define for_each_alloc_enabled_rdt_resource(r) \ + for (r = rdt_resources_all; r < rdt_resources_all + RDT_NUM_RESOURCES;\ + r++) \ + if (r->alloc_enabled) + +#define for_each_mon_enabled_rdt_resource(r) \ + for (r = rdt_resources_all; r < rdt_resources_all + RDT_NUM_RESOURCES;\ + r++) \ + if (r->mon_enabled) + +/* CPUID.(EAX=10H, ECX=ResID=1).EAX */ +union cpuid_0x10_1_eax { + struct { + unsigned int cbm_len:5; + } split; + unsigned int full; +}; + +/* CPUID.(EAX=10H, ECX=ResID=3).EAX */ +union cpuid_0x10_3_eax { + struct { + unsigned int max_delay:12; + } split; + unsigned int full; +}; + +/* CPUID.(EAX=10H, ECX=ResID).EDX */ +union cpuid_0x10_x_edx { + struct { + unsigned int cos_max:16; + } split; + unsigned int full; +}; + +void rdt_ctrl_update(void *arg); +struct rdtgroup *rdtgroup_kn_lock_live(struct kernfs_node *kn); +void rdtgroup_kn_unlock(struct kernfs_node *kn); +struct rdt_domain *rdt_find_domain(struct rdt_resource *r, int id, + struct list_head **pos); +ssize_t rdtgroup_schemata_write(struct kernfs_open_file *of, + char *buf, size_t nbytes, loff_t off); +int rdtgroup_schemata_show(struct kernfs_open_file *of, + struct seq_file *s, void *v); +struct rdt_domain *get_domain_from_cpu(int cpu, struct rdt_resource *r); +int alloc_rmid(void); +void free_rmid(u32 rmid); +int rdt_get_mon_l3_config(struct rdt_resource *r); +void mon_event_count(void *info); +int rdtgroup_mondata_show(struct seq_file *m, void *arg); +void rmdir_mondata_subdir_allrdtgrp(struct rdt_resource *r, + unsigned int dom_id); +void mkdir_mondata_subdir_allrdtgrp(struct rdt_resource *r, + struct rdt_domain *d); +void mon_event_read(struct rmid_read *rr, struct rdt_domain *d, + struct rdtgroup *rdtgrp, int evtid, int first); +void mbm_setup_overflow_handler(struct rdt_domain *dom, + unsigned long delay_ms); +void mbm_handle_overflow(struct work_struct *work); +void cqm_setup_limbo_handler(struct rdt_domain *dom, unsigned long delay_ms); +void cqm_handle_limbo(struct work_struct *work); +bool has_busy_rmid(struct rdt_resource *r, struct rdt_domain *d); +void __check_limbo(struct rdt_domain *d, bool force_free); + +#endif /* _ASM_X86_INTEL_RDT_H */ diff --git a/arch/x86/kernel/cpu/intel_rdt_schemata.c b/arch/x86/kernel/cpu/intel_rdt_ctrlmondata.c index 406d7a6532f9..f6ea94f8954a 100644 --- a/arch/x86/kernel/cpu/intel_rdt_schemata.c +++ b/arch/x86/kernel/cpu/intel_rdt_ctrlmondata.c @@ -26,7 +26,7 @@ #include <linux/kernfs.h> #include <linux/seq_file.h> #include <linux/slab.h> |
