diff options
80 files changed, 7571 insertions, 245 deletions
diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig index df0c609272e5..1dbb58c1feed 100644 --- a/arch/arm/Kconfig +++ b/arch/arm/Kconfig @@ -52,6 +52,8 @@ config ARM select HAVE_MOD_ARCH_SPECIFIC if ARM_UNWIND select HAVE_OPROFILE if (HAVE_PERF_EVENTS) select HAVE_PERF_EVENTS + select HAVE_PERF_REGS + select HAVE_PERF_USER_STACK_DUMP select HAVE_REGS_AND_STACK_ACCESS_API select HAVE_SYSCALL_TRACEPOINTS select HAVE_UID16 @@ -482,6 +484,7 @@ config ARCH_IXP4XX bool "IXP4xx-based" depends on MMU select ARCH_HAS_DMA_SET_COHERENT_MASK + select ARCH_SUPPORTS_BIG_ENDIAN select ARCH_REQUIRE_GPIOLIB select CLKSRC_MMIO select CPU_XSCALE @@ -1545,6 +1548,32 @@ config MCPM for (multi-)cluster based systems, such as big.LITTLE based systems. +config BIG_LITTLE + bool "big.LITTLE support (Experimental)" + depends on CPU_V7 && SMP + select MCPM + help + This option enables support selections for the big.LITTLE + system architecture. + +config BL_SWITCHER + bool "big.LITTLE switcher support" + depends on BIG_LITTLE && MCPM && HOTPLUG_CPU + select CPU_PM + select ARM_CPU_SUSPEND + help + The big.LITTLE "switcher" provides the core functionality to + transparently handle transition between a cluster of A15's + and a cluster of A7's in a big.LITTLE system. + +config BL_SWITCHER_DUMMY_IF + tristate "Simple big.LITTLE switcher user interface" + depends on BL_SWITCHER && DEBUG_KERNEL + help + This is a simple and dummy char dev interface to control + the big.LITTLE switcher core code. It is meant for + debugging purposes only. + choice prompt "Memory split" default VMSPLIT_3G diff --git a/arch/arm/Makefile b/arch/arm/Makefile index db50b626be98..25f45256f098 100644 --- a/arch/arm/Makefile +++ b/arch/arm/Makefile @@ -16,6 +16,7 @@ LDFLAGS := LDFLAGS_vmlinux :=-p --no-undefined -X ifeq ($(CONFIG_CPU_ENDIAN_BE8),y) LDFLAGS_vmlinux += --be8 +LDFLAGS_MODULE += --be8 endif OBJCOPYFLAGS :=-O binary -R .comment -S diff --git a/arch/arm/boot/compressed/head.S b/arch/arm/boot/compressed/head.S index 75189f13cf54..066b03480b63 100644 --- a/arch/arm/boot/compressed/head.S +++ b/arch/arm/boot/compressed/head.S @@ -135,6 +135,7 @@ start: .word _edata @ zImage end address THUMB( .thumb ) 1: + ARM_BE8( setend be ) @ go BE8 if compiled for BE8 mrs r9, cpsr #ifdef CONFIG_ARM_VIRT_EXT bl __hyp_stub_install @ get into SVC mode, reversibly @@ -699,9 +700,7 @@ __armv4_mmu_cache_on: mrc p15, 0, r0, c1, c0, 0 @ read control reg orr r0, r0, #0x5000 @ I-cache enable, RR cache replacement orr r0, r0, #0x0030 -#ifdef CONFIG_CPU_ENDIAN_BE8 - orr r0, r0, #1 << 25 @ big-endian page tables -#endif + ARM_BE8( orr r0, r0, #1 << 25 ) @ big-endian page tables bl __common_mmu_cache_on mov r0, #0 mcr p15, 0, r0, c8, c7, 0 @ flush I,D TLBs @@ -728,9 +727,7 @@ __armv7_mmu_cache_on: orr r0, r0, #1 << 22 @ U (v6 unaligned access model) @ (needed for ARM1176) #ifdef CONFIG_MMU -#ifdef CONFIG_CPU_ENDIAN_BE8 - orr r0, r0, #1 << 25 @ big-endian page tables -#endif + ARM_BE8( orr r0, r0, #1 << 25 ) @ big-endian page tables mrcne p15, 0, r6, c2, c0, 2 @ read ttb control reg orrne r0, r0, #1 @ MMU enabled movne r1, #0xfffffffd @ domain 0 = client diff --git a/arch/arm/common/Makefile b/arch/arm/common/Makefile index 8c60f473e976..5c8584c4944d 100644 --- a/arch/arm/common/Makefile +++ b/arch/arm/common/Makefile @@ -17,3 +17,5 @@ obj-$(CONFIG_MCPM) += mcpm_head.o mcpm_entry.o mcpm_platsmp.o vlock.o AFLAGS_mcpm_head.o := -march=armv7-a AFLAGS_vlock.o := -march=armv7-a obj-$(CONFIG_TI_PRIV_EDMA) += edma.o +obj-$(CONFIG_BL_SWITCHER) += bL_switcher.o +obj-$(CONFIG_BL_SWITCHER_DUMMY_IF) += bL_switcher_dummy_if.o diff --git a/arch/arm/common/bL_switcher.c b/arch/arm/common/bL_switcher.c new file mode 100644 index 000000000000..5774b6ea7ad5 --- /dev/null +++ b/arch/arm/common/bL_switcher.c @@ -0,0 +1,822 @@ +/* + * arch/arm/common/bL_switcher.c -- big.LITTLE cluster switcher core driver + * + * Created by: Nicolas Pitre, March 2012 + * Copyright: (C) 2012-2013 Linaro Limited + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include <linux/atomic.h> +#include <linux/init.h> +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/sched.h> +#include <linux/interrupt.h> +#include <linux/cpu_pm.h> +#include <linux/cpu.h> +#include <linux/cpumask.h> +#include <linux/kthread.h> +#include <linux/wait.h> +#include <linux/time.h> +#include <linux/clockchips.h> +#include <linux/hrtimer.h> +#include <linux/tick.h> +#include <linux/notifier.h> +#include <linux/mm.h> +#include <linux/mutex.h> +#include <linux/smp.h> +#include <linux/spinlock.h> +#include <linux/string.h> +#include <linux/sysfs.h> +#include <linux/irqchip/arm-gic.h> +#include <linux/moduleparam.h> + +#include <asm/smp_plat.h> +#include <asm/cputype.h> +#include <asm/suspend.h> +#include <asm/mcpm.h> +#include <asm/bL_switcher.h> + +#define CREATE_TRACE_POINTS +#include <trace/events/power_cpu_migrate.h> + + +/* + * Use our own MPIDR accessors as the generic ones in asm/cputype.h have + * __attribute_const__ and we don't want the compiler to assume any + * constness here as the value _does_ change along some code paths. + */ + +static int read_mpidr(void) +{ + unsigned int id; + asm volatile ("mrc p15, 0, %0, c0, c0, 5" : "=r" (id)); + return id & MPIDR_HWID_BITMASK; +} + +/* + * Get a global nanosecond time stamp for tracing. + */ +static s64 get_ns(void) +{ + struct timespec ts; + getnstimeofday(&ts); + return timespec_to_ns(&ts); +} + +/* + * bL switcher core code. + */ + +static void bL_do_switch(void *_arg) +{ + unsigned ib_mpidr, ib_cpu, ib_cluster; + long volatile handshake, **handshake_ptr = _arg; + + pr_debug("%s\n", __func__); + + ib_mpidr = cpu_logical_map(smp_processor_id()); + ib_cpu = MPIDR_AFFINITY_LEVEL(ib_mpidr, 0); + ib_cluster = MPIDR_AFFINITY_LEVEL(ib_mpidr, 1); + + /* Advertise our handshake location */ + if (handshake_ptr) { + handshake = 0; + *handshake_ptr = &handshake; + } else + handshake = -1; + + /* + * Our state has been saved at this point. Let's release our + * inbound CPU. + */ + mcpm_set_entry_vector(ib_cpu, ib_cluster, cpu_resume); + sev(); + + /* + * From this point, we must assume that our counterpart CPU might + * have taken over in its parallel world already, as if execution + * just returned from cpu_suspend(). It is therefore important to + * be very careful not to make any change the other guy is not + * expecting. This is why we need stack isolation. + * + * Fancy under cover tasks could be performed here. For now + * we have none. + */ + + /* + * Let's wait until our inbound is alive. + */ + while (!handshake) { + wfe(); + smp_mb(); + } + + /* Let's put ourself down. */ + mcpm_cpu_power_down(); + + /* should never get here */ + BUG(); +} + +/* + * Stack isolation. To ensure 'current' remains valid, we just use another + * piece of our thread's stack space which should be fairly lightly used. + * The selected area starts just above the thread_info structure located + * at the very bottom of the stack, aligned to a cache line, and indexed + * with the cluster number. + */ +#define STACK_SIZE 512 +extern void call_with_stack(void (*fn)(void *), void *arg, void *sp); +static int bL_switchpoint(unsigned long _arg) +{ + unsigned int mpidr = read_mpidr(); + unsigned int clusterid = MPIDR_AFFINITY_LEVEL(mpidr, 1); + void *stack = current_thread_info() + 1; + stack = PTR_ALIGN(stack, L1_CACHE_BYTES); + stack += clusterid * STACK_SIZE + STACK_SIZE; + call_with_stack(bL_do_switch, (void *)_arg, stack); + BUG(); +} + +/* + * Generic switcher interface + */ + +static unsigned int bL_gic_id[MAX_CPUS_PER_CLUSTER][MAX_NR_CLUSTERS]; +static int bL_switcher_cpu_pairing[NR_CPUS]; + +/* + * bL_switch_to - Switch to a specific cluster for the current CPU + * @new_cluster_id: the ID of the cluster to switch to. + * + * This function must be called on the CPU to be switched. + * Returns 0 on success, else a negative status code. + */ +static int bL_switch_to(unsigned int new_cluster_id) +{ + unsigned int mpidr, this_cpu, that_cpu; + unsigned int ob_mpidr, ob_cpu, ob_cluster, ib_mpidr, ib_cpu, ib_cluster; + struct completion inbound_alive; + struct tick_device *tdev; + enum clock_event_mode tdev_mode; + long volatile *handshake_ptr; + int ipi_nr, ret; + + this_cpu = smp_processor_id(); + ob_mpidr = read_mpidr(); + ob_cpu = MPIDR_AFFINITY_LEVEL(ob_mpidr, 0); + ob_cluster = MPIDR_AFFINITY_LEVEL(ob_mpidr, 1); + BUG_ON(cpu_logical_map(this_cpu) != ob_mpidr); + + if (new_cluster_id == ob_cluster) + return 0; + + that_cpu = bL_switcher_cpu_pairing[this_cpu]; + ib_mpidr = cpu_logical_map(that_cpu); + ib_cpu = MPIDR_AFFINITY_LEVEL(ib_mpidr, 0); + ib_cluster = MPIDR_AFFINITY_LEVEL(ib_mpidr, 1); + + pr_debug("before switch: CPU %d MPIDR %#x -> %#x\n", + this_cpu, ob_mpidr, ib_mpidr); + + this_cpu = smp_processor_id(); + + /* Close the gate for our entry vectors */ + mcpm_set_entry_vector(ob_cpu, ob_cluster, NULL); + mcpm_set_entry_vector(ib_cpu, ib_cluster, NULL); + + /* Install our "inbound alive" notifier. */ + init_completion(&inbound_alive); + ipi_nr = register_ipi_completion(&inbound_alive, this_cpu); + ipi_nr |= ((1 << 16) << bL_gic_id[ob_cpu][ob_cluster]); + mcpm_set_early_poke(ib_cpu, ib_cluster, gic_get_sgir_physaddr(), ipi_nr); + + /* + * Let's wake up the inbound CPU now in case it requires some delay + * to come online, but leave it gated in our entry vector code. + */ + ret = mcpm_cpu_power_up(ib_cpu, ib_cluster); + if (ret) { + pr_err("%s: mcpm_cpu_power_up() returned %d\n", __func__, ret); + return ret; + } + + /* + * Raise a SGI on the inbound CPU to make sure it doesn't stall + * in a possible WFI, such as in bL_power_down(). + */ + gic_send_sgi(bL_gic_id[ib_cpu][ib_cluster], 0); + + /* + * Wait for the inbound to come up. This allows for other + * tasks to be scheduled in the mean time. + */ + wait_for_completion(&inbound_alive); + mcpm_set_early_poke(ib_cpu, ib_cluster, 0, 0); + + /* + * From this point we are entering the switch critical zone + * and can't take any interrupts anymore. + */ + local_irq_disable(); + local_fiq_disable(); + trace_cpu_migrate_begin(get_ns(), ob_mpidr); + + /* redirect GIC's SGIs to our counterpart */ + gic_migrate_target(bL_gic_id[ib_cpu][ib_cluster]); + + tdev = tick_get_device(this_cpu); + if (tdev && !cpumask_equal(tdev->evtdev->cpumask, cpumask_of(this_cpu))) + tdev = NULL; + if (tdev) { + tdev_mode = tdev->evtdev->mode; + clockevents_set_mode(tdev->evtdev, CLOCK_EVT_MODE_SHUTDOWN); + } + + ret = cpu_pm_enter(); + + /* we can not tolerate errors at this point */ + if (ret) + panic("%s: cpu_pm_enter() returned %d\n", __func__, ret); + + /* Swap the physical CPUs in the logical map for this logical CPU. */ + cpu_logical_map(this_cpu) = ib_mpidr; + cpu_logical_map(that_cpu) = ob_mpidr; + + /* Let's do the actual CPU switch. */ + ret = cpu_suspend((unsigned long)&handshake_ptr, bL_switchpoint); + if (ret > 0) + panic("%s: cpu_suspend() returned %d\n", __func__, ret); + + /* We are executing on the inbound CPU at this point */ + mpidr = read_mpidr(); + pr_debug("after switch: CPU %d MPIDR %#x\n", this_cpu, mpidr); + BUG_ON(mpidr != ib_mpidr); + + mcpm_cpu_powered_up(); + + ret = cpu_pm_exit(); + + if (tdev) { + clockevents_set_mode(tdev->evtdev, tdev_mode); + clockevents_program_event(tdev->evtdev, + tdev->evtdev->next_event, 1); + } + + trace_cpu_migrate_finish(get_ns(), ib_mpidr); + local_fiq_enable(); + local_irq_enable(); + + *handshake_ptr = 1; + dsb_sev(); + + if (ret) + pr_err("%s exiting with error %d\n", __func__, ret); + return ret; +} + +struct bL_thread { + spinlock_t lock; + struct task_struct *task; + wait_queue_head_t wq; + int wanted_cluster; + struct completion started; + bL_switch_completion_handler completer; + void *completer_cookie; +}; + +static struct bL_thread bL_threads[NR_CPUS]; + +static int bL_switcher_thread(void *arg) +{ + struct bL_thread *t = arg; + struct sched_param param = { .sched_priority = 1 }; + int cluster; + bL_switch_completion_handler completer; + void *completer_cookie; + + sched_setscheduler_nocheck(current, SCHED_FIFO, ¶m); + complete(&t->started); + + do { + if (signal_pending(current)) + flush_signals(current); + wait_event_interruptible(t->wq, + t->wanted_cluster != -1 || + kthread_should_stop()); + + spin_lock(&t->lock); + cluster = t->wanted_cluster; + completer = t->completer; + completer_cookie = t->completer_cookie; + t->wanted_cluster = -1; + t->completer = NULL; + spin_unlock(&t->lock); + + if (cluster != -1) { + bL_switch_to(cluster); + + if (completer) + completer(completer_cookie); + } + } while (!kthread_should_stop()); + + return 0; +} + +static struct task_struct *bL_switcher_thread_create(int cpu, void *arg) +{ + struct task_struct *task; + + task = kthread_create_on_node(bL_switcher_thread, arg, + cpu_to_node(cpu), "kswitcher_%d", cpu); + if (!IS_ERR(task)) { + kthread_bind(task, cpu); + wake_up_process(task); + } else + pr_err("%s failed for CPU %d\n", __func__, cpu); + return task; +} + +/* + * bL_switch_request_cb - Switch to a specific cluster for the given CPU, + * with completion notification via a callback + * + * @cpu: the CPU to switch + * @new_cluster_id: the ID of the cluster to switch to. + * @completer: switch completion callback. if non-NULL, + * @completer(@completer_cookie) will be called on completion of + * the switch, in non-atomic context. + * @completer_cookie: opaque context argument for @completer. + * + * This function causes a cluster switch on the given CPU by waking up + * the appropriate switcher thread. This function may or may not return + * before the switch has occurred. + * + * If a @completer callback function is supplied, it will be called when + * the switch is complete. This can be used to determine asynchronously + * when the switch is complete, regardless of when bL_switch_request() + * returns. When @completer is supplied, no new switch request is permitted + * for the affected CPU until after the switch is complete, and @completer + * has returned. + */ +int bL_switch_request_cb(unsigned int cpu, unsigned int new_cluster_id, + bL_switch_completion_handler completer, + void *completer_cookie) +{ + struct bL_thread *t; + + if (cpu >= ARRAY_SIZE(bL_threads)) { + pr_err("%s: cpu %d out of bounds\n", __func__, cpu); + return -EINVAL; + } + + t = &bL_threads[cpu]; + + if (IS_ERR(t->task)) + return PTR_ERR(t->task); + if (!t->task) + return -ESRCH; < |
