/*
* pSeries NUMA support
*
* Copyright (C) 2002 Anton Blanchard <anton@au.ibm.com>, IBM
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version
* 2 of the License, or (at your option) any later version.
*/
#include <linux/threads.h>
#include <linux/bootmem.h>
#include <linux/init.h>
#include <linux/mm.h>
#include <linux/mmzone.h>
#include <linux/export.h>
#include <linux/nodemask.h>
#include <linux/cpu.h>
#include <linux/notifier.h>
#include <linux/memblock.h>
#include <linux/of.h>
#include <linux/pfn.h>
#include <linux/cpuset.h>
#include <linux/node.h>
#include <asm/sparsemem.h>
#include <asm/prom.h>
#include <asm/system.h>
#include <asm/smp.h>
#include <asm/firmware.h>
#include <asm/paca.h>
#include <asm/hvcall.h>
static int numa_enabled = 1;
static char *cmdline __initdata;
static int numa_debug;
#define dbg(args...) if (numa_debug) { printk(KERN_INFO args); }
int numa_cpu_lookup_table[NR_CPUS];
cpumask_var_t node_to_cpumask_map[MAX_NUMNODES];
struct pglist_data *node_data[MAX_NUMNODES];
EXPORT_SYMBOL(numa_cpu_lookup_table);
EXPORT_SYMBOL(node_to_cpumask_map);
EXPORT_SYMBOL(node_data);
static int min_common_depth;
static int n_mem_addr_cells, n_mem_size_cells;
static int form1_affinity;
#define MAX_DISTANCE_REF_POINTS 4
static int distance_ref_points_depth;
static const unsigned int *distance_ref_points;
static int distance_lookup_table[MAX_NUMNODES][MAX_DISTANCE_REF_POINTS];
/*
* Allocate node_to_cpumask_map based on number of available nodes
* Requires node_possible_map to be valid.
*
* Note: node_to_cpumask() is not valid until after this is done.
*/
static void __init setup_node_to_cpumask_map(void)
{
unsigned int node, num = 0;
/* setup nr_node_ids if not done yet */
if (nr_node_ids == MAX_NUMNODES) {
for_each_node_mask(node, node_possible_map)
num = node;
nr_node_ids = num + 1;
}
/* allocate the map */
for (node = 0; node < nr_node_ids; node++)
alloc_bootmem_cpumask_var(&node_to_cpumask_map[node]);
/* cpumask_of_node() will now work */
dbg("Node to cpumask map for %d nodes\n", nr_node_ids);
}
static int __cpuinit fake_numa_create_new_node(unsigned long end_pfn,
unsigned int *nid)
{
unsigned long long mem;
char *p = cmdline;
static unsigned int fake_nid;
static unsigned long long curr_boundary;
/*
* Modify node id, iff we started creating NUMA nodes
* We want to continue from where we left of the last time
*/
if (fake_nid)
*nid = fake_nid;
/*
* In case there are no more arguments to parse, the
* node_id should be the same as the last fake node id
* (we've handled this above).
*/
if (!p)
return 0;
mem = memparse(p, &p);
if (!mem)
return 0;
if (mem < curr_boundary)
return 0;
curr_boundary = mem;
if ((end_pfn << PAGE_SHIFT) > mem) {
/*
* Skip commas and spaces
*/
while (*p == ',' || *p == ' ' || *p == '\t')
p++;
cmdline = p;
fake_nid++;
*nid = fake_nid;
dbg("created new fake_node with id %d\n", fake_nid);
return 1;
}
return 0;
}
/*
* get_node_active_region - Return active region containing pfn
* Active range returned is empty if none found.
* @pfn: The page to return the region for
* @node_ar: Returned set to the active region containing @pfn
*/
static void __init get_node_active_region(unsigned long pfn,
struct node_active_region *node_ar)
{
unsigned long start_pfn, end_pfn;
int i, nid;
for_each_mem_pfn_range(i, MAX_NUMNODES, &start_pfn, &end_pfn, &nid) {
if (pfn >= start_pfn && pfn < end_pfn) {
node_ar->nid = nid;
node_ar->start_pfn = start_pfn;
node_ar->end_pfn = end_pfn;
break;
}
}
}
static void map_cpu_to_node(int cpu, int node)
{
numa_cpu_lookup_table[cpu] = node;
dbg("adding cpu %d to node %d\n", cpu, node);
if (!(cpumask_test_cpu(cpu, node_to_cpumask_map[node])))
cpumask_set_cpu(cpu, node_to_cpumask_map[node]);
}
#if defined(CONFIG_HOTPLUG_CPU) || defined(CONFIG_PPC_SPLPAR)
static void unmap_cpu_from_node(unsigned long cpu)
{
int node = numa_cpu_lookup_table[cpu];
dbg("removing cpu %lu from node %d\n", cpu, node);
if (cpumask_test_cpu(cpu, node_to_cpumask_map[node])) {
cpumask_clear_cpu(cpu, node_to_cpumask_map[node]);
} else {
printk(KERN_ERR "WARNING: cpu %lu not found in node %d\n",
cpu, node);
}
}
#endif /* CONFIG_HOTPLUG_CPU || CONFIG_PPC_SPLPAR */
/* must hold reference to node during call */
static const int *of_get_associativity(struct device_node *dev)
{
return of_get_property(dev, "ibm,associativity", NULL);
}
/*
* Returns the property linux,drconf-usable-memory if
* it exists (the property exists only in kexec/kdump kernels,
* added by kexec-tools)
*/
static const u32 *of_get_usable_memory(struct device_node *memory)
{
const u32 *prop;
u32 len;
prop = of_get_property(memory, "linux,drconf-usable-memory", &len);
if (!prop || len < sizeof(unsigned int))
return 0;
return prop;
}
int __node_distance(int a, int b)
{
int i;
int distance = LOCAL_DISTANCE;
if (!form1_affinity)
return distance;
for (i = 0; i < distance_ref_points_depth; i++) {
if (distance_lookup_table[a][i] == distance_lookup_table[b][i])
break;
/* Double the distance for each NUMA level */
distance *= 2;
}
return distance;
}
static void initialize_distance_lookup_table(int nid,
const unsigned int *associativity)
{
int i;
if (!form1_affinity)
return;
for (i = 0; i < distance_ref_points_depth; i++) {
distance_lookup_table[nid][i] =
associativity[distance_ref_points[i]];
}
}
/* Returns nid in the range [0..MAX_NUMNODES-1], or -1 if no useful numa
* info is found.
*/
static int associativity_to_nid(const unsigned int *associativity)
{
int nid = -1;
if (min_common_depth == -1)
goto out;
if (associativity[0] >= min_common_depth)
nid = associativity[min_common_depth];
/* POWER4 LPAR uses 0xffff as invalid node */
if (nid == 0xffff || nid >= MAX_NUMNODES)
nid = -1;
if (nid > 0 && associativity[0] >= distance_ref_points_depth)
initialize_distance_lookup_table(nid, associativity);
out:
return nid;
}
/* Returns the nid associated with the given device tree node,
* or -1 if not found.
*/
static int of_node_to_nid_single(struct device_node *device)
{
int nid = -1;
const unsigned int *tmp;
tmp = of_get_associativity(device);
if (tmp)
nid = associativity_to_nid(tmp);
return nid;
}
/* Walk the device tree upwards, looking for an associativity id */
int of_node_to_nid(struct device_node *device)
{
struct device_node *tmp;
int nid = -1;
of_node_get(device);
while (device) {
nid = of_node_to_nid_single(device);
if (nid != -1)
break;
tmp = device;
device = of_get_parent(tmp);
of_node_put(tmp);
}
of_node_put(device);
return nid;
}
EXPORT_SYMBOL_GPL(of_node_to_nid);
static int __init find_min_common_depth(void)
{
int depth;
struct device_node *chosen;
struct device_node *root;
const char *vec5;
if (firmware_has_feature(FW_FEATURE_OPAL))
root = of_find_node_by_path("/ibm,opal");
else
root = of_find_node_by_path("/rtas");
if (!root)
root = of_find_node_by_path("/");
/*
* This property is a set of 32-bit integers, each representing
* an index into the ibm,associativity nodes.
*
*
|