/*
* Copyright 2010 Tilera Corporation. All Rights Reserved.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation, version 2.
*
* This program is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
* NON INFRINGEMENT. See the GNU General Public License for
* more details.
*/
#include <linux/sched.h>
#include <linux/kernel.h>
#include <linux/mmzone.h>
#include <linux/bootmem.h>
#include <linux/module.h>
#include <linux/node.h>
#include <linux/cpu.h>
#include <linux/ioport.h>
#include <linux/irq.h>
#include <linux/kexec.h>
#include <linux/pci.h>
#include <linux/initrd.h>
#include <linux/io.h>
#include <linux/highmem.h>
#include <linux/smp.h>
#include <linux/timex.h>
#include <asm/setup.h>
#include <asm/sections.h>
#include <asm/cacheflush.h>
#include <asm/pgalloc.h>
#include <asm/mmu_context.h>
#include <hv/hypervisor.h>
#include <arch/interrupts.h>
/* <linux/smp.h> doesn't provide this definition. */
#ifndef CONFIG_SMP
#define setup_max_cpus 1
#endif
/* Local absolute-value helper (like abs(): result is undefined for INT_MIN). */
static inline int ABS(int x) { return x < 0 ? -x : x; }
/* Chip information: model string, filled in once during early boot. */
char chip_model[64] __write_once;

/* Per-node NUMA bookkeeping; exported for core mm code. */
struct pglist_data node_data[MAX_NUMNODES] __read_mostly;
EXPORT_SYMBOL(node_data);

/* We only create bootmem data on node 0. */
static bootmem_data_t __initdata node0_bdata;

/*
 * Information on the NUMA nodes that we compute early.
 * node_start_pfn/node_end_pfn are __cpuinitdata (not __initdata) so,
 * presumably, secondary-CPU bringup can still read them — confirm
 * against their users later in this file.
 */
unsigned long __cpuinitdata node_start_pfn[MAX_NUMNODES];
unsigned long __cpuinitdata node_end_pfn[MAX_NUMNODES];
unsigned long __initdata node_memmap_pfn[MAX_NUMNODES];
unsigned long __initdata node_percpu_pfn[MAX_NUMNODES];
unsigned long __initdata node_free_pfn[MAX_NUMNODES];
static unsigned long __initdata node_percpu[MAX_NUMNODES];

#ifdef CONFIG_HIGHMEM
/* Page frame index of end of lowmem on each controller. */
unsigned long __cpuinitdata node_lowmem_end_pfn[MAX_NUMNODES];

/* Number of pages that can be mapped into lowmem. */
static unsigned long __initdata mappable_physpages;
#endif

/* Data on which physical memory controller corresponds to which NUMA node */
int node_controller[MAX_NUMNODES] = { [0 ... MAX_NUMNODES-1] = -1 };

#ifdef CONFIG_HIGHMEM
/* Map information from VAs to PAs (one entry per huge page of VA space) */
unsigned long pbase_map[1 << (32 - HPAGE_SHIFT)]
	__write_once __attribute__((aligned(L2_CACHE_BYTES)));
EXPORT_SYMBOL(pbase_map);

/* Map information from PAs to VAs */
void *vbase_map[NR_PA_HIGHBIT_VALUES]
	__write_once __attribute__((aligned(L2_CACHE_BYTES)));
EXPORT_SYMBOL(vbase_map);
#endif

/* Node number as a function of the high PA bits */
int highbits_to_node[NR_PA_HIGHBIT_VALUES] __write_once;
EXPORT_SYMBOL(highbits_to_node);

/* Cap on total RAM, set by "maxmem=" (see setup_maxmem); -1U = no cap. */
static unsigned int __initdata maxmem_pfn = -1U;
/* Per-node RAM caps, set by "maxnodemem=" (see setup_maxnodemem). */
static unsigned int __initdata maxnodemem_pfn[MAX_NUMNODES] = {
	[0 ... MAX_NUMNODES-1] = -1U
};
/* Nodes to isolate, set by "isolnodes=" (see setup_isolnodes). */
static nodemask_t __initdata isolnodes;

#ifdef CONFIG_PCI
enum { DEFAULT_PCI_RESERVE_MB = 64 };
/* MB of address space reserved for PCIE mappings ("pci_reserve=" arg). */
static unsigned int __initdata pci_reserve_mb = DEFAULT_PCI_RESERVE_MB;
unsigned long __initdata pci_reserve_start_pfn = -1U;
unsigned long __initdata pci_reserve_end_pfn = -1U;
#endif
/*
 * Handle the "maxmem=<size>" boot argument: cap the total amount of
 * RAM the kernel will use.  The value (in MB) is rounded down to a
 * whole number of huge pages and recorded in maxmem_pfn.
 *
 * Returns 0 on success, -EINVAL on a missing or invalid argument.
 */
static int __init setup_maxmem(char *str)
{
	long maxmem_mb;

	/*
	 * Reject missing, malformed, zero, or negative values; the
	 * original "== 0" test let a negative value through to the
	 * shift arithmetic below.
	 */
	if (str == NULL || strict_strtol(str, 0, &maxmem_mb) != 0 ||
	    maxmem_mb <= 0)
		return -EINVAL;

	/* Round down to a whole number of huge pages, expressed in PFNs. */
	maxmem_pfn = (maxmem_mb >> (HPAGE_SHIFT - 20)) <<
		(HPAGE_SHIFT - PAGE_SHIFT);
	/* maxmem_pfn is unsigned, so print it with %u, not %d. */
	pr_info("Forcing RAM used to no more than %uMB\n",
		maxmem_pfn >> (20 - PAGE_SHIFT));
	return 0;
}
early_param("maxmem", setup_maxmem);
/*
 * Handle the "maxnodemem=<node>:<size>" boot argument: cap the RAM
 * used on a single memory controller (NUMA node).  The size in MB is
 * rounded down to a whole number of huge pages.
 *
 * Returns 0 on success, -EINVAL on a missing or invalid argument.
 */
static int __init setup_maxnodemem(char *str)
{
	char *endp;
	long maxnodemem_mb, node;

	/* INT_MAX sentinel fails the range check below when str is NULL. */
	node = str ? simple_strtoul(str, &endp, 0) : INT_MAX;

	/*
	 * Validate the node index before using it to index the array:
	 * on a 32-bit build, a huge unsigned value from simple_strtoul
	 * wraps negative when assigned to "long", and the original code
	 * would then write to maxnodemem_pfn[negative].  Also reject a
	 * missing ':' and a malformed or negative size.
	 */
	if (node < 0 || node >= MAX_NUMNODES || *endp != ':' ||
	    strict_strtol(endp+1, 0, &maxnodemem_mb) != 0 ||
	    maxnodemem_mb < 0)
		return -EINVAL;

	/* Round down to a whole number of huge pages, expressed in PFNs. */
	maxnodemem_pfn[node] = (maxnodemem_mb >> (HPAGE_SHIFT - 20)) <<
		(HPAGE_SHIFT - PAGE_SHIFT);
	/* maxnodemem_pfn[] is unsigned, so print it with %u, not %d. */
	pr_info("Forcing RAM used on node %ld to no more than %uMB\n",
		node, maxnodemem_pfn[node] >> (20 - PAGE_SHIFT));
	return 0;
}
early_param("maxnodemem", setup_maxnodemem);
/*
 * Handle the "isolnodes=<nodelist>" boot argument: record the set of
 * nodes to isolate in the "isolnodes" nodemask and report the result.
 *
 * Returns 0 on success, -EINVAL on a missing or unparsable list.
 */
static int __init setup_isolnodes(char *str)
{
	char buf[MAX_NUMNODES * 5];

	if (!str || nodelist_parse(str, isolnodes) != 0)
		return -EINVAL;

	/* Echo back the canonical form of the parsed node list. */
	nodelist_scnprintf(buf, sizeof(buf), isolnodes);
	pr_info("Set isolnodes value to '%s'\n", buf);
	return 0;
}
early_param("isolnodes", setup_isolnodes);
#ifdef CONFIG_PCI
/*
 * Handle the "pci_reserve=<size>" boot argument: set how many MB of
 * address space to reserve for PCIE root complex mappings (default
 * DEFAULT_PCI_RESERVE_MB, maximum 3 GB).
 *
 * Returns 0 on success, -EINVAL on a missing or out-of-range value.
 */
static int __init setup_pci_reserve(char *str)
{
	unsigned long mb;

	if (str == NULL || strict_strtoul(str, 0, &mb) != 0 ||
	    mb > 3 * 1024)
		return -EINVAL;

	pci_reserve_mb = mb;
	/* pci_reserve_mb is unsigned, so print it with %u, not %d. */
	pr_info("Reserving %uMB for PCIE root complex mappings\n",
		pci_reserve_mb);
	return 0;
}
early_param("pci_reserve", setup_pci_reserve);
#endif
#ifndef __tilegx__
/*
 * vmalloc=size forces the vmalloc area to be exactly 'size' bytes.
 * This can be used to increase (or decrease) the vmalloc area.
 *
 * Returns 0 on success, -EINVAL if no argument was given; panics
 * early if the requested reserve is too large to fit.
 */
static int __init parse_vmalloc(char *arg)
{
	if (!arg)
		return -EINVAL;

	/* Round the requested size up to a whole number of pgdir units. */
	VMALLOC_RESERVE = (memparse(arg, &arg) + PGDIR_SIZE - 1) & PGDIR_MASK;

	/*
	 * See validate_va() for more on this test.  A non-negative
	 * _VMALLOC_START presumably means the enlarged reserve pushed
	 * the vmalloc region out of the top (kernel) half of the
	 * address space — confirm against validate_va().
	 */
	if ((long)_VMALLOC_START >= 0)
		early_panic("\"vmalloc=%#lx\" value too large: maximum %#lx\n",
			VMALLOC_RESERVE, _VMALLOC_END - 0x80000000UL);
	return 0;
}
early_param("vmalloc", parse_vmalloc);
#endif
#ifdef CONFIG_HIGHMEM
/*
* Determine for each controller where its lowmem is mapped and how much of
* it is mapped there. On controller zero, the first few megabytes are
* already mapped in as code at MEM_SV_INTRPT, so in principle we could
* start our data mappings higher up, but for now we don't bother, to avoid
* additional confusion.
*
* One question is whether, on systems with more than 768 Mb and
* controllers of different sizes, to map in a proportionate amount of
* each one, or to try to map the same amount from each controller.
* (E.g. if we have three controllers with 256MB, 1GB, and 256MB
* respectively, do we map 256MB from each, or do we map 128 MB, 512
* MB, and 128 MB respectively?) For now we use a proportionate
* solution like the latter.
*
* The VA/PA mapping demands that we align our decisions at 16 MB
* boundaries so that we can rapidly convert VA to PA.
*/
static void *__init setup_pa_va_mapping(void)
{
unsigned long curr_pages = 0;
unsigned long vaddr = PAGE_OFFSET;
nodemask_t highonlynodes = isolnodes;
int i, j;
memset(pbase_map, -1, sizeof(pbase_map));
memset(vbase_map, -1, sizeof(vbase_map));
/* Node zero cannot be isolated for LOWMEM purposes. */
node_clear(0, highonlynodes);
/* Count up the number of pages on non-highonlynodes controllers. */
mappable_physpages = 0;
for_each_online_node(i) {
if (!node_isset(i, highonlynodes))
mappable_physpages +=
node_end_pfn[i] - node_start_pfn[i];
}
for_each_online_node(i) {
unsigned long start = node_start_pfn[i];
unsigned long end = node_end_pfn[i];
unsigned long size = end - start;
unsigned long vaddr_end;
if (node_isset(i, highonlynodes)) {
/* Mark this controller as having no lowmem. */
node_lowmem_end_pfn[i] = start;
continue;
}
curr_pages += size;
if (mappable_physpages > MAXMEM_PFN) {
vaddr_end = PAGE_OFFSET +
(((u64)curr_pages * MAXMEM_PFN /
mappable_physpages)
<< PAGE_SHIFT);
} else {
vaddr_end = PAGE_OFFSET + (curr_pages << PAGE_SHIFT);
}
for (j = 0; vaddr < vaddr_end; vaddr += HPAGE_SIZE, ++j) {
unsigned long this_pfn =
start + (j << HUGETLB_PAGE_ORDER);
pbase_map[vaddr >> HPAGE_SHIFT] = this_pfn;
if (vbase_map[__pfn_to_highbits(t
|