Diffstat (limited to 'arch/powerpc/platforms/pseries')
-rw-r--r--  arch/powerpc/platforms/pseries/Kconfig           |  23
-rw-r--r--  arch/powerpc/platforms/pseries/Makefile          |   2
-rw-r--r--  arch/powerpc/platforms/pseries/dlpar.c           |   2
-rw-r--r--  arch/powerpc/platforms/pseries/dtl.c             |  20
-rw-r--r--  arch/powerpc/platforms/pseries/eeh.c             |  84
-rw-r--r--  arch/powerpc/platforms/pseries/eeh_driver.c      |  22
-rw-r--r--  arch/powerpc/platforms/pseries/hotplug-cpu.c     |   7
-rw-r--r--  arch/powerpc/platforms/pseries/io_event_irq.c    | 231
-rw-r--r--  arch/powerpc/platforms/pseries/iommu.c           | 123
-rw-r--r--  arch/powerpc/platforms/pseries/kexec.c           |   5
-rw-r--r--  arch/powerpc/platforms/pseries/lpar.c            |  48
-rw-r--r--  arch/powerpc/platforms/pseries/plpar_wrappers.h  |  27
-rw-r--r--  arch/powerpc/platforms/pseries/ras.c             |   6
-rw-r--r--  arch/powerpc/platforms/pseries/setup.c           |  50
-rw-r--r--  arch/powerpc/platforms/pseries/smp.c             |  41
-rw-r--r--  arch/powerpc/platforms/pseries/xics.c            | 949
-rw-r--r--  arch/powerpc/platforms/pseries/xics.h            |  23
17 files changed, 539 insertions, 1124 deletions
diff --git a/arch/powerpc/platforms/pseries/Kconfig b/arch/powerpc/platforms/pseries/Kconfig
index 5b3da4b4ea7..71af4c5d6c0 100644
--- a/arch/powerpc/platforms/pseries/Kconfig
+++ b/arch/powerpc/platforms/pseries/Kconfig
@@ -3,7 +3,10 @@ config PPC_PSERIES
bool "IBM pSeries & new (POWER5-based) iSeries"
select MPIC
select PCI_MSI
- select XICS
+ select PPC_XICS
+ select PPC_ICP_NATIVE
+ select PPC_ICP_HV
+ select PPC_ICS_RTAS
select PPC_I8259
select PPC_RTAS
select PPC_RTAS_DAEMON
@@ -47,6 +50,24 @@ config SCANLOG
tristate "Scanlog dump interface"
depends on RTAS_PROC && PPC_PSERIES
+config IO_EVENT_IRQ
+ bool "IO Event Interrupt support"
+ depends on PPC_PSERIES
+ default y
+ help
+ Select this option if you want to enable support for IO Event
+ interrupts. IO event interrupt is a mechanism provided by RTAS
+ to return information about hardware error and non-error events
+ which may need OS attention. RTAS returns events for multiple
+ event types and scopes. Device drivers can register their handlers
+ to receive events.
+
+ This option will only enable the IO event platform code. You
+ will still need to enable or compile the actual drivers
+ that use this infrastructure to handle IO event interrupts.
+
+ Say Y if you are unsure.
+
config LPARCFG
bool "LPAR Configuration Data"
depends on PPC_PSERIES || PPC_ISERIES
diff --git a/arch/powerpc/platforms/pseries/Makefile b/arch/powerpc/platforms/pseries/Makefile
index fc5237810ec..3556e402cbf 100644
--- a/arch/powerpc/platforms/pseries/Makefile
+++ b/arch/powerpc/platforms/pseries/Makefile
@@ -5,7 +5,6 @@ obj-y := lpar.o hvCall.o nvram.o reconfig.o \
setup.o iommu.o event_sources.o ras.o \
firmware.o power.o dlpar.o mobility.o
obj-$(CONFIG_SMP) += smp.o
-obj-$(CONFIG_XICS) += xics.o
obj-$(CONFIG_SCANLOG) += scanlog.o
obj-$(CONFIG_EEH) += eeh.o eeh_cache.o eeh_driver.o eeh_event.o eeh_sysfs.o
obj-$(CONFIG_KEXEC) += kexec.o
@@ -22,6 +21,7 @@ obj-$(CONFIG_HCALL_STATS) += hvCall_inst.o
obj-$(CONFIG_PHYP_DUMP) += phyp_dump.o
obj-$(CONFIG_CMM) += cmm.o
obj-$(CONFIG_DTL) += dtl.o
+obj-$(CONFIG_IO_EVENT_IRQ) += io_event_irq.o
ifeq ($(CONFIG_PPC_PSERIES),y)
obj-$(CONFIG_SUSPEND) += suspend.o
diff --git a/arch/powerpc/platforms/pseries/dlpar.c b/arch/powerpc/platforms/pseries/dlpar.c
index b74a9230edc..57ceb92b228 100644
--- a/arch/powerpc/platforms/pseries/dlpar.c
+++ b/arch/powerpc/platforms/pseries/dlpar.c
@@ -74,7 +74,7 @@ static struct device_node *dlpar_parse_cc_node(struct cc_workarea *ccwa)
return NULL;
/* The configure connector reported name does not contain a
- * preceeding '/', so we allocate a buffer large enough to
+ * preceding '/', so we allocate a buffer large enough to
* prepend this to the full_name.
*/
name = (char *)ccwa + ccwa->name_offset;
diff --git a/arch/powerpc/platforms/pseries/dtl.c b/arch/powerpc/platforms/pseries/dtl.c
index c371bc06434..e9190073bb9 100644
--- a/arch/powerpc/platforms/pseries/dtl.c
+++ b/arch/powerpc/platforms/pseries/dtl.c
@@ -52,10 +52,10 @@ static u8 dtl_event_mask = 0x7;
/*
- * Size of per-cpu log buffers. Default is just under 16 pages worth.
+ * Size of per-cpu log buffers. Firmware requires that the buffer does
+ * not cross a 4k boundary.
*/
-static int dtl_buf_entries = (16 * 85);
-
+static int dtl_buf_entries = N_DISPATCH_LOG;
#ifdef CONFIG_VIRT_CPU_ACCOUNTING
struct dtl_ring {
@@ -151,7 +151,7 @@ static int dtl_start(struct dtl *dtl)
/* Register our dtl buffer with the hypervisor. The HV expects the
* buffer size to be passed in the second word of the buffer */
- ((u32 *)dtl->buf)[1] = dtl->buf_entries * sizeof(struct dtl_entry);
+ ((u32 *)dtl->buf)[1] = DISPATCH_LOG_BYTES;
hwcpu = get_hard_smp_processor_id(dtl->cpu);
addr = __pa(dtl->buf);
@@ -196,13 +196,15 @@ static int dtl_enable(struct dtl *dtl)
long int rc;
struct dtl_entry *buf = NULL;
+ if (!dtl_cache)
+ return -ENOMEM;
+
/* only allow one reader */
if (dtl->buf)
return -EBUSY;
n_entries = dtl_buf_entries;
- buf = kmalloc_node(n_entries * sizeof(struct dtl_entry),
- GFP_KERNEL, cpu_to_node(dtl->cpu));
+ buf = kmem_cache_alloc_node(dtl_cache, GFP_KERNEL, cpu_to_node(dtl->cpu));
if (!buf) {
printk(KERN_WARNING "%s: buffer alloc failed for cpu %d\n",
__func__, dtl->cpu);
@@ -223,7 +225,7 @@ static int dtl_enable(struct dtl *dtl)
spin_unlock(&dtl->lock);
if (rc)
- kfree(buf);
+ kmem_cache_free(dtl_cache, buf);
return rc;
}
@@ -231,7 +233,7 @@ static void dtl_disable(struct dtl *dtl)
{
spin_lock(&dtl->lock);
dtl_stop(dtl);
- kfree(dtl->buf);
+ kmem_cache_free(dtl_cache, dtl->buf);
dtl->buf = NULL;
dtl->buf_entries = 0;
spin_unlock(&dtl->lock);
@@ -365,7 +367,7 @@ static int dtl_init(void)
event_mask_file = debugfs_create_x8("dtl_event_mask", 0600,
dtl_dir, &dtl_event_mask);
- buf_entries_file = debugfs_create_u32("dtl_buf_entries", 0600,
+ buf_entries_file = debugfs_create_u32("dtl_buf_entries", 0400,
dtl_dir, &dtl_buf_entries);
if (!event_mask_file || !buf_entries_file) {
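
The move from kmalloc to a dedicated slab cache is what satisfies the 4k
rule stated above: a cache created with object size equal to its alignment
(the kmem_cache_create() call added in the setup.c hunk below) hands out
buffers that start on that boundary and are exactly that long. A minimal
sketch of the resulting invariant, assuming DISPATCH_LOG_BYTES divides 4096:

	#include <linux/types.h>

	/* Sketch only: a dtl buffer from the cache cannot straddle a
	 * 4k line when size == align == DISPATCH_LOG_BYTES. */
	static bool dtl_buf_within_4k(void *buf)
	{
		unsigned long start = (unsigned long)buf;
		unsigned long end = start + DISPATCH_LOG_BYTES - 1;

		return (start >> 12) == (end >> 12);
	}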
diff --git a/arch/powerpc/platforms/pseries/eeh.c b/arch/powerpc/platforms/pseries/eeh.c
index 3cc4d102b1f..46b55cf563e 100644
--- a/arch/powerpc/platforms/pseries/eeh.c
+++ b/arch/powerpc/platforms/pseries/eeh.c
@@ -65,7 +65,7 @@
* with EEH.
*
* Ideally, a PCI device driver, when suspecting that an isolation
- * event has occured (e.g. by reading 0xff's), will then ask EEH
+ * event has occurred (e.g. by reading 0xff's), will then ask EEH
* whether this is the case, and then take appropriate steps to
* reset the PCI slot, the PCI device, and then resume operations.
* However, until that day, the checking is done here, with the
@@ -93,6 +93,7 @@ static int ibm_slot_error_detail;
static int ibm_get_config_addr_info;
static int ibm_get_config_addr_info2;
static int ibm_configure_bridge;
+static int ibm_configure_pe;
int eeh_subsystem_enabled;
EXPORT_SYMBOL(eeh_subsystem_enabled);
@@ -261,6 +262,8 @@ void eeh_slot_error_detail(struct pci_dn *pdn, int severity)
pci_regs_buf[0] = 0;
rtas_pci_enable(pdn, EEH_THAW_MMIO);
+ rtas_configure_bridge(pdn);
+ eeh_restore_bars(pdn);
loglen = gather_pci_data(pdn, pci_regs_buf, EEH_PCI_REGS_LOG_LEN);
rtas_slot_error_detail(pdn, severity, pci_regs_buf, loglen);
@@ -448,6 +451,39 @@ void eeh_clear_slot (struct device_node *dn, int mode_flag)
raw_spin_unlock_irqrestore(&confirm_error_lock, flags);
}
+void __eeh_set_pe_freset(struct device_node *parent, unsigned int *freset)
+{
+ struct device_node *dn;
+
+ for_each_child_of_node(parent, dn) {
+ if (PCI_DN(dn)) {
+
+ struct pci_dev *dev = PCI_DN(dn)->pcidev;
+
+ if (dev && dev->driver)
+ *freset |= dev->needs_freset;
+
+ __eeh_set_pe_freset(dn, freset);
+ }
+ }
+}
+
+void eeh_set_pe_freset(struct device_node *dn, unsigned int *freset)
+{
+ struct pci_dev *dev;
+ dn = find_device_pe(dn);
+
+ /* Back up one, since config addrs might be shared */
+ if (!pcibios_find_pci_bus(dn) && PCI_DN(dn->parent))
+ dn = dn->parent;
+
+ dev = PCI_DN(dn)->pcidev;
+ if (dev)
+ *freset |= dev->needs_freset;
+
+ __eeh_set_pe_freset(dn, freset);
+}
+
/**
* eeh_dn_check_failure - check if all 1's data is due to EEH slot freeze
* @dn device node
@@ -692,15 +728,24 @@ rtas_pci_slot_reset(struct pci_dn *pdn, int state)
if (pdn->eeh_pe_config_addr)
config_addr = pdn->eeh_pe_config_addr;
- rc = rtas_call(ibm_set_slot_reset,4,1, NULL,
+ rc = rtas_call(ibm_set_slot_reset, 4, 1, NULL,
config_addr,
BUID_HI(pdn->phb->buid),
BUID_LO(pdn->phb->buid),
state);
- if (rc)
- printk (KERN_WARNING "EEH: Unable to reset the failed slot,"
- " (%d) #RST=%d dn=%s\n",
- rc, state, pdn->node->full_name);
+
+ /* Fundamental-reset not supported on this PE, try hot-reset */
+ if (rc == -8 && state == 3) {
+ rc = rtas_call(ibm_set_slot_reset, 4, 1, NULL,
+ config_addr,
+ BUID_HI(pdn->phb->buid),
+ BUID_LO(pdn->phb->buid), 1);
+ if (rc)
+ printk(KERN_WARNING
+ "EEH: Unable to reset the failed slot,"
+ " #RST=%d dn=%s\n",
+ rc, pdn->node->full_name);
+ }
}
/**
@@ -736,18 +781,21 @@ int pcibios_set_pcie_reset_state(struct pci_dev *dev, enum pcie_reset_state stat
/**
* rtas_set_slot_reset -- assert the pci #RST line for 1/4 second
* @pdn: pci device node to be reset.
- *
- * Return 0 if success, else a non-zero value.
*/
static void __rtas_set_slot_reset(struct pci_dn *pdn)
{
- struct pci_dev *dev = pdn->pcidev;
+ unsigned int freset = 0;
- /* Determine type of EEH reset required by device,
- * default hot reset or fundamental reset
- */
- if (dev && dev->needs_freset)
+ /* Determine type of EEH reset required for
+ * Partitionable Endpoint, a hot-reset (1)
+ * or a fundamental reset (3).
+ * A fundamental reset required by any device under
+ * Partitionable Endpoint trumps hot-reset.
+ */
+ eeh_set_pe_freset(pdn->node, &freset);
+
+ if (freset)
rtas_pci_slot_reset(pdn, 3);
else
rtas_pci_slot_reset(pdn, 1);
@@ -895,13 +943,20 @@ rtas_configure_bridge(struct pci_dn *pdn)
{
int config_addr;
int rc;
+ int token;
/* Use PE configuration address, if present */
config_addr = pdn->eeh_config_addr;
if (pdn->eeh_pe_config_addr)
config_addr = pdn->eeh_pe_config_addr;
- rc = rtas_call(ibm_configure_bridge,3,1, NULL,
+ /* Use new configure-pe function, if supported */
+ if (ibm_configure_pe != RTAS_UNKNOWN_SERVICE)
+ token = ibm_configure_pe;
+ else
+ token = ibm_configure_bridge;
+
+ rc = rtas_call(token, 3, 1, NULL,
config_addr,
BUID_HI(pdn->phb->buid),
BUID_LO(pdn->phb->buid));
@@ -1077,6 +1132,7 @@ void __init eeh_init(void)
ibm_get_config_addr_info = rtas_token("ibm,get-config-addr-info");
ibm_get_config_addr_info2 = rtas_token("ibm,get-config-addr-info2");
ibm_configure_bridge = rtas_token ("ibm,configure-bridge");
+ ibm_configure_pe = rtas_token("ibm,configure-pe");
if (ibm_set_eeh_option == RTAS_UNKNOWN_SERVICE)
return;
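
The file-header comment above describes the driver-side protocol: on
reading all 1's, ask EEH whether the slot has been isolated. A hedged
sketch of that check from a driver's perspective, using the
eeh_dn_check_failure() routine this file provides; base, MY_STATUS_REG and
pdev are illustrative names, not part of this patch:

	#include <asm/ppc-pci.h>

	u32 val = ioread32(base + MY_STATUS_REG);

	if (val == ~0U) {
		/* All 1's may be an isolated (frozen) slot rather than
		 * real data: let EEH confirm and start slot recovery. */
		eeh_dn_check_failure(pci_device_to_OF_node(pdev), pdev);
	}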
diff --git a/arch/powerpc/platforms/pseries/eeh_driver.c b/arch/powerpc/platforms/pseries/eeh_driver.c
index b8d70f5d9aa..1b6cb10589e 100644
--- a/arch/powerpc/platforms/pseries/eeh_driver.c
+++ b/arch/powerpc/platforms/pseries/eeh_driver.c
@@ -328,7 +328,7 @@ struct pci_dn * handle_eeh_events (struct eeh_event *event)
struct pci_bus *frozen_bus;
int rc = 0;
enum pci_ers_result result = PCI_ERS_RESULT_NONE;
- const char *location, *pci_str, *drv_str;
+ const char *location, *pci_str, *drv_str, *bus_pci_str, *bus_drv_str;
frozen_dn = find_device_pe(event->dn);
if (!frozen_dn) {
@@ -364,13 +364,8 @@ struct pci_dn * handle_eeh_events (struct eeh_event *event)
frozen_pdn = PCI_DN(frozen_dn);
frozen_pdn->eeh_freeze_count++;
- if (frozen_pdn->pcidev) {
- pci_str = pci_name (frozen_pdn->pcidev);
- drv_str = pcid_name (frozen_pdn->pcidev);
- } else {
- pci_str = eeh_pci_name(event->dev);
- drv_str = pcid_name (event->dev);
- }
+ pci_str = eeh_pci_name(event->dev);
+ drv_str = pcid_name(event->dev);
if (frozen_pdn->eeh_freeze_count > EEH_MAX_ALLOWED_FREEZES)
goto excess_failures;
@@ -378,8 +373,17 @@ struct pci_dn * handle_eeh_events (struct eeh_event *event)
printk(KERN_WARNING
"EEH: This PCI device has failed %d times in the last hour:\n",
frozen_pdn->eeh_freeze_count);
+
+ if (frozen_pdn->pcidev) {
+ bus_pci_str = pci_name(frozen_pdn->pcidev);
+ bus_drv_str = pcid_name(frozen_pdn->pcidev);
+ printk(KERN_WARNING
+ "EEH: Bus location=%s driver=%s pci addr=%s\n",
+ location, bus_drv_str, bus_pci_str);
+ }
+
printk(KERN_WARNING
- "EEH: location=%s driver=%s pci addr=%s\n",
+ "EEH: Device location=%s driver=%s pci addr=%s\n",
location, drv_str, pci_str);
/* Walk the various device drivers attached to this slot through
diff --git a/arch/powerpc/platforms/pseries/hotplug-cpu.c b/arch/powerpc/platforms/pseries/hotplug-cpu.c
index fd50ccd4bac..46f13a3c5d0 100644
--- a/arch/powerpc/platforms/pseries/hotplug-cpu.c
+++ b/arch/powerpc/platforms/pseries/hotplug-cpu.c
@@ -19,6 +19,7 @@
*/
#include <linux/kernel.h>
+#include <linux/interrupt.h>
#include <linux/delay.h>
#include <linux/cpu.h>
#include <asm/system.h>
@@ -28,7 +29,7 @@
#include <asm/machdep.h>
#include <asm/vdso_datapage.h>
#include <asm/pSeries_reconfig.h>
-#include "xics.h"
+#include <asm/xics.h>
#include "plpar_wrappers.h"
#include "offline_states.h"
@@ -216,7 +217,7 @@ static void pseries_cpu_die(unsigned int cpu)
cpu, pcpu, cpu_status);
}
- /* Isolation and deallocation are definatly done by
+ /* Isolation and deallocation are definitely done by
* drslot_chrp_cpu. If they were not they would be
* done here. Change isolate state to Isolate and
* change allocation-state to Unusable.
@@ -280,7 +281,7 @@ static int pseries_add_processor(struct device_node *np)
}
for_each_cpu(cpu, tmp) {
- BUG_ON(cpumask_test_cpu(cpu, cpu_present_mask));
+ BUG_ON(cpu_present(cpu));
set_cpu_present(cpu, true);
set_hard_smp_processor_id(cpu, *intserv++);
}
diff --git a/arch/powerpc/platforms/pseries/io_event_irq.c b/arch/powerpc/platforms/pseries/io_event_irq.c
new file mode 100644
index 00000000000..c829e6067d5
--- /dev/null
+++ b/arch/powerpc/platforms/pseries/io_event_irq.c
@@ -0,0 +1,231 @@
+/*
+ * Copyright 2010, 2011 Mark Nelson and Tseng-Hui (Frank) Lin, IBM Corporation
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/errno.h>
+#include <linux/slab.h>
+#include <linux/module.h>
+#include <linux/irq.h>
+#include <linux/interrupt.h>
+#include <linux/of.h>
+#include <linux/list.h>
+#include <linux/notifier.h>
+
+#include <asm/machdep.h>
+#include <asm/rtas.h>
+#include <asm/irq.h>
+#include <asm/io_event_irq.h>
+
+#include "pseries.h"
+
+/*
+ * IO event interrupt is a mechanism provided by RTAS to return
+ * information about hardware error and non-error events. Device
+ * drivers can register their event handlers to receive events.
+ * Device drivers are expected to use atomic_notifier_chain_register()
+ * and atomic_notifier_chain_unregister() to register and unregister
+ * their event handlers. Since multiple IO event types and scopes
+ * share an IO event interrupt, the event handlers are called one
+ * by one until the IO event is claimed by one of the handlers.
+ * The event handlers are expected to return NOTIFY_OK if the
+ * event is handled by the event handler or NOTIFY_DONE if the
+ * event does not belong to the handler.
+ *
+ * Usage:
+ *
+ * Notifier function:
+ * #include <asm/io_event_irq.h>
+ * int event_handler(struct notifier_block *nb, unsigned long val, void *data) {
+ * p = (struct pseries_io_event_sect_data *) data;
+ * if (! is_my_event(p->scope, p->event_type)) return NOTIFY_DONE;
+ * :
+ * :
+ * return NOTIFY_OK;
+ * }
+ * struct notifier_block event_nb = {
+ * .notifier_call = event_handler,
+ * }
+ *
+ * Registration:
+ * atomic_notifier_chain_register(&pseries_ioei_notifier_list, &event_nb);
+ *
+ * Unregistration:
+ * atomic_notifier_chain_unregister(&pseries_ioei_notifier_list, &event_nb);
+ */
+
+ATOMIC_NOTIFIER_HEAD(pseries_ioei_notifier_list);
+EXPORT_SYMBOL_GPL(pseries_ioei_notifier_list);
+
+static int ioei_check_exception_token;
+
+/* pSeries event log format */
+
+/* Two bytes ASCII section IDs */
+#define PSERIES_ELOG_SECT_ID_PRIV_HDR (('P' << 8) | 'H')
+#define PSERIES_ELOG_SECT_ID_USER_HDR (('U' << 8) | 'H')
+#define PSERIES_ELOG_SECT_ID_PRIMARY_SRC (('P' << 8) | 'S')
+#define PSERIES_ELOG_SECT_ID_EXTENDED_UH (('E' << 8) | 'H')
+#define PSERIES_ELOG_SECT_ID_FAILING_MTMS (('M' << 8) | 'T')
+#define PSERIES_ELOG_SECT_ID_SECONDARY_SRC (('S' << 8) | 'S')
+#define PSERIES_ELOG_SECT_ID_DUMP_LOCATOR (('D' << 8) | 'H')
+#define PSERIES_ELOG_SECT_ID_FW_ERROR (('S' << 8) | 'W')
+#define PSERIES_ELOG_SECT_ID_IMPACT_PART_ID (('L' << 8) | 'P')
+#define PSERIES_ELOG_SECT_ID_LOGIC_RESOURCE_ID (('L' << 8) | 'R')
+#define PSERIES_ELOG_SECT_ID_HMC_ID (('H' << 8) | 'M')
+#define PSERIES_ELOG_SECT_ID_EPOW (('E' << 8) | 'P')
+#define PSERIES_ELOG_SECT_ID_IO_EVENT (('I' << 8) | 'E')
+#define PSERIES_ELOG_SECT_ID_MANUFACT_INFO (('M' << 8) | 'I')
+#define PSERIES_ELOG_SECT_ID_CALL_HOME (('C' << 8) | 'H')
+#define PSERIES_ELOG_SECT_ID_USER_DEF (('U' << 8) | 'D')
+
+/* Vendor specific Platform Event Log Format, Version 6, section header */
+struct pseries_elog_section {
+ uint16_t id; /* 0x00 2-byte ASCII section ID */
+ uint16_t length; /* 0x02 Section length in bytes */
+ uint8_t version; /* 0x04 Section version */
+ uint8_t subtype; /* 0x05 Section subtype */
+ uint16_t creator_component; /* 0x06 Creator component ID */
+ uint8_t data[]; /* 0x08 Start of section data */
+};
+
+static char ioei_rtas_buf[RTAS_DATA_BUF_SIZE] __cacheline_aligned;
+
+/**
+ * Find data portion of a specific section in RTAS extended event log.
+ * @elog: RTAS error/event log.
+ * @sect_id: section ID.
+ *
+ * Return:
+ * pointer to the section data of the specified section
+ * NULL if not found
+ */
+static struct pseries_elog_section *find_xelog_section(struct rtas_error_log *elog,
+ uint16_t sect_id)
+{
+ struct rtas_ext_event_log_v6 *xelog =
+ (struct rtas_ext_event_log_v6 *) elog->buffer;
+ struct pseries_elog_section *sect;
+ unsigned char *p, *log_end;
+
+ /* Check that we understand the format */
+ if (elog->extended_log_length < sizeof(struct rtas_ext_event_log_v6) ||
+ xelog->log_format != RTAS_V6EXT_LOG_FORMAT_EVENT_LOG ||
+ xelog->company_id != RTAS_V6EXT_COMPANY_ID_IBM)
+ return NULL;
+
+ log_end = elog->buffer + elog->extended_log_length;
+ p = xelog->vendor_log;
+ while (p < log_end) {
+ sect = (struct pseries_elog_section *)p;
+ if (sect->id == sect_id)
+ return sect;
+ p += sect->length;
+ }
+ return NULL;
+}
+
+/**
+ * Find the data portion of an IO Event section from event log.
+ * @elog: RTAS error/event log.
+ *
+ * Return:
+ * pointer to a valid IO event section data. NULL if not found.
+ */
+static struct pseries_io_event *ioei_find_event(struct rtas_error_log *elog)
+{
+ struct pseries_elog_section *sect;
+
+ /* We should only ever get called for io-event interrupts, but if
+ * we do get called for another type then something went wrong so
+ * make some noise about it.
+ * RTAS_TYPE_IO only exists in extended event log version 6 or later.
+ * No need to check event log version.
+ */
+ if (unlikely(elog->type != RTAS_TYPE_IO)) {
+ printk_once(KERN_WARNING "io_event_irq: Unexpected event type %d\n",
+ elog->type);
+ return NULL;
+ }
+
+ sect = find_xelog_section(elog, PSERIES_ELOG_SECT_ID_IO_EVENT);
+ if (unlikely(!sect)) {
+ printk_once(KERN_WARNING "io_event_irq: RTAS extended event "
+ "log does not contain an IO Event section. "
+ "Could be a bug in system firmware!\n");
+ return NULL;
+ }
+ return (struct pseries_io_event *) &sect->data;
+}
+
+/*
+ * PAPR:
+ * - check-exception returns the first found error or event and clears
+ * that error or event so it is reported only once.
+ * - Each interrupt returns one event. If a platform chooses to report
+ * multiple events through a single interrupt, it must ensure that the
+ * interrupt remains asserted until check-exception has been used to
+ * process all outstanding events for that interrupt.
+ *
+ * Implementation notes:
+ * - Events must be processed in the order they are returned. Hence,
+ * processing is sequential in nature.
+ * - The owner of an event is determined by combinations of scope,
+ * event type, and sub-type. There is no easy way to pre-sort clients
+ * by scope or event type alone. For example, the Torrent ISR route
+ * change event is reported with scope 0x00 (Not Applicable) rather
+ * than 0x3B (Torrent-hub). It is better to let the clients identify
+ * the owner of the event themselves.
+ */
+
+static irqreturn_t ioei_interrupt(int irq, void *dev_id)
+{
+ struct pseries_io_event *event;
+ int rtas_rc;
+
+ for (;;) {
+ rtas_rc = rtas_call(ioei_check_exception_token, 6, 1, NULL,
+ RTAS_VECTOR_EXTERNAL_INTERRUPT,
+ virq_to_hw(irq),
+ RTAS_IO_EVENTS, 1 /* Time Critical */,
+ __pa(ioei_rtas_buf),
+ RTAS_DATA_BUF_SIZE);
+ if (rtas_rc != 0)
+ break;
+
+ event = ioei_find_event((struct rtas_error_log *)ioei_rtas_buf);
+ if (!event)
+ continue;
+
+ atomic_notifier_call_chain(&pseries_ioei_notifier_list,
+ 0, event);
+ }
+ return IRQ_HANDLED;
+}
+
+static int __init ioei_init(void)
+{
+ struct device_node *np;
+
+ ioei_check_exception_token = rtas_token("check-exception");
+ if (ioei_check_exception_token == RTAS_UNKNOWN_SERVICE) {
+ pr_warning("IO Event IRQ not supported on this system !\n");
+ return -ENODEV;
+ }
+ np = of_find_node_by_path("/event-sources/ibm,io-events");
+ if (np) {
+ request_event_sources_irqs(np, ioei_interrupt, "IO_EVENT");
+ of_node_put(np);
+ } else {
+ pr_err("io_event_irq: No ibm,io-events on system! "
+ "IO Event interrupt disabled.\n");
+ return -ENODEV;
+ }
+ return 0;
+}
+machine_subsys_initcall(pseries, ioei_init);
+
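
A self-contained version of the usage sketch in the header comment of
io_event_irq.c above. It assumes the notifier data is the struct
pseries_io_event that ioei_find_event() returns, with scope/event_type as
byte-sized fields; is_my_event() and all my_* names are illustrative:

	#include <linux/module.h>
	#include <linux/notifier.h>
	#include <asm/io_event_irq.h>

	/* Illustrative filter: a real driver matches its own scope/type. */
	static bool is_my_event(u8 scope, u8 event_type)
	{
		return scope == 0x00 && event_type == 0x01;
	}

	static int my_event_handler(struct notifier_block *nb,
				    unsigned long val, void *data)
	{
		struct pseries_io_event *p = data;

		if (!is_my_event(p->scope, p->event_type))
			return NOTIFY_DONE;	/* not ours, next handler */

		/* ... handle the event ... */
		return NOTIFY_OK;		/* event handled */
	}

	static struct notifier_block my_event_nb = {
		.notifier_call = my_event_handler,
	};

	static int __init my_driver_init(void)
	{
		return atomic_notifier_chain_register(
				&pseries_ioei_notifier_list, &my_event_nb);
	}

	static void __exit my_driver_exit(void)
	{
		atomic_notifier_chain_unregister(
				&pseries_ioei_notifier_list, &my_event_nb);
	}
	module_init(my_driver_init);
	module_exit(my_driver_exit);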
diff --git a/arch/powerpc/platforms/pseries/iommu.c b/arch/powerpc/platforms/pseries/iommu.c
index 154c464cdca..01faab9456c 100644
--- a/arch/powerpc/platforms/pseries/iommu.c
+++ b/arch/powerpc/platforms/pseries/iommu.c
@@ -272,7 +272,7 @@ static unsigned long tce_get_pSeriesLP(struct iommu_table *tbl, long tcenum)
return tce_ret;
}
-/* this is compatable with cells for the device tree property */
+/* this is compatible with cells for the device tree property */
struct dynamic_dma_window_prop {
__be32 liobn; /* tce table number */
__be64 dma_base; /* address hi,lo */
@@ -659,15 +659,18 @@ static void remove_ddw(struct device_node *np)
{
struct dynamic_dma_window_prop *dwp;
struct property *win64;
- const u32 *ddr_avail;
+ const u32 *ddw_avail;
u64 liobn;
int len, ret;
- ddr_avail = of_get_property(np, "ibm,ddw-applicable", &len);
+ ddw_avail = of_get_property(np, "ibm,ddw-applicable", &len);
win64 = of_find_property(np, DIRECT64_PROPNAME, NULL);
- if (!win64 || !ddr_avail || len < 3 * sizeof(u32))
+ if (!win64)
return;
+ if (!ddw_avail || len < 3 * sizeof(u32) || win64->length < sizeof(*dwp))
+ goto delprop;
+
dwp = win64->value;
liobn = (u64)be32_to_cpu(dwp->liobn);
@@ -681,28 +684,29 @@ static void remove_ddw(struct device_node *np)
pr_debug("%s successfully cleared tces in window.\n",
np->full_name);
- ret = rtas_call(ddr_avail[2], 1, 1, NULL, liobn);
+ ret = rtas_call(ddw_avail[2], 1, 1, NULL, liobn);
if (ret)
pr_warning("%s: failed to remove direct window: rtas returned "
"%d to ibm,remove-pe-dma-window(%x) %llx\n",
- np->full_name, ret, ddr_avail[2], liobn);
+ np->full_name, ret, ddw_avail[2], liobn);
else
pr_debug("%s: successfully removed direct window: rtas returned "
"%d to ibm,remove-pe-dma-window(%x) %llx\n",
- np->full_name, ret, ddr_avail[2], liobn);
-}
+ np->full_name, ret, ddw_avail[2], liobn);
+delprop:
+ ret = prom_remove_property(np, win64);
+ if (ret)
+ pr_warning("%s: failed to remove direct window property: %d\n",
+ np->full_name, ret);
+}
-static int dupe_ddw_if_already_created(struct pci_dev *dev, struct device_node *pdn)
+static u64 find_existing_ddw(struct device_node *pdn)
{
- struct device_node *dn;
- struct pci_dn *pcidn;
struct direct_window *window;
const struct dynamic_dma_window_prop *direct64;
u64 dma_addr = 0;
- dn = pci_device_to_OF_node(dev);
- pcidn = PCI_DN(dn);
spin_lock(&direct_window_list_lock);
/* check if we already created a window and dupe that config if so */
list_for_each_entry(window, &direct_window_list, list) {
@@ -717,36 +721,40 @@ static int dupe_ddw_if_already_created(struct pci_dev *dev, struct device_node *
return dma_addr;
}
-static u64 dupe_ddw_if_kexec(struct pci_dev *dev, struct device_node *pdn)
+static int find_existing_ddw_windows(void)
{
- struct device_node *dn;
- struct pci_dn *pcidn;
int len;
+ struct device_node *pdn;
struct direct_window *window;
const struct dynamic_dma_window_prop *direct64;
- u64 dma_addr = 0;
- dn = pci_device_to_OF_node(dev);
- pcidn = PCI_DN(dn);
- direct64 = of_get_property(pdn, DIRECT64_PROPNAME, &len);
- if (direct64) {
+ if (!firmware_has_feature(FW_FEATURE_LPAR))
+ return 0;
+
+ for_each_node_with_property(pdn, DIRECT64_PROPNAME) {
+ direct64 = of_get_property(pdn, DIRECT64_PROPNAME, &len);
+ if (!direct64)
+ continue;
+
window = kzalloc(sizeof(*window), GFP_KERNEL);
- if (!window) {
+ if (!window || len < sizeof(struct dynamic_dma_window_prop)) {
+ kfree(window);
remove_ddw(pdn);
- } else {
- window->device = pdn;
- window->prop = direct64;
- spin_lock(&direct_window_list_lock);
- list_add(&window->list, &direct_window_list);
- spin_unlock(&direct_window_list_lock);
- dma_addr = direct64->dma_base;
+ continue;
}
+
+ window->device = pdn;
+ window->prop = direct64;
+ spin_lock(&direct_window_list_lock);
+ list_add(&window->list, &direct_window_list);
+ spin_unlock(&direct_window_list_lock);
}
- return dma_addr;
+ return 0;
}
+machine_arch_initcall(pseries, find_existing_ddw_windows);
-static int query_ddw(struct pci_dev *dev, const u32 *ddr_avail,
+static int query_ddw(struct pci_dev *dev, const u32 *ddw_avail,
struct ddw_query_response *query)
{
struct device_node *dn;
@@ -767,15 +775,15 @@ static int query_ddw(struct pci_dev *dev, const u32 *ddr_avail,
if (pcidn->eeh_pe_config_addr)
cfg_addr = pcidn->eeh_pe_config_addr;
buid = pcidn->phb->buid;
- ret = rtas_call(ddr_avail[0], 3, 5, (u32 *)query,
+ ret = rtas_call(ddw_avail[0], 3, 5, (u32 *)query,
cfg_addr, BUID_HI(buid), BUID_LO(buid));
dev_info(&dev->dev, "ibm,query-pe-dma-windows(%x) %x %x %x"
- " returned %d\n", ddr_avail[0], cfg_addr, BUID_HI(buid),
+ " returned %d\n", ddw_avail[0], cfg_addr, BUID_HI(buid),
BUID_LO(buid), ret);
return ret;
}
-static int create_ddw(struct pci_dev *dev, const u32 *ddr_avail,
+static int create_ddw(struct pci_dev *dev, const u32 *ddw_avail,
struct ddw_create_response *create, int page_shift,
int window_shift)
{
@@ -800,12 +808,12 @@ static int create_ddw(struct pci_dev *dev, const u32 *ddr_avail,
do {
/* extra outputs are LIOBN and dma-addr (hi, lo) */
- ret = rtas_call(ddr_avail[1], 5, 4, (u32 *)create, cfg_addr,
+ ret = rtas_call(ddw_avail[1], 5, 4, (u32 *)create, cfg_addr,
BUID_HI(buid), BUID_LO(buid), page_shift, window_shift);
} while (rtas_busy_delay(ret));
dev_info(&dev->dev,
"ibm,create-pe-dma-window(%x) %x %x %x %x %x returned %d "
- "(liobn = 0x%x starting addr = %x %x)\n", ddr_avail[1],
+ "(liobn = 0x%x starting addr = %x %x)\n", ddw_avail[1],
cfg_addr, BUID_HI(buid), BUID_LO(buid), page_shift,
window_shift, ret, create->liobn, create->addr_hi, create->addr_lo);
@@ -831,18 +839,14 @@ static u64 enable_ddw(struct pci_dev *dev, struct device_node *pdn)
int page_shift;
u64 dma_addr, max_addr;
struct device_node *dn;
- const u32 *uninitialized_var(ddr_avail);
+ const u32 *uninitialized_var(ddw_avail);
struct direct_window *window;
- struct property *uninitialized_var(win64);
+ struct property *win64;
struct dynamic_dma_window_prop *ddwprop;
mutex_lock(&direct_window_init_mutex);
- dma_addr = dupe_ddw_if_already_created(dev, pdn);
- if (dma_addr != 0)
- goto out_unlock;
-
- dma_addr = dupe_ddw_if_kexec(dev, pdn);
+ dma_addr = find_existing_ddw(pdn);
if (dma_addr != 0)
goto out_unlock;
@@ -854,8 +858,8 @@ static u64 enable_ddw(struct pci_dev *dev, struct device_node *pdn)
* for the given node in that order.
* the property is actually in the parent, not the PE
*/
- ddr_avail = of_get_property(pdn, "ibm,ddw-applicable", &len);
- if (!ddr_avail || len < 3 * sizeof(u32))
+ ddw_avail = of_get_property(pdn, "ibm,ddw-applicable", &len);
+ if (!ddw_avail || len < 3 * sizeof(u32))
goto out_unlock;
/*
@@ -865,7 +869,7 @@ static u64 enable_ddw(struct pci_dev *dev, struct device_node *pdn)
* of page sizes: supported and supported for migrate-dma.
*/
dn = pci_device_to_OF_node(dev);
- ret = query_ddw(dev, ddr_avail, &query);
+ ret = query_ddw(dev, ddw_avail, &query);
if (ret != 0)
goto out_unlock;
@@ -907,13 +911,14 @@ static u64 enable_ddw(struct pci_dev *dev, struct device_node *pdn)
}
win64->name = kstrdup(DIRECT64_PROPNAME, GFP_KERNEL);
win64->value = ddwprop = kmalloc(sizeof(*ddwprop), GFP_KERNEL);
+ win64->length = sizeof(*ddwprop);
if (!win64->name || !win64->value) {
dev_info(&dev->dev,
"couldn't allocate property name and value\n");
goto out_free_prop;
}
- ret = create_ddw(dev, ddr_avail, &create, page_shift, len);
+ ret = create_ddw(dev, ddw_avail, &create, page_shift, len);
if (ret != 0)
goto out_free_prop;
@@ -976,7 +981,7 @@ static void pci_dma_dev_setup_pSeriesLP(struct pci_dev *dev)
pr_debug("pci_dma_dev_setup_pSeriesLP: %s\n", pci_name(dev));
/* dev setup for LPAR is a little tricky, since the device tree might
- * contain the dma-window properties per-device and not neccesarily
+ * contain the dma-window properties per-device and not necessarily
* for the bus. So we need to search upwards in the tree until we
* either hit a dma-window property, OR find a parent with a table
* already allocated.
@@ -1021,19 +1026,22 @@ static int dma_set_mask_pSeriesLP(struct device *dev, u64 dma_mask)
const void *dma_window = NULL;
u64 dma_offset;
- if (!dev->dma_mask || !dma_supported(dev, dma_mask))
+ if (!dev->dma_mask)
return -EIO;
+ if (!dev_is_pci(dev))
+ goto check_mask;
+
+ pdev = to_pci_dev(dev);
+
/* only attempt to use a new window if 64-bit DMA is requested */
if (!disable_ddw && dma_mask == DMA_BIT_MASK(64)) {
- pdev = to_pci_dev(dev);
-
dn = pci_device_to_OF_node(pdev);
dev_dbg(dev, "node is %s\n", dn->full_name);
/*
* the device tree might contain the dma-window properties
- * per-device and not neccesarily for the bus. So we need to
+ * per-device and not necessarily for the bus. So we need to
* search upwards in the tree until we either hit a dma-window
* property, OR find a parent with a table already allocated.
*/
@@ -1054,12 +1062,17 @@ static int dma_set_mask_pSeriesLP(struct device *dev, u64 dma_mask)
}
}
- /* fall-through to iommu ops */
- if (!ddw_enabled) {
- dev_info(dev, "Using 32-bit DMA via iommu\n");
+ /* fall back on iommu ops, restore table pointer with ops */
+ if (!ddw_enabled && get_dma_ops(dev) != &dma_iommu_ops) {
+ dev_info(dev, "Restoring 32-bit DMA via iommu\n");
set_dma_ops(dev, &dma_iommu_ops);
+ pci_dma_dev_setup_pSeriesLP(pdev);
}
+check_mask:
+ if (!dma_supported(dev, dma_mask))
+ return -EIO;
+
*dev->dma_mask = dma_mask;
return 0;
}
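
For reference, the renamed ddw_avail array holds the three RTAS tokens read
from the "ibm,ddw-applicable" property, in the order the calls in this file
use them; a minimal sketch, with pdn and len as in enable_ddw():

	const u32 *ddw_avail = of_get_property(pdn, "ibm,ddw-applicable",
					       &len);

	if (ddw_avail && len >= 3 * sizeof(u32)) {
		u32 query_tok  = ddw_avail[0]; /* ibm,query-pe-dma-windows */
		u32 create_tok = ddw_avail[1]; /* ibm,create-pe-dma-window */
		u32 remove_tok = ddw_avail[2]; /* ibm,remove-pe-dma-window */
		/* passed to rtas_call() as in query_ddw(), create_ddw()
		 * and remove_ddw() above */
	}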
diff --git a/arch/powerpc/platforms/pseries/kexec.c b/arch/powerpc/platforms/pseries/kexec.c
index 77d38a5e2ff..54cf3a4aa16 100644
--- a/arch/powerpc/platforms/pseries/kexec.c
+++ b/arch/powerpc/platforms/pseries/kexec.c
@@ -7,15 +7,18 @@
* 2 of the License, or (at your option) any later version.
*/
+#include <linux/kernel.h>
+#include <linux/interrupt.h>
+
#include <asm/machdep.h>
#include <asm/page.h>
#include <asm/firmware.h>
#include <asm/kexec.h>
#include <asm/mpic.h>
+#include <asm/xics.h>
#include <asm/smp.h>
#include "pseries.h"
-#include "xics.h"
#include "plpar_wrappers.h"
static void pseries_kexec_cpu_down(int crash_shutdown, int secondary)
diff --git a/arch/powerpc/platforms/pseries/lpar.c b/arch/powerpc/platforms/pseries/lpar.c
index ca5d5898d32..39e6e0a7b2f 100644
--- a/arch/powerpc/platforms/pseries/lpar.c
+++ b/arch/powerpc/platforms/pseries/lpar.c
@@ -329,6 +329,8 @@ static long pSeries_lpar_hpte_insert(unsigned long hpte_group,
/* Make pHyp happy */
if ((rflags & _PAGE_NO_CACHE) & !(rflags & _PAGE_WRITETHRU))
hpte_r &= ~_PAGE_COHERENT;
+ if (firmware_has_feature(FW_FEATURE_XCMO) && !(hpte_r & HPTE_R_N))
+ flags |= H_COALESCE_CAND;
lpar_rc = plpar_pte_enter(flags, hpte_group, hpte_v, hpte_r, &slot);
if (unlikely(lpar_rc == H_PTEG_FULL)) {
@@ -573,7 +575,7 @@ static void pSeries_lpar_flush_hash_range(unsigned long number, int local)
unsigned long i, pix, rc;
unsigned long flags = 0;
struct ppc64_tlb_batch *batch = &__get_cpu_var(ppc64_tlb_batch);
- int lock_tlbie = !cpu_has_feature(CPU_FTR_LOCKLESS_TLBIE);
+ int lock_tlbie = !mmu_has_feature(MMU_FTR_LOCKLESS_TLBIE);
unsigned long param[9];
unsigned long va;
unsigned long hash, index, shift, hidx, slot;
@@ -771,3 +773,47 @@ out:
local_irq_restore(flags);
}
#endif
+
+/**
+ * h_get_mpp
+ * H_GET_MPP hcall returns info in 7 parms
+ */
+int h_get_mpp(struct hvcall_mpp_data *mpp_data)
+{
+ int rc;
+ unsigned long retbuf[PLPAR_HCALL9_BUFSIZE];
+
+ rc = plpar_hcall9(H_GET_MPP, retbuf);
+
+ mpp_data->entitled_mem = retbuf[0];
+ mpp_data->mapped_mem = retbuf[1];
+
+ mpp_data->group_num = (retbuf[2] >> 2 * 8) & 0xffff;
+ mpp_data->pool_num = retbuf[2] & 0xffff;
+
+ mpp_data->mem_weight = (retbuf[3] >> 7 * 8) & 0xff;
+ mpp_data->unallocated_mem_weight = (retbuf[3] >> 6 * 8) & 0xff;
+ mpp_data->unallocated_entitlement = retbuf[3] & 0xffffffffffff;
+
+ mpp_data->pool_size = retbuf[4];
+ mpp_data->loan_request = retbuf[5];
+ mpp_data->backing_mem = retbuf[6];
+
+ return rc;
+}
+EXPORT_SYMBOL(h_get_mpp);
+
+int h_get_mpp_x(struct hvcall_mpp_x_data *mpp_x_data)
+{
+ int rc;
+ unsigned long retbuf[PLPAR_HCALL9_BUFSIZE] = { 0 };
+
+ rc = plpar_hcall9(H_GET_MPP_X, retbuf);
+
+ mpp_x_data->coalesced_bytes = retbuf[0];
+ mpp_x_data->pool_coalesced_bytes = retbuf[1];
+ mpp_x_data->pool_purr_cycles = retbuf[2];
+ mpp_x_data->pool_spurr_cycles = retbuf[3];
+
+ return rc;
+}
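
The shift-and-mask unpacking in h_get_mpp() implies the following byte
layout for the H_GET_MPP return buffer (a reading of the code above, not a
quotation from PAPR; byte 0 is the least significant):

	/*
	 * retbuf[2]: bytes 2-3  group_num
	 *            bytes 0-1  pool_num
	 * retbuf[3]: byte  7    mem_weight
	 *            byte  6    unallocated_mem_weight
	 *            bytes 0-5  unallocated_entitlement
	 *
	 * retbuf[0], [1], [4], [5] and [6] are taken whole as
	 * entitled_mem, mapped_mem, pool_size, loan_request and
	 * backing_mem respectively.
	 */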
diff --git a/arch/powerpc/platforms/pseries/plpar_wrappers.h b/arch/powerpc/platforms/pseries/plpar_wrappers.h
index d9801117124..4bf21207d7d 100644
--- a/arch/powerpc/platforms/pseries/plpar_wrappers.h
+++ b/arch/powerpc/platforms/pseries/plpar_wrappers.h
@@ -270,31 +270,4 @@ static inline long plpar_put_term_char(unsigned long termno, unsigned long len,
lbuf[1]);
}
-static inline long plpar_eoi(unsigned long xirr)
-{
- return plpar_hcall_norets(H_EOI, xirr);
-}
-
-static inline long plpar_cppr(unsigned long cppr)
-{
- return plpar_hcall_norets(H_CPPR, cppr);
-}
-
-static inline long plpar_ipi(unsigned long servernum, unsigned long mfrr)
-{
- return plpar_hcall_norets(H_IPI, servernum, mfrr);
-}
-
-static inline long plpar_xirr(unsigned long *xirr_ret, unsigned char cppr)
-{
- long rc;
- unsigned long retbuf[PLPAR_HCALL_BUFSIZE];
-
- rc = plpar_hcall(H_XIRR, retbuf, cppr);
-
- *xirr_ret = retbuf[0];
-
- return rc;
-}
-
#endif /* _PSERIES_PLPAR_WRAPPERS_H */
diff --git a/arch/powerpc/platforms/pseries/ras.c b/arch/powerpc/platforms/pseries/ras.c
index c55d7ad9c64..086d2ae4e06 100644
--- a/arch/powerpc/platforms/pseries/ras.c
+++ b/arch/powerpc/platforms/pseries/ras.c
@@ -122,7 +122,7 @@ static irqreturn_t ras_epow_interrupt(int irq, void *dev_id)
status = rtas_call(ras_check_exception_token, 6, 1, NULL,
RTAS_VECTOR_EXTERNAL_INTERRUPT,
- irq_map[irq].hwirq,
+ virq_to_hw(irq),
RTAS_EPOW_WARNING | RTAS_POWERMGM_EVENTS,
critical, __pa(&ras_log_buf),
rtas_get_error_log_max());
@@ -157,7 +157,7 @@ static irqreturn_t ras_error_interrupt(int irq, void *dev_id)
status = rtas_call(ras_check_exception_token, 6, 1, NULL,
RTAS_VECTOR_EXTERNAL_INTERRUPT,
- irq_map[irq].hwirq,
+ virq_to_hw(irq),
RTAS_INTERNAL_ERROR, 1 /*Time Critical */,
__pa(&ras_log_buf),
rtas_get_error_log_max());
@@ -227,7 +227,7 @@ static struct rtas_error_log *fwnmi_get_errinfo(struct pt_regs *regs)
struct rtas_error_log *h, *errhdr = NULL;
if (!VALID_FWNMI_BUFFER(regs->gpr[3])) {
- printk(KERN_ERR "FWNMI: corrupt r3\n");
+ printk(KERN_ERR "FWNMI: corrupt r3 0x%016lx\n", regs->gpr[3]);
return NULL;
}
diff --git a/arch/powerpc/platforms/pseries/setup.c b/arch/powerpc/platforms/pseries/setup.c
index c319d04aa79..593acceeff9 100644
--- a/arch/powerpc/platforms/pseries/setup.c
+++ b/arch/powerpc/platforms/pseries/setup.c
@@ -53,9 +53,9 @@
#include <asm/irq.h>
#include <asm/time.h>
#include <asm/nvram.h>
-#include "xics.h"
#include <asm/pmc.h>
#include <asm/mpic.h>
+#include <asm/xics.h>
#include <asm/ppc-pci.h>
#include <asm/i8259.h>
#include <asm/udbg.h>
@@ -205,6 +205,9 @@ static void __init pseries_mpic_init_IRQ(void)
mpic_assign_isu(mpic, n, isuaddr);
}
+ /* Setup top-level get_irq */
+ ppc_md.get_irq = mpic_get_irq;
+
/* All ISUs are setup, complete initialization */
mpic_init(mpic);
@@ -214,7 +217,7 @@ static void __init pseries_mpic_init_IRQ(void)
static void __init pseries_xics_init_IRQ(void)
{
- xics_init_IRQ();
+ xics_init();
pseries_setup_i8259_cascade();
}
@@ -238,7 +241,6 @@ static void __init pseries_discover_pic(void)
if (strstr(typep, "open-pic")) {
pSeries_mpic_node = of_node_get(np);
ppc_md.init_IRQ = pseries_mpic_init_IRQ;
- ppc_md.get_irq = mpic_get_irq;
setup_kexec_cpu_down_mpic();
smp_init_pseries_mpic();
return;
@@ -276,6 +278,8 @@ static struct notifier_block pci_dn_reconfig_nb = {
.notifier_call = pci_dn_reconfig_notifier,
};
+struct kmem_cache *dtl_cache;
+
#ifdef CONFIG_VIRT_CPU_ACCOUNTING
/*
* Allocate space for the dispatch trace log for all possible cpus
@@ -291,10 +295,12 @@ static int alloc_dispatch_logs(void)
if (!firmware_has_feature(FW_FEATURE_SPLPAR))
return 0;
+ if (!dtl_cache)
+ return 0;
+
for_each_possible_cpu(cpu) {
pp = &paca[cpu];
- dtl = kmalloc_node(DISPATCH_LOG_BYTES, GFP_KERNEL,
- cpu_to_node(cpu));
+ dtl = kmem_cache_alloc(dtl_cache, GFP_KERNEL);
if (!dtl) {
pr_warn("Failed to allocate dispatch trace log for cpu %d\n",
cpu);
@@ -324,10 +330,27 @@ static int alloc_dispatch_logs(void)
return 0;
}
-
-early_initcall(alloc_dispatch_logs);
+#else /* !CONFIG_VIRT_CPU_ACCOUNTING */
+static inline int alloc_dispatch_logs(void)
+{
+ return 0;
+}
#endif /* CONFIG_VIRT_CPU_ACCOUNTING */
+static int alloc_dispatch_log_kmem_cache(void)
+{
+ dtl_cache = kmem_cache_create("dtl", DISPATCH_LOG_BYTES,
+ DISPATCH_LOG_BYTES, 0, NULL);
+ if (!dtl_cache) {
+ pr_warn("Failed to create dispatch trace log buffer cache\n");
+ pr_warn("Stolen time statistics will be unreliable\n");
+ return 0;
+ }
+
+ return alloc_dispatch_logs();
+}
+early_initcall(alloc_dispatch_log_kmem_cache);
+
static void __init pSeries_setup_arch(void)
{
/* Discover PIC type and setup ppc_md accordingly */
@@ -378,7 +401,7 @@ static int __init pSeries_init_panel(void)
return 0;
}
-arch_initcall(pSeries_init_panel);
+machine_arch_initcall(pseries, pSeries_init_panel);
static int pseries_set_dabr(unsigned long dabr)
{
@@ -395,6 +418,16 @@ static int pseries_set_xdabr(unsigned long dabr)
#define CMO_CHARACTERISTICS_TOKEN 44
#define CMO_MAXLENGTH 1026
+void pSeries_coalesce_init(void)
+{
+ struct hvcall_mpp_x_data mpp_x_data;
+
+ if (firmware_has_feature(FW_FEATURE_CMO) && !h_get_mpp_x(&mpp_x_data))
+ powerpc_firmware_features |= FW_FEATURE_XCMO;
+ else
+ powerpc_firmware_features &= ~FW_FEATURE_XCMO;
+}
+
/**
* fw_cmo_feature_init - FW_FEATURE_CMO is not stored in ibm,hypertas-functions,
* handle that here. (Stolen from parse_system_parameter_string)
@@ -464,6 +497,7 @@ void pSeries_cmo_feature_init(void)
pr_debug("CMO enabled, PrPSP=%d, SecPSP=%d\n", CMO_PrPSP,
CMO_SecPSP);
powerpc_firmware_features |= FW_FEATURE_CMO;
+ pSeries_coalesce_init();
} else
pr_debug("CMO not enabled, PrPSP=%d, SecPSP=%d\n", CMO_PrPSP,
CMO_SecPSP);
diff --git a/arch/powerpc/platforms/pseries/smp.c b/arch/powerpc/platforms/pseries/smp.c
index d6479f9738f..fbffd7e47ab 100644
--- a/arch/powerpc/platforms/pseries/smp.c
+++ b/arch/powerpc/platforms/pseries/smp.c
@@ -44,10 +44,11 @@
#include <asm/mpic.h>
#include <asm/vdso_datapage.h>
#include <asm/cputhreads.h>
+#include <asm/mpic.h>
+#include <asm/xics.h>
#include "plpar_wrappers.h"
#include "pseries.h"
-#include "xics.h"
#include "offline_states.h"
@@ -112,10 +113,10 @@ static inline int __devinit smp_startup_cpu(unsigned int lcpu)
/* Fixup atomic count: it exited inside IRQ handler. */
task_thread_info(paca[lcpu].__current)->preempt_count = 0;
-
+#ifdef CONFIG_HOTPLUG_CPU
if (get_cpu_current_state(lcpu) == CPU_STATE_INACTIVE)
goto out;
-
+#endif
/*
* If the RTAS start-cpu token does not exist then presume the
* cpu is already spinning.
@@ -130,11 +131,12 @@ static inline int __devinit smp_startup_cpu(unsigned int lcpu)
return 0;
}
+#ifdef CONFIG_HOTPLUG_CPU
out:
+#endif
return 1;
}
-#ifdef CONFIG_XICS
static void __devinit smp_xics_setup_cpu(int cpu)
{
if (cpu != boot_cpuid)
@@ -144,20 +146,18 @@ static void __devinit smp_xics_setup_cpu(int cpu)
vpa_init(cpu);
cpumask_clear_cpu(cpu, of_spin_mask);
+#ifdef CONFIG_HOTPLUG_CPU
set_cpu_current_state(cpu, CPU_STATE_ONLINE);
set_default_offline_state(cpu);
-
+#endif
}
-#endif /* CONFIG_XICS */
-static void __devinit smp_pSeries_kick_cpu(int nr)
+static int __devinit smp_pSeries_kick_cpu(int nr)
{
- long rc;
- unsigned long hcpuid;
BUG_ON(nr < 0 || nr >= NR_CPUS);
if (!smp_startup_cpu(nr))
- return;
+ return -ENOENT;
/*
* The processor is currently spinning, waiting for the
@@ -165,16 +165,22 @@ static void __devinit smp_pSeries_kick_cpu(int nr)
* the processor will continue on to secondary_start
*/
paca[nr].cpu_start = 1;
-
+#ifdef CONFIG_HOTPLUG_CPU
set_preferred_offline_state(nr, CPU_STATE_ONLINE);
if (get_cpu_current_state(nr) == CPU_STATE_INACTIVE) {
+ long rc;
+ unsigned long hcpuid;
+
hcpuid = get_hard_smp_processor_id(nr);
rc = plpar_hcall_norets(H_PROD, hcpuid);
if (rc != H_SUCCESS)
printk(KERN_ERR "Error: Prod to wake up processor %d "
"Ret= %ld\n", nr, rc);
}
+#endif
+
+ return 0;
}
static int smp_pSeries_cpu_bootable(unsigned int nr)
@@ -192,23 +198,22 @@ static int smp_pSeries_cpu_bootable(unsigned int nr)
return 1;
}
-#ifdef CONFIG_MPIC
+
static struct smp_ops_t pSeries_mpic_smp_ops = {
.message_pass = smp_mpic_message_pass,
.probe = smp_mpic_probe,
.kick_cpu = smp_pSeries_kick_cpu,
.setup_cpu = smp_mpic_setup_cpu,
};
-#endif
-#ifdef CONFIG_XICS
+
static struct smp_ops_t pSeries_xics_smp_ops = {
- .message_pass = smp_xics_message_pass,
- .probe = smp_xics_probe,
+ .message_pass = smp_muxed_ipi_message_pass,
+ .cause_ipi = NULL, /* Filled at runtime by xics_smp_probe() */
+ .probe = xics_smp_probe,
.kick_cpu = smp_pSeries_kick_cpu,
.setup_cpu = smp_xics_setup_cpu,
.cpu_bootable = smp_pSeries_cpu_bootable,
};
-#endif
/* This is called very early */
static void __init smp_init_pseries(void)
@@ -240,14 +245,12 @@ static void __init smp_init_pseries(void)
pr_debug(" <- smp_init_pSeries()\n");
}
-#ifdef CONFIG_MPIC
void __init smp_init_pseries_mpic(void)
{
smp_ops = &pSeries_mpic_smp_ops;
smp_init_pseries();
}
-#endif
void __init smp_init_pseries_xics(void)
{
diff --git a/arch/powerpc/platforms/pseries/xics.c b/arch/powerpc/platforms/pseries/xics.c
deleted file mode 100644
index ec8fe22047b..00000000000
--- a/arch/powerpc/platforms/pseries/xics.c
+++ /dev/null
@@ -1,949 +0,0 @@
-/*
- * arch/powerpc/platforms/pseries/xics.c
- *
- * Copyright 2000 IBM Corporation.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- */
-
-#include <linux/types.h>
-#include <linux/threads.h>
-#include <linux/kernel.h>
-#include <linux/irq.h>
-#include <linux/smp.h>
-#include <linux/interrupt.h>
-#include <linux/init.h>
-#include <linux/radix-tree.h>
-#include <linux/cpu.h>
-#include <linux/msi.h>
-#include <linux/of.h>
-#include <linux/percpu.h>
-
-#include <asm/firmware.h>
-#include <asm/io.h>
-#include <asm/pgtable.h>
-#include <asm/smp.h>
-#include <asm/rtas.h>
-#include <asm/hvcall.h>
-#include <asm/machdep.h>
-
-#include "xics.h"
-#include "plpar_wrappers.h"
-
-static struct irq_host *xics_host;
-
-#define XICS_IPI 2
-#define XICS_IRQ_SPURIOUS 0
-
-/* Want a priority other than 0. Various HW issues require this. */
-#define DEFAULT_PRIORITY 5
-
-/*
- * Mark IPIs as higher priority so we can take them inside interrupts that
- * arent marked IRQF_DISABLED
- */
-#define IPI_PRIORITY 4
-
-/* The least favored priority */
-#define LOWEST_PRIORITY 0xFF
-
-/* The number of priorities defined above */
-#define MAX_NUM_PRIORITIES 3
-
-static unsigned int default_server = 0xFF;
-static unsigned int default_distrib_server = 0;
-static unsigned int interrupt_server_size = 8;
-
-/* RTAS service tokens */
-static int ibm_get_xive;
-static int ibm_set_xive;
-static int ibm_int_on;
-static int ibm_int_off;
-
-struct xics_cppr {
- unsigned char stack[MAX_NUM_PRIORITIES];
- int index;
-};
-
-static DEFINE_PER_CPU(struct xics_cppr, xics_cppr);
-
-/* Direct hardware low level accessors */
-
-/* The part of the interrupt presentation layer that we care about */
-struct xics_ipl {
- union {
- u32 word;
- u8 bytes[4];
- } xirr_poll;
- union {
- u32 word;
- u8 bytes[4];
- } xirr;
- u32 dummy;
- union {
- u32 word;
- u8 bytes[4];
- } qirr;
-};
-
-static struct xics_ipl __iomem *xics_per_cpu[NR_CPUS];
-
-static inline unsigned int direct_xirr_info_get(void)
-{
- int cpu = smp_processor_id();
-
- return in_be32(&xics_per_cpu[cpu]->xirr.word);
-}
-
-static inline void direct_xirr_info_set(unsigned int value)
-{
- int cpu = smp_processor_id();
-
- out_be32(&xics_per_cpu[cpu]->xirr.word, value);
-}
-
-static inline void direct_cppr_info(u8 value)
-{
- int cpu = smp_processor_id();
-
- out_8(&xics_per_cpu[cpu]->xirr.bytes[0], value);
-}
-
-static inline void direct_qirr_info(int n_cpu, u8 value)
-{
- out_8(&xics_per_cpu[n_cpu]->qirr.bytes[0], value);
-}
-
-
-/* LPAR low level accessors */
-
-static inline unsigned int lpar_xirr_info_get(unsigned char cppr)
-{
- unsigned long lpar_rc;
- unsigned long return_value;
-
- lpar_rc = plpar_xirr(&return_value, cppr);
- if (lpar_rc != H_SUCCESS)
- panic(" bad return code xirr - rc = %lx\n", lpar_rc);
- return (unsigned int)return_value;
-}
-
-static inline void lpar_xirr_info_set(unsigned int value)
-{
- unsigned long lpar_rc;
-
- lpar_rc = plpar_eoi(value);
- if (lpar_rc != H_SUCCESS)
- panic("bad return code EOI - rc = %ld, value=%x\n", lpar_rc,
- value);
-}
-
-static inline void lpar_cppr_info(u8 value)
-{
- unsigned long lpar_rc;
-
- lpar_rc = plpar_cppr(value);
- if (lpar_rc != H_SUCCESS)
- panic("bad return code cppr - rc = %lx\n", lpar_rc);
-}
-
-static inline void lpar_qirr_info(int n_cpu , u8 value)
-{
- unsigned long lpar_rc;
-
- lpar_rc = plpar_ipi(get_hard_smp_processor_id(n_cpu), value);
- if (lpar_rc != H_SUCCESS)
- panic("bad return code qirr - rc = %lx\n", lpar_rc);
-}
-
-
-/* Interface to generic irq subsystem */
-
-#ifdef CONFIG_SMP
-/*
- * For the moment we only implement delivery to all cpus or one cpu.
- *
- * If the requested affinity is cpu_all_mask, we set global affinity.
- * If not we set it to the first cpu in the mask, even if multiple cpus
- * are set. This is so things like irqbalance (which set core and package
- * wide affinities) do the right thing.
- */
-static int get_irq_server(unsigned int virq, const struct cpumask *cpumask,
- unsigned int strict_check)
-{
-
- if (!distribute_irqs)
- return default_server;
-
- if (!cpumask_subset(cpu_possible_mask, cpumask)) {
- int server = cpumask_first_and(cpu_online_mask, cpumask);
-
- if (server < nr_cpu_ids)
- return get_hard_smp_processor_id(server);
-
- if (strict_check)
- return -1;
- }
-
- /*
- * Workaround issue with some versions of JS20 firmware that
- * deliver interrupts to cpus which haven't been started. This
- * happens when using the maxcpus= boot option.
- */
- if (cpumask_equal(cpu_online_mask, cpu_present_mask))
- return default_distrib_server;
-
- return default_server;
-}
-#else
-#define get_irq_server(virq, cpumask, strict_check) (default_server)
-#endif
-
-static void xics_unmask_irq(struct irq_data *d)
-{
- unsigned int hwirq;
- int call_status;
- int server;
-
- pr_devel("xics: unmask virq %d\n", d->irq);
-
- hwirq = (unsigned int)irq_map[d->irq].hwirq;
- pr_devel(" -> map to hwirq 0x%x\n", hwirq);
- if (hwirq == XICS_IPI || hwirq == XICS_IRQ_SPURIOUS)
- return;
-
- server = get_irq_server(d->irq, d->affinity, 0);
-
- call_status = rtas_call(ibm_set_xive, 3, 1, NULL, hwirq, server,
- DEFAULT_PRIORITY);
- if (call_status != 0) {
- printk(KERN_ERR
- "%s: ibm_set_xive irq %u server %x returned %d\n",
- __func__, hwirq, server, call_status);
- return;
- }
-
- /* Now unmask the interrupt (often a no-op) */
- call_status = rtas_call(ibm_int_on, 1, 1, NULL, hwirq);
- if (call_status != 0) {
- printk(KERN_ERR "%s: ibm_int_on irq=%u returned %d\n",
- __func__, hwirq, call_status);
- return;
- }
-}
-
-static unsigned int xics_startup(struct irq_data *d)
-{
- /*
- * The generic MSI code returns with the interrupt disabled on the
- * card, using the MSI mask bits. Firmware doesn't appear to unmask
- * at that level, so we do it here by hand.
- */
- if (d->msi_desc)
- unmask_msi_irq(d);
-
- /* unmask it */
- xics_unmask_irq(d);
- return 0;
-}
-
-static void xics_mask_real_irq(unsigned int hwirq)
-{
- int call_status;
-
- if (hwirq == XICS_IPI)
- return;
-
- call_status = rtas_call(ibm_int_off, 1, 1, NULL, hwirq);
- if (call_status != 0) {
- printk(KERN_ERR "%s: ibm_int_off irq=%u returned %d\n",
- __func__, hwirq, call_status);
- return;
- }
-
- /* Have to set XIVE to 0xff to be able to remove a slot */
- call_status = rtas_call(ibm_set_xive, 3, 1, NULL, hwirq,
- default_server, 0xff);
- if (call_status != 0) {
- printk(KERN_ERR "%s: ibm_set_xive(0xff) irq=%u returned %d\n",
- __func__, hwirq, call_status);
- return;
- }
-}
-
-static void xics_mask_irq(struct irq_data *d)
-{
- unsigned int hwirq;
-
- pr_devel("xics: mask virq %d\n", d->irq);
-
- hwirq = (unsigned int)irq_map[d->irq].hwirq;
- if (hwirq == XICS_IPI || hwirq == XICS_IRQ_SPURIOUS)
- return;
- xics_mask_real_irq(hwirq);
-}
-
-static void xics_mask_unknown_vec(unsigned int vec)
-{
- printk(KERN_ERR "Interrupt %u (real) is invalid, disabling it.\n", vec);
- xics_mask_real_irq(vec);
-}
-
-static inline unsigned int xics_xirr_vector(unsigned int xirr)
-{
- /*
- * The top byte is the old cppr, to be restored on EOI.
- * The remaining 24 bits are the vector.
- */
- return xirr & 0x00ffffff;
-}
-
-static void push_cppr(unsigned int vec)
-{
- struct xics_cppr *os_cppr = &__get_cpu_var(xics_cppr);
-
- if (WARN_ON(os_cppr->index >= MAX_NUM_PRIORITIES - 1))
- return;
-
- if (vec == XICS_IPI)
- os_cppr->stack[++os_cppr->index] = IPI_PRIORITY;
- else
- os_cppr->stack[++os_cppr->index] = DEFAULT_PRIORITY;
-}
-
-static unsigned int xics_get_irq_direct(void)
-{
- unsigned int xirr = direct_xirr_info_get();
- unsigned int vec = xics_xirr_vector(xirr);
- unsigned int irq;
-
- if (vec == XICS_IRQ_SPURIOUS)
- return NO_IRQ;
-
- irq = irq_radix_revmap_lookup(xics_host, vec);
- if (likely(irq != NO_IRQ)) {
- push_cppr(vec);
- return irq;
- }
-
- /* We don't have a linux mapping, so have rtas mask it. */
- xics_mask_unknown_vec(vec);
-
- /* We might learn about it later, so EOI it */
- direct_xirr_info_set(xirr);
- return NO_IRQ;
-}
-
-static unsigned int xics_get_irq_lpar(void)
-{
- struct xics_cppr *os_cppr = &__get_cpu_var(xics_cppr);
- unsigned int xirr = lpar_xirr_info_get(os_cppr->stack[os_cppr->index]);
- unsigned int vec = xics_xirr_vector(xirr);
- unsigned int irq;
-
- if (vec == XICS_IRQ_SPURIOUS)
- return NO_IRQ;
-
- irq = irq_radix_revmap_lookup(xics_host, vec);
- if (likely(irq != NO_IRQ)) {
- push_cppr(vec);
- return irq;
- }
-
- /* We don't have a linux mapping, so have RTAS mask it. */
- xics_mask_unknown_vec(vec);
-
- /* We might learn about it later, so EOI it */
- lpar_xirr_info_set(xirr);
- return NO_IRQ;
-}
-
-static unsigned char pop_cppr(void)
-{
- struct xics_cppr *os_cppr = &__get_cpu_var(xics_cppr);
-
- if (WARN_ON(os_cppr->index < 1))
- return LOWEST_PRIORITY;
-
- return os_cppr->stack[--os_cppr->index];
-}
-
-static void xics_eoi_direct(struct irq_data *d)
-{
- unsigned int hwirq = (unsigned int)irq_map[d->irq].hwirq;
-
- iosync();
- direct_xirr_info_set((pop_cppr() << 24) | hwirq);
-}
-
-static void xics_eoi_lpar(struct irq_data *d)
-{
- unsigned int hwirq = (unsigned int)irq_map[d->irq].hwirq;
-
- iosync();
- lpar_xirr_info_set((pop_cppr() << 24) | hwirq);
-}
-
-static int
-xics_set_affinity(struct irq_data *d, const struct cpumask *cpumask, bool force)
-{
- unsigned int hwirq;
- int status;
- int xics_status[2];
- int irq_server;
-
- hwirq = (unsigned int)irq_map[d->irq].hwirq;
- if (hwirq == XICS_IPI || hwirq == XICS_IRQ_SPURIOUS)
- return -1;
-
- status = rtas_call(ibm_get_xive, 1, 3, xics_status, hwirq);
-
- if (status) {
- printk(KERN_ERR "%s: ibm,get-xive irq=%u returns %d\n",
- __func__, hwirq, status);
- return -1;
- }
-
- irq_server = get_irq_server(d->irq, cpumask, 1);
- if (irq_server == -1) {
- char cpulist[128];
- cpumask_scnprintf(cpulist, sizeof(cpulist), cpumask);
- printk(KERN_WARNING
- "%s: No online cpus in the mask %s for irq %d\n",
- __func__, cpulist, d->irq);
- return -1;
- }
-
- status = rtas_call(ibm_set_xive, 3, 1, NULL,
- hwirq, irq_server, xics_status[1]);
-
- if (status) {
- printk(KERN_ERR "%s: ibm,set-xive irq=%u returns %d\n",
- __func__, hwirq, status);
- return -1;
- }
-
- return 0;
-}
-
-static struct irq_chip xics_pic_direct = {
- .name = "XICS",
- .irq_startup = xics_startup,
- .irq_mask = xics_mask_irq,
- .irq_unmask = xics_unmask_irq,
- .irq_eoi = xics_eoi_direct,
- .irq_set_affinity = xics_set_affinity
-};
-
-static struct irq_chip xics_pic_lpar = {
- .name = "XICS",
- .irq_startup = xics_startup,
- .irq_mask = xics_mask_irq,
- .irq_unmask = xics_unmask_irq,
- .irq_eoi = xics_eoi_lpar,
- .irq_set_affinity = xics_set_affinity
-};
-
-
-/* Interface to arch irq controller subsystem layer */
-
-/* Points to the irq_chip we're actually using */
-static struct irq_chip *xics_irq_chip;
-
-static int xics_host_match(struct irq_host *h, struct device_node *node)
-{
- /* IBM machines have interrupt parents of various funky types for things
- * like vdevices, events, etc... The trick we use here is to match
- * everything here except the legacy 8259 which is compatible "chrp,iic"
- */
- return !of_device_is_compatible(node, "chrp,iic");
-}
-
-static int xics_host_map(struct irq_host *h, unsigned int virq,
- irq_hw_number_t hw)
-{
- pr_devel("xics: map virq %d, hwirq 0x%lx\n", virq, hw);
-
- /* Insert the interrupt mapping into the radix tree for fast lookup */
- irq_radix_revmap_insert(xics_host, virq, hw);
-
- irq_set_status_flags(virq, IRQ_LEVEL);
- irq_set_chip_and_handler(virq, xics_irq_chip, handle_fasteoi_irq);
- return 0;
-}
-
-static int xics_host_xlate(struct irq_host *h, struct device_node *ct,
- const u32 *intspec, unsigned int intsize,
- irq_hw_number_t *out_hwirq, unsigned int *out_flags)
-
-{
- /* Current xics implementation translates everything
- * to level. It is not technically right for MSIs but this
- * is irrelevant at this point. We might get smarter in the future
- */
- *out_hwirq = intspec[0];
- *out_flags = IRQ_TYPE_LEVEL_LOW;
-
- return 0;
-}
-
-static struct irq_host_ops xics_host_ops = {
- .match = xics_host_match,
- .map = xics_host_map,
- .xlate = xics_host_xlate,
-};
-
-static void __init xics_init_host(void)
-{
- if (firmware_has_feature(FW_FEATURE_LPAR))
- xics_irq_chip = &xics_pic_lpar;
- else
- xics_irq_chip = &xics_pic_direct;
-
- xics_host = irq_alloc_host(NULL, IRQ_HOST_MAP_TREE, 0, &xics_host_ops,
- XICS_IRQ_SPURIOUS);
- BUG_ON(xics_host == NULL);
- irq_set_default_host(xics_host);
-}
-
-
-/* Inter-processor interrupt support */
-
-#ifdef CONFIG_SMP
-/*
- * XICS only has a single IPI, so encode the messages per CPU
- */
-static DEFINE_PER_CPU_SHARED_ALIGNED(unsigned long, xics_ipi_message);
-
-static inline void smp_xics_do_message(int cpu, int msg)
-{
- unsigned long *tgt = &per_cpu(xics_ipi_message, cpu);
-
- set_bit(msg, tgt);
- mb();
- if (firmware_has_feature(FW_FEATURE_LPAR))
- lpar_qirr_info(cpu, IPI_PRIORITY);
- else
- direct_qirr_info(cpu, IPI_PRIORITY);
-}
-
-void smp_xics_message_pass(int target, int msg)
-{
- unsigned int i;
-
- if (target < NR_CPUS) {
- smp_xics_do_message(target, msg);
- } else {
- for_each_online_cpu(i) {
- if (target == MSG_ALL_BUT_SELF
- && i == smp_processor_id())
- continue;
- smp_xics_do_message(i, msg);
- }
- }
-}
-
-static irqreturn_t xics_ipi_dispatch(int cpu)
-{
- unsigned long *tgt = &per_cpu(xics_ipi_message, cpu);
-
- mb(); /* order mmio clearing qirr */
- while (*tgt) {
- if (test_and_clear_bit(PPC_MSG_CALL_FUNCTION, tgt)) {
- smp_message_recv(PPC_MSG_CALL_FUNCTION);
- }
- if (test_and_clear_bit(PPC_MSG_RESCHEDULE, tgt)) {
- smp_message_recv(PPC_MSG_RESCHEDULE);
- }
- if (test_and_clear_bit(PPC_MSG_CALL_FUNC_SINGLE, tgt)) {
- smp_message_recv(PPC_MSG_CALL_FUNC_SINGLE);
- }
-#if defined(CONFIG_DEBUGGER) || defined(CONFIG_KEXEC)
- if (test_and_clear_bit(PPC_MSG_DEBUGGER_BREAK, tgt)) {
- smp_message_recv(PPC_MSG_DEBUGGER_BREAK);
- }
-#endif
- }
- return IRQ_HANDLED;
-}
-
-static irqreturn_t xics_ipi_action_direct(int irq, void *dev_id)
-{
- int cpu = smp_processor_id();
-
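-	/* Reset the qirr (MFRR) to 0xff, i.e. "no request", to ack the
-	 * IPI before dispatching the queued messages */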
- direct_qirr_info(cpu, 0xff);
-
- return xics_ipi_dispatch(cpu);
-}
-
-static irqreturn_t xics_ipi_action_lpar(int irq, void *dev_id)
-{
- int cpu = smp_processor_id();
-
- lpar_qirr_info(cpu, 0xff);
-
- return xics_ipi_dispatch(cpu);
-}
-
-static void xics_request_ipi(void)
-{
- unsigned int ipi;
- int rc;
-
- ipi = irq_create_mapping(xics_host, XICS_IPI);
- BUG_ON(ipi == NO_IRQ);
-
- /*
- * IPIs are marked IRQF_DISABLED as they must run with irqs
- * disabled
- */
- irq_set_handler(ipi, handle_percpu_irq);
- if (firmware_has_feature(FW_FEATURE_LPAR))
- rc = request_irq(ipi, xics_ipi_action_lpar,
- IRQF_DISABLED|IRQF_PERCPU, "IPI", NULL);
- else
- rc = request_irq(ipi, xics_ipi_action_direct,
- IRQF_DISABLED|IRQF_PERCPU, "IPI", NULL);
- BUG_ON(rc);
-}
-
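-/* The smp_ops probe() hook returns the number of possible cpus */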
-int __init smp_xics_probe(void)
-{
- xics_request_ipi();
-
- return cpumask_weight(cpu_possible_mask);
-}
-
-#endif /* CONFIG_SMP */
-
-
-/* Initialization */
-
-static void xics_update_irq_servers(void)
-{
- int i, j;
- struct device_node *np;
- u32 ilen;
- const u32 *ireg;
- u32 hcpuid;
-
- /* Find the server numbers for the boot cpu. */
- np = of_get_cpu_node(boot_cpuid, NULL);
- BUG_ON(!np);
-
- ireg = of_get_property(np, "ibm,ppc-interrupt-gserver#s", &ilen);
- if (!ireg) {
- of_node_put(np);
- return;
- }
-
- i = ilen / sizeof(int);
- hcpuid = get_hard_smp_processor_id(boot_cpuid);
-
-	/* The global interrupt distribution server is specified in the last
-	 * entry of the "ibm,ppc-interrupt-gserver#s" property. Get the last
-	 * entry from this property for the current boot cpu id and use it
-	 * as the default distribution server
-	 */
- for (j = 0; j < i; j += 2) {
- if (ireg[j] == hcpuid) {
- default_server = hcpuid;
- default_distrib_server = ireg[j+1];
- }
- }
-
- of_node_put(np);
-}
-
-static void __init xics_map_one_cpu(int hw_id, unsigned long addr,
- unsigned long size)
-{
- int i;
-
-	/* This may look gross but it's good enough for now, we don't quite
-	 * have a proper hard -> linux processor id mapping.
-	 */
- for_each_possible_cpu(i) {
- if (!cpu_present(i))
- continue;
- if (hw_id == get_hard_smp_processor_id(i)) {
- xics_per_cpu[i] = ioremap(addr, size);
- return;
- }
- }
-}
-
-static void __init xics_init_one_node(struct device_node *np,
- unsigned int *indx)
-{
- unsigned int ilen;
- const u32 *ireg;
-
-	/* This code makes the theoretically broken assumption that the
-	 * interrupt server numbers are the same as the hard CPU numbers.
-	 * This happens to be the case so far but we are playing with fire...
-	 * should be fixed one of these days. -BenH.
-	 */
- ireg = of_get_property(np, "ibm,interrupt-server-ranges", NULL);
-
-	/* Does that ever happen? We'll know soon enough... but even
-	 * good old f80 does have that property ..
-	 */
- WARN_ON(ireg == NULL);
- if (ireg) {
- /*
- * set node starting index for this node
- */
- *indx = *ireg;
- }
- ireg = of_get_property(np, "reg", &ilen);
- if (!ireg)
- panic("xics_init_IRQ: can't find interrupt reg property");
-
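-	/* Each "reg" entry is <addr-hi addr-lo size-hi size-lo>, i.e. one
-	 * 64-bit address/size pair per presentation controller */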
- while (ilen >= (4 * sizeof(u32))) {
- unsigned long addr, size;
-
- /* XXX Use proper OF parsing code here !!! */
- addr = (unsigned long)*ireg++ << 32;
- ilen -= sizeof(u32);
- addr |= *ireg++;
- ilen -= sizeof(u32);
- size = (unsigned long)*ireg++ << 32;
- ilen -= sizeof(u32);
- size |= *ireg++;
- ilen -= sizeof(u32);
- xics_map_one_cpu(*indx, addr, size);
- (*indx)++;
- }
-}
-
-void __init xics_init_IRQ(void)
-{
- struct device_node *np;
- u32 indx = 0;
- int found = 0;
- const u32 *isize;
-
- ppc64_boot_msg(0x20, "XICS Init");
-
- ibm_get_xive = rtas_token("ibm,get-xive");
- ibm_set_xive = rtas_token("ibm,set-xive");
- ibm_int_on = rtas_token("ibm,int-on");
- ibm_int_off = rtas_token("ibm,int-off");
-
- for_each_node_by_type(np, "PowerPC-External-Interrupt-Presentation") {
- found = 1;
- if (firmware_has_feature(FW_FEATURE_LPAR)) {
- of_node_put(np);
- break;
- }
- xics_init_one_node(np, &indx);
- }
- if (found == 0)
- return;
-
- /* get the bit size of server numbers */
- found = 0;
-
- for_each_compatible_node(np, NULL, "ibm,ppc-xics") {
- isize = of_get_property(np, "ibm,interrupt-server#-size", NULL);
-
- if (!isize)
- continue;
-
- if (!found) {
- interrupt_server_size = *isize;
- found = 1;
- } else if (*isize != interrupt_server_size) {
- printk(KERN_WARNING "XICS: "
- "mismatched ibm,interrupt-server#-size\n");
- interrupt_server_size = max(*isize,
- interrupt_server_size);
- }
- }
-
- xics_update_irq_servers();
- xics_init_host();
-
- if (firmware_has_feature(FW_FEATURE_LPAR))
- ppc_md.get_irq = xics_get_irq_lpar;
- else
- ppc_md.get_irq = xics_get_irq_direct;
-
- xics_setup_cpu();
-
- ppc64_boot_msg(0x21, "XICS Done");
-}
-
-/* Cpu startup, shutdown, and hotplug */
-
-static void xics_set_cpu_priority(unsigned char cppr)
-{
- struct xics_cppr *os_cppr = &__get_cpu_var(xics_cppr);
-
- /*
- * we only really want to set the priority when there's
- * just one cppr value on the stack
- */
- WARN_ON(os_cppr->index != 0);
-
- os_cppr->stack[0] = cppr;
-
- if (firmware_has_feature(FW_FEATURE_LPAR))
- lpar_cppr_info(cppr);
- else
- direct_cppr_info(cppr);
- iosync();
-}
-
-/* Have the calling processor join or leave the specified global queue */
-static void xics_set_cpu_giq(unsigned int gserver, unsigned int join)
-{
- int index;
- int status;
-
- if (!rtas_indicator_present(GLOBAL_INTERRUPT_QUEUE, NULL))
- return;
-
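-	/* The indicator index is the gserver number one's-complemented
-	 * within interrupt_server_size bits */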
- index = (1UL << interrupt_server_size) - 1 - gserver;
-
- status = rtas_set_indicator_fast(GLOBAL_INTERRUPT_QUEUE, index, join);
-
- WARN(status < 0, "set-indicator(%d, %d, %u) returned %d\n",
- GLOBAL_INTERRUPT_QUEUE, index, join, status);
-}
-
-void xics_setup_cpu(void)
-{
- xics_set_cpu_priority(LOWEST_PRIORITY);
-
- xics_set_cpu_giq(default_distrib_server, 1);
-}
-
-void xics_teardown_cpu(void)
-{
- struct xics_cppr *os_cppr = &__get_cpu_var(xics_cppr);
- int cpu = smp_processor_id();
-
- /*
- * we have to reset the cppr index to 0 because we're
- * not going to return from the IPI
- */
- os_cppr->index = 0;
- xics_set_cpu_priority(0);
-
- /* Clear any pending IPI request */
- if (firmware_has_feature(FW_FEATURE_LPAR))
- lpar_qirr_info(cpu, 0xff);
- else
- direct_qirr_info(cpu, 0xff);
-}
-
-void xics_kexec_teardown_cpu(int secondary)
-{
- xics_teardown_cpu();
-
-	/*
-	 * we took the IPI irq and will never return, so we need to
-	 * EOI the IPI, but want to leave our priority at 0
-	 *
-	 * should we check all the other interrupts too?
-	 * should we be flagging idle loop instead?
-	 * or creating some task to be scheduled?
-	 */
-
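-	/* Writing the XIRR EOIs the source in the low 24 bits and sets
-	 * the CPPR from the top byte; 0x00 keeps everything rejected */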
- if (firmware_has_feature(FW_FEATURE_LPAR))
- lpar_xirr_info_set((0x00 << 24) | XICS_IPI);
- else
- direct_xirr_info_set((0x00 << 24) | XICS_IPI);
-
- /*
- * Some machines need to have at least one cpu in the GIQ,
- * so leave the master cpu in the group.
- */
- if (secondary)
- xics_set_cpu_giq(default_distrib_server, 0);
-}
-
-#ifdef CONFIG_HOTPLUG_CPU
-
-/* Interrupts are disabled. */
-void xics_migrate_irqs_away(void)
-{
- int cpu = smp_processor_id(), hw_cpu = hard_smp_processor_id();
- int virq;
-
- /* If we used to be the default server, move to the new "boot_cpuid" */
- if (hw_cpu == default_server)
- xics_update_irq_servers();
-
- /* Reject any interrupt that was queued to us... */
- xics_set_cpu_priority(0);
-
- /* Remove ourselves from the global interrupt queue */
- xics_set_cpu_giq(default_distrib_server, 0);
-
- /* Allow IPIs again... */
- xics_set_cpu_priority(DEFAULT_PRIORITY);
-
- for_each_irq(virq) {
- struct irq_desc *desc;
- struct irq_chip *chip;
- unsigned int hwirq;
- int xics_status[2];
- int status;
- unsigned long flags;
-
-		/* We can't set affinity on ISA interrupts */
- if (virq < NUM_ISA_INTERRUPTS)
- continue;
- if (irq_map[virq].host != xics_host)
- continue;
- hwirq = (unsigned int)irq_map[virq].hwirq;
- /* We need to get IPIs still. */
- if (hwirq == XICS_IPI || hwirq == XICS_IRQ_SPURIOUS)
- continue;
-
- desc = irq_to_desc(virq);
-
- /* We only need to migrate enabled IRQS */
- if (desc == NULL || desc->action == NULL)
- continue;
-
- chip = irq_desc_get_chip(desc);
- if (chip == NULL || chip->irq_set_affinity == NULL)
- continue;
-
- raw_spin_lock_irqsave(&desc->lock, flags);
-
- status = rtas_call(ibm_get_xive, 1, 3, xics_status, hwirq);
- if (status) {
- printk(KERN_ERR "%s: ibm,get-xive irq=%u returns %d\n",
- __func__, hwirq, status);
- goto unlock;
- }
-
-		/*
-		 * We only support delivery to all cpus or to one cpu.
-		 * The irq only needs migrating in the single-cpu case.
-		 */
- if (xics_status[0] != hw_cpu)
- goto unlock;
-
-		/* The affinity break is expected while the cpu is going
-		 * offline; only warn if we are still online. */
- if (cpu_online(cpu))
- printk(KERN_WARNING "IRQ %u affinity broken off cpu %u\n",
- virq, cpu);
-
- /* Reset affinity to all cpus */
- cpumask_setall(desc->irq_data.affinity);
- chip->irq_set_affinity(&desc->irq_data, cpu_all_mask, true);
-unlock:
- raw_spin_unlock_irqrestore(&desc->lock, flags);
- }
-}
-#endif
diff --git a/arch/powerpc/platforms/pseries/xics.h b/arch/powerpc/platforms/pseries/xics.h
deleted file mode 100644
index d1d5a83039a..00000000000
--- a/arch/powerpc/platforms/pseries/xics.h
+++ /dev/null
@@ -1,23 +0,0 @@
-/*
- * arch/powerpc/platforms/pseries/xics.h
- *
- * Copyright 2000 IBM Corporation.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- */
-
-#ifndef _POWERPC_KERNEL_XICS_H
-#define _POWERPC_KERNEL_XICS_H
-
-extern void xics_init_IRQ(void);
-extern void xics_setup_cpu(void);
-extern void xics_teardown_cpu(void);
-extern void xics_kexec_teardown_cpu(int secondary);
-extern void xics_migrate_irqs_away(void);
-extern int smp_xics_probe(void);
-extern void smp_xics_message_pass(int target, int msg);
-
-#endif /* _POWERPC_KERNEL_XICS_H */