From de400d6b78d15a73023485f050bc6b1709dc7a79 Mon Sep 17 00:00:00 2001 From: Peter Oberparleiter Date: Sun, 30 Oct 2011 15:16:04 +0100 Subject: [S390] fix mismatch in summation of I/O IRQ statistics Current IRQ statistics support does not show detail counts for I/O interrupts which are processed internally only. The result is a summation count which is way off such as this one: CPU0 CPU1 CPU2 I/O: 1331 710 442 [...] QAI: 15 16 16 [I/O] QDIO Adapter Interrupt QDI: 1 0 0 [I/O] QDIO Interrupt DAS: 706 645 381 [I/O] DASD C15: 26 10 0 [I/O] 3215 C70: 0 0 0 [I/O] 3270 TAP: 0 0 0 [I/O] Tape VMR: 0 0 0 [I/O] Unit Record Devices LCS: 0 0 0 [I/O] LCS CLW: 0 0 0 [I/O] CLAW CTC: 0 0 0 [I/O] CTC APB: 0 0 0 [I/O] AP Bus Fix this by moving I/O interrupt accounting into the common I/O layer. Signed-off-by: Peter Oberparleiter Signed-off-by: Martin Schwidefsky --- arch/s390/include/asm/ccwdev.h | 3 +++ arch/s390/include/asm/irq.h | 2 +- arch/s390/kernel/irq.c | 2 +- drivers/s390/block/dasd.c | 2 -- drivers/s390/block/dasd_eckd.c | 1 + drivers/s390/block/dasd_fba.c | 1 + drivers/s390/char/con3215.c | 3 +-- drivers/s390/char/raw3270.c | 3 +-- drivers/s390/char/tape_34xx.c | 1 + drivers/s390/char/tape_3590.c | 1 + drivers/s390/char/tape_core.c | 2 -- drivers/s390/char/vmur.c | 3 +-- drivers/s390/cio/cio.c | 17 ++++++++++++++--- drivers/s390/cio/device.c | 13 +++++++++++++ drivers/s390/cio/device.h | 13 ++++++++++++- drivers/s390/cio/io_sch.h | 2 ++ drivers/s390/cio/qdio_main.c | 2 -- drivers/s390/net/claw.c | 3 +-- drivers/s390/net/ctcm_main.c | 3 +-- drivers/s390/net/lcs.c | 3 +-- 20 files changed, 56 insertions(+), 24 deletions(-) diff --git a/arch/s390/include/asm/ccwdev.h b/arch/s390/include/asm/ccwdev.h index 623f2fb7177..9381c92cc77 100644 --- a/arch/s390/include/asm/ccwdev.h +++ b/arch/s390/include/asm/ccwdev.h @@ -11,6 +11,7 @@ #include #include #include +#include /* structs from asm/cio.h */ struct irb; @@ -127,6 +128,7 @@ enum uc_todo { * @restore: callback for restoring after hibernation * @uc_handler: callback for unit check handler * @driver: embedded device driver structure + * @int_class: interruption class to use for accounting interrupts */ struct ccw_driver { struct ccw_device_id *ids; @@ -144,6 +146,7 @@ struct ccw_driver { int (*restore)(struct ccw_device *); enum uc_todo (*uc_handler) (struct ccw_device *, struct irb *); struct device_driver driver; + enum interruption_class int_class; }; extern struct ccw_device *get_ccwdev_by_busid(struct ccw_driver *cdrv, diff --git a/arch/s390/include/asm/irq.h b/arch/s390/include/asm/irq.h index ba7b01c726a..1f686059f29 100644 --- a/arch/s390/include/asm/irq.h +++ b/arch/s390/include/asm/irq.h @@ -17,8 +17,8 @@ enum interruption_class { EXTINT_SCP, EXTINT_IUC, EXTINT_CPM, + IOINT_CIO, IOINT_QAI, - IOINT_QDI, IOINT_DAS, IOINT_C15, IOINT_C70, diff --git a/arch/s390/kernel/irq.c b/arch/s390/kernel/irq.c index 1f4050d45f7..d382f9db3df 100644 --- a/arch/s390/kernel/irq.c +++ b/arch/s390/kernel/irq.c @@ -42,8 +42,8 @@ static const struct irq_class intrclass_names[] = { {.name = "SCP", .desc = "[EXT] Service Call" }, {.name = "IUC", .desc = "[EXT] IUCV" }, {.name = "CPM", .desc = "[EXT] CPU Measurement" }, + {.name = "CIO", .desc = "[I/O] Common I/O Layer Interrupt" }, {.name = "QAI", .desc = "[I/O] QDIO Adapter Interrupt" }, - {.name = "QDI", .desc = "[I/O] QDIO Interrupt" }, {.name = "DAS", .desc = "[I/O] DASD" }, {.name = "C15", .desc = "[I/O] 3215" }, {.name = "C70", .desc = "[I/O] 3270" }, diff --git a/drivers/s390/block/dasd.c b/drivers/s390/block/dasd.c index a1d3ddba99c..46054c75cf3 100644 --- a/drivers/s390/block/dasd.c +++ b/drivers/s390/block/dasd.c @@ -11,7 +11,6 @@ #define KMSG_COMPONENT "dasd" #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt -#include #include #include #include @@ -1594,7 +1593,6 @@ void dasd_int_handler(struct ccw_device *cdev, unsigned long intparm, unsigned long long now; int expires; - kstat_cpu(smp_processor_id()).irqs[IOINT_DAS]++; if (IS_ERR(irb)) { switch (PTR_ERR(irb)) { case -EIO: diff --git a/drivers/s390/block/dasd_eckd.c b/drivers/s390/block/dasd_eckd.c index 6e835c9fdfc..0e9c4dcf145 100644 --- a/drivers/s390/block/dasd_eckd.c +++ b/drivers/s390/block/dasd_eckd.c @@ -3998,6 +3998,7 @@ static struct ccw_driver dasd_eckd_driver = { .thaw = dasd_generic_restore_device, .restore = dasd_generic_restore_device, .uc_handler = dasd_generic_uc_handler, + .int_class = IOINT_DAS, }; /* diff --git a/drivers/s390/block/dasd_fba.c b/drivers/s390/block/dasd_fba.c index 4b71b116486..a62a75358eb 100644 --- a/drivers/s390/block/dasd_fba.c +++ b/drivers/s390/block/dasd_fba.c @@ -79,6 +79,7 @@ static struct ccw_driver dasd_fba_driver = { .freeze = dasd_generic_pm_freeze, .thaw = dasd_generic_restore_device, .restore = dasd_generic_restore_device, + .int_class = IOINT_DAS, }; static void diff --git a/drivers/s390/char/con3215.c b/drivers/s390/char/con3215.c index 694464c65fc..934458ad55e 100644 --- a/drivers/s390/char/con3215.c +++ b/drivers/s390/char/con3215.c @@ -9,7 +9,6 @@ * Dan Morrison, IBM Corporation */ -#include #include #include #include @@ -362,7 +361,6 @@ static void raw3215_irq(struct ccw_device *cdev, unsigned long intparm, int cstat, dstat; int count; - kstat_cpu(smp_processor_id()).irqs[IOINT_C15]++; raw = dev_get_drvdata(&cdev->dev); req = (struct raw3215_req *) intparm; cstat = irb->scsw.cmd.cstat; @@ -776,6 +774,7 @@ static struct ccw_driver raw3215_ccw_driver = { .freeze = &raw3215_pm_stop, .thaw = &raw3215_pm_start, .restore = &raw3215_pm_start, + .int_class = IOINT_C15, }; #ifdef CONFIG_TN3215_CONSOLE diff --git a/drivers/s390/char/raw3270.c b/drivers/s390/char/raw3270.c index 810ac38631c..e5cb9248a44 100644 --- a/drivers/s390/char/raw3270.c +++ b/drivers/s390/char/raw3270.c @@ -7,7 +7,6 @@ * Copyright IBM Corp. 2003, 2009 */ -#include #include #include #include @@ -330,7 +329,6 @@ raw3270_irq (struct ccw_device *cdev, unsigned long intparm, struct irb *irb) struct raw3270_request *rq; int rc; - kstat_cpu(smp_processor_id()).irqs[IOINT_C70]++; rp = dev_get_drvdata(&cdev->dev); if (!rp) return; @@ -1398,6 +1396,7 @@ static struct ccw_driver raw3270_ccw_driver = { .freeze = &raw3270_pm_stop, .thaw = &raw3270_pm_start, .restore = &raw3270_pm_start, + .int_class = IOINT_C70, }; static int diff --git a/drivers/s390/char/tape_34xx.c b/drivers/s390/char/tape_34xx.c index 9eff2df70dd..934ef33eb9a 100644 --- a/drivers/s390/char/tape_34xx.c +++ b/drivers/s390/char/tape_34xx.c @@ -1330,6 +1330,7 @@ static struct ccw_driver tape_34xx_driver = { .set_online = tape_34xx_online, .set_offline = tape_generic_offline, .freeze = tape_generic_pm_suspend, + .int_class = IOINT_TAP, }; static int diff --git a/drivers/s390/char/tape_3590.c b/drivers/s390/char/tape_3590.c index a7d57072888..49c6aab7ad7 100644 --- a/drivers/s390/char/tape_3590.c +++ b/drivers/s390/char/tape_3590.c @@ -1762,6 +1762,7 @@ static struct ccw_driver tape_3590_driver = { .set_offline = tape_generic_offline, .set_online = tape_3590_online, .freeze = tape_generic_pm_suspend, + .int_class = IOINT_TAP, }; /* diff --git a/drivers/s390/char/tape_core.c b/drivers/s390/char/tape_core.c index 7978a0adeaf..b3a3e8e8656 100644 --- a/drivers/s390/char/tape_core.c +++ b/drivers/s390/char/tape_core.c @@ -14,7 +14,6 @@ #define KMSG_COMPONENT "tape" #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt -#include #include #include // for kernel parameters #include // for requesting modules @@ -1115,7 +1114,6 @@ __tape_do_irq (struct ccw_device *cdev, unsigned long intparm, struct irb *irb) struct tape_request *request; int rc; - kstat_cpu(smp_processor_id()).irqs[IOINT_TAP]++; device = dev_get_drvdata(&cdev->dev); if (device == NULL) { return; diff --git a/drivers/s390/char/vmur.c b/drivers/s390/char/vmur.c index f6b00c3df42..d291a54acfa 100644 --- a/drivers/s390/char/vmur.c +++ b/drivers/s390/char/vmur.c @@ -11,7 +11,6 @@ #define KMSG_COMPONENT "vmur" #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt -#include #include #include @@ -74,6 +73,7 @@ static struct ccw_driver ur_driver = { .set_online = ur_set_online, .set_offline = ur_set_offline, .freeze = ur_pm_suspend, + .int_class = IOINT_VMR, }; static DEFINE_MUTEX(vmur_mutex); @@ -305,7 +305,6 @@ static void ur_int_handler(struct ccw_device *cdev, unsigned long intparm, { struct urdev *urd; - kstat_cpu(smp_processor_id()).irqs[IOINT_VMR]++; TRACE("ur_int_handler: intparm=0x%lx cstat=%02x dstat=%02x res=%u\n", intparm, irb->scsw.cmd.cstat, irb->scsw.cmd.dstat, irb->scsw.cmd.count); diff --git a/drivers/s390/cio/cio.c b/drivers/s390/cio/cio.c index eb3140ee821..5586c1376cb 100644 --- a/drivers/s390/cio/cio.c +++ b/drivers/s390/cio/cio.c @@ -622,6 +622,7 @@ void __irq_entry do_IRQ(struct pt_regs *regs) sch = (struct subchannel *)(unsigned long)tpi_info->intparm; if (!sch) { /* Clear pending interrupt condition. */ + kstat_cpu(smp_processor_id()).irqs[IOINT_CIO]++; tsch(tpi_info->schid, irb); continue; } @@ -634,7 +635,10 @@ void __irq_entry do_IRQ(struct pt_regs *regs) /* Call interrupt handler if there is one. */ if (sch->driver && sch->driver->irq) sch->driver->irq(sch); - } + else + kstat_cpu(smp_processor_id()).irqs[IOINT_CIO]++; + } else + kstat_cpu(smp_processor_id()).irqs[IOINT_CIO]++; spin_unlock(sch->lock); /* * Are more interrupts pending? @@ -667,18 +671,23 @@ static int cio_tpi(void) tpi_info = (struct tpi_info *)&S390_lowcore.subchannel_id; if (tpi(NULL) != 1) return 0; + kstat_cpu(smp_processor_id()).irqs[IO_INTERRUPT]++; if (tpi_info->adapter_IO) { do_adapter_IO(tpi_info->isc); return 1; } irb = (struct irb *)&S390_lowcore.irb; /* Store interrupt response block to lowcore. */ - if (tsch(tpi_info->schid, irb) != 0) + if (tsch(tpi_info->schid, irb) != 0) { /* Not status pending or not operational. */ + kstat_cpu(smp_processor_id()).irqs[IOINT_CIO]++; return 1; + } sch = (struct subchannel *)(unsigned long)tpi_info->intparm; - if (!sch) + if (!sch) { + kstat_cpu(smp_processor_id()).irqs[IOINT_CIO]++; return 1; + } irq_context = in_interrupt(); if (!irq_context) local_bh_disable(); @@ -687,6 +696,8 @@ static int cio_tpi(void) memcpy(&sch->schib.scsw, &irb->scsw, sizeof(union scsw)); if (sch->driver && sch->driver->irq) sch->driver->irq(sch); + else + kstat_cpu(smp_processor_id()).irqs[IOINT_CIO]++; spin_unlock(sch->lock); irq_exit(); if (!irq_context) diff --git a/drivers/s390/cio/device.c b/drivers/s390/cio/device.c index 8e04c00cf0a..d734f4a0eca 100644 --- a/drivers/s390/cio/device.c +++ b/drivers/s390/cio/device.c @@ -21,6 +21,7 @@ #include #include #include +#include #include #include @@ -747,6 +748,7 @@ static int io_subchannel_initialize_dev(struct subchannel *sch, struct ccw_device *cdev) { cdev->private->cdev = cdev; + cdev->private->int_class = IOINT_CIO; atomic_set(&cdev->private->onoff, 0); cdev->dev.parent = &sch->dev; cdev->dev.release = ccw_device_release; @@ -1010,6 +1012,8 @@ static void io_subchannel_irq(struct subchannel *sch) CIO_TRACE_EVENT(6, dev_name(&sch->dev)); if (cdev) dev_fsm_event(cdev, DEV_EVENT_INTERRUPT); + else + kstat_cpu(smp_processor_id()).irqs[IOINT_CIO]++; } void io_subchannel_init_config(struct subchannel *sch) @@ -1621,6 +1625,7 @@ ccw_device_probe_console(void) memset(&console_private, 0, sizeof(struct ccw_device_private)); console_cdev.private = &console_private; console_private.cdev = &console_cdev; + console_private.int_class = IOINT_CIO; ret = ccw_device_console_enable(&console_cdev, sch); if (ret) { cio_release_console(); @@ -1702,11 +1707,18 @@ ccw_device_probe (struct device *dev) int ret; cdev->drv = cdrv; /* to let the driver call _set_online */ + /* Note: we interpret class 0 in this context as an uninitialized + * field since it translates to a non-I/O interrupt class. */ + if (cdrv->int_class != 0) + cdev->private->int_class = cdrv->int_class; + else + cdev->private->int_class = IOINT_CIO; ret = cdrv->probe ? cdrv->probe(cdev) : -ENODEV; if (ret) { cdev->drv = NULL; + cdev->private->int_class = IOINT_CIO; return ret; } @@ -1740,6 +1752,7 @@ ccw_device_remove (struct device *dev) } ccw_device_set_timeout(cdev, 0); cdev->drv = NULL; + cdev->private->int_class = IOINT_CIO; return 0; } diff --git a/drivers/s390/cio/device.h b/drivers/s390/cio/device.h index 0b7245c72d5..179824b3082 100644 --- a/drivers/s390/cio/device.h +++ b/drivers/s390/cio/device.h @@ -5,6 +5,7 @@ #include #include #include +#include #include "io_sch.h" /* @@ -56,7 +57,17 @@ extern fsm_func_t *dev_jumptable[NR_DEV_STATES][NR_DEV_EVENTS]; static inline void dev_fsm_event(struct ccw_device *cdev, enum dev_event dev_event) { - dev_jumptable[cdev->private->state][dev_event](cdev, dev_event); + int state = cdev->private->state; + + if (dev_event == DEV_EVENT_INTERRUPT) { + if (state == DEV_STATE_ONLINE) + kstat_cpu(smp_processor_id()). + irqs[cdev->private->int_class]++; + else if (state != DEV_STATE_CMFCHANGE && + state != DEV_STATE_CMFUPDATE) + kstat_cpu(smp_processor_id()).irqs[IOINT_CIO]++; + } + dev_jumptable[state][dev_event](cdev, dev_event); } /* diff --git a/drivers/s390/cio/io_sch.h b/drivers/s390/cio/io_sch.h index ba31ad88f4f..2ebb492a5c1 100644 --- a/drivers/s390/cio/io_sch.h +++ b/drivers/s390/cio/io_sch.h @@ -4,6 +4,7 @@ #include #include #include +#include #include "css.h" #include "orb.h" @@ -157,6 +158,7 @@ struct ccw_device_private { struct list_head cmb_list; /* list of measured devices */ u64 cmb_start_time; /* clock value of cmb reset */ void *cmb_wait; /* deferred cmb enable/disable */ + enum interruption_class int_class; }; static inline int rsch(struct subchannel_id schid) diff --git a/drivers/s390/cio/qdio_main.c b/drivers/s390/cio/qdio_main.c index 6547ff46941..7ded1b26fd2 100644 --- a/drivers/s390/cio/qdio_main.c +++ b/drivers/s390/cio/qdio_main.c @@ -15,7 +15,6 @@ #include #include #include -#include #include #include #include @@ -1128,7 +1127,6 @@ void qdio_int_handler(struct ccw_device *cdev, unsigned long intparm, return; } - kstat_cpu(smp_processor_id()).irqs[IOINT_QDI]++; if (irq_ptr->perf_stat_enabled) irq_ptr->perf_stat.qdio_int++; diff --git a/drivers/s390/net/claw.c b/drivers/s390/net/claw.c index f1fa2483ae6..b41fae37d3a 100644 --- a/drivers/s390/net/claw.c +++ b/drivers/s390/net/claw.c @@ -63,7 +63,6 @@ #define KMSG_COMPONENT "claw" -#include #include #include #include @@ -291,6 +290,7 @@ static struct ccw_driver claw_ccw_driver = { .ids = claw_ids, .probe = ccwgroup_probe_ccwdev, .remove = ccwgroup_remove_ccwdev, + .int_class = IOINT_CLW, }; static ssize_t @@ -645,7 +645,6 @@ claw_irq_handler(struct ccw_device *cdev, struct claw_env *p_env; struct chbk *p_ch_r=NULL; - kstat_cpu(smp_processor_id()).irqs[IOINT_CLW]++; CLAW_DBF_TEXT(4, trace, "clawirq"); /* Bypass all 'unsolicited interrupts' */ privptr = dev_get_drvdata(&cdev->dev); diff --git a/drivers/s390/net/ctcm_main.c b/drivers/s390/net/ctcm_main.c index 426787efc49..5cb93a8e340 100644 --- a/drivers/s390/net/ctcm_main.c +++ b/drivers/s390/net/ctcm_main.c @@ -24,7 +24,6 @@ #define KMSG_COMPONENT "ctcm" #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt -#include #include #include #include @@ -1203,7 +1202,6 @@ static void ctcm_irq_handler(struct ccw_device *cdev, int cstat; int dstat; - kstat_cpu(smp_processor_id()).irqs[IOINT_CTC]++; CTCM_DBF_TEXT_(TRACE, CTC_DBF_DEBUG, "Enter %s(%s)", CTCM_FUNTAIL, dev_name(&cdev->dev)); @@ -1769,6 +1767,7 @@ static struct ccw_driver ctcm_ccw_driver = { .ids = ctcm_ids, .probe = ccwgroup_probe_ccwdev, .remove = ccwgroup_remove_ccwdev, + .int_class = IOINT_CTC, }; static struct ccwgroup_driver ctcm_group_driver = { diff --git a/drivers/s390/net/lcs.c b/drivers/s390/net/lcs.c index fb246b944b1..05fb3f7c728 100644 --- a/drivers/s390/net/lcs.c +++ b/drivers/s390/net/lcs.c @@ -26,7 +26,6 @@ #define KMSG_COMPONENT "lcs" #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt -#include #include #include #include @@ -1399,7 +1398,6 @@ lcs_irq(struct ccw_device *cdev, unsigned long intparm, struct irb *irb) int rc, index; int cstat, dstat; - kstat_cpu(smp_processor_id()).irqs[IOINT_LCS]++; if (lcs_check_irb_error(cdev, irb)) return; @@ -2399,6 +2397,7 @@ static struct ccw_driver lcs_ccw_driver = { .ids = lcs_ids, .probe = ccwgroup_probe_ccwdev, .remove = ccwgroup_remove_ccwdev, + .int_class = IOINT_LCS, }; /** -- cgit v1.2.3 From dd4a5a31fc06a2cda4f6a1d9de74b9009f8e31de Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky Date: Sun, 30 Oct 2011 15:16:05 +0100 Subject: [S390] avoid warning in show_cpuinfo The .start function and indirectly the .next function of the show_cpuinfo sequential operation uses NR_CPUS as limit instead of nr_cpu_ids. This can cause warnings like this: WARNING: at /usr/src/linux/include/linux/cpumask.h:107 Process lscpu (pid: 575, task: 000000007deb4338, ksp: 000000007794f588) Krnl PSW : 0704000180000000 0000000000106db4 (show_cpuinfo+0x108/0x234) R:0 T:1 IO:1 EX:1 Key:0 M:1 W:0 P:0 AS:0 CC:0 PM:0 EA:3 Krnl GPRS: 0000000000000003 0000000000791988 000000000071b478 0000000000000004 0000000000000001 0000000000000000 000000007d139500 0000000000000400 0000000000000000 000000000070e24c 000000007d48d600 0000000000000005 000000007d48d600 00000000004dfa10 0000000000106cf8 000000007794fcc0 Krnl Code: 0000000000106da8: 95001000 cli 0(%r1),0 0000000000106dac: a774ffac brc 7,106d04 0000000000106db0: a7f40001 brc 15,106db2 >0000000000106db4: 92011000 mvi 0(%r1),1 0000000000106db8: a7f4ffa6 brc 15,106d04 0000000000106dbc: c0e5000065b4 brasl %r14,113924 0000000000106dc2: c09000303a45 larl %r9,70e24c 0000000000106dc8: c020001eefd4 larl %r2,4e4d70 Replacing NR_CPUS with nr_cpu_ids fixes it. Signed-off-by: Martin Schwidefsky --- arch/s390/kernel/processor.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/s390/kernel/processor.c b/arch/s390/kernel/processor.c index 311e9d71288..6e0073e43f5 100644 --- a/arch/s390/kernel/processor.c +++ b/arch/s390/kernel/processor.c @@ -74,7 +74,7 @@ static int show_cpuinfo(struct seq_file *m, void *v) static void *c_start(struct seq_file *m, loff_t *pos) { - return *pos < NR_CPUS ? (void *)((unsigned long) *pos + 1) : NULL; + return *pos < nr_cpu_ids ? (void *)((unsigned long) *pos + 1) : NULL; } static void *c_next(struct seq_file *m, void *v, loff_t *pos) -- cgit v1.2.3 From caa04f69df9a5d4c10867b475006cf6f2f1e8500 Mon Sep 17 00:00:00 2001 From: Sebastian Ott Date: Sun, 30 Oct 2011 15:16:06 +0100 Subject: [S390] topology: fix alloc_masks annotation Fix this warning: WARNING: vmlinux.o(.text+0x199b6): Section mismatch in reference from the function alloc_masks() to the function .init.text:__alloc_bootmem() Signed-off-by: Sebastian Ott Signed-off-by: Martin Schwidefsky --- arch/s390/kernel/topology.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/s390/kernel/topology.c b/arch/s390/kernel/topology.c index 0cd340b7263..77b8942b9a1 100644 --- a/arch/s390/kernel/topology.c +++ b/arch/s390/kernel/topology.c @@ -299,8 +299,8 @@ out: } __initcall(init_topology_update); -static void alloc_masks(struct sysinfo_15_1_x *info, struct mask_info *mask, - int offset) +static void __init alloc_masks(struct sysinfo_15_1_x *info, + struct mask_info *mask, int offset) { int i, nr_masks; -- cgit v1.2.3 From a45aff5285871bf7be1781d9462d3fdbb6c913f9 Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky Date: Sun, 30 Oct 2011 15:16:07 +0100 Subject: [S390] user per registers vs. ptrace single stepping git commit 5e9a2692 "[S390] ptrace cleanup" introduced a regression for the case when both a user PER set (e.g. a storage alteration trace) and PTRACE_SINGLESTEP are active. The new code will overrule the user PER set with a instruction-fetch PER set over the whole address space for ptrace single stepping. The inferior process will be stopped after each instruction with an instruction fetch event. Any other events that may have occurred concurrently are not reported (e.g. storage alteration event) because the control bits for them are not set. The solution is to merge the PER control bits of the user PER set with the PER_EVENT_IFETCH control bit for PTRACE_SINGLESTEP. Cc: stable@kernel.org Signed-off-by: Martin Schwidefsky --- arch/s390/kernel/ptrace.c | 30 ++++++++++++++++-------------- 1 file changed, 16 insertions(+), 14 deletions(-) diff --git a/arch/s390/kernel/ptrace.c b/arch/s390/kernel/ptrace.c index ef86ad24398..ae0e14b8880 100644 --- a/arch/s390/kernel/ptrace.c +++ b/arch/s390/kernel/ptrace.c @@ -47,29 +47,31 @@ enum s390_regset { void update_per_regs(struct task_struct *task) { - static const struct per_regs per_single_step = { - .control = PER_EVENT_IFETCH, - .start = 0, - .end = PSW_ADDR_INSN, - }; struct pt_regs *regs = task_pt_regs(task); struct thread_struct *thread = &task->thread; - const struct per_regs *new; - struct per_regs old; - - /* TIF_SINGLE_STEP overrides the user specified PER registers. */ - new = test_tsk_thread_flag(task, TIF_SINGLE_STEP) ? - &per_single_step : &thread->per_user; + struct per_regs old, new; + + /* Copy user specified PER registers */ + new.control = thread->per_user.control; + new.start = thread->per_user.start; + new.end = thread->per_user.end; + + /* merge TIF_SINGLE_STEP into user specified PER registers. */ + if (test_tsk_thread_flag(task, TIF_SINGLE_STEP)) { + new.control |= PER_EVENT_IFETCH; + new.start = 0; + new.end = PSW_ADDR_INSN; + } /* Take care of the PER enablement bit in the PSW. */ - if (!(new->control & PER_EVENT_MASK)) { + if (!(new.control & PER_EVENT_MASK)) { regs->psw.mask &= ~PSW_MASK_PER; return; } regs->psw.mask |= PSW_MASK_PER; __ctl_store(old, 9, 11); - if (memcmp(new, &old, sizeof(struct per_regs)) != 0) - __ctl_load(*new, 9, 11); + if (memcmp(&new, &old, sizeof(struct per_regs)) != 0) + __ctl_load(new, 9, 11); } void user_enable_single_step(struct task_struct *task) -- cgit v1.2.3 From e73b7fffe487c315fd1a4fa22282e3362b440a06 Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky Date: Sun, 30 Oct 2011 15:16:08 +0100 Subject: [S390] memory leak with RCU_TABLE_FREE The rcu page table free code uses a couple of bits in the page table pointer passed to tlb_remove_table to discern the different page table types. __tlb_remove_table extracts the type with an incorrect mask which leads to memory leaks. The correct mask is ((FRAG_MASK << 4) | FRAG_MASK). Cc: stable@kernel.org Signed-off-by: Martin Schwidefsky --- arch/s390/mm/pgtable.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c index 5d56c2b95b1..529a0883837 100644 --- a/arch/s390/mm/pgtable.c +++ b/arch/s390/mm/pgtable.c @@ -662,8 +662,9 @@ void page_table_free_rcu(struct mmu_gather *tlb, unsigned long *table) void __tlb_remove_table(void *_table) { - void *table = (void *)((unsigned long) _table & PAGE_MASK); - unsigned type = (unsigned long) _table & ~PAGE_MASK; + const unsigned long mask = (FRAG_MASK << 4) | FRAG_MASK; + void *table = (void *)((unsigned long) _table & ~mask); + unsigned type = (unsigned long) _table & mask; if (type) __page_table_free_rcu(table, type); -- cgit v1.2.3 From 017ec18360c9894d11f1a2ba5d69f7786732e07a Mon Sep 17 00:00:00 2001 From: Jan Glauber Date: Sun, 30 Oct 2011 15:16:09 +0100 Subject: [S390] use ENTRY macro for sys_setns_wrapper Use the ENTRY macro for the system call wrapper sys_setns_wrapper similarly to the other wrappers. Signed-off-by: Jan Glauber Signed-off-by: Martin Schwidefsky --- arch/s390/kernel/compat_wrapper.S | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/arch/s390/kernel/compat_wrapper.S b/arch/s390/kernel/compat_wrapper.S index 7526db6bf50..5006a1d9f5d 100644 --- a/arch/s390/kernel/compat_wrapper.S +++ b/arch/s390/kernel/compat_wrapper.S @@ -1623,8 +1623,7 @@ ENTRY(sys_syncfs_wrapper) lgfr %r2,%r2 # int jg sys_syncfs - .globl sys_setns_wrapper -sys_setns_wrapper: +ENTRY(sys_setns_wrapper) lgfr %r2,%r2 # int lgfr %r3,%r3 # int jg sys_setns -- cgit v1.2.3 From 75a1c61b434759bf8f2c0118151713b010b44705 Mon Sep 17 00:00:00 2001 From: Sebastian Ott Date: Sun, 30 Oct 2011 15:16:34 +0100 Subject: [S390] cio: add message for timeouts on internal I/O Print a message in case we do not receive an IRQ in time (for internal I/O). Also print the ID of the last used channel path, since it is possible that not the device itself but this specific path might have a defect. Signed-off-by: Sebastian Ott Signed-off-by: Heiko Carstens Signed-off-by: Martin Schwidefsky --- drivers/s390/cio/ccwreq.c | 23 ++++++++++++++++++++--- 1 file changed, 20 insertions(+), 3 deletions(-) diff --git a/drivers/s390/cio/ccwreq.c b/drivers/s390/cio/ccwreq.c index d15f8b4d78b..5156264d0c7 100644 --- a/drivers/s390/cio/ccwreq.c +++ b/drivers/s390/cio/ccwreq.c @@ -1,10 +1,13 @@ /* * Handling of internal CCW device requests. * - * Copyright IBM Corp. 2009 + * Copyright IBM Corp. 2009, 2011 * Author(s): Peter Oberparleiter */ +#define KMSG_COMPONENT "cio" +#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt + #include #include #include @@ -323,7 +326,21 @@ void ccw_request_timeout(struct ccw_device *cdev) { struct subchannel *sch = to_subchannel(cdev->dev.parent); struct ccw_request *req = &cdev->private->req; - int rc; + int rc = -ENODEV, chp; + + if (cio_update_schib(sch)) + goto err; + + for (chp = 0; chp < 8; chp++) { + if ((0x80 >> chp) & sch->schib.pmcw.lpum) + pr_warning("%s: No interrupt was received within %lus " + "(CS=%02x, DS=%02x, CHPID=%x.%02x)\n", + dev_name(&cdev->dev), req->timeout / HZ, + scsw_cstat(&sch->schib.scsw), + scsw_dstat(&sch->schib.scsw), + sch->schid.cssid, + sch->schib.pmcw.chpid[chp]); + } if (!ccwreq_next_path(cdev)) { /* set the final return code for this request */ @@ -342,7 +359,7 @@ err: * ccw_request_notoper - notoper handler for I/O request procedure * @cdev: ccw device * - * Handle timeout during I/O request procedure. + * Handle notoper during I/O request procedure. */ void ccw_request_notoper(struct ccw_device *cdev) { -- cgit v1.2.3 From 80853a8ab4fcfff55d88e86ed1f50695dba0d8ff Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Sun, 30 Oct 2011 15:16:35 +0100 Subject: [S390] fix _TIF_SINGLE_STEP definition _TIF_SINGLE_STEP is incorrectly defined as 1< Signed-off-by: Martin Schwidefsky --- arch/s390/include/asm/thread_info.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/s390/include/asm/thread_info.h b/arch/s390/include/asm/thread_info.h index 1a5dbb6f149..f9a9a10979c 100644 --- a/arch/s390/include/asm/thread_info.h +++ b/arch/s390/include/asm/thread_info.h @@ -117,7 +117,7 @@ static inline struct thread_info *current_thread_info(void) #define _TIF_SIE (1< Date: Sun, 30 Oct 2011 15:16:36 +0100 Subject: [S390] kdump: Add KEXEC_CRASH_CONTROL_MEMORY_LIMIT On s390 there is a different KEXEC_CONTROL_MEMORY_LIMIT for the normal and the kdump kexec case. Therefore this patch introduces a new macro KEXEC_CRASH_CONTROL_MEMORY_LIMIT. This is set to KEXEC_CONTROL_MEMORY_LIMIT for all architectures that do not define KEXEC_CRASH_CONTROL_MEMORY_LIMIT. Acked-by: Vivek Goyal Acked-by: Andrew Morton Signed-off-by: Michael Holzheu Signed-off-by: Martin Schwidefsky --- include/linux/kexec.h | 4 ++++ kernel/kexec.c | 2 +- 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/include/linux/kexec.h b/include/linux/kexec.h index c2478a342cd..07d9aba7556 100644 --- a/include/linux/kexec.h +++ b/include/linux/kexec.h @@ -33,6 +33,10 @@ #error KEXEC_ARCH not defined #endif +#ifndef KEXEC_CRASH_CONTROL_MEMORY_LIMIT +#define KEXEC_CRASH_CONTROL_MEMORY_LIMIT KEXEC_CONTROL_MEMORY_LIMIT +#endif + #define KEXEC_NOTE_HEAD_BYTES ALIGN(sizeof(struct elf_note), 4) #define KEXEC_CORE_NOTE_NAME "CORE" #define KEXEC_CORE_NOTE_NAME_BYTES ALIGN(sizeof(KEXEC_CORE_NOTE_NAME), 4) diff --git a/kernel/kexec.c b/kernel/kexec.c index 296fbc84d65..7204fb982ed 100644 --- a/kernel/kexec.c +++ b/kernel/kexec.c @@ -498,7 +498,7 @@ static struct page *kimage_alloc_crash_control_pages(struct kimage *image, while (hole_end <= crashk_res.end) { unsigned long i; - if (hole_end > KEXEC_CONTROL_MEMORY_LIMIT) + if (hole_end > KEXEC_CRASH_CONTROL_MEMORY_LIMIT) break; if (hole_end > crashk_res.end) break; -- cgit v1.2.3 From d3bf37955d46718ee1a7f1fc69f953d2328ba7c2 Mon Sep 17 00:00:00 2001 From: Michael Holzheu Date: Sun, 30 Oct 2011 15:16:37 +0100 Subject: [S390] kdump: Add size to elfcorehdr kernel parameter Currently only the address of the pre-allocated ELF header is passed with the elfcorehdr= kernel parameter. In order to reserve memory for the header in the 2nd kernel also the size is required. Current kdump architecture backends use different methods to do that, e.g. x86 uses the memmap= kernel parameter. On s390 there is no easy way to transfer this information. Therefore the elfcorehdr kernel parameter is extended to also pass the size. This now can also be used as standard mechanism by all future kdump architecture backends. The syntax of the kernel parameter is extended as follows: elfcorehdr=[size[KMG]@]offset[KMG] This change is backward compatible because elfcorehdr=size is still allowed. Acked-by: Vivek Goyal Acked-by: Andrew Morton Signed-off-by: Michael Holzheu Signed-off-by: Martin Schwidefsky --- Documentation/kernel-parameters.txt | 6 +++--- include/linux/crash_dump.h | 1 + kernel/crash_dump.c | 11 +++++++++++ 3 files changed, 15 insertions(+), 3 deletions(-) diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index 93413ce9688..1fbe3625b2d 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -741,10 +741,10 @@ bytes respectively. Such letter suffixes can also be entirely omitted. See Documentation/block/cfq-iosched.txt and Documentation/block/deadline-iosched.txt for details. - elfcorehdr= [IA-64,PPC,SH,X86] + elfcorehdr=[size[KMG]@]offset[KMG] [IA64,PPC,SH,X86,S390] Specifies physical address of start of kernel core - image elf header. Generally kexec loader will - pass this option to capture kernel. + image elf header and optionally the size. Generally + kexec loader will pass this option to capture kernel. See Documentation/kdump/kdump.txt for details. enable_mtrr_cleanup [X86] diff --git a/include/linux/crash_dump.h b/include/linux/crash_dump.h index 74054074e87..5c4abce94ad 100644 --- a/include/linux/crash_dump.h +++ b/include/linux/crash_dump.h @@ -10,6 +10,7 @@ #define ELFCORE_ADDR_ERR (-2ULL) extern unsigned long long elfcorehdr_addr; +extern unsigned long long elfcorehdr_size; extern ssize_t copy_oldmem_page(unsigned long, char *, size_t, unsigned long, int); diff --git a/kernel/crash_dump.c b/kernel/crash_dump.c index 5f85690285d..69ebf3380ba 100644 --- a/kernel/crash_dump.c +++ b/kernel/crash_dump.c @@ -19,9 +19,16 @@ unsigned long saved_max_pfn; */ unsigned long long elfcorehdr_addr = ELFCORE_ADDR_MAX; +/* + * stores the size of elf header of crash image + */ +unsigned long long elfcorehdr_size; + /* * elfcorehdr= specifies the location of elf core header stored by the crashed * kernel. This option will be passed by kexec loader to the capture kernel. + * + * Syntax: elfcorehdr=[size[KMG]@]offset[KMG] */ static int __init setup_elfcorehdr(char *arg) { @@ -29,6 +36,10 @@ static int __init setup_elfcorehdr(char *arg) if (!arg) return -EINVAL; elfcorehdr_addr = memparse(arg, &end); + if (*end == '@') { + elfcorehdr_size = elfcorehdr_addr; + elfcorehdr_addr = memparse(end + 1, &end); + } return end > arg ? 0 : -EINVAL; } early_param("elfcorehdr", setup_elfcorehdr); -- cgit v1.2.3 From 1943f53c9ca182fa233e5a17e89ef8e421d7819e Mon Sep 17 00:00:00 2001 From: Michael Holzheu Date: Sun, 30 Oct 2011 15:16:38 +0100 Subject: [S390] Force PSW restart on online CPU PSW restart can be triggered on offline CPUs. If this happens, currently the PSW restart code fails, because functions like smp_processor_id() do not work on offline CPUs. This patch fixes this as follows: If PSW restart is triggered on an offline CPU, the PSW restart (sigp restart) is done a second time on another CPU that is online and the old CPU is stopped afterwards. Signed-off-by: Michael Holzheu Signed-off-by: Martin Schwidefsky --- arch/s390/include/asm/smp.h | 5 +++++ arch/s390/kernel/ipl.c | 1 + arch/s390/kernel/smp.c | 23 +++++++++++++++++++++++ 3 files changed, 29 insertions(+) diff --git a/arch/s390/include/asm/smp.h b/arch/s390/include/asm/smp.h index 045e009fc16..ab47a69fdf0 100644 --- a/arch/s390/include/asm/smp.h +++ b/arch/s390/include/asm/smp.h @@ -33,6 +33,7 @@ extern struct save_area *zfcpdump_save_areas[NR_CPUS + 1]; extern void smp_switch_to_ipl_cpu(void (*func)(void *), void *); extern void smp_switch_to_cpu(void (*)(void *), void *, unsigned long sp, int from, int to); +extern void smp_restart_with_online_cpu(void); extern void smp_restart_cpu(void); /* @@ -64,6 +65,10 @@ static inline void smp_switch_to_ipl_cpu(void (*func)(void *), void *data) func(data); } +static inline void smp_restart_with_online_cpu(void) +{ +} + #define smp_vcpu_scheduled (1) #endif /* CONFIG_SMP */ diff --git a/arch/s390/kernel/ipl.c b/arch/s390/kernel/ipl.c index 48c71020636..90769b4bc7f 100644 --- a/arch/s390/kernel/ipl.c +++ b/arch/s390/kernel/ipl.c @@ -1738,6 +1738,7 @@ static struct kobj_attribute on_restart_attr = void do_restart(void) { + smp_restart_with_online_cpu(); smp_send_stop(); on_restart_trigger.action->fn(&on_restart_trigger); stop_run(&on_restart_trigger); diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c index 6ab16ac64d2..e4572601e91 100644 --- a/arch/s390/kernel/smp.c +++ b/arch/s390/kernel/smp.c @@ -97,6 +97,29 @@ static inline int cpu_stopped(int cpu) return raw_cpu_stopped(cpu_logical_map(cpu)); } +/* + * Ensure that PSW restart is done on an online CPU + */ +void smp_restart_with_online_cpu(void) +{ + int cpu; + + for_each_online_cpu(cpu) { + if (stap() == __cpu_logical_map[cpu]) { + /* We are online: Enable DAT again and return */ + __load_psw_mask(psw_kernel_bits & ~PSW_MASK_MCHECK); + return; + } + } + /* We are not online: Do PSW restart on an online CPU */ + while (sigp(cpu, sigp_restart) == sigp_busy) + cpu_relax(); + /* And stop ourself */ + while (raw_sigp(stap(), sigp_stop) == sigp_busy) + cpu_relax(); + for (;;); +} + void smp_switch_to_ipl_cpu(void (*func)(void *), void *data) { struct _lowcore *lc, *current_lc; -- cgit v1.2.3 From 7f0bf656c66e4292e965c95fd9de55c72b6578bb Mon Sep 17 00:00:00 2001 From: Michael Holzheu Date: Sun, 30 Oct 2011 15:16:39 +0100 Subject: [S390] Add real memory access functions Add access function for real memory needed by s390 kdump backend. Signed-off-by: Michael Holzheu Signed-off-by: Martin Schwidefsky --- arch/s390/include/asm/system.h | 2 ++ arch/s390/mm/maccess.c | 56 ++++++++++++++++++++++++++++++++++++++++++ drivers/s390/char/zcore.c | 20 ++------------- 3 files changed, 60 insertions(+), 18 deletions(-) diff --git a/arch/s390/include/asm/system.h b/arch/s390/include/asm/system.h index 6582f69f238..afb849e4bf4 100644 --- a/arch/s390/include/asm/system.h +++ b/arch/s390/include/asm/system.h @@ -114,6 +114,8 @@ extern void pfault_fini(void); extern void cmma_init(void); extern int memcpy_real(void *, void *, size_t); extern void copy_to_absolute_zero(void *dest, void *src, size_t count); +extern int copy_to_user_real(void __user *dest, void *src, size_t count); +extern int copy_from_user_real(void *dest, void __user *src, size_t count); #define finish_arch_switch(prev) do { \ set_fs(current->thread.mm_segment); \ diff --git a/arch/s390/mm/maccess.c b/arch/s390/mm/maccess.c index 5dbbaa6e594..1cb8427bedf 100644 --- a/arch/s390/mm/maccess.c +++ b/arch/s390/mm/maccess.c @@ -11,6 +11,7 @@ #include #include #include +#include #include /* @@ -60,6 +61,9 @@ long probe_kernel_write(void *dst, const void *src, size_t size) return copied < 0 ? -EFAULT : 0; } +/* + * Copy memory in real mode (kernel to kernel) + */ int memcpy_real(void *dest, void *src, size_t count) { register unsigned long _dest asm("2") = (unsigned long) dest; @@ -101,3 +105,55 @@ void copy_to_absolute_zero(void *dest, void *src, size_t count) __ctl_load(cr0, 0, 0); preempt_enable(); } + +/* + * Copy memory from kernel (real) to user (virtual) + */ +int copy_to_user_real(void __user *dest, void *src, size_t count) +{ + int offs = 0, size, rc; + char *buf; + + buf = (char *) __get_free_page(GFP_KERNEL); + if (!buf) + return -ENOMEM; + rc = -EFAULT; + while (offs < count) { + size = min(PAGE_SIZE, count - offs); + if (memcpy_real(buf, src + offs, size)) + goto out; + if (copy_to_user(dest + offs, buf, size)) + goto out; + offs += size; + } + rc = 0; +out: + free_page((unsigned long) buf); + return rc; +} + +/* + * Copy memory from user (virtual) to kernel (real) + */ +int copy_from_user_real(void *dest, void __user *src, size_t count) +{ + int offs = 0, size, rc; + char *buf; + + buf = (char *) __get_free_page(GFP_KERNEL); + if (!buf) + return -ENOMEM; + rc = -EFAULT; + while (offs < count) { + size = min(PAGE_SIZE, count - offs); + if (copy_from_user(buf, src + offs, size)) + goto out; + if (memcpy_real(dest + offs, buf, size)) + goto out; + offs += size; + } + rc = 0; +out: + free_page((unsigned long) buf); + return rc; +} diff --git a/drivers/s390/char/zcore.c b/drivers/s390/char/zcore.c index 3b94044027c..f4f1da213e2 100644 --- a/drivers/s390/char/zcore.c +++ b/drivers/s390/char/zcore.c @@ -142,22 +142,6 @@ static int memcpy_hsa_kernel(void *dest, unsigned long src, size_t count) return memcpy_hsa(dest, src, count, TO_KERNEL); } -static int memcpy_real_user(void __user *dest, unsigned long src, size_t count) -{ - static char buf[4096]; - int offs = 0, size; - - while (offs < count) { - size = min(sizeof(buf), count - offs); - if (memcpy_real(buf, (void *) src + offs, size)) - return -EFAULT; - if (copy_to_user(dest + offs, buf, size)) - return -EFAULT; - offs += size; - } - return 0; -} - static int __init init_cpu_info(enum arch_id arch) { struct save_area *sa; @@ -346,8 +330,8 @@ static ssize_t zcore_read(struct file *file, char __user *buf, size_t count, /* Copy from real mem */ size = count - mem_offs - hdr_count; - rc = memcpy_real_user(buf + hdr_count + mem_offs, mem_start + mem_offs, - size); + rc = copy_to_user_real(buf + hdr_count + mem_offs, + (void *) mem_start + mem_offs, size); if (rc) goto fail; -- cgit v1.2.3 From 60a0c68df2632feaa4a986af084650d1165d89c5 Mon Sep 17 00:00:00 2001 From: Michael Holzheu Date: Sun, 30 Oct 2011 15:16:40 +0100 Subject: [S390] kdump backend code This patch provides the architecture specific part of the s390 kdump support. Signed-off-by: Michael Holzheu Signed-off-by: Martin Schwidefsky --- arch/s390/Kconfig | 10 + arch/s390/include/asm/ipl.h | 1 + arch/s390/include/asm/kexec.h | 3 + arch/s390/include/asm/reset.h | 2 +- arch/s390/include/asm/setup.h | 13 ++ arch/s390/kernel/Makefile | 1 + arch/s390/kernel/base.S | 6 + arch/s390/kernel/crash_dump.c | 427 +++++++++++++++++++++++++++++++++++++++ arch/s390/kernel/head.S | 22 ++ arch/s390/kernel/head_kdump.S | 119 +++++++++++ arch/s390/kernel/ipl.c | 12 +- arch/s390/kernel/machine_kexec.c | 161 ++++++++++++++- arch/s390/kernel/mem_detect.c | 69 +++++++ arch/s390/kernel/reipl.S | 6 + arch/s390/kernel/reipl64.S | 5 +- arch/s390/kernel/setup.c | 208 +++++++++++++++++++ arch/s390/kernel/smp.c | 19 +- arch/s390/mm/vmem.c | 6 + drivers/s390/cio/cio.c | 2 +- 19 files changed, 1079 insertions(+), 13 deletions(-) create mode 100644 arch/s390/kernel/crash_dump.c create mode 100644 arch/s390/kernel/head_kdump.S diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig index 6b99fc3f9b6..a9fbd43395f 100644 --- a/arch/s390/Kconfig +++ b/arch/s390/Kconfig @@ -569,6 +569,16 @@ config KEXEC current kernel, and to start another kernel. It is like a reboot but is independent of hardware/microcode support. +config CRASH_DUMP + bool "kernel crash dumps" + depends on 64BIT + help + Generate crash dump after being started by kexec. + Crash dump kernels are loaded in the main kernel with kexec-tools + into a specially reserved region and then later executed after + a crash by kdump/kexec. + For more details see Documentation/kdump/kdump.txt + config ZFCPDUMP def_bool n prompt "zfcpdump support" diff --git a/arch/s390/include/asm/ipl.h b/arch/s390/include/asm/ipl.h index 97cc4403fab..6940abfbe1d 100644 --- a/arch/s390/include/asm/ipl.h +++ b/arch/s390/include/asm/ipl.h @@ -168,5 +168,6 @@ enum diag308_rc { extern int diag308(unsigned long subcode, void *addr); extern void diag308_reset(void); +extern void store_status(void); #endif /* _ASM_S390_IPL_H */ diff --git a/arch/s390/include/asm/kexec.h b/arch/s390/include/asm/kexec.h index bb729b84a21..fb1c96fa348 100644 --- a/arch/s390/include/asm/kexec.h +++ b/arch/s390/include/asm/kexec.h @@ -30,6 +30,9 @@ /* Not more than 2GB */ #define KEXEC_CONTROL_MEMORY_LIMIT (1UL<<31) +/* Maximum address we can use for the crash control pages */ +#define KEXEC_CRASH_CONTROL_MEMORY_LIMIT (-1UL) + /* Allocate one page for the pdp and the second for the code */ #define KEXEC_CONTROL_PAGE_SIZE 4096 diff --git a/arch/s390/include/asm/reset.h b/arch/s390/include/asm/reset.h index f584f4a5258..3d6ad4ad2a3 100644 --- a/arch/s390/include/asm/reset.h +++ b/arch/s390/include/asm/reset.h @@ -17,5 +17,5 @@ struct reset_call { extern void register_reset_call(struct reset_call *reset); extern void unregister_reset_call(struct reset_call *reset); -extern void s390_reset_system(void); +extern void s390_reset_system(void (*func)(void *), void *data); #endif /* _ASM_S390_RESET_H */ diff --git a/arch/s390/include/asm/setup.h b/arch/s390/include/asm/setup.h index d5e2ef10537..5a099714df0 100644 --- a/arch/s390/include/asm/setup.h +++ b/arch/s390/include/asm/setup.h @@ -26,15 +26,21 @@ #define IPL_DEVICE (*(unsigned long *) (0x10404)) #define INITRD_START (*(unsigned long *) (0x1040C)) #define INITRD_SIZE (*(unsigned long *) (0x10414)) +#define OLDMEM_BASE (*(unsigned long *) (0x1041C)) +#define OLDMEM_SIZE (*(unsigned long *) (0x10424)) #else /* __s390x__ */ #define IPL_DEVICE (*(unsigned long *) (0x10400)) #define INITRD_START (*(unsigned long *) (0x10408)) #define INITRD_SIZE (*(unsigned long *) (0x10410)) +#define OLDMEM_BASE (*(unsigned long *) (0x10418)) +#define OLDMEM_SIZE (*(unsigned long *) (0x10420)) #endif /* __s390x__ */ #define COMMAND_LINE ((char *) (0x10480)) #define CHUNK_READ_WRITE 0 #define CHUNK_READ_ONLY 1 +#define CHUNK_OLDMEM 4 +#define CHUNK_CRASHK 5 struct mem_chunk { unsigned long addr; @@ -48,6 +54,8 @@ extern int memory_end_set; extern unsigned long memory_end; void detect_memory_layout(struct mem_chunk chunk[]); +void create_mem_hole(struct mem_chunk memory_chunk[], unsigned long addr, + unsigned long size, int type); #define PRIMARY_SPACE_MODE 0 #define ACCESS_REGISTER_MODE 1 @@ -106,6 +114,7 @@ extern unsigned int user_mode; #endif /* __s390x__ */ #define ZFCPDUMP_HSA_SIZE (32UL<<20) +#define ZFCPDUMP_HSA_SIZE_MAX (64UL<<20) /* * Console mode. Override with conmode= @@ -134,10 +143,14 @@ extern char kernel_nss_name[]; #define IPL_DEVICE 0x10404 #define INITRD_START 0x1040C #define INITRD_SIZE 0x10414 +#define OLDMEM_BASE 0x1041C +#define OLDMEM_SIZE 0x10424 #else /* __s390x__ */ #define IPL_DEVICE 0x10400 #define INITRD_START 0x10408 #define INITRD_SIZE 0x10410 +#define OLDMEM_BASE 0x10418 +#define OLDMEM_SIZE 0x10420 #endif /* __s390x__ */ #define COMMAND_LINE 0x10480 diff --git a/arch/s390/kernel/Makefile b/arch/s390/kernel/Makefile index df3732249ba..dd4f0764091 100644 --- a/arch/s390/kernel/Makefile +++ b/arch/s390/kernel/Makefile @@ -48,6 +48,7 @@ obj-$(CONFIG_FUNCTION_TRACER) += $(if $(CONFIG_64BIT),mcount64.o,mcount.o) obj-$(CONFIG_DYNAMIC_FTRACE) += ftrace.o obj-$(CONFIG_FUNCTION_GRAPH_TRACER) += ftrace.o obj-$(CONFIG_FTRACE_SYSCALLS) += ftrace.o +obj-$(CONFIG_CRASH_DUMP) += crash_dump.o # Kexec part S390_KEXEC_OBJS := machine_kexec.o crash.o diff --git a/arch/s390/kernel/base.S b/arch/s390/kernel/base.S index 255435663bf..f8828d38fa6 100644 --- a/arch/s390/kernel/base.S +++ b/arch/s390/kernel/base.S @@ -86,6 +86,8 @@ s390_base_pgm_handler_fn: ENTRY(diag308_reset) larl %r4,.Lctlregs # Save control registers stctg %c0,%c15,0(%r4) + larl %r4,.Lfpctl # Floating point control register + stfpc 0(%r4) larl %r4,.Lrestart_psw # Setup restart PSW at absolute 0 lghi %r3,0 lg %r4,0(%r4) # Save PSW @@ -99,6 +101,8 @@ ENTRY(diag308_reset) sam64 # Switch to 64 bit addressing mode larl %r4,.Lctlregs # Restore control registers lctlg %c0,%c15,0(%r4) + larl %r4,.Lfpctl # Restore floating point ctl register + lfpc 0(%r4) br %r14 .align 16 .Lrestart_psw: @@ -110,6 +114,8 @@ ENTRY(diag308_reset) .rept 16 .quad 0 .endr +.Lfpctl: + .long 0 .previous #else /* CONFIG_64BIT */ diff --git a/arch/s390/kernel/crash_dump.c b/arch/s390/kernel/crash_dump.c new file mode 100644 index 00000000000..2a9a3f40557 --- /dev/null +++ b/arch/s390/kernel/crash_dump.c @@ -0,0 +1,427 @@ +/* + * S390 kdump implementation + * + * Copyright IBM Corp. 2011 + * Author(s): Michael Holzheu + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define PTR_ADD(x, y) (((char *) (x)) + ((unsigned long) (y))) +#define PTR_SUB(x, y) (((char *) (x)) - ((unsigned long) (y))) +#define PTR_DIFF(x, y) ((unsigned long)(((char *) (x)) - ((unsigned long) (y)))) + +/* + * Copy one page from "oldmem" + * + * For the kdump reserved memory this functions performs a swap operation: + * - [OLDMEM_BASE - OLDMEM_BASE + OLDMEM_SIZE] is mapped to [0 - OLDMEM_SIZE]. + * - [0 - OLDMEM_SIZE] is mapped to [OLDMEM_BASE - OLDMEM_BASE + OLDMEM_SIZE] + */ +ssize_t copy_oldmem_page(unsigned long pfn, char *buf, + size_t csize, unsigned long offset, int userbuf) +{ + unsigned long src; + int rc; + + if (!csize) + return 0; + + src = (pfn << PAGE_SHIFT) + offset; + if (src < OLDMEM_SIZE) + src += OLDMEM_BASE; + else if (src > OLDMEM_BASE && + src < OLDMEM_BASE + OLDMEM_SIZE) + src -= OLDMEM_BASE; + if (userbuf) + rc = copy_to_user_real((void __user *) buf, (void *) src, + csize); + else + rc = memcpy_real(buf, (void *) src, csize); + return rc < 0 ? rc : csize; +} + +/* + * Copy memory from old kernel + */ +static int copy_from_oldmem(void *dest, void *src, size_t count) +{ + unsigned long copied = 0; + int rc; + + if ((unsigned long) src < OLDMEM_SIZE) { + copied = min(count, OLDMEM_SIZE - (unsigned long) src); + rc = memcpy_real(dest, src + OLDMEM_BASE, copied); + if (rc) + return rc; + } + return memcpy_real(dest + copied, src + copied, count - copied); +} + +/* + * Alloc memory and panic in case of ENOMEM + */ +static void *kzalloc_panic(int len) +{ + void *rc; + + rc = kzalloc(len, GFP_KERNEL); + if (!rc) + panic("s390 kdump kzalloc (%d) failed", len); + return rc; +} + +/* + * Get memory layout and create hole for oldmem + */ +static struct mem_chunk *get_memory_layout(void) +{ + struct mem_chunk *chunk_array; + + chunk_array = kzalloc_panic(MEMORY_CHUNKS * sizeof(struct mem_chunk)); + detect_memory_layout(chunk_array); + create_mem_hole(chunk_array, OLDMEM_BASE, OLDMEM_SIZE, CHUNK_CRASHK); + return chunk_array; +} + +/* + * Initialize ELF note + */ +static void *nt_init(void *buf, Elf64_Word type, void *desc, int d_len, + const char *name) +{ + Elf64_Nhdr *note; + u64 len; + + note = (Elf64_Nhdr *)buf; + note->n_namesz = strlen(name) + 1; + note->n_descsz = d_len; + note->n_type = type; + len = sizeof(Elf64_Nhdr); + + memcpy(buf + len, name, note->n_namesz); + len = roundup(len + note->n_namesz, 4); + + memcpy(buf + len, desc, note->n_descsz); + len = roundup(len + note->n_descsz, 4); + + return PTR_ADD(buf, len); +} + +/* + * Initialize prstatus note + */ +static void *nt_prstatus(void *ptr, struct save_area *sa) +{ + struct elf_prstatus nt_prstatus; + static int cpu_nr = 1; + + memset(&nt_prstatus, 0, sizeof(nt_prstatus)); + memcpy(&nt_prstatus.pr_reg.gprs, sa->gp_regs, sizeof(sa->gp_regs)); + memcpy(&nt_prstatus.pr_reg.psw, sa->psw, sizeof(sa->psw)); + memcpy(&nt_prstatus.pr_reg.acrs, sa->acc_regs, sizeof(sa->acc_regs)); + nt_prstatus.pr_pid = cpu_nr; + cpu_nr++; + + return nt_init(ptr, NT_PRSTATUS, &nt_prstatus, sizeof(nt_prstatus), + "CORE"); +} + +/* + * Initialize fpregset (floating point) note + */ +static void *nt_fpregset(void *ptr, struct save_area *sa) +{ + elf_fpregset_t nt_fpregset; + + memset(&nt_fpregset, 0, sizeof(nt_fpregset)); + memcpy(&nt_fpregset.fpc, &sa->fp_ctrl_reg, sizeof(sa->fp_ctrl_reg)); + memcpy(&nt_fpregset.fprs, &sa->fp_regs, sizeof(sa->fp_regs)); + + return nt_init(ptr, NT_PRFPREG, &nt_fpregset, sizeof(nt_fpregset), + "CORE"); +} + +/* + * Initialize timer note + */ +static void *nt_s390_timer(void *ptr, struct save_area *sa) +{ + return nt_init(ptr, NT_S390_TIMER, &sa->timer, sizeof(sa->timer), + KEXEC_CORE_NOTE_NAME); +} + +/* + * Initialize TOD clock comparator note + */ +static void *nt_s390_tod_cmp(void *ptr, struct save_area *sa) +{ + return nt_init(ptr, NT_S390_TODCMP, &sa->clk_cmp, + sizeof(sa->clk_cmp), KEXEC_CORE_NOTE_NAME); +} + +/* + * Initialize TOD programmable register note + */ +static void *nt_s390_tod_preg(void *ptr, struct save_area *sa) +{ + return nt_init(ptr, NT_S390_TODPREG, &sa->tod_reg, + sizeof(sa->tod_reg), KEXEC_CORE_NOTE_NAME); +} + +/* + * Initialize control register note + */ +static void *nt_s390_ctrs(void *ptr, struct save_area *sa) +{ + return nt_init(ptr, NT_S390_CTRS, &sa->ctrl_regs, + sizeof(sa->ctrl_regs), KEXEC_CORE_NOTE_NAME); +} + +/* + * Initialize prefix register note + */ +static void *nt_s390_prefix(void *ptr, struct save_area *sa) +{ + return nt_init(ptr, NT_S390_PREFIX, &sa->pref_reg, + sizeof(sa->pref_reg), KEXEC_CORE_NOTE_NAME); +} + +/* + * Fill ELF notes for one CPU with save area registers + */ +void *fill_cpu_elf_notes(void *ptr, struct save_area *sa) +{ + ptr = nt_prstatus(ptr, sa); + ptr = nt_fpregset(ptr, sa); + ptr = nt_s390_timer(ptr, sa); + ptr = nt_s390_tod_cmp(ptr, sa); + ptr = nt_s390_tod_preg(ptr, sa); + ptr = nt_s390_ctrs(ptr, sa); + ptr = nt_s390_prefix(ptr, sa); + return ptr; +} + +/* + * Initialize prpsinfo note (new kernel) + */ +static void *nt_prpsinfo(void *ptr) +{ + struct elf_prpsinfo prpsinfo; + + memset(&prpsinfo, 0, sizeof(prpsinfo)); + prpsinfo.pr_sname = 'R'; + strcpy(prpsinfo.pr_fname, "vmlinux"); + return nt_init(ptr, NT_PRPSINFO, &prpsinfo, sizeof(prpsinfo), + KEXEC_CORE_NOTE_NAME); +} + +/* + * Initialize vmcoreinfo note (new kernel) + */ +static void *nt_vmcoreinfo(void *ptr) +{ + char nt_name[11], *vmcoreinfo; + Elf64_Nhdr note; + void *addr; + + if (copy_from_oldmem(&addr, &S390_lowcore.vmcore_info, sizeof(addr))) + return ptr; + memset(nt_name, 0, sizeof(nt_name)); + if (copy_from_oldmem(¬e, addr, sizeof(note))) + return ptr; + if (copy_from_oldmem(nt_name, addr + sizeof(note), sizeof(nt_name) - 1)) + return ptr; + if (strcmp(nt_name, "VMCOREINFO") != 0) + return ptr; + vmcoreinfo = kzalloc_panic(note.n_descsz + 1); + if (copy_from_oldmem(vmcoreinfo, addr + 24, note.n_descsz)) + return ptr; + vmcoreinfo[note.n_descsz + 1] = 0; + return nt_init(ptr, 0, vmcoreinfo, note.n_descsz, "VMCOREINFO"); +} + +/* + * Initialize ELF header (new kernel) + */ +static void *ehdr_init(Elf64_Ehdr *ehdr, int mem_chunk_cnt) +{ + memset(ehdr, 0, sizeof(*ehdr)); + memcpy(ehdr->e_ident, ELFMAG, SELFMAG); + ehdr->e_ident[EI_CLASS] = ELFCLASS64; + ehdr->e_ident[EI_DATA] = ELFDATA2MSB; + ehdr->e_ident[EI_VERSION] = EV_CURRENT; + memset(ehdr->e_ident + EI_PAD, 0, EI_NIDENT - EI_PAD); + ehdr->e_type = ET_CORE; + ehdr->e_machine = EM_S390; + ehdr->e_version = EV_CURRENT; + ehdr->e_phoff = sizeof(Elf64_Ehdr); + ehdr->e_ehsize = sizeof(Elf64_Ehdr); + ehdr->e_phentsize = sizeof(Elf64_Phdr); + ehdr->e_phnum = mem_chunk_cnt + 1; + return ehdr + 1; +} + +/* + * Return CPU count for ELF header (new kernel) + */ +static int get_cpu_cnt(void) +{ + int i, cpus = 0; + + for (i = 0; zfcpdump_save_areas[i]; i++) { + if (zfcpdump_save_areas[i]->pref_reg == 0) + continue; + cpus++; + } + return cpus; +} + +/* + * Return memory chunk count for ELF header (new kernel) + */ +static int get_mem_chunk_cnt(void) +{ + struct mem_chunk *chunk_array, *mem_chunk; + int i, cnt = 0; + + chunk_array = get_memory_layout(); + for (i = 0; i < MEMORY_CHUNKS; i++) { + mem_chunk = &chunk_array[i]; + if (chunk_array[i].type != CHUNK_READ_WRITE && + chunk_array[i].type != CHUNK_READ_ONLY) + continue; + if (mem_chunk->size == 0) + continue; + cnt++; + } + kfree(chunk_array); + return cnt; +} + +/* + * Relocate pointer in order to allow vmcore code access the data + */ +static inline unsigned long relocate(unsigned long addr) +{ + return OLDMEM_BASE + addr; +} + +/* + * Initialize ELF loads (new kernel) + */ +static int loads_init(Elf64_Phdr *phdr, u64 loads_offset) +{ + struct mem_chunk *chunk_array, *mem_chunk; + int i; + + chunk_array = get_memory_layout(); + for (i = 0; i < MEMORY_CHUNKS; i++) { + mem_chunk = &chunk_array[i]; + if (mem_chunk->size == 0) + break; + if (chunk_array[i].type != CHUNK_READ_WRITE && + chunk_array[i].type != CHUNK_READ_ONLY) + continue; + else + phdr->p_filesz = mem_chunk->size; + phdr->p_type = PT_LOAD; + phdr->p_offset = mem_chunk->addr; + phdr->p_vaddr = mem_chunk->addr; + phdr->p_paddr = mem_chunk->addr; + phdr->p_memsz = mem_chunk->size; + phdr->p_flags = PF_R | PF_W | PF_X; + phdr->p_align = PAGE_SIZE; + phdr++; + } + kfree(chunk_array); + return i; +} + +/* + * Initialize notes (new kernel) + */ +static void *notes_init(Elf64_Phdr *phdr, void *ptr, u64 notes_offset) +{ + struct save_area *sa; + void *ptr_start = ptr; + int i; + + ptr = nt_prpsinfo(ptr); + + for (i = 0; zfcpdump_save_areas[i]; i++) { + sa = zfcpdump_save_areas[i]; + if (sa->pref_reg == 0) + continue; + ptr = fill_cpu_elf_notes(ptr, sa); + } + ptr = nt_vmcoreinfo(ptr); + memset(phdr, 0, sizeof(*phdr)); + phdr->p_type = PT_NOTE; + phdr->p_offset = relocate(notes_offset); + phdr->p_filesz = (unsigned long) PTR_SUB(ptr, ptr_start); + phdr->p_memsz = phdr->p_filesz; + return ptr; +} + +/* + * Create ELF core header (new kernel) + */ +static void s390_elf_corehdr_create(char **elfcorebuf, size_t *elfcorebuf_sz) +{ + Elf64_Phdr *phdr_notes, *phdr_loads; + int mem_chunk_cnt; + void *ptr, *hdr; + u32 alloc_size; + u64 hdr_off; + + mem_chunk_cnt = get_mem_chunk_cnt(); + + alloc_size = 0x1000 + get_cpu_cnt() * 0x300 + + mem_chunk_cnt * sizeof(Elf64_Phdr); + hdr = kzalloc_panic(alloc_size); + /* Init elf header */ + ptr = ehdr_init(hdr, mem_chunk_cnt); + /* Init program headers */ + phdr_notes = ptr; + ptr = PTR_ADD(ptr, sizeof(Elf64_Phdr)); + phdr_loads = ptr; + ptr = PTR_ADD(ptr, sizeof(Elf64_Phdr) * mem_chunk_cnt); + /* Init notes */ + hdr_off = PTR_DIFF(ptr, hdr); + ptr = notes_init(phdr_notes, ptr, ((unsigned long) hdr) + hdr_off); + /* Init loads */ + hdr_off = PTR_DIFF(ptr, hdr); + loads_init(phdr_loads, ((unsigned long) hdr) + hdr_off); + *elfcorebuf_sz = hdr_off; + *elfcorebuf = (void *) relocate((unsigned long) hdr); + BUG_ON(*elfcorebuf_sz > alloc_size); +} + +/* + * Create kdump ELF core header in new kernel, if it has not been passed via + * the "elfcorehdr" kernel parameter + */ +static int setup_kdump_elfcorehdr(void) +{ + size_t elfcorebuf_sz; + char *elfcorebuf; + + if (!OLDMEM_BASE || is_kdump_kernel()) + return -EINVAL; + s390_elf_corehdr_create(&elfcorebuf, &elfcorebuf_sz); + elfcorehdr_addr = (unsigned long long) elfcorebuf; + elfcorehdr_size = elfcorebuf_sz; + return 0; +} + +subsys_initcall(setup_kdump_elfcorehdr); diff --git a/arch/s390/kernel/head.S b/arch/s390/kernel/head.S index 2d781bab37b..900068d2bf9 100644 --- a/arch/s390/kernel/head.S +++ b/arch/s390/kernel/head.S @@ -449,10 +449,28 @@ ENTRY(start) # .org 0x10000 ENTRY(startup) + j .Lep_startup_normal + .org 0x10008 +# +# This is a list of s390 kernel entry points. At address 0x1000f the number of +# valid entry points is stored. +# +# IMPORTANT: Do not change this table, it is s390 kernel ABI! +# + .ascii "S390EP" + .byte 0x00,0x01 +# +# kdump startup-code at 0x10010, running in 64 bit absolute addressing mode +# + .org 0x10010 +ENTRY(startup_kdump) + j .Lep_startup_kdump +.Lep_startup_normal: basr %r13,0 # get base .LPG0: xc 0x200(256),0x200 # partially clear lowcore xc 0x300(256),0x300 + xc 0xe00(256),0xe00 stck __LC_LAST_UPDATE_CLOCK spt 5f-.LPG0(%r13) mvc __LC_LAST_UPDATE_TIMER(8),5f-.LPG0(%r13) @@ -534,6 +552,8 @@ ENTRY(startup) .align 8 5: .long 0x7fffffff,0xffffffff +#include "head_kdump.S" + # # params at 10400 (setup.h) # @@ -541,6 +561,8 @@ ENTRY(startup) .long 0,0 # IPL_DEVICE .long 0,0 # INITRD_START .long 0,0 # INITRD_SIZE + .long 0,0 # OLDMEM_BASE + .long 0,0 # OLDMEM_SIZE .org COMMAND_LINE .byte "root=/dev/ram0 ro" diff --git a/arch/s390/kernel/head_kdump.S b/arch/s390/kernel/head_kdump.S new file mode 100644 index 00000000000..e1ac3893e97 --- /dev/null +++ b/arch/s390/kernel/head_kdump.S @@ -0,0 +1,119 @@ +/* + * S390 kdump lowlevel functions (new kernel) + * + * Copyright IBM Corp. 2011 + * Author(s): Michael Holzheu + */ + +#define DATAMOVER_ADDR 0x4000 +#define COPY_PAGE_ADDR 0x6000 + +#ifdef CONFIG_CRASH_DUMP + +# +# kdump entry (new kernel - not yet relocated) +# +# Note: This code has to be position independent +# + +.align 2 +.Lep_startup_kdump: + lhi %r1,2 # mode 2 = esame (dump) + sigp %r1,%r0,0x12 # Switch to esame mode + sam64 # Switch to 64 bit addressing + basr %r13,0 +.Lbase: + larl %r2,.Lbase_addr # Check, if we have been + lg %r2,0(%r2) # already relocated: + clgr %r2,%r13 # + jne .Lrelocate # No : Start data mover + lghi %r2,0 # Yes: Start kdump kernel + brasl %r14,startup_kdump_relocated + +.Lrelocate: + larl %r4,startup + lg %r2,0x418(%r4) # Get kdump base + lg %r3,0x420(%r4) # Get kdump size + + larl %r10,.Lcopy_start # Source of data mover + lghi %r8,DATAMOVER_ADDR # Target of data mover + mvc 0(256,%r8),0(%r10) # Copy data mover code + + agr %r8,%r2 # Copy data mover to + mvc 0(256,%r8),0(%r10) # reserved mem + + lghi %r14,DATAMOVER_ADDR # Jump to copied data mover + basr %r14,%r14 +.Lbase_addr: + .quad .Lbase + +# +# kdump data mover code (runs at address DATAMOVER_ADDR) +# +# r2: kdump base address +# r3: kdump size +# +.Lcopy_start: + basr %r13,0 # Base +0: + lgr %r11,%r2 # Save kdump base address + lgr %r12,%r2 + agr %r12,%r3 # Compute kdump end address + + lghi %r5,0 + lghi %r10,COPY_PAGE_ADDR # Load copy page address +1: + mvc 0(256,%r10),0(%r5) # Copy old kernel to tmp + mvc 0(256,%r5),0(%r11) # Copy new kernel to old + mvc 0(256,%r11),0(%r10) # Copy tmp to new + aghi %r11,256 + aghi %r5,256 + clgr %r11,%r12 + jl 1b + + lg %r14,.Lstartup_kdump-0b(%r13) + basr %r14,%r14 # Start relocated kernel +.Lstartup_kdump: + .long 0x00000000,0x00000000 + startup_kdump_relocated +.Lcopy_end: + +# +# Startup of kdump (relocated new kernel) +# +.align 2 +startup_kdump_relocated: + basr %r13,0 +0: + mvc 0(8,%r0),.Lrestart_psw-0b(%r13) # Setup restart PSW + mvc 464(16,%r0),.Lpgm_psw-0b(%r13) # Setup pgm check PSW + lhi %r1,1 # Start new kernel + diag %r1,%r1,0x308 # with diag 308 + +.Lno_diag308: # No diag 308 + sam31 # Switch to 31 bit addr mode + sr %r1,%r1 # Erase register r1 + sr %r2,%r2 # Erase register r2 + sigp %r1,%r2,0x12 # Switch to 31 bit arch mode + lpsw 0 # Start new kernel... +.align 8 +.Lrestart_psw: + .long 0x00080000,0x80000000 + startup +.Lpgm_psw: + .quad 0x0000000180000000,0x0000000000000000 + .Lno_diag308 +#else +.align 2 +.Lep_startup_kdump: +#ifdef CONFIG_64BIT + larl %r13,startup_kdump_crash + lpswe 0(%r13) +.align 8 +startup_kdump_crash: + .quad 0x0002000080000000,0x0000000000000000 + startup_kdump_crash +#else + basr %r13,0 +0: lpsw startup_kdump_crash-0b(%r13) +.align 8 +startup_kdump_crash: + .long 0x000a0000,0x00000000 + startup_kdump_crash +#endif /* CONFIG_64BIT */ +#endif /* CONFIG_CRASH_DUMP */ diff --git a/arch/s390/kernel/ipl.c b/arch/s390/kernel/ipl.c index 90769b4bc7f..ca0520c5254 100644 --- a/arch/s390/kernel/ipl.c +++ b/arch/s390/kernel/ipl.c @@ -16,6 +16,7 @@ #include #include #include +#include #include #include #include @@ -1740,6 +1741,9 @@ void do_restart(void) { smp_restart_with_online_cpu(); smp_send_stop(); +#ifdef CONFIG_CRASH_DUMP + crash_kexec(NULL); +#endif on_restart_trigger.action->fn(&on_restart_trigger); stop_run(&on_restart_trigger); } @@ -2010,7 +2014,7 @@ static void do_reset_calls(void) u32 dump_prefix_page; -void s390_reset_system(void) +void s390_reset_system(void (*func)(void *), void *data) { struct _lowcore *lc; @@ -2038,6 +2042,10 @@ void s390_reset_system(void) S390_lowcore.program_new_psw.addr = PSW_ADDR_AMODE | (unsigned long) s390_base_pgm_handler; + /* Store status at absolute zero */ + store_status(); + do_reset_calls(); + if (func) + func(data); } - diff --git a/arch/s390/kernel/machine_kexec.c b/arch/s390/kernel/machine_kexec.c index b09b9c62573..0ceac06a029 100644 --- a/arch/s390/kernel/machine_kexec.c +++ b/arch/s390/kernel/machine_kexec.c @@ -1,10 +1,11 @@ /* * arch/s390/kernel/machine_kexec.c * - * Copyright IBM Corp. 2005,2006 + * Copyright IBM Corp. 2005,2011 * * Author(s): Rolf Adelsberger, * Heiko Carstens + * Michael Holzheu */ #include @@ -21,12 +22,131 @@ #include #include #include +#include +#include typedef void (*relocate_kernel_t)(kimage_entry_t *, unsigned long); extern const unsigned char relocate_kernel[]; extern const unsigned long long relocate_kernel_len; +#ifdef CONFIG_CRASH_DUMP + +void *fill_cpu_elf_notes(void *ptr, struct save_area *sa); + +/* + * Create ELF notes for one CPU + */ +static void add_elf_notes(int cpu) +{ + struct save_area *sa = (void *) 4608 + store_prefix(); + void *ptr; + + memcpy((void *) (4608UL + sa->pref_reg), sa, sizeof(*sa)); + ptr = (u64 *) per_cpu_ptr(crash_notes, cpu); + ptr = fill_cpu_elf_notes(ptr, sa); + memset(ptr, 0, sizeof(struct elf_note)); +} + +/* + * Store status of next available physical CPU + */ +static int store_status_next(int start_cpu, int this_cpu) +{ + struct save_area *sa = (void *) 4608 + store_prefix(); + int cpu, rc; + + for (cpu = start_cpu; cpu < 65536; cpu++) { + if (cpu == this_cpu) + continue; + do { + rc = raw_sigp(cpu, sigp_stop_and_store_status); + } while (rc == sigp_busy); + if (rc != sigp_order_code_accepted) + continue; + if (sa->pref_reg) + return cpu; + } + return -1; +} + +/* + * Initialize CPU ELF notes + */ +void setup_regs(void) +{ + unsigned long sa = S390_lowcore.prefixreg_save_area + SAVE_AREA_BASE; + int cpu, this_cpu, phys_cpu = 0, first = 1; + + this_cpu = stap(); + + if (!S390_lowcore.prefixreg_save_area) + first = 0; + for_each_online_cpu(cpu) { + if (first) { + add_elf_notes(cpu); + first = 0; + continue; + } + phys_cpu = store_status_next(phys_cpu, this_cpu); + if (phys_cpu == -1) + break; + add_elf_notes(cpu); + phys_cpu++; + } + /* Copy dump CPU store status info to absolute zero */ + memcpy((void *) SAVE_AREA_BASE, (void *) sa, sizeof(struct save_area)); +} + +#endif + +/* + * Start kdump: We expect here that a store status has been done on our CPU + */ +static void __do_machine_kdump(void *image) +{ +#ifdef CONFIG_CRASH_DUMP + int (*start_kdump)(int) = (void *)((struct kimage *) image)->start; + + __load_psw_mask(PSW_BASE_BITS | PSW_DEFAULT_KEY); + setup_regs(); + start_kdump(1); +#endif +} + +/* + * Check if kdump checksums are valid: We call purgatory with parameter "0" + */ +static int kdump_csum_valid(struct kimage *image) +{ +#ifdef CONFIG_CRASH_DUMP + int (*start_kdump)(int) = (void *)image->start; + int rc; + + __arch_local_irq_stnsm(0xfb); /* disable DAT */ + rc = start_kdump(0); + __arch_local_irq_stosm(0x04); /* enable DAT */ + return rc ? 0 : -EINVAL; +#else + return -EINVAL; +#endif +} + +/* + * Give back memory to hypervisor before new kdump is loaded + */ +static int machine_kexec_prepare_kdump(void) +{ +#ifdef CONFIG_CRASH_DUMP + if (MACHINE_IS_VM) + diag10_range(PFN_DOWN(crashk_res.start), + PFN_DOWN(crashk_res.end - crashk_res.start + 1)); + return 0; +#else + return -EINVAL; +#endif +} + int machine_kexec_prepare(struct kimage *image) { void *reboot_code_buffer; @@ -35,6 +155,9 @@ int machine_kexec_prepare(struct kimage *image) if (ipl_flags & IPL_NSS_VALID) return -ENOSYS; + if (image->type == KEXEC_TYPE_CRASH) + return machine_kexec_prepare_kdump(); + /* We don't support anything but the default image type for now. */ if (image->type != KEXEC_TYPE_DEFAULT) return -EINVAL; @@ -51,27 +174,53 @@ void machine_kexec_cleanup(struct kimage *image) { } +void arch_crash_save_vmcoreinfo(void) +{ + VMCOREINFO_SYMBOL(lowcore_ptr); + VMCOREINFO_LENGTH(lowcore_ptr, NR_CPUS); +} + void machine_shutdown(void) { } -static void __machine_kexec(void *data) +/* + * Do normal kexec + */ +static void __do_machine_kexec(void *data) { relocate_kernel_t data_mover; struct kimage *image = data; - pfault_fini(); - s390_reset_system(); - data_mover = (relocate_kernel_t) page_to_phys(image->control_code_page); /* Call the moving routine */ (*data_mover)(&image->head, image->start); - for (;;); } +/* + * Reset system and call either kdump or normal kexec + */ +static void __machine_kexec(void *data) +{ + struct kimage *image = data; + + pfault_fini(); + if (image->type == KEXEC_TYPE_CRASH) + s390_reset_system(__do_machine_kdump, data); + else + s390_reset_system(__do_machine_kexec, data); + disabled_wait((unsigned long) __builtin_return_address(0)); +} + +/* + * Do either kdump or normal kexec. In case of kdump we first ask + * purgatory, if kdump checksums are valid. + */ void machine_kexec(struct kimage *image) { + if (image->type == KEXEC_TYPE_CRASH && !kdump_csum_valid(image)) + return; tracer_disable(); smp_send_stop(); smp_switch_to_ipl_cpu(__machine_kexec, image); diff --git a/arch/s390/kernel/mem_detect.c b/arch/s390/kernel/mem_detect.c index 0fbe4e32f7b..19b4568f4ce 100644 --- a/arch/s390/kernel/mem_detect.c +++ b/arch/s390/kernel/mem_detect.c @@ -62,3 +62,72 @@ void detect_memory_layout(struct mem_chunk chunk[]) arch_local_irq_restore(flags); } EXPORT_SYMBOL(detect_memory_layout); + +/* + * Create memory hole with given address, size, and type + */ +void create_mem_hole(struct mem_chunk chunks[], unsigned long addr, + unsigned long size, int type) +{ + unsigned long start, end, new_size; + int i; + + for (i = 0; i < MEMORY_CHUNKS; i++) { + if (chunks[i].size == 0) + continue; + if (addr + size < chunks[i].addr) + continue; + if (addr >= chunks[i].addr + chunks[i].size) + continue; + start = max(addr, chunks[i].addr); + end = min(addr + size, chunks[i].addr + chunks[i].size); + new_size = end - start; + if (new_size == 0) + continue; + if (start == chunks[i].addr && + end == chunks[i].addr + chunks[i].size) { + /* Remove chunk */ + chunks[i].type = type; + } else if (start == chunks[i].addr) { + /* Make chunk smaller at start */ + if (i >= MEMORY_CHUNKS - 1) + panic("Unable to create memory hole"); + memmove(&chunks[i + 1], &chunks[i], + sizeof(struct mem_chunk) * + (MEMORY_CHUNKS - (i + 1))); + chunks[i + 1].addr = chunks[i].addr + new_size; + chunks[i + 1].size = chunks[i].size - new_size; + chunks[i].size = new_size; + chunks[i].type = type; + i += 1; + } else if (end == chunks[i].addr + chunks[i].size) { + /* Make chunk smaller at end */ + if (i >= MEMORY_CHUNKS - 1) + panic("Unable to create memory hole"); + memmove(&chunks[i + 1], &chunks[i], + sizeof(struct mem_chunk) * + (MEMORY_CHUNKS - (i + 1))); + chunks[i + 1].addr = start; + chunks[i + 1].size = new_size; + chunks[i + 1].type = type; + chunks[i].size -= new_size; + i += 1; + } else { + /* Create memory hole */ + if (i >= MEMORY_CHUNKS - 2) + panic("Unable to create memory hole"); + memmove(&chunks[i + 2], &chunks[i], + sizeof(struct mem_chunk) * + (MEMORY_CHUNKS - (i + 2))); + chunks[i + 1].addr = addr; + chunks[i + 1].size = size; + chunks[i + 1].type = type; + chunks[i + 2].addr = addr + size; + chunks[i + 2].size = + chunks[i].addr + chunks[i].size - (addr + size); + chunks[i + 2].type = chunks[i].type; + chunks[i].size = addr - chunks[i].addr; + i += 2; + } + } +} diff --git a/arch/s390/kernel/reipl.S b/arch/s390/kernel/reipl.S index 303d961c3bb..ad67c214be0 100644 --- a/arch/s390/kernel/reipl.S +++ b/arch/s390/kernel/reipl.S @@ -9,6 +9,12 @@ #include #include +# +# store_status: Empty implementation until kdump is supported on 31 bit +# +ENTRY(store_status) + br %r14 + # # do_reipl_asm # Parameter: r2 = schid of reipl device diff --git a/arch/s390/kernel/reipl64.S b/arch/s390/kernel/reipl64.S index e690975403f..a0f5b686a3c 100644 --- a/arch/s390/kernel/reipl64.S +++ b/arch/s390/kernel/reipl64.S @@ -62,8 +62,11 @@ ENTRY(store_status) larl %r2,store_status stg %r2,__LC_PSW_SAVE_AREA-SAVE_AREA_BASE + 8(%r1) br %r14 -.align 8 + + .section .bss + .align 8 .Lclkcmp: .quad 0x0000000000000000 + .previous # # do_reipl_asm diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c index 7b371c37061..b5a30412b2e 100644 --- a/arch/s390/kernel/setup.c +++ b/arch/s390/kernel/setup.c @@ -42,6 +42,9 @@ #include #include #include +#include +#include +#include #include #include @@ -57,6 +60,7 @@ #include #include #include +#include long psw_kernel_bits = (PSW_BASE_BITS | PSW_MASK_DAT | PSW_ASC_PRIMARY | PSW_MASK_MCHECK | PSW_DEFAULT_KEY); @@ -435,6 +439,9 @@ static void __init setup_resources(void) for (i = 0; i < MEMORY_CHUNKS; i++) { if (!memory_chunk[i].size) continue; + if (memory_chunk[i].type == CHUNK_OLDMEM || + memory_chunk[i].type == CHUNK_CRASHK) + continue; res = alloc_bootmem_low(sizeof(*res)); res->flags = IORESOURCE_BUSY | IORESOURCE_MEM; switch (memory_chunk[i].type) { @@ -479,6 +486,7 @@ static void __init setup_memory_end(void) unsigned long max_mem; int i; + #ifdef CONFIG_ZFCPDUMP if (ipl_info.type == IPL_TYPE_FCP_DUMP) { memory_end = ZFCPDUMP_HSA_SIZE; @@ -550,6 +558,187 @@ static void __init setup_restart_psw(void) copy_to_absolute_zero(&S390_lowcore.restart_psw, &psw, sizeof(psw)); } +#ifdef CONFIG_CRASH_DUMP + +/* + * Find suitable location for crashkernel memory + */ +static unsigned long __init find_crash_base(unsigned long crash_size, + char **msg) +{ + unsigned long crash_base; + struct mem_chunk *chunk; + int i; + + if (memory_chunk[0].size < crash_size) { + *msg = "first memory chunk must be at least crashkernel size"; + return 0; + } + if (is_kdump_kernel() && (crash_size == OLDMEM_SIZE)) + return OLDMEM_BASE; + + for (i = MEMORY_CHUNKS - 1; i >= 0; i--) { + chunk = &memory_chunk[i]; + if (chunk->size == 0) + continue; + if (chunk->type != CHUNK_READ_WRITE) + continue; + if (chunk->size < crash_size) + continue; + crash_base = (chunk->addr + chunk->size) - crash_size; + if (crash_base < crash_size) + continue; + if (crash_base < ZFCPDUMP_HSA_SIZE_MAX) + continue; + if (crash_base < (unsigned long) INITRD_START + INITRD_SIZE) + continue; + return crash_base; + } + *msg = "no suitable area found"; + return 0; +} + +/* + * Check if crash_base and crash_size is valid + */ +static int __init verify_crash_base(unsigned long crash_base, + unsigned long crash_size, + char **msg) +{ + struct mem_chunk *chunk; + int i; + + /* + * Because we do the swap to zero, we must have at least 'crash_size' + * bytes free space before crash_base + */ + if (crash_size > crash_base) { + *msg = "crashkernel offset must be greater than size"; + return -EINVAL; + } + + /* First memory chunk must be at least crash_size */ + if (memory_chunk[0].size < crash_size) { + *msg = "first memory chunk must be at least crashkernel size"; + return -EINVAL; + } + /* Check if we fit into the respective memory chunk */ + for (i = 0; i < MEMORY_CHUNKS; i++) { + chunk = &memory_chunk[i]; + if (chunk->size == 0) + continue; + if (crash_base < chunk->addr) + continue; + if (crash_base >= chunk->addr + chunk->size) + continue; + /* we have found the memory chunk */ + if (crash_base + crash_size > chunk->addr + chunk->size) { + *msg = "selected memory chunk is too small for " + "crashkernel memory"; + return -EINVAL; + } + return 0; + } + *msg = "invalid memory range specified"; + return -EINVAL; +} + +/* + * Reserve kdump memory by creating a memory hole in the mem_chunk array + */ +static void __init reserve_kdump_bootmem(unsigned long addr, unsigned long size, + int type) +{ + + create_mem_hole(memory_chunk, addr, size, type); +} + +/* + * When kdump is enabled, we have to ensure that no memory from + * the area [0 - crashkernel memory size] and + * [crashk_res.start - crashk_res.end] is set offline. + */ +static int kdump_mem_notifier(struct notifier_block *nb, + unsigned long action, void *data) +{ + struct memory_notify *arg = data; + + if (arg->start_pfn < PFN_DOWN(resource_size(&crashk_res))) + return NOTIFY_BAD; + if (arg->start_pfn > PFN_DOWN(crashk_res.end)) + return NOTIFY_OK; + if (arg->start_pfn + arg->nr_pages - 1 < PFN_DOWN(crashk_res.start)) + return NOTIFY_OK; + return NOTIFY_BAD; +} + +static struct notifier_block kdump_mem_nb = { + .notifier_call = kdump_mem_notifier, +}; + +#endif + +/* + * Make sure that oldmem, where the dump is stored, is protected + */ +static void reserve_oldmem(void) +{ +#ifdef CONFIG_CRASH_DUMP + if (!OLDMEM_BASE) + return; + + reserve_kdump_bootmem(OLDMEM_BASE, OLDMEM_SIZE, CHUNK_OLDMEM); + reserve_kdump_bootmem(OLDMEM_SIZE, memory_end - OLDMEM_SIZE, + CHUNK_OLDMEM); + if (OLDMEM_BASE + OLDMEM_SIZE == real_memory_size) + saved_max_pfn = PFN_DOWN(OLDMEM_BASE) - 1; + else + saved_max_pfn = PFN_DOWN(real_memory_size) - 1; +#endif +} + +/* + * Reserve memory for kdump kernel to be loaded with kexec + */ +static void __init reserve_crashkernel(void) +{ +#ifdef CONFIG_CRASH_DUMP + unsigned long long crash_base, crash_size; + char *msg; + int rc; + + rc = parse_crashkernel(boot_command_line, memory_end, &crash_size, + &crash_base); + if (rc || crash_size == 0) + return; + crash_base = PAGE_ALIGN(crash_base); + crash_size = PAGE_ALIGN(crash_size); + if (register_memory_notifier(&kdump_mem_nb)) + return; + if (!crash_base) + crash_base = find_crash_base(crash_size, &msg); + if (!crash_base) { + pr_info("crashkernel reservation failed: %s\n", msg); + unregister_memory_notifier(&kdump_mem_nb); + return; + } + if (verify_crash_base(crash_base, crash_size, &msg)) { + pr_info("crashkernel reservation failed: %s\n", msg); + unregister_memory_notifier(&kdump_mem_nb); + return; + } + if (!OLDMEM_BASE && MACHINE_IS_VM) + diag10_range(PFN_DOWN(crash_base), PFN_DOWN(crash_size)); + crashk_res.start = crash_base; + crashk_res.end = crash_base + crash_size - 1; + insert_resource(&iomem_resource, &crashk_res); + reserve_kdump_bootmem(crash_base, crash_size, CHUNK_READ_WRITE); + pr_info("Reserving %lluMB of memory at %lluMB " + "for crashkernel (System RAM: %luMB)\n", + crash_size >> 20, crash_base >> 20, memory_end >> 20); +#endif +} + static void __init setup_memory(void) { @@ -580,6 +769,14 @@ setup_memory(void) if (PFN_PHYS(start_pfn) + bmap_size > INITRD_START) { start = PFN_PHYS(start_pfn) + bmap_size + PAGE_SIZE; +#ifdef CONFIG_CRASH_DUMP + if (OLDMEM_BASE) { + /* Move initrd behind kdump oldmem */ + if (start + INITRD_SIZE > OLDMEM_BASE && + start < OLDMEM_BASE + OLDMEM_SIZE) + start = OLDMEM_BASE + OLDMEM_SIZE; + } +#endif if (start + INITRD_SIZE > memory_end) { pr_err("initrd extends beyond end of " "memory (0x%08lx > 0x%08lx) " @@ -644,6 +841,15 @@ setup_memory(void) reserve_bootmem(start_pfn << PAGE_SHIFT, bootmap_size, BOOTMEM_DEFAULT); +#ifdef CONFIG_CRASH_DUMP + if (crashk_res.start) + reserve_bootmem(crashk_res.start, + crashk_res.end - crashk_res.start + 1, + BOOTMEM_DEFAULT); + if (is_kdump_kernel()) + reserve_bootmem(elfcorehdr_addr - OLDMEM_BASE, + PAGE_ALIGN(elfcorehdr_size), BOOTMEM_DEFAULT); +#endif #ifdef CONFIG_BLK_DEV_INITRD if (INITRD_START && INITRD_SIZE) { if (INITRD_START + INITRD_SIZE <= memory_end) { @@ -812,6 +1018,8 @@ setup_arch(char **cmdline_p) setup_ipl(); setup_memory_end(); setup_addressing_mode(); + reserve_oldmem(); + reserve_crashkernel(); setup_memory(); setup_resources(); setup_restart_psw(); diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c index e4572601e91..e3f51dfa5ca 100644 --- a/arch/s390/kernel/smp.c +++ b/arch/s390/kernel/smp.c @@ -38,6 +38,7 @@ #include #include #include +#include #include #include #include @@ -304,11 +305,13 @@ void smp_ctl_clear_bit(int cr, int bit) } EXPORT_SYMBOL(smp_ctl_clear_bit); -#ifdef CONFIG_ZFCPDUMP +#if defined(CONFIG_ZFCPDUMP) || defined(CONFIG_CRASH_DUMP) static void __init smp_get_save_area(unsigned int cpu, unsigned int phy_cpu) { - if (ipl_info.type != IPL_TYPE_FCP_DUMP) + if (ipl_info.type != IPL_TYPE_FCP_DUMP && !OLDMEM_BASE) + return; + if (is_kdump_kernel()) return; if (cpu >= NR_CPUS) { pr_warning("CPU %i exceeds the maximum %i and is excluded from " @@ -426,6 +429,18 @@ static void __init smp_detect_cpus(void) info = kmalloc(sizeof(*info), GFP_KERNEL); if (!info) panic("smp_detect_cpus failed to allocate memory\n"); +#ifdef CONFIG_CRASH_DUMP + if (OLDMEM_BASE && !is_kdump_kernel()) { + struct save_area *save_area; + + save_area = kmalloc(sizeof(*save_area), GFP_KERNEL); + if (!save_area) + panic("could not allocate memory for save area\n"); + copy_oldmem_page(1, (void *) save_area, sizeof(*save_area), + 0x200, 0); + zfcpdump_save_areas[0] = save_area; + } +#endif /* Use sigp detection algorithm if sclp doesn't work. */ if (sclp_get_cpu_info(info)) { smp_use_sigp_detection = 1; diff --git a/arch/s390/mm/vmem.c b/arch/s390/mm/vmem.c index 781ff516956..4799383e2df 100644 --- a/arch/s390/mm/vmem.c +++ b/arch/s390/mm/vmem.c @@ -335,6 +335,9 @@ void __init vmem_map_init(void) ro_start = ((unsigned long)&_stext) & PAGE_MASK; ro_end = PFN_ALIGN((unsigned long)&_eshared); for (i = 0; i < MEMORY_CHUNKS && memory_chunk[i].size > 0; i++) { + if (memory_chunk[i].type == CHUNK_CRASHK || + memory_chunk[i].type == CHUNK_OLDMEM) + continue; start = memory_chunk[i].addr; end = memory_chunk[i].addr + memory_chunk[i].size; if (start >= ro_end || end <= ro_start) @@ -368,6 +371,9 @@ static int __init vmem_convert_memory_chunk(void) for (i = 0; i < MEMORY_CHUNKS; i++) { if (!memory_chunk[i].size) continue; + if (memory_chunk[i].type == CHUNK_CRASHK || + memory_chunk[i].type == CHUNK_OLDMEM) + continue; seg = kzalloc(sizeof(*seg), GFP_KERNEL); if (!seg) panic("Out of memory...\n"); diff --git a/drivers/s390/cio/cio.c b/drivers/s390/cio/cio.c index 5586c1376cb..dc67c397449 100644 --- a/drivers/s390/cio/cio.c +++ b/drivers/s390/cio/cio.c @@ -1069,7 +1069,7 @@ void reipl_ccw_dev(struct ccw_dev_id *devid) { struct subchannel_id schid; - s390_reset_system(); + s390_reset_system(NULL, NULL); if (reipl_find_schid(devid, &schid) != 0) panic("IPL Device not found\n"); do_reipl_asm(*((__u32*)&schid)); -- cgit v1.2.3 From fa8ff292bb4844cc0b66b7b24d611eb7177f7ded Mon Sep 17 00:00:00 2001 From: Michael Holzheu Date: Sun, 30 Oct 2011 15:16:41 +0100 Subject: [S390] kdump: Initialize vmcoreinfo note at startup Currently the vmcoreinfo note is only initialized in case of kdump. On s390 it is possible to create kernel dumps with other dump mechanisms than kdump (e.g. via hypervisor dump or stand-alone dump tools). For those dumps it would also be desirable to include the vmcoreinfo data. To accomplish this, with this patch the vmcoreinfo ELF note is always initialized, not only in case of a (kdump) crash. On s390 we will add an ABI defined pointer at a well known address to vmcoreinfo so that dump analysis tools are able to find this information. In particular on s390 we have a tool named zgetdump. With this tool it is possible to convert dump formats on the fly using fuse. E.g. you can mount a s390 stand-alone dump as ELF dump. When this is done, the tool finds the vmcoreinfo in the stand-alone dump via the well known ABI defined address and it creates the respective VMCOREINFO ELF note in the output ELF dump. This then can be used e.g. by makedumpfile for dump filtering. No more need for a vmlinux file with debug information. So this will look like the following: $ zgetdump --mount standalone.dump -f elf /mnt $ ls /mnt dump.elf $ readelf -n /mnt/dump.elf $ ... VMCOREINFO 0x00000474 Unknown note type: (0x00000000) $ makedumpfile -c -d 31 /mnt/dump.elf dump.kdump Cc: Andrew Morton Acked-by: Vivek Goyal Signed-off-by: Michael Holzheu Signed-off-by: Martin Schwidefsky --- kernel/kexec.c | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/kernel/kexec.c b/kernel/kexec.c index 7204fb982ed..d3b8a4ceb90 100644 --- a/kernel/kexec.c +++ b/kernel/kexec.c @@ -1380,24 +1380,23 @@ int __init parse_crashkernel(char *cmdline, } - -void crash_save_vmcoreinfo(void) +static void update_vmcoreinfo_note(void) { - u32 *buf; + u32 *buf = vmcoreinfo_note; if (!vmcoreinfo_size) return; - - vmcoreinfo_append_str("CRASHTIME=%ld", get_seconds()); - - buf = (u32 *)vmcoreinfo_note; - buf = append_elf_note(buf, VMCOREINFO_NOTE_NAME, 0, vmcoreinfo_data, vmcoreinfo_size); - final_note(buf); } +void crash_save_vmcoreinfo(void) +{ + vmcoreinfo_append_str("CRASHTIME=%ld", get_seconds()); + update_vmcoreinfo_note(); +} + void vmcoreinfo_append_str(const char *fmt, ...) { va_list args; @@ -1483,6 +1482,7 @@ static int __init crash_save_vmcoreinfo_init(void) VMCOREINFO_NUMBER(PG_swapcache); arch_crash_save_vmcoreinfo(); + update_vmcoreinfo_note(); return 0; } -- cgit v1.2.3 From d38593f9387055566b782d00d38d9a347a96e7d9 Mon Sep 17 00:00:00 2001 From: Michael Holzheu Date: Sun, 30 Oct 2011 15:16:42 +0100 Subject: [S390] Export vmcoreinfo note This patch defines for s390 an ABI defined pointer to the vmcoreinfo note at a well known address. With this patch tools are able to find this information in dumps created by stand-alone or hypervisor dump tools. Signed-off-by: Michael Holzheu Signed-off-by: Martin Schwidefsky --- arch/s390/include/asm/lowcore.h | 10 ++++++---- arch/s390/kernel/setup.c | 10 ++++++++++ 2 files changed, 16 insertions(+), 4 deletions(-) diff --git a/arch/s390/include/asm/lowcore.h b/arch/s390/include/asm/lowcore.h index e85c911aabf..4f990a5e5b5 100644 --- a/arch/s390/include/asm/lowcore.h +++ b/arch/s390/include/asm/lowcore.h @@ -151,10 +151,11 @@ struct _lowcore { */ __u32 ipib; /* 0x0e00 */ __u32 ipib_checksum; /* 0x0e04 */ + __u32 vmcore_info; /* 0x0e08 */ /* 64 bit save area */ - __u64 save_area_64; /* 0x0e08 */ - __u8 pad_0x0e10[0x0f00-0x0e10]; /* 0x0e10 */ + __u64 save_area_64; /* 0x0e0c */ + __u8 pad_0x0e14[0x0f00-0x0e14]; /* 0x0e14 */ /* Extended facility list */ __u64 stfle_fac_list[32]; /* 0x0f00 */ @@ -290,10 +291,11 @@ struct _lowcore { */ __u64 ipib; /* 0x0e00 */ __u32 ipib_checksum; /* 0x0e08 */ + __u64 vmcore_info; /* 0x0e0c */ /* 64 bit save area */ - __u64 save_area_64; /* 0x0e0c */ - __u8 pad_0x0e14[0x0f00-0x0e14]; /* 0x0e14 */ + __u64 save_area_64; /* 0x0e14 */ + __u8 pad_0x0e1c[0x0f00-0x0e1c]; /* 0x0e1c */ /* Extended facility list */ __u64 stfle_fac_list[32]; /* 0x0f00 */ diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c index b5a30412b2e..63c81de665e 100644 --- a/arch/s390/kernel/setup.c +++ b/arch/s390/kernel/setup.c @@ -558,6 +558,15 @@ static void __init setup_restart_psw(void) copy_to_absolute_zero(&S390_lowcore.restart_psw, &psw, sizeof(psw)); } +static void __init setup_vmcoreinfo(void) +{ +#ifdef CONFIG_KEXEC + unsigned long ptr = paddr_vmcoreinfo_note(); + + copy_to_absolute_zero(&S390_lowcore.vmcore_info, &ptr, sizeof(ptr)); +#endif +} + #ifdef CONFIG_CRASH_DUMP /* @@ -1022,6 +1031,7 @@ setup_arch(char **cmdline_p) reserve_crashkernel(); setup_memory(); setup_resources(); + setup_vmcoreinfo(); setup_restart_psw(); setup_lowcore(); -- cgit v1.2.3 From 558df7209e7997275f6b8ad37737494cf2da1512 Mon Sep 17 00:00:00 2001 From: Michael Holzheu Date: Sun, 30 Oct 2011 15:16:43 +0100 Subject: [S390] kdump: Add infrastructure for unmapping crashkernel memory This patch introduces a mechanism that allows architecture backends to remove page tables for the crashkernel memory. This can protect the loaded kdump kernel from being overwritten by broken kernel code. Two new functions crash_map_reserved_pages() and crash_unmap_reserved_pages() are added that can be implemented by architecture code. The crash_map_reserved_pages() function is called before and crash_unmap_reserved_pages() after the crashkernel segments are loaded. The functions are also called in crash_shrink_memory() to create/remove page tables when the crashkernel memory size is reduced. To support architectures that have large pages this patch also introduces a new define KEXEC_CRASH_MEM_ALIGN. The crashkernel start and size must always be aligned with KEXEC_CRASH_MEM_ALIGN. Cc: Andrew Morton Acked-by: Vivek Goyal Signed-off-by: Michael Holzheu Signed-off-by: Martin Schwidefsky --- include/linux/kexec.h | 6 ++++++ kernel/kexec.c | 21 +++++++++++++++++++-- 2 files changed, 25 insertions(+), 2 deletions(-) diff --git a/include/linux/kexec.h b/include/linux/kexec.h index 07d9aba7556..fe45136b32c 100644 --- a/include/linux/kexec.h +++ b/include/linux/kexec.h @@ -37,6 +37,10 @@ #define KEXEC_CRASH_CONTROL_MEMORY_LIMIT KEXEC_CONTROL_MEMORY_LIMIT #endif +#ifndef KEXEC_CRASH_MEM_ALIGN +#define KEXEC_CRASH_MEM_ALIGN PAGE_SIZE +#endif + #define KEXEC_NOTE_HEAD_BYTES ALIGN(sizeof(struct elf_note), 4) #define KEXEC_CORE_NOTE_NAME "CORE" #define KEXEC_CORE_NOTE_NAME_BYTES ALIGN(sizeof(KEXEC_CORE_NOTE_NAME), 4) @@ -133,6 +137,8 @@ extern void crash_kexec(struct pt_regs *); int kexec_should_crash(struct task_struct *); void crash_save_cpu(struct pt_regs *regs, int cpu); void crash_save_vmcoreinfo(void); +void crash_map_reserved_pages(void); +void crash_unmap_reserved_pages(void); void arch_crash_save_vmcoreinfo(void); void vmcoreinfo_append_str(const char *fmt, ...) __attribute__ ((format (printf, 1, 2))); diff --git a/kernel/kexec.c b/kernel/kexec.c index d3b8a4ceb90..dc7bc082928 100644 --- a/kernel/kexec.c +++ b/kernel/kexec.c @@ -999,6 +999,7 @@ SYSCALL_DEFINE4(kexec_load, unsigned long, entry, unsigned long, nr_segments, kimage_free(xchg(&kexec_crash_image, NULL)); result = kimage_crash_alloc(&image, entry, nr_segments, segments); + crash_map_reserved_pages(); } if (result) goto out; @@ -1015,6 +1016,8 @@ SYSCALL_DEFINE4(kexec_load, unsigned long, entry, unsigned long, nr_segments, goto out; } kimage_terminate(image); + if (flags & KEXEC_ON_CRASH) + crash_unmap_reserved_pages(); } /* Install the new kernel, and Uninstall the old */ image = xchg(dest_image, image); @@ -1026,6 +1029,18 @@ out: return result; } +/* + * Add and remove page tables for crashkernel memory + * + * Provide an empty default implementation here -- architecture + * code may override this + */ +void __weak crash_map_reserved_pages(void) +{} + +void __weak crash_unmap_reserved_pages(void) +{} + #ifdef CONFIG_COMPAT asmlinkage long compat_sys_kexec_load(unsigned long entry, unsigned long nr_segments, @@ -1134,14 +1149,16 @@ int crash_shrink_memory(unsigned long new_size) goto unlock; } - start = roundup(start, PAGE_SIZE); - end = roundup(start + new_size, PAGE_SIZE); + start = roundup(start, KEXEC_CRASH_MEM_ALIGN); + end = roundup(start + new_size, KEXEC_CRASH_MEM_ALIGN); + crash_map_reserved_pages(); crash_free_reserved_phys_range(end, crashk_res.end); if ((start == end) && (crashk_res.parent != NULL)) release_resource(&crashk_res); crashk_res.end = end - 1; + crash_unmap_reserved_pages(); unlock: mutex_unlock(&kexec_mutex); -- cgit v1.2.3 From dab7a7b1538fec48790a321a58180adae79a3f3c Mon Sep 17 00:00:00 2001 From: Michael Holzheu Date: Sun, 30 Oct 2011 15:16:44 +0100 Subject: [S390] Add architecture code for unmapping crashkernel memory This patch implements the crash_map_pages() function for s390. KEXEC_CRASH_MEM_ALIGN is set to HPAGE_SIZE, in order to support kernel mappings that use large pages. We also use HPAGE_SIZE alignment for CONFIG_HUGETLB_PAGE=n in order to have the same 1 MiB alignment on all s390 systems. Signed-off-by: Michael Holzheu Signed-off-by: Martin Schwidefsky --- arch/s390/include/asm/kexec.h | 3 +++ arch/s390/kernel/machine_kexec.c | 31 +++++++++++++++++++++++++++++++ arch/s390/kernel/setup.c | 10 ++++++---- 3 files changed, 40 insertions(+), 4 deletions(-) diff --git a/arch/s390/include/asm/kexec.h b/arch/s390/include/asm/kexec.h index fb1c96fa348..cf4e47b0948 100644 --- a/arch/s390/include/asm/kexec.h +++ b/arch/s390/include/asm/kexec.h @@ -36,6 +36,9 @@ /* Allocate one page for the pdp and the second for the code */ #define KEXEC_CONTROL_PAGE_SIZE 4096 +/* Alignment of crashkernel memory */ +#define KEXEC_CRASH_MEM_ALIGN HPAGE_SIZE + /* The native architecture */ #define KEXEC_ARCH KEXEC_ARCH_S390 diff --git a/arch/s390/kernel/machine_kexec.c b/arch/s390/kernel/machine_kexec.c index 0ceac06a029..13a0b528c70 100644 --- a/arch/s390/kernel/machine_kexec.c +++ b/arch/s390/kernel/machine_kexec.c @@ -132,6 +132,37 @@ static int kdump_csum_valid(struct kimage *image) #endif } +/* + * Map or unmap crashkernel memory + */ +static void crash_map_pages(int enable) +{ + unsigned long size = resource_size(&crashk_res); + + BUG_ON(crashk_res.start % KEXEC_CRASH_MEM_ALIGN || + size % KEXEC_CRASH_MEM_ALIGN); + if (enable) + vmem_add_mapping(crashk_res.start, size); + else + vmem_remove_mapping(crashk_res.start, size); +} + +/* + * Map crashkernel memory + */ +void crash_map_reserved_pages(void) +{ + crash_map_pages(1); +} + +/* + * Unmap crashkernel memory + */ +void crash_unmap_reserved_pages(void) +{ + crash_map_pages(0); +} + /* * Give back memory to hypervisor before new kdump is loaded */ diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c index 63c81de665e..6f8e3777a0c 100644 --- a/arch/s390/kernel/setup.c +++ b/arch/s390/kernel/setup.c @@ -446,6 +446,7 @@ static void __init setup_resources(void) res->flags = IORESOURCE_BUSY | IORESOURCE_MEM; switch (memory_chunk[i].type) { case CHUNK_READ_WRITE: + case CHUNK_CRASHK: res->name = "System RAM"; break; case CHUNK_READ_ONLY: @@ -720,8 +721,8 @@ static void __init reserve_crashkernel(void) &crash_base); if (rc || crash_size == 0) return; - crash_base = PAGE_ALIGN(crash_base); - crash_size = PAGE_ALIGN(crash_size); + crash_base = ALIGN(crash_base, KEXEC_CRASH_MEM_ALIGN); + crash_size = ALIGN(crash_size, KEXEC_CRASH_MEM_ALIGN); if (register_memory_notifier(&kdump_mem_nb)) return; if (!crash_base) @@ -741,7 +742,7 @@ static void __init reserve_crashkernel(void) crashk_res.start = crash_base; crashk_res.end = crash_base + crash_size - 1; insert_resource(&iomem_resource, &crashk_res); - reserve_kdump_bootmem(crash_base, crash_size, CHUNK_READ_WRITE); + reserve_kdump_bootmem(crash_base, crash_size, CHUNK_CRASHK); pr_info("Reserving %lluMB of memory at %lluMB " "for crashkernel (System RAM: %luMB)\n", crash_size >> 20, crash_base >> 20, memory_end >> 20); @@ -816,7 +817,8 @@ setup_memory(void) for (i = 0; i < MEMORY_CHUNKS && memory_chunk[i].size > 0; i++) { unsigned long start_chunk, end_chunk, pfn; - if (memory_chunk[i].type != CHUNK_READ_WRITE) + if (memory_chunk[i].type != CHUNK_READ_WRITE && + memory_chunk[i].type != CHUNK_CRASHK) continue; start_chunk = PFN_DOWN(memory_chunk[i].addr); end_chunk = start_chunk + PFN_DOWN(memory_chunk[i].size); -- cgit v1.2.3 From 0edc8faa769095e98a5a5adc28db982f99f0d663 Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky Date: Sun, 30 Oct 2011 15:16:45 +0100 Subject: [S390] lowcore cleanup Remove the save_area_64 field from the 0xe00 - 0xf00 area in the lowcore. Use a free slot in the save_area array instead. Signed-off-by: Martin Schwidefsky --- arch/s390/include/asm/lowcore.h | 10 ++-------- arch/s390/kernel/asm-offsets.c | 1 - arch/s390/kernel/entry.S | 4 ++-- arch/s390/kernel/entry64.S | 4 ++-- arch/s390/kernel/reipl64.S | 4 ++-- 5 files changed, 8 insertions(+), 15 deletions(-) diff --git a/arch/s390/include/asm/lowcore.h b/arch/s390/include/asm/lowcore.h index 4f990a5e5b5..9e13c7d56cc 100644 --- a/arch/s390/include/asm/lowcore.h +++ b/arch/s390/include/asm/lowcore.h @@ -152,10 +152,7 @@ struct _lowcore { __u32 ipib; /* 0x0e00 */ __u32 ipib_checksum; /* 0x0e04 */ __u32 vmcore_info; /* 0x0e08 */ - - /* 64 bit save area */ - __u64 save_area_64; /* 0x0e0c */ - __u8 pad_0x0e14[0x0f00-0x0e14]; /* 0x0e14 */ + __u8 pad_0x0e0c[0x0f00-0x0e0c]; /* 0x0e0c */ /* Extended facility list */ __u64 stfle_fac_list[32]; /* 0x0f00 */ @@ -292,10 +289,7 @@ struct _lowcore { __u64 ipib; /* 0x0e00 */ __u32 ipib_checksum; /* 0x0e08 */ __u64 vmcore_info; /* 0x0e0c */ - - /* 64 bit save area */ - __u64 save_area_64; /* 0x0e14 */ - __u8 pad_0x0e1c[0x0f00-0x0e1c]; /* 0x0e1c */ + __u8 pad_0x0e14[0x0f00-0x0e14]; /* 0x0e14 */ /* Extended facility list */ __u64 stfle_fac_list[32]; /* 0x0f00 */ diff --git a/arch/s390/kernel/asm-offsets.c b/arch/s390/kernel/asm-offsets.c index 2b45591e158..1140035b1cd 100644 --- a/arch/s390/kernel/asm-offsets.c +++ b/arch/s390/kernel/asm-offsets.c @@ -141,7 +141,6 @@ int main(void) DEFINE(__LC_FPREGS_SAVE_AREA, offsetof(struct _lowcore, floating_pt_save_area)); DEFINE(__LC_GPREGS_SAVE_AREA, offsetof(struct _lowcore, gpregs_save_area)); DEFINE(__LC_CREGS_SAVE_AREA, offsetof(struct _lowcore, cregs_save_area)); - DEFINE(__LC_SAVE_AREA_64, offsetof(struct _lowcore, save_area_64)); #ifdef CONFIG_32BIT DEFINE(SAVE_AREA_BASE, offsetof(struct _lowcore, extended_save_area_addr)); #else /* CONFIG_32BIT */ diff --git a/arch/s390/kernel/entry.S b/arch/s390/kernel/entry.S index 02ec8fe7d03..195387ab7c9 100644 --- a/arch/s390/kernel/entry.S +++ b/arch/s390/kernel/entry.S @@ -853,13 +853,13 @@ restart_go: # PSW restart interrupt handler # ENTRY(psw_restart_int_handler) - st %r15,__LC_SAVE_AREA_64(%r0) # save r15 + st %r15,__LC_SAVE_AREA+48(%r0) # save r15 basr %r15,0 0: l %r15,.Lrestart_stack-0b(%r15) # load restart stack l %r15,0(%r15) ahi %r15,-SP_SIZE # make room for pt_regs stm %r0,%r14,SP_R0(%r15) # store gprs %r0-%r14 to stack - mvc SP_R15(4,%r15),__LC_SAVE_AREA_64(%r0)# store saved %r15 to stack + mvc SP_R15(4,%r15),__LC_SAVE_AREA+48(%r0)# store saved %r15 to stack mvc SP_PSW(8,%r15),__LC_RST_OLD_PSW(%r0) # store restart old psw xc __SF_BACKCHAIN(4,%r15),__SF_BACKCHAIN(%r15) # set backchain to 0 basr %r14,0 diff --git a/arch/s390/kernel/entry64.S b/arch/s390/kernel/entry64.S index 713da076053..3257f7f5555 100644 --- a/arch/s390/kernel/entry64.S +++ b/arch/s390/kernel/entry64.S @@ -869,12 +869,12 @@ restart_go: # PSW restart interrupt handler # ENTRY(psw_restart_int_handler) - stg %r15,__LC_SAVE_AREA_64(%r0) # save r15 + stg %r15,__LC_SAVE_AREA+120(%r0) # save r15 larl %r15,restart_stack # load restart stack lg %r15,0(%r15) aghi %r15,-SP_SIZE # make room for pt_regs stmg %r0,%r14,SP_R0(%r15) # store gprs %r0-%r14 to stack - mvc SP_R15(8,%r15),__LC_SAVE_AREA_64(%r0)# store saved %r15 to stack + mvc SP_R15(8,%r15),__LC_SAVE_AREA+120(%r0)# store saved %r15 to stack mvc SP_PSW(16,%r15),__LC_RST_OLD_PSW(%r0)# store restart old psw xc __SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15) # set backchain to 0 brasl %r14,do_restart diff --git a/arch/s390/kernel/reipl64.S b/arch/s390/kernel/reipl64.S index a0f5b686a3c..732a793ec53 100644 --- a/arch/s390/kernel/reipl64.S +++ b/arch/s390/kernel/reipl64.S @@ -17,11 +17,11 @@ # ENTRY(store_status) /* Save register one and load save area base */ - stg %r1,__LC_SAVE_AREA_64(%r0) + stg %r1,__LC_SAVE_AREA+120(%r0) lghi %r1,SAVE_AREA_BASE /* General purpose registers */ stmg %r0,%r15,__LC_GPREGS_SAVE_AREA-SAVE_AREA_BASE(%r1) - lg %r2,__LC_SAVE_AREA_64(%r0) + lg %r2,__LC_SAVE_AREA+120(%r0) stg %r2,__LC_GPREGS_SAVE_AREA-SAVE_AREA_BASE+8(%r1) /* Control registers */ stctg %c0,%c15,__LC_CREGS_SAVE_AREA-SAVE_AREA_BASE(%r1) -- cgit v1.2.3 From 3ee49c8f123257c72b74f398ef99ac3348c493cc Mon Sep 17 00:00:00 2001 From: Hendrik Brueckner Date: Sun, 30 Oct 2011 15:16:46 +0100 Subject: [S390] defconfig: switch on CONFIG_DEVTMPFS Switching on the DEVTMPFS kernel option helpes to maintain a /dev file system early in the boot process, especially, in limited environments. Signed-off-by: Hendrik Brueckner Signed-off-by: Martin Schwidefsky --- arch/s390/defconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/s390/defconfig b/arch/s390/defconfig index 29c82c640a8..6cf8e26b313 100644 --- a/arch/s390/defconfig +++ b/arch/s390/defconfig @@ -68,7 +68,7 @@ CONFIG_NET_CLS_RSVP6=m CONFIG_NET_CLS_ACT=y CONFIG_NET_ACT_POLICE=y CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" -# CONFIG_FIRMWARE_IN_KERNEL is not set +CONFIG_DEVTMPFS=y CONFIG_BLK_DEV_LOOP=m CONFIG_BLK_DEV_NBD=m CONFIG_BLK_DEV_RAM=y -- cgit v1.2.3 From 20b40a794baf3b4b0320c0a77ce944d5d1a01f25 Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky Date: Sun, 30 Oct 2011 15:16:47 +0100 Subject: [S390] signal race with restarting system calls For a ERESTARTNOHAND/ERESTARTSYS/ERESTARTNOINTR restarting system call do_signal will prepare the restart of the system call with a rewind of the PSW before calling get_signal_to_deliver (where the debugger might take control). For A ERESTART_RESTARTBLOCK restarting system call do_signal will set -EINTR as return code. There are two issues with this approach: 1) strace never sees ERESTARTNOHAND, ERESTARTSYS, ERESTARTNOINTR or ERESTART_RESTARTBLOCK as the rewinding already took place or the return code has been changed to -EINTR 2) if get_signal_to_deliver does not return with a signal to deliver the restart via the repeat of the svc instruction is left in place. This opens a race if another signal is made pending before the system call instruction can be reexecuted. The original system call will be restarted even if the second signal would have ended the system call with -EINTR. These two issues can be solved by dropping the early rewind of the system call before get_signal_to_deliver has been called and by using the TIF_RESTART_SVC magic to do the restart if no signal has to be delivered. The only situation where the system call restart via the repeat of the svc instruction is appropriate is when a SA_RESTART signal is delivered to user space. Unfortunately this breaks inferior calls by the debugger again. The system call number and the length of the system call instruction is lost over the inferior call and user space will see ERESTARTNOHAND/ ERESTARTSYS/ERESTARTNOINTR/ERESTART_RESTARTBLOCK. To correct this a new ptrace interface is added to save/restore the system call number and system call instruction length. Signed-off-by: Martin Schwidefsky --- arch/s390/include/asm/ptrace.h | 5 +- arch/s390/include/asm/syscall.h | 5 +- arch/s390/include/asm/thread_info.h | 1 + arch/s390/kernel/asm-offsets.c | 3 +- arch/s390/kernel/compat_signal.c | 2 +- arch/s390/kernel/entry.S | 28 +++++----- arch/s390/kernel/entry64.S | 30 +++++----- arch/s390/kernel/ptrace.c | 51 ++++++++++++++++- arch/s390/kernel/signal.c | 107 ++++++++++++++++++------------------ include/linux/elf.h | 1 + 10 files changed, 142 insertions(+), 91 deletions(-) diff --git a/arch/s390/include/asm/ptrace.h b/arch/s390/include/asm/ptrace.h index 62fd80c9e98..93c907b4776 100644 --- a/arch/s390/include/asm/ptrace.h +++ b/arch/s390/include/asm/ptrace.h @@ -328,8 +328,7 @@ struct pt_regs psw_t psw; unsigned long gprs[NUM_GPRS]; unsigned long orig_gpr2; - unsigned short ilc; - unsigned short svcnr; + unsigned int svc_code; }; /* @@ -487,6 +486,8 @@ typedef struct #define PTRACE_POKETEXT_AREA 0x5004 #define PTRACE_POKEDATA_AREA 0x5005 #define PTRACE_GET_LAST_BREAK 0x5006 +#define PTRACE_PEEK_SYSTEM_CALL 0x5007 +#define PTRACE_POKE_SYSTEM_CALL 0x5008 /* * PT_PROT definition is loosely based on hppa bsd definition in diff --git a/arch/s390/include/asm/syscall.h b/arch/s390/include/asm/syscall.h index 5c0246b955d..614267f6071 100644 --- a/arch/s390/include/asm/syscall.h +++ b/arch/s390/include/asm/syscall.h @@ -13,6 +13,7 @@ #define _ASM_SYSCALL_H 1 #include +#include #include /* @@ -25,7 +26,7 @@ extern const unsigned int sys_call_table[]; static inline long syscall_get_nr(struct task_struct *task, struct pt_regs *regs) { - return regs->svcnr ? regs->svcnr : -1; + return regs->svc_code ? (regs->svc_code & 0xffff) : -1; } static inline void syscall_rollback(struct task_struct *task, @@ -37,7 +38,7 @@ static inline void syscall_rollback(struct task_struct *task, static inline long syscall_get_error(struct task_struct *task, struct pt_regs *regs) { - return (regs->gprs[2] >= -4096UL) ? -regs->gprs[2] : 0; + return IS_ERR_VALUE(regs->gprs[2]) ? regs->gprs[2] : 0; } static inline long syscall_get_return_value(struct task_struct *task, diff --git a/arch/s390/include/asm/thread_info.h b/arch/s390/include/asm/thread_info.h index f9a9a10979c..0c4788eb5a6 100644 --- a/arch/s390/include/asm/thread_info.h +++ b/arch/s390/include/asm/thread_info.h @@ -48,6 +48,7 @@ struct thread_info { unsigned int cpu; /* current CPU */ int preempt_count; /* 0 => preemptable, <0 => BUG */ struct restart_block restart_block; + unsigned int system_call; __u64 user_timer; __u64 system_timer; unsigned long last_break; /* last breaking-event-address. */ diff --git a/arch/s390/kernel/asm-offsets.c b/arch/s390/kernel/asm-offsets.c index 1140035b1cd..751318765e2 100644 --- a/arch/s390/kernel/asm-offsets.c +++ b/arch/s390/kernel/asm-offsets.c @@ -45,8 +45,7 @@ int main(void) DEFINE(__PT_PSW, offsetof(struct pt_regs, psw)); DEFINE(__PT_GPRS, offsetof(struct pt_regs, gprs)); DEFINE(__PT_ORIG_GPR2, offsetof(struct pt_regs, orig_gpr2)); - DEFINE(__PT_ILC, offsetof(struct pt_regs, ilc)); - DEFINE(__PT_SVCNR, offsetof(struct pt_regs, svcnr)); + DEFINE(__PT_SVC_CODE, offsetof(struct pt_regs, svc_code)); DEFINE(__PT_SIZE, sizeof(struct pt_regs)); BLANK(); DEFINE(__SF_BACKCHAIN, offsetof(struct stack_frame, back_chain)); diff --git a/arch/s390/kernel/compat_signal.c b/arch/s390/kernel/compat_signal.c index a9a285b8c4a..d7c8e54c32e 100644 --- a/arch/s390/kernel/compat_signal.c +++ b/arch/s390/kernel/compat_signal.c @@ -342,7 +342,7 @@ static int restore_sigregs32(struct pt_regs *regs,_sigregs32 __user *sregs) return err; restore_fp_regs(¤t->thread.fp_regs); - regs->svcnr = 0; /* disable syscall checks */ + regs->svc_code = 0; /* disable syscall checks */ return 0; } diff --git a/arch/s390/kernel/entry.S b/arch/s390/kernel/entry.S index 195387ab7c9..afe3685d30e 100644 --- a/arch/s390/kernel/entry.S +++ b/arch/s390/kernel/entry.S @@ -43,8 +43,7 @@ SP_R13 = STACK_FRAME_OVERHEAD + __PT_GPRS + 52 SP_R14 = STACK_FRAME_OVERHEAD + __PT_GPRS + 56 SP_R15 = STACK_FRAME_OVERHEAD + __PT_GPRS + 60 SP_ORIG_R2 = STACK_FRAME_OVERHEAD + __PT_ORIG_GPR2 -SP_ILC = STACK_FRAME_OVERHEAD + __PT_ILC -SP_SVCNR = STACK_FRAME_OVERHEAD + __PT_SVCNR +SP_SVC_CODE = STACK_FRAME_OVERHEAD + __PT_SVC_CODE SP_SIZE = STACK_FRAME_OVERHEAD + __PT_SIZE _TIF_WORK_SVC = (_TIF_SIGPENDING | _TIF_NOTIFY_RESUME | _TIF_NEED_RESCHED | \ @@ -229,7 +228,7 @@ sysc_saveall: SAVE_ALL_SVC __LC_SVC_OLD_PSW,__LC_SAVE_AREA CREATE_STACK_FRAME __LC_SAVE_AREA mvc SP_PSW(8,%r15),__LC_SVC_OLD_PSW - mvc SP_ILC(4,%r15),__LC_SVC_ILC + mvc SP_SVC_CODE(4,%r15),__LC_SVC_ILC l %r12,__LC_THREAD_INFO # load pointer to thread_info struct sysc_vtime: UPDATE_VTIME __LC_EXIT_TIMER,__LC_SYNC_ENTER_TIMER,__LC_USER_TIMER @@ -239,12 +238,12 @@ sysc_update: mvc __LC_LAST_UPDATE_TIMER(8),__LC_SYNC_ENTER_TIMER sysc_do_svc: xr %r7,%r7 - icm %r7,3,SP_SVCNR(%r15) # load svc number and test for svc 0 + icm %r7,3,SP_SVC_CODE+2(%r15)# load svc number and test for svc 0 bnz BASED(sysc_nr_ok) # svc number > 0 # svc 0: system call number in %r1 cl %r1,BASED(.Lnr_syscalls) bnl BASED(sysc_nr_ok) - sth %r1,SP_SVCNR(%r15) + sth %r1,SP_SVC_CODE+2(%r15) lr %r7,%r1 # copy svc number to %r7 sysc_nr_ok: sll %r7,2 # svc number *4 @@ -335,10 +334,11 @@ sysc_notify_resume: # sysc_restart: ni __TI_flags+3(%r12),255-_TIF_RESTART_SVC # clear TIF_RESTART_SVC - l %r7,SP_R2(%r15) # load new svc number - mvc SP_R2(4,%r15),SP_ORIG_R2(%r15) # restore first argument lm %r2,%r6,SP_R2(%r15) # load svc arguments - sth %r7,SP_SVCNR(%r15) + xr %r7,%r7 # svc 0 returns -ENOSYS + clc SP_SVC_CODE+2(%r15),BASED(.Lnr_syscalls+2) + bnl BASED(sysc_nr_ok) # invalid svc number -> do svc 0 + icm %r7,3,SP_SVC_CODE+2(%r15)# load new svc number b BASED(sysc_nr_ok) # restart svc # @@ -346,7 +346,7 @@ sysc_restart: # sysc_singlestep: ni __TI_flags+3(%r12),255-_TIF_PER_TRAP # clear TIF_PER_TRAP - xc SP_SVCNR(2,%r15),SP_SVCNR(%r15) # clear svc number + xc SP_SVC_CODE(4,%r15),SP_SVC_CODE(%r15) # clear svc code la %r2,SP_PTREGS(%r15) # address of register-save area l %r1,BASED(.Lhandle_per) # load adr. of per handler la %r14,BASED(sysc_return) # load adr. of system return @@ -361,7 +361,7 @@ sysc_tracesys: la %r2,SP_PTREGS(%r15) # load pt_regs la %r3,0 xr %r0,%r0 - icm %r0,3,SP_SVCNR(%r15) + icm %r0,3,SP_SVC_CODE(%r15) st %r0,SP_R2(%r15) basr %r14,%r1 cl %r2,BASED(.Lnr_syscalls) @@ -454,7 +454,7 @@ ENTRY(pgm_check_handler) bnz BASED(pgm_per) # got per exception -> special case SAVE_ALL_PGM __LC_PGM_OLD_PSW,__LC_SAVE_AREA CREATE_STACK_FRAME __LC_SAVE_AREA - xc SP_ILC(4,%r15),SP_ILC(%r15) + xc SP_SVC_CODE(4,%r15),SP_SVC_CODE(%r15) mvc SP_PSW(8,%r15),__LC_PGM_OLD_PSW l %r12,__LC_THREAD_INFO # load pointer to thread_info struct tm SP_PSW+1(%r15),0x01 # interrupting from user ? @@ -531,7 +531,7 @@ pgm_svcper: SAVE_ALL_PGM __LC_SVC_OLD_PSW,__LC_SAVE_AREA CREATE_STACK_FRAME __LC_SAVE_AREA mvc SP_PSW(8,%r15),__LC_SVC_OLD_PSW - mvc SP_ILC(4,%r15),__LC_SVC_ILC + mvc SP_SVC_CODE(4,%r15),__LC_SVC_ILC l %r12,__LC_THREAD_INFO # load pointer to thread_info struct UPDATE_VTIME __LC_EXIT_TIMER,__LC_SYNC_ENTER_TIMER,__LC_USER_TIMER UPDATE_VTIME __LC_LAST_UPDATE_TIMER,__LC_EXIT_TIMER,__LC_SYSTEM_TIMER @@ -550,7 +550,7 @@ pgm_svcper: # kernel_per: REENABLE_IRQS - xc SP_SVCNR(2,%r15),SP_SVCNR(%r15) + xc SP_SVC_CODE(4,%r15),SP_SVC_CODE(%r15) la %r2,SP_PTREGS(%r15) # address of register-save area l %r1,BASED(.Lhandle_per) # load adr. of per handler basr %r14,%r1 # branch to do_single_step @@ -966,7 +966,7 @@ cleanup_system_call: st %r15,12(%r12) CREATE_STACK_FRAME __LC_SAVE_AREA mvc SP_PSW(8,%r15),__LC_SVC_OLD_PSW - mvc SP_ILC(4,%r15),__LC_SVC_ILC + mvc SP_SVC_CODE(4,%r15),__LC_SVC_ILC mvc 0(4,%r12),__LC_THREAD_INFO cleanup_vtime: clc __LC_RETURN_PSW+4(4),BASED(cleanup_system_call_insn+12) diff --git a/arch/s390/kernel/entry64.S b/arch/s390/kernel/entry64.S index 3257f7f5555..7ff07d3a29c 100644 --- a/arch/s390/kernel/entry64.S +++ b/arch/s390/kernel/entry64.S @@ -43,8 +43,7 @@ SP_R13 = STACK_FRAME_OVERHEAD + __PT_GPRS + 104 SP_R14 = STACK_FRAME_OVERHEAD + __PT_GPRS + 112 SP_R15 = STACK_FRAME_OVERHEAD + __PT_GPRS + 120 SP_ORIG_R2 = STACK_FRAME_OVERHEAD + __PT_ORIG_GPR2 -SP_ILC = STACK_FRAME_OVERHEAD + __PT_ILC -SP_SVCNR = STACK_FRAME_OVERHEAD + __PT_SVCNR +SP_SVC_CODE = STACK_FRAME_OVERHEAD + __PT_SVC_CODE SP_SIZE = STACK_FRAME_OVERHEAD + __PT_SIZE STACK_SHIFT = PAGE_SHIFT + THREAD_ORDER @@ -250,7 +249,7 @@ sysc_saveall: SAVE_ALL_SVC __LC_SVC_OLD_PSW,__LC_SAVE_AREA CREATE_STACK_FRAME __LC_SAVE_AREA mvc SP_PSW(16,%r15),__LC_SVC_OLD_PSW - mvc SP_ILC(4,%r15),__LC_SVC_ILC + mvc SP_SVC_CODE(4,%r15),__LC_SVC_ILC lg %r12,__LC_THREAD_INFO # load pointer to thread_info struct sysc_vtime: UPDATE_VTIME __LC_EXIT_TIMER,__LC_SYNC_ENTER_TIMER,__LC_USER_TIMER @@ -260,14 +259,14 @@ sysc_update: mvc __LC_LAST_UPDATE_TIMER(8),__LC_SYNC_ENTER_TIMER LAST_BREAK sysc_do_svc: - llgh %r7,SP_SVCNR(%r15) + llgh %r7,SP_SVC_CODE+2(%r15) slag %r7,%r7,2 # shift and test for svc 0 jnz sysc_nr_ok # svc 0: system call number in %r1 llgfr %r1,%r1 # clear high word in r1 cghi %r1,NR_syscalls jnl sysc_nr_ok - sth %r1,SP_SVCNR(%r15) + sth %r1,SP_SVC_CODE+2(%r15) slag %r7,%r1,2 # shift and test for svc 0 sysc_nr_ok: larl %r10,sys_call_table @@ -358,11 +357,12 @@ sysc_notify_resume: # sysc_restart: ni __TI_flags+7(%r12),255-_TIF_RESTART_SVC # clear TIF_RESTART_SVC - lg %r7,SP_R2(%r15) # load new svc number - mvc SP_R2(8,%r15),SP_ORIG_R2(%r15) # restore first argument lmg %r2,%r6,SP_R2(%r15) # load svc arguments - sth %r7,SP_SVCNR(%r15) - slag %r7,%r7,2 + lghi %r7,0 # svc 0 returns -ENOSYS + lh %r1,SP_SVC_CODE+2(%r15) # load new svc number + cghi %r1,NR_syscalls + jnl sysc_nr_ok # invalid svc number -> do svc 0 + slag %r7,%r1,2 j sysc_nr_ok # restart svc # @@ -370,7 +370,7 @@ sysc_restart: # sysc_singlestep: ni __TI_flags+7(%r12),255-_TIF_PER_TRAP # clear TIF_PER_TRAP - xc SP_SVCNR(2,%r15),SP_SVCNR(%r15) # clear svc number + xc SP_SVC_CODE(4,%r15),SP_SVC_CODE(%r15) # clear svc code la %r2,SP_PTREGS(%r15) # address of register-save area larl %r14,sysc_return # load adr. of system return jg do_per_trap @@ -382,7 +382,7 @@ sysc_singlestep: sysc_tracesys: la %r2,SP_PTREGS(%r15) # load pt_regs la %r3,0 - llgh %r0,SP_SVCNR(%r15) + llgh %r0,SP_SVC_CODE+2(%r15) stg %r0,SP_R2(%r15) brasl %r14,do_syscall_trace_enter lghi %r0,NR_syscalls @@ -470,7 +470,7 @@ ENTRY(pgm_check_handler) jnz pgm_per # got per exception -> special case SAVE_ALL_PGM __LC_PGM_OLD_PSW,__LC_SAVE_AREA CREATE_STACK_FRAME __LC_SAVE_AREA - xc SP_ILC(4,%r15),SP_ILC(%r15) + xc SP_SVC_CODE(4,%r15),SP_SVC_CODE(%r15) mvc SP_PSW(16,%r15),__LC_PGM_OLD_PSW lg %r12,__LC_THREAD_INFO # load pointer to thread_info struct HANDLE_SIE_INTERCEPT @@ -551,7 +551,7 @@ pgm_svcper: SAVE_ALL_PGM __LC_SVC_OLD_PSW,__LC_SAVE_AREA CREATE_STACK_FRAME __LC_SAVE_AREA mvc SP_PSW(16,%r15),__LC_SVC_OLD_PSW - mvc SP_ILC(4,%r15),__LC_SVC_ILC + mvc SP_SVC_CODE(4,%r15),__LC_SVC_ILC lg %r12,__LC_THREAD_INFO # load pointer to thread_info struct UPDATE_VTIME __LC_EXIT_TIMER,__LC_SYNC_ENTER_TIMER,__LC_USER_TIMER UPDATE_VTIME __LC_LAST_UPDATE_TIMER,__LC_EXIT_TIMER,__LC_SYSTEM_TIMER @@ -571,7 +571,7 @@ pgm_svcper: # kernel_per: REENABLE_IRQS - xc SP_SVCNR(2,%r15),SP_SVCNR(%r15) # clear svc number + xc SP_SVC_CODE(4,%r15),SP_SVC_CODE(%r15) # clear svc number la %r2,SP_PTREGS(%r15) # address of register-save area brasl %r14,do_per_trap j pgm_exit @@ -973,7 +973,7 @@ cleanup_system_call: stg %r11,0(%r12) CREATE_STACK_FRAME __LC_SAVE_AREA mvc SP_PSW(16,%r15),__LC_SVC_OLD_PSW - mvc SP_ILC(4,%r15),__LC_SVC_ILC + mvc SP_SVC_CODE(4,%r15),__LC_SVC_ILC mvc 8(8,%r12),__LC_THREAD_INFO cleanup_vtime: clc __LC_RETURN_PSW+8(8),BASED(cleanup_system_call_insn+24) diff --git a/arch/s390/kernel/ptrace.c b/arch/s390/kernel/ptrace.c index ae0e14b8880..bae1cc49fe9 100644 --- a/arch/s390/kernel/ptrace.c +++ b/arch/s390/kernel/ptrace.c @@ -42,6 +42,7 @@ enum s390_regset { REGSET_GENERAL, REGSET_FP, REGSET_LAST_BREAK, + REGSET_SYSTEM_CALL, REGSET_GENERAL_EXTENDED, }; @@ -303,6 +304,13 @@ static int __poke_user(struct task_struct *child, addr_t addr, addr_t data) high order bit but older gdb's rely on it */ data |= PSW_ADDR_AMODE; #endif + if (addr == (addr_t) &dummy->regs.psw.addr) + /* + * The debugger changed the instruction address, + * reset system call restart, see signal.c:do_signal + */ + task_thread_info(child)->system_call = 0; + *(addr_t *)((addr_t) &task_pt_regs(child)->psw + addr) = data; } else if (addr < (addr_t) (&dummy->regs.orig_gpr2)) { @@ -610,6 +618,11 @@ static int __poke_user_compat(struct task_struct *child, /* Build a 64 bit psw address from 31 bit address. */ task_pt_regs(child)->psw.addr = (__u64) tmp & PSW32_ADDR_INSN; + /* + * The debugger changed the instruction address, + * reset system call restart, see signal.c:do_signal + */ + task_thread_info(child)->system_call = 0; } else { /* gpr 0-15 */ *(__u32*)((addr_t) &task_pt_regs(child)->psw @@ -737,7 +750,7 @@ asmlinkage long do_syscall_trace_enter(struct pt_regs *regs) * debugger stored an invalid system call number. Skip * the system call and the system call restart handling. */ - regs->svcnr = 0; + regs->svc_code = 0; ret = -1; } @@ -899,6 +912,26 @@ static int s390_last_break_get(struct task_struct *target, #endif +static int s390_system_call_get(struct task_struct *target, + const struct user_regset *regset, + unsigned int pos, unsigned int count, + void *kbuf, void __user *ubuf) +{ + unsigned int *data = &task_thread_info(target)->system_call; + return user_regset_copyout(&pos, &count, &kbuf, &ubuf, + data, 0, sizeof(unsigned int)); +} + +static int s390_system_call_set(struct task_struct *target, + const struct user_regset *regset, + unsigned int pos, unsigned int count, + const void *kbuf, const void __user *ubuf) +{ + unsigned int *data = &task_thread_info(target)->system_call; + return user_regset_copyin(&pos, &count, &kbuf, &ubuf, + data, 0, sizeof(unsigned int)); +} + static const struct user_regset s390_regsets[] = { [REGSET_GENERAL] = { .core_note_type = NT_PRSTATUS, @@ -925,6 +958,14 @@ static const struct user_regset s390_regsets[] = { .get = s390_last_break_get, }, #endif + [REGSET_SYSTEM_CALL] = { + .core_note_type = NT_S390_SYSTEM_CALL, + .n = 1, + .size = sizeof(unsigned int), + .align = sizeof(unsigned int), + .get = s390_system_call_get, + .set = s390_system_call_set, + }, }; static const struct user_regset_view user_s390_view = { @@ -1104,6 +1145,14 @@ static const struct user_regset s390_compat_regsets[] = { .align = sizeof(long), .get = s390_compat_last_break_get, }, + [REGSET_SYSTEM_CALL] = { + .core_note_type = NT_S390_SYSTEM_CALL, + .n = 1, + .size = sizeof(compat_uint_t), + .align = sizeof(compat_uint_t), + .get = s390_system_call_get, + .set = s390_system_call_set, + }, [REGSET_GENERAL_EXTENDED] = { .core_note_type = NT_S390_HIGH_GPRS, .n = sizeof(s390_compat_regs_high) / sizeof(compat_long_t), diff --git a/arch/s390/kernel/signal.c b/arch/s390/kernel/signal.c index 9a40e1cc5ec..e751cab80e0 100644 --- a/arch/s390/kernel/signal.c +++ b/arch/s390/kernel/signal.c @@ -30,6 +30,7 @@ #include #include #include +#include #include "entry.h" #define _BLOCKABLE (~(sigmask(SIGKILL) | sigmask(SIGSTOP))) @@ -156,7 +157,7 @@ static int restore_sigregs(struct pt_regs *regs, _sigregs __user *sregs) current->thread.fp_regs.fpc &= FPC_VALID_MASK; restore_fp_regs(¤t->thread.fp_regs); - regs->svcnr = 0; /* disable syscall checks */ + regs->svc_code = 0; /* disable syscall checks */ return 0; } @@ -401,7 +402,6 @@ static int handle_signal(unsigned long sig, struct k_sigaction *ka, */ void do_signal(struct pt_regs *regs) { - unsigned long retval = 0, continue_addr = 0, restart_addr = 0; siginfo_t info; int signr; struct k_sigaction ka; @@ -421,54 +421,43 @@ void do_signal(struct pt_regs *regs) else oldset = ¤t->blocked; - /* Are we from a system call? */ - if (regs->svcnr) { - continue_addr = regs->psw.addr; - restart_addr = continue_addr - regs->ilc; - retval = regs->gprs[2]; - - /* Prepare for system call restart. We do this here so that a - debugger will see the already changed PSW. */ - switch (retval) { - case -ERESTARTNOHAND: - case -ERESTARTSYS: - case -ERESTARTNOINTR: - regs->gprs[2] = regs->orig_gpr2; - regs->psw.addr = restart_addr; - break; - case -ERESTART_RESTARTBLOCK: - regs->gprs[2] = -EINTR; - } - regs->svcnr = 0; /* Don't deal with this again. */ - } - - /* Get signal to deliver. When running under ptrace, at this point - the debugger may change all our registers ... */ + /* + * Get signal to deliver. When running under ptrace, at this point + * the debugger may change all our registers, including the system + * call information. + */ + current_thread_info()->system_call = regs->svc_code; signr = get_signal_to_deliver(&info, &ka, regs, NULL); - - /* Depending on the signal settings we may need to revert the - decision to restart the system call. */ - if (signr > 0 && regs->psw.addr == restart_addr) { - if (retval == -ERESTARTNOHAND - || (retval == -ERESTARTSYS - && !(current->sighand->action[signr-1].sa.sa_flags - & SA_RESTART))) { - regs->gprs[2] = -EINTR; - regs->psw.addr = continue_addr; - } - } + regs->svc_code = current_thread_info()->system_call; if (signr > 0) { /* Whee! Actually deliver the signal. */ - int ret; -#ifdef CONFIG_COMPAT - if (is_compat_task()) { - ret = handle_signal32(signr, &ka, &info, oldset, regs); - } - else -#endif - ret = handle_signal(signr, &ka, &info, oldset, regs); - if (!ret) { + if (regs->svc_code > 0) { + /* Check for system call restarting. */ + switch (regs->gprs[2]) { + case -ERESTART_RESTARTBLOCK: + case -ERESTARTNOHAND: + regs->gprs[2] = -EINTR; + break; + case -ERESTARTSYS: + if (!(ka.sa.sa_flags & SA_RESTART)) { + regs->gprs[2] = -EINTR; + break; + } + /* fallthrough */ + case -ERESTARTNOINTR: + regs->gprs[2] = regs->orig_gpr2; + regs->psw.addr = regs->psw.addr - + (regs->svc_code >> 16); + break; + } + /* No longer in a system call */ + regs->svc_code = 0; + } + + if ((is_compat_task() ? + handle_signal32(signr, &ka, &info, oldset, regs) : + handle_signal(signr, &ka, &info, oldset, regs)) == 0) { /* * A signal was successfully delivered; the saved * sigmask will have been stored in the signal frame, @@ -482,11 +471,28 @@ void do_signal(struct pt_regs *regs) * Let tracing know that we've done the handler setup. */ tracehook_signal_handler(signr, &info, &ka, regs, - test_thread_flag(TIF_SINGLE_STEP)); + test_thread_flag(TIF_SINGLE_STEP)); } return; } + /* No handlers present - check for system call restart */ + if (regs->svc_code > 0) { + switch (regs->gprs[2]) { + case -ERESTART_RESTARTBLOCK: + /* Restart with sys_restart_syscall */ + regs->svc_code = __NR_restart_syscall; + /* fallthrough */ + case -ERESTARTNOHAND: + case -ERESTARTSYS: + case -ERESTARTNOINTR: + /* Restart system call with magic TIF bit. */ + regs->gprs[2] = regs->orig_gpr2; + set_thread_flag(TIF_RESTART_SVC); + break; + } + } + /* * If there's no signal to deliver, we just put the saved sigmask back. */ @@ -494,13 +500,6 @@ void do_signal(struct pt_regs *regs) clear_thread_flag(TIF_RESTORE_SIGMASK); sigprocmask(SIG_SETMASK, ¤t->saved_sigmask, NULL); } - - /* Restart a different system call. */ - if (retval == -ERESTART_RESTARTBLOCK - && regs->psw.addr == continue_addr) { - regs->gprs[2] = __NR_restart_syscall; - set_thread_flag(TIF_RESTART_SVC); - } } void do_notify_resume(struct pt_regs *regs) diff --git a/include/linux/elf.h b/include/linux/elf.h index 110821cb6ea..31f0508d7da 100644 --- a/include/linux/elf.h +++ b/include/linux/elf.h @@ -395,6 +395,7 @@ typedef struct elf64_shdr { #define NT_S390_CTRS 0x304 /* s390 control registers */ #define NT_S390_PREFIX 0x305 /* s390 prefix register */ #define NT_S390_LAST_BREAK 0x306 /* s390 breaking event address */ +#define NT_S390_SYSTEM_CALL 0x307 /* s390 system call restart data */ #define NT_ARM_VFP 0x400 /* ARM VFP/NEON registers */ -- cgit v1.2.3 From ccf45cafb0805978e6f13a672caca0e536e87cad Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky Date: Sun, 30 Oct 2011 15:16:48 +0100 Subject: [S390] addressing mode limits and psw address wrapping An instruction with an address right below the adress limit for the current addressing mode will wrap. The instruction restart logic in the protection fault handler and the signal code need to follow the wrapping rules to find the correct instruction address. Signed-off-by: Martin Schwidefsky --- arch/s390/include/asm/processor.h | 22 +++++++++++++++++++++- arch/s390/include/asm/ptrace.h | 4 ++++ arch/s390/kernel/signal.c | 5 +++-- arch/s390/mm/fault.c | 2 +- 4 files changed, 29 insertions(+), 4 deletions(-) diff --git a/arch/s390/include/asm/processor.h b/arch/s390/include/asm/processor.h index a4b6229e5d4..306c93c1b18 100644 --- a/arch/s390/include/asm/processor.h +++ b/arch/s390/include/asm/processor.h @@ -187,7 +187,6 @@ static inline void __load_psw(psw_t psw) * Set PSW mask to specified value, while leaving the * PSW addr pointing to the next instruction. */ - static inline void __load_psw_mask (unsigned long mask) { unsigned long addr; @@ -212,6 +211,27 @@ static inline void __load_psw_mask (unsigned long mask) : "=&d" (addr), "=Q" (psw) : "Q" (psw) : "memory", "cc"); #endif /* __s390x__ */ } + +/* + * Rewind PSW instruction address by specified number of bytes. + */ +static inline unsigned long __rewind_psw(psw_t psw, unsigned long ilc) +{ +#ifndef __s390x__ + if (psw.addr & PSW_ADDR_AMODE) + /* 31 bit mode */ + return (psw.addr - ilc) | PSW_ADDR_AMODE; + /* 24 bit mode */ + return (psw.addr - ilc) & ((1UL << 24) - 1); +#else + unsigned long mask; + + mask = (psw.mask & PSW_MASK_EA) ? -1UL : + (psw.mask & PSW_MASK_BA) ? (1UL << 31) - 1 : + (1UL << 24) - 1; + return (psw.addr - ilc) & mask; +#endif +} /* * Function to stop a processor until an interruption occurred diff --git a/arch/s390/include/asm/ptrace.h b/arch/s390/include/asm/ptrace.h index 93c907b4776..2904cc66967 100644 --- a/arch/s390/include/asm/ptrace.h +++ b/arch/s390/include/asm/ptrace.h @@ -236,6 +236,8 @@ typedef struct #define PSW_MASK_ASC 0x0000C000UL #define PSW_MASK_CC 0x00003000UL #define PSW_MASK_PM 0x00000F00UL +#define PSW_MASK_EA 0x00000000UL +#define PSW_MASK_BA 0x00000000UL #define PSW_ADDR_AMODE 0x80000000UL #define PSW_ADDR_INSN 0x7FFFFFFFUL @@ -261,6 +263,8 @@ typedef struct #define PSW_MASK_ASC 0x0000C00000000000UL #define PSW_MASK_CC 0x0000300000000000UL #define PSW_MASK_PM 0x00000F0000000000UL +#define PSW_MASK_EA 0x0000000100000000UL +#define PSW_MASK_BA 0x0000000080000000UL #define PSW_ADDR_AMODE 0x0000000000000000UL #define PSW_ADDR_INSN 0xFFFFFFFFFFFFFFFFUL diff --git a/arch/s390/kernel/signal.c b/arch/s390/kernel/signal.c index e751cab80e0..058e372bada 100644 --- a/arch/s390/kernel/signal.c +++ b/arch/s390/kernel/signal.c @@ -447,8 +447,9 @@ void do_signal(struct pt_regs *regs) /* fallthrough */ case -ERESTARTNOINTR: regs->gprs[2] = regs->orig_gpr2; - regs->psw.addr = regs->psw.addr - - (regs->svc_code >> 16); + regs->psw.addr = + __rewind_psw(regs->psw, + regs->svc_code >> 16); break; } /* No longer in a system call */ diff --git a/arch/s390/mm/fault.c b/arch/s390/mm/fault.c index 9564fc779b2..a90fd91a9c7 100644 --- a/arch/s390/mm/fault.c +++ b/arch/s390/mm/fault.c @@ -393,7 +393,7 @@ void __kprobes do_protection_exception(struct pt_regs *regs, long pgm_int_code, int fault; /* Protection exception is suppressing, decrement psw address. */ - regs->psw.addr -= (pgm_int_code >> 16); + regs->psw.addr = __rewind_psw(regs->psw, pgm_int_code >> 16); /* * Check for low-address protection. This needs to be treated * as a special case because the translation exception code -- cgit v1.2.3 From b6ef5bb3d93efb95ba855a628740375c2280a59e Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky Date: Sun, 30 Oct 2011 15:16:49 +0100 Subject: [S390] add TIF_SYSCALL thread flag Add an explicit TIF_SYSCALL bit that indicates if a task is inside a system call. The svc_code in the pt_regs structure is now only valid if TIF_SYSCALL is set. With this definition TIF_RESTART_SVC can be replaced with TIF_SYSCALL. Overall do_signal is a bit more readable and it saves a few lines of code. Signed-off-by: Martin Schwidefsky --- arch/s390/include/asm/syscall.h | 3 +- arch/s390/include/asm/thread_info.h | 4 +-- arch/s390/kernel/compat_signal.c | 2 +- arch/s390/kernel/entry.S | 66 ++++++++++++++---------------------- arch/s390/kernel/entry64.S | 67 ++++++++++++++----------------------- arch/s390/kernel/ptrace.c | 2 +- arch/s390/kernel/signal.c | 19 +++++++---- 7 files changed, 69 insertions(+), 94 deletions(-) diff --git a/arch/s390/include/asm/syscall.h b/arch/s390/include/asm/syscall.h index 614267f6071..b239ff53b18 100644 --- a/arch/s390/include/asm/syscall.h +++ b/arch/s390/include/asm/syscall.h @@ -26,7 +26,8 @@ extern const unsigned int sys_call_table[]; static inline long syscall_get_nr(struct task_struct *task, struct pt_regs *regs) { - return regs->svc_code ? (regs->svc_code & 0xffff) : -1; + return test_tsk_thread_flag(task, TIF_SYSCALL) ? + (regs->svc_code & 0xffff) : -1; } static inline void syscall_rollback(struct task_struct *task, diff --git a/arch/s390/include/asm/thread_info.h b/arch/s390/include/asm/thread_info.h index 0c4788eb5a6..a23183423b1 100644 --- a/arch/s390/include/asm/thread_info.h +++ b/arch/s390/include/asm/thread_info.h @@ -85,10 +85,10 @@ static inline struct thread_info *current_thread_info(void) /* * thread information flags bit numbers */ +#define TIF_SYSCALL 0 /* inside a system call */ #define TIF_NOTIFY_RESUME 1 /* callback before returning to user */ #define TIF_SIGPENDING 2 /* signal pending */ #define TIF_NEED_RESCHED 3 /* rescheduling necessary */ -#define TIF_RESTART_SVC 4 /* restart svc with new svc number */ #define TIF_PER_TRAP 6 /* deliver sigtrap on return to user */ #define TIF_MCCK_PENDING 7 /* machine check handling is pending */ #define TIF_SYSCALL_TRACE 8 /* syscall trace active */ @@ -104,11 +104,11 @@ static inline struct thread_info *current_thread_info(void) #define TIF_SINGLE_STEP 20 /* This task is single stepped */ #define TIF_FREEZE 21 /* thread is freezing for suspend */ +#define _TIF_SYSCALL (1<thread.fp_regs); - regs->svc_code = 0; /* disable syscall checks */ + clear_thread_flag(TIF_SYSCALL); /* No longer in a system call */ return 0; } diff --git a/arch/s390/kernel/entry.S b/arch/s390/kernel/entry.S index afe3685d30e..b13157057e0 100644 --- a/arch/s390/kernel/entry.S +++ b/arch/s390/kernel/entry.S @@ -47,11 +47,11 @@ SP_SVC_CODE = STACK_FRAME_OVERHEAD + __PT_SVC_CODE SP_SIZE = STACK_FRAME_OVERHEAD + __PT_SIZE _TIF_WORK_SVC = (_TIF_SIGPENDING | _TIF_NOTIFY_RESUME | _TIF_NEED_RESCHED | \ - _TIF_MCCK_PENDING | _TIF_RESTART_SVC | _TIF_PER_TRAP ) + _TIF_MCCK_PENDING | _TIF_PER_TRAP ) _TIF_WORK_INT = (_TIF_SIGPENDING | _TIF_NOTIFY_RESUME | _TIF_NEED_RESCHED | \ _TIF_MCCK_PENDING) -_TIF_SYSCALL = (_TIF_SYSCALL_TRACE>>8 | _TIF_SYSCALL_AUDIT>>8 | \ - _TIF_SECCOMP>>8 | _TIF_SYSCALL_TRACEPOINT>>8) +_TIF_TRACE = (_TIF_SYSCALL_TRACE | _TIF_SYSCALL_AUDIT | _TIF_SECCOMP | \ + _TIF_SYSCALL_TRACEPOINT) STACK_SHIFT = PAGE_SHIFT + THREAD_ORDER STACK_SIZE = 1 << STACK_SHIFT @@ -227,9 +227,10 @@ ENTRY(system_call) sysc_saveall: SAVE_ALL_SVC __LC_SVC_OLD_PSW,__LC_SAVE_AREA CREATE_STACK_FRAME __LC_SAVE_AREA + l %r12,__LC_THREAD_INFO # load pointer to thread_info struct mvc SP_PSW(8,%r15),__LC_SVC_OLD_PSW mvc SP_SVC_CODE(4,%r15),__LC_SVC_ILC - l %r12,__LC_THREAD_INFO # load pointer to thread_info struct + oi __TI_flags+3(%r12),_TIF_SYSCALL sysc_vtime: UPDATE_VTIME __LC_EXIT_TIMER,__LC_SYNC_ENTER_TIMER,__LC_USER_TIMER sysc_stime: @@ -248,7 +249,7 @@ sysc_do_svc: sysc_nr_ok: sll %r7,2 # svc number *4 l %r10,BASED(.Lsysc_table) - tm __TI_flags+2(%r12),_TIF_SYSCALL + tm __TI_flags+2(%r12),_TIF_TRACE >> 8 mvc SP_ARGS(4,%r15),SP_R7(%r15) l %r8,0(%r7,%r10) # get system call addr. bnz BASED(sysc_tracesys) @@ -258,23 +259,19 @@ sysc_nr_ok: sysc_return: LOCKDEP_SYS_EXIT sysc_tif: + tm SP_PSW+1(%r15),0x01 # returning to user ? + bno BASED(sysc_restore) tm __TI_flags+3(%r12),_TIF_WORK_SVC bnz BASED(sysc_work) # there is work to do (signals etc.) + ni __TI_flags+3(%r12),255-_TIF_SYSCALL sysc_restore: RESTORE_ALL __LC_RETURN_PSW,1 sysc_done: -# -# There is work to do, but first we need to check if we return to userspace. -# -sysc_work: - tm SP_PSW+1(%r15),0x01 # returning to user ? - bno BASED(sysc_restore) - # # One of the work bits is on. Find out which one. # -sysc_work_tif: +sysc_work: tm __TI_flags+3(%r12),_TIF_MCCK_PENDING bo BASED(sysc_mcck_pending) tm __TI_flags+3(%r12),_TIF_NEED_RESCHED @@ -283,8 +280,6 @@ sysc_work_tif: bo BASED(sysc_sigpending) tm __TI_flags+3(%r12),_TIF_NOTIFY_RESUME bo BASED(sysc_notify_resume) - tm __TI_flags+3(%r12),_TIF_RESTART_SVC - bo BASED(sysc_restart) tm __TI_flags+3(%r12),_TIF_PER_TRAP bo BASED(sysc_singlestep) b BASED(sysc_return) # beware of critical section cleanup @@ -313,11 +308,14 @@ sysc_sigpending: la %r2,SP_PTREGS(%r15) # load pt_regs l %r1,BASED(.Ldo_signal) basr %r14,%r1 # call do_signal - tm __TI_flags+3(%r12),_TIF_RESTART_SVC - bo BASED(sysc_restart) - tm __TI_flags+3(%r12),_TIF_PER_TRAP - bo BASED(sysc_singlestep) - b BASED(sysc_return) + tm __TI_flags+3(%r12),_TIF_SYSCALL + bno BASED(sysc_return) + lm %r2,%r6,SP_R2(%r15) # load svc arguments + xr %r7,%r7 # svc 0 returns -ENOSYS + clc SP_SVC_CODE+2(2,%r15),BASED(.Lnr_syscalls+2) + bnl BASED(sysc_nr_ok) # invalid svc number -> do svc 0 + icm %r7,3,SP_SVC_CODE+2(%r15)# load new svc number + b BASED(sysc_nr_ok) # restart svc # # _TIF_NOTIFY_RESUME is set, call do_notify_resume @@ -328,25 +326,11 @@ sysc_notify_resume: la %r14,BASED(sysc_return) br %r1 # call do_notify_resume - -# -# _TIF_RESTART_SVC is set, set up registers and restart svc -# -sysc_restart: - ni __TI_flags+3(%r12),255-_TIF_RESTART_SVC # clear TIF_RESTART_SVC - lm %r2,%r6,SP_R2(%r15) # load svc arguments - xr %r7,%r7 # svc 0 returns -ENOSYS - clc SP_SVC_CODE+2(%r15),BASED(.Lnr_syscalls+2) - bnl BASED(sysc_nr_ok) # invalid svc number -> do svc 0 - icm %r7,3,SP_SVC_CODE+2(%r15)# load new svc number - b BASED(sysc_nr_ok) # restart svc - # # _TIF_PER_TRAP is set, call do_per_trap # sysc_singlestep: - ni __TI_flags+3(%r12),255-_TIF_PER_TRAP # clear TIF_PER_TRAP - xc SP_SVC_CODE(4,%r15),SP_SVC_CODE(%r15) # clear svc code + ni __TI_flags+3(%r12),255-(_TIF_SYSCALL | _TIF_PER_TRAP) la %r2,SP_PTREGS(%r15) # address of register-save area l %r1,BASED(.Lhandle_per) # load adr. of per handler la %r14,BASED(sysc_return) # load adr. of system return @@ -376,7 +360,7 @@ sysc_tracego: basr %r14,%r8 # call sys_xxx st %r2,SP_R2(%r15) # store return value sysc_tracenogo: - tm __TI_flags+2(%r12),_TIF_SYSCALL + tm __TI_flags+2(%r12),_TIF_TRACE >> 8 bz BASED(sysc_return) l %r1,BASED(.Ltrace_exit) la %r2,SP_PTREGS(%r15) # load pt_regs @@ -454,7 +438,6 @@ ENTRY(pgm_check_handler) bnz BASED(pgm_per) # got per exception -> special case SAVE_ALL_PGM __LC_PGM_OLD_PSW,__LC_SAVE_AREA CREATE_STACK_FRAME __LC_SAVE_AREA - xc SP_SVC_CODE(4,%r15),SP_SVC_CODE(%r15) mvc SP_PSW(8,%r15),__LC_PGM_OLD_PSW l %r12,__LC_THREAD_INFO # load pointer to thread_info struct tm SP_PSW+1(%r15),0x01 # interrupting from user ? @@ -530,9 +513,10 @@ pgm_exit2: pgm_svcper: SAVE_ALL_PGM __LC_SVC_OLD_PSW,__LC_SAVE_AREA CREATE_STACK_FRAME __LC_SAVE_AREA + l %r12,__LC_THREAD_INFO # load pointer to thread_info struct mvc SP_PSW(8,%r15),__LC_SVC_OLD_PSW mvc SP_SVC_CODE(4,%r15),__LC_SVC_ILC - l %r12,__LC_THREAD_INFO # load pointer to thread_info struct + oi __TI_flags+3(%r12),(_TIF_SYSCALL | _TIF_PER_TRAP) UPDATE_VTIME __LC_EXIT_TIMER,__LC_SYNC_ENTER_TIMER,__LC_USER_TIMER UPDATE_VTIME __LC_LAST_UPDATE_TIMER,__LC_EXIT_TIMER,__LC_SYSTEM_TIMER mvc __LC_LAST_UPDATE_TIMER(8),__LC_SYNC_ENTER_TIMER @@ -540,7 +524,6 @@ pgm_svcper: mvc __THREAD_per_cause(2,%r8),__LC_PER_CAUSE mvc __THREAD_per_address(4,%r8),__LC_PER_ADDRESS mvc __THREAD_per_paid(1,%r8),__LC_PER_PAID - oi __TI_flags+3(%r12),_TIF_PER_TRAP # set TIF_PER_TRAP stosm __SF_EMPTY(%r15),0x03 # reenable interrupts lm %r2,%r6,SP_R2(%r15) # load svc arguments b BASED(sysc_do_svc) @@ -550,7 +533,6 @@ pgm_svcper: # kernel_per: REENABLE_IRQS - xc SP_SVC_CODE(4,%r15),SP_SVC_CODE(%r15) la %r2,SP_PTREGS(%r15) # address of register-save area l %r1,BASED(.Lhandle_per) # load adr. of per handler basr %r14,%r1 # branch to do_single_step @@ -965,9 +947,11 @@ cleanup_system_call: s %r15,BASED(.Lc_spsize) # make room for registers & psw st %r15,12(%r12) CREATE_STACK_FRAME __LC_SAVE_AREA + mvc 0(4,%r12),__LC_THREAD_INFO + l %r12,__LC_THREAD_INFO mvc SP_PSW(8,%r15),__LC_SVC_OLD_PSW mvc SP_SVC_CODE(4,%r15),__LC_SVC_ILC - mvc 0(4,%r12),__LC_THREAD_INFO + oi __TI_flags+3(%r12),_TIF_SYSCALL cleanup_vtime: clc __LC_RETURN_PSW+4(4),BASED(cleanup_system_call_insn+12) bhe BASED(cleanup_stime) diff --git a/arch/s390/kernel/entry64.S b/arch/s390/kernel/entry64.S index 7ff07d3a29c..e34907560c2 100644 --- a/arch/s390/kernel/entry64.S +++ b/arch/s390/kernel/entry64.S @@ -50,11 +50,11 @@ STACK_SHIFT = PAGE_SHIFT + THREAD_ORDER STACK_SIZE = 1 << STACK_SHIFT _TIF_WORK_SVC = (_TIF_SIGPENDING | _TIF_NOTIFY_RESUME | _TIF_NEED_RESCHED | \ - _TIF_MCCK_PENDING | _TIF_RESTART_SVC | _TIF_PER_TRAP ) + _TIF_MCCK_PENDING | _TIF_PER_TRAP ) _TIF_WORK_INT = (_TIF_SIGPENDING | _TIF_NOTIFY_RESUME | _TIF_NEED_RESCHED | \ _TIF_MCCK_PENDING) -_TIF_SYSCALL = (_TIF_SYSCALL_TRACE>>8 | _TIF_SYSCALL_AUDIT>>8 | \ - _TIF_SECCOMP>>8 | _TIF_SYSCALL_TRACEPOINT>>8) +_TIF_TRACE = (_TIF_SYSCALL_TRACE | _TIF_SYSCALL_AUDIT | _TIF_SECCOMP | \ + _TIF_SYSCALL_TRACEPOINT) _TIF_EXIT_SIE = (_TIF_SIGPENDING | _TIF_NEED_RESCHED | _TIF_MCCK_PENDING) #define BASED(name) name-system_call(%r13) @@ -248,9 +248,10 @@ ENTRY(system_call) sysc_saveall: SAVE_ALL_SVC __LC_SVC_OLD_PSW,__LC_SAVE_AREA CREATE_STACK_FRAME __LC_SAVE_AREA + lg %r12,__LC_THREAD_INFO # load pointer to thread_info struct mvc SP_PSW(16,%r15),__LC_SVC_OLD_PSW mvc SP_SVC_CODE(4,%r15),__LC_SVC_ILC - lg %r12,__LC_THREAD_INFO # load pointer to thread_info struct + oi __TI_flags+7(%r12),_TIF_SYSCALL sysc_vtime: UPDATE_VTIME __LC_EXIT_TIMER,__LC_SYNC_ENTER_TIMER,__LC_USER_TIMER sysc_stime: @@ -276,7 +277,7 @@ sysc_nr_ok: larl %r10,sys_call_table_emu # use 31 bit emulation system calls sysc_noemu: #endif - tm __TI_flags+6(%r12),_TIF_SYSCALL + tm __TI_flags+6(%r12),_TIF_TRACE >> 8 mvc SP_ARGS(8,%r15),SP_R7(%r15) lgf %r8,0(%r7,%r10) # load address of system call routine jnz sysc_tracesys @@ -286,23 +287,19 @@ sysc_noemu: sysc_return: LOCKDEP_SYS_EXIT sysc_tif: + tm SP_PSW+1(%r15),0x01 # returning to user ? + jno sysc_restore tm __TI_flags+7(%r12),_TIF_WORK_SVC jnz sysc_work # there is work to do (signals etc.) + ni __TI_flags+7(%r12),255-_TIF_SYSCALL sysc_restore: RESTORE_ALL __LC_RETURN_PSW,1 sysc_done: -# -# There is work to do, but first we need to check if we return to userspace. -# -sysc_work: - tm SP_PSW+1(%r15),0x01 # returning to user ? - jno sysc_restore - # # One of the work bits is on. Find out which one. # -sysc_work_tif: +sysc_work: tm __TI_flags+7(%r12),_TIF_MCCK_PENDING jo sysc_mcck_pending tm __TI_flags+7(%r12),_TIF_NEED_RESCHED @@ -311,8 +308,6 @@ sysc_work_tif: jo sysc_sigpending tm __TI_flags+7(%r12),_TIF_NOTIFY_RESUME jo sysc_notify_resume - tm __TI_flags+7(%r12),_TIF_RESTART_SVC - jo sysc_restart tm __TI_flags+7(%r12),_TIF_PER_TRAP jo sysc_singlestep j sysc_return # beware of critical section cleanup @@ -338,11 +333,15 @@ sysc_sigpending: ni __TI_flags+7(%r12),255-_TIF_PER_TRAP # clear TIF_PER_TRAP la %r2,SP_PTREGS(%r15) # load pt_regs brasl %r14,do_signal # call do_signal - tm __TI_flags+7(%r12),_TIF_RESTART_SVC - jo sysc_restart - tm __TI_flags+7(%r12),_TIF_PER_TRAP - jo sysc_singlestep - j sysc_return + tm __TI_flags+7(%r12),_TIF_SYSCALL + jno sysc_return + lmg %r2,%r6,SP_R2(%r15) # load svc arguments + lghi %r7,0 # svc 0 returns -ENOSYS + lh %r1,SP_SVC_CODE+2(%r15) # load new svc number + cghi %r1,NR_syscalls + jnl sysc_nr_ok # invalid svc number -> do svc 0 + slag %r7,%r1,2 + j sysc_nr_ok # restart svc # # _TIF_NOTIFY_RESUME is set, call do_notify_resume @@ -352,25 +351,11 @@ sysc_notify_resume: larl %r14,sysc_return jg do_notify_resume # call do_notify_resume -# -# _TIF_RESTART_SVC is set, set up registers and restart svc -# -sysc_restart: - ni __TI_flags+7(%r12),255-_TIF_RESTART_SVC # clear TIF_RESTART_SVC - lmg %r2,%r6,SP_R2(%r15) # load svc arguments - lghi %r7,0 # svc 0 returns -ENOSYS - lh %r1,SP_SVC_CODE+2(%r15) # load new svc number - cghi %r1,NR_syscalls - jnl sysc_nr_ok # invalid svc number -> do svc 0 - slag %r7,%r1,2 - j sysc_nr_ok # restart svc - # # _TIF_PER_TRAP is set, call do_per_trap # sysc_singlestep: - ni __TI_flags+7(%r12),255-_TIF_PER_TRAP # clear TIF_PER_TRAP - xc SP_SVC_CODE(4,%r15),SP_SVC_CODE(%r15) # clear svc code + ni __TI_flags+7(%r12),255-(_TIF_SYSCALL | _TIF_PER_TRAP) la %r2,SP_PTREGS(%r15) # address of register-save area larl %r14,sysc_return # load adr. of system return jg do_per_trap @@ -397,7 +382,7 @@ sysc_tracego: basr %r14,%r8 # call sys_xxx stg %r2,SP_R2(%r15) # store return value sysc_tracenogo: - tm __TI_flags+6(%r12),_TIF_SYSCALL + tm __TI_flags+6(%r12),_TIF_TRACE >> 8 jz sysc_return la %r2,SP_PTREGS(%r15) # load pt_regs larl %r14,sysc_return # return point is sysc_return @@ -470,7 +455,6 @@ ENTRY(pgm_check_handler) jnz pgm_per # got per exception -> special case SAVE_ALL_PGM __LC_PGM_OLD_PSW,__LC_SAVE_AREA CREATE_STACK_FRAME __LC_SAVE_AREA - xc SP_SVC_CODE(4,%r15),SP_SVC_CODE(%r15) mvc SP_PSW(16,%r15),__LC_PGM_OLD_PSW lg %r12,__LC_THREAD_INFO # load pointer to thread_info struct HANDLE_SIE_INTERCEPT @@ -550,9 +534,10 @@ pgm_exit2: pgm_svcper: SAVE_ALL_PGM __LC_SVC_OLD_PSW,__LC_SAVE_AREA CREATE_STACK_FRAME __LC_SAVE_AREA + lg %r12,__LC_THREAD_INFO # load pointer to thread_info struct mvc SP_PSW(16,%r15),__LC_SVC_OLD_PSW mvc SP_SVC_CODE(4,%r15),__LC_SVC_ILC - lg %r12,__LC_THREAD_INFO # load pointer to thread_info struct + oi __TI_flags+7(%r12),(_TIF_SYSCALL | _TIF_PER_TRAP) UPDATE_VTIME __LC_EXIT_TIMER,__LC_SYNC_ENTER_TIMER,__LC_USER_TIMER UPDATE_VTIME __LC_LAST_UPDATE_TIMER,__LC_EXIT_TIMER,__LC_SYSTEM_TIMER mvc __LC_LAST_UPDATE_TIMER(8),__LC_SYNC_ENTER_TIMER @@ -561,7 +546,6 @@ pgm_svcper: mvc __THREAD_per_cause(2,%r8),__LC_PER_CAUSE mvc __THREAD_per_address(8,%r8),__LC_PER_ADDRESS mvc __THREAD_per_paid(1,%r8),__LC_PER_PAID - oi __TI_flags+7(%r12),_TIF_PER_TRAP # set TIF_PER_TRAP stosm __SF_EMPTY(%r15),0x03 # reenable interrupts lmg %r2,%r6,SP_R2(%r15) # load svc arguments j sysc_do_svc @@ -571,7 +555,6 @@ pgm_svcper: # kernel_per: REENABLE_IRQS - xc SP_SVC_CODE(4,%r15),SP_SVC_CODE(%r15) # clear svc number la %r2,SP_PTREGS(%r15) # address of register-save area brasl %r14,do_per_trap j pgm_exit @@ -972,9 +955,11 @@ cleanup_system_call: stg %r15,32(%r12) stg %r11,0(%r12) CREATE_STACK_FRAME __LC_SAVE_AREA + mvc 8(8,%r12),__LC_THREAD_INFO + lg %r12,__LC_THREAD_INFO mvc SP_PSW(16,%r15),__LC_SVC_OLD_PSW mvc SP_SVC_CODE(4,%r15),__LC_SVC_ILC - mvc 8(8,%r12),__LC_THREAD_INFO + oi __TI_flags+7(%r12),_TIF_SYSCALL cleanup_vtime: clc __LC_RETURN_PSW+8(8),BASED(cleanup_system_call_insn+24) jhe cleanup_stime diff --git a/arch/s390/kernel/ptrace.c b/arch/s390/kernel/ptrace.c index bae1cc49fe9..3519c99ae0c 100644 --- a/arch/s390/kernel/ptrace.c +++ b/arch/s390/kernel/ptrace.c @@ -750,7 +750,7 @@ asmlinkage long do_syscall_trace_enter(struct pt_regs *regs) * debugger stored an invalid system call number. Skip * the system call and the system call restart handling. */ - regs->svc_code = 0; + clear_thread_flag(TIF_SYSCALL); ret = -1; } diff --git a/arch/s390/kernel/signal.c b/arch/s390/kernel/signal.c index 058e372bada..0e905cb7604 100644 --- a/arch/s390/kernel/signal.c +++ b/arch/s390/kernel/signal.c @@ -157,7 +157,7 @@ static int restore_sigregs(struct pt_regs *regs, _sigregs __user *sregs) current->thread.fp_regs.fpc &= FPC_VALID_MASK; restore_fp_regs(¤t->thread.fp_regs); - regs->svc_code = 0; /* disable syscall checks */ + clear_thread_flag(TIF_SYSCALL); /* No longer in a system call */ return 0; } @@ -426,13 +426,14 @@ void do_signal(struct pt_regs *regs) * the debugger may change all our registers, including the system * call information. */ - current_thread_info()->system_call = regs->svc_code; + current_thread_info()->system_call = + test_thread_flag(TIF_SYSCALL) ? regs->svc_code : 0; signr = get_signal_to_deliver(&info, &ka, regs, NULL); - regs->svc_code = current_thread_info()->system_call; if (signr > 0) { /* Whee! Actually deliver the signal. */ - if (regs->svc_code > 0) { + if (current_thread_info()->system_call) { + regs->svc_code = current_thread_info()->system_call; /* Check for system call restarting. */ switch (regs->gprs[2]) { case -ERESTART_RESTARTBLOCK: @@ -453,7 +454,7 @@ void do_signal(struct pt_regs *regs) break; } /* No longer in a system call */ - regs->svc_code = 0; + clear_thread_flag(TIF_SYSCALL); } if ((is_compat_task() ? @@ -478,7 +479,8 @@ void do_signal(struct pt_regs *regs) } /* No handlers present - check for system call restart */ - if (regs->svc_code > 0) { + if (current_thread_info()->system_call) { + regs->svc_code = current_thread_info()->system_call; switch (regs->gprs[2]) { case -ERESTART_RESTARTBLOCK: /* Restart with sys_restart_syscall */ @@ -489,7 +491,10 @@ void do_signal(struct pt_regs *regs) case -ERESTARTNOINTR: /* Restart system call with magic TIF bit. */ regs->gprs[2] = regs->orig_gpr2; - set_thread_flag(TIF_RESTART_SVC); + set_thread_flag(TIF_SYSCALL); + break; + default: + clear_thread_flag(TIF_SYSCALL); break; } } -- cgit v1.2.3 From b50511e41aa51a89b4176784a670582424bc7db6 Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky Date: Sun, 30 Oct 2011 15:16:50 +0100 Subject: [S390] cleanup psw related bits and pieces Split out addressing mode bits from PSW_BASE_BITS, rename PSW_BASE_BITS to PSW_MASK_BASE, get rid of psw_user32_bits, remove unused function enabled_wait(), introduce PSW_MASK_USER, and drop PSW_MASK_MERGE macros. Change psw_kernel_bits / psw_user_bits to contain only the bits that are always set in the respective mode. Signed-off-by: Martin Schwidefsky --- arch/s390/include/asm/compat.h | 13 +++++----- arch/s390/include/asm/processor.h | 30 ++++++++-------------- arch/s390/include/asm/ptrace.h | 20 +++++---------- arch/s390/include/asm/system.h | 6 +++-- arch/s390/kernel/compat_linux.c | 9 +++---- arch/s390/kernel/compat_signal.c | 10 ++++---- arch/s390/kernel/early.c | 2 +- arch/s390/kernel/ipl.c | 4 +-- arch/s390/kernel/machine_kexec.c | 2 +- arch/s390/kernel/process.c | 3 ++- arch/s390/kernel/ptrace.c | 36 +++++++++++++------------- arch/s390/kernel/setup.c | 53 +++++++++++++++++---------------------- arch/s390/kernel/signal.c | 7 +++--- arch/s390/kernel/smp.c | 16 +++++++----- arch/s390/kernel/traps.c | 2 +- arch/s390/kernel/vtime.c | 9 ++++--- arch/s390/lib/delay.c | 3 ++- arch/s390/mm/fault.c | 2 +- drivers/s390/char/sclp_cmd.c | 4 +-- drivers/s390/char/sclp_quiesce.c | 3 ++- 20 files changed, 109 insertions(+), 125 deletions(-) diff --git a/arch/s390/include/asm/compat.h b/arch/s390/include/asm/compat.h index cdb9b78f6c0..2e49748b27d 100644 --- a/arch/s390/include/asm/compat.h +++ b/arch/s390/include/asm/compat.h @@ -12,6 +12,7 @@ #define PSW32_MASK_IO 0x02000000UL #define PSW32_MASK_EXT 0x01000000UL #define PSW32_MASK_KEY 0x00F00000UL +#define PSW32_MASK_BASE 0x00080000UL /* Always one */ #define PSW32_MASK_MCHECK 0x00040000UL #define PSW32_MASK_WAIT 0x00020000UL #define PSW32_MASK_PSTATE 0x00010000UL @@ -19,21 +20,19 @@ #define PSW32_MASK_CC 0x00003000UL #define PSW32_MASK_PM 0x00000f00UL -#define PSW32_ADDR_AMODE31 0x80000000UL +#define PSW32_MASK_USER 0x00003F00UL + +#define PSW32_ADDR_AMODE 0x80000000UL #define PSW32_ADDR_INSN 0x7FFFFFFFUL -#define PSW32_BASE_BITS 0x00080000UL +#define PSW32_DEFAULT_KEY (((u32) PAGE_DEFAULT_ACC) << 20) #define PSW32_ASC_PRIMARY 0x00000000UL #define PSW32_ASC_ACCREG 0x00004000UL #define PSW32_ASC_SECONDARY 0x00008000UL #define PSW32_ASC_HOME 0x0000C000UL -#define PSW32_MASK_MERGE(CURRENT,NEW) \ - (((CURRENT) & ~(PSW32_MASK_CC|PSW32_MASK_PM)) | \ - ((NEW) & (PSW32_MASK_CC|PSW32_MASK_PM))) - -extern long psw32_user_bits; +extern u32 psw32_user_bits; #define COMPAT_USER_HZ 100 #define COMPAT_UTS_MACHINE "s390\0\0\0\0" diff --git a/arch/s390/include/asm/processor.h b/arch/s390/include/asm/processor.h index 306c93c1b18..20ffb12d4ae 100644 --- a/arch/s390/include/asm/processor.h +++ b/arch/s390/include/asm/processor.h @@ -118,17 +118,17 @@ struct stack_frame { /* * Do necessary setup to start up a new thread. */ -#define start_thread(regs, new_psw, new_stackp) do { \ - regs->psw.mask = psw_user_bits; \ - regs->psw.addr = new_psw | PSW_ADDR_AMODE; \ - regs->gprs[15] = new_stackp; \ +#define start_thread(regs, new_psw, new_stackp) do { \ + regs->psw.mask = psw_user_bits | PSW_MASK_EA | PSW_MASK_BA; \ + regs->psw.addr = new_psw | PSW_ADDR_AMODE; \ + regs->gprs[15] = new_stackp; \ } while (0) -#define start_thread31(regs, new_psw, new_stackp) do { \ - regs->psw.mask = psw_user32_bits; \ - regs->psw.addr = new_psw | PSW_ADDR_AMODE; \ - regs->gprs[15] = new_stackp; \ - crst_table_downgrade(current->mm, 1UL << 31); \ +#define start_thread31(regs, new_psw, new_stackp) do { \ + regs->psw.mask = psw_user_bits | PSW_MASK_BA; \ + regs->psw.addr = new_psw | PSW_ADDR_AMODE; \ + regs->gprs[15] = new_stackp; \ + crst_table_downgrade(current->mm, 1UL << 31); \ } while (0) /* Forward declaration, a strange C thing */ @@ -233,25 +233,15 @@ static inline unsigned long __rewind_psw(psw_t psw, unsigned long ilc) #endif } -/* - * Function to stop a processor until an interruption occurred - */ -static inline void enabled_wait(void) -{ - __load_psw_mask(PSW_BASE_BITS | PSW_MASK_IO | PSW_MASK_EXT | - PSW_MASK_MCHECK | PSW_MASK_WAIT | PSW_DEFAULT_KEY); -} - /* * Function to drop a processor into disabled wait state */ - static inline void ATTRIB_NORET disabled_wait(unsigned long code) { unsigned long ctl_buf; psw_t dw_psw; - dw_psw.mask = PSW_BASE_BITS | PSW_MASK_WAIT; + dw_psw.mask = PSW_MASK_BASE | PSW_MASK_WAIT | PSW_MASK_BA | PSW_MASK_EA; dw_psw.addr = code; /* * Store status and then load disabled wait psw, diff --git a/arch/s390/include/asm/ptrace.h b/arch/s390/include/asm/ptrace.h index 2904cc66967..6fc00d26814 100644 --- a/arch/s390/include/asm/ptrace.h +++ b/arch/s390/include/asm/ptrace.h @@ -230,6 +230,7 @@ typedef struct #define PSW_MASK_IO 0x02000000UL #define PSW_MASK_EXT 0x01000000UL #define PSW_MASK_KEY 0x00F00000UL +#define PSW_MASK_BASE 0x00080000UL /* always one */ #define PSW_MASK_MCHECK 0x00040000UL #define PSW_MASK_WAIT 0x00020000UL #define PSW_MASK_PSTATE 0x00010000UL @@ -239,10 +240,11 @@ typedef struct #define PSW_MASK_EA 0x00000000UL #define PSW_MASK_BA 0x00000000UL +#define PSW_MASK_USER 0x00003F00UL + #define PSW_ADDR_AMODE 0x80000000UL #define PSW_ADDR_INSN 0x7FFFFFFFUL -#define PSW_BASE_BITS 0x00080000UL #define PSW_DEFAULT_KEY (((unsigned long) PAGE_DEFAULT_ACC) << 20) #define PSW_ASC_PRIMARY 0x00000000UL @@ -256,6 +258,7 @@ typedef struct #define PSW_MASK_DAT 0x0400000000000000UL #define PSW_MASK_IO 0x0200000000000000UL #define PSW_MASK_EXT 0x0100000000000000UL +#define PSW_MASK_BASE 0x0000000000000000UL #define PSW_MASK_KEY 0x00F0000000000000UL #define PSW_MASK_MCHECK 0x0004000000000000UL #define PSW_MASK_WAIT 0x0002000000000000UL @@ -266,11 +269,11 @@ typedef struct #define PSW_MASK_EA 0x0000000100000000UL #define PSW_MASK_BA 0x0000000080000000UL +#define PSW_MASK_USER 0x00003F0000000000UL + #define PSW_ADDR_AMODE 0x0000000000000000UL #define PSW_ADDR_INSN 0xFFFFFFFFFFFFFFFFUL -#define PSW_BASE_BITS 0x0000000180000000UL -#define PSW_BASE32_BITS 0x0000000080000000UL #define PSW_DEFAULT_KEY (((unsigned long) PAGE_DEFAULT_ACC) << 52) #define PSW_ASC_PRIMARY 0x0000000000000000UL @@ -283,18 +286,7 @@ typedef struct #ifdef __KERNEL__ extern long psw_kernel_bits; extern long psw_user_bits; -#ifdef CONFIG_64BIT -extern long psw_user32_bits; #endif -#endif - -/* This macro merges a NEW PSW mask specified by the user into - the currently active PSW mask CURRENT, modifying only those - bits in CURRENT that the user may be allowed to change: this - is the condition code and the program mask bits. */ -#define PSW_MASK_MERGE(CURRENT,NEW) \ - (((CURRENT) & ~(PSW_MASK_CC|PSW_MASK_PM)) | \ - ((NEW) & (PSW_MASK_CC|PSW_MASK_PM))) /* * The s390_regs structure is used to define the elf_gregset_t. diff --git a/arch/s390/include/asm/system.h b/arch/s390/include/asm/system.h index afb849e4bf4..d73cc6b6000 100644 --- a/arch/s390/include/asm/system.h +++ b/arch/s390/include/asm/system.h @@ -212,8 +212,10 @@ __set_psw_mask(unsigned long mask) __load_psw_mask(mask | (arch_local_save_flags() & ~(-1UL >> 8))); } -#define local_mcck_enable() __set_psw_mask(psw_kernel_bits) -#define local_mcck_disable() __set_psw_mask(psw_kernel_bits & ~PSW_MASK_MCHECK) +#define local_mcck_enable() \ + __set_psw_mask(psw_kernel_bits | PSW_MASK_DAT | PSW_MASK_MCHECK) +#define local_mcck_disable() \ + __set_psw_mask(psw_kernel_bits | PSW_MASK_DAT) #ifdef CONFIG_SMP diff --git a/arch/s390/kernel/compat_linux.c b/arch/s390/kernel/compat_linux.c index 53acaa86dd9..24b218737b9 100644 --- a/arch/s390/kernel/compat_linux.c +++ b/arch/s390/kernel/compat_linux.c @@ -60,12 +60,9 @@ #include "compat_linux.h" -long psw_user32_bits = (PSW_BASE32_BITS | PSW_MASK_DAT | PSW_ASC_HOME | - PSW_MASK_IO | PSW_MASK_EXT | PSW_MASK_MCHECK | - PSW_MASK_PSTATE | PSW_DEFAULT_KEY); -long psw32_user_bits = (PSW32_BASE_BITS | PSW32_MASK_DAT | PSW32_ASC_HOME | - PSW32_MASK_IO | PSW32_MASK_EXT | PSW32_MASK_MCHECK | - PSW32_MASK_PSTATE); +u32 psw32_user_bits = PSW32_MASK_DAT | PSW32_MASK_IO | PSW32_MASK_EXT | + PSW32_DEFAULT_KEY | PSW32_MASK_BASE | PSW32_MASK_MCHECK | + PSW32_MASK_PSTATE | PSW32_ASC_HOME; /* For this source file, we want overflow handling. */ diff --git a/arch/s390/kernel/compat_signal.c b/arch/s390/kernel/compat_signal.c index 72a1c8d8d21..fd83b69207f 100644 --- a/arch/s390/kernel/compat_signal.c +++ b/arch/s390/kernel/compat_signal.c @@ -300,9 +300,9 @@ static int save_sigregs32(struct pt_regs *regs, _sigregs32 __user *sregs) _s390_regs_common32 regs32; int err, i; - regs32.psw.mask = PSW32_MASK_MERGE(psw32_user_bits, - (__u32)(regs->psw.mask >> 32)); - regs32.psw.addr = PSW32_ADDR_AMODE31 | (__u32) regs->psw.addr; + regs32.psw.mask = psw32_user_bits | + ((__u32)(regs->psw.mask >> 32) & PSW32_MASK_USER); + regs32.psw.addr = PSW32_ADDR_AMODE | (__u32) regs->psw.addr; for (i = 0; i < NUM_GPRS; i++) regs32.gprs[i] = (__u32) regs->gprs[i]; save_access_regs(current->thread.acrs); @@ -327,8 +327,8 @@ static int restore_sigregs32(struct pt_regs *regs,_sigregs32 __user *sregs) err = __copy_from_user(®s32, &sregs->regs, sizeof(regs32)); if (err) return err; - regs->psw.mask = PSW_MASK_MERGE(regs->psw.mask, - (__u64)regs32.psw.mask << 32); + regs->psw.mask = (regs->psw.mask & ~PSW_MASK_USER) | + (__u64)(regs32.psw.mask & PSW32_MASK_USER) << 32; regs->psw.addr = (__u64)(regs32.psw.addr & PSW32_ADDR_INSN); for (i = 0; i < NUM_GPRS; i++) regs->gprs[i] = (__u64) regs32.gprs[i]; diff --git a/arch/s390/kernel/early.c b/arch/s390/kernel/early.c index f297456dba7..37394b3413e 100644 --- a/arch/s390/kernel/early.c +++ b/arch/s390/kernel/early.c @@ -252,7 +252,7 @@ static noinline __init void setup_lowcore_early(void) { psw_t psw; - psw.mask = PSW_BASE_BITS | PSW_DEFAULT_KEY; + psw.mask = PSW_MASK_BASE | PSW_DEFAULT_KEY | PSW_MASK_EA | PSW_MASK_BA; psw.addr = PSW_ADDR_AMODE | (unsigned long) s390_base_ext_handler; S390_lowcore.external_new_psw = psw; psw.addr = PSW_ADDR_AMODE | (unsigned long) s390_base_pgm_handler; diff --git a/arch/s390/kernel/ipl.c b/arch/s390/kernel/ipl.c index ca0520c5254..296458360a3 100644 --- a/arch/s390/kernel/ipl.c +++ b/arch/s390/kernel/ipl.c @@ -2033,12 +2033,12 @@ void s390_reset_system(void (*func)(void *), void *data) __ctl_clear_bit(0,28); /* Set new machine check handler */ - S390_lowcore.mcck_new_psw.mask = psw_kernel_bits & ~PSW_MASK_MCHECK; + S390_lowcore.mcck_new_psw.mask = psw_kernel_bits | PSW_MASK_DAT; S390_lowcore.mcck_new_psw.addr = PSW_ADDR_AMODE | (unsigned long) s390_base_mcck_handler; /* Set new program check handler */ - S390_lowcore.program_new_psw.mask = psw_kernel_bits & ~PSW_MASK_MCHECK; + S390_lowcore.program_new_psw.mask = psw_kernel_bits | PSW_MASK_DAT; S390_lowcore.program_new_psw.addr = PSW_ADDR_AMODE | (unsigned long) s390_base_pgm_handler; diff --git a/arch/s390/kernel/machine_kexec.c b/arch/s390/kernel/machine_kexec.c index 13a0b528c70..3cd0f25ab01 100644 --- a/arch/s390/kernel/machine_kexec.c +++ b/arch/s390/kernel/machine_kexec.c @@ -108,7 +108,7 @@ static void __do_machine_kdump(void *image) #ifdef CONFIG_CRASH_DUMP int (*start_kdump)(int) = (void *)((struct kimage *) image)->start; - __load_psw_mask(PSW_BASE_BITS | PSW_DEFAULT_KEY); + __load_psw_mask(PSW_MASK_BASE | PSW_DEFAULT_KEY | PSW_MASK_EA | PSW_MASK_BA); setup_regs(); start_kdump(1); #endif diff --git a/arch/s390/kernel/process.c b/arch/s390/kernel/process.c index 541a7509fae..5e64a9a29ea 100644 --- a/arch/s390/kernel/process.c +++ b/arch/s390/kernel/process.c @@ -117,7 +117,8 @@ int kernel_thread(int (*fn)(void *), void * arg, unsigned long flags) struct pt_regs regs; memset(®s, 0, sizeof(regs)); - regs.psw.mask = psw_kernel_bits | PSW_MASK_IO | PSW_MASK_EXT; + regs.psw.mask = psw_kernel_bits | + PSW_MASK_DAT | PSW_MASK_IO | PSW_MASK_EXT | PSW_MASK_MCHECK; regs.psw.addr = (unsigned long) kernel_thread_starter | PSW_ADDR_AMODE; regs.gprs[9] = (unsigned long) fn; regs.gprs[10] = (unsigned long) arg; diff --git a/arch/s390/kernel/ptrace.c b/arch/s390/kernel/ptrace.c index 3519c99ae0c..41e20763886 100644 --- a/arch/s390/kernel/ptrace.c +++ b/arch/s390/kernel/ptrace.c @@ -169,8 +169,9 @@ static unsigned long __peek_user(struct task_struct *child, addr_t addr) */ tmp = *(addr_t *)((addr_t) &task_pt_regs(child)->psw + addr); if (addr == (addr_t) &dummy->regs.psw.mask) - /* Remove per bit from user psw. */ - tmp &= ~PSW_MASK_PER; + /* Return a clean psw mask. */ + tmp = psw_user_bits | (tmp & PSW_MASK_USER) | + PSW_MASK_EA | PSW_MASK_BA; } else if (addr < (addr_t) &dummy->regs.orig_gpr2) { /* @@ -285,17 +286,18 @@ static inline void __poke_user_per(struct task_struct *child, static int __poke_user(struct task_struct *child, addr_t addr, addr_t data) { struct user *dummy = NULL; - addr_t offset; + addr_t offset, tmp; if (addr < (addr_t) &dummy->regs.acrs) { /* * psw and gprs are stored on the stack */ + tmp = (data & ~PSW_MASK_USER) ^ psw_user_bits; if (addr == (addr_t) &dummy->regs.psw.mask && #ifdef CONFIG_COMPAT - data != PSW_MASK_MERGE(psw_user32_bits, data) && + tmp != PSW_MASK_BA && #endif - data != PSW_MASK_MERGE(psw_user_bits, data)) + tmp != (PSW_MASK_EA | PSW_MASK_BA)) /* Invalid psw mask. */ return -EINVAL; #ifndef CONFIG_64BIT @@ -505,21 +507,20 @@ static u32 __peek_user_compat(struct task_struct *child, addr_t addr) __u32 tmp; if (addr < (addr_t) &dummy32->regs.acrs) { + struct pt_regs *regs = task_pt_regs(child); /* * psw and gprs are stored on the stack */ if (addr == (addr_t) &dummy32->regs.psw.mask) { /* Fake a 31 bit psw mask. */ - tmp = (__u32)(task_pt_regs(child)->psw.mask >> 32); - tmp = PSW32_MASK_MERGE(psw32_user_bits, tmp); + tmp = (__u32)(regs->psw.mask >> 32); + tmp = psw32_user_bits | (tmp & PSW32_MASK_USER); } else if (addr == (addr_t) &dummy32->regs.psw.addr) { /* Fake a 31 bit psw address. */ - tmp = (__u32) task_pt_regs(child)->psw.addr | - PSW32_ADDR_AMODE31; + tmp = (__u32) regs->psw.addr | PSW32_ADDR_AMODE; } else { /* gpr 0-15 */ - tmp = *(__u32 *)((addr_t) &task_pt_regs(child)->psw + - addr*2 + 4); + tmp = *(__u32 *)((addr_t) ®s->psw + addr*2 + 4); } } else if (addr < (addr_t) (&dummy32->regs.orig_gpr2)) { /* @@ -604,20 +605,20 @@ static int __poke_user_compat(struct task_struct *child, addr_t offset; if (addr < (addr_t) &dummy32->regs.acrs) { + struct pt_regs *regs = task_pt_regs(child); /* * psw, gprs, acrs and orig_gpr2 are stored on the stack */ if (addr == (addr_t) &dummy32->regs.psw.mask) { /* Build a 64 bit psw mask from 31 bit mask. */ - if (tmp != PSW32_MASK_MERGE(psw32_user_bits, tmp)) + if ((tmp & ~PSW32_MASK_USER) != psw32_user_bits) /* Invalid psw mask. */ return -EINVAL; - task_pt_regs(child)->psw.mask = - PSW_MASK_MERGE(psw_user32_bits, (__u64) tmp << 32); + regs->psw.mask = (regs->psw.mask & ~PSW_MASK_USER) | + (__u64)(tmp & PSW32_MASK_USER) << 32; } else if (addr == (addr_t) &dummy32->regs.psw.addr) { /* Build a 64 bit psw address from 31 bit address. */ - task_pt_regs(child)->psw.addr = - (__u64) tmp & PSW32_ADDR_INSN; + regs->psw.addr = (__u64) tmp & PSW32_ADDR_INSN; /* * The debugger changed the instruction address, * reset system call restart, see signal.c:do_signal @@ -625,8 +626,7 @@ static int __poke_user_compat(struct task_struct *child, task_thread_info(child)->system_call = 0; } else { /* gpr 0-15 */ - *(__u32*)((addr_t) &task_pt_regs(child)->psw - + addr*2 + 4) = tmp; + *(__u32*)((addr_t) ®s->psw + addr*2 + 4) = tmp; } } else if (addr < (addr_t) (&dummy32->regs.orig_gpr2)) { /* diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c index 6f8e3777a0c..8ac6bfa2786 100644 --- a/arch/s390/kernel/setup.c +++ b/arch/s390/kernel/setup.c @@ -62,11 +62,11 @@ #include #include -long psw_kernel_bits = (PSW_BASE_BITS | PSW_MASK_DAT | PSW_ASC_PRIMARY | - PSW_MASK_MCHECK | PSW_DEFAULT_KEY); -long psw_user_bits = (PSW_BASE_BITS | PSW_MASK_DAT | PSW_ASC_HOME | - PSW_MASK_IO | PSW_MASK_EXT | PSW_MASK_MCHECK | - PSW_MASK_PSTATE | PSW_DEFAULT_KEY); +long psw_kernel_bits = PSW_DEFAULT_KEY | PSW_MASK_BASE | PSW_ASC_PRIMARY | + PSW_MASK_EA | PSW_MASK_BA; +long psw_user_bits = PSW_MASK_DAT | PSW_MASK_IO | PSW_MASK_EXT | + PSW_DEFAULT_KEY | PSW_MASK_BASE | PSW_MASK_MCHECK | + PSW_MASK_PSTATE | PSW_ASC_HOME; /* * User copy operations. @@ -278,22 +278,14 @@ early_param("mem", early_parse_mem); unsigned int user_mode = HOME_SPACE_MODE; EXPORT_SYMBOL_GPL(user_mode); -static int set_amode_and_uaccess(unsigned long user_amode, - unsigned long user32_amode) +static int set_amode_primary(void) { - psw_user_bits = PSW_BASE_BITS | PSW_MASK_DAT | user_amode | - PSW_MASK_IO | PSW_MASK_EXT | PSW_MASK_MCHECK | - PSW_MASK_PSTATE | PSW_DEFAULT_KEY; + psw_kernel_bits = (psw_kernel_bits & ~PSW_MASK_ASC) | PSW_ASC_HOME; + psw_user_bits = (psw_user_bits & ~PSW_MASK_ASC) | PSW_ASC_PRIMARY; #ifdef CONFIG_COMPAT - psw_user32_bits = PSW_BASE32_BITS | PSW_MASK_DAT | user_amode | - PSW_MASK_IO | PSW_MASK_EXT | PSW_MASK_MCHECK | - PSW_MASK_PSTATE | PSW_DEFAULT_KEY; - psw32_user_bits = PSW32_BASE_BITS | PSW32_MASK_DAT | user32_amode | - PSW32_MASK_IO | PSW32_MASK_EXT | PSW32_MASK_MCHECK | - PSW32_MASK_PSTATE; + psw32_user_bits = + (psw32_user_bits & ~PSW32_MASK_ASC) | PSW32_ASC_PRIMARY; #endif - psw_kernel_bits = PSW_BASE_BITS | PSW_MASK_DAT | PSW_ASC_HOME | - PSW_MASK_MCHECK | PSW_DEFAULT_KEY; if (MACHINE_HAS_MVCOS) { memcpy(&uaccess, &uaccess_mvcos_switch, sizeof(uaccess)); @@ -329,7 +321,7 @@ early_param("user_mode", early_parse_user_mode); static void setup_addressing_mode(void) { if (user_mode == PRIMARY_SPACE_MODE) { - if (set_amode_and_uaccess(PSW_ASC_PRIMARY, PSW32_ASC_PRIMARY)) + if (set_amode_primary()) pr_info("Address spaces switched, " "mvcos available\n"); else @@ -348,24 +340,25 @@ setup_lowcore(void) */ BUILD_BUG_ON(sizeof(struct _lowcore) != LC_PAGES * 4096); lc = __alloc_bootmem_low(LC_PAGES * PAGE_SIZE, LC_PAGES * PAGE_SIZE, 0); - lc->restart_psw.mask = PSW_BASE_BITS | PSW_DEFAULT_KEY; + lc->restart_psw.mask = psw_kernel_bits; lc->restart_psw.addr = PSW_ADDR_AMODE | (unsigned long) psw_restart_int_handler; - if (user_mode != HOME_SPACE_MODE) - lc->restart_psw.mask |= PSW_ASC_HOME; - lc->external_new_psw.mask = psw_kernel_bits; + lc->external_new_psw.mask = psw_kernel_bits | + PSW_MASK_DAT | PSW_MASK_MCHECK; lc->external_new_psw.addr = PSW_ADDR_AMODE | (unsigned long) ext_int_handler; - lc->svc_new_psw.mask = psw_kernel_bits | PSW_MASK_IO | PSW_MASK_EXT; + lc->svc_new_psw.mask = psw_kernel_bits | + PSW_MASK_DAT | PSW_MASK_IO | PSW_MASK_EXT | PSW_MASK_MCHECK; lc->svc_new_psw.addr = PSW_ADDR_AMODE | (unsigned long) system_call; - lc->program_new_psw.mask = psw_kernel_bits; + lc->program_new_psw.mask = psw_kernel_bits | + PSW_MASK_DAT | PSW_MASK_MCHECK; lc->program_new_psw.addr = - PSW_ADDR_AMODE | (unsigned long)pgm_check_handler; - lc->mcck_new_psw.mask = - psw_kernel_bits & ~PSW_MASK_MCHECK & ~PSW_MASK_DAT; + PSW_ADDR_AMODE | (unsigned long) pgm_check_handler; + lc->mcck_new_psw.mask = psw_kernel_bits; lc->mcck_new_psw.addr = PSW_ADDR_AMODE | (unsigned long) mcck_int_handler; - lc->io_new_psw.mask = psw_kernel_bits; + lc->io_new_psw.mask = psw_kernel_bits | + PSW_MASK_DAT | PSW_MASK_MCHECK; lc->io_new_psw.addr = PSW_ADDR_AMODE | (unsigned long) io_int_handler; lc->clock_comparator = -1ULL; lc->kernel_stack = ((unsigned long) &init_thread_union) + THREAD_SIZE; @@ -554,7 +547,7 @@ static void __init setup_restart_psw(void) * Setup restart PSW for absolute zero lowcore. This is necesary * if PSW restart is done on an offline CPU that has lowcore zero */ - psw.mask = PSW_BASE_BITS | PSW_DEFAULT_KEY; + psw.mask = PSW_DEFAULT_KEY | PSW_MASK_BASE | PSW_MASK_EA | PSW_MASK_BA; psw.addr = PSW_ADDR_AMODE | (unsigned long) psw_restart_int_handler; copy_to_absolute_zero(&S390_lowcore.restart_psw, &psw, sizeof(psw)); } diff --git a/arch/s390/kernel/signal.c b/arch/s390/kernel/signal.c index 0e905cb7604..c19755815e5 100644 --- a/arch/s390/kernel/signal.c +++ b/arch/s390/kernel/signal.c @@ -117,7 +117,8 @@ static int save_sigregs(struct pt_regs *regs, _sigregs __user *sregs) /* Copy a 'clean' PSW mask to the user to avoid leaking information about whether PER is currently on. */ - user_sregs.regs.psw.mask = PSW_MASK_MERGE(psw_user_bits, regs->psw.mask); + user_sregs.regs.psw.mask = psw_user_bits | PSW_MASK_EA | PSW_MASK_BA | + (regs->psw.mask & PSW_MASK_USER); user_sregs.regs.psw.addr = regs->psw.addr; memcpy(&user_sregs.regs.gprs, ®s->gprs, sizeof(sregs->regs.gprs)); memcpy(&user_sregs.regs.acrs, current->thread.acrs, @@ -144,8 +145,8 @@ static int restore_sigregs(struct pt_regs *regs, _sigregs __user *sregs) err = __copy_from_user(&user_sregs, sregs, sizeof(_sigregs)); if (err) return err; - regs->psw.mask = PSW_MASK_MERGE(regs->psw.mask, - user_sregs.regs.psw.mask); + regs->psw.mask = (regs->psw.mask & ~PSW_MASK_USER) | + (user_sregs.regs.psw.mask & PSW_MASK_USER); regs->psw.addr = PSW_ADDR_AMODE | user_sregs.regs.psw.addr; memcpy(®s->gprs, &user_sregs.regs.gprs, sizeof(sregs->regs.gprs)); memcpy(¤t->thread.acrs, &user_sregs.regs.acrs, diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c index e3f51dfa5ca..6c8a977af59 100644 --- a/arch/s390/kernel/smp.c +++ b/arch/s390/kernel/smp.c @@ -108,7 +108,7 @@ void smp_restart_with_online_cpu(void) for_each_online_cpu(cpu) { if (stap() == __cpu_logical_map[cpu]) { /* We are online: Enable DAT again and return */ - __load_psw_mask(psw_kernel_bits & ~PSW_MASK_MCHECK); + __load_psw_mask(psw_kernel_bits | PSW_MASK_DAT); return; } } @@ -130,14 +130,16 @@ void smp_switch_to_ipl_cpu(void (*func)(void *), void *data) if (smp_processor_id() == 0) func(data); - __load_psw_mask(PSW_BASE_BITS | PSW_DEFAULT_KEY); + __load_psw_mask(PSW_DEFAULT_KEY | PSW_MASK_BASE | + PSW_MASK_EA | PSW_MASK_BA); /* Disable lowcore protection */ __ctl_clear_bit(0, 28); current_lc = lowcore_ptr[smp_processor_id()]; lc = lowcore_ptr[0]; if (!lc) lc = current_lc; - lc->restart_psw.mask = PSW_BASE_BITS | PSW_DEFAULT_KEY; + lc->restart_psw.mask = + PSW_DEFAULT_KEY | PSW_MASK_BASE | PSW_MASK_EA | PSW_MASK_BA; lc->restart_psw.addr = PSW_ADDR_AMODE | (unsigned long) smp_restart_cpu; if (!cpu_online(0)) smp_switch_to_cpu(func, data, 0, stap(), __cpu_logical_map[0]); @@ -159,7 +161,7 @@ void smp_send_stop(void) int cpu, rc; /* Disable all interrupts/machine checks */ - __load_psw_mask(psw_kernel_bits & ~PSW_MASK_MCHECK); + __load_psw_mask(psw_kernel_bits | PSW_MASK_DAT); trace_hardirqs_off(); /* stop all processors */ @@ -501,7 +503,8 @@ int __cpuinit start_secondary(void *cpuvoid) set_cpu_online(smp_processor_id(), true); ipi_call_unlock(); __ctl_clear_bit(0, 28); /* Disable lowcore protection */ - S390_lowcore.restart_psw.mask = PSW_BASE_BITS | PSW_DEFAULT_KEY; + S390_lowcore.restart_psw.mask = + PSW_DEFAULT_KEY | PSW_MASK_BASE | PSW_MASK_EA | PSW_MASK_BA; S390_lowcore.restart_psw.addr = PSW_ADDR_AMODE | (unsigned long) psw_restart_int_handler; __ctl_set_bit(0, 28); /* Enable lowcore protection */ @@ -549,7 +552,8 @@ static int __cpuinit smp_alloc_lowcore(int cpu) memset((char *)lowcore + 512, 0, sizeof(*lowcore) - 512); lowcore->async_stack = async_stack + ASYNC_SIZE; lowcore->panic_stack = panic_stack + PAGE_SIZE; - lowcore->restart_psw.mask = PSW_BASE_BITS | PSW_DEFAULT_KEY; + lowcore->restart_psw.mask = + PSW_DEFAULT_KEY | PSW_MASK_BASE | PSW_MASK_EA | PSW_MASK_BA; lowcore->restart_psw.addr = PSW_ADDR_AMODE | (unsigned long) restart_int_handler; if (user_mode != HOME_SPACE_MODE) diff --git a/arch/s390/kernel/traps.c b/arch/s390/kernel/traps.c index ffabcd9d336..79eee3f27af 100644 --- a/arch/s390/kernel/traps.c +++ b/arch/s390/kernel/traps.c @@ -200,7 +200,7 @@ void show_registers(struct pt_regs *regs) mask_bits(regs, PSW_MASK_PSTATE), mask_bits(regs, PSW_MASK_ASC), mask_bits(regs, PSW_MASK_CC), mask_bits(regs, PSW_MASK_PM)); #ifdef CONFIG_64BIT - printk(" EA:%x", mask_bits(regs, PSW_BASE_BITS)); + printk(" EA:%x", mask_bits(regs, PSW_MASK_EA | PSW_MASK_BA)); #endif printk("\n%s GPRS: " FOURLONG, mode, regs->gprs[0], regs->gprs[1], regs->gprs[2], regs->gprs[3]); diff --git a/arch/s390/kernel/vtime.c b/arch/s390/kernel/vtime.c index 2d6228f60cd..4f3de980b0e 100644 --- a/arch/s390/kernel/vtime.c +++ b/arch/s390/kernel/vtime.c @@ -170,7 +170,8 @@ void __kprobes vtime_stop_cpu(void) psw_t psw; /* Wait for external, I/O or machine check interrupt. */ - psw.mask = psw_kernel_bits | PSW_MASK_WAIT | PSW_MASK_IO | PSW_MASK_EXT; + psw.mask = psw_kernel_bits | PSW_MASK_WAIT | + PSW_MASK_DAT | PSW_MASK_IO | PSW_MASK_EXT | PSW_MASK_MCHECK; idle->nohz_delay = 0; @@ -183,7 +184,8 @@ void __kprobes vtime_stop_cpu(void) * set_cpu_timer(VTIMER_MAX_SLICE); * idle->idle_enter = get_clock(); * __load_psw_mask(psw_kernel_bits | PSW_MASK_WAIT | - * PSW_MASK_IO | PSW_MASK_EXT); + * PSW_MASK_DAT | PSW_MASK_IO | + * PSW_MASK_EXT | PSW_MASK_MCHECK); * The difference is that the inline assembly makes sure that * the last three instruction are stpt, stck and lpsw in that * order. This is done to increase the precision. @@ -216,7 +218,8 @@ void __kprobes vtime_stop_cpu(void) * vq->idle = get_cpu_timer(); * idle->idle_enter = get_clock(); * __load_psw_mask(psw_kernel_bits | PSW_MASK_WAIT | - * PSW_MASK_IO | PSW_MASK_EXT); + * PSW_MASK_DAT | PSW_MASK_IO | + * PSW_MASK_EXT | PSW_MASK_MCHECK); * The difference is that the inline assembly makes sure that * the last three instruction are stpt, stck and lpsw in that * order. This is done to increase the precision. diff --git a/arch/s390/lib/delay.c b/arch/s390/lib/delay.c index a65229d91c9..db92f044024 100644 --- a/arch/s390/lib/delay.c +++ b/arch/s390/lib/delay.c @@ -32,7 +32,8 @@ static void __udelay_disabled(unsigned long long usecs) u64 clock_saved; u64 end; - mask = psw_kernel_bits | PSW_MASK_WAIT | PSW_MASK_EXT; + mask = psw_kernel_bits | PSW_MASK_DAT | PSW_MASK_WAIT | + PSW_MASK_EXT | PSW_MASK_MCHECK; end = get_clock() + (usecs << 12); clock_saved = local_tick_disable(); __ctl_store(cr0_saved, 0, 0); diff --git a/arch/s390/mm/fault.c b/arch/s390/mm/fault.c index a90fd91a9c7..de3af0c053c 100644 --- a/arch/s390/mm/fault.c +++ b/arch/s390/mm/fault.c @@ -454,7 +454,7 @@ int __handle_fault(unsigned long uaddr, unsigned long pgm_int_code, int write) struct pt_regs regs; int access, fault; - regs.psw.mask = psw_kernel_bits; + regs.psw.mask = psw_kernel_bits | PSW_MASK_DAT | PSW_MASK_MCHECK; if (!irqs_disabled()) regs.psw.mask |= PSW_MASK_IO | PSW_MASK_EXT; regs.psw.addr = (unsigned long) __builtin_return_address(0); diff --git a/drivers/s390/char/sclp_cmd.c b/drivers/s390/char/sclp_cmd.c index 837e010299a..0b54a91f8dc 100644 --- a/drivers/s390/char/sclp_cmd.c +++ b/drivers/s390/char/sclp_cmd.c @@ -61,8 +61,8 @@ static int __init sclp_cmd_sync_early(sclp_cmdw_t cmd, void *sccb) rc = sclp_service_call(cmd, sccb); if (rc) goto out; - __load_psw_mask(PSW_BASE_BITS | PSW_MASK_EXT | - PSW_MASK_WAIT | PSW_DEFAULT_KEY); + __load_psw_mask(PSW_DEFAULT_KEY | PSW_MASK_BASE | PSW_MASK_EA | + PSW_MASK_BA | PSW_MASK_EXT | PSW_MASK_WAIT); local_irq_disable(); out: /* Contents of the sccb might have changed. */ diff --git a/drivers/s390/char/sclp_quiesce.c b/drivers/s390/char/sclp_quiesce.c index a90a02c28d6..87fc0ac11e6 100644 --- a/drivers/s390/char/sclp_quiesce.c +++ b/drivers/s390/char/sclp_quiesce.c @@ -30,7 +30,8 @@ static void do_machine_quiesce(void) psw_t quiesce_psw; smp_send_stop(); - quiesce_psw.mask = PSW_BASE_BITS | PSW_MASK_WAIT; + quiesce_psw.mask = + PSW_MASK_BASE | PSW_MASK_EA | PSW_MASK_BA | PSW_MASK_WAIT; quiesce_psw.addr = 0xfff; __load_psw(quiesce_psw); } -- cgit v1.2.3 From d4e81b35b882d96f059afdb0f98e5b6025973b09 Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky Date: Sun, 30 Oct 2011 15:16:51 +0100 Subject: [S390] allow all addressing modes The user space program can change its addressing mode between the 24-bit, 31-bit and the 64-bit mode if the kernel is 64 bit. Currently the kernel always forces the standard amode on signal delivery and signal return and on ptrace: 64-bit for a 64-bit process, 31-bit for a compat process and 31-bit kernels. Change the signal and ptrace code to allow the full range of addressing modes. Signal handlers are run in the standard addressing mode for the process. One caveat is that even an 31-bit compat process can switch to the 64-bit mode. The next signal will switch back into the 31-bit mode and there is no room in the 31-bit compat signal frame to store the information that the program came from the 64-bit mode. Signed-off-by: Martin Schwidefsky --- arch/s390/include/asm/ptrace.h | 2 +- arch/s390/kernel/compat_signal.c | 12 ++++++++---- arch/s390/kernel/ptrace.c | 25 ++++++++++--------------- arch/s390/kernel/signal.c | 14 ++++++++++---- 4 files changed, 29 insertions(+), 24 deletions(-) diff --git a/arch/s390/include/asm/ptrace.h b/arch/s390/include/asm/ptrace.h index 6fc00d26814..a65846340d5 100644 --- a/arch/s390/include/asm/ptrace.h +++ b/arch/s390/include/asm/ptrace.h @@ -269,7 +269,7 @@ typedef struct #define PSW_MASK_EA 0x0000000100000000UL #define PSW_MASK_BA 0x0000000080000000UL -#define PSW_MASK_USER 0x00003F0000000000UL +#define PSW_MASK_USER 0x00003F0180000000UL #define PSW_ADDR_AMODE 0x0000000000000000UL #define PSW_ADDR_INSN 0xFFFFFFFFFFFFFFFFUL diff --git a/arch/s390/kernel/compat_signal.c b/arch/s390/kernel/compat_signal.c index fd83b69207f..a1dc6457016 100644 --- a/arch/s390/kernel/compat_signal.c +++ b/arch/s390/kernel/compat_signal.c @@ -302,7 +302,8 @@ static int save_sigregs32(struct pt_regs *regs, _sigregs32 __user *sregs) regs32.psw.mask = psw32_user_bits | ((__u32)(regs->psw.mask >> 32) & PSW32_MASK_USER); - regs32.psw.addr = PSW32_ADDR_AMODE | (__u32) regs->psw.addr; + regs32.psw.addr = (__u32) regs->psw.addr | + (__u32)(regs->psw.mask & PSW_MASK_BA); for (i = 0; i < NUM_GPRS; i++) regs32.gprs[i] = (__u32) regs->gprs[i]; save_access_regs(current->thread.acrs); @@ -328,7 +329,8 @@ static int restore_sigregs32(struct pt_regs *regs,_sigregs32 __user *sregs) if (err) return err; regs->psw.mask = (regs->psw.mask & ~PSW_MASK_USER) | - (__u64)(regs32.psw.mask & PSW32_MASK_USER) << 32; + (__u64)(regs32.psw.mask & PSW32_MASK_USER) << 32 | + (__u64)(regs32.psw.addr & PSW32_ADDR_AMODE); regs->psw.addr = (__u64)(regs32.psw.addr & PSW32_ADDR_INSN); for (i = 0; i < NUM_GPRS; i++) regs->gprs[i] = (__u64) regs32.gprs[i]; @@ -496,9 +498,9 @@ static int setup_frame32(int sig, struct k_sigaction *ka, /* Set up to return from userspace. If provided, use a stub already in userspace. */ if (ka->sa.sa_flags & SA_RESTORER) { - regs->gprs[14] = (__u64) ka->sa.sa_restorer; + regs->gprs[14] = (__u64) ka->sa.sa_restorer | PSW32_ADDR_AMODE; } else { - regs->gprs[14] = (__u64) frame->retcode; + regs->gprs[14] = (__u64) frame->retcode | PSW32_ADDR_AMODE; if (__put_user(S390_SYSCALL_OPCODE | __NR_sigreturn, (u16 __user *)(frame->retcode))) goto give_sigsegv; @@ -510,6 +512,7 @@ static int setup_frame32(int sig, struct k_sigaction *ka, /* Set up registers for signal handler */ regs->gprs[15] = (__u64) frame; + regs->psw.mask |= PSW_MASK_BA; /* force amode 31 */ regs->psw.addr = (__u64) ka->sa.sa_handler; regs->gprs[2] = map_signal(sig); @@ -573,6 +576,7 @@ static int setup_rt_frame32(int sig, struct k_sigaction *ka, siginfo_t *info, /* Set up registers for signal handler */ regs->gprs[15] = (__u64) frame; + regs->psw.mask |= PSW_MASK_BA; /* force amode 31 */ regs->psw.addr = (__u64) ka->sa.sa_handler; regs->gprs[2] = map_signal(sig); diff --git a/arch/s390/kernel/ptrace.c b/arch/s390/kernel/ptrace.c index 41e20763886..450931a45b6 100644 --- a/arch/s390/kernel/ptrace.c +++ b/arch/s390/kernel/ptrace.c @@ -170,8 +170,7 @@ static unsigned long __peek_user(struct task_struct *child, addr_t addr) tmp = *(addr_t *)((addr_t) &task_pt_regs(child)->psw + addr); if (addr == (addr_t) &dummy->regs.psw.mask) /* Return a clean psw mask. */ - tmp = psw_user_bits | (tmp & PSW_MASK_USER) | - PSW_MASK_EA | PSW_MASK_BA; + tmp = psw_user_bits | (tmp & PSW_MASK_USER); } else if (addr < (addr_t) &dummy->regs.orig_gpr2) { /* @@ -286,26 +285,17 @@ static inline void __poke_user_per(struct task_struct *child, static int __poke_user(struct task_struct *child, addr_t addr, addr_t data) { struct user *dummy = NULL; - addr_t offset, tmp; + addr_t offset; if (addr < (addr_t) &dummy->regs.acrs) { /* * psw and gprs are stored on the stack */ - tmp = (data & ~PSW_MASK_USER) ^ psw_user_bits; if (addr == (addr_t) &dummy->regs.psw.mask && -#ifdef CONFIG_COMPAT - tmp != PSW_MASK_BA && -#endif - tmp != (PSW_MASK_EA | PSW_MASK_BA)) + ((data & ~PSW_MASK_USER) != psw_user_bits || + ((data & PSW_MASK_EA) && !(data & PSW_MASK_BA)))) /* Invalid psw mask. */ return -EINVAL; -#ifndef CONFIG_64BIT - if (addr == (addr_t) &dummy->regs.psw.addr) - /* I'd like to reject addresses without the - high order bit but older gdb's rely on it */ - data |= PSW_ADDR_AMODE; -#endif if (addr == (addr_t) &dummy->regs.psw.addr) /* * The debugger changed the instruction address, @@ -517,7 +507,8 @@ static u32 __peek_user_compat(struct task_struct *child, addr_t addr) tmp = psw32_user_bits | (tmp & PSW32_MASK_USER); } else if (addr == (addr_t) &dummy32->regs.psw.addr) { /* Fake a 31 bit psw address. */ - tmp = (__u32) regs->psw.addr | PSW32_ADDR_AMODE; + tmp = (__u32) regs->psw.addr | + (__u32)(regs->psw.mask & PSW_MASK_BA); } else { /* gpr 0-15 */ tmp = *(__u32 *)((addr_t) ®s->psw + addr*2 + 4); @@ -615,10 +606,14 @@ static int __poke_user_compat(struct task_struct *child, /* Invalid psw mask. */ return -EINVAL; regs->psw.mask = (regs->psw.mask & ~PSW_MASK_USER) | + (regs->psw.mask & PSW_MASK_BA) | (__u64)(tmp & PSW32_MASK_USER) << 32; } else if (addr == (addr_t) &dummy32->regs.psw.addr) { /* Build a 64 bit psw address from 31 bit address. */ regs->psw.addr = (__u64) tmp & PSW32_ADDR_INSN; + /* Transfer 31 bit amode bit to psw mask. */ + regs->psw.mask = (regs->psw.mask & ~PSW_MASK_BA) | + (__u64)(tmp & PSW32_ADDR_AMODE); /* * The debugger changed the instruction address, * reset system call restart, see signal.c:do_signal diff --git a/arch/s390/kernel/signal.c b/arch/s390/kernel/signal.c index c19755815e5..05a85bc14c9 100644 --- a/arch/s390/kernel/signal.c +++ b/arch/s390/kernel/signal.c @@ -117,8 +117,8 @@ static int save_sigregs(struct pt_regs *regs, _sigregs __user *sregs) /* Copy a 'clean' PSW mask to the user to avoid leaking information about whether PER is currently on. */ - user_sregs.regs.psw.mask = psw_user_bits | PSW_MASK_EA | PSW_MASK_BA | - (regs->psw.mask & PSW_MASK_USER); + user_sregs.regs.psw.mask = psw_user_bits | + (regs->psw.mask & PSW_MASK_USER); user_sregs.regs.psw.addr = regs->psw.addr; memcpy(&user_sregs.regs.gprs, ®s->gprs, sizeof(sregs->regs.gprs)); memcpy(&user_sregs.regs.acrs, current->thread.acrs, @@ -145,9 +145,13 @@ static int restore_sigregs(struct pt_regs *regs, _sigregs __user *sregs) err = __copy_from_user(&user_sregs, sregs, sizeof(_sigregs)); if (err) return err; + /* Use regs->psw.mask instead of psw_user_bits to preserve PER bit. */ regs->psw.mask = (regs->psw.mask & ~PSW_MASK_USER) | - (user_sregs.regs.psw.mask & PSW_MASK_USER); - regs->psw.addr = PSW_ADDR_AMODE | user_sregs.regs.psw.addr; + (user_sregs.regs.psw.mask & PSW_MASK_USER); + /* Check for invalid amode */ + if (regs->psw.mask & PSW_MASK_EA) + regs->psw.mask |= PSW_MASK_BA; + regs->psw.addr = user_sregs.regs.psw.addr; memcpy(®s->gprs, &user_sregs.regs.gprs, sizeof(sregs->regs.gprs)); memcpy(¤t->thread.acrs, &user_sregs.regs.acrs, sizeof(sregs->regs.acrs)); @@ -290,6 +294,7 @@ static int setup_frame(int sig, struct k_sigaction *ka, /* Set up registers for signal handler */ regs->gprs[15] = (unsigned long) frame; + regs->psw.mask |= PSW_MASK_EA | PSW_MASK_BA; /* 64 bit amode */ regs->psw.addr = (unsigned long) ka->sa.sa_handler | PSW_ADDR_AMODE; regs->gprs[2] = map_signal(sig); @@ -358,6 +363,7 @@ static int setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, /* Set up registers for signal handler */ regs->gprs[15] = (unsigned long) frame; + regs->psw.mask |= PSW_MASK_EA | PSW_MASK_BA; /* 64 bit amode */ regs->psw.addr = (unsigned long) ka->sa.sa_handler | PSW_ADDR_AMODE; regs->gprs[2] = map_signal(sig); -- cgit v1.2.3 From dbdf1afcaaabe83dea15a3cb9b9013e73ae3b1ad Mon Sep 17 00:00:00 2001 From: Sebastian Ott Date: Sun, 30 Oct 2011 15:16:52 +0100 Subject: [S390] ccwgroup: move attributes to attribute group Put sysfs attributes of ccwgroup devices in an attribute group to ensure that these attributes are actually present when userspace is notified via uevents. Cc: stable@kernel.org Signed-off-by: Sebastian Ott Signed-off-by: Martin Schwidefsky --- drivers/s390/cio/ccwgroup.c | 42 ++++++++++++++++++++++-------------------- 1 file changed, 22 insertions(+), 20 deletions(-) diff --git a/drivers/s390/cio/ccwgroup.c b/drivers/s390/cio/ccwgroup.c index 5c567414c4b..cda9bd6e48e 100644 --- a/drivers/s390/cio/ccwgroup.c +++ b/drivers/s390/cio/ccwgroup.c @@ -87,6 +87,12 @@ static void __ccwgroup_remove_cdev_refs(struct ccwgroup_device *gdev) } } +static ssize_t ccwgroup_online_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count); +static ssize_t ccwgroup_online_show(struct device *dev, + struct device_attribute *attr, + char *buf); /* * Provide an 'ungroup' attribute so the user can remove group devices no * longer needed or accidentially created. Saves memory :) @@ -134,6 +140,20 @@ out: } static DEVICE_ATTR(ungroup, 0200, NULL, ccwgroup_ungroup_store); +static DEVICE_ATTR(online, 0644, ccwgroup_online_show, ccwgroup_online_store); + +static struct attribute *ccwgroup_attrs[] = { + &dev_attr_online.attr, + &dev_attr_ungroup.attr, + NULL, +}; +static struct attribute_group ccwgroup_attr_group = { + .attrs = ccwgroup_attrs, +}; +static const struct attribute_group *ccwgroup_attr_groups[] = { + &ccwgroup_attr_group, + NULL, +}; static void ccwgroup_release (struct device *dev) @@ -293,25 +313,17 @@ int ccwgroup_create_from_string(struct device *root, unsigned int creator_id, } dev_set_name(&gdev->dev, "%s", dev_name(&gdev->cdev[0]->dev)); - + gdev->dev.groups = ccwgroup_attr_groups; rc = device_add(&gdev->dev); if (rc) goto error; get_device(&gdev->dev); - rc = device_create_file(&gdev->dev, &dev_attr_ungroup); - - if (rc) { - device_unregister(&gdev->dev); - goto error; - } - rc = __ccwgroup_create_symlinks(gdev); if (!rc) { mutex_unlock(&gdev->reg_mutex); put_device(&gdev->dev); return 0; } - device_remove_file(&gdev->dev, &dev_attr_ungroup); device_unregister(&gdev->dev); error: for (i = 0; i < num_devices; i++) @@ -423,7 +435,7 @@ ccwgroup_online_store (struct device *dev, struct device_attribute *attr, const int ret; if (!dev->driver) - return -ENODEV; + return -EINVAL; gdev = to_ccwgroupdev(dev); gdrv = to_ccwgroupdrv(dev->driver); @@ -456,8 +468,6 @@ ccwgroup_online_show (struct device *dev, struct device_attribute *attr, char *b return sprintf(buf, online ? "1\n" : "0\n"); } -static DEVICE_ATTR(online, 0644, ccwgroup_online_show, ccwgroup_online_store); - static int ccwgroup_probe (struct device *dev) { @@ -469,12 +479,7 @@ ccwgroup_probe (struct device *dev) gdev = to_ccwgroupdev(dev); gdrv = to_ccwgroupdrv(dev->driver); - if ((ret = device_create_file(dev, &dev_attr_online))) - return ret; - ret = gdrv->probe ? gdrv->probe(gdev) : -ENODEV; - if (ret) - device_remove_file(dev, &dev_attr_online); return ret; } @@ -485,9 +490,6 @@ ccwgroup_remove (struct device *dev) struct ccwgroup_device *gdev; struct ccwgroup_driver *gdrv; - device_remove_file(dev, &dev_attr_online); - device_remove_file(dev, &dev_attr_ungroup); - if (!dev->driver) return 0; -- cgit v1.2.3 From dad572e370138539ea45be9b53d168568e562565 Mon Sep 17 00:00:00 2001 From: Sebastian Ott Date: Sun, 30 Oct 2011 15:16:53 +0100 Subject: [S390] ccwgroup: cleanup Fix coding style, remove forward declerations, simplify code. Also remove a superfluous get_device/put_device pair in ccwgroup_create_from_string. Signed-off-by: Sebastian Ott Signed-off-by: Martin Schwidefsky --- drivers/s390/cio/ccwgroup.c | 299 ++++++++++++++++++-------------------------- 1 file changed, 119 insertions(+), 180 deletions(-) diff --git a/drivers/s390/cio/ccwgroup.c b/drivers/s390/cio/ccwgroup.c index cda9bd6e48e..4f1989d27b1 100644 --- a/drivers/s390/cio/ccwgroup.c +++ b/drivers/s390/cio/ccwgroup.c @@ -29,31 +29,20 @@ /* a device matches a driver if all its slave devices match the same * entry of the driver */ -static int -ccwgroup_bus_match (struct device * dev, struct device_driver * drv) +static int ccwgroup_bus_match(struct device *dev, struct device_driver * drv) { - struct ccwgroup_device *gdev; - struct ccwgroup_driver *gdrv; - - gdev = to_ccwgroupdev(dev); - gdrv = to_ccwgroupdrv(drv); + struct ccwgroup_device *gdev = to_ccwgroupdev(dev); + struct ccwgroup_driver *gdrv = to_ccwgroupdrv(drv); if (gdev->creator_id == gdrv->driver_id) return 1; return 0; } -static int -ccwgroup_uevent (struct device *dev, struct kobj_uevent_env *env) -{ - /* TODO */ - return 0; -} static struct bus_type ccwgroup_bus_type; -static void -__ccwgroup_remove_symlinks(struct ccwgroup_device *gdev) +static void __ccwgroup_remove_symlinks(struct ccwgroup_device *gdev) { int i; char str[8]; @@ -63,7 +52,6 @@ __ccwgroup_remove_symlinks(struct ccwgroup_device *gdev) sysfs_remove_link(&gdev->dev.kobj, str); sysfs_remove_link(&gdev->cdev[i]->dev.kobj, "group_device"); } - } /* @@ -87,12 +75,87 @@ static void __ccwgroup_remove_cdev_refs(struct ccwgroup_device *gdev) } } +static int ccwgroup_set_online(struct ccwgroup_device *gdev) +{ + struct ccwgroup_driver *gdrv = to_ccwgroupdrv(gdev->dev.driver); + int ret = 0; + + if (atomic_cmpxchg(&gdev->onoff, 0, 1) != 0) + return -EAGAIN; + if (gdev->state == CCWGROUP_ONLINE) + goto out; + if (gdrv->set_online) + ret = gdrv->set_online(gdev); + if (ret) + goto out; + + gdev->state = CCWGROUP_ONLINE; +out: + atomic_set(&gdev->onoff, 0); + return ret; +} + +static int ccwgroup_set_offline(struct ccwgroup_device *gdev) +{ + struct ccwgroup_driver *gdrv = to_ccwgroupdrv(gdev->dev.driver); + int ret = 0; + + if (atomic_cmpxchg(&gdev->onoff, 0, 1) != 0) + return -EAGAIN; + if (gdev->state == CCWGROUP_OFFLINE) + goto out; + if (gdrv->set_offline) + ret = gdrv->set_offline(gdev); + if (ret) + goto out; + + gdev->state = CCWGROUP_OFFLINE; +out: + atomic_set(&gdev->onoff, 0); + return ret; +} + static ssize_t ccwgroup_online_store(struct device *dev, struct device_attribute *attr, - const char *buf, size_t count); + const char *buf, size_t count) +{ + struct ccwgroup_device *gdev = to_ccwgroupdev(dev); + struct ccwgroup_driver *gdrv = to_ccwgroupdrv(dev->driver); + unsigned long value; + int ret; + + if (!dev->driver) + return -EINVAL; + if (!try_module_get(gdrv->driver.owner)) + return -EINVAL; + + ret = strict_strtoul(buf, 0, &value); + if (ret) + goto out; + + if (value == 1) + ret = ccwgroup_set_online(gdev); + else if (value == 0) + ret = ccwgroup_set_offline(gdev); + else + ret = -EINVAL; +out: + module_put(gdrv->driver.owner); + return (ret == 0) ? count : ret; +} + static ssize_t ccwgroup_online_show(struct device *dev, struct device_attribute *attr, - char *buf); + char *buf) +{ + struct ccwgroup_device *gdev = to_ccwgroupdev(dev); + int online; + + online = (gdev->state == CCWGROUP_ONLINE) ? 1 : 0; + + return scnprintf(buf, PAGE_SIZE, "%d\n", online); +} + /* * Provide an 'ungroup' attribute so the user can remove group devices no * longer needed or accidentially created. Saves memory :) @@ -110,14 +173,13 @@ static void ccwgroup_ungroup_callback(struct device *dev) mutex_unlock(&gdev->reg_mutex); } -static ssize_t -ccwgroup_ungroup_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) +static ssize_t ccwgroup_ungroup_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) { - struct ccwgroup_device *gdev; + struct ccwgroup_device *gdev = to_ccwgroupdev(dev); int rc; - gdev = to_ccwgroupdev(dev); - /* Prevent concurrent online/offline processing and ungrouping. */ if (atomic_cmpxchg(&gdev->onoff, 0, 1) != 0) return -EAGAIN; @@ -138,7 +200,6 @@ out: } return count; } - static DEVICE_ATTR(ungroup, 0200, NULL, ccwgroup_ungroup_store); static DEVICE_ATTR(online, 0644, ccwgroup_online_show, ccwgroup_online_store); @@ -155,21 +216,19 @@ static const struct attribute_group *ccwgroup_attr_groups[] = { NULL, }; -static void -ccwgroup_release (struct device *dev) +static void ccwgroup_release(struct device *dev) { kfree(to_ccwgroupdev(dev)); } -static int -__ccwgroup_create_symlinks(struct ccwgroup_device *gdev) +static int __ccwgroup_create_symlinks(struct ccwgroup_device *gdev) { char str[8]; int i, rc; for (i = 0; i < gdev->count; i++) { - rc = sysfs_create_link(&gdev->cdev[i]->dev.kobj, &gdev->dev.kobj, - "group_device"); + rc = sysfs_create_link(&gdev->cdev[i]->dev.kobj, + &gdev->dev.kobj, "group_device"); if (rc) { for (--i; i >= 0; i--) sysfs_remove_link(&gdev->cdev[i]->dev.kobj, @@ -179,8 +238,8 @@ __ccwgroup_create_symlinks(struct ccwgroup_device *gdev) } for (i = 0; i < gdev->count; i++) { sprintf(str, "cdev%d", i); - rc = sysfs_create_link(&gdev->dev.kobj, &gdev->cdev[i]->dev.kobj, - str); + rc = sysfs_create_link(&gdev->dev.kobj, + &gdev->cdev[i]->dev.kobj, str); if (rc) { for (--i; i >= 0; i--) { sprintf(str, "cdev%d", i); @@ -317,14 +376,13 @@ int ccwgroup_create_from_string(struct device *root, unsigned int creator_id, rc = device_add(&gdev->dev); if (rc) goto error; - get_device(&gdev->dev); rc = __ccwgroup_create_symlinks(gdev); - if (!rc) { - mutex_unlock(&gdev->reg_mutex); - put_device(&gdev->dev); - return 0; + if (rc) { + device_del(&gdev->dev); + goto error; } - device_unregister(&gdev->dev); + mutex_unlock(&gdev->reg_mutex); + return 0; error: for (i = 0; i < num_devices; i++) if (gdev->cdev[i]) { @@ -342,7 +400,15 @@ error: EXPORT_SYMBOL(ccwgroup_create_from_string); static int ccwgroup_notifier(struct notifier_block *nb, unsigned long action, - void *data); + void *data) +{ + struct device *dev = data; + + if (action == BUS_NOTIFY_UNBIND_DRIVER) + device_schedule_callback(dev, ccwgroup_ungroup_callback); + + return NOTIFY_OK; +} static struct notifier_block ccwgroup_nb = { .notifier_call = ccwgroup_notifier @@ -374,128 +440,21 @@ module_exit(cleanup_ccwgroup); /************************** driver stuff ******************************/ -static int -ccwgroup_set_online(struct ccwgroup_device *gdev) -{ - struct ccwgroup_driver *gdrv; - int ret; - - if (atomic_cmpxchg(&gdev->onoff, 0, 1) != 0) - return -EAGAIN; - if (gdev->state == CCWGROUP_ONLINE) { - ret = 0; - goto out; - } - if (!gdev->dev.driver) { - ret = -EINVAL; - goto out; - } - gdrv = to_ccwgroupdrv (gdev->dev.driver); - if ((ret = gdrv->set_online ? gdrv->set_online(gdev) : 0)) - goto out; - - gdev->state = CCWGROUP_ONLINE; - out: - atomic_set(&gdev->onoff, 0); - return ret; -} - -static int -ccwgroup_set_offline(struct ccwgroup_device *gdev) -{ - struct ccwgroup_driver *gdrv; - int ret; - - if (atomic_cmpxchg(&gdev->onoff, 0, 1) != 0) - return -EAGAIN; - if (gdev->state == CCWGROUP_OFFLINE) { - ret = 0; - goto out; - } - if (!gdev->dev.driver) { - ret = -EINVAL; - goto out; - } - gdrv = to_ccwgroupdrv (gdev->dev.driver); - if ((ret = gdrv->set_offline ? gdrv->set_offline(gdev) : 0)) - goto out; - - gdev->state = CCWGROUP_OFFLINE; - out: - atomic_set(&gdev->onoff, 0); - return ret; -} - -static ssize_t -ccwgroup_online_store (struct device *dev, struct device_attribute *attr, const char *buf, size_t count) -{ - struct ccwgroup_device *gdev; - struct ccwgroup_driver *gdrv; - unsigned long value; - int ret; - - if (!dev->driver) - return -EINVAL; - - gdev = to_ccwgroupdev(dev); - gdrv = to_ccwgroupdrv(dev->driver); - - if (!try_module_get(gdrv->driver.owner)) - return -EINVAL; - - ret = strict_strtoul(buf, 0, &value); - if (ret) - goto out; - - if (value == 1) - ret = ccwgroup_set_online(gdev); - else if (value == 0) - ret = ccwgroup_set_offline(gdev); - else - ret = -EINVAL; -out: - module_put(gdrv->driver.owner); - return (ret == 0) ? count : ret; -} - -static ssize_t -ccwgroup_online_show (struct device *dev, struct device_attribute *attr, char *buf) +static int ccwgroup_probe(struct device *dev) { - int online; - - online = (to_ccwgroupdev(dev)->state == CCWGROUP_ONLINE); - - return sprintf(buf, online ? "1\n" : "0\n"); -} - -static int -ccwgroup_probe (struct device *dev) -{ - struct ccwgroup_device *gdev; - struct ccwgroup_driver *gdrv; - - int ret; - - gdev = to_ccwgroupdev(dev); - gdrv = to_ccwgroupdrv(dev->driver); - - ret = gdrv->probe ? gdrv->probe(gdev) : -ENODEV; + struct ccwgroup_device *gdev = to_ccwgroupdev(dev); + struct ccwgroup_driver *gdrv = to_ccwgroupdrv(dev->driver); - return ret; + return gdrv->probe ? gdrv->probe(gdev) : -ENODEV; } -static int -ccwgroup_remove (struct device *dev) +static int ccwgroup_remove(struct device *dev) { - struct ccwgroup_device *gdev; - struct ccwgroup_driver *gdrv; + struct ccwgroup_device *gdev = to_ccwgroupdev(dev); + struct ccwgroup_driver *gdrv = to_ccwgroupdrv(dev->driver); if (!dev->driver) return 0; - - gdev = to_ccwgroupdev(dev); - gdrv = to_ccwgroupdrv(dev->driver); - if (gdrv->remove) gdrv->remove(gdev); @@ -504,15 +463,11 @@ ccwgroup_remove (struct device *dev) static void ccwgroup_shutdown(struct device *dev) { - struct ccwgroup_device *gdev; - struct ccwgroup_driver *gdrv; + struct ccwgroup_device *gdev = to_ccwgroupdev(dev); + struct ccwgroup_driver *gdrv = to_ccwgroupdrv(dev->driver); if (!dev->driver) return; - - gdev = to_ccwgroupdev(dev); - gdrv = to_ccwgroupdrv(dev->driver); - if (gdrv->shutdown) gdrv->shutdown(gdev); } @@ -588,26 +543,12 @@ static const struct dev_pm_ops ccwgroup_pm_ops = { static struct bus_type ccwgroup_bus_type = { .name = "ccwgroup", .match = ccwgroup_bus_match, - .uevent = ccwgroup_uevent, .probe = ccwgroup_probe, .remove = ccwgroup_remove, .shutdown = ccwgroup_shutdown, .pm = &ccwgroup_pm_ops, }; - -static int ccwgroup_notifier(struct notifier_block *nb, unsigned long action, - void *data) -{ - struct device *dev = data; - - if (action == BUS_NOTIFY_UNBIND_DRIVER) - device_schedule_callback(dev, ccwgroup_ungroup_callback); - - return NOTIFY_OK; -} - - /** * ccwgroup_driver_register() - register a ccw group driver * @cdriver: driver to be registered @@ -621,9 +562,9 @@ int ccwgroup_driver_register(struct ccwgroup_driver *cdriver) return driver_register(&cdriver->driver); } +EXPORT_SYMBOL(ccwgroup_driver_register); -static int -__ccwgroup_match_all(struct device *dev, void *data) +static int __ccwgroup_match_all(struct device *dev, void *data) { return 1; } @@ -654,6 +595,7 @@ void ccwgroup_driver_unregister(struct ccwgroup_driver *cdriver) put_driver(&cdriver->driver); driver_unregister(&cdriver->driver); } +EXPORT_SYMBOL(ccwgroup_driver_unregister); /** * ccwgroup_probe_ccwdev() - probe function for slave devices @@ -668,6 +610,7 @@ int ccwgroup_probe_ccwdev(struct ccw_device *cdev) { return 0; } +EXPORT_SYMBOL(ccwgroup_probe_ccwdev); /** * ccwgroup_remove_ccwdev() - remove function for slave devices @@ -704,9 +647,5 @@ void ccwgroup_remove_ccwdev(struct ccw_device *cdev) /* Release ccwgroup device reference for local processing. */ put_device(&gdev->dev); } - -MODULE_LICENSE("GPL"); -EXPORT_SYMBOL(ccwgroup_driver_register); -EXPORT_SYMBOL(ccwgroup_driver_unregister); -EXPORT_SYMBOL(ccwgroup_probe_ccwdev); EXPORT_SYMBOL(ccwgroup_remove_ccwdev); +MODULE_LICENSE("GPL"); -- cgit v1.2.3 From 65b4e403ac926f5196c2f28c4ec783d32dc322f0 Mon Sep 17 00:00:00 2001 From: Sebastian Ott Date: Sun, 30 Oct 2011 15:16:54 +0100 Subject: [S390] chsc_sch: add support for irq statistics Add support for CHSC I/O interrupt statistics in /proc/interrupts. Signed-off-by: Sebastian Ott Signed-off-by: Martin Schwidefsky --- arch/s390/include/asm/irq.h | 1 + arch/s390/kernel/irq.c | 1 + drivers/s390/cio/chsc_sch.c | 5 ++++- 3 files changed, 6 insertions(+), 1 deletion(-) diff --git a/arch/s390/include/asm/irq.h b/arch/s390/include/asm/irq.h index 1f686059f29..eadfa9fc03b 100644 --- a/arch/s390/include/asm/irq.h +++ b/arch/s390/include/asm/irq.h @@ -28,6 +28,7 @@ enum interruption_class { IOINT_CLW, IOINT_CTC, IOINT_APB, + IOINT_CSC, NMI_NMI, NR_IRQS, }; diff --git a/arch/s390/kernel/irq.c b/arch/s390/kernel/irq.c index d382f9db3df..30faaef3445 100644 --- a/arch/s390/kernel/irq.c +++ b/arch/s390/kernel/irq.c @@ -53,6 +53,7 @@ static const struct irq_class intrclass_names[] = { {.name = "CLW", .desc = "[I/O] CLAW" }, {.name = "CTC", .desc = "[I/O] CTC" }, {.name = "APB", .desc = "[I/O] AP Bus" }, + {.name = "CSC", .desc = "[I/O] CHSC Subchannel" }, {.name = "NMI", .desc = "[NMI] Machine Check" }, }; diff --git a/drivers/s390/cio/chsc_sch.c b/drivers/s390/cio/chsc_sch.c index e950f1ad4dd..0c87b0fc771 100644 --- a/drivers/s390/cio/chsc_sch.c +++ b/drivers/s390/cio/chsc_sch.c @@ -1,7 +1,7 @@ /* * Driver for s390 chsc subchannels * - * Copyright IBM Corp. 2008, 2009 + * Copyright IBM Corp. 2008, 2011 * * Author(s): Cornelia Huck * @@ -12,6 +12,7 @@ #include #include #include +#include #include #include @@ -56,6 +57,8 @@ static void chsc_subchannel_irq(struct subchannel *sch) CHSC_LOG(4, "irb"); CHSC_LOG_HEX(4, irb, sizeof(*irb)); + kstat_cpu(smp_processor_id()).irqs[IOINT_CSC]++; + /* Copy irb to provided request and set done. */ if (!request) { CHSC_MSG(0, "Interrupt on sch 0.%x.%04x with no request\n", -- cgit v1.2.3 From 3948a102509f3bc3cc8728b2556a1ad7a4dbd7e7 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Sun, 30 Oct 2011 15:16:55 +0100 Subject: [S390] zcore: add missing module.h include Add missing module.h include to prevent build breakage after the module.h split work hits Linus' tree. Signed-off-by: Heiko Carstens Signed-off-by: Martin Schwidefsky --- drivers/s390/char/zcore.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/s390/char/zcore.c b/drivers/s390/char/zcore.c index f4f1da213e2..43068fbd0ba 100644 --- a/drivers/s390/char/zcore.c +++ b/drivers/s390/char/zcore.c @@ -16,6 +16,7 @@ #include #include #include +#include #include #include #include -- cgit v1.2.3 From 214b8ffc205bcf2ca5b04b3903be13a9257c3fbd Mon Sep 17 00:00:00 2001 From: Stefan Haberland Date: Sun, 30 Oct 2011 15:16:56 +0100 Subject: [S390] dasd: wait for terminated request After terminating a request in the dasd_sleep_on_immediatly function, wait for the clear interrupt to be received before starting the new request. This prevents the requests from getting mixed up. Signed-off-by: Stefan Haberland Signed-off-by: Martin Schwidefsky --- drivers/s390/block/dasd.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/s390/block/dasd.c b/drivers/s390/block/dasd.c index 46054c75cf3..3b94b6542fc 100644 --- a/drivers/s390/block/dasd.c +++ b/drivers/s390/block/dasd.c @@ -2261,7 +2261,11 @@ int dasd_sleep_on_immediatly(struct dasd_ccw_req *cqr) cqr->callback = dasd_wakeup_cb; cqr->callback_data = DASD_SLEEPON_START_TAG; cqr->status = DASD_CQR_QUEUED; - list_add(&cqr->devlist, &device->ccw_queue); + /* + * add new request as second + * first the terminated cqr needs to be finished + */ + list_add(&cqr->devlist, device->ccw_queue.next); /* let the bh start the request to keep them in order */ dasd_schedule_device_bh(device); -- cgit v1.2.3 From 5915a873fcb1cea5260940be519f2cdf898f7be3 Mon Sep 17 00:00:00 2001 From: Stefan Haberland Date: Sun, 30 Oct 2011 15:16:57 +0100 Subject: [S390] dasd: re-initialize read_conf buffer for retries The buffer for read configuration data has to be initialized with an EBCDIC string to show support for extended UIDs to z/VM. If this read configuration data CQR needs to be retried, the buffer may have changed in between. So re-initialize the buffer to get a correct extended UID under z/VM. Signed-off-by: Stefan Haberland Signed-off-by: Martin Schwidefsky --- drivers/s390/block/dasd.c | 7 +++++-- drivers/s390/block/dasd_eckd.c | 26 ++++++++++++++++++++++++++ drivers/s390/block/dasd_int.h | 1 + 3 files changed, 32 insertions(+), 2 deletions(-) diff --git a/drivers/s390/block/dasd.c b/drivers/s390/block/dasd.c index 3b94b6542fc..ce2a780a9ea 100644 --- a/drivers/s390/block/dasd.c +++ b/drivers/s390/block/dasd.c @@ -2059,13 +2059,14 @@ void dasd_add_request_tail(struct dasd_ccw_req *cqr) /* * Wakeup helper for the 'sleep_on' functions. */ -static void dasd_wakeup_cb(struct dasd_ccw_req *cqr, void *data) +void dasd_wakeup_cb(struct dasd_ccw_req *cqr, void *data) { spin_lock_irq(get_ccwdev_lock(cqr->startdev->cdev)); cqr->callback_data = DASD_SLEEPON_END_TAG; spin_unlock_irq(get_ccwdev_lock(cqr->startdev->cdev)); wake_up(&generic_waitq); } +EXPORT_SYMBOL_GPL(dasd_wakeup_cb); static inline int _wait_for_wakeup(struct dasd_ccw_req *cqr) { @@ -2165,7 +2166,9 @@ static int _dasd_sleep_on(struct dasd_ccw_req *maincqr, int interruptible) } else wait_event(generic_waitq, !(device->stopped)); - cqr->callback = dasd_wakeup_cb; + if (!cqr->callback) + cqr->callback = dasd_wakeup_cb; + cqr->callback_data = DASD_SLEEPON_START_TAG; dasd_add_request_tail(cqr); if (interruptible) { diff --git a/drivers/s390/block/dasd_eckd.c b/drivers/s390/block/dasd_eckd.c index 0e9c4dcf145..cb1bbc2947e 100644 --- a/drivers/s390/block/dasd_eckd.c +++ b/drivers/s390/block/dasd_eckd.c @@ -844,6 +844,30 @@ static void dasd_eckd_fill_rcd_cqr(struct dasd_device *device, set_bit(DASD_CQR_VERIFY_PATH, &cqr->flags); } +/* + * Wakeup helper for read_conf + * if the cqr is not done and needs some error recovery + * the buffer has to be re-initialized with the EBCDIC "V1.0" + * to show support for virtual device SNEQ + */ +static void read_conf_cb(struct dasd_ccw_req *cqr, void *data) +{ + struct ccw1 *ccw; + __u8 *rcd_buffer; + + if (cqr->status != DASD_CQR_DONE) { + ccw = cqr->cpaddr; + rcd_buffer = (__u8 *)((addr_t) ccw->cda); + memset(rcd_buffer, 0, sizeof(*rcd_buffer)); + + rcd_buffer[0] = 0xE5; + rcd_buffer[1] = 0xF1; + rcd_buffer[2] = 0x4B; + rcd_buffer[3] = 0xF0; + } + dasd_wakeup_cb(cqr, data); +} + static int dasd_eckd_read_conf_immediately(struct dasd_device *device, struct dasd_ccw_req *cqr, __u8 *rcd_buffer, @@ -863,6 +887,7 @@ static int dasd_eckd_read_conf_immediately(struct dasd_device *device, clear_bit(DASD_CQR_FLAGS_USE_ERP, &cqr->flags); set_bit(DASD_CQR_ALLOW_SLOCK, &cqr->flags); cqr->retries = 5; + cqr->callback = read_conf_cb; rc = dasd_sleep_on_immediatly(cqr); return rc; } @@ -900,6 +925,7 @@ static int dasd_eckd_read_conf_lpm(struct dasd_device *device, goto out_error; } dasd_eckd_fill_rcd_cqr(device, cqr, rcd_buf, lpm); + cqr->callback = read_conf_cb; ret = dasd_sleep_on(cqr); /* * on success we update the user input parms diff --git a/drivers/s390/block/dasd_int.h b/drivers/s390/block/dasd_int.h index 1dd12bd85a6..563bf8a25dc 100644 --- a/drivers/s390/block/dasd_int.h +++ b/drivers/s390/block/dasd_int.h @@ -643,6 +643,7 @@ struct dasd_ccw_req * dasd_smalloc_request(int , int, int, struct dasd_device *); void dasd_kfree_request(struct dasd_ccw_req *, struct dasd_device *); void dasd_sfree_request(struct dasd_ccw_req *, struct dasd_device *); +void dasd_wakeup_cb(struct dasd_ccw_req *, void *); static inline int dasd_kmalloc_set_cda(struct ccw1 *ccw, void *cda, struct dasd_device *device) -- cgit v1.2.3 From d98e19ccef10c5aadccc4ffe2925e4099ff9606a Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky Date: Sun, 30 Oct 2011 15:16:58 +0100 Subject: [S390] smp: external call vs. emergency signal Use a sigp sense running to decide which signal processor order to use for an ipi. If the target cpu is running use external call, if the target cpu is not running use emergency signal. Signed-off-by: Martin Schwidefsky --- arch/s390/kernel/head31.S | 2 +- arch/s390/kernel/head64.S | 2 +- arch/s390/kernel/smp.c | 12 +++++++++++- 3 files changed, 13 insertions(+), 3 deletions(-) diff --git a/arch/s390/kernel/head31.S b/arch/s390/kernel/head31.S index f21954b44dc..d3f1ab7d90a 100644 --- a/arch/s390/kernel/head31.S +++ b/arch/s390/kernel/head31.S @@ -92,7 +92,7 @@ ENTRY(_stext) .LPG3: # check control registers stctl %c0,%c15,0(%r15) - oi 2(%r15),0x40 # enable sigp emergency signal + oi 2(%r15),0x60 # enable sigp emergency & external call oi 0(%r15),0x10 # switch on low address protection lctl %c0,%c15,0(%r15) diff --git a/arch/s390/kernel/head64.S b/arch/s390/kernel/head64.S index ae5d492b069..99348c0eaa4 100644 --- a/arch/s390/kernel/head64.S +++ b/arch/s390/kernel/head64.S @@ -90,7 +90,7 @@ ENTRY(_stext) .LPG3: # check control registers stctg %c0,%c15,0(%r15) - oi 6(%r15),0x40 # enable sigp emergency signal + oi 6(%r15),0x60 # enable sigp emergency & external call oi 4(%r15),0x10 # switch on low address proctection lctlg %c0,%c15,0(%r15) diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c index 6c8a977af59..3bde5688ceb 100644 --- a/arch/s390/kernel/smp.c +++ b/arch/s390/kernel/smp.c @@ -209,12 +209,19 @@ static void do_ext_call_interrupt(unsigned int ext_int_code, */ static void smp_ext_bitcall(int cpu, int sig) { + int order; + /* * Set signaling bit in lowcore of target cpu and kick it */ set_bit(sig, (unsigned long *) &lowcore_ptr[cpu]->ext_call_fast); - while (sigp(cpu, sigp_emergency_signal) == sigp_busy) + while (1) { + order = smp_vcpu_scheduled(cpu) ? + sigp_external_call : sigp_emergency_signal; + if (sigp(cpu, order) != sigp_busy) + break; udelay(10); + } } void arch_send_call_function_ipi_mask(const struct cpumask *mask) @@ -754,6 +761,9 @@ void __init smp_prepare_cpus(unsigned int max_cpus) /* request the 0x1201 emergency signal external interrupt */ if (register_external_interrupt(0x1201, do_ext_call_interrupt) != 0) panic("Couldn't request external interrupt 0x1201"); + /* request the 0x1202 external call external interrupt */ + if (register_external_interrupt(0x1202, do_ext_call_interrupt) != 0) + panic("Couldn't request external interrupt 0x1202"); /* Reallocate current lowcore, but keep its contents. */ lowcore = (void *) __get_free_pages(GFP_KERNEL | GFP_DMA, LC_ORDER); -- cgit v1.2.3 From 69ba97436647f3b793f8e0784d1cde63adf241ea Mon Sep 17 00:00:00 2001 From: Carsten Otte Date: Sun, 30 Oct 2011 15:16:59 +0100 Subject: [S390] load user asce on sie_fault On sie_fault we need to switch back to user ASCE. Otherwise we get interresting effects when exiting to "userspace" while the guest space is still active. Signed-off-by: Carsten Otte Signed-off-by: Martin Schwidefsky --- arch/s390/kernel/entry64.S | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/s390/kernel/entry64.S b/arch/s390/kernel/entry64.S index e34907560c2..83a93747e2f 100644 --- a/arch/s390/kernel/entry64.S +++ b/arch/s390/kernel/entry64.S @@ -1081,6 +1081,7 @@ sie_exit: lghi %r2,0 br %r14 sie_fault: + lctlg %c1,%c1,__LC_USER_ASCE # load primary asce lg %r14,__LC_THREAD_INFO # pointer thread_info struct ni __TI_flags+6(%r14),255-(_TIF_SIE>>8) lg %r14,__SF_EMPTY+8(%r15) # load guest register save area -- cgit v1.2.3 From a9162f238a84ee05b09ea4b0ebd97fb20448c28c Mon Sep 17 00:00:00 2001 From: Carsten Otte Date: Sun, 30 Oct 2011 15:17:00 +0100 Subject: [S390] fix possible deadlock in gmap_map_segment Fix possible deadlock reported by lockdep: qemu-system-s39/2963 is trying to acquire lock: (&mm->mmap_sem){++++++}, at: gmap_alloc_table+0x9c/0x120 but task is already holding lock: (&mm->mmap_sem){++++++}, at: gmap_map_segment+0xa6/0x27c Actually gmap_alloc_table is the only called in gmap_map_segment with mmap_sem held, thus it's safe to simply remove the inner lock. Signed-off-by: Carsten Otte Signed-off-by: Martin Schwidefsky --- arch/s390/mm/pgtable.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c index 529a0883837..e4a4cefb92b 100644 --- a/arch/s390/mm/pgtable.c +++ b/arch/s390/mm/pgtable.c @@ -256,6 +256,9 @@ void gmap_disable(struct gmap *gmap) } EXPORT_SYMBOL_GPL(gmap_disable); +/* + * gmap_alloc_table is assumed to be called with mmap_sem held + */ static int gmap_alloc_table(struct gmap *gmap, unsigned long *table, unsigned long init) { @@ -267,14 +270,12 @@ static int gmap_alloc_table(struct gmap *gmap, return -ENOMEM; new = (unsigned long *) page_to_phys(page); crst_table_init(new, init); - down_read(&gmap->mm->mmap_sem); if (*table & _REGION_ENTRY_INV) { list_add(&page->lru, &gmap->crst_list); *table = (unsigned long) new | _REGION_ENTRY_LENGTH | (*table & _REGION_ENTRY_TYPE_MASK); } else __free_pages(page, ALLOC_ORDER); - up_read(&gmap->mm->mmap_sem); return 0; } -- cgit v1.2.3 From cc772456ac9b460693492b3a3d89e8c81eda5874 Mon Sep 17 00:00:00 2001 From: Carsten Otte Date: Sun, 30 Oct 2011 15:17:01 +0100 Subject: [S390] fix list corruption in gmap reverse mapping This introduces locking via mm->page_table_lock to protect the rmap list for guest mappings from being corrupted by concurrent operations. Signed-off-by: Carsten Otte Signed-off-by: Martin Schwidefsky --- arch/s390/mm/pgtable.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c index e4a4cefb92b..96e85ac8926 100644 --- a/arch/s390/mm/pgtable.c +++ b/arch/s390/mm/pgtable.c @@ -222,6 +222,7 @@ void gmap_free(struct gmap *gmap) /* Free all segment & region tables. */ down_read(&gmap->mm->mmap_sem); + spin_lock(&gmap->mm->page_table_lock); list_for_each_entry_safe(page, next, &gmap->crst_list, lru) { table = (unsigned long *) page_to_phys(page); if ((*table & _REGION_ENTRY_TYPE_MASK) == 0) @@ -230,6 +231,7 @@ void gmap_free(struct gmap *gmap) gmap_unlink_segment(gmap, table); __free_pages(page, ALLOC_ORDER); } + spin_unlock(&gmap->mm->page_table_lock); up_read(&gmap->mm->mmap_sem); list_del(&gmap->list); kfree(gmap); @@ -300,6 +302,7 @@ int gmap_unmap_segment(struct gmap *gmap, unsigned long to, unsigned long len) flush = 0; down_read(&gmap->mm->mmap_sem); + spin_lock(&gmap->mm->page_table_lock); for (off = 0; off < len; off += PMD_SIZE) { /* Walk the guest addr space page table */ table = gmap->table + (((to + off) >> 53) & 0x7ff); @@ -321,6 +324,7 @@ int gmap_unmap_segment(struct gmap *gmap, unsigned long to, unsigned long len) *table = _SEGMENT_ENTRY_INV; } out: + spin_unlock(&gmap->mm->page_table_lock); up_read(&gmap->mm->mmap_sem); if (flush) gmap_flush_tlb(gmap); @@ -351,6 +355,7 @@ int gmap_map_segment(struct gmap *gmap, unsigned long from, flush = 0; down_read(&gmap->mm->mmap_sem); + spin_lock(&gmap->mm->page_table_lock); for (off = 0; off < len; off += PMD_SIZE) { /* Walk the gmap address space page table */ table = gmap->table + (((to + off) >> 53) & 0x7ff); @@ -374,12 +379,14 @@ int gmap_map_segment(struct gmap *gmap, unsigned long from, flush |= gmap_unlink_segment(gmap, table); *table = _SEGMENT_ENTRY_INV | _SEGMENT_ENTRY_RO | (from + off); } + spin_unlock(&gmap->mm->page_table_lock); up_read(&gmap->mm->mmap_sem); if (flush) gmap_flush_tlb(gmap); return 0; out_unmap: + spin_unlock(&gmap->mm->page_table_lock); up_read(&gmap->mm->mmap_sem); gmap_unmap_segment(gmap, to, len); return -ENOMEM; @@ -446,7 +453,9 @@ unsigned long gmap_fault(unsigned long address, struct gmap *gmap) page = pmd_page(*pmd); mp = (struct gmap_pgtable *) page->index; rmap->entry = table; + spin_lock(&mm->page_table_lock); list_add(&rmap->list, &mp->mapper); + spin_unlock(&mm->page_table_lock); /* Set gmap segment table entry to page table. */ *table = pmd_val(*pmd) & PAGE_MASK; return vmaddr | (address & ~PMD_MASK); -- cgit v1.2.3 From 499069e1a421e2a85e76846c3237f00f1a5cb435 Mon Sep 17 00:00:00 2001 From: Carsten Otte Date: Sun, 30 Oct 2011 15:17:02 +0100 Subject: [S390] take mmap_sem when walking guest page table gmap_fault needs to walk the guest page table. However, parts of that may change if some other thread does munmap. In that case gmap_unmap_notifier will also unmap the corresponding parts from the guest page table. We need to take mmap_sem in order to serialize these operations. do_exception now calls __gmap_fault with mmap_sem held which does not get exported to modules. The exported function, which is called from KVM, now takes mmap_sem. Reported-by: Heiko Carstens Signed-off-by: Carsten Otte Signed-off-by: Martin Schwidefsky --- arch/s390/include/asm/pgtable.h | 1 + arch/s390/mm/fault.c | 2 +- arch/s390/mm/pgtable.c | 15 ++++++++++++++- 3 files changed, 16 insertions(+), 2 deletions(-) diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h index c0cb794bb36..bc5f520f6f8 100644 --- a/arch/s390/include/asm/pgtable.h +++ b/arch/s390/include/asm/pgtable.h @@ -696,6 +696,7 @@ void gmap_disable(struct gmap *gmap); int gmap_map_segment(struct gmap *gmap, unsigned long from, unsigned long to, unsigned long length); int gmap_unmap_segment(struct gmap *gmap, unsigned long to, unsigned long len); +unsigned long __gmap_fault(unsigned long address, struct gmap *); unsigned long gmap_fault(unsigned long address, struct gmap *); /* diff --git a/arch/s390/mm/fault.c b/arch/s390/mm/fault.c index de3af0c053c..1766def5bc3 100644 --- a/arch/s390/mm/fault.c +++ b/arch/s390/mm/fault.c @@ -307,7 +307,7 @@ static inline int do_exception(struct pt_regs *regs, int access, #ifdef CONFIG_PGSTE if (test_tsk_thread_flag(current, TIF_SIE) && S390_lowcore.gmap) { - address = gmap_fault(address, + address = __gmap_fault(address, (struct gmap *) S390_lowcore.gmap); if (address == -EFAULT) { fault = VM_FAULT_BADMAP; diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c index 96e85ac8926..441d34445d0 100644 --- a/arch/s390/mm/pgtable.c +++ b/arch/s390/mm/pgtable.c @@ -393,7 +393,10 @@ out_unmap: } EXPORT_SYMBOL_GPL(gmap_map_segment); -unsigned long gmap_fault(unsigned long address, struct gmap *gmap) +/* + * this function is assumed to be called with mmap_sem held + */ +unsigned long __gmap_fault(unsigned long address, struct gmap *gmap) { unsigned long *table, vmaddr, segment; struct mm_struct *mm; @@ -461,7 +464,17 @@ unsigned long gmap_fault(unsigned long address, struct gmap *gmap) return vmaddr | (address & ~PMD_MASK); } return -EFAULT; +} + +unsigned long gmap_fault(unsigned long address, struct gmap *gmap) +{ + unsigned long rc; + + down_read(&gmap->mm->mmap_sem); + rc = __gmap_fault(address, gmap); + up_read(&gmap->mm->mmap_sem); + return rc; } EXPORT_SYMBOL_GPL(gmap_fault); -- cgit v1.2.3 From 388186bc920d9200202e4d25de66fa95b1b8fc68 Mon Sep 17 00:00:00 2001 From: Christian Borntraeger Date: Sun, 30 Oct 2011 15:17:03 +0100 Subject: [S390] kvm: Handle diagnose 0x10 (release pages) Linux on System z uses a ballooner based on diagnose 0x10. (aka as collaborative memory management). This patch implements diagnose 0x10 on the guest address space. Signed-off-by: Christian Borntraeger Signed-off-by: Martin Schwidefsky --- arch/s390/include/asm/kvm_host.h | 1 + arch/s390/include/asm/pgtable.h | 1 + arch/s390/kvm/diag.c | 32 +++++++++++++++++++++++++- arch/s390/kvm/kvm-s390.c | 1 + arch/s390/mm/pgtable.c | 49 +++++++++++++++++++++++++++++++++++++++- 5 files changed, 82 insertions(+), 2 deletions(-) diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h index 00ff00dfb24..46c0bdaf479 100644 --- a/arch/s390/include/asm/kvm_host.h +++ b/arch/s390/include/asm/kvm_host.h @@ -143,6 +143,7 @@ struct kvm_vcpu_stat { u32 instruction_sigp_arch; u32 instruction_sigp_prefix; u32 instruction_sigp_restart; + u32 diagnose_10; u32 diagnose_44; }; diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h index bc5f520f6f8..34ede0ea85a 100644 --- a/arch/s390/include/asm/pgtable.h +++ b/arch/s390/include/asm/pgtable.h @@ -698,6 +698,7 @@ int gmap_map_segment(struct gmap *gmap, unsigned long from, int gmap_unmap_segment(struct gmap *gmap, unsigned long to, unsigned long len); unsigned long __gmap_fault(unsigned long address, struct gmap *); unsigned long gmap_fault(unsigned long address, struct gmap *); +void gmap_discard(unsigned long from, unsigned long to, struct gmap *); /* * Certain architectures need to do special things when PTEs diff --git a/arch/s390/kvm/diag.c b/arch/s390/kvm/diag.c index 9e4c84187cf..87cedd61be0 100644 --- a/arch/s390/kvm/diag.c +++ b/arch/s390/kvm/diag.c @@ -1,7 +1,7 @@ /* * diag.c - handling diagnose instructions * - * Copyright IBM Corp. 2008 + * Copyright IBM Corp. 2008,2011 * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License (version 2 only) @@ -15,6 +15,34 @@ #include #include "kvm-s390.h" +static int diag_release_pages(struct kvm_vcpu *vcpu) +{ + unsigned long start, end; + unsigned long prefix = vcpu->arch.sie_block->prefix; + + start = vcpu->arch.guest_gprs[(vcpu->arch.sie_block->ipa & 0xf0) >> 4]; + end = vcpu->arch.guest_gprs[vcpu->arch.sie_block->ipa & 0xf] + 4096; + + if (start & ~PAGE_MASK || end & ~PAGE_MASK || start > end + || start < 2 * PAGE_SIZE) + return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); + + VCPU_EVENT(vcpu, 5, "diag release pages %lX %lX", start, end); + vcpu->stat.diagnose_10++; + + /* we checked for start > end above */ + if (end < prefix || start >= prefix + 2 * PAGE_SIZE) { + gmap_discard(start, end, vcpu->arch.gmap); + } else { + if (start < prefix) + gmap_discard(start, prefix, vcpu->arch.gmap); + if (end >= prefix) + gmap_discard(prefix + 2 * PAGE_SIZE, + end, vcpu->arch.gmap); + } + return 0; +} + static int __diag_time_slice_end(struct kvm_vcpu *vcpu) { VCPU_EVENT(vcpu, 5, "%s", "diag time slice end"); @@ -57,6 +85,8 @@ int kvm_s390_handle_diag(struct kvm_vcpu *vcpu) int code = (vcpu->arch.sie_block->ipb & 0xfff0000) >> 16; switch (code) { + case 0x10: + return diag_release_pages(vcpu); case 0x44: return __diag_time_slice_end(vcpu); case 0x308: diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index dc2b580e27b..189d6bdcac0 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -69,6 +69,7 @@ struct kvm_stats_debugfs_item debugfs_entries[] = { { "instruction_sigp_set_arch", VCPU_STAT(instruction_sigp_arch) }, { "instruction_sigp_set_prefix", VCPU_STAT(instruction_sigp_prefix) }, { "instruction_sigp_restart", VCPU_STAT(instruction_sigp_restart) }, + { "diagnose_10", VCPU_STAT(diagnose_10) }, { "diagnose_44", VCPU_STAT(diagnose_44) }, { NULL } }; diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c index 441d34445d0..301c84d3b54 100644 --- a/arch/s390/mm/pgtable.c +++ b/arch/s390/mm/pgtable.c @@ -1,5 +1,5 @@ /* - * Copyright IBM Corp. 2007,2009 + * Copyright IBM Corp. 2007,2011 * Author(s): Martin Schwidefsky */ @@ -478,6 +478,53 @@ unsigned long gmap_fault(unsigned long address, struct gmap *gmap) } EXPORT_SYMBOL_GPL(gmap_fault); +void gmap_discard(unsigned long from, unsigned long to, struct gmap *gmap) +{ + + unsigned long *table, address, size; + struct vm_area_struct *vma; + struct gmap_pgtable *mp; + struct page *page; + + down_read(&gmap->mm->mmap_sem); + address = from; + while (address < to) { + /* Walk the gmap address space page table */ + table = gmap->table + ((address >> 53) & 0x7ff); + if (unlikely(*table & _REGION_ENTRY_INV)) { + address = (address + PMD_SIZE) & PMD_MASK; + continue; + } + table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN); + table = table + ((address >> 42) & 0x7ff); + if (unlikely(*table & _REGION_ENTRY_INV)) { + address = (address + PMD_SIZE) & PMD_MASK; + continue; + } + table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN); + table = table + ((address >> 31) & 0x7ff); + if (unlikely(*table & _REGION_ENTRY_INV)) { + address = (address + PMD_SIZE) & PMD_MASK; + continue; + } + table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN); + table = table + ((address >> 20) & 0x7ff); + if (unlikely(*table & _SEGMENT_ENTRY_INV)) { + address = (address + PMD_SIZE) & PMD_MASK; + continue; + } + page = pfn_to_page(*table >> PAGE_SHIFT); + mp = (struct gmap_pgtable *) page->index; + vma = find_vma(gmap->mm, mp->vmaddr); + size = min(to - address, PMD_SIZE - (address & ~PMD_MASK)); + zap_page_range(vma, mp->vmaddr | (address & ~PMD_MASK), + size, NULL); + address = (address + PMD_SIZE) & PMD_MASK; + } + up_read(&gmap->mm->mmap_sem); +} +EXPORT_SYMBOL_GPL(gmap_discard); + void gmap_unmap_notifier(struct mm_struct *mm, unsigned long *table) { struct gmap_rmap *rmap, *next; -- cgit v1.2.3 From 80376f347d70ce5fcfb98105d83624518e0911d6 Mon Sep 17 00:00:00 2001 From: Jan Glauber Date: Sun, 30 Oct 2011 15:17:04 +0100 Subject: [S390] Introduce get_clock_fast() Add get_clock_fast() which uses the slightly faster stckf if available. If stckf is not available fall back to stck, which has the same width. Signed-off-by: Jan Glauber Signed-off-by: Martin Schwidefsky --- arch/s390/include/asm/timex.h | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/arch/s390/include/asm/timex.h b/arch/s390/include/asm/timex.h index 88829a40af6..d610bef9c5e 100644 --- a/arch/s390/include/asm/timex.h +++ b/arch/s390/include/asm/timex.h @@ -86,6 +86,17 @@ static inline void get_clock_ext(char *clk) asm volatile("stcke %0" : "=Q" (*clk) : : "cc"); } +static inline unsigned long long get_clock_fast(void) +{ + unsigned long long clk; + + if (test_facility(25)) + asm volatile(".insn s,0xb27c0000,%0" : "=Q" (clk) : : "cc"); + else + clk = get_clock(); + return clk; +} + static inline unsigned long long get_clock_xt(void) { unsigned char clk[16]; -- cgit v1.2.3 From a2b86019826cb97fd964fbaf101410c64cd78681 Mon Sep 17 00:00:00 2001 From: Jan Glauber Date: Sun, 30 Oct 2011 15:17:05 +0100 Subject: [S390] qdio: add timestamp for last queue scan time Add a timestamp per queue and update the timestamp when the queue is scanned. Add the queue timestamps and the timestamp of the last adapter interrupt to the debugfs output. The timestamps are useful for debugging stall conditions. Signed-off-by: Jan Glauber Signed-off-by: Martin Schwidefsky --- drivers/s390/cio/qdio.h | 5 +++++ drivers/s390/cio/qdio_debug.c | 10 ++++++---- drivers/s390/cio/qdio_main.c | 4 ++++ drivers/s390/cio/qdio_thinint.c | 2 +- 4 files changed, 16 insertions(+), 5 deletions(-) diff --git a/drivers/s390/cio/qdio.h b/drivers/s390/cio/qdio.h index 3dd86441da3..c15624b7690 100644 --- a/drivers/s390/cio/qdio.h +++ b/drivers/s390/cio/qdio.h @@ -290,6 +290,9 @@ struct qdio_q { /* error condition during a data transfer */ unsigned int qdio_error; + /* last scan of the queue */ + u64 timestamp; + struct tasklet_struct tasklet; struct qdio_queue_perf_stat q_stats; @@ -449,6 +452,8 @@ static inline int shared_ind(struct qdio_q *q) return references_shared_dsci(i) || has_multiple_inq_on_dsci(i); } +extern u64 last_ai_time; + /* prototypes for thin interrupt */ void qdio_setup_thinint(struct qdio_irq *irq_ptr); int qdio_establish_thinint(struct qdio_irq *irq_ptr); diff --git a/drivers/s390/cio/qdio_debug.c b/drivers/s390/cio/qdio_debug.c index aaf7f935bfd..ed68245f974 100644 --- a/drivers/s390/cio/qdio_debug.c +++ b/drivers/s390/cio/qdio_debug.c @@ -54,15 +54,17 @@ static int qstat_show(struct seq_file *m, void *v) if (!q) return 0; - seq_printf(m, "DSCI: %d nr_used: %d\n", - *(u32 *)q->irq_ptr->dsci, atomic_read(&q->nr_buf_used)); - seq_printf(m, "ftc: %d last_move: %d\n", + seq_printf(m, "Timestamp: %Lx Last AI: %Lx\n", + q->timestamp, last_ai_time); + seq_printf(m, "nr_used: %d ftc: %d last_move: %d\n", + atomic_read(&q->nr_buf_used), q->first_to_check, q->last_move); if (q->is_input_q) { seq_printf(m, "polling: %d ack start: %d ack count: %d\n", q->u.in.polling, q->u.in.ack_start, q->u.in.ack_count); - seq_printf(m, "IRQs disabled: %u\n", + seq_printf(m, "DSCI: %d IRQs disabled: %u\n", + *(u32 *)q->irq_ptr->dsci, test_bit(QDIO_QUEUE_IRQS_DISABLED, &q->u.in.queue_irq_state)); } diff --git a/drivers/s390/cio/qdio_main.c b/drivers/s390/cio/qdio_main.c index 7ded1b26fd2..a76d6764ce6 100644 --- a/drivers/s390/cio/qdio_main.c +++ b/drivers/s390/cio/qdio_main.c @@ -512,6 +512,8 @@ static int get_inbound_buffer_frontier(struct qdio_q *q) int count, stop; unsigned char state = 0; + q->timestamp = get_clock_fast(); + /* * Don't check 128 buffers, as otherwise qdio_inbound_q_moved * would return 0. @@ -781,6 +783,8 @@ static int get_outbound_buffer_frontier(struct qdio_q *q) int count, stop; unsigned char state = 0; + q->timestamp = get_clock_fast(); + if (need_siga_sync(q)) if (((queue_type(q) != QDIO_IQDIO_QFMT) && !pci_out_supported(q)) || diff --git a/drivers/s390/cio/qdio_thinint.c b/drivers/s390/cio/qdio_thinint.c index a3e3949d7b6..8c1f412b7e6 100644 --- a/drivers/s390/cio/qdio_thinint.c +++ b/drivers/s390/cio/qdio_thinint.c @@ -36,7 +36,7 @@ static u8 *tiqdio_alsi; struct indicator_t *q_indicators; -static u64 last_ai_time; +u64 last_ai_time; /* returns addr for the device state change indicator */ static u32 *get_indicator(void) -- cgit v1.2.3 From 25f269f17316549e026c5dd0db7526411a504de6 Mon Sep 17 00:00:00 2001 From: Jan Glauber Date: Sun, 30 Oct 2011 15:17:06 +0100 Subject: [S390] qdio: EQBS retry after CCQ 96 Running under z/VM with QIOASSIST enabled, qdio queues could stall if EQBS did not extract all SBAL states. Add an instant retry for EQBS and, if the retry fails, set up a timer to ensure outstanding SBALs are processed later. While at it, optimize qdio_do_eqbs and qdio_do_sqbs to eliminate 3 jumps on the hot path. Signed-off-by: Jan Glauber Signed-off-by: Martin Schwidefsky --- drivers/s390/cio/qdio_main.c | 81 ++++++++++++++++++++++---------------------- 1 file changed, 41 insertions(+), 40 deletions(-) diff --git a/drivers/s390/cio/qdio_main.c b/drivers/s390/cio/qdio_main.c index a76d6764ce6..7c567b2268a 100644 --- a/drivers/s390/cio/qdio_main.c +++ b/drivers/s390/cio/qdio_main.c @@ -104,9 +104,12 @@ static inline int qdio_check_ccq(struct qdio_q *q, unsigned int ccq) /* all done or next buffer state different */ if (ccq == 0 || ccq == 32) return 0; - /* not all buffers processed */ - if (ccq == 96 || ccq == 97) + /* no buffer processed */ + if (ccq == 97) return 1; + /* not all buffers processed */ + if (ccq == 96) + return 2; /* notify devices immediately */ DBF_ERROR("%4x ccq:%3d", SCH_NO(q), ccq); return -EIO; @@ -126,10 +129,8 @@ static inline int qdio_check_ccq(struct qdio_q *q, unsigned int ccq) static int qdio_do_eqbs(struct qdio_q *q, unsigned char *state, int start, int count, int auto_ack) { + int rc, tmp_count = count, tmp_start = start, nr = q->nr, retried = 0; unsigned int ccq = 0; - int tmp_count = count, tmp_start = start; - int nr = q->nr; - int rc; BUG_ON(!q->irq_ptr->sch_token); qperf_inc(q, eqbs); @@ -140,30 +141,34 @@ again: ccq = do_eqbs(q->irq_ptr->sch_token, state, nr, &tmp_start, &tmp_count, auto_ack); rc = qdio_check_ccq(q, ccq); - - /* At least one buffer was processed, return and extract the remaining - * buffers later. - */ - if ((ccq == 96) && (count != tmp_count)) { - qperf_inc(q, eqbs_partial); - return (count - tmp_count); - } + if (!rc) + return count - tmp_count; if (rc == 1) { DBF_DEV_EVENT(DBF_WARN, q->irq_ptr, "EQBS again:%2d", ccq); goto again; } - if (rc < 0) { - DBF_ERROR("%4x EQBS ERROR", SCH_NO(q)); - DBF_ERROR("%3d%3d%2d", count, tmp_count, nr); - q->handler(q->irq_ptr->cdev, - QDIO_ERROR_ACTIVATE_CHECK_CONDITION, - q->nr, q->first_to_kick, count, - q->irq_ptr->int_parm); - return 0; + if (rc == 2) { + BUG_ON(tmp_count == count); + qperf_inc(q, eqbs_partial); + DBF_DEV_EVENT(DBF_WARN, q->irq_ptr, "EQBS part:%02x", + tmp_count); + /* + * Retry once, if that fails bail out and process the + * extracted buffers before trying again. + */ + if (!retried++) + goto again; + else + return count - tmp_count; } - return count - tmp_count; + + DBF_ERROR("%4x EQBS ERROR", SCH_NO(q)); + DBF_ERROR("%3d%3d%2d", count, tmp_count, nr); + q->handler(q->irq_ptr->cdev, QDIO_ERROR_ACTIVATE_CHECK_CONDITION, + 0, -1, -1, q->irq_ptr->int_parm); + return 0; } /** @@ -196,22 +201,22 @@ static int qdio_do_sqbs(struct qdio_q *q, unsigned char state, int start, again: ccq = do_sqbs(q->irq_ptr->sch_token, state, nr, &tmp_start, &tmp_count); rc = qdio_check_ccq(q, ccq); - if (rc == 1) { + if (!rc) { + WARN_ON(tmp_count); + return count - tmp_count; + } + + if (rc == 1 || rc == 2) { DBF_DEV_EVENT(DBF_INFO, q->irq_ptr, "SQBS again:%2d", ccq); qperf_inc(q, sqbs_partial); goto again; } - if (rc < 0) { - DBF_ERROR("%4x SQBS ERROR", SCH_NO(q)); - DBF_ERROR("%3d%3d%2d", count, tmp_count, nr); - q->handler(q->irq_ptr->cdev, - QDIO_ERROR_ACTIVATE_CHECK_CONDITION, - q->nr, q->first_to_kick, count, - q->irq_ptr->int_parm); - return 0; - } - WARN_ON(tmp_count); - return count - tmp_count; + + DBF_ERROR("%4x SQBS ERROR", SCH_NO(q)); + DBF_ERROR("%3d%3d%2d", count, tmp_count, nr); + q->handler(q->irq_ptr->cdev, QDIO_ERROR_ACTIVATE_CHECK_CONDITION, + 0, -1, -1, q->irq_ptr->int_parm); + return 0; } /* returns number of examined buffers and their common state in *state */ @@ -915,10 +920,6 @@ static void __qdio_outbound_processing(struct qdio_q *q) if (!pci_out_supported(q) && !qdio_outbound_q_done(q)) goto sched; - /* bail out for HiperSockets unicast queues */ - if (queue_type(q) == QDIO_IQDIO_QFMT && !multicast_outbound(q)) - return; - if ((queue_type(q) == QDIO_IQDIO_QFMT) && (atomic_read(&q->nr_buf_used)) > QDIO_IQDIO_POLL_LVL) goto sched; @@ -928,8 +929,8 @@ static void __qdio_outbound_processing(struct qdio_q *q) /* * Now we know that queue type is either qeth without pci enabled - * or HiperSockets multicast. Make sure buffer switch from PRIMED to - * EMPTY is noticed and outbound_handler is called after some time. + * or HiperSockets. Make sure buffer switch from PRIMED to EMPTY + * is noticed and outbound_handler is called after some time. */ if (qdio_outbound_q_done(q)) del_timer(&q->u.out.timer); -- cgit v1.2.3 From 2768b2ded129ee92a8f8bf9049983c6b37ed44bf Mon Sep 17 00:00:00 2001 From: Jan Glauber Date: Sun, 30 Oct 2011 15:17:07 +0100 Subject: [S390] qdio: reset outbound SBAL error states Don't leave outbound SBALs in error state after a target full condition. Reset the state to not initialized to make the error handling consistent across all types of errors. Signed-off-by: Jan Glauber Signed-off-by: Martin Schwidefsky --- drivers/s390/cio/qdio_main.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/s390/cio/qdio_main.c b/drivers/s390/cio/qdio_main.c index 7c567b2268a..5ec5317f790 100644 --- a/drivers/s390/cio/qdio_main.c +++ b/drivers/s390/cio/qdio_main.c @@ -450,7 +450,7 @@ static void process_buffer_error(struct qdio_q *q, int count) qperf_inc(q, target_full); DBF_DEV_EVENT(DBF_INFO, q->irq_ptr, "OUTFULL FTC:%02x", q->first_to_check); - return; + goto set; } DBF_ERROR("%4x BUF ERROR", SCH_NO(q)); @@ -460,6 +460,7 @@ static void process_buffer_error(struct qdio_q *q, int count) q->sbal[q->first_to_check]->element[14].sflags, q->sbal[q->first_to_check]->element[15].sflags); +set: /* * Interrupts may be avoided as long as the error is present * so change the buffer state immediately to avoid starvation. -- cgit v1.2.3 From 6ffed94ea73c0c15e3201d4d479b6efe8343fb96 Mon Sep 17 00:00:00 2001 From: Jan Glauber Date: Sun, 30 Oct 2011 15:17:08 +0100 Subject: [S390] qdio: remove multicast polling The multicast poll check for the outbound queue is redundant since 3d6c76f "[S390] qdio: outbound tasklet scan threshold". Remove the check. Signed-off-by: Jan Glauber Signed-off-by: Martin Schwidefsky --- drivers/s390/cio/qdio.h | 8 -------- drivers/s390/cio/qdio_main.c | 4 ---- 2 files changed, 12 deletions(-) diff --git a/drivers/s390/cio/qdio.h b/drivers/s390/cio/qdio.h index c15624b7690..498a4cd99ff 100644 --- a/drivers/s390/cio/qdio.h +++ b/drivers/s390/cio/qdio.h @@ -18,14 +18,6 @@ #define QDIO_BUSY_BIT_RETRIES 1000 /* = 10s retry time */ #define QDIO_INPUT_THRESHOLD (500 << 12) /* 500 microseconds */ -/* - * if an asynchronous HiperSockets queue runs full, the 10 seconds timer wait - * till next initiative to give transmitted skbs back to the stack is too long. - * Therefore polling is started in case of multicast queue is filled more - * than 50 percent. - */ -#define QDIO_IQDIO_POLL_LVL 65 /* HS multicast queue */ - enum qdio_irq_states { QDIO_IRQ_STATE_INACTIVE, QDIO_IRQ_STATE_ESTABLISHED, diff --git a/drivers/s390/cio/qdio_main.c b/drivers/s390/cio/qdio_main.c index 5ec5317f790..ab9de1be77a 100644 --- a/drivers/s390/cio/qdio_main.c +++ b/drivers/s390/cio/qdio_main.c @@ -921,10 +921,6 @@ static void __qdio_outbound_processing(struct qdio_q *q) if (!pci_out_supported(q) && !qdio_outbound_q_done(q)) goto sched; - if ((queue_type(q) == QDIO_IQDIO_QFMT) && - (atomic_read(&q->nr_buf_used)) > QDIO_IQDIO_POLL_LVL) - goto sched; - if (q->u.out.pci_out_enabled) return; -- cgit v1.2.3 From c8d1c0ff840bbf06c60ff4235202a4b1457d8f59 Mon Sep 17 00:00:00 2001 From: Stefan Haberland Date: Sun, 30 Oct 2011 15:17:09 +0100 Subject: [S390] dasd: prevent path verification before resume Mark the device as suspended and delay execution of the path verification worker to prevent mix-up. Signed-off-by: Stefan Haberland Signed-off-by: Martin Schwidefsky --- drivers/s390/block/dasd.c | 4 ++++ drivers/s390/block/dasd_eckd.c | 14 ++++++++++++-- drivers/s390/block/dasd_int.h | 1 + 3 files changed, 17 insertions(+), 2 deletions(-) diff --git a/drivers/s390/block/dasd.c b/drivers/s390/block/dasd.c index ce2a780a9ea..65894f05a80 100644 --- a/drivers/s390/block/dasd.c +++ b/drivers/s390/block/dasd.c @@ -3289,6 +3289,9 @@ int dasd_generic_pm_freeze(struct ccw_device *cdev) if (IS_ERR(device)) return PTR_ERR(device); + /* mark device as suspended */ + set_bit(DASD_FLAG_SUSPENDED, &device->flags); + if (device->discipline->freeze) rc = device->discipline->freeze(device); @@ -3363,6 +3366,7 @@ int dasd_generic_restore_device(struct ccw_device *cdev) if (device->block) dasd_schedule_block_bh(device->block); + clear_bit(DASD_FLAG_SUSPENDED, &device->flags); dasd_put_device(device); return 0; } diff --git a/drivers/s390/block/dasd_eckd.c b/drivers/s390/block/dasd_eckd.c index cb1bbc2947e..cca25096efc 100644 --- a/drivers/s390/block/dasd_eckd.c +++ b/drivers/s390/block/dasd_eckd.c @@ -1101,6 +1101,12 @@ static void do_path_verification_work(struct work_struct *work) data = container_of(work, struct path_verification_work_data, worker); device = data->device; + /* delay path verification until device was resumed */ + if (test_bit(DASD_FLAG_SUSPENDED, &device->flags)) { + schedule_work(work); + return; + } + opm = 0; npm = 0; ppm = 0; @@ -2047,9 +2053,13 @@ static void dasd_eckd_check_for_device_change(struct dasd_device *device, /* first of all check for state change pending interrupt */ mask = DEV_STAT_ATTENTION | DEV_STAT_DEV_END | DEV_STAT_UNIT_EXCEP; if ((scsw_dstat(&irb->scsw) & mask) == mask) { - /* for alias only and not in offline processing*/ + /* + * for alias only, not in offline processing + * and only if not suspended + */ if (!device->block && private->lcu && - !test_bit(DASD_FLAG_OFFLINE, &device->flags)) { + !test_bit(DASD_FLAG_OFFLINE, &device->flags) && + !test_bit(DASD_FLAG_SUSPENDED, &device->flags)) { /* * the state change could be caused by an alias * reassignment remove device from alias handling diff --git a/drivers/s390/block/dasd_int.h b/drivers/s390/block/dasd_int.h index 563bf8a25dc..afe8c33422e 100644 --- a/drivers/s390/block/dasd_int.h +++ b/drivers/s390/block/dasd_int.h @@ -516,6 +516,7 @@ struct dasd_block { */ #define DASD_FLAG_IS_RESERVED 7 /* The device is reserved */ #define DASD_FLAG_LOCK_STOLEN 8 /* The device lock was stolen */ +#define DASD_FLAG_SUSPENDED 9 /* The device was suspended */ void dasd_put_device_wake(struct dasd_device *); -- cgit v1.2.3 From 399c1d8dbfdcf46977fd2e2a833b02e18a284810 Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky Date: Sun, 30 Oct 2011 15:17:10 +0100 Subject: [S390] sparse: fix access past end of array warnings Remove unnecessary code to avoid false positives from sparse, e.g. arch/s390/kernel/compat_signal.c:221:61: warning: invalid access past the end of 'set32' (8 8) Signed-off-by: Martin Schwidefsky --- arch/s390/kernel/compat_linux.c | 23 +++++------------------ arch/s390/kernel/compat_signal.c | 28 ++++------------------------ 2 files changed, 9 insertions(+), 42 deletions(-) diff --git a/arch/s390/kernel/compat_linux.c b/arch/s390/kernel/compat_linux.c index 24b218737b9..84a98289844 100644 --- a/arch/s390/kernel/compat_linux.c +++ b/arch/s390/kernel/compat_linux.c @@ -362,12 +362,7 @@ asmlinkage long sys32_rt_sigprocmask(int how, compat_sigset_t __user *set, if (set) { if (copy_from_user (&s32, set, sizeof(compat_sigset_t))) return -EFAULT; - switch (_NSIG_WORDS) { - case 4: s.sig[3] = s32.sig[6] | (((long)s32.sig[7]) << 32); - case 3: s.sig[2] = s32.sig[4] | (((long)s32.sig[5]) << 32); - case 2: s.sig[1] = s32.sig[2] | (((long)s32.sig[3]) << 32); - case 1: s.sig[0] = s32.sig[0] | (((long)s32.sig[1]) << 32); - } + s.sig[0] = s32.sig[0] | (((long)s32.sig[1]) << 32); } set_fs (KERNEL_DS); ret = sys_rt_sigprocmask(how, @@ -377,12 +372,8 @@ asmlinkage long sys32_rt_sigprocmask(int how, compat_sigset_t __user *set, set_fs (old_fs); if (ret) return ret; if (oset) { - switch (_NSIG_WORDS) { - case 4: s32.sig[7] = (s.sig[3] >> 32); s32.sig[6] = s.sig[3]; - case 3: s32.sig[5] = (s.sig[2] >> 32); s32.sig[4] = s.sig[2]; - case 2: s32.sig[3] = (s.sig[1] >> 32); s32.sig[2] = s.sig[1]; - case 1: s32.sig[1] = (s.sig[0] >> 32); s32.sig[0] = s.sig[0]; - } + s32.sig[1] = (s.sig[0] >> 32); + s32.sig[0] = s.sig[0]; if (copy_to_user (oset, &s32, sizeof(compat_sigset_t))) return -EFAULT; } @@ -401,12 +392,8 @@ asmlinkage long sys32_rt_sigpending(compat_sigset_t __user *set, ret = sys_rt_sigpending((sigset_t __force __user *) &s, sigsetsize); set_fs (old_fs); if (!ret) { - switch (_NSIG_WORDS) { - case 4: s32.sig[7] = (s.sig[3] >> 32); s32.sig[6] = s.sig[3]; - case 3: s32.sig[5] = (s.sig[2] >> 32); s32.sig[4] = s.sig[2]; - case 2: s32.sig[3] = (s.sig[1] >> 32); s32.sig[2] = s.sig[1]; - case 1: s32.sig[1] = (s.sig[0] >> 32); s32.sig[0] = s.sig[0]; - } + s32.sig[1] = (s.sig[0] >> 32); + s32.sig[0] = s.sig[0]; if (copy_to_user (set, &s32, sizeof(compat_sigset_t))) return -EFAULT; } diff --git a/arch/s390/kernel/compat_signal.c b/arch/s390/kernel/compat_signal.c index a1dc6457016..c68ea9c1804 100644 --- a/arch/s390/kernel/compat_signal.c +++ b/arch/s390/kernel/compat_signal.c @@ -213,16 +213,8 @@ sys32_rt_sigaction(int sig, const struct sigaction32 __user *act, ret = get_user(sa_handler, &act->sa_handler); ret |= __copy_from_user(&set32, &act->sa_mask, sizeof(compat_sigset_t)); - switch (_NSIG_WORDS) { - case 4: new_ka.sa.sa_mask.sig[3] = set32.sig[6] - | (((long)set32.sig[7]) << 32); - case 3: new_ka.sa.sa_mask.sig[2] = set32.sig[4] - | (((long)set32.sig[5]) << 32); - case 2: new_ka.sa.sa_mask.sig[1] = set32.sig[2] - | (((long)set32.sig[3]) << 32); - case 1: new_ka.sa.sa_mask.sig[0] = set32.sig[0] - | (((long)set32.sig[1]) << 32); - } + new_ka.sa.sa_mask.sig[0] = + set32.sig[0] | (((long)set32.sig[1]) << 32); ret |= __get_user(new_ka.sa.sa_flags, &act->sa_flags); if (ret) @@ -233,20 +225,8 @@ sys32_rt_sigaction(int sig, const struct sigaction32 __user *act, ret = do_sigaction(sig, act ? &new_ka : NULL, oact ? &old_ka : NULL); if (!ret && oact) { - switch (_NSIG_WORDS) { - case 4: - set32.sig[7] = (old_ka.sa.sa_mask.sig[3] >> 32); - set32.sig[6] = old_ka.sa.sa_mask.sig[3]; - case 3: - set32.sig[5] = (old_ka.sa.sa_mask.sig[2] >> 32); - set32.sig[4] = old_ka.sa.sa_mask.sig[2]; - case 2: - set32.sig[3] = (old_ka.sa.sa_mask.sig[1] >> 32); - set32.sig[2] = old_ka.sa.sa_mask.sig[1]; - case 1: - set32.sig[1] = (old_ka.sa.sa_mask.sig[0] >> 32); - set32.sig[0] = old_ka.sa.sa_mask.sig[0]; - } + set32.sig[1] = (old_ka.sa.sa_mask.sig[0] >> 32); + set32.sig[0] = old_ka.sa.sa_mask.sig[0]; ret = put_user((unsigned long)old_ka.sa.sa_handler, &oact->sa_handler); ret |= __copy_to_user(&oact->sa_mask, &set32, sizeof(compat_sigset_t)); -- cgit v1.2.3 From c4736d968254d71eba6814b2234a4e63f40aca15 Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky Date: Sun, 30 Oct 2011 15:17:11 +0100 Subject: [S390] sparse: fix sparse static warnings Make functions and data static to avoid sparse warnings. Signed-off-by: Martin Schwidefsky --- arch/s390/boot/compressed/misc.c | 2 +- arch/s390/kernel/ipl.c | 8 ++++---- arch/s390/kernel/kprobes.c | 2 +- arch/s390/kernel/vtime.c | 2 +- drivers/s390/cio/qdio_main.c | 2 +- drivers/s390/cio/qdio_thinint.c | 2 +- drivers/s390/kvm/kvm_virtio.c | 2 +- drivers/s390/net/ctcm_sysfs.c | 2 +- drivers/s390/net/lcs.c | 2 +- drivers/s390/net/qeth_l3_main.c | 2 +- 10 files changed, 13 insertions(+), 13 deletions(-) diff --git a/arch/s390/boot/compressed/misc.c b/arch/s390/boot/compressed/misc.c index 028f23ea81d..465eca756fe 100644 --- a/arch/s390/boot/compressed/misc.c +++ b/arch/s390/boot/compressed/misc.c @@ -61,7 +61,7 @@ static unsigned long free_mem_end_ptr; extern _sclp_print_early(const char *); -int puts(const char *s) +static int puts(const char *s) { _sclp_print_early(s); return 0; diff --git a/arch/s390/kernel/ipl.c b/arch/s390/kernel/ipl.c index 296458360a3..6296809c8e0 100644 --- a/arch/s390/kernel/ipl.c +++ b/arch/s390/kernel/ipl.c @@ -276,8 +276,8 @@ static ssize_t ipl_type_show(struct kobject *kobj, struct kobj_attribute *attr, static struct kobj_attribute sys_ipl_type_attr = __ATTR_RO(ipl_type); /* VM IPL PARM routines */ -size_t reipl_get_ascii_vmparm(char *dest, size_t size, - const struct ipl_parameter_block *ipb) +static size_t reipl_get_ascii_vmparm(char *dest, size_t size, + const struct ipl_parameter_block *ipb) { int i; size_t len; @@ -339,8 +339,8 @@ static size_t scpdata_length(const char* buf, size_t count) return count; } -size_t reipl_append_ascii_scpdata(char *dest, size_t size, - const struct ipl_parameter_block *ipb) +static size_t reipl_append_ascii_scpdata(char *dest, size_t size, + const struct ipl_parameter_block *ipb) { size_t count; size_t i; diff --git a/arch/s390/kernel/kprobes.c b/arch/s390/kernel/kprobes.c index 1d05d669107..64b761aef00 100644 --- a/arch/s390/kernel/kprobes.c +++ b/arch/s390/kernel/kprobes.c @@ -635,7 +635,7 @@ void __kprobes jprobe_return(void) asm volatile(".word 0x0002"); } -void __kprobes jprobe_return_end(void) +static void __used __kprobes jprobe_return_end(void) { asm volatile("bcr 0,0"); } diff --git a/arch/s390/kernel/vtime.c b/arch/s390/kernel/vtime.c index 4f3de980b0e..bb48977f546 100644 --- a/arch/s390/kernel/vtime.c +++ b/arch/s390/kernel/vtime.c @@ -461,7 +461,7 @@ void add_virt_timer_periodic(void *new) } EXPORT_SYMBOL(add_virt_timer_periodic); -int __mod_vtimer(struct vtimer_list *timer, __u64 expires, int periodic) +static int __mod_vtimer(struct vtimer_list *timer, __u64 expires, int periodic) { struct vtimer_queue *vq; unsigned long flags; diff --git a/drivers/s390/cio/qdio_main.c b/drivers/s390/cio/qdio_main.c index ab9de1be77a..2fcdc0b2f0a 100644 --- a/drivers/s390/cio/qdio_main.c +++ b/drivers/s390/cio/qdio_main.c @@ -281,7 +281,7 @@ static inline int set_buf_state(struct qdio_q *q, int bufnr, } /* set slsb states to initial state */ -void qdio_init_buf_states(struct qdio_irq *irq_ptr) +static void qdio_init_buf_states(struct qdio_irq *irq_ptr) { struct qdio_q *q; int i; diff --git a/drivers/s390/cio/qdio_thinint.c b/drivers/s390/cio/qdio_thinint.c index 8c1f412b7e6..9d1e7efb5bb 100644 --- a/drivers/s390/cio/qdio_thinint.c +++ b/drivers/s390/cio/qdio_thinint.c @@ -29,7 +29,7 @@ /* list of thin interrupt input queues */ static LIST_HEAD(tiq_list); -DEFINE_MUTEX(tiq_list_lock); +static DEFINE_MUTEX(tiq_list_lock); /* adapter local summary indicator */ static u8 *tiqdio_alsi; diff --git a/drivers/s390/kvm/kvm_virtio.c b/drivers/s390/kvm/kvm_virtio.c index aec60d55b10..83c69fbc43f 100644 --- a/drivers/s390/kvm/kvm_virtio.c +++ b/drivers/s390/kvm/kvm_virtio.c @@ -33,7 +33,7 @@ * The pointer to our (page) of device descriptions. */ static void *kvm_devices; -struct work_struct hotplug_work; +static struct work_struct hotplug_work; struct kvm_device { struct virtio_device vdev; diff --git a/drivers/s390/net/ctcm_sysfs.c b/drivers/s390/net/ctcm_sysfs.c index 8305319b2a8..650aec1839e 100644 --- a/drivers/s390/net/ctcm_sysfs.c +++ b/drivers/s390/net/ctcm_sysfs.c @@ -159,7 +159,7 @@ static ssize_t ctcm_proto_store(struct device *dev, return count; } -const char *ctcm_type[] = { +static const char *ctcm_type[] = { "not a channel", "CTC/A", "FICON channel", diff --git a/drivers/s390/net/lcs.c b/drivers/s390/net/lcs.c index 05fb3f7c728..c28713da1ec 100644 --- a/drivers/s390/net/lcs.c +++ b/drivers/s390/net/lcs.c @@ -1970,7 +1970,7 @@ lcs_portno_store (struct device *dev, struct device_attribute *attr, const char static DEVICE_ATTR(portno, 0644, lcs_portno_show, lcs_portno_store); -const char *lcs_type[] = { +static const char *lcs_type[] = { "not a channel", "2216 parallel", "2216 channel", diff --git a/drivers/s390/net/qeth_l3_main.c b/drivers/s390/net/qeth_l3_main.c index ce735204d31..e4c1176ee25 100644 --- a/drivers/s390/net/qeth_l3_main.c +++ b/drivers/s390/net/qeth_l3_main.c @@ -1415,7 +1415,7 @@ static int qeth_l3_send_checksum_command(struct qeth_card *card) return 0; } -int qeth_l3_set_rx_csum(struct qeth_card *card, int on) +static int qeth_l3_set_rx_csum(struct qeth_card *card, int on) { int rc = 0; -- cgit v1.2.3 From e54aafa0c3bef84bfd39b4c713942ae7cdcfc58a Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky Date: Sun, 30 Oct 2011 15:17:12 +0100 Subject: [S390] sparse: fix sparse ANSI-C warnings Fix prototype of some functions in arch/s390/oprofile to avoid non-ANSI warnings from sparse. Signed-off-by: Martin Schwidefsky --- arch/s390/oprofile/hwsampler.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/arch/s390/oprofile/hwsampler.c b/arch/s390/oprofile/hwsampler.c index 4552ce40c81..f43c0e4282a 100644 --- a/arch/s390/oprofile/hwsampler.c +++ b/arch/s390/oprofile/hwsampler.c @@ -994,7 +994,7 @@ allocate_error: * * Returns 0 on success, !0 on failure. */ -int hwsampler_deallocate() +int hwsampler_deallocate(void) { int rc; @@ -1035,7 +1035,7 @@ unsigned long hwsampler_get_sample_overflow_count(unsigned int cpu) return cb->sample_overflow; } -int hwsampler_setup() +int hwsampler_setup(void) { int rc; int cpu; @@ -1102,7 +1102,7 @@ setup_exit: return rc; } -int hwsampler_shutdown() +int hwsampler_shutdown(void) { int rc; @@ -1203,7 +1203,7 @@ start_all_exit: * * Returns 0 on success, !0 on failure. */ -int hwsampler_stop_all() +int hwsampler_stop_all(void) { int tmp_rc, rc, cpu; struct hws_cpu_buffer *cb; -- cgit v1.2.3 From 638ad34a8811119b32247b7722288ef8b90907d1 Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky Date: Sun, 30 Oct 2011 15:17:13 +0100 Subject: [S390] sparse: fix sparse warnings about missing prototypes Add prototypes and includes for functions used in different modules. Signed-off-by: Martin Schwidefsky --- arch/s390/include/asm/page.h | 1 + arch/s390/include/asm/processor.h | 2 ++ arch/s390/include/asm/spinlock.h | 2 ++ arch/s390/include/asm/system.h | 2 ++ arch/s390/kernel/entry.h | 13 +++++++++++-- arch/s390/kernel/ipl.c | 1 + arch/s390/kernel/process.c | 1 + arch/s390/kernel/suspend.c | 1 + arch/s390/kernel/time.c | 1 + arch/s390/mm/mmap.c | 1 + arch/s390/mm/pageattr.c | 1 + drivers/s390/cio/css.h | 2 ++ kernel/sysctl.c | 8 -------- 13 files changed, 26 insertions(+), 10 deletions(-) diff --git a/arch/s390/include/asm/page.h b/arch/s390/include/asm/page.h index accb372ddc7..f7ec548c2b9 100644 --- a/arch/s390/include/asm/page.h +++ b/arch/s390/include/asm/page.h @@ -177,6 +177,7 @@ static inline int page_test_and_clear_young(unsigned long pfn) struct page; void arch_free_page(struct page *page, int order); void arch_alloc_page(struct page *page, int order); +void arch_set_page_states(int make_stable); static inline int devmem_is_allowed(unsigned long pfn) { diff --git a/arch/s390/include/asm/processor.h b/arch/s390/include/asm/processor.h index 20ffb12d4ae..5f33d37d032 100644 --- a/arch/s390/include/asm/processor.h +++ b/arch/s390/include/asm/processor.h @@ -33,6 +33,8 @@ static inline void get_cpu_id(struct cpuid *ptr) extern void s390_adjust_jiffies(void); extern int get_cpu_capability(unsigned int *); +extern const struct seq_operations cpuinfo_op; +extern int sysctl_ieee_emulation_warnings; /* * User space process size: 2GB for 31 bit, 4TB or 8PT for 64 bit. diff --git a/arch/s390/include/asm/spinlock.h b/arch/s390/include/asm/spinlock.h index 56612fc8186..fd94dfec8d0 100644 --- a/arch/s390/include/asm/spinlock.h +++ b/arch/s390/include/asm/spinlock.h @@ -13,6 +13,8 @@ #include +extern int spin_retry; + static inline int _raw_compare_and_swap(volatile unsigned int *lock, unsigned int old, unsigned int new) diff --git a/arch/s390/include/asm/system.h b/arch/s390/include/asm/system.h index d73cc6b6000..ef573c1d71a 100644 --- a/arch/s390/include/asm/system.h +++ b/arch/s390/include/asm/system.h @@ -20,6 +20,8 @@ struct task_struct; +extern int sysctl_userprocess_debug; + extern struct task_struct *__switch_to(void *, void *); extern void update_per_regs(struct task_struct *task); diff --git a/arch/s390/kernel/entry.h b/arch/s390/kernel/entry.h index 66729eb7bbc..ef8fb1d6e8d 100644 --- a/arch/s390/kernel/entry.h +++ b/arch/s390/kernel/entry.h @@ -5,24 +5,33 @@ #include #include + +extern void (*pgm_check_table[128])(struct pt_regs *, long, unsigned long); +extern void *restart_stack; + +asmlinkage long do_syscall_trace_enter(struct pt_regs *regs); +asmlinkage void do_syscall_trace_exit(struct pt_regs *regs); + void do_protection_exception(struct pt_regs *, long, unsigned long); void do_dat_exception(struct pt_regs *, long, unsigned long); void do_asce_exception(struct pt_regs *, long, unsigned long); -extern int sysctl_userprocess_debug; - void do_per_trap(struct pt_regs *regs); void syscall_trace(struct pt_regs *regs, int entryexit); void kernel_stack_overflow(struct pt_regs * regs); void do_signal(struct pt_regs *regs); int handle_signal32(unsigned long sig, struct k_sigaction *ka, siginfo_t *info, sigset_t *oldset, struct pt_regs *regs); +void do_notify_resume(struct pt_regs *regs); void do_extint(struct pt_regs *regs, unsigned int, unsigned int, unsigned long); +void do_restart(void); int __cpuinit start_secondary(void *cpuvoid); void __init startup_init(void); void die(const char * str, struct pt_regs * regs, long err); +void __init time_init(void); + struct s390_mmap_arg_struct; struct fadvise64_64_args; struct old_sigaction; diff --git a/arch/s390/kernel/ipl.c b/arch/s390/kernel/ipl.c index 6296809c8e0..affa8e68124 100644 --- a/arch/s390/kernel/ipl.c +++ b/arch/s390/kernel/ipl.c @@ -27,6 +27,7 @@ #include #include #include +#include "entry.h" #define IPL_PARM_BLOCK_VERSION 0 diff --git a/arch/s390/kernel/process.c b/arch/s390/kernel/process.c index 5e64a9a29ea..9451b210a1b 100644 --- a/arch/s390/kernel/process.c +++ b/arch/s390/kernel/process.c @@ -12,6 +12,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/s390/kernel/suspend.c b/arch/s390/kernel/suspend.c index b6f9afed74e..47df775c844 100644 --- a/arch/s390/kernel/suspend.c +++ b/arch/s390/kernel/suspend.c @@ -7,6 +7,7 @@ */ #include +#include #include #include diff --git a/arch/s390/kernel/time.c b/arch/s390/kernel/time.c index 8d65bd0383f..ebbfab3c6e5 100644 --- a/arch/s390/kernel/time.c +++ b/arch/s390/kernel/time.c @@ -48,6 +48,7 @@ #include #include #include +#include "entry.h" /* change this if you have some constant time drift */ #define USECS_PER_JIFFY ((unsigned long) 1000000/HZ) diff --git a/arch/s390/mm/mmap.c b/arch/s390/mm/mmap.c index c9a9f7f1818..f09c74881b7 100644 --- a/arch/s390/mm/mmap.c +++ b/arch/s390/mm/mmap.c @@ -26,6 +26,7 @@ #include #include +#include #include #include #include diff --git a/arch/s390/mm/pageattr.c b/arch/s390/mm/pageattr.c index d013ed39743..b36537a5f43 100644 --- a/arch/s390/mm/pageattr.c +++ b/arch/s390/mm/pageattr.c @@ -5,6 +5,7 @@ #include #include #include +#include #include static void change_page_attr(unsigned long addr, int numpages, diff --git a/drivers/s390/cio/css.h b/drivers/s390/cio/css.h index 80ebdddf774..33bb4d891e1 100644 --- a/drivers/s390/cio/css.h +++ b/drivers/s390/cio/css.h @@ -133,6 +133,8 @@ struct channel_subsystem { extern struct channel_subsystem *channel_subsystems[]; +void channel_subsystem_reinit(void); + /* Helper functions to build lists for the slow path. */ void css_schedule_eval(struct subchannel_id schid); void css_schedule_eval_all(void); diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 2d2ecdcc8cd..2fe2bc2a57e 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -151,14 +151,6 @@ extern int pwrsw_enabled; extern int unaligned_enabled; #endif -#ifdef CONFIG_S390 -#ifdef CONFIG_MATHEMU -extern int sysctl_ieee_emulation_warnings; -#endif -extern int sysctl_userprocess_debug; -extern int spin_retry; -#endif - #ifdef CONFIG_IA64 extern int no_unaligned_warning; extern int unaligned_dump_stack; -- cgit v1.2.3 From 5b479a79bff752c6719463f093bdd191f4b837db Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky Date: Sun, 30 Oct 2011 15:17:14 +0100 Subject: [S390] sparse: fix sparse warnings in math-emu Fix three sparse warnings in math-emu / sysinfo: arch/s390/kernel/sysinfo.c:448:17: error: return expression in void function arch/s390/kernel/sysinfo.c:445:25: warning: shift too big (32) for type unsigned int arch/s390/kernel/sysinfo.c:445:25: warning: shift too big (32) for type unsigned int Signed-off-by: Martin Schwidefsky --- arch/s390/include/asm/sfp-util.h | 2 +- arch/s390/kernel/sysinfo.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/s390/include/asm/sfp-util.h b/arch/s390/include/asm/sfp-util.h index 0addc6466d9..ca3f8814e36 100644 --- a/arch/s390/include/asm/sfp-util.h +++ b/arch/s390/include/asm/sfp-util.h @@ -72,6 +72,6 @@ extern unsigned long __udiv_qrnnd (unsigned int *, unsigned int, #define UDIV_NEEDS_NORMALIZATION 0 -#define abort() return 0 +#define abort() BUG() #define __BYTE_ORDER __BIG_ENDIAN diff --git a/arch/s390/kernel/sysinfo.c b/arch/s390/kernel/sysinfo.c index 5c9e439bf3f..2a94b774695 100644 --- a/arch/s390/kernel/sysinfo.c +++ b/arch/s390/kernel/sysinfo.c @@ -442,7 +442,7 @@ void s390_adjust_jiffies(void) */ FP_UNPACK_SP(SA, &fmil); if ((info->capability >> 23) == 0) - FP_FROM_INT_S(SB, info->capability, 32, int); + FP_FROM_INT_S(SB, (long) info->capability, 64, long); else FP_UNPACK_SP(SB, &info->capability); FP_DIV_S(SR, SA, SB); -- cgit v1.2.3 From 3c52e49d7c81434e3d2ccb520b3a654c2cc7d03d Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky Date: Sun, 30 Oct 2011 15:17:15 +0100 Subject: [S390] sparse: fix sparse warnings with __user pointers Use __force to quiet sparse warnings about user address space. Signed-off-by: Martin Schwidefsky --- arch/s390/kernel/compat_signal.c | 23 ++++++++++++----------- arch/s390/kernel/traps.c | 3 ++- arch/s390/lib/uaccess_pt.c | 6 ++++-- 3 files changed, 18 insertions(+), 14 deletions(-) diff --git a/arch/s390/kernel/compat_signal.c b/arch/s390/kernel/compat_signal.c index c68ea9c1804..4f68c81d3ff 100644 --- a/arch/s390/kernel/compat_signal.c +++ b/arch/s390/kernel/compat_signal.c @@ -141,7 +141,8 @@ int copy_siginfo_from_user32(siginfo_t *to, compat_siginfo_t __user *from) break; case __SI_FAULT >> 16: err |= __get_user(tmp, &from->si_addr); - to->si_addr = (void __user *)(u64) (tmp & PSW32_ADDR_INSN); + to->si_addr = (void __force __user *) + (u64) (tmp & PSW32_ADDR_INSN); break; case __SI_POLL >> 16: err |= __get_user(to->si_band, &from->si_band); @@ -482,7 +483,7 @@ static int setup_frame32(int sig, struct k_sigaction *ka, } else { regs->gprs[14] = (__u64) frame->retcode | PSW32_ADDR_AMODE; if (__put_user(S390_SYSCALL_OPCODE | __NR_sigreturn, - (u16 __user *)(frame->retcode))) + (u16 __force __user *)(frame->retcode))) goto give_sigsegv; } @@ -491,12 +492,12 @@ static int setup_frame32(int sig, struct k_sigaction *ka, goto give_sigsegv; /* Set up registers for signal handler */ - regs->gprs[15] = (__u64) frame; + regs->gprs[15] = (__force __u64) frame; regs->psw.mask |= PSW_MASK_BA; /* force amode 31 */ - regs->psw.addr = (__u64) ka->sa.sa_handler; + regs->psw.addr = (__force __u64) ka->sa.sa_handler; regs->gprs[2] = map_signal(sig); - regs->gprs[3] = (__u64) &frame->sc; + regs->gprs[3] = (__force __u64) &frame->sc; /* We forgot to include these in the sigcontext. To avoid breaking binary compatibility, they are passed as args. */ @@ -504,7 +505,7 @@ static int setup_frame32(int sig, struct k_sigaction *ka, regs->gprs[5] = current->thread.prot_addr; /* Place signal number on stack to allow backtrace from handler. */ - if (__put_user(regs->gprs[2], (int __user *) &frame->signo)) + if (__put_user(regs->gprs[2], (int __force __user *) &frame->signo)) goto give_sigsegv; return 0; @@ -547,21 +548,21 @@ static int setup_rt_frame32(int sig, struct k_sigaction *ka, siginfo_t *info, } else { regs->gprs[14] = (__u64) frame->retcode; err |= __put_user(S390_SYSCALL_OPCODE | __NR_rt_sigreturn, - (u16 __user *)(frame->retcode)); + (u16 __force __user *)(frame->retcode)); } /* Set up backchain. */ - if (__put_user(regs->gprs[15], (unsigned int __user *) frame)) + if (__put_user(regs->gprs[15], (unsigned int __force __user *) frame)) goto give_sigsegv; /* Set up registers for signal handler */ - regs->gprs[15] = (__u64) frame; + regs->gprs[15] = (__force __u64) frame; regs->psw.mask |= PSW_MASK_BA; /* force amode 31 */ regs->psw.addr = (__u64) ka->sa.sa_handler; regs->gprs[2] = map_signal(sig); - regs->gprs[3] = (__u64) &frame->info; - regs->gprs[4] = (__u64) &frame->uc; + regs->gprs[3] = (__force __u64) &frame->info; + regs->gprs[4] = (__force __u64) &frame->uc; return 0; give_sigsegv: diff --git a/arch/s390/kernel/traps.c b/arch/s390/kernel/traps.c index 79eee3f27af..a9807dd8627 100644 --- a/arch/s390/kernel/traps.c +++ b/arch/s390/kernel/traps.c @@ -334,7 +334,8 @@ void __kprobes do_per_trap(struct pt_regs *regs) info.si_signo = SIGTRAP; info.si_errno = 0; info.si_code = TRAP_HWBKPT; - info.si_addr = (void *) current->thread.per_event.address; + info.si_addr = + (void __force __user *) current->thread.per_event.address; force_sig_info(SIGTRAP, &info, current); } diff --git a/arch/s390/lib/uaccess_pt.c b/arch/s390/lib/uaccess_pt.c index 74833831417..342ae35a5ba 100644 --- a/arch/s390/lib/uaccess_pt.c +++ b/arch/s390/lib/uaccess_pt.c @@ -342,7 +342,8 @@ int futex_atomic_op_pt(int op, u32 __user *uaddr, int oparg, int *old) if (segment_eq(get_fs(), KERNEL_DS)) return __futex_atomic_op_pt(op, uaddr, oparg, old); spin_lock(¤t->mm->page_table_lock); - uaddr = (int __user *) __dat_user_addr((unsigned long) uaddr); + uaddr = (u32 __force __user *) + __dat_user_addr((__force unsigned long) uaddr); if (!uaddr) { spin_unlock(¤t->mm->page_table_lock); return -EFAULT; @@ -378,7 +379,8 @@ int futex_atomic_cmpxchg_pt(u32 *uval, u32 __user *uaddr, if (segment_eq(get_fs(), KERNEL_DS)) return __futex_atomic_cmpxchg_pt(uval, uaddr, oldval, newval); spin_lock(¤t->mm->page_table_lock); - uaddr = (int __user *) __dat_user_addr((unsigned long) uaddr); + uaddr = (u32 __force __user *) + __dat_user_addr((__force unsigned long) uaddr); if (!uaddr) { spin_unlock(¤t->mm->page_table_lock); return -EFAULT; -- cgit v1.2.3 From 246ccea184423ccdec3be1ca3d400cc3c547404d Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky Date: Sun, 30 Oct 2011 15:17:16 +0100 Subject: [S390] sparse: fix sparse NULL pointer warnings Fix two NULL pointer warnings in the dasd driver: drivers/s390/block/dasd_eckd.c:2353:20: warning: Using plain integer as NULL pointer drivers/s390/block/dasd_eckd.c:2415:44: warning: Using plain integer as NULL pointer Signed-off-by: Martin Schwidefsky --- drivers/s390/block/dasd_eckd.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/s390/block/dasd_eckd.c b/drivers/s390/block/dasd_eckd.c index cca25096efc..6ab29680586 100644 --- a/drivers/s390/block/dasd_eckd.c +++ b/drivers/s390/block/dasd_eckd.c @@ -2386,7 +2386,7 @@ static struct dasd_ccw_req *dasd_eckd_build_cp_cmd_track( new_track = 1; end_idaw = 0; len_to_track_end = 0; - idaw_dst = 0; + idaw_dst = NULL; idaw_len = 0; rq_for_each_segment(bv, req, iter) { dst = page_address(bv->bv_page) + bv->bv_offset; @@ -2448,7 +2448,7 @@ static struct dasd_ccw_req *dasd_eckd_build_cp_cmd_track( if (end_idaw) { idaws = idal_create_words(idaws, idaw_dst, idaw_len); - idaw_dst = 0; + idaw_dst = NULL; idaw_len = 0; end_idaw = 0; } -- cgit v1.2.3 From 7bf4074dd9d41d28447f1b106d91ad0ad0ea454a Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky Date: Sun, 30 Oct 2011 15:17:17 +0100 Subject: [S390] sparse: fix sparse symbol shadow warning to_kvmdev and dev_to_virtio both use container_of. Avoid to nest the two macros to quiet the following sparse warning: drivers/s390/kvm/kvm_virtio.c:337:20: warning: symbol '__mptr' shadows an earlier one drivers/s390/kvm/kvm_virtio.c:337:20: originally declared here Signed-off-by: Martin Schwidefsky --- drivers/s390/kvm/kvm_virtio.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/s390/kvm/kvm_virtio.c b/drivers/s390/kvm/kvm_virtio.c index 83c69fbc43f..3c2c923d5c0 100644 --- a/drivers/s390/kvm/kvm_virtio.c +++ b/drivers/s390/kvm/kvm_virtio.c @@ -334,10 +334,10 @@ static void scan_devices(void) */ static int match_desc(struct device *dev, void *data) { - if ((ulong)to_kvmdev(dev_to_virtio(dev))->desc == (ulong)data) - return 1; + struct virtio_device *vdev = dev_to_virtio(dev); + struct kvm_device *kdev = to_kvmdev(vdev); - return 0; + return kdev->desc == data; } /* -- cgit v1.2.3 From e1c4d0142d32f97706b752d94c9e20c3f21901f8 Mon Sep 17 00:00:00 2001 From: Jan Glauber Date: Sun, 30 Oct 2011 15:17:18 +0100 Subject: [S390] add missing __tlb_flush_global() for !CONFIG_SMP MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fix this compiler error for !CONFIG_SMP: CC arch/s390/mm/pgtable.o arch/s390/mm/pgtable.c: In function ‘gmap_flush_tlb’: arch/s390/mm/pgtable.c:202:3: error: implicit declaration of function ‘__tlb_flush_global’ [-Werror=implicit-function-declaration] cc1: some warnings being treated as errors Signed-off-by: Jan Glauber Signed-off-by: Martin Schwidefsky --- arch/s390/include/asm/tlbflush.h | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/s390/include/asm/tlbflush.h b/arch/s390/include/asm/tlbflush.h index 30444538238..1d8648cf2fe 100644 --- a/arch/s390/include/asm/tlbflush.h +++ b/arch/s390/include/asm/tlbflush.h @@ -59,6 +59,7 @@ static inline void __tlb_flush_full(struct mm_struct *mm) } #else #define __tlb_flush_full(mm) __tlb_flush_local() +#define __tlb_flush_global() __tlb_flush_local() #endif /* -- cgit v1.2.3 From 2a3a2d66aa4e5abaf8f9222d21735321f02a00dc Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Sun, 30 Oct 2011 15:17:19 +0100 Subject: [S390] irqstats: split IPI interrupt accounting We use both the external call and emergency call IPIs to signal remote cpus. Therefore it makes sense to account them differently withing /proc/irqstats so we actually know what happened. Signed-off-by: Heiko Carstens Signed-off-by: Martin Schwidefsky --- arch/s390/include/asm/irq.h | 3 ++- arch/s390/kernel/irq.c | 3 ++- arch/s390/kernel/smp.c | 5 ++++- 3 files changed, 8 insertions(+), 3 deletions(-) diff --git a/arch/s390/include/asm/irq.h b/arch/s390/include/asm/irq.h index eadfa9fc03b..ba6d85f88d5 100644 --- a/arch/s390/include/asm/irq.h +++ b/arch/s390/include/asm/irq.h @@ -8,7 +8,8 @@ enum interruption_class { EXTERNAL_INTERRUPT, IO_INTERRUPT, EXTINT_CLK, - EXTINT_IPI, + EXTINT_EXC, + EXTINT_EMS, EXTINT_TMR, EXTINT_TLA, EXTINT_PFL, diff --git a/arch/s390/kernel/irq.c b/arch/s390/kernel/irq.c index 30faaef3445..b9a7fdd9c81 100644 --- a/arch/s390/kernel/irq.c +++ b/arch/s390/kernel/irq.c @@ -33,7 +33,8 @@ static const struct irq_class intrclass_names[] = { {.name = "EXT" }, {.name = "I/O" }, {.name = "CLK", .desc = "[EXT] Clock Comparator" }, - {.name = "IPI", .desc = "[EXT] Signal Processor" }, + {.name = "EXC", .desc = "[EXT] External Call" }, + {.name = "EMS", .desc = "[EXT] Emergency Signal" }, {.name = "TMR", .desc = "[EXT] CPU Timer" }, {.name = "TAL", .desc = "[EXT] Timing Alert" }, {.name = "PFL", .desc = "[EXT] Pseudo Page Fault" }, diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c index 3bde5688ceb..3ea872890da 100644 --- a/arch/s390/kernel/smp.c +++ b/arch/s390/kernel/smp.c @@ -187,7 +187,10 @@ static void do_ext_call_interrupt(unsigned int ext_int_code, { unsigned long bits; - kstat_cpu(smp_processor_id()).irqs[EXTINT_IPI]++; + if (ext_int_code == 0x1202) + kstat_cpu(smp_processor_id()).irqs[EXTINT_EXC]++; + else + kstat_cpu(smp_processor_id()).irqs[EXTINT_EMS]++; /* * handle bit signal external calls */ -- cgit v1.2.3 From 5f4026f8b2e4c5e26713d6c707592a33326a88c4 Mon Sep 17 00:00:00 2001 From: Jan Glauber Date: Sun, 30 Oct 2011 15:17:20 +0100 Subject: [S390] qdio: prevent dsci access without adapter interrupts A kernel panic may occur during sending or receiving network packets on a machine without adapter interrupts since commit d36deae. The bug is triggered by writing to the shared indicator address which is set to 0 if the machine doesn't have adapter interrupts. Make the reading and setting of the shared indicator dependent on the adapter interrupt feature and while at it move the code to the file containing the adapter interrupt related code. Thanks to Jan Jaeger for tracking this down. Reported-by: Jan Jaeger Tested-by: Jan Jaeger Signed-off-by: Jan Glauber Signed-off-by: Martin Schwidefsky --- drivers/s390/cio/qdio.h | 29 ++----------------------- drivers/s390/cio/qdio_main.c | 6 ++---- drivers/s390/cio/qdio_thinint.c | 48 +++++++++++++++++++++++++++++++++++++++-- 3 files changed, 50 insertions(+), 33 deletions(-) diff --git a/drivers/s390/cio/qdio.h b/drivers/s390/cio/qdio.h index 498a4cd99ff..b962ffbc080 100644 --- a/drivers/s390/cio/qdio.h +++ b/drivers/s390/cio/qdio.h @@ -418,32 +418,6 @@ static inline int multicast_outbound(struct qdio_q *q) #define queue_irqs_disabled(q) \ (test_bit(QDIO_QUEUE_IRQS_DISABLED, &q->u.in.queue_irq_state) != 0) -#define TIQDIO_SHARED_IND 63 - -/* device state change indicators */ -struct indicator_t { - u32 ind; /* u32 because of compare-and-swap performance */ - atomic_t count; /* use count, 0 or 1 for non-shared indicators */ -}; - -extern struct indicator_t *q_indicators; - -static inline int has_multiple_inq_on_dsci(struct qdio_irq *irq) -{ - return irq->nr_input_qs > 1; -} - -static inline int references_shared_dsci(struct qdio_irq *irq) -{ - return irq->dsci == &q_indicators[TIQDIO_SHARED_IND].ind; -} - -static inline int shared_ind(struct qdio_q *q) -{ - struct qdio_irq *i = q->irq_ptr; - return references_shared_dsci(i) || has_multiple_inq_on_dsci(i); -} - extern u64 last_ai_time; /* prototypes for thin interrupt */ @@ -457,7 +431,8 @@ int tiqdio_allocate_memory(void); void tiqdio_free_memory(void); int tiqdio_register_thinints(void); void tiqdio_unregister_thinints(void); - +void clear_nonshared_ind(struct qdio_irq *); +int test_nonshared_ind(struct qdio_irq *); /* prototypes for setup */ void qdio_inbound_processing(unsigned long data); diff --git a/drivers/s390/cio/qdio_main.c b/drivers/s390/cio/qdio_main.c index 2fcdc0b2f0a..3ef8d071c64 100644 --- a/drivers/s390/cio/qdio_main.c +++ b/drivers/s390/cio/qdio_main.c @@ -1719,9 +1719,7 @@ int qdio_start_irq(struct ccw_device *cdev, int nr) WARN_ON(queue_irqs_enabled(q)); - if (!shared_ind(q)) - xchg(q->irq_ptr->dsci, 0); - + clear_nonshared_ind(irq_ptr); qdio_stop_polling(q); clear_bit(QDIO_QUEUE_IRQS_DISABLED, &q->u.in.queue_irq_state); @@ -1729,7 +1727,7 @@ int qdio_start_irq(struct ccw_device *cdev, int nr) * We need to check again to not lose initiative after * resetting the ACK state. */ - if (!shared_ind(q) && *q->irq_ptr->dsci) + if (test_nonshared_ind(irq_ptr)) goto rescan; if (!qdio_inbound_q_done(q)) goto rescan; diff --git a/drivers/s390/cio/qdio_thinint.c b/drivers/s390/cio/qdio_thinint.c index 9d1e7efb5bb..011eadea3ee 100644 --- a/drivers/s390/cio/qdio_thinint.c +++ b/drivers/s390/cio/qdio_thinint.c @@ -26,6 +26,13 @@ */ #define TIQDIO_NR_NONSHARED_IND 63 #define TIQDIO_NR_INDICATORS (TIQDIO_NR_NONSHARED_IND + 1) +#define TIQDIO_SHARED_IND 63 + +/* device state change indicators */ +struct indicator_t { + u32 ind; /* u32 because of compare-and-swap performance */ + atomic_t count; /* use count, 0 or 1 for non-shared indicators */ +}; /* list of thin interrupt input queues */ static LIST_HEAD(tiq_list); @@ -34,7 +41,7 @@ static DEFINE_MUTEX(tiq_list_lock); /* adapter local summary indicator */ static u8 *tiqdio_alsi; -struct indicator_t *q_indicators; +static struct indicator_t *q_indicators; u64 last_ai_time; @@ -90,6 +97,43 @@ void tiqdio_remove_input_queues(struct qdio_irq *irq_ptr) synchronize_rcu(); } +static inline int has_multiple_inq_on_dsci(struct qdio_irq *irq_ptr) +{ + return irq_ptr->nr_input_qs > 1; +} + +static inline int references_shared_dsci(struct qdio_irq *irq_ptr) +{ + return irq_ptr->dsci == &q_indicators[TIQDIO_SHARED_IND].ind; +} + +static inline int shared_ind(struct qdio_irq *irq_ptr) +{ + return references_shared_dsci(irq_ptr) || + has_multiple_inq_on_dsci(irq_ptr); +} + +void clear_nonshared_ind(struct qdio_irq *irq_ptr) +{ + if (!is_thinint_irq(irq_ptr)) + return; + if (shared_ind(irq_ptr)) + return; + xchg(irq_ptr->dsci, 0); +} + +int test_nonshared_ind(struct qdio_irq *irq_ptr) +{ + if (!is_thinint_irq(irq_ptr)) + return 0; + if (shared_ind(irq_ptr)) + return 0; + if (*irq_ptr->dsci) + return 1; + else + return 0; +} + static inline u32 clear_shared_ind(void) { if (!atomic_read(&q_indicators[TIQDIO_SHARED_IND].count)) @@ -119,7 +163,7 @@ static inline void tiqdio_call_inq_handlers(struct qdio_irq *irq) q->u.in.queue_start_poll(q->irq_ptr->cdev, q->nr, q->irq_ptr->int_parm); } else { - if (!shared_ind(q)) + if (!shared_ind(q->irq_ptr)) xchg(q->irq_ptr->dsci, 0); /* -- cgit v1.2.3 From 07ea815b22b9f70ec8de6ddf8db63a1dd1585caf Mon Sep 17 00:00:00 2001 From: Michael Holzheu Date: Sun, 30 Oct 2011 15:17:21 +0100 Subject: [S390] Remove error checking from copy_oldmem_page() Currently it can happen that the pre-allocated ELF header contains a wrong memory map which would result in errors when copying /proc/vmcore. In order to still get a valid vmcore, we (temporarily) disable the error checking in copy_oldmem_page(). This will then produce zero pages for those memory regions. Signed-off-by: Michael Holzheu Signed-off-by: Martin Schwidefsky --- arch/s390/kernel/crash_dump.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/arch/s390/kernel/crash_dump.c b/arch/s390/kernel/crash_dump.c index 2a9a3f40557..39f8fd4438f 100644 --- a/arch/s390/kernel/crash_dump.c +++ b/arch/s390/kernel/crash_dump.c @@ -31,7 +31,6 @@ ssize_t copy_oldmem_page(unsigned long pfn, char *buf, size_t csize, unsigned long offset, int userbuf) { unsigned long src; - int rc; if (!csize) return 0; @@ -43,11 +42,11 @@ ssize_t copy_oldmem_page(unsigned long pfn, char *buf, src < OLDMEM_BASE + OLDMEM_SIZE) src -= OLDMEM_BASE; if (userbuf) - rc = copy_to_user_real((void __user *) buf, (void *) src, - csize); + copy_to_user_real((void __force __user *) buf, (void *) src, + csize); else - rc = memcpy_real(buf, (void *) src, csize); - return rc < 0 ? rc : csize; + memcpy_real(buf, (void *) src, csize); + return csize; } /* -- cgit v1.2.3