aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--Documentation/00-INDEX2
-rw-r--r--Documentation/devicetree/bindings/metag/meta-intc.txt82
-rw-r--r--Documentation/kernel-parameters.txt4
-rw-r--r--Documentation/metag/00-INDEX4
-rw-r--r--Documentation/metag/kernel-ABI.txt256
-rw-r--r--MAINTAINERS12
-rw-r--r--arch/Kconfig16
-rw-r--r--arch/metag/Kconfig290
-rw-r--r--arch/metag/Kconfig.debug40
-rw-r--r--arch/metag/Kconfig.soc55
-rw-r--r--arch/metag/Makefile87
-rw-r--r--arch/metag/boot/.gitignore4
-rw-r--r--arch/metag/boot/Makefile68
-rw-r--r--arch/metag/boot/dts/Makefile16
-rw-r--r--arch/metag/boot/dts/skeleton.dts10
-rw-r--r--arch/metag/boot/dts/skeleton.dtsi14
-rw-r--r--arch/metag/configs/meta1_defconfig40
-rw-r--r--arch/metag/configs/meta2_defconfig41
-rw-r--r--arch/metag/configs/meta2_smp_defconfig42
-rw-r--r--arch/metag/include/asm/Kbuild54
-rw-r--r--arch/metag/include/asm/atomic.h53
-rw-r--r--arch/metag/include/asm/atomic_lnkget.h234
-rw-r--r--arch/metag/include/asm/atomic_lock1.h160
-rw-r--r--arch/metag/include/asm/barrier.h85
-rw-r--r--arch/metag/include/asm/bitops.h132
-rw-r--r--arch/metag/include/asm/bug.h12
-rw-r--r--arch/metag/include/asm/cache.h23
-rw-r--r--arch/metag/include/asm/cacheflush.h250
-rw-r--r--arch/metag/include/asm/cachepart.h42
-rw-r--r--arch/metag/include/asm/checksum.h92
-rw-r--r--arch/metag/include/asm/clock.h51
-rw-r--r--arch/metag/include/asm/cmpxchg.h65
-rw-r--r--arch/metag/include/asm/cmpxchg_irq.h42
-rw-r--r--arch/metag/include/asm/cmpxchg_lnkget.h86
-rw-r--r--arch/metag/include/asm/cmpxchg_lock1.h48
-rw-r--r--arch/metag/include/asm/core_reg.h35
-rw-r--r--arch/metag/include/asm/cpu.h14
-rw-r--r--arch/metag/include/asm/da.h43
-rw-r--r--arch/metag/include/asm/delay.h29
-rw-r--r--arch/metag/include/asm/div64.h12
-rw-r--r--arch/metag/include/asm/dma-mapping.h190
-rw-r--r--arch/metag/include/asm/elf.h128
-rw-r--r--arch/metag/include/asm/fixmap.h99
-rw-r--r--arch/metag/include/asm/ftrace.h23
-rw-r--r--arch/metag/include/asm/global_lock.h100
-rw-r--r--arch/metag/include/asm/gpio.h4
-rw-r--r--arch/metag/include/asm/highmem.h62
-rw-r--r--arch/metag/include/asm/hugetlb.h86
-rw-r--r--arch/metag/include/asm/hwthread.h40
-rw-r--r--arch/metag/include/asm/io.h165
-rw-r--r--arch/metag/include/asm/irq.h32
-rw-r--r--arch/metag/include/asm/irqflags.h93
-rw-r--r--arch/metag/include/asm/l2cache.h258
-rw-r--r--arch/metag/include/asm/linkage.h7
-rw-r--r--arch/metag/include/asm/mach/arch.h86
-rw-r--r--arch/metag/include/asm/metag_isa.h81
-rw-r--r--arch/metag/include/asm/metag_mem.h1106
-rw-r--r--arch/metag/include/asm/metag_regs.h1184
-rw-r--r--arch/metag/include/asm/mman.h11
-rw-r--r--arch/metag/include/asm/mmu.h77
-rw-r--r--arch/metag/include/asm/mmu_context.h113
-rw-r--r--arch/metag/include/asm/mmzone.h42
-rw-r--r--arch/metag/include/asm/module.h37
-rw-r--r--arch/metag/include/asm/page.h128
-rw-r--r--arch/metag/include/asm/perf_event.h4
-rw-r--r--arch/metag/include/asm/pgalloc.h79
-rw-r--r--arch/metag/include/asm/pgtable.h370
-rw-r--r--arch/metag/include/asm/processor.h202
-rw-r--r--arch/metag/include/asm/prom.h23
-rw-r--r--arch/metag/include/asm/ptrace.h60
-rw-r--r--arch/metag/include/asm/setup.h8
-rw-r--r--arch/metag/include/asm/smp.h29
-rw-r--r--arch/metag/include/asm/sparsemem.h13
-rw-r--r--arch/metag/include/asm/spinlock.h22
-rw-r--r--arch/metag/include/asm/spinlock_lnkget.h249
-rw-r--r--arch/metag/include/asm/spinlock_lock1.h184
-rw-r--r--arch/metag/include/asm/spinlock_types.h20
-rw-r--r--arch/metag/include/asm/stacktrace.h20
-rw-r--r--arch/metag/include/asm/string.h13
-rw-r--r--arch/metag/include/asm/switch.h21
-rw-r--r--arch/metag/include/asm/syscall.h104
-rw-r--r--arch/metag/include/asm/syscalls.h39
-rw-r--r--arch/metag/include/asm/tbx.h1425
-rw-r--r--arch/metag/include/asm/tcm.h30
-rw-r--r--arch/metag/include/asm/thread_info.h155
-rw-r--r--arch/metag/include/asm/tlb.h36
-rw-r--r--arch/metag/include/asm/tlbflush.h77
-rw-r--r--arch/metag/include/asm/topology.h53
-rw-r--r--arch/metag/include/asm/traps.h48
-rw-r--r--arch/metag/include/asm/uaccess.h241
-rw-r--r--arch/metag/include/asm/unistd.h12
-rw-r--r--arch/metag/include/asm/user_gateway.h44
-rw-r--r--arch/metag/include/uapi/asm/Kbuild13
-rw-r--r--arch/metag/include/uapi/asm/byteorder.h1
-rw-r--r--arch/metag/include/uapi/asm/ptrace.h113
-rw-r--r--arch/metag/include/uapi/asm/resource.h7
-rw-r--r--arch/metag/include/uapi/asm/sigcontext.h31
-rw-r--r--arch/metag/include/uapi/asm/siginfo.h8
-rw-r--r--arch/metag/include/uapi/asm/swab.h26
-rw-r--r--arch/metag/include/uapi/asm/unistd.h21
-rw-r--r--arch/metag/kernel/.gitignore1
-rw-r--r--arch/metag/kernel/Makefile39
-rw-r--r--arch/metag/kernel/asm-offsets.c14
-rw-r--r--arch/metag/kernel/cachepart.c124
-rw-r--r--arch/metag/kernel/clock.c53
-rw-r--r--arch/metag/kernel/core_reg.c117
-rw-r--r--arch/metag/kernel/da.c23
-rw-r--r--arch/metag/kernel/devtree.c114
-rw-r--r--arch/metag/kernel/dma.c507
-rw-r--r--arch/metag/kernel/ftrace.c126
-rw-r--r--arch/metag/kernel/ftrace_stub.S76
-rw-r--r--arch/metag/kernel/head.S57
-rw-r--r--arch/metag/kernel/irq.c323
-rw-r--r--arch/metag/kernel/kick.c101
-rw-r--r--arch/metag/kernel/machines.c20
-rw-r--r--arch/metag/kernel/metag_ksyms.c49
-rw-r--r--arch/metag/kernel/module.c284
-rw-r--r--arch/metag/kernel/perf/Makefile3
-rw-r--r--arch/metag/kernel/perf/perf_event.c861
-rw-r--r--arch/metag/kernel/perf/perf_event.h106
-rw-r--r--arch/metag/kernel/perf_callchain.c96
-rw-r--r--arch/metag/kernel/process.c461
-rw-r--r--arch/metag/kernel/ptrace.c380
-rw-r--r--arch/metag/kernel/setup.c631
-rw-r--r--arch/metag/kernel/signal.c344
-rw-r--r--arch/metag/kernel/smp.c575
-rw-r--r--arch/metag/kernel/stacktrace.c187
-rw-r--r--arch/metag/kernel/sys_metag.c180
-rw-r--r--arch/metag/kernel/tbiunexp.S22
-rw-r--r--arch/metag/kernel/tcm.c151
-rw-r--r--arch/metag/kernel/time.c15
-rw-r--r--arch/metag/kernel/topology.c77
-rw-r--r--arch/metag/kernel/traps.c995
-rw-r--r--arch/metag/kernel/user_gateway.S97
-rw-r--r--arch/metag/kernel/vmlinux.lds.S71
-rw-r--r--arch/metag/lib/Makefile22
-rw-r--r--arch/metag/lib/ashldi3.S33
-rw-r--r--arch/metag/lib/ashrdi3.S33
-rw-r--r--arch/metag/lib/checksum.c168
-rw-r--r--arch/metag/lib/clear_page.S17
-rw-r--r--arch/metag/lib/cmpdi2.S32
-rw-r--r--arch/metag/lib/copy_page.S20
-rw-r--r--arch/metag/lib/delay.c56
-rw-r--r--arch/metag/lib/div64.S108
-rw-r--r--arch/metag/lib/divsi3.S100
-rw-r--r--arch/metag/lib/ip_fast_csum.S32
-rw-r--r--arch/metag/lib/lshrdi3.S33
-rw-r--r--arch/metag/lib/memcpy.S185
-rw-r--r--arch/metag/lib/memmove.S345
-rw-r--r--arch/metag/lib/memset.S86
-rw-r--r--arch/metag/lib/modsi3.S38
-rw-r--r--arch/metag/lib/muldi3.S44
-rw-r--r--arch/metag/lib/ucmpdi2.S27
-rw-r--r--arch/metag/lib/usercopy.c1354
-rw-r--r--arch/metag/mm/Kconfig153
-rw-r--r--arch/metag/mm/Makefile19
-rw-r--r--arch/metag/mm/cache.c521
-rw-r--r--arch/metag/mm/extable.c15
-rw-r--r--arch/metag/mm/fault.c239
-rw-r--r--arch/metag/mm/highmem.c133
-rw-r--r--arch/metag/mm/hugetlbpage.c259
-rw-r--r--arch/metag/mm/init.c451
-rw-r--r--arch/metag/mm/ioremap.c89
-rw-r--r--arch/metag/mm/l2cache.c192
-rw-r--r--arch/metag/mm/maccess.c68
-rw-r--r--arch/metag/mm/mmu-meta1.c157
-rw-r--r--arch/metag/mm/mmu-meta2.c207
-rw-r--r--arch/metag/mm/numa.c81
-rw-r--r--arch/metag/tbx/Makefile21
-rw-r--r--arch/metag/tbx/tbicore.S136
-rw-r--r--arch/metag/tbx/tbictx.S366
-rw-r--r--arch/metag/tbx/tbictxfpu.S190
-rw-r--r--arch/metag/tbx/tbidefr.S175
-rw-r--r--arch/metag/tbx/tbidspram.S161
-rw-r--r--arch/metag/tbx/tbilogf.S48
-rw-r--r--arch/metag/tbx/tbipcx.S451
-rw-r--r--arch/metag/tbx/tbiroot.S87
-rw-r--r--arch/metag/tbx/tbisoft.S237
-rw-r--r--arch/metag/tbx/tbistring.c114
-rw-r--r--arch/metag/tbx/tbitimer.S207
-rw-r--r--drivers/clocksource/Kconfig5
-rw-r--r--drivers/clocksource/Makefile1
-rw-r--r--drivers/clocksource/metag_generic.c198
-rw-r--r--drivers/irqchip/Makefile2
-rw-r--r--drivers/irqchip/irq-metag-ext.c868
-rw-r--r--drivers/irqchip/irq-metag.c343
-rw-r--r--fs/binfmt_elf.c4
-rw-r--r--include/asm-generic/io.h2
-rw-r--r--include/asm-generic/unistd.h9
-rw-r--r--include/clocksource/metag_generic.h21
-rw-r--r--include/linux/irqchip/metag-ext.h33
-rw-r--r--include/linux/irqchip/metag.h24
-rw-r--r--include/linux/mm.h2
-rw-r--r--include/uapi/linux/elf.h2
-rw-r--r--kernel/trace/ring_buffer.c6
-rw-r--r--lib/Kconfig.debug4
-rwxr-xr-xscripts/checkstack.pl8
-rw-r--r--scripts/genksyms/genksyms.c3
-rw-r--r--scripts/recordmcount.c13
-rw-r--r--tools/perf/perf.h6
200 files changed, 26934 insertions, 8 deletions
diff --git a/Documentation/00-INDEX b/Documentation/00-INDEX
index 0f3e8bbab8d..45b3df936d2 100644
--- a/Documentation/00-INDEX
+++ b/Documentation/00-INDEX
@@ -299,6 +299,8 @@ memory-hotplug.txt
- Hotpluggable memory support, how to use and current status.
memory.txt
- info on typical Linux memory problems.
+metag/
+ - directory with info about Linux on Meta architecture.
mips/
- directory with info about Linux on MIPS architecture.
misc-devices/
diff --git a/Documentation/devicetree/bindings/metag/meta-intc.txt b/Documentation/devicetree/bindings/metag/meta-intc.txt
new file mode 100644
index 00000000000..8c47dcbfabc
--- /dev/null
+++ b/Documentation/devicetree/bindings/metag/meta-intc.txt
@@ -0,0 +1,82 @@
+* Meta External Trigger Controller Binding
+
+This binding specifies what properties must be available in the device tree
+representation of a Meta external trigger controller.
+
+Required properties:
+
+ - compatible: Specifies the compatibility list for the interrupt controller.
+ The type shall be <string> and the value shall include "img,meta-intc".
+
+ - num-banks: Specifies the number of interrupt banks (each of which can
+ handle 32 interrupt sources).
+
+ - interrupt-controller: The presence of this property identifies the node
+ as an interrupt controller. No property value shall be defined.
+
+ - #interrupt-cells: Specifies the number of cells needed to encode an
+ interrupt source. The type shall be a <u32> and the value shall be 2.
+
+ - #address-cells: Specifies the number of cells needed to encode an
+ address. The type shall be <u32> and the value shall be 0. As such,
+ 'interrupt-map' nodes do not have to specify a parent unit address.
+
+Optional properties:
+
+ - no-mask: The controller doesn't have any mask registers.
+
+* Interrupt Specifier Definition
+
+ Interrupt specifiers consist of 2 cells encoded as follows:
+
+ - <1st-cell>: The interrupt-number that identifies the interrupt source.
+
+ - <2nd-cell>: The Linux interrupt flags containing level-sense information,
+ encoded as follows:
+ 1 = edge triggered
+ 4 = level-sensitive
+
+* Examples
+
+Example 1:
+
+ /*
+ * Meta external trigger block
+ */
+ intc: intc {
+ // This is an interrupt controller node.
+ interrupt-controller;
+
+ // No address cells so that 'interrupt-map' nodes which
+ // reference this interrupt controller node do not need a parent
+ // address specifier.
+ #address-cells = <0>;
+
+ // Two cells to encode interrupt sources.
+ #interrupt-cells = <2>;
+
+ // Number of interrupt banks
+ num-banks = <2>;
+
+ // No HWMASKEXT is available (specify on Chorus2 and Comet ES1)
+ no-mask;
+
+ // Compatible with Meta hardware trigger block.
+ compatible = "img,meta-intc";
+ };
+
+Example 2:
+
+ /*
+ * An interrupt generating device that is wired to a Meta external
+ * trigger block.
+ */
+ uart1: uart@0x02004c00 {
+ // Interrupt source '5' that is level-sensitive.
+ // Note that there are only two cells as specified in the
+ // interrupt parent's '#interrupt-cells' property.
+ interrupts = <5 4 /* level */>;
+
+ // The interrupt controller that this device is wired to.
+ interrupt-parent = <&intc>;
+ };
diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index 3a54fca730c..4609e81dbc3 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -978,6 +978,10 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
If specified, z/VM IUCV HVC accepts connections
from listed z/VM user IDs only.
+ hwthread_map= [METAG] Comma-separated list of Linux cpu id to
+ hardware thread id mappings.
+ Format: <cpu>:<hwthread>
+
keep_bootcon [KNL]
Do not unregister boot console at start. This is only
useful for debugging when something happens in the window
diff --git a/Documentation/metag/00-INDEX b/Documentation/metag/00-INDEX
new file mode 100644
index 00000000000..db11c513bd5
--- /dev/null
+++ b/Documentation/metag/00-INDEX
@@ -0,0 +1,4 @@
+00-INDEX
+ - this file
+kernel-ABI.txt
+ - Documents metag ABI details
diff --git a/Documentation/metag/kernel-ABI.txt b/Documentation/metag/kernel-ABI.txt
new file mode 100644
index 00000000000..7b8dee83b9c
--- /dev/null
+++ b/Documentation/metag/kernel-ABI.txt
@@ -0,0 +1,256 @@
+ ==========================
+ KERNEL ABIS FOR METAG ARCH
+ ==========================
+
+This document describes the Linux ABIs for the metag architecture, and has the
+following sections:
+
+ (*) Outline of registers
+ (*) Userland registers
+ (*) Kernel registers
+ (*) System call ABI
+ (*) Calling conventions
+
+
+====================
+OUTLINE OF REGISTERS
+====================
+
+The main Meta core registers are arranged in units:
+
+ UNIT Type DESCRIPTION GP EXT PRIV GLOBAL
+ ======= ======= =============== ======= ======= ======= =======
+ CT Special Control unit
+ D0 General Data unit 0 0-7 8-15 16-31 16-31
+ D1 General Data unit 1 0-7 8-15 16-31 16-31
+ A0 General Address unit 0 0-3 4-7 8-15 8-15
+ A1 General Address unit 1 0-3 4-7 8-15 8-15
+ PC Special PC unit 0 1
+ PORT Special Ports
+ TR Special Trigger unit 0-7
+ TT Special Trace unit 0-5
+ FX General FP unit 0-15
+
+GP registers form part of the main context.
+
+Extended context registers (EXT) may not be present on all hardware threads and
+can be context switched if support is enabled and the appropriate bits are set
+in e.g. the D0.8 register to indicate what extended state to preserve.
+
+Global registers are shared between threads and are privilege protected.
+
+See arch/metag/include/asm/metag_regs.h for definitions relating to core
+registers and the fields and bits they contain. See the TRMs for further details
+about special registers.
+
+Several special registers are preserved in the main context; these are the
+interesting ones:
+
+ REG (ALIAS) PURPOSE
+ ======================= ===============================================
+ CT.1 (TXMODE) Processor mode bits (particularly for DSP)
+ CT.2 (TXSTATUS) Condition flags and LSM_STEP (MGET/MSET step)
+ CT.3 (TXRPT) Branch repeat counter
+ PC.0 (PC) Program counter
+
+Some of the general registers have special purposes in the ABI and therefore
+have aliases:
+
+ D0 REG (ALIAS) PURPOSE D1 REG (ALIAS) PURPOSE
+ =============== =============== =============== =======================
+ D0.0 (D0Re0) 32bit result D1.0 (D1Re0) Top half of 64bit result
+ D0.1 (D0Ar6) Argument 6 D1.1 (D1Ar5) Argument 5
+ D0.2 (D0Ar4) Argument 4 D1.2 (D1Ar3) Argument 3
+ D0.3 (D0Ar2) Argument 2 D1.3 (D1Ar1) Argument 1
+ D0.4 (D0FrT) Frame temp D1.4 (D1RtP) Return pointer
+ D0.5 Call preserved D1.5 Call preserved
+ D0.6 Call preserved D1.6 Call preserved
+ D0.7 Call preserved D1.7 Call preserved
+
+ A0 REG (ALIAS) PURPOSE A1 REG (ALIAS) PURPOSE
+ =============== =============== =============== =======================
+ A0.0 (A0StP) Stack pointer A1.0 (A1GbP) Global base pointer
+ A0.1 (A0FrP) Frame pointer A1.1 (A1LbP) Local base pointer
+ A0.2 A1.2
+ A0.3 A1.3
+
+
+==================
+USERLAND REGISTERS
+==================
+
+All the general purpose D0, D1, A0, A1 registers are preserved when entering the
+kernel (including asynchronous events such as interrupts and timer ticks) except
+the following which have special purposes in the ABI:
+
+ REGISTERS WHEN STATUS PURPOSE
+ =============== ======= =============== ===============================
+ D0.8 DSP Preserved ECH, determines what extended
+ DSP state to preserve.
+ A0.0 (A0StP) ALWAYS Preserved Stack >= A0StP may be clobbered
+ at any time by the creation of a
+ signal frame.
+ A1.0 (A1GbP) SMP Clobbered Used as temporary for loading
+ kernel stack pointer and saving
+ core context.
+ A0.15 !SMP Protected Stores kernel stack pointer.
+ A1.15 ALWAYS Protected Stores kernel base pointer.
+
+On UP A0.15 is used to store the kernel stack pointer for storing the userland
+context. A0.15 is global between hardware threads though which means it cannot
+be used on SMP for this purpose. Since no protected local registers are
+available A1GbP is reserved for use as a temporary to allow a percpu stack
+pointer to be loaded for storing the rest of the context.
+
+
+================
+KERNEL REGISTERS
+================
+
+When in the kernel the following registers have special purposes in the ABI:
+
+ REGISTERS WHEN STATUS PURPOSE
+ =============== ======= =============== ===============================
+ A0.0 (A0StP) ALWAYS Preserved Stack >= A0StP may be clobbered
+ at any time by the creation of
+ an irq signal frame.
+ A1.0 (A1GbP) ALWAYS Preserved Reserved (kernel base pointer).
+
+
+===============
+SYSTEM CALL ABI
+===============
+
+When a system call is made, the following registers are effective:
+
+ REGISTERS CALL RETURN
+ =============== ======================= ===============================
+ D0.0 (D0Re0) Return value (or -errno)
+ D1.0 (D1Re0) System call number Clobbered
+ D0.1 (D0Ar6) Syscall arg #6 Preserved
+ D1.1 (D1Ar5) Syscall arg #5 Preserved
+ D0.2 (D0Ar4) Syscall arg #4 Preserved
+ D1.2 (D1Ar3) Syscall arg #3 Preserved
+ D0.3 (D0Ar2) Syscall arg #2 Preserved
+ D1.3 (D1Ar1) Syscall arg #1 Preserved
+
+Due to the limited number of argument registers and some system calls with badly
+aligned 64-bit arguments, 64-bit values are always packed in consecutive
+arguments, even if this is contrary to the normal calling conventions (where the
+two halves would go in a matching pair of data registers).
+
+For example fadvise64_64 usually has the signature:
+
+ long sys_fadvise64_64(i32 fd, i64 offs, i64 len, i32 advice);
+
+But for metag fadvise64_64 is wrapped so that the 64-bit arguments are packed:
+
+ long sys_fadvise64_64_metag(i32 fd, i32 offs_lo,
+ i32 offs_hi, i32 len_lo,
+ i32 len_hi, i32 advice)
+
+So the arguments are packed in the registers like this:
+
+ D0 REG (ALIAS) VALUE D1 REG (ALIAS) VALUE
+ =============== =============== =============== =======================
+ D0.1 (D0Ar6) advice D1.1 (D1Ar5) hi(len)
+ D0.2 (D0Ar4) lo(len) D1.2 (D1Ar3) hi(offs)
+ D0.3 (D0Ar2) lo(offs) D1.3 (D1Ar1) fd
+
+
+===================
+CALLING CONVENTIONS
+===================
+
+These calling conventions apply to both user and kernel code. The stack grows
+from low addresses to high addresses in the metag ABI. The stack pointer (A0StP)
+should always point to the next free address on the stack and should at all
+times be 64-bit aligned. The following registers are effective at the point of a
+call:
+
+ REGISTERS CALL RETURN
+ =============== ======================= ===============================
+ D0.0 (D0Re0) 32bit return value
+ D1.0 (D1Re0) Upper half of 64bit return value
+ D0.1 (D0Ar6) 32bit argument #6 Clobbered
+ D1.1 (D1Ar5) 32bit argument #5 Clobbered
+ D0.2 (D0Ar4) 32bit argument #4 Clobbered
+ D1.2 (D1Ar3) 32bit argument #3 Clobbered
+ D0.3 (D0Ar2) 32bit argument #2 Clobbered
+ D1.3 (D1Ar1) 32bit argument #1 Clobbered
+ D0.4 (D0FrT) Clobbered
+ D1.4 (D1RtP) Return pointer Clobbered
+ D{0-1}.{5-7} Preserved
+ A0.0 (A0StP) Stack pointer Preserved
+ A1.0 (A1GbP) Preserved
+ A0.1 (A0FrP) Frame pointer Preserved
+ A1.1 (A1LbP) Preserved
+ A{0-1}.{2-3} Clobbered
+
+64-bit arguments are placed in matching pairs of registers (i.e. the same
+register number in both D0 and D1 units), with the least significant half in D0
+and the most significant half in D1, leaving a gap where necessary. Further
+arguments are stored on the stack in reverse order (earlier arguments at higher
+addresses):
+
+ ADDRESS 0 1 2 3 4 5 6 7
+ =============== ===== ===== ===== ===== ===== ===== ===== =====
+ A0StP -->
+ A0StP-0x08 32bit argument #8 32bit argument #7
+ A0StP-0x10 32bit argument #10 32bit argument #9
+
+Function prologues tend to look a bit like this:
+
+ /* If frame pointer in use, move it to frame temp register so it can be
+ easily pushed onto stack */
+ MOV D0FrT,A0FrP
+
+ /* If frame pointer in use, set it to stack pointer */
+ ADD A0FrP,A0StP,#0
+
+ /* Preserve D0FrT, D1RtP, D{0-1}.{5-7} on stack, incrementing A0StP */
+ MSETL [A0StP++],D0FrT,D0.5,D0.6,D0.7
+
+ /* Allocate some stack space for local variables */
+ ADD A0StP,A0StP,#0x10
+
+At this point the stack would look like this:
+
+ ADDRESS 0 1 2 3 4 5 6 7
+ =============== ===== ===== ===== ===== ===== ===== ===== =====
+ A0StP -->
+ A0StP-0x08
+ A0StP-0x10
+ A0StP-0x18 Old D0.7 Old D1.7
+ A0StP-0x20 Old D0.6 Old D1.6
+ A0StP-0x28 Old D0.5 Old D1.5
+ A0FrP --> Old A0FrP (frame ptr) Old D1RtP (return ptr)
+ A0FrP-0x08 32bit argument #8 32bit argument #7
+ A0FrP-0x10 32bit argument #10 32bit argument #9
+
+Function epilogues tend to differ depending on the use of a frame pointer. An
+example of a frame pointer epilogue:
+
+ /* Restore D0FrT, D1RtP, D{0-1}.{5-7} from stack, incrementing A0FrP */
+ MGETL D0FrT,D0.5,D0.6,D0.7,[A0FrP++]
+ /* Restore stack pointer to where frame pointer was before increment */
+ SUB A0StP,A0FrP,#0x20
+ /* Restore frame pointer from frame temp */
+ MOV A0FrP,D0FrT
+ /* Return to caller via restored return pointer */
+ MOV PC,D1RtP
+
+If the function hasn't touched the frame pointer, MGETL cannot be safely used
+with A0StP as it always increments and that would expose the stack to clobbering
+by interrupts (kernel) or signals (user). Therefore it's common to see the MGETL
+split into separate GETL instructions:
+
+ /* Restore D0FrT, D1RtP, D{0-1}.{5-7} from stack */
+ GETL D0FrT,D1RtP,[A0StP+#-0x30]
+ GETL D0.5,D1.5,[A0StP+#-0x28]
+ GETL D0.6,D1.6,[A0StP+#-0x20]
+ GETL D0.7,D1.7,[A0StP+#-0x18]
+ /* Restore stack pointer */
+ SUB A0StP,A0StP,#0x30
+ /* Return to caller via restored return pointer */
+ MOV PC,D1RtP
diff --git a/MAINTAINERS b/MAINTAINERS
index aea0adf414d..e95b1e944eb 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -5204,6 +5204,18 @@ F: drivers/mtd/
F: include/linux/mtd/
F: include/uapi/mtd/
+METAG ARCHITECTURE
+M: James Hogan <james.hogan@imgtec.com>
+S: Supported
+F: arch/metag/
+F: Documentation/metag/
+F: Documentation/devicetree/bindings/metag/
+F: drivers/clocksource/metag_generic.c
+F: drivers/irqchip/irq-metag.c
+F: drivers/irqchip/irq-metag-ext.c
+F: drivers/tty/metag_da.c
+F: fs/imgdafs/
+
MICROBLAZE ARCHITECTURE
M: Michal Simek <monstr@monstr.eu>
L: microblaze-uclinux@itee.uq.edu.au (moderated for non-subscribers)
diff --git a/arch/Kconfig b/arch/Kconfig
index dcd91a85536..5a1779c9394 100644
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -103,6 +103,22 @@ config UPROBES
If in doubt, say "N".
+config HAVE_64BIT_ALIGNED_ACCESS
+ def_bool 64BIT && !HAVE_EFFICIENT_UNALIGNED_ACCESS
+ help
+ Some architectures require 64 bit accesses to be 64 bit
+ aligned, which also requires structs containing 64 bit values
+ to be 64 bit aligned too. This includes some 32 bit
+ architectures which can do 64 bit accesses, as well as 64 bit
+ architectures without unaligned access.
+
+ This symbol should be selected by an architecture if 64 bit
+ accesses are required to be 64 bit aligned in this way even
+ though it is not a 64 bit architecture.
+
+ See Documentation/unaligned-memory-access.txt for more
+ information on the topic of unaligned memory accesses.
+
config HAVE_EFFICIENT_UNALIGNED_ACCESS
bool
help
diff --git a/arch/metag/Kconfig b/arch/metag/Kconfig
new file mode 100644
index 00000000000..afc8973d148
--- /dev/null
+++ b/arch/metag/Kconfig
@@ -0,0 +1,290 @@
+config SYMBOL_PREFIX
+ string
+ default "_"
+
+config METAG
+ def_bool y
+ select EMBEDDED
+ select GENERIC_ATOMIC64
+ select GENERIC_CLOCKEVENTS
+ select GENERIC_IRQ_SHOW
+ select GENERIC_SMP_IDLE_THREAD
+ select HAVE_64BIT_ALIGNED_ACCESS
+ select HAVE_ARCH_TRACEHOOK
+ select HAVE_C_RECORDMCOUNT
+ select HAVE_DEBUG_KMEMLEAK
+ select HAVE_DYNAMIC_FTRACE
+ select HAVE_FTRACE_MCOUNT_RECORD
+ select HAVE_FUNCTION_TRACER
+ select HAVE_FUNCTION_TRACE_MCOUNT_TEST
+ select HAVE_GENERIC_HARDIRQS
+ select HAVE_KERNEL_BZIP2
+ select HAVE_KERNEL_GZIP
+ select HAVE_KERNEL_LZO
+ select HAVE_KERNEL_XZ
+ select HAVE_MEMBLOCK
+ select HAVE_MEMBLOCK_NODE_MAP
+ select HAVE_MOD_ARCH_SPECIFIC
+ select HAVE_PERF_EVENTS
+ select HAVE_SYSCALL_TRACEPOINTS
+ select IRQ_DOMAIN
+ select MODULES_USE_ELF_RELA
+ select OF
+ select OF_EARLY_FLATTREE
+ select SPARSE_IRQ
+
+config STACKTRACE_SUPPORT
+ def_bool y
+
+config LOCKDEP_SUPPORT
+ def_bool y
+
+config HAVE_LATENCYTOP_SUPPORT
+ def_bool y
+
+config RWSEM_GENERIC_SPINLOCK
+ def_bool y
+
+config RWSEM_XCHGADD_ALGORITHM
+ bool
+
+config GENERIC_HWEIGHT
+ def_bool y
+
+config GENERIC_CALIBRATE_DELAY
+ def_bool y
+
+config GENERIC_GPIO
+ def_bool n
+
+config NO_IOPORT
+ def_bool y
+
+source "init/Kconfig"
+
+source "kernel/Kconfig.freezer"
+
+menu "Processor type and features"
+
+config MMU
+ def_bool y
+
+config STACK_GROWSUP
+ def_bool y
+
+config HOTPLUG_CPU
+ bool "Enable CPU hotplug support"
+ depends on SMP
+ help
+ Say Y here to allow turning CPUs off and on. CPUs can be
+ controlled through /sys/devices/system/cpu.
+
+ Say N if you want to disable CPU hotplug.
+
+config HIGHMEM
+ bool "High Memory Support"
+ help
+ The address space of Meta processors is only 4 Gigabytes large
+ and it has to accommodate user address space, kernel address
+ space as well as some memory mapped IO. That means that, if you
+ have a large amount of physical memory and/or IO, not all of the
+ memory can be "permanently mapped" by the kernel. The physical
+ memory that is not permanently mapped is called "high memory".
+
+ Depending on the selected kernel/user memory split, minimum
+ vmalloc space and actual amount of RAM, you may not need this
+ option which should result in a slightly faster kernel.
+
+ If unsure, say n.
+
+source "arch/metag/mm/Kconfig"
+
+source "arch/metag/Kconfig.soc"
+
+config METAG_META12
+ bool
+ help
+ Select this from the SoC config symbol to indicate that it contains a
+ Meta 1.2 core.
+
+config METAG_META21
+ bool
+ help
+ Select this from the SoC config symbol to indicate that it contains a
+ Meta 2.1 core.
+
+config SMP
+ bool "Symmetric multi-processing support"
+ depends on METAG_META21 && METAG_META21_MMU
+ select USE_GENERIC_SMP_HELPERS
+ help
+ This enables support for systems with more than one thread running
+ Linux. If you have a system with only one thread running Linux,
+ say N. Otherwise, say Y.
+
+config NR_CPUS
+ int "Maximum number of CPUs (2-4)" if SMP
+ range 2 4 if SMP
+ default "1" if !SMP
+ default "4" if SMP
+
+config METAG_SMP_WRITE_REORDERING
+ bool
+ help
+ This attempts to prevent cache-memory incoherence due to external
+ reordering of writes from different hardware threads when SMP is
+ enabled. It adds fences (system event 0) to smp_mb and smp_rmb in an
+ attempt to catch some of the cases, and also before writes to shared
+ memory in LOCK1 protected atomics and spinlocks.
+ This will not completely prevent cache incoherency on affected cores.
+
+config METAG_LNKGET_AROUND_CACHE
+ bool
+ depends on METAG_META21
+ help
+ This indicates that the LNKGET/LNKSET instructions go around the
+ cache, which requires some extra cache flushes when the memory needs
+ to be accessed by normal GET/SET instructions too.
+
+choice
+ prompt "Atomicity primitive"
+ default METAG_ATOMICITY_LNKGET
+ help
+ This option selects the mechanism for performing atomic operations.
+
+config METAG_ATOMICITY_IRQSOFF
+ depends on !SMP
+ bool "irqsoff"
+ help
+ This option disables interrupts to achieve atomicity. This mechanism
+ is not SMP-safe.
+
+config METAG_ATOMICITY_LNKGET
+ depends on METAG_META21
+ bool "lnkget/lnkset"
+ help
+ This option uses the LNKGET and LNKSET instructions to achieve
+ atomicity. LNKGET/LNKSET are load-link/store-conditional instructions.
+ Choose this option if your system requires low latency.
+
+config METAG_ATOMICITY_LOCK1
+ depends on SMP
+ bool "lock1"
+ help
+ This option uses the LOCK1 instruction for atomicity. This is mainly
+ provided as a debugging aid if the lnkget/lnkset atomicity primitive
+ isn't working properly.
+
+endchoice
+
+config METAG_FPU
+ bool "FPU Support"
+ depends on METAG_META21
+ default y
+ help
+ This option allows processes to use FPU hardware available with this
+ CPU. If this option is not enabled FPU registers will not be saved
+ and restored on context-switch.
+
+ If you plan on running programs which are compiled to use hard floats
+ say Y here.
+
+config METAG_DSP
+ bool "DSP Support"
+ help
+ This option allows processes to use DSP hardware available
+ with this CPU. If this option is not enabled DSP registers
+ will not be saved and restored on context-switch.
+
+ If you plan on running DSP programs say Y here.
+
+config METAG_PERFCOUNTER_IRQS
+ bool "PerfCounters interrupt support"
+ depends on METAG_META21
+ help
+ This option enables using interrupts to collect information from
+ Performance Counters. This option is supported in new META21
+ (starting from HTP265).
+
+ When disabled, Performance Counters information will be collected
+ based on Timer Interrupt.
+
+config METAG_DA
+ bool "DA support"
+ help
+ Say Y if you plan to use a DA debug adapter with Linux. The presence
+ of the DA will be detected automatically at boot, so it is safe to say
+ Y to this option even when booting without a DA.
+
+ This enables support for services provided by DA JTAG debug adapters,
+ such as:
+ - communication over DA channels (such as the console driver).
+ - use of the DA filesystem.
+
+menu "Boot options"
+
+config METAG_BUILTIN_DTB
+ bool "Embed DTB in kernel image"
+ default y
+ help
+ Embeds a device tree binary in the kernel image.
+
+config METAG_BUILTIN_DTB_NAME
+ string "Built in DTB"
+ depends on METAG_BUILTIN_DTB
+ help
+ Set the name of the DTB to embed (leave blank to pick one
+ automatically based on kernel configuration).
+
+config CMDLINE_BOOL
+ bool "Default bootloader kernel arguments"
+
+config CMDLINE
+ string "Kernel command line"
+ depends on CMDLINE_BOOL
+ help
+ On some architectures there is currently no way for the boot loader
+ to pass arguments to the kernel. For these architectures, you should
+ supply some command-line options at build time by entering them
+ here.
+
+config CMDLINE_FORCE
+ bool "Force default kernel command string"
+ depends on CMDLINE_BOOL
+ help
+ Set this to have arguments from the default kernel command string
+ override those passed by the boot loader.
+
+endmenu
+
+source "kernel/Kconfig.preempt"
+
+source kernel/Kconfig.hz
+
+endmenu
+
+menu "Power management options"
+
+source kernel/power/Kconfig
+
+endmenu
+
+menu "Executable file formats"
+
+source "fs/Kconfig.binfmt"
+
+endmenu
+
+source "net/Kconfig"
+
+source "drivers/Kconfig"
+
+source "fs/Kconfig"
+
+source "arch/metag/Kconfig.debug"
+
+source "security/Kconfig"
+
+source "crypto/Kconfig"
+
+source "lib/Kconfig"
diff --git a/arch/metag/Kconfig.debug b/arch/metag/Kconfig.debug
new file mode 100644
index 00000000000..e45bbf6a7a5
--- /dev/null
+++ b/arch/metag/Kconfig.debug
@@ -0,0 +1,40 @@
+menu "Kernel hacking"
+
+config TRACE_IRQFLAGS_SUPPORT
+ bool
+ default y
+
+source "lib/Kconfig.debug"
+
+config DEBUG_STACKOVERFLOW
+ bool "Check for stack overflows"
+ depends on DEBUG_KERNEL
+ help
+ This option will cause messages to be printed if free stack space
+ drops below a certain limit.
+
+config 4KSTACKS
+ bool "Use 4Kb for kernel stacks instead of 8Kb"
+ depends on DEBUG_KERNEL
+ help
+ If you say Y here the kernel will use a 4Kb stacksize for the
+ kernel stack attached to each process/thread. This facilitates
+ running more threads on a system and also reduces the pressure
+ on the VM subsystem for higher order allocations. This option
+ will also use IRQ stacks to compensate for the reduced stackspace.
+
+config METAG_FUNCTION_TRACE
+ bool "Output Meta real-time trace data for function entry/exit"
+ help
+ If you say Y here the kernel will use the Meta hardware trace
+ unit to output information about function entry and exit that
+ can be used by a debugger for profiling and call-graphs.
+
+config METAG_POISON_CATCH_BUFFERS
+ bool "Poison catch buffer contents on kernel entry"
+ help
+ If you say Y here the kernel will write poison data to the
+ catch buffer registers on kernel entry. This will make any
+ problem with catch buffer handling much more apparent.
+
+endmenu
diff --git a/arch/metag/Kconfig.soc b/arch/metag/Kconfig.soc
new file mode 100644
index 00000000000..ec079cfb7c6
--- /dev/null
+++ b/arch/metag/Kconfig.soc
@@ -0,0 +1,55 @@
+choice
+ prompt "SoC Type"
+ default META21_FPGA
+
+config META12_FPGA
+ bool "Meta 1.2 FPGA"
+ select METAG_META12
+ help
+ This is a Meta 1.2 FPGA bitstream, just a bare CPU.
+
+config META21_FPGA
+ bool "Meta 2.1 FPGA"
+ select METAG_META21
+ help
+ This is a Meta 2.1 FPGA bitstream, just a bare CPU.
+
+endchoice
+
+menu "SoC configuration"
+
+if METAG_META21
+
+# Meta 2.x specific options
+
+config METAG_META21_MMU
+ bool "Meta 2.x MMU mode"
+ default y
+ help
+ Use the Meta 2.x MMU in extended mode.
+
+config METAG_UNALIGNED
+ bool "Meta 2.x unaligned access checking"
+ default y
+ help
+ All memory accesses will be checked for alignment and an exception
+ raised on unaligned accesses. This feature does cost performance
+ but without it there will be no notification of this type of error.
+
+config METAG_USER_TCM
+ bool "Meta on-chip memory support for userland"
+ select GENERIC_ALLOCATOR
+ default y
+ help
+ Allow the on-chip memories of Meta SoCs to be used by user
+ applications.
+
+endif
+
+config METAG_HALT_ON_PANIC
+ bool "Halt the core on panic"
+ help
+ Halt the core when a panic occurs. This is useful when running
+ pre-production silicon or in an FPGA environment.
+
+endmenu
diff --git a/arch/metag/Makefile b/arch/metag/Makefile
new file mode 100644
index 00000000000..81bd6a1c748
--- /dev/null
+++ b/arch/metag/Makefile
@@ -0,0 +1,87 @@
+#
+# metag/Makefile
+#
+# This file is included by the global makefile so that you can add your own
+# architecture-specific flags and dependencies. Remember to have actions
+# for "archclean" cleaning up for this architecture.
+#
+# This file is subject to the terms and conditions of the GNU General Public
+# License. See the file "COPYING" in the main directory of this archive
+# for more details.
+#
+# Copyright (C) 1994 by Linus Torvalds
+# 2007,2008,2012 by Imagination Technologies Ltd.
+#
+
+LDFLAGS :=
+OBJCOPYFLAGS := -O binary -R .note -R .comment -S
+
+checkflags-$(CONFIG_METAG_META12) += -DMETAC_1_2
+checkflags-$(CONFIG_METAG_META21) += -DMETAC_2_1
+CHECKFLAGS += -D__metag__ $(checkflags-y)
+
+KBUILD_DEFCONFIG := meta2_defconfig
+
+sflags-$(CONFIG_METAG_META12) += -mmetac=1.2
+ifeq ($(CONFIG_METAG_META12),y)
+# Only use TBI API 1.4 if DSP is enabled for META12 cores
+sflags-$(CONFIG_METAG_DSP) += -DTBI_1_4
+endif
+sflags-$(CONFIG_METAG_META21) += -mmetac=2.1 -DTBI_1_4
+
+cflags-$(CONFIG_METAG_FUNCTION_TRACE) += -mhwtrace-leaf -mhwtrace-retpc
+cflags-$(CONFIG_METAG_META21) += -mextensions=bex
+
+KBUILD_CFLAGS += -pipe
+KBUILD_CFLAGS += -ffunction-sections
+
+KBUILD_CFLAGS += $(sflags-y) $(cflags-y)
+KBUILD_AFLAGS += $(sflags-y)
+
+LDFLAGS_vmlinux := $(ldflags-y)
+
+head-y := arch/metag/kernel/head.o
+
+core-y += arch/metag/boot/dts/
+core-y += arch/metag/kernel/
+core-y += arch/metag/mm/
+
+libs-y += arch/metag/lib/
+libs-y += arch/metag/tbx/
+
+boot := arch/metag/boot
+
+boot_targets += uImage
+boot_targets += uImage.gz
+boot_targets += uImage.bz2
+boot_targets += uImage.xz
+boot_targets += uImage.lzo
+boot_targets += uImage.bin
+boot_targets += vmlinux.bin
+
+PHONY += $(boot_targets)
+
+all: vmlinux.bin
+
+$(boot_targets): vmlinux
+ $(Q)$(MAKE) $(build)=$(boot) $(boot)/$@
+
+%.dtb %.dtb.S %.dtb.o: scripts
+ $(Q)$(MAKE) $(build)=$(boot)/dts $(boot)/dts/$@
+
+dtbs: scripts
+ $(Q)$(MAKE) $(build)=$(boot)/dts dtbs
+
+archclean:
+ $(Q)$(MAKE) $(clean)=$(boot)
+
+define archhelp
+ echo '* vmlinux.bin - Binary kernel image (arch/$(ARCH)/boot/vmlinux.bin)'
+ @echo ' uImage - Alias to bootable U-Boot image'
+ @echo ' uImage.bin - Kernel-only image for U-Boot (bin)'
+ @echo ' uImage.gz - Kernel-only image for U-Boot (gzip)'
+ @echo ' uImage.bz2 - Kernel-only image for U-Boot (bzip2)'
+ @echo ' uImage.xz - Kernel-only image for U-Boot (xz)'
+ @echo ' uImage.lzo - Kernel-only image for U-Boot (lzo)'
+ @echo ' dtbs - Build device tree blobs for enabled boards'
+endef
diff --git a/arch/metag/boot/.gitignore b/arch/metag/boot/.gitignore
new file mode 100644
index 00000000000..a021da20115
--- /dev/null
+++ b/arch/metag/boot/.gitignore
@@ -0,0 +1,4 @@
+vmlinux*
+uImage*
+ramdisk.*
+*.dtb
diff --git a/arch/metag/boot/Makefile b/arch/metag/boot/Makefile
new file mode 100644
index 00000000000..5a1f88cf91e
--- /dev/null
+++ b/arch/metag/boot/Makefile
@@ -0,0 +1,68 @@
+#
+# This file is subject to the terms and conditions of the GNU General Public
+# License. See the file "COPYING" in the main directory of this archive
+# for more details.
+#
+# Copyright (C) 2007,2012 Imagination Technologies Ltd.
+#
+
+suffix-y := bin
+suffix-$(CONFIG_KERNEL_GZIP) := gz
+suffix-$(CONFIG_KERNEL_BZIP2) := bz2
+suffix-$(CONFIG_KERNEL_XZ) := xz
+suffix-$(CONFIG_KERNEL_LZO) := lzo
+
+targets += vmlinux.bin
+targets += uImage
+targets += uImage.gz
+targets += uImage.bz2
+targets += uImage.xz
+targets += uImage.lzo
+targets += uImage.bin
+
+extra-y += vmlinux.bin
+extra-y += vmlinux.bin.gz
+extra-y += vmlinux.bin.bz2
+extra-y += vmlinux.bin.xz
+extra-y += vmlinux.bin.lzo
+
+UIMAGE_LOADADDR = $(CONFIG_PAGE_OFFSET)
+
+ifeq ($(CONFIG_FUNCTION_TRACER),y)
+orig_cflags := $(KBUILD_CFLAGS)
+KBUILD_CFLAGS = $(subst -pg, , $(orig_cflags))
+endif
+
+$(obj)/vmlinux.bin: vmlinux FORCE
+ $(call if_changed,objcopy)
+
+$(obj)/vmlinux.bin.gz: $(obj)/vmlinux.bin FORCE
+ $(call if_changed,gzip)
+
+$(obj)/vmlinux.bin.bz2: $(obj)/vmlinux.bin FORCE
+ $(call if_changed,bzip2)
+
+$(obj)/vmlinux.bin.xz: $(obj)/vmlinux.bin FORCE
+ $(call if_changed,xzkern)
+
+$(obj)/vmlinux.bin.lzo: $(obj)/vmlinux.bin FORCE
+ $(call if_changed,lzo)
+
+$(obj)/uImage.gz: $(obj)/vmlinux.bin.gz FORCE
+ $(call if_changed,uimage,gzip)
+
+$(obj)/uImage.bz2: $(obj)/vmlinux.bin.bz2 FORCE
+ $(call if_changed,uimage,bzip2)
+
+$(obj)/uImage.xz: $(obj)/vmlinux.bin.xz FORCE
+ $(call if_changed,uimage,xz)
+
+$(obj)/uImage.lzo: $(obj)/vmlinux.bin.lzo FORCE
+ $(call if_changed,uimage,lzo)
+
+$(obj)/uImage.bin: $(obj)/vmlinux.bin FORCE
+ $(call if_changed,uimage,none)
+
+$(obj)/uImage: $(obj)/uImage.$(suffix-y)
+ @ln -sf $(notdir $<) $@
+ @echo ' Image $@ is ready'
diff --git a/arch/metag/boot/dts/Makefile b/arch/metag/boot/dts/Makefile
new file mode 100644
index 00000000000..e0b5afd8bde
--- /dev/null
+++ b/arch/metag/boot/dts/Makefile
@@ -0,0 +1,16 @@
+dtb-y += skeleton.dtb
+
+# Built-in dtb
+builtindtb-y := skeleton
+
+ifneq ($(CONFIG_METAG_BUILTIN_DTB_NAME),"")
+ builtindtb-y := $(CONFIG_METAG_BUILTIN_DTB_NAME)
+endif
+obj-$(CONFIG_METAG_BUILTIN_DTB) += $(patsubst "%",%,$(builtindtb-y)).dtb.o
+
+targets += dtbs
+targets += $(dtb-y)
+
+dtbs: $(addprefix $(obj)/, $(dtb-y))
+
+clean-files += *.dtb
diff --git a/arch/metag/boot/dts/skeleton.dts b/arch/metag/boot/dts/skeleton.dts
new file mode 100644
index 00000000000..7244d1f0d55
--- /dev/null
+++ b/arch/metag/boot/dts/skeleton.dts
@@ -0,0 +1,10 @@
+/*
+ * Copyright (C) 2012 Imagination Technologies Ltd.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+/dts-v1/;
+
+/include/ "skeleton.dtsi"
diff --git a/arch/metag/boot/dts/skeleton.dtsi b/arch/metag/boot/dts/skeleton.dtsi
new file mode 100644
index 00000000000..78229eacced
--- /dev/null
+++ b/arch/metag/boot/dts/skeleton.dtsi
@@ -0,0 +1,14 @@
+/*
+ * Skeleton device tree; the bare minimum needed to boot; just include and
+ * add a compatible value. The bootloader will typically populate the memory
+ * node.
+ */
+
+/ {
+ compatible = "img,meta";
+ #address-cells = <1>;
+ #size-cells = <1>;
+ chosen { };
+ aliases { };
+ memory { device_type = "memory"; reg = <0 0>; };
+};
diff --git a/arch/metag/configs/meta1_defconfig b/arch/metag/configs/meta1_defconfig
new file mode 100644
index 00000000000..c35a75e8ecf
--- /dev/null
+++ b/arch/metag/configs/meta1_defconfig
@@ -0,0 +1,40 @@
+# CONFIG_LOCALVERSION_AUTO is not set
+# CONFIG_SWAP is not set
+CONFIG_LOG_BUF_SHIFT=13
+CONFIG_SYSFS_DEPRECATED=y
+CONFIG_SYSFS_DEPRECATED_V2=y
+CONFIG_KALLSYMS_ALL=y
+# CONFIG_ELF_CORE is not set
+CONFIG_SLAB=y
+# CONFIG_BLK_DEV_BSG is not set
+CONFIG_PARTITION_ADVANCED=y
+# CONFIG_MSDOS_PARTITION is not set
+# CONFIG_IOSCHED_DEADLINE is not set
+# CONFIG_IOSCHED_CFQ is not set
+CONFIG_FLATMEM_MANUAL=y
+CONFIG_META12_FPGA=y
+CONFIG_METAG_DA=y
+CONFIG_HZ_100=y
+CONFIG_DEVTMPFS=y
+CONFIG_DEVTMPFS_MOUNT=y
+# CONFIG_STANDALONE is not set
+# CONFIG_PREVENT_FIRMWARE_BUILD is not set
+# CONFIG_FW_LOADER is not set
+CONFIG_BLK_DEV_RAM=y
+CONFIG_BLK_DEV_RAM_COUNT=1
+CONFIG_BLK_DEV_RAM_SIZE=16384
+# CONFIG_INPUT is not set
+# CONFIG_SERIO is not set
+# CONFIG_VT is not set
+# CONFIG_LEGACY_PTYS is not set
+CONFIG_DA_TTY=y
+CONFIG_DA_CONSOLE=y
+# CONFIG_DEVKMEM is not set
+# CONFIG_HW_RANDOM is not set
+# CONFIG_HWMON is not set
+# CONFIG_USB_SUPPORT is not set
+# CONFIG_DNOTIFY is not set
+CONFIG_TMPFS=y
+# CONFIG_MISC_FILESYSTEMS is not set
+# CONFIG_SCHED_DEBUG is not set
+CONFIG_DEBUG_INFO=y
diff --git a/arch/metag/configs/meta2_defconfig b/arch/metag/configs/meta2_defconfig
new file mode 100644
index 00000000000..fb314841018
--- /dev/null
+++ b/arch/metag/configs/meta2_defconfig
@@ -0,0 +1,41 @@
+# CONFIG_LOCALVERSION_AUTO is not set
+# CONFIG_SWAP is not set
+CONFIG_SYSVIPC=y
+CONFIG_LOG_BUF_SHIFT=13
+CONFIG_SYSFS_DEPRECATED=y
+CONFIG_SYSFS_DEPRECATED_V2=y
+CONFIG_KALLSYMS_ALL=y
+# CONFIG_ELF_CORE is not set
+CONFIG_SLAB=y
+# CONFIG_BLK_DEV_BSG is not set
+CONFIG_PARTITION_ADVANCED=y
+# CONFIG_MSDOS_PARTITION is not set
+# CONFIG_IOSCHED_DEADLINE is not set
+# CONFIG_IOSCHED_CFQ is not set
+CONFIG_METAG_L2C=y
+CONFIG_FLATMEM_MANUAL=y
+CONFIG_METAG_HALT_ON_PANIC=y
+CONFIG_METAG_DA=y
+CONFIG_HZ_100=y
+CONFIG_DEVTMPFS=y
+# CONFIG_STANDALONE is not set
+# CONFIG_PREVENT_FIRMWARE_BUILD is not set
+# CONFIG_FW_LOADER is not set
+CONFIG_BLK_DEV_RAM=y
+CONFIG_BLK_DEV_RAM_COUNT=1
+CONFIG_BLK_DEV_RAM_SIZE=16384
+# CONFIG_INPUT is not set
+# CONFIG_SERIO is not set
+# CONFIG_VT is not set
+# CONFIG_LEGACY_PTYS is not set
+CONFIG_DA_TTY=y
+CONFIG_DA_CONSOLE=y
+# CONFIG_DEVKMEM is not set
+# CONFIG_HW_RANDOM is not set
+# CONFIG_HWMON is not set
+# CONFIG_USB_SUPPORT is not set
+# CONFIG_DNOTIFY is not set
+CONFIG_TMPFS=y
+# CONFIG_MISC_FILESYSTEMS is not set
+# CONFIG_SCHED_DEBUG is not set
+CONFIG_DEBUG_INFO=y
diff --git a/arch/metag/configs/meta2_smp_defconfig b/arch/metag/configs/meta2_smp_defconfig
new file mode 100644
index 00000000000..6c7b777ac27
--- /dev/null
+++ b/arch/metag/configs/meta2_smp_defconfig
@@ -0,0 +1,42 @@
+# CONFIG_LOCALVERSION_AUTO is not set
+# CONFIG_SWAP is not set
+CONFIG_SYSVIPC=y
+CONFIG_LOG_BUF_SHIFT=13
+CONFIG_SYSFS_DEPRECATED=y
+CONFIG_SYSFS_DEPRECATED_V2=y
+CONFIG_KALLSYMS_ALL=y
+# CONFIG_ELF_CORE is not set
+CONFIG_SLAB=y
+# CONFIG_BLK_DEV_BSG is not set
+CONFIG_PARTITION_ADVANCED=y
+# CONFIG_MSDOS_PARTITION is not set
+# CONFIG_IOSCHED_DEADLINE is not set
+# CONFIG_IOSCHED_CFQ is not set
+CONFIG_METAG_L2C=y
+CONFIG_FLATMEM_MANUAL=y
+CONFIG_METAG_HALT_ON_PANIC=y
+CONFIG_SMP=y
+CONFIG_METAG_DA=y
+CONFIG_HZ_100=y
+CONFIG_DEVTMPFS=y
+# CONFIG_STANDALONE is not set
+# CONFIG_PREVENT_FIRMWARE_BUILD is not set
+# CONFIG_FW_LOADER is not set
+CONFIG_BLK_DEV_RAM=y
+CONFIG_BLK_DEV_RAM_COUNT=1
+CONFIG_BLK_DEV_RAM_SIZE=16384
+# CONFIG_INPUT is not set
+# CONFIG_SERIO is not set
+# CONFIG_VT is not set
+# CONFIG_LEGACY_PTYS is not set
+CONFIG_DA_TTY=y
+CONFIG_DA_CONSOLE=y
+# CONFIG_DEVKMEM is not set
+# CONFIG_HW_RANDOM is not set
+# CONFIG_HWMON is not set
+# CONFIG_USB_SUPPORT is not set
+# CONFIG_DNOTIFY is not set
+CONFIG_TMPFS=y
+# CONFIG_MISC_FILESYSTEMS is not set
+# CONFIG_SCHED_DEBUG is not set
+CONFIG_DEBUG_INFO=y
diff --git a/arch/metag/include/asm/Kbuild b/arch/metag/include/asm/Kbuild
new file mode 100644
index 00000000000..6ae0ccb632c
--- /dev/null
+++ b/arch/metag/include/asm/Kbuild
@@ -0,0 +1,54 @@
+generic-y += auxvec.h
+generic-y += bitsperlong.h
+generic-y += bugs.h
+generic-y += clkdev.h
+generic-y += cputime.h
+generic-y += current.h
+generic-y += device.h
+generic-y += dma.h
+generic-y += emergency-restart.h
+generic-y += errno.h
+generic-y += exec.h
+generic-y += fb.h
+generic-y += fcntl.h
+generic-y += futex.h
+generic-y += hardirq.h
+generic-y += hw_irq.h
+generic-y += ioctl.h
+generic-y += ioctls.h
+generic-y += ipcbuf.h
+generic-y += irq_regs.h
+generic-y += kdebug.h
+generic-y += kmap_types.h
+generic-y += kvm_para.h
+generic-y += local.h
+generic-y += local64.h
+generic-y += msgbuf.h
+generic-y += mutex.h
+generic-y += param.h
+generic-y += pci.h
+generic-y += percpu.h
+generic-y += poll.h
+generic-y += posix_types.h
+generic-y += scatterlist.h
+generic-y += sections.h
+generic-y += sembuf.h
+generic-y += serial.h
+generic-y += shmbuf.h
+generic-y += shmparam.h
+generic-y += signal.h
+generic-y += socket.h
+generic-y += sockios.h
+generic-y += stat.h
+generic-y += statfs.h
+generic-y += switch_to.h
+generic-y += termbits.h
+generic-y += termios.h
+generic-y += timex.h
+generic-y += trace_clock.h
+generic-y += types.h
+generic-y += ucontext.h
+generic-y += unaligned.h
+generic-y += user.h
+generic-y += vga.h
+generic-y += xor.h
diff --git a/arch/metag/include/asm/atomic.h b/arch/metag/include/asm/atomic.h
new file mode 100644
index 00000000000..307ecd2bd9a
--- /dev/null
+++ b/arch/metag/include/asm/atomic.h
@@ -0,0 +1,53 @@
+#ifndef __ASM_METAG_ATOMIC_H
+#define __ASM_METAG_ATOMIC_H
+
+#include <linux/compiler.h>
+#include <linux/types.h>
+#include <asm/cmpxchg.h>
+
+#if defined(CONFIG_METAG_ATOMICITY_IRQSOFF)
+/* The simple UP case. */
+#include <asm-generic/atomic.h>
+#else
+
+#if defined(CONFIG_METAG_ATOMICITY_LOCK1)
+#include <asm/atomic_lock1.h>
+#else
+#include <asm/atomic_lnkget.h>
+#endif
+
+#define atomic_add_negative(a, v) (atomic_add_return((a), (v)) < 0)
+
+#define atomic_dec_return(v) atomic_sub_return(1, (v))
+#define atomic_inc_return(v) atomic_add_return(1, (v))
+
+/*
+ * atomic_inc_and_test - increment and test
+ * @v: pointer of type atomic_t
+ *
+ * Atomically increments @v by 1
+ * and returns true if the result is zero, or false for all
+ * other cases.
+ */
+#define atomic_inc_and_test(v) (atomic_inc_return(v) == 0)
+
+#define atomic_sub_and_test(i, v) (atomic_sub_return((i), (v)) == 0)
+#define atomic_dec_and_test(v) (atomic_sub_return(1, (v)) == 0)
+
+#define atomic_inc(v) atomic_add(1, (v))
+#define atomic_dec(v) atomic_sub(1, (v))
+
+#define atomic_inc_not_zero(v) atomic_add_unless((v), 1, 0)
+
+#define smp_mb__before_atomic_dec() barrier()
+#define smp_mb__after_atomic_dec() barrier()
+#define smp_mb__before_atomic_inc() barrier()
+#define smp_mb__after_atomic_inc() barrier()
+
+#endif
+
+#define atomic_dec_if_positive(v) atomic_sub_if_positive(1, v)
+
+#include <asm-generic/atomic64.h>
+
+#endif /* __ASM_METAG_ATOMIC_H */
diff --git a/arch/metag/include/asm/atomic_lnkget.h b/arch/metag/include/asm/atomic_lnkget.h
new file mode 100644
index 00000000000..d2e60a18986
--- /dev/null
+++ b/arch/metag/include/asm/atomic_lnkget.h
@@ -0,0 +1,234 @@
+#ifndef __ASM_METAG_ATOMIC_LNKGET_H
+#define __ASM_METAG_ATOMIC_LNKGET_H
+
+#define ATOMIC_INIT(i) { (i) }
+
+#define atomic_set(v, i) ((v)->counter = (i))
+
+#include <linux/compiler.h>
+
+#include <asm/barrier.h>
+
+/*
+ * None of these asm statements clobber memory as LNKSET writes around
+ * the cache so the memory it modifies cannot safely be read by any means
+ * other than these accessors.
+ */
+
+/*
+ * atomic_read - read the counter of an atomic_t
+ * @v: atomic_t to read
+ *
+ * Loads v->counter with a single LNKGETD instead of a plain C load and
+ * returns the loaded value. No memory barrier is issued here.
+ */
+static inline int atomic_read(const atomic_t *v)
+{
+ int temp;
+
+ asm volatile (
+ "LNKGETD %0, [%1]\n"
+ : "=da" (temp)
+ : "da" (&v->counter));
+
+ return temp;
+}
+
+/*
+ * atomic_add - add @i to the counter of @v
+ * @i: value to add
+ * @v: atomic_t to update
+ *
+ * LNKGETD / ADD / LNKSETD sequence that branches back to label 1 unless the
+ * TXSTAT value read after the store, masked with 0x3f000000, equals
+ * 0x02000000 (presumably the "linked store succeeded" status - confirm
+ * against the Meta ISA documentation). Nothing is returned and no smp_mb()
+ * is issued.
+ */
+static inline void atomic_add(int i, atomic_t *v)
+{
+ int temp;
+
+ asm volatile (
+ "1: LNKGETD %0, [%1]\n"
+ " ADD %0, %0, %2\n"
+ " LNKSETD [%1], %0\n"
+ " DEFR %0, TXSTAT\n"
+ " ANDT %0, %0, #HI(0x3f000000)\n"
+ " CMPT %0, #HI(0x02000000)\n"
+ " BNZ 1b\n"
+ : "=&d" (temp)
+ : "da" (&v->counter), "bd" (i)
+ : "cc");
+}
+
+/*
+ * atomic_sub - subtract @i from the counter of @v
+ * @i: value to subtract
+ * @v: atomic_t to update
+ *
+ * Same retry loop as atomic_add() with SUB in place of ADD: the sequence is
+ * repeated from label 1 until the masked TXSTAT value equals 0x02000000.
+ * Nothing is returned and no smp_mb() is issued.
+ */
+static inline void atomic_sub(int i, atomic_t *v)
+{
+ int temp;
+
+ asm volatile (
+ "1: LNKGETD %0, [%1]\n"
+ " SUB %0, %0, %2\n"
+ " LNKSETD [%1], %0\n"
+ " DEFR %0, TXSTAT\n"
+ " ANDT %0, %0, #HI(0x3f000000)\n"
+ " CMPT %0, #HI(0x02000000)\n"
+ " BNZ 1b\n"
+ : "=&d" (temp)
+ : "da" (&v->counter), "bd" (i)
+ : "cc");
+}
+
+/*
+ * atomic_add_return - add @i to the counter of @v and return the new value
+ * @i: value to add
+ * @v: atomic_t to update
+ *
+ * The sum is kept in a separate register (result) from the TXSTAT scratch
+ * register (temp), so the value that was stored is still available once the
+ * retry loop exits. smp_mb() is called both before and after the sequence.
+ */
+static inline int atomic_add_return(int i, atomic_t *v)
+{
+ int result, temp;
+
+ smp_mb();
+
+ asm volatile (
+ "1: LNKGETD %1, [%2]\n"
+ " ADD %1, %1, %3\n"
+ " LNKSETD [%2], %1\n"
+ " DEFR %0, TXSTAT\n"
+ " ANDT %0, %0, #HI(0x3f000000)\n"
+ " CMPT %0, #HI(0x02000000)\n"
+ " BNZ 1b\n"
+ : "=&d" (temp), "=&da" (result)
+ : "da" (&v->counter), "bd" (i)
+ : "cc");
+
+ smp_mb();
+
+ return result;
+}
+
+/*
+ * atomic_sub_return - subtract @i from the counter of @v, return the new value
+ * @i: value to subtract
+ * @v: atomic_t to update
+ *
+ * Mirror image of atomic_add_return() using SUB. The difference is held in
+ * result while temp is used for the TXSTAT check that drives the retry.
+ * smp_mb() is called both before and after the sequence.
+ */
+static inline int atomic_sub_return(int i, atomic_t *v)
+{
+ int result, temp;
+
+ smp_mb();
+
+ asm volatile (
+ "1: LNKGETD %1, [%2]\n"
+ " SUB %1, %1, %3\n"
+ " LNKSETD [%2], %1\n"
+ " DEFR %0, TXSTAT\n"
+ " ANDT %0, %0, #HI(0x3f000000)\n"
+ " CMPT %0, #HI(0x02000000)\n"
+ " BNZ 1b\n"
+ : "=&d" (temp), "=&da" (result)
+ : "da" (&v->counter), "bd" (i)
+ : "cc");
+
+ smp_mb();
+
+ return result;
+}
+
+/*
+ * atomic_clear_mask - clear the bits of @mask in the counter of @v
+ * @mask: bits to clear
+ * @v: atomic_t to update
+ *
+ * The inverted mask (~mask) is passed in as an operand and ANDed into the
+ * value loaded by LNKGETD. The sequence is retried from label 1 until the
+ * masked TXSTAT value equals 0x02000000. Nothing is returned and no smp_mb()
+ * is issued.
+ */
+static inline void atomic_clear_mask(unsigned int mask, atomic_t *v)
+{
+ int temp;
+
+ asm volatile (
+ "1: LNKGETD %0, [%1]\n"
+ " AND %0, %0, %2\n"
+ /* Operands are comma separated, as in every other LNKSETD in this file. */
+ " LNKSETD [%1], %0\n"
+ " DEFR %0, TXSTAT\n"
+ " ANDT %0, %0, #HI(0x3f000000)\n"
+ " CMPT %0, #HI(0x02000000)\n"
+ " BNZ 1b\n"
+ : "=&d" (temp)
+ : "da" (&v->counter), "bd" (~mask)
+ : "cc");
+}
+
+/*
+ * atomic_set_mask - set the bits of @mask in the counter of @v
+ * @mask: bits to set
+ * @v: atomic_t to update
+ *
+ * ORs @mask into the value loaded by LNKGETD and stores it back with LNKSETD,
+ * retrying from label 1 until the masked TXSTAT value equals 0x02000000.
+ * Nothing is returned and no smp_mb() is issued.
+ */
+static inline void atomic_set_mask(unsigned int mask, atomic_t *v)
+{
+ int temp;
+
+ asm volatile (
+ "1: LNKGETD %0, [%1]\n"
+ " OR %0, %0, %2\n"
+ " LNKSETD [%1], %0\n"
+ " DEFR %0, TXSTAT\n"
+ " ANDT %0, %0, #HI(0x3f000000)\n"
+ " CMPT %0, #HI(0x02000000)\n"
+ " BNZ 1b\n"
+ : "=&d" (temp)
+ : "da" (&v->counter), "bd" (mask)
+ : "cc");
+}
+
+/*
+ * atomic_cmpxchg - store @new in @v if its counter currently equals @old
+ * @v: atomic_t to update
+ * @old: value the counter is compared against
+ * @new: value stored when the comparison matches
+ *
+ * The loaded value is compared with @old; the store is the conditional
+ * LNKSETDEQ and a mismatch leaves through "BNE 2f" without looking at TXSTAT.
+ * On a match the usual TXSTAT check decides whether to retry from label 1.
+ * Returns the value that was loaded, so callers detect success by comparing
+ * the return value with @old. smp_mb() is called before and after.
+ */
+static inline int atomic_cmpxchg(atomic_t *v, int old, int new)
+{
+ int result, temp;
+
+ smp_mb();
+
+ asm volatile (
+ "1: LNKGETD %1, [%2]\n"
+ " CMP %1, %3\n"
+ " LNKSETDEQ [%2], %4\n"
+ " BNE 2f\n"
+ " DEFR %0, TXSTAT\n"
+ " ANDT %0, %0, #HI(0x3f000000)\n"
+ " CMPT %0, #HI(0x02000000)\n"
+ " BNZ 1b\n"
+ "2:\n"
+ : "=&d" (temp), "=&d" (result)
+ : "da" (&v->counter), "bd" (old), "da" (new)
+ : "cc");
+
+ smp_mb();
+
+ return result;
+}
+
+/*
+ * atomic_xchg - store @new in the counter of @v and return the previous value
+ * @v: atomic_t to update
+ * @new: value to store
+ *
+ * The previous value is loaded into old by LNKGETD, @new is written with
+ * LNKSETD, and the pair is retried from label 1 until the masked TXSTAT value
+ * equals 0x02000000.
+ *
+ * NOTE(review): unlike atomic_cmpxchg() this function does not call smp_mb()
+ * around the sequence - confirm that is intended.
+ */
+static inline int atomic_xchg(atomic_t *v, int new)
+{
+ int temp, old;
+
+ asm volatile (
+ "1: LNKGETD %1, [%2]\n"
+ " LNKSETD [%2], %3\n"
+ " DEFR %0, TXSTAT\n"
+ " ANDT %0, %0, #HI(0x3f000000)\n"
+ " CMPT %0, #HI(0x02000000)\n"
+ " BNZ 1b\n"
+ : "=&d" (temp), "=&d" (old)
+ : "da" (&v->counter), "da" (new)
+ : "cc");
+
+ return old;
+}
+
+/*
+ * __atomic_add_unless - add @a to the counter of @v unless it equals @u
+ * @v: atomic_t to update
+ * @a: value to add
+ * @u: value for which no addition is performed
+ *
+ * The loaded value stays in result; the sum is built in temp and written with
+ * the conditional LNKSETDNE. When the loaded value equals @u, "BEQ 2f" leaves
+ * without checking TXSTAT; otherwise the usual TXSTAT check decides whether
+ * to retry from label 1. Returns the value that was loaded (the old value).
+ * smp_mb() is called before and after.
+ */
+static inline int __atomic_add_unless(atomic_t *v, int a, int u)
+{
+ int result, temp;
+
+ smp_mb();
+
+ asm volatile (
+ "1: LNKGETD %1, [%2]\n"
+ " CMP %1, %3\n"
+ " ADD %0, %1, %4\n"
+ " LNKSETDNE [%2], %0\n"
+ " BEQ 2f\n"
+ " DEFR %0, TXSTAT\n"
+ " ANDT %0, %0, #HI(0x3f000000)\n"
+ " CMPT %0, #HI(0x02000000)\n"
+ " BNZ 1b\n"
+ "2:\n"
+ : "=&d" (temp), "=&d" (result)
+ : "da" (&v->counter), "bd" (u), "bd" (a)
+ : "cc");
+
+ smp_mb();
+
+ return result;
+}
+
+/*
+ * atomic_sub_if_positive - conditionally subtract @i from the counter of @v
+ * @i: value to subtract
+ * @v: atomic_t to update
+ *
+ * SUBS computes (loaded value - @i) into result and the store is the
+ * conditional LNKSETDGE; "BLT 2f" leaves without checking TXSTAT (presumably
+ * GE/LT test the flags set by SUBS, i.e. the store only happens when the
+ * difference is not negative - confirm against the Meta ISA documentation).
+ * Returns the computed difference whether or not it was stored. No smp_mb()
+ * is issued here.
+ */
+static inline int atomic_sub_if_positive(int i, atomic_t *v)
+{
+ int result, temp;
+
+ asm volatile (
+ "1: LNKGETD %1, [%2]\n"
+ " SUBS %1, %1, %3\n"
+ " LNKSETDGE [%2], %1\n"
+ " BLT 2f\n"
+ " DEFR %0, TXSTAT\n"
+ " ANDT %0, %0, #HI(0x3f000000)\n"
+ " CMPT %0, #HI(0x02000000)\n"
+ " BNZ 1b\n"
+ "2:\n"
+ : "=&d" (temp), "=&da" (result)
+ : "da" (&v->counter), "bd" (i)
+ : "cc");
+
+ return result;
+}
+
+#endif /* __ASM_METAG_ATOMIC_LNKGET_H */
diff --git a/arch/metag/include/asm/atomic_lock1.h b/arch/metag/include/asm/atomic_lock1.h
new file mode 100644
index 00000000000..e578955e674
--- /dev/null
+++ b/arch/metag/include/asm/atomic_lock1.h
@@ -0,0 +1,160 @@
+#ifndef __ASM_METAG_ATOMIC_LOCK1_H
+#define __ASM_METAG_ATOMIC_LOCK1_H
+
+#define ATOMIC_INIT(i) { (i) }
+
+#include <linux/compiler.h>
+
+#include <asm/barrier.h>
+#include <asm/global_lock.h>
+
+/* Return the counter of @v using a plain load; the global lock is not taken. */
+static inline int atomic_read(const atomic_t *v)
+{
+ int value = v->counter;
+
+ return value;
+}
+
+/*
+ * atomic_set needs to take the lock to protect atomic_add_unless from a
+ * possible race, as it reads the counter twice:
+ *
+ * CPU0 CPU1
+ * atomic_add_unless(1, 0)
+ * ret = v->counter (non-zero)
+ * if (ret != u) v->counter = 0
+ * v->counter += 1 (counter set to 1)
+ *
+ * Making atomic_set take the lock ensures that ordering and logical
+ * consistency is preserved.
+ */
+static inline int atomic_set(atomic_t *v, int i)
+{
+ unsigned long flags;
+
+ /* The store is done between __global_lock1() and __global_unlock1(). */
+ __global_lock1(flags);
+ /* fence() is issued ahead of the write, as in the other writers here. */
+ fence();
+ v->counter = i;
+ __global_unlock1(flags);
+ /* The value just stored is handed back to the caller. */
+ return i;
+}
+
+/*
+ * atomic_add - add @i to the counter of @v
+ * @i: value to add
+ * @v: atomic_t to update
+ *
+ * The read-modify-write is done between __global_lock1() and
+ * __global_unlock1(), with fence() issued before the update. Nothing is
+ * returned.
+ */
+static inline void atomic_add(int i, atomic_t *v)
+{
+ unsigned long flags;
+
+ __global_lock1(flags);
+ fence();
+ v->counter += i;
+ __global_unlock1(flags);
+}
+
+/*
+ * atomic_sub - subtract @i from the counter of @v
+ * @i: value to subtract
+ * @v: atomic_t to update
+ *
+ * The read-modify-write is done between __global_lock1() and
+ * __global_unlock1(), with fence() issued before the update. Nothing is
+ * returned.
+ */
+static inline void atomic_sub(int i, atomic_t *v)
+{
+ unsigned long flags;
+
+ __global_lock1(flags);
+ fence();
+ v->counter -= i;
+ __global_unlock1(flags);
+}
+
+/*
+ * atomic_add_return - add @i to the counter of @v and return the new value
+ * @i: value to add
+ * @v: atomic_t to update
+ *
+ * The sum is computed in a local while the global lock is held; fence() is
+ * issued after the read and before the write-back. The local is an unsigned
+ * long that is converted back to int on return.
+ */
+static inline int atomic_add_return(int i, atomic_t *v)
+{
+ unsigned long result;
+ unsigned long flags;
+
+ __global_lock1(flags);
+ result = v->counter;
+ result += i;
+ fence();
+ v->counter = result;
+ __global_unlock1(flags);
+
+ return result;
+}
+
+/*
+ * atomic_sub_return - subtract @i from the counter of @v, return the new value
+ * @i: value to subtract
+ * @v: atomic_t to update
+ *
+ * The difference is computed in a local while the global lock is held;
+ * fence() is issued after the read and before the write-back. The local is
+ * an unsigned long that is converted back to int on return.
+ */
+static inline int atomic_sub_return(int i, atomic_t *v)
+{
+ unsigned long result;
+ unsigned long flags;
+
+ __global_lock1(flags);
+ result = v->counter;
+ result -= i;
+ fence();
+ v->counter = result;
+ __global_unlock1(flags);
+
+ return result;
+}
+
+/*
+ * atomic_clear_mask - clear the bits of @mask in the counter of @v
+ * @mask: bits to clear
+ * @v: atomic_t to update
+ *
+ * ANDs the counter with ~mask between __global_lock1() and
+ * __global_unlock1(), with fence() issued before the update.
+ */
+static inline void atomic_clear_mask(unsigned int mask, atomic_t *v)
+{
+ unsigned long flags;
+
+ __global_lock1(flags);
+ fence();
+ v->counter &= ~mask;
+ __global_unlock1(flags);
+}
+
+/*
+ * atomic_set_mask - set the bits of @mask in the counter of @v
+ * @mask: bits to set
+ * @v: atomic_t to update
+ *
+ * ORs @mask into the counter between __global_lock1() and
+ * __global_unlock1(), with fence() issued before the update.
+ */
+static inline void atomic_set_mask(unsigned int mask, atomic_t *v)
+{
+ unsigned long flags;
+
+ __global_lock1(flags);
+ fence();
+ v->counter |= mask;
+ __global_unlock1(flags);
+}
+
+/*
+ * atomic_cmpxchg - store @new in @v if its counter currently equals @old
+ * @v: atomic_t to update
+ * @old: value the counter is compared against
+ * @new: value stored when the comparison matches
+ *
+ * Runs with the global lock held. Returns the value read from the counter,
+ * so callers detect success by comparing the return value with @old.
+ */
+static inline int atomic_cmpxchg(atomic_t *v, int old, int new)
+{
+ int ret;
+ unsigned long flags;
+
+ __global_lock1(flags);
+ ret = v->counter;
+ if (ret == old) {
+ /* fence() is only needed on the path that actually writes. */
+ fence();
+ v->counter = new;
+ }
+ __global_unlock1(flags);
+
+ return ret;
+}
+
+#define atomic_xchg(v, new) (xchg(&((v)->counter), new))
+
+/*
+ * __atomic_add_unless - add @a to the counter of @v unless it equals @u
+ * @v: atomic_t to update
+ * @a: value to add
+ * @u: value for which no addition is performed
+ *
+ * Runs with the global lock held. Returns the value the counter had before
+ * any addition.
+ */
+static inline int __atomic_add_unless(atomic_t *v, int a, int u)
+{
+ int ret;
+ unsigned long flags;
+
+ __global_lock1(flags);
+ ret = v->counter;
+ if (ret != u) {
+ /* fence() is only needed on the path that actually writes. */
+ fence();
+ v->counter += a;
+ }
+ __global_unlock1(flags);
+
+ return ret;
+}
+
+/*
+ * atomic_sub_if_positive - subtract @i from @v unless the result is negative
+ * @i: value to subtract
+ * @v: atomic_t to update
+ *
+ * Runs with the global lock held. The difference is written back only when
+ * it is >= 0. Returns the computed difference whether or not it was stored,
+ * so a negative return value means the counter was left untouched.
+ */
+static inline int atomic_sub_if_positive(int i, atomic_t *v)
+{
+ int ret;
+ unsigned long flags;
+
+ __global_lock1(flags);
+ /* Subtract the caller's @i rather than a fixed 1, as the LNKGET variant does. */
+ ret = v->counter - i;
+ if (ret >= 0) {
+ /* fence() is only needed on the path that actually writes. */
+ fence();
+ v->counter = ret;
+ }
+ __global_unlock1(flags);
+
+ return ret;
+}
+
+#endif /* __ASM_METAG_ATOMIC_LOCK1_H */
diff --git a/arch/metag/include/asm/barrier.h b/arch/metag/include/asm/barrier.h
new file mode 100644
index 00000000000..c90bfc6bf64
--- /dev/null
+++ b/arch/metag/include/asm/barrier.h
@@ -0,0 +1,85 @@
+#ifndef _ASM_METAG_BARRIER_H
+#define _ASM_METAG_BARRIER_H
+
+#include <asm/metag_mem.h>
+
+#define nop() asm volatile ("NOP")
+#define mb() wmb()
+#define rmb() barrier()
+
+#ifdef CONFIG_METAG_META21
+
+/*
+ * HTP and above have a system event to fence writes: wr_fence() issues a
+ * compiler barrier and then a single store of 0 to LINSYSEVENT_WR_FENCE.
+ */
+static inline void wr_fence(void)
+{
+ volatile int *flushptr = (volatile int *) LINSYSEVENT_WR_FENCE;
+ /* barrier() comes before the store that triggers the event. */
+ barrier();
+ *flushptr = 0;
+}
+
+#else /* CONFIG_METAG_META21 */
+
+/*
+ * ATP doesn't have system event to fence writes, so it is necessary to flush
+ * the processor write queues as well as possibly the write combiner (depending
+ * on the page being written).
+ * To ensure the write queues are flushed we do 4 writes to a system event
+ * register (in this case write combiner flush) which will also flush the write
+ * combiner.
+ */
+static inline void wr_fence(void)
+{
+ volatile int *flushptr = (volatile int *) LINSYSEVENT_WR_COMBINE_FLUSH;
+ /* barrier() comes before the stores that trigger the event. */
+ barrier();
+ /*
+  * Four back-to-back stores of 0; the pointer is volatile so each one is
+  * emitted as a separate write.
+  */
+ *flushptr = 0;
+ *flushptr = 0;
+ *flushptr = 0;
+ *flushptr = 0;
+}
+
+#endif /* !CONFIG_METAG_META21 */
+
+/*
+ * wmb - write memory barrier
+ *
+ * Thin wrapper around wr_fence(); mb() in this header is defined in terms of
+ * this function.
+ */
+static inline void wmb(void)
+{
+ /* flush writes through the write combiner */
+ wr_fence();
+}
+
+#define read_barrier_depends() do { } while (0)
+
+#ifndef CONFIG_SMP
+#define fence() do { } while (0)
+#define smp_mb() barrier()
+#define smp_rmb() barrier()
+#define smp_wmb() barrier()
+#else
+
+#ifdef CONFIG_METAG_SMP_WRITE_REORDERING
+/*
+ * Write to the atomic memory unlock system event register (command 0). This is
+ * needed before a write to shared memory in a critical section, to prevent
+ * external reordering of writes before the fence on other threads with writes
+ * after the fence on this thread (and to prevent the ensuing cache-memory
+ * incoherence). It is therefore ineffective if used after and on the same
+ * thread as a write.
+ */
+static inline void fence(void)
+{
+ volatile int *flushptr = (volatile int *) LINSYSEVENT_WR_ATOMIC_UNLOCK;
+ /* barrier() comes before the store that triggers the event. */
+ barrier();
+ /* A single store of 0 to the atomic-unlock system event register. */
+ *flushptr = 0;
+}
+#define smp_mb() fence()
+#define smp_rmb() fence()
+#define smp_wmb() barrier()
+#else
+#define fence() do { } while (0)
+#define smp_mb() barrier()
+#define smp_rmb() barrier()
+#define smp_wmb() barrier()
+#endif
+#endif
+#define smp_read_barrier_depends() do { } while (0)
+#define set_mb(var, value) do { var = value; smp_mb(); } while (0)
+
+#endif /* _ASM_METAG_BARRIER_H */
diff --git a/arch/metag/include/asm/bitops.h b/arch/metag/include/asm/bitops.h
new file mode 100644
index 00000000000..c0d0df0d137
--- /dev/null
+++ b/arch/metag/include/asm/bitops.h
@@ -0,0 +1,132 @@
+#ifndef __ASM_METAG_BITOPS_H
+#define __ASM_METAG_BITOPS_H
+
+#include <linux/compiler.h>
+#include <asm/barrier.h>
+#include <asm/global_lock.h>
+
+/*
+ * clear_bit() doesn't provide any barrier for the compiler.
+ */
+#define smp_mb__before_clear_bit() barrier()
+#define smp_mb__after_clear_bit() barrier()
+
+#ifdef CONFIG_SMP
+/*
+ * These functions are the basis of our bit ops.
+ */
+/*
+ * set_bit - set bit @bit in the bitmap starting at @p
+ * @bit: bit number; bit >> 5 selects the word and bit & 31 the bit within it
+ * @p: base address of the bitmap
+ *
+ * The read-modify-write is done between __global_lock1() and
+ * __global_unlock1(), with fence() issued before the update.
+ */
+static inline void set_bit(unsigned int bit, volatile unsigned long *p)
+{
+ unsigned long flags;
+ unsigned long mask = 1UL << (bit & 31);
+
+ p += bit >> 5;
+
+ __global_lock1(flags);
+ fence();
+ *p |= mask;
+ __global_unlock1(flags);
+}
+
+/*
+ * clear_bit - clear bit @bit in the bitmap starting at @p
+ * @bit: bit number; bit >> 5 selects the word and bit & 31 the bit within it
+ * @p: base address of the bitmap
+ *
+ * The read-modify-write is done between __global_lock1() and
+ * __global_unlock1(), with fence() issued before the update.
+ */
+static inline void clear_bit(unsigned int bit, volatile unsigned long *p)
+{
+ unsigned long flags;
+ unsigned long mask = 1UL << (bit & 31);
+
+ p += bit >> 5;
+
+ __global_lock1(flags);
+ fence();
+ *p &= ~mask;
+ __global_unlock1(flags);
+}
+
+/*
+ * change_bit - toggle bit @bit in the bitmap starting at @p
+ * @bit: bit number; bit >> 5 selects the word and bit & 31 the bit within it
+ * @p: base address of the bitmap
+ *
+ * The read-modify-write is done between __global_lock1() and
+ * __global_unlock1(), with fence() issued before the update.
+ */
+static inline void change_bit(unsigned int bit, volatile unsigned long *p)
+{
+ unsigned long flags;
+ unsigned long mask = 1UL << (bit & 31);
+
+ p += bit >> 5;
+
+ __global_lock1(flags);
+ fence();
+ *p ^= mask;
+ __global_unlock1(flags);
+}
+
+/*
+ * test_and_set_bit - set bit @bit and report whether it was already set
+ * @bit: bit number; bit >> 5 selects the word and bit & 31 the bit within it
+ * @p: base address of the bitmap
+ *
+ * Runs with the global lock held. Returns 1 if the bit was set before the
+ * call and 0 otherwise.
+ */
+static inline int test_and_set_bit(unsigned int bit, volatile unsigned long *p)
+{
+ unsigned long flags;
+ unsigned long old;
+ unsigned long mask = 1UL << (bit & 31);
+
+ p += bit >> 5;
+
+ __global_lock1(flags);
+ old = *p;
+ /* Skip the fence and the write when the bit is already set. */
+ if (!(old & mask)) {
+ fence();
+ *p = old | mask;
+ }
+ __global_unlock1(flags);
+
+ return (old & mask) != 0;
+}
+
+/*
+ * test_and_clear_bit - clear bit @bit and report whether it was set
+ * @bit: bit number; bit >> 5 selects the word and bit & 31 the bit within it
+ * @p: base address of the bitmap
+ *
+ * Runs with the global lock held. Returns 1 if the bit was set before the
+ * call and 0 otherwise.
+ */
+static inline int test_and_clear_bit(unsigned int bit,
+ volatile unsigned long *p)
+{
+ unsigned long flags;
+ unsigned long old;
+ unsigned long mask = 1UL << (bit & 31);
+
+ p += bit >> 5;
+
+ __global_lock1(flags);
+ old = *p;
+ /* Skip the fence and the write when the bit is already clear. */
+ if (old & mask) {
+ fence();
+ *p = old & ~mask;
+ }
+ __global_unlock1(flags);
+
+ return (old & mask) != 0;
+}
+
+/*
+ * test_and_change_bit - toggle bit @bit and report its previous state
+ * @bit: bit number; bit >> 5 selects the word and bit & 31 the bit within it
+ * @p: base address of the bitmap
+ *
+ * Runs with the global lock held. Returns 1 if the bit was set before the
+ * call and 0 otherwise.
+ */
+static inline int test_and_change_bit(unsigned int bit,
+ volatile unsigned long *p)
+{
+ unsigned long flags;
+ unsigned long old;
+ unsigned long mask = 1UL << (bit & 31);
+
+ p += bit >> 5;
+
+ __global_lock1(flags);
+ /* The write is unconditional here, so fence() is issued up front. */
+ fence();
+ old = *p;
+ *p = old ^ mask;
+ __global_unlock1(flags);
+
+ return (old & mask) != 0;
+}
+
+#else
+#include <asm-generic/bitops/atomic.h>
+#endif /* CONFIG_SMP */
+
+#include <asm-generic/bitops/non-atomic.h>
+#include <asm-generic/bitops/find.h>
+#include <asm-generic/bitops/ffs.h>
+#include <asm-generic/bitops/__ffs.h>
+#include <asm-generic/bitops/ffz.h>
+#include <asm-generic/bitops/fls.h>
+#include <asm-generic/bitops/__fls.h>
+#include <asm-generic/bitops/fls64.h>
+#include <asm-generic/bitops/hweight.h>
+#include <asm-generic/bitops/lock.h>
+#include <asm-generic/bitops/sched.h>
+#include <asm-generic/bitops/le.h>
+#include <asm-generic/bitops/ext2-atomic.h>
+
+#endif /* __ASM_METAG_BITOPS_H */
diff --git a/arch/metag/include/asm/bug.h b/arch/metag/include/asm/bug.h
new file mode 100644
index 00000000000..d04b48cefec
--- /dev/null
+++ b/arch/metag/include/asm/bug.h
@@ -0,0 +1,12 @@
+#ifndef _ASM_METAG_BUG_H
+#define _ASM_METAG_BUG_H
+
+#include <asm-generic/bug.h>
+
+struct pt_regs;
+
+extern const char *trap_name(int trapno);
+extern void die(const char *str, struct pt_regs *regs, long err,
+ unsigned long addr) __attribute__ ((noreturn));
+
+#endif
diff --git a/arch/metag/include/asm/cache.h b/arch/metag/include/asm/cache.h
new file mode 100644
index 00000000000..a43b650cfdc
--- /dev/null
+++ b/arch/metag/include/asm/cache.h
@@ -0,0 +1,23 @@
+#ifndef __ASM_METAG_CACHE_H
+#define __ASM_METAG_CACHE_H
+
+/* L1 cache line size (64 bytes) */
+#define L1_CACHE_SHIFT 6
+#define L1_CACHE_BYTES (1 << L1_CACHE_SHIFT)
+
+/* Meta requires large data items to be 8 byte aligned. */
+#define ARCH_SLAB_MINALIGN 8
+
+/*
+ * With an L2 cache, we may invalidate dirty lines, so we need to ensure DMA
+ * buffers have cache line alignment.
+ */
+#ifdef CONFIG_METAG_L2C
+#define ARCH_DMA_MINALIGN L1_CACHE_BYTES
+#else
+#define ARCH_DMA_MINALIGN 8
+#endif
+
+#define __read_mostly __attribute__((__section__(".data..read_mostly")))
+
+#endif
diff --git a/arch/metag/include/asm/cacheflush.h b/arch/metag/include/asm/cacheflush.h
new file mode 100644
index 00000000000..7787ec5e3ed
--- /dev/null
+++ b/arch/metag/include/asm/cacheflush.h
@@ -0,0 +1,250 @@
+#ifndef _METAG_CACHEFLUSH_H
+#define _METAG_CACHEFLUSH_H
+
+#include <linux/mm.h>
+#include <linux/sched.h>
+#include <linux/io.h>
+
+#include <asm/l2cache.h>
+#include <asm/metag_isa.h>
+#include <asm/metag_mem.h>
+
+void metag_cache_probe(void);
+
+void metag_data_cache_flush_all(const void *start);
+void metag_code_cache_flush_all(const void *start);
+
+/*
+ * Routines to flush physical cache lines that may be used to cache data or code
+ * normally accessed via the linear address range supplied. The region flushed
+ * must either lie in local or global address space determined by the top bit of
+ * the start address. If bytes is >= 4K then the whole of the related cache
+ * state will be flushed rather than a limited range.
+ */
+void metag_data_cache_flush(const void *start, int bytes);
+void metag_code_cache_flush(const void *start, int bytes);
+
+#ifdef CONFIG_METAG_META12
+
+/* Write through, virtually tagged, split I/D cache. */
+
+/*
+ * Flush the whole code cache and then the whole data cache, passing
+ * PAGE_OFFSET as the start address to both flush-all routines.
+ */
+static inline void __flush_cache_all(void)
+{
+ metag_code_cache_flush_all((void *) PAGE_OFFSET);
+ metag_data_cache_flush_all((void *) PAGE_OFFSET);
+}
+
+#define flush_cache_all() __flush_cache_all()
+
+/*
+ * Flush the caches on behalf of @mm. Only the current task's mm is acted on,
+ * in which case both caches are flushed completely; any other mm is ignored.
+ */
+static inline void flush_cache_mm(struct mm_struct *mm)
+{
+	if (mm != current->mm)
+		return;
+
+	__flush_cache_all();
+}
+
+#define flush_cache_dup_mm(mm) flush_cache_mm(mm)
+
+/*
+ * flush a range of addresses from this mm
+ *
+ * @start and @end are not used: the request is simply forwarded to
+ * flush_cache_mm() for the vma's mm.
+ */
+static inline void flush_cache_range(struct vm_area_struct *vma,
+ unsigned long start, unsigned long end)
+{
+ flush_cache_mm(vma->vm_mm);
+}
+
+/*
+ * Flush a single user page. @vmaddr and @pfn are not used: the request is
+ * forwarded to flush_cache_mm() for the vma's mm.
+ */
+static inline void flush_cache_page(struct vm_area_struct *vma,
+ unsigned long vmaddr, unsigned long pfn)
+{
+ flush_cache_mm(vma->vm_mm);
+}
+
+#define ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE 1
+/* @page is not used: the entire data cache is flushed instead. */
+static inline void flush_dcache_page(struct page *page)
+{
+ metag_data_cache_flush_all((void *) PAGE_OFFSET);
+}
+
+#define flush_dcache_mmap_lock(mapping) do { } while (0)
+#define flush_dcache_mmap_unlock(mapping) do { } while (0)
+
+/*
+ * Flush PAGE_SIZE bytes of code cache starting at the kernel virtual address
+ * of @page (page_to_virt()). @vma is not used.
+ */
+static inline void flush_icache_page(struct vm_area_struct *vma,
+ struct page *page)
+{
+ metag_code_cache_flush(page_to_virt(page), PAGE_SIZE);
+}
+
+/* @start and @end are not used: the entire data cache is flushed. */
+static inline void flush_cache_vmap(unsigned long start, unsigned long end)
+{
+ metag_data_cache_flush_all((void *) PAGE_OFFSET);
+}
+
+/* @start and @end are not used: the entire data cache is flushed. */
+static inline void flush_cache_vunmap(unsigned long start, unsigned long end)
+{
+ metag_data_cache_flush_all((void *) PAGE_OFFSET);
+}
+
+#else
+
+/* Write through, physically tagged, split I/D cache. */
+
+#define flush_cache_all() do { } while (0)
+#define flush_cache_mm(mm) do { } while (0)
+#define flush_cache_dup_mm(mm) do { } while (0)
+#define flush_cache_range(vma, start, end) do { } while (0)
+#define flush_cache_page(vma, vmaddr, pfn) do { } while (0)
+#define flush_dcache_mmap_lock(mapping) do { } while (0)
+#define flush_dcache_mmap_unlock(mapping) do { } while (0)
+#define flush_icache_page(vma, pg) do { } while (0)
+#define flush_cache_vmap(start, end) do { } while (0)
+#define flush_cache_vunmap(start, end) do { } while (0)
+
+#define ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE 1
+/*
+ * @page is not used. The code cache is flushed as a whole: on SMP by writing
+ * 1 to SYSC_ICACHE_FLUSH, otherwise through metag_code_cache_flush_all().
+ */
+static inline void flush_dcache_page(struct page *page)
+{
+ /* FIXME: We can do better than this. All we are trying to do is
+ * make the i-cache coherent, we should use the PG_arch_1 bit like
+ * e.g. powerpc.
+ */
+#ifdef CONFIG_SMP
+ metag_out32(1, SYSC_ICACHE_FLUSH);
+#else
+ metag_code_cache_flush_all((void *) PAGE_OFFSET);
+#endif
+}
+
+#endif
+
+/*
+ * Make the code cache coherent for the range [address, endaddr).
+ *
+ * On SMP the range is ignored and 1 is written to SYSC_ICACHE_FLUSH
+ * (presumably a full code cache flush - confirm against the register
+ * documentation). Otherwise only endaddr - address bytes starting at @address
+ * are flushed from the code cache.
+ */
+static inline void flush_icache_range(unsigned long address,
+ unsigned long endaddr)
+{
+#ifdef CONFIG_SMP
+ metag_out32(1, SYSC_ICACHE_FLUSH);
+#else
+ metag_code_cache_flush((void *) address, endaddr - address);
+#endif
+}
+
+/*
+ * Flush the code cache after signal trampoline code of @size bytes has been
+ * written at @addr. On SMP @addr and @size are ignored and 1 is written to
+ * SYSC_ICACHE_FLUSH instead.
+ */
+static inline void flush_cache_sigtramp(unsigned long addr, int size)
+{
+ /*
+ * Flush the icache in case there was previously some code
+ * fetched from this address, perhaps a previous sigtramp.
+ *
+ * We don't need to flush the dcache, it's write through and
+ * we just wrote the sigtramp code through it.
+ */
+#ifdef CONFIG_SMP
+ metag_out32(1, SYSC_ICACHE_FLUSH);
+#else
+ metag_code_cache_flush((void *) addr, size);
+#endif
+}
+
+#ifdef CONFIG_METAG_L2C
+
+/*
+ * Issue one CACHEWD operation, with @data as its operand, on the 64-byte
+ * aligned address obtained by clearing the low six bits of @addr.
+ */
+static inline void cachewd_line(void *addr, unsigned int data)
+{
+	void *line = (void *)((unsigned long)addr & ~0x3fUL);
+
+	__builtin_meta2_cachewd(line, data);
+}
+
+/*
+ * Perform a certain CACHEW op on each 64-byte cache line touched by the range
+ * [start, start + size).
+ *
+ * @start: first address of the region; rounded down to a 64-byte boundary.
+ * @size:  length of the region in bytes; nothing is done when it is zero.
+ * @op:    operation handed to __builtin_meta2_cachewd() for every line.
+ */
+static inline void cachew_region_op(void *start, unsigned long size,
+				    unsigned int op)
+{
+	unsigned long offset = (unsigned long)start & 0x3f;
+	int i;
+
+	/*
+	 * An empty region would make (size - 1) wrap around below and the loop
+	 * would then walk a huge number of lines, so bail out early.
+	 */
+	if (!size)
+		return;
+
+	/* Widen the region so that it begins on a cache line boundary. */
+	size += offset;
+	start -= offset;
+
+	/* Number of lines to process after the first one. */
+	i = (size - 1) >> 6;
+	do {
+		__builtin_meta2_cachewd(start, op);
+		start += 0x40;
+	} while (i--);
+}
+
+/*
+ * prevent write fence and flushbacks being reordered in L2
+ *
+ * @addr: address inside the cache line that the caller has just flushed.
+ */
+static inline void l2c_fence_flush(void *addr)
+{
+	/*
+	 * Synchronise by reading back and re-flushing.
+	 * It is assumed this access will miss, as the caller should have just
+	 * flushed the cache line.
+	 *
+	 * The volatile dereference is what performs the read: casting the
+	 * pointer without dereferencing it would generate no access at all.
+	 */
+	(void)*(volatile u8 *)addr;
+	cachewd_line(addr, CACHEW_FLUSH_L1D_L2);
+}
+
+/*
+ * prevent write fence and writebacks being reordered in L2
+ *
+ * @addr: address inside the cache line to fence on. The line is flushed with
+ * CACHEW_FLUSH_L1D_L2 and then handed to l2c_fence_flush().
+ */
+static inline void l2c_fence(void *addr)
+{
+ /*
+ * A write back has occurred, but not necessarily an invalidate, so the
+ * readback in l2c_fence_flush() would hit in the cache and have no
+ * effect. Therefore fully flush the line first.
+ */
+ cachewd_line(addr, CACHEW_FLUSH_L1D_L2);
+ l2c_fence_flush(addr);
+}
+
+/*
+ * Used to keep memory consistent when doing DMA.
+ *
+ * With the L2 cache enabled every line of [start, start + size) gets a
+ * CACHEW_FLUSH_L1D_L2 operation, followed by a fence on the last byte of the
+ * region when the L2 is in writeback mode. Without L2 the region is passed to
+ * metag_data_cache_flush().
+ */
+static inline void flush_dcache_region(void *start, unsigned long size)
+{
+ /* metag_data_cache_flush won't flush L2 cache lines if size >= 4096 */
+ if (meta_l2c_is_enabled()) {
+ cachew_region_op(start, size, CACHEW_FLUSH_L1D_L2);
+ if (meta_l2c_is_writeback())
+ l2c_fence_flush(start + size - 1);
+ } else {
+ metag_data_cache_flush(start, size);
+ }
+}
+
+/*
+ * Write the dirty lines of [start, start + size) back to memory. This only
+ * does anything when the L2 cache is both enabled and in writeback mode; the
+ * writeback is then followed by a fence on the last byte of the region.
+ */
+static inline void writeback_dcache_region(void *start, unsigned long size)
+{
+	if (!meta_l2c_is_enabled() || !meta_l2c_is_writeback())
+		return;
+
+	cachew_region_op(start, size, CACHEW_WRITEBACK_L1D_L2);
+	l2c_fence(start + size - 1);
+}
+
+/*
+ * Invalidate [start, start + size) (may also write back if necessary).
+ * Without an enabled L2 cache this falls back to metag_data_cache_flush().
+ */
+static inline void invalidate_dcache_region(void *start, unsigned long size)
+{
+	if (!meta_l2c_is_enabled()) {
+		metag_data_cache_flush(start, size);
+		return;
+	}
+
+	cachew_region_op(start, size, CACHEW_INVALIDATE_L1D_L2);
+}
+#else
+#define flush_dcache_region(s, l) metag_data_cache_flush((s), (l))
+#define writeback_dcache_region(s, l) do {} while (0)
+#define invalidate_dcache_region(s, l) flush_dcache_region((s), (l))
+#endif
+
+/*
+ * Copy @len bytes from @src to @dst and then flush the code cache for the
+ * destination range, so that code written this way is fetched afresh.
+ * @vma, @page and @vaddr are not used.
+ */
+static inline void copy_to_user_page(struct vm_area_struct *vma,
+ struct page *page, unsigned long vaddr,
+ void *dst, const void *src,
+ unsigned long len)
+{
+ memcpy(dst, src, len);
+ flush_icache_range((unsigned long)dst, (unsigned long)dst + len);
+}
+
+/*
+ * Copy @len bytes from @src to @dst. No cache maintenance is performed;
+ * @vma, @page and @vaddr are not used.
+ */
+static inline void copy_from_user_page(struct vm_area_struct *vma,
+ struct page *page, unsigned long vaddr,
+ void *dst, const void *src,
+ unsigned long len)
+{
+ memcpy(dst, src, len);
+}
+
+#endif /* _METAG_CACHEFLUSH_H */
diff --git a/arch/metag/include/asm/cachepart.h b/arch/metag/include/asm/cachepart.h
new file mode 100644
index 00000000000..cf6b44e916b
--- /dev/null
+++ b/arch/metag/include/asm/cachepart.h
@@ -0,0 +1,42 @@
+/*
+ * Meta cache partition manipulation.
+ *
+ * Copyright 2010 Imagination Technologies Ltd.
+ */
+
+#ifndef _METAG_CACHEPART_H_
+#define _METAG_CACHEPART_H_
+
+/**
+ * get_dcache_size() - Get size of data cache.
+ */
+unsigned int get_dcache_size(void);
+
+/**
+ * get_icache_size() - Get size of code cache.
+ */
+unsigned int get_icache_size(void);
+
+/**
+ * get_global_dcache_size() - Get the thread's global dcache.
+ *
+ * Returns the size of the current thread's global dcache partition.
+ */
+unsigned int get_global_dcache_size(void);
+
+/**
+ * get_global_icache_size() - Get the thread's global icache.
+ *
+ * Returns the size of the current thread's global icache partition.
+ */
+unsigned int get_global_icache_size(void);
+
+/**
+ * check_for_cache_aliasing() - Ensure that the bootloader has configured the
+ * dcache and icache properly to avoid aliasing
+ * @thread_id: Hardware thread ID
+ *
+ */
+void check_for_cache_aliasing(int thread_id);
+
+#endif
diff --git a/arch/metag/include/asm/checksum.h b/arch/metag/include/asm/checksum.h
new file mode 100644
index 00000000000..999bf761a73
--- /dev/null
+++ b/arch/metag/include/asm/checksum.h
@@ -0,0 +1,92 @@
+#ifndef _METAG_CHECKSUM_H
+#define _METAG_CHECKSUM_H
+
+/*
+ * computes the checksum of a memory block at buff, length len,
+ * and adds in "sum" (32-bit)
+ *
+ * returns a 32-bit number suitable for feeding into itself
+ * or csum_tcpudp_magic
+ *
+ * this function must be called with even lengths, except
+ * for the last fragment, which may be odd
+ *
+ * it's best to have buff aligned on a 32-bit boundary
+ */
+extern __wsum csum_partial(const void *buff, int len, __wsum sum);
+
+/*
+ * the same as csum_partial, but copies from src while it
+ * checksums
+ *
+ * here even more important to align src and dst on a 32-bit (or even
+ * better 64-bit) boundary
+ */
+extern __wsum csum_partial_copy(const void *src, void *dst, int len,
+ __wsum sum);
+
+/*
+ * the same as csum_partial_copy, but copies from user space.
+ *
+ * here even more important to align src and dst on a 32-bit (or even
+ * better 64-bit) boundary
+ */
+extern __wsum csum_partial_copy_from_user(const void __user *src, void *dst,
+ int len, __wsum sum, int *csum_err);
+
+#define csum_partial_copy_nocheck(src, dst, len, sum) \
+ csum_partial_copy((src), (dst), (len), (sum))
+
+/*
+ * Fold a 32-bit partial checksum down to 16 bits: the upper half is added
+ * into the lower half twice (the second pass absorbs the carry of the first)
+ * and the complement of the result is returned.
+ */
+static inline __sum16 csum_fold(__wsum csum)
+{
+	u32 folded = (__force u32)csum;
+	int pass;
+
+	for (pass = 0; pass < 2; pass++)
+		folded = (folded >> 16) + (folded & 0xffff);
+
+	return (__force __sum16)~folded;
+}
+
+/*
+ * This is a version of ip_compute_csum() optimized for IP headers,
+ * which always checksum on 4 octet boundaries.
+ */
+extern __sum16 ip_fast_csum(const void *iph, unsigned int ihl);
+
+/*
+ * computes the checksum of the TCP/UDP pseudo-header
+ * returns the 32-bit partial sum, neither folded nor complemented; pass it
+ * through csum_fold() (as csum_tcpudp_magic() does) for the final 16-bit value
+ *
+ * len and proto are combined as (proto + len) << 8 before being added in.
+ * The asm adds daddr, saddr and that combined value to sum; after the saddr
+ * and len_proto additions an extra 1 is added when the carry condition is
+ * set (ADDCS).
+ *
+ * NOTE(review): the first instruction is a plain ADD rather than ADDS, so a
+ * carry out of sum + daddr does not appear to be folded back in - confirm
+ * against the Meta instruction set documentation.
+ */
+static inline __wsum csum_tcpudp_nofold(__be32 saddr, __be32 daddr,
+ unsigned short len,
+ unsigned short proto,
+ __wsum sum)
+{
+ unsigned long len_proto = (proto + len) << 8;
+ asm ("ADD %0, %0, %1\n"
+ "ADDS %0, %0, %2\n"
+ "ADDCS %0, %0, #1\n"
+ "ADDS %0, %0, %3\n"
+ "ADDCS %0, %0, #1\n"
+ : "=d" (sum)
+ : "d" (daddr), "d" (saddr), "d" (len_proto),
+ "0" (sum)
+ : "cc");
+ return sum;
+}
+
+/*
+ * Checksum of the TCP/UDP pseudo-header, folded to 16 bits and complemented:
+ * csum_tcpudp_nofold() followed by csum_fold().
+ */
+static inline __sum16
+csum_tcpudp_magic(__be32 saddr, __be32 daddr, unsigned short len,
+		  unsigned short proto, __wsum sum)
+{
+	__wsum partial = csum_tcpudp_nofold(saddr, daddr, len, proto, sum);
+
+	return csum_fold(partial);
+}
+
+/*
+ * this routine is used for miscellaneous IP-like checksums, mainly
+ * in icmp.c
+ */
+extern __sum16 ip_compute_csum(const void *buff, int len);
+
+#endif /* _METAG_CHECKSUM_H */
diff --git a/arch/metag/include/asm/clock.h b/arch/metag/include/asm/clock.h
new file mode 100644
index 00000000000..3e2915a280c
--- /dev/null
+++ b/arch/metag/include/asm/clock.h
@@ -0,0 +1,51 @@
+/*
+ * arch/metag/include/asm/clock.h
+ *
+ * Copyright (C) 2012 Imagination Technologies Ltd.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#ifndef _METAG_CLOCK_H_
+#define _METAG_CLOCK_H_
+
+#include <asm/mach/arch.h>
+
+/**
+ * struct meta_clock_desc - Meta Core clock callbacks.
+ * @get_core_freq: Get the frequency of the Meta core. If this is NULL, the
+ * core frequency will be determined like this:
+ * Meta 1: based on loops_per_jiffy.
+ * Meta 2: (EXPAND_TIMER_DIV + 1) MHz.
+ */
+struct meta_clock_desc {
+ unsigned long (*get_core_freq)(void);
+};
+
+extern struct meta_clock_desc _meta_clock;
+
+/*
+ * Set up the default clock, ensuring all callbacks are valid - only accessible
+ * during boot.
+ */
+void setup_meta_clocks(struct meta_clock_desc *desc);
+
+/**
+ * get_coreclock() - Get the frequency of the Meta core clock.
+ *
+ * Calls the get_core_freq callback of _meta_clock without checking it for
+ * NULL, so the callback must already have been installed (presumably by
+ * setup_meta_clocks() during boot - confirm).
+ *
+ * Returns: The Meta core clock frequency in Hz.
+ */
+static inline unsigned long get_coreclock(void)
+{
+ /*
+ * Use the current clock callback. If set correctly this will provide
+ * the most accurate frequency as it can be calculated directly from the
+ * PLL configuration. Otherwise a default callback will have been set
+ * instead.
+ */
+ return _meta_clock.get_core_freq();
+}
+
+#endif /* _METAG_CLOCK_H_ */
diff --git a/arch/metag/include/asm/cmpxchg.h b/arch/metag/include/asm/cmpxchg.h
new file mode 100644
index 00000000000..b1bc1be8540
--- /dev/null
+++ b/arch/metag/include/asm/cmpxchg.h
@@ -0,0 +1,65 @@
+#ifndef __ASM_METAG_CMPXCHG_H
+#define __ASM_METAG_CMPXCHG_H
+
+#include <asm/barrier.h>
+
+#if defined(CONFIG_METAG_ATOMICITY_IRQSOFF)
+#include <asm/cmpxchg_irq.h>
+#elif defined(CONFIG_METAG_ATOMICITY_LOCK1)
+#include <asm/cmpxchg_lock1.h>
+#elif defined(CONFIG_METAG_ATOMICITY_LNKGET)
+#include <asm/cmpxchg_lnkget.h>
+#endif
+
+extern void __xchg_called_with_bad_pointer(void);
+
+/*
+ * Size-dispatching exchange helper: 4-byte objects go to xchg_u32(), 1-byte
+ * objects to xchg_u8(). Any other size calls
+ * __xchg_called_with_bad_pointer() and evaluates to x. The whole expression
+ * evaluates to an unsigned long holding the result.
+ */
+#define __xchg(ptr, x, size) \
+({ \
+ unsigned long __xchg__res; \
+ volatile void *__xchg_ptr = (ptr); \
+ switch (size) { \
+ case 4: \
+ __xchg__res = xchg_u32(__xchg_ptr, x); \
+ break; \
+ case 1: \
+ __xchg__res = xchg_u8(__xchg_ptr, x); \
+ break; \
+ default: \
+ __xchg_called_with_bad_pointer(); \
+ __xchg__res = x; \
+ break; \
+ } \
+ \
+ __xchg__res; \
+})
+
+/*
+ * Exchange *ptr with x via __xchg(), selecting the implementation from
+ * sizeof(*(ptr)); the result is cast back to the type of *ptr.
+ */
+#define xchg(ptr, x) \
+ ((__typeof__(*(ptr)))__xchg((ptr), (unsigned long)(x), sizeof(*(ptr))))
+
+/* This function doesn't exist, so you'll get a linker error
+ * if something tries to do an invalid cmpxchg(). */
+extern void __cmpxchg_called_with_bad_pointer(void);
+
+/*
+ * Size-dispatching compare-and-exchange helper. Only 4-byte objects are
+ * supported; every other size calls the undefined
+ * __cmpxchg_called_with_bad_pointer() and returns @old.
+ */
+static inline unsigned long __cmpxchg(volatile void *ptr, unsigned long old,
+				      unsigned long new, int size)
+{
+	if (size == 4)
+		return __cmpxchg_u32(ptr, old, new);
+
+	__cmpxchg_called_with_bad_pointer();
+	return old;
+}
+
+#define __HAVE_ARCH_CMPXCHG 1
+
+/*
+ * Compare-and-exchange on *ptr. o and n are first converted to the type of
+ * *ptr (each evaluated once), then passed to __cmpxchg() as unsigned long
+ * together with sizeof(*(ptr)); the result is cast back to the type of *ptr.
+ */
+#define cmpxchg(ptr, o, n) \
+ ({ \
+ __typeof__(*(ptr)) _o_ = (o); \
+ __typeof__(*(ptr)) _n_ = (n); \
+ (__typeof__(*(ptr))) __cmpxchg((ptr), (unsigned long)_o_, \
+ (unsigned long)_n_, \
+ sizeof(*(ptr))); \
+ })
+
+#endif /* __ASM_METAG_CMPXCHG_H */
diff --git a/arch/metag/include/asm/cmpxchg_irq.h b/arch/metag/include/asm/cmpxchg_irq.h
new file mode 100644
index 00000000000..649573168b0
--- /dev/null
+++ b/arch/metag/include/asm/cmpxchg_irq.h
@@ -0,0 +1,42 @@
+#ifndef __ASM_METAG_CMPXCHG_IRQ_H
+#define __ASM_METAG_CMPXCHG_IRQ_H
+
+#include <linux/irqflags.h>
+
+/*
+ * Store @val to *@m and return the value that was there before, with local
+ * interrupts disabled around the load/store pair.
+ */
+static inline unsigned long xchg_u32(volatile u32 *m, unsigned long val)
+{
+	unsigned long irqflags;
+	unsigned long prev;
+
+	local_irq_save(irqflags);
+	prev = *m;
+	*m = val;
+	local_irq_restore(irqflags);
+
+	return prev;
+}
+
+/*
+ * Store the low byte of @val to *@m and return the byte that was there
+ * before, with local interrupts disabled around the load/store pair.
+ */
+static inline unsigned long xchg_u8(volatile u8 *m, unsigned long val)
+{
+	unsigned long irqflags;
+	unsigned long prev;
+
+	local_irq_save(irqflags);
+	prev = *m;
+	*m = val & 0xff;
+	local_irq_restore(irqflags);
+
+	return prev;
+}
+
+/*
+ * Compare *@m with @old and, when they are equal, store @new. The value read
+ * from *@m is returned either way. Local interrupts are disabled around the
+ * sequence.
+ */
+static inline unsigned long __cmpxchg_u32(volatile int *m, unsigned long old,
+					  unsigned long new)
+{
+	unsigned long irqflags;
+	__u32 seen;
+
+	local_irq_save(irqflags);
+	seen = *m;
+	if (seen == old)
+		*m = new;
+	local_irq_restore(irqflags);	/* implies memory barrier */
+
+	return seen;
+}
+
+#endif /* __ASM_METAG_CMPXCHG_IRQ_H */
diff --git a/arch/metag/include/asm/cmpxchg_lnkget.h b/arch/metag/include/asm/cmpxchg_lnkget.h
new file mode 100644
index 00000000000..0154e2807eb
--- /dev/null
+++ b/arch/metag/include/asm/cmpxchg_lnkget.h
@@ -0,0 +1,86 @@
+#ifndef __ASM_METAG_CMPXCHG_LNKGET_H
+#define __ASM_METAG_CMPXCHG_LNKGET_H
+
+/*
+ * Exchange the 32-bit word at @m with @val and return the previous value.
+ *
+ * The asm loads the old word with LNKGETD, stores the new one with LNKSETD
+ * and branches back to label 1 unless TXSTAT, masked with 0x3f000000, equals
+ * 0x02000000 (presumably the "linked store succeeded" status - confirm
+ * against the Meta ISA documentation). smp_mb() is issued before and after.
+ */
+static inline unsigned long xchg_u32(volatile u32 *m, unsigned long val)
+{
+ int temp, old;
+
+ smp_mb();
+
+ asm volatile (
+ "1: LNKGETD %1, [%2]\n"
+ " LNKSETD [%2], %3\n"
+ " DEFR %0, TXSTAT\n"
+ " ANDT %0, %0, #HI(0x3f000000)\n"
+ " CMPT %0, #HI(0x02000000)\n"
+ " BNZ 1b\n"
+#ifdef CONFIG_METAG_LNKGET_AROUND_CACHE
+ " DCACHE [%2], %0\n"
+#endif
+ : "=&d" (temp), "=&d" (old)
+ : "da" (m), "da" (val)
+ : "cc"
+ );
+
+ smp_mb();
+
+ return old;
+}
+
+/*
+ * Exchange the byte at @m with the low 8 bits of @val and return the
+ * previous byte.
+ *
+ * @m only points at a single byte, so the byte-sized linked load/store pair
+ * (LNKGETB/LNKSETB) is used; the dword forms would read and overwrite the
+ * three neighbouring bytes as well. The loop branches back to label 1 unless
+ * TXSTAT, masked with 0x3f000000, equals 0x02000000 (presumably the "linked
+ * store succeeded" status - confirm against the Meta ISA documentation).
+ * smp_mb() is issued before and after the sequence.
+ */
+static inline unsigned long xchg_u8(volatile u8 *m, unsigned long val)
+{
+	int temp, old;
+
+	smp_mb();
+
+	asm volatile (
+		      "1: LNKGETB %1, [%2]\n"
+		      " LNKSETB [%2], %3\n"
+		      " DEFR %0, TXSTAT\n"
+		      " ANDT %0, %0, #HI(0x3f000000)\n"
+		      " CMPT %0, #HI(0x02000000)\n"
+		      " BNZ 1b\n"
+#ifdef CONFIG_METAG_LNKGET_AROUND_CACHE
+		      " DCACHE [%2], %0\n"
+#endif
+		      : "=&d" (temp), "=&d" (old)
+		      : "da" (m), "da" (val & 0xff)
+		      : "cc"
+		      );
+
+	smp_mb();
+
+	return old;
+}
+
+/*
+ * Compare the 32-bit word at @m with @old and store @new when they match.
+ * The value loaded from @m is returned either way.
+ *
+ * The asm loads the word with LNKGETD, compares it with @old and issues the
+ * conditional store LNKSETDEQ. On a mismatch it branches straight to label 2.
+ * Otherwise it branches back to label 1 unless TXSTAT, masked with
+ * 0x3f000000, equals 0x02000000 (presumably the "linked store succeeded"
+ * status - confirm against the Meta ISA documentation). smp_mb() is issued
+ * before and after.
+ */
+static inline unsigned long __cmpxchg_u32(volatile int *m, unsigned long old,
+ unsigned long new)
+{
+ __u32 retval, temp;
+
+ smp_mb();
+
+ asm volatile (
+ "1: LNKGETD %1, [%2]\n"
+ " CMP %1, %3\n"
+ " LNKSETDEQ [%2], %4\n"
+ " BNE 2f\n"
+ " DEFR %0, TXSTAT\n"
+ " ANDT %0, %0, #HI(0x3f000000)\n"
+ " CMPT %0, #HI(0x02000000)\n"
+ " BNZ 1b\n"
+#ifdef CONFIG_METAG_LNKGET_AROUND_CACHE
+ " DCACHE [%2], %0\n"
+#endif
+ "2:\n"
+ : "=&d" (temp), "=&da" (retval)
+ : "da" (m), "bd" (old), "da" (new)
+ : "cc"
+ );
+
+ smp_mb();
+
+ return retval;
+}
+
+#endif /* __ASM_METAG_CMPXCHG_LNKGET_H */
diff --git a/arch/metag/include/asm/cmpxchg_lock1.h b/arch/metag/include/asm/cmpxchg_lock1.h
new file mode 100644
index 00000000000..fd685047496
--- /dev/null
+++ b/arch/metag/include/asm/cmpxchg_lock1.h
@@ -0,0 +1,48 @@
+#ifndef __ASM_METAG_CMPXCHG_LOCK1_H
+#define __ASM_METAG_CMPXCHG_LOCK1_H
+
+#include <asm/global_lock.h>
+
+/* Use LOCK2 as these have to be atomic w.r.t. ordinary accesses. */
+
+/*
+ * Store @val to *@m and return the previous value. The load/store pair runs
+ * between __global_lock2() and __global_unlock2(), with a fence() issued
+ * right after the lock is taken.
+ */
+static inline unsigned long xchg_u32(volatile u32 *m, unsigned long val)
+{
+ unsigned long flags, retval;
+
+ __global_lock2(flags);
+ fence();
+ retval = *m;
+ *m = val;
+ __global_unlock2(flags);
+ return retval;
+}
+
+/*
+ * Store the low byte of @val to *@m and return the previous byte. The
+ * load/store pair runs between __global_lock2() and __global_unlock2(), with
+ * a fence() issued right after the lock is taken.
+ */
+static inline unsigned long xchg_u8(volatile u8 *m, unsigned long val)
+{
+ unsigned long flags, retval;
+
+ __global_lock2(flags);
+ fence();
+ retval = *m;
+ *m = val & 0xff;
+ __global_unlock2(flags);
+ return retval;
+}
+
+/*
+ * Compare *@m with @old and store @new when they are equal; the value read
+ * from *@m is returned either way. The sequence runs between
+ * __global_lock2() and __global_unlock2(). Unlike the xchg helpers in this
+ * file, fence() is only issued on the path that actually stores.
+ */
+static inline unsigned long __cmpxchg_u32(volatile int *m, unsigned long old,
+ unsigned long new)
+{
+ __u32 retval;
+ unsigned long flags;
+
+ __global_lock2(flags);
+ retval = *m;
+ if (retval == old) {
+ fence();
+ *m = new;
+ }
+ __global_unlock2(flags);
+ return retval;
+}
+
+#endif /* __ASM_METAG_CMPXCHG_LOCK1_H */
diff --git a/arch/metag/include/asm/core_reg.h b/arch/metag/include/asm/core_reg.h
new file mode 100644
index 00000000000..bdbc3a51f31
--- /dev/null
+++ b/arch/metag/include/asm/core_reg.h
@@ -0,0 +1,35 @@
+#ifndef __ASM_METAG_CORE_REG_H_
+#define __ASM_METAG_CORE_REG_H_
+
+#include <asm/metag_regs.h>
+
+extern void core_reg_write(int unit, int reg, int thread, unsigned int val);
+extern unsigned int core_reg_read(int unit, int reg, int thread);
+
+/*
+ * These macros allow direct access from C to any register known to the
+ * assembler. Example candidates are TXTACTCYC, TXIDLECYC, and TXPRIVEXT.
+ */
+
+/*
+ * Read register reg with a single MOV and evaluate to its value as an
+ * unsigned int. reg is pasted into the instruction text, so it must be a
+ * register name the assembler knows.
+ */
+#define __core_reg_get(reg) ({ \
+ unsigned int __grvalue; \
+ asm volatile("MOV %0," #reg \
+ : "=r" (__grvalue)); \
+ __grvalue; \
+})
+
+/*
+ * Write value (converted to unsigned int, evaluated once) to register reg
+ * with a single MOV. reg is pasted into the instruction text.
+ */
+#define __core_reg_set(reg, value) do { \
+ unsigned int __srvalue = (value); \
+ asm volatile("MOV " #reg ",%0" \
+ : \
+ : "r" (__srvalue)); \
+} while (0)
+
+/*
+ * Exchange value with register reg using a SWAP instruction. value is both
+ * read and assigned to, so it must be an lvalue; afterwards it holds whatever
+ * the asm left in the operand (the "+r" constraint marks it read-write).
+ */
+#define __core_reg_swap(reg, value) do { \
+ unsigned int __srvalue = (value); \
+ asm volatile("SWAP " #reg ",%0" \
+ : "+r" (__srvalue)); \
+ (value) = __srvalue; \
+} while (0)
+
+#endif
diff --git a/arch/metag/include/asm/cpu.h b/arch/metag/include/asm/cpu.h
new file mode 100644
index 00000000000..decf1296926
--- /dev/null
+++ b/arch/metag/include/asm/cpu.h
@@ -0,0 +1,14 @@
+#ifndef _ASM_METAG_CPU_H
+#define _ASM_METAG_CPU_H
+
+#include <linux/percpu.h>
+
+/*
+ * Per-CPU information; one instance per CPU is declared as cpu_data below
+ * the structure.
+ */
+struct cpuinfo_metag {
+ struct cpu cpu;
+#ifdef CONFIG_SMP
+ /* Only present on SMP builds. */
+ unsigned long loops_per_jiffy;
+#endif
+};
+
+DECLARE_PER_CPU(struct cpuinfo_metag, cpu_data);
+#endif /* _ASM_METAG_CPU_H */
diff --git a/arch/metag/include/asm/da.h b/arch/metag/include/asm/da.h
new file mode 100644
index 00000000000..81bd5212fb0
--- /dev/null
+++ b/arch/metag/include/asm/da.h
@@ -0,0 +1,43 @@
+/*
+ * Meta DA JTAG debugger control.
+ *
+ * Copyright 2012 Imagination Technologies Ltd.
+ */
+
+#ifndef _METAG_DA_H_
+#define _METAG_DA_H_
+
+#ifdef CONFIG_METAG_DA
+
+#include <linux/init.h>
+#include <linux/types.h>
+
+extern bool _metag_da_present;
+
+/**
+ * metag_da_enabled() - Find whether a DA is currently enabled.
+ *
+ * Returns: true if a DA was detected, false if not.
+ */
+static inline bool metag_da_enabled(void)
+{
+ return _metag_da_present;
+}
+
+/**
+ * metag_da_probe() - Try and detect a connected DA.
+ *
+ * This is used at start up to detect whether a DA is active.
+ *
+ * Returns: 0 on detection, -err otherwise.
+ */
+int __init metag_da_probe(void);
+
+#else /* !CONFIG_METAG_DA */
+
+#define metag_da_enabled() false
+#define metag_da_probe() do {} while (0)
+
+#endif
+
+#endif /* _METAG_DA_H_ */
diff --git a/arch/metag/include/asm/delay.h b/arch/metag/include/asm/delay.h
new file mode 100644
index 00000000000..9c92f996957
--- /dev/null
+++ b/arch/metag/include/asm/delay.h
@@ -0,0 +1,29 @@
+#ifndef _METAG_DELAY_H
+#define _METAG_DELAY_H
+
+/*
+ * Copyright (C) 1993 Linus Torvalds
+ *
+ * Delay routines calling functions in arch/metag/lib/delay.c
+ */
+
+/* Undefined functions to get compile-time errors */
+extern void __bad_udelay(void);
+extern void __bad_ndelay(void);
+
+extern void __udelay(unsigned long usecs);
+extern void __ndelay(unsigned long nsecs);
+extern void __const_udelay(unsigned long xloops);
+extern void __delay(unsigned long loops);
+
+/*
+ * 0x10c7 is 2**32 / 1000000 (rounded up)
+ *
+ * Constant arguments are scaled here and passed to __const_udelay(); a
+ * constant above 20000 instead references the undefined __bad_udelay() so
+ * that the build fails. Non-constant arguments go to __udelay().
+ */
+#define udelay(n) (__builtin_constant_p(n) ? \
+ ((n) > 20000 ? __bad_udelay() : __const_udelay((n) * 0x10c7ul)) : \
+ __udelay(n))
+
+/*
+ * 0x5 is 2**32 / 1000000000 (rounded up)
+ *
+ * Constant arguments are scaled here and passed to __const_udelay(); a
+ * constant above 20000 instead references the undefined __bad_ndelay() so
+ * that the build fails. Non-constant arguments go to __ndelay().
+ */
+#define ndelay(n) (__builtin_constant_p(n) ? \
+ ((n) > 20000 ? __bad_ndelay() : __const_udelay((n) * 5ul)) : \
+ __ndelay(n))
+
+#endif /* _METAG_DELAY_H */
diff --git a/arch/metag/include/asm/div64.h b/arch/metag/include/asm/div64.h
new file mode 100644
index 00000000000..0fdd1167621
--- /dev/null
+++ b/arch/metag/include/asm/div64.h
@@ -0,0 +1,12 @@
+#ifndef __ASM_DIV64_H__
+#define __ASM_DIV64_H__
+
+#include <asm-generic/div64.h>
+
+extern u64 div_u64(u64 dividend, u64 divisor);
+extern s64 div_s64(s64 dividend, s64 divisor);
+
+#define div_u64 div_u64
+#define div_s64 div_s64
+
+#endif
diff --git a/arch/metag/include/asm/dma-mapping.h b/arch/metag/include/asm/dma-mapping.h
new file mode 100644
index 00000000000..14b23efd9b7
--- /dev/null
+++ b/arch/metag/include/asm/dma-mapping.h
@@ -0,0 +1,190 @@
+#ifndef _ASM_METAG_DMA_MAPPING_H
+#define _ASM_METAG_DMA_MAPPING_H
+
+#include <linux/mm.h>
+
+#include <asm/cache.h>
+#include <asm/io.h>
+#include <linux/scatterlist.h>
+#include <asm/bug.h>
+
+#define dma_alloc_noncoherent(d, s, h, f) dma_alloc_coherent(d, s, h, f)
+#define dma_free_noncoherent(d, s, v, h) dma_free_coherent(d, s, v, h)
+
+void *dma_alloc_coherent(struct device *dev, size_t size,
+ dma_addr_t *dma_handle, gfp_t flag);
+
+void dma_free_coherent(struct device *dev, size_t size,
+ void *vaddr, dma_addr_t dma_handle);
+
+void dma_sync_for_device(void *vaddr, size_t size, int dma_direction);
+void dma_sync_for_cpu(void *vaddr, size_t size, int dma_direction);
+
+int dma_mmap_coherent(struct device *dev, struct vm_area_struct *vma,
+ void *cpu_addr, dma_addr_t dma_addr, size_t size);
+
+int dma_mmap_writecombine(struct device *dev, struct vm_area_struct *vma,
+ void *cpu_addr, dma_addr_t dma_addr, size_t size);
+
+/*
+ * Map a single kernel-virtual buffer for DMA.
+ *
+ * Syncs the buffer for the device with dma_sync_for_device() and returns
+ * virt_to_phys(ptr) as the DMA address. BUGs on an invalid direction and
+ * warns when size is zero. @dev is not used.
+ */
+static inline dma_addr_t
+dma_map_single(struct device *dev, void *ptr, size_t size,
+ enum dma_data_direction direction)
+{
+ BUG_ON(!valid_dma_direction(direction));
+ WARN_ON(size == 0);
+ dma_sync_for_device(ptr, size, direction);
+ return virt_to_phys(ptr);
+}
+
+/*
+ * Undo dma_map_single(): sync the buffer for the CPU, using phys_to_virt()
+ * to turn the DMA address back into a kernel virtual address. BUGs on an
+ * invalid direction. @dev is not used.
+ */
+static inline void
+dma_unmap_single(struct device *dev, dma_addr_t dma_addr, size_t size,
+ enum dma_data_direction direction)
+{
+ BUG_ON(!valid_dma_direction(direction));
+ dma_sync_for_cpu(phys_to_virt(dma_addr), size, direction);
+}
+
+/*
+ * Map a scatterlist for DMA.
+ *
+ * For each of the @nents entries the DMA address is set to sg_phys() and the
+ * entry is synced for the device. BUGs on an invalid direction or on an entry
+ * without a page; warns when the list is empty or its first entry has zero
+ * length. Always returns @nents. @dev is not used.
+ */
+static inline int
+dma_map_sg(struct device *dev, struct scatterlist *sglist, int nents,
+ enum dma_data_direction direction)
+{
+ struct scatterlist *sg;
+ int i;
+
+ BUG_ON(!valid_dma_direction(direction));
+ WARN_ON(nents == 0 || sglist[0].length == 0);
+
+ for_each_sg(sglist, sg, nents, i) {
+ BUG_ON(!sg_page(sg));
+
+ sg->dma_address = sg_phys(sg);
+ dma_sync_for_device(sg_virt(sg), sg->length, direction);
+ }
+
+ return nents;
+}
+
+/*
+ * Map @size bytes at @offset into @page for DMA.
+ *
+ * The region is synced for the device and its physical address,
+ * page_to_phys(page) + offset, is returned as the DMA address. BUGs on an
+ * invalid direction. @dev is not used.
+ */
+static inline dma_addr_t
+dma_map_page(struct device *dev, struct page *page, unsigned long offset,
+	     size_t size, enum dma_data_direction direction)
+{
+	dma_addr_t dma_address = page_to_phys(page) + offset;
+
+	BUG_ON(!valid_dma_direction(direction));
+	/*
+	 * dma_sync_for_device() takes a kernel virtual address, so translate
+	 * the physical address first, exactly as dma_unmap_page() does for
+	 * dma_sync_for_cpu().
+	 */
+	dma_sync_for_device(phys_to_virt(dma_address), size, direction);
+	return dma_address;
+}
+
+/*
+ * Undo dma_map_page(): sync the region for the CPU, using phys_to_virt() to
+ * turn the DMA address back into a kernel virtual address. BUGs on an invalid
+ * direction. @dev is not used.
+ */
+static inline void
+dma_unmap_page(struct device *dev, dma_addr_t dma_address, size_t size,
+ enum dma_data_direction direction)
+{
+ BUG_ON(!valid_dma_direction(direction));
+ dma_sync_for_cpu(phys_to_virt(dma_address), size, direction);
+}
+
+
+/*
+ * Undo dma_map_sg(): sync each of the @nhwentries entries for the CPU.
+ *
+ * Note that every entry's dma_address is (re)written with sg_phys() here as
+ * well. BUGs on an invalid direction or on an entry without a page; warns
+ * when the list is empty or its first entry has zero length. @dev is not
+ * used.
+ */
+static inline void
+dma_unmap_sg(struct device *dev, struct scatterlist *sglist, int nhwentries,
+ enum dma_data_direction direction)
+{
+ struct scatterlist *sg;
+ int i;
+
+ BUG_ON(!valid_dma_direction(direction));
+ WARN_ON(nhwentries == 0 || sglist[0].length == 0);
+
+ for_each_sg(sglist, sg, nhwentries, i) {
+ BUG_ON(!sg_page(sg));
+
+ sg->dma_address = sg_phys(sg);
+ dma_sync_for_cpu(sg_virt(sg), sg->length, direction);
+ }
+}
+
+/*
+ * Sync a mapped buffer for CPU access. The DMA handle is converted back to a
+ * kernel virtual address with phys_to_virt(). @dev is not used.
+ */
+static inline void
+dma_sync_single_for_cpu(struct device *dev, dma_addr_t dma_handle, size_t size,
+ enum dma_data_direction direction)
+{
+ dma_sync_for_cpu(phys_to_virt(dma_handle), size, direction);
+}
+
+/*
+ * Sync a mapped buffer for device access. The DMA handle is converted back to
+ * a kernel virtual address with phys_to_virt(). @dev is not used.
+ */
+static inline void
+dma_sync_single_for_device(struct device *dev, dma_addr_t dma_handle,
+ size_t size, enum dma_data_direction direction)
+{
+ dma_sync_for_device(phys_to_virt(dma_handle), size, direction);
+}
+
+/*
+ * Sync @size bytes starting @offset bytes into a mapped buffer for CPU
+ * access. The DMA handle is converted with phys_to_virt() before the offset
+ * is added. @dev is not used.
+ */
+static inline void
+dma_sync_single_range_for_cpu(struct device *dev, dma_addr_t dma_handle,
+ unsigned long offset, size_t size,
+ enum dma_data_direction direction)
+{
+ dma_sync_for_cpu(phys_to_virt(dma_handle)+offset, size,
+ direction);
+}
+
+/*
+ * Sync @size bytes starting @offset bytes into a mapped buffer for device
+ * access. The DMA handle is converted with phys_to_virt() before the offset
+ * is added. @dev is not used.
+ */
+static inline void
+dma_sync_single_range_for_device(struct device *dev, dma_addr_t dma_handle,
+ unsigned long offset, size_t size,
+ enum dma_data_direction direction)
+{
+ dma_sync_for_device(phys_to_virt(dma_handle)+offset, size,
+ direction);
+}
+
+/*
+ * Sync every entry of a mapped scatterlist for CPU access.
+ *
+ * @sg:     first entry of the list.
+ * @nelems: number of entries to sync.
+ *
+ * The list is walked with for_each_sg(), like dma_map_sg() does, so that
+ * chained scatterlists are followed correctly; stepping with sg++ would run
+ * off the end of the first chunk. @dev is not used.
+ */
+static inline void
+dma_sync_sg_for_cpu(struct device *dev, struct scatterlist *sg, int nelems,
+		    enum dma_data_direction direction)
+{
+	struct scatterlist *entry;
+	int i;
+
+	for_each_sg(sg, entry, nelems, i)
+		dma_sync_for_cpu(sg_virt(entry), entry->length, direction);
+}
+
+/*
+ * Sync every entry of a mapped scatterlist for device access.
+ *
+ * @sg:     first entry of the list.
+ * @nelems: number of entries to sync.
+ *
+ * The list is walked with for_each_sg(), like dma_map_sg() does, so that
+ * chained scatterlists are followed correctly; stepping with sg++ would run
+ * off the end of the first chunk. @dev is not used.
+ */
+static inline void
+dma_sync_sg_for_device(struct device *dev, struct scatterlist *sg, int nelems,
+		       enum dma_data_direction direction)
+{
+	struct scatterlist *entry;
+	int i;
+
+	for_each_sg(sg, entry, nelems, i)
+		dma_sync_for_device(sg_virt(entry), entry->length, direction);
+}
+
+/* Always returns 0: no DMA address is ever reported as a mapping error. */
+static inline int
+dma_mapping_error(struct device *dev, dma_addr_t dma_addr)
+{
+ return 0;
+}
+
+#define dma_supported(dev, mask) (1)
+
+/*
+ * Set the DMA mask of @dev to @mask.
+ *
+ * Returns 0 on success, or -EIO when the device has no dma_mask pointer or
+ * dma_supported() rejects the mask.
+ */
+static inline int
+dma_set_mask(struct device *dev, u64 mask)
+{
+ if (!dev->dma_mask || !dma_supported(dev, mask))
+ return -EIO;
+
+ *dev->dma_mask = mask;
+
+ return 0;
+}
+
+/*
+ * dma_alloc_noncoherent() returns non-cacheable memory, so there's no need to
+ * do any flushing here.
+ */
+static inline void
+dma_cache_sync(struct device *dev, void *vaddr, size_t size,
+ enum dma_data_direction direction)
+{
+}
+
+/* drivers/base/dma-mapping.c */
+extern int dma_common_get_sgtable(struct device *dev, struct sg_table *sgt,
+ void *cpu_addr, dma_addr_t dma_addr,
+ size_t size);
+
+#define dma_get_sgtable(d, t, v, h, s) dma_common_get_sgtable(d, t, v, h, s)
+
+#endif
diff --git a/arch/metag/include/asm/elf.h b/arch/metag/include/asm/elf.h
new file mode 100644
index 00000000000..d63b9d0e57d
--- /dev/null
+++ b/arch/metag/include/asm/elf.h
@@ -0,0 +1,128 @@
+#ifndef __ASM_METAG_ELF_H
+#define __ASM_METAG_ELF_H
+
+#define EM_METAG 174
+
+/* Meta relocations */
+#define R_METAG_HIADDR16 0
+#define R_METAG_LOADDR16 1
+#define R_METAG_ADDR32 2
+#define R_METAG_NONE 3
+#define R_METAG_RELBRANCH 4
+#define R_METAG_GETSETOFF 5
+
+/* Backward compatibility */
+#define R_METAG_REG32OP1 6
+#define R_METAG_REG32OP2 7
+#define R_METAG_REG32OP3 8
+#define R_METAG_REG16OP1 9
+#define R_METAG_REG16OP2 10
+#define R_METAG_REG16OP3 11
+#define R_METAG_REG32OP4 12
+
+#define R_METAG_HIOG 13
+#define R_METAG_LOOG 14
+
+/* GNU */
+#define R_METAG_GNU_VTINHERIT 30
+#define R_METAG_GNU_VTENTRY 31
+
+/* PIC relocations */
+#define R_METAG_HI16_GOTOFF 32
+#define R_METAG_LO16_GOTOFF 33
+#define R_METAG_GETSET_GOTOFF 34
+#define R_METAG_GETSET_GOT 35
+#define R_METAG_HI16_GOTPC 36
+#define R_METAG_LO16_GOTPC 37
+#define R_METAG_HI16_PLT 38
+#define R_METAG_LO16_PLT 39
+#define R_METAG_RELBRANCH_PLT 40
+#define R_METAG_GOTOFF 41
+#define R_METAG_PLT 42
+#define R_METAG_COPY 43
+#define R_METAG_JMP_SLOT 44
+#define R_METAG_RELATIVE 45
+#define R_METAG_GLOB_DAT 46
+
+/*
+ * ELF register definitions.
+ */
+
+#include <asm/page.h>
+#include <asm/processor.h>
+#include <asm/ptrace.h>
+#include <asm/user.h>
+
+typedef unsigned long elf_greg_t;
+
+#define ELF_NGREG (sizeof(struct user_gp_regs) / sizeof(elf_greg_t))
+typedef elf_greg_t elf_gregset_t[ELF_NGREG];
+
+typedef unsigned long elf_fpregset_t;
+
+/*
+ * This is used to ensure we don't load something for the wrong architecture.
+ */
+#define elf_check_arch(x) ((x)->e_machine == EM_METAG)
+
+/*
+ * These are used to set parameters in the core dumps.
+ */
+#define ELF_CLASS ELFCLASS32
+#define ELF_DATA ELFDATA2LSB
+#define ELF_ARCH EM_METAG
+
+#define ELF_PLAT_INIT(_r, load_addr) \
+ do { _r->ctx.AX[0].U0 = 0; } while (0)
+
+#define USE_ELF_CORE_DUMP
+#define CORE_DUMP_USE_REGSET
+#define ELF_EXEC_PAGESIZE PAGE_SIZE
+
+/* This is the location that an ET_DYN program is loaded if exec'ed. Typical
+ use of this is to invoke "./ld.so someprog" to test out a new version of
+ the loader. We need to make sure that it is out of the way of the program
+ that it will "exec", and that there is sufficient room for the brk. */
+
+#define ELF_ET_DYN_BASE 0x08000000UL
+
+#define ELF_CORE_COPY_REGS(_dest, _regs) \
+ memcpy((char *)&_dest, (char *)_regs, sizeof(struct pt_regs));
+
+/* This yields a mask that user programs can use to figure out what
+ instruction set this cpu supports. */
+
+#define ELF_HWCAP (0)
+
+/* This yields a string that ld.so will use to load implementation
+ specific libraries for optimization. This is more specific in
+ intent than poking at uname or /proc/cpuinfo. */
+
+#define ELF_PLATFORM (NULL)
+
+#define SET_PERSONALITY(ex) \
+ set_personality(PER_LINUX | (current->personality & (~PER_MASK)))
+
+#define STACK_RND_MASK (0)
+
+#ifdef CONFIG_METAG_USER_TCM
+
+struct elf32_phdr;
+struct file;
+
+unsigned long __metag_elf_map(struct file *filep, unsigned long addr,
+ struct elf32_phdr *eppnt, int prot, int type,
+ unsigned long total_size);
+
+/*
+ * Thin inline wrapper that forwards all arguments unchanged to
+ * __metag_elf_map() and returns its result.
+ */
+static inline unsigned long metag_elf_map(struct file *filep,
+ unsigned long addr,
+ struct elf32_phdr *eppnt, int prot,
+ int type, unsigned long total_size)
+{
+ return __metag_elf_map(filep, addr, eppnt, prot, type, total_size);
+}
+#define elf_map metag_elf_map
+
+#endif
+
+#endif
diff --git a/arch/metag/include/asm/fixmap.h b/arch/metag/include/asm/fixmap.h
new file mode 100644
index 00000000000..33312751c92
--- /dev/null
+++ b/arch/metag/include/asm/fixmap.h
@@ -0,0 +1,99 @@
+/*
+ * fixmap.h: compile-time virtual memory allocation
+ *
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License. See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright (C) 1998 Ingo Molnar
+ *
+ * Support of BIGMEM added by Gerhard Wichert, Siemens AG, July 1999
+ */
+
+#ifndef _ASM_FIXMAP_H
+#define _ASM_FIXMAP_H
+
+#include <asm/pgtable.h>
+#ifdef CONFIG_HIGHMEM
+#include <linux/threads.h>
+#include <asm/kmap_types.h>
+#endif
+
+/*
+ * Here we define all the compile-time 'special' virtual
+ * addresses. The point is to have a constant address at
+ * compile time, but to set the physical address only
+ * in the boot process. We allocate these special addresses
+ * from the end of the consistent memory region backwards.
+ * Also this lets us do fail-safe vmalloc(), we
+ * can guarantee that these special addresses and
+ * vmalloc()-ed addresses never overlap.
+ *
+ * these 'compile-time allocated' memory buffers are
+ * fixed-size 4k pages. (or larger if used with an increment
+ * higher than 1) use fixmap_set(idx,phys) to associate
+ * physical memory with fixmap indices.
+ *
+ * TLB entries of such buffers will not be flushed across
+ * task switches.
+ */
+enum fixed_addresses {
+#define FIX_N_COLOURS 8
+#ifdef CONFIG_HIGHMEM
+ /* reserved pte's for temporary kernel mappings */
+ FIX_KMAP_BEGIN,
+ FIX_KMAP_END = FIX_KMAP_BEGIN+(KM_TYPE_NR*NR_CPUS)-1,
+#endif
+ __end_of_fixed_addresses
+};
+
+#define FIXADDR_TOP (CONSISTENT_START - PAGE_SIZE)
+#define FIXADDR_SIZE (__end_of_fixed_addresses << PAGE_SHIFT)
+#define FIXADDR_START ((FIXADDR_TOP - FIXADDR_SIZE) & PMD_MASK)
+
+#define __fix_to_virt(x) (FIXADDR_TOP - ((x) << PAGE_SHIFT))
+#define __virt_to_fix(x) ((FIXADDR_TOP - ((x)&PAGE_MASK)) >> PAGE_SHIFT)
+
+extern void __this_fixmap_does_not_exist(void);
+/*
+ * 'index to address' translation. If anyone tries to use the idx
+ * directly without translation, we catch the bug with a NULL-dereference
+ * kernel oops. Illegal ranges of incoming indices are caught too: an
+ * out-of-range idx leaves a call to __this_fixmap_does_not_exist() in
+ * the generated code (see the comment in the function body).
+ */
+static inline unsigned long fix_to_virt(const unsigned int idx)
+{
+ /*
+ * this branch gets completely eliminated after inlining,
+ * except when someone tries to use fixaddr indices in an
+ * illegal way. (such as mixing up address types or using
+ * out-of-range indices).
+ *
+ * If it doesn't get removed, the linker will complain
+ * loudly with a reasonably clear error message..
+ */
+ if (idx >= __end_of_fixed_addresses)
+ __this_fixmap_does_not_exist();
+
+ return __fix_to_virt(idx);
+}
+
+/*
+ * 'address to index' translation, the inverse of fix_to_virt().
+ *
+ * __fix_to_virt(0) is FIXADDR_TOP itself, so the page that starts at
+ * FIXADDR_TOP belongs to index 0 and must be accepted. Only addresses at
+ * or beyond FIXADDR_TOP + PAGE_SIZE, or below FIXADDR_START, are outside
+ * the fixmap area and trip the BUG_ON().
+ */
+static inline unsigned long virt_to_fix(const unsigned long vaddr)
+{
+	BUG_ON(vaddr >= FIXADDR_TOP + PAGE_SIZE || vaddr < FIXADDR_START);
+	return __virt_to_fix(vaddr);
+}
+
+#define kmap_get_fixmap_pte(vaddr) \
+ pte_offset_kernel( \
+ pmd_offset(pud_offset(pgd_offset_k(vaddr), (vaddr)), (vaddr)), \
+ (vaddr) \
+ )
+
+/*
+ * Called from pgtable_init()
+ */
+extern void fixrange_init(unsigned long start, unsigned long end,
+ pgd_t *pgd_base);
+
+
+#endif
diff --git a/arch/metag/include/asm/ftrace.h b/arch/metag/include/asm/ftrace.h
new file mode 100644
index 00000000000..2901f0f7d94
--- /dev/null
+++ b/arch/metag/include/asm/ftrace.h
@@ -0,0 +1,23 @@
+#ifndef _ASM_METAG_FTRACE
+#define _ASM_METAG_FTRACE
+
+#ifdef CONFIG_FUNCTION_TRACER
+#define MCOUNT_INSN_SIZE 8 /* sizeof mcount call */
+
+#ifndef __ASSEMBLY__
+extern void mcount_wrapper(void);
+#define MCOUNT_ADDR ((long)(mcount_wrapper))
+
+static inline unsigned long ftrace_call_adjust(unsigned long addr)
+{
+ return addr;
+}
+
+struct dyn_arch_ftrace {
+ /* No extra data needed on metag */
+};
+#endif /* __ASSEMBLY__ */
+
+#endif /* CONFIG_FUNCTION_TRACER */
+
+#endif /* _ASM_METAG_FTRACE */
diff --git a/arch/metag/include/asm/global_lock.h b/arch/metag/include/asm/global_lock.h
new file mode 100644
index 00000000000..fc831c88c22
--- /dev/null
+++ b/arch/metag/include/asm/global_lock.h
@@ -0,0 +1,100 @@
+#ifndef __ASM_METAG_GLOBAL_LOCK_H
+#define __ASM_METAG_GLOBAL_LOCK_H
+
+#include <asm/metag_mem.h>
+
+/**
+ * __global_lock1() - Acquire global voluntary lock (LOCK1).
+ * @flags: Variable to store flags into.
+ *
+ * Acquires the Meta global voluntary lock (LOCK1), also taking care to disable
+ * all triggers so we cannot be interrupted, and to enforce a compiler barrier
+ * so that the compiler cannot reorder memory accesses across the lock.
+ *
+ * No other hardware thread will be able to acquire the voluntary or exclusive
+ * locks until the voluntary lock is released with @__global_unlock1, but they
+ * may continue to execute as long as they aren't trying to acquire either of
+ * the locks.
+ */
+#define __global_lock1(flags) do { \
+ unsigned int __trval; \
+ /* put 0 in TXMASKI, keeping its previous value in __trval, then LOCK1 */ \
+ asm volatile("MOV %0,#0\n\t" \
+ "SWAP %0,TXMASKI\n\t" \
+ "LOCK1" \
+ : "=r" (__trval) \
+ : \
+ : "memory"); \
+ /* hand the saved value back so it can be given to __global_unlock1() */ \
+ (flags) = __trval; \
+} while (0)
+
+/**
+ * __global_unlock1() - Release global voluntary lock (LOCK1).
+ * @flags: Variable to restore flags from.
+ *
+ * Releases the Meta global voluntary lock (LOCK1) acquired with
+ * @__global_lock1, also taking care to re-enable triggers, and to enforce a
+ * compiler barrier so that the compiler cannot reorder memory accesses across
+ * the unlock.
+ *
+ * This immediately allows another hardware thread to acquire the voluntary or
+ * exclusive locks.
+ */
+#define __global_unlock1(flags) do { \
+ unsigned int __trval = (flags); \
+ /* drop the lock first (LOCK0), then write the saved value to TXMASKI */ \
+ asm volatile("LOCK0\n\t" \
+ "MOV TXMASKI,%0" \
+ : \
+ : "r" (__trval) \
+ : "memory"); \
+} while (0)
+
+/**
+ * __global_lock2() - Acquire global exclusive lock (LOCK2).
+ * @flags: Variable to store flags into.
+ *
+ * Acquires the Meta global voluntary lock and global exclusive lock (LOCK2),
+ * also taking care to disable all triggers so we cannot be interrupted, to take
+ * the atomic lock (system event) and to enforce a compiler barrier so that the
+ * compiler cannot reorder memory accesses across the lock.
+ *
+ * No other hardware thread will be able to execute code until the locks are
+ * released with @__global_unlock2.
+ */
+#define __global_lock2(flags) do { \
+	unsigned int __trval; \
+	/* high half of the event address; the +0x40 below completes */ \
+	/* LINSYSEVENT_WR_ATOMIC_LOCK (named as in __global_unlock2) */ \
+	unsigned int __alock_hi = LINSYSEVENT_WR_ATOMIC_LOCK & 0xFFFF0000; \
+	/* %0 is written (MOV/SWAP) before %1 is read (SETD), hence the */ \
+	/* earlyclobber '&' on the output operand */ \
+	asm volatile("MOV %0,#0\n\t" \
+		     "SWAP %0,TXMASKI\n\t" \
+		     "LOCK2\n\t" \
+		     "SETD [%1+#0x40],D1RtP" \
+		     : "=r&" (__trval) \
+		     : "u" (__alock_hi) \
+		     : "memory"); \
+	/* hand the saved value back so it can be given to __global_unlock2() */ \
+	(flags) = __trval; \
+} while (0)
+
+/**
+ * __global_unlock2() - Release global exclusive lock (LOCK2).
+ * @flags: Variable to restore flags from.
+ *
+ * Releases the Meta global exclusive lock (LOCK2) and global voluntary lock
+ * acquired with @__global_lock2, also taking care to release the atomic lock
+ * (system event), re-enable triggers, and to enforce a compiler barrier so that
+ * the compiler cannot reorder memory accesses across the unlock.
+ *
+ * This immediately allows other hardware threads to continue executing and one
+ * of them to acquire locks.
+ */
+#define __global_unlock2(flags) do { \
+ unsigned int __trval = (flags); \
+ /* high half of the event address; the +0x00 below selects the */ \
+ /* LINSYSEVENT_WR_ATOMIC_UNLOCK location rather than ..._ATOMIC_LOCK */ \
+ unsigned int __alock_hi = LINSYSEVENT_WR_ATOMIC_LOCK & 0xFFFF0000; \
+ /* order: atomic-unlock event, LOCK0, then write saved value to TXMASKI */ \
+ asm volatile("SETD [%1+#0x00],D1RtP\n\t" \
+ "LOCK0\n\t" \
+ "MOV TXMASKI,%0" \
+ : \
+ : "r" (__trval), \
+ "u" (__alock_hi) \
+ : "memory"); \
+} while (0)
+
+#endif /* __ASM_METAG_GLOBAL_LOCK_H */
diff --git a/arch/metag/include/asm/gpio.h b/arch/metag/include/asm/gpio.h
new file mode 100644
index 00000000000..b3799d88ffc
--- /dev/null
+++ b/arch/metag/include/asm/gpio.h
@@ -0,0 +1,4 @@
+#ifndef __LINUX_GPIO_H
+#warning Include linux/gpio.h instead of asm/gpio.h
+#include <linux/gpio.h>
+#endif
diff --git a/arch/metag/include/asm/highmem.h b/arch/metag/include/asm/highmem.h
new file mode 100644
index 00000000000..6646a15c73d
--- /dev/null
+++ b/arch/metag/include/asm/highmem.h
@@ -0,0 +1,62 @@
+#ifndef _ASM_HIGHMEM_H
+#define _ASM_HIGHMEM_H
+
+#include <asm/cacheflush.h>
+#include <asm/kmap_types.h>
+#include <asm/fixmap.h>
+
+/*
+ * Right now we initialize only a single pte table. It can be extended
+ * easily, subsequent pte tables have to be allocated in one physical
+ * chunk of RAM.
+ */
+/*
+ * Ordering is (from lower to higher memory addresses):
+ *
+ * high_memory
+ * Persistent kmap area
+ * PKMAP_BASE
+ * fixed_addresses
+ * FIXADDR_START
+ * FIXADDR_TOP
+ * Vmalloc area
+ * VMALLOC_START
+ * VMALLOC_END
+ */
+#define PKMAP_BASE (FIXADDR_START - PMD_SIZE)
+#define LAST_PKMAP PTRS_PER_PTE
+#define LAST_PKMAP_MASK (LAST_PKMAP - 1)
+#define PKMAP_NR(virt) (((virt) - PKMAP_BASE) >> PAGE_SHIFT)
+#define PKMAP_ADDR(nr) (PKMAP_BASE + ((nr) << PAGE_SHIFT))
+
+#define kmap_prot PAGE_KERNEL
+
+static inline void flush_cache_kmaps(void)
+{
+ flush_cache_all();
+}
+
+/* declarations for highmem.c */
+extern unsigned long highstart_pfn, highend_pfn;
+
+extern pte_t *pkmap_page_table;
+
+extern void *kmap_high(struct page *page);
+extern void kunmap_high(struct page *page);
+
+extern void kmap_init(void);
+
+/*
+ * The following functions are already defined by <linux/highmem.h>
+ * when CONFIG_HIGHMEM is not set.
+ */
+#ifdef CONFIG_HIGHMEM
+extern void *kmap(struct page *page);
+extern void kunmap(struct page *page);
+extern void *kmap_atomic(struct page *page);
+extern void __kunmap_atomic(void *kvaddr);
+extern void *kmap_atomic_pfn(unsigned long pfn);
+extern struct page *kmap_atomic_to_page(void *ptr);
+#endif
+
+#endif
diff --git a/arch/metag/include/asm/hugetlb.h b/arch/metag/include/asm/hugetlb.h
new file mode 100644
index 00000000000..f545477e61f
--- /dev/null
+++ b/arch/metag/include/asm/hugetlb.h
@@ -0,0 +1,86 @@
+#ifndef _ASM_METAG_HUGETLB_H
+#define _ASM_METAG_HUGETLB_H
+
+#include <asm/page.h>
+
+
+/* Reports 0 for every (mm, addr, len) triple; the arguments are unused. */
+static inline int is_hugepage_only_range(struct mm_struct *mm,
+					 unsigned long addr,
+					 unsigned long len)
+{
+	return 0;
+}
+
+int prepare_hugepage_range(struct file *file, unsigned long addr,
+ unsigned long len);
+
+static inline void hugetlb_prefault_arch_hook(struct mm_struct *mm)
+{
+}
+
+static inline void hugetlb_free_pgd_range(struct mmu_gather *tlb,
+ unsigned long addr, unsigned long end,
+ unsigned long floor,
+ unsigned long ceiling)
+{
+ free_pgd_range(tlb, addr, end, floor, ceiling);
+}
+
+static inline void set_huge_pte_at(struct mm_struct *mm, unsigned long addr,
+ pte_t *ptep, pte_t pte)
+{
+ set_pte_at(mm, addr, ptep, pte);
+}
+
+static inline pte_t huge_ptep_get_and_clear(struct mm_struct *mm,
+ unsigned long addr, pte_t *ptep)
+{
+ return ptep_get_and_clear(mm, addr, ptep);
+}
+
+/*
+ * Empty body: neither the PTE nor any TLB state is touched here.
+ * NOTE(review): confirm that no caller relies on this hook to clear
+ * *ptep or to flush the translation for @addr.
+ */
+static inline void huge_ptep_clear_flush(struct vm_area_struct *vma,
+ unsigned long addr, pte_t *ptep)
+{
+}
+
+static inline int huge_pte_none(pte_t pte)
+{
+ return pte_none(pte);
+}
+
+static inline pte_t huge_pte_wrprotect(pte_t pte)
+{
+ return pte_wrprotect(pte);
+}
+
+static inline void huge_ptep_set_wrprotect(struct mm_struct *mm,
+ unsigned long addr, pte_t *ptep)
+{
+ ptep_set_wrprotect(mm, addr, ptep);
+}
+
+static inline int huge_ptep_set_access_flags(struct vm_area_struct *vma,
+ unsigned long addr, pte_t *ptep,
+ pte_t pte, int dirty)
+{
+ return ptep_set_access_flags(vma, addr, ptep, pte, dirty);
+}
+
+static inline pte_t huge_ptep_get(pte_t *ptep)
+{
+ return *ptep;
+}
+
+static inline int arch_prepare_hugepage(struct page *page)
+{
+ return 0;
+}
+
+static inline void arch_release_hugepage(struct page *page)
+{
+}
+
+static inline void arch_clear_hugepage_flags(struct page *page)
+{
+}
+
+#endif /* _ASM_METAG_HUGETLB_H */
diff --git a/arch/metag/include/asm/hwthread.h b/arch/metag/include/asm/hwthread.h
new file mode 100644
index 00000000000..8f9786619b1
--- /dev/null
+++ b/arch/metag/include/asm/hwthread.h
@@ -0,0 +1,40 @@
+/*
+ * Copyright (C) 2008 Imagination Technologies
+ */
+#ifndef __METAG_HWTHREAD_H
+#define __METAG_HWTHREAD_H
+
+#include <linux/bug.h>
+#include <linux/io.h>
+
+#include <asm/metag_mem.h>
+
+#define BAD_HWTHREAD_ID (0xFFU)
+#define BAD_CPU_ID (0xFFU)
+
+extern u8 cpu_2_hwthread_id[];
+extern u8 hwthread_id_2_cpu[];
+
+/*
+ * The Control Unit registers of every hardware thread are memory-mapped,
+ * so any hardware thread can reach another thread's registers.
+ *
+ * Compute the address at which register "regnum" of hardware thread
+ * "thread" is mapped. Warns if "thread" is BAD_HWTHREAD_ID.
+ */
+static inline
+void __iomem *__CU_addr(unsigned int thread, unsigned int regnum)
+{
+	unsigned int addr = T0UCTREG0;	/* Control unit base */
+
+	WARN_ON(thread == BAD_HWTHREAD_ID);
+
+	addr += TnUCTRX_STRIDE * thread;	/* step to the thread */
+	addr += TXUCTREGn_STRIDE * regnum;	/* step to the register */
+
+	return (void __iomem *)addr;
+}
+
+#endif /* __METAG_HWTHREAD_H */
diff --git a/arch/metag/include/asm/io.h b/arch/metag/include/asm/io.h
new file mode 100644
index 00000000000..9359e504844
--- /dev/null
+++ b/arch/metag/include/asm/io.h
@@ -0,0 +1,165 @@
+#ifndef _ASM_METAG_IO_H
+#define _ASM_METAG_IO_H
+
+#include <linux/types.h>
+
+#define IO_SPACE_LIMIT 0
+
+#define page_to_bus page_to_phys
+#define bus_to_page phys_to_page
+
+/*
+ * Generic I/O
+ */
+
+/*
+ * Read a u8 from @addr using one GETB instruction. The self-referential
+ * #define lets <asm-generic/io.h> see that this accessor is provided here.
+ * The "memory" clobber stops the compiler moving memory accesses across it.
+ */
+#define __raw_readb __raw_readb
+static inline u8 __raw_readb(const volatile void __iomem *addr)
+{
+ u8 ret;
+ asm volatile("GETB %0,[%1]"
+ : "=da" (ret)
+ : "da" (addr)
+ : "memory");
+ return ret;
+}
+
+/*
+ * Read a u16 from @addr using one GETW instruction; "memory" clobber
+ * acts as a compiler barrier around the access.
+ */
+#define __raw_readw __raw_readw
+static inline u16 __raw_readw(const volatile void __iomem *addr)
+{
+ u16 ret;
+ asm volatile("GETW %0,[%1]"
+ : "=da" (ret)
+ : "da" (addr)
+ : "memory");
+ return ret;
+}
+
+/*
+ * Read a u32 from @addr using one GETD instruction; "memory" clobber
+ * acts as a compiler barrier around the access.
+ */
+#define __raw_readl __raw_readl
+static inline u32 __raw_readl(const volatile void __iomem *addr)
+{
+ u32 ret;
+ asm volatile("GETD %0,[%1]"
+ : "=da" (ret)
+ : "da" (addr)
+ : "memory");
+ return ret;
+}
+
+/*
+ * Read a u64 from @addr using one GETL instruction; "memory" clobber
+ * acts as a compiler barrier around the access.
+ * NOTE(review): %0/%t0 presumably name the two registers holding the
+ * 64-bit result - confirm against the metag compiler's operand modifiers.
+ */
+#define __raw_readq __raw_readq
+static inline u64 __raw_readq(const volatile void __iomem *addr)
+{
+ u64 ret;
+ asm volatile("GETL %0,%t0,[%1]"
+ : "=da" (ret)
+ : "da" (addr)
+ : "memory");
+ return ret;
+}
+
+/*
+ * Write the u8 @b to @addr using one SETB instruction; "memory" clobber
+ * acts as a compiler barrier around the access.
+ */
+#define __raw_writeb __raw_writeb
+static inline void __raw_writeb(u8 b, volatile void __iomem *addr)
+{
+ asm volatile("SETB [%0],%1"
+ :
+ : "da" (addr),
+ "da" (b)
+ : "memory");
+}
+
+/*
+ * Write the u16 @b to @addr using one SETW instruction; "memory" clobber
+ * acts as a compiler barrier around the access.
+ */
+#define __raw_writew __raw_writew
+static inline void __raw_writew(u16 b, volatile void __iomem *addr)
+{
+ asm volatile("SETW [%0],%1"
+ :
+ : "da" (addr),
+ "da" (b)
+ : "memory");
+}
+
+/*
+ * Write the u32 @b to @addr using one SETD instruction; "memory" clobber
+ * acts as a compiler barrier around the access.
+ */
+#define __raw_writel __raw_writel
+static inline void __raw_writel(u32 b, volatile void __iomem *addr)
+{
+ asm volatile("SETD [%0],%1"
+ :
+ : "da" (addr),
+ "da" (b)
+ : "memory");
+}
+
+/*
+ * Write the u64 @b to @addr using one SETL instruction; "memory" clobber
+ * acts as a compiler barrier around the access.
+ * NOTE(review): %1/%t1 presumably name the two registers holding the
+ * 64-bit value - confirm against the metag compiler's operand modifiers.
+ */
+#define __raw_writeq __raw_writeq
+static inline void __raw_writeq(u64 b, volatile void __iomem *addr)
+{
+ asm volatile("SETL [%0],%1,%t1"
+ :
+ : "da" (addr),
+ "da" (b)
+ : "memory");
+}
+
+/*
+ * The generic io.h can define all the other generic accessors
+ */
+
+#include <asm-generic/io.h>
+
+/*
+ * Despite being a 32bit architecture, Meta can do 64bit memory accesses
+ * (assuming the bus supports it).
+ */
+
+#define readq __raw_readq
+#define writeq __raw_writeq
+
+/*
+ * Meta specific I/O for accessing non-MMU areas.
+ *
+ * These can be provided with a physical address rather than an __iomem pointer
+ * and should only be used by core architecture code for accessing fixed core
+ * registers. Generic drivers should use ioremap and the generic I/O accessors.
+ */
+
+#define metag_in8(addr) __raw_readb((volatile void __iomem *)(addr))
+#define metag_in16(addr) __raw_readw((volatile void __iomem *)(addr))
+#define metag_in32(addr) __raw_readl((volatile void __iomem *)(addr))
+#define metag_in64(addr) __raw_readq((volatile void __iomem *)(addr))
+
+#define metag_out8(b, addr) __raw_writeb(b, (volatile void __iomem *)(addr))
+#define metag_out16(b, addr) __raw_writew(b, (volatile void __iomem *)(addr))
+#define metag_out32(b, addr) __raw_writel(b, (volatile void __iomem *)(addr))
+#define metag_out64(b, addr) __raw_writeq(b, (volatile void __iomem *)(addr))
+
+/*
+ * io remapping functions
+ */
+
+extern void __iomem *__ioremap(unsigned long offset,
+ size_t size, unsigned long flags);
+extern void __iounmap(void __iomem *addr);
+
+/**
+ * ioremap - map bus memory into CPU space
+ * @offset: bus address of the memory
+ * @size: size of the resource to map
+ *
+ * ioremap performs a platform specific sequence of operations to
+ * make bus memory CPU accessible via the readb/readw/readl/writeb/
+ * writew/writel functions and the other mmio helpers. The returned
+ * address is not guaranteed to be usable directly as a virtual
+ * address.
+ */
+#define ioremap(offset, size) \
+ __ioremap((offset), (size), 0)
+
+#define ioremap_nocache(offset, size) \
+ __ioremap((offset), (size), 0)
+
+#define ioremap_cached(offset, size) \
+ __ioremap((offset), (size), _PAGE_CACHEABLE)
+
+#define ioremap_wc(offset, size) \
+ __ioremap((offset), (size), _PAGE_WR_COMBINE)
+
+#define iounmap(addr) \
+ __iounmap(addr)
+
+#endif /* _ASM_METAG_IO_H */
diff --git a/arch/metag/include/asm/irq.h b/arch/metag/include/asm/irq.h
new file mode 100644
index 00000000000..be0c8f3c5a5
--- /dev/null
+++ b/arch/metag/include/asm/irq.h
@@ -0,0 +1,32 @@
+#ifndef __ASM_METAG_IRQ_H
+#define __ASM_METAG_IRQ_H
+
+#ifdef CONFIG_4KSTACKS
+extern void irq_ctx_init(int cpu);
+extern void irq_ctx_exit(int cpu);
+# define __ARCH_HAS_DO_SOFTIRQ
+#else
+# define irq_ctx_init(cpu) do { } while (0)
+# define irq_ctx_exit(cpu) do { } while (0)
+#endif
+
+void tbi_startup_interrupt(int);
+void tbi_shutdown_interrupt(int);
+
+struct pt_regs;
+
+int tbisig_map(unsigned int hw);
+extern void do_IRQ(int irq, struct pt_regs *regs);
+
+#ifdef CONFIG_METAG_SUSPEND_MEM
+int traps_save_context(void);
+int traps_restore_context(void);
+#endif
+
+#include <asm-generic/irq.h>
+
+#ifdef CONFIG_HOTPLUG_CPU
+extern void migrate_irqs(void);
+#endif
+
+#endif /* __ASM_METAG_IRQ_H */
diff --git a/arch/metag/include/asm/irqflags.h b/arch/metag/include/asm/irqflags.h
new file mode 100644
index 00000000000..339b16f062e
--- /dev/null
+++ b/arch/metag/include/asm/irqflags.h
@@ -0,0 +1,93 @@
+/*
+ * IRQ flags handling
+ *
+ * This file gets included from lowlevel asm headers too, to provide
+ * wrapped versions of the local_irq_*() APIs, based on the
+ * raw_local_irq_*() functions from the lowlevel headers.
+ */
+#ifndef _ASM_IRQFLAGS_H
+#define _ASM_IRQFLAGS_H
+
+#ifndef __ASSEMBLY__
+
+#include <asm/core_reg.h>
+#include <asm/metag_regs.h>
+
+#define INTS_OFF_MASK TXSTATI_BGNDHALT_BIT
+
+#ifdef CONFIG_SMP
+extern unsigned int get_trigger_mask(void);
+#else
+
+extern unsigned int global_trigger_mask;
+
+static inline unsigned int get_trigger_mask(void)
+{
+ return global_trigger_mask;
+}
+#endif
+
+static inline unsigned long arch_local_save_flags(void)
+{
+ return __core_reg_get(TXMASKI);
+}
+
+/*
+ * A saved flags value counts as "interrupts disabled" when no bit
+ * outside INTS_OFF_MASK is set in it.
+ */
+static inline int arch_irqs_disabled_flags(unsigned long flags)
+{
+	return !(flags & ~INTS_OFF_MASK);
+}
+
+/* Apply arch_irqs_disabled_flags() to the current TXMASKI contents. */
+static inline int arch_irqs_disabled(void)
+{
+	return arch_irqs_disabled_flags(arch_local_save_flags());
+}
+
+/*
+ * Returns the current TXMASKI value with every bit outside INTS_OFF_MASK
+ * cleared. Despite the name this is not a predicate: it is the mask value
+ * that the irq save/disable helpers in this file write to TXMASKI.
+ */
+static inline unsigned long __irqs_disabled(void)
+{
+ /*
+ * We shouldn't enable exceptions if they are not already
+ * enabled. This is required for chancalls to work correctly.
+ */
+ return arch_local_save_flags() & INTS_OFF_MASK;
+}
+
+/*
+ * For spinlocks, etc:
+ *
+ * Exchanges the value from __irqs_disabled() with TXMASKI (the register
+ * operand is both input and output of the SWAP) and returns what the asm
+ * left in it, for a later arch_local_irq_restore().
+ */
+static inline unsigned long arch_local_irq_save(void)
+{
+ unsigned long flags = __irqs_disabled();
+
+ asm volatile("SWAP %0,TXMASKI\n" : "=r" (flags) : "0" (flags)
+ : "memory");
+
+ return flags;
+}
+
+static inline void arch_local_irq_restore(unsigned long flags)
+{
+ asm volatile("MOV TXMASKI,%0\n" : : "r" (flags) : "memory");
+}
+
+/*
+ * Disable interrupts by loading TXMASKI with the __irqs_disabled() mask;
+ * the write itself is the same MOV that arch_local_irq_restore() issues.
+ */
+static inline void arch_local_irq_disable(void)
+{
+	arch_local_irq_restore(__irqs_disabled());
+}
+
+#ifdef CONFIG_SMP
+/* Avoid circular include dependencies through <linux/preempt.h> */
+void arch_local_irq_enable(void);
+#else
+static inline void arch_local_irq_enable(void)
+{
+ arch_local_irq_restore(get_trigger_mask());
+}
+#endif
+
+#endif /* (__ASSEMBLY__) */
+
+#endif /* !(_ASM_IRQFLAGS_H) */
diff --git a/arch/metag/include/asm/l2cache.h b/arch/metag/include/asm/l2cache.h
new file mode 100644
index 00000000000..bffbeaa4d93
--- /dev/null
+++ b/arch/metag/include/asm/l2cache.h
@@ -0,0 +1,258 @@
+#ifndef _METAG_L2CACHE_H
+#define _METAG_L2CACHE_H
+
+#ifdef CONFIG_METAG_L2C
+
+#include <asm/global_lock.h>
+#include <asm/io.h>
+
+/*
+ * Store the last known value of pfenable (we don't want prefetch enabled while
+ * L2 is off).
+ */
+extern int l2c_pfenable;
+
+/* defined in arch/metag/drivers/core-sysfs.c */
+extern struct sysdev_class cache_sysclass;
+
+static inline void wr_fence(void);
+
+/*
+ * Functions for reading of L2 cache configuration.
+ */
+
+/* Get raw L2 config register (CORE_CONFIG3) */
+static inline unsigned int meta_l2c_config(void)
+{
+ const unsigned int *corecfg3 = (const unsigned int *)METAC_CORE_CONFIG3;
+ return *corecfg3;
+}
+
+/* Get whether the L2 is present */
+static inline int meta_l2c_is_present(void)
+{
+ return meta_l2c_config() & METAC_CORECFG3_L2C_HAVE_L2C_BIT;
+}
+
+/* Get whether the L2 is configured for write-back instead of write-through */
+static inline int meta_l2c_is_writeback(void)
+{
+ return meta_l2c_config() & METAC_CORECFG3_L2C_MODE_BIT;
+}
+
+/* Get whether the L2 is unified instead of separated code/data */
+static inline int meta_l2c_is_unified(void)
+{
+ return meta_l2c_config() & METAC_CORECFG3_L2C_UNIFIED_BIT;
+}
+
+/* Size of the L2 cache in bytes, or 0 when no L2 is present */
+static inline unsigned int meta_l2c_size(void)
+{
+	unsigned int cfg, shift;
+
+	if (!meta_l2c_is_present())
+		return 0;
+
+	cfg = meta_l2c_config();
+	shift = (cfg & METAC_CORECFG3_L2C_SIZE_BITS)
+		>> METAC_CORECFG3_L2C_SIZE_S;
+
+	/* the field counts in KiB, so scale 1024 bytes by it */
+	return 1024 << shift;
+}
+
+/* Number of ways in the L2 cache, or 0 when no L2 is present */
+static inline unsigned int meta_l2c_ways(void)
+{
+	unsigned int cfg, shift;
+
+	if (!meta_l2c_is_present())
+		return 0;
+
+	cfg = meta_l2c_config();
+	shift = (cfg & METAC_CORECFG3_L2C_NUM_WAYS_BITS)
+		>> METAC_CORECFG3_L2C_NUM_WAYS_S;
+
+	/* the field is used as a shift count: ways = 1 << field */
+	return 0x1 << shift;
+}
+
+/*
+ * Line size of the L2 cache in bytes. Yields 0 when no L2 is present or
+ * when the config field holds anything other than the 64-byte encoding.
+ */
+static inline unsigned int meta_l2c_linesize(void)
+{
+	unsigned int field;
+
+	if (!meta_l2c_is_present())
+		return 0;
+
+	field = (meta_l2c_config() & METAC_CORECFG3_L2C_LINE_SIZE_BITS)
+		>> METAC_CORECFG3_L2C_LINE_SIZE_S;
+
+	if (field == METAC_CORECFG3_L2C_LINE_SIZE_64B)
+		return 64;
+	return 0;
+}
+
+/* Get the revision ID of the L2 cache */
+static inline unsigned int meta_l2c_revision(void)
+{
+ return (meta_l2c_config() & METAC_CORECFG3_L2C_REV_ID_BITS)
+ >> METAC_CORECFG3_L2C_REV_ID_S;
+}
+
+
+/*
+ * Kick off initialisation of the L2 cachelines and busy-wait until it has
+ * finished.
+ * Only to be used inside a LOCK1 or LOCK2 critical section, with the L2
+ * disabled.
+ */
+static inline void _meta_l2c_init(void)
+{
+	metag_out32(SYSC_L2C_INIT_INIT, SYSC_L2C_INIT);
+
+	/* poll until the register no longer reads back "in progress" */
+	for (;;) {
+		if (metag_in32(SYSC_L2C_INIT) != SYSC_L2C_INIT_IN_PROGRESS)
+			break;
+	}
+}
+
+/*
+ * Kick off a writeback of the dirty L2 cachelines and busy-wait until it
+ * has finished.
+ * Only to be used inside a LOCK1 or LOCK2 critical section.
+ */
+static inline void _meta_l2c_purge(void)
+{
+	metag_out32(SYSC_L2C_PURGE_PURGE, SYSC_L2C_PURGE);
+
+	/* poll until the register no longer reads back "in progress" */
+	for (;;) {
+		if (metag_in32(SYSC_L2C_PURGE) != SYSC_L2C_PURGE_IN_PROGRESS)
+			break;
+	}
+}
+
+/*
+ * Switch the L2 cache on (@enabled non-zero) or off (@enabled zero) with a
+ * read-modify-write of SYSC_L2C_ENABLE; other bits are left as read.
+ */
+static inline void _meta_l2c_enable(int enabled)
+{
+	unsigned int reg = metag_in32(SYSC_L2C_ENABLE);
+
+	reg = enabled ? (reg | SYSC_L2C_ENABLE_ENABLE_BIT)
+		      : (reg & ~SYSC_L2C_ENABLE_ENABLE_BIT);
+
+	metag_out32(reg, SYSC_L2C_ENABLE);
+}
+
+/*
+ * Switch L2 prefetch on (@pfenabled non-zero) or off (@pfenabled zero)
+ * with a read-modify-write of SYSC_L2C_ENABLE; other bits are left as
+ * read.
+ */
+static inline void _meta_l2c_pf_enable(int pfenabled)
+{
+	unsigned int reg = metag_in32(SYSC_L2C_ENABLE);
+
+	reg = pfenabled ? (reg | SYSC_L2C_ENABLE_PFENABLE_BIT)
+			: (reg & ~SYSC_L2C_ENABLE_PFENABLE_BIT);
+
+	metag_out32(reg, SYSC_L2C_ENABLE);
+}
+
+/* Return whether the L2 cache is enabled */
+static inline int _meta_l2c_is_enabled(void)
+{
+ return metag_in32(SYSC_L2C_ENABLE) & SYSC_L2C_ENABLE_ENABLE_BIT;
+}
+
+/* Return whether the L2 cache prefetch is enabled */
+static inline int _meta_l2c_pf_is_enabled(void)
+{
+ return metag_in32(SYSC_L2C_ENABLE) & SYSC_L2C_ENABLE_PFENABLE_BIT;
+}
+
+
+/*
+ * Return whether the L2 cache is enabled.
+ *
+ * No locking is needed at the moment: the enable bit is never changed
+ * intermediately, so an intermediate result can never be observed.
+ */
+static inline int meta_l2c_is_enabled(void)
+{
+	return _meta_l2c_is_enabled();
+}
+
+/*
+ * Ensure the L2 cache is disabled.
+ * Return whether the L2 was previously disabled.
+ */
+int meta_l2c_disable(void);
+
+/*
+ * Ensure the L2 cache is enabled.
+ * Return whether the L2 was previously enabled.
+ */
+int meta_l2c_enable(void);
+
+/* Return whether the L2 cache prefetch is enabled */
+static inline int meta_l2c_pf_is_enabled(void)
+{
+ return l2c_pfenable;
+}
+
+/*
+ * Set whether the L2 cache prefetch is enabled.
+ * Return whether the L2 prefetch was previously enabled.
+ */
+int meta_l2c_pf_enable(int pfenable);
+
+/*
+ * Flush the L2 cache.
+ * Return 1 if the L2 is disabled.
+ */
+int meta_l2c_flush(void);
+
+/*
+ * Write back all dirty cache lines in the L2 cache.
+ * Return 1 if the L2 is disabled or there isn't any writeback.
+ */
+static inline int meta_l2c_writeback(void)
+{
+ unsigned long flags;
+ int en;
+
+ /* no need to purge if it's not a writeback cache */
+ if (!meta_l2c_is_writeback())
+ return 1;
+
+ /*
+ * Purge only works if the L2 is enabled, and involves reading back to
+ * detect completion, so keep this operation atomic with other threads.
+ */
+ __global_lock1(flags);
+ en = meta_l2c_is_enabled();
+ if (likely(en)) {
+ wr_fence();
+ _meta_l2c_purge();
+ }
+ __global_unlock1(flags);
+
+ return !en;
+}
+
+#else /* CONFIG_METAG_L2C */
+
+#define meta_l2c_config() 0
+#define meta_l2c_is_present() 0
+#define meta_l2c_is_writeback() 0
+#define meta_l2c_is_unified() 0
+#define meta_l2c_size() 0
+#define meta_l2c_ways() 0
+#define meta_l2c_linesize() 0
+#define meta_l2c_revision() 0
+
+#define meta_l2c_is_enabled() 0
+#define _meta_l2c_pf_is_enabled() 0
+#define meta_l2c_pf_is_enabled() 0
+#define meta_l2c_disable() 1
+#define meta_l2c_enable() 0
+#define meta_l2c_pf_enable(X) 0
+static inline int meta_l2c_flush(void)
+{
+ return 1;
+}
+static inline int meta_l2c_writeback(void)
+{
+ return 1;
+}
+
+#endif /* CONFIG_METAG_L2C */
+
+#endif /* _METAG_L2CACHE_H */
diff --git a/arch/metag/include/asm/linkage.h b/arch/metag/include/asm/linkage.h
new file mode 100644
index 00000000000..73bf25ba4e1
--- /dev/null
+++ b/arch/metag/include/asm/linkage.h
@@ -0,0 +1,7 @@
+#ifndef __ASM_LINKAGE_H
+#define __ASM_LINKAGE_H
+
+#define __ALIGN .p2align 2
+#define __ALIGN_STR ".p2align 2"
+
+#endif
diff --git a/arch/metag/include/asm/mach/arch.h b/arch/metag/include/asm/mach/arch.h
new file mode 100644
index 00000000000..12c5664fea6
--- /dev/null
+++ b/arch/metag/include/asm/mach/arch.h
@@ -0,0 +1,86 @@
+/*
+ * arch/metag/include/asm/mach/arch.h
+ *
+ * Copyright (C) 2012 Imagination Technologies Ltd.
+ *
+ * based on the ARM version:
+ * Copyright (C) 2000 Russell King
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#ifndef _METAG_MACH_ARCH_H_
+#define _METAG_MACH_ARCH_H_
+
+#include <linux/stddef.h>
+
+#include <asm/clock.h>
+
+/**
+ * struct machine_desc - Describes a board controlled by a Meta.
+ * @name: Board/SoC name.
+ * @dt_compat: Array of device tree 'compatible' strings.
+ * @clocks: Clock callbacks.
+ *
+ * @nr_irqs: Maximum number of IRQs.
+ * If 0, defaults to NR_IRQS in asm-generic/irq.h.
+ *
+ * @init_early: Early init callback.
+ * @init_irq: IRQ init callback for setting up IRQ controllers.
+ * @init_machine: Arch init callback for setting up devices.
+ * @init_late: Late init callback.
+ *
+ * This structure is provided by each board which can be controlled by a Meta.
+ * It is chosen by matching the compatible strings in the device tree provided
+ * by the bootloader with the strings in @dt_compat, and sets up any aspects of
+ * the machine that aren't configured with device tree (yet).
+ */
+struct machine_desc {
+ const char *name;
+ const char **dt_compat;
+ struct meta_clock_desc *clocks;
+
+ unsigned int nr_irqs;
+
+ void (*init_early)(void);
+ void (*init_irq)(void);
+ void (*init_machine)(void);
+ void (*init_late)(void);
+};
+
+/*
+ * Current machine - only accessible during boot.
+ */
+extern struct machine_desc *machine_desc;
+
+/*
+ * Machine type table - also only accessible during boot
+ */
+extern struct machine_desc __arch_info_begin[], __arch_info_end[];
+/*
+ * Iterate @p over every entry from __arch_info_begin up to (not
+ * including) __arch_info_end. @p is parenthesised so that any lvalue
+ * expression may be used as the cursor.
+ */
+#define for_each_machine_desc(p) \
+	for ((p) = __arch_info_begin; (p) < __arch_info_end; (p)++)
+
+/*
+ * Return the default machine descriptor (the last table entry), or NULL
+ * when the table is empty.
+ */
+static inline struct machine_desc *default_machine_desc(void)
+{
+	/*
+	 * Test for an empty table first, so that a pointer to before the
+	 * start of the table is never formed.
+	 */
+	if (__arch_info_end <= __arch_info_begin)
+		return NULL;
+	/* the default machine is the last one linked in */
+	return __arch_info_end - 1;
+}
+
+/*
+ * Set of macros to define architecture features. This is built into
+ * a table by the linker.
+ */
+#define MACHINE_START(_type, _name) \
+static const struct machine_desc __mach_desc_##_type \
+__used \
+__attribute__((__section__(".arch.info.init"))) = { \
+ .name = _name,
+
+#define MACHINE_END \
+};
+
+#endif /* _METAG_MACH_ARCH_H_ */
diff --git a/arch/metag/include/asm/metag_isa.h b/arch/metag/include/asm/metag_isa.h
new file mode 100644
index 00000000000..c8aa2ae3899
--- /dev/null
+++ b/arch/metag/include/asm/metag_isa.h
@@ -0,0 +1,81 @@
+/*
+ * asm/metag_isa.h
+ *
+ * Copyright (C) 2000-2007, 2012 Imagination Technologies.
+ *
+ * This program is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU General Public License version 2 as published by the
+ * Free Software Foundation.
+ *
+ * Various defines for Meta instruction set.
+ */
+
+#ifndef _ASM_METAG_ISA_H_
+#define _ASM_METAG_ISA_H_
+
+
+/* L1 cache layout */
+
+/* Data cache line size as bytes and shift */
+#define DCACHE_LINE_BYTES 64
+#define DCACHE_LINE_S 6
+
+/* Number of ways in the data cache */
+#define DCACHE_WAYS 4
+
+/* Instruction cache line size as bytes and shift */
+#define ICACHE_LINE_BYTES 64
+#define ICACHE_LINE_S 6
+
+/* Number of ways in the instruction cache */
+#define ICACHE_WAYS 4
+
+
+/*
+ * CACHEWD/CACHEWL instructions use the bottom 8 bits of the data presented to
+ * control the operation actually achieved.
+ */
+/* Use of these two bits should be discouraged since the bits don't have
+ * consistent meanings
+ */
+#define CACHEW_ICACHE_BIT 0x01
+#define CACHEW_TLBFLUSH_BIT 0x02
+
+#define CACHEW_FLUSH_L1D_L2 0x0
+#define CACHEW_INVALIDATE_L1I 0x1
+#define CACHEW_INVALIDATE_L1DTLB 0x2
+#define CACHEW_INVALIDATE_L1ITLB 0x3
+#define CACHEW_WRITEBACK_L1D_L2 0x4
+#define CACHEW_INVALIDATE_L1D 0x8
+#define CACHEW_INVALIDATE_L1D_L2 0xC
+
+/*
+ * CACHERD/CACHERL instructions use bits 3:5 of the address presented to
+ * control the operation achieved and hence the specific result.
+ */
+#define CACHER_ADDR_BITS 0xFFFFFFC0
+#define CACHER_OPER_BITS 0x00000030
+#define CACHER_OPER_S 4
+#define CACHER_OPER_LINPHY 0
+#define CACHER_ICACHE_BIT 0x00000008
+#define CACHER_ICACHE_S 3
+
+/*
+ * CACHERD/CACHERL LINPHY Oper result is one/two 32-bit words
+ *
+ * If CRLINPHY0_VAL_BIT (Bit 0) set then,
+ * Lower 32-bits corresponds to MMCU_ENTRY_* above.
+ * Upper 32-bits corresponds to CRLINPHY1_* values below (if requested).
+ * else
+ * Lower 32-bits corresponds to CRLINPHY0_* values below.
+ * Upper 32-bits undefined.
+ */
+#define CRLINPHY0_VAL_BIT 0x00000001
+#define CRLINPHY0_FIRST_BIT 0x00000004 /* Set if VAL=0 due to first level */
+
+#define CRLINPHY1_READ_BIT 0x00000001 /* Set if reads permitted */
+#define CRLINPHY1_SINGLE_BIT 0x00000004 /* Set if TLB does not cache entry */
+#define CRLINPHY1_PAGEMSK_BITS 0x0000FFF0 /* Set to ((2^n-1)>>12) value */
+#define CRLINPHY1_PAGEMSK_S 4
+
+#endif /* _ASM_METAG_ISA_H_ */
diff --git a/arch/metag/include/asm/metag_mem.h b/arch/metag/include/asm/metag_mem.h
new file mode 100644
index 00000000000..3f7b54d8cca
--- /dev/null
+++ b/arch/metag/include/asm/metag_mem.h
@@ -0,0 +1,1106 @@
+/*
+ * asm/metag_mem.h
+ *
+ * Copyright (C) 2000-2007, 2012 Imagination Technologies.
+ *
+ * This program is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU General Public License version 2 as published by the
+ * Free Software Foundation.
+ *
+ * Various defines for Meta (memory-mapped) registers.
+ */
+
+#ifndef _ASM_METAG_MEM_H_
+#define _ASM_METAG_MEM_H_
+
+/*****************************************************************************
+ * META MEMORY MAP LINEAR ADDRESS VALUES
+ ****************************************************************************/
+/*
+ * COMMON MEMORY MAP
+ * -----------------
+ */
+
+#define LINSYSTEM_BASE 0x00200000
+#define LINSYSTEM_LIMIT 0x07FFFFFF
+
+/* Linear cache flush now implemented via DCACHE instruction. These defines
+ relate to a special region that used to exist for achieving cache flushes.
+ */
+#define LINSYSLFLUSH_S 0
+
+#define LINSYSRES0_BASE 0x00200000
+#define LINSYSRES0_LIMIT 0x01FFFFFF
+
+#define LINSYSCUSTOM_BASE 0x02000000
+#define LINSYSCUSTOM_LIMIT 0x02FFFFFF
+
+#define LINSYSEXPAND_BASE 0x03000000
+#define LINSYSEXPAND_LIMIT 0x03FFFFFF
+
+#define LINSYSEVENT_BASE 0x04000000
+#define LINSYSEVENT_WR_ATOMIC_UNLOCK 0x04000000
+#define LINSYSEVENT_WR_ATOMIC_LOCK 0x04000040
+#define LINSYSEVENT_WR_CACHE_DISABLE 0x04000080
+#define LINSYSEVENT_WR_CACHE_ENABLE 0x040000C0
+#define LINSYSEVENT_WR_COMBINE_FLUSH 0x04000100
+#define LINSYSEVENT_WR_FENCE 0x04000140
+#define LINSYSEVENT_LIMIT 0x04000FFF
+
+#define LINSYSCFLUSH_BASE 0x04400000
+#define LINSYSCFLUSH_DCACHE_LINE 0x04400000
+#define LINSYSCFLUSH_ICACHE_LINE 0x04500000
+#define LINSYSCFLUSH_MMCU 0x04700000
+#ifndef METAC_1_2
+#define LINSYSCFLUSH_TxMMCU_BASE 0x04700020
+#define LINSYSCFLUSH_TxMMCU_STRIDE 0x00000008
+#endif
+#define LINSYSCFLUSH_ADDR_BITS 0x000FFFFF
+#define LINSYSCFLUSH_ADDR_S 0
+#define LINSYSCFLUSH_LIMIT 0x047FFFFF
+
+#define LINSYSCTRL_BASE 0x04800000
+#define LINSYSCTRL_LIMIT 0x04FFFFFF
+
+#define LINSYSMTABLE_BASE 0x05000000
+#define LINSYSMTABLE_LIMIT 0x05FFFFFF
+
+#define LINSYSDIRECT_BASE 0x06000000
+#define LINSYSDIRECT_LIMIT 0x07FFFFFF
+
+#define LINLOCAL_BASE 0x08000000
+#define LINLOCAL_LIMIT 0x7FFFFFFF
+
+#define LINCORE_BASE 0x80000000
+#define LINCORE_LIMIT 0x87FFFFFF
+
+#define LINCORE_CODE_BASE 0x80000000
+#define LINCORE_CODE_LIMIT 0x81FFFFFF
+
+#define LINCORE_DATA_BASE 0x82000000
+#define LINCORE_DATA_LIMIT 0x83FFFFFF
+
+
+/* The core can support locked icache lines in this region */
+#define LINCORE_ICACHE_BASE 0x84000000
+#define LINCORE_ICACHE_LIMIT 0x85FFFFFF
+
+/* The core can support locked dcache lines in this region */
+#define LINCORE_DCACHE_BASE 0x86000000
+#define LINCORE_DCACHE_LIMIT 0x87FFFFFF
+
+#define LINGLOBAL_BASE 0x88000000
+#define LINGLOBAL_LIMIT 0xFFFDFFFF
+
+/*
+ * CHIP Core Register Map
+ * ----------------------
+ */
+#define CORE_HWBASE 0x04800000
+#define PRIV_HWBASE 0x04810000
+#define TRIG_HWBASE 0x04820000
+#define SYSC_HWBASE 0x04830000
+
+/*****************************************************************************
+ * INTER-THREAD KICK REGISTERS FOR SOFTWARE EVENT GENERATION
+ ****************************************************************************/
+/*
+ * These values define memory mapped registers that can be used to supply
+ * kicks to threads that service arbitrary software events.
+ */
+
+#define T0KICK 0x04800800 /* Background kick 0 */
+#define TXXKICK_MAX 0xFFFF /* Maximum kicks */
+#define TnXKICK_STRIDE 0x00001000 /* Thread scale value */
+#define TnXKICK_STRIDE_S 12
+#define T0KICKI 0x04800808 /* Interrupt kick 0 */
+#define TXIKICK_OFFSET 0x00000008 /* Int level offset value */
+#define T1KICK 0x04801800 /* Background kick 1 */
+#define T1KICKI 0x04801808 /* Interrupt kick 1 */
+#define T2KICK 0x04802800 /* Background kick 2 */
+#define T2KICKI 0x04802808 /* Interrupt kick 2 */
+#define T3KICK 0x04803800 /* Background kick 3 */
+#define T3KICKI 0x04803808 /* Interrupt kick 3 */
+
+/*****************************************************************************
+ * GLOBAL REGISTER ACCESS RESOURCES
+ ****************************************************************************/
+/*
+ * These values define memory mapped registers that allow access to the
+ * internal state of all threads in order to allow global set-up of thread
+ * state and external handling of thread events, errors, or debugging.
+ *
+ * The actual unit and register index values needed to access individual
+ * registers are chip specific see - METAC_TXUXX_VALUES in metac_x_y.h.
+ * However two C array initialisers TXUXX_MASKS and TGUXX_MASKS will always be
+ * defined to allow arbitrary loading, display, and saving of all valid
+ * register states without detailed knowledge of their purpose - TXUXX sets
+ * bits for all valid registers and TGUXX sets bits for the sub-set which are
+ * global.
+ */
+
+#define T0UCTREG0 0x04800000 /* Access to all CT regs */
+#define TnUCTRX_STRIDE 0x00001000 /* Thread scale value */
+#define TXUCTREGn_STRIDE 0x00000008 /* Register scale value */
+
+#define TXUXXRXDT 0x0480FFF0 /* Data to/from any threads reg */
+#define TXUXXRXRQ 0x0480FFF8
+#define TXUXXRXRQ_DREADY_BIT 0x80000000 /* Poll for done */
+#define TXUXXRXRQ_DSPEXT_BIT 0x00020000 /* Addr DSP Regs */
+#define TXUXXRXRQ_RDnWR_BIT 0x00010000 /* Set for read */
+#define TXUXXRXRQ_TX_BITS 0x00003000 /* Thread number */
+#define TXUXXRXRQ_TX_S 12
+#define TXUXXRXRQ_RX_BITS 0x000001F0 /* Register num */
+#define TXUXXRXRQ_RX_S 4
+#define TXUXXRXRQ_DSPRARD0 0 /* DSP RAM A Read Pointer 0 */
+#define TXUXXRXRQ_DSPRARD1 1 /* DSP RAM A Read Pointer 1 */
+#define TXUXXRXRQ_DSPRAWR0 2 /* DSP RAM A Write Pointer 0 */
+#define TXUXXRXRQ_DSPRAWR2 3 /* DSP RAM A Write Pointer 1 (NOTE: name says 2; likely a typo for WR1) */
+#define TXUXXRXRQ_DSPRBRD0 4 /* DSP RAM B Read Pointer 0 */
+#define TXUXXRXRQ_DSPRBRD1 5 /* DSP RAM B Read Pointer 1 */
+#define TXUXXRXRQ_DSPRBWR0 6 /* DSP RAM B Write Pointer 0 */
+#define TXUXXRXRQ_DSPRBWR1 7 /* DSP RAM B Write Pointer 1 */
+#define TXUXXRXRQ_DSPRARINC0 8 /* DSP RAM A Read Increment 0 */
+#define TXUXXRXRQ_DSPRARINC1 9 /* DSP RAM A Read Increment 1 */
+#define TXUXXRXRQ_DSPRAWINC0 10 /* DSP RAM A Write Increment 0 */
+#define TXUXXRXRQ_DSPRAWINC1 11 /* DSP RAM A Write Increment 1 */
+#define TXUXXRXRQ_DSPRBRINC0 12 /* DSP RAM B Read Increment 0 */
+#define TXUXXRXRQ_DSPRBRINC1 13 /* DSP RAM B Read Increment 1 */
+#define TXUXXRXRQ_DSPRBWINC0 14 /* DSP RAM B Write Increment 0 */
+#define TXUXXRXRQ_DSPRBWINC1 15 /* DSP RAM B Write Increment 1 */
+
+#define TXUXXRXRQ_ACC0L0 16 /* Accumulator 0 bottom 32-bits */
+#define TXUXXRXRQ_ACC1L0 17 /* Accumulator 1 bottom 32-bits */
+#define TXUXXRXRQ_ACC2L0 18 /* Accumulator 2 bottom 32-bits */
+#define TXUXXRXRQ_ACC3L0 19 /* Accumulator 3 bottom 32-bits */
+#define TXUXXRXRQ_ACC0HI 20 /* Accumulator 0 top 8-bits */
+#define TXUXXRXRQ_ACC1HI 21 /* Accumulator 1 top 8-bits */
+#define TXUXXRXRQ_ACC2HI 22 /* Accumulator 2 top 8-bits */
+#define TXUXXRXRQ_ACC3HI 23 /* Accumulator 3 top 8-bits */
+#define TXUXXRXRQ_UXX_BITS 0x0000000F /* Unit number */
+#define TXUXXRXRQ_UXX_S 0
+
+/*****************************************************************************
+ * PRIVILEGE CONTROL VALUES FOR MEMORY MAPPED RESOURCES
+ ****************************************************************************/
+/*
+ * These values define memory mapped registers that give control over and
+ * the privilege required to access other memory mapped resources. These
+ * registers themselves always require privilege to update them.
+ */
+
+#define TXPRIVREG_STRIDE 0x8 /* Delta between per-thread regs */
+#define TXPRIVREG_STRIDE_S 3
+
+/*
+ * Each bit 0 to 15 defines privilege required to access internal register
+ * regions 0x04800000 to 0x048FFFFF in 64k chunks
+ */
+#define T0PIOREG 0x04810100
+#define T1PIOREG 0x04810108
+#define T2PIOREG 0x04810110
+#define T3PIOREG 0x04810118
+
+/*
+ * Each bit 0 to 31 defines privilege required to use the pair of
+ * system events implemented as writes in the regions 0x04000000 to
+ * 0x04000FFF in 2*64 byte chunks.
+ */
+#define T0PSYREG 0x04810180
+#define T1PSYREG 0x04810188
+#define T2PSYREG 0x04810190
+#define T3PSYREG 0x04810198
+
+/*
+ * CHIP PRIV CONTROLS
+ * ------------------
+ */
+
+/* The TXPIOREG register holds a bit mask directly mappable to
+ corresponding addresses in the range 0x04800000 to 0x049FFFFF */
+#define TXPIOREG_ADDR_BITS 0x1F0000 /* Up to 32x64K bytes */
+#define TXPIOREG_ADDR_S 16
+
+/* Hence based on the _HWBASE values ... */
+#define TXPIOREG_CORE_BIT (1<<((0x04800000>>16)&0x1F))
+#define TXPIOREG_PRIV_BIT (1<<((0x04810000>>16)&0x1F))
+#define TXPIOREG_TRIG_BIT (1<<((0x04820000>>16)&0x1F))
+#define TXPIOREG_SYSC_BIT (1<<((0x04830000>>16)&0x1F))
+
+#define TXPIOREG_WRC_BIT 0x00080000 /* Wr combiner reg priv */
+#define TXPIOREG_LOCALBUS_RW_BIT 0x00040000 /* Local bus rd/wr priv */
+#define TXPIOREG_SYSREGBUS_RD_BIT 0x00020000 /* Sys reg bus read priv (per name; TODO confirm) */
+#define TXPIOREG_SYSREGBUS_WR_BIT 0x00010000 /* Sys reg bus write priv (per name; TODO confirm) */
+
+/* CORE region privilege controls */
+#define T0PRIVCORE 0x04800828
+#define TXPRIVCORE_TXBKICK_BIT 0x001 /* Background kick priv */
+#define TXPRIVCORE_TXIKICK_BIT 0x002 /* Interrupt kick priv */
+#define TXPRIVCORE_TXAMAREGX_BIT 0x004 /* TXAMAREG4|5|6 priv */
+#define TnPRIVCORE_STRIDE 0x00001000
+
+#define T0PRIVSYSR 0x04810000
+#define TnPRIVSYSR_STRIDE 0x00000008
+#define TnPRIVSYSR_STRIDE_S 3
+#define TXPRIVSYSR_CFLUSH_BIT 0x01
+#define TXPRIVSYSR_MTABLE_BIT 0x02
+#define TXPRIVSYSR_DIRECT_BIT 0x04
+#ifdef METAC_1_2
+#define TXPRIVSYSR_ALL_BITS 0x07
+#else
+#define TXPRIVSYSR_CORE_BIT 0x08
+#define TXPRIVSYSR_CORECODE_BIT 0x10
+#define TXPRIVSYSR_ALL_BITS 0x1F
+#endif
+#define T1PRIVSYSR 0x04810008
+#define T2PRIVSYSR 0x04810010
+#define T3PRIVSYSR 0x04810018
+
+/*****************************************************************************
+ * H/W TRIGGER STATE/LEVEL REGISTERS AND H/W TRIGGER VECTORS
+ ****************************************************************************/
+/*
+ * These values define memory mapped registers that give control over and
+ * the state of hardware trigger sources both external to the META processor
+ * and internal to it.
+ */
+
+#define HWSTATMETA 0x04820000 /* Hardware status/clear META trig */
+#define HWSTATMETA_T0HALT_BITS 0xF
+#define HWSTATMETA_T0HALT_S 0
+#define HWSTATMETA_T0BHALT_BIT 0x1 /* Background HALT */
+#define HWSTATMETA_T0IHALT_BIT 0x2 /* Interrupt HALT */
+#define HWSTATMETA_T0PHALT_BIT 0x4 /* PF/RO Memory HALT */
+#define HWSTATMETA_T0AMATR_BIT 0x8 /* AMA trigger */
+#define HWSTATMETA_TnINT_S 4 /* Shift by (thread*4) */
+#define HWSTATEXT 0x04820010 /* H/W status/clear external trigs 0-31 */
+#define HWSTATEXT2 0x04820018 /* H/W status/clear external trigs 32-63 */
+#define HWSTATEXT4 0x04820020 /* H/W status/clear external trigs 64-95 */
+#define HWSTATEXT6 0x04820028 /* H/W status/clear external trigs 96-127 */
+#define HWLEVELEXT 0x04820030 /* Edge/Level type of external trigs 0-31 */
+#define HWLEVELEXT2 0x04820038 /* Edge/Level type of external trigs 32-63 */
+#define HWLEVELEXT4 0x04820040 /* Edge/Level type of external trigs 64-95 */
+#define HWLEVELEXT6 0x04820048 /* Edge/Level type of external trigs 96-127 */
+#define HWLEVELEXT_XXX_LEVEL 1 /* Level sense logic in HWSTATEXTn */
+#define HWLEVELEXT_XXX_EDGE 0
+#define HWMASKEXT 0x04820050 /* Enable/disable of external trigs 0-31 */
+#define HWMASKEXT2 0x04820058 /* Enable/disable of external trigs 32-63 */
+#define HWMASKEXT4 0x04820060 /* Enable/disable of external trigs 64-95 */
+#define HWMASKEXT6 0x04820068 /* Enable/disable of external trigs 96-127 */
+#define T0VECINT_BHALT 0x04820500 /* Background HALT trigger vector */
+#define TXVECXXX_BITS 0xF /* Per-trigger vector vals 0,1,4-15 */
+#define TXVECXXX_S 0
+#define T0VECINT_IHALT 0x04820508 /* Interrupt HALT */
+#define T0VECINT_PHALT 0x04820510 /* PF/RO memory fault */
+#define T0VECINT_AMATR 0x04820518 /* AMA trigger */
+#define TnVECINT_STRIDE 0x00000020 /* Per thread stride */
+#define HWVEC0EXT 0x04820700 /* Vectors for external triggers 0-31 */
+#define HWVEC20EXT 0x04821700 /* Vectors for external triggers 32-63 */
+#define HWVEC40EXT 0x04822700 /* Vectors for external triggers 64-95 */
+#define HWVEC60EXT 0x04823700 /* Vectors for external triggers 96-127 */
+#define HWVECnEXT_STRIDE 0x00000008 /* Per trigger stride */
+#define HWVECnEXT_DEBUG 0x1 /* Redirect trigger to debug i/f */
+
+/*
+ * CORE HWCODE-BREAKPOINT REGISTERS/VALUES
+ * ---------------------------------------
+ */
+#define CODEB0ADDR 0x0480FF00 /* Address specifier */
+#define CODEBXADDR_MATCHX_BITS 0xFFFFFFFC
+#define CODEBXADDR_MATCHX_S 2
+#define CODEB0CTRL 0x0480FF08 /* Control */
+#define CODEBXCTRL_MATEN_BIT 0x80000000 /* Match 'Enable' */
+#define CODEBXCTRL_MATTXEN_BIT 0x10000000 /* Match threadn enable */
+#define CODEBXCTRL_HITC_BITS 0x00FF0000 /* Hit counter */
+#define CODEBXCTRL_HITC_S 16
+#define CODEBXHITC_NEXT 0xFF /* Next 'hit' will trigger */
+#define CODEBXHITC_HIT1 0x00 /* No 'hits' after trigger */
+#define CODEBXCTRL_MMASK_BITS 0x0000FFFC /* Mask ADDR_MATCH bits */
+#define CODEBXCTRL_MMASK_S 2
+#define CODEBXCTRL_MATLTX_BITS 0x00000003 /* Match threadn LOCAL addr */
+#define CODEBXCTRL_MATLTX_S 0 /* Match threadn LOCAL addr */
+#define CODEBnXXXX_STRIDE 0x00000010 /* Stride between CODEB reg sets */
+#define CODEBnXXXX_STRIDE_S 4
+#define CODEBnXXXX_LIMIT 3 /* Sets 0-3 */
+
+/*
+ * CORE DATA-WATCHPOINT REGISTERS/VALUES
+ * -------------------------------------
+ */
+#define DATAW0ADDR 0x0480FF40 /* Address specifier */
+#define DATAWXADDR_MATCHR_BITS 0xFFFFFFF8
+#define DATAWXADDR_MATCHR_S 3
+#define DATAWXADDR_MATCHW_BITS 0xFFFFFFFF
+#define DATAWXADDR_MATCHW_S 0
+#define DATAW0CTRL 0x0480FF48 /* Control */
+#define DATAWXCTRL_MATRD_BIT 0x80000000 /* Match 'Read' */
+#ifndef METAC_1_2
+#define DATAWXCTRL_MATNOTTX_BIT 0x20000000 /* Invert threadn enable */
+#endif
+#define DATAWXCTRL_MATWR_BIT 0x40000000 /* Match 'Write' */
+#define DATAWXCTRL_MATTXEN_BIT 0x10000000 /* Match threadn enable */
+#define DATAWXCTRL_WRSIZE_BITS 0x0F000000 /* Write Match Size */
+#define DATAWXCTRL_WRSIZE_S 24
+#define DATAWWRSIZE_ANY 0 /* Any size transaction matches */
+#define DATAWWRSIZE_8BIT 1 /* Specific sizes ... */
+#define DATAWWRSIZE_16BIT 2
+#define DATAWWRSIZE_32BIT 3
+#define DATAWWRSIZE_64BIT 4
+#define DATAWXCTRL_HITC_BITS 0x00FF0000 /* Hit counter */
+#define DATAWXCTRL_HITC_S 16
+#define DATAWXHITC_NEXT 0xFF /* Next 'hit' will trigger */
+#define DATAWXHITC_HIT1 0x00 /* No 'hits' after trigger */
+#define DATAWXCTRL_MMASK_BITS 0x0000FFF8 /* Mask ADDR_MATCH bits */
+#define DATAWXCTRL_MMASK_S 3
+#define DATAWXCTRL_MATLTX_BITS 0x00000003 /* Match threadn LOCAL addr */
+#define DATAWXCTRL_MATLTX_S 0 /* Match threadn LOCAL addr */
+#define DATAW0DMATCH0 0x0480FF50 /* Write match data */
+#define DATAW0DMATCH1 0x0480FF58
+#define DATAW0DMASK0 0x0480FF60 /* Write match data mask */
+#define DATAW0DMASK1 0x0480FF68
+#define DATAWnXXXX_STRIDE 0x00000040 /* Stride between DATAW reg sets */
+#define DATAWnXXXX_STRIDE_S 6
+#define DATAWnXXXX_LIMIT 1 /* Sets 0,1 */
+
+/*
+ * CHIP Automatic Mips Allocation control registers
+ * ------------------------------------------------
+ */
+
+/* CORE memory mapped AMA registers */
+#define T0AMAREG4 0x04800810
+#define TXAMAREG4_POOLSIZE_BITS 0x3FFFFF00
+#define TXAMAREG4_POOLSIZE_S 8
+#define TXAMAREG4_AVALUE_BITS 0x000000FF
+#define TXAMAREG4_AVALUE_S 0
+#define T0AMAREG5 0x04800818
+#define TXAMAREG5_POOLC_BITS 0x07FFFFFF
+#define TXAMAREG5_POOLC_S 0
+#define T0AMAREG6 0x04800820
+#define TXAMAREG6_DLINEDEF_BITS 0x00FFFFF0
+#define TXAMAREG6_DLINEDEF_S 0
+#define TnAMAREGX_STRIDE 0x00001000
+
+/*
+ * Memory Management Control Unit Table Entries
+ * --------------------------------------------
+ */
+#define MMCU_ENTRY_S 4 /* -> Entry size */
+#define MMCU_ENTRY_ADDR_BITS 0xFFFFF000 /* Physical address */
+#define MMCU_ENTRY_ADDR_S 12 /* -> Page size */
+#define MMCU_ENTRY_CWIN_BITS 0x000000C0 /* Caching 'window' selection */
+#define MMCU_ENTRY_CWIN_S 6
+#define MMCU_CWIN_UNCACHED 0 /* May not be memory etc. */
+#define MMCU_CWIN_BURST 1 /* Cached but LRU unset */
+#define MMCU_CWIN_C1SET 2 /* Cached in 1 set only */
+#define MMCU_CWIN_CACHED 3 /* Fully cached */
+#define MMCU_ENTRY_CACHE_BIT 0x00000080 /* Set for cached region */
+#define MMCU_ECACHE1_FULL_BIT 0x00000040 /* Use all the sets */
+#define MMCU_ECACHE0_BURST_BIT 0x00000040 /* Match bursts */
+#define MMCU_ENTRY_SYS_BIT 0x00000010 /* Sys-coherent access required */
+#define MMCU_ENTRY_WRC_BIT 0x00000008 /* Write combining allowed */
+#define MMCU_ENTRY_PRIV_BIT 0x00000004 /* Privilege required */
+#define MMCU_ENTRY_WR_BIT 0x00000002 /* Writes allowed */
+#define MMCU_ENTRY_VAL_BIT 0x00000001 /* Entry is valid */
+
+#ifdef METAC_2_1
+/*
+ * Extended first-level/top table entries have extra/larger fields in later
+ * cores as bits 11:0 previously had no effect in such table entries.
+ */
+#define MMCU_E1ENT_ADDR_BITS 0xFFFFFFC0 /* Physical address */
+#define MMCU_E1ENT_ADDR_S 6 /* -> resolution < page size */
+#define MMCU_E1ENT_PGSZ_BITS 0x0000001E /* Page size for 2nd level */
+#define MMCU_E1ENT_PGSZ_S 1
+#define MMCU_E1ENT_PGSZ0_POWER 12 /* PgSz 0 -> 4K */
+#define MMCU_E1ENT_PGSZ_MAX 10 /* PgSz 10 -> 4M maximum */
+#define MMCU_E1ENT_MINIM_BIT 0x00000020
+#endif /* METAC_2_1 */
+
+/* MMCU control register in SYSC region */
+#define MMCU_TABLE_PHYS_ADDR 0x04830010
+#define MMCU_TABLE_PHYS_ADDR_BITS 0xFFFFFFFC
+#ifdef METAC_2_1
+#define MMCU_TABLE_PHYS_EXTEND 0x00000001 /* See below */
+#endif
+#define MMCU_DCACHE_CTRL_ADDR 0x04830018
+#define MMCU_xCACHE_CTRL_ENABLE_BIT 0x00000001
+#define MMCU_xCACHE_CTRL_PARTITION_BIT 0x00000000 /* See xCPART below */
+#define MMCU_ICACHE_CTRL_ADDR 0x04830020
+
+#ifdef METAC_2_1
+
+/*
+ * Allow direct access to physical memory used to implement MMU table.
+ *
+ * Each is based on a corresponding MMCU_TnLOCAL_TABLE_PHYSn or similar
+ * MMCU_TnGLOBAL_TABLE_PHYSn register pair (see next).
+ */
+#define LINSYSMEMT0L_BASE 0x05000000
+#define LINSYSMEMT0L_LIMIT 0x051FFFFF
+#define LINSYSMEMTnX_STRIDE 0x00200000 /* 2MB Local per thread */
+#define LINSYSMEMTnX_STRIDE_S 21
+#define LINSYSMEMTXG_OFFSET 0x00800000 /* +2MB Global per thread */
+#define LINSYSMEMTXG_OFFSET_S 23
+#define LINSYSMEMT1L_BASE 0x05200000
+#define LINSYSMEMT1L_LIMIT 0x053FFFFF
+#define LINSYSMEMT2L_BASE 0x05400000
+#define LINSYSMEMT2L_LIMIT 0x055FFFFF
+#define LINSYSMEMT3L_BASE 0x05600000
+#define LINSYSMEMT3L_LIMIT 0x057FFFFF
+#define LINSYSMEMT0G_BASE 0x05800000
+#define LINSYSMEMT0G_LIMIT 0x059FFFFF
+#define LINSYSMEMT1G_BASE 0x05A00000
+#define LINSYSMEMT1G_LIMIT 0x05BFFFFF
+#define LINSYSMEMT2G_BASE 0x05C00000
+#define LINSYSMEMT2G_LIMIT 0x05DFFFFF
+#define LINSYSMEMT3G_BASE 0x05E00000
+#define LINSYSMEMT3G_LIMIT 0x05FFFFFF
+
+/*
+ * Extended MMU table functionality allows a sparse or flat table to be
+ * described much more efficiently than before.
+ */
+#define MMCU_T0LOCAL_TABLE_PHYS0 0x04830700
+#define MMCU_TnX_TABLE_PHYSX_STRIDE 0x20 /* Offset per thread */
+#define MMCU_TnX_TABLE_PHYSX_STRIDE_S 5
+#define MMCU_TXG_TABLE_PHYSX_OFFSET 0x10 /* Global versus local */
+#define MMCU_TXG_TABLE_PHYSX_OFFSET_S 4
+#define MMCU_TBLPHYS0_DCCTRL_BITS 0x000000DF /* DC controls */
+#define MMCU_TBLPHYS0_ENTLB_BIT 0x00000020 /* Cache in TLB */
+#define MMCU_TBLPHYS0_TBLSZ_BITS 0x00000F00 /* Area supported */
+#define MMCU_TBLPHYS0_TBLSZ_S 8
+#define MMCU_TBLPHYS0_TBLSZ0_POWER 22 /* 0 -> 4M */
+#define MMCU_TBLPHYS0_TBLSZ_MAX 9 /* 9 -> 2G */
+#define MMCU_TBLPHYS0_LINBASE_BITS 0xFFC00000 /* Linear base */
+#define MMCU_TBLPHYS0_LINBASE_S 22
+
+#define MMCU_T0LOCAL_TABLE_PHYS1 0x04830708
+#define MMCU_TBLPHYS1_ADDR_BITS 0xFFFFFFFC /* Physical base */
+#define MMCU_TBLPHYS1_ADDR_S 2
+
+#define MMCU_T0GLOBAL_TABLE_PHYS0 0x04830710
+#define MMCU_T0GLOBAL_TABLE_PHYS1 0x04830718
+#define MMCU_T1LOCAL_TABLE_PHYS0 0x04830720
+#define MMCU_T1LOCAL_TABLE_PHYS1 0x04830728
+#define MMCU_T1GLOBAL_TABLE_PHYS0 0x04830730
+#define MMCU_T1GLOBAL_TABLE_PHYS1 0x04830738
+#define MMCU_T2LOCAL_TABLE_PHYS0 0x04830740
+#define MMCU_T2LOCAL_TABLE_PHYS1 0x04830748
+#define MMCU_T2GLOBAL_TABLE_PHYS0 0x04830750
+#define MMCU_T2GLOBAL_TABLE_PHYS1 0x04830758
+#define MMCU_T3LOCAL_TABLE_PHYS0 0x04830760
+#define MMCU_T3LOCAL_TABLE_PHYS1 0x04830768
+#define MMCU_T3GLOBAL_TABLE_PHYS0 0x04830770
+#define MMCU_T3GLOBAL_TABLE_PHYS1 0x04830778
+
+#define MMCU_T0EBWCCTRL 0x04830640
+#define MMCU_TnEBWCCTRL_BITS 0x00000007
+#define MMCU_TnEBWCCTRL_S 0
+#define MMCU_TnEBWCCCTRL_DISABLE_ALL 0
+#define MMCU_TnEBWCCCTRL_ABIT25 1
+#define MMCU_TnEBWCCCTRL_ABIT26 2
+#define MMCU_TnEBWCCCTRL_ABIT27 3
+#define MMCU_TnEBWCCCTRL_ABIT28 4
+#define MMCU_TnEBWCCCTRL_ABIT29 5
+#define MMCU_TnEBWCCCTRL_ABIT30 6
+#define MMCU_TnEBWCCCTRL_ENABLE_ALL 7
+#define MMCU_TnEBWCCTRL_STRIDE 8
+
+#endif /* METAC_2_1 */
+
+
+/* Registers within the SYSC register region */
+#define METAC_ID 0x04830000
+#define METAC_ID_MAJOR_BITS 0xFF000000
+#define METAC_ID_MAJOR_S 24
+#define METAC_ID_MINOR_BITS 0x00FF0000
+#define METAC_ID_MINOR_S 16
+#define METAC_ID_REV_BITS 0x0000FF00
+#define METAC_ID_REV_S 8
+#define METAC_ID_MAINT_BITS 0x000000FF
+#define METAC_ID_MAINT_S 0
+
+#ifdef METAC_2_1
+/* Use of this section is strongly deprecated */
+#define METAC_ID2 0x04830008
+#define METAC_ID2_DESIGNER_BITS 0xFFFF0000 /* Modified by customer */
+#define METAC_ID2_DESIGNER_S 16
+#define METAC_ID2_MINOR2_BITS 0x00000F00 /* 3rd digit of prod rev */
+#define METAC_ID2_MINOR2_S 8
+#define METAC_ID2_CONFIG_BITS 0x000000FF /* Wrapper configuration */
+#define METAC_ID2_CONFIG_S 0
+
+/* Primary core identification and configuration information */
+#define METAC_CORE_ID 0x04831000
+#define METAC_COREID_GROUP_BITS 0xFF000000
+#define METAC_COREID_GROUP_S 24
+#define METAC_COREID_GROUP_METAG 0x14
+#define METAC_COREID_ID_BITS 0x00FF0000
+#define METAC_COREID_ID_S 16
+#define METAC_COREID_ID_W32 0x10 /* >= for 32-bit pipeline */
+#define METAC_COREID_CONFIG_BITS 0x0000FFFF
+#define METAC_COREID_CONFIG_S 0
+#define METAC_COREID_CFGCACHE_BITS 0x0007
+#define METAC_COREID_CFGCACHE_S 0
+#define METAC_COREID_CFGCACHE_NOM 0
+#define METAC_COREID_CFGCACHE_TYPE0 1
+#define METAC_COREID_CFGCACHE_NOMMU 1 /* Alias for TYPE0 */
+#define METAC_COREID_CFGCACHE_NOCACHE 2
+#define METAC_COREID_CFGCACHE_PRIVNOMMU 3
+#define METAC_COREID_CFGDSP_BITS 0x0038
+#define METAC_COREID_CFGDSP_S 3
+#define METAC_COREID_CFGDSP_NOM 0
+#define METAC_COREID_CFGDSP_MIN 1
+#define METAC_COREID_NOFPACC_BIT 0x0040 /* Set if no FPU accum */
+#define METAC_COREID_CFGFPU_BITS 0x0180
+#define METAC_COREID_CFGFPU_S 7
+#define METAC_COREID_CFGFPU_NOM 0
+#define METAC_COREID_CFGFPU_SNGL 1
+#define METAC_COREID_CFGFPU_DBL 2
+#define METAC_COREID_NOAMA_BIT 0x0200 /* Set if no AMA present */
+#define METAC_COREID_NOCOH_BIT 0x0400 /* Set if no Gbl coherency */
+
+/* Core revision information */
+#define METAC_CORE_REV 0x04831008
+#define METAC_COREREV_DESIGN_BITS 0xFF000000
+#define METAC_COREREV_DESIGN_S 24
+#define METAC_COREREV_MAJOR_BITS 0x00FF0000
+#define METAC_COREREV_MAJOR_S 16
+#define METAC_COREREV_MINOR_BITS 0x0000FF00
+#define METAC_COREREV_MINOR_S 8
+#define METAC_COREREV_MAINT_BITS 0x000000FF
+#define METAC_COREREV_MAINT_S 0
+
+/* Configuration information control outside the core */
+#define METAC_CORE_DESIGNER1 0x04831010 /* Arbitrary value */
+#define METAC_CORE_DESIGNER2 0x04831018 /* Arbitrary value */
+
+/* Configuration information covering presence/number of various features */
+#define METAC_CORE_CONFIG2 0x04831020
+#define METAC_CORECFG2_COREDBGTYPE_BITS 0x60000000 /* Core debug type */
+#define METAC_CORECFG2_COREDBGTYPE_S 29
+#define METAC_CORECFG2_DCSMALL_BIT 0x04000000 /* Data cache small */
+#define METAC_CORECFG2_ICSMALL_BIT 0x02000000 /* Inst cache small */
+#define METAC_CORECFG2_DCSZNP_BITS 0x01C00000 /* Data cache size np */
+#define METAC_CORECFG2_DCSZNP_S 22
+#define METAC_CORECFG2_ICSZNP_BITS 0x00380000 /* Inst cache size np */
+#define METAC_CORECFG2_ICSZNP_S 19
+#define METAC_CORECFG2_DCSZ_BITS 0x00070000 /* Data cache size */
+#define METAC_CORECFG2_DCSZ_S 16
+#define METAC_CORECFG2_xCSZ_4K 0 /* Allocated values */
+#define METAC_CORECFG2_xCSZ_8K 1
+#define METAC_CORECFG2_xCSZ_16K 2
+#define METAC_CORECFG2_xCSZ_32K 3
+#define METAC_CORECFG2_xCSZ_64K 4
+#define METAC_CORE_C2ICSZ_BITS 0x0000E000 /* Inst cache size */
+#define METAC_CORE_C2ICSZ_S 13
+#define METAC_CORE_GBLACC_BITS 0x00001800 /* Number of Global Acc */
+#define METAC_CORE_GBLACC_S 11
+#define METAC_CORE_GBLDXR_BITS 0x00000700 /* 0 -> 0, R -> 2^(R-1) */
+#define METAC_CORE_GBLDXR_S 8
+#define METAC_CORE_GBLAXR_BITS 0x000000E0 /* 0 -> 0, R -> 2^(R-1) */
+#define METAC_CORE_GBLAXR_S 5
+#define METAC_CORE_RTTRACE_BIT 0x00000010
+#define METAC_CORE_WATCHN_BITS 0x0000000C /* 0 -> 0, N -> 2^N */
+#define METAC_CORE_WATCHN_S 2
+#define METAC_CORE_BREAKN_BITS 0x00000003 /* 0 -> 0, N -> 2^N */
+#define METAC_CORE_BREAKN_S 0
+
+/* Configuration information covering presence/number of various features */
+#define METAC_CORE_CONFIG3 0x04831028
+#define METAC_CORECFG3_L2C_REV_ID_BITS 0x000F0000 /* Revision of L2 cache */
+#define METAC_CORECFG3_L2C_REV_ID_S 16
+#define METAC_CORECFG3_L2C_LINE_SIZE_BITS 0x00003000 /* L2 line size */
+#define METAC_CORECFG3_L2C_LINE_SIZE_S 12
+#define METAC_CORECFG3_L2C_LINE_SIZE_64B 0x0 /* 64 bytes */
+#define METAC_CORECFG3_L2C_NUM_WAYS_BITS 0x00000F00 /* L2 number of ways (2^n) */
+#define METAC_CORECFG3_L2C_NUM_WAYS_S 8
+#define METAC_CORECFG3_L2C_SIZE_BITS 0x000000F0 /* L2 size (2^n) */
+#define METAC_CORECFG3_L2C_SIZE_S 4
+#define METAC_CORECFG3_L2C_UNIFIED_BIT 0x00000004 /* Unified cache: */
+#define METAC_CORECFG3_L2C_UNIFIED_S 2
+#define METAC_CORECFG3_L2C_UNIFIED_UNIFIED 1 /* - Unified D/I cache */
+#define METAC_CORECFG3_L2C_UNIFIED_SEPARATE 0 /* - Separate D/I cache */
+#define METAC_CORECFG3_L2C_MODE_BIT 0x00000002 /* Cache Mode: */
+#define METAC_CORECFG3_L2C_MODE_S 1
+#define METAC_CORECFG3_L2C_MODE_WRITE_BACK 1 /* - Write back */
+#define METAC_CORECFG3_L2C_MODE_WRITE_THROUGH 0 /* - Write through */
+#define METAC_CORECFG3_L2C_HAVE_L2C_BIT 0x00000001 /* Have L2C */
+#define METAC_CORECFG3_L2C_HAVE_L2C_S 0
+
+#endif /* METAC_2_1 */
+
+#define SYSC_CACHE_MMU_CONFIG 0x04830028
+#ifdef METAC_2_1
+#define SYSC_CMMUCFG_DCSKEWABLE_BIT 0x00000040
+#define SYSC_CMMUCFG_ICSKEWABLE_BIT 0x00000020
+#define SYSC_CMMUCFG_DCSKEWOFF_BIT 0x00000010 /* Skew association override */
+#define SYSC_CMMUCFG_ICSKEWOFF_BIT 0x00000008 /* -> default 0 on if present */
+#define SYSC_CMMUCFG_MODE_BITS 0x00000007 /* Access to old state */
+#define SYSC_CMMUCFG_MODE_S 0
+#define SYSC_CMMUCFG_ON 0x7
+#define SYSC_CMMUCFG_EBYPASS 0x6 /* Enhanced by-pass mode */
+#define SYSC_CMMUCFG_EBYPASSIC 0x4 /* EB just inst cache */
+#define SYSC_CMMUCFG_EBYPASSDC 0x2 /* EB just data cache */
+#endif /* METAC_2_1 */
+/* Old definitions, Keep them for now */
+#define SYSC_CMMUCFG_MMU_ON_BIT 0x1
+#define SYSC_CMMUCFG_DC_ON_BIT 0x2
+#define SYSC_CMMUCFG_IC_ON_BIT 0x4
+
+#define SYSC_JTAG_THREAD 0x04830030
+#define SYSC_JTAG_TX_BITS 0x00000003 /* Read only bits! */
+#define SYSC_JTAG_TX_S 0
+#define SYSC_JTAG_PRIV_BIT 0x00000004
+#ifdef METAC_2_1
+#define SYSC_JTAG_SLAVETX_BITS 0x00000018
+#define SYSC_JTAG_SLAVETX_S 3
+#endif /* METAC_2_1 */
+
+#define SYSC_DCACHE_FLUSH 0x04830038
+#define SYSC_ICACHE_FLUSH 0x04830040
+#define SYSC_xCACHE_FLUSH_INIT 0x1
+#define MMCU_DIRECTMAP0_ADDR 0x04830080 /* LINSYSDIRECT_BASE -> */
+#define MMCU_DIRECTMAPn_STRIDE 0x00000010 /* 4 Region settings */
+#define MMCU_DIRECTMAPn_S 4
+#define MMCU_DIRECTMAPn_ADDR_BITS 0xFF800000
+#define MMCU_DIRECTMAPn_ADDR_S 23
+#define MMCU_DIRECTMAPn_ADDR_SCALE 0x00800000 /* 8M Regions */
+#ifdef METAC_2_1
+/*
+ * These fields in the above registers provide MMCU_ENTRY_* values
+ * for each direct mapped region to enable optimisation of these areas.
+ * (LSB similar to VALID must be set for enhancements to be active)
+ */
+#define MMCU_DIRECTMAPn_ENHANCE_BIT 0x00000001 /* 0 = no optim */
+#define MMCU_DIRECTMAPn_DCCTRL_BITS 0x000000DF /* Get DC Ctrl */
+#define MMCU_DIRECTMAPn_DCCTRL_S 0
+#define MMCU_DIRECTMAPn_ICCTRL_BITS 0x0000C000 /* Get IC Ctrl */
+#define MMCU_DIRECTMAPn_ICCTRL_S 8
+#define MMCU_DIRECTMAPn_ENTLB_BIT 0x00000020 /* Cache in TLB */
+#define MMCU_DIRECTMAPn_ICCWIN_BITS 0x0000C000 /* Get IC Win Bits */
+#define MMCU_DIRECTMAPn_ICCWIN_S 14
+#endif /* METAC_2_1 */
+
+#define MMCU_DIRECTMAP1_ADDR 0x04830090
+#define MMCU_DIRECTMAP2_ADDR 0x048300a0
+#define MMCU_DIRECTMAP3_ADDR 0x048300b0
+
+/*
+ * These bits partition each thread's use of data cache or instruction cache
+ * resource by modifying the top 4 bits of the address within the cache
+ * storage area.
+ */
+#define SYSC_DCPART0 0x04830200
+#define SYSC_xCPARTn_STRIDE 0x00000008
+#define SYSC_xCPARTL_AND_BITS 0x0000000F /* Masks top 4 bits */
+#define SYSC_xCPARTL_AND_S 0
+#define SYSC_xCPARTG_AND_BITS 0x00000F00 /* Masks top 4 bits */
+#define SYSC_xCPARTG_AND_S 8
+#define SYSC_xCPARTL_OR_BITS 0x000F0000 /* Ors into top 4 bits */
+#define SYSC_xCPARTL_OR_S 16
+#define SYSC_xCPARTG_OR_BITS 0x0F000000 /* Ors into top 4 bits */
+#define SYSC_xCPARTG_OR_S 24
+#define SYSC_CWRMODE_BIT 0x80000000 /* Write cache mode bit */
+
+#define SYSC_DCPART1 0x04830208
+#define SYSC_DCPART2 0x04830210
+#define SYSC_DCPART3 0x04830218
+#define SYSC_ICPART0 0x04830220
+#define SYSC_ICPART1 0x04830228
+#define SYSC_ICPART2 0x04830230
+#define SYSC_ICPART3 0x04830238
+
+/*
+ * META Core Memory and Cache Update registers
+ */
+#define SYSC_MCMDATAX 0x04830300 /* 32-bit read/write data register */
+#define SYSC_MCMDATAT 0x04830308 /* Read or write data triggers oper */
+#define SYSC_MCMGCTRL 0x04830310 /* Control register */
+#define SYSC_MCMGCTRL_READ_BIT 0x00000001 /* Set to issue 1st read */
+#define SYSC_MCMGCTRL_AINC_BIT 0x00000002 /* Set for auto-increment */
+#define SYSC_MCMGCTRL_ADDR_BITS 0x000FFFFC /* Address or index */
+#define SYSC_MCMGCTRL_ADDR_S 2
+#define SYSC_MCMGCTRL_ID_BITS 0x0FF00000 /* Internal memory block Id */
+#define SYSC_MCMGCTRL_ID_S 20
+#define SYSC_MCMGID_NODEV 0xFF /* No Device Selected */
+#define SYSC_MCMGID_DSPRAM0A 0x04 /* DSP RAM D0 block A access */
+#define SYSC_MCMGID_DSPRAM0B 0x05 /* DSP RAM D0 block B access */
+#define SYSC_MCMGID_DSPRAM1A 0x06 /* DSP RAM D1 block A access */
+#define SYSC_MCMGID_DSPRAM1B 0x07 /* DSP RAM D1 block B access */
+#define SYSC_MCMGID_DCACHEL 0x08 /* DCACHE lines (64-bytes/line) */
+#ifdef METAC_2_1
+#define SYSC_MCMGID_DCACHETLB 0x09 /* DCACHE TLB ( Read Only ) */
+#endif /* METAC_2_1 */
+#define SYSC_MCMGID_DCACHET 0x0A /* DCACHE tags (32-bits/line) */
+#define SYSC_MCMGID_DCACHELRU 0x0B /* DCACHE LRU (8-bits/line) */
+#define SYSC_MCMGID_ICACHEL 0x0C /* ICACHE lines (64-bytes/line) */
+#ifdef METAC_2_1
+#define SYSC_MCMGID_ICACHETLB 0x0D /* ICACHE TLB (Read Only ) */
+#endif /* METAC_2_1 */
+#define SYSC_MCMGID_ICACHET 0x0E /* ICACHE Tags (32-bits/line) */
+#define SYSC_MCMGID_ICACHELRU 0x0F /* ICACHE LRU (8-bits/line ) */
+#define SYSC_MCMGID_COREIRAM0 0x10 /* Core code mem id 0 */
+#define SYSC_MCMGID_COREIRAMn 0x17
+#define SYSC_MCMGID_COREDRAM0 0x18 /* Core data mem id 0 */
+#define SYSC_MCMGID_COREDRAMn 0x1F
+#ifdef METAC_2_1
+#define SYSC_MCMGID_DCACHEST 0x20 /* DCACHE ST ( Read Only ) */
+#define SYSC_MCMGID_ICACHEST 0x21 /* ICACHE ST ( Read Only ) */
+#define SYSC_MCMGID_DCACHETLBLRU 0x22 /* DCACHE TLB LRU ( Read Only )*/
+#define SYSC_MCMGID_ICACHETLBLRU 0x23 /* ICACHE TLB LRU( Read Only ) */
+#define SYSC_MCMGID_DCACHESTLRU 0x24 /* DCACHE ST LRU ( Read Only ) */
+#define SYSC_MCMGID_ICACHESTLRU 0x25 /* ICACHE ST LRU ( Read Only ) */
+#define SYSC_MCMGID_DEBUGTLB 0x26 /* DEBUG TLB ( Read Only ) */
+#define SYSC_MCMGID_DEBUGST 0x27 /* DEBUG ST ( Read Only ) */
+#define SYSC_MCMGID_L2CACHEL 0x30 /* L2 Cache Lines (64-bytes/line) */
+#define SYSC_MCMGID_L2CACHET 0x31 /* L2 Cache Tags (32-bits/line) */
+#define SYSC_MCMGID_COPROX0 0x70 /* Coprocessor port id 0 */
+#define SYSC_MCMGID_COPROXn 0x77
+#endif /* METAC_2_1 */
+#define SYSC_MCMGCTRL_TR31_BIT 0x80000000 /* Trigger 31 on completion */
+#define SYSC_MCMSTATUS 0x04830318 /* Status read only */
+#define SYSC_MCMSTATUS_IDLE_BIT 0x00000001
+
+/* META System Events */
+#define SYSC_SYS_EVENT 0x04830400
+#define SYSC_SYSEVT_ATOMIC_BIT 0x00000001
+#define SYSC_SYSEVT_CACHEX_BIT 0x00000002
+#define SYSC_ATOMIC_LOCK 0x04830408
+#define SYSC_ATOMIC_STATE_TX_BITS 0x0000000F
+#define SYSC_ATOMIC_STATE_TX_S 0
+#ifdef METAC_1_2
+#define SYSC_ATOMIC_STATE_DX_BITS 0x000000F0
+#define SYSC_ATOMIC_STATE_DX_S 4
+#else /* METAC_1_2 */
+#define SYSC_ATOMIC_SOURCE_BIT 0x00000010
+#endif /* !METAC_1_2 */
+
+
+#ifdef METAC_2_1
+
+/* These definitions replace the EXPAND_TIMER_DIV register defines which are to
+ * be deprecated.
+ */
+#define SYSC_TIMER_DIV 0x04830140
+#define SYSC_TIMDIV_BITS 0x000000FF
+#define SYSC_TIMDIV_S 0
+
+/* META Enhanced by-pass control for local and global region */
+#define MMCU_LOCAL_EBCTRL 0x04830600
+#define MMCU_GLOBAL_EBCTRL 0x04830608
+#define MMCU_EBCTRL_SINGLE_BIT 0x00000020 /* TLB Uncached */
+/*
+ * These fields in the above registers provide MMCU_ENTRY_* values
+ * for each direct mapped region to enable optimisation of these areas.
+ */
+#define MMCU_EBCTRL_DCCTRL_BITS 0x000000C0 /* Get DC Ctrl */
+#define MMCU_EBCTRL_DCCTRL_S 0 /* NOTE(review): not the mask LSB (6); presumably keeps bits in MMCU_ENTRY_* position - confirm */
+#define MMCU_EBCTRL_ICCTRL_BITS 0x0000C000 /* Get IC Ctrl */
+#define MMCU_EBCTRL_ICCTRL_S 8 /* NOTE(review): not the mask LSB (14); presumably moves bits to the DCCTRL position - confirm */
+
+/* META Cached Core Mode Registers */
+#define MMCU_T0CCM_ICCTRL 0x04830680 /* Core cached code control */
+#define MMCU_TnCCM_xxCTRL_STRIDE 8
+#define MMCU_TnCCM_xxCTRL_STRIDE_S 3
+#define MMCU_T1CCM_ICCTRL 0x04830688
+#define MMCU_T2CCM_ICCTRL 0x04830690
+#define MMCU_T3CCM_ICCTRL 0x04830698
+#define MMCU_T0CCM_DCCTRL 0x048306C0 /* Core cached data control */
+#define MMCU_T1CCM_DCCTRL 0x048306C8
+#define MMCU_T2CCM_DCCTRL 0x048306D0
+#define MMCU_T3CCM_DCCTRL 0x048306D8
+#define MMCU_TnCCM_ENABLE_BIT 0x00000001
+#define MMCU_TnCCM_WIN3_BIT 0x00000002
+#define MMCU_TnCCM_DCWRITE_BIT 0x00000004 /* In DCCTRL only */
+#define MMCU_TnCCM_REGSZ_BITS 0x00000F00
+#define MMCU_TnCCM_REGSZ_S 8
+#define MMCU_TnCCM_REGSZ0_POWER 12 /* RegSz 0 -> 4K */
+#define MMCU_TnCCM_REGSZ_MAXBYTES 0x00080000 /* 512K max */
+#define MMCU_TnCCM_ADDR_BITS 0xFFFFF000
+#define MMCU_TnCCM_ADDR_S 12
+
+#endif /* METAC_2_1 */
+
+/*
+ * Hardware performance counter registers
+ * --------------------------------------
+ */
+#ifdef METAC_2_1
+/* Two Performance Counter Internal Core Events Control registers */
+#define PERF_ICORE0 0x0480FFD0
+#define PERF_ICORE1 0x0480FFD8
+#define PERFI_CTRL_BITS 0x0000000F
+#define PERFI_CTRL_S 0
+#define PERFI_CAH_DMISS 0x0 /* Dcache Misses in cache (TLB Hit) */
+#define PERFI_CAH_IMISS 0x1 /* Icache Misses in cache (TLB Hit) */
+#define PERFI_TLB_DMISS 0x2 /* Dcache Misses in per-thread TLB */
+#define PERFI_TLB_IMISS 0x3 /* Icache Misses in per-thread TLB */
+#define PERFI_TLB_DWRHITS 0x4 /* DC Write-Hits in per-thread TLB */
+#define PERFI_TLB_DWRMISS 0x5 /* DC Write-Miss in per-thread TLB */
+#define PERFI_CAH_DLFETCH 0x8 /* DC Read cache line fetch */
+#define PERFI_CAH_ILFETCH 0x9 /* IC Read cache line fetch */
+#define PERFI_CAH_DWFETCH 0xA /* DC Read cache word fetch */
+#define PERFI_CAH_IWFETCH 0xB /* IC Read cache word fetch */
+#endif /* METAC_2_1 */
+
+/* Two memory-mapped hardware performance counter registers */
+#define PERF_COUNT0 0x0480FFE0
+#define PERF_COUNT1 0x0480FFE8
+
+/* Fields in PERF_COUNTn registers */
+#define PERF_COUNT_BITS 0x00ffffff /* Event count value */
+
+#define PERF_THREAD_BITS 0x0f000000 /* Thread mask selects threads */
+#define PERF_THREAD_S 24
+
+#define PERF_CTRL_BITS 0xf0000000 /* Event filter control */
+#define PERF_CTRL_S 28
+
+#define PERFCTRL_SUPER 0 /* Superthread cycles */
+#define PERFCTRL_REWIND 1 /* Rewinds due to Dcache Misses */
+#ifdef METAC_2_1
+#define PERFCTRL_SUPREW 2 /* Rewinds of superthreaded cycles (no mask) */
+
+#define PERFCTRL_CYCLES 3 /* Counts all cycles (no mask) */
+
+#define PERFCTRL_PREDBC 4 /* Conditional branch predictions */
+#define PERFCTRL_MISPBC 5 /* Conditional branch mispredictions */
+#define PERFCTRL_PREDRT 6 /* Return predictions */
+#define PERFCTRL_MISPRT 7 /* Return mispredictions */
+#endif /* METAC_2_1 */
+
+#define PERFCTRL_DHITS 8 /* Dcache Hits */
+#define PERFCTRL_IHITS 9 /* Icache Hits */
+#define PERFCTRL_IMISS 10 /* Icache Misses in cache or TLB */
+#ifdef METAC_2_1
+#define PERFCTRL_DCSTALL 11 /* Dcache+TLB o/p delayed (per-thread) */
+#define PERFCTRL_ICSTALL 12 /* Icache+TLB o/p delayed (per-thread) */
+
+#define PERFCTRL_INT 13 /* Internal core detailed events (see next) */
+#define PERFCTRL_EXT 15 /* External source in core periphery */
+#endif /* METAC_2_1 */
+
+#ifdef METAC_2_1
+/* These definitions replace the EXPAND_PERFCHANx register defines which are to
+ * be deprecated.
+ */
+#define PERF_CHAN0 0x04830150
+#define PERF_CHAN1 0x04830158
+#define PERF_CHAN_BITS 0x0000000F
+#define PERF_CHAN_S 0
+#define PERFCHAN_WRC_WRBURST 0x0 /* Write combiner write burst */
+#define PERFCHAN_WRC_WRITE 0x1 /* Write combiner write */
+#define PERFCHAN_WRC_RDBURST 0x2 /* Write combiner read burst */
+#define PERFCHAN_WRC_READ 0x3 /* Write combiner read */
+#define PERFCHAN_PREARB_DELAY 0x4 /* Pre-arbiter delay cycle */
+ /* Cross-bar hold-off cycle: */
+#define PERFCHAN_XBAR_HOLDWRAP 0x5 /* wrapper register */
+#define PERFCHAN_XBAR_HOLDSBUS 0x6 /* system bus (ATP only) */
+#define PERFCHAN_XBAR_HOLDCREG 0x9 /* core registers */
+#define PERFCHAN_L2C_MISS 0x6 /* L2 Cache miss */
+#define PERFCHAN_L2C_HIT 0x7 /* L2 Cache hit */
+#define PERFCHAN_L2C_WRITEBACK 0x8 /* L2 Cache writeback */
+ /* Admission delay cycle: */
+#define PERFCHAN_INPUT_CREG 0xB /* core registers */
+#define PERFCHAN_INPUT_INTR 0xC /* internal ram */
+#define PERFCHAN_INPUT_WRC 0xD /* write combiners(memory) */
+
+/* Should following be removed as not in TRM anywhere? */
+#define PERFCHAN_XBAR_HOLDINTR 0x8 /* internal ram */
+#define PERFCHAN_INPUT_SBUS 0xA /* register port */
+/* End of remove section. */
+
+#define PERFCHAN_MAINARB_DELAY 0xF /* Main arbiter delay cycle */
+
+#endif /* METAC_2_1 */
+
+#ifdef METAC_2_1
+/*
+ * Write combiner registers
+ * ------------------------
+ *
+ * These replace the EXPAND_T0WRCOMBINE register defines, which will be
+ * deprecated.
+ */
+#define WRCOMB_CONFIG0 0x04830100
+#define WRCOMB_LFFEn_BIT 0x00004000 /* Enable auto line full flush */
+#define WRCOMB_ENABLE_BIT 0x00002000 /* Enable write combiner */
+#define WRCOMB_TIMEOUT_ENABLE_BIT 0x00001000 /* Timeout flush enable */
+#define WRCOMB_TIMEOUT_COUNT_BITS 0x000003FF
+#define WRCOMB_TIMEOUT_COUNT_S 0
+#define WRCOMB_CONFIG4 0x04830180
+#define WRCOMB_PARTALLOC_BITS 0x000000C0 /* Partition allocation field, bits 7:6 */
+#define WRCOMB_PARTALLOC_S 6 /* Shift = bit position of the field's LSB */
+#define WRCOMB_PARTSIZE_BITS 0x00000030
+#define WRCOMB_PARTSIZE_S 4
+#define WRCOMB_PARTOFFSET_BITS 0x0000000F
+#define WRCOMB_PARTOFFSET_S 0
+#define WRCOMB_CONFIG_STRIDE 8
+#endif /* METAC_2_1 */
+
+#ifdef METAC_2_1
+/*
+ * Thread arbiter registers
+ * ------------------------
+ *
+ * These replace the EXPAND_T0ARBITER register defines, which will be
+ * deprecated.
+ */
+#define ARBITER_ARBCONFIG0 0x04830120
+#define ARBCFG_BPRIORITY_BIT 0x02000000
+#define ARBCFG_IPRIORITY_BIT 0x01000000
+#define ARBCFG_PAGE_BITS 0x00FF0000
+#define ARBCFG_PAGE_S 16
+#define ARBCFG_BBASE_BITS 0x0000FF00
+#define ARGCFG_BBASE_S 8 /* NOTE(review): name misspelt (ARGCFG vs ARBCFG_BBASE_BITS); kept as-is for existing users */
+#define ARBCFG_IBASE_BITS 0x000000FF
+#define ARBCFG_IBASE_S 0
+#define ARBITER_TTECONFIG0 0x04820160 /* NOTE(review): 0x0482xxxx, whereas ARBITER_ARBCONFIG0 is 0x0483xxxx - confirm against TRM */
+#define ARBTTE_IUPPER_BITS 0xFF000000
+#define ARBTTE_IUPPER_S 24
+#define ARBTTE_ILOWER_BITS 0x00FF0000
+#define ARBTTE_ILOWER_S 16
+#define ARBTTE_BUPPER_BITS 0x0000FF00
+#define ARBTTE_BUPPER_S 8
+#define ARBTTE_BLOWER_BITS 0x000000FF
+#define ARBTTE_BLOWER_S 0
+#define ARBITER_STRIDE 8
+#endif /* METAC_2_1 */
+
+/*
+ * Expansion area registers
+ * --------------------------------------
+ */
+
+/* These defines are to be deprecated. See above instead. */
+#define EXPAND_T0WRCOMBINE 0x03000000
+#ifdef METAC_2_1
+#define EXPWRC_LFFEn_BIT 0x00004000 /* Enable auto line full flush */
+#endif /* METAC_2_1 */
+#define EXPWRC_ENABLE_BIT 0x00002000 /* Enable write combiner */
+#define EXPWRC_TIMEOUT_ENABLE_BIT 0x00001000 /* Timeout flush enable */
+#define EXPWRC_TIMEOUT_COUNT_BITS 0x000003FF
+#define EXPWRC_TIMEOUT_COUNT_S 0
+#define EXPAND_TnWRCOMBINE_STRIDE 0x00000008
+
+/* These defines are to be deprecated. See above instead. */
+#define EXPAND_T0ARBITER 0x03000020
+#define EXPARB_BPRIORITY_BIT 0x02000000
+#define EXPARB_IPRIORITY_BIT 0x01000000
+#define EXPARB_PAGE_BITS 0x00FF0000
+#define EXPARB_PAGE_S 16
+#define EXPARB_BBASE_BITS 0x0000FF00
+#define EXPARB_BBASE_S 8
+#define EXPARB_IBASE_BITS 0x000000FF
+#define EXPARB_IBASE_S 0
+#define EXPAND_TnARBITER_STRIDE 0x00000008
+
+/* These definitions are to be deprecated. See above instead. */
+#define EXPAND_TIMER_DIV 0x03000040
+#define EXPTIM_DIV_BITS 0x000000FF
+#define EXPTIM_DIV_S 0
+
+/* These definitions are to be deprecated. See above instead. */
+#define EXPAND_PERFCHAN0 0x03000050
+#define EXPAND_PERFCHAN1 0x03000058
+#define EXPPERF_CTRL_BITS 0x0000000F
+#define EXPPERF_CTRL_S 0
+#define EXPPERF_WRC_WRBURST 0x0 /* Write combiner write burst */
+#define EXPPERF_WRC_WRITE 0x1 /* Write combiner write */
+#define EXPPERF_WRC_RDBURST 0x2 /* Write combiner read burst */
+#define EXPPERF_WRC_READ 0x3 /* Write combiner read */
+#define EXPPERF_PREARB_DELAY 0x4 /* Pre-arbiter delay cycle */
+ /* Cross-bar hold-off cycle: */
+#define EXPPERF_XBAR_HOLDWRAP 0x5 /* wrapper register */
+#define EXPPERF_XBAR_HOLDSBUS 0x6 /* system bus */
+#ifdef METAC_1_2
+#define EXPPERF_XBAR_HOLDLBUS 0x7 /* local bus */
+#else /* METAC_1_2 */
+#define EXPPERF_XBAR_HOLDINTR 0x8 /* internal ram */
+#define EXPPERF_XBAR_HOLDCREG 0x9 /* core registers */
+ /* Admission delay cycle: */
+#define EXPPERF_INPUT_SBUS 0xA /* register port */
+#define EXPPERF_INPUT_CREG 0xB /* core registers */
+#define EXPPERF_INPUT_INTR 0xC /* internal ram */
+#define EXPPERF_INPUT_WRC 0xD /* write combiners(memory) */
+#endif /* !METAC_1_2 */
+#define EXPPERF_MAINARB_DELAY 0xF /* Main arbiter delay cycle */
+
+/*
+ * Debug port registers
+ * --------------------------------------
+ */
+
+/* Data Exchange Register */
+#define DBGPORT_MDBGDATAX 0x0
+
+/* Data Transfer register */
+#define DBGPORT_MDBGDATAT 0x4
+
+/* Control Register 0 */
+#define DBGPORT_MDBGCTRL0 0x8
+#define DBGPORT_MDBGCTRL0_ADDR_BITS 0xFFFFFFFC
+#define DBGPORT_MDBGCTRL0_ADDR_S 2
+#define DBGPORT_MDBGCTRL0_AUTOINCR_BIT 0x00000002
+#define DBGPORT_MDBGCTRL0_RD_BIT 0x00000001
+
+/* Control Register 1 */
+#define DBGPORT_MDBGCTRL1 0xC
+#ifdef METAC_2_1
+#define DBGPORT_MDBGCTRL1_DEFERRTHREAD_BITS 0xC0000000
+#define DBGPORT_MDBGCTRL1_DEFERRTHREAD_S 30
+#endif /* METAC_2_1 */
+#define DBGPORT_MDBGCTRL1_LOCK2_INTERLOCK_BIT 0x20000000
+#define DBGPORT_MDBGCTRL1_ATOMIC_INTERLOCK_BIT 0x10000000
+#define DBGPORT_MDBGCTRL1_TRIGSTATUS_BIT 0x08000000
+#define DBGPORT_MDBGCTRL1_GBLPORT_IDLE_BIT 0x04000000
+#define DBGPORT_MDBGCTRL1_COREMEM_IDLE_BIT 0x02000000
+#define DBGPORT_MDBGCTRL1_READY_BIT 0x01000000
+#ifdef METAC_2_1
+#define DBGPORT_MDBGCTRL1_DEFERRID_BITS 0x00E00000
+#define DBGPORT_MDBGCTRL1_DEFERRID_S 21
+#define DBGPORT_MDBGCTRL1_DEFERR_BIT 0x00100000
+#endif /* METAC_2_1 */
+#define DBGPORT_MDBGCTRL1_WR_ACTIVE_BIT 0x00040000
+#define DBGPORT_MDBGCTRL1_COND_LOCK2_BIT 0x00020000
+#define DBGPORT_MDBGCTRL1_LOCK2_BIT 0x00010000
+#define DBGPORT_MDBGCTRL1_DIAGNOSE_BIT 0x00008000
+#define DBGPORT_MDBGCTRL1_FORCEDIAG_BIT 0x00004000
+#define DBGPORT_MDBGCTRL1_MEMFAULT_BITS 0x00003000
+#define DBGPORT_MDBGCTRL1_MEMFAULT_S 12
+#define DBGPORT_MDBGCTRL1_TRIGGER_BIT 0x00000100
+#ifdef METAC_2_1
+#define DBGPORT_MDBGCTRL1_INTSPECIAL_BIT 0x00000080
+#define DBGPORT_MDBGCTRL1_INTRUSIVE_BIT 0x00000040
+#endif /* METAC_2_1 */
+#define DBGPORT_MDBGCTRL1_THREAD_BITS 0x00000030 /* Thread mask selects threads */
+#define DBGPORT_MDBGCTRL1_THREAD_S 4
+#define DBGPORT_MDBGCTRL1_TRANS_SIZE_BITS 0x0000000C
+#define DBGPORT_MDBGCTRL1_TRANS_SIZE_S 2
+#define DBGPORT_MDBGCTRL1_TRANS_SIZE_32_BIT 0x00000000
+#define DBGPORT_MDBGCTRL1_TRANS_SIZE_16_BIT 0x00000004
+#define DBGPORT_MDBGCTRL1_TRANS_SIZE_8_BIT 0x00000008
+#define DBGPORT_MDBGCTRL1_BYTE_ROUND_BITS 0x00000003
+#define DBGPORT_MDBGCTRL1_BYTE_ROUND_S 0
+#define DBGPORT_MDBGCTRL1_BYTE_ROUND_8_BIT 0x00000001
+#define DBGPORT_MDBGCTRL1_BYTE_ROUND_16_BIT 0x00000002
+
+
+/* L2 Cache registers */
+#define SYSC_L2C_INIT 0x048300C0
+#define SYSC_L2C_INIT_INIT 1
+#define SYSC_L2C_INIT_IN_PROGRESS 0
+#define SYSC_L2C_INIT_COMPLETE 1
+
+#define SYSC_L2C_ENABLE 0x048300D0
+#define SYSC_L2C_ENABLE_ENABLE_BIT 0x00000001
+#define SYSC_L2C_ENABLE_PFENABLE_BIT 0x00000002
+
+#define SYSC_L2C_PURGE 0x048300C8
+#define SYSC_L2C_PURGE_PURGE 1
+#define SYSC_L2C_PURGE_IN_PROGRESS 0
+#define SYSC_L2C_PURGE_COMPLETE 1
+
+#endif /* _ASM_METAG_MEM_H_ */
diff --git a/arch/metag/include/asm/metag_regs.h b/arch/metag/include/asm/metag_regs.h
new file mode 100644
index 00000000000..acf4b8e6e9d
--- /dev/null
+++ b/arch/metag/include/asm/metag_regs.h
@@ -0,0 +1,1184 @@
+/*
+ * asm/metag_regs.h
+ *
+ * Copyright (C) 2000-2007, 2012 Imagination Technologies.
+ *
+ * This program is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU General Public License version 2 as published by the
+ * Free Software Foundation.
+ *
+ * Various defines for Meta core (non memory-mapped) registers.
+ */
+
+#ifndef _ASM_METAG_REGS_H_
+#define _ASM_METAG_REGS_H_
+
+/*
+ * CHIP Unit Identifiers and Valid/Global register number masks
+ * ------------------------------------------------------------
+ */
+#define TXUCT_ID 0x0 /* Control unit regs */
+#ifdef METAC_1_2
+#define TXUCT_MASK 0xFF0FFFFF /* Valid regs 0..31 */
+#else
+#define TXUCT_MASK 0xFF1FFFFF /* Valid regs 0..31 */
+#endif
+#define TGUCT_MASK 0x00000000 /* No global regs */
+#define TXUD0_ID 0x1 /* Data unit regs */
+#define TXUD1_ID 0x2
+#define TXUDX_MASK 0xFFFFFFFF /* Valid regs 0..31 */
+#define TGUDX_MASK 0xFFFF0000 /* Global regs for base inst */
+#define TXUDXDSP_MASK 0x0F0FFFFF /* Valid DSP regs */
+#define TGUDXDSP_MASK 0x0E0E0000 /* Global DSP ACC regs */
+#define TXUA0_ID 0x3 /* Address unit regs */
+#define TXUA1_ID 0x4
+#define TXUAX_MASK 0x0000FFFF /* Valid regs 0-15 */
+#define TGUAX_MASK 0x0000FF00 /* Global regs 8-15 */
+#define TXUPC_ID 0x5 /* PC registers */
+#define TXUPC_MASK 0x00000003 /* Valid regs 0- 1 */
+#define TGUPC_MASK 0x00000000 /* No global regs */
+#define TXUPORT_ID 0x6 /* Ports are not registers */
+#define TXUTR_ID 0x7
+#define TXUTR_MASK 0x0000005F /* Valid regs 0-3,4,6 */
+#define TGUTR_MASK 0x00000000 /* No global regs */
+#ifdef METAC_2_1
+#define TXUTT_ID 0x8
+#define TXUTT_MASK 0x0000000F /* Valid regs 0-3 */
+#define TGUTT_MASK 0x00000010 /* Global reg 4 */
+#define TXUFP_ID 0x9 /* FPU regs */
+#define TXUFP_MASK 0x0000FFFF /* Valid regs 0-15 */
+#define TGUFP_MASK 0x00000000 /* No global regs */
+#endif /* METAC_2_1 */
+
+#ifdef METAC_1_2
+#define TXUXX_MASKS { TXUCT_MASK, TXUDX_MASK, TXUDX_MASK, TXUAX_MASK, \
+ TXUAX_MASK, TXUPC_MASK, 0, TXUTR_MASK, \
+ 0, 0, 0, 0, 0, 0, 0, 0 }
+#define TGUXX_MASKS { TGUCT_MASK, TGUDX_MASK, TGUDX_MASK, TGUAX_MASK, \
+ TGUAX_MASK, TGUPC_MASK, 0, TGUTR_MASK, \
+ 0, 0, 0, 0, 0, 0, 0, 0 }
+#else /* METAC_1_2 */
+#define TXUXX_MASKS { TXUCT_MASK, TXUDX_MASK, TXUDX_MASK, TXUAX_MASK, \
+ TXUAX_MASK, TXUPC_MASK, 0, TXUTR_MASK, \
+ TXUTT_MASK, TXUFP_MASK, 0, 0, \
+ 0, 0, 0, 0 }
+#define TGUXX_MASKS { TGUCT_MASK, TGUDX_MASK, TGUDX_MASK, TGUAX_MASK, \
+ TGUAX_MASK, TGUPC_MASK, 0, TGUTR_MASK, \
+ TGUTT_MASK, TGUFP_MASK, 0, 0, \
+ 0, 0, 0, 0 }
+#endif /* !METAC_1_2 */
+
+#define TXUXXDSP_MASKS { 0, TXUDXDSP_MASK, TXUDXDSP_MASK, 0, 0, 0, 0, 0, \
+ 0, 0, 0, 0, 0, 0, 0, 0 }
+#define TGUXXDSP_MASKS { 0, TGUDXDSP_MASK, TGUDXDSP_MASK, 0, 0, 0, 0, 0, \
+ 0, 0, 0, 0, 0, 0, 0, 0 }
+
+/* -------------------------------------------------------------------------
+; DATA AND ADDRESS UNIT REGISTERS
+; -----------------------------------------------------------------------*/
+/*
+ Thread local D0 registers
+ */
+/* D0.0 ; Holds 32-bit result, can be used as scratch */
+#define D0Re0 D0.0
+/* D0.1 ; Used to pass Arg6_32 */
+#define D0Ar6 D0.1
+/* D0.2 ; Used to pass Arg4_32 */
+#define D0Ar4 D0.2
+/* D0.3 ; Used to pass Arg2_32 to a called routine (see D1.3 below) */
+#define D0Ar2 D0.3
+/* D0.4 ; Can be used as scratch; used to save A0FrP in entry sequences */
+#define D0FrT D0.4
+/* D0.5 ; C compiler assumes preservation, save with D1.5 if used */
+/* D0.6 ; C compiler assumes preservation, save with D1.6 if used */
+/* D0.7 ; C compiler assumes preservation, save with D1.7 if used */
+/* D0.8 ; Use of D0.8 and above is not encouraged */
+/* D0.9 */
+/* D0.10 */
+/* D0.11 */
+/* D0.12 */
+/* D0.13 */
+/* D0.14 */
+/* D0.15 */
+/*
+ Thread local D1 registers
+ */
+/* D1.0 ; Holds top 32-bits of 64-bit result, can be used as scratch */
+#define D1Re0 D1.0
+/* D1.1 ; Used to pass Arg5_32 */
+#define D1Ar5 D1.1
+/* D1.2 ; Used to pass Arg3_32 */
+#define D1Ar3 D1.2
+/* D1.3 ; Used to pass Arg1_32 (first 32-bit argument) to a called routine */
+#define D1Ar1 D1.3
+/* D1.4 ; Used for Return Pointer, save during entry with A0FrP (via D0.4) */
+#define D1RtP D1.4
+/* D1.5 ; C compiler assumes preservation, save if used */
+/* D1.6 ; C compiler assumes preservation, save if used */
+/* D1.7 ; C compiler assumes preservation, save if used */
+/* D1.8 ; Use of D1.8 and above is not encouraged */
+/* D1.9 */
+/* D1.10 */
+/* D1.11 */
+/* D1.12 */
+/* D1.13 */
+/* D1.14 */
+/* D1.15 */
+/*
+ Thread local A0 registers
+ */
+/* A0.0 ; Primary stack pointer */
+#define A0StP A0.0
+/* A0.1 ; Used as local frame pointer in C, save if used (via D0.4) */
+#define A0FrP A0.1
+/* A0.2 */
+/* A0.3 */
+/* A0.4 ; Use of A0.4 and above is not encouraged */
+/* A0.5 */
+/* A0.6 */
+/* A0.7 */
+/*
+ Thread local A1 registers
+ */
+/* A1.0 ; Global static chain pointer - do not modify */
+#define A1GbP A1.0
+/* A1.1 ; Local static chain pointer in C, can be used as scratch */
+#define A1LbP A1.1
+/* A1.2 */
+/* A1.3 */
+/* A1.4 ; Use of A1.4 and above is not encouraged */
+/* A1.5 */
+/* A1.6 */
+/* A1.7 */
+#ifdef METAC_2_1
+/* Renameable registers for use with Fast Interrupts */
+/* The interrupt stack pointer (usually a global register) */
+#define A0IStP A0IReg
+/* The interrupt global pointer (usually a global register) */
+#define A1IGbP A1IReg
+#endif
+/*
+ Further registers may be globally allocated via linkage/loading tools,
+ normally they are not used.
+ */
+/*-------------------------------------------------------------------------
+; STACK STRUCTURE and CALLING CONVENTION
+; -----------------------------------------------------------------------*/
+/*
+; Calling convention indicates that the following is the state of the
+; stack frame at the start of a routine-
+;
+; Arg9_32 [A0StP+#-12]
+; Arg8_32 [A0StP+#- 8]
+; Arg7_32 [A0StP+#- 4]
+; A0StP->
+;
+; Registers D1.3, D0.3, ..., to D0.1 are used to pass Arg1_32 to Arg6_32
+; respectively. If a routine needs to store them on the stack in order
+; to make sub-calls or because of the general complexity of the routine it
+; is best to dump these registers immediately at the start of a routine
+; using a MSETL or SETL instruction-
+;
+; MSETL [A0StP],D0Ar6,D0Ar4,D0Ar2; Only dump arguments expected
+;or SETL [A0StP+#8++],D0Ar2 ; Up to two 32-bit args expected
+;
+; For non-leaf routines it is always necessary to save and restore at least
+; the return address value D1RtP on the stack. Also by convention if the
+; frame is saved then a new A0FrP value must be set-up. So for non-leaf
+; routines at this point both these registers must be saved onto the stack
+; using a SETL instruction and the new A0FrP value is then set-up-
+;
+; MOV D0FrT,A0FrP
+; ADD A0FrP,A0StP,#0
+; SETL [A0StP+#8++],D0FrT,D1RtP
+;
+; Registers D0.5, D1.5, to D1.7 are assumed to be preserved across calls so
+; a SETL or MSETL instruction can be used to save the current state
+; of these registers if they are modified by the current routine-
+;
+; MSETL [A0StP],D0.5,D0.6,D0.7 ; Only save registers modified
+;or SETL [A0StP+#8++],D0.5 ; Only D0.5 and/or D1.5 modified
+;
+; All of the above sequences can be combined into one maximal case-
+;
+; MOV D0FrT,A0FrP ; Save and calculate new frame pointer
+; ADD A0FrP,A0StP,#(ARS)
+; MSETL [A0StP],D0Ar6,D0Ar4,D0Ar2,D0FrT,D0.5,D0.6,D0.7
+;
+; Having completed the above sequence the only remaining task on routine
+; entry is to reserve any local and outgoing argument storage space on the
+; stack. This instruction may be omitted if the size of this region is zero-
+;
+; ADD A0StP,A0StP,#(LCS)
+;
+; LCS is the first example use of one of a number of standard local defined
+; values that can be created to make assembler code more readable and
+; potentially more robust-
+;
+; #define ARS 0x18 ; Register arg bytes saved on stack
+; #define FRS 0x20 ; Frame save area size in bytes
+; #define LCS 0x00 ; Locals and Outgoing arg size
+; #define ARO (LCS+FRS) ; Stack offset to access args
+;
+; All of the above defines should be undefined (#undef) at the end of each
+; routine to avoid accidental use in the next routine.
+;
+; Given all of the above the following stack structure is expected during
+; the body of a routine if all args passed in registers are saved during
+; entry-
+;
+; ; 'Incoming args area'
+; Arg10_32 [A0StP+#-((10*4)+ARO)] Arg9_32 [A0StP+#-(( 9*4)+ARO)]
+; Arg8_32 [A0StP+#-(( 8*4)+ARO)] Arg7_32 [A0StP+#-(( 7*4)+ARO)]
+;--- Call point
+; D0Ar6= Arg6_32 [A0StP+#-(( 6*4)+ARO)] D1Ar5=Arg5_32 [A0StP+#-(( 5*4)+ARO)]
+; D0Ar4= Arg4_32 [A0StP+#-(( 4*4)+ARO)] D1Ar3=Arg3_32 [A0StP+#-(( 3*4)+ARO)]
+; D0Ar2= Arg2_32 [A0StP+#-(( 2*4)+ARO)] D1Ar1=Arg1_32 [A0StP+#-(( 1*4)+ARO)]
+; ; 'Frame area'
+; A0FrP-> D0FrT, D1RtP,
+; D0.5, D1.5,
+; D0.6, D1.6,
+; D0.7, D1.7,
+; ; 'Locals area'
+; Loc0_32 [A0StP+# (( 0*4)-LCS)], Loc1_32 [A0StP+# (( 1*4)-LCS)]
+; .... other locals
+; Locn_32 [A0StP+# (( n*4)-LCS)]
+; ; 'Outgoing args area'
+; Outm_32 [A0StP+#- ( m*4)] .... other outgoing args
+; Out8_32 [A0StP+#- ( 2*4)] Out7_32 [A0StP+#- ( 1*4)]
+; A0StP-> (Out1_32-Out6_32 in regs D1Ar1-D0Ar6)
+;
+; The exit sequence for a non-leaf routine can use the frame pointer created
+; in the entry sequence to optimise the recovery of the full state-
+;
+; MGETL D0FrT,D0.5,D0.6,D0.7,[A0FrP]
+; SUB A0StP,A0FrP,#(ARS+FRS)
+; MOV A0FrP,D0FrT
+; MOV PC,D1RtP
+;
+; Having described the most complex non-leaf case above, it is worth noting
+; that if a routine is a leaf and does not use any of the caller-preserved
+; state, the routine can be implemented as-
+;
+; ADD A0StP,A0StP,#LCS
+; .... body of routine
+; SUB A0StP,A0StP,#LCS
+; MOV PC,D1RtP
+;
+; The stack adjustments can also be omitted if no local storage is required.
+;
+; Another exit sequence structure is more applicable for a leaf routine
+; with no local frame pointer saved/generated in which the call saved
+; registers need to be saved and restored-
+;
+; MSETL [A0StP],D0.5,D0.6,D0.7 ; Hence FRS is 0x18, ARS is 0x00
+; ADD A0StP,A0StP,#LCS
+; .... body of routine
+; GETL D0.5,D1.5,[A0StP+#((0*8)-(FRS+LCS))]
+; GETL D0.6,D1.6,[A0StP+#((1*8)-(FRS+LCS))]
+; GETL D0.7,D1.7,[A0StP+#((2*8)-(FRS+LCS))]
+; SUB A0StP,A0StP,#(ARS+FRS+LCS)
+; MOV PC,D1RtP
+;
+; Lastly, to support profiling assembler code should use a fixed entry/exit
+; sequence if the trigger define _GMON_ASM is defined-
+;
+; #ifndef _GMON_ASM
+; ... optimised entry code
+; #else
+; ; Profiling entry case
+; MOV D0FrT,A0FrP ; Save and calculate new frame pointer
+; ADD A0FrP,A0StP,#(ARS)
+; MSETL [A0StP],...,D0FrT,... or SETL [A0FrP],D0FrT,D1RtP
+; CALLR D0FrT,_mcount_wrapper
+; #endif
+; ... body of routine
+; #ifndef _GMON_ASM
+; ... optimised exit code
+; #else
+; ; Profiling exit case
+; MGETL D0FrT,...,[A0FrP] or GETL D0FrT,D1RtP,[A0FrP++]
+; SUB A0StP,A0FrP,#(ARS+FRS)
+; MOV A0FrP,D0FrT
+; MOV PC,D1RtP
+; #endif
+
+
+; -------------------------------------------------------------------------
+; CONTROL UNIT REGISTERS
+; -------------------------------------------------------------------------
+;
+; See the assembler guide, hardware documentation, or the field values
+; defined below for some details of the use of these registers.
+*/
+#define TXENABLE CT.0 /* Need to define bit-field values in these */
+#define TXMODE CT.1
+#define TXSTATUS CT.2 /* DEFAULT 0x00020000 */
+#define TXRPT CT.3
+#define TXTIMER CT.4
+#define TXL1START CT.5
+#define TXL1END CT.6
+#define TXL1COUNT CT.7
+#define TXL2START CT.8
+#define TXL2END CT.9
+#define TXL2COUNT CT.10
+#define TXBPOBITS CT.11
+#define TXMRSIZE CT.12
+#define TXTIMERI CT.13
+#define TXDRCTRL CT.14 /* DEFAULT 0x0XXXF0F0 */
+#define TXDRSIZE CT.15
+#define TXCATCH0 CT.16
+#define TXCATCH1 CT.17
+#define TXCATCH2 CT.18
+#define TXCATCH3 CT.19
+
+#ifdef METAC_2_1
+#define TXDEFR CT.20
+#define TXCPRS CT.21
+#endif
+
+#define TXINTERN0 CT.23
+#define TXAMAREG0 CT.24
+#define TXAMAREG1 CT.25
+#define TXAMAREG2 CT.26
+#define TXAMAREG3 CT.27
+#define TXDIVTIME CT.28 /* DEFAULT 0x00000001 */
+#define TXPRIVEXT CT.29 /* DEFAULT 0x003B0000 */
+#define TXTACTCYC CT.30
+#define TXIDLECYC CT.31
+
+/*****************************************************************************
+ * CONTROL UNIT REGISTER BITS
+ ****************************************************************************/
+/*
+ * The following registers and where appropriate the sub-fields of those
+ * registers are defined for pervasive use in controlling program flow.
+ */
+
+/*
+ * TXENABLE register fields - only the thread id is routinely useful
+ */
+#define TXENABLE_REGNUM 0
+#define TXENABLE_THREAD_BITS 0x00000700
+#define TXENABLE_THREAD_S 8
+#define TXENABLE_REV_STEP_BITS 0x000000F0
+#define TXENABLE_REV_STEP_S 4
+
+/*
+ * TXMODE register - controls extensions of the instruction set
+ */
+#define TXMODE_REGNUM 1
+#define TXMODE_DEFAULT 0 /* All fields default to zero */
+
+/*
+ * TXSTATUS register - contains a couple of stable bits that can be used
+ * to determine the privilege processing level and interrupt
+ * processing level of the current thread.
+ */
+#define TXSTATUS_REGNUM 2
+#define TXSTATUS_PSTAT_BIT 0x00020000 /* -> Privilege active */
+#define TXSTATUS_PSTAT_S 17
+#define TXSTATUS_ISTAT_BIT 0x00010000 /* -> In interrupt state */
+#define TXSTATUS_ISTAT_S 16
+
+/*
+ * These are all relatively boring registers, mostly full 32-bit
+ */
+#define TXRPT_REGNUM 3 /* Repeat counter for XFR... instructions */
+#define TXTIMER_REGNUM 4 /* Timer-- causes timer trigger on overflow */
+#define TXL1START_REGNUM 5 /* Hardware Loop 1 Start-PC/End-PC/Count */
+#define TXL1END_REGNUM 6
+#define TXL1COUNT_REGNUM 7
+#define TXL2START_REGNUM 8 /* Hardware Loop 2 Start-PC/End-PC/Count */
+#define TXL2END_REGNUM 9
+#define TXL2COUNT_REGNUM 10
+#define TXBPOBITS_REGNUM 11 /* Branch predict override bits - tune perf */
+#define TXTIMERI_REGNUM 13 /* Timer-- time based interrupt trigger */
+
+/*
+ * TXDIVTIME register is routinely read to calculate the time-base for
+ * the TXTIMER register.
+ */
+#define TXDIVTIME_REGNUM 28
+#define TXDIVTIME_DIV_BITS 0x000000FF
+#define TXDIVTIME_DIV_S 0
+#define TXDIVTIME_DIV_MIN 0x00000001 /* Maximum resolution */
+#define TXDIVTIME_DIV_MAX 0x00000100 /* 1/1 -> 1/256 resolution */
+#define TXDIVTIME_BASE_HZ 1000000 /* Timers run at 1Mhz @1/1 */
+
+/*
+ * TXPRIVEXT register can be consulted to decide if write access to a
+ * part of the threads register set is not permitted when in
+ * unprivileged mode (PSTAT == 0).
+ */
+#define TXPRIVEXT_REGNUM 29
+#define TXPRIVEXT_COPRO_BITS 0xFF000000 /* Co-processor 0-7 */
+#define TXPRIVEXT_COPRO_S 24
+#ifndef METAC_1_2
+#define TXPRIVEXT_TXTIMER_BIT 0x00080000 /* TXTIMER priv */
+#define TXPRIVEXT_TRACE_BIT 0x00040000 /* TTEXEC|TTCTRL|GTEXEC */
+#endif
+#define TXPRIVEXT_TXTRIGGER_BIT 0x00020000 /* TXSTAT|TXMASK|TXPOLL */
+#define TXPRIVEXT_TXGBLCREG_BIT 0x00010000 /* Global common regs */
+#define TXPRIVEXT_CBPRIV_BIT 0x00008000 /* Mem i/f dump priv */
+#define TXPRIVEXT_ILOCK_BIT 0x00004000 /* LOCK inst priv */
+#define TXPRIVEXT_TXITACCYC_BIT 0x00002000 /* TXIDLECYC|TXTACTCYC */
+#define TXPRIVEXT_TXDIVTIME_BIT 0x00001000 /* TXDIVTIME priv */
+#define TXPRIVEXT_TXAMAREGX_BIT 0x00000800 /* TXAMAREGX priv */
+#define TXPRIVEXT_TXTIMERI_BIT 0x00000400 /* TXTIMERI priv */
+#define TXPRIVEXT_TXSTATUS_BIT 0x00000200 /* TXSTATUS priv */
+#define TXPRIVEXT_TXDISABLE_BIT 0x00000100 /* TXENABLE priv */
+#ifndef METAC_1_2
+#define TXPRIVEXT_MINIMON_BIT 0x00000080 /* Enable Minim features */
+#define TXPRIVEXT_OLDBCCON_BIT 0x00000020 /* Restore Static predictions */
+#define TXPRIVEXT_ALIGNREW_BIT 0x00000010 /* Align & precise checks */
+#endif
+#define TXPRIVEXT_KEEPPRI_BIT 0x00000008 /* Use AMA_Priority if ISTAT=1*/
+#define TXPRIVEXT_TXTOGGLEI_BIT 0x00000001 /* TX.....I priv */
+
+/*
+ * TXTACTCYC register - counts instructions issued for this thread
+ */
+#define TXTACTCYC_REGNUM 30
+#define TXTACTCYC_COUNT_MASK 0x00FFFFFF
+
+/*
+ * TXIDLECYC register - counts idle cycles
+ */
+#define TXIDLECYC_REGNUM 31
+#define TXIDLECYC_COUNT_MASK 0x00FFFFFF
+
+/*****************************************************************************
+ * DSP EXTENSIONS
+ ****************************************************************************/
+/*
+ * The following values relate to fields and controls that only a program
+ * using the DSP extensions of the META instruction set need to know.
+ */
+
+
+#ifndef METAC_1_2
+/*
+ * Allow co-processor hardware to replace the read pipeline data source in
+ * appropriate cases.
+ */
+#define TXMODE_RDCPEN_BIT 0x00800000
+#endif
+
+/*
+ * Address unit addressing modes
+ */
+#define TXMODE_A1ADDR_BITS 0x00007000
+#define TXMODE_A1ADDR_S 12
+#define TXMODE_A0ADDR_BITS 0x00000700
+#define TXMODE_A0ADDR_S 8
+#define TXMODE_AXADDR_MODULO 3
+#define TXMODE_AXADDR_REVB 4
+#define TXMODE_AXADDR_REVW 5
+#define TXMODE_AXADDR_REVD 6
+#define TXMODE_AXADDR_REVL 7
+
+/*
+ * Data unit OverScale select (default 0 -> normal, 1 -> top 16 bits)
+ */
+#define TXMODE_DXOVERSCALE_BIT 0x00000080
+
+/*
+ * Data unit MX mode select (default 0 -> MX16, 1 -> MX8)
+ */
+#define TXMODE_M8_BIT 0x00000040
+
+/*
+ * Data unit accumulator saturation point (default -> 40 bit accumulator)
+ */
+#define TXMODE_DXACCSAT_BIT 0x00000020 /* Set for 32-bit accumulator */
+
+/*
+ * Data unit accumulator saturation enable (default 0 -> no saturation)
+ */
+#define TXMODE_DXSAT_BIT 0x00000010
+
+/*
+ * Data unit master rounding control (default 0 -> normal, 1 -> convergent)
+ */
+#define TXMODE_DXROUNDING_BIT 0x00000008
+
+/*
+ * Data unit product shift for fractional arithmetic (default off)
+ */
+#define TXMODE_DXPRODSHIFT_BIT 0x00000004
+
+/*
+ * Select the arithmetic mode (multiply mostly) for both data units
+ */
+#define TXMODE_DXARITH_BITS 0x00000003
+#define TXMODE_DXARITH_32 3
+#define TXMODE_DXARITH_32H 2
+#define TXMODE_DXARITH_S16 1
+#define TXMODE_DXARITH_16 0
+
+/*
+ * TXMRSIZE register value only relevant when DSP modulo addressing active
+ */
+#define TXMRSIZE_REGNUM 12
+#define TXMRSIZE_MIN 0x0002 /* 0, 1 -> normal addressing logic */
+#define TXMRSIZE_MAX 0xFFFF
+
+/*
+ * TXDRCTRL register can be used to detect the actual size of the DSP RAM
+ * partitions allocated to this thread.
+ */
+#define TXDRCTRL_REGNUM 14
+#define TXDRCTRL_SINESIZE_BITS 0x0F000000
+#define TXDRCTRL_SINESIZE_S 24
+#define TXDRCTRL_RAMSZPOW_BITS 0x001F0000 /* Limit = (1<<RAMSZPOW)-1 */
+#define TXDRCTRL_RAMSZPOW_S 16
+#define TXDRCTRL_D1RSZAND_BITS 0x0000F000 /* Mask top 4 bits - D1 */
+#define TXDRCTRL_D1RSZAND_S 12
+#define TXDRCTRL_D0RSZAND_BITS 0x000000F0 /* Mask top 4 bits - D0 */
+#define TXDRCTRL_D0RSZAND_S 4
+/* Given extracted RAMSZPOW and DnRSZAND fields this returns the size */
+#define TXDRCTRL_DXSIZE(Pow, AndBits) \
+ ((((~(AndBits)) & 0x0f) + 1) << ((Pow)-4))
+
+/*
+ * TXDRSIZE register provides modulo addressing options for each DSP RAM
+ */
+#define TXDRSIZE_REGNUM 15
+#define TXDRSIZE_R1MOD_BITS 0xFFFF0000
+#define TXDRSIZE_R1MOD_S 16
+#define TXDRSIZE_R0MOD_BITS 0x0000FFFF
+#define TXDRSIZE_R0MOD_S 0
+
+#define TXDRSIZE_RBRAD_SCALE_BITS 0x70000000
+#define TXDRSIZE_RBRAD_SCALE_S 28
+#define TXDRSIZE_RBMODSIZE_BITS 0x0FFF0000
+#define TXDRSIZE_RBMODSIZE_S 16
+#define TXDRSIZE_RARAD_SCALE_BITS 0x00007000
+#define TXDRSIZE_RARAD_SCALE_S 12
+#define TXDRSIZE_RAMODSIZE_BITS 0x00000FFF
+#define TXDRSIZE_RAMODSIZE_S 0
+
+/*****************************************************************************
+ * DEFERRED and BUS ERROR EXTENSION
+ ****************************************************************************/
+
+/*
+ * TXDEFR register - Deferred exception control
+ */
+#define TXDEFR_REGNUM 20
+#define TXDEFR_DEFAULT 0 /* All fields default to zero */
+
+/*
+ * Bus error state is a multi-bit positive/negative event notification from
+ * the bus infrastructure.
+ */
+#define TXDEFR_BUS_ERR_BIT 0x80000000 /* Set if error (LSB STATE) */
+#define TXDEFR_BUS_ERRI_BIT 0x40000000 /* Fetch returned error */
+#define TXDEFR_BUS_STATE_BITS 0x3F000000 /* Bus event/state data */
+#define TXDEFR_BUS_STATE_S 24
+#define TXDEFR_BUS_TRIG_BIT 0x00800000 /* Set when bus error seen */
+
+/*
+ * Bus events are collected by background code in a deferred manner unless
+ * selected to trigger an extended interrupt HALT trigger when they occur.
+ */
+#define TXDEFR_BUS_ICTRL_BIT 0x00000080 /* Enable interrupt trigger */
+
+/*
+ * CHIP Automatic Mips Allocation control registers
+ * ------------------------------------------------
+ */
+
+/* CT Bank AMA Registers */
+#define TXAMAREG0_REGNUM 24
+#ifdef METAC_1_2
+#define TXAMAREG0_CTRL_BITS 0x07000000
+#else /* METAC_1_2 */
+#define TXAMAREG0_RCOFF_BIT 0x08000000
+#define TXAMAREG0_DLINEHLT_BIT 0x04000000
+#define TXAMAREG0_DLINEDIS_BIT 0x02000000
+#define TXAMAREG0_CYCSTRICT_BIT 0x01000000
+#define TXAMAREG0_CTRL_BITS (TXAMAREG0_RCOFF_BIT | \
+ TXAMAREG0_DLINEHLT_BIT | \
+ TXAMAREG0_DLINEDIS_BIT | \
+ TXAMAREG0_CYCSTRICT_BIT)
+#endif /* !METAC_1_2 */
+#define TXAMAREG0_CTRL_S 24
+#define TXAMAREG0_MDM_BIT 0x00400000
+#define TXAMAREG0_MPF_BIT 0x00200000
+#define TXAMAREG0_MPE_BIT 0x00100000
+#define TXAMAREG0_MASK_BITS (TXAMAREG0_MDM_BIT | \
+ TXAMAREG0_MPF_BIT | \
+ TXAMAREG0_MPE_BIT)
+#define TXAMAREG0_MASK_S 20
+#define TXAMAREG0_SDM_BIT 0x00040000
+#define TXAMAREG0_SPF_BIT 0x00020000
+#define TXAMAREG0_SPE_BIT 0x00010000
+#define TXAMAREG0_STATUS_BITS (TXAMAREG0_SDM_BIT | \
+ TXAMAREG0_SPF_BIT | \
+ TXAMAREG0_SPE_BIT)
+#define TXAMAREG0_STATUS_S 16
+#define TXAMAREG0_PRIORITY_BITS 0x0000FF00
+#define TXAMAREG0_PRIORITY_S 8
+#define TXAMAREG0_BVALUE_BITS 0x000000FF
+#define TXAMAREG0_BVALUE_S 0
+
+#define TXAMAREG1_REGNUM 25
+#define TXAMAREG1_DELAYC_BITS 0x07FFFFFF
+#define TXAMAREG1_DELAYC_S 0
+
+#define TXAMAREG2_REGNUM 26
+#ifdef METAC_1_2
+#define TXAMAREG2_DLINEC_BITS 0x00FFFFFF
+#define TXAMAREG2_DLINEC_S 0
+#else /* METAC_1_2 */
+#define TXAMAREG2_IRQPRIORITY_BIT 0xFF000000
+#define TXAMAREG2_IRQPRIORITY_S 24
+#define TXAMAREG2_DLINEC_BITS 0x00FFFFF0
+#define TXAMAREG2_DLINEC_S 4
+#endif /* !METAC_1_2 */
+
+#define TXAMAREG3_REGNUM 27 /* NOTE(review): fields below keep a TXAMAREG2_ prefix */
+#define TXAMAREG2_AMABLOCK_BIT 0x00080000 /* overlaps TXAMAREG2_DLINEC_BITS - presumably a TXAMAREG3 field; confirm */
+#define TXAMAREG2_AMAC_BITS 0x0000FFFF /* overlaps TXAMAREG2_DLINEC_BITS - presumably a TXAMAREG3 field; confirm */
+#define TXAMAREG2_AMAC_S 0
+
+/*****************************************************************************
+ * FPU EXTENSIONS
+ ****************************************************************************/
+/*
+ * The following registers only exist in FPU enabled cores.
+ */
+
+/*
+ * TXMODE register - FPU rounding mode control/status fields
+ */
+#define TXMODE_FPURMODE_BITS 0x00030000
+#define TXMODE_FPURMODE_S 16
+#define TXMODE_FPURMODEWRITE_BIT 0x00040000 /* Set to change FPURMODE */
+
+/*
+ * TXDEFR register - FPU exception handling/state is a significant source
+ * of deferrable errors. Run-time S/W can move handling to interrupt level
+ * using DEFR instruction to collect state.
+ */
+#define TXDEFR_FPE_FE_BITS 0x003F0000 /* Set by FPU_FE events */
+#define TXDEFR_FPE_FE_S 16
+
+#define TXDEFR_FPE_INEXACT_FE_BIT 0x010000
+#define TXDEFR_FPE_UNDERFLOW_FE_BIT 0x020000
+#define TXDEFR_FPE_OVERFLOW_FE_BIT 0x040000
+#define TXDEFR_FPE_DIVBYZERO_FE_BIT 0x080000
+#define TXDEFR_FPE_INVALID_FE_BIT 0x100000
+#define TXDEFR_FPE_DENORMAL_FE_BIT 0x200000
+
+#define TXDEFR_FPE_ICTRL_BITS 0x000003F /* Route to interrupts */
+#define TXDEFR_FPE_ICTRL_S 0
+
+#define TXDEFR_FPE_INEXACT_ICTRL_BIT 0x01
+#define TXDEFR_FPE_UNDERFLOW_ICTRL_BIT 0x02
+#define TXDEFR_FPE_OVERFLOW_ICTRL_BIT 0x04
+#define TXDEFR_FPE_DIVBYZERO_ICTRL_BIT 0x08
+#define TXDEFR_FPE_INVALID_ICTRL_BIT 0x10
+#define TXDEFR_FPE_DENORMAL_ICTRL_BIT 0x20
+
+/*
+ * DETAILED FPU RELATED VALUES
+ * ---------------------------
+ */
+
+/*
+ * Rounding mode field in TXMODE can hold a number of logical values
+ */
+#define METAG_FPURMODE_TONEAREST 0x0 /* Default */
+#define METAG_FPURMODE_TOWARDZERO 0x1
+#define METAG_FPURMODE_UPWARD 0x2
+#define METAG_FPURMODE_DOWNWARD 0x3
+
+/*
+ * In order to set the TXMODE register field that controls the rounding mode
+ * an extra bit must be set in the value written versus that read in order
+ * to gate writes to the rounding mode field. This allows other non-FPU code
+ * to modify TXMODE without knowledge of the FPU units presence and not
+ * influence the FPU rounding mode. This macro adds the required bit so new
+ * rounding modes are accepted.
+ */
+#define TXMODE_FPURMODE_SET(FPURMode) \
+ (TXMODE_FPURMODEWRITE_BIT + ((FPURMode)<<TXMODE_FPURMODE_S))
+
+/*
+ * To successfully restore TXMODE to zero at the end of the function the
+ * following value (rather than zero) must be used.
+ */
+#define TXMODE_FPURMODE_RESET (TXMODE_FPURMODEWRITE_BIT)
+
+/*
+ * In TXSTATUS a special bit exists to indicate if FPU H/W has been accessed
+ * since it was last reset.
+ */
+#define TXSTATUS_FPACTIVE_BIT 0x01000000
+
+/*
+ * Exception state (see TXDEFR_FPE_FE_*) and enabling for interrupt
+ * level processing (see TXDEFR_FPE_ICTRL_*) are controlled by similar
+ * bit mask locations within each field.
+ */
+#define METAG_FPU_FE_INEXACT 0x01
+#define METAG_FPU_FE_UNDERFLOW 0x02
+#define METAG_FPU_FE_OVERFLOW 0x04
+#define METAG_FPU_FE_DIVBYZERO 0x08
+#define METAG_FPU_FE_INVALID 0x10
+#define METAG_FPU_FE_DENORMAL 0x20
+#define METAG_FPU_FE_ALL_EXCEPT (METAG_FPU_FE_INEXACT | \
+ METAG_FPU_FE_UNDERFLOW | \
+ METAG_FPU_FE_OVERFLOW | \
+ METAG_FPU_FE_DIVBYZERO | \
+ METAG_FPU_FE_INVALID | \
+ METAG_FPU_FE_DENORMAL)
+
+/*****************************************************************************
+ * THREAD CONTROL, ERROR, OR INTERRUPT STATE EXTENSIONS
+ ****************************************************************************/
+/*
+ * The following values are only relevant to code that externally controls
+ * threads, handles errors/interrupts, and/or set-up interrupt/error handlers
+ * for subsequent use.
+ */
+
+/*
+ * TXENABLE register fields - only ENABLE_BIT is potentially read/write
+ */
+#define TXENABLE_MAJOR_REV_BITS 0xFF000000
+#define TXENABLE_MAJOR_REV_S 24
+#define TXENABLE_MINOR_REV_BITS 0x00FF0000
+#define TXENABLE_MINOR_REV_S 16
+#define TXENABLE_CLASS_BITS 0x0000F000
+#define TXENABLE_CLASS_S 12
+#define TXENABLE_CLASS_DSP 0x0 /* -> DSP Thread */
+#define TXENABLE_CLASS_LDSP 0x8 /* -> DSP LITE Thread */
+#define TXENABLE_CLASS_GP 0xC /* -> General Purpose Thread */
+#define TXENABLE_CLASSALT_LFPU 0x2 /* Set to indicate LITE FPU */
+#define TXENABLE_CLASSALT_FPUR8 0x1 /* Set to indicate 8xFPU regs */
+#define TXENABLE_MTXARCH_BIT 0x00000800
+#define TXENABLE_STEP_REV_BITS 0x000000F0
+#define TXENABLE_STEP_REV_S 4
+#define TXENABLE_STOPPED_BIT 0x00000004 /* TXOFF due to ENABLE->0 */
+#define TXENABLE_OFF_BIT 0x00000002 /* Thread is in off state */
+#define TXENABLE_ENABLE_BIT 0x00000001 /* Set if running */
+
+/*
+ * TXSTATUS register - used by external/internal interrupt/error handler
+ */
+#define TXSTATUS_CB1MARKER_BIT 0x00800000 /* -> int level mem state */
+#define TXSTATUS_CBMARKER_BIT 0x00400000 /* -> mem i/f state dumped */
+#define TXSTATUS_MEM_FAULT_BITS 0x00300000
+#define TXSTATUS_MEM_FAULT_S 20
+#define TXSTATUS_MEMFAULT_NONE 0x0 /* -> No memory fault */
+#define TXSTATUS_MEMFAULT_GEN 0x1 /* -> General fault */
+#define TXSTATUS_MEMFAULT_PF 0x2 /* -> Page fault */
+#define TXSTATUS_MEMFAULT_RO 0x3 /* -> Read only fault */
+#define TXSTATUS_MAJOR_HALT_BITS 0x000C0000
+#define TXSTATUS_MAJOR_HALT_S 18
+#define TXSTATUS_MAJHALT_TRAP 0x0 /* -> SWITCH inst used */
+#define TXSTATUS_MAJHALT_INST 0x1 /* -> Unknown inst or fetch */
+#define TXSTATUS_MAJHALT_PRIV 0x2 /* -> Internal privilege */
+#define TXSTATUS_MAJHALT_MEM 0x3 /* -> Memory i/f fault */
+#define TXSTATUS_L_STEP_BITS 0x00000800 /* -> Progress of L oper */
+#define TXSTATUS_LSM_STEP_BITS 0x00000700 /* -> Progress of L/S mult */
+#define TXSTATUS_LSM_STEP_S 8
+#define TXSTATUS_FLAG_BITS 0x0000001F /* -> All the flags */
+#define TXSTATUS_SCC_BIT 0x00000010 /* -> Split-16 flags ... */
+#define TXSTATUS_SCF_LZ_BIT 0x00000008 /* -> Split-16 Low Z flag */
+#define TXSTATUS_SCF_HZ_BIT 0x00000004 /* -> Split-16 High Z flag */
+#define TXSTATUS_SCF_HC_BIT 0x00000002 /* -> Split-16 High C flag */
+#define TXSTATUS_SCF_LC_BIT 0x00000001 /* -> Split-16 Low C flag */
+#define TXSTATUS_CF_Z_BIT 0x00000008 /* -> Condition Z flag */
+#define TXSTATUS_CF_N_BIT 0x00000004 /* -> Condition N flag */
+#define TXSTATUS_CF_O_BIT 0x00000002 /* -> Condition O flag */
+#define TXSTATUS_CF_C_BIT 0x00000001 /* -> Condition C flag */
+
+/*
+ * TXCATCH0-3 register contents may store information on a memory operation
+ * that has failed if the bit TXSTATUS_CBMARKER_BIT is set.
+ */
+#define TXCATCH0_REGNUM 16
+#define TXCATCH1_REGNUM 17
+#define TXCATCH1_ADDR_BITS 0xFFFFFFFF /* TXCATCH1 is Addr 0-31 */
+#define TXCATCH1_ADDR_S 0
+#define TXCATCH2_REGNUM 18
+#define TXCATCH2_DATA0_BITS 0xFFFFFFFF /* TXCATCH2 is Data 0-31 */
+#define TXCATCH2_DATA0_S 0
+#define TXCATCH3_REGNUM 19
+#define TXCATCH3_DATA1_BITS 0xFFFFFFFF /* TXCATCH3 is Data 32-63 */
+#define TXCATCH3_DATA1_S 0
+
+/*
+ * Detailed catch state information
+ * --------------------------------
+ */
+
+/* Contents of TXCATCH0 register */
+#define TXCATCH0_LDRXX_BITS 0xF8000000 /* Load destination reg 0-31 */
+#define TXCATCH0_LDRXX_S 27
+#define TXCATCH0_LDDST_BITS 0x07FF0000 /* Load destination bits */
+#define TXCATCH0_LDDST_S 16
+#define TXCATCH0_LDDST_D1DSP 0x400 /* One bit set if it's a LOAD */
+#define TXCATCH0_LDDST_D0DSP 0x200
+#define TXCATCH0_LDDST_TMPLT 0x100
+#define TXCATCH0_LDDST_TR 0x080
+#ifdef METAC_2_1
+#define TXCATCH0_LDDST_FPU 0x040
+#endif
+#define TXCATCH0_LDDST_PC 0x020
+#define TXCATCH0_LDDST_A1 0x010
+#define TXCATCH0_LDDST_A0 0x008
+#define TXCATCH0_LDDST_D1 0x004
+#define TXCATCH0_LDDST_D0 0x002
+#define TXCATCH0_LDDST_CT 0x001
+#ifdef METAC_2_1
+#define TXCATCH0_WATCHSTOP_BIT 0x00004000 /* Set if Data Watch set fault */
+#endif
+#define TXCATCH0_WATCHS_BIT 0x00004000 /* Set if Data Watch set fault */
+#define TXCATCH0_WATCH1_BIT 0x00002000 /* Set if Data Watch 1 matches */
+#define TXCATCH0_WATCH0_BIT 0x00001000 /* Set if Data Watch 0 matches */
+#define TXCATCH0_FAULT_BITS 0x00000C00 /* See TXSTATUS_MEMFAULT_* */
+#define TXCATCH0_FAULT_S 10
+#define TXCATCH0_PRIV_BIT 0x00000200 /* Privilege of transaction */
+#define TXCATCH0_READ_BIT 0x00000100 /* Set for Read or Load cases */
+
+#ifdef METAC_2_1
+/* LNKGET Marker bit in TXCATCH0 */
+#define TXCATCH0_LNKGET_MARKER_BIT 0x00000008
+#define TXCATCH0_PREPROC_BIT 0x00000004
+#endif
+
+/* Loads are indicated by one of the LDDST bits being set */
+#define TXCATCH0_LDM16_BIT 0x00000004 /* Load M16 flag */
+#define TXCATCH0_LDL2L1_BITS 0x00000003 /* Load data size L2,L1 */
+#define TXCATCH0_LDL2L1_S 0
+
+/* Reads are indicated by the READ bit being set without LDDST bits */
+#define TXCATCH0_RAXX_BITS 0x0000001F /* RAXX issue port for read */
+#define TXCATCH0_RAXX_S 0
+
+/* Write operations are all that remain if READ bit is not set */
+#define TXCATCH0_WMASK_BITS 0x000000FF /* Write byte lane mask */
+#define TXCATCH0_WMASK_S 0
+
+#ifdef METAC_2_1
+
+/* When a FPU exception is signalled then FPUSPEC == FPUSPEC_TAG */
+#define TXCATCH0_FPURDREG_BITS 0xF8000000
+#define TXCATCH0_FPURDREG_S 27
+#define TXCATCH0_FPUR1REG_BITS 0x07C00000
+#define TXCATCH0_FPUR1REG_S 22
+#define TXCATCH0_FPUSPEC_BITS 0x000F0000
+#define TXCATCH0_FPUSPEC_S 16
+#define TXCATCH0_FPUSPEC_TAG 0xF
+#define TXCATCH0_FPUINSTA_BIT 0x00001000
+#define TXCATCH0_FPUINSTQ_BIT 0x00000800
+#define TXCATCH0_FPUINSTZ_BIT 0x00000400
+#define TXCATCH0_FPUINSTN_BIT 0x00000200
+#define TXCATCH0_FPUINSTO3O_BIT 0x00000100
+#define TXCATCH0_FPUWIDTH_BITS 0x000000C0
+#define TXCATCH0_FPUWIDTH_S 6
+#define TXCATCH0_FPUWIDTH_FLOAT 0
+#define TXCATCH0_FPUWIDTH_DOUBLE 1
+#define TXCATCH0_FPUWIDTH_PAIRED 2
+#define TXCATCH0_FPUOPENC_BITS 0x0000003F
+#define TXCATCH0_FPUOPENC_S 0
+#define TXCATCH0_FPUOPENC_ADD 0 /* rop1=Rs1, rop3=Rs2 */
+#define TXCATCH0_FPUOPENC_SUB 1 /* rop1=Rs1, rop3=Rs2 */
+#define TXCATCH0_FPUOPENC_MUL 2 /* rop1=Rs1, rop2=Rs2 */
+#define TXCATCH0_FPUOPENC_ATOI 3 /* rop3=Rs */
+#define TXCATCH0_FPUOPENC_ATOX 4 /* rop3=Rs, uses #Imm */
+#define TXCATCH0_FPUOPENC_ITOA 5 /* rop3=Rs */
+#define TXCATCH0_FPUOPENC_XTOA 6 /* rop3=Rs, uses #Imm */
+#define TXCATCH0_FPUOPENC_ATOH 7 /* rop2=Rs */
+#define TXCATCH0_FPUOPENC_HTOA 8 /* rop2=Rs */
+#define TXCATCH0_FPUOPENC_DTOF 9 /* rop3=Rs */
+#define TXCATCH0_FPUOPENC_FTOD 10 /* rop3=Rs */
+#define TXCATCH0_FPUOPENC_DTOL 11 /* rop3=Rs */
+#define TXCATCH0_FPUOPENC_LTOD 12 /* rop3=Rs */
+#define TXCATCH0_FPUOPENC_DTOXL 13 /* rop3=Rs, uses #imm */
+#define TXCATCH0_FPUOPENC_XLTOD 14 /* rop3=Rs, uses #imm */
+#define TXCATCH0_FPUOPENC_CMP 15 /* rop1=Rs1, rop2=Rs2 */
+#define TXCATCH0_FPUOPENC_MIN 16 /* rop1=Rs1, rop2=Rs2 */
+#define TXCATCH0_FPUOPENC_MAX 17 /* rop1=Rs1, rop2=Rs2 */
+#define TXCATCH0_FPUOPENC_ADDRE 18 /* rop1=Rs1, rop3=Rs2 */
+#define TXCATCH0_FPUOPENC_SUBRE 19 /* rop1=Rs1, rop3=Rs2 */
+#define TXCATCH0_FPUOPENC_MULRE 20 /* rop1=Rs1, rop2=Rs2 */
+#define TXCATCH0_FPUOPENC_MXA 21 /* rop1=Rs1, rop2=Rs2, rop3=Rs3*/
+#define TXCATCH0_FPUOPENC_MXAS 22 /* rop1=Rs1, rop2=Rs2, rop3=Rs3*/
+#define TXCATCH0_FPUOPENC_MAR 23 /* rop1=Rs1, rop2=Rs2 */
+#define TXCATCH0_FPUOPENC_MARS 24 /* rop1=Rs1, rop2=Rs2 */
+#define TXCATCH0_FPUOPENC_MUZ 25 /* rop1=Rs1, rop2=Rs2, rop3=Rs3*/
+#define TXCATCH0_FPUOPENC_MUZS 26 /* rop1=Rs1, rop2=Rs2, rop3=Rs3*/
+#define TXCATCH0_FPUOPENC_RCP 27 /* rop2=Rs */
+#define TXCATCH0_FPUOPENC_RSQ 28 /* rop2=Rs */
+
+/* For floating point exceptions TXCATCH1 is used to carry extra data */
+#define TXCATCH1_FPUR2REG_BITS 0xF8000000
+#define TXCATCH1_FPUR2REG_S 27
+#define TXCATCH1_FPUR3REG_BITS 0x07C00000 /* Undefined if O3O set */
+#define TXCATCH1_FPUR3REG_S 22
+#define TXCATCH1_FPUIMM16_BITS 0x0000FFFF
+#define TXCATCH1_FPUIMM16_S 0
+
+#endif /* METAC_2_1 */
+
+/*
+ * TXDIVTIME register used to hold the partial base address of memory i/f
+ * state dump area. Now deprecated.
+ */
+#define TXDIVTIME_CBBASE_MASK 0x03FFFE00
+#define TXDIVTIME_CBBASE_LINBASE 0x80000000
+#define TXDIVTIME_CBBASE_LINBOFF 0x00000000 /* BGnd state */
+#define TXDIVTIME_CBBASE_LINIOFF 0x00000100 /* Int state */
+
+/*
+ * TXDIVTIME register used to indicate if the read pipeline was dirty when a
+ * thread was interrupted, halted, or generated an exception. It is invalid
+ * to attempt to issue a further pipeline read address while the read
+ * pipeline is in the dirty state.
+ */
+#define TXDIVTIME_RPDIRTY_BIT 0x80000000
+
+/*
+ * Further bits in the TXDIVTIME register allow interrupt handling code to
+ * short-cut the discovery of the most significant bit last read from TXSTATI.
+ *
+ * This is the bit number of the trigger line that a low level interrupt
+ * handler should acknowledge and then perhaps the index of a corresponding
+ * handler function.
+ */
+#define TXDIVTIME_IRQENC_BITS 0x0F000000
+#define TXDIVTIME_IRQENC_S 24
+
+/*
+ * If TXDIVTIME_RPVALID_BIT is set the read pipeline contained significant
+ * information when the thread was interrupted|halted|exceptioned. Each slot
+ * containing data is indicated by a one bit in the corresponding
+ * TXDIVTIME_RPMASK_BITS bit (least significant bit relates to first
+ * location in read pipeline - most likely to have the 1 state). Empty slots
+ * contain zeroes with no interlock applied on reads if RPDIRTY is currently
+ * set with RPMASK itself being read-only state.
+ */
+#define TXDIVTIME_RPMASK_BITS 0x003F0000 /* -> Full (1) Empty (0) */
+#define TXDIVTIME_RPMASK_S 16
+
+/*
+ * TXPRIVEXT register can be used to single step thread execution and
+ * enforce synchronous memory i/f address checking for debugging purposes.
+ */
+#define TXPRIVEXT_TXSTEP_BIT 0x00000004
+#define TXPRIVEXT_MEMCHECK_BIT 0x00000002
+
+/*
+ * TXINTERNx registers hold internal state information for H/W debugging only
+ */
+#define TXINTERN0_REGNUM 23
+#define TXINTERN0_LOCK2_BITS 0xF0000000
+#define TXINTERN0_LOCK2_S 28
+#define TXINTERN0_LOCK1_BITS 0x0F000000
+#define TXINTERN0_LOCK1_S 24
+#define TXINTERN0_TIFDF_BITS 0x0000F000
+#define TXINTERN0_TIFDF_S 12
+#define TXINTERN0_TIFIB_BITS 0x00000F00
+#define TXINTERN0_TIFIB_S 8
+#define TXINTERN0_TIFAF_BITS 0x000000F0
+#define TXINTERN0_TIFAF_S 4
+#define TXINTERN0_MSTATE_BITS 0x0000000F
+#define TXINTERN0_MSTATE_S 0
+
+/*
+ * TXSTAT, TXMASK, TXPOLL, TXSTATI, TXMASKI, TXPOLLI registers from trigger
+ * bank all have similar contents (upper kick count bits not in MASK regs)
+ */
+#define TXSTAT_REGNUM 0
+#define TXSTAT_TIMER_BIT 0x00000001
+#define TXSTAT_TIMER_S 0
+#define TXSTAT_KICK_BIT 0x00000002
+#define TXSTAT_KICK_S 1
+#define TXSTAT_DEFER_BIT 0x00000008
+#define TXSTAT_DEFER_S 3
+#define TXSTAT_EXTTRIG_BITS 0x0000FFF0
+#define TXSTAT_EXTTRIG_S 4
+#define TXSTAT_FPE_BITS 0x003F0000
+#define TXSTAT_FPE_S 16
+#define TXSTAT_FPE_DENORMAL_BIT 0x00200000
+#define TXSTAT_FPE_DENORMAL_S 21
+#define TXSTAT_FPE_INVALID_BIT 0x00100000
+#define TXSTAT_FPE_INVALID_S 20
+#define TXSTAT_FPE_DIVBYZERO_BIT 0x00080000
+#define TXSTAT_FPE_DIVBYZERO_S 19
+#define TXSTAT_FPE_OVERFLOW_BIT 0x00040000
+#define TXSTAT_FPE_OVERFLOW_S 18
+#define TXSTAT_FPE_UNDERFLOW_BIT 0x00020000
+#define TXSTAT_FPE_UNDERFLOW_S 17
+#define TXSTAT_FPE_INEXACT_BIT 0x00010000
+#define TXSTAT_FPE_INEXACT_S 16
+#define TXSTAT_BUSERR_BIT 0x00800000 /* Set if bus error/ack state */
+#define TXSTAT_BUSERR_S 23
+#define TXSTAT_BUSSTATE_BITS 0xFF000000 /* Read only */
+#define TXSTAT_BUSSTATE_S 24
+#define TXSTAT_KICKCNT_BITS 0xFFFF0000
+#define TXSTAT_KICKCNT_S 16
+#define TXMASK_REGNUM 1
+#define TXSTATI_REGNUM 2
+#define TXSTATI_BGNDHALT_BIT 0x00000004
+#define TXMASKI_REGNUM 3
+#define TXPOLL_REGNUM 4
+#define TXPOLLI_REGNUM 6
+
+/*
+ * TXDRCTRL register can be used to partition the DSP RAM space available to
+ * this thread at startup. This is achieved by offsetting the region allocated
+ * to each thread.
+ */
+#define TXDRCTRL_D1PARTOR_BITS 0x00000F00 /* OR's into top 4 bits */
+#define TXDRCTRL_D1PARTOR_S 8
+#define TXDRCTRL_D0PARTOR_BITS 0x0000000F /* OR's into top 4 bits */
+#define TXDRCTRL_D0PARTOR_S 0
+/* Given extracted Pow and Or fields this is the thread's base within DSP RAM */
+#define TXDRCTRL_DXBASE(Pow, Or) ((Or)<<((Pow)-4))
+
+/*****************************************************************************
+ * RUN TIME TRACE CONTROL REGISTERS
+ ****************************************************************************/
+/*
+ * The following values are only relevant to code that implements run-time
+ * trace features within the META Core
+ */
+#define TTEXEC TT.0
+#define TTCTRL TT.1
+#define TTMARK TT.2
+#define TTREC TT.3
+#define GTEXEC TT.4
+
+#define TTEXEC_REGNUM 0
+#define TTEXEC_EXTTRIGAND_BITS 0x7F000000
+#define TTEXEC_EXTTRIGAND_S 24
+#define TTEXEC_EXTTRIGEN_BIT 0x00008000
+#define TTEXEC_EXTTRIGMATCH_BITS 0x00007F00
+#define TTEXEC_EXTTRIGMATCH_S 8
+#define TTEXEC_TCMODE_BITS 0x00000003
+#define TTEXEC_TCMODE_S 0
+
+#define TTCTRL_REGNUM 1
+#define TTCTRL_TRACETT_BITS 0x00008000
+#define TTCTRL_TRACETT_S 15
+#define TTCTRL_TRACEALL_BITS 0x00002000
+#define TTCTRL_TRACEALL_S 13
+#ifdef METAC_2_1
+#define TTCTRL_TRACEALLTAG_BITS 0x00000400
+#define TTCTRL_TRACEALLTAG_S 10
+#endif /* METAC_2_1 */
+#define TTCTRL_TRACETAG_BITS 0x00000200
+#define TTCTRL_TRACETAG_S 9
+#define TTCTRL_TRACETTPC_BITS 0x00000080
+#define TTCTRL_TRACETTPC_S 7
+#define TTCTRL_TRACEMPC_BITS 0x00000020
+#define TTCTRL_TRACEMPC_S 5
+#define TTCTRL_TRACEEN_BITS 0x00000008
+#define TTCTRL_TRACEEN_S 3
+#define TTCTRL_TRACEEN1_BITS 0x00000004
+#define TTCTRL_TRACEEN1_S 2
+#define TTCTRL_TRACEPC_BITS 0x00000002
+#define TTCTRL_TRACEPC_S 1
+
+#ifdef METAC_2_1
+#define TTMARK_REGNUM 2
+#define TTMARK_BITS 0xFFFFFFFF
+#define TTMARK_S 0x0
+
+#define TTREC_REGNUM 3
+#define TTREC_BITS 0xFFFFFFFFFFFFFFFF
+#define TTREC_S 0x0
+#endif /* METAC_2_1 */
+
+#define GTEXEC_REGNUM 4
+#define GTEXEC_DCRUN_BITS 0x80000000
+#define GTEXEC_DCRUN_S 31
+#define GTEXEC_ICMODE_BITS 0x0C000000
+#define GTEXEC_ICMODE_S 26
+#define GTEXEC_TCMODE_BITS 0x03000000
+#define GTEXEC_TCMODE_S 24
+#define GTEXEC_PERF1CMODE_BITS 0x00040000
+#define GTEXEC_PERF1CMODE_S 18
+#define GTEXEC_PERF0CMODE_BITS 0x00010000
+#define GTEXEC_PERF0CMODE_S 16
+#define GTEXEC_REFMSEL_BITS 0x0000F000
+#define GTEXEC_REFMSEL_S 12
+#define GTEXEC_METRICTH_BITS 0x000003FF
+#define GTEXEC_METRICTH_S 0
+
+#ifdef METAC_2_1
+/*
+ * Clock Control registers
+ * -----------------------
+ */
+#define TXCLKCTRL_REGNUM 22
+
+/*
+ * Default setting is with clocks always on (DEFON), turning all clocks off
+ * can only be done from external devices (OFF), enabling automatic clock
+ * gating will allow clocks to stop as units fall idle.
+ */
+#define TXCLKCTRL_ALL_OFF 0x02222222 /* NOTE(review): same value as ALL_AUTO although TXCLKCTRL_CLOCKxx_OFF is 0 - confirm */
+#define TXCLKCTRL_ALL_DEFON 0x01111111 /* TXCLKCTRL_CLOCKxx_DEFON in every field */
+#define TXCLKCTRL_ALL_AUTO 0x02222222 /* TXCLKCTRL_CLOCKxx_AUTO in every field */
+
+/*
+ * Individual fields control caches, floating point and main data/addr units
+ */
+#define TXCLKCTRL_CLOCKIC_BITS 0x03000000
+#define TXCLKCTRL_CLOCKIC_S 24
+#define TXCLKCTRL_CLOCKDC_BITS 0x00300000
+#define TXCLKCTRL_CLOCKDC_S 20
+#define TXCLKCTRL_CLOCKFP_BITS 0x00030000
+#define TXCLKCTRL_CLOCKFP_S 16
+#define TXCLKCTRL_CLOCKD1_BITS 0x00003000
+#define TXCLKCTRL_CLOCKD1_S 12
+#define TXCLKCTRL_CLOCKD0_BITS 0x00000300
+#define TXCLKCTRL_CLOCKD0_S 8
+#define TXCLKCTRL_CLOCKA1_BITS 0x00000030
+#define TXCLKCTRL_CLOCKA1_S 4
+#define TXCLKCTRL_CLOCKA0_BITS 0x00000003
+#define TXCLKCTRL_CLOCKA0_S 0
+
+/*
+ * Individual settings for each field are common
+ */
+#define TXCLKCTRL_CLOCKxx_OFF 0
+#define TXCLKCTRL_CLOCKxx_DEFON 1
+#define TXCLKCTRL_CLOCKxx_AUTO 2
+
+#endif /* METAC_2_1 */
+
+#ifdef METAC_2_1
+/*
+ * Fast interrupt new bits
+ * ------------------------------------
+ */
+#define TXSTATUS_IPTOGGLE_BIT 0x80000000 /* Prev PToggle of TXPRIVEXT */
+#define TXSTATUS_ISTATE_BIT 0x40000000 /* IState bit */
+#define TXSTATUS_IWAIT_BIT 0x20000000 /* wait indefinitely in decision step */
+#define TXSTATUS_IEXCEPT_BIT 0x10000000 /* Indicate an exception occurred */
+#define TXSTATUS_IRPCOUNT_BITS 0x0E000000 /* Number of 'dirty' data entries */
+#define TXSTATUS_IRPCOUNT_S 25
+#define TXSTATUS_IRQSTAT_BITS 0x0000F000 /* IRQEnc bits, trigger or interrupts */
+#define TXSTATUS_IRQSTAT_S 12
+#define TXSTATUS_LNKSETOK_BIT 0x00000020 /* LNKSetOK bit, successful LNKSET */
+
+/* New fields in TXDIVTIME for fast interrupt system */
+#define TXDIVTIME_IACTIVE_BIT 0x00008000 /* Enable new interrupt system */
+#define TXDIVTIME_INONEST_BIT 0x00004000 /* Gate nested interrupt */
+#define TXDIVTIME_IREGIDXGATE_BIT 0x00002000 /* gate of the IRegIdx field */
+#define TXDIVTIME_IREGIDX_BITS 0x00001E00 /* Index of A0.0/1 replaces */
+#define TXDIVTIME_IREGIDX_S 9
+#define TXDIVTIME_NOST_BIT 0x00000100 /* disable superthreading bit */
+#endif
+
+#endif /* _ASM_METAG_REGS_H_ */
diff --git a/arch/metag/include/asm/mman.h b/arch/metag/include/asm/mman.h
new file mode 100644
index 00000000000..17999dba927
--- /dev/null
+++ b/arch/metag/include/asm/mman.h
@@ -0,0 +1,11 @@
+#ifndef __METAG_MMAN_H__
+#define __METAG_MMAN_H__
+
+#include <uapi/asm/mman.h>
+
+#ifndef __ASSEMBLY__
+#define arch_mmap_check metag_mmap_check
+int metag_mmap_check(unsigned long addr, unsigned long len,
+ unsigned long flags);
+#endif
+#endif /* __METAG_MMAN_H__ */
diff --git a/arch/metag/include/asm/mmu.h b/arch/metag/include/asm/mmu.h
new file mode 100644
index 00000000000..9c321147c0b
--- /dev/null
+++ b/arch/metag/include/asm/mmu.h
@@ -0,0 +1,77 @@
+#ifndef __MMU_H
+#define __MMU_H
+
+#ifdef CONFIG_METAG_USER_TCM
+#include <linux/list.h>
+#endif
+
+#ifdef CONFIG_HUGETLB_PAGE
+#include <asm/page.h>
+#endif
+
+typedef struct {
+ /* Software pgd base pointer used for Meta 1.x MMU. */
+ unsigned long pgd_base;
+#ifdef CONFIG_METAG_USER_TCM
+ struct list_head tcm;
+#endif
+#ifdef CONFIG_HUGETLB_PAGE
+#if HPAGE_SHIFT < HUGEPT_SHIFT
+ /* last partially filled huge page table address */
+ unsigned long part_huge;
+#endif
+#endif
+} mm_context_t;
+
+/* Given a virtual address, return the pte for the top level 4meg entry
+ * that maps that address.
+ * Returns 0 (an empty pte) if that range is not mapped.
+ */
+unsigned long mmu_read_first_level_page(unsigned long vaddr);
+
+/* Given a linear (virtual) address, return the second level 4k pte
+ * that maps that address. Returns 0 if the address is not mapped.
+ */
+unsigned long mmu_read_second_level_page(unsigned long vaddr);
+
+/* Get the virtual base address of the MMU */
+unsigned long mmu_get_base(void);
+
+/* Initialize the MMU. */
+void mmu_init(unsigned long mem_end);
+
+#ifdef CONFIG_METAG_META21_MMU
+/*
+ * For cpu "cpu" calculate and return the address of the
+ * MMCU_TnLOCAL_TABLE_PHYS0 if running in local-space or
+ * MMCU_TnGLOBAL_TABLE_PHYS0 if running in global-space.
+ */
+static inline unsigned long mmu_phys0_addr(unsigned int cpu)
+{
+ /*
+ * Start from the T0 local PHYS0 address, add one stride per "cpu",
+ * then the global-table offset when PAGE_OFFSET is in global space.
+ */
+ return (MMCU_T0LOCAL_TABLE_PHYS0 +
+ (MMCU_TnX_TABLE_PHYSX_STRIDE * cpu)) +
+ (MMCU_TXG_TABLE_PHYSX_OFFSET * is_global_space(PAGE_OFFSET));
+}
+
+/*
+ * For cpu "cpu" calculate and return the address of the
+ * MMCU_TnLOCAL_TABLE_PHYS1 if running in local-space or
+ * MMCU_TnGLOBAL_TABLE_PHYS1 if running in global-space.
+ */
+static inline unsigned long mmu_phys1_addr(unsigned int cpu)
+{
+ /*
+ * Start from the T0 local PHYS1 address, add one stride per "cpu",
+ * then the global-table offset when PAGE_OFFSET is in global space.
+ */
+ return (MMCU_T0LOCAL_TABLE_PHYS1 +
+ (MMCU_TnX_TABLE_PHYSX_STRIDE * cpu)) +
+ (MMCU_TXG_TABLE_PHYSX_OFFSET * is_global_space(PAGE_OFFSET));
+}
+#endif /* CONFIG_METAG_META21_MMU */
+
+#endif
diff --git a/arch/metag/include/asm/mmu_context.h b/arch/metag/include/asm/mmu_context.h
new file mode 100644
index 00000000000..ae2a71b5e0b
--- /dev/null
+++ b/arch/metag/include/asm/mmu_context.h
@@ -0,0 +1,113 @@
+#ifndef __METAG_MMU_CONTEXT_H
+#define __METAG_MMU_CONTEXT_H
+
+#include <asm-generic/mm_hooks.h>
+
+#include <asm/page.h>
+#include <asm/mmu.h>
+#include <asm/tlbflush.h>
+#include <asm/cacheflush.h>
+
+#include <linux/io.h>
+
+static inline void enter_lazy_tlb(struct mm_struct *mm,
+ struct task_struct *tsk)
+{
+}
+
+static inline int init_new_context(struct task_struct *tsk,
+ struct mm_struct *mm)
+{
+#ifndef CONFIG_METAG_META21_MMU
+ /* We use context.pgd_base to store a pointer to the page holding the
+ * pgd of a process while it is running. While a process is not
+ * running the pgd and context.pgd_base fields should be equal.
+ */
+ mm->context.pgd_base = (unsigned long) mm->pgd;
+#endif
+#ifdef CONFIG_METAG_USER_TCM
+ INIT_LIST_HEAD(&mm->context.tcm); /* no TCM allocations recorded yet */
+#endif
+ return 0; /* no failure path in this function */
+}
+
+#ifdef CONFIG_METAG_USER_TCM
+
+#include <linux/slab.h>
+#include <asm/tcm.h>
+
+static inline void destroy_context(struct mm_struct *mm)
+{
+ struct tcm_allocation *pos, *n;
+ /* Walk mm->context.tcm: tcm_free() each entry, unlink it, kfree() it. */
+ list_for_each_entry_safe(pos, n, &mm->context.tcm, list) {
+ tcm_free(pos->tag, pos->addr, pos->size);
+ list_del(&pos->list);
+ kfree(pos);
+ }
+}
+#else
+#define destroy_context(mm) do { } while (0)
+#endif
+
+#ifdef CONFIG_METAG_META21_MMU
+static inline void load_pgd(pgd_t *pgd, int thread)
+{
+ unsigned long phys0 = mmu_phys0_addr(thread);
+ unsigned long phys1 = mmu_phys1_addr(thread);
+
+ /*
+ * Value written to phys0: 0x900 -> 2Gb address space.
+ * The permission bits apply to the MMU table region, which gives a 2MB
+ * window into physical memory. We especially don't want userland to be
+ * able to access this.
+ */
+ metag_out32(0x900 | _PAGE_CACHEABLE | _PAGE_PRIV | _PAGE_WRITE |
+ _PAGE_PRESENT, phys0);
+ /* Set new MMU base address: masked physical address of pgd -> phys1 */
+ metag_out32(__pa(pgd) & MMCU_TBLPHYS1_ADDR_BITS, phys1);
+}
+#endif
+
+static inline void switch_mmu(struct mm_struct *prev, struct mm_struct *next)
+{
+#ifdef CONFIG_METAG_META21_MMU
+ load_pgd(next->pgd, hard_processor_id());
+#else
+ unsigned int i;
+
+ /* prev->context.pgd_base == prev->pgd in the case where we are
+ initially switching from the init task to the first process. */
+ if (prev->context.pgd_base != (unsigned long) prev->pgd) {
+ for (i = FIRST_USER_PGD_NR; i < USER_PTRS_PER_PGD; i++)
+ ((pgd_t *) prev->context.pgd_base)[i] = prev->pgd[i]; /* save prev's user entries */
+ } else
+ prev->pgd = (pgd_t *)mmu_get_base();
+
+ next->pgd = prev->pgd; /* next takes over the table prev was using */
+ prev->pgd = (pgd_t *) prev->context.pgd_base; /* prev points back at its saved copy */
+
+ for (i = FIRST_USER_PGD_NR; i < USER_PTRS_PER_PGD; i++)
+ next->pgd[i] = ((pgd_t *) next->context.pgd_base)[i]; /* load next's saved user entries */
+
+ flush_cache_all();
+#endif
+ flush_tlb_all();
+}
+
+static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next,
+ struct task_struct *tsk)
+{
+ if (prev != next)
+ switch_mmu(prev, next);
+}
+
+static inline void activate_mm(struct mm_struct *prev_mm,
+ struct mm_struct *next_mm)
+{
+ switch_mmu(prev_mm, next_mm);
+}
+
+#define deactivate_mm(tsk, mm) do { } while (0)
+
+#endif
diff --git a/arch/metag/include/asm/mmzone.h b/arch/metag/include/asm/mmzone.h
new file mode 100644
index 00000000000..9c88a9c65f5
--- /dev/null
+++ b/arch/metag/include/asm/mmzone.h
@@ -0,0 +1,42 @@
+#ifndef __ASM_METAG_MMZONE_H
+#define __ASM_METAG_MMZONE_H
+
+#ifdef CONFIG_NEED_MULTIPLE_NODES
+#include <linux/numa.h>
+
+extern struct pglist_data *node_data[];
+#define NODE_DATA(nid) (node_data[nid])
+
+static inline int pfn_to_nid(unsigned long pfn)
+{
+ int nid;
+
+ /* node_end_pfn() is one past the node's last pfn, so test with '<' */
+ for (nid = 0; nid < MAX_NUMNODES; nid++)
+ if (pfn >= node_start_pfn(nid) && pfn < node_end_pfn(nid))
+ break;
+ return nid; /* MAX_NUMNODES when no node spans pfn */
+}
+
+static inline struct pglist_data *pfn_to_pgdat(unsigned long pfn)
+{
+ return NODE_DATA(pfn_to_nid(pfn));
+}
+
+/* arch/metag/mm/numa.c */
+void __init setup_bootmem_node(int nid, unsigned long start, unsigned long end);
+#else
+static inline void
+setup_bootmem_node(int nid, unsigned long start, unsigned long end)
+{
+}
+#endif /* CONFIG_NEED_MULTIPLE_NODES */
+
+#ifdef CONFIG_NUMA
+/* SoC specific mem init */
+void __init soc_mem_setup(void);
+#else
+static inline void __init soc_mem_setup(void) {} /* no-op stub when !CONFIG_NUMA */
+#endif
+
+#endif /* __ASM_METAG_MMZONE_H */
diff --git a/arch/metag/include/asm/module.h b/arch/metag/include/asm/module.h
new file mode 100644
index 00000000000..e47e60941b2
--- /dev/null
+++ b/arch/metag/include/asm/module.h
@@ -0,0 +1,37 @@
+#ifndef _ASM_METAG_MODULE_H
+#define _ASM_METAG_MODULE_H
+
+#include <asm-generic/module.h>
+
+struct metag_plt_entry {
+ /* Indirect jump instruction sequence. */
+ unsigned long tramp[2];
+};
+
+struct mod_arch_specific {
+ /* Indices of PLT sections within module. */
+ unsigned int core_plt_section, init_plt_section;
+};
+
+#if defined CONFIG_METAG_META12
+#define MODULE_PROC_FAMILY "META 1.2 "
+#elif defined CONFIG_METAG_META21
+#define MODULE_PROC_FAMILY "META 2.1 "
+#else
+#define MODULE_PROC_FAMILY ""
+#endif
+
+#ifdef CONFIG_4KSTACKS
+#define MODULE_STACKSIZE "4KSTACKS "
+#else
+#define MODULE_STACKSIZE ""
+#endif
+
+#define MODULE_ARCH_VERMAGIC MODULE_PROC_FAMILY MODULE_STACKSIZE
+
+#ifdef MODULE
+asm(".section .plt,\"ax\",@progbits; .balign 8; .previous");
+asm(".section .init.plt,\"ax\",@progbits; .balign 8; .previous");
+#endif
+
+#endif /* _ASM_METAG_MODULE_H */
diff --git a/arch/metag/include/asm/page.h b/arch/metag/include/asm/page.h
new file mode 100644
index 00000000000..1e8e281b8bb
--- /dev/null
+++ b/arch/metag/include/asm/page.h
@@ -0,0 +1,128 @@
+#ifndef _METAG_PAGE_H
+#define _METAG_PAGE_H
+
+#include <linux/const.h>
+
+#include <asm/metag_mem.h>
+
+/* PAGE_SHIFT determines the page size */
+#if defined(CONFIG_PAGE_SIZE_4K)
+#define PAGE_SHIFT 12
+#elif defined(CONFIG_PAGE_SIZE_8K)
+#define PAGE_SHIFT 13
+#elif defined(CONFIG_PAGE_SIZE_16K)
+#define PAGE_SHIFT 14
+#endif
+
+#define PAGE_SIZE (_AC(1, UL) << PAGE_SHIFT)
+#define PAGE_MASK (~(PAGE_SIZE-1))
+
+#if defined(CONFIG_HUGETLB_PAGE_SIZE_8K)
+# define HPAGE_SHIFT 13
+#elif defined(CONFIG_HUGETLB_PAGE_SIZE_16K)
+# define HPAGE_SHIFT 14
+#elif defined(CONFIG_HUGETLB_PAGE_SIZE_32K)
+# define HPAGE_SHIFT 15
+#elif defined(CONFIG_HUGETLB_PAGE_SIZE_64K)
+# define HPAGE_SHIFT 16
+#elif defined(CONFIG_HUGETLB_PAGE_SIZE_128K)
+# define HPAGE_SHIFT 17
+#elif defined(CONFIG_HUGETLB_PAGE_SIZE_256K)
+# define HPAGE_SHIFT 18
+#elif defined(CONFIG_HUGETLB_PAGE_SIZE_512K)
+# define HPAGE_SHIFT 19
+#elif defined(CONFIG_HUGETLB_PAGE_SIZE_1M)
+# define HPAGE_SHIFT 20
+#elif defined(CONFIG_HUGETLB_PAGE_SIZE_2M)
+# define HPAGE_SHIFT 21
+#elif defined(CONFIG_HUGETLB_PAGE_SIZE_4M)
+# define HPAGE_SHIFT 22
+#endif
+
+#ifdef CONFIG_HUGETLB_PAGE
+# define HPAGE_SIZE (1UL << HPAGE_SHIFT)
+# define HPAGE_MASK (~(HPAGE_SIZE-1))
+# define HUGETLB_PAGE_ORDER (HPAGE_SHIFT-PAGE_SHIFT)
+/*
+ * We define our own hugetlb_get_unmapped_area so we don't corrupt 2nd level
+ * page tables with normal pages in them.
+ */
+# define HUGEPT_SHIFT (22)
+# define HUGEPT_ALIGN (1 << HUGEPT_SHIFT)
+# define HUGEPT_MASK (HUGEPT_ALIGN - 1)
+# define ALIGN_HUGEPT(x) ALIGN(x, HUGEPT_ALIGN)
+# define HAVE_ARCH_HUGETLB_UNMAPPED_AREA
+#endif
+
+#ifndef __ASSEMBLY__
+
+/* On the Meta, we would like to know if the address (heap) we have is
+ * in local or global space.
+ */
+#define is_global_space(addr) ((addr) > 0x7fffffff)
+#define is_local_space(addr) (!is_global_space(addr))
+
+extern void clear_page(void *to);
+extern void copy_page(void *to, void *from);
+
+#define clear_user_page(page, vaddr, pg) clear_page(page)
+#define copy_user_page(to, from, vaddr, pg) copy_page(to, from)
+
+/*
+ * These are used to make use of C type-checking..
+ */
+typedef struct { unsigned long pte; } pte_t;
+typedef struct { unsigned long pgd; } pgd_t;
+typedef struct { unsigned long pgprot; } pgprot_t;
+typedef struct page *pgtable_t;
+
+#define pte_val(x) ((x).pte)
+#define pgd_val(x) ((x).pgd)
+#define pgprot_val(x) ((x).pgprot)
+
+#define __pte(x) ((pte_t) { (x) })
+#define __pgd(x) ((pgd_t) { (x) })
+#define __pgprot(x) ((pgprot_t) { (x) })
+
+/* The kernel must now ALWAYS live at either 0xC0000000 or 0x40000000 - that
+ * being either global or local space.
+ */
+#define PAGE_OFFSET (CONFIG_PAGE_OFFSET)
+
+#if PAGE_OFFSET >= LINGLOBAL_BASE
+#define META_MEMORY_BASE LINGLOBAL_BASE
+#define META_MEMORY_LIMIT LINGLOBAL_LIMIT
+#else
+#define META_MEMORY_BASE LINLOCAL_BASE
+#define META_MEMORY_LIMIT LINLOCAL_LIMIT
+#endif
+
+/* Offset between physical and virtual mapping of kernel memory. */
+extern unsigned int meta_memoffset;
+
+#define __pa(x) ((unsigned long)(((unsigned long)(x)) - meta_memoffset))
+#define __va(x) ((void *)((unsigned long)(((unsigned long)(x)) + meta_memoffset)))
+
+extern unsigned long pfn_base;
+#define ARCH_PFN_OFFSET (pfn_base)
+#define virt_to_page(kaddr) pfn_to_page(__pa(kaddr) >> PAGE_SHIFT)
+#define page_to_virt(page) __va(page_to_pfn(page) << PAGE_SHIFT)
+#define virt_addr_valid(kaddr) pfn_valid(__pa(kaddr) >> PAGE_SHIFT)
+#define page_to_phys(page) (page_to_pfn(page) << PAGE_SHIFT)
+#ifdef CONFIG_FLATMEM
+extern unsigned long max_pfn;
+extern unsigned long min_low_pfn;
+#define pfn_valid(pfn) ((pfn) >= min_low_pfn && (pfn) < max_pfn)
+#endif
+
+#define pfn_to_kaddr(pfn) __va((pfn) << PAGE_SHIFT)
+
+#define VM_DATA_DEFAULT_FLAGS (VM_READ | VM_WRITE | VM_EXEC | \
+ VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC)
+
+#include <asm-generic/memory_model.h>
+#include <asm-generic/getorder.h>
+
+#endif /* __ASSEMBLY__ */
+
+#endif /* _METAG_PAGE_H */
diff --git a/arch/metag/include/asm/perf_event.h b/arch/metag/include/asm/perf_event.h
new file mode 100644
index 00000000000..105bbff0149
--- /dev/null
+++ b/arch/metag/include/asm/perf_event.h
@@ -0,0 +1,4 @@
+#ifndef __ASM_METAG_PERF_EVENT_H
+#define __ASM_METAG_PERF_EVENT_H
+
+#endif /* __ASM_METAG_PERF_EVENT_H */
diff --git a/arch/metag/include/asm/pgalloc.h b/arch/metag/include/asm/pgalloc.h
new file mode 100644
index 00000000000..275d9285141
--- /dev/null
+++ b/arch/metag/include/asm/pgalloc.h
@@ -0,0 +1,79 @@
+#ifndef _METAG_PGALLOC_H
+#define _METAG_PGALLOC_H
+
+#include <linux/threads.h>
+#include <linux/mm.h>
+
+#define pmd_populate_kernel(mm, pmd, pte) \
+ set_pmd(pmd, __pmd(_PAGE_TABLE | __pa(pte)))
+
+#define pmd_populate(mm, pmd, pte) \
+ set_pmd(pmd, __pmd(_PAGE_TABLE | page_to_phys(pte)))
+
+#define pmd_pgtable(pmd) pmd_page(pmd)
+
+/*
+ * Allocate and free page tables.
+ */
+#ifdef CONFIG_METAG_META21_MMU
+static inline void pgd_ctor(pgd_t *pgd)
+{
+ memcpy(pgd + USER_PTRS_PER_PGD,
+ swapper_pg_dir + USER_PTRS_PER_PGD,
+ (PTRS_PER_PGD - USER_PTRS_PER_PGD) * sizeof(pgd_t));
+}
+#else
+#define pgd_ctor(x) do { } while (0)
+#endif
+
+static inline pgd_t *pgd_alloc(struct mm_struct *mm)
+{
+ pgd_t *pgd = (pgd_t *)get_zeroed_page(GFP_KERNEL);
+ if (pgd)
+ pgd_ctor(pgd);
+ return pgd;
+}
+
+static inline void pgd_free(struct mm_struct *mm, pgd_t *pgd)
+{
+ free_page((unsigned long)pgd);
+}
+
+static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm,
+ unsigned long address)
+{
+ pte_t *pte = (pte_t *)__get_free_page(GFP_KERNEL | __GFP_REPEAT |
+ __GFP_ZERO);
+ return pte;
+}
+
+static inline pgtable_t pte_alloc_one(struct mm_struct *mm,
+ unsigned long address)
+{
+ struct page *pte;
+ pte = alloc_pages(GFP_KERNEL | __GFP_REPEAT | __GFP_ZERO, 0);
+ if (pte)
+ pgtable_page_ctor(pte);
+ return pte;
+}
+
+static inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte)
+{
+ free_page((unsigned long)pte);
+}
+
+static inline void pte_free(struct mm_struct *mm, pgtable_t pte)
+{
+ pgtable_page_dtor(pte);
+ __free_page(pte);
+}
+
+#define __pte_free_tlb(tlb, pte, addr) \
+ do { \
+ pgtable_page_dtor(pte); \
+ tlb_remove_page((tlb), (pte)); \
+ } while (0)
+
+#define check_pgt_cache() do { } while (0)
+
+#endif
diff --git a/arch/metag/include/asm/pgtable.h b/arch/metag/include/asm/pgtable.h
new file mode 100644
index 00000000000..1cd13d59519
--- /dev/null
+++ b/arch/metag/include/asm/pgtable.h
@@ -0,0 +1,370 @@
+/*
+ * Macros and functions to manipulate Meta page tables.
+ */
+
+#ifndef _METAG_PGTABLE_H
+#define _METAG_PGTABLE_H
+
+#include <asm-generic/pgtable-nopmd.h>
+
+/* Invalid regions on Meta: 0x00000000-0x001FFFFF and 0xFFFF0000-0xFFFFFFFF */
+#if PAGE_OFFSET >= LINGLOBAL_BASE
+#define CONSISTENT_START 0xF7000000
+#define CONSISTENT_END 0xF73FFFFF
+#define VMALLOC_START 0xF8000000
+#define VMALLOC_END 0xFFFEFFFF
+#else
+#define CONSISTENT_START 0x77000000
+#define CONSISTENT_END 0x773FFFFF
+#define VMALLOC_START 0x78000000
+#define VMALLOC_END 0x7FFFFFFF
+#endif
+
+/*
+ * Definitions for MMU descriptors
+ *
+ * These are the hardware bits in the MMCU pte entries.
+ * Derived from the Meta toolkit headers.
+ */
+#define _PAGE_PRESENT MMCU_ENTRY_VAL_BIT
+#define _PAGE_WRITE MMCU_ENTRY_WR_BIT
+#define _PAGE_PRIV MMCU_ENTRY_PRIV_BIT
+/* Write combine bit - this can cause writes to occur out of order */
+#define _PAGE_WR_COMBINE MMCU_ENTRY_WRC_BIT
+/* Sys coherent bit - this bit is never used by Linux */
+#define _PAGE_SYS_COHERENT MMCU_ENTRY_SYS_BIT
+#define _PAGE_ALWAYS_ZERO_1 0x020
+#define _PAGE_CACHE_CTRL0 0x040
+#define _PAGE_CACHE_CTRL1 0x080
+#define _PAGE_ALWAYS_ZERO_2 0x100
+#define _PAGE_ALWAYS_ZERO_3 0x200
+#define _PAGE_ALWAYS_ZERO_4 0x400
+#define _PAGE_ALWAYS_ZERO_5 0x800
+
+/* These are software bits that we stuff into the gaps in the hardware
+ * pte entries that are not used. Note, these DO get stored in the actual
+ * hardware, but the hardware just does not use them.
+ */
+#define _PAGE_ACCESSED _PAGE_ALWAYS_ZERO_1
+#define _PAGE_DIRTY _PAGE_ALWAYS_ZERO_2
+#define _PAGE_FILE _PAGE_ALWAYS_ZERO_3
+
+/* Pages owned, and protected by, the kernel. */
+#define _PAGE_KERNEL _PAGE_PRIV
+
+/* No cacheing of this page */
+#define _PAGE_CACHE_WIN0 (MMCU_CWIN_UNCACHED << MMCU_ENTRY_CWIN_S)
+/* burst cacheing - good for data streaming */
+#define _PAGE_CACHE_WIN1 (MMCU_CWIN_BURST << MMCU_ENTRY_CWIN_S)
+/* One cache way per thread */
+#define _PAGE_CACHE_WIN2 (MMCU_CWIN_C1SET << MMCU_ENTRY_CWIN_S)
+/* Full on cacheing */
+#define _PAGE_CACHE_WIN3 (MMCU_CWIN_CACHED << MMCU_ENTRY_CWIN_S)
+
+#define _PAGE_CACHEABLE (_PAGE_CACHE_WIN3 | _PAGE_WR_COMBINE)
+
+/* which bits are used for cache control ... */
+#define _PAGE_CACHE_MASK (_PAGE_CACHE_CTRL0 | _PAGE_CACHE_CTRL1 | \
+ _PAGE_WR_COMBINE)
+
+/* This is a mask of the bits that pte_modify preserves (the page frame). */
+#define _PAGE_CHG_MASK (PAGE_MASK)
+
+#define _PAGE_SZ_SHIFT 1
+#define _PAGE_SZ_4K (0x0)
+#define _PAGE_SZ_8K (0x1 << _PAGE_SZ_SHIFT)
+#define _PAGE_SZ_16K (0x2 << _PAGE_SZ_SHIFT)
+#define _PAGE_SZ_32K (0x3 << _PAGE_SZ_SHIFT)
+#define _PAGE_SZ_64K (0x4 << _PAGE_SZ_SHIFT)
+#define _PAGE_SZ_128K (0x5 << _PAGE_SZ_SHIFT)
+#define _PAGE_SZ_256K (0x6 << _PAGE_SZ_SHIFT)
+#define _PAGE_SZ_512K (0x7 << _PAGE_SZ_SHIFT)
+#define _PAGE_SZ_1M (0x8 << _PAGE_SZ_SHIFT)
+#define _PAGE_SZ_2M (0x9 << _PAGE_SZ_SHIFT)
+#define _PAGE_SZ_4M (0xa << _PAGE_SZ_SHIFT)
+#define _PAGE_SZ_MASK (0xf << _PAGE_SZ_SHIFT)
+
+#if defined(CONFIG_PAGE_SIZE_4K)
+#define _PAGE_SZ (_PAGE_SZ_4K)
+#elif defined(CONFIG_PAGE_SIZE_8K)
+#define _PAGE_SZ (_PAGE_SZ_8K)
+#elif defined(CONFIG_PAGE_SIZE_16K)
+#define _PAGE_SZ (_PAGE_SZ_16K)
+#endif
+#define _PAGE_TABLE (_PAGE_SZ | _PAGE_PRESENT)
+
+#if defined(CONFIG_HUGETLB_PAGE_SIZE_8K)
+# define _PAGE_SZHUGE (_PAGE_SZ_8K)
+#elif defined(CONFIG_HUGETLB_PAGE_SIZE_16K)
+# define _PAGE_SZHUGE (_PAGE_SZ_16K)
+#elif defined(CONFIG_HUGETLB_PAGE_SIZE_32K)
+# define _PAGE_SZHUGE (_PAGE_SZ_32K)
+#elif defined(CONFIG_HUGETLB_PAGE_SIZE_64K)
+# define _PAGE_SZHUGE (_PAGE_SZ_64K)
+#elif defined(CONFIG_HUGETLB_PAGE_SIZE_128K)
+# define _PAGE_SZHUGE (_PAGE_SZ_128K)
+#elif defined(CONFIG_HUGETLB_PAGE_SIZE_256K)
+# define _PAGE_SZHUGE (_PAGE_SZ_256K)
+#elif defined(CONFIG_HUGETLB_PAGE_SIZE_512K)
+# define _PAGE_SZHUGE (_PAGE_SZ_512K)
+#elif defined(CONFIG_HUGETLB_PAGE_SIZE_1M)
+# define _PAGE_SZHUGE (_PAGE_SZ_1M)
+#elif defined(CONFIG_HUGETLB_PAGE_SIZE_2M)
+# define _PAGE_SZHUGE (_PAGE_SZ_2M)
+#elif defined(CONFIG_HUGETLB_PAGE_SIZE_4M)
+# define _PAGE_SZHUGE (_PAGE_SZ_4M)
+#endif
+
+/*
+ * The Linux memory management assumes a three-level page table setup. On
+ * Meta, we use that, but "fold" the mid level into the top-level page
+ * table.
+ */
+
+/* PGDIR_SHIFT determines the size of the area a second-level page table can
+ * map. This is always 4MB.
+ */
+
+#define PGDIR_SHIFT 22
+#define PGDIR_SIZE (1UL << PGDIR_SHIFT)
+#define PGDIR_MASK (~(PGDIR_SIZE-1))
+
+/*
+ * Entries per page directory level: we use a two-level, so
+ * we don't really have any PMD directory physically. First level tables
+ * always map 2Gb (local or global) at a granularity of 4MB, second-level
+ * tables map 4MB with a granularity between 4MB and 4kB (between 1 and
+ * 1024 entries).
+ */
+#define PTRS_PER_PTE (PGDIR_SIZE/PAGE_SIZE)
+#define HPTRS_PER_PTE (PGDIR_SIZE/HPAGE_SIZE)
+#define PTRS_PER_PGD 512
+
+#define USER_PTRS_PER_PGD 256
+#define FIRST_USER_ADDRESS META_MEMORY_BASE
+#define FIRST_USER_PGD_NR pgd_index(FIRST_USER_ADDRESS)
+
+#define PAGE_NONE __pgprot(_PAGE_PRESENT | _PAGE_ACCESSED | \
+ _PAGE_CACHEABLE)
+
+#define PAGE_SHARED __pgprot(_PAGE_PRESENT | _PAGE_WRITE | \
+ _PAGE_ACCESSED | _PAGE_CACHEABLE)
+#define PAGE_SHARED_C PAGE_SHARED
+#define PAGE_COPY __pgprot(_PAGE_PRESENT | _PAGE_ACCESSED | \
+ _PAGE_CACHEABLE)
+#define PAGE_COPY_C PAGE_COPY
+
+#define PAGE_READONLY __pgprot(_PAGE_PRESENT | _PAGE_ACCESSED | \
+ _PAGE_CACHEABLE)
+#define PAGE_KERNEL __pgprot(_PAGE_PRESENT | _PAGE_DIRTY | \
+ _PAGE_ACCESSED | _PAGE_WRITE | \
+ _PAGE_CACHEABLE | _PAGE_KERNEL)
+
+#define __P000 PAGE_NONE
+#define __P001 PAGE_READONLY
+#define __P010 PAGE_COPY
+#define __P011 PAGE_COPY
+#define __P100 PAGE_READONLY
+#define __P101 PAGE_READONLY
+#define __P110 PAGE_COPY_C
+#define __P111 PAGE_COPY_C
+
+#define __S000 PAGE_NONE
+#define __S001 PAGE_READONLY
+#define __S010 PAGE_SHARED
+#define __S011 PAGE_SHARED
+#define __S100 PAGE_READONLY
+#define __S101 PAGE_READONLY
+#define __S110 PAGE_SHARED_C
+#define __S111 PAGE_SHARED_C
+
+#ifndef __ASSEMBLY__
+
+#include <asm/page.h>
+
+/* zero page used for uninitialized stuff */
+extern unsigned long empty_zero_page;
+#define ZERO_PAGE(vaddr) (virt_to_page(empty_zero_page))
+
+/* Certain architectures need to do special things when pte's
+ * within a page table are directly modified. Thus, the following
+ * hook is made available.
+ */
+#define set_pte(pteptr, pteval) ((*(pteptr)) = (pteval))
+#define set_pte_at(mm, addr, ptep, pteval) set_pte(ptep, pteval)
+
+#define set_pmd(pmdptr, pmdval) (*(pmdptr) = (pmdval))
+
+#define pte_pfn(pte) (pte_val(pte) >> PAGE_SHIFT)
+
+#define pfn_pte(pfn, prot) __pte(((pfn) << PAGE_SHIFT) | pgprot_val(prot))
+
+#define pte_none(x) (!pte_val(x))
+#define pte_present(x) (pte_val(x) & _PAGE_PRESENT)
+#define pte_clear(mm, addr, xp) do { pte_val(*(xp)) = 0; } while (0)
+
+#define pmd_none(x) (!pmd_val(x))
+#define pmd_bad(x) ((pmd_val(x) & ~(PAGE_MASK | _PAGE_SZ_MASK)) \
+ != (_PAGE_TABLE & ~_PAGE_SZ_MASK))
+#define pmd_present(x) (pmd_val(x) & _PAGE_PRESENT)
+#define pmd_clear(xp) do { pmd_val(*(xp)) = 0; } while (0)
+
+#define pte_page(x) pfn_to_page(pte_pfn(x))
+
+/*
+ * The following only work if pte_present() is true.
+ * Undefined behaviour if not..
+ */
+
+static inline int pte_write(pte_t pte) { return pte_val(pte) & _PAGE_WRITE; }
+static inline int pte_dirty(pte_t pte) { return pte_val(pte) & _PAGE_DIRTY; }
+static inline int pte_young(pte_t pte) { return pte_val(pte) & _PAGE_ACCESSED; }
+static inline int pte_file(pte_t pte) { return pte_val(pte) & _PAGE_FILE; }
+static inline int pte_special(pte_t pte) { return 0; }
+
+static inline pte_t pte_wrprotect(pte_t pte) { pte_val(pte) &= (~_PAGE_WRITE); return pte; }
+static inline pte_t pte_mkclean(pte_t pte) { pte_val(pte) &= ~_PAGE_DIRTY; return pte; }
+static inline pte_t pte_mkold(pte_t pte) { pte_val(pte) &= ~_PAGE_ACCESSED; return pte; }
+static inline pte_t pte_mkwrite(pte_t pte) { pte_val(pte) |= _PAGE_WRITE; return pte; }
+static inline pte_t pte_mkdirty(pte_t pte) { pte_val(pte) |= _PAGE_DIRTY; return pte; }
+static inline pte_t pte_mkyoung(pte_t pte) { pte_val(pte) |= _PAGE_ACCESSED; return pte; }
+static inline pte_t pte_mkspecial(pte_t pte) { return pte; }
+static inline pte_t pte_mkhuge(pte_t pte) { return pte; }
+
+/*
+ * Macros deriving write-combining and uncached variants of a page protection.
+ */
+#define pgprot_writecombine(prot) \
+ __pgprot(pgprot_val(prot) & ~(_PAGE_CACHE_CTRL1 | _PAGE_CACHE_CTRL0))
+
+#define pgprot_noncached(prot) \
+ __pgprot(pgprot_val(prot) & ~_PAGE_CACHEABLE)
+
+
+/*
+ * Conversion functions: convert a page and protection to a page entry,
+ * and a page entry and page directory to the page they refer to.
+ */
+
+#define mk_pte(page, pgprot) pfn_pte(page_to_pfn(page), (pgprot))
+
+static inline pte_t pte_modify(pte_t pte, pgprot_t newprot)
+{
+ pte_val(pte) = (pte_val(pte) & _PAGE_CHG_MASK) | pgprot_val(newprot);
+ return pte;
+}
+
+static inline unsigned long pmd_page_vaddr(pmd_t pmd)
+{
+ unsigned long paddr = pmd_val(pmd) & PAGE_MASK;
+ if (!paddr)
+ return 0;
+ return (unsigned long)__va(paddr);
+}
+
+#define pmd_page(pmd) (pfn_to_page(pmd_val(pmd) >> PAGE_SHIFT))
+#define pmd_page_shift(pmd) (12 + ((pmd_val(pmd) & _PAGE_SZ_MASK) \
+ >> _PAGE_SZ_SHIFT))
+#define pmd_num_ptrs(pmd) (PGDIR_SIZE >> pmd_page_shift(pmd))
+
+/*
+ * Each pgd is only 2k, mapping 2Gb (local or global). If we're in global
+ * space drop the top bit before indexing the pgd.
+ */
+#if PAGE_OFFSET >= LINGLOBAL_BASE
+#define pgd_index(address) ((((address) & ~0x80000000) >> PGDIR_SHIFT) \
+ & (PTRS_PER_PGD-1))
+#else
+#define pgd_index(address) (((address) >> PGDIR_SHIFT) & (PTRS_PER_PGD-1))
+#endif
+
+#define pgd_offset(mm, address) ((mm)->pgd + pgd_index(address))
+
+#define pgd_offset_k(address) pgd_offset(&init_mm, address)
+
+#define pmd_index(address) (((address) >> PMD_SHIFT) & (PTRS_PER_PMD-1))
+
+/* Find an entry in the second-level page table.. */
+#if !defined(CONFIG_HUGETLB_PAGE)
+ /* all pages are of size (1 << PAGE_SHIFT), so no need to read 1st level pt */
+# define pte_index(pmd, address) \
+ (((address) >> PAGE_SHIFT) & (PTRS_PER_PTE - 1))
+#else
+ /* some pages are huge, so read 1st level pt to find out */
+# define pte_index(pmd, address) \
+ (((address) >> pmd_page_shift(pmd)) & (pmd_num_ptrs(pmd) - 1))
+#endif
+#define pte_offset_kernel(dir, address) \
+ ((pte_t *) pmd_page_vaddr(*(dir)) + pte_index(*(dir), address))
+#define pte_offset_map(dir, address) pte_offset_kernel(dir, address)
+#define pte_offset_map_nested(dir, address) pte_offset_kernel(dir, address)
+
+#define pte_unmap(pte) do { } while (0)
+#define pte_unmap_nested(pte) do { } while (0)
+
+#define pte_ERROR(e) \
+ pr_err("%s:%d: bad pte %08lx.\n", __FILE__, __LINE__, pte_val(e))
+#define pgd_ERROR(e) \
+ pr_err("%s:%d: bad pgd %08lx.\n", __FILE__, __LINE__, pgd_val(e))
+
+/*
+ * Meta doesn't have any external MMU info: the kernel page
+ * tables contain all the necessary information.
+ */
+static inline void update_mmu_cache(struct vm_area_struct *vma,
+ unsigned long address, pte_t *pte)
+{
+}
+
+/*
+ * Encode and decode a swap entry (must be !pte_none(e) && !pte_present(e))
+ * Since PAGE_PRESENT is bit 1, we can use the bits above that.
+ */
+#define __swp_type(x) (((x).val >> 1) & 0xff)
+#define __swp_offset(x) ((x).val >> 10)
+#define __swp_entry(type, offset) ((swp_entry_t) { ((type) << 1) | \
+ ((offset) << 10) })
+#define __pte_to_swp_entry(pte) ((swp_entry_t) { pte_val(pte) })
+#define __swp_entry_to_pte(x) ((pte_t) { (x).val })
+
+#define PTE_FILE_MAX_BITS 22
+#define pte_to_pgoff(x) (pte_val(x) >> 10)
+#define pgoff_to_pte(x) __pte(((x) << 10) | _PAGE_FILE)
+
+#define kern_addr_valid(addr) (1)
+
+#define io_remap_pfn_range(vma, vaddr, pfn, size, prot) \
+ remap_pfn_range(vma, vaddr, pfn, size, prot)
+
+/*
+ * No page table caches to initialise
+ */
+#define pgtable_cache_init() do { } while (0)
+
+extern pgd_t swapper_pg_dir[PTRS_PER_PGD];
+void paging_init(unsigned long mem_end);
+
+#ifdef CONFIG_METAG_META12
+/* This is a workaround for an issue in Meta 1 cores. These cores cache
+ * invalid entries in the TLB so we always need to flush whenever we add
+ * a new pte. Unfortunately we can only flush the whole TLB not shoot down
+ * single entries so this is sub-optimal. This implementation ensures that
+ * we will get a flush at the second attempt, so we may still get repeated
+ * faults, we just don't overflow the kernel stack handling them.
+ */
+#define __HAVE_ARCH_PTEP_SET_ACCESS_FLAGS
+#define ptep_set_access_flags(__vma, __address, __ptep, __entry, __dirty) \
+({ \
+ int __changed = !pte_same(*(__ptep), __entry); \
+ if (__changed) { \
+ set_pte_at((__vma)->vm_mm, (__address), __ptep, __entry); \
+ } \
+ flush_tlb_page(__vma, __address); \
+ __changed; \
+})
+#endif
+
+#include <asm-generic/pgtable.h>
+
+#endif /* __ASSEMBLY__ */
+#endif /* _METAG_PGTABLE_H */
diff --git a/arch/metag/include/asm/processor.h b/arch/metag/include/asm/processor.h
new file mode 100644
index 00000000000..9b029a7911c
--- /dev/null
+++ b/arch/metag/include/asm/processor.h
@@ -0,0 +1,202 @@
+/*
+ * Copyright (C) 2005,2006,2007,2008 Imagination Technologies
+ */
+
+#ifndef __ASM_METAG_PROCESSOR_H
+#define __ASM_METAG_PROCESSOR_H
+
+#include <linux/atomic.h>
+
+#include <asm/page.h>
+#include <asm/ptrace.h>
+#include <asm/metag_regs.h>
+
+/*
+ * Default implementation of macro that returns current
+ * instruction pointer ("program counter").
+ */
+#define current_text_addr() ({ __label__ _l; _l: &&_l; })
+
+/* The task stops where the kernel starts */
+#define TASK_SIZE PAGE_OFFSET
+/* Add an extra page of padding at the top of the stack for the guard page. */
+#define STACK_TOP (TASK_SIZE - PAGE_SIZE)
+#define STACK_TOP_MAX STACK_TOP
+
+/* This decides where the kernel will search for a free chunk of vm
+ * space during mmap's.
+ */
+#define TASK_UNMAPPED_BASE META_MEMORY_BASE
+
+typedef struct {
+ unsigned long seg;
+} mm_segment_t;
+
+#ifdef CONFIG_METAG_FPU
+struct meta_fpu_context {
+ TBICTXEXTFPU fpstate;
+ union {
+ struct {
+ TBICTXEXTBB4 fx8_15;
+ TBICTXEXTFPACC fpacc;
+ } fx8_15;
+ struct {
+ TBICTXEXTFPACC fpacc;
+ TBICTXEXTBB4 unused;
+ } nofx8_15;
+ } extfpstate;
+ bool needs_restore;
+};
+#else
+struct meta_fpu_context {};
+#endif
+
+#ifdef CONFIG_METAG_DSP
+struct meta_ext_context {
+ struct {
+ TBIEXTCTX ctx;
+ TBICTXEXTBB8 bb8;
+ TBIDUAL ax[TBICTXEXTAXX_BYTES / sizeof(TBIDUAL)];
+ TBICTXEXTHL2 hl2;
+ TBICTXEXTTDPR ext;
+ TBICTXEXTRP6 rp;
+ } regs;
+
+ /* DSPRAM A and B save areas. */
+ void *ram[2];
+
+ /* ECH encoded size of DSPRAM save areas. */
+ unsigned int ram_sz[2];
+};
+#else
+struct meta_ext_context {};
+#endif
+
+struct thread_struct {
+ PTBICTX kernel_context;
+ /* A copy of the user process Sig.SaveMask. */
+ unsigned int user_flags;
+ struct meta_fpu_context *fpu_context;
+ void __user *tls_ptr;
+ unsigned short int_depth;
+ unsigned short txdefr_failure;
+ struct meta_ext_context *dsp_context;
+};
+
+#define INIT_THREAD { \
+ .kernel_context = NULL, \
+ .user_flags = 0, \
+ .fpu_context = NULL, \
+ .tls_ptr = NULL, \
+ .int_depth = 1, /* we start in kernel */ \
+ .txdefr_failure = 0, \
+ .dsp_context = NULL, \
+}
+
+/* This has to be a macro because it references 'current', which is not
+ * visible yet.
+ *
+ * Stack layout is as below.
+
+ argc argument counter (integer)
+ argv[0] program name (pointer)
+ argv[1...N] program args (pointers)
+ argv[argc-1] end of args (integer)
+ NULL
+ env[0...N] environment variables (pointers)
+ NULL
+
+ */
+#define start_thread(regs, pc, usp) do { \
+ unsigned int *argc = (unsigned int *) bprm->exec; \
+ set_fs(USER_DS); \
+ current->thread.int_depth = 1; \
+ /* Force this process down to user land */ \
+ regs->ctx.SaveMask = TBICTX_PRIV_BIT; \
+ regs->ctx.CurrPC = pc; \
+ regs->ctx.AX[0].U0 = usp; \
+ regs->ctx.DX[3].U1 = *((int *)argc); /* argc */ \
+ regs->ctx.DX[3].U0 = (int)((int *)argc + 1); /* argv */ \
+ regs->ctx.DX[2].U1 = (int)((int *)argc + \
+ regs->ctx.DX[3].U1 + 2); /* envp */ \
+ regs->ctx.DX[2].U0 = 0; /* rtld_fini */ \
+} while (0)
+
+/* Forward declaration, a strange C thing */
+struct task_struct;
+
+/* Free all resources held by a thread. */
+static inline void release_thread(struct task_struct *dead_task)
+{
+}
+
+#define copy_segments(tsk, mm) do { } while (0)
+#define release_segments(mm) do { } while (0)
+
+extern void exit_thread(void);
+
+/*
+ * Return saved PC of a blocked thread.
+ */
+#define thread_saved_pc(tsk) \
+ ((unsigned long)(tsk)->thread.kernel_context->CurrPC)
+#define thread_saved_sp(tsk) \
+ ((unsigned long)(tsk)->thread.kernel_context->AX[0].U0)
+#define thread_saved_fp(tsk) \
+ ((unsigned long)(tsk)->thread.kernel_context->AX[1].U0)
+
+unsigned long get_wchan(struct task_struct *p);
+
+#define KSTK_EIP(tsk) ((tsk)->thread.kernel_context->CurrPC)
+#define KSTK_ESP(tsk) ((tsk)->thread.kernel_context->AX[0].U0)
+
+#define user_stack_pointer(regs) ((regs)->ctx.AX[0].U0)
+
+#define cpu_relax() barrier()
+
+extern void setup_priv(void);
+
+static inline unsigned int hard_processor_id(void)
+{
+ unsigned int id;
+
+ asm volatile ("MOV %0, TXENABLE\n"
+ "AND %0, %0, %1\n"
+ "LSR %0, %0, %2\n"
+ : "=&d" (id)
+ : "I" (TXENABLE_THREAD_BITS),
+ "K" (TXENABLE_THREAD_S)
+ );
+
+ return id;
+}
+
+#define OP3_EXIT 0
+
+#define HALT_OK 0
+#define HALT_PANIC -1
+
+/*
+ * Halt (stop) the hardware thread. This instruction sequence is the
+ * standard way to cause a Meta hardware thread to exit. The exit code
+ * is pushed onto the stack which is interpreted by the debug adapter.
+ */
+static inline void hard_processor_halt(int exit_code)
+{
+ asm volatile ("MOV D1Ar1, %0\n"
+ "MOV D0Ar6, %1\n"
+ "MSETL [A0StP],D0Ar6,D0Ar4,D0Ar2\n"
+ "1:\n"
+ "SWITCH #0xC30006\n"
+ "B 1b\n"
+ : : "r" (exit_code), "K" (OP3_EXIT));
+}
+
+/* Set these hooks to call SoC specific code to restart/halt/power off. */
+extern void (*soc_restart)(char *cmd);
+extern void (*soc_halt)(void);
+
+extern void show_trace(struct task_struct *tsk, unsigned long *sp,
+ struct pt_regs *regs);
+
+#endif
diff --git a/arch/metag/include/asm/prom.h b/arch/metag/include/asm/prom.h
new file mode 100644
index 00000000000..d2aa35d2228
--- /dev/null
+++ b/arch/metag/include/asm/prom.h
@@ -0,0 +1,23 @@
+/*
+ * arch/metag/include/asm/prom.h
+ *
+ * Copyright (C) 2012 Imagination Technologies Ltd.
+ *
+ * Based on ARM version:
+ * Copyright (C) 2009 Canonical Ltd. <jeremy.kerr@canonical.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ */
+#ifndef __ASM_METAG_PROM_H
+#define __ASM_METAG_PROM_H
+
+#include <asm/setup.h>
+#define HAVE_ARCH_DEVTREE_FIXUPS
+
+extern struct machine_desc *setup_machine_fdt(void *dt);
+extern void copy_fdt(void);
+
+#endif /* __ASM_METAG_PROM_H */
diff --git a/arch/metag/include/asm/ptrace.h b/arch/metag/include/asm/ptrace.h
new file mode 100644
index 00000000000..fcabc18daf2
--- /dev/null
+++ b/arch/metag/include/asm/ptrace.h
@@ -0,0 +1,60 @@
+#ifndef _METAG_PTRACE_H
+#define _METAG_PTRACE_H
+
+#include <linux/compiler.h>
+#include <uapi/asm/ptrace.h>
+#include <asm/tbx.h>
+
+#ifndef __ASSEMBLY__
+
+/* this struct defines the way the registers are stored on the
+ stack during a system call. */
+
+struct pt_regs {
+ TBICTX ctx;
+ TBICTXEXTCB0 extcb0[5];
+};
+
+#define user_mode(regs) (((regs)->ctx.SaveMask & TBICTX_PRIV_BIT) > 0)
+
+#define instruction_pointer(regs) ((unsigned long)(regs)->ctx.CurrPC)
+#define profile_pc(regs) instruction_pointer(regs)
+
+#define task_pt_regs(task) \
+ ((struct pt_regs *)(task_stack_page(task) + \
+ sizeof(struct thread_info)))
+
+#define current_pt_regs() \
+ ((struct pt_regs *)((char *)current_thread_info() + \
+ sizeof(struct thread_info)))
+
+int syscall_trace_enter(struct pt_regs *regs);
+void syscall_trace_leave(struct pt_regs *regs);
+
+/* copy a struct user_gp_regs out to user */
+int metag_gp_regs_copyout(const struct pt_regs *regs,
+ unsigned int pos, unsigned int count,
+ void *kbuf, void __user *ubuf);
+/* copy a struct user_gp_regs in from user */
+int metag_gp_regs_copyin(struct pt_regs *regs,
+ unsigned int pos, unsigned int count,
+ const void *kbuf, const void __user *ubuf);
+/* copy a struct user_cb_regs out to user */
+int metag_cb_regs_copyout(const struct pt_regs *regs,
+ unsigned int pos, unsigned int count,
+ void *kbuf, void __user *ubuf);
+/* copy a struct user_cb_regs in from user */
+int metag_cb_regs_copyin(struct pt_regs *regs,
+ unsigned int pos, unsigned int count,
+ const void *kbuf, const void __user *ubuf);
+/* copy a struct user_rp_state out to user */
+int metag_rp_state_copyout(const struct pt_regs *regs,
+ unsigned int pos, unsigned int count,
+ void *kbuf, void __user *ubuf);
+/* copy a struct user_rp_state in from user */
+int metag_rp_state_copyin(struct pt_regs *regs,
+ unsigned int pos, unsigned int count,
+ const void *kbuf, const void __user *ubuf);
+
+#endif /* __ASSEMBLY__ */
+#endif /* _METAG_PTRACE_H */
diff --git a/arch/metag/include/asm/setup.h b/arch/metag/include/asm/setup.h
new file mode 100644
index 00000000000..e13083b15dd
--- /dev/null
+++ b/arch/metag/include/asm/setup.h
@@ -0,0 +1,8 @@
+#ifndef _ASM_METAG_SETUP_H
+#define _ASM_METAG_SETUP_H
+
+#include <uapi/asm/setup.h>
+
+void per_cpu_trap_init(unsigned long);
+extern void __init dump_machine_table(void);
+#endif /* _ASM_METAG_SETUP_H */
diff --git a/arch/metag/include/asm/smp.h b/arch/metag/include/asm/smp.h
new file mode 100644
index 00000000000..e0373f81a11
--- /dev/null
+++ b/arch/metag/include/asm/smp.h
@@ -0,0 +1,29 @@
+#ifndef __ASM_SMP_H
+#define __ASM_SMP_H
+
+#include <linux/cpumask.h>
+
+#define raw_smp_processor_id() (current_thread_info()->cpu)
+
+enum ipi_msg_type {
+ IPI_CALL_FUNC,
+ IPI_CALL_FUNC_SINGLE,
+ IPI_RESCHEDULE,
+};
+
+extern void arch_send_call_function_single_ipi(int cpu);
+extern void arch_send_call_function_ipi_mask(const struct cpumask *mask);
+#define arch_send_call_function_ipi_mask arch_send_call_function_ipi_mask
+
+asmlinkage void secondary_start_kernel(void);
+
+extern void secondary_startup(void);
+
+#ifdef CONFIG_HOTPLUG_CPU
+extern void __cpu_die(unsigned int cpu);
+extern int __cpu_disable(void);
+extern void cpu_die(void);
+#endif
+
+extern void smp_init_cpus(void);
+#endif /* __ASM_SMP_H */
diff --git a/arch/metag/include/asm/sparsemem.h b/arch/metag/include/asm/sparsemem.h
new file mode 100644
index 00000000000..03fe255d697
--- /dev/null
+++ b/arch/metag/include/asm/sparsemem.h
@@ -0,0 +1,13 @@
+#ifndef __ASM_METAG_SPARSEMEM_H
+#define __ASM_METAG_SPARSEMEM_H
+
+/*
+ * SECTION_SIZE_BITS 2^N: how big each section will be
+ * MAX_PHYSADDR_BITS 2^N: how much physical address space we have
+ * MAX_PHYSMEM_BITS 2^N: how much memory we can have in that space
+ */
+#define SECTION_SIZE_BITS 26
+#define MAX_PHYSADDR_BITS 32
+#define MAX_PHYSMEM_BITS 32
+
+#endif /* __ASM_METAG_SPARSEMEM_H */
diff --git a/arch/metag/include/asm/spinlock.h b/arch/metag/include/asm/spinlock.h
new file mode 100644
index 00000000000..86a7cf3d138
--- /dev/null
+++ b/arch/metag/include/asm/spinlock.h
@@ -0,0 +1,22 @@
+#ifndef __ASM_SPINLOCK_H
+#define __ASM_SPINLOCK_H
+
+#ifdef CONFIG_METAG_ATOMICITY_LOCK1
+#include <asm/spinlock_lock1.h>
+#else
+#include <asm/spinlock_lnkget.h>
+#endif
+
+#define arch_spin_unlock_wait(lock) \
+ do { while (arch_spin_is_locked(lock)) cpu_relax(); } while (0)
+
+#define arch_spin_lock_flags(lock, flags) arch_spin_lock(lock)
+
+#define arch_read_lock_flags(lock, flags) arch_read_lock(lock)
+#define arch_write_lock_flags(lock, flags) arch_write_lock(lock)
+
+#define arch_spin_relax(lock) cpu_relax()
+#define arch_read_relax(lock) cpu_relax()
+#define arch_write_relax(lock) cpu_relax()
+
+#endif /* __ASM_SPINLOCK_H */
diff --git a/arch/metag/include/asm/spinlock_lnkget.h b/arch/metag/include/asm/spinlock_lnkget.h
new file mode 100644
index 00000000000..ad8436feed8
--- /dev/null
+++ b/arch/metag/include/asm/spinlock_lnkget.h
@@ -0,0 +1,249 @@
+#ifndef __ASM_SPINLOCK_LNKGET_H
+#define __ASM_SPINLOCK_LNKGET_H
+
+/*
+ * None of these asm statements clobber memory as LNKSET writes around
+ * the cache so the memory it modifies cannot safely be read by any means
+ * other than these accessors.
+ */
+
+/*
+ * arch_spin_is_locked - report whether the spinlock is currently held.
+ *
+ * Loads the lock word with LNKGETD and tests bit 0. The result register is
+ * set to 1 and then cleared again by the conditional XORZ when the tested
+ * bit was zero, so the return value is 1 if bit 0 is set and 0 otherwise.
+ */
+static inline int arch_spin_is_locked(arch_spinlock_t *lock)
+{
+ int ret;
+
+ asm volatile ("LNKGETD %0, [%1]\n"
+ "TST %0, #1\n"
+ "MOV %0, #1\n"
+ "XORZ %0, %0, %0\n"
+ : "=&d" (ret)
+ : "da" (&lock->lock)
+ : "cc");
+ return ret;
+}
+
+/*
+ * arch_spin_lock - spin until the lock has been acquired.
+ *
+ * Each attempt loads the lock word with LNKGETD, tests bit 0, adds 1 and
+ * issues the conditional linked store (LNKSETDZ) only if bit 0 was clear.
+ * If the bit was set the loop restarts immediately. Otherwise TXSTAT is read
+ * with DEFR, masked with 0x3f000000 and compared with 0x02000000; any other
+ * value restarts the loop.
+ * NOTE(review): the TXSTAT comparison is presumably the "linked store
+ * succeeded" check - confirm against the Meta ISA documentation.
+ *
+ * smp_mb() is issued once the lock is held.
+ */
+static inline void arch_spin_lock(arch_spinlock_t *lock)
+{
+ int tmp;
+
+ asm volatile ("1: LNKGETD %0,[%1]\n"
+ " TST %0, #1\n"
+ " ADD %0, %0, #1\n"
+ " LNKSETDZ [%1], %0\n"
+ " BNZ 1b\n"
+ " DEFR %0, TXSTAT\n"
+ " ANDT %0, %0, #HI(0x3f000000)\n"
+ " CMPT %0, #HI(0x02000000)\n"
+ " BNZ 1b\n"
+ : "=&d" (tmp)
+ : "da" (&lock->lock)
+ : "cc");
+
+ smp_mb();
+}
+
+/*
+ * arch_spin_trylock - make a single attempt to take the lock.
+ *
+ * Returns 0 if failed to acquire lock, 1 on success.
+ *
+ * The sequence is the same as one iteration of arch_spin_lock(). Label 1 is
+ * reached with a non-zero condition either directly (bit 0 was already set)
+ * or after the TXSTAT comparison mismatched; in both cases XORNZ clears the
+ * result. Only when the comparison matched does the preceding MOV #1 survive.
+ * smp_mb() is executed whether or not the lock was obtained.
+ */
+static inline int arch_spin_trylock(arch_spinlock_t *lock)
+{
+ int tmp;
+
+ asm volatile (" LNKGETD %0,[%1]\n"
+ " TST %0, #1\n"
+ " ADD %0, %0, #1\n"
+ " LNKSETDZ [%1], %0\n"
+ " BNZ 1f\n"
+ " DEFR %0, TXSTAT\n"
+ " ANDT %0, %0, #HI(0x3f000000)\n"
+ " CMPT %0, #HI(0x02000000)\n"
+ " MOV %0, #1\n"
+ "1: XORNZ %0, %0, %0\n"
+ : "=&d" (tmp)
+ : "da" (&lock->lock)
+ : "cc");
+
+ smp_mb();
+
+ return tmp;
+}
+
+/*
+ * arch_spin_unlock - release the lock.
+ *
+ * Issues smp_mb() first, then stores 0 to the lock word with a plain SETD.
+ * The asm carries a "memory" clobber so the compiler does not move memory
+ * accesses across the store.
+ */
+static inline void arch_spin_unlock(arch_spinlock_t *lock)
+{
+ smp_mb();
+
+ asm volatile (" SETD [%0], %1\n"
+ :
+ : "da" (&lock->lock), "da" (0)
+ : "memory");
+}
+
+/*
+ * RWLOCKS
+ *
+ *
+ * Write locks are easy - we just set bit 31. When unlocking, we can
+ * just write zero since the lock is exclusively held.
+ */
+
+/*
+ * arch_write_lock - spin until the rwlock is held for writing.
+ *
+ * Each attempt loads the lock word with LNKGETD, compares it with 0, adds
+ * 0x80000000 (operand %2) and issues the conditional linked store (LNKSETDZ)
+ * only if the loaded value was 0, i.e. there were no readers and no writer.
+ * A non-zero value restarts the loop, as does a TXSTAT value that, once
+ * masked with 0x3f000000, differs from 0x02000000.
+ * NOTE(review): the TXSTAT comparison is presumably the "linked store
+ * succeeded" check - confirm against the Meta ISA documentation.
+ *
+ * smp_mb() is issued once the lock is held.
+ */
+static inline void arch_write_lock(arch_rwlock_t *rw)
+{
+ int tmp;
+
+ asm volatile ("1: LNKGETD %0,[%1]\n"
+ " CMP %0, #0\n"
+ " ADD %0, %0, %2\n"
+ " LNKSETDZ [%1], %0\n"
+ " BNZ 1b\n"
+ " DEFR %0, TXSTAT\n"
+ " ANDT %0, %0, #HI(0x3f000000)\n"
+ " CMPT %0, #HI(0x02000000)\n"
+ " BNZ 1b\n"
+ : "=&d" (tmp)
+ : "da" (&rw->lock), "bd" (0x80000000)
+ : "cc");
+
+ smp_mb();
+}
+
+/*
+ * arch_write_trylock - make a single attempt to take the rwlock for writing.
+ *
+ * Returns 1 if the lock was obtained and 0 otherwise.
+ *
+ * The sequence is one iteration of arch_write_lock(). Label 1 is reached
+ * with a non-zero condition either directly (the lock word was not 0) or
+ * after the TXSTAT comparison mismatched; XORNZ then clears the result.
+ * smp_mb() is executed whether or not the lock was obtained.
+ */
+static inline int arch_write_trylock(arch_rwlock_t *rw)
+{
+ int tmp;
+
+ asm volatile (" LNKGETD %0,[%1]\n"
+ " CMP %0, #0\n"
+ " ADD %0, %0, %2\n"
+ " LNKSETDZ [%1], %0\n"
+ " BNZ 1f\n"
+ " DEFR %0, TXSTAT\n"
+ " ANDT %0, %0, #HI(0x3f000000)\n"
+ " CMPT %0, #HI(0x02000000)\n"
+ " MOV %0,#1\n"
+ "1: XORNZ %0, %0, %0\n"
+ : "=&d" (tmp)
+ : "da" (&rw->lock), "bd" (0x80000000)
+ : "cc");
+
+ smp_mb();
+
+ return tmp;
+}
+
+/*
+ * arch_write_unlock - release a write-held rwlock.
+ *
+ * Issues smp_mb() first, then stores 0 to the lock word with a plain SETD.
+ * The asm carries a "memory" clobber so the compiler does not move memory
+ * accesses across the store.
+ */
+static inline void arch_write_unlock(arch_rwlock_t *rw)
+{
+ smp_mb();
+
+ asm volatile (" SETD [%0], %1\n"
+ :
+ : "da" (&rw->lock), "da" (0)
+ : "memory");
+}
+
+/*
+ * write_can_lock - would write_trylock() succeed?
+ *
+ * Loads the lock word with LNKGETD and compares it with 0. The result is set
+ * to 1 and cleared by XORNZ when the word was non-zero, so the function
+ * returns 1 only if the lock word is 0. The lock is not modified.
+ */
+static inline int arch_write_can_lock(arch_rwlock_t *rw)
+{
+ int ret;
+
+ asm volatile ("LNKGETD %0, [%1]\n"
+ "CMP %0, #0\n"
+ "MOV %0, #1\n"
+ "XORNZ %0, %0, %0\n"
+ : "=&d" (ret)
+ : "da" (&rw->lock)
+ : "cc");
+ return ret;
+}
+
+/*
+ * Read locks are a bit more hairy:
+ * - Exclusively load the lock value.
+ * - Increment it.
+ * - Store new lock value if positive, and we still own this location.
+ * If the value is negative, we've already failed.
+ * - If we failed to store the value, we want a negative result.
+ * - If we failed, try again.
+ * Unlocking is similarly hairy. We may have multiple read locks
+ * currently active. However, we know we won't have any write
+ * locks.
+ */
+static inline void arch_read_lock(arch_rwlock_t *rw)
+{
+ int tmp;
+
+ /*
+ * ADDS increments the loaded value and sets the flags; LNKSETDPL only
+ * stores when the incremented value is non-negative, and BMI restarts
+ * the loop when it is negative (bit 31 set, as written by
+ * arch_write_lock()). The TXSTAT value read by DEFR is then masked with
+ * 0x3f000000 and compared with 0x02000000; a mismatch also restarts.
+ * NOTE(review): the TXSTAT comparison is presumably the "linked store
+ * succeeded" check - confirm against the Meta ISA documentation.
+ */
+ asm volatile ("1: LNKGETD %0,[%1]\n"
+ " ADDS %0, %0, #1\n"
+ " LNKSETDPL [%1], %0\n"
+ " BMI 1b\n"
+ " DEFR %0, TXSTAT\n"
+ " ANDT %0, %0, #HI(0x3f000000)\n"
+ " CMPT %0, #HI(0x02000000)\n"
+ " BNZ 1b\n"
+ : "=&d" (tmp)
+ : "da" (&rw->lock)
+ : "cc");
+
+ /* Barrier after the lock has been taken. */
+ smp_mb();
+}
+
+/*
+ * arch_read_unlock - drop one read hold on the rwlock.
+ *
+ * Issues smp_mb() first. The loop then loads the lock word with LNKGETD,
+ * subtracts 1 and writes it back with an unconditional linked store
+ * (LNKSETD). It repeats while the TXSTAT value read by DEFR, masked with
+ * 0x3f000000, differs from 0x02000000.
+ * NOTE(review): the TXSTAT comparison is presumably the "linked store
+ * succeeded" check - confirm against the Meta ISA documentation.
+ */
+static inline void arch_read_unlock(arch_rwlock_t *rw)
+{
+ int tmp;
+
+ smp_mb();
+
+ asm volatile ("1: LNKGETD %0,[%1]\n"
+ " SUB %0, %0, #1\n"
+ " LNKSETD [%1], %0\n"
+ " DEFR %0, TXSTAT\n"
+ " ANDT %0, %0, #HI(0x3f000000)\n"
+ " CMPT %0, #HI(0x02000000)\n"
+ " BNZ 1b\n"
+ : "=&d" (tmp)
+ : "da" (&rw->lock)
+ : "cc", "memory");
+}
+
+/*
+ * arch_read_trylock - make a single attempt to take the rwlock for reading.
+ *
+ * Returns 1 if the lock was obtained and 0 otherwise.
+ *
+ * The sequence is one iteration of arch_read_lock(). A negative incremented
+ * value branches straight to label 1, which loads 0 into the result. After
+ * the TXSTAT comparison the result is preloaded with 1 and BZ skips label 1
+ * on a match; on a mismatch execution falls through to label 1 and the
+ * result becomes 0. smp_mb() is executed whether or not the lock was
+ * obtained.
+ */
+static inline int arch_read_trylock(arch_rwlock_t *rw)
+{
+ int tmp;
+
+ asm volatile (" LNKGETD %0,[%1]\n"
+ " ADDS %0, %0, #1\n"
+ " LNKSETDPL [%1], %0\n"
+ " BMI 1f\n"
+ " DEFR %0, TXSTAT\n"
+ " ANDT %0, %0, #HI(0x3f000000)\n"
+ " CMPT %0, #HI(0x02000000)\n"
+ " MOV %0,#1\n"
+ " BZ 2f\n"
+ "1: MOV %0,#0\n"
+ "2:\n"
+ : "=&d" (tmp)
+ : "da" (&rw->lock)
+ : "cc");
+
+ smp_mb();
+
+ return tmp;
+}
+
+/*
+ * read_can_lock - would read_trylock() succeed?
+ *
+ * Loads the lock word with LNKGETD and compares it with 0x80000000 (operand
+ * %2), the value arch_write_lock() stores. The result is set to 1 and
+ * cleared by XORZ when the two are equal, so the function returns 0 only if
+ * the lock word is exactly 0x80000000. The lock is not modified.
+ */
+static inline int arch_read_can_lock(arch_rwlock_t *rw)
+{
+ int tmp;
+
+ asm volatile ("LNKGETD %0, [%1]\n"
+ "CMP %0, %2\n"
+ "MOV %0, #1\n"
+ "XORZ %0, %0, %0\n"
+ : "=&d" (tmp)
+ : "da" (&rw->lock), "bd" (0x80000000)
+ : "cc");
+ return tmp;
+}
+
+/*
+ * NOTE(review): asm/spinlock.h, which includes this header, defines these
+ * five macros again with identical replacement text. The repetition is
+ * harmless, but one copy is redundant.
+ */
+/* The *_lock_flags() variants ignore @flags. */
+#define arch_read_lock_flags(lock, flags) arch_read_lock(lock)
+#define arch_write_lock_flags(lock, flags) arch_write_lock(lock)
+
+/* The *_relax() hooks ignore @lock and simply call cpu_relax(). */
+#define arch_spin_relax(lock) cpu_relax()
+#define arch_read_relax(lock) cpu_relax()
+#define arch_write_relax(lock) cpu_relax()
+
+#endif /* __ASM_SPINLOCK_LNKGET_H */
diff --git a/arch/metag/include/asm/spinlock_lock1.h b/arch/metag/include/asm/spinlock_lock1.h
new file mode 100644
index 00000000000..c630444cffe
--- /dev/null
+++ b/arch/metag/include/asm/spinlock_lock1.h
@@ -0,0 +1,184 @@
+#ifndef __ASM_SPINLOCK_LOCK1_H
+#define __ASM_SPINLOCK_LOCK1_H
+
+#include <asm/bug.h>
+#include <asm/global_lock.h>
+
+/*
+ * arch_spin_is_locked - report whether the spinlock is currently held.
+ *
+ * Returns the raw lock word, read once after a compiler barrier. A warning
+ * is raised if the word holds anything other than 0 or 1.
+ */
+static inline int arch_spin_is_locked(arch_spinlock_t *lock)
+{
+	int state;
+
+	barrier();
+	state = lock->lock;
+	WARN_ON(!(state == 0 || state == 1));
+
+	return state;
+}
+
+/*
+ * arch_spin_lock - spin until the lock has been acquired.
+ *
+ * Every attempt is made under the global lock (__global_lock1): if the lock
+ * word is 0 it is set to 1 after a fence(). The global lock is dropped
+ * between attempts.
+ */
+static inline void arch_spin_lock(arch_spinlock_t *lock)
+{
+	unsigned long flags;
+	unsigned int acquired = 0;
+
+	do {
+		__global_lock1(flags);
+		if (lock->lock == 0) {
+			fence();
+			lock->lock = 1;
+			acquired = 1;
+		}
+		__global_unlock1(flags);
+	} while (acquired == 0);
+
+	/* We own the lock now, so the word must read back as 1. */
+	WARN_ON(lock->lock != 1);
+}
+
+/*
+ * arch_spin_trylock - make one attempt to take the lock.
+ *
+ * Returns 1 if the lock was free and has been taken, 0 if failed to acquire
+ * lock. The test and the update happen under the global lock.
+ */
+static inline int arch_spin_trylock(arch_spinlock_t *lock)
+{
+	unsigned long flags;
+	int was_free;
+
+	__global_lock1(flags);
+	was_free = (lock->lock == 0);
+	if (was_free) {
+		fence();
+		lock->lock = 1;
+	}
+	__global_unlock1(flags);
+
+	return was_free;
+}
+
+/*
+ * arch_spin_unlock - release the lock.
+ *
+ * After a compiler barrier the lock word is checked (a warning is raised if
+ * it is already 0) and then cleared with a plain store. The global lock is
+ * not taken here.
+ */
+static inline void arch_spin_unlock(arch_spinlock_t *lock)
+{
+ barrier();
+ WARN_ON(!lock->lock);
+ lock->lock = 0;
+}
+
+/*
+ * RWLOCKS
+ *
+ *
+ * Write locks are easy - we just set bit 31. When unlocking, we can
+ * just write zero since the lock is exclusively held.
+ */
+
+/*
+ * arch_write_lock - spin until the rwlock is held for writing.
+ *
+ * Every attempt is made under the global lock (__global_lock1): if the lock
+ * word is 0 (no readers, no writer) it is set to 0x80000000 after a fence().
+ * The global lock is dropped between attempts.
+ */
+static inline void arch_write_lock(arch_rwlock_t *rw)
+{
+	unsigned long flags;
+	unsigned int acquired = 0;
+
+	do {
+		__global_lock1(flags);
+		if (rw->lock == 0) {
+			fence();
+			rw->lock = 0x80000000;
+			acquired = 1;
+		}
+		__global_unlock1(flags);
+	} while (acquired == 0);
+
+	/* We are the exclusive owner, so only bit 31 may be set. */
+	WARN_ON(rw->lock != 0x80000000);
+}
+
+/*
+ * arch_write_trylock - make one attempt to take the rwlock for writing.
+ *
+ * Returns 1 if the lock word was 0 and has been set to 0x80000000, and 0
+ * otherwise. The test and the update happen under the global lock.
+ */
+static inline int arch_write_trylock(arch_rwlock_t *rw)
+{
+	unsigned long flags;
+	int was_free;
+
+	__global_lock1(flags);
+	was_free = (rw->lock == 0);
+	if (was_free) {
+		fence();
+		rw->lock = 0x80000000;
+	}
+	__global_unlock1(flags);
+
+	return was_free;
+}
+
+/*
+ * arch_write_unlock - release a write-held rwlock.
+ *
+ * After a compiler barrier the lock word is checked (a warning is raised
+ * unless it is exactly 0x80000000) and then cleared with a plain store. The
+ * global lock is not taken here.
+ */
+static inline void arch_write_unlock(arch_rwlock_t *rw)
+{
+ barrier();
+ WARN_ON(rw->lock != 0x80000000);
+ rw->lock = 0;
+}
+
+/*
+ * write_can_lock - would write_trylock() succeed?
+ *
+ * Returns 1 if the lock word, read once after a compiler barrier, is 0.
+ */
+static inline int arch_write_can_lock(arch_rwlock_t *rw)
+{
+	barrier();
+
+	return rw->lock == 0;
+}
+
+/*
+ * Read locks are a bit more hairy:
+ * - Take the global lock and load the lock value.
+ * - If bit 31 is clear (no writer holds the lock), store the value
+ * incremented by one to account for the new reader.
+ * - Release the global lock.
+ * - If a writer held the lock, try again.
+ * Unlocking decrements the count under the global lock. We may have
+ * multiple read locks currently active. However, we know we won't have
+ * any write locks.
+ */
+/*
+ * arch_read_lock - spin until the rwlock is held for reading.
+ *
+ * Every attempt is made under the global lock (__global_lock1): if the lock
+ * word is below 0x80000000 (no writer) it is incremented after a fence().
+ */
+static inline void arch_read_lock(arch_rwlock_t *rw)
+{
+	unsigned long flags;
+	unsigned int readers;
+	unsigned int acquired = 0;
+
+	do {
+		__global_lock1(flags);
+		readers = rw->lock;
+		if (readers < 0x80000000) {
+			fence();
+			rw->lock = readers + 1;
+			acquired = 1;
+		}
+		__global_unlock1(flags);
+	} while (!acquired);
+}
+
+/*
+ * arch_read_unlock - drop one read hold on the rwlock.
+ *
+ * The lock word is decremented under the global lock, after a fence(). A
+ * warning is raised if the word was already 0 before the decrement.
+ */
+static inline void arch_read_unlock(arch_rwlock_t *rw)
+{
+	unsigned long flags;
+	unsigned int before;
+
+	__global_lock1(flags);
+	fence();
+	before = rw->lock;
+	rw->lock = before - 1;
+	__global_unlock1(flags);
+
+	WARN_ON(before == 0);
+}
+
+/*
+ * arch_read_trylock - make one attempt to take the rwlock for reading.
+ *
+ * Returns 1 if the lock word was below 0x80000000 and has been incremented,
+ * and 0 otherwise. The test and the update happen under the global lock.
+ */
+static inline int arch_read_trylock(arch_rwlock_t *rw)
+{
+	unsigned long flags;
+	unsigned int readers;
+	int no_writer;
+
+	__global_lock1(flags);
+	readers = rw->lock;
+	no_writer = (readers < 0x80000000);
+	if (no_writer) {
+		fence();
+		rw->lock = readers + 1;
+	}
+	__global_unlock1(flags);
+
+	return no_writer;
+}
+
+/*
+ * read_can_lock - would read_trylock() succeed?
+ *
+ * Returns 1 if the lock word, read once after a compiler barrier, is below
+ * 0x80000000.
+ */
+static inline int arch_read_can_lock(arch_rwlock_t *rw)
+{
+	barrier();
+
+	return rw->lock < 0x80000000;
+}
+
+#endif /* __ASM_SPINLOCK_LOCK1_H */
diff --git a/arch/metag/include/asm/spinlock_types.h b/arch/metag/include/asm/spinlock_types.h
new file mode 100644
index 00000000000..b76391405fe
--- /dev/null
+++ b/arch/metag/include/asm/spinlock_types.h
@@ -0,0 +1,20 @@
+#ifndef _ASM_METAG_SPINLOCK_TYPES_H
+#define _ASM_METAG_SPINLOCK_TYPES_H
+
+#ifndef __LINUX_SPINLOCK_TYPES_H
+# error "please don't include this file directly"
+#endif
+
+/*
+ * Spinlock word. The accessors in spinlock_lock1.h store 0 when the lock is
+ * free and 1 when it is held; the ones in spinlock_lnkget.h test bit 0.
+ */
+typedef struct {
+ volatile unsigned int lock;
+} arch_spinlock_t;
+
+#define __ARCH_SPIN_LOCK_UNLOCKED { 0 }
+
+/*
+ * Read/write lock word: 0 when free, 0x80000000 while held for writing,
+ * otherwise incremented once per reader by the read-lock accessors.
+ */
+typedef struct {
+ volatile unsigned int lock;
+} arch_rwlock_t;
+
+#define __ARCH_RW_LOCK_UNLOCKED { 0 }
+
+#endif /* _ASM_METAG_SPINLOCK_TYPES_H */
diff --git a/arch/metag/include/asm/stacktrace.h b/arch/metag/include/asm/stacktrace.h
new file mode 100644
index 00000000000..2830a0fe7ac
--- /dev/null
+++ b/arch/metag/include/asm/stacktrace.h
@@ -0,0 +1,20 @@
+#ifndef __ASM_STACKTRACE_H
+#define __ASM_STACKTRACE_H
+
+/*
+ * State passed to unwind_frame() and walk_stackframe().
+ * NOTE(review): going by the field names these are presumably the frame
+ * pointer, stack pointer, link register (return address) and program
+ * counter of the frame - confirm against the unwinder implementation.
+ */
+struct stackframe {
+ unsigned long fp;
+ unsigned long sp;
+ unsigned long lr;
+ unsigned long pc;
+};
+
+/*
+ * NOTE(review): presumably the fp/lr pair as laid out in a stack frame -
+ * confirm against the unwinder implementation.
+ */
+struct metag_frame {
+ unsigned long fp;
+ unsigned long lr;
+};
+
+/* Implemented outside this header; fn receives each frame together with data. */
+extern int unwind_frame(struct stackframe *frame);
+extern void walk_stackframe(struct stackframe *frame,
+ int (*fn)(struct stackframe *, void *), void *data);
+
+#endif /* __ASM_STACKTRACE_H */
diff --git a/arch/metag/include/asm/string.h b/arch/metag/include/asm/string.h
new file mode 100644
index 00000000000..53e3806eee0
--- /dev/null
+++ b/arch/metag/include/asm/string.h
@@ -0,0 +1,13 @@
+#ifndef _METAG_STRING_H_
+#define _METAG_STRING_H_
+
+/*
+ * Each __HAVE_ARCH_* macro announces that the routine declared beneath it
+ * is provided by the architecture.
+ */
+#define __HAVE_ARCH_MEMSET
+extern void *memset(void *__s, int __c, size_t __count);
+
+#define __HAVE_ARCH_MEMCPY
+extern void *memcpy(void *__to, __const__ void *__from, size_t __n);
+
+#define __HAVE_ARCH_MEMMOVE
+extern void *memmove(void *__dest, __const__ void *__src, size_t __n);
+
+#endif /* _METAG_STRING_H_ */
diff --git a/arch/metag/include/asm/switch.h b/arch/metag/include/asm/switch.h
new file mode 100644
index 00000000000..1fd6a587c84
--- /dev/null
+++ b/arch/metag/include/asm/switch.h
@@ -0,0 +1,21 @@
+/*
+ * Copyright (C) 2012 Imagination Technologies Ltd.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ */
+
+#ifndef _ASM_METAG_SWITCH_H
+#define _ASM_METAG_SWITCH_H
+
+/* metag SWITCH codes */
+#define __METAG_SW_PERM_BREAK 0x400002 /* compiled in breakpoint */
+#define __METAG_SW_SYS_LEGACY 0x440000 /* legacy system calls */
+#define __METAG_SW_SYS 0x440001 /* system calls */
+
+/*
+ * metag SWITCH instruction encoding
+ *
+ * TYPE is pasted onto __METAG_SW_, so it must be one of the suffixes defined
+ * above (PERM_BREAK, SYS_LEGACY or SYS). The selected code is ORed into
+ * 0xaf000000 to give the full instruction word.
+ */
+#define __METAG_SW_ENCODING(TYPE) (0xaf000000 | (__METAG_SW_##TYPE))
+
+#endif /* _ASM_METAG_SWITCH_H */
diff --git a/arch/metag/include/asm/syscall.h b/arch/metag/include/asm/syscall.h
new file mode 100644
index 00000000000..24fc97939f7
--- /dev/null
+++ b/arch/metag/include/asm/syscall.h
@@ -0,0 +1,104 @@
+/*
+ * Access to user system call parameters and results
+ *
+ * Copyright (C) 2008 Imagination Technologies Ltd.
+ *
+ * This copyrighted material is made available to anyone wishing to use,
+ * modify, copy, or redistribute it subject to the terms and conditions
+ * of the GNU General Public License v.2.
+ *
+ * See asm-generic/syscall.h for descriptions of what we must do here.
+ */
+
+#ifndef _ASM_METAG_SYSCALL_H
+#define _ASM_METAG_SYSCALL_H
+
+#include <linux/sched.h>
+#include <linux/err.h>
+#include <linux/uaccess.h>
+
+#include <asm/switch.h>
+
+/*
+ * syscall_get_nr - return the system call number, or -1 if not in a syscall.
+ *
+ * The word located 4 bytes before the saved PC is fetched with get_user().
+ * If it is the system call SWITCH instruction the number is taken from
+ * DX[0].U1; if the fetch fails or the word is anything else, -1 is returned.
+ */
+static inline long syscall_get_nr(struct task_struct *task,
+				  struct pt_regs *regs)
+{
+	unsigned long insn;
+
+	/*
+	 * FIXME there's no way to find out how we got here other than to
+	 * examine the memory at the PC to see if it is a syscall
+	 * SWITCH instruction.
+	 */
+	if (get_user(insn, (unsigned long *)(regs->ctx.CurrPC - 4)))
+		return -1;
+
+	if (insn != __METAG_SW_ENCODING(SYS))
+		return -1L;
+
+	return regs->ctx.DX[0].U1;
+}
+
+/*
+ * syscall_rollback - intentionally empty on metag; neither @task nor @regs
+ * is touched.
+ */
+static inline void syscall_rollback(struct task_struct *task,
+ struct pt_regs *regs)
+{
+ /* do nothing */
+}
+
+/*
+ * syscall_get_error - return the error code held in DX[0].U0.
+ *
+ * The register value is returned when IS_ERR_VALUE() classifies it as an
+ * error; otherwise 0 is returned.
+ */
+static inline long syscall_get_error(struct task_struct *task,
+				     struct pt_regs *regs)
+{
+	unsigned long retval = regs->ctx.DX[0].U0;
+
+	if (IS_ERR_VALUE(retval))
+		return retval;
+
+	return 0;
+}
+
+/*
+ * syscall_get_return_value - return the value held in DX[0].U0, the same
+ * register that syscall_set_return_value() writes. @task is unused.
+ */
+static inline long syscall_get_return_value(struct task_struct *task,
+ struct pt_regs *regs)
+{
+ return regs->ctx.DX[0].U0;
+}
+
+/*
+ * syscall_set_return_value - store a system call result in DX[0].U0.
+ *
+ * A non-zero @error takes precedence and is stored as is; @val is stored
+ * only when @error is 0. @task is unused.
+ */
+static inline void syscall_set_return_value(struct task_struct *task,
+					    struct pt_regs *regs,
+					    int error, long val)
+{
+	if (error)
+		regs->ctx.DX[0].U0 = (long) error;
+	else
+		regs->ctx.DX[0].U0 = val;
+}
+
+/*
+ * syscall_get_arguments - fetch system call arguments from the saved context.
+ * @task: unused
+ * @regs: saved register context to read from
+ * @i:    index of the first argument to fetch (0 through 5)
+ * @n:    number of arguments to fetch; @i + @n must not exceed 6
+ * @args: destination; argument @i is stored in @args[0], argument @i + 1 in
+ *        @args[1], and so on, as asm-generic/syscall.h requires
+ *
+ * Argument k is held in register slot 6 - k: odd slots map to
+ * DX[(slot + 1) / 2].U0 and even slots to DX[slot / 2].U1, so argument 0
+ * comes from DX[3].U1 and argument 5 from DX[1].U0.
+ */
+static inline void syscall_get_arguments(struct task_struct *task,
+					 struct pt_regs *regs,
+					 unsigned int i, unsigned int n,
+					 unsigned long *args)
+{
+	unsigned int reg, j;
+
+	BUG_ON(i + n > 6);
+
+	/* j indexes @args from 0; reg walks down from the slot of argument i */
+	for (j = 0, reg = 6 - i; j < n; j++, reg--) {
+		if (reg % 2)
+			args[j] = regs->ctx.DX[(reg + 1) / 2].U0;
+		else
+			args[j] = regs->ctx.DX[reg / 2].U1;
+	}
+}
+
+/*
+ * syscall_set_arguments - write system call arguments into the saved context.
+ * @task: unused
+ * @regs: saved register context to modify
+ * @i:    index of the first argument to write (0 through 5)
+ * @n:    number of arguments to write; @i + @n must not exceed 6
+ * @args: source; @args[0] becomes argument @i, @args[1] argument @i + 1,
+ *        and so on, as asm-generic/syscall.h requires
+ *
+ * Argument k is held in register slot 6 - k: odd slots map to
+ * DX[(slot + 1) / 2].U0 and even slots to DX[slot / 2].U1.
+ */
+static inline void syscall_set_arguments(struct task_struct *task,
+					 struct pt_regs *regs,
+					 unsigned int i, unsigned int n,
+					 const unsigned long *args)
+{
+	unsigned int reg, j;
+
+	BUG_ON(i + n > 6);
+
+	/*
+	 * Count with a separate index bounded by n. Testing "i < i + n" while
+	 * incrementing i never terminates for n > 0 and would run off the
+	 * end of the DX array.
+	 */
+	for (j = 0, reg = 6 - i; j < n; j++, reg--) {
+		if (reg % 2)
+			regs->ctx.DX[(reg + 1) / 2].U0 = args[j];
+		else
+			regs->ctx.DX[reg / 2].U1 = args[j];
+	}
+}
+
+#define NR_syscalls __NR_syscalls
+
+/* generic syscall table */
+extern const void *sys_call_table[];
+
+#endif /* _ASM_METAG_SYSCALL_H */
diff --git a/arch/metag/include/asm/syscalls.h b/arch/metag/include/asm/syscalls.h
new file mode 100644
index 00000000000..a02b9555652
--- /dev/null
+++ b/arch/metag/include/asm/syscalls.h
@@ -0,0 +1,39 @@
+#ifndef _ASM_METAG_SYSCALLS_H
+#define _ASM_METAG_SYSCALLS_H
+
+#include <linux/compiler.h>
+#include <linux/linkage.h>
+#include <linux/types.h>
+#include <linux/signal.h>
+
+/* kernel/signal.c */
+#define sys_rt_sigreturn sys_rt_sigreturn
+asmlinkage long sys_rt_sigreturn(void);
+
+#include <asm-generic/syscalls.h>
+
+/* kernel/sys_metag.c */
+asmlinkage int sys_metag_setglobalbit(char __user *, int);
+asmlinkage void sys_metag_set_fpu_flags(unsigned int);
+asmlinkage int sys_metag_set_tls(void __user *);
+asmlinkage void *sys_metag_get_tls(void);
+
+asmlinkage long sys_truncate64_metag(const char __user *, unsigned long,
+ unsigned long);
+asmlinkage long sys_ftruncate64_metag(unsigned int, unsigned long,
+ unsigned long);
+asmlinkage long sys_fadvise64_64_metag(int, unsigned long, unsigned long,
+ unsigned long, unsigned long, int);
+asmlinkage long sys_readahead_metag(int, unsigned long, unsigned long, size_t);
+asmlinkage ssize_t sys_pread64_metag(unsigned long, char __user *, size_t,
+ unsigned long, unsigned long);
+asmlinkage ssize_t sys_pwrite64_metag(unsigned long, char __user *, size_t,
+ unsigned long, unsigned long);
+asmlinkage long sys_sync_file_range_metag(int, unsigned long, unsigned long,
+ unsigned long, unsigned long,
+ unsigned int);
+
+int do_work_pending(struct pt_regs *regs, unsigned int thread_flags,
+ int syscall);
+
+#endif /* _ASM_METAG_SYSCALLS_H */
diff --git a/arch/metag/include/asm/tbx.h b/arch/metag/include/asm/tbx.h
new file mode 100644
index 00000000000..287b36ff8ad
--- /dev/null
+++ b/arch/metag/include/asm/tbx.h
@@ -0,0 +1,1425 @@
+/*
+ * asm/tbx.h
+ *
+ * Copyright (C) 2000-2012 Imagination Technologies.
+ *
+ * This program is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU General Public License version 2 as published by the
+ * Free Software Foundation.
+ *
+ * Thread binary interface header
+ */
+
+#ifndef _ASM_METAG_TBX_H_
+#define _ASM_METAG_TBX_H_
+
+/* for CACHEW_* values */
+#include <asm/metag_isa.h>
+/* for LINSYSEVENT_* addresses */
+#include <asm/metag_mem.h>
+
+#ifdef TBI_1_4
+#ifndef TBI_MUTEXES_1_4
+#define TBI_MUTEXES_1_4
+#endif
+#ifndef TBI_SEMAPHORES_1_4
+#define TBI_SEMAPHORES_1_4
+#endif
+#ifndef TBI_ASYNC_SWITCH_1_4
+#define TBI_ASYNC_SWITCH_1_4
+#endif
+#ifndef TBI_FASTINT_1_4
+#define TBI_FASTINT_1_4
+#endif
+#endif
+
+
+/* Id values in the TBI system describe a segment using an arbitrary
+ integer value and flags in the bottom 8 bits. The SIGPOLL value is
+ used in cases where control over blocking or polling behaviour is
+ needed. */
+#define TBID_SIGPOLL_BIT 0x02 /* Set bit in an Id value to poll vs block */
+/* Extended segment identifiers use strings in the string table */
+#define TBID_IS_SEGSTR( Id ) (((Id) & (TBID_SEGTYPE_BITS>>1)) == 0)
+
+/* Segment identifiers contain the following related bit-fields */
+#define TBID_SEGTYPE_BITS 0x0F /* One of the predefined segment types */
+#define TBID_SEGTYPE_S 0
+#define TBID_SEGSCOPE_BITS 0x30 /* Indicates the scope of the segment */
+#define TBID_SEGSCOPE_S 4
+#define TBID_SEGGADDR_BITS 0xC0 /* Indicates access possible via pGAddr */
+#define TBID_SEGGADDR_S 6
+
+/* Segments of memory can only really contain a few types of data */
+#define TBID_SEGTYPE_TEXT 0x02 /* Code segment */
+#define TBID_SEGTYPE_DATA 0x04 /* Data segment */
+#define TBID_SEGTYPE_STACK 0x06 /* Stack segment */
+#define TBID_SEGTYPE_HEAP 0x0A /* Heap segment */
+#define TBID_SEGTYPE_ROOT 0x0C /* Root block segments */
+#define TBID_SEGTYPE_STRING 0x0E /* String table segment */
+
+/* Segments have one of three possible scopes */
+#define TBID_SEGSCOPE_INIT 0 /* Temporary area for initialisation phase */
+#define TBID_SEGSCOPE_LOCAL 1 /* Private to this thread */
+#define TBID_SEGSCOPE_GLOBAL 2 /* Shared globally throughout the system */
+#define TBID_SEGSCOPE_SHARED 3 /* Limited sharing between local/global */
+
+/* For segment specifiers a further field in two of the remaining bits
+ indicates the usefulness of the pGAddr field in the segment
+ descriptor. */
+#define TBID_SEGGADDR_NULL 0 /* pGAddr is NULL -> SEGSCOPE_(LOCAL|INIT) */
+#define TBID_SEGGADDR_READ 1 /* Only read via pGAddr */
+#define TBID_SEGGADDR_WRITE 2 /* Full access via pGAddr */
+#define TBID_SEGGADDR_EXEC 3 /* Only execute via pGAddr */
+
+/* The following values are common to both segment and signal Id value and
+ live in the top 8 bits of the Id values. */
+
+/* The ISTAT bit indicates if segments are related to interrupt vs
+ background level interfaces. A thread can still handle all triggers at
+ either level, but can also split these up if it wants to. */
+#define TBID_ISTAT_BIT 0x01000000
+#define TBID_ISTAT_S 24
+
+/* Privilege needed to access a segment is indicated by the next bit.
+
+ This bit is set to mirror the current privilege level when starting a
+ search for a segment - setting it yourself toggles the automatically
+ generated state which is only useful to emulate unprivileged behaviour
+ or access unprivileged areas of memory while at privileged level. */
+#define TBID_PSTAT_BIT 0x02000000
+#define TBID_PSTAT_S 25
+
+/* The top six bits of a signal/segment specifier identify a thread within
+ the system. This represents a segment's owner. */
+#define TBID_THREAD_BITS 0xFC000000
+#define TBID_THREAD_S 26
+
+/* Special thread id values */
+#define TBID_THREAD_NULL (-32) /* Never matches any thread/segment id used */
+#define TBID_THREAD_GLOBAL (-31) /* Things global to all threads */
+#define TBID_THREAD_HOST ( -1) /* Host interface */
+#define TBID_THREAD_EXTIO (TBID_THREAD_HOST) /* Host based ExtIO i/f */
+
+/* Virtual Id's are used for external thread interface structures or the
+ above special Id's */
+#define TBID_IS_VIRTTHREAD( Id ) ((Id) < 0)
+
+/* Real Id's are used for actual hardware threads that are local */
+#define TBID_IS_REALTHREAD( Id ) ((Id) >= 0)
+
+/* Generate a segment Id given Thread, Scope, and Type */
+#define TBID_SEG( Thread, Scope, Type ) (\
+ ((Thread)<<TBID_THREAD_S) + ((Scope)<<TBID_SEGSCOPE_S) + (Type))
+
+/* Generate a signal Id given Thread and SigNum */
+#define TBID_SIG( Thread, SigNum ) (\
+ ((Thread)<<TBID_THREAD_S) + ((SigNum)<<TBID_SIGNUM_S) + TBID_SIGNAL_BIT)
+
+/* Generate an Id that solely represents a thread - useful for cache ops */
+#define TBID_THD( Thread ) ((Thread)<<TBID_THREAD_S)
+#define TBID_THD_NULL ((TBID_THREAD_NULL) <<TBID_THREAD_S)
+#define TBID_THD_GLOBAL ((TBID_THREAD_GLOBAL)<<TBID_THREAD_S)
+
+/* Common exception handler (see TBID_SIGNUM_XXF below) receives hardware
+ generated fault codes TBIXXF_SIGNUM_xxF in its SigNum parameter */
+#define TBIXXF_SIGNUM_IIF 0x01 /* General instruction fault */
+#define TBIXXF_SIGNUM_PGF 0x02 /* Privilege general fault */
+#define TBIXXF_SIGNUM_DHF 0x03 /* Data access watchpoint HIT */
+#define TBIXXF_SIGNUM_IGF 0x05 /* Code fetch general read failure */
+#define TBIXXF_SIGNUM_DGF 0x07 /* Data access general read/write fault */
+#define TBIXXF_SIGNUM_IPF 0x09 /* Code fetch page fault */
+#define TBIXXF_SIGNUM_DPF 0x0B /* Data access page fault */
+#define TBIXXF_SIGNUM_IHF 0x0D /* Instruction breakpoint HIT */
+#define TBIXXF_SIGNUM_DWF 0x0F /* Data access read-only fault */
+
+/* Hardware signals communicate events between processing levels within a
+ single thread. All the _xxF cases are exceptions and are routed via a
+ common exception handler, _SWx are software trap events and kicks including
+ __TBISignal generated kicks, and finally _TRx are hardware triggers */
+#define TBID_SIGNUM_SW0 0x00 /* SWITCH GROUP 0 - Per thread user */
+#define TBID_SIGNUM_SW1 0x01 /* SWITCH GROUP 1 - Per thread system */
+#define TBID_SIGNUM_SW2 0x02 /* SWITCH GROUP 2 - Internal global request */
+#define TBID_SIGNUM_SW3 0x03 /* SWITCH GROUP 3 - External global request */
+#ifdef TBI_1_4
+#define TBID_SIGNUM_FPE 0x04 /* Deferred exception - Any IEEE 754 exception */
+#define TBID_SIGNUM_FPD 0x05 /* Deferred exception - Denormal exception */
+/* Reserved 0x6 for a reserved deferred exception */
+#define TBID_SIGNUM_BUS 0x07 /* Deferred exception - Bus Error */
+/* Reserved 0x08-0x09 */
+#else
+/* Reserved 0x04-0x09 */
+#endif
+#define TBID_SIGNUM_SWS 0x0A /* KICK received with SigMask != 0 */
+#define TBID_SIGNUM_SWK 0x0B /* KICK received with SigMask == 0 */
+/* Reserved 0x0C-0x0F */
+#define TBID_SIGNUM_TRT 0x10 /* Timer trigger */
+#define TBID_SIGNUM_LWK 0x11 /* Low level kick (handler provided by TBI) */
+#define TBID_SIGNUM_XXF 0x12 /* Fault handler - receives ALL _xxF sigs */
+#ifdef TBI_1_4
+#define TBID_SIGNUM_DFR 0x13 /* Deferred Exception handler */
+#else
+#define TBID_SIGNUM_FPE 0x13 /* FPE Exception handler */
+#endif
+/* External trigger one group 0x14 to 0x17 - per thread */
+#define TBID_SIGNUM_TR1(Thread) (0x14+(Thread))
+#define TBID_SIGNUM_T10 0x14
+#define TBID_SIGNUM_T11 0x15
+#define TBID_SIGNUM_T12 0x16
+#define TBID_SIGNUM_T13 0x17
+/* External trigger two group 0x18 to 0x1b - per thread */
+#define TBID_SIGNUM_TR2(Thread) (0x18+(Thread))
+#define TBID_SIGNUM_T20 0x18
+#define TBID_SIGNUM_T21 0x19
+#define TBID_SIGNUM_T22 0x1A
+#define TBID_SIGNUM_T23 0x1B
+#define TBID_SIGNUM_TR3 0x1C /* External trigger N-4 (global) */
+#define TBID_SIGNUM_TR4 0x1D /* External trigger N-3 (global) */
+#define TBID_SIGNUM_TR5 0x1E /* External trigger N-2 (global) */
+#define TBID_SIGNUM_TR6 0x1F /* External trigger N-1 (global) */
+#define TBID_SIGNUM_MAX 0x1F
+
+/* Return the trigger register(TXMASK[I]/TXSTAT[I]) bits related to
+ each hardware signal, sometimes this is a many-to-one relationship. */
+#define TBI_TRIG_BIT(SigNum) (\
+ ((SigNum) >= TBID_SIGNUM_TRT) ? 1<<((SigNum)-TBID_SIGNUM_TRT) :\
+ ( ((SigNum) == TBID_SIGNUM_SWS) || \
+ ((SigNum) == TBID_SIGNUM_SWK) ) ? \
+ TXSTAT_KICK_BIT : TXSTATI_BGNDHALT_BIT )
+
+/* Return the hardware trigger vector number for entries in the
+ HWVEC0EXT table that will generate the required internal trigger. */
+#define TBI_TRIG_VEC(SigNum) (\
+ ((SigNum) >= TBID_SIGNUM_T10) ? ((SigNum)-TBID_SIGNUM_TRT) : -1)
+
+/* Default trigger masks for each thread at background/interrupt level */
+#define TBI_TRIGS_INIT( Thread ) (\
+ TXSTAT_KICK_BIT + TBI_TRIG_BIT(TBID_SIGNUM_TR1(Thread)) )
+#define TBI_INTS_INIT( Thread ) (\
+ TXSTAT_KICK_BIT + TXSTATI_BGNDHALT_BIT \
+ + TBI_TRIG_BIT(TBID_SIGNUM_TR2(Thread)) )
+
+#ifndef __ASSEMBLY__
+/* A spin-lock location is a zero-initialised location in memory */
+typedef volatile int TBISPIN, *PTBISPIN;
+
+/* A kick location is a hardware location you can write to
+ * in order to cause a kick
+ */
+typedef volatile int *PTBIKICK;
+
+#if defined(METAC_1_0) || defined(METAC_1_1)
+/* Macro to perform a kick - writes 1 to the location pKick points at */
+#define TBI_KICK( pKick ) do { (pKick)[0] = 1; } while (0)
+#else
+/* #define METAG_LIN_VALUES before including machine.h if required */
+#ifdef LINSYSEVENT_WR_COMBINE_FLUSH
+/* Macro to perform a kick - write combiners must be flushed, so 0 is
+ written to LINSYSEVENT_WR_COMBINE_FLUSH before 1 is written to the
+ location pKick points at. The argument is parenthesised so that any
+ pointer expression can be passed. Note that TBI_KICK is left undefined
+ when this branch is taken without LINSYSEVENT_WR_COMBINE_FLUSH. */
+#define TBI_KICK( pKick ) do {\
+ volatile int *pFlush = (volatile int *) LINSYSEVENT_WR_COMBINE_FLUSH; \
+ pFlush[0] = 0; \
+ (pKick)[0] = 1; } while (0)
+#endif
+#endif /* if defined(METAC_1_0) || defined(METAC_1_1) */
+#endif /* ifndef __ASSEMBLY__ */
+
+#ifndef __ASSEMBLY__
+/* 64-bit dual unit state value */
+typedef struct _tbidual_tag_ {
+ /* 32-bit value from a pair of registers in data or address units */
+ int U0, U1;
+} TBIDUAL, *PTBIDUAL;
+#endif /* ifndef __ASSEMBLY__ */
+
+/* Byte offsets of fields within TBIDUAL */
+#define TBIDUAL_U0 (0)
+#define TBIDUAL_U1 (4)
+
+#define TBIDUAL_BYTES (8)
+
+#define TBICTX_CRIT_BIT 0x0001 /* ASync state saved in TBICTX */
+#define TBICTX_SOFT_BIT 0x0002 /* Sync state saved in TBICTX (other bits 0) */
+#ifdef TBI_FASTINT_1_4
+#define TBICTX_FINT_BIT 0x0004 /* Using Fast Interrupts */
+#endif
+#define TBICTX_FPAC_BIT 0x0010 /* FPU state in TBICTX, FPU active on entry */
+#define TBICTX_XMCC_BIT 0x0020 /* Bit to identify a MECC task */
+#define TBICTX_CBUF_BIT 0x0040 /* Hardware catch buffer flag from TXSTATUS */
+#define TBICTX_CBRP_BIT 0x0080 /* Read pipeline dirty from TXDIVTIME */
+#define TBICTX_XDX8_BIT 0x0100 /* Saved DX.8 to DX.15 too */
+#define TBICTX_XAXX_BIT 0x0200 /* Save remaining AX registers to AX.7 */
+#define TBICTX_XHL2_BIT 0x0400 /* Saved hardware loop registers too */
+#define TBICTX_XTDP_BIT 0x0800 /* Saved DSP registers too */
+#define TBICTX_XEXT_BIT 0x1000 /* Set if TBICTX.Ext.Ctx contains extended
+ state save area, otherwise TBICTX.Ext.AX2
+ just holds normal A0.2 and A1.2 states */
+#define TBICTX_WAIT_BIT 0x2000 /* Causes wait for trigger - sticky toggle */
+#define TBICTX_XCBF_BIT 0x4000 /* Catch buffer or RD extracted into TBICTX */
+#define TBICTX_PRIV_BIT 0x8000 /* Set if system uses 'privileged' model */
+
+#ifdef METAC_1_0
+#define TBICTX_XAX3_BIT 0x0200 /* Saved AX.5 to AX.7 for XAXX */
+#define TBICTX_AX_REGS 5 /* Ax.0 to Ax.4 are core GP regs on CHORUS */
+#else
+#define TBICTX_XAX4_BIT 0x0200 /* Saved AX.4 to AX.7 for XAXX */
+#define TBICTX_AX_REGS 4 /* Default is Ax.0 to Ax.3 */
+#endif
+
+#ifdef TBI_1_4
+#define TBICTX_CFGFPU_FX16_BIT 0x00010000 /* Save FX.8 to FX.15 too */
+
+/* The METAC_CORE_ID_CONFIG field indicates omitted DSP resources.
+ When the CFGDSP field of Value equals METAC_COREID_CFGDSP_MIN the result
+ is a mask with the XHL2, XTDP, XAXX and XDX8 context bits cleared;
+ otherwise it is all ones. Value is parenthesised so that an expression
+ built from lower-precedence operators is masked as a whole. */
+#define METAC_COREID_CFGXCTX_MASK( Value ) (\
+ ( ((((Value) & METAC_COREID_CFGDSP_BITS)>> \
+ METAC_COREID_CFGDSP_S ) == METAC_COREID_CFGDSP_MIN) ? \
+ ~(TBICTX_XHL2_BIT+TBICTX_XTDP_BIT+ \
+ TBICTX_XAXX_BIT+TBICTX_XDX8_BIT ) : ~0U ) )
+#endif
+
+/* Extended context state provides a standardised method for registering the
+ arguments required by __TBICtxSave to save the additional register states
+ currently in use by non general purpose code. The state of the __TBIExtCtx
+ variable in the static space of the thread forms an extension of the base
+ context of the thread.
+
+ If ( __TBIExtCtx.Ctx.SaveMask == 0 ) then pExt is assumed to be NULL and
+ the empty state of __TBIExtCtx is represented by the fact that
+ TBICTX.SaveMask does not have the bit TBICTX_XEXT_BIT set.
+
+ If ( __TBIExtCtx.Ctx.SaveMask != 0 ) then pExt should point at a suitably
+ sized extended context save area (usually at the end of the stack space
+ allocated by the current routine). This space should allow for the
+ displaced state of A0.2 and A1.2 to be saved along with the other extended
+ states indicated via __TBIExtCtx.Ctx.SaveMask. */
+#ifndef __ASSEMBLY__
+typedef union _tbiextctx_tag_ {
+ long long Val;
+ TBIDUAL AX2;
+ struct _tbiextctxext_tag {
+#ifdef TBI_1_4
+ short DspramSizes; /* DSPRAM sizes. Encoding varies between
+ TBICtxAlloc and the ECH scheme. */
+#else
+ short Reserved0;
+#endif
+ short SaveMask; /* Flag bits for state saved */
+ PTBIDUAL pExt; /* AX[2] state saved first plus Xxxx state */
+
+ } Ctx;
+
+} TBIEXTCTX, *PTBIEXTCTX;
+
+/* Automatic registration of extended context save for __TBINestInts */
+extern TBIEXTCTX __TBIExtCtx;
+#endif /* ifndef __ASSEMBLY__ */
+
+/* Byte offsets of fields within TBIEXTCTX */
+#define TBIEXTCTX_AX2 (0)
+#define TBIEXTCTX_Ctx (0)
+#define TBIEXTCTX_Ctx_SaveMask (TBIEXTCTX_Ctx + 2)
+#define TBIEXTCTX_Ctx_pExt (TBIEXTCTX_Ctx + 2 + 2)
+
+/* Extended context data size calculation constants */
+#define TBICTXEXT_BYTES (8)
+#define TBICTXEXTBB8_BYTES (8*8)
+#define TBICTXEXTAX3_BYTES (3*8)
+#define TBICTXEXTAX4_BYTES (4*8)
+#ifdef METAC_1_0
+#define TBICTXEXTAXX_BYTES TBICTXEXTAX3_BYTES
+#else
+#define TBICTXEXTAXX_BYTES TBICTXEXTAX4_BYTES
+#endif
+#define TBICTXEXTHL2_BYTES (3*8)
+#define TBICTXEXTTDR_BYTES (27*8)
+#define TBICTXEXTTDP_BYTES TBICTXEXTTDR_BYTES
+
+#ifdef TBI_1_4
+#define TBICTXEXTFX8_BYTES (4*8)
+#define TBICTXEXTFPAC_BYTES (1*4 + 2*2 + 4*8)
+#define TBICTXEXTFACF_BYTES (3*8)
+#endif
+
+/* Maximum flag bits to be set via the TBICTX_EXTSET macro */
+#define TBICTXEXT_MAXBITS (TBICTX_XEXT_BIT| \
+ TBICTX_XDX8_BIT|TBICTX_XAXX_BIT|\
+ TBICTX_XHL2_BIT|TBICTX_XTDP_BIT )
+
+/* Maximum size of the extended context save area for current variant */
+#define TBICTXEXT_MAXBYTES (TBICTXEXT_BYTES+TBICTXEXTBB8_BYTES+\
+ TBICTXEXTAXX_BYTES+TBICTXEXTHL2_BYTES+\
+ TBICTXEXTTDP_BYTES )
+
+#ifdef TBI_FASTINT_1_4
+/* Maximum flag bits to be set via the TBICTX_EXTSET macro */
+#define TBICTX2EXT_MAXBITS (TBICTX_XDX8_BIT|TBICTX_XAXX_BIT|\
+ TBICTX_XHL2_BIT|TBICTX_XTDP_BIT )
+
+/* Maximum size of the extended context save area for current variant */
+#define TBICTX2EXT_MAXBYTES (TBICTXEXTBB8_BYTES+TBICTXEXTAXX_BYTES\
+ +TBICTXEXTHL2_BYTES+TBICTXEXTTDP_BYTES )
+#endif
+
+/* Specify extended resources being used by current routine, code must be
+ assembler generated to utilise extended resources-
+
+ MOV D0xxx,A0StP ; Perform alloca - routine should
+ ADD A0StP,A0StP,#SaveSize ; setup/use A0FrP to access locals
+ MOVT D1xxx,#SaveMask ; TBICTX_XEXT_BIT MUST be set
+ SETL [A1GbP+#OG(___TBIExtCtx)],D0xxx,D1xxx
+
+ NB: OG(___TBIExtCtx) is a special case supported for SETL/GETL operations
+ on 64-bit sizes structures only, other accesses must be based on use
+ of OGA(___TBIExtCtx).
+
+ At exit of routine-
+
+ MOV D0xxx,#0 ; Clear extended context save state
+ MOV D1xxx,#0
+ SETL [A1GbP+#OG(___TBIExtCtx)],D0xxx,D1xxx
+ SUB A0StP,A0StP,#SaveSize ; If original A0StP required
+
+ NB: Both the setting and clearing of the whole __TBIExtCtx MUST be done
+ atomically in one 64-bit write operation.
+
+ For simple interrupt handling only via __TBINestInts there should be no
+ impact of the __TBIExtCtx system. If pre-emptive scheduling is being
+ performed however (assuming __TBINestInts has already been called earlier
+ on) then the following logic will correctly call __TBICtxSave if required
+ and clear out the currently selected background task-
+
+ if ( __TBIExtCtx.Ctx.SaveMask & TBICTX_XEXT_BIT )
+ {
+ / * Store extended states in pCtx * /
+ State.Sig.SaveMask |= __TBIExtCtx.Ctx.SaveMask;
+
+ (void) __TBICtxSave( State, (void *) __TBIExtCtx.Ctx.pExt );
+ __TBIExtCtx.Val = 0;
+ }
+
+ and when restoring task states call __TBICtxRestore-
+
+ / * Restore state from pCtx * /
+ State.Sig.pCtx = pCtx;
+ State.Sig.SaveMask = pCtx->SaveMask;
+
+ if ( State.Sig.SaveMask & TBICTX_XEXT_BIT )
+ {
+ / * Restore extended states from pCtx * /
+ __TBIExtCtx.Val = pCtx->Ext.Val;
+
+ (void) __TBICtxRestore( State, (void *) __TBIExtCtx.Ctx.pExt );
+ }
+
+ */
+
+/* Critical thread state save area */
+#ifndef __ASSEMBLY__
+typedef struct _tbictx_tag_ {
+ /* TXSTATUS_FLAG_BITS and TXSTATUS_LSM_STEP_BITS from TXSTATUS */
+ short Flags;
+ /* Mask indicates any extended context state saved; 0 -> Never run */
+ short SaveMask;
+ /* Saved PC value */
+ int CurrPC;
+ /* Saved critical register states */
+ TBIDUAL DX[8];
+ /* Background control register states - for cores without catch buffer
+ base in DIVTIME the TXSTATUS bits RPVALID and RPMASK are stored with
+ the real state TXDIVTIME in CurrDIVTIME */
+ int CurrRPT, CurrBPOBITS, CurrMODE, CurrDIVTIME;
+ /* Saved AX register states */
+ TBIDUAL AX[2];
+ TBIEXTCTX Ext;
+ TBIDUAL AX3[TBICTX_AX_REGS-3];
+
+ /* Any CBUF state to be restored by a handler return must be stored here.
+ Other extended state can be stored anywhere - see __TBICtxSave and
+ __TBICtxRestore. */
+
+} TBICTX, *PTBICTX;
+
+#ifdef TBI_FASTINT_1_4
+typedef struct _tbictx2_tag_ {
+ TBIDUAL AX[2]; /* AU.0, AU.1 */
+ TBIDUAL DX[2]; /* DU.0, DU.4 */
+ int CurrMODE;
+ int CurrRPT;
+ int CurrSTATUS;
+ void *CurrPC; /* PC in PC address space */
+} TBICTX2, *PTBICTX2;
+/* TBICTX2 is followed by:
+ * TBICTXEXTCB0 if TXSTATUS.CBMarker
+ * TBIDUAL * TXSTATUS.IRPCount if TXSTATUS.IRPCount > 0
+ * TBICTXGP if using __TBIStdRootIntHandler or __TBIStdCtxSwitchRootIntHandler
+ */
+
+typedef struct _tbictxgp_tag_ {
+ short DspramSizes;
+ short SaveMask;
+ void *pExt;
+ TBIDUAL DX[6]; /* DU.1-DU.3, DU.5-DU.7 */
+ TBIDUAL AX[2]; /* AU.2-AU.3 */
+} TBICTXGP, *PTBICTXGP;
+
+#define TBICTXGP_DspramSizes (0)
+#define TBICTXGP_SaveMask (TBICTXGP_DspramSizes + 2)
+#define TBICTXGP_MAX_BYTES (2 + 2 + 4 + 8*(6+2))
+
+#endif
+#endif /* ifndef __ASSEMBLY__ */
+
+/* Byte offsets of fields within TBICTX */
+#define TBICTX_Flags (0)
+#define TBICTX_SaveMask (2)
+#define TBICTX_CurrPC (4)
+#define TBICTX_DX (2 + 2 + 4)
+#define TBICTX_CurrRPT (2 + 2 + 4 + 8 * 8)
+#define TBICTX_CurrMODE (2 + 2 + 4 + 8 * 8 + 4 + 4)
+#define TBICTX_AX (2 + 2 + 4 + 8 * 8 + 4 + 4 + 4 + 4)
+#define TBICTX_Ext (2 + 2 + 4 + 8 * 8 + 4 + 4 + 4 + 4 + 2 * 8)
+#define TBICTX_Ext_AX2 (TBICTX_Ext + TBIEXTCTX_AX2)
+#define TBICTX_Ext_AX2_U0 (TBICTX_Ext + TBIEXTCTX_AX2 + TBIDUAL_U0)
+#define TBICTX_Ext_AX2_U1 (TBICTX_Ext + TBIEXTCTX_AX2 + TBIDUAL_U1)
+#define TBICTX_Ext_Ctx_pExt (TBICTX_Ext + TBIEXTCTX_Ctx_pExt)
+#define TBICTX_Ext_Ctx_SaveMask (TBICTX_Ext + TBIEXTCTX_Ctx_SaveMask)
+
+#ifdef TBI_FASTINT_1_4
+#define TBICTX2_BYTES (8 * 2 + 8 * 2 + 4 + 4 + 4 + 4)
+#define TBICTXEXTCB0_BYTES (4 + 4 + 8)
+
+#define TBICTX2_CRIT_MAX_BYTES (TBICTX2_BYTES + TBICTXEXTCB0_BYTES + 6 * TBIDUAL_BYTES)
+/* Compute the PC value following PC: PC plus 8 when bit 0 of EXTRA is set,
+ otherwise PC plus 4. The conditional is bracketed explicitly because ?:
+ binds more loosely than +; without the brackets the whole sum would be
+ used as the condition and the result would only ever be 8 or 4. */
+#define TBI_SWITCH_NEXT_PC(PC, EXTRA) ((PC) + (((EXTRA) & 1) ? 8 : 4))
+#endif
+
+#ifndef __ASSEMBLY__
+/* Extended thread state save areas - catch buffer state element */
+typedef struct _tbictxextcb0_tag_ {
+ /* Flags data and address value - see METAC_CATCH_VALUES in machine.h */
+ unsigned long CBFlags, CBAddr;
+ /* 64-bit data */
+ TBIDUAL CBData;
+
+} TBICTXEXTCB0, *PTBICTXEXTCB0;
+
+/* Read pipeline state saved on later cores after single catch buffer slot */
+typedef struct _tbictxextrp6_tag_ {
+ /* RPMask is TXSTATUS_RPMASK_BITS only, reserved is undefined */
+ unsigned long RPMask, Reserved0;
+ TBIDUAL CBData[6];
+
+} TBICTXEXTRP6, *PTBICTXEXTRP6;
+
+/* Extended thread state save areas - 8 DU register pairs */
+typedef struct _tbictxextbb8_tag_ {
+ /* Remaining Data unit registers in 64-bit pairs */
+ TBIDUAL UX[8];
+
+} TBICTXEXTBB8, *PTBICTXEXTBB8;
+
+/* Extended thread state save areas - 3 AU register pairs */
+typedef struct _tbictxextbb3_tag_ {
+ /* Remaining Address unit registers in 64-bit pairs */
+ TBIDUAL UX[3];
+
+} TBICTXEXTBB3, *PTBICTXEXTBB3;
+
+/* Extended thread state save areas - 4 AU register pairs or 4 FX pairs */
+typedef struct _tbictxextbb4_tag_ {
+ /* Remaining Address unit or FPU registers in 64-bit pairs */
+ TBIDUAL UX[4];
+
+} TBICTXEXTBB4, *PTBICTXEXTBB4;
+
+/* Extended thread state save areas - Hardware loop states (max 2) */
+typedef struct _tbictxexthl2_tag_ {
+ /* Hardware looping register states */
+ TBIDUAL Start, End, Count;
+
+} TBICTXEXTHL2, *PTBICTXEXTHL2;
+
+/* Extended thread state save areas - DSP register states */
+typedef struct _tbictxexttdp_tag_ {
+ /* DSP 32-bit accumulator register state (Bits 31:0 of ACX.0) */
+ TBIDUAL Acc32[1];
+ /* DSP > 32-bit accumulator bits 63:32 of ACX.0 (zero-extended) */
+ TBIDUAL Acc64[1];
+ /* Twiddle register state, and three phase increment states */
+ TBIDUAL PReg[4];
+ /* Modulo region size, padded to 64-bits */
+ int CurrMRSIZE, Reserved0;
+
+} TBICTXEXTTDP, *PTBICTXEXTTDP;
+
+/* Extended thread state save areas - DSP register states including DSP RAM */
+typedef struct _tbictxexttdpr_tag_ {
+ /* DSP 32-bit accumulator register state (Bits 31:0 of ACX.0) */
+ TBIDUAL Acc32[1];
+ /* DSP 40-bit accumulator register state (Bits 39:8 of ACX.0) */
+ TBIDUAL Acc40[1];
+ /* DSP RAM Pointers */
+ TBIDUAL RP0[2], WP0[2], RP1[2], WP1[2];
+ /* DSP RAM Increments */
+ TBIDUAL RPI0[2], WPI0[2], RPI1[2], WPI1[2];
+ /* Template registers */
+ unsigned long Tmplt[16];
+ /* Modulo address region size and DSP RAM module region sizes */
+ int CurrMRSIZE, CurrDRSIZE;
+
+} TBICTXEXTTDPR, *PTBICTXEXTTDPR;
+
+#ifdef TBI_1_4
+/* The METAC_ID_CORE register state is a marker for the FPU
+ state that is then stored after this core header structure. */
+#define TBICTXEXTFPU_CONFIG_MASK ( (METAC_COREID_NOFPACC_BIT+ \
+ METAC_COREID_CFGFPU_BITS ) << \
+ METAC_COREID_CONFIG_BITS )
+
+/* Recorded FPU exception state from TXDEFR in DefrFpu */
+#define TBICTXEXTFPU_DEFRFPU_MASK (TXDEFR_FPU_FE_BITS)
+
+/* Extended thread state save areas - FPU register states */
+typedef struct _tbictxextfpu_tag_ {
+ /* Stored METAC_CORE_ID CONFIG */
+ int CfgFpu;
+ /* Stored deferred TXDEFR bits related to FPU
+ *
+ * This is encoded as follows in order to fit into 16-bits:
+ * DefrFPU:15 - 14 <= 0
+ * :13 - 8 <= TXDEFR:21-16
+ * : 7 - 6 <= 0
+ * : 5 - 0 <= TXDEFR:5-0
+ */
+ short DefrFpu;
+
+ /* TXMODE bits related to FPU */
+ short ModeFpu;
+
+ /* FPU Even/Odd register states */
+ TBIDUAL FX[4];
+
+ /* if CfgFpu & TBICTX_CFGFPU_FX16_BIT -> 1 then TBICTXEXTBB4 holds FX.8-15 */
+ /* if CfgFpu & TBICTX_CFGFPU_NOACF_BIT -> 0 then TBICTXEXTFPACC holds state */
+} TBICTXEXTFPU, *PTBICTXEXTFPU;
+
+/* Extended thread state save areas - FPU accumulator state */
+typedef struct _tbictxextfpacc_tag_ {
+ /* FPU accumulator register state - three 64-bit parts */
+ TBIDUAL FAcc32[3];
+
+} TBICTXEXTFPACC, *PTBICTXEXTFPACC;
+#endif
+
+/* Prototype TBI structure */
+struct _tbi_tag_ ;
+
+/* A 64-bit return value used commonly in the TBI APIs */
+typedef union _tbires_tag_ {
+ /* Save and load this value to get/set the whole result quickly */
+ long long Val;
+
+ /* Parameter of a fnSigs or __TBICtx* call */
+ struct _tbires_sig_tag_ {
+ /* TXMASK[I] bits zeroed up to and including current trigger level */
+ unsigned short TrigMask;
+ /* Control bits for handlers - see PTBIAPIFN documentation below */
+ unsigned short SaveMask;
+ /* Pointer to the base register context save area of the thread */
+ PTBICTX pCtx;
+ } Sig;
+
+ /* Result of TBIThrdPrivId call */
+ struct _tbires_thrdprivid_tag_ {
+ /* Basic thread identifier; just TBID_THREAD_BITS */
+ int Id;
+ /* Non-thread-number bits; TBID_ISTAT_BIT+TBID_PSTAT_BIT */
+ int Priv;
+ } Thrd;
+
+ /* Parameter and Result of a __TBISwitch call */
+ struct _tbires_switch_tag_ {
+ /* Parameter passed across context switch */
+ void *pPara;
+ /* Thread context of other Thread including restore flags */
+ PTBICTX pCtx;
+ } Switch;
+
+ /* For extended S/W events only */
+ struct _tbires_ccb_tag_ {
+ void *pCCB;
+ int COff;
+ } CCB;
+
+ struct _tbires_tlb_tag_ {
+ int Leaf; /* TLB Leaf data */
+ int Flags; /* TLB Flags */
+ } Tlb;
+
+#ifdef TBI_FASTINT_1_4
+ struct _tbires_intr_tag_ {
+ short TrigMask;
+ short SaveMask;
+ PTBICTX2 pCtx;
+ } Intr;
+#endif
+
+} TBIRES, *PTBIRES;
+#endif /* ifndef __ASSEMBLY__ */
+
+#ifndef __ASSEMBLY__
+/* Prototype for all signal handler functions, called via ___TBISyncTrigger or
+ ___TBIASyncTrigger.
+
+ State.Sig.TrigMask will indicate the bits set within TXMASKI at
+ the time of the handler call that have all been cleared to prevent
+ nested interrupts occurring immediately.
+
+ State.Sig.SaveMask is a bit-mask which will be set to Zero when a trigger
+ occurs at background level and TBICTX_CRIT_BIT and optionally
+ TBICTX_CBUF_BIT when a trigger occurs at interrupt level.
+
+ TBICTX_CBUF_BIT reflects the state of TXSTATUS_CBMARKER_BIT for
+ the interrupted background thread.
+
+ State.Sig.pCtx will point at a TBICTX structure generated to hold the
+ critical state of the interrupted thread at interrupt level and
+ should be set to NULL when called at background level.
+
+ Triggers will indicate the status of TXSTAT or TXSTATI sampled by the
+ code that called the handler.
+
+ InstOrSWSId is defined firstly as 'Inst' if the SigNum is TBID_SIGNUM_SWx
+ and hold the actual SWITCH instruction detected, secondly if SigNum
+ is TBID_SIGNUM_SWS the 'SWSId' is defined to hold the Id of the
+ software signal detected, in other cases the value of this
+ parameter is undefined.
+
+ pTBI points at the PTBI structure related to the thread and processing
+ level involved.
+
+ TBIRES return value at both processing levels is similar in terms of any
+ changes that the handler makes. By default the State argument value
+ passed in should be returned.
+
+ Sig.TrigMask value is bits to OR back into TXMASKI when the handler
+ completes to enable currently disabled interrupts.
+
+ Sig.SaveMask value is ignored.
+
+ Sig.pCtx is ignored.
+
+ */
+typedef TBIRES (*PTBIAPIFN)( TBIRES State, int SigNum,
+ int Triggers, int InstOrSWSId,
+ volatile struct _tbi_tag_ *pTBI );
+#endif /* ifndef __ASSEMBLY__ */
+
+#ifndef __ASSEMBLY__
+/* The global memory map is described by a list of segment descriptors */
+typedef volatile struct _tbiseg_tag_ {
+ volatile struct _tbiseg_tag_ *pLink;
+ int Id; /* Id of the segment */
+ TBISPIN Lock; /* Spin-lock for struct (normally 0) */
+ unsigned int Bytes; /* Size of region in bytes */
+ void *pGAddr; /* Base addr of region in global space */
+ void *pLAddr; /* Base addr of region in local space */
+ int Data[2]; /* Segment specific data (may be extended) */
+
+} TBISEG, *PTBISEG;
+#endif /* ifndef __ASSEMBLY__ */
+
+/* Offsets of fields in TBISEG structure */
+#define TBISEG_pLink ( 0)
+#define TBISEG_Id ( 4)
+#define TBISEG_Lock ( 8)
+#define TBISEG_Bytes (12)
+#define TBISEG_pGAddr (16)
+#define TBISEG_pLAddr (20)
+#define TBISEG_Data (24)
+
+#ifndef __ASSEMBLY__
+typedef volatile struct _tbi_tag_ {
+ int SigMask; /* Bits set to represent S/W events */
+ PTBIKICK pKick; /* Kick addr for S/W events */
+ void *pCCB; /* Extended S/W events */
+ PTBISEG pSeg; /* Related segment structure */
+ PTBIAPIFN fnSigs[TBID_SIGNUM_MAX+1];/* Signal handler API table */
+} *PTBI, TBI;
+#endif /* ifndef __ASSEMBLY__ */
+
+/* Byte offsets of fields within TBI */
+#define TBI_SigMask (0)
+#define TBI_pKick (4)
+#define TBI_pCCB (8)
+#define TBI_pSeg (12)
+#define TBI_fnSigs (16)
+
+#ifdef TBI_1_4
+#ifndef __ASSEMBLY__
+/* This handler should be used for TBID_SIGNUM_DFR */
+extern TBIRES __TBIHandleDFR ( TBIRES State, int SigNum,
+ int Triggers, int InstOrSWSId,
+ volatile struct _tbi_tag_ *pTBI );
+#endif
+#endif
+
+/* String table entry - special values */
+#define METAG_TBI_STRS (0x5300) /* Tag : If entry is valid */
+#define METAG_TBI_STRE (0x4500) /* Tag : If entry is end of table */
+#define METAG_TBI_STRG (0x4700) /* Tag : If entry is a gap */
+#define METAG_TBI_STRX (0x5A00) /* TransLen : If no translation present */
+
+#ifndef __ASSEMBLY__
+typedef volatile struct _tbistr_tag_ {
+ short Bytes; /* Length of entry in Bytes */
+ short Tag; /* Normally METAG_TBI_STRS(0x5300) */
+ short Len; /* Length of the string entry (incl null) */
+ short TransLen; /* Normally METAG_TBI_STRX(0x5A00) */
+ char String[8]; /* Zero terminated (may-be bigger) */
+
+} TBISTR, *PTBISTR;
+#endif /* ifndef __ASSEMBLY__ */
+
+/* Cache size information - available as fields of Data[1] of global heap
+ segment */
+#define METAG_TBI_ICACHE_SIZE_S 0 /* see comments below */
+#define METAG_TBI_ICACHE_SIZE_BITS 0x0000000F
+#define METAG_TBI_ICACHE_FILL_S 4
+#define METAG_TBI_ICACHE_FILL_BITS 0x000000F0
+#define METAG_TBI_DCACHE_SIZE_S 8
+#define METAG_TBI_DCACHE_SIZE_BITS 0x00000F00
+#define METAG_TBI_DCACHE_FILL_S 12
+#define METAG_TBI_DCACHE_FILL_BITS 0x0000F000
+
+/* METAG_TBI_xCACHE_SIZE
+ Describes the physical cache size rounded up to the next power of 2
+ relative to a 16K (2^14) cache. These sizes are encoded as a signed addend
+ to this base power of 2, for example
+ 4K -> 2^12 -> -2 (i.e. 12-14)
+ 8K -> 2^13 -> -1
+ 16K -> 2^14 -> 0
+ 32K -> 2^15 -> +1
+ 64K -> 2^16 -> +2
+ 128K -> 2^17 -> +3
+
+ METAG_TBI_xCACHE_FILL
+ Describes the physical cache size within the power of 2 area given by
+ the value above. For example a 10K cache may be represented as having
+ nearest size 16K with a fill of 10 sixteenths. This is encoded as the
+ number of unused 1/16ths, for example
+ 0000 -> 0 -> 16/16
+ 0001 -> 1 -> 15/16
+ 0010 -> 2 -> 14/16
+ ...
+ 1111 -> 15 -> 1/16
+ */
+
+#define METAG_TBI_CACHE_SIZE_BASE_LOG2 14
+
+/* Each declaration made by this macro generates a TBISTR entry */
+#ifndef __ASSEMBLY__
+#define TBISTR_DECL( Name, Str ) \
+ __attribute__ ((__section__ (".tbistr") )) const char Name[] = #Str
+#endif
+
+/* META timer values - see below for Timer support routines */
+#define TBI_TIMERWAIT_MIN (-16) /* Minimum 'recommended' period */
+#define TBI_TIMERWAIT_MAX (-0x7FFFFFFF) /* Maximum 'recommended' period */
+
+#ifndef __ASSEMBLY__
+/* These macros allow direct access from C to any register known to the
+ assembler or defined in machine.h. Example candidates are TXTACTCYC,
+ TXIDLECYC, and TXPRIVEXT. Note that where higher level macros and routines
+ exist, like the timer and trigger handling features below, these should be
+ used in preference to this direct low-level access mechanism. */
+#define TBI_GETREG( Reg ) __extension__ ({\
+ int __GRValue; \
+ __asm__ volatile ("MOV\t%0," #Reg "\t/* (*TBI_GETREG OK) */" : \
+ "=r" (__GRValue) ); \
+ __GRValue; })
+
+#define TBI_SETREG( Reg, Value ) do {\
+ int __SRValue = Value; \
+ __asm__ volatile ("MOV\t" #Reg ",%0\t/* (*TBI_SETREG OK) */" : \
+ : "r" (__SRValue) ); } while (0)
+
+#define TBI_SWAPREG( Reg, Value ) do {\
+ int __XRValue = (Value); \
+ __asm__ volatile ("SWAP\t" #Reg ",%0\t/* (*TBI_SWAPREG OK) */" : \
+ "=r" (__XRValue) : "0" (__XRValue) ); \
+ Value = __XRValue; } while (0)
+
+/* Obtain and/or release global critical section lock given that interrupts
+ are already disabled and/or should remain disabled. */
+#define TBI_NOINTSCRITON do {\
+ __asm__ volatile ("LOCK1\t\t/* (*TBI_NOINTSCRITON OK) */");} while (0)
+#define TBI_NOINTSCRITOFF do {\
+ __asm__ volatile ("LOCK0\t\t/* (*TBI_NOINTSCRITOFF OK) */");} while (0)
+/* Optimised in-lining versions of the above macros */
+
+#define TBI_LOCK( TrigState ) do {\
+ int __TRValue; \
+ int __ALOCKHI = LINSYSEVENT_WR_ATOMIC_LOCK & 0xFFFF0000; \
+ __asm__ volatile ("MOV %0,#0\t\t/* (*TBI_LOCK ... */\n\t" \
+ "SWAP\t%0,TXMASKI\t/* ... */\n\t" \
+ "LOCK2\t\t/* ... */\n\t" \
+ "SETD\t[%1+#0x40],D1RtP /* ... OK) */" : \
+ "=r&" (__TRValue) : "u" (__ALOCKHI) ); \
+ TrigState = __TRValue; } while (0)
+#define TBI_CRITON( TrigState ) do {\
+ int __TRValue; \
+ __asm__ volatile ("MOV %0,#0\t\t/* (*TBI_CRITON ... */\n\t" \
+ "SWAP\t%0,TXMASKI\t/* ... */\n\t" \
+ "LOCK1\t\t/* ... OK) */" : \
+ "=r" (__TRValue) ); \
+ TrigState = __TRValue; } while (0)
+
+#define TBI_INTSX( TrigState ) do {\
+ int __TRValue = TrigState; \
+ __asm__ volatile ("SWAP\t%0,TXMASKI\t/* (*TBI_INTSX OK) */" : \
+ "=r" (__TRValue) : "0" (__TRValue) ); \
+ TrigState = __TRValue; } while (0)
+
+#define TBI_UNLOCK( TrigState ) do {\
+ int __TRValue = TrigState; \
+ int __ALOCKHI = LINSYSEVENT_WR_ATOMIC_LOCK & 0xFFFF0000; \
+ __asm__ volatile ("SETD\t[%1+#0x00],D1RtP\t/* (*TBI_UNLOCK ... */\n\t" \
+ "LOCK0\t\t/* ... */\n\t" \
+ "MOV\tTXMASKI,%0\t/* ... OK) */" : \
+ : "r" (__TRValue), "u" (__ALOCKHI) ); } while (0)
+
+#define TBI_CRITOFF( TrigState ) do {\
+ int __TRValue = TrigState; \
+ __asm__ volatile ("LOCK0\t\t/* (*TBI_CRITOFF ... */\n\t" \
+ "MOV\tTXMASKI,%0\t/* ... OK) */" : \
+ : "r" (__TRValue) ); } while (0)
+
+#define TBI_TRIGSX( SrcDst ) do { TBI_SWAPREG( TXMASK, SrcDst );} while (0)
+
+/* Composite macros to perform logic ops on INTS or TRIGS masks */
+#define TBI_INTSOR( Bits ) do {\
+ int __TT = 0; TBI_INTSX(__TT); \
+ __TT |= (Bits); TBI_INTSX(__TT); } while (0)
+
+#define TBI_INTSAND( Bits ) do {\
+ int __TT = 0; TBI_INTSX(__TT); \
+ __TT &= (Bits); TBI_INTSX(__TT); } while (0)
+
+#ifdef TBI_1_4
+#define TBI_DEFRICTRLSOR( Bits ) do {\
+ int __TT = TBI_GETREG( CT.20 ); \
+ __TT |= (Bits); TBI_SETREG( CT.20, __TT); } while (0)
+
+/* AND Bits into CT.20: read the current value, mask it, write it back.
+ The value is read from CT.20 itself (the same register TBI_DEFRICTRLSOR
+ reads and the one written here) so that the register is modified in
+ place rather than being overwritten with masked contents of TXDEFR. */
+#define TBI_DEFRICTRLSAND( Bits ) do {\
+ int __TT = TBI_GETREG( CT.20 ); \
+ __TT &= (Bits); TBI_SETREG( CT.20, __TT); } while (0)
+#endif
+
+#define TBI_TRIGSOR( Bits ) do {\
+ int __TT = TBI_GETREG( TXMASK ); \
+ __TT |= (Bits); TBI_SETREG( TXMASK, __TT); } while (0)
+
+#define TBI_TRIGSAND( Bits ) do {\
+ int __TT = TBI_GETREG( TXMASK ); \
+ __TT &= (Bits); TBI_SETREG( TXMASK, __TT); } while (0)
+
+/* Macros to disable and re-enable interrupts using TBI_INTSX, deliberate
+ traps and exceptions can still be handled within the critical section. */
+#define TBI_STOPINTS( Value ) do {\
+ int __TT = TBI_GETREG( TXMASKI ); \
+ __TT &= TXSTATI_BGNDHALT_BIT; TBI_INTSX( __TT ); \
+ Value = __TT; } while (0)
+#define TBI_RESTINTS( Value ) do {\
+ int __TT = Value; TBI_INTSX( __TT ); } while (0)
+
+/* Return pointer to segment list at current privilege level */
+PTBISEG __TBISegList( void );
+
+/* Search the segment list for a match given Id, pStart can be NULL */
+PTBISEG __TBIFindSeg( PTBISEG pStart, int Id );
+
+/* Prepare a new segment structure using space from within another */
+PTBISEG __TBINewSeg( PTBISEG pFromSeg, int Id, unsigned int Bytes );
+
+/* Prepare a new segment using any global or local heap segments available */
+PTBISEG __TBIMakeNewSeg( int Id, unsigned int Bytes );
+
+/* Insert a new segment into the segment list so __TBIFindSeg can locate it */
+void __TBIAddSeg( PTBISEG pSeg );
+#define __TBIADDSEG_DEF /* Some versions failed to define this */
+
+/* Return Id of current thread; TBID_ISTAT_BIT+TBID_THREAD_BITS */
+int __TBIThreadId( void );
+
+/* Return TBIRES.Thrd data for current thread */
+TBIRES __TBIThrdPrivId( void );
+
+/* Return pointer to current threads TBI root block.
+ Id implies whether Int or Background root block is required */
+PTBI __TBI( int Id );
+
+/* Try to set Mask bit using the spin-lock protocol, return 0 if fails and
+ new state if succeeds */
+int __TBIPoll( PTBISPIN pLock, int Mask );
+
+/* Set Mask bits via the spin-lock protocol in *pLock, return new state */
+int __TBISpin( PTBISPIN pLock, int Mask );
+
+/* Default handler set up for all TBI.fnSigs entries during initialisation */
+TBIRES __TBIUnExpXXX( TBIRES State, int SigNum,
+ int Triggers, int Inst, PTBI pTBI );
+
+/* Call this routine to service triggers at background processing level. The
+ TBID_POLL_BIT of the Id parameter value will be used to indicate that the
+ routine should return if no triggers need to be serviced initially. If this
+ bit is not set the routine will block until one trigger handler is serviced
+ and then behave like the poll case servicing any remaining triggers
+ actually outstanding before returning. Normally the State parameter should
+ be simply initialised to zero and the result should be ignored, other
+ values/options are for internal use only. */
+TBIRES __TBISyncTrigger( TBIRES State, int Id );
+
+/* Call this routine to enable processing of triggers by signal handlers at
+ interrupt level. The State parameter value passed is returned by this
+ routine. The State.Sig.TrigMask field also specifies the initial
+ state of the interrupt mask register TXMASKI to be setup by the call.
+ The other parts of the State parameter are ignored unless the PRIV bit is
+ set in the SaveMask field. In this case the State.Sig.pCtx field specifies
+ the base of the stack to which the interrupt system should switch into
+ as it saves the state of the previously executing code. In this case the
+ thread will be unprivileged as it continues execution at the return
+ point of this routine and its future state will be effectively never
+ trusted to be valid. */
+TBIRES __TBIASyncTrigger( TBIRES State );
+
+/* Call this to swap soft threads executing at the background processing level.
+ The TBIRES returned to the new thread will be the same as the NextThread
+ value specified to the call. The NextThread.Switch.pCtx value specifies
+ which thread context to restore and the NextThread.Switch.pPara value can
+ hold an arbitrary expression to be passed between the threads. The saved
+ state of the previous thread will be stored in a TBICTX descriptor created
+ on its stack and the address of this will be stored into the *rpSaveCtx
+ location specified. */
+TBIRES __TBISwitch( TBIRES NextThread, PTBICTX *rpSaveCtx );
+
+/* Call this to initialise a stack frame ready for further use, up to four
+ 32-bit arguments may be specified after the fixed args to be passed via
+ the new stack pStack to the routine specified via fnMain. If the
+ main-line routine ever returns the thread will operate as if main itself
+ had returned and terminate with the return code given. */
+typedef int (*PTBIMAINFN)( TBIRES Arg /*, <= 4 additional 32-bit args */ );
+PTBICTX __TBISwitchInit( void *pStack, PTBIMAINFN fnMain, ... );
+
+/* Call this to resume a thread from a saved synchronous TBICTX state.
+ The TBIRES returned to the new thread will be the same as the NextThread
+ value specified to the call. The NextThread.Switch.pCtx value specifies
+ which thread context to restore and the NextThread.Switch.pPara value can
+ hold an arbitrary expression to be passed between the threads. The context
+ of the calling thread is lost and this routine never returns to the
+ caller. The TrigsMask value supplied is ored into TXMASKI to enable
+ interrupts after the context of the new thread is established. */
+void __TBISyncResume( TBIRES NextThread, int TrigsMask );
+
+/* Call these routines to save and restore the extended states of
+ scheduled tasks. */
+void *__TBICtxSave( TBIRES State, void *pExt );
+void *__TBICtxRestore( TBIRES State, void *pExt );
+
+#ifdef TBI_1_4
+#ifdef TBI_FASTINT_1_4
+/* Call these routines to copy the GP state to a separate buffer
+ * Only necessary for context switching.
+ */
+PTBICTXGP __TBICtx2SaveCrit( PTBICTX2 pCurrentCtx, PTBICTX2 pSaveCtx );
+void *__TBICtx2SaveGP( PTBICTXGP pCurrentCtxGP, PTBICTXGP pSaveCtxGP );
+
+/* Call these routines to save and restore the extended states of
+ scheduled tasks. */
+void *__TBICtx2Save( PTBICTXGP pCtxGP, short SaveMask, void *pExt );
+void *__TBICtx2Restore( PTBICTX2 pCtx, short SaveMask, void *pExt );
+#endif
+
+/* If FPAC flag is set then significant FPU context exists. Call these routines
+ to save and restore it */
+void *__TBICtxFPUSave( TBIRES State, void *pExt );
+void *__TBICtxFPURestore( TBIRES State, void *pExt );
+
+#ifdef TBI_FASTINT_1_4
+extern void *__TBICtx2FPUSave (PTBICTXGP, short, void*);
+extern void *__TBICtx2FPURestore (PTBICTXGP, short, void*);
+#endif
+#endif
+
+#ifdef TBI_1_4
+/* Call these routines to save and restore DSPRAM. */
+void *__TBIDspramSaveA (short DspramSizes, void *pExt);
+void *__TBIDspramSaveB (short DspramSizes, void *pExt);
+void *__TBIDspramRestoreA (short DspramSizes, void *pExt);
+void *__TBIDspramRestoreB (short DspramSizes, void *pExt);
+#endif
+
+/* This routine should be used at the entrypoint of interrupt handlers to
+ re-enable higher priority interrupts and/or save state from the previously
+ executing background code. State is a TBIRES.Sig parameter with NoNestMask
+ indicating the triggers (if any) that should remain disabled and SaveMask
+ CBUF bit indicating if the hardware catch buffer is dirty. Optionally
+ any number of extended state bits X??? including XCBF can be specified to
+ force a nested state save call to __TBICtxSave before the current routine
+ continues. (In the latter case __TBICtxRestore should be called to restore
+ any extended states before the background thread of execution is resumed)
+
+ By default (no X??? bits specified in SaveMask) this routine performs a
+ sub-call to __TBICtxSave with the pExt and State parameters specified IF
+ some triggers could be serviced while the current interrupt handler
+ executes and the hardware catch buffer is actually dirty. In this case
+ this routine provides the XCBF bit in State.Sig.SaveMask to force the
+ __TBICtxSave to extract the current catch state.
+
+ The NoNestMask parameter should normally indicate that the same or lower
+ triggers than those provoking the current handler call should not be
+ serviced in nested calls, zero may be specified if all possible interrupts
+ are to be allowed.
+
+ The TBIRES.Sig value returned will be similar to the State parameter
+ specified with the XCBF bit ORed into its SaveMask if a context save was
+ required and fewer bits set in its TrigMask corresponding to the same/lower
+ priority interrupt triggers still not enabled. */
+TBIRES __TBINestInts( TBIRES State, void *pExt, int NoNestMask );
+
+/* This routine causes the TBICTX structure specified in State.Sig.pCtx to
+ be restored. This implies that execution will not return to the caller.
+ The State.Sig.TrigMask field will be restored during the context switch
+ such that any immediately occurring interrupts occur in the context of the
+ newly specified task. The State.Sig.SaveMask parameter is ignored. */
+void __TBIASyncResume( TBIRES State );
+
+/* Call this routine to enable fastest possible processing of one or more
+ interrupt triggers via a unified signal handler. The handler concerned
+ must simply return after servicing the related hardware.
+ The State.Sig.TrigMask parameter indicates the interrupt triggers to be
+ enabled and the Thin.Thin.fnHandler specifies the routine to call and
+ the whole Thin parameter value will be passed to this routine unaltered as
+ its first parameter. */
+void __TBIASyncThin( TBIRES State, TBIRES Thin );
+
+/* Do this before performing your own direct spin-lock access - use TBI_LOCK */
+int __TBILock( void );
+
+/* Do this after performing your own direct spin-lock access - use TBI_UNLOCK */
+void __TBIUnlock( int TrigState );
+
+/* Obtain and release global critical section lock - only stops execution
+ of interrupts on this thread and similar critical section code on other
+ local threads - use TBI_CRITON or TBI_CRITOFF */
+int __TBICritOn( void );
+void __TBICritOff( int TrigState );
+
+/* Change INTS (TXMASKI) - return old state - use TBI_INTSX */
+int __TBIIntsX( int NewMask );
+
+/* Change TRIGS (TXMASK) - return old state - use TBI_TRIGSX */
+int __TBITrigsX( int NewMask );
+
+/* This function initialises a timer for first use, only the TBID_ISTAT_BIT
+ of the Id parameter is used to indicate which timer is to be modified. The
+ Wait value should either be zero to disable the timer concerned or be in
+ the recommended TBI_TIMERWAIT_* range to specify the delay required before
+ the first timer trigger occurs.
+
+ The TBID_ISTAT_BIT of the Id parameter similarly affects all other timer
+ support functions (see below). */
+void __TBITimerCtrl( int Id, int Wait );
+
+/* This routine returns a 64-bit time stamp value that is initialised to zero
+ via a __TBITimerCtrl timer enabling call. */
+long long __TBITimeStamp( int Id );
+
+/* To manage a periodic timer each period elapsed should be subtracted from
+ the current timer value to attempt to set up the next timer trigger. The
+ Wait parameter should be a value in the recommended TBI_TIMERWAIT_* range.
+ The return value is the new aggregate value that the timer was updated to,
+ if this is less than zero then a timer trigger is guaranteed to be
+ generated after the number of ticks implied, if a positive result is
+ returned either iterative or step-wise corrective action must be taken to
+ resynchronise the timer and hence provoke a future timer trigger. */
+int __TBITimerAdd( int Id, int Wait );
+
+/* String table search function, pStart is first entry to check or NULL,
+ pStr is string data to search for and MatchLen is either length of string
+ to compare for an exact match or negative length to compare for partial
+ match. */
+const TBISTR *__TBIFindStr( const TBISTR *pStart,
+ const char *pStr, int MatchLen );
+
+/* String table translate function, pStr is text to translate and Len is
+ its length. Value returned may not be a string pointer if the
+ translation value is really some other type, 64-bit alignment of the return
+ pointer is guaranteed so almost any type including a structure could be
+ located with this routine. */
+const void *__TBITransStr( const char *pStr, int Len );
+
+
+
+/* Arbitrary physical memory access windows, use different Channels to avoid
+ conflict/thrashing within a single piece of code. */
+void *__TBIPhysAccess( int Channel, int PhysAddr, int Bytes );
+void __TBIPhysRelease( int Channel, void *pLinAddr );
+
+#ifdef METAC_1_0
+/* Data cache function nullified because data cache is off */
+#define TBIDCACHE_FLUSH( pAddr )
+#define TBIDCACHE_PRELOAD( Type, pAddr ) ((Type) (pAddr))
+#define TBIDCACHE_REFRESH( Type, pAddr ) ((Type) (pAddr))
+#endif
+#ifdef METAC_1_1
+/* To flush a single cache line from the data cache using a linear address */
+#define TBIDCACHE_FLUSH( pAddr ) ((volatile char *) \
+ (((unsigned int) (pAddr))>>LINSYSLFLUSH_S))[0] = 0
+
+extern void * __builtin_dcache_preload (void *);
+
+/* Try to ensure that the data at the address concerned is in the cache */
+#define TBIDCACHE_PRELOAD( Type, Addr ) \
+ ((Type) __builtin_dcache_preload ((void *)(Addr)))
+
+extern void * __builtin_dcache_refresh (void *);
+
+/* Flush any old version of data from address and re-load a new copy */
+#define TBIDCACHE_REFRESH( Type, Addr ) __extension__ ({ \
+ Type __addr = (Type)(Addr); \
+ (void)__builtin_dcache_refresh ((void *)(((unsigned int)(__addr))>>6)); \
+ __addr; })
+
+#endif
+#ifndef METAC_1_0
+#ifndef METAC_1_1
+/* Support for DCACHE builtin */
+extern void __builtin_dcache_flush (void *);
+
+/* To flush a single cache line from the data cache using a linear address */
+#define TBIDCACHE_FLUSH( Addr ) \
+ __builtin_dcache_flush ((void *)(Addr))
+
+extern void * __builtin_dcache_preload (void *);
+
+/* Try to ensure that the data at the address concerned is in the cache */
+#define TBIDCACHE_PRELOAD( Type, Addr ) \
+ ((Type) __builtin_dcache_preload ((void *)(Addr)))
+
+extern void * __builtin_dcache_refresh (void *);
+
+/* Flush any old version of data from address and re-load a new copy */
+#define TBIDCACHE_REFRESH( Type, Addr ) \
+ ((Type) __builtin_dcache_refresh ((void *)(Addr)))
+
+#endif
+#endif
+
+/* Flush the MMCU cache by storing zero to the first int-sized location at
+ LINSYSCFLUSH_MMCU (accessed through a volatile pointer so the store is
+ not optimised away).
+ NOTE(review): this expands to a bare { ... } block, not do { } while (0),
+ so "if (x) TBIMCACHE_FLUSH(); else ..." will not compile - wrap such uses
+ in braces. */
+#define TBIMCACHE_FLUSH() { ((volatile int *) LINSYSCFLUSH_MMCU)[0] = 0; }
+
+#ifdef METAC_2_1
+/*
+ * Obtain the MMU table entry for the specified address.
+ * The low 6 bits of the address are cleared before the TBIXCACHE_RD read.
+ * The mask is spelled ~0x3F because left-shifting a negative value
+ * (-1<<6) is undefined behaviour in ISO C; the resulting bit pattern is the
+ * same on this two's complement target.
+ */
+#define TBIMTABLE_LEAFDATA(ADDR) TBIXCACHE_RD((int)(ADDR) & ~0x3F)
+
+#ifndef __ASSEMBLY__
+/*
+ * Obtain the full MMU table entry for the specified address.
+ * Evaluates to a TBIRES whose Val member holds the TBIXCACHE_RL result for
+ * the address with its low 6 bits cleared.
+ */
+#define TBIMTABLE_DATA(ADDR) __extension__ ({ TBIRES __p; \
+ __p.Val = TBIXCACHE_RL((int)(ADDR) & ~0x3F); \
+ __p; })
+#endif
+#endif
+
+/*
+ * Combine a physical base address, and a linear address
+ * Internal use only
+ *
+ * Takes bits 31..12 from PHYS and the bits of LIN selected by LMASK and
+ * yields their sum as a void *. The whole expansion is parenthesised so
+ * that the cast cannot bind to a postfix operator written at the call site.
+ */
+#define _TBIMTABLE_LIN2PHYS(PHYS, LIN, LMASK) ((void*)(((int)(PHYS)&0xFFFFF000)\
+ +((int)(LIN)&(LMASK))))
+
+/*
+ * Convert a linear to a physical address.
+ * Yields 0 when CRLINPHY0_VAL_BIT is clear in LEAFDATA; otherwise combines
+ * bits 31..12 of LEAFDATA with the low 12 bits of ADDR.
+ * LEAFDATA is evaluated twice, so it must be free of side effects.
+ */
+#define TBIMTABLE_LIN2PHYS(LEAFDATA, ADDR) \
+ (((LEAFDATA) & CRLINPHY0_VAL_BIT) \
+ ? _TBIMTABLE_LIN2PHYS(LEAFDATA, ADDR, 0x00000FFF) \
+ : 0)
+
+/* Debug support - using external debugger or host */
+void __TBIDumpSegListEntries( void );
+/* Takes a format string plus variable arguments (presumably printf-style) */
+void __TBILogF( const char *pFmt, ... );
+/*
+ * Judging by the parameter names: source file, line number and the text of
+ * the failed expression - confirm against the implementation.
+ */
+void __TBIAssert( const char *pFile, int LineNum, const char *pExp );
+void __TBICont( const char *pMsg, ... ); /* TBIAssert -> 'wait for continue' */
+
+/* Array of signal name data for debug messages */
+extern const char __TBISigNames[];
+#endif /* ifndef __ASSEMBLY__ */
+
+
+
+/*
+ * Scale of sub-strings in the __TBISigNames string list.
+ * TBI_SIGNAME_SCALE_S is the matching shift count (4 == 1 << 2).
+ */
+#define TBI_SIGNAME_SCALE 4
+#define TBI_SIGNAME_SCALE_S 2
+
+#define TBI_1_3
+
+#ifdef TBI_1_3
+
+#ifndef __ASSEMBLY__
+/*
+ * Issue a CACHERD instruction on ADDR (offset #0) and evaluate to the int
+ * value it produces. ADDR is evaluated once.
+ */
+#define TBIXCACHE_RD(ADDR) __extension__ ({\
+ void * __Addr = (void *)(ADDR); \
+ int __Data; \
+ __asm__ volatile ( "CACHERD\t%0,[%1+#0]" : \
+ "=r" (__Data) : "r" (__Addr) ); \
+ __Data; })
+
+/*
+ * Issue a CACHERL instruction on ADDR (offset #0) and evaluate to the
+ * long long value it produces; %0/%t0 name the two halves of the result.
+ */
+#define TBIXCACHE_RL(ADDR) __extension__ ({\
+ void * __Addr = (void *)(ADDR); \
+ long long __Data; \
+ __asm__ volatile ( "CACHERL\t%0,%t0,[%1+#0]" : \
+ "=d" (__Data) : "r" (__Addr) ); \
+ __Data; })
+
+/*
+ * Issue a CACHEWD instruction writing the int DATA to ADDR (offset #0).
+ * Statement-like macro; both arguments are evaluated once.
+ */
+#define TBIXCACHE_WD(ADDR, DATA) do {\
+ void * __Addr = (void *)(ADDR); \
+ int __Data = DATA; \
+ __asm__ volatile ( "CACHEWD\t[%0+#0],%1" : \
+ : "r" (__Addr), "r" (__Data) ); } while(0)
+
+/*
+ * Issue a CACHEWL instruction writing the long long DATA to ADDR (offset #0).
+ * NOTE(review): the 64-bit operand uses the "r" constraint here whereas
+ * TBIXCACHE_RL uses "d" - confirm the asymmetry is intended.
+ */
+#define TBIXCACHE_WL(ADDR, DATA) do {\
+ void * __Addr = (void *)(ADDR); \
+ long long __Data = DATA; \
+ __asm__ volatile ( "CACHEWL\t[%0+#0],%1,%t1" : \
+ : "r" (__Addr), "r" (__Data) ); } while(0)
+
+#ifdef TBI_4_0
+
+/*
+ * Cache maintenance helpers, only available when TBI_4_0 is defined.
+ * Each one is a TBIXCACHE_WD write of the matching CACHEW_* operation code
+ * to ADDR.
+ */
+#define TBICACHE_FLUSH_L1D_L2(ADDR) \
+ TBIXCACHE_WD(ADDR, CACHEW_FLUSH_L1D_L2)
+#define TBICACHE_WRITEBACK_L1D_L2(ADDR) \
+ TBIXCACHE_WD(ADDR, CACHEW_WRITEBACK_L1D_L2)
+#define TBICACHE_INVALIDATE_L1D(ADDR) \
+ TBIXCACHE_WD(ADDR, CACHEW_INVALIDATE_L1D)
+#define TBICACHE_INVALIDATE_L1D_L2(ADDR) \
+ TBIXCACHE_WD(ADDR, CACHEW_INVALIDATE_L1D_L2)
+#define TBICACHE_INVALIDATE_L1DTLB(ADDR) \
+ TBIXCACHE_WD(ADDR, CACHEW_INVALIDATE_L1DTLB)
+#define TBICACHE_INVALIDATE_L1I(ADDR) \
+ TBIXCACHE_WD(ADDR, CACHEW_INVALIDATE_L1I)
+#define TBICACHE_INVALIDATE_L1ITLB(ADDR) \
+ TBIXCACHE_WD(ADDR, CACHEW_INVALIDATE_L1ITLB)
+
+#endif /* TBI_4_0 */
+#endif /* ifndef __ASSEMBLY__ */
+
+/*
+ * Calculate linear PC value from real PC and Minim mode control, the LSB of
+ * the result returned indicates if address compression has occurred.
+ *
+ * When TXPRIVEXT_MINIMON_BIT is clear in TXPRIVEXT the value is returned
+ * unchanged. PCVal is evaluated several times, so it must be free of side
+ * effects.
+ */
+#ifndef __ASSEMBLY__
+#define METAG_LINPC( PCVal ) (\
+ ( (TBI_GETREG(TXPRIVEXT) & TXPRIVEXT_MINIMON_BIT) != 0 ) ? ( \
+ ( ((PCVal) & 0x00900000) == 0x00900000 ) ? \
+ (((PCVal) & 0xFFE00000) + (((PCVal) & 0x001FFFFC)>>1) + 1) : \
+ ( ((PCVal) & 0x00800000) == 0x00000000 ) ? \
+ (((PCVal) & 0xFF800000) + (((PCVal) & 0x007FFFFC)>>1) + 1) : \
+ (PCVal) ) \
+ : (PCVal) )
+#define METAG_LINPC_X2BIT 0x00000001 /* Make (Size>>1) if compressed */
+
+/*
+ * Convert an arbitrary Linear address into a valid Minim PC or return 0.
+ * LinVal is evaluated several times, so it must be free of side effects.
+ */
+#define METAG_PCMINIM( LinVal ) (\
+ (((LinVal) & 0x00980000) == 0x00880000) ? \
+ (((LinVal) & 0xFFE00000) + (((LinVal) & 0x000FFFFE)<<1)) : \
+ (((LinVal) & 0x00C00000) == 0x00000000) ? \
+ (((LinVal) & 0xFF800000) + (((LinVal) & 0x003FFFFE)<<1)) : 0 )
+
+/*
+ * Reverse a METAG_LINPC conversion step to return the original PCVal.
+ * If METAG_LINPC_X2BIT is set in LinVal the address is expanded with
+ * METAG_PCMINIM, otherwise it is passed through; either way the two low
+ * bits of the result are cleared.
+ * LinVal is parenthesised in the flag test so that an argument containing
+ * an operator of lower precedence than '&' (e.g. a | b) is tested as a
+ * whole rather than having only its last operand masked.
+ */
+#define METAG_PCLIN( LinVal ) ( 0xFFFFFFFC & (\
+ ( ((LinVal) & METAG_LINPC_X2BIT) != 0 ) ? METAG_PCMINIM( LinVal ) : \
+ (LinVal) ))
+
+/*
+ * Flush the MMCU Table cache privately for each thread. On cores that do not
+ * support per-thread flushing it will flush all threads mapping data.
+ *
+ * Writes 0 to the location LINSYSCFLUSH_TxMMCU_BASE +
+ * LINSYSCFLUSH_TxMMCU_STRIDE * Thread.
+ */
+#define TBIMCACHE_TFLUSH(Thread) do {\
+ ((volatile int *)( LINSYSCFLUSH_TxMMCU_BASE + \
+ (LINSYSCFLUSH_TxMMCU_STRIDE*(Thread)) ))[0] = 0; \
+ } while(0)
+
+/*
+ * To flush a single linear-matched cache line from the code cache. In
+ * cases where Minim is possible the METAG_LINPC operation must be used
+ * to pre-process the address being flushed.
+ */
+#define TBIICACHE_FLUSH( pAddr ) TBIXCACHE_WD (pAddr, CACHEW_ICACHE_BIT)
+
+/*
+ * To flush a single linear-matched mapping from code/data MMU table cache.
+ * CACHEW_ICACHE_BIT is added to CACHEW_TLBFLUSH_BIT only when SegType is
+ * TBID_SEGTYPE_TEXT.
+ */
+#define TBIMCACHE_AFLUSH( pAddr, SegType ) \
+ TBIXCACHE_WD(pAddr, CACHEW_TLBFLUSH_BIT + ( \
+ ((SegType) == TBID_SEGTYPE_TEXT) ? CACHEW_ICACHE_BIT : 0 ))
+
+/*
+ * To flush translation data corresponding to a range of addresses without
+ * using TBIMCACHE_TFLUSH to flush all of this thread's translation data, it
+ * is necessary to know what stride (>= 4K) must be used to flush a specific
+ * region.
+ *
+ * For example direct mapped regions use the maximum page size (512K) which may
+ * mean that only one flush is needed to cover the sub-set of the direct
+ * mapped area used since it was set up.
+ *
+ * The function returns the stride on which flushes should be performed.
+ *
+ * If 0 is returned then the region is not subject to MMU caching; if -1 is
+ * returned then this indicates that only TBIMCACHE_TFLUSH can be used to
+ * flush the region concerned rather than TBIMCACHE_AFLUSH which this
+ * function is designed to support.
+ */
+int __TBIMMUCacheStride( const void *pStart, int Bytes );
+
+/*
+ * This function will use the above lower level functions to achieve an MMU
+ * table data flush in as optimal a fashion as possible. On a system that
+ * supports linear address based caching this function will also call the
+ * code or data cache flush functions to maintain address/data coherency.
+ *
+ * SegType should be TBID_SEGTYPE_TEXT if the address range is for code or
+ * any other value such as TBID_SEGTYPE_DATA for data. If an area is
+ * used in both ways then call this function twice; once for each.
+ */
+void __TBIMMUCacheFlush( const void *pStart, int Bytes, int SegType );
+
+/*
+ * Cached Core mode setup and flush functions allow one code and one data
+ * region of the corresponding global or local cache partition size to be
+ * locked into the corresponding cache memory. This prevents normal LRU
+ * logic discarding the code or data and avoids write-thru bandwidth in
+ * data areas. Code mappings are selected by specifying TBID_SEGTYPE_TEXT
+ * for SegType, otherwise data mappings are created.
+ *
+ * Mode supplied should always contain the VALID bit and WINx selection data.
+ * Data areas will be mapped read-only if the WRITE bit is not added.
+ *
+ * The address returned by the Opt function will either be the same as that
+ * passed in (if optimisation cannot be supported) or the base of the new core
+ * cached region in linear address space. The returned address must be passed
+ * into the End function to remove the mapping when required. If a non-core
+ * cached memory address is passed into it the End function has no effect.
+ * Note that the region accessed MUST be flushed from the appropriate cache
+ * before the End function is called to deliver correct operation.
+ */
+void *__TBICoreCacheOpt( const void *pStart, int Bytes, int SegType, int Mode );
+void __TBICoreCacheEnd( const void *pOpt, int Bytes, int SegType );
+
+/*
+ * Optimise physical access channel and flush side effects before releasing
+ * the channel. If pStart is NULL the whole region must be flushed and this is
+ * done automatically by the channel release function if optimisation is
+ * enabled. Flushing the specific region that may have been accessed before
+ * release should optimise this process. On physically cached systems we do
+ * not flush the code/data caches; only the MMU table data needs flushing.
+ */
+void __TBIPhysOptim( int Channel, int IMode, int DMode );
+void __TBIPhysFlush( int Channel, const void *pStart, int Bytes );
+#endif
+#endif /* ifdef TBI_1_3 */
+
+#endif /* _ASM_METAG_TBX_H_ */
diff --git a/arch/metag/include/asm/tcm.h b/arch/metag/include/asm/tcm.h
new file mode 100644
index 00000000000..7711c317b1d
--- /dev/null
+++ b/arch/metag/include/asm/tcm.h
@@ -0,0 +1,30 @@
+#ifndef __ASM_TCM_H__
+#define __ASM_TCM_H__
+
+#include <linux/ioport.h>
+#include <linux/list.h>
+
+/*
+ * Record of one TCM allocation.
+ * NOTE(review): field meanings below are read off the names and the
+ * tcm_alloc()/tcm_free() prototypes - confirm against the allocator.
+ */
+struct tcm_allocation {
+ struct list_head list; /* linkage into a list of allocations */
+ unsigned int tag; /* presumably the tag passed to tcm_alloc() */
+ unsigned long addr; /* presumably the address of the allocation */
+ unsigned long size; /* presumably the allocation length */
+};
+
+/*
+ * TCM memory region descriptor.
+ * Pairs a tag with a struct resource; passed to tcm_add_region().
+ */
+struct tcm_region {
+ unsigned int tag;
+ struct resource res;
+};
+
+/* All-ones tag value; presumably what tcm_lookup_tag() yields on no match */
+#define TCM_INVALID_TAG 0xffffffff
+
+/*
+ * TCM allocator interface. Allocation and free both take the tag and the
+ * length; free additionally takes the address being released.
+ * NOTE(review): failure return values are not visible from this header.
+ */
+unsigned long tcm_alloc(unsigned int tag, size_t len);
+void tcm_free(unsigned int tag, unsigned long addr, size_t len);
+unsigned int tcm_lookup_tag(unsigned long p);
+
+int tcm_add_region(struct tcm_region *reg);
+
+#endif
diff --git a/arch/metag/include/asm/thread_info.h b/arch/metag/include/asm/thread_info.h
new file mode 100644
index 00000000000..0ecd34d8b5f
--- /dev/null
+++ b/arch/metag/include/asm/thread_info.h
@@ -0,0 +1,155 @@
+/* thread_info.h: Meta low-level thread information
+ *
+ * Copyright (C) 2002 David Howells (dhowells@redhat.com)
+ * - Incorporating suggestions made by Linus Torvalds and Dave Miller
+ *
+ * Meta port by Imagination Technologies
+ */
+
+#ifndef _ASM_THREAD_INFO_H
+#define _ASM_THREAD_INFO_H
+
+#include <linux/compiler.h>
+#include <asm/page.h>
+
+#ifndef __ASSEMBLY__
+#include <asm/processor.h>
+#endif
+
+/*
+ * low level task data that entry.S needs immediate access to
+ * - this struct should fit entirely inside of one cache line
+ * - this struct shares the supervisor stack pages
+ * - if the contents of this structure are changed, the assembly constants must
+ * also be changed
+ */
+#ifndef __ASSEMBLY__
+
+/*
+ * This must be 8 byte aligned so we can ensure stack alignment.
+ * The layout is mirrored by assembly constants (see the note above), so
+ * fields must not be reordered without updating those.
+ */
+struct thread_info {
+ struct task_struct *task; /* main task structure */
+ struct exec_domain *exec_domain; /* execution domain */
+ unsigned long flags; /* low level flags */
+ unsigned long status; /* thread-synchronous flags */
+ u32 cpu; /* current CPU */
+ int preempt_count; /* 0 => preemptable, <0 => BUG */
+
+ mm_segment_t addr_limit; /* thread address space */
+ struct restart_block restart_block;
+
+ /*
+ * Zero-length marker for the memory directly after the structure;
+ * kstack_end() treats base + sizeof(struct thread_info) as the stack end.
+ */
+ u8 supervisor_stack[0];
+};
+
+#else /* !__ASSEMBLY__ */
+
+#include <generated/asm-offsets.h>
+
+#endif
+
+#define PREEMPT_ACTIVE 0x10000000
+
+/*
+ * Requested kernel stack size as a power of two: 2^12 with CONFIG_4KSTACKS,
+ * otherwise 2^13.
+ */
+#ifdef CONFIG_4KSTACKS
+#define THREAD_SHIFT 12
+#else
+#define THREAD_SHIFT 13
+#endif
+
+/*
+ * Page allocation order for the stack. When THREAD_SHIFT is smaller than
+ * PAGE_SHIFT the order is clamped to 0, so THREAD_SIZE is never less than
+ * one page.
+ */
+#if THREAD_SHIFT >= PAGE_SHIFT
+#define THREAD_SIZE_ORDER (THREAD_SHIFT - PAGE_SHIFT)
+#else
+#define THREAD_SIZE_ORDER 0
+#endif
+
+#define THREAD_SIZE (PAGE_SIZE << THREAD_SIZE_ORDER)
+
+/* One eighth of the stack; presumably the low-stack warning threshold */
+#define STACK_WARN (THREAD_SIZE/8)
+/*
+ * macros/functions for gaining access to the thread information structure
+ */
+#ifndef __ASSEMBLY__
+
+/*
+ * Static initialiser for a struct thread_info bound to task object tsk
+ * (passed by name; its address is taken). Starts on CPU 0 with no flags,
+ * the KERNEL_DS address limit and no restartable syscall pending. Fields
+ * not listed (e.g. status) are zero-initialised by the language.
+ */
+#define INIT_THREAD_INFO(tsk) \
+{ \
+ .task = &tsk, \
+ .exec_domain = &default_exec_domain, \
+ .flags = 0, \
+ .cpu = 0, \
+ .preempt_count = INIT_PREEMPT_COUNT, \
+ .addr_limit = KERNEL_DS, \
+ .restart_block = { \
+ .fn = do_no_restart_syscall, \
+ }, \
+}
+
+/* Shorthands for the two members of init_thread_union */
+#define init_thread_info (init_thread_union.thread_info)
+#define init_stack (init_thread_union.stack)
+
+/*
+ * how to get the current stack pointer from C: a global register variable
+ * bound to the A0StP register
+ */
+register unsigned long current_stack_pointer asm("A0StP") __used;
+
+/*
+ * Locate the thread information struct from C: round the current stack
+ * pointer down to a THREAD_SIZE boundary and treat that address as the
+ * struct thread_info.
+ */
+static inline struct thread_info *current_thread_info(void)
+{
+ unsigned long stack_base = current_stack_pointer & ~(THREAD_SIZE - 1);
+
+ return (struct thread_info *)stack_base;
+}
+
+#define __HAVE_ARCH_KSTACK_END
+/*
+ * Return non-zero when addr is exactly the first byte after the
+ * struct thread_info at the base of the THREAD_SIZE-aligned area that
+ * contains addr.
+ */
+static inline int kstack_end(void *addr)
+{
+ unsigned long area_base = (unsigned long) addr & ~(THREAD_SIZE - 1);
+ void *stack_limit = (void *) (area_base + sizeof(struct thread_info));
+
+ return addr == stack_limit;
+}
+
+#endif
+
+/*
+ * thread information flags
+ * - these are process state flags that various assembly files may need to
+ * access
+ * - pending work-to-be-done flags are in LSW (least significant word)
+ * - other flags in MSW (most significant word)
+ *
+ * The values below are bit numbers within thread_info::flags, not masks.
+ */
+#define TIF_SYSCALL_TRACE 0 /* syscall trace active */
+#define TIF_SIGPENDING 1 /* signal pending */
+#define TIF_NEED_RESCHED 2 /* rescheduling necessary */
+#define TIF_SINGLESTEP 3 /* restore singlestep on return to user
+ mode */
+#define TIF_SYSCALL_AUDIT 4 /* syscall auditing active */
+#define TIF_SECCOMP 5 /* secure computing */
+#define TIF_RESTORE_SIGMASK 6 /* restore signal mask in do_signal() */
+#define TIF_NOTIFY_RESUME 7 /* callback before returning to user */
+#define TIF_POLLING_NRFLAG 8 /* true if poll_idle() is polling
+ TIF_NEED_RESCHED */
+#define TIF_MEMDIE 9 /* is terminating due to OOM killer */
+#define TIF_SYSCALL_TRACEPOINT 10 /* syscall tracepoint instrumentation */
+
+
+/*
+ * Single-bit masks for the TIF_* bit numbers. No mask is defined here for
+ * TIF_POLLING_NRFLAG or TIF_MEMDIE.
+ */
+#define _TIF_SYSCALL_TRACE (1<<TIF_SYSCALL_TRACE)
+#define _TIF_SIGPENDING (1<<TIF_SIGPENDING)
+#define _TIF_NEED_RESCHED (1<<TIF_NEED_RESCHED)
+#define _TIF_SINGLESTEP (1<<TIF_SINGLESTEP)
+#define _TIF_SYSCALL_AUDIT (1<<TIF_SYSCALL_AUDIT)
+#define _TIF_SECCOMP (1<<TIF_SECCOMP)
+#define _TIF_NOTIFY_RESUME (1<<TIF_NOTIFY_RESUME)
+#define _TIF_RESTORE_SIGMASK (1<<TIF_RESTORE_SIGMASK)
+#define _TIF_SYSCALL_TRACEPOINT (1<<TIF_SYSCALL_TRACEPOINT)
+
+/* work to do in syscall trace */
+#define _TIF_WORK_SYSCALL_MASK (_TIF_SYSCALL_TRACE | _TIF_SINGLESTEP | \
+ _TIF_SYSCALL_AUDIT | _TIF_SECCOMP | \
+ _TIF_SYSCALL_TRACEPOINT)
+
+/* work to do on any return to u-space */
+#define _TIF_ALLWORK_MASK (_TIF_SYSCALL_TRACE | _TIF_SIGPENDING | \
+ _TIF_NEED_RESCHED | _TIF_SYSCALL_AUDIT | \
+ _TIF_SINGLESTEP | _TIF_RESTORE_SIGMASK | \
+ _TIF_NOTIFY_RESUME)
+
+/*
+ * work to do on interrupt/exception return: _TIF_ALLWORK_MASK with the
+ * syscall trace, syscall audit and singlestep bits removed
+ */
+#define _TIF_WORK_MASK (_TIF_ALLWORK_MASK & ~(_TIF_SYSCALL_TRACE | \
+ _TIF_SYSCALL_AUDIT | _TIF_SINGLESTEP))
+
+/* Test the TIF_POLLING_NRFLAG thread flag of task t */
+#define tsk_is_polling(t) test_tsk_thread_flag(t, TIF_POLLING_NRFLAG)
+
+#endif /* _ASM_THREAD_INFO_H */
diff --git a/arch/metag/include/asm/tlb.h b/arch/metag/include/asm/tlb.h
new file mode 100644
index 00000000000..048282f1cc1
--- /dev/null
+++ b/arch/metag/include/asm/tlb.h
@@ -0,0 +1,36 @@
+#ifndef __ASM_METAG_TLB_H
+#define __ASM_METAG_TLB_H
+
+#include <asm/cacheflush.h>
+#include <asm/page.h>
+
+/* Note, read http://lkml.org/lkml/2004/1/15/6 */
+
+#ifdef CONFIG_METAG_META12
+
+/*
+ * Called before a vma is unmapped: unless the whole mm is being torn down
+ * (tlb->fullmm), flush the cache for the vma's address range.
+ * Macro arguments are parenthesised before being dereferenced so that any
+ * pointer expression can be passed, matching tlb_flush() below.
+ */
+#define tlb_start_vma(tlb, vma) \
+ do { \
+ if (!(tlb)->fullmm) \
+ flush_cache_range((vma), (vma)->vm_start, \
+ (vma)->vm_end); \
+ } while (0)
+
+/*
+ * Called after a vma has been unmapped: unless the whole mm is being torn
+ * down, flush the TLB for the vma's address range.
+ */
+#define tlb_end_vma(tlb, vma) \
+ do { \
+ if (!(tlb)->fullmm) \
+ flush_tlb_range((vma), (vma)->vm_start, \
+ (vma)->vm_end); \
+ } while (0)
+
+
+#else
+
+/* Without CONFIG_METAG_META12 both hooks are no-ops */
+#define tlb_start_vma(tlb, vma) do { } while (0)
+#define tlb_end_vma(tlb, vma) do { } while (0)
+
+#endif
+
+/* No per-entry work is done when a single pte is removed */
+#define __tlb_remove_tlb_entry(tlb, pte, addr) do { } while (0)
+/* Flushing a gather structure flushes the TLB of the mm it refers to */
+#define tlb_flush(tlb) flush_tlb_mm((tlb)->mm)
+
+#include <asm-generic/tlb.h>
+
+#endif
diff --git a/arch/metag/include/asm/tlbflush.h b/arch/metag/include/asm/tlbflush.h
new file mode 100644
index 00000000000..566acf918a6
--- /dev/null
+++ b/arch/metag/include/asm/tlbflush.h
@@ -0,0 +1,77 @@
+#ifndef __ASM_METAG_TLBFLUSH_H
+#define __ASM_METAG_TLBFLUSH_H
+
+#include <linux/io.h>
+#include <linux/sched.h>
+#include <asm/metag_mem.h>
+#include <asm/pgalloc.h>
+
+/*
+ * TLB flushing:
+ *
+ * - flush_tlb() flushes the current mm struct TLBs
+ * - flush_tlb_all() flushes all processes' TLBs
+ * - flush_tlb_mm(mm) flushes the specified mm context's TLBs
+ * - flush_tlb_page(vma, vmaddr) flushes one page
+ * - flush_tlb_range(vma, start, end) flushes a range of pages
+ * - flush_tlb_kernel_range(start, end) flushes a range of kernel pages
+ * - flush_tlb_pgtables(mm, start, end) flushes a range of page tables
+ *
+ * FIXME: Meta 2 can flush single TLB entries.
+ *
+ */
+
+/*
+ * Flush cached MMU translations by writing 0 to a flush location: the
+ * per-thread one on uniprocessor Meta 2.1 builds, the global one otherwise.
+ */
+static inline void __flush_tlb(void)
+{
+#if defined(CONFIG_METAG_META21) && !defined(CONFIG_SMP)
+ /* flush TLB entries for just the current hardware thread */
+ int hw_thread = hard_processor_id();
+
+ metag_out32(0, (LINSYSCFLUSH_TxMMCU_BASE +
+ LINSYSCFLUSH_TxMMCU_STRIDE * hw_thread));
+#else
+ /* flush TLB entries for all hardware threads */
+ metag_out32(0, LINSYSCFLUSH_MMCU);
+#endif /* defined(CONFIG_METAG_META21) && !defined(CONFIG_SMP) */
+}
+
+/* All three are plain aliases for __flush_tlb() */
+#define flush_tlb() __flush_tlb()
+
+#define flush_tlb_all() __flush_tlb()
+
+#define local_flush_tlb_all() __flush_tlb()
+
+/*
+ * Flush the TLB for an address space. Nothing is done unless mm is the
+ * active mm of the current task.
+ */
+static inline void flush_tlb_mm(struct mm_struct *mm)
+{
+ if (mm != current->active_mm)
+ return;
+
+ __flush_tlb();
+}
+
+/*
+ * Flush the translation for one page. addr is ignored: this falls back to
+ * flush_tlb_mm() on the vma's mm.
+ */
+static inline void flush_tlb_page(struct vm_area_struct *vma,
+ unsigned long addr)
+{
+ flush_tlb_mm(vma->vm_mm);
+}
+
+/*
+ * Flush translations for a range of user pages. start and end are ignored:
+ * this falls back to flush_tlb_mm() on the vma's mm.
+ */
+static inline void flush_tlb_range(struct vm_area_struct *vma,
+ unsigned long start, unsigned long end)
+{
+ flush_tlb_mm(vma->vm_mm);
+}
+
+/*
+ * Flush translations after page tables change. start and end are ignored:
+ * this falls back to flush_tlb_mm() on mm.
+ */
+static inline void flush_tlb_pgtables(struct mm_struct *mm,
+ unsigned long start, unsigned long end)
+{
+ flush_tlb_mm(mm);
+}
+
+/*
+ * Flush translations for a range of kernel pages. start and end are
+ * ignored: this performs a full flush_tlb_all().
+ */
+static inline void flush_tlb_kernel_range(unsigned long start,
+ unsigned long end)
+{
+ flush_tlb_all();
+}
+
+#endif /* __ASM_METAG_TLBFLUSH_H */
+
diff --git a/arch/metag/include/asm/topology.h b/arch/metag/include/asm/topology.h
new file mode 100644
index 00000000000..23f5118f58d
--- /dev/null
+++ b/arch/metag/include/asm/topology.h
@@ -0,0 +1,53 @@
+#ifndef _ASM_METAG_TOPOLOGY_H
+#define _ASM_METAG_TOPOLOGY_H
+
+#ifdef CONFIG_NUMA
+
+/*
+ * sched_domains SD_NODE_INIT for Meta machines.
+ * Compound literal giving the initial tunables of a node-level
+ * struct sched_domain; only defined under CONFIG_NUMA.
+ */
+#define SD_NODE_INIT (struct sched_domain) { \
+ .parent = NULL, \
+ .child = NULL, \
+ .groups = NULL, \
+ .min_interval = 8, \
+ .max_interval = 32, \
+ .busy_factor = 32, \
+ .imbalance_pct = 125, \
+ .cache_nice_tries = 2, \
+ .busy_idx = 3, \
+ .idle_idx = 2, \
+ .newidle_idx = 0, \
+ .wake_idx = 0, \
+ .forkexec_idx = 0, \
+ .flags = SD_LOAD_BALANCE \
+ | SD_BALANCE_FORK \
+ | SD_BALANCE_EXEC \
+ | SD_BALANCE_NEWIDLE \
+ | SD_SERIALIZE, \
+ .last_balance = jiffies, \
+ .balance_interval = 1, \
+ .nr_balance_failed = 0, \
+}
+
+/*
+ * Node mapping helpers. Every CPU and every parent node maps to node 0.
+ * Each macro evaluates its argument once (cast to void) and discards it;
+ * the argument is always parenthesised so any expression may be passed.
+ */
+#define cpu_to_node(cpu) ((void)(cpu), 0)
+#define parent_node(node) ((void)(node), 0)
+
+/* The CPUs of any node are reported as cpu_online_mask */
+#define cpumask_of_node(node) ((void)(node), cpu_online_mask)
+
+/* Buses have no node affinity (-1), so cpumask_of_pcibus is cpu_all_mask */
+#define pcibus_to_node(bus) ((void)(bus), -1)
+#define cpumask_of_pcibus(bus) (pcibus_to_node(bus) == -1 ? \
+ cpu_all_mask : \
+ cpumask_of_node(pcibus_to_node(bus)))
+
+#endif
+
+/* Always reports multi-core capability */
+#define mc_capable() (1)
+
+const struct cpumask *cpu_coregroup_mask(unsigned int cpu);
+
+/* Per-CPU core masks, indexed by CPU number; defined elsewhere */
+extern cpumask_t cpu_core_map[NR_CPUS];
+
+/* Address of the cpu_core_map entry for cpu */
+#define topology_core_cpumask(cpu) (&cpu_core_map[cpu])
+
+#include <asm-generic/topology.h>
+
+#endif /* _ASM_METAG_TOPOLOGY_H */
diff --git a/arch/metag/include/asm/traps.h b/arch/metag/include/asm/traps.h
new file mode 100644
index 00000000000..ac808740bd8
--- /dev/null
+++ b/arch/metag/include/asm/traps.h
@@ -0,0 +1,48 @@
+/*
+ * Copyright (C) 2005,2008 Imagination Technologies
+ *
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License. See the file COPYING in the main directory of this archive
+ * for more details.
+ */
+
+#ifndef _METAG_TBIVECTORS_H
+#define _METAG_TBIVECTORS_H
+
+#ifndef __ASSEMBLY__
+
+#include <asm/tbx.h>
+
+/*
+ * Signature of a kick handler callback: the same leading arguments as
+ * kick_handler() plus a trailing int * (its meaning is not visible here).
+ */
+typedef TBIRES (*kick_irq_func_t)(TBIRES, int, int, int, PTBI, int *);
+
+extern TBIRES kick_handler(TBIRES, int, int, int, PTBI);
+/* List node carrying one callback; see kick_register_func() */
+struct kick_irq_handler {
+ struct list_head list;
+ kick_irq_func_t func;
+};
+
+/* Add/remove a kick_irq_handler; presumably to/from the kick callback list */
+extern void kick_register_func(struct kick_irq_handler *);
+extern void kick_unregister_func(struct kick_irq_handler *);
+
+extern void head_end(TBIRES, unsigned long);
+extern void restart_critical_section(TBIRES State);
+extern TBIRES tail_end_sys(TBIRES, int, int *);
+/* tail_end_sys() with the fixed extra arguments -1 and NULL */
+static inline TBIRES tail_end(TBIRES state)
+{
+ return tail_end_sys(state, -1, NULL);
+}
+
+/* Per-CPU PTBI pointer, plus an accessor taking an unsigned int index */
+DECLARE_PER_CPU(PTBI, pTBI);
+extern PTBI pTBI_get(unsigned int);
+
+extern int ret_from_fork(TBIRES arg);
+
+extern int do_page_fault(struct pt_regs *regs, unsigned long address,
+ unsigned int write_access, unsigned int trapno);
+
+/* Takes the same argument list as kick_handler() */
+extern TBIRES __TBIUnExpXXX(TBIRES State, int SigNum, int Triggers, int Inst,
+ PTBI pTBI);
+
+#endif
+
+#endif /* _METAG_TBIVECTORS_H */
diff --git a/arch/metag/include/asm/uaccess.h b/arch/metag/include/asm/uaccess.h
new file mode 100644
index 00000000000..0748b0a9798
--- /dev/null
+++ b/arch/metag/include/asm/uaccess.h
@@ -0,0 +1,241 @@
+#ifndef __METAG_UACCESS_H
+#define __METAG_UACCESS_H
+
+/*
+ * User space memory access functions
+ */
+#include <linux/sched.h>
+#include <linux/string.h>
+
+/* Access type passed as the first argument of access_ok() */
+#define VERIFY_READ 0
+#define VERIFY_WRITE 1
+
+/*
+ * The fs value determines whether argument validity checking should be
+ * performed or not. If get_fs() == USER_DS, checking is performed, with
+ * get_fs() == KERNEL_DS, checking is bypassed.
+ *
+ * For historical reasons, these macros are grossly misnamed.
+ */
+
+#define MAKE_MM_SEG(s) ((mm_segment_t) { (s) })
+
+#define KERNEL_DS MAKE_MM_SEG(0xFFFFFFFF)
+#define USER_DS MAKE_MM_SEG(PAGE_OFFSET)
+
+/* The limit is stored per thread in thread_info::addr_limit */
+#define get_ds() (KERNEL_DS)
+#define get_fs() (current_thread_info()->addr_limit)
+#define set_fs(x) (current_thread_info()->addr_limit = (x))
+
+#define segment_eq(a, b) ((a).seg == (b).seg)
+
+/* True when the current thread's limit is KERNEL_DS */
+#define __kernel_ok (segment_eq(get_fs(), KERNEL_DS))
+/*
+ * Explicitly allow NULL pointers here. Parts of the kernel such
+ * as readv/writev use access_ok to validate pointers, but want
+ * to allow NULL pointers for various reasons. NULL pointers are
+ * safe to allow through because the first page is not mappable on
+ * Meta.
+ *
+ * We also wish to avoid letting user code access the system area
+ * and the kernel half of the address space.
+ *
+ * The whole range [addr, addr + size) is validated, not just its first
+ * byte. A range is acceptable when:
+ * - addr is NULL, or
+ * - it starts at or above META_MEMORY_BASE and ends at or below
+ * PAGE_OFFSET, or
+ * - it starts at or above LINCORE_BASE and does not wrap past the top of
+ * the address space.
+ * Lengths are compared by subtraction so that addr + size cannot overflow.
+ * For size == 0 the result is the same as the previous start-only check
+ * (assumes META_MEMORY_BASE <= PAGE_OFFSET < LINCORE_BASE - confirm).
+ * addr and size are evaluated more than once and must be unsigned long.
+ */
+#define __user_bad(addr, size) \
+ (!((addr) == 0 || \
+ ((addr) >= META_MEMORY_BASE && (addr) <= PAGE_OFFSET && \
+ (size) <= PAGE_OFFSET - (addr)) || \
+ ((addr) >= LINCORE_BASE && (size) <= 0UL - (addr))))
+
+/*
+ * Decide whether an access is permitted: always when the thread's limit is
+ * KERNEL_DS, otherwise only when __user_bad() does not reject it.
+ * Returns 1 if allowed, 0 if not.
+ */
+static inline int __access_ok(unsigned long addr, unsigned long size)
+{
+ if (__kernel_ok)
+ return 1;
+
+ return !__user_bad(addr, size);
+}
+
+/* type is accepted for API compatibility but not used by the check */
+#define access_ok(type, addr, size) __access_ok((unsigned long)(addr), \
+ (unsigned long)(size))
+
+/* access_ok() expressed as an errno: 0 if allowed, -EFAULT if not */
+static inline int verify_area(int type, const void *addr, unsigned long size)
+{
+ return access_ok(type, addr, size) ? 0 : -EFAULT;
+}
+
+/*
+ * The exception table consists of pairs of addresses: the first is the
+ * address of an instruction that is allowed to fault, and the second is
+ * the address at which the program should continue. No registers are
+ * modified, so it is entirely up to the continuation code to figure out
+ * what to do.
+ *
+ * All the routines below use bits of fixup code that are out of line
+ * with the main instruction path. This means when everything is well,
+ * we don't even have to jump over them. Further, they do not intrude
+ * on our cache or tlb entries.
+ */
+struct exception_table_entry {
+ /* insn: instruction allowed to fault; fixup: where to continue */
+ unsigned long insn, fixup;
+};
+
+extern int fixup_exception(struct pt_regs *regs);
+
+/*
+ * These are the main single-value transfer routines. They automatically
+ * use the right size if we just have the right pointer type.
+ */
+
+/*
+ * put_user() checks the destination with access_ok() first; __put_user()
+ * does not. Both cast x to the pointed-to type and evaluate to an error
+ * code (0 on success).
+ */
+#define put_user(x, ptr) \
+ __put_user_check((__typeof__(*(ptr)))(x), (ptr), sizeof(*(ptr)))
+#define __put_user(x, ptr) \
+ __put_user_nocheck((__typeof__(*(ptr)))(x), (ptr), sizeof(*(ptr)))
+
+/*
+ * Called for unsupported sizes; presumably left undefined so that such a
+ * use fails at link time - confirm.
+ */
+extern void __put_user_bad(void);
+
+#define __put_user_nocheck(x, ptr, size) \
+({ \
+ long __pu_err; \
+ __put_user_size((x), (ptr), (size), __pu_err); \
+ __pu_err; \
+})
+
+/* Evaluates to -EFAULT without storing if access_ok() rejects the pointer */
+#define __put_user_check(x, ptr, size) \
+({ \
+ long __pu_err = -EFAULT; \
+ __typeof__(*(ptr)) __user *__pu_addr = (ptr); \
+ if (access_ok(VERIFY_WRITE, __pu_addr, size)) \
+ __put_user_size((x), __pu_addr, (size), __pu_err); \
+ __pu_err; \
+})
+
+/* Out-of-line store helpers for 1, 2, 4 and 8 byte values */
+extern long __put_user_asm_b(unsigned int x, void __user *addr);
+extern long __put_user_asm_w(unsigned int x, void __user *addr);
+extern long __put_user_asm_d(unsigned int x, void __user *addr);
+extern long __put_user_asm_l(unsigned long long x, void __user *addr);
+
+/*
+ * Dispatch on size to the matching store helper and leave its result in
+ * retval. For any other size __put_user_bad() is called and retval stays 0.
+ */
+#define __put_user_size(x, ptr, size, retval) \
+do { \
+ retval = 0; \
+ switch (size) { \
+ case 1: \
+ retval = __put_user_asm_b((unsigned int)x, ptr); break; \
+ case 2: \
+ retval = __put_user_asm_w((unsigned int)x, ptr); break; \
+ case 4: \
+ retval = __put_user_asm_d((unsigned int)x, ptr); break; \
+ case 8: \
+ retval = __put_user_asm_l((unsigned long long)x, ptr); break; \
+ default: \
+ __put_user_bad(); \
+ } \
+} while (0)
+
+/*
+ * get_user() checks the source with access_ok() first; __get_user() does
+ * not. Both assign the fetched value to x, cast to the pointed-to type, and
+ * evaluate to an error code (0 on success).
+ */
+#define get_user(x, ptr) \
+ __get_user_check((x), (ptr), sizeof(*(ptr)))
+#define __get_user(x, ptr) \
+ __get_user_nocheck((x), (ptr), sizeof(*(ptr)))
+
+extern long __get_user_bad(void);
+
+/* The value travels through a long, so at most sizeof(long) bytes survive */
+#define __get_user_nocheck(x, ptr, size) \
+({ \
+ long __gu_err, __gu_val; \
+ __get_user_size(__gu_val, (ptr), (size), __gu_err); \
+ (x) = (__typeof__(*(ptr)))__gu_val; \
+ __gu_err; \
+})
+
+/*
+ * If access_ok() rejects the pointer, x is set to 0 and the result is
+ * -EFAULT.
+ */
+#define __get_user_check(x, ptr, size) \
+({ \
+ long __gu_err = -EFAULT, __gu_val = 0; \
+ const __typeof__(*(ptr)) __user *__gu_addr = (ptr); \
+ if (access_ok(VERIFY_READ, __gu_addr, size)) \
+ __get_user_size(__gu_val, __gu_addr, (size), __gu_err); \
+ (x) = (__typeof__(*(ptr)))__gu_val; \
+ __gu_err; \
+})
+
+/*
+ * Out-of-line load helpers for 1, 2 and 4 byte values; each takes a pointer
+ * to the error variable.
+ */
+extern unsigned char __get_user_asm_b(const void __user *addr, long *err);
+extern unsigned short __get_user_asm_w(const void __user *addr, long *err);
+extern unsigned int __get_user_asm_d(const void __user *addr, long *err);
+
+/*
+ * Dispatch on size to the matching load helper, passing it the address of
+ * retval. Unlike __put_user_size() there is no 8 byte case: any other size
+ * goes through __get_user_bad().
+ */
+#define __get_user_size(x, ptr, size, retval) \
+do { \
+ retval = 0; \
+ switch (size) { \
+ case 1: \
+ x = __get_user_asm_b(ptr, &retval); break; \
+ case 2: \
+ x = __get_user_asm_w(ptr, &retval); break; \
+ case 4: \
+ x = __get_user_asm_d(ptr, &retval); break; \
+ default: \
+ (x) = __get_user_bad(); \
+ } \
+} while (0)
+
+/*
+ * Copy a null terminated string from userspace.
+ *
+ * Must return:
+ * -EFAULT for an exception
+ * count if we hit the buffer limit
+ * bytes copied if we hit a null byte
+ * (without the null byte)
+ */
+
+extern long __must_check __strncpy_from_user(char *dst, const char __user *src,
+ long count);
+
+/*
+ * Checked entry point: the first byte of src is validated with access_ok()
+ * before any copying, so a pointer into the kernel half or the system area
+ * is rejected with -EFAULT instead of being read from. Only one byte is
+ * checked because the string length is not known in advance.
+ */
+static inline long
+strncpy_from_user(char *dst, const char __user *src, long count)
+{
+ if (!access_ok(VERIFY_READ, src, 1))
+ return -EFAULT;
+ return __strncpy_from_user(dst, src, count);
+}
+
+/*
+ * Return the size of a string (including the ending 0)
+ *
+ * Return 0 on exception, a value greater than N if too long
+ */
+extern long __must_check strnlen_user(const char __user *src, long count);
+
+/* strnlen_user() with a fixed limit of 32767 bytes */
+#define strlen_user(str) strnlen_user(str, 32767)
+
+extern unsigned long __must_check __copy_user_zeroing(void *to,
+ const void __user *from,
+ unsigned long n);
+
+/*
+ * Copy n bytes from user space to kernel space.
+ * Returns the number of bytes that could not be copied (0 on success).
+ * When access_ok() rejects the source range nothing is read, but the whole
+ * destination is zeroed so that the caller never sees stale kernel memory
+ * in a buffer it believes was filled from user space.
+ */
+static inline unsigned long
+copy_from_user(void *to, const void __user *from, unsigned long n)
+{
+ if (access_ok(VERIFY_READ, from, n))
+ return __copy_user_zeroing(to, from, n);
+ memset(to, 0, n);
+ return n;
+}
+
+/* Unchecked variants: the caller must already have validated the range */
+#define __copy_from_user(to, from, n) __copy_user_zeroing(to, from, n)
+#define __copy_from_user_inatomic __copy_from_user
+
+extern unsigned long __must_check __copy_user(void __user *to,
+ const void *from,
+ unsigned long n);
+
+/*
+ * Copy n bytes from kernel space to user space.
+ * Returns the number of bytes not copied; that is all of n when
+ * access_ok() rejects the destination range.
+ */
+static inline unsigned long copy_to_user(void __user *to, const void *from,
+ unsigned long n)
+{
+ if (!access_ok(VERIFY_WRITE, to, n))
+ return n;
+
+ return __copy_user(to, from, n);
+}
+
+/* Unchecked variants: the caller must already have validated the range */
+#define __copy_to_user(to, from, n) __copy_user(to, from, n)
+#define __copy_to_user_inatomic __copy_to_user
+
+/*
+ * Zero Userspace
+ */
+
+extern unsigned long __must_check __do_clear_user(void __user *to,
+ unsigned long n);
+
+/*
+ * Zero n bytes of user memory.
+ * Returns the number of bytes not cleared; that is all of n when
+ * access_ok() rejects the destination range.
+ */
+static inline unsigned long clear_user(void __user *to, unsigned long n)
+{
+ if (!access_ok(VERIFY_WRITE, to, n))
+ return n;
+
+ return __do_clear_user(to, n);
+}
+
+/* Unchecked variant: the caller must already have validated the range */
+#define __clear_user(to, n) __do_clear_user(to, n)
+
+#endif /* _METAG_UACCESS_H */
diff --git a/arch/metag/include/asm/unistd.h b/arch/metag/include/asm/unistd.h
new file mode 100644
index 00000000000..32955a18fb3
--- /dev/null
+++ b/arch/metag/include/asm/unistd.h
@@ -0,0 +1,12 @@
+/*
+ * Copyright (C) 2012 Imagination Technologies Ltd.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ */
+
+#include <uapi/asm/unistd.h>
+
+/* Presumably requests the generic sys_clone implementation - confirm */
+#define __ARCH_WANT_SYS_CLONE
diff --git a/arch/metag/include/asm/user_gateway.h b/arch/metag/include/asm/user_gateway.h
new file mode 100644
index 00000000000..e404c09e3b7
--- /dev/null
+++ b/arch/metag/include/asm/user_gateway.h
@@ -0,0 +1,44 @@
+/*
+ * Copyright (C) 2010 Imagination Technologies
+ */
+
+#ifndef __ASM_METAG_USER_GATEWAY_H
+#define __ASM_METAG_USER_GATEWAY_H
+
+#include <asm/page.h>
+
+/* Page of kernel code accessible to userspace (user-visible address). */
+#define USER_GATEWAY_PAGE 0x6ffff000
+/*
+ * Offset of TLS pointer array in gateway page; set_gateway_tls() indexes it
+ * by hard_processor_id().
+ */
+#define USER_GATEWAY_TLS 0x100
+
+#ifndef __ASSEMBLY__
+
+/* Presumably linker-provided bounds of the gateway code - confirm */
+extern char __user_gateway_start;
+extern char __user_gateway_end;
+
+/* Kernel mapping of the gateway page. */
+extern void *gateway_page;
+
+/*
+ * Publish tls_ptr in the current hardware thread's slot of the TLS pointer
+ * array held in the gateway page.
+ *
+ * The slot offset is computed once and shared by the store (through the
+ * kernel mapping) and the cache flush (of the user-visible address), so
+ * both always refer to the same slot. The stride is sizeof(void *), which
+ * matches the void * elements being stored and replaces the literal 4
+ * previously used for the store only.
+ */
+static inline void set_gateway_tls(void __user *tls_ptr)
+{
+ unsigned long tls_offset = USER_GATEWAY_TLS +
+ hard_processor_id() * sizeof(void *);
+ void **gateway_tls = (void **)(gateway_page + tls_offset);
+
+ *gateway_tls = (__force void *)tls_ptr;
+#ifdef CONFIG_METAG_META12
+ /* Avoid cache aliases on virtually tagged cache. */
+ __builtin_dcache_flush((void *)USER_GATEWAY_PAGE + tls_offset);
+#endif
+}
+
+/*
+ * Gateway helper routines. Each has a companion *_end symbol, presumably
+ * marking the end of its code so that it can be copied into the gateway
+ * page - confirm against the setup code.
+ */
+extern int __kuser_get_tls(void);
+extern char *__kuser_get_tls_end[];
+
+extern int __kuser_cmpxchg(int, int, unsigned long *);
+extern char *__kuser_cmpxchg_end[];
+
+#endif
+
+#endif
diff --git a/arch/metag/include/uapi/asm/Kbuild b/arch/metag/include/uapi/asm/Kbuild
new file mode 100644
index 00000000000..876c71f866d
--- /dev/null
+++ b/arch/metag/include/uapi/asm/Kbuild
@@ -0,0 +1,13 @@
+# UAPI Header export list
+include include/uapi/asm-generic/Kbuild.asm
+
+header-y += byteorder.h
+header-y += ptrace.h
+header-y += resource.h
+header-y += sigcontext.h
+header-y += siginfo.h
+header-y += swab.h
+header-y += unistd.h
+
+generic-y += mman.h
+generic-y += setup.h
diff --git a/arch/metag/include/uapi/asm/byteorder.h b/arch/metag/include/uapi/asm/byteorder.h
new file mode 100644
index 00000000000..9558416d578
--- /dev/null
+++ b/arch/metag/include/uapi/asm/byteorder.h
@@ -0,0 +1 @@
+#include <linux/byteorder/little_endian.h>
diff --git a/arch/metag/include/uapi/asm/ptrace.h b/arch/metag/include/uapi/asm/ptrace.h
new file mode 100644
index 00000000000..45d97809d33
--- /dev/null
+++ b/arch/metag/include/uapi/asm/ptrace.h
@@ -0,0 +1,113 @@
+#ifndef _UAPI_METAG_PTRACE_H
+#define _UAPI_METAG_PTRACE_H
+
+#ifndef __ASSEMBLY__
+
+/*
+ * These are the layouts of the regsets returned by the GETREGSET ptrace call
+ */
+
+/* user_gp_regs::status */
+
+/* CBMarker bit (indicates catch state / catch replay) */
+#define USER_GP_REGS_STATUS_CATCH_BIT (1 << 22)
+#define USER_GP_REGS_STATUS_CATCH_S 22
+/* LSM_STEP field (load/store multiple step) */
+#define USER_GP_REGS_STATUS_LSM_STEP_BITS (0x7 << 8)
+#define USER_GP_REGS_STATUS_LSM_STEP_S 8
+/* SCC bit (indicates split 16x16 condition flags) */
+#define USER_GP_REGS_STATUS_SCC_BIT (1 << 4)
+#define USER_GP_REGS_STATUS_SCC_S 4
+
+/* normal condition flags */
+/* CF_Z bit (Zero flag) */
+#define USER_GP_REGS_STATUS_CF_Z_BIT (1 << 3)
+#define USER_GP_REGS_STATUS_CF_Z_S 3
+/* CF_N bit (Negative flag) */
+#define USER_GP_REGS_STATUS_CF_N_BIT (1 << 2)
+#define USER_GP_REGS_STATUS_CF_N_S 2
+/* CF_V bit (oVerflow flag) */
+#define USER_GP_REGS_STATUS_CF_V_BIT (1 << 1)
+#define USER_GP_REGS_STATUS_CF_V_S 1
+/* CF_C bit (Carry flag) */
+#define USER_GP_REGS_STATUS_CF_C_BIT (1 << 0)
+#define USER_GP_REGS_STATUS_CF_C_S 0
+
+/* split 16x16 condition flags */
+/* SCF_LZ bit (Low Zero flag) */
+#define USER_GP_REGS_STATUS_SCF_LZ_BIT (1 << 3)
+#define USER_GP_REGS_STATUS_SCF_LZ_S 3
+/* SCF_HZ bit (High Zero flag) */
+#define USER_GP_REGS_STATUS_SCF_HZ_BIT (1 << 2)
+#define USER_GP_REGS_STATUS_SCF_HZ_S 2
+/* SCF_HC bit (High Carry flag) */
+#define USER_GP_REGS_STATUS_SCF_HC_BIT (1 << 1)
+#define USER_GP_REGS_STATUS_SCF_HC_S 1
+/* SCF_LC bit (Low Carry flag) */
+#define USER_GP_REGS_STATUS_SCF_LC_BIT (1 << 0)
+#define USER_GP_REGS_STATUS_SCF_LC_S 0
+
+/**
+ * struct user_gp_regs - User general purpose registers
+ * @dx: GP data unit regs (dx[reg][unit] = D{unit:0-1}.{reg:0-7})
+ * @ax: GP address unit regs (ax[reg][unit] = A{unit:0-1}.{reg:0-3})
+ * @pc: PC register
+ * @status: TXSTATUS register (condition flags, LSM_STEP etc)
+ * @rpt: TXRPT registers (branch repeat counter)
+ * @bpobits: TXBPOBITS register ("branch prediction other" bits)
+ * @mode: TXMODE register
+ * @_pad1: Reserved padding to make sizeof obviously 64bit aligned
+ *
+ * This is the user-visible general purpose register state structure.
+ *
+ * It can be accessed through PTRACE_GETREGSET with NT_PRSTATUS.
+ *
+ * It is also used in the signal context.
+ */
+struct user_gp_regs {
+ unsigned long dx[8][2];
+ unsigned long ax[4][2];
+ unsigned long pc;
+ unsigned long status;
+ unsigned long rpt;
+ unsigned long bpobits;
+ unsigned long mode;
+ unsigned long _pad1;
+};
+
+/**
+ * struct user_cb_regs - User catch buffer registers
+ * @flags: TXCATCH0 register (fault flags)
+ * @addr: TXCATCH1 register (fault address)
+ * @data: TXCATCH2 and TXCATCH3 registers (low and high data word)
+ *
+ * This is the user-visible catch buffer register state structure containing
+ * information about a failed memory access, and allowing the access to be
+ * modified and replayed.
+ *
+ * It can be accessed through PTRACE_GETREGSET with NT_METAG_CBUF.
+ */
+struct user_cb_regs {
+ unsigned long flags;
+ unsigned long addr;
+ unsigned long long data;
+};
+
+/**
+ * struct user_rp_state - User read pipeline state
+ * @entries: Read pipeline entries
+ * @mask: Mask of valid pipeline entries (RPMask from TXDIVTIME register)
+ *
+ * This is the user-visible read pipeline state structure containing the entries
+ * currently in the read pipeline and the mask of valid entries.
+ *
+ * It can be accessed through PTRACE_GETREGSET with NT_METAG_RPIPE.
+ */
+struct user_rp_state {
+ unsigned long long entries[6];
+ unsigned long mask;
+};
+
+#endif /* __ASSEMBLY__ */
+
+#endif /* _UAPI_METAG_PTRACE_H */
diff --git a/arch/metag/include/uapi/asm/resource.h b/arch/metag/include/uapi/asm/resource.h
new file mode 100644
index 00000000000..526d23cc305
--- /dev/null
+++ b/arch/metag/include/uapi/asm/resource.h
@@ -0,0 +1,7 @@
+#ifndef _UAPI_METAG_RESOURCE_H
+#define _UAPI_METAG_RESOURCE_H
+
+#define _STK_LIM_MAX (1 << 28)
+#include <asm-generic/resource.h>
+
+#endif /* _UAPI_METAG_RESOURCE_H */
diff --git a/arch/metag/include/uapi/asm/sigcontext.h b/arch/metag/include/uapi/asm/sigcontext.h
new file mode 100644
index 00000000000..ef79a910c1c
--- /dev/null
+++ b/arch/metag/include/uapi/asm/sigcontext.h
@@ -0,0 +1,31 @@
+#ifndef _ASM_METAG_SIGCONTEXT_H
+#define _ASM_METAG_SIGCONTEXT_H
+
+#include <asm/ptrace.h>
+
+/*
+ * In a sigcontext structure we need to store the active state of the
+ * user process so that it does not get trashed when we call the signal
+ * handler. That is not really the same as a user context that we are
+ * going to store on syscall etc.
+ */
+struct sigcontext {
+ struct user_gp_regs regs; /* needs to be first */
+
+ /*
+ * Catch registers describing a memory fault.
+ * If USER_GP_REGS_STATUS_CATCH_BIT is set in regs.status then catch
+ * buffers have been saved and will be replayed on sigreturn.
+ * Clear that bit to discard the catch state instead of replaying it.
+ */
+ struct user_cb_regs cb;
+
+ /*
+ * Read pipeline state. This will get restored on sigreturn.
+ */
+ struct user_rp_state rp;
+
+ unsigned long oldmask;
+};
+
+#endif
diff --git a/arch/metag/include/uapi/asm/siginfo.h b/arch/metag/include/uapi/asm/siginfo.h
new file mode 100644
index 00000000000..b2e0c8b62ae
--- /dev/null
+++ b/arch/metag/include/uapi/asm/siginfo.h
@@ -0,0 +1,8 @@
+#ifndef _METAG_SIGINFO_H
+#define _METAG_SIGINFO_H
+
+#define __ARCH_SI_TRAPNO
+
+#include <asm-generic/siginfo.h>
+
+#endif
diff --git a/arch/metag/include/uapi/asm/swab.h b/arch/metag/include/uapi/asm/swab.h
new file mode 100644
index 00000000000..1076b3a6387
--- /dev/null
+++ b/arch/metag/include/uapi/asm/swab.h
@@ -0,0 +1,26 @@
+#ifndef __ASM_METAG_SWAB_H
+#define __ASM_METAG_SWAB_H
+
+#include <linux/compiler.h>
+#include <linux/types.h>
+#include <asm-generic/swab.h>
+
+/*
+ * Byte-swap a 16-bit value with the compiler's __builtin_metag_bswaps().
+ * NOTE(review): the self-named #define after each helper presumably lets the
+ * generic swab code detect the arch override with #ifdef - confirm.
+ */
+static inline __attribute_const__ __u16 __arch_swab16(__u16 x)
+{
+ return __builtin_metag_bswaps(x);
+}
+#define __arch_swab16 __arch_swab16
+
+/* Byte-swap a 32-bit value with the compiler's __builtin_metag_bswap(). */
+static inline __attribute_const__ __u32 __arch_swab32(__u32 x)
+{
+ return __builtin_metag_bswap(x);
+}
+#define __arch_swab32 __arch_swab32
+
+/* Byte-swap a 64-bit value with the compiler's __builtin_metag_bswapll(). */
+static inline __attribute_const__ __u64 __arch_swab64(__u64 x)
+{
+ return __builtin_metag_bswapll(x);
+}
+#define __arch_swab64 __arch_swab64
+
+#endif /* __ASM_METAG_SWAB_H */
diff --git a/arch/metag/include/uapi/asm/unistd.h b/arch/metag/include/uapi/asm/unistd.h
new file mode 100644
index 00000000000..b80b8e899d2
--- /dev/null
+++ b/arch/metag/include/uapi/asm/unistd.h
@@ -0,0 +1,21 @@
+/*
+ * Copyright (C) 2012 Imagination Technologies Ltd.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ */
+
+/* Use the standard ABI for syscalls. */
+#include <asm-generic/unistd.h>
+
+/* metag-specific syscalls. */
+#define __NR_metag_setglobalbit (__NR_arch_specific_syscall + 1)
+__SYSCALL(__NR_metag_setglobalbit, sys_metag_setglobalbit)
+#define __NR_metag_set_fpu_flags (__NR_arch_specific_syscall + 2)
+__SYSCALL(__NR_metag_set_fpu_flags, sys_metag_set_fpu_flags)
+#define __NR_metag_set_tls (__NR_arch_specific_syscall + 3)
+__SYSCALL(__NR_metag_set_tls, sys_metag_set_tls)
+#define __NR_metag_get_tls (__NR_arch_specific_syscall + 4)
+__SYSCALL(__NR_metag_get_tls, sys_metag_get_tls)
diff --git a/arch/metag/kernel/.gitignore b/arch/metag/kernel/.gitignore
new file mode 100644
index 00000000000..c5f676c3c22
--- /dev/null
+++ b/arch/metag/kernel/.gitignore
@@ -0,0 +1 @@
+vmlinux.lds
diff --git a/arch/metag/kernel/Makefile b/arch/metag/kernel/Makefile
new file mode 100644
index 00000000000..d7675f4a5df
--- /dev/null
+++ b/arch/metag/kernel/Makefile
@@ -0,0 +1,39 @@
+#
+# Makefile for the Linux/Meta kernel.
+#
+
+extra-y += head.o
+extra-y += vmlinux.lds
+
+obj-y += cachepart.o
+obj-y += clock.o
+obj-y += core_reg.o
+obj-y += devtree.o
+obj-y += dma.o
+obj-y += irq.o
+obj-y += kick.o
+obj-y += machines.o
+obj-y += process.o
+obj-y += ptrace.o
+obj-y += setup.o
+obj-y += signal.o
+obj-y += stacktrace.o
+obj-y += sys_metag.o
+obj-y += tbiunexp.o
+obj-y += time.o
+obj-y += topology.o
+obj-y += traps.o
+obj-y += user_gateway.o
+
+obj-$(CONFIG_PERF_EVENTS) += perf/
+
+obj-$(CONFIG_METAG_COREMEM) += coremem.o
+obj-$(CONFIG_METAG_DA) += da.o
+obj-$(CONFIG_DYNAMIC_FTRACE) += ftrace.o
+obj-$(CONFIG_FUNCTION_TRACER) += ftrace_stub.o
+obj-$(CONFIG_MODULES) += metag_ksyms.o
+obj-$(CONFIG_MODULES) += module.o
+obj-$(CONFIG_PERF_EVENTS) += perf_callchain.o
+obj-$(CONFIG_SMP) += smp.o
+obj-$(CONFIG_METAG_SUSPEND_MEM) += suspend.o
+obj-$(CONFIG_METAG_USER_TCM) += tcm.o
diff --git a/arch/metag/kernel/asm-offsets.c b/arch/metag/kernel/asm-offsets.c
new file mode 100644
index 00000000000..bfc9205f964
--- /dev/null
+++ b/arch/metag/kernel/asm-offsets.c
@@ -0,0 +1,14 @@
+/*
+ * This program is used to generate definitions needed by
+ * assembly language modules.
+ *
+ */
+
+#include <linux/kbuild.h>
+#include <linux/thread_info.h>
+
+/*
+ * Emits THREAD_INFO_SIZE, defined as sizeof(struct thread_info), through the
+ * kbuild DEFINE() helper so that assembly sources can use the value.
+ */
+int main(void)
+{
+ DEFINE(THREAD_INFO_SIZE, sizeof(struct thread_info));
+ return 0;
+}
diff --git a/arch/metag/kernel/cachepart.c b/arch/metag/kernel/cachepart.c
new file mode 100644
index 00000000000..3a589dfb966
--- /dev/null
+++ b/arch/metag/kernel/cachepart.c
@@ -0,0 +1,124 @@
+/*
+ * Meta cache partition manipulation.
+ *
+ * Copyright 2010 Imagination Technologies Ltd.
+ */
+
+#include <linux/kernel.h>
+#include <linux/io.h>
+#include <linux/errno.h>
+#include <asm/processor.h>
+#include <asm/cachepart.h>
+#include <asm/metag_isa.h>
+#include <asm/metag_mem.h>
+
+#define SYSC_DCPART(n) (SYSC_DCPART0 + SYSC_xCPARTn_STRIDE * (n))
+#define SYSC_ICPART(n) (SYSC_ICPART0 + SYSC_xCPARTn_STRIDE * (n))
+
+#define CACHE_ASSOCIATIVITY 4 /* 4 way set-associative */
+#define ICACHE 0
+#define DCACHE 1
+
+/* The CORE_CONFIG2 register is not available on Meta 1 */
+#ifdef CONFIG_METAG_META21
+/*
+ * Total data cache size, decoded from the DCSZ field of CORE_CONFIG2 as
+ * 0x1000 << field.
+ */
+unsigned int get_dcache_size(void)
+{
+	unsigned int size_field = metag_in32(METAC_CORE_CONFIG2);
+
+	size_field &= METAC_CORECFG2_DCSZ_BITS;
+	size_field >>= METAC_CORECFG2_DCSZ_S;
+
+	return 0x1000 << size_field;
+}
+
+/*
+ * Total instruction cache size, decoded from the ICSZ field of CORE_CONFIG2
+ * as 0x1000 << field.
+ */
+unsigned int get_icache_size(void)
+{
+	unsigned int size_field = metag_in32(METAC_CORE_CONFIG2);
+
+	size_field &= METAC_CORE_C2ICSZ_BITS;
+	size_field >>= METAC_CORE_C2ICSZ_S;
+
+	return 0x1000 << size_field;
+}
+
+/*
+ * Global data cache size: get_dcache_size() scaled by (field + 1) / 16, where
+ * field is the SYSC_xCPARTG_AND part of this hardware thread's DCPART
+ * register.
+ */
+unsigned int get_global_dcache_size(void)
+{
+	unsigned int sixteenths = metag_in32(SYSC_DCPART(hard_processor_id()));
+
+	sixteenths &= SYSC_xCPARTG_AND_BITS;
+	sixteenths = (sixteenths >> SYSC_xCPARTG_AND_S) + 1;
+
+	return (get_dcache_size() * sixteenths) >> 4;
+}
+
+/*
+ * Global instruction cache size: get_icache_size() scaled by
+ * (field + 1) / 16, where field is the SYSC_xCPARTG_AND part of this hardware
+ * thread's ICPART register.
+ */
+unsigned int get_global_icache_size(void)
+{
+	unsigned int sixteenths = metag_in32(SYSC_ICPART(hard_processor_id()));
+
+	sixteenths &= SYSC_xCPARTG_AND_BITS;
+	sixteenths = (sixteenths >> SYSC_xCPARTG_AND_S) + 1;
+
+	return (get_icache_size() * sixteenths) >> 4;
+}
+
+/*
+ * get_thread_cache_size() - size of the cache partition seen by a thread.
+ * @cache: ICACHE or DCACHE.
+ * @thread_id: Hardware thread whose partition register is read.
+ *
+ * Returns the partition size, 0 if the selected cache is disabled, or -1 if
+ * the partition field holds a value this function does not recognise. The
+ * return type is signed so that callers can actually see the -1.
+ */
+static int get_thread_cache_size(unsigned int cache, int thread_id)
+{
+	unsigned int cache_size;
+	unsigned int t_cache_part;
+	unsigned int isEnabled;
+	unsigned int offset = 0;
+
+	/* Bit 0 of the cache control register says whether the cache is on. */
+	isEnabled = (cache == DCACHE ? metag_in32(MMCU_DCACHE_CTRL_ADDR) & 0x1 :
+		metag_in32(MMCU_ICACHE_CTRL_ADDR) & 0x1);
+	if (!isEnabled)
+		return 0;
+#if PAGE_OFFSET >= LINGLOBAL_BASE
+	/* Checking for global cache */
+	cache_size = (cache == DCACHE ? get_global_dcache_size() :
+		get_global_icache_size());
+	/* In this configuration the partition field is read 8 bits higher. */
+	offset = 8;
+#else
+	cache_size = (cache == DCACHE ? get_dcache_size() :
+		get_icache_size());
+#endif
+	t_cache_part = (cache == DCACHE ?
+		(metag_in32(SYSC_DCPART(thread_id)) >> offset) & 0xF :
+		(metag_in32(SYSC_ICPART(thread_id)) >> offset) & 0xF);
+
+	/* Each recognised 4-bit mask value selects a fraction of the cache. */
+	switch (t_cache_part) {
+	case 0xF:
+		return cache_size;
+	case 0x7:
+		return cache_size / 2;
+	case 0x3:
+		return cache_size / 4;
+	case 0x1:
+		return cache_size / 8;
+	case 0:
+		return cache_size / 16;
+	}
+	return -1;
+}
+
+/*
+ * check_for_cache_aliasing() - report cache configurations that can alias.
+ * @thread_id: Hardware thread whose cache partitions are checked.
+ *
+ * For the instruction and data cache in turn, compares the size of one way of
+ * the thread's partition (partition size / CACHE_ASSOCIATIVITY) against
+ * PAGE_SIZE and logs the details when a way is larger than a page.
+ */
+void check_for_cache_aliasing(int thread_id)
+{
+	/* Signed so that the -1 error value can be told apart from a size. */
+	int thread_cache_size;
+	unsigned int cache_type;
+
+	for (cache_type = ICACHE; cache_type <= DCACHE; cache_type++) {
+		thread_cache_size =
+				get_thread_cache_size(cache_type, thread_id);
+		if (thread_cache_size < 0) {
+			pr_emerg("Can't read %s cache size\n",
+				 cache_type ? "DCACHE" : "ICACHE");
+			/* No usable size, so there is nothing to compare. */
+			continue;
+		}
+		if (thread_cache_size == 0)
+			/* Cache is off. No need to check for aliasing */
+			continue;
+		if (thread_cache_size / CACHE_ASSOCIATIVITY > PAGE_SIZE) {
+			pr_emerg("Cache aliasing detected in %s on Thread %d\n",
+				 cache_type ? "DCACHE" : "ICACHE", thread_id);
+			pr_warn("Total %s size: %u bytes\n",
+				cache_type ? "DCACHE" : "ICACHE",
+				cache_type ? get_dcache_size()
+				: get_icache_size());
+			pr_warn("Thread %s size: %d bytes\n",
+				cache_type ? "DCACHE" : "ICACHE",
+				thread_cache_size);
+			pr_warn("Page Size: %lu bytes\n", PAGE_SIZE);
+		}
+	}
+}
+
+#else
+
+/* Built without CONFIG_METAG_META21: no CORE_CONFIG2, so nothing to check. */
+void check_for_cache_aliasing(int thread_id)
+{
+}
+
+#endif
diff --git a/arch/metag/kernel/clock.c b/arch/metag/kernel/clock.c
new file mode 100644
index 00000000000..defc84056f1
--- /dev/null
+++ b/arch/metag/kernel/clock.c
@@ -0,0 +1,53 @@
+/*
+ * arch/metag/kernel/clock.c
+ *
+ * Copyright (C) 2012 Imagination Technologies Ltd.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/delay.h>
+#include <linux/io.h>
+
+#include <asm/param.h>
+#include <asm/clock.h>
+
+struct meta_clock_desc _meta_clock;
+
+/*
+ * Default machine get_core_freq callback.
+ *
+ * Returns an estimate of the core clock. setup_meta_clocks() installs this
+ * when the machine description supplies no get_core_freq callback.
+ */
+static unsigned long get_core_freq_default(void)
+{
+#ifdef CONFIG_METAG_META21
+ /*
+ * Meta 2 cores divide down the core clock for the Meta timers, so we
+ * can estimate the core clock from the divider.
+ */
+ return (metag_in32(EXPAND_TIMER_DIV) + 1) * 1000000;
+#else
+ /*
+ * On Meta 1 we don't know the core clock, but assuming the Meta timer
+ * is correct it can be estimated based on loops_per_jiffy.
+ */
+ /* i.e. loops per second multiplied by 2.5 */
+ return (loops_per_jiffy * HZ * 5) >> 1;
+#endif
+}
+
+/**
+ * setup_meta_clocks() - Install the Meta clock callbacks.
+ * @desc: Clock descriptor, usually from the machine description; may be NULL.
+ *
+ * Copies @desc into _meta_clock when one is given, then makes sure every
+ * callback is valid by filling in defaults for the ones left unset.
+ */
+void __init setup_meta_clocks(struct meta_clock_desc *desc)
+{
+	if (desc != NULL)
+		_meta_clock = *desc;
+
+	/* No machine-specific way to get the core clock: use the estimate. */
+	if (_meta_clock.get_core_freq == NULL)
+		_meta_clock.get_core_freq = get_core_freq_default;
+}
+
diff --git a/arch/metag/kernel/core_reg.c b/arch/metag/kernel/core_reg.c
new file mode 100644
index 00000000000..671cce8c34f
--- /dev/null
+++ b/arch/metag/kernel/core_reg.c
@@ -0,0 +1,117 @@
+/*
+ * Support for reading and writing Meta core internal registers.
+ *
+ * Copyright (C) 2011 Imagination Technologies Ltd.
+ *
+ */
+
+#include <linux/delay.h>
+#include <linux/export.h>
+
+#include <asm/core_reg.h>
+#include <asm/global_lock.h>
+#include <asm/hwthread.h>
+#include <asm/io.h>
+#include <asm/metag_mem.h>
+#include <asm/metag_regs.h>
+
+#define UNIT_BIT_MASK TXUXXRXRQ_UXX_BITS
+#define REG_BIT_MASK TXUXXRXRQ_RX_BITS
+#define THREAD_BIT_MASK TXUXXRXRQ_TX_BITS
+
+#define UNIT_SHIFTS TXUXXRXRQ_UXX_S
+#define REG_SHIFTS TXUXXRXRQ_RX_S
+#define THREAD_SHIFTS TXUXXRXRQ_TX_S
+
+#define UNIT_VAL(x) (((x) << UNIT_SHIFTS) & UNIT_BIT_MASK)
+#define REG_VAL(x) (((x) << REG_SHIFTS) & REG_BIT_MASK)
+#define THREAD_VAL(x) (((x) << THREAD_SHIFTS) & THREAD_BIT_MASK)
+
+/**
+ * core_reg_write() - modify the content of a register in a core unit.
+ * @unit: The unit to be modified.
+ * @reg: Register number within the unit.
+ * @thread: The thread we want to access.
+ * @val: The new value to write.
+ *
+ * Check asm/metag_regs.h for a list/defines of supported units (ie: TXUPC_ID,
+ * TXUTR_ID, etc), and regnums within the units (ie: TXMASKI_REGNUM,
+ * TXPOLLI_REGNUM, etc).
+ *
+ * TXUCT_ID registers are written directly through their memory mapped
+ * address. Every other unit is accessed through the TXUXXRXDT/TXUXXRXRQ
+ * register pair with __global_lock2() held, polling TXUXXRXRQ_DREADY_BIT in
+ * 10us steps before issuing the request and again until it completes.
+ */
+void core_reg_write(int unit, int reg, int thread, unsigned int val)
+{
+ unsigned long flags;
+
+ /* TXUCT_ID has its own memory mapped registers */
+ if (unit == TXUCT_ID) {
+ void __iomem *cu_reg = __CU_addr(thread, reg);
+ metag_out32(val, cu_reg);
+ return;
+ }
+
+ __global_lock2(flags);
+
+ /* wait for ready */
+ while (!(metag_in32(TXUXXRXRQ) & TXUXXRXRQ_DREADY_BIT))
+ udelay(10);
+
+ /* set the value to write */
+ metag_out32(val, TXUXXRXDT);
+
+ /* set the register to write (val is reused to hold the request word) */
+ val = UNIT_VAL(unit) | REG_VAL(reg) | THREAD_VAL(thread);
+ metag_out32(val, TXUXXRXRQ);
+
+ /* wait for finish */
+ while (!(metag_in32(TXUXXRXRQ) & TXUXXRXRQ_DREADY_BIT))
+ udelay(10);
+
+ __global_unlock2(flags);
+}
+EXPORT_SYMBOL(core_reg_write);
+
+/**
+ * core_reg_read() - read the content of a register in a core unit.
+ * @unit: The unit to be read.
+ * @reg: Register number within the unit.
+ * @thread: The thread we want to access.
+ *
+ * Check asm/metag_regs.h for a list/defines of supported units (ie: TXUPC_ID,
+ * TXUTR_ID, etc), and regnums within the units (ie: TXMASKI_REGNUM,
+ * TXPOLLI_REGNUM, etc).
+ *
+ * TXUCT_ID registers are read directly through their memory mapped address.
+ * Every other unit is accessed through the TXUXXRXRQ/TXUXXRXDT register pair
+ * with __global_lock2() held: a request with TXUXXRXRQ_RDnWR_BIT set is
+ * issued and the result is fetched from TXUXXRXDT once
+ * TXUXXRXRQ_DREADY_BIT is set again.
+ *
+ * Returns the value read from the register.
+ */
+unsigned int core_reg_read(int unit, int reg, int thread)
+{
+ unsigned long flags;
+ unsigned int val;
+
+ /* TXUCT_ID has its own memory mapped registers */
+ if (unit == TXUCT_ID) {
+ void __iomem *cu_reg = __CU_addr(thread, reg);
+ val = metag_in32(cu_reg);
+ return val;
+ }
+
+ __global_lock2(flags);
+
+ /* wait for ready */
+ while (!(metag_in32(TXUXXRXRQ) & TXUXXRXRQ_DREADY_BIT))
+ udelay(10);
+
+ /* set the register to read */
+ val = (UNIT_VAL(unit) | REG_VAL(reg) | THREAD_VAL(thread) |
+ TXUXXRXRQ_RDnWR_BIT);
+ metag_out32(val, TXUXXRXRQ);
+
+ /* wait for finish */
+ while (!(metag_in32(TXUXXRXRQ) & TXUXXRXRQ_DREADY_BIT))
+ udelay(10);
+
+ /* read the register value */
+ val = metag_in32(TXUXXRXDT);
+
+ __global_unlock2(flags);
+
+ return val;
+}
+EXPORT_SYMBOL(core_reg_read);
diff --git a/arch/metag/kernel/da.c b/arch/metag/kernel/da.c
new file mode 100644
index 00000000000..52aabb658fd
--- /dev/null
+++ b/arch/metag/kernel/da.c
@@ -0,0 +1,23 @@
+/*
+ * Meta DA JTAG debugger control.
+ *
+ * Copyright 2012 Imagination Technologies Ltd.
+ */
+
+
+#include <linux/io.h>
+#include <linux/kernel.h>
+#include <asm/da.h>
+#include <asm/metag_mem.h>
+
+bool _metag_da_present;
+
+/*
+ * metag_da_probe() - detect whether a DA (JTAG debugger) is present.
+ *
+ * Sets _metag_da_present when T0VECINT_BHALT reads back as 1 and logs the
+ * outcome. Always returns 0.
+ */
+int __init metag_da_probe(void)
+{
+ _metag_da_present = (metag_in32(T0VECINT_BHALT) == 1);
+ if (_metag_da_present)
+ pr_info("DA present\n");
+ else
+ pr_info("DA not present\n");
+ return 0;
+}
diff --git a/arch/metag/kernel/devtree.c b/arch/metag/kernel/devtree.c
new file mode 100644
index 00000000000..7cd02529636
--- /dev/null
+++ b/arch/metag/kernel/devtree.c
@@ -0,0 +1,114 @@
+/*
+ * linux/arch/metag/kernel/devtree.c
+ *
+ * Copyright (C) 2012 Imagination Technologies Ltd.
+ *
+ * Based on ARM version:
+ * Copyright (C) 2009 Canonical Ltd. <jeremy.kerr@canonical.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/init.h>
+#include <linux/export.h>
+#include <linux/types.h>
+#include <linux/bootmem.h>
+#include <linux/memblock.h>
+#include <linux/of.h>
+#include <linux/of_fdt.h>
+
+#include <asm/setup.h>
+#include <asm/page.h>
+#include <asm/mach/arch.h>
+
+/*
+ * early_init_dt_add_memory_arch() - handle a memory range found in the DT.
+ * @base: Start of the range.
+ * @size: Size of the range.
+ *
+ * This implementation only logs the range at error level; it does not
+ * register the memory anywhere.
+ */
+void __init early_init_dt_add_memory_arch(u64 base, u64 size)
+{
+ pr_err("%s(%llx, %llx)\n",
+ __func__, base, size);
+}
+
+/*
+ * early_init_dt_alloc_memory_arch() - allocate memory for device tree code.
+ * @size: Number of bytes to allocate.
+ * @align: Required alignment of the allocation.
+ *
+ * Thin wrapper that returns the result of alloc_bootmem_align().
+ */
+void * __init early_init_dt_alloc_memory_arch(u64 size, u64 align)
+{
+ return alloc_bootmem_align(size, align);
+}
+
+/**
+ * setup_machine_fdt - Machine setup when a dtb was passed to the kernel
+ * @dt: virtual address pointer to dt blob
+ *
+ * If a dtb was passed to the kernel, then use it to choose the correct
+ * machine_desc and to setup the system.
+ *
+ * Returns the best matching machine_desc, or NULL if the blob does not start
+ * with the device tree magic number.
+ */
+struct machine_desc * __init setup_machine_fdt(void *dt)
+{
+ struct boot_param_header *devtree = dt;
+ struct machine_desc *mdesc, *mdesc_best = NULL;
+ /* The lowest non-zero score wins, so start from a very large bound. */
+ unsigned int score, mdesc_score = ~1;
+ unsigned long dt_root;
+ const char *model;
+
+ /* check device tree validity */
+ if (be32_to_cpu(devtree->magic) != OF_DT_HEADER)
+ return NULL;
+
+ /* Search the mdescs for the 'best' compatible value match */
+ initial_boot_params = devtree;
+ dt_root = of_get_flat_dt_root();
+
+ for_each_machine_desc(mdesc) {
+ score = of_flat_dt_match(dt_root, mdesc->dt_compat);
+ if (score > 0 && score < mdesc_score) {
+ mdesc_best = mdesc;
+ mdesc_score = score;
+ }
+ }
+ if (!mdesc_best) {
+ const char *prop;
+ long size;
+
+ pr_err("\nError: unrecognized/unsupported device tree compatible list:\n[ ");
+
+ /* Print each NUL-terminated string of the "compatible" property. */
+ prop = of_get_flat_dt_prop(dt_root, "compatible", &size);
+ if (prop) {
+ while (size > 0) {
+ printk("'%s' ", prop);
+ size -= strlen(prop) + 1;
+ prop += strlen(prop) + 1;
+ }
+ }
+ printk("]\n\n");
+
+ dump_machine_table(); /* does not return */
+ }
+
+ /* Name to report: "model", else "compatible", else a placeholder. */
+ model = of_get_flat_dt_prop(dt_root, "model", NULL);
+ if (!model)
+ model = of_get_flat_dt_prop(dt_root, "compatible", NULL);
+ if (!model)
+ model = "<unknown>";
+ pr_info("Machine: %s, model: %s\n", mdesc_best->name, model);
+
+ /* Retrieve various information from the /chosen node */
+ of_scan_flat_dt(early_init_dt_scan_chosen, boot_command_line);
+
+ return mdesc_best;
+}
+
+/**
+ * copy_fdt - Copy device tree into non-init memory.
+ *
+ * The unflattened device tree references the strings of the flattened blob
+ * directly, so the blob has to live in memory that outlasts init. If the
+ * allocation fails, initial_boot_params is left pointing at the original.
+ */
+void __init copy_fdt(void)
+{
+	u32 blob_size = be32_to_cpu(initial_boot_params->totalsize);
+	void *copy = early_init_dt_alloc_memory_arch(blob_size, 0x40);
+
+	if (!copy)
+		return;
+
+	memcpy(copy, initial_boot_params, blob_size);
+	initial_boot_params = copy;
+}
diff --git a/arch/metag/kernel/dma.c b/arch/metag/kernel/dma.c
new file mode 100644
index 00000000000..8c00dedadc5
--- /dev/null
+++ b/arch/metag/kernel/dma.c
@@ -0,0 +1,507 @@
+/*
+ * Meta version derived from arch/powerpc/lib/dma-noncoherent.c
+ * Copyright (C) 2008 Imagination Technologies Ltd.
+ *
+ * PowerPC version derived from arch/arm/mm/consistent.c
+ * Copyright (C) 2001 Dan Malek (dmalek@jlc.net)
+ *
+ * Copyright (C) 2000 Russell King
+ *
+ * Consistent memory allocators. Used for DMA devices that want to
+ * share uncached memory with the processor core. The function return
+ * is the virtual address and 'dma_handle' is the physical address.
+ * Mostly stolen from the ARM port, with some changes for PowerPC.
+ * -- Dan
+ *
+ * Reorganized to get rid of the arch-specific consistent_* functions
+ * and provide non-coherent implementations for the DMA API. -Matt
+ *
+ * Added in_interrupt() safe dma_alloc_coherent()/dma_free_coherent()
+ * implementation. This is pulled straight from ARM and barely
+ * modified. -Matt
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/sched.h>
+#include <linux/kernel.h>
+#include <linux/errno.h>
+#include <linux/export.h>
+#include <linux/string.h>
+#include <linux/types.h>
+#include <linux/highmem.h>
+#include <linux/dma-mapping.h>
+#include <linux/slab.h>
+
+#include <asm/tlbflush.h>
+#include <asm/mmu.h>
+
+#define CONSISTENT_OFFSET(x) (((unsigned long)(x) - CONSISTENT_START) \
+ >> PAGE_SHIFT)
+
+/*
+ * get_coherent_dma_mask() - coherent DMA mask to honour for an allocation.
+ * @dev: Device the allocation is for, or NULL.
+ *
+ * Returns an all-ones mask when no device is given, the device's
+ * coherent_dma_mask otherwise, or 0 (after a warning) if that mask is unset.
+ */
+static u64 get_coherent_dma_mask(struct device *dev)
+{
+	if (!dev)
+		return ~0ULL;
+
+	/*
+	 * Sanity check the DMA mask - it must be non-zero, and
+	 * must be able to be satisfied by a DMA allocation.
+	 */
+	if (dev->coherent_dma_mask == 0) {
+		dev_warn(dev, "coherent DMA mask is unset\n");
+		return 0;
+	}
+
+	return dev->coherent_dma_mask;
+}
+/*
+ * This is the page table (2MB) covering uncached, DMA consistent allocations
+ */
+static pte_t *consistent_pte;
+static DEFINE_SPINLOCK(consistent_lock);
+
+/*
+ * VM region handling support.
+ *
+ * This should become something generic, handling VM region allocations for
+ * vmalloc and similar (ioremap, module space, etc).
+ *
+ * I envisage vmalloc()'s supporting vm_struct becoming:
+ *
+ * struct vm_struct {
+ * struct metag_vm_region region;
+ * unsigned long flags;
+ * struct page **pages;
+ * unsigned int nr_pages;
+ * unsigned long phys_addr;
+ * };
+ *
+ * get_vm_area() would then call metag_vm_region_alloc with an appropriate
+ * struct metag_vm_region head (eg):
+ *
+ * struct metag_vm_region vmalloc_head = {
+ * .vm_list = LIST_HEAD_INIT(vmalloc_head.vm_list),
+ * .vm_start = VMALLOC_START,
+ * .vm_end = VMALLOC_END,
+ * };
+ *
+ * However, vmalloc_head.vm_start is variable (typically, it is dependent on
+ * the amount of RAM found at boot time.) I would imagine that get_vm_area()
+ * would have to initialise this each time prior to calling
+ * metag_vm_region_alloc().
+ */
+/* One allocated range of virtual addresses within a region list. */
+struct metag_vm_region {
+ struct list_head vm_list; /* link in the owning head's list */
+ unsigned long vm_start; /* first address of the range */
+ unsigned long vm_end; /* vm_start + size, i.e. one past the range */
+ struct page *vm_pages; /* first backing page, set by the allocator's caller */
+ int vm_active; /* 1 once allocated, cleared when being freed */
+};
+
+/*
+ * List head for the consistent mapping window; its vm_start/vm_end bound the
+ * addresses that metag_vm_region_alloc() may hand out.
+ */
+static struct metag_vm_region consistent_head = {
+ .vm_list = LIST_HEAD_INIT(consistent_head.vm_list),
+ .vm_start = CONSISTENT_START,
+ .vm_end = CONSISTENT_END,
+};
+
+/*
+ * metag_vm_region_alloc() - reserve a range of addresses below @head.
+ * @head: Region list to allocate from; bounds the usable addresses.
+ * @size: Size of the range to reserve.
+ * @gfp: Allocation flags for the bookkeeping structure.
+ *
+ * Walks the list (kept in address order) under consistent_lock looking for
+ * the first gap of at least @size, and inserts a new active region there.
+ *
+ * Returns the new region, or NULL if the bookkeeping allocation fails or no
+ * gap is large enough.
+ */
+static struct metag_vm_region *metag_vm_region_alloc(struct metag_vm_region
+ *head, size_t size,
+ gfp_t gfp)
+{
+ /* end is the highest start address at which @size still fits */
+ unsigned long addr = head->vm_start, end = head->vm_end - size;
+ unsigned long flags;
+ struct metag_vm_region *c, *new;
+
+ new = kmalloc(sizeof(struct metag_vm_region), gfp);
+ if (!new)
+ goto out;
+
+ spin_lock_irqsave(&consistent_lock, flags);
+
+ list_for_each_entry(c, &head->vm_list, vm_list) {
+ /* addr + size wrapped around the address space */
+ if ((addr + size) < addr)
+ goto nospc;
+ if ((addr + size) <= c->vm_start)
+ goto found;
+ addr = c->vm_end;
+ if (addr > end)
+ goto nospc;
+ }
+
+ /*
+ * If the loop ran to completion, c is the cursor left behind by
+ * list_for_each_entry() and the new entry goes at the end of the list.
+ */
+found:
+ /*
+ * Insert this entry _before_ the one we found.
+ */
+ list_add_tail(&new->vm_list, &c->vm_list);
+ new->vm_start = addr;
+ new->vm_end = addr + size;
+ new->vm_active = 1;
+
+ spin_unlock_irqrestore(&consistent_lock, flags);
+ return new;
+
+nospc:
+ spin_unlock_irqrestore(&consistent_lock, flags);
+ kfree(new);
+out:
+ return NULL;
+}
+
+/*
+ * metag_vm_region_find() - look up the active region starting at @addr.
+ * @head: Region list to search.
+ * @addr: Start address to match against vm_start.
+ *
+ * Takes no lock itself. Returns the matching region, or NULL if there is no
+ * active region starting at @addr.
+ */
+static struct metag_vm_region *metag_vm_region_find(struct metag_vm_region
+						    *head, unsigned long addr)
+{
+	struct metag_vm_region *region;
+
+	list_for_each_entry(region, &head->vm_list, vm_list) {
+		if (!region->vm_active)
+			continue;
+		if (region->vm_start == addr)
+			return region;
+	}
+
+	return NULL;
+}
+
+/*
+ * dma_alloc_coherent() - allocate DMA-coherent memory.
+ * @dev: Device the buffer is for (may be NULL); supplies the coherent mask.
+ * @size: Requested size in bytes, rounded up to whole pages.
+ * @handle: Set to the bus address of the buffer on success.
+ * @gfp: Allocation flags.
+ *
+ * Allocates pages, zeroes them and flushes them from the data cache, then
+ * maps them write-combined into the consistent mapping region.
+ *
+ * Returns the kernel remapped virtual address of the buffer, or NULL on
+ * failure.
+ */
+void *dma_alloc_coherent(struct device *dev, size_t size,
+			 dma_addr_t *handle, gfp_t gfp)
+{
+	struct page *page;
+	struct metag_vm_region *c;
+	unsigned long order;
+	u64 mask = get_coherent_dma_mask(dev);
+	u64 limit;
+
+	if (!consistent_pte) {
+		pr_err("%s: not initialised\n", __func__);
+		dump_stack();
+		return NULL;
+	}
+
+	if (!mask)
+		goto no_page;
+	size = PAGE_ALIGN(size);
+	/* Lowest clear bit of the mask: sizes from there up cannot fit. */
+	limit = (mask + 1) & ~mask;
+	if ((limit && size >= limit)
+	    || size >= (CONSISTENT_END - CONSISTENT_START)) {
+		/* size is a size_t, so it needs the 'z' length modifier. */
+		pr_warn("coherent allocation too big (requested %#zx mask %#Lx)\n",
+			size, mask);
+		return NULL;
+	}
+
+	order = get_order(size);
+
+	if (mask != 0xffffffff)
+		gfp |= GFP_DMA;
+
+	page = alloc_pages(gfp, order);
+	if (!page)
+		goto no_page;
+
+	/*
+	 * Invalidate any data that might be lurking in the
+	 * kernel direct-mapped region for device DMA.
+	 */
+	{
+		void *kaddr = page_address(page);
+		memset(kaddr, 0, size);
+		flush_dcache_region(kaddr, size);
+	}
+
+	/*
+	 * Allocate a virtual address in the consistent mapping region.
+	 */
+	c = metag_vm_region_alloc(&consistent_head, size,
+				  gfp & ~(__GFP_DMA | __GFP_HIGHMEM));
+	if (c) {
+		unsigned long vaddr = c->vm_start;
+		pte_t *pte = consistent_pte + CONSISTENT_OFFSET(vaddr);
+		/* One past the last page of the 2^order block just allocated. */
+		struct page *end = page + (1 << order);
+
+		c->vm_pages = page;
+		split_page(page, order);
+
+		/*
+		 * Set the "dma handle"
+		 */
+		*handle = page_to_bus(page);
+
+		/* Map one page per iteration; size is page aligned and > 0. */
+		do {
+			BUG_ON(!pte_none(*pte));
+
+			SetPageReserved(page);
+			set_pte_at(&init_mm, vaddr,
+				   pte, mk_pte(page,
+					       pgprot_writecombine
+					       (PAGE_KERNEL)));
+			page++;
+			pte++;
+			vaddr += PAGE_SIZE;
+		} while (size -= PAGE_SIZE);
+
+		/*
+		 * Free the otherwise unused pages.
+		 */
+		while (page < end) {
+			__free_page(page);
+			page++;
+		}
+
+		return (void *)c->vm_start;
+	}
+
+	if (page)
+		__free_pages(page, order);
+no_page:
+	return NULL;
+}
+EXPORT_SYMBOL(dma_alloc_coherent);
+
+/*
+ * dma_free_coherent() - free memory obtained from dma_alloc_coherent().
+ * @dev: Device the buffer was allocated for (unused here).
+ * @size: Size that was requested at allocation time.
+ * @vaddr: Virtual address returned by dma_alloc_coherent().
+ * @dma_handle: Bus address of the buffer (unused here).
+ *
+ * Clears the mapping's page table entries, frees the backing pages, flushes
+ * the TLB for the range and releases the region. A size that does not match
+ * the region is reported and the region's own size is used instead.
+ */
+void dma_free_coherent(struct device *dev, size_t size,
+		       void *vaddr, dma_addr_t dma_handle)
+{
+	struct metag_vm_region *c;
+	unsigned long flags, addr;
+	pte_t *ptep;
+
+	size = PAGE_ALIGN(size);
+
+	spin_lock_irqsave(&consistent_lock, flags);
+
+	c = metag_vm_region_find(&consistent_head, (unsigned long)vaddr);
+	if (!c)
+		goto no_area;
+
+	c->vm_active = 0;
+	if ((c->vm_end - c->vm_start) != size) {
+		/* unsigned long and size_t arguments: use %lu and %zu. */
+		pr_err("%s: freeing wrong coherent size (%lu != %zu)\n",
+		       __func__, c->vm_end - c->vm_start, size);
+		dump_stack();
+		size = c->vm_end - c->vm_start;
+	}
+
+	ptep = consistent_pte + CONSISTENT_OFFSET(c->vm_start);
+	addr = c->vm_start;
+	/* Tear down one page per iteration. */
+	do {
+		pte_t pte = ptep_get_and_clear(&init_mm, addr, ptep);
+		unsigned long pfn;
+
+		ptep++;
+		addr += PAGE_SIZE;
+
+		if (!pte_none(pte) && pte_present(pte)) {
+			pfn = pte_pfn(pte);
+
+			if (pfn_valid(pfn)) {
+				struct page *page = pfn_to_page(pfn);
+				ClearPageReserved(page);
+
+				__free_page(page);
+				continue;
+			}
+		}
+
+		pr_crit("%s: bad page in kernel page table\n",
+			__func__);
+	} while (size -= PAGE_SIZE);
+
+	flush_tlb_kernel_range(c->vm_start, c->vm_end);
+
+	list_del(&c->vm_list);
+
+	spin_unlock_irqrestore(&consistent_lock, flags);
+
+	kfree(c);
+	return;
+
+no_area:
+	spin_unlock_irqrestore(&consistent_lock, flags);
+	pr_err("%s: trying to free invalid coherent area: %p\n",
+	       __func__, vaddr);
+	dump_stack();
+}
+EXPORT_SYMBOL(dma_free_coherent);
+
+
+/*
+ * dma_mmap() - map a coherent buffer into a userspace vma.
+ * @dev: Device the buffer belongs to (unused here).
+ * @vma: Destination vma; vm_pgoff selects the starting page in the buffer.
+ * @cpu_addr: Kernel address returned by dma_alloc_coherent().
+ * @dma_addr: Bus address of the buffer (unused here).
+ * @size: Size of the buffer (unused here).
+ *
+ * Returns the result of remap_pfn_range(), or -ENXIO if @cpu_addr is not an
+ * active consistent region or the requested window does not fit inside it.
+ */
+static int dma_mmap(struct device *dev, struct vm_area_struct *vma,
+		    void *cpu_addr, dma_addr_t dma_addr, size_t size)
+{
+	unsigned long flags, user_pages, kern_pages, pgoff;
+	struct metag_vm_region *region;
+
+	user_pages = (vma->vm_end - vma->vm_start) >> PAGE_SHIFT;
+
+	spin_lock_irqsave(&consistent_lock, flags);
+	region = metag_vm_region_find(&consistent_head,
+				      (unsigned long)cpu_addr);
+	spin_unlock_irqrestore(&consistent_lock, flags);
+
+	if (!region)
+		return -ENXIO;
+
+	pgoff = vma->vm_pgoff;
+	kern_pages = (region->vm_end - region->vm_start) >> PAGE_SHIFT;
+
+	/* The window [pgoff, pgoff + user_pages) must lie within the region. */
+	if (pgoff >= kern_pages || user_pages > kern_pages - pgoff)
+		return -ENXIO;
+
+	return remap_pfn_range(vma, vma->vm_start,
+			       page_to_pfn(region->vm_pages) + pgoff,
+			       user_pages << PAGE_SHIFT,
+			       vma->vm_page_prot);
+}
+
+/*
+ * dma_mmap_coherent() - map a coherent buffer into userspace, uncached.
+ *
+ * Switches the vma's page protection to pgprot_noncached() and then defers
+ * to dma_mmap(), whose return value is passed back.
+ */
+int dma_mmap_coherent(struct device *dev, struct vm_area_struct *vma,
+ void *cpu_addr, dma_addr_t dma_addr, size_t size)
+{
+ vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
+ return dma_mmap(dev, vma, cpu_addr, dma_addr, size);
+}
+EXPORT_SYMBOL(dma_mmap_coherent);
+
+/*
+ * dma_mmap_writecombine() - map a coherent buffer into userspace,
+ * write-combined.
+ *
+ * Switches the vma's page protection to pgprot_writecombine() and then
+ * defers to dma_mmap(), whose return value is passed back.
+ */
+int dma_mmap_writecombine(struct device *dev, struct vm_area_struct *vma,
+ void *cpu_addr, dma_addr_t dma_addr, size_t size)
+{
+ vma->vm_page_prot = pgprot_writecombine(vma->vm_page_prot);
+ return dma_mmap(dev, vma, cpu_addr, dma_addr, size);
+}
+EXPORT_SYMBOL(dma_mmap_writecombine);
+
+
+
+
+/*
+ * Initialise the consistent memory allocation.
+ *
+ * Allocates the page table that covers CONSISTENT_START in init_mm, copies
+ * the resulting pmd entry into the table returned by mmu_get_base(), and
+ * records the pte table in consistent_pte for dma_alloc_coherent().
+ *
+ * Returns 0 on success or -ENOMEM if a page table level cannot be allocated.
+ */
+static int __init dma_alloc_init(void)
+{
+	pgd_t *pgd, *pgd_k;
+	pud_t *pud, *pud_k;
+	pmd_t *pmd, *pmd_k;
+	pte_t *pte;
+	int offset = pgd_index(CONSISTENT_START);
+
+	pgd = pgd_offset(&init_mm, CONSISTENT_START);
+	pud = pud_alloc(&init_mm, pgd, CONSISTENT_START);
+	if (!pud) {
+		pr_err("%s: no pud tables\n", __func__);
+		return -ENOMEM;
+	}
+
+	pmd = pmd_alloc(&init_mm, pud, CONSISTENT_START);
+	if (!pmd) {
+		pr_err("%s: no pmd tables\n", __func__);
+		return -ENOMEM;
+	}
+	WARN_ON(!pmd_none(*pmd));
+
+	pte = pte_alloc_kernel(pmd, CONSISTENT_START);
+	if (!pte) {
+		pr_err("%s: no pte tables\n", __func__);
+		return -ENOMEM;
+	}
+
+	/* Mirror the new pmd entry into the table at mmu_get_base(). */
+	pgd_k = ((pgd_t *) mmu_get_base()) + offset;
+	pud_k = pud_offset(pgd_k, CONSISTENT_START);
+	pmd_k = pmd_offset(pud_k, CONSISTENT_START);
+	set_pmd(pmd_k, *pmd);
+
+	consistent_pte = pte;
+
+	return 0;
+}
+early_initcall(dma_alloc_init);
+
+/*
+ * make an area consistent to devices.
+ *
+ * @vaddr: Start of the buffer.
+ * @size: Length of the buffer.
+ * @dma_direction: One of the DMA_* direction values; DMA_NONE is a BUG().
+ *
+ * Flushes, writes back or invalidates the data cache over the buffer
+ * depending on the direction, then issues wmb().
+ */
+void dma_sync_for_device(void *vaddr, size_t size, int dma_direction)
+{
+ /*
+ * Ensure any writes get through the write combiner. This is necessary
+ * even with DMA_FROM_DEVICE, or the write may dirty the cache after
+ * we've invalidated it and get written back during the DMA.
+ */
+
+ /*
+ * NOTE(review): confirm that barrier() is sufficient to drain the
+ * write combiner as described above.
+ */
+ barrier();
+
+ switch (dma_direction) {
+ case DMA_BIDIRECTIONAL:
+ /*
+ * Writeback to ensure the device can see our latest changes and
+ * so that we have no dirty lines, and invalidate the cache
+ * lines too in preparation for receiving the buffer back
+ * (dma_sync_for_cpu) later.
+ */
+ flush_dcache_region(vaddr, size);
+ break;
+ case DMA_TO_DEVICE:
+ /*
+ * Writeback to ensure the device can see our latest changes.
+ * There's no need to invalidate as the device shouldn't write
+ * to the buffer.
+ */
+ writeback_dcache_region(vaddr, size);
+ break;
+ case DMA_FROM_DEVICE:
+ /*
+ * Invalidate to ensure we have no dirty lines that could get
+ * written back during the DMA. It's also safe to flush
+ * (writeback) here if necessary.
+ */
+ invalidate_dcache_region(vaddr, size);
+ break;
+ case DMA_NONE:
+ BUG();
+ }
+
+ wmb();
+}
+EXPORT_SYMBOL(dma_sync_for_device);
+
+/*
+ * make an area consistent to the core.
+ *
+ * @vaddr: Start of the buffer.
+ * @size: Length of the buffer.
+ * @dma_direction: One of the DMA_* direction values; DMA_NONE is a BUG()
+ * when L2 prefetch is enabled.
+ *
+ * Only does cache maintenance when _meta_l2c_pf_is_enabled() reports that
+ * L2 prefetch is on; rmb() is issued in every case.
+ */
+void dma_sync_for_cpu(void *vaddr, size_t size, int dma_direction)
+{
+ /*
+ * Hardware L2 cache prefetch doesn't occur across 4K physical
+ * boundaries, however according to Documentation/DMA-API-HOWTO.txt
+ * kmalloc'd memory is DMA'able, so accesses in nearby memory could
+ * trigger a cache fill in the DMA buffer.
+ *
+ * This should never cause dirty lines, so a flush or invalidate should
+ * be safe to allow us to see data from the device.
+ */
+ if (_meta_l2c_pf_is_enabled()) {
+ switch (dma_direction) {
+ case DMA_BIDIRECTIONAL:
+ case DMA_FROM_DEVICE:
+ invalidate_dcache_region(vaddr, size);
+ break;
+ case DMA_TO_DEVICE:
+ /* The device shouldn't have written to the buffer */
+ break;
+ case DMA_NONE:
+ BUG();
+ }
+ }
+
+ rmb();
+}
+EXPORT_SYMBOL(dma_sync_for_cpu);
diff --git a/arch/metag/kernel/ftrace.c b/arch/metag/kernel/ftrace.c
new file mode 100644
index 00000000000..a774f321643
--- /dev/null
+++ b/arch/metag/kernel/ftrace.c
@@ -0,0 +1,126 @@
+/*
+ * Copyright (C) 2008 Imagination Technologies Ltd.
+ * Licensed under the GPL
+ *
+ * Dynamic ftrace support.
+ */
+
+#include <linux/ftrace.h>
+#include <linux/io.h>
+#include <linux/uaccess.h>
+
+#include <asm/cacheflush.h>
+
+/*
+ * Opcode templates for the MOVT/CALL pair built by ftrace_call_replace(),
+ * which ORs the shifted halves of the target address into them. The D04
+ * pair is used for compiler-generated call sites, the D1RTP pair for the
+ * ftrace_call site.
+ */
+#define D04_MOVT_TEMPLATE 0x02200005
+#define D04_CALL_TEMPLATE 0xAC200005
+#define D1RTP_MOVT_TEMPLATE 0x03200005
+#define D1RTP_CALL_TEMPLATE 0xAC200006
+
+/* Two-word sequence handed out by ftrace_nop_replace(). */
+static const unsigned long NOP[2] = {0xa0fffffe, 0xa0fffffe};
+/* Buffer filled in and returned by ftrace_call_replace(). */
+static unsigned long movt_and_call_insn[2];
+
+/*
+ * Return a pointer to the NOP sequence that replaces a patched-out call.
+ *
+ * The data is const; callers must only read through the returned pointer.
+ */
+static unsigned char *ftrace_nop_replace(void)
+{
+	/* Cast to the declared return type rather than plain char *. */
+	return (unsigned char *)&NOP[0];
+}
+
+/*
+ * Build the MOVT/CALL instruction pair that calls @addr from the call site
+ * at @pc. The result lives in the shared movt_and_call_insn buffer, so it is
+ * only valid until the next call.
+ */
+static unsigned char *ftrace_call_replace(unsigned long pc, unsigned long addr)
+{
+	unsigned long movt, call;
+
+	/*
+	 * Compiler-generated calls to mcount_wrapper() (Meta's wrapper around
+	 * mcount()) go through register D0.4, so patching one of those has to
+	 * go through D0.4 as well. The ftrace_call site uses D1RtP instead.
+	 */
+	if (pc == (unsigned long)&ftrace_call) {
+		movt = D1RTP_MOVT_TEMPLATE;
+		call = D1RTP_CALL_TEMPLATE;
+	} else {
+		movt = D04_MOVT_TEMPLATE;
+		call = D04_CALL_TEMPLATE;
+	}
+
+	/* Each 16-bit half of the address ends up starting at bit 3. */
+	movt |= (addr & 0xffff0000) >> 13;
+	call |= (addr & 0x0000ffff) << 3;
+
+	writel(movt, &movt_and_call_insn[0]);
+	writel(call, &movt_and_call_insn[1]);
+
+	return (unsigned char *)&movt_and_call_insn[0];
+}
+
+/*
+ * Replace the MCOUNT_INSN_SIZE bytes of code at @pc with @new_code, but only
+ * if they currently match @old_code.
+ *
+ * Returns 0 on success, -EFAULT if the existing code cannot be read, -EINVAL
+ * if it is not what was expected and -EPERM if the write fails.
+ */
+static int ftrace_modify_code(unsigned long pc, unsigned char *old_code,
+			      unsigned char *new_code)
+{
+	unsigned char found[MCOUNT_INSN_SIZE];
+	void *site = (void *)pc;
+
+	/*
+	 * Modules and __init code can disappear or change underneath us, so
+	 * guard against faulting as well as against unexpected contents.
+	 *
+	 * No real locking is needed, this code is run through kstop_machine.
+	 */
+
+	/* Fetch what is there right now ... */
+	if (probe_kernel_read(found, site, MCOUNT_INSN_SIZE))
+		return -EFAULT;
+
+	/* ... and refuse to patch anything we do not recognise. */
+	if (memcmp(found, old_code, MCOUNT_INSN_SIZE))
+		return -EINVAL;
+
+	/* Write the new instructions over the old ones. */
+	if (probe_kernel_write(site, new_code, MCOUNT_INSN_SIZE))
+		return -EPERM;
+
+	flush_icache_range(pc, pc + MCOUNT_INSN_SIZE);
+
+	return 0;
+}
+
+/*
+ * Re-point the call at the ftrace_call site to @func.
+ *
+ * Returns whatever ftrace_modify_code() returns.
+ */
+int ftrace_update_ftrace_func(ftrace_func_t func)
+{
+	unsigned long pc = (unsigned long)&ftrace_call;
+	unsigned char old[MCOUNT_INSN_SIZE];
+
+	/* Whatever is at the call site now is what we expect to replace. */
+	memcpy(old, &ftrace_call, MCOUNT_INSN_SIZE);
+
+	return ftrace_modify_code(pc, old,
+				  ftrace_call_replace(pc, (unsigned long)func));
+}
+
+/*
+ * Turn the call to @addr at the site described by @rec into the NOP
+ * sequence. @mod is unused here.
+ */
+int ftrace_make_nop(struct module *mod,
+		    struct dyn_ftrace *rec, unsigned long addr)
+{
+	unsigned long ip = rec->ip;
+	unsigned char *expected = ftrace_call_replace(ip, addr);
+
+	return ftrace_modify_code(ip, expected, ftrace_nop_replace());
+}
+
+/*
+ * Turn the NOP sequence at the site described by @rec into a call to @addr.
+ */
+int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr)
+{
+	unsigned long ip = rec->ip;
+	unsigned char *expected = ftrace_nop_replace();
+
+	return ftrace_modify_code(ip, expected, ftrace_call_replace(ip, addr));
+}
+
+/*
+ * Arch initialisation hook for dynamic ftrace. It only stores 0 through
+ * @data and returns 0.
+ */
+/* run from kstop_machine */
+int __init ftrace_dyn_arch_init(void *data)
+{
+ /* The return code is returned via data */
+ writel(0, data);
+
+ return 0;
+}
diff --git a/arch/metag/kernel/ftrace_stub.S b/arch/metag/kernel/ftrace_stub.S
new file mode 100644
index 00000000000..e70bff745bd
--- /dev/null
+++ b/arch/metag/kernel/ftrace_stub.S
@@ -0,0 +1,76 @@
+/*
+ * Copyright (C) 2008 Imagination Technologies Ltd.
+ * Licensed under the GPL
+ *
+ */
+
+#include <asm/ftrace.h>
+
+ .text
+#ifdef CONFIG_DYNAMIC_FTRACE
+/*
+ * With dynamic ftrace the wrapper jumps straight back to the address held
+ * in D0.4.
+ */
+ .global _mcount_wrapper
+ .type _mcount_wrapper,function
+_mcount_wrapper:
+ MOV PC,D0.4
+
+/*
+ * Tracing entry point: jumps back through D0.4 when _function_trace_stop is
+ * non-zero, otherwise falls into the patchable call at _ftrace_call.
+ */
+ .global _ftrace_caller
+ .type _ftrace_caller,function
+_ftrace_caller:
+ MOVT D0Re0,#HI(_function_trace_stop)
+ ADD D0Re0,D0Re0,#LO(_function_trace_stop)
+ GETD D0Re0,[D0Re0]
+ CMP D0Re0,#0
+ BEQ $Lcall_stub
+ MOV PC,D0.4
+$Lcall_stub:
+/*
+ * NOTE(review): presumably saves the argument registers and D0.4 on the
+ * stack; they are reloaded by the four GETLs after the call - confirm.
+ */
+ MSETL [A0StP], D0Ar6, D0Ar4, D0Ar2, D0.4
+/* D1Ar1 = D0.4 - MCOUNT_INSN_SIZE (see the SUB below), D0Ar2 = D1RtP */
+ MOV D1Ar1, D0.4
+ MOV D0Ar2, D1RtP
+ SUB D1Ar1,D1Ar1,#MCOUNT_INSN_SIZE
+
+/*
+ * The MOVT/CALL pair at _ftrace_call initially targets _ftrace_stub and is
+ * rewritten at run time by ftrace_update_ftrace_func().
+ */
+ .global _ftrace_call
+_ftrace_call:
+ MOVT D1RtP,#HI(_ftrace_stub)
+ CALL D1RtP,#LO(_ftrace_stub)
+ GETL D0.4, D1RtP, [A0StP++#(-8)]
+ GETL D0Ar2, D1Ar1, [A0StP++#(-8)]
+ GETL D0Ar4, D1Ar3, [A0StP++#(-8)]
+ GETL D0Ar6, D1Ar5, [A0StP++#(-8)]
+ MOV PC, D0.4
+#else
+
+/*
+ * Without dynamic ftrace the wrapper does the work itself: it jumps back
+ * through D0.4 when _function_trace_stop is non-zero, otherwise calls the
+ * function stored in _ftrace_trace_function unless that is _ftrace_stub.
+ */
+ .global _mcount_wrapper
+ .type _mcount_wrapper,function
+_mcount_wrapper:
+ MOVT D0Re0,#HI(_function_trace_stop)
+ ADD D0Re0,D0Re0,#LO(_function_trace_stop)
+ GETD D0Re0,[D0Re0]
+ CMP D0Re0,#0
+ BEQ $Lcall_mcount
+ MOV PC,D0.4
+$Lcall_mcount:
+ MSETL [A0StP], D0Ar6, D0Ar4, D0Ar2, D0.4
+ MOV D1Ar1, D0.4
+ MOV D0Ar2, D1RtP
+/* Load the current trace function and compare it against _ftrace_stub */
+ MOVT D0Re0,#HI(_ftrace_trace_function)
+ ADD D0Re0,D0Re0,#LO(_ftrace_trace_function)
+ GET D1Ar3,[D0Re0]
+ MOVT D1Re0,#HI(_ftrace_stub)
+ ADD D1Re0,D1Re0,#LO(_ftrace_stub)
+ CMP D1Ar3,D1Re0
+ BEQ $Ltrace_exit
+/* Call the trace function with D1Ar1 = D0.4 - MCOUNT_INSN_SIZE */
+ MOV D1RtP,D1Ar3
+ SUB D1Ar1,D1Ar1,#MCOUNT_INSN_SIZE
+ SWAP PC,D1RtP
+$Ltrace_exit:
+ GETL D0.4, D1RtP, [A0StP++#(-8)]
+ GETL D0Ar2, D1Ar1, [A0StP++#(-8)]
+ GETL D0Ar4, D1Ar3, [A0StP++#(-8)]
+ GETL D0Ar6, D1Ar5, [A0StP++#(-8)]
+ MOV PC, D0.4
+
+#endif /* CONFIG_DYNAMIC_FTRACE */
+
+/* Default trace function: jumps straight back through D1RtP. */
+ .global _ftrace_stub
+_ftrace_stub:
+ MOV PC,D1RtP
diff --git a/arch/metag/kernel/head.S b/arch/metag/kernel/head.S
new file mode 100644
index 00000000000..969dffabc03
--- /dev/null
+++ b/arch/metag/kernel/head.S
@@ -0,0 +1,57 @@
+ ! Copyright 2005,2006,2007,2009 Imagination Technologies
+
+#include <linux/init.h>
+#include <generated/asm-offsets.h>
+#undef __exit
+
+ __HEAD
+ ! Setup the stack and get going into _metag_start_kernel
+ .global __start
+ .type __start,function
+__start:
+ ! D1Ar1 contains pTBI (ISTAT)
+ ! D0Ar2 contains pTBI
+ ! D1Ar3 contains __pTBISegs
+ ! D0Ar4 contains kernel arglist pointer
+
+ ! Save both pTBI values into ___pTBIs and the segment list pointer
+ ! into ___pTBISegs
+ MOVT D0Re0,#HI(___pTBIs)
+ ADD D0Re0,D0Re0,#LO(___pTBIs)
+ SETL [D0Re0],D0Ar2,D1Ar1
+ MOVT D0Re0,#HI(___pTBISegs)
+ ADD D0Re0,D0Re0,#LO(___pTBISegs)
+ SETD [D0Re0],D1Ar3
+ ! Clear the frame pointer and scratch registers; D1Ar1 is loaded with
+ ! the arglist pointer from D0Ar4
+ MOV A0FrP,#0
+ MOV D0Re0,#0
+ MOV D1Re0,#0
+ MOV D1Ar3,#0
+ MOV D1Ar1,D0Ar4 !Store kernel boot params
+ MOV D1Ar5,#0
+ MOV D0Ar6,#0
+#ifdef CONFIG_METAG_DSP
+ MOV D0.8,#0
+#endif
+ ! Stack pointer = _init_thread_union + THREAD_INFO_SIZE
+ MOVT A0StP,#HI(_init_thread_union)
+ ADD A0StP,A0StP,#LO(_init_thread_union)
+ ADD A0StP,A0StP,#THREAD_INFO_SIZE
+ MOVT D1RtP,#HI(_metag_start_kernel)
+ CALL D1RtP,#LO(_metag_start_kernel)
+ .size __start,.-__start
+
+ !! Needed by TBX
+ ! Writes D0Re0 XOR D0Re0 (i.e. zero) to TXENABLE
+ ! NOTE(review): presumably this stops the hardware thread - confirm
+ .global __exit
+ .type __exit,function
+__exit:
+ XOR TXENABLE,D0Re0,D0Re0
+ .size __exit,.-__exit
+
+#ifdef CONFIG_SMP
+ ! Secondary CPU entry: load the stack base stored in
+ ! _secondary_data_stack, step past THREAD_INFO_SIZE and branch to
+ ! _secondary_start_kernel
+ .global _secondary_startup
+ .type _secondary_startup,function
+_secondary_startup:
+ MOVT A0StP,#HI(_secondary_data_stack)
+ ADD A0StP,A0StP,#LO(_secondary_data_stack)
+ GETD A0StP,[A0StP]
+ ADD A0StP,A0StP,#THREAD_INFO_SIZE
+ B _secondary_start_kernel
+ .size _secondary_startup,.-_secondary_startup
+#endif
diff --git a/arch/metag/kernel/irq.c b/arch/metag/kernel/irq.c
new file mode 100644
index 00000000000..87707efeb0a
--- /dev/null
+++ b/arch/metag/kernel/irq.c
@@ -0,0 +1,323 @@
+/*
+ * Linux/Meta general interrupt handling code
+ *
+ */
+
+#include <linux/kernel.h>
+#include <linux/interrupt.h>
+#include <linux/init.h>
+#include <linux/irqchip/metag-ext.h>
+#include <linux/irqchip/metag.h>
+#include <linux/irqdomain.h>
+#include <linux/ratelimit.h>
+
+#include <asm/core_reg.h>
+#include <asm/mach/arch.h>
+#include <asm/uaccess.h>
+
+#ifdef CONFIG_4KSTACKS
+/*
+ * A separate interrupt stack: a thread_info overlaid on the start of a
+ * THREAD_SIZE-byte stack area.
+ */
+union irq_ctx {
+ struct thread_info tinfo;
+ u32 stack[THREAD_SIZE/sizeof(u32)];
+};
+
+/* Per-cpu hardirq and softirq contexts, filled in by irq_ctx_init(). */
+static union irq_ctx *hardirq_ctx[NR_CPUS] __read_mostly;
+static union irq_ctx *softirq_ctx[NR_CPUS] __read_mostly;
+#endif
+
+/* Linear IRQ domain created in init_IRQ() and used to map TBI signals. */
+struct irq_domain *root_domain;
+
+/*
+ * irq_chip startup callback: hands the hardware IRQ number to
+ * tbi_startup_interrupt() and always returns 0.
+ */
+static unsigned int startup_meta_irq(struct irq_data *data)
+{
+ tbi_startup_interrupt(data->hwirq);
+ return 0;
+}
+
+/*
+ * irq_chip shutdown callback: hands the hardware IRQ number to
+ * tbi_shutdown_interrupt().
+ */
+static void shutdown_meta_irq(struct irq_data *data)
+{
+ tbi_shutdown_interrupt(data->hwirq);
+}
+
+/*
+ * do_IRQ() - dispatch an interrupt to its handler.
+ * @irq:  hardware IRQ number; translated to a Linux IRQ number through
+ *        root_domain before use
+ * @regs: register state at the time of the interrupt
+ *
+ * With CONFIG_4KSTACKS the handler is run on this CPU's dedicated hardirq
+ * stack unless we are already on it; otherwise generic_handle_irq() is
+ * called on the current stack.
+ */
+void do_IRQ(int irq, struct pt_regs *regs)
+{
+	struct pt_regs *old_regs = set_irq_regs(regs);
+#ifdef CONFIG_4KSTACKS
+	struct irq_desc *desc;
+	union irq_ctx *curctx, *irqctx;
+	u32 *isp;
+#endif
+
+	irq_enter();
+
+	irq = irq_linear_revmap(root_domain, irq);
+
+#ifdef CONFIG_DEBUG_STACKOVERFLOW
+	/* Debugging check for stack overflow: is there less than 1KB free? */
+	{
+		unsigned long sp;
+
+		sp = __core_reg_get(A0StP);
+		sp &= THREAD_SIZE - 1;
+
+		/* sp is unsigned long, so it is printed with %lu */
+		if (unlikely(sp > (THREAD_SIZE - 1024)))
+			pr_err("Stack overflow in do_IRQ: %lu\n", sp);
+	}
+#endif
+
+
+#ifdef CONFIG_4KSTACKS
+	curctx = (union irq_ctx *) current_thread_info();
+	irqctx = hardirq_ctx[smp_processor_id()];
+
+	/*
+	 * this is where we switch to the IRQ stack. However, if we are
+	 * already using the IRQ stack (because we interrupted a hardirq
+	 * handler) we can't do that and just have to keep using the
+	 * current stack (which is the irq stack already after all)
+	 */
+	if (curctx != irqctx) {
+		/* build the stack frame on the IRQ stack */
+		isp = (u32 *) ((char *)irqctx + sizeof(struct thread_info));
+		irqctx->tinfo.task = curctx->tinfo.task;
+
+		/*
+		 * Copy the softirq bits in preempt_count so that the
+		 * softirq checks work in the hardirq context.
+		 */
+		irqctx->tinfo.preempt_count =
+			(irqctx->tinfo.preempt_count & ~SOFTIRQ_MASK) |
+			(curctx->tinfo.preempt_count & SOFTIRQ_MASK);
+
+		desc = irq_to_desc(irq);
+
+		/*
+		 * Load irq and desc into D1Ar1/D0Ar2 and the handler address
+		 * into D1RtP, swap A0StP with the new stack pointer held in
+		 * D0.5, call the handler via SWAP PC,D1RtP and finally copy
+		 * D0.5 back into A0StP.
+		 * NOTE(review): this relies on the handler preserving D0.5 -
+		 * confirm against the Meta calling convention.
+		 */
+		asm volatile (
+			"MOV D0.5,%0\n"
+			"MOV D1Ar1,%1\n"
+			"MOV D1RtP,%2\n"
+			"MOV D0Ar2,%3\n"
+			"SWAP A0StP,D0.5\n"
+			"SWAP PC,D1RtP\n"
+			"MOV A0StP,D0.5\n"
+			:
+			: "r" (isp), "r" (irq), "r" (desc->handle_irq),
+			  "r" (desc)
+			: "memory", "cc", "D1Ar1", "D0Ar2", "D1Ar3", "D0Ar4",
+			  "D1Ar5", "D0Ar6", "D0Re0", "D1Re0", "D0.4", "D1RtP",
+			  "D0.5"
+			);
+	} else
+#endif
+		generic_handle_irq(irq);
+
+	irq_exit();
+
+	set_irq_regs(old_regs);
+}
+
+#ifdef CONFIG_4KSTACKS
+
+/* Backing storage for the IRQ stacks, THREAD_SIZE bytes per possible CPU. */
+static char softirq_stack[NR_CPUS * THREAD_SIZE] __page_aligned_bss;
+
+static char hardirq_stack[NR_CPUS * THREAD_SIZE] __page_aligned_bss;
+
+/*
+ * Set up the hardirq and softirq stacks of @cpu by carving them out of the
+ * static arrays above. Does nothing if @cpu already has a hardirq context.
+ */
+void irq_ctx_init(int cpu)
+{
+	union irq_ctx *hard, *soft;
+
+	/* Already initialised for this CPU */
+	if (hardirq_ctx[cpu])
+		return;
+
+	/* The hardirq context starts out with one hardirq level accounted */
+	hard = (union irq_ctx *) &hardirq_stack[cpu * THREAD_SIZE];
+	hard->tinfo.task = NULL;
+	hard->tinfo.exec_domain = NULL;
+	hard->tinfo.cpu = cpu;
+	hard->tinfo.preempt_count = HARDIRQ_OFFSET;
+	hard->tinfo.addr_limit = MAKE_MM_SEG(0);
+	hardirq_ctx[cpu] = hard;
+
+	/* The softirq context starts out with a zero preempt count */
+	soft = (union irq_ctx *) &softirq_stack[cpu * THREAD_SIZE];
+	soft->tinfo.task = NULL;
+	soft->tinfo.exec_domain = NULL;
+	soft->tinfo.cpu = cpu;
+	soft->tinfo.preempt_count = 0;
+	soft->tinfo.addr_limit = MAKE_MM_SEG(0);
+	softirq_ctx[cpu] = soft;
+
+	pr_info("CPU %u irqstacks, hard=%p soft=%p\n",
+		cpu, hard, soft);
+}
+
+/*
+ * Drop the hardirq context of @cpu so that a later irq_ctx_init(@cpu)
+ * sets it up again.
+ *
+ * Index by @cpu rather than smp_processor_id(): the caller names the CPU
+ * being torn down, which need not be the one this code runs on.
+ */
+void irq_ctx_exit(int cpu)
+{
+	hardirq_ctx[cpu] = NULL;
+}
+
+extern asmlinkage void __do_softirq(void);
+
+/*
+ * Run pending softirqs on this CPU's dedicated softirq stack.
+ *
+ * Returns immediately when called from interrupt context. Otherwise, with
+ * local interrupts disabled, it switches A0StP to the softirq stack, calls
+ * __do_softirq() and switches back.
+ */
+asmlinkage void do_softirq(void)
+{
+ unsigned long flags;
+ struct thread_info *curctx;
+ union irq_ctx *irqctx;
+ u32 *isp;
+
+ if (in_interrupt())
+ return;
+
+ local_irq_save(flags);
+
+ if (local_softirq_pending()) {
+ curctx = current_thread_info();
+ irqctx = softirq_ctx[smp_processor_id()];
+ irqctx->tinfo.task = curctx->task;
+
+ /* build the stack frame on the softirq stack */
+ isp = (u32 *) ((char *)irqctx + sizeof(struct thread_info));
+
+ /*
+ * Swap A0StP with the new stack pointer held in D0.5, call
+ * __do_softirq() and then copy D0.5 back into A0StP.
+ * NOTE(review): relies on the callee preserving D0.5 - confirm.
+ */
+ asm volatile (
+ "MOV D0.5,%0\n"
+ "SWAP A0StP,D0.5\n"
+ "CALLR D1RtP,___do_softirq\n"
+ "MOV A0StP,D0.5\n"
+ :
+ : "r" (isp)
+ : "memory", "cc", "D1Ar1", "D0Ar2", "D1Ar3", "D0Ar4",
+ "D1Ar5", "D0Ar6", "D0Re0", "D1Re0", "D0.4", "D1RtP",
+ "D0.5"
+ );
+ /*
+ * Shouldn't happen, we returned above if in_interrupt():
+ */
+ WARN_ON_ONCE(softirq_count());
+ }
+
+ local_irq_restore(flags);
+}
+#endif
+
+/*
+ * irq_chip installed for every TBI signal by metag_tbisig_map(); only the
+ * startup and shutdown callbacks are provided.
+ */
+static struct irq_chip meta_irq_type = {
+ .name = "META-IRQ",
+ .irq_startup = startup_meta_irq,
+ .irq_shutdown = shutdown_meta_irq,
+};
+
+/**
+ * tbisig_map() - Map a TBI signal number to a virtual IRQ number.
+ * @hw: Number of the TBI signal. Must be in range.
+ *
+ * Creates the mapping in root_domain via irq_create_mapping().
+ *
+ * Returns: The virtual IRQ number mapped to the TBI signal number @hw.
+ */
+int tbisig_map(unsigned int hw)
+{
+ return irq_create_mapping(root_domain, hw);
+}
+
+/**
+ * metag_tbisig_map() - irq domain .map callback for TBI signals
+ * @d:		root irq domain
+ * @irq:	virtual irq number
+ * @hw:		hardware irq number (TBI signal number)
+ *
+ * Installs meta_irq_type on the virtual irq, with the per-cpu flow handler
+ * on SMP kernels and the simple flow handler otherwise. Always returns 0.
+ */
+static int metag_tbisig_map(struct irq_domain *d, unsigned int irq,
+			    irq_hw_number_t hw)
+{
+	irq_flow_handler_t flow;
+
+#ifdef CONFIG_SMP
+	flow = handle_percpu_irq;
+#else
+	flow = handle_simple_irq;
+#endif
+	irq_set_chip_and_handler(irq, &meta_irq_type, flow);
+
+	return 0;
+}
+
+static const struct irq_domain_ops metag_tbisig_domain_ops = {
+ .map = metag_tbisig_map,
+};
+
+/*
+ * init_IRQ() - initialise the IRQ handling routines.
+ *
+ * Called during kernel startup. Creates the 32-entry linear root IRQ domain
+ * (panicking if that fails), sets up the IRQ stacks for the boot CPU,
+ * initialises the internal and external interrupt controllers and finally
+ * calls the machine's own init_irq hook if it has one.
+ */
+void __init init_IRQ(void)
+{
+ root_domain = irq_domain_add_linear(NULL, 32,
+ &metag_tbisig_domain_ops, NULL);
+ if (unlikely(!root_domain))
+ panic("init_IRQ: cannot add root IRQ domain");
+
+ irq_ctx_init(smp_processor_id());
+
+ init_internal_IRQ();
+ init_external_IRQ();
+
+ if (machine_desc->init_irq)
+ machine_desc->init_irq();
+}
+
+/*
+ * Override nr_irqs with the machine descriptor's value when it provides a
+ * non-zero one. Always returns 0.
+ */
+int __init arch_probe_nr_irqs(void)
+{
+ if (machine_desc->nr_irqs)
+ nr_irqs = machine_desc->nr_irqs;
+ return 0;
+}
+
+#ifdef CONFIG_HOTPLUG_CPU
+/*
+ * Ask the chip behind @data to set the affinity of @irq to @cpu alone.
+ * Nothing happens if the chip has no irq_set_affinity callback. The
+ * descriptor lock is held around the call.
+ */
+static void route_irq(struct irq_data *data, unsigned int irq, unsigned int cpu)
+{
+ struct irq_desc *desc = irq_to_desc(irq);
+ struct irq_chip *chip = irq_data_get_irq_chip(data);
+
+ raw_spin_lock_irq(&desc->lock);
+ if (chip->irq_set_affinity)
+ chip->irq_set_affinity(data, cpumask_of(cpu), false);
+ raw_spin_unlock_irq(&desc->lock);
+}
+
+/*
+ * This CPU has been marked offline: move every IRQ that is affine to it
+ * onto another online CPU. When an IRQ's affinity mask leaves no online
+ * CPU to pick, the mask is widened to all CPUs and any online one is used.
+ */
+void migrate_irqs(void)
+{
+	unsigned int cpu = smp_processor_id();
+	struct irq_desc *desc;
+	unsigned int i;
+
+	for_each_irq_desc(i, desc) {
+		struct irq_data *data = irq_desc_get_irq_data(desc);
+		unsigned int target;
+
+		/* Leave per-cpu IRQs and IRQs not affine to this CPU alone */
+		if (irqd_is_per_cpu(data) ||
+		    !cpumask_test_cpu(cpu, data->affinity))
+			continue;
+
+		target = cpumask_any_and(data->affinity, cpu_online_mask);
+		if (target >= nr_cpu_ids) {
+			pr_info_ratelimited("IRQ%u no longer affine to CPU%u\n",
+					    i, cpu);
+
+			/* Nothing suitable left: allow any CPU and retry */
+			cpumask_setall(data->affinity);
+			target = cpumask_any_and(data->affinity,
+						 cpu_online_mask);
+		}
+
+		route_irq(data, i, target);
+	}
+}
+#endif /* CONFIG_HOTPLUG_CPU */
diff --git a/arch/metag/kernel/kick.c b/arch/metag/kernel/kick.c
new file mode 100644
index 00000000000..50fcbec98cd
--- /dev/null
+++ b/arch/metag/kernel/kick.c
@@ -0,0 +1,101 @@
+/*
+ * Copyright (C) 2009 Imagination Technologies
+ *
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License. See the file COPYING in the main directory of this archive
+ * for more details.
+ *
+ * The Meta KICK interrupt mechanism is generally a useful feature, so
+ * we provide an interface for registering multiple interrupt
+ * handlers. All the registered interrupt handlers are "chained". When
+ * a KICK interrupt is received the first function in the list is
+ * called. If that interrupt handler cannot handle the KICK the next
+ * one is called, then the next until someone handles it (or we run
+ * out of functions). As soon as one function handles the interrupt no
+ * other handlers are called.
+ *
+ * The only downside of chaining interrupt handlers is that each
+ * handler must be able to detect whether the KICK was intended for it
+ * or not. For example, when the IPI handler runs and it sees that
+ * there are no IPI messages it must not signal that the KICK was
+ * handled, thereby giving the other handlers a chance to run.
+ *
+ * The reason that we provide our own interface for calling KICK
+ * handlers instead of using the generic kernel infrastructure is that
+ * the KICK handlers require access to a CPU's pTBI structure. So we
+ * pass it as an argument.
+ */
+#include <linux/export.h>
+#include <linux/kernel.h>
+#include <linux/mm.h>
+#include <linux/types.h>
+
+#include <asm/traps.h>
+
+/*
+ * All accesses/manipulations of kick_handlers_list should be
+ * performed while holding kick_handlers_lock.
+ */
+static DEFINE_SPINLOCK(kick_handlers_lock);
+static LIST_HEAD(kick_handlers_list);
+
+/*
+ * Append @kh to the tail of the KICK handler chain, so it is tried after
+ * every handler registered before it. Takes kick_handlers_lock with
+ * interrupts disabled.
+ */
+void kick_register_func(struct kick_irq_handler *kh)
+{
+ unsigned long flags;
+
+ spin_lock_irqsave(&kick_handlers_lock, flags);
+
+ list_add_tail(&kh->list, &kick_handlers_list);
+
+ spin_unlock_irqrestore(&kick_handlers_lock, flags);
+}
+EXPORT_SYMBOL(kick_register_func);
+
+/*
+ * Remove @kh from the KICK handler chain. Takes kick_handlers_lock with
+ * interrupts disabled.
+ */
+void kick_unregister_func(struct kick_irq_handler *kh)
+{
+ unsigned long flags;
+
+ spin_lock_irqsave(&kick_handlers_lock, flags);
+
+ list_del(&kh->list);
+
+ spin_unlock_irqrestore(&kick_handlers_lock, flags);
+}
+EXPORT_SYMBOL(kick_unregister_func);
+
+/*
+ * KICK interrupt entry point: offer the KICK to each registered handler in
+ * registration order until one reports it as handled.
+ *
+ * Returns, through tail_end(), the result of the last handler called, or
+ * the incoming @State if no handler is registered. Warns if nobody handled
+ * the KICK.
+ */
+TBIRES
+kick_handler(TBIRES State, int SigNum, int Triggers, int Inst, PTBI pTBI)
+{
+	struct kick_irq_handler *kh;
+	int handled = 0;
+	TBIRES ret;
+
+	head_end(State, ~INTS_OFF_MASK);
+
+	/* If we interrupted user code handle any critical sections. */
+	if (State.Sig.SaveMask & TBICTX_PRIV_BIT)
+		restart_critical_section(State);
+
+	trace_hardirqs_off();
+
+	/*
+	 * Start from the incoming state so that ret is well defined even if
+	 * the handler list is empty and the loop below never assigns it.
+	 */
+	ret = State;
+
+	/*
+	 * There is no need to disable interrupts here because we
+	 * can't nest KICK interrupts in a KICK interrupt handler.
+	 */
+	spin_lock(&kick_handlers_lock);
+
+	list_for_each_entry(kh, &kick_handlers_list, list) {
+		ret = kh->func(State, SigNum, Triggers, Inst, pTBI, &handled);
+		if (handled)
+			break;
+	}
+
+	spin_unlock(&kick_handlers_lock);
+
+	WARN_ON(!handled);
+
+	return tail_end(ret);
+}
diff --git a/arch/metag/kernel/machines.c b/arch/metag/kernel/machines.c
new file mode 100644
index 00000000000..1edf6ba193b
--- /dev/null
+++ b/arch/metag/kernel/machines.c
@@ -0,0 +1,20 @@
+/*
+ * arch/metag/kernel/machines.c
+ *
+ * Copyright (C) 2012 Imagination Technologies Ltd.
+ *
+ * Generic Meta Boards.
+ */
+
+#include <linux/init.h>
+#include <asm/irq.h>
+#include <asm/mach/arch.h>
+
+/* Device tree compatible strings for the generic machine, NULL terminated. */
+static const char *meta_boards_compat[] __initdata = {
+ "img,meta",
+ NULL,
+};
+
+/* Machine descriptor for generic Meta boards; only dt_compat is set. */
+MACHINE_START(META, "Generic Meta")
+ .dt_compat = meta_boards_compat,
+MACHINE_END
diff --git a/arch/metag/kernel/metag_ksyms.c b/arch/metag/kernel/metag_ksyms.c
new file mode 100644
index 00000000000..ec872ef14eb
--- /dev/null
+++ b/arch/metag/kernel/metag_ksyms.c
@@ -0,0 +1,49 @@
+#include <linux/export.h>
+
+#include <asm/div64.h>
+#include <asm/ftrace.h>
+#include <asm/page.h>
+#include <asm/string.h>
+#include <asm/tbx.h>
+
+EXPORT_SYMBOL(clear_page);
+EXPORT_SYMBOL(copy_page);
+
+#ifdef CONFIG_FLATMEM
+/* needed for the pfn_valid macro */
+EXPORT_SYMBOL(max_pfn);
+EXPORT_SYMBOL(min_low_pfn);
+#endif
+
+/* TBI symbols */
+EXPORT_SYMBOL(__TBI);
+EXPORT_SYMBOL(__TBIFindSeg);
+EXPORT_SYMBOL(__TBIPoll);
+EXPORT_SYMBOL(__TBITimeStamp);
+
+#define DECLARE_EXPORT(name) extern void name(void); EXPORT_SYMBOL(name)
+
+/* libgcc functions */
+DECLARE_EXPORT(__ashldi3);
+DECLARE_EXPORT(__ashrdi3);
+DECLARE_EXPORT(__lshrdi3);
+DECLARE_EXPORT(__udivsi3);
+DECLARE_EXPORT(__divsi3);
+DECLARE_EXPORT(__umodsi3);
+DECLARE_EXPORT(__modsi3);
+DECLARE_EXPORT(__muldi3);
+DECLARE_EXPORT(__cmpdi2);
+DECLARE_EXPORT(__ucmpdi2);
+
+/* Maths functions */
+EXPORT_SYMBOL(div_u64);
+EXPORT_SYMBOL(div_s64);
+
+/* String functions */
+EXPORT_SYMBOL(memcpy);
+EXPORT_SYMBOL(memset);
+EXPORT_SYMBOL(memmove);
+
+#ifdef CONFIG_FUNCTION_TRACER
+EXPORT_SYMBOL(mcount_wrapper);
+#endif
diff --git a/arch/metag/kernel/module.c b/arch/metag/kernel/module.c
new file mode 100644
index 00000000000..986331cd0a5
--- /dev/null
+++ b/arch/metag/kernel/module.c
@@ -0,0 +1,284 @@
+/* Kernel module help for Meta.
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+*/
+#include <linux/moduleloader.h>
+#include <linux/elf.h>
+#include <linux/vmalloc.h>
+#include <linux/fs.h>
+#include <linux/string.h>
+#include <linux/kernel.h>
+#include <linux/sort.h>
+
+#include <asm/unaligned.h>
+
+/*
+ * Count the relbranch relocations in @rela[0..@num) whose (symbol, addend)
+ * pair differs from that of the previous relbranch relocation. On a sorted
+ * array this is the number of distinct pairs. A pair of symbol 0 and
+ * addend 0 matches the initial state and is therefore not counted.
+ */
+static unsigned int count_relocs(const Elf32_Rela *rela, unsigned int num)
+{
+	unsigned int idx, distinct = 0;
+	unsigned int last_sym = 0, last_addend = 0;
+
+	for (idx = 0; idx < num; idx++) {
+		const Elf32_Rela *r = &rela[idx];
+
+		/* Only count relbranch relocs, others don't need stubs */
+		if (ELF32_R_TYPE(r->r_info) != R_METAG_RELBRANCH)
+			continue;
+
+		/* Same target as the previous one */
+		if (last_sym == ELF32_R_SYM(r->r_info) &&
+		    last_addend == r->r_addend)
+			continue;
+
+		distinct++;
+		last_sym = ELF32_R_SYM(r->r_info);
+		last_addend = r->r_addend;
+	}
+
+	return distinct;
+}
+
+/*
+ * sort() comparison callback for Elf32_Rela entries, keyed on r_info and
+ * then r_addend. An entry with the larger key compares as "less", so the
+ * resulting order is descending.
+ */
+static int relacmp(const void *_x, const void *_y)
+{
+	const Elf32_Rela *lhs = (Elf32_Rela *)_x;
+	const Elf32_Rela *rhs = (Elf32_Rela *)_y;
+
+	/*
+	 * The whole of r_info is compared (rather than just
+	 * ELF32_R_SYM(r_info)) because it is cheaper; that affects neither
+	 * the sorting nor the counting algorithm.
+	 */
+	if (lhs->r_info != rhs->r_info)
+		return lhs->r_info > rhs->r_info ? -1 : 1;
+
+	if (lhs->r_addend != rhs->r_addend)
+		return lhs->r_addend > rhs->r_addend ? -1 : 1;
+
+	return 0;
+}
+
+/*
+ * sort() swap callback: exchange two Elf32_Rela entries one 32-bit word at
+ * a time. @size is ignored; sizeof(Elf32_Rela) is always used.
+ */
+static void relaswap(void *_x, void *_y, int size)
+{
+	uint32_t *a = (uint32_t *)_x;
+	uint32_t *b = (uint32_t *)_y;
+	int word;
+
+	for (word = 0; word < sizeof(Elf32_Rela) / sizeof(uint32_t); word++) {
+		uint32_t saved = a[word];
+
+		a[word] = b[word];
+		b[word] = saved;
+	}
+}
+
+/*
+ * Work out how much trampoline space the init (@is_init != 0) or non-init
+ * (@is_init == 0) sections may need: one struct metag_plt_entry per relbranch
+ * relocation counted by count_relocs().
+ *
+ * As a side effect the relocation tables that are looked at get sorted in
+ * place.
+ */
+static unsigned long get_plt_size(const Elf32_Ehdr *hdr,
+				  const Elf32_Shdr *sechdrs,
+				  const char *secstrings,
+				  int is_init)
+{
+	unsigned long total = 0;
+	unsigned i;
+
+	/* Everything marked ALLOC (this includes the exported symbols) */
+	for (i = 1; i < hdr->e_shnum; i++) {
+		const Elf32_Shdr *sec = &sechdrs[i];
+		const char *name = secstrings + sec->sh_name;
+		void *relocs;
+		unsigned int nrelocs;
+
+		/* Only *.init* sections when is_init, only the rest otherwise */
+		if ((strstr(name, ".init") != NULL) != is_init)
+			continue;
+
+		/* We don't want to look at debug sections. */
+		if (strstr(name, ".debug") != NULL)
+			continue;
+
+		if (sec->sh_type != SHT_RELA)
+			continue;
+
+		relocs = (void *)hdr + sec->sh_offset;
+		nrelocs = sec->sh_size / sizeof(Elf32_Rela);
+
+		pr_debug("Found relocations in section %u\n", i);
+		pr_debug("Ptr: %p. Number: %u\n", relocs, nrelocs);
+
+		/*
+		 * Sort the relocation information on a (symbol, addend) key.
+		 * This is an O(n*log n) algorithm, but it reduces
+		 * count_relocs() to linear complexity O(n).
+		 */
+		sort(relocs, nrelocs, sizeof(Elf32_Rela), relacmp, relaswap);
+
+		total += count_relocs(relocs, nrelocs)
+			* sizeof(struct metag_plt_entry);
+	}
+
+	return total;
+}
+
+/*
+ * Locate the module's .plt and .init.plt sections, size them for the
+ * trampolines that may be needed and mark them SHT_NOBITS.
+ *
+ * Returns 0 on success or -ENOEXEC if either section is missing.
+ */
+int module_frob_arch_sections(Elf32_Ehdr *hdr,
+			      Elf32_Shdr *sechdrs,
+			      char *secstrings,
+			      struct module *me)
+{
+	Elf32_Shdr *core_plt, *init_plt;
+	unsigned int i;
+
+	/* Find .plt and .init.plt sections */
+	for (i = 0; i < hdr->e_shnum; i++) {
+		const char *name = secstrings + sechdrs[i].sh_name;
+
+		if (!strcmp(name, ".init.plt"))
+			me->arch.init_plt_section = i;
+		else if (!strcmp(name, ".plt"))
+			me->arch.core_plt_section = i;
+	}
+	if (!(me->arch.core_plt_section && me->arch.init_plt_section)) {
+		pr_err("Module doesn't contain .plt or .init.plt sections.\n");
+		return -ENOEXEC;
+	}
+
+	core_plt = &sechdrs[me->arch.core_plt_section];
+	init_plt = &sechdrs[me->arch.init_plt_section];
+
+	/* Override their sizes */
+	core_plt->sh_size = get_plt_size(hdr, sechdrs, secstrings, 0);
+	core_plt->sh_type = SHT_NOBITS;
+	init_plt->sh_size = get_plt_size(hdr, sechdrs, secstrings, 1);
+	init_plt->sh_type = SHT_NOBITS;
+
+	return 0;
+}
+
+/*
+ * Set up a trampoline in the PLT to bounce us to the distant function at
+ * @val. The core or init PLT is used depending on whether @location lies in
+ * the module's core area. An existing identical entry is reused; otherwise
+ * the first free entry is filled in.
+ *
+ * Returns the address of the PLT entry.
+ */
+static uint32_t do_plt_call(void *location, Elf32_Addr val,
+			    Elf32_Shdr *sechdrs, struct module *mod)
+{
+	struct metag_plt_entry *slot;
+	uint32_t movt, jump;
+
+	/*
+	 * We have to trash a register, so we assume that any control
+	 * transfer more than 21-bits away must be a function call
+	 * (so we can use a call-clobbered register).
+	 */
+
+	/* MOVT D0Re0,#HI(v) */
+	movt = 0x02000005 | (((val & 0xffff0000) >> 16) << 3);
+	/* JUMP D0Re0,#LO(v) */
+	jump = 0xac000001 | ((val & 0x0000ffff) << 3);
+
+	/* Init, or core PLT? */
+	if (location >= mod->module_core &&
+	    location < mod->module_core + mod->core_size)
+		slot = (void *)sechdrs[mod->arch.core_plt_section].sh_addr;
+	else
+		slot = (void *)sechdrs[mod->arch.init_plt_section].sh_addr;
+
+	/* Find this entry, or if that fails, the next avail. entry */
+	for (; slot->tramp[0]; slot++) {
+		if (slot->tramp[0] == movt && slot->tramp[1] == jump)
+			return (uint32_t)slot;
+	}
+
+	slot->tramp[0] = movt;
+	slot->tramp[1] = jump;
+
+	return (uint32_t)slot;
+}
+
+/*
+ * Apply the RELA relocations of section @relsec to the section it refers to
+ * (sechdrs[relsec].sh_info), resolving symbols through the symbol table in
+ * section @symindex. Out-of-range relbranch targets are redirected through
+ * a PLT trampoline.
+ *
+ * Returns 0 on success or -ENOEXEC on an unknown relocation type.
+ * NOTE(review): a "bad relbranch relocation" or a relbranch overflow is only
+ * logged; the function still returns 0 in those cases.
+ */
+int apply_relocate_add(Elf32_Shdr *sechdrs,
+ const char *strtab,
+ unsigned int symindex,
+ unsigned int relsec,
+ struct module *me)
+{
+ unsigned int i;
+ Elf32_Rela *rel = (void *)sechdrs[relsec].sh_addr;
+ Elf32_Sym *sym;
+ Elf32_Addr relocation;
+ uint32_t *location;
+ int32_t value;
+
+ pr_debug("Applying relocate section %u to %u\n", relsec,
+ sechdrs[relsec].sh_info);
+ for (i = 0; i < sechdrs[relsec].sh_size / sizeof(*rel); i++) {
+ /* This is where to make the change */
+ location = (void *)sechdrs[sechdrs[relsec].sh_info].sh_addr
+ + rel[i].r_offset;
+ /* This is the symbol it is referring to. Note that all
+ undefined symbols have been resolved. */
+ sym = (Elf32_Sym *)sechdrs[symindex].sh_addr
+ + ELF32_R_SYM(rel[i].r_info);
+ relocation = sym->st_value + rel[i].r_addend;
+
+ switch (ELF32_R_TYPE(rel[i].r_info)) {
+ case R_METAG_NONE:
+ break;
+ case R_METAG_HIADDR16:
+ relocation >>= 16;
+ /* fall through: the top half is inserted like a low half */
+ case R_METAG_LOADDR16:
+ /* Replace the 16-bit field at bits 3..18 of the instruction */
+ *location = (*location & 0xfff80007) |
+ ((relocation & 0xffff) << 3);
+ break;
+ case R_METAG_ADDR32:
+ /*
+ * Packed data structures may cause a misaligned
+ * R_METAG_ADDR32 to be emitted.
+ */
+ put_unaligned(relocation, location);
+ break;
+ case R_METAG_GETSETOFF:
+ /* Add the low 12 bits of the value in at bit 7 */
+ *location += ((relocation & 0xfff) << 7);
+ break;
+ case R_METAG_RELBRANCH:
+ /* The 19-bit offset field at bit 5 is expected to be empty */
+ if (*location & (0x7ffff << 5)) {
+ pr_err("bad relbranch relocation\n");
+ break;
+ }
+
+ /* This jump is too big for the offset slot. Build
+ * a PLT to jump through to get to where we want to go.
+ * NB: 21bit check - not scaled to 19bit yet
+ */
+ if (((int32_t)(relocation -
+ (uint32_t)location) > 0xfffff) ||
+ ((int32_t)(relocation -
+ (uint32_t)location) < -0xfffff)) {
+ relocation = do_plt_call(location, relocation,
+ sechdrs, me);
+ }
+
+ value = relocation - (uint32_t)location;
+
+ /* branch instruction aligned */
+ value /= 4;
+
+ if ((value > 0x7ffff) || (value < -0x7ffff)) {
+ /*
+ * this should have been caught by the code
+ * above!
+ */
+ pr_err("overflow of relbranch reloc\n");
+ }
+
+ *location = (*location & (~(0x7ffff << 5))) |
+ ((value & 0x7ffff) << 5);
+ break;
+
+ default:
+ pr_err("module %s: Unknown relocation: %u\n",
+ me->name, ELF32_R_TYPE(rel[i].r_info));
+ return -ENOEXEC;
+ }
+ }
+ return 0;
+}
diff --git a/arch/metag/kernel/perf/Makefile b/arch/metag/kernel/perf/Makefile
new file mode 100644
index 00000000000..b158cb27208
--- /dev/null
+++ b/arch/metag/kernel/perf/Makefile
@@ -0,0 +1,3 @@
+# Makefile for performance event core
+
+obj-y += perf_event.o
diff --git a/arch/metag/kernel/perf/perf_event.c b/arch/metag/kernel/perf/perf_event.c
new file mode 100644
index 00000000000..a876d5ff389
--- /dev/null
+++ b/arch/metag/kernel/perf/perf_event.c
@@ -0,0 +1,861 @@
+/*
+ * Meta performance counter support.
+ * Copyright (C) 2012 Imagination Technologies Ltd
+ *
+ * This code is based on the sh pmu code:
+ * Copyright (C) 2009 Paul Mundt
+ *
+ * and on the arm pmu code:
+ * Copyright (C) 2009 picoChip Designs, Ltd., James Iles
+ * Copyright (C) 2010 ARM Ltd., Will Deacon <will.deacon@arm.com>
+ *
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License. See the file "COPYING" in the main directory of this archive
+ * for more details.
+ */
+
+#include <linux/atomic.h>
+#include <linux/export.h>
+#include <linux/init.h>
+#include <linux/irqchip/metag.h>
+#include <linux/perf_event.h>
+#include <linux/slab.h>
+
+#include <asm/core_reg.h>
+#include <asm/hwthread.h>
+#include <asm/io.h>
+#include <asm/irq.h>
+
+#include "perf_event.h"
+
+/*
+ * Forward declarations: both are defined further down the file but are
+ * used by metag_pmu_event_init().
+ */
+static int _hw_perf_event_init(struct perf_event *);
+static void _hw_perf_event_destroy(struct perf_event *);
+
+/*
+ * Determines which core type we are. Stays NULL until
+ * init_hw_perf_events() has recognised a supported core.
+ */
+static struct metag_pmu *metag_pmu __read_mostly;
+
+/* Processor specific data */
+static DEFINE_PER_CPU(struct cpu_hw_events, cpu_hw_events);
+
+/* PMU admin */
+/**
+ * perf_pmu_name() - report the name of the detected Meta PMU
+ *
+ * The name is read from the metag_pmu description rather than from its
+ * embedded struct pmu: the embedded structure is a copy of the file scope
+ * pmu taken in init_hw_perf_events() before perf_pmu_register() runs, and
+ * that file scope pmu has no .name initialiser, so the copy's name is unset.
+ *
+ * Return: the PMU name, or NULL when no PMU has been detected.
+ */
+const char *perf_pmu_name(void)
+{
+	if (!metag_pmu)
+		return NULL;
+
+	return metag_pmu->name;
+}
+EXPORT_SYMBOL_GPL(perf_pmu_name);
+
+/*
+ * Number of counters the detected PMU exposes, or 0 when no PMU has been
+ * detected.
+ */
+int perf_num_counters(void)
+{
+	return metag_pmu ? metag_pmu->max_events : 0;
+}
+EXPORT_SYMBOL_GPL(perf_num_counters);
+
+/* Non-zero once a PMU description has been selected. */
+static inline int metag_pmu_initialised(void)
+{
+	return metag_pmu != NULL;
+}
+
+/*
+ * Give back the two counter overflow interrupts taken by
+ * reserve_pmu_hardware(). The dev_id cookies (1 and 0) match the ones used
+ * when the interrupts were requested.
+ */
+static void release_pmu_hardware(void)
+{
+	unsigned int min_rev;
+	int virq;
+
+	min_rev = metag_pmu->version &
+		  (METAC_ID_MINOR_BITS | METAC_ID_REV_BITS);
+	min_rev >>= METAC_ID_REV_S;
+
+	/* Early cores don't have overflow interrupts, so nothing to free */
+	if (min_rev >= 0x0104) {
+		virq = internal_irq_map(17);
+		if (virq >= 0)
+			free_irq(virq, (void *)1);
+
+		virq = internal_irq_map(16);
+		if (virq >= 0)
+			free_irq(virq, (void *)0);
+	}
+}
+
+/*
+ * Request the overflow interrupts of the two hardware counters.
+ *
+ * Returns 0 on success (including on early cores, which have no overflow
+ * interrupts and therefore nothing to reserve) or a negative errno. On
+ * failure no interrupt is left requested.
+ */
+static int reserve_pmu_hardware(void)
+{
+	int err = 0, irq[2];
+	unsigned int version = (metag_pmu->version &
+			(METAC_ID_MINOR_BITS | METAC_ID_REV_BITS)) >>
+			METAC_ID_REV_S;
+
+	/* Early cores don't have overflow interrupts */
+	if (version < 0x0104)
+		goto out;
+
+	/*
+	 * Bit 16 on HWSTATMETA is the interrupt for performance counter 0;
+	 * similarly, 17 is the interrupt for performance counter 1.
+	 * We can't (yet) interrupt on the cycle counter, because it's a
+	 * register, however it holds a 32-bit value as opposed to 24-bit.
+	 */
+	irq[0] = internal_irq_map(16);
+	if (irq[0] < 0) {
+		pr_err("unable to map internal IRQ %d\n", 16);
+		/* A mapping failure must not be reported as success */
+		err = -ENODEV;
+		goto out;
+	}
+	err = request_irq(irq[0], metag_pmu->handle_irq, IRQF_NOBALANCING,
+			"metagpmu0", (void *)0);
+	if (err) {
+		pr_err("unable to request IRQ%d for metag PMU counters\n",
+				irq[0]);
+		goto out;
+	}
+
+	irq[1] = internal_irq_map(17);
+	if (irq[1] < 0) {
+		pr_err("unable to map internal IRQ %d\n", 17);
+		/* err is 0 here from the successful request above */
+		err = -ENODEV;
+		goto out_irq1;
+	}
+	err = request_irq(irq[1], metag_pmu->handle_irq, IRQF_NOBALANCING,
+			"metagpmu1", (void *)1);
+	if (err) {
+		pr_err("unable to request IRQ%d for metag PMU counters\n",
+				irq[1]);
+		goto out_irq1;
+	}
+
+	return 0;
+
+out_irq1:
+	free_irq(irq[0], (void *)0);
+out:
+	return err;
+}
+
+/* PMU operations */
+/* pmu_enable callback: intentionally empty, nothing is done PMU-wide. */
+static void metag_pmu_enable(struct pmu *pmu)
+{
+}
+
+/* pmu_disable callback: intentionally empty, nothing is done PMU-wide. */
+static void metag_pmu_disable(struct pmu *pmu)
+{
+}
+
+/*
+ * event_init callback: validate a new event and take a reference on the
+ * PMU hardware for it.
+ *
+ * Returns 0 on success, -ENODEV when no PMU was detected, -EOPNOTSUPP for
+ * branch stack sampling, -ENOENT for event types this PMU does not handle,
+ * or the error from reserving the hardware / decoding the event.
+ *
+ * The hardware reference (active_events) is only taken once the event type
+ * is known to be ours, and is only dropped again if it was actually taken,
+ * so every failure path leaves the count balanced.
+ */
+static int metag_pmu_event_init(struct perf_event *event)
+{
+	atomic_t *active_events;
+	int err = 0;
+
+	/* Do not touch metag_pmu at all until it is known to be valid */
+	if (!metag_pmu_initialised())
+		return -ENODEV;
+
+	if (has_branch_stack(event))
+		return -EOPNOTSUPP;
+
+	/* Hardware and caches counters only; reject before reserving */
+	switch (event->attr.type) {
+	case PERF_TYPE_HARDWARE:
+	case PERF_TYPE_HW_CACHE:
+		break;
+
+	default:
+		return -ENOENT;
+	}
+
+	active_events = &metag_pmu->active_events;
+
+	if (!atomic_inc_not_zero(active_events)) {
+		mutex_lock(&metag_pmu->reserve_mutex);
+		if (atomic_read(active_events) == 0)
+			err = reserve_pmu_hardware();
+
+		if (!err)
+			atomic_inc(active_events);
+
+		mutex_unlock(&metag_pmu->reserve_mutex);
+	}
+
+	/* No reference was taken, so there is nothing to undo */
+	if (err)
+		return err;
+
+	err = _hw_perf_event_init(event);
+	if (err) {
+		/* Drop the reference taken above */
+		_hw_perf_event_destroy(event);
+		return err;
+	}
+
+	event->destroy = _hw_perf_event_destroy;
+
+	return 0;
+}
+
+/*
+ * Fold the current raw value of counter @idx into @event's running count.
+ *
+ * The previous raw value may be changed underneath us, so it is read and
+ * exchanged for the new one with a compare-and-exchange that is retried
+ * until it succeeds; the difference (new - previous) is then added to the
+ * generic counter.
+ */
+void metag_pmu_event_update(struct perf_event *event,
+			     struct hw_perf_event *hwc, int idx)
+{
+	u64 old_raw, cur_raw;
+	s64 diff;
+
+	do {
+		old_raw = local64_read(&hwc->prev_count);
+		cur_raw = metag_pmu->read(idx);
+	} while (local64_cmpxchg(&hwc->prev_count, old_raw,
+				 cur_raw) != old_raw);
+
+	diff = cur_raw - old_raw;
+	local64_add(diff, &event->count);
+}
+
+/*
+ * Program counter @idx so that it overflows after what is left of the
+ * event's sample period.
+ *
+ * Returns 1 when the remaining period had run out and was restarted from
+ * the sample period, 0 otherwise.
+ */
+int metag_pmu_event_set_period(struct perf_event *event,
+				struct hw_perf_event *hwc, int idx)
+{
+	s64 remaining = local64_read(&hwc->period_left);
+	s64 period = hwc->sample_period;
+	int restarted = 0;
+
+	/* More than a whole period behind: start afresh */
+	if (unlikely(remaining <= -period)) {
+		remaining = period;
+		local64_set(&hwc->period_left, remaining);
+		hwc->last_period = period;
+		restarted = 1;
+	}
+
+	/* Period used up: carry the overshoot into the next one */
+	if (unlikely(remaining <= 0)) {
+		remaining += period;
+		local64_set(&hwc->period_left, remaining);
+		hwc->last_period = period;
+		restarted = 1;
+	}
+
+	/* Never ask for more than the PMU's maximum period */
+	if (remaining > (s64)metag_pmu->max_period)
+		remaining = metag_pmu->max_period;
+
+	/* The write hook is absent on cores that cannot preload a count */
+	if (metag_pmu->write)
+		metag_pmu->write(idx, (u64)(-remaining) & MAX_PERIOD);
+
+	perf_event_update_userpage(event);
+
+	return restarted;
+}
+
+/*
+ * start callback: (re)program the period, record the event against its
+ * counter on this CPU and enable the counter.
+ */
+static void metag_pmu_start(struct perf_event *event, int flags)
+{
+	struct hw_perf_event *hwc = &event->hw;
+	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
+	int counter = hwc->idx;
+
+	/* An event must have been given a counter before it is started */
+	if (WARN_ON_ONCE(counter == -1))
+		return;
+
+	/*
+	 * The period is always reprogrammed below, so PERF_EF_RELOAD is only
+	 * used to sanity check that the count is up to date.
+	 */
+	WARN_ON_ONCE((flags & PERF_EF_RELOAD) &&
+		     !(hwc->state & PERF_HES_UPTODATE));
+
+	hwc->state = 0;
+
+	/*
+	 * Reset the period.
+	 * Some counters can't be stopped (i.e. are core global), so a
+	 * 'stopped' counter merely had its IRQ disabled. Without resetting
+	 * the period the overflow would arrive either too soon, or too late
+	 * if it already happened while disabled. Cores without the overflow
+	 * interrupt (max_period of 0) skip this.
+	 */
+	if (metag_pmu->max_period)
+		metag_pmu_event_set_period(event, hwc, counter);
+
+	cpuc->events[counter] = event;
+	metag_pmu->enable(hwc, counter);
+}
+
+/*
+ * stop callback: fold the final count into the event and disable its
+ * counter. The count is always updated here, whatever @flags says.
+ */
+static void metag_pmu_stop(struct perf_event *event, int flags)
+{
+	struct hw_perf_event *hwc = &event->hw;
+
+	/* Already stopped: nothing left to collect */
+	if (hwc->state & PERF_HES_STOPPED)
+		return;
+
+	metag_pmu_event_update(event, hwc, hwc->idx);
+	metag_pmu->disable(hwc, hwc->idx);
+	hwc->state |= PERF_HES_STOPPED | PERF_HES_UPTODATE;
+}
+
+/*
+ * add callback: claim a counter on this CPU for @event.
+ *
+ * Events configured as 0x100 (instructions) can only use the dedicated
+ * METAG_INST_COUNTER slot; everything else takes the first free general
+ * purpose counter below that slot.
+ *
+ * Returns 0 on success or -EAGAIN when no suitable counter is free.
+ */
+static int metag_pmu_add(struct perf_event *event, int flags)
+{
+	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
+	struct hw_perf_event *hwc = &event->hw;
+	int idx = 0, ret = 0;
+
+	perf_pmu_disable(event->pmu);
+
+	/* check whether we're counting instructions */
+	if (hwc->config == 0x100) {
+		if (__test_and_set_bit(METAG_INST_COUNTER,
+				cpuc->used_mask)) {
+			ret = -EAGAIN;
+			goto out;
+		}
+		idx = METAG_INST_COUNTER;
+	} else {
+		/*
+		 * Check whether we have a spare counter. The search is
+		 * bounded by the number of general purpose counters rather
+		 * than by the (global) count of active events, so the result
+		 * only depends on this CPU's used_mask.
+		 */
+		idx = find_first_zero_bit(cpuc->used_mask,
+				METAG_INST_COUNTER);
+		if (idx >= METAG_INST_COUNTER) {
+			ret = -EAGAIN;
+			goto out;
+		}
+
+		__set_bit(idx, cpuc->used_mask);
+	}
+	hwc->idx = idx;
+
+	/* Make sure the counter is disabled */
+	metag_pmu->disable(hwc, idx);
+
+	hwc->state = PERF_HES_STOPPED | PERF_HES_UPTODATE;
+	if (flags & PERF_EF_START)
+		metag_pmu_start(event, PERF_EF_RELOAD);
+
+	perf_event_update_userpage(event);
+out:
+	perf_pmu_enable(event->pmu);
+	return ret;
+}
+
+/*
+ * del callback: stop @event and hand its counter back to this CPU's pool.
+ */
+static void metag_pmu_del(struct perf_event *event, int flags)
+{
+	struct hw_perf_event *hwc = &event->hw;
+	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
+	int counter = hwc->idx;
+
+	WARN_ON(counter < 0);
+
+	/* Collect the final count before the slot is released */
+	metag_pmu_stop(event, PERF_EF_UPDATE);
+
+	cpuc->events[counter] = NULL;
+	__clear_bit(counter, cpuc->used_mask);
+
+	perf_event_update_userpage(event);
+}
+
+/*
+ * read callback: bring @event's count up to date, provided it currently
+ * has a counter assigned (a negative index means it has not).
+ */
+static void metag_pmu_read(struct perf_event *event)
+{
+	struct hw_perf_event *hwc = &event->hw;
+
+	if (hwc->idx >= 0)
+		metag_pmu_event_update(event, hwc, hwc->idx);
+}
+
+/*
+ * Callback table for the perf core. This is the structure passed to
+ * perf_pmu_register() in init_hw_perf_events(); a copy of it is also
+ * stored in metag_pmu->pmu there.
+ */
+static struct pmu pmu = {
+ .pmu_enable = metag_pmu_enable,
+ .pmu_disable = metag_pmu_disable,
+
+ .event_init = metag_pmu_event_init,
+
+ .add = metag_pmu_add,
+ .del = metag_pmu_del,
+ .start = metag_pmu_start,
+ .stop = metag_pmu_stop,
+ .read = metag_pmu_read,
+};
+
+/* Core counter specific functions */
+/*
+ * Generic hardware event to Meta event encoding, indexed by
+ * PERF_COUNT_HW_*. An entry of -1 marks an event with no mapping;
+ * _hw_perf_event_init() rejects those with -EINVAL. The encoding 0x100
+ * (instructions) is the one metag_pmu_add() routes to METAG_INST_COUNTER.
+ */
+static const int metag_general_events[] = {
+ [PERF_COUNT_HW_CPU_CYCLES] = 0x03,
+ [PERF_COUNT_HW_INSTRUCTIONS] = 0x100,
+ [PERF_COUNT_HW_CACHE_REFERENCES] = -1,
+ [PERF_COUNT_HW_CACHE_MISSES] = -1,
+ [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = -1,
+ [PERF_COUNT_HW_BRANCH_MISSES] = -1,
+ [PERF_COUNT_HW_BUS_CYCLES] = -1,
+ [PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = -1,
+ [PERF_COUNT_HW_STALLED_CYCLES_BACKEND] = -1,
+ [PERF_COUNT_HW_REF_CPU_CYCLES] = -1,
+};
+
+/*
+ * Cache event to Meta event encoding, indexed by [cache][operation][result]
+ * using the C() shorthand for PERF_COUNT_HW_CACHE_*. Combinations with no
+ * hardware event are filled with CACHE_OP_UNSUPPORTED. The table is looked
+ * up by _hw_perf_cache_event() through metag_pmu->cache_events.
+ */
+static const int metag_pmu_cache_events[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = {
+ [C(L1D)] = {
+ [C(OP_READ)] = {
+ [C(RESULT_ACCESS)] = 0x08,
+ [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
+ },
+ [C(OP_WRITE)] = {
+ [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
+ [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
+ },
+ [C(OP_PREFETCH)] = {
+ [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
+ [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
+ },
+ },
+ [C(L1I)] = {
+ [C(OP_READ)] = {
+ [C(RESULT_ACCESS)] = 0x09,
+ [C(RESULT_MISS)] = 0x0a,
+ },
+ [C(OP_WRITE)] = {
+ [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
+ [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
+ },
+ [C(OP_PREFETCH)] = {
+ [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
+ [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
+ },
+ },
+ [C(LL)] = {
+ [C(OP_READ)] = {
+ [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
+ [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
+ },
+ [C(OP_WRITE)] = {
+ [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
+ [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
+ },
+ [C(OP_PREFETCH)] = {
+ [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
+ [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
+ },
+ },
+ [C(DTLB)] = {
+ [C(OP_READ)] = {
+ [C(RESULT_ACCESS)] = 0xd0,
+ [C(RESULT_MISS)] = 0xd2,
+ },
+ [C(OP_WRITE)] = {
+ [C(RESULT_ACCESS)] = 0xd4,
+ [C(RESULT_MISS)] = 0xd5,
+ },
+ [C(OP_PREFETCH)] = {
+ [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
+ [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
+ },
+ },
+ [C(ITLB)] = {
+ [C(OP_READ)] = {
+ [C(RESULT_ACCESS)] = 0xd1,
+ [C(RESULT_MISS)] = 0xd3,
+ },
+ [C(OP_WRITE)] = {
+ [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
+ [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
+ },
+ [C(OP_PREFETCH)] = {
+ [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
+ [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
+ },
+ },
+ [C(BPU)] = {
+ [C(OP_READ)] = {
+ [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
+ [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
+ },
+ [C(OP_WRITE)] = {
+ [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
+ [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
+ },
+ [C(OP_PREFETCH)] = {
+ [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
+ [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
+ },
+ },
+ [C(NODE)] = {
+ [C(OP_READ)] = {
+ [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
+ [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
+ },
+ [C(OP_WRITE)] = {
+ [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
+ [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
+ },
+ [C(OP_PREFETCH)] = {
+ [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
+ [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
+ },
+ },
+};
+
+
+/*
+ * Drop one reference on the PMU hardware; the overflow interrupts are
+ * released, under the reservation mutex, when the last reference goes.
+ */
+static void _hw_perf_event_destroy(struct perf_event *event)
+{
+	/* Only returns true, with the mutex held, when the count hits zero */
+	if (!atomic_dec_and_mutex_lock(&metag_pmu->active_events,
+				       &metag_pmu->reserve_mutex))
+		return;
+
+	release_pmu_hardware();
+	mutex_unlock(&metag_pmu->reserve_mutex);
+}
+
+/*
+ * Translate a PERF_TYPE_HW_CACHE config into a Meta event encoding.
+ *
+ * @config: packed as cache type (bits 0-7), operation (bits 8-15) and
+ *          result (bits 16-23).
+ * @evp:    receives the event encoding on success.
+ *
+ * Returns 0 on success, -EINVAL when the PMU has no cache event table, an
+ * index is out of range or the combination is marked as nonsensical, and
+ * -EOPNOTSUPP when the combination has no hardware event.
+ */
+static int _hw_perf_cache_event(int config, int *evp)
+{
+	unsigned long type, op, result;
+	int ev;
+
+	if (!metag_pmu->cache_events)
+		return -EINVAL;
+
+	/* Unpack config */
+	type = config & 0xff;
+	op = (config >> 8) & 0xff;
+	result = (config >> 16) & 0xff;
+
+	if (type >= PERF_COUNT_HW_CACHE_MAX ||
+			op >= PERF_COUNT_HW_CACHE_OP_MAX ||
+			result >= PERF_COUNT_HW_CACHE_RESULT_MAX)
+		return -EINVAL;
+
+	ev = (*metag_pmu->cache_events)[type][op][result];
+
+	/*
+	 * The event table marks missing events with CACHE_OP_UNSUPPORTED
+	 * and CACHE_OP_NONSENSE rather than 0 and -1, so those markers have
+	 * to be recognised here as well or they would be programmed into
+	 * the hardware as if they were event encodings.
+	 */
+	if (ev == 0 || ev == CACHE_OP_UNSUPPORTED)
+		return -EOPNOTSUPP;
+	if (ev == -1 || ev == CACHE_OP_NONSENSE)
+		return -EINVAL;
+	*evp = ev;
+	return 0;
+}
+
+/*
+ * Decode @event's attributes into a hardware configuration.
+ *
+ * Returns 0 on success, -EINVAL for an out of range or unmapped event or
+ * for a sampling request on a core that cannot sample, or the error from
+ * _hw_perf_cache_event().
+ */
+static int _hw_perf_event_init(struct perf_event *event)
+{
+	struct hw_perf_event *hwc = &event->hw;
+	struct perf_event_attr *attr = &event->attr;
+	int mapping = 0;
+	int err;
+
+	if (attr->type == PERF_TYPE_HARDWARE) {
+		if (attr->config >= PERF_COUNT_HW_MAX)
+			return -EINVAL;
+
+		mapping = metag_pmu->event_map(attr->config);
+	} else if (attr->type == PERF_TYPE_HW_CACHE) {
+		err = _hw_perf_cache_event(attr->config, &mapping);
+		if (err)
+			return err;
+	}
+
+	/* Return early if the event is unsupported */
+	if (mapping == -1)
+		return -EINVAL;
+
+	/*
+	 * Early cores have "limited" counters - no overflow interrupts - so
+	 * they cannot sample without extra work and timer assistance.
+	 */
+	if (!metag_pmu->max_period && hwc->sample_period)
+		return -EINVAL;
+
+	/*
+	 * No index is assigned until the event is placed into the hardware;
+	 * -1 means "not placed yet". On SMP systems each core has its own
+	 * set of counters, so constraints cannot be checked here.
+	 */
+	hwc->idx = -1;
+
+	/* Store the event encoding */
+	hwc->config |= (unsigned long)mapping;
+
+	/*
+	 * For non-sampling runs, limit the sample_period to half of the
+	 * counter width, so the new counter value is less likely to overtake
+	 * the previous one (unless there are IRQ latency issues...)
+	 */
+	if (metag_pmu->max_period && !hwc->sample_period) {
+		hwc->sample_period = metag_pmu->max_period >> 1;
+		hwc->last_period = hwc->sample_period;
+		local64_set(&hwc->period_left, hwc->sample_period);
+	}
+
+	return 0;
+}
+
+/*
+ * Enable counter @idx for the event configuration held in @event.
+ *
+ * The pseudo instruction counter (METAG_INST_COUNTER) is simply reset.
+ * For the hardware counters the event id and this CPU's hardware thread
+ * bit are written to the top byte of the counter register.
+ */
+static void metag_pmu_enable_counter(struct hw_perf_event *event, int idx)
+{
+	struct cpu_hw_events *events = &__get_cpu_var(cpu_hw_events);
+	unsigned int config = event->config;
+	unsigned int tmp = config & 0xf0;
+	unsigned long flags;
+
+	raw_spin_lock_irqsave(&events->pmu_lock, flags);
+
+	/*
+	 * Check if we're enabling the instruction counter (index of
+	 * MAX_HWEVENTS - 1)
+	 */
+	if (METAG_INST_COUNTER == idx) {
+		WARN_ONCE((config != 0x100),
+			"invalid configuration (%d) for counter (%d)\n",
+			config, idx);
+
+		/* Reset the cycle count */
+		__core_reg_set(TXTACTCYC, 0);
+		goto unlock;
+	}
+
+	/* Check for a core internal or performance channel event. */
+	if (tmp) {
+		void *perf_addr;
+
+		/*
+		 * Core internal (0xd0) and performance channel (0xf0)
+		 * events select their sub-event through a separate
+		 * register; any other high nibble has no such register.
+		 */
+		switch (tmp) {
+		case 0xd0:
+			perf_addr = (void *)PERF_ICORE(idx);
+			break;
+
+		case 0xf0:
+			perf_addr = (void *)PERF_CHAN(idx);
+			break;
+
+		default:
+			perf_addr = NULL;
+			break;
+		}
+
+		/*
+		 * The sub-event is the low nibble of the configuration;
+		 * tmp only holds the high nibble at this point.
+		 */
+		if (perf_addr)
+			metag_out32((config & 0x0f), perf_addr);
+
+		/*
+		 * Now we use the high nibble as the performance event to
+		 * to count.
+		 */
+		config = tmp >> 4;
+	}
+
+	/*
+	 * The raw spinlock is held with interrupts off, so the CPU number
+	 * is stable here without get_cpu(), which would leave preemption
+	 * disabled as there is no matching put_cpu().
+	 */
+	tmp = ((config & 0xf) << 28) |
+			((1 << 24) << cpu_2_hwthread_id[smp_processor_id()]);
+	if (metag_pmu->max_period)
+		/*
+		 * Cores with overflow interrupts had the counter preloaded
+		 * with the sample period by metag_pmu_start(); keep it.
+		 */
+		tmp |= metag_in32(PERF_COUNT(idx)) & 0x00ffffff;
+	else
+		/*
+		 * Older cores clear the count on write, so the previous raw
+		 * count must restart from 0 too for the next delta to be
+		 * right.
+		 */
+		local64_set(&event->prev_count, 0);
+
+	metag_out32(tmp, PERF_COUNT(idx));
+unlock:
+	raw_spin_unlock_irqrestore(&events->pmu_lock, flags);
+}
+
+/*
+ * Disable counter @idx by clearing the top byte (thread mask and event
+ * nibble) of its register while keeping the 24-bit count.
+ *
+ * The count should have been read before this is called: on an early core
+ * the write resets it to 0. Clearing the event nibble as well as the
+ * thread id matters because some events are core global and ignore the
+ * thread id mask; this relies on thread performance counts not being mixed
+ * and on event 0x00 requiring a thread id mask.
+ */
+static void metag_pmu_disable_counter(struct hw_perf_event *event, int idx)
+{
+	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
+	unsigned long irqflags;
+	unsigned int count;
+
+	/*
+	 * The cycle counter is a hardware thread register that is always
+	 * counting, so there is nothing to disable.
+	 */
+	if (METAG_INST_COUNTER == idx)
+		return;
+
+	raw_spin_lock_irqsave(&cpuc->pmu_lock, irqflags);
+
+	count = metag_in32(PERF_COUNT(idx)) & 0x00ffffff;
+	metag_out32(count, PERF_COUNT(idx));
+
+	raw_spin_unlock_irqrestore(&cpuc->pmu_lock, irqflags);
+}
+
+/*
+ * Return the raw value of counter @idx: the swapped-out TXTACTCYC value
+ * for the pseudo counter, otherwise the low 24 bits of the counter
+ * register.
+ */
+static u64 metag_pmu_read_counter(int idx)
+{
+	u32 raw = 0;
+
+	if (METAG_INST_COUNTER == idx) {
+		/* The act of reading the cycle counter also clears it */
+		__core_reg_swap(TXTACTCYC, raw);
+	} else {
+		raw = metag_in32(PERF_COUNT(idx)) & 0x00ffffff;
+	}
+
+	return raw;
+}
+
+/*
+ * Set the count of counter @idx to the low 24 bits of @val, leaving the
+ * thread mask and event id in the top byte untouched.
+ */
+static void metag_pmu_write_counter(int idx, u32 val)
+{
+	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
+	unsigned long irqflags;
+	u32 top_byte;
+
+	/*
+	 * This _shouldn't_ happen, but if it does the write can simply be
+	 * ignored, as the register is read-only and clear-on-write.
+	 */
+	if (METAG_INST_COUNTER == idx)
+		return;
+
+	raw_spin_lock_irqsave(&cpuc->pmu_lock, irqflags);
+
+	top_byte = metag_in32(PERF_COUNT(idx)) & 0xff000000;
+	metag_out32(top_byte | (val & 0x00ffffff), PERF_COUNT(idx));
+
+	raw_spin_unlock_irqrestore(&cpuc->pmu_lock, irqflags);
+}
+
+/*
+ * Look up the Meta encoding of generic hardware event @idx. No bounds
+ * check is done here; the caller in _hw_perf_event_init() rejects values
+ * of PERF_COUNT_HW_MAX and above first.
+ */
+static int metag_pmu_event_map(int idx)
+{
+ return metag_general_events[idx];
+}
+
+/*
+ * Counter overflow interrupt handler. @dev carries the counter index that
+ * was passed as the dev_id cookie to request_irq().
+ */
+static irqreturn_t metag_pmu_counter_overflow(int irq, void *dev)
+{
+	int idx = (int)dev;
+	struct cpu_hw_events *cpuhw = &__get_cpu_var(cpu_hw_events);
+	struct perf_event *event = cpuhw->events[idx];
+	struct hw_perf_event *hwc;
+	struct pt_regs *regs = get_irq_regs();
+	struct perf_sample_data sampledata;
+	unsigned long flags;
+	u32 counter = 0;
+
+	/*
+	 * We need to stop the core temporarily from generating another
+	 * interrupt while we disable this counter. However, we don't want
+	 * to flag the counter as free
+	 */
+	__global_lock2(flags);
+	counter = metag_in32(PERF_COUNT(idx));
+	metag_out32((counter & 0x00ffffff), PERF_COUNT(idx));
+	__global_unlock2(flags);
+
+	/* No event owns this counter on this CPU: leave it disabled */
+	if (!event)
+		return IRQ_HANDLED;
+
+	hwc = &event->hw;
+
+	/* Update the counts and reset the sample period */
+	metag_pmu_event_update(event, hwc, idx);
+	perf_sample_data_init(&sampledata, 0, hwc->last_period);
+	metag_pmu_event_set_period(event, hwc, idx);
+
+	/*
+	 * Enable the counter again once core overflow processing has
+	 * completed. Only the top byte (thread mask and event id) saved
+	 * above is restored; the count itself was just reprogrammed by
+	 * metag_pmu_event_set_period() and has to be kept.
+	 */
+	if (!perf_event_overflow(event, &sampledata, regs)) {
+		__global_lock2(flags);
+		counter = (counter & 0xff000000) |
+			  (metag_in32(PERF_COUNT(idx)) & 0x00ffffff);
+		metag_out32(counter, PERF_COUNT(idx));
+		__global_unlock2(flags);
+	}
+
+	return IRQ_HANDLED;
+}
+
+/*
+ * Description of the Meta 2 PMU. init_hw_perf_events() points metag_pmu at
+ * this and, for cores older than revision 1.4, clears handle_irq, write
+ * and max_period.
+ */
+static struct metag_pmu _metag_pmu = {
+ .handle_irq = metag_pmu_counter_overflow,
+ .enable = metag_pmu_enable_counter,
+ .disable = metag_pmu_disable_counter,
+ .read = metag_pmu_read_counter,
+ .write = metag_pmu_write_counter,
+ .event_map = metag_pmu_event_map,
+ .cache_events = &metag_pmu_cache_events,
+ .max_period = MAX_PERIOD,
+ .max_events = MAX_HWEVENTS,
+};
+
+/* PMU CPU hotplug notifier */
+/*
+ * CPU hotplug callback: wipe the per-CPU counter bookkeeping of a CPU that
+ * is starting up. All other notifications are ignored.
+ */
+static int __cpuinit metag_pmu_cpu_notify(struct notifier_block *b,
+		unsigned long action, void *hcpu)
+{
+	struct cpu_hw_events *cpuc;
+
+	if ((action & ~CPU_TASKS_FROZEN) != CPU_STARTING)
+		return NOTIFY_DONE;
+
+	cpuc = &per_cpu(cpu_hw_events, (unsigned int)hcpu);
+	memset(cpuc, 0, sizeof(struct cpu_hw_events));
+	raw_spin_lock_init(&cpuc->pmu_lock);
+
+	return NOTIFY_OK;
+}
+
+/* Registered with register_cpu_notifier() in init_hw_perf_events(). */
+static struct notifier_block __cpuinitdata metag_pmu_notifier = {
+ .notifier_call = metag_pmu_cpu_notify,
+};
+
+/* PMU Initialisation */
+/*
+ * Detect the core, select the matching PMU description, reset the
+ * counters and per-CPU state, and register the PMU with the perf core.
+ *
+ * Only Meta 2 cores are supported; on any other core the function logs
+ * that no counters are available and returns 0 without registering
+ * anything.
+ */
+static int __init init_hw_perf_events(void)
+{
+	int ret = 0, cpu;
+	u32 version = *(u32 *)METAC_ID;
+	int major = (version & METAC_ID_MAJOR_BITS) >> METAC_ID_MAJOR_S;
+	int min_rev = (version & (METAC_ID_MINOR_BITS | METAC_ID_REV_BITS))
+			>> METAC_ID_REV_S;
+
+	/*
+	 * Not a Meta 2 core, then not supported. This also covers cores
+	 * newer than Meta 2, for which there is no PMU description and
+	 * metag_pmu would otherwise be dereferenced while still NULL.
+	 */
+	if (0x02 != major) {
+		pr_info("no hardware counter support available\n");
+		goto out;
+	}
+
+	metag_pmu = &_metag_pmu;
+
+	if (min_rev < 0x0104) {
+		/*
+		 * A core without overflow interrupts, and clear-on-
+		 * write counters.
+		 */
+		metag_pmu->handle_irq = NULL;
+		metag_pmu->write = NULL;
+		metag_pmu->max_period = 0;
+	}
+
+	metag_pmu->name = "Meta 2";
+	metag_pmu->version = version;
+	metag_pmu->pmu = pmu;
+
+	pr_info("enabled with %s PMU driver, %d counters available\n",
+			metag_pmu->name, metag_pmu->max_events);
+
+	/* Initialise the active events and reservation mutex */
+	atomic_set(&metag_pmu->active_events, 0);
+	mutex_init(&metag_pmu->reserve_mutex);
+
+	/* Clear the counters */
+	metag_out32(0, PERF_COUNT(0));
+	metag_out32(0, PERF_COUNT(1));
+
+	for_each_possible_cpu(cpu) {
+		struct cpu_hw_events *cpuc = &per_cpu(cpu_hw_events, cpu);
+
+		memset(cpuc, 0, sizeof(struct cpu_hw_events));
+		raw_spin_lock_init(&cpuc->pmu_lock);
+	}
+
+	register_cpu_notifier(&metag_pmu_notifier);
+	ret = perf_pmu_register(&pmu, (char *)metag_pmu->name, PERF_TYPE_RAW);
+out:
+	return ret;
+}
+early_initcall(init_hw_perf_events);
diff --git a/arch/metag/kernel/perf/perf_event.h b/arch/metag/kernel/perf/perf_event.h
new file mode 100644
index 00000000000..fd10a1345b6
--- /dev/null
+++ b/arch/metag/kernel/perf/perf_event.h
@@ -0,0 +1,106 @@
+/*
+ * Meta performance counter support.
+ * Copyright (C) 2012 Imagination Technologies Ltd
+ *
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License. See the file "COPYING" in the main directory of this archive
+ * for more details.
+ */
+
+#ifndef METAG_PERF_EVENT_H_
+#define METAG_PERF_EVENT_H_
+
+#include <linux/kernel.h>
+#include <linux/interrupt.h>
+#include <linux/perf_event.h>
+
+/* For performance counter definitions */
+#include <asm/metag_mem.h>
+
+/*
+ * The Meta core has two performance counters, with 24-bit resolution. Newer
+ * cores generate an overflow interrupt on transition from 0xffffff to 0.
+ *
+ * Each counter consists of the counter id, hardware thread id, and the count
+ * itself; each counter can be assigned to multiple hardware threads at any
+ * one time, with the returned count being an aggregate of events. A small
+ * number of events are thread global, i.e. they count the aggregate of all
+ * threads' events, regardless of the thread selected.
+ *
+ * Newer cores can store an arbitrary 24-bit number in the counter, whereas
+ * older cores will clear the counter bits on write.
+ *
+ * We also have a pseudo-counter in the form of the thread active cycles
+ * counter (which, incidentally, is also bound to a hardware thread).
+ */
+
+/* The two hardware counters plus one slot for the pseudo-counter */
+#define MAX_HWEVENTS 3
+/* Largest value that fits in a 24-bit counter */
+#define MAX_PERIOD ((1UL << 24) - 1)
+/* Index of the pseudo-counter: always the last slot */
+#define METAG_INST_COUNTER (MAX_HWEVENTS - 1)
+
+/**
+ * struct cpu_hw_events - a processor core's performance events
+ * @events: an array of perf_events active for a given index.
+ * @used_mask: a bitmap of in-use counters.
+ * @pmu_lock: a perf counter lock
+ *
+ * This is a per-cpu/core structure that maintains a record of its
+ * performance counters' state. Both arrays are sized by MAX_HWEVENTS, so
+ * they include a slot for the pseudo-counter at METAG_INST_COUNTER.
+ */
+struct cpu_hw_events {
+ struct perf_event *events[MAX_HWEVENTS];
+ unsigned long used_mask[BITS_TO_LONGS(MAX_HWEVENTS)];
+ raw_spinlock_t pmu_lock;
+};
+
+/**
+ * struct metag_pmu - the Meta PMU structure
+ * @pmu: core pmu structure
+ * @name: pmu name
+ * @version: core version
+ * @handle_irq: overflow interrupt handler
+ * @enable: enable a counter
+ * @disable: disable a counter
+ * @read: read the value of a counter
+ * @write: write a value to a counter
+ * @event_map: kernel event to counter event id map
+ * @cache_events: kernel cache counter to core cache counter map
+ * @max_period: maximum value of the counter before overflow
+ * @max_events: maximum number of counters available at any one time
+ * @active_events: number of active counters
+ * @reserve_mutex: counter reservation mutex
+ *
+ * This describes the main functionality and data used by the performance
+ * event core.
+ */
+struct metag_pmu {
+ struct pmu pmu;
+ const char *name;
+ u32 version;
+ irqreturn_t (*handle_irq)(int irq_num, void *dev);
+ void (*enable)(struct hw_perf_event *evt, int idx);
+ void (*disable)(struct hw_perf_event *evt, int idx);
+ u64 (*read)(int idx);
+ void (*write)(int idx, u32 val);
+ int (*event_map)(int idx);
+ /* Pointer to a complete [cache][operation][result] lookup table */
+ const int (*cache_events)[PERF_COUNT_HW_CACHE_MAX]
+ [PERF_COUNT_HW_CACHE_OP_MAX]
+ [PERF_COUNT_HW_CACHE_RESULT_MAX];
+ u32 max_period;
+ int max_events;
+ atomic_t active_events;
+ struct mutex reserve_mutex;
+};
+
+/*
+ * Convenience accessors for the perf counter registers: address of the
+ * register belonging to counter x, with consecutive counters spaced
+ * sizeof(u64) bytes apart.
+ */
+#define PERF_COUNT(x) (PERF_COUNT0 + (sizeof(u64) * (x)))
+#define PERF_ICORE(x) (PERF_ICORE0 + (sizeof(u64) * (x)))
+#define PERF_CHAN(x) (PERF_CHAN0 + (sizeof(u64) * (x)))
+
+/* Cache index macros: C(x) is shorthand for PERF_COUNT_HW_CACHE_x */
+#define C(x) PERF_COUNT_HW_CACHE_##x
+/* Markers for cache event table entries that have no event encoding */
+#define CACHE_OP_UNSUPPORTED 0xfffe
+#define CACHE_OP_NONSENSE 0xffff
+
+#endif
diff --git a/arch/metag/kernel/perf_callchain.c b/arch/metag/kernel/perf_callchain.c
new file mode 100644
index 00000000000..315633461a9
--- /dev/null
+++ b/arch/metag/kernel/perf_callchain.c
@@ -0,0 +1,96 @@
+/*
+ * Perf callchain handling code.
+ *
+ * Based on the ARM perf implementation.
+ */
+
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/perf_event.h>
+#include <linux/uaccess.h>
+#include <asm/ptrace.h>
+#include <asm/stacktrace.h>
+
+/*
+ * Decide whether the user address @calladdr holds a call instruction,
+ * i.e. whether the word after it is a plausible return address.
+ */
+static bool is_valid_call(unsigned long calladdr)
+{
+	unsigned int insn;
+
+	/* Check the possible return address is aligned. */
+	if (calladdr & 0x3)
+		return false;
+
+	/* An address that cannot be read is not a call site */
+	if (get_user(insn, (unsigned int *)calladdr))
+		return false;
+
+	/* Check for CALLR or SWAP PC,D1RtP. */
+	return (insn & 0xff000000) == 0xab000000 || insn == 0xa3200aa0;
+}
+
+/*
+ * Scan the user stack downwards from @user_frame, one frame-sized step at
+ * a time, until a frame whose saved link register follows a call
+ * instruction is found. That call address is stored in @entry.
+ *
+ * Returns the position to continue scanning from, or NULL once the stack
+ * can no longer be read.
+ */
+static struct metag_frame __user *
+user_backtrace(struct metag_frame __user *user_frame,
+	       struct perf_callchain_entry *entry)
+{
+	struct metag_frame frame;
+	unsigned long calladdr;
+
+	/* We cannot rely on having frame pointers in user code. */
+	for (;;) {
+		/* Give up as soon as the candidate frame is unreadable */
+		if (!access_ok(VERIFY_READ, user_frame, sizeof(frame)) ||
+		    __copy_from_user_inatomic(&frame, user_frame,
+					      sizeof(frame)))
+			return NULL;
+
+		user_frame--;
+
+		calladdr = frame.lr - 4;
+		if (!is_valid_call(calladdr))
+			continue;
+
+		perf_callchain_store(entry, calladdr);
+		return user_frame;
+	}
+}
+
+/*
+ * Record the user space call chain for @regs into @entry, starting one
+ * frame below the stack pointer held in AX[0].U0 and stopping when the
+ * entry is full or the stack can no longer be walked.
+ */
+void
+perf_callchain_user(struct perf_callchain_entry *entry, struct pt_regs *regs)
+{
+	struct metag_frame __user *frame =
+		(struct metag_frame __user *)regs->ctx.AX[0].U0;
+
+	frame--;
+
+	while (frame && entry->nr < PERF_MAX_STACK_DEPTH)
+		frame = user_backtrace(frame, entry);
+}
+
+/*
+ * Gets called by walk_stackframe() for every stackframe. This will be called
+ * whilst unwinding the stackframe and is like a subroutine return so we use
+ * the PC. Always returns 0.
+ */
+static int
+callchain_trace(struct stackframe *fr,
+		void *data)
+{
+	perf_callchain_store((struct perf_callchain_entry *)data, fr->pc);
+
+	return 0;
+}
+
+/*
+ * Record the kernel call chain for @regs into @entry by seeding a stack
+ * frame from the saved context and walking it with callchain_trace().
+ */
+void
+perf_callchain_kernel(struct perf_callchain_entry *entry, struct pt_regs *regs)
+{
+	struct stackframe frame;
+
+	frame.pc = regs->ctx.CurrPC;
+	frame.lr = regs->ctx.DX[4].U1;
+	frame.sp = regs->ctx.AX[0].U0;
+	frame.fp = regs->ctx.AX[1].U0;
+
+	walk_stackframe(&frame, callchain_trace, entry);
+}
diff --git a/arch/metag/kernel/process.c b/arch/metag/kernel/process.c
new file mode 100644
index 00000000000..c6efe62e5b7
--- /dev/null
+++ b/arch/metag/kernel/process.c
@@ -0,0 +1,461 @@
+/*
+ * Copyright (C) 2005,2006,2007,2008,2009,2010,2011 Imagination Technologies
+ *
+ * This file contains the architecture-dependent parts of process handling.
+ *
+ */
+
+#include <linux/errno.h>
+#include <linux/export.h>
+#include <linux/sched.h>
+#include <linux/kernel.h>
+#include <linux/mm.h>
+#include <linux/unistd.h>
+#include <linux/ptrace.h>
+#include <linux/user.h>
+#include <linux/reboot.h>
+#include <linux/elfcore.h>
+#include <linux/fs.h>
+#include <linux/tick.h>
+#include <linux/slab.h>
+#include <linux/mman.h>
+#include <linux/pm.h>
+#include <linux/syscalls.h>
+#include <linux/uaccess.h>
+#include <asm/core_reg.h>
+#include <asm/user_gateway.h>
+#include <asm/tcm.h>
+#include <asm/traps.h>
+#include <asm/switch_to.h>
+
+/*
+ * Wait for the next interrupt and enable local interrupts.
+ *
+ * cpu_idle() calls this with local interrupts disabled; the asm sequence
+ * below writes get_trigger_mask() to TXMASKI itself, so no separate
+ * local_irq_enable() is needed on this path.
+ */
+static inline void arch_idle(void)
+{
+ /* Scratch operand (%0) used to exchange PC and PCX. */
+ int tmp;
+
+ /*
+ * Quickly jump straight into the interrupt entry point without actually
+ * triggering an interrupt. When TXSTATI gets read the processor will
+ * block until an interrupt is triggered.
+ */
+ asm volatile (/* Switch into ISTAT mode */
+ "RTH\n\t"
+ /* Enable local interrupts */
+ "MOV TXMASKI, %1\n\t"
+ /*
+ * We can't directly "SWAP PC, PCX", so we swap via a
+ * temporary. Essentially we do:
+ * PCX_new = 1f (the place to continue execution)
+ * PC = PCX_old
+ */
+ "ADD %0, CPC0, #(1f-.)\n\t"
+ "SWAP PCX, %0\n\t"
+ "MOV PC, %0\n"
+ /* Continue execution here with interrupts enabled */
+ "1:"
+ : "=a" (tmp)
+ : "r" (get_trigger_mask()));
+}
+
+/*
+ * The idle loop. Flags the task as polling for need_resched, then forever:
+ * waits in arch_idle() until a reschedule is requested and calls the
+ * scheduler. Never returns.
+ */
+void cpu_idle(void)
+{
+ set_thread_flag(TIF_POLLING_NRFLAG);
+
+ while (1) {
+ tick_nohz_idle_enter();
+ rcu_idle_enter();
+
+ while (!need_resched()) {
+ /*
+ * We need to disable interrupts here to ensure we don't
+ * miss a wakeup call.
+ */
+ local_irq_disable();
+ if (!need_resched()) {
+#ifdef CONFIG_HOTPLUG_CPU
+ /* A CPU that has been taken offline leaves the idle loop here. */
+ if (cpu_is_offline(smp_processor_id()))
+ cpu_die();
+#endif
+ /* arch_idle() re-enables interrupts itself before waiting. */
+ arch_idle();
+ } else {
+ /* Work arrived after the outer check: undo the disable and loop. */
+ local_irq_enable();
+ }
+ }
+
+ rcu_idle_exit();
+ tick_nohz_idle_exit();
+ schedule_preempt_disabled();
+ }
+}
+
+void (*pm_power_off)(void);
+EXPORT_SYMBOL(pm_power_off);
+
+void (*soc_restart)(char *cmd);
+void (*soc_halt)(void);
+
+/*
+ * Restart the machine. The SoC hook, if one is registered, is given @cmd and
+ * a chance to perform the restart; if there is no hook or it returns, the
+ * hardware thread is halted with HALT_OK.
+ */
+void machine_restart(char *cmd)
+{
+ if (soc_restart)
+ soc_restart(cmd);
+ hard_processor_halt(HALT_OK);
+}
+
+/*
+ * Halt the machine. Runs the optional SoC halt hook, then calls
+ * smp_send_stop() and halts this hardware thread with HALT_OK.
+ */
+void machine_halt(void)
+{
+ if (soc_halt)
+ soc_halt();
+ smp_send_stop();
+ hard_processor_halt(HALT_OK);
+}
+
+/*
+ * Power the machine off. Runs the optional pm_power_off hook; if it is absent
+ * or returns, calls smp_send_stop() and halts this hardware thread with
+ * HALT_OK.
+ */
+void machine_power_off(void)
+{
+ if (pm_power_off)
+ pm_power_off();
+ smp_send_stop();
+ hard_processor_halt(HALT_OK);
+}
+
+#define FLAG_Z 0x8
+#define FLAG_N 0x4
+#define FLAG_O 0x2
+#define FLAG_C 0x1
+
+/*
+ * Dump the register context saved in @regs to the kernel log, then print a
+ * stack trace starting from the saved stack pointer (AX[0].U0).
+ *
+ * Registers are printed in U0/U1 pairs, one pair per line. The first half of
+ * each line is emitted with pr_info() and the second half with pr_cont(), so
+ * the continuation is marked explicitly instead of relying on how an
+ * unprefixed printk() is treated.
+ */
+void show_regs(struct pt_regs *regs)
+{
+	int i;
+	static const char * const AX0_names[] = {"A0StP", "A0FrP"};
+	static const char * const AX1_names[] = {"A1GbP", "A1LbP"};
+
+	static const char * const DX0_names[] = {
+		"D0Re0",
+		"D0Ar6",
+		"D0Ar4",
+		"D0Ar2",
+		"D0FrT",
+		"D0.5 ",
+		"D0.6 ",
+		"D0.7 "
+	};
+
+	static const char * const DX1_names[] = {
+		"D1Re0",
+		"D1Ar5",
+		"D1Ar3",
+		"D1Ar1",
+		"D1RtP",
+		"D1.5 ",
+		"D1.6 ",
+		"D1.7 "
+	};
+
+	pr_info(" pt_regs @ %p\n", regs);
+	pr_info(" SaveMask = 0x%04hx\n", regs->ctx.SaveMask);
+	pr_info(" Flags = 0x%04hx (%c%c%c%c)\n", regs->ctx.Flags,
+		regs->ctx.Flags & FLAG_Z ? 'Z' : 'z',
+		regs->ctx.Flags & FLAG_N ? 'N' : 'n',
+		regs->ctx.Flags & FLAG_O ? 'O' : 'o',
+		regs->ctx.Flags & FLAG_C ? 'C' : 'c');
+	pr_info(" TXRPT = 0x%08x\n", regs->ctx.CurrRPT);
+	pr_info(" PC = 0x%08x\n", regs->ctx.CurrPC);
+
+	/* AX regs */
+	for (i = 0; i < 2; i++) {
+		pr_info(" %s = 0x%08x ",
+			AX0_names[i],
+			regs->ctx.AX[i].U0);
+		pr_cont(" %s = 0x%08x\n",
+			AX1_names[i],
+			regs->ctx.AX[i].U1);
+	}
+
+	/* With XEXT set the Ext union does not hold AX2 (see the message). */
+	if (regs->ctx.SaveMask & TBICTX_XEXT_BIT)
+		pr_warn(" Extended state present - AX2.[01] will be WRONG\n");
+
+	/* Special place with AXx.2 */
+	pr_info(" A0.2 = 0x%08x ",
+		regs->ctx.Ext.AX2.U0);
+	pr_cont(" A1.2 = 0x%08x\n",
+		regs->ctx.Ext.AX2.U1);
+
+	/* 'extended' AX regs (nominally, just AXx.3) */
+	for (i = 0; i < (TBICTX_AX_REGS - 3); i++) {
+		pr_info(" A0.%d = 0x%08x ", i + 3, regs->ctx.AX3[i].U0);
+		pr_cont(" A1.%d = 0x%08x\n", i + 3, regs->ctx.AX3[i].U1);
+	}
+
+	for (i = 0; i < 8; i++) {
+		pr_info(" %s = 0x%08x ", DX0_names[i], regs->ctx.DX[i].U0);
+		pr_cont(" %s = 0x%08x\n", DX1_names[i], regs->ctx.DX[i].U1);
+	}
+
+	show_trace(NULL, (unsigned long *)regs->ctx.AX[0].U0, regs);
+}
+
+/*
+ * Set up the architecture state of a newly created task @tsk.
+ *
+ * For kernel threads (PF_KTHREAD) the register block is zeroed and primed so
+ * that @usp (the thread function) lands in D1RtP and @arg in D1Ar1. For user
+ * tasks the parent's registers are copied, the child's return value is set
+ * to 0, and @usp (if non-zero) becomes the 8-byte aligned child stack
+ * pointer. Any FPU/DSP context inherited through the task_struct copy is
+ * duplicated so parent and child do not share it.
+ *
+ * Returns 0 on success, or -ENOMEM if the DSP context could not be
+ * duplicated. In that case nothing allocated here is left attached to @tsk.
+ */
+int copy_thread(unsigned long clone_flags, unsigned long usp,
+		unsigned long arg, struct task_struct *tsk)
+{
+	struct pt_regs *childregs = task_pt_regs(tsk);
+	void *kernel_context = ((void *) childregs +
+				sizeof(struct pt_regs));
+	unsigned long global_base;
+
+	BUG_ON(((unsigned long)childregs) & 0x7);
+	BUG_ON(((unsigned long)kernel_context) & 0x7);
+
+	memset(&tsk->thread.kernel_context, 0,
+	       sizeof(tsk->thread.kernel_context));
+
+	tsk->thread.kernel_context = __TBISwitchInit(kernel_context,
+						     ret_from_fork,
+						     0, 0);
+
+	if (unlikely(tsk->flags & PF_KTHREAD)) {
+		/*
+		 * Make sure we don't leak any kernel data to child's regs
+		 * if kernel thread becomes a userspace thread in the future
+		 */
+		memset(childregs, 0 , sizeof(struct pt_regs));
+
+		global_base = __core_reg_get(A1GbP);
+		childregs->ctx.AX[0].U1 = (unsigned long) global_base;
+		childregs->ctx.AX[0].U0 = (unsigned long) kernel_context;
+		/* Set D1Ar1=arg and D1RtP=usp (fn) */
+		childregs->ctx.DX[4].U1 = usp;
+		childregs->ctx.DX[3].U1 = arg;
+		tsk->thread.int_depth = 2;
+		return 0;
+	}
+	/*
+	 * Get a pointer to where the new child's register block should have
+	 * been pushed.
+	 * The Meta's stack grows upwards, and the context is the first
+	 * thing to be pushed by TBX (phew)
+	 */
+	*childregs = *current_pt_regs();
+	/* Set the correct stack for the clone mode */
+	if (usp)
+		childregs->ctx.AX[0].U0 = ALIGN(usp, 8);
+	tsk->thread.int_depth = 1;
+
+	/* set return value for child process */
+	childregs->ctx.DX[0].U0 = 0;
+
+	/* The TLS pointer is passed as an argument to sys_clone. */
+	if (clone_flags & CLONE_SETTLS)
+		tsk->thread.tls_ptr =
+				(__force void __user *)childregs->ctx.DX[1].U1;
+
+#ifdef CONFIG_METAG_FPU
+	if (tsk->thread.fpu_context) {
+		struct meta_fpu_context *ctx;
+
+		/*
+		 * A failed duplication leaves the child with no FPU context;
+		 * __switch_to() allocates one on demand.
+		 */
+		ctx = kmemdup(tsk->thread.fpu_context,
+			      sizeof(struct meta_fpu_context), GFP_ATOMIC);
+		tsk->thread.fpu_context = ctx;
+	}
+#endif
+
+#ifdef CONFIG_METAG_DSP
+	if (tsk->thread.dsp_context) {
+		struct meta_ext_context *ctx;
+		int i;
+
+		ctx = kmemdup(tsk->thread.dsp_context,
+			      sizeof(struct meta_ext_context), GFP_ATOMIC);
+		if (!ctx)
+			goto err_dsp;
+
+		/* ctx->ram[] still holds the parent's buffers; copy them. */
+		for (i = 0; i < 2; i++)
+			ctx->ram[i] = kmemdup(ctx->ram[i], ctx->ram_sz[i],
+					      GFP_ATOMIC);
+		if (!ctx->ram[0] || !ctx->ram[1]) {
+			kfree(ctx->ram[0]);
+			kfree(ctx->ram[1]);
+			kfree(ctx);
+			goto err_dsp;
+		}
+		tsk->thread.dsp_context = ctx;
+	}
+#endif
+
+	return 0;
+
+#ifdef CONFIG_METAG_DSP
+err_dsp:
+	/* Never leave the child pointing at the parent's DSP context. */
+	tsk->thread.dsp_context = NULL;
+#ifdef CONFIG_METAG_FPU
+	/* Drop the FPU copy made above (kfree(NULL) is a no-op). */
+	kfree(tsk->thread.fpu_context);
+	tsk->thread.fpu_context = NULL;
+#endif
+	return -ENOMEM;
+#endif
+}
+
+#ifdef CONFIG_METAG_FPU
+/*
+ * Allocate a zeroed FPU context for @thread. Uses GFP_ATOMIC because the
+ * caller in this file is __switch_to(). On failure thread->fpu_context is
+ * left NULL; callers must check it.
+ */
+static void alloc_fpu_context(struct thread_struct *thread)
+{
+ thread->fpu_context = kzalloc(sizeof(struct meta_fpu_context),
+ GFP_ATOMIC);
+}
+
+/*
+ * Drop @thread's FPU state: clear the TBICTX_FPAC_BIT user flag and free the
+ * saved context.
+ */
+static void clear_fpu(struct thread_struct *thread)
+{
+ thread->user_flags &= ~TBICTX_FPAC_BIT;
+ kfree(thread->fpu_context);
+ thread->fpu_context = NULL;
+}
+#else
+/* No FPU support configured: nothing to clear. */
+static void clear_fpu(struct thread_struct *thread)
+{
+}
+#endif
+
+#ifdef CONFIG_METAG_DSP
+/*
+ * Drop @thread's DSP state: free both RAM copies and the context itself (if
+ * a context exists), then write 0 to core register D0.8.
+ */
+static void clear_dsp(struct thread_struct *thread)
+{
+ if (thread->dsp_context) {
+ kfree(thread->dsp_context->ram[0]);
+ kfree(thread->dsp_context->ram[1]);
+
+ kfree(thread->dsp_context);
+
+ thread->dsp_context = NULL;
+ }
+
+ /* Done unconditionally, even when no context was allocated. */
+ __core_reg_set(D0.8, 0);
+}
+#else
+/* No DSP support configured: nothing to clear. */
+static void clear_dsp(struct thread_struct *thread)
+{
+}
+#endif
+
+/*
+ * Switch kernel context from @prev to @next.
+ *
+ * If @prev has the FPU-active flag set, its FPU state is saved first
+ * (allocating the save area on demand). The switch itself is done by
+ * __TBISwitch(), with @prev passed as the switch parameter. The function
+ * returns the pPara value that __TBISwitch() hands back, cast to a task
+ * pointer.
+ */
+struct task_struct *__sched __switch_to(struct task_struct *prev,
+ struct task_struct *next)
+{
+ TBIRES to, from;
+
+ to.Switch.pCtx = next->thread.kernel_context;
+ to.Switch.pPara = prev;
+
+#ifdef CONFIG_METAG_FPU
+ if (prev->thread.user_flags & TBICTX_FPAC_BIT) {
+ struct pt_regs *regs = task_pt_regs(prev);
+ TBIRES state;
+
+ state.Sig.SaveMask = prev->thread.user_flags;
+ state.Sig.pCtx = &regs->ctx;
+
+ /* The save is skipped if the allocation fails. */
+ if (!prev->thread.fpu_context)
+ alloc_fpu_context(&prev->thread);
+ if (prev->thread.fpu_context)
+ __TBICtxFPUSave(state, prev->thread.fpu_context);
+ }
+ /*
+ * Force a restore of the FPU context next time this process is
+ * scheduled.
+ */
+ if (prev->thread.fpu_context)
+ prev->thread.fpu_context->needs_restore = true;
+#endif
+
+
+ /* Code below this call runs after the context switch has happened. */
+ from = __TBISwitch(to, &prev->thread.kernel_context);
+
+ /* Restore TLS pointer for this process. */
+ set_gateway_tls(current->thread.tls_ptr);
+
+ return (struct task_struct *) from.Switch.pPara;
+}
+
+/*
+ * Discard the current task's FPU and DSP state.
+ */
+void flush_thread(void)
+{
+ clear_fpu(&current->thread);
+ clear_dsp(&current->thread);
+}
+
+/*
+ * Free current thread data structures etc.
+ * Here that means the FPU and DSP contexts of the current task.
+ */
+void exit_thread(void)
+{
+ clear_fpu(&current->thread);
+ clear_dsp(&current->thread);
+}
+
+/*
+ * Report where task @p is sleeping.
+ *
+ * TODO: figure out how to unwind the kernel stack here to figure out
+ * where we went to sleep. Until then this always returns 0.
+ */
+unsigned long get_wchan(struct task_struct *p)
+{
+ return 0;
+}
+
+/*
+ * Core-dump hook for FPU registers. This implementation never fills @fpu
+ * and always reports that no FPU state was stored.
+ */
+int dump_fpu(struct pt_regs *regs, elf_fpregset_t *fpu)
+{
+ /* Returning 0 indicates that the FPU state was not stored (as it was
+ * not in use) */
+ return 0;
+}
+
+#ifdef CONFIG_METAG_USER_TCM
+
+#define ELF_MIN_ALIGN PAGE_SIZE
+
+#define ELF_PAGESTART(_v) ((_v) & ~(unsigned long)(ELF_MIN_ALIGN-1))
+#define ELF_PAGEOFFSET(_v) ((_v) & (ELF_MIN_ALIGN-1))
+#define ELF_PAGEALIGN(_v) (((_v) + ELF_MIN_ALIGN - 1) & ~(ELF_MIN_ALIGN - 1))
+
+#define BAD_ADDR(x) ((unsigned long)(x) >= TASK_SIZE)
+
+/*
+ * Map one ELF program segment described by @eppnt from @filep.
+ *
+ * Behaves like the generic ELF segment mapper, except when the page-aligned
+ * target address @addr falls inside a TCM region (tcm_lookup_tag() returns a
+ * valid tag). In that case MAP_FIXED is dropped so the file is mapped
+ * wherever the kernel chooses, a TCM allocation is made at @addr, the
+ * segment contents are copied from the temporary mapping into it, and
+ * @eppnt->p_vaddr is updated to the temporary mapping's address.
+ *
+ * @total_size, when non-zero, is the size of the whole image: the first
+ * mapping reserves that much and the tail beyond this segment is unmapped.
+ *
+ * Returns the mapped address, or a negative errno cast to unsigned long
+ * (test with BAD_ADDR()). If the TCM set-up fails after the file has been
+ * mapped, the mapping is removed again before the error is returned.
+ */
+unsigned long __metag_elf_map(struct file *filep, unsigned long addr,
+			      struct elf_phdr *eppnt, int prot, int type,
+			      unsigned long total_size)
+{
+	unsigned long map_addr, size;
+	unsigned long page_off = ELF_PAGEOFFSET(eppnt->p_vaddr);
+	unsigned long raw_size = eppnt->p_filesz + page_off;
+	unsigned long off = eppnt->p_offset - page_off;
+	unsigned int tcm_tag;
+	long err;
+
+	addr = ELF_PAGESTART(addr);
+	size = ELF_PAGEALIGN(raw_size);
+
+	/* mmap() will return -EINVAL if given a zero size, but a
+	 * segment with zero filesize is perfectly valid */
+	if (!size)
+		return addr;
+
+	tcm_tag = tcm_lookup_tag(addr);
+
+	if (tcm_tag != TCM_INVALID_TAG)
+		type &= ~MAP_FIXED;
+
+	/*
+	 * total_size is the size of the ELF (interpreter) image.
+	 * The _first_ mmap needs to know the full size, otherwise
+	 * randomization might put this image into an overlapping
+	 * position with the ELF binary image. (since size < total_size)
+	 * So we first map the 'big' image - and unmap the remainder at
+	 * the end. (which unmap is needed for ELF images with holes.)
+	 */
+	if (total_size) {
+		total_size = ELF_PAGEALIGN(total_size);
+		map_addr = vm_mmap(filep, addr, total_size, prot, type, off);
+		if (!BAD_ADDR(map_addr))
+			vm_munmap(map_addr+size, total_size-size);
+	} else
+		map_addr = vm_mmap(filep, addr, size, prot, type, off);
+
+	if (!BAD_ADDR(map_addr) && tcm_tag != TCM_INVALID_TAG) {
+		struct tcm_allocation *tcm;
+		unsigned long tcm_addr;
+
+		tcm = kmalloc(sizeof(*tcm), GFP_KERNEL);
+		if (!tcm) {
+			err = -ENOMEM;
+			goto err_unmap;
+		}
+
+		/*
+		 * NOTE(review): if tcm_alloc() succeeds at an address other
+		 * than @addr, that TCM region is not released here - confirm
+		 * whether it needs freeing.
+		 */
+		tcm_addr = tcm_alloc(tcm_tag, raw_size);
+		if (tcm_addr != addr) {
+			kfree(tcm);
+			err = -ENOMEM;
+			goto err_unmap;
+		}
+
+		tcm->tag = tcm_tag;
+		tcm->addr = tcm_addr;
+		tcm->size = raw_size;
+
+		list_add(&tcm->list, &current->mm->context.tcm);
+
+		eppnt->p_vaddr = map_addr;
+		if (copy_from_user((void *) addr, (void __user *) map_addr,
+				   raw_size)) {
+			err = -EFAULT;
+			goto err_unmap;
+		}
+	}
+
+	return map_addr;
+
+err_unmap:
+	/* Only @size bytes remain mapped at map_addr at this point. */
+	vm_munmap(map_addr, size);
+	return err;
+}
+#endif
diff --git a/arch/metag/kernel/ptrace.c b/arch/metag/kernel/ptrace.c
new file mode 100644
index 00000000000..47a8828615a
--- /dev/null
+++ b/arch/metag/kernel/ptrace.c
@@ -0,0 +1,380 @@
+/*
+ * Copyright (C) 2005-2012 Imagination Technologies Ltd.
+ *
+ * This file is subject to the terms and conditions of the GNU General
+ * Public License. See the file COPYING in the main directory of
+ * this archive for more details.
+ */
+
+#include <linux/kernel.h>
+#include <linux/mm.h>
+#include <linux/errno.h>
+#include <linux/ptrace.h>
+#include <linux/user.h>
+#include <linux/regset.h>
+#include <linux/tracehook.h>
+#include <linux/elf.h>
+#include <linux/uaccess.h>
+#include <trace/syscall.h>
+
+#define CREATE_TRACE_POINTS
+#include <trace/events/syscalls.h>
+
+/*
+ * user_regset definitions.
+ */
+
+/*
+ * Copy the general purpose register set of @regs out to @kbuf/@ubuf.
+ *
+ * The exported layout, in 32-bit words, is: 0-15 data registers, 16-19
+ * A{0-1}.{0-1}, 20-21 A{0-1}.2, 22-23 A{0-1}.3, 24 PC, 25 TXSTATUS,
+ * 26-28 TXRPT/TXBPOBITS/TXMODE, 29 zero padding. Only the part selected by
+ * @pos/@count is written. Returns 0 or the first error from the regset
+ * helpers.
+ */
+int metag_gp_regs_copyout(const struct pt_regs *regs,
+			  unsigned int pos, unsigned int count,
+			  void *kbuf, void __user *ubuf)
+{
+	const void *ax2;
+	unsigned long status;
+	int err;
+
+	/* D{0-1}.{0-7} */
+	err = user_regset_copyout(&pos, &count, &kbuf, &ubuf,
+				  regs->ctx.DX, 0, 4*16);
+	if (err)
+		return err;
+
+	/* A{0-1}.{0-1} */
+	err = user_regset_copyout(&pos, &count, &kbuf, &ubuf,
+				  regs->ctx.AX, 4*16, 4*20);
+	if (err)
+		return err;
+
+	/* A{0-1}.2 lives behind pExt when extended state is present */
+	if (regs->ctx.SaveMask & TBICTX_XEXT_BIT)
+		ax2 = regs->ctx.Ext.Ctx.pExt;
+	else
+		ax2 = &regs->ctx.Ext.AX2;
+	err = user_regset_copyout(&pos, &count, &kbuf, &ubuf,
+				  ax2, 4*20, 4*22);
+	if (err)
+		return err;
+
+	/* A{0-1}.3 */
+	err = user_regset_copyout(&pos, &count, &kbuf, &ubuf,
+				  &regs->ctx.AX3, 4*22, 4*24);
+	if (err)
+		return err;
+
+	/* PC */
+	err = user_regset_copyout(&pos, &count, &kbuf, &ubuf,
+				  &regs->ctx.CurrPC, 4*24, 4*25);
+	if (err)
+		return err;
+
+	/* TXSTATUS: the flags, plus the catch bit when CBUF is saved */
+	status = (unsigned long)regs->ctx.Flags;
+	if (regs->ctx.SaveMask & TBICTX_CBUF_BIT)
+		status |= USER_GP_REGS_STATUS_CATCH_BIT;
+	err = user_regset_copyout(&pos, &count, &kbuf, &ubuf,
+				  &status, 4*25, 4*26);
+	if (err)
+		return err;
+
+	/* TXRPT, TXBPOBITS, TXMODE */
+	err = user_regset_copyout(&pos, &count, &kbuf, &ubuf,
+				  &regs->ctx.CurrRPT, 4*26, 4*29);
+	if (err)
+		return err;
+
+	/* Padding */
+	return user_regset_copyout_zero(&pos, &count, &kbuf, &ubuf,
+					4*29, 4*30);
+}
+
+/*
+ * Update the general purpose register set of @regs from @kbuf/@ubuf.
+ *
+ * Uses the same word layout as metag_gp_regs_copyout(). Only the words
+ * covered by @pos/@count are taken from the caller; registers outside that
+ * range keep their current values. That includes TXSTATUS: the temporary
+ * it is decoded from is pre-loaded with the current flags/catch state, so a
+ * short write that stops before word 25 cannot push uninitialised stack data
+ * into Flags or SaveMask.
+ *
+ * Returns 0 or the first error from the regset helpers.
+ */
+int metag_gp_regs_copyin(struct pt_regs *regs,
+			 unsigned int pos, unsigned int count,
+			 const void *kbuf, const void __user *ubuf)
+{
+	void *ptr;
+	unsigned long data;
+	int ret;
+
+	/* D{0-1}.{0-7} */
+	ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
+				 regs->ctx.DX, 0, 4*16);
+	if (ret)
+		goto out;
+	/* A{0-1}.{0-1} */
+	ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
+				 regs->ctx.AX, 4*16, 4*20);
+	if (ret)
+		goto out;
+	/* A{0-1}.2 */
+	if (regs->ctx.SaveMask & TBICTX_XEXT_BIT)
+		ptr = regs->ctx.Ext.Ctx.pExt;
+	else
+		ptr = &regs->ctx.Ext.AX2;
+	ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
+				 ptr, 4*20, 4*22);
+	if (ret)
+		goto out;
+	/* A{0-1}.3 */
+	ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
+				 &regs->ctx.AX3, 4*22, 4*24);
+	if (ret)
+		goto out;
+	/* PC */
+	ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
+				 &regs->ctx.CurrPC, 4*24, 4*25);
+	if (ret)
+		goto out;
+	/*
+	 * TXSTATUS: start from the value copyout would report, so that the
+	 * decode below is a no-op when the caller did not supply this word.
+	 */
+	data = (unsigned long)regs->ctx.Flags;
+	if (regs->ctx.SaveMask & TBICTX_CBUF_BIT)
+		data |= USER_GP_REGS_STATUS_CATCH_BIT;
+	ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
+				 &data, 4*25, 4*26);
+	if (ret)
+		goto out;
+	regs->ctx.Flags = data & 0xffff;
+	if (data & USER_GP_REGS_STATUS_CATCH_BIT)
+		regs->ctx.SaveMask |= TBICTX_XCBF_BIT | TBICTX_CBUF_BIT;
+	else
+		regs->ctx.SaveMask &= ~TBICTX_CBUF_BIT;
+	/* TXRPT, TXBPOBITS, TXMODE */
+	ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
+				 &regs->ctx.CurrRPT, 4*26, 4*29);
+out:
+	return ret;
+}
+
+/* Regset read hook: export the general registers of @target. */
+static int metag_gp_regs_get(struct task_struct *target,
+			     const struct user_regset *regset,
+			     unsigned int pos, unsigned int count,
+			     void *kbuf, void __user *ubuf)
+{
+	return metag_gp_regs_copyout(task_pt_regs(target), pos, count,
+				     kbuf, ubuf);
+}
+
+/* Regset write hook: update the general registers of @target. */
+static int metag_gp_regs_set(struct task_struct *target,
+			     const struct user_regset *regset,
+			     unsigned int pos, unsigned int count,
+			     const void *kbuf, const void __user *ubuf)
+{
+	return metag_gp_regs_copyin(task_pt_regs(target), pos, count,
+				    kbuf, ubuf);
+}
+
+/*
+ * Copy the four catch buffer words (TXCATCH{0-3}) of @regs out to
+ * @kbuf/@ubuf. Zeros are reported when the context has no saved catch
+ * buffer (TBICTX_XCBF_BIT clear).
+ */
+int metag_cb_regs_copyout(const struct pt_regs *regs,
+			  unsigned int pos, unsigned int count,
+			  void *kbuf, void __user *ubuf)
+{
+	if (!(regs->ctx.SaveMask & TBICTX_XCBF_BIT))
+		return user_regset_copyout_zero(&pos, &count, &kbuf, &ubuf,
+						0, 4*4);
+
+	return user_regset_copyout(&pos, &count, &kbuf, &ubuf,
+				   regs->extcb0, 0, 4*4);
+}
+
+/*
+ * Overwrite the four catch buffer words (TXCATCH{0-3}) of @regs from
+ * @kbuf/@ubuf. SaveMask is not touched here.
+ */
+int metag_cb_regs_copyin(struct pt_regs *regs,
+			 unsigned int pos, unsigned int count,
+			 const void *kbuf, const void __user *ubuf)
+{
+	return user_regset_copyin(&pos, &count, &kbuf, &ubuf,
+				  regs->extcb0, 0, 4*4);
+}
+
+/* Regset read hook: export the catch buffer registers of @target. */
+static int metag_cb_regs_get(struct task_struct *target,
+			     const struct user_regset *regset,
+			     unsigned int pos, unsigned int count,
+			     void *kbuf, void __user *ubuf)
+{
+	return metag_cb_regs_copyout(task_pt_regs(target), pos, count,
+				     kbuf, ubuf);
+}
+
+/* Regset write hook: update the catch buffer registers of @target. */
+static int metag_cb_regs_set(struct task_struct *target,
+			     const struct user_regset *regset,
+			     unsigned int pos, unsigned int count,
+			     const void *kbuf, const void __user *ubuf)
+{
+	return metag_cb_regs_copyin(task_pt_regs(target), pos, count,
+				    kbuf, ubuf);
+}
+
+/*
+ * Copy the read pipeline state of @regs out to @kbuf/@ubuf.
+ *
+ * The exported layout is six 64-bit pipeline entries followed by one 32-bit
+ * word holding the mask of valid entries (13 words in total). Entries whose
+ * mask bit is clear are reported as zero; if the context has no saved read
+ * pipeline (TBICTX_CBRP_BIT clear) the whole set is reported as zero.
+ */
+int metag_rp_state_copyout(const struct pt_regs *regs,
+ unsigned int pos, unsigned int count,
+ void *kbuf, void __user *ubuf)
+{
+ unsigned long mask;
+ u64 *ptr;
+ int ret, i;
+
+ /* Empty read pipeline */
+ if (!(regs->ctx.SaveMask & TBICTX_CBRP_BIT)) {
+ ret = user_regset_copyout_zero(&pos, &count, &kbuf, &ubuf,
+ 0, 4*13);
+ goto out;
+ }
+
+ /* The valid-entry mask is kept in the RPMASK field of TXDIVTIME. */
+ mask = (regs->ctx.CurrDIVTIME & TXDIVTIME_RPMASK_BITS) >>
+ TXDIVTIME_RPMASK_S;
+
+ /* Read pipeline entries */
+ /* Entries are read as 64-bit values starting at extcb0[1]. */
+ ptr = (void *)&regs->extcb0[1];
+ for (i = 0; i < 6; ++i, ++ptr) {
+ if (mask & (1 << i))
+ ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf,
+ ptr, 8*i, 8*(i + 1));
+ else
+ ret = user_regset_copyout_zero(&pos, &count, &kbuf,
+ &ubuf, 8*i, 8*(i + 1));
+ if (ret)
+ goto out;
+ }
+ /* Mask of entries */
+ ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf,
+ &mask, 4*12, 4*13);
+out:
+ return ret;
+}
+
+/*
+ * Replace the read pipeline state of @regs from @kbuf/@ubuf.
+ *
+ * The whole state (six 64-bit entries plus the 32-bit mask, 4*13 bytes) is
+ * read into a local copy before @regs is modified. Because every field of
+ * that copy is then used, a write that does not start at offset 0 or does
+ * not cover all 4*13 bytes is rejected with -EINVAL instead of acting on
+ * uninitialised stack contents.
+ *
+ * Only entries whose mask bit is set are written. The mask is stored in the
+ * RPMASK field of TXDIVTIME, and the SaveMask read-pipeline flags are
+ * updated to match.
+ */
+int metag_rp_state_copyin(struct pt_regs *regs,
+			  unsigned int pos, unsigned int count,
+			  const void *kbuf, const void __user *ubuf)
+{
+	struct user_rp_state rp;
+	unsigned long long *ptr;
+	int ret, i;
+
+	/* Partial updates would leave parts of rp uninitialised */
+	if (pos != 0 || count < 4*13)
+		return -EINVAL;
+
+	/* Read the entire pipeline before making any changes */
+	ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
+				 &rp, 0, 4*13);
+	if (ret)
+		goto out;
+
+	/* Write pipeline entries */
+	ptr = (void *)&regs->extcb0[1];
+	for (i = 0; i < 6; ++i, ++ptr)
+		if (rp.mask & (1 << i))
+			*ptr = rp.entries[i];
+
+	/* Update RPMask in TXDIVTIME */
+	regs->ctx.CurrDIVTIME &= ~TXDIVTIME_RPMASK_BITS;
+	regs->ctx.CurrDIVTIME |= (rp.mask << TXDIVTIME_RPMASK_S)
+				 & TXDIVTIME_RPMASK_BITS;
+
+	/* Set/clear flags to indicate catch/read pipeline state */
+	if (rp.mask)
+		regs->ctx.SaveMask |= TBICTX_XCBF_BIT | TBICTX_CBRP_BIT;
+	else
+		regs->ctx.SaveMask &= ~TBICTX_CBRP_BIT;
+out:
+	return ret;
+}
+
+/* Regset read hook: export the read pipeline state of @target. */
+static int metag_rp_state_get(struct task_struct *target,
+			      const struct user_regset *regset,
+			      unsigned int pos, unsigned int count,
+			      void *kbuf, void __user *ubuf)
+{
+	return metag_rp_state_copyout(task_pt_regs(target), pos, count,
+				      kbuf, ubuf);
+}
+
+/* Regset write hook: update the read pipeline state of @target. */
+static int metag_rp_state_set(struct task_struct *target,
+			      const struct user_regset *regset,
+			      unsigned int pos, unsigned int count,
+			      const void *kbuf, const void __user *ubuf)
+{
+	return metag_rp_state_copyin(task_pt_regs(target), pos, count,
+				     kbuf, ubuf);
+}
+
+/* Indices into metag_regsets[]. */
+enum metag_regset {
+ REGSET_GENERAL, /* general purpose registers */
+ REGSET_CBUF, /* catch buffer registers */
+ REGSET_READPIPE, /* read pipeline state */
+};
+
+/*
+ * The register sets exported for this architecture. Each entry names the
+ * core-dump note type it is written as, its size as a count (.n) of
+ * sizeof(long) units, and the get/set handlers defined above.
+ */
+static const struct user_regset metag_regsets[] = {
+ [REGSET_GENERAL] = {
+ .core_note_type = NT_PRSTATUS,
+ .n = ELF_NGREG,
+ .size = sizeof(long),
+ .align = sizeof(long long),
+ .get = metag_gp_regs_get,
+ .set = metag_gp_regs_set,
+ },
+ [REGSET_CBUF] = {
+ .core_note_type = NT_METAG_CBUF,
+ .n = sizeof(struct user_cb_regs) / sizeof(long),
+ .size = sizeof(long),
+ .align = sizeof(long long),
+ .get = metag_cb_regs_get,
+ .set = metag_cb_regs_set,
+ },
+ [REGSET_READPIPE] = {
+ .core_note_type = NT_METAG_RPIPE,
+ .n = sizeof(struct user_rp_state) / sizeof(long),
+ .size = sizeof(long),
+ .align = sizeof(long long),
+ .get = metag_rp_state_get,
+ .set = metag_rp_state_set,
+ },
+};
+
+/* The single regset view offered for Meta tasks; wraps metag_regsets[]. */
+static const struct user_regset_view user_metag_view = {
+ .name = "metag",
+ .e_machine = EM_METAG,
+ .regsets = metag_regsets,
+ .n = ARRAY_SIZE(metag_regsets)
+};
+
+/* Return the regset view for @task; every task gets the same view. */
+const struct user_regset_view *task_user_regset_view(struct task_struct *task)
+{
+ return &user_metag_view;
+}
+
+/*
+ * Called by kernel/ptrace.c when detaching.
+ *
+ * Make sure single step bits etc are not set. This implementation has no
+ * such state to clear, so the function is empty.
+ */
+void ptrace_disable(struct task_struct *child)
+{
+ /* nothing to do.. */
+}
+
+/*
+ * Architecture hook for ptrace requests. No request is handled specially
+ * here; everything is forwarded to the generic ptrace_request().
+ */
+long arch_ptrace(struct task_struct *child, long request, unsigned long addr,
+		 unsigned long data)
+{
+	return ptrace_request(child, request, addr, data);
+}
+
+/*
+ * Syscall entry tracing hook. Reports the entry to a tracer when
+ * TIF_SYSCALL_TRACE is set and fires the sys_enter tracepoint when
+ * TIF_SYSCALL_TRACEPOINT is set.
+ *
+ * Returns -1 if the tracehook report returned non-zero, otherwise the value
+ * of DX[0].U1 (the value also passed to the tracepoint as the syscall id).
+ */
+int syscall_trace_enter(struct pt_regs *regs)
+{
+	int report = 0;
+
+	if (test_thread_flag(TIF_SYSCALL_TRACE))
+		report = tracehook_report_syscall_entry(regs);
+
+	if (unlikely(test_thread_flag(TIF_SYSCALL_TRACEPOINT)))
+		trace_sys_enter(regs, regs->ctx.DX[0].U1);
+
+	if (report)
+		return -1;
+
+	return regs->ctx.DX[0].U1;
+}
+
+/*
+ * Syscall exit tracing hook: fires the sys_exit tracepoint when
+ * TIF_SYSCALL_TRACEPOINT is set and reports the exit to a tracer when
+ * TIF_SYSCALL_TRACE is set.
+ */
+void syscall_trace_leave(struct pt_regs *regs)
+{
+ if (unlikely(test_thread_flag(TIF_SYSCALL_TRACEPOINT)))
+ trace_sys_exit(regs, regs->ctx.DX[0].U1);
+
+ if (test_thread_flag(TIF_SYSCALL_TRACE))
+ tracehook_report_syscall_exit(regs, 0);
+}
diff --git a/arch/metag/kernel/setup.c b/arch/metag/kernel/setup.c
new file mode 100644
index 00000000000..879246170ae
--- /dev/null
+++ b/arch/metag/kernel/setup.c
@@ -0,0 +1,631 @@
+/*
+ * Copyright (C) 2005-2012 Imagination Technologies Ltd.
+ *
+ * This file contains the architecture-dependent parts of system setup.
+ *
+ */
+
+#include <linux/export.h>
+#include <linux/bootmem.h>
+#include <linux/console.h>
+#include <linux/cpu.h>
+#include <linux/delay.h>
+#include <linux/errno.h>
+#include <linux/fs.h>
+#include <linux/genhd.h>
+#include <linux/init.h>
+#include <linux/initrd.h>
+#include <linux/interrupt.h>
+#include <linux/kernel.h>
+#include <linux/memblock.h>
+#include <linux/mm.h>
+#include <linux/of_fdt.h>
+#include <linux/pfn.h>
+#include <linux/root_dev.h>
+#include <linux/sched.h>
+#include <linux/seq_file.h>
+#include <linux/start_kernel.h>
+#include <linux/string.h>
+
+#include <asm/cachepart.h>
+#include <asm/clock.h>
+#include <asm/core_reg.h>
+#include <asm/cpu.h>
+#include <asm/da.h>
+#include <asm/highmem.h>
+#include <asm/hwthread.h>
+#include <asm/l2cache.h>
+#include <asm/mach/arch.h>
+#include <asm/metag_mem.h>
+#include <asm/metag_regs.h>
+#include <asm/mmu.h>
+#include <asm/mmzone.h>
+#include <asm/processor.h>
+#include <asm/prom.h>
+#include <asm/sections.h>
+#include <asm/setup.h>
+#include <asm/traps.h>
+
+/* Priv protect as many registers as possible. */
+#define DEFAULT_PRIV (TXPRIVEXT_COPRO_BITS | \
+ TXPRIVEXT_TXTRIGGER_BIT | \
+ TXPRIVEXT_TXGBLCREG_BIT | \
+ TXPRIVEXT_ILOCK_BIT | \
+ TXPRIVEXT_TXITACCYC_BIT | \
+ TXPRIVEXT_TXDIVTIME_BIT | \
+ TXPRIVEXT_TXAMAREGX_BIT | \
+ TXPRIVEXT_TXTIMERI_BIT | \
+ TXPRIVEXT_TXSTATUS_BIT | \
+ TXPRIVEXT_TXDISABLE_BIT)
+
+/* Meta2 specific bits. */
+#ifdef CONFIG_METAG_META12
+#define META2_PRIV 0
+#else
+#define META2_PRIV (TXPRIVEXT_TXTIMER_BIT | \
+ TXPRIVEXT_TRACE_BIT)
+#endif
+
+/* Unaligned access checking bits. */
+#ifdef CONFIG_METAG_UNALIGNED
+#define UNALIGNED_PRIV TXPRIVEXT_ALIGNREW_BIT
+#else
+#define UNALIGNED_PRIV 0
+#endif
+
+#define PRIV_BITS (DEFAULT_PRIV | \
+ META2_PRIV | \
+ UNALIGNED_PRIV)
+
+/*
+ * Protect access to:
+ * 0x06000000-0x07ffffff Direct mapped region
+ * 0x05000000-0x05ffffff MMU table region (Meta1)
+ * 0x04400000-0x047fffff Cache flush region
+ * 0x84000000-0x87ffffff Core cache memory region (Meta2)
+ *
+ * Allow access to:
+ * 0x80000000-0x81ffffff Core code memory region (Meta2)
+ */
+#ifdef CONFIG_METAG_META12
+#define PRIVSYSR_BITS TXPRIVSYSR_ALL_BITS
+#else
+#define PRIVSYSR_BITS (TXPRIVSYSR_ALL_BITS & ~TXPRIVSYSR_CORECODE_BIT)
+#endif
+
+/* Protect all 0x02xxxxxx and 0x048xxxxx. */
+#define PIOREG_BITS 0xffffffff
+
+/*
+ * Protect all 0x04000xx0 (system events)
+ * except write combiner flush and write fence (system events 4 and 5).
+ */
+#define PSYREG_BITS 0xfffffffb
+
+
+extern char _heap_start[];
+
+#ifdef CONFIG_METAG_BUILTIN_DTB
+extern u32 __dtb_start[];
+#endif
+
+#ifdef CONFIG_DA_CONSOLE
+/* Our early channel based console driver */
+extern struct console dash_console;
+#endif
+
+struct machine_desc *machine_desc __initdata;
+
+/*
+ * Map a Linux CPU number to a hardware thread ID
+ * In SMP this will be setup with the correct mapping at startup; in UP this
+ * will map to the HW thread on which we are running.
+ */
+u8 cpu_2_hwthread_id[NR_CPUS] __read_mostly = {
+ [0 ... NR_CPUS-1] = BAD_HWTHREAD_ID
+};
+
+/*
+ * Map a hardware thread ID to a Linux CPU number
+ * In SMP this will be fleshed out with the correct CPU ID for a particular
+ * hardware thread. In UP this will be initialised with the boot CPU ID.
+ */
+u8 hwthread_id_2_cpu[4] __read_mostly = {
+ [0 ... 3] = BAD_CPU_ID
+};
+
+/* The relative offset of the MMU mapped memory (from ldlk or bootloader)
+ * to the real physical memory. This is needed as we have to use the
+ * physical addresses in the MMU tables (pte entries), and not the virtual
+ * addresses.
+ * This variable is used in the __pa() and __va() macros, and should
+ * probably only be used via them.
+ */
+unsigned int meta_memoffset;
+EXPORT_SYMBOL(meta_memoffset);
+
+static char __initdata *original_cmd_line;
+
+DEFINE_PER_CPU(PTBI, pTBI);
+
+/*
+ * Mappings are specified as "CPU_ID:HWTHREAD_ID", e.g.
+ *
+ * "hwthread_map=0:1,1:2,2:3,3:0"
+ *
+ * Linux CPU ID HWTHREAD_ID
+ * ---------------------------
+ * 0 1
+ * 1 2
+ * 2 3
+ * 3 0
+ */
+/*
+ * Parse the "hwthread_map=" early parameter.
+ *
+ * @p is a comma separated list of "CPU_ID:HWTHREAD_ID" pairs, each ID being
+ * a single decimal digit. Every valid pair is entered into both
+ * cpu_2_hwthread_id[] and hwthread_id_2_cpu[].
+ *
+ * Nothing is stored for a pair until both halves have been validated: the
+ * CPU ID must index cpu_2_hwthread_id[] (i.e. be below NR_CPUS), the
+ * separator must be a colon, and the thread ID must index
+ * hwthread_id_2_cpu[]. Parsing stops at the first malformed pair.
+ *
+ * Returns 0 on success or -EINVAL on malformed input.
+ */
+static int __init parse_hwthread_map(char *p)
+{
+	int cpu, thread;
+
+	while (*p) {
+		cpu = (*p++) - '0';
+		if (cpu < 0 || cpu > 9 || cpu >= NR_CPUS)
+			goto err_cpu;
+
+		/* Also catches a string that ends right after the CPU ID */
+		if (*p != ':')
+			goto err_format;
+		p++;		/* skip colon */
+
+		/* A terminating NUL yields a negative value and is rejected */
+		thread = *p - '0';
+		if (thread < 0 || thread >= (int)ARRAY_SIZE(hwthread_id_2_cpu))
+			goto err_thread;
+		p++;
+
+		cpu_2_hwthread_id[cpu] = thread;
+		hwthread_id_2_cpu[thread] = cpu;
+
+		if (*p == ',')
+			p++;		/* skip comma */
+	}
+
+	return 0;
+err_cpu:
+	pr_err("%s: hwthread_map cpu argument out of range\n", __func__);
+	return -EINVAL;
+err_format:
+	pr_err("%s: hwthread_map expects CPU_ID:HWTHREAD_ID pairs\n", __func__);
+	return -EINVAL;
+err_thread:
+	pr_err("%s: hwthread_map thread argument out of range\n", __func__);
+	return -EINVAL;
+}
+early_param("hwthread_map", parse_hwthread_map);
+
+/*
+ * Print every machine description built into the kernel, together with its
+ * device tree compatible strings, then halt the processor with HALT_PANIC.
+ * Does not return to the caller in any useful sense.
+ *
+ * The compatible strings are appended to each machine's line with pr_cont()
+ * so they are explicitly marked as continuations of that line.
+ */
+void __init dump_machine_table(void)
+{
+	struct machine_desc *p;
+	const char **compat;
+
+	pr_info("Available machine support:\n\tNAME\t\tCOMPATIBLE LIST\n");
+	for_each_machine_desc(p) {
+		pr_info("\t%s\t[", p->name);
+		for (compat = p->dt_compat; compat && *compat; ++compat)
+			pr_cont(" '%s'", *compat);
+		pr_cont(" ]\n");
+	}
+
+	pr_info("\nPlease check your kernel config and/or bootloader.\n");
+
+	hard_processor_halt(HALT_PANIC);
+}
+
+#ifdef CONFIG_METAG_HALT_ON_PANIC
+/*
+ * Panic notifier callback: halt the hardware thread with HALT_PANIC.
+ * All arguments are ignored.
+ */
+static int metag_panic_event(struct notifier_block *this, unsigned long event,
+ void *ptr)
+{
+ hard_processor_halt(HALT_PANIC);
+ return NOTIFY_DONE;
+}
+
+/* Notifier block hooking metag_panic_event() into the panic notifier list. */
+static struct notifier_block metag_panic_block = {
+	.notifier_call	= metag_panic_event,
+	.next		= NULL,
+	.priority	= 0,
+};
+#endif
+
+/*
+ * Architecture-specific boot-time setup.
+ *
+ * In order: probes the caches and debug adapter, selects the machine
+ * description (from a device tree if one was supplied or built in),
+ * assembles the kernel command line and returns it through @cmdline_p,
+ * takes over the TBI signal vectors, validates where the kernel was loaded,
+ * derives the physical memory layout from the TBI heap segment, initialises
+ * paging and privilege protection, records the boot CPU's hardware thread
+ * mapping and unflattens the device tree.
+ */
+void __init setup_arch(char **cmdline_p)
+{
+ unsigned long start_pfn;
+ unsigned long text_start = (unsigned long)(&_stext);
+ unsigned long cpu = smp_processor_id();
+ unsigned long heap_start, heap_end;
+ unsigned long start_pte;
+ PTBI _pTBI;
+ PTBISEG p_heap;
+ int heap_id, i;
+
+ metag_cache_probe();
+
+ metag_da_probe();
+#ifdef CONFIG_DA_CONSOLE
+ if (metag_da_enabled()) {
+ /* An early channel based console driver */
+ register_console(&dash_console);
+ add_preferred_console("ttyDA", 1, NULL);
+ }
+#endif
+
+ /* try interpreting the argument as a device tree */
+ machine_desc = setup_machine_fdt(original_cmd_line);
+ /* if it doesn't look like a device tree it must be a command line */
+ if (!machine_desc) {
+#ifdef CONFIG_METAG_BUILTIN_DTB
+ /* try the embedded device tree */
+ machine_desc = setup_machine_fdt(__dtb_start);
+ if (!machine_desc)
+ panic("Invalid embedded device tree.");
+#else
+ /* use the default machine description */
+ machine_desc = default_machine_desc();
+#endif
+#ifndef CONFIG_CMDLINE_FORCE
+ /* append the bootloader cmdline to any builtin fdt cmdline */
+ /* A separating space is only added when both parts are non-empty. */
+ if (boot_command_line[0] && original_cmd_line[0])
+ strlcat(boot_command_line, " ", COMMAND_LINE_SIZE);
+ strlcat(boot_command_line, original_cmd_line,
+ COMMAND_LINE_SIZE);
+#endif
+ }
+ setup_meta_clocks(machine_desc->clocks);
+
+ *cmdline_p = boot_command_line;
+ parse_early_param();
+
+ /*
+ * Make sure we don't alias in dcache or icache
+ */
+ check_for_cache_aliasing(cpu);
+
+
+#ifdef CONFIG_METAG_HALT_ON_PANIC
+ atomic_notifier_chain_register(&panic_notifier_list,
+ &metag_panic_block);
+#endif
+
+#ifdef CONFIG_DUMMY_CONSOLE
+ conswitchp = &dummy_con;
+#endif
+
+ if (!(__core_reg_get(TXSTATUS) & TXSTATUS_PSTAT_BIT))
+ panic("Privilege must be enabled for this thread.");
+
+ _pTBI = __TBI(TBID_ISTAT_BIT);
+
+ per_cpu(pTBI, cpu) = _pTBI;
+
+ if (!per_cpu(pTBI, cpu))
+ panic("No TBI found!");
+
+ /*
+ * Initialize all interrupt vectors to our copy of __TBIUnExpXXX,
+ * rather than the version from the bootloader. This makes call
+ * stacks easier to understand and may allow us to unmap the
+ * bootloader at some point.
+ *
+ * We need to keep the LWK handler that TBI installed in order to
+ * be able to do inter-thread comms.
+ */
+ for (i = 0; i <= TBID_SIGNUM_MAX; i++)
+ if (i != TBID_SIGNUM_LWK)
+ _pTBI->fnSigs[i] = __TBIUnExpXXX;
+
+ /* A Meta requirement is that the kernel is loaded (virtually)
+ * at the PAGE_OFFSET.
+ */
+ if (PAGE_OFFSET != text_start)
+ panic("Kernel not loaded at PAGE_OFFSET (%#x) but at %#lx.",
+ PAGE_OFFSET, text_start);
+
+ start_pte = mmu_read_second_level_page(text_start);
+
+ /*
+ * Kernel pages should have the PRIV bit set by the bootloader.
+ */
+ if (!(start_pte & _PAGE_KERNEL))
+ panic("kernel pte does not have PRIV set");
+
+ /*
+ * See __pa and __va in include/asm/page.h.
+ * This value is negative when running in local space but the
+ * calculations work anyway.
+ */
+ meta_memoffset = text_start - (start_pte & PAGE_MASK);
+
+ /* Now let's look at the heap space */
+ heap_id = (__TBIThreadId() & TBID_THREAD_BITS)
+ + TBID_SEG(0, TBID_SEGSCOPE_LOCAL, TBID_SEGTYPE_HEAP);
+
+ p_heap = __TBIFindSeg(NULL, heap_id);
+
+ if (!p_heap)
+ panic("Could not find heap from TBI!");
+
+ /* The heap begins at the first full page after the kernel data. */
+ heap_start = (unsigned long) &_heap_start;
+
+ /* The heap ends at the end of the heap segment specified with
+ * ldlk.
+ */
+ if (is_global_space(text_start)) {
+ pr_debug("WARNING: running in global space!\n");
+ heap_end = (unsigned long)p_heap->pGAddr + p_heap->Bytes;
+ } else {
+ heap_end = (unsigned long)p_heap->pLAddr + p_heap->Bytes;
+ }
+
+ ROOT_DEV = Root_RAM0;
+
+ /* init_mm is the mm struct used for the first task. It is then
+ * cloned for all other tasks spawned from that task.
+ *
+ * Note - we are using the virtual addresses here.
+ */
+ init_mm.start_code = (unsigned long)(&_stext);
+ init_mm.end_code = (unsigned long)(&_etext);
+ init_mm.end_data = (unsigned long)(&_edata);
+ init_mm.brk = (unsigned long)heap_start;
+
+ /* Low memory spans from the start of kernel text to the end of the heap. */
+ min_low_pfn = PFN_UP(__pa(text_start));
+ max_low_pfn = PFN_DOWN(__pa(heap_end));
+
+ pfn_base = min_low_pfn;
+
+ /* Round max_pfn up to a 4Mb boundary. The free_bootmem_node()
+ * call later makes sure to keep the rounded up pages marked reserved.
+ */
+ max_pfn = max_low_pfn + ((1 << MAX_ORDER) - 1);
+ max_pfn &= ~((1 << MAX_ORDER) - 1);
+
+ start_pfn = PFN_UP(__pa(heap_start));
+
+ if (min_low_pfn & ((1 << MAX_ORDER) - 1)) {
+ /* Theoretically, we could expand the space that the
+ * bootmem allocator covers - much as we do for the
+ * 'high' address, and then tell the bootmem system
+ * that the lowest chunk is 'not available'. Right
+ * now it is just much easier to constrain the
+ * user to always MAX_ORDER align their kernel space.
+ */
+
+ panic("Kernel must be %d byte aligned, currently at %#lx.",
+ 1 << (MAX_ORDER + PAGE_SHIFT),
+ min_low_pfn << PAGE_SHIFT);
+ }
+
+#ifdef CONFIG_HIGHMEM
+ highstart_pfn = highend_pfn = max_pfn;
+ high_memory = (void *) __va(PFN_PHYS(highstart_pfn));
+#else
+ high_memory = (void *)__va(PFN_PHYS(max_pfn));
+#endif
+
+ paging_init(heap_end);
+
+ setup_priv();
+
+ /* Setup the boot cpu's mapping. The rest will be setup below. */
+ cpu_2_hwthread_id[smp_processor_id()] = hard_processor_id();
+ hwthread_id_2_cpu[hard_processor_id()] = smp_processor_id();
+
+ /* Copy device tree blob into non-init memory before unflattening */
+ copy_fdt();
+ unflatten_device_tree();
+
+#ifdef CONFIG_SMP
+ smp_init_cpus();
+#endif
+
+ /* Optional per-machine hook, run last. */
+ if (machine_desc->init_early)
+ machine_desc->init_early();
+}
+
+/*
+ * Arch-level initcall: let the machine descriptor customize platform
+ * devices or add new ones.  A machine without an init_machine hook is
+ * not an error; the initcall always reports success.
+ */
+static int __init customize_machine(void)
+{
+	if (!machine_desc->init_machine)
+		return 0;
+
+	machine_desc->init_machine();
+	return 0;
+}
+arch_initcall(customize_machine);
+
+/*
+ * Late initcall: run the machine descriptor's optional init_late hook.
+ * Always reports success, whether or not a hook is installed.
+ */
+static int __init init_machine_late(void)
+{
+	if (!machine_desc->init_late)
+		return 0;
+
+	machine_desc->init_late();
+	return 0;
+}
+late_initcall(init_machine_late);
+
+#ifdef CONFIG_PROC_FS
+/*
+ * Get CPU information for use by the procfs.
+ *
+ * Returns a constant string naming the DSP/FPU capabilities.  On META21
+ * cores the answer is decoded from the CORE_ID register and @txenable is
+ * ignored; otherwise it is derived from the class bits of @txenable.
+ */
+static const char *get_cpu_capabilities(unsigned int txenable)
+{
+#ifdef CONFIG_METAG_META21
+	/* See CORE_ID in META HTP.GP TRM - Architecture Overview 2.1.238 */
+	int core_id = metag_in32(METAC_CORE_ID);
+	unsigned int dsp = (core_id >> 3) & 7;
+	unsigned int fpu = (core_id >> 7) & 3;
+	const char *caps;
+
+	/* Key layout: FPU type in bits 4:3, DSP type in bits 2:0. */
+	switch ((fpu << 3) | dsp) {
+	case 0x00:
+		caps = "EDSP";
+		break;
+	case 0x01:
+		caps = "DSP";
+		break;
+	case 0x08:
+		caps = "EDSP+LFPU";
+		break;
+	case 0x09:
+		caps = "DSP+LFPU";
+		break;
+	case 0x10:
+		caps = "EDSP+FPU";
+		break;
+	case 0x11:
+		caps = "DSP+FPU";
+		break;
+	default:
+		caps = "UNKNOWN";
+		break;
+	}
+
+	return caps;
+
+#else
+	return (txenable & TXENABLE_CLASS_BITS) ? "" : "DSP";
+#endif
+}
+
+/*
+ * seq_file "show" callback behind cpuinfo_op.  Prints the CPU name and
+ * revision, core clock, BogoMips, calibration value and capability string,
+ * followed by the L2 cache state when CONFIG_METAG_L2C is set and an L2 is
+ * present.  Always returns 0.
+ *
+ * NOTE(review): in the SMP branch major/minor are decoded from the TXENABLE
+ * of the thread executing this function, whereas the capability string uses
+ * each CPU's own TXENABLE; "(thread %d)" there prints the Linux CPU index
+ * rather than the hardware thread number.
+ */
+static int show_cpuinfo(struct seq_file *m, void *v)
+{
+ const char *cpu;
+ unsigned int txenable, thread_id, major, minor;
+ unsigned long clockfreq = get_coreclock();
+#ifdef CONFIG_SMP
+ int i;
+ unsigned long lpj;
+#endif
+
+ cpu = "META";
+
+ /* Decode revision and thread number from this thread's TXENABLE. */
+ txenable = __core_reg_get(TXENABLE);
+ major = (txenable & TXENABLE_MAJOR_REV_BITS) >> TXENABLE_MAJOR_REV_S;
+ minor = (txenable & TXENABLE_MINOR_REV_BITS) >> TXENABLE_MINOR_REV_S;
+ thread_id = (txenable >> 8) & 0x3;
+
+#ifdef CONFIG_SMP
+ /* One record per online CPU, each using its own calibration value. */
+ for_each_online_cpu(i) {
+ lpj = per_cpu(cpu_data, i).loops_per_jiffy;
+ txenable = core_reg_read(TXUCT_ID, TXENABLE_REGNUM,
+ cpu_2_hwthread_id[i]);
+
+ seq_printf(m, "CPU:\t\t%s %d.%d (thread %d)\n"
+ "Clocking:\t%lu.%1luMHz\n"
+ "BogoMips:\t%lu.%02lu\n"
+ "Calibration:\t%lu loops\n"
+ "Capabilities:\t%s\n\n",
+ cpu, major, minor, i,
+ clockfreq / 1000000, (clockfreq / 100000) % 10,
+ lpj / (500000 / HZ), (lpj / (5000 / HZ)) % 100,
+ lpj,
+ get_cpu_capabilities(txenable));
+ }
+#else
+ /* Uniprocessor: a single record based on the global loops_per_jiffy. */
+ seq_printf(m, "CPU:\t\t%s %d.%d (thread %d)\n"
+ "Clocking:\t%lu.%1luMHz\n"
+ "BogoMips:\t%lu.%02lu\n"
+ "Calibration:\t%lu loops\n"
+ "Capabilities:\t%s\n",
+ cpu, major, minor, thread_id,
+ clockfreq / 1000000, (clockfreq / 100000) % 10,
+ loops_per_jiffy / (500000 / HZ),
+ (loops_per_jiffy / (5000 / HZ)) % 100,
+ loops_per_jiffy,
+ get_cpu_capabilities(txenable));
+#endif /* CONFIG_SMP */
+
+#ifdef CONFIG_METAG_L2C
+ if (meta_l2c_is_present()) {
+ /* The size is shifted right by 10 to report it in KB. */
+ seq_printf(m, "L2 cache:\t%s\n"
+ "L2 cache size:\t%d KB\n",
+ meta_l2c_is_enabled() ? "enabled" : "disabled",
+ meta_l2c_size() >> 10);
+ }
+#endif
+ return 0;
+}
+
+/*
+ * seq_file iterator for the cpuinfo file.  Exactly one record exists: the
+ * start callback yields a non-NULL token only at position 0 and the next
+ * callback ends the sequence immediately.
+ */
+static void *c_start(struct seq_file *m, loff_t *pos)
+{
+	return (*pos == 0) ? (void *)1 : NULL;
+}
+
+static void *c_next(struct seq_file *m, void *v, loff_t *pos)
+{
+	return NULL;
+}
+
+/* Nothing was acquired in c_start(), so there is nothing to release. */
+static void c_stop(struct seq_file *m, void *v)
+{
+}
+
+const struct seq_operations cpuinfo_op = {
+	.start	= c_start,
+	.next	= c_next,
+	.stop	= c_stop,
+	.show	= show_cpuinfo,
+};
+#endif /* CONFIG_PROC_FS */
+
+/*
+ * Early C entry point: zeroes the timer and the bss, records the command
+ * line pointer and boot thread number, then hands over to start_kernel().
+ *
+ * @args: command line pointer, saved in original_cmd_line for setup_arch().
+ */
+void __init metag_start_kernel(char *args)
+{
+ /* Zero the timer register so timestamps are from the point at
+ * which the kernel started running.
+ */
+ __core_reg_set(TXTIMER, 0);
+
+ /* Clear the bss. */
+ memset(__bss_start, 0,
+ (unsigned long)__bss_stop - (unsigned long)__bss_start);
+
+ /* Remember where these are for use in setup_arch */
+ /* NOTE(review): stored only after the bss clear above - presumably
+ * because original_cmd_line may itself live in the bss; confirm.
+ */
+ original_cmd_line = args;
+
+ /* Record the hardware thread number as this task's CPU. */
+ current_thread_info()->cpu = hard_processor_id();
+
+ start_kernel();
+}
+
+/**
+ * setup_priv() - Set up privilege protection registers.
+ *
+ * Programs TXPRIVEXT and the calling hardware thread's PRIVSYSR, PIOREG and
+ * PSYREG registers to prevent userland from touching our precious registers
+ * and sensitive memory areas.
+ */
+void setup_priv(void)
+{
+	/* Offset from the thread 0 registers to this thread's copies. */
+	unsigned int thread_off = hard_processor_id() << TXPRIVREG_STRIDE_S;
+
+	__core_reg_set(TXPRIVEXT, PRIV_BITS);
+
+	metag_out32(PRIVSYSR_BITS, T0PRIVSYSR + thread_off);
+	metag_out32(PIOREG_BITS, T0PIOREG + thread_off);
+	metag_out32(PSYREG_BITS, T0PSYREG + thread_off);
+}
+
+/* Return the TBI pointer recorded in the per-cpu pTBI slot of @cpu. */
+PTBI pTBI_get(unsigned int cpu)
+{
+	PTBI tbi = per_cpu(pTBI, cpu);
+
+	return tbi;
+}
+EXPORT_SYMBOL(pTBI_get);
+
+/*
+ * Build-time capability string ("dsp", "fpu", both, or empty) selected by
+ * CONFIG_METAG_DSP / CONFIG_METAG_FPU.  It is the data behind the
+ * "capabilities" sysctl entry defined below.
+ *
+ * NOTE(review): the identifier is spelled "capabilites" and is not static;
+ * check for users outside this file before renaming it.
+ */
+#if defined(CONFIG_METAG_DSP) && defined(CONFIG_METAG_FPU)
+char capabilites[] = "dsp fpu";
+#elif defined(CONFIG_METAG_DSP)
+char capabilites[] = "dsp";
+#elif defined(CONFIG_METAG_FPU)
+char capabilites[] = "fpu";
+#else
+char capabilites[] = "";
+#endif
+
+/* Read-only (0444) string entry "capabilities" backed by capabilites[]. */
+static struct ctl_table caps_kern_table[] = {
+ {
+ .procname = "capabilities",
+ .data = capabilites,
+ .maxlen = sizeof(capabilites),
+ .mode = 0444,
+ .proc_handler = proc_dostring,
+ },
+ {}
+};
+
+/* Root table: places the entry above under the "kernel" directory. */
+static struct ctl_table caps_root_table[] = {
+ {
+ .procname = "kernel",
+ .mode = 0555,
+ .child = caps_kern_table,
+ },
+ {}
+};
+
+/*
+ * Core initcall: register the kernel/capabilities sysctl.
+ *
+ * Returns 0 on success, or -ENOMEM (after logging an error) if the table
+ * could not be registered.
+ */
+static int __init capabilities_register_sysctl(void)
+{
+	if (register_sysctl_table(caps_root_table))
+		return 0;
+
+	pr_err("Unable to register CAPABILITIES sysctl\n");
+	return -ENOMEM;
+}
+
+core_initcall(capabilities_register_sysctl);
diff --git a/arch/metag/kernel/signal.c b/arch/metag/kernel/signal.c
new file mode 100644
index 00000000000..3be61cf0b14
--- /dev/null
+++ b/arch/metag/kernel/signal.c
@@ -0,0 +1,344 @@
+/*
+ * Copyright (C) 1991,1992 Linus Torvalds
+ * Copyright (C) 2005-2012 Imagination Technologies Ltd.
+ *
+ * 1997-11-28 Modified for POSIX.1b signals by Richard Henderson
+ *
+ */
+
+#include <linux/sched.h>
+#include <linux/mm.h>
+#include <linux/smp.h>
+#include <linux/kernel.h>
+#include <linux/signal.h>
+#include <linux/errno.h>
+#include <linux/wait.h>
+#include <linux/ptrace.h>
+#include <linux/unistd.h>
+#include <linux/stddef.h>
+#include <linux/personality.h>
+#include <linux/uaccess.h>
+#include <linux/tracehook.h>
+
+#include <asm/ucontext.h>
+#include <asm/cacheflush.h>
+#include <asm/switch.h>
+#include <asm/syscall.h>
+#include <asm/syscalls.h>
+
+/*
+ * Shorthand for the saved-context fields the signal code touches; they are
+ * member paths and are used as regs->REG_xxx on a struct pt_regs pointer.
+ */
+#define REG_FLAGS ctx.SaveMask
+#define REG_RETVAL ctx.DX[0].U0
+#define REG_SYSCALL ctx.DX[0].U1
+#define REG_SP ctx.AX[0].U0
+#define REG_ARG1 ctx.DX[3].U1
+#define REG_ARG2 ctx.DX[3].U0
+#define REG_ARG3 ctx.DX[2].U1
+#define REG_PC ctx.CurrPC
+#define REG_RTP ctx.DX[4].U1
+
+/* Signal frame written to the user stack by setup_rt_frame(). */
+struct rt_sigframe {
+ struct siginfo info;
+ struct ucontext uc;
+ unsigned long retcode[2]; /* sigreturn trampoline: MOV + SWITCH */
+};
+
+/*
+ * Reload the general purpose registers, catch buffers and read pipeline
+ * state of @regs from the user-space sigcontext @sc.
+ *
+ * The copies stop at the first failure, but the context is marked as a
+ * user-mode one in either case.  Returns 0 on success or the error from
+ * the failing copy-in helper.
+ */
+static int restore_sigcontext(struct pt_regs *regs,
+			      struct sigcontext __user *sc)
+{
+	int err;
+
+	/* Always make any pending restarted system calls return -EINTR */
+	current_thread_info()->restart_block.fn = do_no_restart_syscall;
+
+	err = metag_gp_regs_copyin(regs, 0, sizeof(struct user_gp_regs), NULL,
+				   &sc->regs);
+	if (err)
+		goto mark_user;
+
+	err = metag_cb_regs_copyin(regs, 0, sizeof(struct user_cb_regs), NULL,
+				   &sc->cb);
+	if (err)
+		goto mark_user;
+
+	err = metag_rp_state_copyin(regs, 0, sizeof(struct user_rp_state),
+				    NULL, &sc->rp);
+
+mark_user:
+	/* This is a user-mode context. */
+	regs->REG_FLAGS |= TBICTX_PRIV_BIT;
+
+	return err;
+}
+
+/*
+ * rt_sigreturn system call: undo what setup_rt_frame() did.
+ *
+ * Locates the signal frame just below the user stack pointer, restores the
+ * blocked signal mask, the saved register context and the alternate stack
+ * settings.  Returns the restored return-value register, or 0 after forcing
+ * SIGSEGV on the caller if the frame is bad.
+ */
+long sys_rt_sigreturn(void)
+{
+	struct pt_regs *regs = current_pt_regs();
+	struct rt_sigframe __user *frame;
+	sigset_t set;
+
+	/* NOTE - Meta stack goes UPWARDS - so we wind the stack back */
+	frame = (__force struct rt_sigframe __user *)(regs->REG_SP -
+						      sizeof(*frame));
+
+	if (!access_ok(VERIFY_READ, frame, sizeof(*frame)) ||
+	    __copy_from_user(&set, &frame->uc.uc_sigmask, sizeof(set)))
+		goto badframe;
+
+	set_current_blocked(&set);
+
+	if (restore_sigcontext(regs, &frame->uc.uc_mcontext) ||
+	    restore_altstack(&frame->uc.uc_stack))
+		goto badframe;
+
+	return regs->REG_RETVAL;
+
+badframe:
+	force_sig(SIGSEGV, current);
+
+	return 0;
+}
+
+/*
+ * Save the register state in @regs and the old signal @mask into the
+ * user-space sigcontext @sc, then scrub the catch-buffer and LSM state from
+ * the live context so that it is not replayed into the signal handler.
+ *
+ * The register copies stop at the first failure; the scrubbing and the
+ * oldmask store are done regardless.  Returns 0 on success, non-zero
+ * otherwise.
+ */
+static int setup_sigcontext(struct sigcontext __user *sc, struct pt_regs *regs,
+			    unsigned long mask)
+{
+	int err;
+
+	err = metag_gp_regs_copyout(regs, 0, sizeof(struct user_gp_regs), NULL,
+				    &sc->regs);
+	if (err)
+		goto scrub;
+
+	err = metag_cb_regs_copyout(regs, 0, sizeof(struct user_cb_regs), NULL,
+				    &sc->cb);
+	if (err)
+		goto scrub;
+
+	err = metag_rp_state_copyout(regs, 0, sizeof(struct user_rp_state),
+				     NULL, &sc->rp);
+
+scrub:
+	/* OK, clear that cbuf flag in the old context, or our stored
+	 * catch buffer will be restored when we go to call the signal
+	 * handler. Also clear out the CBRP RA/RD pipe bit in case
+	 * that is pending as well!
+	 * Note that as we have already stored this context, these
+	 * flags will get restored on sigreturn to their original
+	 * state.
+	 */
+	regs->REG_FLAGS &= ~(TBICTX_XCBF_BIT | TBICTX_CBUF_BIT |
+			     TBICTX_CBRP_BIT);
+
+	/* Clear out the LSM_STEP bits in case we are in the middle of
+	 * an MSET/MGET.
+	 */
+	regs->ctx.Flags &= ~TXSTATUS_LSM_STEP_BITS;
+
+	err |= __put_user(mask, &sc->oldmask);
+
+	return err;
+}
+
+/*
+ * Determine which stack to use for the signal frame.
+ *
+ * Starts from @sp, switching to the base of the alternate signal stack when
+ * the handler asked for SA_ONSTACK and sas_ss_flags() reports 0 for @sp.
+ * The result is rounded up to an 8 byte boundary.  @frame_size is not
+ * used: Meta stacks grow upwards, so the frame begins at the returned
+ * address.
+ */
+static void __user *get_sigframe(struct k_sigaction *ka, unsigned long sp,
+				 size_t frame_size)
+{
+	if (ka->sa.sa_flags & SA_ONSTACK) {
+		if (!sas_ss_flags(sp))
+			sp = current->sas_ss_sp;
+	}
+
+	/* 8byte align stack */
+	sp += 7;
+	sp &= ~7UL;
+
+	return (void __user *)sp;
+}
+
+/*
+ * Build an rt signal frame on the user stack and redirect @regs so that the
+ * task resumes in the handler of @ka with (sig, &info, &ucontext) as its
+ * arguments and the frame's trampoline as its return address.
+ *
+ * Returns 0 on success.  On any failure the task is sent SIGSEGV through
+ * force_sigsegv() and -EFAULT is returned.
+ */
+static int setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
+ sigset_t *set, struct pt_regs *regs)
+{
+ struct rt_sigframe __user *frame;
+ int err = -EFAULT; /* result if the access_ok() check below fails */
+ unsigned long code;
+
+ frame = get_sigframe(ka, regs->REG_SP, sizeof(*frame));
+ if (!access_ok(VERIFY_WRITE, frame, sizeof(*frame)))
+ goto out;
+
+ err = copy_siginfo_to_user(&frame->info, info);
+
+ /* Create the ucontext. */
+ err |= __put_user(0, &frame->uc.uc_flags);
+ err |= __put_user(0, (unsigned long __user *)&frame->uc.uc_link);
+ err |= __save_altstack(&frame->uc.uc_stack, regs->REG_SP);
+ err |= setup_sigcontext(&frame->uc.uc_mcontext,
+ regs, set->sig[0]);
+ err |= __copy_to_user(&frame->uc.uc_sigmask, set, sizeof(*set));
+
+ if (err)
+ goto out;
+
+ /* Set up to return from userspace. */
+
+ /* MOV D1Re0 (D1.0), #__NR_rt_sigreturn */
+ code = 0x03000004 | (__NR_rt_sigreturn << 3);
+ err |= __put_user(code, (unsigned long __user *)(&frame->retcode[0]));
+
+ /* SWITCH #__METAG_SW_SYS */
+ code = __METAG_SW_ENCODING(SYS);
+ err |= __put_user(code, (unsigned long __user *)(&frame->retcode[1]));
+
+ if (err)
+ goto out;
+
+ /* Set up registers for signal handler */
+ /* The stack grows upwards, so the new SP sits just past the frame. */
+ regs->REG_RTP = (unsigned long) frame->retcode;
+ regs->REG_SP = (unsigned long) frame + sizeof(*frame);
+ regs->REG_ARG1 = sig;
+ regs->REG_ARG2 = (unsigned long) &frame->info;
+ regs->REG_ARG3 = (unsigned long) &frame->uc;
+ regs->REG_PC = (unsigned long) ka->sa.sa_handler;
+
+ pr_debug("SIG deliver (%s:%d): sp=%p pc=%08x pr=%08x\n",
+ current->comm, current->pid, frame, regs->REG_PC,
+ regs->REG_RTP);
+
+ /* Now pass size of 'new code' into sigtramp so we can do a more
+ * effective cache flush - directed rather than 'full flush'.
+ */
+ flush_cache_sigtramp(regs->REG_RTP, sizeof(frame->retcode));
+out:
+ if (err) {
+ force_sigsegv(sig, current);
+ return -EFAULT;
+ }
+ return 0;
+}
+
+/*
+ * Deliver one signal: build the user stack frame and, only if that worked,
+ * report the delivery through signal_delivered().
+ */
+static void handle_signal(unsigned long sig, siginfo_t *info,
+			  struct k_sigaction *ka, struct pt_regs *regs)
+{
+	sigset_t *saved_mask = sigmask_to_save();
+	int failed;
+
+	/* Set up the stack frame */
+	failed = setup_rt_frame(sig, ka, info, saved_mask, regs);
+	if (!failed)
+		signal_delivered(sig, info, ka, regs,
+				 test_thread_flag(TIF_SINGLESTEP));
+}
+
+ /*
+ * Notes for Meta.
+ * We have moved from the old 2.4.9 SH way of using syscall_nr (in the stored
+ * context) to passing in the syscall flag on the stack.
+ * This is because having syscall_nr in our context does not fit with TBX, and
+ * corrupted the stack.
+ *
+ * @syscall is the number of the system call being returned from, or
+ * negative if we did not come from one.
+ *
+ * Returns 0 when a signal was delivered or nothing needs restarting;
+ * otherwise the restart marker: 1 for a plain restart, -1 for one that
+ * must re-enter through restart_syscall.
+ */
+static int do_signal(struct pt_regs *regs, int syscall)
+{
+ unsigned int retval = 0, continue_addr = 0, restart_addr = 0;
+ struct k_sigaction ka;
+ siginfo_t info;
+ int signr;
+ int restart = 0;
+
+ /*
+ * By the end of rt_sigreturn the context describes the point that the
+ * signal was taken (which may happen to be just before a syscall if
+ * it's already been restarted). This should *never* be mistaken for a
+ * system call in need of restarting.
+ */
+ if (syscall == __NR_rt_sigreturn)
+ syscall = -1;
+
+ /* Did we come from a system call? */
+ if (syscall >= 0) {
+ continue_addr = regs->REG_PC;
+ restart_addr = continue_addr - 4;
+ retval = regs->REG_RETVAL;
+
+ /*
+ * Prepare for system call restart. We do this here so that a
+ * debugger will see the already changed PC.
+ */
+ switch (retval) {
+ case -ERESTART_RESTARTBLOCK:
+ restart = -2;
+ /* fallthrough: the ++restart below turns -2 into -1 */
+ case -ERESTARTNOHAND:
+ case -ERESTARTSYS:
+ case -ERESTARTNOINTR:
+ ++restart;
+ regs->REG_PC = restart_addr;
+ break;
+ }
+ }
+
+ /*
+ * Get the signal to deliver. When running under ptrace, at this point
+ * the debugger may change all our registers ...
+ */
+ signr = get_signal_to_deliver(&info, &ka, regs, NULL);
+ /*
+ * Depending on the signal settings we may need to revert the decision
+ * to restart the system call. But skip this if a debugger has chosen to
+ * restart at a different PC.
+ */
+ if (regs->REG_PC != restart_addr)
+ restart = 0;
+ if (signr > 0) {
+ if (unlikely(restart)) {
+ /* A handler will run: these cases fail with -EINTR instead. */
+ if (retval == -ERESTARTNOHAND
+ || retval == -ERESTART_RESTARTBLOCK
+ || (retval == -ERESTARTSYS
+ && !(ka.sa.sa_flags & SA_RESTART))) {
+ regs->REG_RETVAL = -EINTR;
+ regs->REG_PC = continue_addr;
+ }
+ }
+
+ /* Whee! Actually deliver the signal. */
+ handle_signal(signr, &info, &ka, regs);
+ return 0;
+ }
+
+ /* Handlerless -ERESTART_RESTARTBLOCK re-enters via restart_syscall */
+ if (unlikely(restart < 0))
+ regs->REG_SYSCALL = __NR_restart_syscall;
+
+ /*
+ * If there's no signal to deliver, we just put the saved sigmask back.
+ */
+ restore_saved_sigmask();
+
+ return restart;
+}
+
+/*
+ * Process pending work flags: reschedule, deliver signals, or run the
+ * notify-resume hook.  It loops with interrupts disabled between passes
+ * until no _TIF_WORK_MASK flag is left.
+ *
+ * Returns 0 normally, or the non-zero restart marker from do_signal() when
+ * a system call has to be restarted without running a handler.
+ */
+int do_work_pending(struct pt_regs *regs, unsigned int thread_flags,
+ int syscall)
+{
+ do {
+ if (likely(thread_flags & _TIF_NEED_RESCHED)) {
+ schedule();
+ } else {
+ /* Signals and resume notification only apply to user mode. */
+ if (unlikely(!user_mode(regs)))
+ return 0;
+ local_irq_enable();
+ if (thread_flags & _TIF_SIGPENDING) {
+ int restart = do_signal(regs, syscall);
+ if (unlikely(restart)) {
+ /*
+ * Restart without handlers.
+ * Deal with it without leaving
+ * the kernel space.
+ */
+ return restart;
+ }
+ /* Later passes must not treat this as a syscall return. */
+ syscall = -1;
+ } else {
+ clear_thread_flag(TIF_NOTIFY_RESUME);
+ tracehook_notify_resume(regs);
+ }
+ }
+ local_irq_disable();
+ thread_flags = current_thread_info()->flags;
+ } while (thread_flags & _TIF_WORK_MASK);
+ return 0;
+}
diff --git a/arch/metag/kernel/smp.c b/arch/metag/kernel/smp.c
new file mode 100644
index 00000000000..4b6d1f14df3
--- /dev/null
+++ b/arch/metag/kernel/smp.c
@@ -0,0 +1,575 @@
+/*
+ * Copyright (C) 2009,2010,2011 Imagination Technologies Ltd.
+ *
+ * Copyright (C) 2002 ARM Limited, All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#include <linux/atomic.h>
+#include <linux/delay.h>
+#include <linux/init.h>
+#include <linux/spinlock.h>
+#include <linux/sched.h>
+#include <linux/interrupt.h>
+#include <linux/cache.h>
+#include <linux/profile.h>
+#include <linux/errno.h>
+#include <linux/mm.h>
+#include <linux/err.h>
+#include <linux/cpu.h>
+#include <linux/smp.h>
+#include <linux/seq_file.h>
+#include <linux/irq.h>
+#include <linux/bootmem.h>
+
+#include <asm/cacheflush.h>
+#include <asm/cachepart.h>
+#include <asm/core_reg.h>
+#include <asm/cpu.h>
+#include <asm/mmu_context.h>
+#include <asm/pgtable.h>
+#include <asm/pgalloc.h>
+#include <asm/processor.h>
+#include <asm/setup.h>
+#include <asm/tlbflush.h>
+#include <asm/hwthread.h>
+#include <asm/traps.h>
+
+DECLARE_PER_CPU(PTBI, pTBI);
+
+void *secondary_data_stack;
+
+/*
+ * structures for inter-processor calls
+ * - A collection of single bit ipi messages.
+ */
+struct ipi_data {
+ spinlock_t lock; /* taken around every access to 'bits' */
+ unsigned long ipi_count; /* incremented on each do_IPI() call */
+ unsigned long bits; /* pending messages, bit n = ipi_msg_type n */
+};
+
+/* One instance per CPU; the sender sets bits, the target clears them. */
+static DEFINE_PER_CPU(struct ipi_data, ipi_data) = {
+ .lock = __SPIN_LOCK_UNLOCKED(ipi_data.lock),
+};
+
+static DEFINE_SPINLOCK(boot_lock);
+
+/*
+ * "thread" is assumed to be a valid Meta hardware thread ID.
+ *
+ * Programs the thread's PC to secondary_startup, gives it privilege, clears
+ * the minim enable bit and finally sets its ThreadEnable bit, all under
+ * boot_lock.  @idle is not used here.  Always returns 0; the wait for the
+ * secondary to come online is done by the caller, __cpu_up().
+ */
+int __cpuinit boot_secondary(unsigned int thread, struct task_struct *idle)
+{
+ u32 val;
+
+ /*
+ * set synchronisation state between this boot processor
+ * and the secondary one
+ */
+ spin_lock(&boot_lock);
+
+ /* Entry point goes into TXUPC register 0; register 1 is zeroed. */
+ core_reg_write(TXUPC_ID, 0, thread, (unsigned int)secondary_startup);
+ core_reg_write(TXUPC_ID, 1, thread, 0);
+
+ /*
+ * Give the thread privilege (PSTAT) and clear potentially problematic
+ * bits in the process (namely ISTAT, CBMarker, CBMarkerI, LSM_STEP).
+ */
+ core_reg_write(TXUCT_ID, TXSTATUS_REGNUM, thread, TXSTATUS_PSTAT_BIT);
+
+ /* Clear the minim enable bit. */
+ val = core_reg_read(TXUCT_ID, TXPRIVEXT_REGNUM, thread);
+ core_reg_write(TXUCT_ID, TXPRIVEXT_REGNUM, thread, val & ~0x80);
+
+ /*
+ * set the ThreadEnable bit (0x1) in the TXENABLE register
+ * for the specified thread - off it goes!
+ */
+ val = core_reg_read(TXUCT_ID, TXENABLE_REGNUM, thread);
+ core_reg_write(TXUCT_ID, TXENABLE_REGNUM, thread, val | 0x1);
+
+ /*
+ * now the secondary core is starting up let it run its
+ * calibrations, then wait for it to finish
+ */
+ spin_unlock(&boot_lock);
+
+ return 0;
+}
+
+/*
+ * Bring logical CPU @cpu online using @idle as its idle task.
+ *
+ * Loads swapper_pg_dir for the target hardware thread, publishes the idle
+ * task's stack through secondary_data_stack, starts the thread and then
+ * polls for up to one second (HZ jiffies) for it to mark itself online.
+ *
+ * Returns 0 on success, the error from boot_secondary(), or -EIO if the
+ * CPU did not come online in time.
+ */
+int __cpuinit __cpu_up(unsigned int cpu, struct task_struct *idle)
+{
+ unsigned int thread = cpu_2_hwthread_id[cpu];
+ int ret;
+
+ load_pgd(swapper_pg_dir, thread);
+
+ flush_tlb_all();
+
+ /*
+ * Tell the secondary CPU where to find its idle thread's stack.
+ */
+ secondary_data_stack = task_stack_page(idle);
+
+ /* Order the store above before the writes that start the thread. */
+ wmb();
+
+ /*
+ * Now bring the CPU into our world.
+ */
+ ret = boot_secondary(thread, idle);
+ if (ret == 0) {
+ unsigned long timeout;
+
+ /*
+ * CPU was successfully started, wait for it
+ * to come online or time out.
+ */
+ timeout = jiffies + HZ;
+ while (time_before(jiffies, timeout)) {
+ if (cpu_online(cpu))
+ break;
+
+ udelay(10);
+ barrier();
+ }
+
+ if (!cpu_online(cpu))
+ ret = -EIO;
+ }
+
+ /* The stack pointer hand-off is finished, whatever the outcome. */
+ secondary_data_stack = NULL;
+
+ if (ret) {
+ pr_crit("CPU%u: processor failed to boot\n", cpu);
+
+ /*
+ * FIXME: We need to clean up the new idle thread. --rmk
+ */
+ }
+
+ return ret;
+}
+
+#ifdef CONFIG_HOTPLUG_CPU
+static DECLARE_COMPLETION(cpu_killed);
+
+/*
+ * __cpu_disable runs on the processor to be shutdown.
+ *
+ * Marks the CPU offline, migrates its IRQs away, flushes the caches and the
+ * local TLB, and removes the CPU from the mm cpumask of every process that
+ * has an mm.  Always returns 0.
+ */
+int __cpuexit __cpu_disable(void)
+{
+ unsigned int cpu = smp_processor_id();
+ struct task_struct *p;
+
+ /*
+ * Take this CPU offline. Once we clear this, we can't return,
+ * and we must not schedule until we're ready to give up the cpu.
+ */
+ set_cpu_online(cpu, false);
+
+ /*
+ * OK - migrate IRQs away from this CPU
+ */
+ migrate_irqs();
+
+ /*
+ * Flush user cache and TLB mappings, and then remove this CPU
+ * from the vm mask set of all processes.
+ */
+ flush_cache_all();
+ local_flush_tlb_all();
+
+ /* tasklist_lock is held for reading while the process list is walked. */
+ read_lock(&tasklist_lock);
+ for_each_process(p) {
+ if (p->mm)
+ cpumask_clear_cpu(cpu, mm_cpumask(p->mm));
+ }
+ read_unlock(&tasklist_lock);
+
+ return 0;
+}
+
+/*
+ * called on the thread which is asking for a CPU to be shutdown -
+ * waits until shutdown has completed, or it is timed out.
+ *
+ * The dying CPU signals cpu_killed from cpu_die(); a timeout is only
+ * logged, nothing else is done about it.
+ *
+ * NOTE(review): the timeout passed is msecs_to_jiffies(1), i.e. one
+ * millisecond - confirm that such a short wait is intended.
+ */
+void __cpuexit __cpu_die(unsigned int cpu)
+{
+ if (!wait_for_completion_timeout(&cpu_killed, msecs_to_jiffies(1)))
+ pr_err("CPU%u: unable to kill\n", cpu);
+}
+
+/*
+ * Called from the idle thread for the CPU which has been shutdown.
+ *
+ * Note that we do not return from this function. If this cpu is
+ * brought online again it will need to run secondary_startup().
+ */
+void __cpuexit cpu_die(void)
+{
+ local_irq_disable();
+ idle_task_exit();
+
+ /* Wake the waiter in __cpu_die(). */
+ complete(&cpu_killed);
+
+ /*
+ * D0Re0 ^ D0Re0 is zero, so this writes 0 to TXENABLE, clearing the
+ * ThreadEnable bit that boot_secondary() sets to start a thread.
+ */
+ asm ("XOR TXENABLE, D0Re0,D0Re0\n");
+}
+#endif /* CONFIG_HOTPLUG_CPU */
+
+/*
+ * Called by both boot and secondaries to move global data into
+ * per-processor storage.  Currently that is just the calibrated
+ * loops_per_jiffy value, copied into @cpuid's cpu_data.
+ */
+void __cpuinit smp_store_cpu_info(unsigned int cpuid)
+{
+	per_cpu(cpu_data, cpuid).loops_per_jiffy = loops_per_jiffy;
+}
+
+/*
+ * This is the secondary CPU boot entry. We're using this CPU's
+ * idle thread stack and the global page tables.
+ *
+ * Attaches to init_mm, installs the TBI pointer, sets up traps and
+ * privilege protection, enables interrupts, calibrates the delay loop and
+ * marks the CPU online (which lets __cpu_up() on the booting CPU finish)
+ * before entering the idle loop.
+ */
+asmlinkage void secondary_start_kernel(void)
+{
+ struct mm_struct *mm = &init_mm;
+ unsigned int cpu = smp_processor_id();
+
+ /*
+ * All kernel threads share the same mm context; grab a
+ * reference and switch to it.
+ */
+ atomic_inc(&mm->mm_users);
+ atomic_inc(&mm->mm_count);
+ current->active_mm = mm;
+ cpumask_set_cpu(cpu, mm_cpumask(mm));
+ enter_lazy_tlb(mm, current);
+ local_flush_tlb_all();
+
+ /*
+ * TODO: Some day it might be useful for each Linux CPU to
+ * have its own TBI structure. That would allow each Linux CPU
+ * to run different interrupt handlers for the same IRQ
+ * number.
+ *
+ * For now, simply copying the pointer to the boot CPU's TBI
+ * structure is sufficient because we always want to run the
+ * same interrupt handler whatever CPU takes the interrupt.
+ */
+ per_cpu(pTBI, cpu) = __TBI(TBID_ISTAT_BIT);
+
+ if (!per_cpu(pTBI, cpu))
+ panic("No TBI found!");
+
+ per_cpu_trap_init(cpu);
+
+ preempt_disable();
+
+ setup_priv();
+
+ /*
+ * Enable local interrupts.
+ */
+ tbi_startup_interrupt(TBID_SIGNUM_TRT);
+ notify_cpu_starting(cpu);
+ local_irq_enable();
+
+ pr_info("CPU%u (thread %u): Booted secondary processor\n",
+ cpu, cpu_2_hwthread_id[cpu]);
+
+ /* Calibrate first, then snapshot the result into per-cpu data. */
+ calibrate_delay();
+ smp_store_cpu_info(cpu);
+
+ /*
+ * OK, now it's safe to let the boot CPU continue
+ */
+ set_cpu_online(cpu, true);
+
+ /*
+ * Check for cache aliasing.
+ * Preemption is disabled
+ */
+ check_for_cache_aliasing(cpu);
+
+ /*
+ * OK, it's off to the idle thread for us
+ */
+ cpu_idle();
+}
+
+/*
+ * Called once all secondaries have been brought up: log how many CPUs are
+ * online together with their summed BogoMIPS.  @max_cpus is not used.
+ */
+void __init smp_cpus_done(unsigned int max_cpus)
+{
+	unsigned long total_lpj = 0;
+	int cpu;
+
+	for_each_online_cpu(cpu) {
+		total_lpj += per_cpu(cpu_data, cpu).loops_per_jiffy;
+	}
+
+	pr_info("SMP: Total of %d processors activated (%lu.%02lu BogoMIPS).\n",
+		num_online_cpus(),
+		total_lpj / (500000/HZ),
+		(total_lpj / (5000/HZ)) % 100);
+}
+
+/*
+ * Prepare the boot CPU for SMP bring-up: set up a context for init_mm,
+ * record the CPU number in the current thread_info, store the boot CPU's
+ * calibration data and mark every possible CPU as present.
+ * @max_cpus is not used.
+ */
+void __init smp_prepare_cpus(unsigned int max_cpus)
+{
+	const unsigned int boot_cpu = smp_processor_id();
+
+	init_new_context(current, &init_mm);
+	current_thread_info()->cpu = boot_cpu;
+
+	smp_store_cpu_info(boot_cpu);
+
+	init_cpu_present(cpu_possible_mask);
+}
+
+/*
+ * Record the TBI pointer for the boot CPU in its per-cpu slot.  The kernel
+ * cannot continue without one, so a NULL result is fatal.
+ */
+void __init smp_prepare_boot_cpu(void)
+{
+	unsigned int cpu = smp_processor_id();
+	PTBI tbi = __TBI(TBID_ISTAT_BIT);
+
+	per_cpu(pTBI, cpu) = tbi;
+
+	if (!tbi)
+		panic("No TBI found!");
+}
+
+static void smp_cross_call(cpumask_t callmap, enum ipi_msg_type msg);
+
+/*
+ * Post IPI message @msg to every CPU in @mask.
+ *
+ * With local interrupts disabled, the message bit is set in each target's
+ * ipi_data under that CPU's lock.  Only CPUs whose bit was not already
+ * pending are then passed to smp_cross_call().
+ */
+static void send_ipi_message(const struct cpumask *mask, enum ipi_msg_type msg)
+{
+ unsigned long flags;
+ unsigned int cpu;
+ cpumask_t map;
+
+ cpumask_clear(&map);
+ local_irq_save(flags);
+
+ for_each_cpu(cpu, mask) {
+ struct ipi_data *ipi = &per_cpu(ipi_data, cpu);
+
+ spin_lock(&ipi->lock);
+
+ /*
+ * KICK interrupts are queued in hardware so we'll get
+ * multiple interrupts if we call smp_cross_call()
+ * multiple times for one msg. The problem is that we
+ * only have one bit for each message - we can't queue
+ * them in software.
+ *
+ * The first time through ipi_handler() we'll clear
+ * the msg bit, having done all the work. But when we
+ * return we'll get _another_ interrupt (and another,
+ * and another until we've handled all the queued
+ * KICKs). Running ipi_handler() when there's no work
+ * to do is bad because that's how kick handler
+ * chaining detects who the KICK was intended for.
+ * See arch/metag/kernel/kick.c for more details.
+ *
+ * So only add 'cpu' to 'map' if we haven't already
+ * queued a KICK interrupt for 'msg'.
+ */
+ if (!(ipi->bits & (1 << msg))) {
+ ipi->bits |= 1 << msg;
+ cpumask_set_cpu(cpu, &map);
+ }
+
+ spin_unlock(&ipi->lock);
+ }
+
+ /*
+ * Call the platform specific cross-CPU call function.
+ */
+ smp_cross_call(map, msg);
+
+ local_irq_restore(flags);
+}
+
+/* Send IPI_CALL_FUNC to every CPU in @mask. */
+void arch_send_call_function_ipi_mask(const struct cpumask *mask)
+{
+ send_ipi_message(mask, IPI_CALL_FUNC);
+}
+
+/* Send IPI_CALL_FUNC_SINGLE to @cpu only. */
+void arch_send_call_function_single_ipi(int cpu)
+{
+ send_ipi_message(cpumask_of(cpu), IPI_CALL_FUNC_SINGLE);
+}
+
+/*
+ * Emit one "IPI:" line into @p holding, for each present CPU, the number
+ * of times do_IPI() has run on it.
+ */
+void show_ipi_list(struct seq_file *p)
+{
+	unsigned int cpu;
+
+	seq_puts(p, "IPI:");
+
+	for_each_present_cpu(cpu) {
+		unsigned long count = per_cpu(ipi_data, cpu).ipi_count;
+
+		seq_printf(p, " %10lu", count);
+	}
+
+	seq_putc(p, '\n');
+}
+
+static DEFINE_SPINLOCK(stop_lock);
+
+/*
+ * Main handler for inter-processor interrupts
+ *
+ * For Meta, the ipimask now only identifies a single
+ * category of IPI (Bit 1 IPIs have been replaced by a
+ * different mechanism):
+ *
+ * Bit 0 - Inter-processor function call
+ *
+ * NOTE(review): the list above looks out of date - the switch below
+ * dispatches IPI_RESCHEDULE, IPI_CALL_FUNC and IPI_CALL_FUNC_SINGLE.
+ *
+ * At most one pending message (the lowest set bit) is consumed per call.
+ * Returns 1 if a message was handled and 0 if none was pending.
+ */
+static int do_IPI(struct pt_regs *regs)
+{
+ unsigned int cpu = smp_processor_id();
+ struct ipi_data *ipi = &per_cpu(ipi_data, cpu);
+ struct pt_regs *old_regs = set_irq_regs(regs);
+ unsigned long msgs, nextmsg;
+ int handled = 0;
+
+ ipi->ipi_count++;
+
+ /* Claim the lowest pending message bit under the lock. */
+ spin_lock(&ipi->lock);
+ msgs = ipi->bits;
+ nextmsg = msgs & -msgs;
+ ipi->bits &= ~nextmsg;
+ spin_unlock(&ipi->lock);
+
+ if (nextmsg) {
+ handled = 1;
+
+ /* Convert the single-bit mask into its bit index. */
+ nextmsg = ffz(~nextmsg);
+ switch (nextmsg) {
+ case IPI_RESCHEDULE:
+ scheduler_ipi();
+ break;
+
+ case IPI_CALL_FUNC:
+ generic_smp_call_function_interrupt();
+ break;
+
+ case IPI_CALL_FUNC_SINGLE:
+ generic_smp_call_function_single_interrupt();
+ break;
+
+ default:
+ pr_crit("CPU%u: Unknown IPI message 0x%lx\n",
+ cpu, nextmsg);
+ break;
+ }
+ }
+
+ set_irq_regs(old_regs);
+
+ return handled;
+}
+
+/* Send IPI_RESCHEDULE to @cpu; the target runs scheduler_ipi() in do_IPI(). */
+void smp_send_reschedule(int cpu)
+{
+ send_ipi_message(cpumask_of(cpu), IPI_RESCHEDULE);
+}
+
+/*
+ * Cross-call target used by smp_send_stop(): take the calling CPU offline
+ * and halt it.  While the system is booting or running, a message and a
+ * stack dump are printed first, serialized by stop_lock.  @data is unused.
+ */
+static void stop_this_cpu(void *data)
+{
+	unsigned int cpu = smp_processor_id();
+
+	switch (system_state) {
+	case SYSTEM_BOOTING:
+	case SYSTEM_RUNNING:
+		spin_lock(&stop_lock);
+		pr_crit("CPU%u: stopping\n", cpu);
+		dump_stack();
+		spin_unlock(&stop_lock);
+		break;
+	default:
+		break;
+	}
+
+	set_cpu_online(cpu, false);
+
+	local_irq_disable();
+
+	hard_processor_halt(HALT_OK);
+}
+
+/*
+ * Request stop_this_cpu() through smp_call_function(), passing 0 as the
+ * wait argument so the caller does not wait for it to finish.
+ */
+void smp_send_stop(void)
+{
+ smp_call_function(stop_this_cpu, NULL, 0);
+}
+
+/*
+ * not supported here
+ *
+ * Every request is rejected with -EINVAL; @multiplier is ignored.
+ */
+int setup_profiling_timer(unsigned int multiplier)
+{
+ return -EINVAL;
+}
+
+/*
+ * We use KICKs for inter-processor interrupts.
+ *
+ * For every CPU in "callmap" the IPI data must already have been
+ * stored in that CPU's "ipi_data" member prior to calling this
+ * function.
+ *
+ * One KICK is written to the hardware thread behind each CPU in the map.
+ * "irq" is not used.
+ */
+static void kick_raise_softirq(cpumask_t callmap, unsigned int irq)
+{
+	int cpu;
+
+	for_each_cpu(cpu, &callmap) {
+		unsigned int hwthread = cpu_2_hwthread_id[cpu];
+
+		/* A CPU without a hardware thread mapping is a bug. */
+		BUG_ON(hwthread == BAD_HWTHREAD_ID);
+
+		metag_out32(1, T0KICKI + (hwthread * TnXKICK_STRIDE));
+	}
+}
+
+/*
+ * KICK handler for IPIs: runs do_IPI() on the interrupted context and
+ * reports through @handled whether a message was consumed.  @State is
+ * returned unchanged.
+ */
+static TBIRES ipi_handler(TBIRES State, int SigNum, int Triggers,
+ int Inst, PTBI pTBI, int *handled)
+{
+ *handled = do_IPI((struct pt_regs *)State.Sig.pCtx);
+
+ return State;
+}
+
+/* Registered with kick_register_func() from smp_init_cpus(). */
+static struct kick_irq_handler ipi_irq = {
+ .func = ipi_handler,
+};
+
+/*
+ * Raise a KICK on every CPU in @callmap.  @msg is not used here: the
+ * message bit has already been set by send_ipi_message().
+ */
+static void smp_cross_call(cpumask_t callmap, enum ipi_msg_type msg)
+{
+ kick_raise_softirq(callmap, 1);
+}
+
+/*
+ * Count the hardware threads, out of the first CONFIG_NR_CPUS, whose
+ * TXENABLE register reads back non-zero.
+ */
+static inline unsigned int get_core_count(void)
+{
+	unsigned int count = 0;
+	int thread;
+
+	for (thread = 0; thread < CONFIG_NR_CPUS; thread++) {
+		if (!core_reg_read(TXUCT_ID, TXENABLE_REGNUM, thread))
+			continue;
+
+		count++;
+	}
+
+	return count;
+}
+
+/*
+ * Initialise the CPU possible map early - this describes the CPUs
+ * which may be present or become present in the system.
+ *
+ * Also fills in an identity CPU <-> hardware thread mapping for every CPU
+ * that has none yet, and registers the IPI KICK handler.
+ */
+void __init smp_init_cpus(void)
+{
+	unsigned int cpu;
+	const unsigned int ncores = get_core_count();
+
+	/* If no hwthread_map early param was set use default mapping */
+	for (cpu = 0; cpu < NR_CPUS; cpu++) {
+		if (cpu_2_hwthread_id[cpu] != BAD_HWTHREAD_ID)
+			continue;
+
+		cpu_2_hwthread_id[cpu] = cpu;
+		hwthread_id_2_cpu[cpu] = cpu;
+	}
+
+	for (cpu = 0; cpu < ncores; cpu++)
+		set_cpu_possible(cpu, true);
+
+	kick_register_func(&ipi_irq);
+}
diff --git a/arch/metag/kernel/stacktrace.c b/arch/metag/kernel/stacktrace.c
new file mode 100644
index 00000000000..5510361d5be
--- /dev/null
+++ b/arch/metag/kernel/stacktrace.c
@@ -0,0 +1,187 @@
+#include <linux/export.h>
+#include <linux/sched.h>
+#include <linux/stacktrace.h>
+
+#include <asm/stacktrace.h>
+
+#if defined(CONFIG_FRAME_POINTER)
+
+#ifdef CONFIG_KALLSYMS
+#include <linux/kallsyms.h>
+#include <linux/module.h>
+
+/*
+ * Address and size of ___TBIBoingVec, looked up lazily by tbi_boing_init().
+ * tbi_boing_addr == 0 means "not looked up yet"; tbi_boing_size stays 0
+ * unless both the address and the size were found.
+ */
+static unsigned long tbi_boing_addr;
+static unsigned long tbi_boing_size;
+
+static void tbi_boing_init(void)
+{
+ /* We need to know where TBIBoingVec is and its size */
+ unsigned long size;
+ unsigned long offset;
+ char modname[MODULE_NAME_LEN];
+ char name[KSYM_NAME_LEN];
+ tbi_boing_addr = kallsyms_lookup_name("___TBIBoingVec");
+ /* 1 marks "lookup failed" so the lookup is not repeated. */
+ if (!tbi_boing_addr)
+ tbi_boing_addr = 1;
+ else if (!lookup_symbol_attrs(tbi_boing_addr, &size,
+ &offset, modname, name))
+ tbi_boing_size = size;
+}
+#endif
+
+/* Round addr down to a multiple of size; the mask needs a power-of-two size. */
+#define ALIGN_DOWN(addr, size) ((addr)&(~((size)-1)))
+
+/*
+ * Unwind the current stack frame and store the new register values in the
+ * structure passed as argument. Unwinding is equivalent to a function return,
+ * hence the new PC value rather than LR should be used for backtrace.
+ *
+ * Returns 0 after updating frame->fp and frame->pc, or -EINVAL when the
+ * frame pointer is not 8-byte aligned, the next frame is a user-mode
+ * context, or the next frame pointer fails the range check below.
+ */
+int notrace unwind_frame(struct stackframe *frame)
+{
+ struct metag_frame *fp = (struct metag_frame *)frame->fp;
+ unsigned long lr;
+ unsigned long fpnew;
+
+ if (frame->fp & 0x7)
+ return -EINVAL;
+
+ fpnew = fp->fp;
+ lr = fp->lr - 4;
+
+#ifdef CONFIG_KALLSYMS
+ /* If we've reached TBIBoingVec then we're at an interrupt
+ * entry point or a syscall entry point. The frame pointer
+ * points to a pt_regs which can be used to continue tracing on
+ * the other side of the boing.
+ */
+ if (!tbi_boing_addr)
+ tbi_boing_init();
+ if (tbi_boing_size && lr >= tbi_boing_addr &&
+ lr < tbi_boing_addr + tbi_boing_size) {
+ struct pt_regs *regs = (struct pt_regs *)fpnew;
+ if (user_mode(regs))
+ return -EINVAL;
+ /* Continue with the FP and return pointer saved in the context. */
+ fpnew = regs->ctx.AX[1].U0;
+ lr = regs->ctx.DX[4].U1;
+ }
+#endif
+
+ /* stack grows up, so frame pointers must decrease */
+ /* ...and must stay above the thread_info at the base of the stack. */
+ if (fpnew < (ALIGN_DOWN((unsigned long)fp, THREAD_SIZE) +
+ sizeof(struct thread_info)) || fpnew >= (unsigned long)fp)
+ return -EINVAL;
+
+ /* restore the registers from the stack frame */
+ frame->fp = fpnew;
+ frame->pc = lr;
+
+ return 0;
+}
+#else
+/*
+ * Frame-pointer-less unwinder: scan down the stack from frame->sp, one
+ * metag_frame at a time, for a saved return pointer that lies in kernel
+ * text.
+ *
+ * Returns 0 with frame->sp and frame->pc updated when one is found, or
+ * -EINVAL if frame->sp is not 8-byte aligned or the end of the stack is
+ * reached first.
+ */
+int notrace unwind_frame(struct stackframe *frame)
+{
+	struct metag_frame *cursor = (struct metag_frame *)frame->sp;
+
+	if (frame->sp & 0x7)
+		return -EINVAL;
+
+	while (!kstack_end(cursor)) {
+		unsigned long ret_addr = cursor->lr - 4;
+
+		cursor--;
+
+		if (!__kernel_text_address(ret_addr))
+			continue;
+
+		frame->sp = (unsigned long)cursor;
+		frame->pc = ret_addr;
+		return 0;
+	}
+
+	return -EINVAL;
+}
+#endif
+
+/*
+ * Call @fn on @frame and on each frame found by unwinding from it.  The
+ * walk stops as soon as @fn returns non-zero or unwind_frame() fails.
+ */
+void notrace walk_stackframe(struct stackframe *frame,
+			     int (*fn)(struct stackframe *, void *), void *data)
+{
+	for (;;) {
+		if (fn(frame, data))
+			return;
+
+		if (unwind_frame(frame) < 0)
+			return;
+	}
+}
+EXPORT_SYMBOL(walk_stackframe);
+
+#ifdef CONFIG_STACKTRACE
+/* State handed to save_trace() through walk_stackframe()'s data pointer. */
+struct stack_trace_data {
+ struct stack_trace *trace; /* destination buffer */
+ unsigned int no_sched_functions; /* non-zero: skip scheduler functions */
+ unsigned int skip; /* entries still to be dropped */
+};
+
+/*
+ * walk_stackframe() callback: append frame->pc to the trace unless it is a
+ * filtered scheduler function or still within the "skip" count.
+ *
+ * Returns non-zero once the trace buffer is full, which ends the walk.
+ */
+static int save_trace(struct stackframe *frame, void *d)
+{
+	struct stack_trace_data *ctx = d;
+	struct stack_trace *trace = ctx->trace;
+	unsigned long pc = frame->pc;
+
+	if (ctx->no_sched_functions && in_sched_functions(pc))
+		return 0;
+
+	if (ctx->skip) {
+		ctx->skip--;
+		return 0;
+	}
+
+	trace->entries[trace->nr_entries] = pc;
+	trace->nr_entries++;
+
+	return trace->nr_entries >= trace->max_entries;
+}
+
+/*
+ * Record a stack trace of @tsk into @trace, ending it with ULONG_MAX if
+ * there is room.
+ *
+ * For the current task the walk starts from this function's own frame.
+ * For another task it starts from the saved thread state, with scheduler
+ * functions filtered out - except on SMP, where nothing is walked and only
+ * the ULONG_MAX terminator is stored.
+ */
+void save_stack_trace_tsk(struct task_struct *tsk, struct stack_trace *trace)
+{
+ struct stack_trace_data data;
+ struct stackframe frame;
+
+ data.trace = trace;
+ data.skip = trace->skip;
+
+ if (tsk != current) {
+#ifdef CONFIG_SMP
+ /*
+ * What guarantees do we have here that 'tsk' is not
+ * running on another CPU? For now, ignore it as we
+ * can't guarantee we won't explode.
+ */
+ if (trace->nr_entries < trace->max_entries)
+ trace->entries[trace->nr_entries++] = ULONG_MAX;
+ return;
+#else
+ data.no_sched_functions = 1;
+ frame.fp = thread_saved_fp(tsk);
+ frame.sp = thread_saved_sp(tsk);
+ frame.lr = 0; /* recovered from the stack */
+ frame.pc = thread_saved_pc(tsk);
+#endif
+ } else {
+ /* Read the live stack pointer straight from the A0StP register. */
+ register unsigned long current_sp asm ("A0StP");
+
+ data.no_sched_functions = 0;
+ frame.fp = (unsigned long)__builtin_frame_address(0);
+ frame.sp = current_sp;
+ frame.lr = (unsigned long)__builtin_return_address(0);
+ frame.pc = (unsigned long)save_stack_trace_tsk;
+ }
+
+ walk_stackframe(&frame, save_trace, &data);
+ if (trace->nr_entries < trace->max_entries)
+ trace->entries[trace->nr_entries++] = ULONG_MAX;
+}
+
+/* Record a stack trace of the current task into @trace. */
+void save_stack_trace(struct stack_trace *trace)
+{
+ save_stack_trace_tsk(current, trace);
+}
+EXPORT_SYMBOL_GPL(save_stack_trace);
+#endif
diff --git a/arch/metag/kernel/sys_metag.c b/arch/metag/kernel/sys_metag.c
new file mode 100644
index 00000000000..efe833a452f
--- /dev/null
+++ b/arch/metag/kernel/sys_metag.c
@@ -0,0 +1,180 @@
+/*
+ * This file contains various random system calls that
+ * have a non-standard calling sequence on the Linux/Meta
+ * platform.
+ */
+
+#include <linux/errno.h>
+#include <linux/sched.h>
+#include <linux/mm.h>
+#include <linux/syscalls.h>
+#include <linux/mman.h>
+#include <linux/file.h>
+#include <linux/fs.h>
+#include <linux/uaccess.h>
+#include <linux/unistd.h>
+#include <asm/cacheflush.h>
+#include <asm/core_reg.h>
+#include <asm/global_lock.h>
+#include <asm/switch.h>
+#include <asm/syscall.h>
+#include <asm/syscalls.h>
+#include <asm/user_gateway.h>
+
+/*
+ * Build a 64-bit value from two halves: @hi supplies bits 63..32 and the
+ * low 32 bits of @lo supply bits 31..0.
+ */
+#define merge_64(hi, lo) ((((unsigned long long)(hi)) << 32) + \
+ ((lo) & 0xffffffffUL))
+
+/*
+ * Validate an mmap request against the Meta memory map.
+ *
+ * The bottom of the memory map holds things that userland must not
+ * trample on, so a MAP_FIXED request whose address lies below
+ * TASK_UNMAPPED_BASE is refused with -EINVAL. Everything else returns 0.
+ * @len is not examined.
+ */
+int metag_mmap_check(unsigned long addr, unsigned long len,
+		     unsigned long flags)
+{
+	const int fixed = (flags & MAP_FIXED) != 0;
+
+	return (fixed && addr < TASK_UNMAPPED_BASE) ? -EINVAL : 0;
+}
+
+/*
+ * mmap2: like mmap, but @pgoff is given in fixed 4KiB (1 << 12) units
+ * regardless of PAGE_SIZE. Offsets that are not a whole number of pages
+ * are rejected with -EINVAL; otherwise the offset is converted to pages
+ * and handed to sys_mmap_pgoff().
+ */
+asmlinkage long sys_mmap2(unsigned long addr, unsigned long len,
+			  unsigned long prot, unsigned long flags,
+			  unsigned long fd, unsigned long pgoff)
+{
+	const unsigned int extra_shift = PAGE_SHIFT - 12;
+	const unsigned long sub_page_bits = pgoff & ((1 << extra_shift) - 1);
+
+	if (sub_page_bits)
+		return -EINVAL;
+
+	return sys_mmap_pgoff(addr, len, prot, flags, fd,
+			      pgoff >> extra_shift);
+}
+
+/*
+ * OR @mask into the byte at user address @addr while holding the global
+ * lock (__global_lock2).
+ *
+ * Addresses below LINCORE_BASE are rejected with -EFAULT. The data cache
+ * is flushed for the location before the read and again after the write
+ * attempt. Returns 0 on success, otherwise the error returned by
+ * __get_user() or __put_user().
+ *
+ * NOTE(review): the flush length is sizeof(mask) (an int) although only a
+ * single char is read and written - confirm this is intended.
+ */
+asmlinkage int sys_metag_setglobalbit(char __user *addr, int mask)
+{
+ char tmp;
+ int ret = 0;
+ unsigned int flags;
+
+ if (!((__force unsigned int)addr >= LINCORE_BASE))
+ return -EFAULT;
+
+ __global_lock2(flags);
+
+ metag_data_cache_flush((__force void *)addr, sizeof(mask));
+
+ ret = __get_user(tmp, addr);
+ if (ret)
+ goto out;
+ tmp |= mask;
+ ret = __put_user(tmp, addr);
+
+ metag_data_cache_flush((__force void *)addr, sizeof(mask));
+
+out:
+ /* The lock is dropped on both the success and the failure path. */
+ __global_unlock2(flags);
+
+ return ret;
+}
+
+#define TXDEFR_FPU_MASK ((0x1f << 16) | 0x1f)
+
+/*
+ * Replace the TXDEFR bits covered by TXDEFR_FPU_MASK with the matching
+ * bits of @flags; all other TXDEFR bits are written back unchanged and
+ * bits of @flags outside the mask are ignored.
+ */
+asmlinkage void sys_metag_set_fpu_flags(unsigned int flags)
+{
+	unsigned int txdefr = __core_reg_get(TXDEFR);
+
+	txdefr = (txdefr & ~TXDEFR_FPU_MASK) | (flags & TXDEFR_FPU_MASK);
+	__core_reg_set(TXDEFR, txdefr);
+}
+
+/*
+ * Record @ptr as the calling thread's TLS pointer and pass it to
+ * set_gateway_tls(). Always returns 0.
+ */
+asmlinkage int sys_metag_set_tls(void __user *ptr)
+{
+ current->thread.tls_ptr = ptr;
+ set_gateway_tls(ptr);
+
+ return 0;
+}
+
+/* Return the TLS pointer stored in the calling thread's thread struct. */
+asmlinkage void *sys_metag_get_tls(void)
+{
+ return (__force void *)current->thread.tls_ptr;
+}
+
+/*
+ * truncate64 wrapper: the 64-bit length arrives as two 32-bit halves
+ * (@lo, @hi) and is reassembled before calling sys_truncate64().
+ */
+asmlinkage long sys_truncate64_metag(const char __user *path, unsigned long lo,
+				     unsigned long hi)
+{
+	const unsigned long long length = merge_64(hi, lo);
+
+	return sys_truncate64(path, length);
+}
+
+/*
+ * ftruncate64 wrapper: the 64-bit length arrives as two 32-bit halves
+ * (@lo, @hi) and is reassembled before calling sys_ftruncate64().
+ */
+asmlinkage long sys_ftruncate64_metag(unsigned int fd, unsigned long lo,
+				      unsigned long hi)
+{
+	const unsigned long long length = merge_64(hi, lo);
+
+	return sys_ftruncate64(fd, length);
+}
+
+/*
+ * fadvise64_64 wrapper: offset and length each arrive as a lo/hi pair of
+ * 32-bit values and are reassembled before calling sys_fadvise64_64().
+ */
+asmlinkage long sys_fadvise64_64_metag(int fd, unsigned long offs_lo,
+				       unsigned long offs_hi,
+				       unsigned long len_lo,
+				       unsigned long len_hi, int advice)
+{
+	const unsigned long long offset = merge_64(offs_hi, offs_lo);
+	const unsigned long long length = merge_64(len_hi, len_lo);
+
+	return sys_fadvise64_64(fd, offset, length, advice);
+}
+
+/*
+ * readahead wrapper: the 64-bit offset arrives as two 32-bit halves
+ * (@lo, @hi) and is reassembled before calling sys_readahead().
+ */
+asmlinkage long sys_readahead_metag(int fd, unsigned long lo, unsigned long hi,
+				    size_t count)
+{
+	const unsigned long long offset = merge_64(hi, lo);
+
+	return sys_readahead(fd, offset, count);
+}
+
+/*
+ * pread64 wrapper: the 64-bit file position arrives as two 32-bit halves
+ * (@lo, @hi) and is reassembled before calling sys_pread64().
+ */
+asmlinkage ssize_t sys_pread64_metag(unsigned long fd, char __user *buf,
+				     size_t count, unsigned long lo,
+				     unsigned long hi)
+{
+	const unsigned long long pos = merge_64(hi, lo);
+
+	return sys_pread64(fd, buf, count, pos);
+}
+
+/*
+ * pwrite64 wrapper: the 64-bit file position arrives as two 32-bit halves
+ * (@lo, @hi) and is reassembled before calling sys_pwrite64().
+ */
+asmlinkage ssize_t sys_pwrite64_metag(unsigned long fd, char __user *buf,
+				      size_t count, unsigned long lo,
+				      unsigned long hi)
+{
+	const unsigned long long pos = merge_64(hi, lo);
+
+	return sys_pwrite64(fd, buf, count, pos);
+}
+
+/*
+ * sync_file_range wrapper: offset and length each arrive as a lo/hi pair
+ * of 32-bit values and are reassembled before calling
+ * sys_sync_file_range().
+ */
+asmlinkage long sys_sync_file_range_metag(int fd, unsigned long offs_lo,
+					  unsigned long offs_hi,
+					  unsigned long len_lo,
+					  unsigned long len_hi,
+					  unsigned int flags)
+{
+	const unsigned long long offset = merge_64(offs_hi, offs_lo);
+	const unsigned long long nbytes = merge_64(len_hi, len_lo);
+
+	return sys_sync_file_range(fd, offset, nbytes, flags);
+}
+
+/* Provide the actual syscall number to call mapping. */
+#undef __SYSCALL
+#define __SYSCALL(nr, call) [nr] = (call),
+
+/*
+ * We need wrappers for anything with unaligned 64bit arguments
+ */
+#define sys_truncate64 sys_truncate64_metag
+#define sys_ftruncate64 sys_ftruncate64_metag
+#define sys_fadvise64_64 sys_fadvise64_64_metag
+#define sys_readahead sys_readahead_metag
+#define sys_pread64 sys_pread64_metag
+#define sys_pwrite64 sys_pwrite64_metag
+#define sys_sync_file_range sys_sync_file_range_metag
+
+/*
+ * Note that we can't include <linux/unistd.h> here since the header
+ * guard will defeat us; <asm/unistd.h> checks for __SYSCALL as well.
+ */
+/*
+ * Every slot first defaults to sys_ni_syscall via the range designator;
+ * the [nr] = (call) entries produced by __SYSCALL() (presumably expanded
+ * by <asm/unistd.h>) then override the slots of implemented calls.
+ */
+const void *sys_call_table[__NR_syscalls] = {
+ [0 ... __NR_syscalls-1] = sys_ni_syscall,
+#include <asm/unistd.h>
+};
diff --git a/arch/metag/kernel/tbiunexp.S b/arch/metag/kernel/tbiunexp.S
new file mode 100644
index 00000000000..907bbe0b2e6
--- /dev/null
+++ b/arch/metag/kernel/tbiunexp.S
@@ -0,0 +1,22 @@
+/* Pass a breakpoint through to Codescape */
+
+#include <asm/tbx.h>
+
+ .text
+/*
+ * ___TBIUnExpXXX: saves the argument registers on the stack, issues the
+ * SWITCH below, then reads the result pair back into D0Re0/D1Re0 and
+ * returns through D1RtP. Called from C as __TBIUnExpXXX(State, SigNum,
+ * Triggers, Inst, pTBI).
+ */
+ .global ___TBIUnExpXXX
+ .type ___TBIUnExpXXX,function
+___TBIUnExpXXX:
+ TSTT D0Ar2,#TBICTX_CRIT_BIT ! Result of nestable int call?
+ BZ $LTBINormCase ! UnExpXXX at background level
+ MOV D0Re0,TXMASKI ! Read TXMASKI
+ XOR TXMASKI,D1Re0,D1Re0 ! Turn off BGNDHALT handling!
+ OR D0Ar2,D0Ar2,D0Re0 ! Preserve bits cleared
+$LTBINormCase:
+ MSETL [A0StP],D0Ar6,D0Ar4,D0Ar2 ! Save args on stack
+ SETL [A0StP++],D0Ar2,D1Ar1 ! Init area for returned values
+ SWITCH #0xC20208 ! Total stack frame size 8 Dwords
+ ! write back size 2 Dwords
+ GETL D0Re0,D1Re0,[--A0StP] ! Get result
+ SUB A0StP,A0StP,#(8*3) ! Recover stack frame
+ MOV PC,D1RtP
+ .size ___TBIUnExpXXX,.-___TBIUnExpXXX
diff --git a/arch/metag/kernel/tcm.c b/arch/metag/kernel/tcm.c
new file mode 100644
index 00000000000..5d102b31ce8
--- /dev/null
+++ b/arch/metag/kernel/tcm.c
@@ -0,0 +1,151 @@
+/*
+ * Copyright (C) 2010 Imagination Technologies Ltd.
+ */
+
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/spinlock.h>
+#include <linux/stddef.h>
+#include <linux/genalloc.h>
+#include <linux/string.h>
+#include <linux/list.h>
+#include <linux/slab.h>
+#include <asm/page.h>
+#include <asm/tcm.h>
+
+/*
+ * One registered TCM region together with the allocator that manages it.
+ */
+struct tcm_pool {
+ struct list_head list; /* link in pool_list */
+ unsigned int tag; /* tag copied from the tcm_region */
+ unsigned long start; /* first address of the region (res.start) */
+ unsigned long end; /* last address of the region (res.end, inclusive) */
+ struct gen_pool *pool; /* genalloc pool covering start..end */
+};
+
+static LIST_HEAD(pool_list);
+
+/* Return the registered pool whose tag equals @tag, or NULL if none. */
+static struct tcm_pool *find_pool(unsigned int tag)
+{
+	struct tcm_pool *entry;
+
+	list_for_each_entry(entry, &pool_list, list)
+		if (entry->tag == tag)
+			return entry;
+
+	return NULL;
+}
+
+/**
+ * tcm_alloc - allocate memory from a TCM pool
+ * @tag: tag of the pool to allocate memory from
+ * @len: number of bytes to be allocated
+ *
+ * Looks up the pool registered under @tag and asks its allocator for
+ * @len bytes. Returns the address of the allocation, or zero when no
+ * pool has that tag or the allocator cannot satisfy the request.
+ */
+unsigned long tcm_alloc(unsigned int tag, size_t len)
+{
+	struct tcm_pool *tcm = find_pool(tag);
+
+	if (tcm == NULL)
+		return 0;
+
+	/* gen_pool_alloc() already reports failure as zero. */
+	return gen_pool_alloc(tcm->pool, len);
+}
+
+/**
+ * tcm_free - free a block of memory to a TCM pool
+ * @tag: tag of the pool to free memory to
+ * @addr: address of the memory to be freed
+ * @len: number of bytes to be freed
+ *
+ * Hands @len bytes starting at @addr back to the pool registered under
+ * @tag. Nothing happens when no pool has that tag.
+ */
+void tcm_free(unsigned int tag, unsigned long addr, size_t len)
+{
+	struct tcm_pool *tcm = find_pool(tag);
+
+	if (tcm != NULL)
+		gen_pool_free(tcm->pool, addr, len);
+}
+
+/**
+ * tcm_lookup_tag - find the tag matching an address
+ * @p: memory address to lookup the tag for
+ *
+ * Find the tag of the tcm memory region that contains the
+ * specified address. Returns %TCM_INVALID_TAG if no such
+ * memory region could be found.
+ */
+unsigned int tcm_lookup_tag(unsigned long p)
+{
+	struct tcm_pool *pool;
+
+	list_for_each_entry(pool, &pool_list, list) {
+		/*
+		 * pool->end is copied from the resource's end, which is the
+		 * last valid address of the region (tcm_add_region() sizes
+		 * the pool as end - start + 1), so the comparison must be
+		 * inclusive or the final byte is never matched.
+		 */
+		if (p >= pool->start && p <= pool->end)
+			return pool->tag;
+	}
+
+	return TCM_INVALID_TAG;
+}
+
+/**
+ * tcm_add_region - add a memory region to TCM pool list
+ * @reg: descriptor of region to be added
+ *
+ * Creates an allocator covering @reg->res (start to end, both inclusive)
+ * and links it into the TCM pool list under @reg->tag.
+ *
+ * Returns 0 on success or -ENOMEM if any allocation step fails, in which
+ * case everything allocated so far is released again.
+ */
+int __init tcm_add_region(struct tcm_region *reg)
+{
+	struct tcm_pool *pool;
+
+	pool = kmalloc(sizeof(*pool), GFP_KERNEL);
+	if (!pool) {
+		pr_err("Failed to alloc memory for TCM pool!\n");
+		return -ENOMEM;
+	}
+
+	pool->tag = reg->tag;
+	pool->start = reg->res.start;
+	pool->end = reg->res.end;	/* inclusive, like the resource */
+
+	/*
+	 * 2^3 = 8 bytes granularity to allow for 64bit access alignment.
+	 * -1 = NUMA node specifier.
+	 */
+	pool->pool = gen_pool_create(3, -1);
+	if (!pool->pool) {
+		pr_err("Failed to create TCM pool!\n");
+		goto err_free_pool;
+	}
+
+	if (gen_pool_add(pool->pool, reg->res.start,
+			 reg->res.end - reg->res.start + 1, -1)) {
+		pr_err("Failed to add memory to TCM pool!\n");
+		goto err_destroy_gen_pool;
+	}
+	pr_info("Added %s TCM pool (%08x bytes @ %08x)\n",
+		reg->res.name, reg->res.end - reg->res.start + 1,
+		reg->res.start);
+
+	list_add_tail(&pool->list, &pool_list);
+
+	return 0;
+
+err_destroy_gen_pool:
+	/* No chunk was added, so the empty allocator can simply be torn down. */
+	gen_pool_destroy(pool->pool);
+err_free_pool:
+	kfree(pool);
+	return -ENOMEM;
+}
diff --git a/arch/metag/kernel/time.c b/arch/metag/kernel/time.c
new file mode 100644
index 00000000000..17dc10733b2
--- /dev/null
+++ b/arch/metag/kernel/time.c
@@ -0,0 +1,15 @@
+/*
+ * Copyright (C) 2005-2013 Imagination Technologies Ltd.
+ *
+ * This file contains the Meta-specific time handling details.
+ *
+ */
+
+#include <linux/init.h>
+
+#include <clocksource/metag_generic.h>
+
+/* Boot-time timer setup: delegates entirely to the generic Meta timer. */
+void __init time_init(void)
+{
+ metag_generic_timer_init();
+}
diff --git a/arch/metag/kernel/topology.c b/arch/metag/kernel/topology.c
new file mode 100644
index 00000000000..bec3dec4922
--- /dev/null
+++ b/arch/metag/kernel/topology.c
@@ -0,0 +1,77 @@
+/*
+ * Copyright (C) 2007 Paul Mundt
+ * Copyright (C) 2010 Imagination Technologies Ltd.
+ *
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License. See the file "COPYING" in the main directory of this archive
+ * for more details.
+ */
+#include <linux/cpu.h>
+#include <linux/cpumask.h>
+#include <linux/init.h>
+#include <linux/percpu.h>
+#include <linux/node.h>
+#include <linux/nodemask.h>
+#include <linux/topology.h>
+
+#include <asm/cpu.h>
+
+DEFINE_PER_CPU(struct cpuinfo_metag, cpu_data);
+
+cpumask_t cpu_core_map[NR_CPUS];
+
+/*
+ * Core-group mask for @cpu. @cpu is currently unused: every CPU gets a
+ * copy of the possible-CPU mask.
+ */
+static cpumask_t cpu_coregroup_map(unsigned int cpu)
+{
+ return *cpu_possible_mask;
+}
+
+/*
+ * Return a pointer to the cpu_core_map[] entry for @cpu. @cpu is used as
+ * an index without range checking.
+ */
+const struct cpumask *cpu_coregroup_mask(unsigned int cpu)
+{
+ return &cpu_core_map[cpu];
+}
+
+/*
+ * Refresh cpu_core_map[] for every possible CPU from cpu_coregroup_map().
+ * Always returns 0.
+ */
+int arch_update_cpu_topology(void)
+{
+ unsigned int cpu;
+
+ for_each_possible_cpu(cpu)
+ cpu_core_map[cpu] = cpu_coregroup_map(cpu);
+
+ return 0;
+}
+
+/*
+ * subsys initcall: register online nodes (CONFIG_NEED_MULTIPLE_NODES) and
+ * every present CPU. A register_cpu() failure is only warned about;
+ * the function always returns 0.
+ */
+static int __init topology_init(void)
+{
+ int i, ret;
+
+#ifdef CONFIG_NEED_MULTIPLE_NODES
+ /* The return value of register_one_node() is not checked. */
+ for_each_online_node(i)
+ register_one_node(i);
+#endif
+
+ for_each_present_cpu(i) {
+ struct cpuinfo_metag *cpuinfo = &per_cpu(cpu_data, i);
+#ifdef CONFIG_HOTPLUG_CPU
+ cpuinfo->cpu.hotpluggable = 1;
+#endif
+ ret = register_cpu(&cpuinfo->cpu, i);
+ if (unlikely(ret))
+ pr_warn("%s: register_cpu %d failed (%d)\n",
+ __func__, i, ret);
+ }
+
+#if defined(CONFIG_NUMA) && !defined(CONFIG_SMP)
+ /*
+ * In the UP case, make sure the CPU association is still
+ * registered under each node. Without this, sysfs fails
+ * to make the connection between nodes other than node0
+ * and cpu0.
+ */
+ for_each_online_node(i)
+ if (i != numa_node_id())
+ register_cpu_under_node(raw_smp_processor_id(), i);
+#endif
+
+ return 0;
+}
+subsys_initcall(topology_init);
diff --git a/arch/metag/kernel/traps.c b/arch/metag/kernel/traps.c
new file mode 100644
index 00000000000..8961f247b50
--- /dev/null
+++ b/arch/metag/kernel/traps.c
@@ -0,0 +1,995 @@
+/*
+ * Meta exception handling.
+ *
+ * Copyright (C) 2005,2006,2007,2008,2009,2012 Imagination Technologies Ltd.
+ *
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License. See the file COPYING in the main directory of this archive
+ * for more details.
+ */
+
+#include <linux/export.h>
+#include <linux/sched.h>
+#include <linux/signal.h>
+#include <linux/kernel.h>
+#include <linux/mm.h>
+#include <linux/types.h>
+#include <linux/init.h>
+#include <linux/interrupt.h>
+#include <linux/preempt.h>
+#include <linux/ptrace.h>
+#include <linux/module.h>
+#include <linux/kallsyms.h>
+#include <linux/kdebug.h>
+#include <linux/kexec.h>
+#include <linux/unistd.h>
+#include <linux/smp.h>
+#include <linux/slab.h>
+#include <linux/syscalls.h>
+
+#include <asm/bug.h>
+#include <asm/core_reg.h>
+#include <asm/irqflags.h>
+#include <asm/siginfo.h>
+#include <asm/traps.h>
+#include <asm/hwthread.h>
+#include <asm/switch.h>
+#include <asm/user_gateway.h>
+#include <asm/syscall.h>
+#include <asm/syscalls.h>
+
+/* Passing syscall arguments as long long is quicker. */
+typedef unsigned int (*LPSYSCALL) (unsigned long long,
+ unsigned long long,
+ unsigned long long);
+
+/*
+ * Users of LNKSET should compare the bus error bits obtained from DEFR
+ * against TXDEFR_LNKSET_SUCCESS only as the failure code will vary between
+ * different cores revisions.
+ */
+#define TXDEFR_LNKSET_SUCCESS 0x02000000
+#define TXDEFR_LNKSET_FAILURE 0x04000000
+
+/*
+ * Our global TBI handle. Initialised from setup.c/setup_arch.
+ */
+DECLARE_PER_CPU(PTBI, pTBI);
+
+#ifdef CONFIG_SMP
+static DEFINE_PER_CPU(unsigned int, trigger_mask);
+#else
+unsigned int global_trigger_mask;
+EXPORT_SYMBOL(global_trigger_mask);
+#endif
+
+unsigned long per_cpu__stack_save[NR_CPUS];
+
+static const char * const trap_names[] = {
+ [TBIXXF_SIGNUM_IIF] = "Illegal instruction fault",
+ [TBIXXF_SIGNUM_PGF] = "Privilege violation",
+ [TBIXXF_SIGNUM_DHF] = "Unaligned data access fault",
+ [TBIXXF_SIGNUM_IGF] = "Code fetch general read failure",
+ [TBIXXF_SIGNUM_DGF] = "Data access general read/write fault",
+ [TBIXXF_SIGNUM_IPF] = "Code fetch page fault",
+ [TBIXXF_SIGNUM_DPF] = "Data access page fault",
+ [TBIXXF_SIGNUM_IHF] = "Instruction breakpoint",
+ [TBIXXF_SIGNUM_DWF] = "Read-only data access fault",
+};
+
+/*
+ * Translate a trap number into a human readable description.
+ *
+ * Returns the trap_names[] entry for @trapno, or "Unknown fault" when
+ * @trapno is negative, beyond the table, or has no entry.
+ */
+const char *trap_name(int trapno)
+{
+	const char *name = NULL;
+
+	if (trapno >= 0 && trapno < ARRAY_SIZE(trap_names))
+		name = trap_names[trapno];
+
+	return name ? name : "Unknown fault";
+}
+
+static DEFINE_SPINLOCK(die_lock);
+
+/*
+ * Report a fatal kernel fault and kill the current task.
+ *
+ * @str:  prefix for the report line.
+ * @regs: register state passed to show_regs() and crash_kexec().
+ * @err:  trap number; only the low 16 bits are printed and named.
+ * @addr: faulting address printed in the report.
+ *
+ * Panics instead of returning when called in interrupt context or when
+ * panic_on_oops is set; otherwise ends with do_exit(SIGSEGV).
+ */
+void die(const char *str, struct pt_regs *regs, long err,
+ unsigned long addr)
+{
+ static int die_counter;
+
+ oops_enter();
+
+ /* die_lock is held from here until just before oops_exit(). */
+ spin_lock_irq(&die_lock);
+ console_verbose();
+ bust_spinlocks(1);
+ pr_err("%s: err %04lx (%s) addr %08lx [#%d]\n", str, err & 0xffff,
+ trap_name(err & 0xffff), addr, ++die_counter);
+
+ print_modules();
+ show_regs(regs);
+
+ pr_err("Process: %s (pid: %d, stack limit = %p)\n", current->comm,
+ task_pid_nr(current), task_stack_page(current) + THREAD_SIZE);
+
+ bust_spinlocks(0);
+ add_taint(TAINT_DIE, LOCKDEP_NOW_UNRELIABLE);
+ if (kexec_should_crash(current))
+ crash_kexec(regs);
+
+ if (in_interrupt())
+ panic("Fatal exception in interrupt");
+
+ if (panic_on_oops)
+ panic("Fatal exception");
+
+ spin_unlock_irq(&die_lock);
+ oops_exit();
+ do_exit(SIGSEGV);
+}
+
+#ifdef CONFIG_METAG_DSP
+/*
+ * The ECH encoding specifies the size of a DSPRAM as,
+ *
+ * "slots" / 4
+ *
+ * A "slot" is the size of two DSPRAM bank entries; an entry from
+ * DSPRAM bank A and an entry from DSPRAM bank B. One DSPRAM bank
+ * entry is 4 bytes.
+ */
+#define SLOT_SZ 8
+/*
+ * Convert an ECH-encoded DSPRAM size into bytes.
+ *
+ * Only the low seven bits of @size are used; they hold "slots" / 4, so
+ * the result is that value times four slots times SLOT_SZ bytes.
+ */
+static inline unsigned int decode_dspram_size(unsigned int size)
+{
+	const unsigned int quarter_slots = size & 0x7f;
+
+	return quarter_slots * SLOT_SZ * 4;
+}
+
+/*
+ * Save the contents of DSPRAM banks A and B into @dsp_ctx.
+ *
+ * @ramA_sz and @ramB_sz are the still-encoded size fields; a value of
+ * zero means that bank is skipped. Bank A's field is shifted down by 8
+ * before decoding, bank B's is decoded as is. Buffers are allocated on
+ * first use and reallocated when the new encoded size is larger than the
+ * one recorded in dsp_ctx->ram_sz[]. Allocation failure panics.
+ */
+static void dspram_save(struct meta_ext_context *dsp_ctx,
+ unsigned int ramA_sz, unsigned int ramB_sz)
+{
+ unsigned int ram_sz[2];
+ int i;
+
+ ram_sz[0] = ramA_sz;
+ ram_sz[1] = ramB_sz;
+
+ for (i = 0; i < 2; i++) {
+ if (ram_sz[i] != 0) {
+ unsigned int sz;
+
+ /* sz is the decoded byte count used for allocation. */
+ if (i == 0)
+ sz = decode_dspram_size(ram_sz[i] >> 8);
+ else
+ sz = decode_dspram_size(ram_sz[i]);
+
+ if (dsp_ctx->ram[i] == NULL) {
+ dsp_ctx->ram[i] = kmalloc(sz, GFP_KERNEL);
+
+ if (dsp_ctx->ram[i] == NULL)
+ panic("couldn't save DSP context");
+ } else {
+ /* Grow the buffer; old contents are not preserved. */
+ if (ram_sz[i] > dsp_ctx->ram_sz[i]) {
+ kfree(dsp_ctx->ram[i]);
+
+ dsp_ctx->ram[i] = kmalloc(sz,
+ GFP_KERNEL);
+
+ if (dsp_ctx->ram[i] == NULL)
+ panic("couldn't save DSP context");
+ }
+ }
+
+ /* The save helpers receive the encoded size, not sz. */
+ if (i == 0)
+ __TBIDspramSaveA(ram_sz[i], dsp_ctx->ram[i]);
+ else
+ __TBIDspramSaveB(ram_sz[i], dsp_ctx->ram[i]);
+
+ dsp_ctx->ram_sz[i] = ram_sz[i];
+ }
+ }
+}
+#endif /* CONFIG_METAG_DSP */
+
+/*
+ * Allow interrupts to be nested and save any "extended" register
+ * context state, e.g. DSP regs and RAMs.
+ */
+static void nest_interrupts(TBIRES State, unsigned long mask)
+{
+#ifdef CONFIG_METAG_DSP
+ struct meta_ext_context *dsp_ctx;
+ unsigned int D0_8;
+
+ /*
+ * D0.8 may contain an ECH encoding. The upper 16 bits
+ * tell us what DSP resources the current process is
+ * using. OR the bits into the SaveMask so that
+ * __TBINestInts() knows what resources to save as
+ * part of this context.
+ *
+ * Don't save the context if we're nesting interrupts in the
+ * kernel because the kernel doesn't use DSP hardware.
+ */
+ D0_8 = __core_reg_get(D0.8);
+
+ if (D0_8 && (State.Sig.SaveMask & TBICTX_PRIV_BIT)) {
+ State.Sig.SaveMask |= (D0_8 >> 16);
+
+ /* The per-thread DSP context is allocated lazily on first use. */
+ dsp_ctx = current->thread.dsp_context;
+ if (dsp_ctx == NULL) {
+ dsp_ctx = kzalloc(sizeof(*dsp_ctx), GFP_KERNEL);
+ if (dsp_ctx == NULL)
+ panic("couldn't save DSP context: ENOMEM");
+
+ current->thread.dsp_context = dsp_ctx;
+ }
+
+ current->thread.user_flags |= (D0_8 & 0xffff0000);
+ __TBINestInts(State, &dsp_ctx->regs, mask);
+ /* Bits 14..8 are the bank A size field, bits 6..0 the bank B field. */
+ dspram_save(dsp_ctx, D0_8 & 0x7f00, D0_8 & 0x007f);
+ } else
+ __TBINestInts(State, NULL, mask);
+#else
+ __TBINestInts(State, NULL, mask);
+#endif
+}
+
+/*
+ * Common entry work for the trap handlers in this file.
+ *
+ * Adjusts the save masks in @State and in the saved context, records the
+ * TXDEFR LNKSET result for the current nesting depth (META21 only), then
+ * calls nest_interrupts() with @mask. With
+ * CONFIG_METAG_POISON_CATCH_BUFFERS the TXCATCH registers are overwritten
+ * with recognisable values afterwards.
+ */
+void head_end(TBIRES State, unsigned long mask)
+{
+ unsigned int savemask = (unsigned short)State.Sig.SaveMask;
+ unsigned int ctx_savemask = (unsigned short)State.Sig.pCtx->SaveMask;
+
+ if (savemask & TBICTX_PRIV_BIT) {
+ ctx_savemask |= TBICTX_PRIV_BIT;
+ current->thread.user_flags = savemask;
+ }
+
+ /* Always undo the sleep bit */
+ ctx_savemask &= ~TBICTX_WAIT_BIT;
+
+ /* Always save the catch buffer and RD pipe if they are dirty */
+ savemask |= TBICTX_XCBF_BIT;
+
+ /* Only save the catch and RD if we have not already done so.
+ * Note - the RD bits are in the pCtx only, and not in the
+ * State.SaveMask.
+ */
+ if ((savemask & TBICTX_CBUF_BIT) ||
+ (ctx_savemask & TBICTX_CBRP_BIT)) {
+ /* Have we already saved the buffers though?
+ * - See TestTrack 5071 */
+ if (ctx_savemask & TBICTX_XCBF_BIT) {
+ /* Strip off the bits so the call to __TBINestInts
+ * won't save the buffers again. */
+ savemask &= ~TBICTX_CBUF_BIT;
+ ctx_savemask &= ~TBICTX_CBRP_BIT;
+ }
+ }
+
+#ifdef CONFIG_METAG_META21
+ {
+ unsigned int depth, txdefr;
+
+ /*
+ * Save TXDEFR state.
+ *
+ * The process may have been interrupted after a LNKSET, but
+ * before it could read the DEFR state, so we mustn't lose that
+ * state or it could end up retrying an atomic operation that
+ * succeeded.
+ *
+ * All interrupts are disabled at this point so we
+ * don't need to perform any locking. We must do this
+ * dance before we use LNKGET or LNKSET.
+ */
+ BUG_ON(current->thread.int_depth > HARDIRQ_BITS);
+
+ /* depth is the nesting level before this entry is counted. */
+ depth = current->thread.int_depth++;
+
+ txdefr = __core_reg_get(TXDEFR);
+
+ /* One bit per nesting level in txdefr_failure: set means "not success". */
+ txdefr &= TXDEFR_BUS_STATE_BITS;
+ if (txdefr & TXDEFR_LNKSET_SUCCESS)
+ current->thread.txdefr_failure &= ~(1 << depth);
+ else
+ current->thread.txdefr_failure |= (1 << depth);
+ }
+#endif
+
+ State.Sig.SaveMask = savemask;
+ State.Sig.pCtx->SaveMask = ctx_savemask;
+
+ nest_interrupts(State, mask);
+
+#ifdef CONFIG_METAG_POISON_CATCH_BUFFERS
+ /* Poison the catch registers. This shows up any mistakes we have
+ * made in their handling MUCH quicker.
+ */
+ __core_reg_set(TXCATCH0, 0x87650021);
+ __core_reg_set(TXCATCH1, 0x87654322);
+ __core_reg_set(TXCATCH2, 0x87654323);
+ __core_reg_set(TXCATCH3, 0x87654324);
+#endif /* CONFIG_METAG_POISON_CATCH_BUFFERS */
+}
+
+/*
+ * Common exit path for the trap handlers.
+ *
+ * @State:   TBI state to return; some paths modify its masks.
+ * @syscall: passed through to do_work_pending().
+ * @restart: set to 1 (and the function returns early) when
+ *           do_work_pending() returns non-zero; otherwise left untouched,
+ *           so the caller must initialise it.
+ *
+ * Runs with interrupts disabled from the first statement. Handles pending
+ * work and the lazy FPU restore for returns to user mode, restores a
+ * saved DSP context (CONFIG_METAG_DSP) and puts the recorded LNKSET
+ * result back into TXDEFR (META21).
+ */
+TBIRES tail_end_sys(TBIRES State, int syscall, int *restart)
+{
+ struct pt_regs *regs = (struct pt_regs *)State.Sig.pCtx;
+ unsigned long flags;
+
+ local_irq_disable();
+
+ if (user_mode(regs)) {
+ flags = current_thread_info()->flags;
+ if (flags & _TIF_WORK_MASK &&
+ do_work_pending(regs, flags, syscall)) {
+ *restart = 1;
+ return State;
+ }
+
+#ifdef CONFIG_METAG_FPU
+ if (current->thread.fpu_context &&
+ current->thread.fpu_context->needs_restore) {
+ __TBICtxFPURestore(State, current->thread.fpu_context);
+ /*
+ * Clearing this bit ensures the FP unit is not made
+ * active again unless it is used.
+ */
+ State.Sig.SaveMask &= ~TBICTX_FPAC_BIT;
+ current->thread.fpu_context->needs_restore = false;
+ }
+ State.Sig.TrigMask |= TBI_TRIG_BIT(TBID_SIGNUM_DFR);
+#endif
+ }
+
+ /* TBI will turn interrupts back on at some point. */
+ if (!irqs_disabled_flags((unsigned long)State.Sig.TrigMask))
+ trace_hardirqs_on();
+
+#ifdef CONFIG_METAG_DSP
+ /*
+ * If we previously saved an extended context then restore it
+ * now. Otherwise, clear D0.8 because this process is not
+ * using DSP hardware.
+ */
+ if (State.Sig.pCtx->SaveMask & TBICTX_XEXT_BIT) {
+ unsigned int D0_8;
+ struct meta_ext_context *dsp_ctx = current->thread.dsp_context;
+
+ /* Make sure we're going to return to userland. */
+ BUG_ON(current->thread.int_depth != 1);
+
+ if (dsp_ctx->ram_sz[0] > 0)
+ __TBIDspramRestoreA(dsp_ctx->ram_sz[0],
+ dsp_ctx->ram[0]);
+ if (dsp_ctx->ram_sz[1] > 0)
+ __TBIDspramRestoreB(dsp_ctx->ram_sz[1],
+ dsp_ctx->ram[1]);
+
+ State.Sig.SaveMask |= State.Sig.pCtx->SaveMask;
+ __TBICtxRestore(State, current->thread.dsp_context);
+ /* Rebuild D0.8 from the saved user flags and RAM size fields. */
+ D0_8 = __core_reg_get(D0.8);
+ D0_8 |= current->thread.user_flags & 0xffff0000;
+ D0_8 |= (dsp_ctx->ram_sz[1] | dsp_ctx->ram_sz[0]) & 0xffff;
+ __core_reg_set(D0.8, D0_8);
+ } else
+ __core_reg_set(D0.8, 0);
+#endif /* CONFIG_METAG_DSP */
+
+#ifdef CONFIG_METAG_META21
+ {
+ unsigned int depth, txdefr;
+
+ /*
+ * If there hasn't been a LNKSET since the last LNKGET then the
+ * link flag will be set, causing the next LNKSET to succeed if
+ * the addresses match. The two LNK operations may not be a pair
+ * (e.g. see atomic_read()), so the LNKSET should fail.
+ * We use a conditional-never LNKSET to clear the link flag
+ * without side effects.
+ */
+ asm volatile("LNKSETDNV [D0Re0],D0Re0");
+
+ /* Undo the int_depth increment made in head_end(). */
+ depth = --current->thread.int_depth;
+
+ BUG_ON(user_mode(regs) && depth);
+
+ txdefr = __core_reg_get(TXDEFR);
+
+ txdefr &= ~TXDEFR_BUS_STATE_BITS;
+
+ /* Do we need to restore a failure code into TXDEFR? */
+ if (current->thread.txdefr_failure & (1 << depth))
+ txdefr |= (TXDEFR_LNKSET_FAILURE | TXDEFR_BUS_TRIG_BIT);
+ else
+ txdefr |= (TXDEFR_LNKSET_SUCCESS | TXDEFR_BUS_TRIG_BIT);
+
+ __core_reg_set(TXDEFR, txdefr);
+ }
+#endif
+ return State;
+}
+
+#ifdef CONFIG_SMP
+/*
+ * SMP: an interrupt taken inside __kuser_get_tls must restart the helper
+ * from its first instruction, because the process may be migrated to
+ * another thread and would otherwise read the wrong tls data.
+ *
+ * The helper's position inside the user gateway page is derived from its
+ * offset within the kernel's gateway image.
+ */
+static inline void _restart_critical_section(TBIRES State)
+{
+	const unsigned long image = (unsigned long)&__user_gateway_start;
+	const unsigned long first = USER_GATEWAY_PAGE +
+		((unsigned long)__kuser_get_tls - image);
+	const unsigned long limit = USER_GATEWAY_PAGE +
+		((unsigned long)__kuser_get_tls_end - image);
+	const unsigned long pc = State.Sig.pCtx->CurrPC;
+
+	if (pc < first || pc >= limit)
+		return;
+
+	State.Sig.pCtx->CurrPC = first;
+}
+#else
+/*
+ * UP: an interrupt taken inside __kuser_cmpxchg must restart the helper
+ * from its first instruction.
+ *
+ * The helper's position inside the user gateway page is derived from its
+ * offset within the kernel's gateway image.
+ */
+static inline void _restart_critical_section(TBIRES State)
+{
+	const unsigned long image = (unsigned long)&__user_gateway_start;
+	const unsigned long first = USER_GATEWAY_PAGE +
+		((unsigned long)__kuser_cmpxchg - image);
+	const unsigned long limit = USER_GATEWAY_PAGE +
+		((unsigned long)__kuser_cmpxchg_end - image);
+	const unsigned long pc = State.Sig.pCtx->CurrPC;
+
+	if (pc < first || pc >= limit)
+		return;
+
+	State.Sig.pCtx->CurrPC = first;
+}
+#endif
+
+/* Used by kick_handler() */
+/* Non-static entry point that simply forwards to _restart_critical_section(). */
+void restart_critical_section(TBIRES State)
+{
+ _restart_critical_section(State);
+}
+
+/*
+ * Handler that dispatches trigger @SigNum to do_IRQ().
+ *
+ * @Triggers, @Inst and @pTBI are not used here. Returns the state
+ * produced by tail_end().
+ */
+TBIRES trigger_handler(TBIRES State, int SigNum, int Triggers, int Inst,
+ PTBI pTBI)
+{
+ head_end(State, ~INTS_OFF_MASK);
+
+ /* If we interrupted user code handle any critical sections. */
+ if (State.Sig.SaveMask & TBICTX_PRIV_BIT)
+ _restart_critical_section(State);
+
+ trace_hardirqs_off();
+
+ do_IRQ(SigNum, (struct pt_regs *)State.Sig.pCtx);
+
+ return tail_end(State);
+}
+
+/* Non-zero when the catch buffer flags have TXCATCH0_READ_BIT set. */
+static unsigned int load_fault(PTBICTXEXTCB0 pbuf)
+{
+ return pbuf->CBFlags & TXCATCH0_READ_BIT;
+}
+
+/* Address recorded in the catch buffer (CBAddr). */
+static unsigned long fault_address(PTBICTXEXTCB0 pbuf)
+{
+ return pbuf->CBAddr;
+}
+
+/*
+ * Deliver a fault that nothing else handled.
+ *
+ * @regs:   register state at the time of the fault.
+ * @addr:   faulting address, reported as si_addr.
+ * @signo:  signal to force on the current task.
+ * @code:   si_code for that signal.
+ * @trapno: trap number, reported as si_trapno and used for the name.
+ *
+ * A user-mode fault forces @signo on the current task, optionally logging
+ * a rate-limited diagnostic first. A kernel-mode fault ends in die().
+ */
+static void unhandled_fault(struct pt_regs *regs, unsigned long addr,
+			    int signo, int code, int trapno)
+{
+	if (user_mode(regs)) {
+		siginfo_t info;
+
+		if (show_unhandled_signals && unhandled_signal(current, signo)
+		    && printk_ratelimit()) {
+
+			pr_info("pid %d unhandled fault: pc 0x%08x, addr 0x%08lx, trap %d (%s)\n",
+				current->pid, regs->ctx.CurrPC, addr,
+				trapno, trap_name(trapno));
+			print_vma_addr(" in ", regs->ctx.CurrPC);
+			print_vma_addr(" rtp in ", regs->ctx.DX[4].U1);
+			printk("\n");
+			show_regs(regs);
+		}
+
+		/*
+		 * Start from an all-zero siginfo so that no uninitialised
+		 * kernel stack bytes travel with the signal.
+		 */
+		memset(&info, 0, sizeof(info));
+		info.si_signo = signo;
+		info.si_errno = 0;
+		info.si_code = code;
+		info.si_addr = (__force void __user *)addr;
+		info.si_trapno = trapno;
+		force_sig_info(signo, &info, current);
+	} else {
+		die("Oops", regs, trapno, addr);
+	}
+}
+
+/*
+ * Forward a data access fault to do_page_fault().
+ *
+ * The access counts as a write whenever the catch buffer does not flag
+ * it as a read. Returns whatever do_page_fault() returns.
+ */
+static int handle_data_fault(PTBICTXEXTCB0 pcbuf, struct pt_regs *regs,
+			     unsigned int data_address, int trapno)
+{
+	const int is_write = !load_fault(pcbuf);
+
+	return do_page_fault(regs, data_address, is_write, trapno);
+}
+
+/* For instruction fetch faults the fault address is the saved PC. */
+static unsigned long get_inst_fault_address(struct pt_regs *regs)
+{
+ return regs->ctx.CurrPC;
+}
+
+/*
+ * Handler installed for TBID_SIGNUM_XXF: dispatch on the fault type in
+ * @SigNum.
+ *
+ * Hardware breakpoints and data watches are passed to __TBIUnExpXXX().
+ * Page-level faults go to do_page_fault(); illegal instruction, unaligned
+ * access and privilege violations go to unhandled_fault(). Any other
+ * @SigNum hits BUG(). Returns the state produced by tail_end().
+ */
+TBIRES fault_handler(TBIRES State, int SigNum, int Triggers,
+ int Inst, PTBI pTBI)
+{
+ struct pt_regs *regs = (struct pt_regs *)State.Sig.pCtx;
+ PTBICTXEXTCB0 pcbuf = (PTBICTXEXTCB0)&regs->extcb0;
+ unsigned long data_address;
+
+ head_end(State, ~INTS_OFF_MASK);
+
+ /* Hardware breakpoint or data watch */
+ if ((SigNum == TBIXXF_SIGNUM_IHF) ||
+ ((SigNum == TBIXXF_SIGNUM_DHF) &&
+ (pcbuf[0].CBFlags & (TXCATCH0_WATCH1_BIT |
+ TXCATCH0_WATCH0_BIT)))) {
+ State = __TBIUnExpXXX(State, SigNum, Triggers, Inst,
+ pTBI);
+ return tail_end(State);
+ }
+
+ local_irq_enable();
+
+ data_address = fault_address(pcbuf);
+
+ switch (SigNum) {
+ case TBIXXF_SIGNUM_IGF:
+ /* 1st-level entry invalid (instruction fetch) */
+ case TBIXXF_SIGNUM_IPF: {
+ /* 2nd-level entry invalid (instruction fetch) */
+ unsigned long addr = get_inst_fault_address(regs);
+ do_page_fault(regs, addr, 0, SigNum);
+ break;
+ }
+
+ case TBIXXF_SIGNUM_DGF:
+ /* 1st-level entry invalid (data access) */
+ case TBIXXF_SIGNUM_DPF:
+ /* 2nd-level entry invalid (data access) */
+ case TBIXXF_SIGNUM_DWF:
+ /* Write to read only page */
+ handle_data_fault(pcbuf, regs, data_address, SigNum);
+ break;
+
+ case TBIXXF_SIGNUM_IIF:
+ /* Illegal instruction */
+ unhandled_fault(regs, regs->ctx.CurrPC, SIGILL, ILL_ILLOPC,
+ SigNum);
+ break;
+
+ case TBIXXF_SIGNUM_DHF:
+ /* Unaligned access */
+ unhandled_fault(regs, data_address, SIGBUS, BUS_ADRALN,
+ SigNum);
+ break;
+ case TBIXXF_SIGNUM_PGF:
+ /* Privilege violation */
+ unhandled_fault(regs, data_address, SIGSEGV, SEGV_ACCERR,
+ SigNum);
+ break;
+ default:
+ BUG();
+ break;
+ }
+
+ return tail_end(State);
+}
+
+/* True when @inst equals the SWITCH encoding named SYS. */
+static bool switch_is_syscall(unsigned int inst)
+{
+ return inst == __METAG_SW_ENCODING(SYS);
+}
+
+/* True when @inst equals the SWITCH encoding named SYS_LEGACY. */
+static bool switch_is_legacy_syscall(unsigned int inst)
+{
+ return inst == __METAG_SW_ENCODING(SYS_LEGACY);
+}
+
+/* Advance the saved PC by 4 bytes, past the SWITCH; @inst is unused. */
+static inline void step_over_switch(struct pt_regs *regs, unsigned int inst)
+{
+ regs->ctx.CurrPC += 4;
+}
+
+/* Non-zero when any _TIF_WORK_SYSCALL_MASK flag is set for this thread. */
+static inline int test_syscall_work(void)
+{
+ return current_thread_info()->flags & _TIF_WORK_SYSCALL_MASK;
+}
+
+/*
+ * Handler installed for TBID_SIGNUM_SW1: the system call entry point.
+ *
+ * A SWITCH that is not the syscall encoding gets SIGTRAP. Otherwise the
+ * syscall number is read from DX[0].U1, the six arguments are loaded as
+ * three 64-bit pairs from DX[3], DX[2] and DX[1], and the result is
+ * stored in DX[0].U0. The call is re-issued from restart_syscall when
+ * tail_end_sys() reports a restart.
+ */
+TBIRES switch1_handler(TBIRES State, int SigNum, int Triggers,
+ int Inst, PTBI pTBI)
+{
+ struct pt_regs *regs = (struct pt_regs *)State.Sig.pCtx;
+ unsigned int sysnumber;
+ unsigned long long a1_a2, a3_a4, a5_a6;
+ LPSYSCALL syscall_entry;
+ int restart;
+
+ head_end(State, ~INTS_OFF_MASK);
+
+ /*
+ * If this is not a syscall SWITCH it could be a breakpoint.
+ */
+ if (!switch_is_syscall(Inst)) {
+ /*
+ * Alert the user if they're trying to use legacy system
+ * calls. This suggests they need to update their C
+ * library and build against up to date kernel headers.
+ */
+ if (switch_is_legacy_syscall(Inst))
+ pr_warn_once("WARNING: A legacy syscall was made. Your userland needs updating.\n");
+ /*
+ * We don't know how to handle the SWITCH and cannot
+ * safely ignore it, so treat all unknown switches
+ * (including breakpoints) as traps.
+ */
+ force_sig(SIGTRAP, current);
+ return tail_end(State);
+ }
+
+ local_irq_enable();
+
+restart_syscall:
+ restart = 0;
+ sysnumber = regs->ctx.DX[0].U1;
+
+ /* syscall_trace_enter()'s return value replaces the syscall number. */
+ if (test_syscall_work())
+ sysnumber = syscall_trace_enter(regs);
+
+ /* Skip over the SWITCH instruction - or you just get 'stuck' on it! */
+ step_over_switch(regs, Inst);
+
+ /* sysnumber is unsigned, so this one test covers all out-of-range values. */
+ if (sysnumber >= __NR_syscalls) {
+ pr_debug("unknown syscall number: %d\n", sysnumber);
+ syscall_entry = (LPSYSCALL) sys_ni_syscall;
+ } else {
+ syscall_entry = (LPSYSCALL) sys_call_table[sysnumber];
+ }
+
+ /* Use 64bit loads for speed. */
+ a5_a6 = *(unsigned long long *)&regs->ctx.DX[1];
+ a3_a4 = *(unsigned long long *)&regs->ctx.DX[2];
+ a1_a2 = *(unsigned long long *)&regs->ctx.DX[3];
+
+ /* here is the actual call to the syscall handler functions */
+ regs->ctx.DX[0].U0 = syscall_entry(a1_a2, a3_a4, a5_a6);
+
+ if (test_syscall_work())
+ syscall_trace_leave(regs);
+
+ State = tail_end_sys(State, sysnumber, &restart);
+ /* Handlerless restarts shouldn't go via userland */
+ if (restart)
+ goto restart_syscall;
+ return State;
+}
+
+/*
+ * Handler for SWITCH instructions other than the syscall one.
+ *
+ * From user mode the task receives SIGTRAP; from kernel mode the event is
+ * handed to __TBIUnExpXXX(). Returns the state produced by tail_end().
+ */
+TBIRES switchx_handler(TBIRES State, int SigNum, int Triggers,
+ int Inst, PTBI pTBI)
+{
+ struct pt_regs *regs = (struct pt_regs *)State.Sig.pCtx;
+
+ /*
+ * This can be caused by any user process simply executing an unusual
+ * SWITCH instruction. If there's no DA, __TBIUnExpXXX will cause the
+ * thread to stop, so signal a SIGTRAP instead.
+ */
+ head_end(State, ~INTS_OFF_MASK);
+ if (user_mode(regs))
+ force_sig(SIGTRAP, current);
+ else
+ State = __TBIUnExpXXX(State, SigNum, Triggers, Inst, pTBI);
+ return tail_end(State);
+}
+
+#ifdef CONFIG_METAG_META21
+/*
+ * Floating point exception handler: force SIGFPE on the current task.
+ *
+ * @Triggers carries the FPE status bits. The first matching bit, tested
+ * in the order invalid, divide-by-zero, overflow, underflow, inexact,
+ * selects si_code; si_code is 0 if none is set. si_addr is the saved PC.
+ * Returns the state produced by tail_end().
+ */
+TBIRES fpe_handler(TBIRES State, int SigNum, int Triggers, int Inst, PTBI pTBI)
+{
+	struct pt_regs *regs = (struct pt_regs *)State.Sig.pCtx;
+	unsigned int error_state = Triggers;
+	siginfo_t info;
+
+	head_end(State, ~INTS_OFF_MASK);
+
+	local_irq_enable();
+
+	/*
+	 * Start from an all-zero siginfo so that no uninitialised kernel
+	 * stack bytes travel with the signal.
+	 */
+	memset(&info, 0, sizeof(info));
+	info.si_signo = SIGFPE;
+
+	if (error_state & TXSTAT_FPE_INVALID_BIT)
+		info.si_code = FPE_FLTINV;
+	else if (error_state & TXSTAT_FPE_DIVBYZERO_BIT)
+		info.si_code = FPE_FLTDIV;
+	else if (error_state & TXSTAT_FPE_OVERFLOW_BIT)
+		info.si_code = FPE_FLTOVF;
+	else if (error_state & TXSTAT_FPE_UNDERFLOW_BIT)
+		info.si_code = FPE_FLTUND;
+	else if (error_state & TXSTAT_FPE_INEXACT_BIT)
+		info.si_code = FPE_FLTRES;
+	else
+		info.si_code = 0;
+	info.si_errno = 0;
+	info.si_addr = (__force void __user *)regs->ctx.CurrPC;
+	force_sig_info(SIGFPE, &info, current);
+
+	return tail_end(State);
+}
+#endif
+
+#ifdef CONFIG_METAG_SUSPEND_MEM
+/* Snapshot of the TBI signal handler table taken across suspend. */
+struct traps_context {
+ PTBIAPIFN fnSigs[TBID_SIGNUM_MAX + 1]; /* copy of pTBI->fnSigs */
+};
+
+static struct traps_context *metag_traps_context;
+
+/*
+ * Copy this CPU's TBI handler table into a freshly allocated snapshot and
+ * remember it in metag_traps_context.
+ *
+ * Returns 0 on success or -ENOMEM if the snapshot cannot be allocated.
+ * NOTE(review): a snapshot already stored in metag_traps_context is
+ * overwritten without being freed.
+ */
+int traps_save_context(void)
+{
+ unsigned long cpu = smp_processor_id();
+ PTBI _pTBI = per_cpu(pTBI, cpu);
+ struct traps_context *context;
+
+ context = kzalloc(sizeof(*context), GFP_ATOMIC);
+ if (!context)
+ return -ENOMEM;
+
+ memcpy(context->fnSigs, (void *)_pTBI->fnSigs, sizeof(context->fnSigs));
+
+ metag_traps_context = context;
+ return 0;
+}
+
+/*
+ * Copy the handler table saved by traps_save_context() back into the
+ * calling CPU's TBI signal handler table and release the snapshot.
+ *
+ * Returns 0 on success or -EINVAL if there is no saved snapshot.
+ */
+int traps_restore_context(void)
+{
+	unsigned long cpu = smp_processor_id();
+	PTBI _pTBI = per_cpu(pTBI, cpu);
+	struct traps_context *context = metag_traps_context;
+
+	/* Nothing was saved (or it was already restored): don't touch NULL */
+	if (!context)
+		return -EINVAL;
+
+	metag_traps_context = NULL;
+
+	memcpy((void *)_pTBI->fnSigs, context->fnSigs, sizeof(context->fnSigs));
+
+	kfree(context);
+	return 0;
+}
+#endif
+
+#ifdef CONFIG_SMP
+/* Read the trigger mask kept for the CPU we are currently running on */
+static inline unsigned int _get_trigger_mask(void)
+{
+	return per_cpu(trigger_mask, smp_processor_id());
+}
+
+/*
+ * Exported, out-of-line accessor for the calling CPU's trigger mask; simply
+ * forwards to _get_trigger_mask().
+ */
+unsigned int get_trigger_mask(void)
+{
+ return _get_trigger_mask();
+}
+EXPORT_SYMBOL(get_trigger_mask);
+
+/* SMP: record a new trigger mask for the CPU we are currently running on */
+static void set_trigger_mask(unsigned int mask)
+{
+	per_cpu(trigger_mask, smp_processor_id()) = mask;
+}
+
+/*
+ * SMP version of arch_local_irq_enable(): restores the calling CPU's
+ * trigger mask via arch_local_irq_restore().
+ *
+ * Preemption is disabled around the lookup so that the per-CPU mask that is
+ * read belongs to the CPU it is applied on.
+ */
+void arch_local_irq_enable(void)
+{
+ preempt_disable();
+ arch_local_irq_restore(_get_trigger_mask());
+ /* no reschedule check here */
+ preempt_enable_no_resched();
+}
+EXPORT_SYMBOL(arch_local_irq_enable);
+#else
+/* !SMP: there is a single trigger mask, kept in global_trigger_mask */
+static void set_trigger_mask(unsigned int mask)
+{
+ global_trigger_mask = mask;
+}
+#endif
+
+/*
+ * Per-CPU trap setup: establish the initial trigger mask for @cpu's
+ * hardware thread and start asynchronous trigger handling with interrupts
+ * off.
+ */
+void __cpuinit per_cpu_trap_init(unsigned long cpu)
+{
+	unsigned int hwthread = cpu_2_hwthread_id[cpu];
+	unsigned int mask;
+	TBIRES int_context;
+
+	mask = TBI_INTS_INIT(hwthread);			/* interrupts */
+	mask |= TBI_TRIG_BIT(TBID_SIGNUM_LWK);		/* low level kick */
+	mask |= TBI_TRIG_BIT(TBID_SIGNUM_SW1);
+	mask |= TBI_TRIG_BIT(TBID_SIGNUM_SWS);
+	set_trigger_mask(mask);
+
+	/* Start with interrupts off */
+	int_context.Sig.TrigMask = INTS_OFF_MASK;
+	int_context.Sig.SaveMask = 0;
+	/* non-priv - use current stack */
+	int_context.Sig.pCtx = NULL;
+
+	/* And call __TBIASyncTrigger() */
+	__TBIASyncTrigger(int_context);
+}
+
+/*
+ * Boot-time trap setup: install the kernel's handlers in the calling CPU's
+ * TBI signal table, then perform the per-CPU trigger initialisation.
+ */
+void __init trap_init(void)
+{
+ unsigned long cpu = smp_processor_id();
+ PTBI _pTBI = per_cpu(pTBI, cpu);
+
+ /* SW0, SW2 and SW3 share the generic SWITCH handler; SW1 has its own */
+ _pTBI->fnSigs[TBID_SIGNUM_XXF] = fault_handler;
+ _pTBI->fnSigs[TBID_SIGNUM_SW0] = switchx_handler;
+ _pTBI->fnSigs[TBID_SIGNUM_SW1] = switch1_handler;
+ _pTBI->fnSigs[TBID_SIGNUM_SW2] = switchx_handler;
+ _pTBI->fnSigs[TBID_SIGNUM_SW3] = switchx_handler;
+ _pTBI->fnSigs[TBID_SIGNUM_SWK] = kick_handler;
+
+ /* Deferred and floating point exception signals are only hooked on META21 */
+#ifdef CONFIG_METAG_META21
+ _pTBI->fnSigs[TBID_SIGNUM_DFR] = __TBIHandleDFR;
+ _pTBI->fnSigs[TBID_SIGNUM_FPE] = fpe_handler;
+#endif
+
+ per_cpu_trap_init(cpu);
+}
+
+/*
+ * Enable TBI signal @irq: add its bit to the trigger mask and route it to
+ * trigger_handler in the calling CPU's signal table.
+ *
+ * @irq must be in the range 0..TBID_SIGNUM_MAX; anything else is a bug, as
+ * it is used both as a shift count and as an index into fnSigs[].
+ */
+void tbi_startup_interrupt(int irq)
+{
+	unsigned long cpu = smp_processor_id();
+	PTBI _pTBI = per_cpu(pTBI, cpu);
+
+	/* irq is signed, so check the lower bound as well as the upper one */
+	BUG_ON(irq < 0 || irq > TBID_SIGNUM_MAX);
+
+	/*
+	 * For TR1 and TR2, the thread id is encoded in the irq number
+	 *
+	 * NOTE(review): cpu is not read again after this point; _pTBI was
+	 * already looked up for the calling CPU, so this assignment currently
+	 * has no effect. Confirm whether the table of the target CPU was
+	 * intended.
+	 */
+	if (irq >= TBID_SIGNUM_T10 && irq < TBID_SIGNUM_TR3)
+		cpu = hwthread_id_2_cpu[(irq - TBID_SIGNUM_T10) % 4];
+
+	set_trigger_mask(get_trigger_mask() | TBI_TRIG_BIT(irq));
+
+	_pTBI->fnSigs[irq] = trigger_handler;
+}
+
+/*
+ * Disable TBI signal @irq: clear its bit in the trigger mask and point its
+ * slot in the calling CPU's signal table back at __TBIUnExpXXX.
+ *
+ * @irq must be in the range 0..TBID_SIGNUM_MAX; anything else is a bug, as
+ * it is used both as a shift count and as an index into fnSigs[].
+ */
+void tbi_shutdown_interrupt(int irq)
+{
+	unsigned long cpu = smp_processor_id();
+	PTBI _pTBI = per_cpu(pTBI, cpu);
+
+	/* irq is signed, so check the lower bound as well as the upper one */
+	BUG_ON(irq < 0 || irq > TBID_SIGNUM_MAX);
+
+	set_trigger_mask(get_trigger_mask() & ~TBI_TRIG_BIT(irq));
+
+	_pTBI->fnSigs[irq] = __TBIUnExpXXX;
+}
+
+/*
+ * First code run by a newly created task.
+ *
+ * @arg.Switch.pPara carries the previously running task, which is handed to
+ * schedule_tail(). Kernel threads then call their thread function. Finally
+ * the task's saved register context is resumed through __TBIASyncResume(),
+ * which is not expected to return.
+ */
+int ret_from_fork(TBIRES arg)
+{
+ struct task_struct *prev = arg.Switch.pPara;
+ struct task_struct *tsk = current;
+ struct pt_regs *regs = task_pt_regs(tsk);
+ int (*fn)(void *);
+ TBIRES Next;
+
+ schedule_tail(prev);
+
+ if (tsk->flags & PF_KTHREAD) {
+ /* thread function is taken from DX[4].U1, its argument from DX[3].U1 */
+ fn = (void *)regs->ctx.DX[4].U1;
+ BUG_ON(!fn);
+
+ fn((void *)regs->ctx.DX[3].U1);
+ }
+
+ if (test_syscall_work())
+ syscall_trace_leave(regs);
+
+ /* keep the trigger mask read and the TLS update on one CPU */
+ preempt_disable();
+
+ Next.Sig.TrigMask = get_trigger_mask();
+ Next.Sig.SaveMask = 0;
+ Next.Sig.pCtx = &regs->ctx;
+
+ set_gateway_tls(current->thread.tls_ptr);
+
+ preempt_enable_no_resched();
+
+ /* And interrupts should come back on when we resume the real usermode
+ * code. Call __TBIASyncResume()
+ */
+ __TBIASyncResume(tail_end(Next));
+ /* ASyncResume should NEVER return */
+ BUG();
+ return 0;
+}
+
+/*
+ * Print a kernel call trace for @tsk (current if NULL).
+ *
+ * @sp: stack pointer to scan from; only used without CONFIG_FRAME_POINTER.
+ * @regs: optional register context; if it describes user mode nothing is
+ * printed. With CONFIG_FRAME_POINTER it also supplies the starting PC and
+ * frame pointer.
+ */
+void show_trace(struct task_struct *tsk, unsigned long *sp,
+ struct pt_regs *regs)
+{
+ unsigned long addr;
+#ifdef CONFIG_FRAME_POINTER
+ unsigned long fp, fpnew;
+ unsigned long stack;
+#endif
+
+ if (regs && user_mode(regs))
+ return;
+
+ printk("\nCall trace: ");
+#ifdef CONFIG_KALLSYMS
+ printk("\n");
+#endif
+
+ if (!tsk)
+ tsk = current;
+
+#ifdef CONFIG_FRAME_POINTER
+ if (regs) {
+ print_ip_sym(regs->ctx.CurrPC);
+ fp = regs->ctx.AX[1].U0;
+ } else {
+ /* no saved context: start from the live frame pointer register */
+ fp = __core_reg_get(A0FrP);
+ }
+
+ /* detect when the frame pointer has been used for other purposes and
+ * doesn't point to the stack (it may point completely elsewhere which
+ * kstack_end may not detect).
+ */
+ stack = (unsigned long)task_stack_page(tsk);
+ while (fp >= stack && fp + 8 <= stack + THREAD_SIZE) {
+ /* word at fp + 4 is the return address; step back 4 bytes before symbolising */
+ addr = __raw_readl((unsigned long *)(fp + 4)) - 4;
+ if (kernel_text_address(addr))
+ print_ip_sym(addr);
+ else
+ break;
+ /* stack grows up, so frame pointers must decrease */
+ fpnew = __raw_readl((unsigned long *)(fp + 0));
+ /* also guarantees termination: fp strictly decreases each iteration */
+ if (fpnew >= fp)
+ break;
+ fp = fpnew;
+ }
+#else
+ /* no frame pointers: walk down the stack and print every word that looks like kernel text */
+ while (!kstack_end(sp)) {
+ addr = (*sp--) - 4;
+ if (kernel_text_address(addr))
+ print_ip_sym(addr);
+ }
+#endif
+
+ printk("\n");
+
+ debug_show_held_locks(tsk);
+}
+
+/*
+ * Print a call trace for @tsk (current if NULL).
+ *
+ * The @sp argument is not used: the stack pointer is always re-derived,
+ * from the live stack pointer for the current task or from the saved
+ * kernel context for any other task.
+ */
+void show_stack(struct task_struct *tsk, unsigned long *sp)
+{
+	struct task_struct *task = tsk ? tsk : current;
+	unsigned long *stack_ptr;
+
+	if (task != current)
+		stack_ptr = (unsigned long *)task->thread.kernel_context->AX[0].U0;
+	else
+		stack_ptr = (unsigned long *)current_stack_pointer;
+
+	show_trace(task, stack_ptr, NULL);
+}
+
+/* Print a call trace for the current task (exported for use by modules) */
+void dump_stack(void)
+{
+ show_stack(NULL, NULL);
+}
+EXPORT_SYMBOL(dump_stack);
diff --git a/arch/metag/kernel/user_gateway.S b/arch/metag/kernel/user_gateway.S
new file mode 100644
index 00000000000..7167f3e8db6
--- /dev/null
+++ b/arch/metag/kernel/user_gateway.S
@@ -0,0 +1,97 @@
+/*
+ * Copyright (C) 2010 Imagination Technologies Ltd.
+ *
+ * This file contains code that can be accessed from userspace and can
+ * access certain kernel data structures without the overhead of a system
+ * call.
+ */
+
+#include <asm/metag_regs.h>
+#include <asm/user_gateway.h>
+
+/*
+ * User helpers.
+ *
+ * These are segments of kernel-provided user code reachable from user space
+ * at a fixed address in kernel memory. This is used to provide user space
+ * with some operations which require kernel help because of unimplemented
+ * native features and/or instructions in some Meta CPUs. The idea is for
+ * this code to be executed directly in user mode for best efficiency but
+ * which is too intimate with the kernel counterpart to be left to user
+ * libraries. The kernel reserves the right to change this code as needed
+ * without warning. Only the entry points and their results are guaranteed
+ * to be stable.
+ *
+ * Each segment is 64-byte aligned. This mechanism should be used only for
+ * things that are really small and justified, and not be abused freely.
+ */
+ .text
+ .global ___user_gateway_start
+___user_gateway_start:
+
+ /* get_tls
+ * Offset: 0
+ * Description: Get the TLS pointer for this process.
+ *
+ * The value is loaded into D0Re0 from the word at
+ * (USER_GATEWAY_PAGE + USER_GATEWAY_TLS) + offset, where offset is
+ * derived from the thread field of TXENABLE. D1Ar1 and D1Ar3 are
+ * used as scratch registers.
+ */
+ .global ___kuser_get_tls
+ .type ___kuser_get_tls,function
+___kuser_get_tls:
+ /* D1Ar1 = base address of the TLS slots in the gateway page */
+ MOVT D1Ar1,#HI(USER_GATEWAY_PAGE + USER_GATEWAY_TLS)
+ ADD D1Ar1,D1Ar1,#LO(USER_GATEWAY_PAGE + USER_GATEWAY_TLS)
+ /*
+ * D1Ar3 = TXENABLE thread field shifted down by (TXENABLE_THREAD_S - 2),
+ * presumably thread number * 4, i.e. a byte offset to a 32-bit slot
+ */
+ MOV D1Ar3,TXENABLE
+ AND D1Ar3,D1Ar3,#(TXENABLE_THREAD_BITS)
+ LSR D1Ar3,D1Ar3,#(TXENABLE_THREAD_S - 2)
+ GETD D0Re0,[D1Ar1+D1Ar3]
+___kuser_get_tls_end: /* Beyond this point the read will complete */
+ MOV PC,D1RtP
+ .size ___kuser_get_tls,.-___kuser_get_tls
+ .global ___kuser_get_tls_end
+
+ /* cmpxchg
+ * Offset: 64
+ * Description: Replace the value at 'ptr' with 'newval' if the current
+ * value is 'oldval'. Return zero if we succeeded,
+ * non-zero otherwise.
+ *
+ * Reference prototype:
+ *
+ * int __kuser_cmpxchg(int oldval, int newval, unsigned long *ptr)
+ *
+ * Registers as used below: D1Ar1 = oldval, D0Ar2 = newval,
+ * D1Ar3 = ptr, result in D0Re0.
+ */
+ .balign 64
+ .global ___kuser_cmpxchg
+ .type ___kuser_cmpxchg,function
+___kuser_cmpxchg:
+#ifdef CONFIG_SMP
+ /*
+ * We must use LNKGET/LNKSET with an SMP kernel because the other method
+ * does not provide atomicity across multiple CPUs.
+ */
+ /* load the current value and compare it with oldval */
+0: LNKGETD D0Re0,[D1Ar3]
+ CMP D0Re0,D1Ar1
+ /* store newval only if the compare matched (Z) */
+ LNKSETDZ [D1Ar3],D0Ar2
+ BNZ 1f
+ /*
+ * Retry from 0: unless the masked TXSTAT field equals 0x02000000,
+ * presumably the "linked store succeeded" status
+ */
+ DEFR D0Re0,TXSTAT
+ ANDT D0Re0,D0Re0,#HI(0x3f000000)
+ CMPT D0Re0,#HI(0x02000000)
+ BNE 0b
+#ifdef CONFIG_METAG_LNKGET_AROUND_CACHE
+ DCACHE [D1Ar3], D0Re0
+#endif
+ /* result = 1, cleared to 0 by XORZ when Z is still set (success) */
+1: MOV D0Re0,#1
+ XORZ D0Re0,D0Re0,D0Re0
+ MOV PC,D1RtP
+#else
+ /* !SMP: plain load / compare / conditional store */
+ GETD D0Re0,[D1Ar3]
+ CMP D0Re0,D1Ar1
+ SETDZ [D1Ar3],D0Ar2
+___kuser_cmpxchg_end: /* Beyond this point the write will complete */
+ /* result = 1, cleared to 0 by XORZ when the compare matched */
+ MOV D0Re0,#1
+ XORZ D0Re0,D0Re0,D0Re0
+ MOV PC,D1RtP
+#endif /* CONFIG_SMP */
+ .size ___kuser_cmpxchg,.-___kuser_cmpxchg
+ .global ___kuser_cmpxchg_end
+
+ .global ___user_gateway_end
+___user_gateway_end:
diff --git a/arch/metag/kernel/vmlinux.lds.S b/arch/metag/kernel/vmlinux.lds.S
new file mode 100644
index 00000000000..e12055e88bf
--- /dev/null
+++ b/arch/metag/kernel/vmlinux.lds.S
@@ -0,0 +1,71 @@
+/* ld script to make Meta Linux kernel */
+
+#include <asm/thread_info.h>
+#include <asm/page.h>
+#include <asm/cache.h>
+
+#include <asm-generic/vmlinux.lds.h>
+
+OUTPUT_FORMAT("elf32-metag", "elf32-metag", "elf32-metag")
+OUTPUT_ARCH(metag)
+ENTRY(__start)
+
+/* _jiffies is an alias for the address of _jiffies_64 */
+_jiffies = _jiffies_64;
+SECTIONS
+{
+ /* The image is linked to start at CONFIG_PAGE_OFFSET */
+ . = CONFIG_PAGE_OFFSET;
+ _text = .;
+ __text = .;
+ __stext = .;
+ HEAD_TEXT_SECTION
+ .text : {
+ TEXT_TEXT
+ SCHED_TEXT
+ LOCK_TEXT
+ KPROBES_TEXT
+ IRQENTRY_TEXT
+ *(.text.*)
+ *(.gnu.warning)
+ }
+
+ __etext = .; /* End of text section */
+
+ /* Read-only data followed by read-write data */
+ __sdata = .;
+ RO_DATA_SECTION(PAGE_SIZE)
+ RW_DATA_SECTION(L1_CACHE_BYTES, PAGE_SIZE, THREAD_SIZE)
+ __edata = .; /* End of data section */
+
+ EXCEPTION_TABLE(16)
+ NOTES
+
+ /* Everything between ___init_begin and ___init_end is init code/data */
+ . = ALIGN(PAGE_SIZE); /* Init code and data */
+ ___init_begin = .;
+ INIT_TEXT_SECTION(PAGE_SIZE)
+ INIT_DATA_SECTION(16)
+
+ /* Table built from .arch.info.init entries, bounded by the two symbols */
+ .init.arch.info : {
+ ___arch_info_begin = .;
+ *(.arch.info.init)
+ ___arch_info_end = .;
+ }
+
+ PERCPU_SECTION(L1_CACHE_BYTES)
+
+ ___init_end = .;
+
+ BSS_SECTION(0, PAGE_SIZE, 0)
+
+ __end = .;
+
+ /* __heap_start is the first page boundary after the end of the image */
+ . = ALIGN(PAGE_SIZE);
+ __heap_start = .;
+
+ DWARF_DEBUG
+
+ /* When something in the kernel is NOT compiled as a module, the
+ * module cleanup code and data are put into these segments. Both
+ * can then be thrown away, as cleanup code is never called unless
+ * it's a module.
+ */
+ DISCARDS
+}
diff --git a/arch/metag/lib/Makefile b/arch/metag/lib/Makefile
new file mode 100644
index 00000000000..a41d24e270e
--- /dev/null
+++ b/arch/metag/lib/Makefile
@@ -0,0 +1,22 @@
+#
+# Makefile for Meta-specific library files.
+#
+
+# User copy, page and memory block helpers
+lib-y += usercopy.o
+lib-y += copy_page.o
+lib-y += clear_page.o
+lib-y += memcpy.o
+lib-y += memmove.o
+lib-y += memset.o
+# Delay loops
+lib-y += delay.o
+# Integer arithmetic support routines (64-bit shifts/multiply/compare, 32-bit divide/modulo)
+lib-y += div64.o
+lib-y += muldi3.o
+lib-y += ashrdi3.o
+lib-y += ashldi3.o
+lib-y += lshrdi3.o
+lib-y += divsi3.o
+lib-y += modsi3.o
+lib-y += cmpdi2.o
+lib-y += ucmpdi2.o
+# IP checksum routines
+lib-y += ip_fast_csum.o
+lib-y += checksum.o
diff --git a/arch/metag/lib/ashldi3.S b/arch/metag/lib/ashldi3.S
new file mode 100644
index 00000000000..78d6974cffe
--- /dev/null
+++ b/arch/metag/lib/ashldi3.S
@@ -0,0 +1,33 @@
+! Copyright (C) 2012 by Imagination Technologies Ltd.
+!
+! 64-bit arithmetic shift left routine.
+!
+
+ .text
+ .global ___ashldi3
+ .type ___ashldi3,function
+
+! ___ashldi3: shift a 64-bit value left by COUNT bits.
+!
+! In: D0Ar2 = low word, D1Ar1 = high word, D1Ar3 = COUNT
+! Out: D0Re0 = low word, D1Re0 = high word of the result
+! Scratch: D0Ar4, D0Ar6, D1Ar3
+___ashldi3:
+ MOV D0Re0,D0Ar2
+ MOV D1Re0,D1Ar1
+ CMP D1Ar3,#0 ! COUNT == 0
+ MOVEQ PC,D1RtP ! Yes, return
+
+ SUBS D0Ar4,D1Ar3,#32 ! N = COUNT - 32
+ BGE $L10
+
+!! Shift < 32
+ NEG D0Ar4,D0Ar4 ! N = - N
+ LSL D1Re0,D1Re0,D1Ar3 ! HI = HI << COUNT
+ LSR D0Ar6,D0Re0,D0Ar4 ! TMP= LO >> -(COUNT - 32)
+ OR D1Re0,D1Re0,D0Ar6 ! HI = HI | TMP
+ SWAP D0Ar4,D1Ar3 ! bring COUNT into D0Ar4 for the LO shift
+ LSL D0Re0,D0Re0,D0Ar4 ! LO = LO << COUNT
+ MOV PC,D1RtP
+
+$L10:
+!! Shift >= 32
+ LSL D1Re0,D0Re0,D0Ar4 ! HI = LO << N
+ MOV D0Re0,#0 ! LO = 0
+ MOV PC,D1RtP
+ .size ___ashldi3,.-___ashldi3
diff --git a/arch/metag/lib/ashrdi3.S b/arch/metag/lib/ashrdi3.S
new file mode 100644
index 00000000000..7cb7ed3bb1a
--- /dev/null
+++ b/arch/metag/lib/ashrdi3.S
@@ -0,0 +1,33 @@
+! Copyright (C) 2012 by Imagination Technologies Ltd.
+!
+! 64-bit arithmetic shift right routine.
+!
+
+ .text
+ .global ___ashrdi3
+ .type ___ashrdi3,function
+
+! ___ashrdi3: arithmetic (sign-propagating) shift of a 64-bit value right by
+! COUNT bits.
+!
+! In: D0Ar2 = low word, D1Ar1 = high word, D1Ar3 = COUNT
+! Out: D0Re0 = low word, D1Re0 = high word of the result
+! Scratch: D0Ar4, D0Ar6, D1Ar3
+___ashrdi3:
+ MOV D0Re0,D0Ar2
+ MOV D1Re0,D1Ar1
+ CMP D1Ar3,#0 ! COUNT == 0
+ MOVEQ PC,D1RtP ! Yes, return
+
+ MOV D0Ar4,D1Ar3 ! keep a copy of COUNT
+ SUBS D1Ar3,D1Ar3,#32 ! N = COUNT - 32
+ BGE $L20
+
+!! Shift < 32
+ NEG D1Ar3,D1Ar3 ! N = - N
+ LSR D0Re0,D0Re0,D0Ar4 ! LO = LO >> COUNT
+ LSL D0Ar6,D1Re0,D1Ar3 ! TMP= HI << -(COUNT - 32)
+ OR D0Re0,D0Re0,D0Ar6 ! LO = LO | TMP
+ SWAP D1Ar3,D0Ar4 ! bring COUNT into D1Ar3 for the HI shift
+ ASR D1Re0,D1Re0,D1Ar3 ! HI = HI >> COUNT
+ MOV PC,D1RtP
+$L20:
+!! Shift >= 32
+ ASR D0Re0,D1Re0,D1Ar3 ! LO = HI >> N
+ ASR D1Re0,D1Re0,#31 ! HI = HI >> 31
+ MOV PC,D1RtP
+ .size ___ashrdi3,.-___ashrdi3
diff --git a/arch/metag/lib/checksum.c b/arch/metag/lib/checksum.c
new file mode 100644
index 00000000000..44d2e191356
--- /dev/null
+++ b/arch/metag/lib/checksum.c
@@ -0,0 +1,168 @@
+/*
+ *
+ * INET An implementation of the TCP/IP protocol suite for the LINUX
+ * operating system. INET is implemented using the BSD Socket
+ * interface as the means of communication with the user level.
+ *
+ * IP/TCP/UDP checksumming routines
+ *
+ * Authors: Jorge Cwik, <jorge@laser.satlink.net>
+ * Arnt Gulbrandsen, <agulbra@nvg.unit.no>
+ * Tom May, <ftom@netcom.com>
+ * Andreas Schwab, <schwab@issan.informatik.uni-dortmund.de>
+ * Lots of code moved from tcp.c and ip.c; see those files
+ * for more names.
+ *
+ * 03/02/96 Jes Sorensen, Andreas Schwab, Roman Hodek:
+ * Fixed some nasty bugs, causing some horrible crashes.
+ * A: At some points, the sum (%0) was used as
+ * length-counter instead of the length counter
+ * (%1). Thanks to Roman Hodek for pointing this out.
+ * B: GCC seems to mess up if one uses too many
+ * data-registers to hold input values and one tries to
+ * specify d0 and d1 as scratch registers. Letting gcc
+ * choose these registers itself solves the problem.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+/* Revised by Kenneth Albanowski for m68knommu. Basic problem: unaligned access
+ kills, so most of the assembly has to go. */
+
+#include <linux/module.h>
+#include <net/checksum.h>
+
+#include <asm/byteorder.h>
+
+/*
+ * Fold a 32-bit sum into 16 bits by adding the two halves together, then
+ * folding once more to absorb the carry of that addition.
+ */
+static inline unsigned short from32to16(unsigned int x)
+{
+	unsigned int folded;
+
+	/* 16 bits + 16 bits gives at most 17 bits */
+	folded = (x >> 16) + (x & 0xffff);
+	/* add the possible carry back in */
+	folded = (folded >> 16) + (folded & 0xffff);
+	return folded;
+}
+
+/*
+ * Sum @len bytes at @buff with end-around carry and fold the result to 16
+ * bits. Returns 0 when @len <= 0.
+ *
+ * The buffer is consumed as: an optional leading byte (odd start address),
+ * an optional 16-bit word (to reach 4-byte alignment), a run of 32-bit
+ * words, then an optional trailing 16-bit word and trailing byte.
+ */
+static unsigned int do_csum(const unsigned char *buff, int len)
+{
+ int odd;
+ unsigned int result = 0;
+
+ if (len <= 0)
+ goto out;
+ /* odd start address: take one byte so that buff becomes 2-byte aligned */
+ odd = 1 & (unsigned long) buff;
+ if (odd) {
+#ifdef __LITTLE_ENDIAN
+ result += (*buff << 8);
+#else
+ result = *buff;
+#endif
+ len--;
+ buff++;
+ }
+ if (len >= 2) {
+ /* take one 16-bit word if needed to make buff 4-byte aligned */
+ if (2 & (unsigned long) buff) {
+ result += *(unsigned short *) buff;
+ len -= 2;
+ buff += 2;
+ }
+ if (len >= 4) {
+ /* end of the whole 32-bit words */
+ const unsigned char *end = buff + ((unsigned)len & ~3);
+ unsigned int carry = 0;
+ do {
+ unsigned int w = *(unsigned int *) buff;
+ buff += 4;
+ result += carry;
+ result += w;
+ /* the addition wrapped if the sum is now smaller than w */
+ carry = (w > result);
+ } while (buff < end);
+ result += carry;
+ result = (result & 0xffff) + (result >> 16);
+ }
+ if (len & 2) {
+ result += *(unsigned short *) buff;
+ buff += 2;
+ }
+ }
+ /* trailing byte */
+ if (len & 1)
+#ifdef __LITTLE_ENDIAN
+ result += *buff;
+#else
+ result += (*buff << 8);
+#endif
+ result = from32to16(result);
+ /* the sum was accumulated one byte out of phase: swap the two bytes back */
+ if (odd)
+ result = ((result >> 8) & 0xff) | ((result & 0xff) << 8);
+out:
+ return result;
+}
+/* ip_fast_csum() is not defined in this file; only its export lives here */
+EXPORT_SYMBOL(ip_fast_csum);
+
+/*
+ * Checksum the @len bytes at @buff and add the running 32-bit sum @wsum
+ * to the result, wrapping any carry out of bit 31 back into bit 0.
+ *
+ * The value returned is a 32-bit number suitable for feeding back into
+ * this function or into csum_tcpudp_magic.
+ *
+ * Every fragment except the last must have an even length; the last one
+ * may be odd. It's best to have buff aligned on a 32-bit boundary.
+ */
+__wsum csum_partial(const void *buff, int len, __wsum wsum)
+{
+	unsigned int prev = (__force unsigned int)wsum;
+	unsigned int total = do_csum(buff, len) + prev;
+
+	/* the addition wrapped: apply the end-around carry */
+	if (total < prev)
+		total++;
+	return (__force __wsum)total;
+}
+EXPORT_SYMBOL(csum_partial);
+
+/*
+ * this routine is used for miscellaneous IP-like checksums, mainly
+ * in icmp.c
+ *
+ * Returns the one's complement of do_csum() over the @len bytes at @buff,
+ * truncated to 16 bits.
+ */
+__sum16 ip_compute_csum(const void *buff, int len)
+{
+ return (__force __sum16)~do_csum(buff, len);
+}
+EXPORT_SYMBOL(ip_compute_csum);
+
+/*
+ * Copy @len bytes from user space @src to @dst and checksum the copy,
+ * otherwise like csum_partial.
+ *
+ * If part of the user buffer cannot be read, the uncopied tail of @dst is
+ * zero-filled and *@csum_err is set to -EFAULT; otherwise *@csum_err is
+ * set to 0. The checksum is computed over all @len bytes of @dst either
+ * way.
+ */
+__wsum
+csum_partial_copy_from_user(const void __user *src, void *dst, int len,
+			    __wsum sum, int *csum_err)
+{
+	int missing = __copy_from_user(dst, src, len);
+
+	if (!missing) {
+		*csum_err = 0;
+	} else {
+		/* pad what could not be copied so the sum is well defined */
+		memset(dst + len - missing, 0, missing);
+		*csum_err = -EFAULT;
+	}
+
+	return csum_partial(dst, len, sum);
+}
+EXPORT_SYMBOL(csum_partial_copy_from_user);
+
+/*
+ * copy from ds while checksumming, otherwise like csum_partial
+ *
+ * Copies @len bytes from @src to @dst with memcpy() and returns
+ * csum_partial() of the destination combined with @sum.
+ */
+__wsum
+csum_partial_copy(const void *src, void *dst, int len, __wsum sum)
+{
+ memcpy(dst, src, len);
+ return csum_partial(dst, len, sum);
+}
+EXPORT_SYMBOL(csum_partial_copy);
diff --git a/arch/metag/lib/clear_page.S b/arch/metag/lib/clear_page.S
new file mode 100644
index 00000000000..43144eebec5
--- /dev/null
+++ b/arch/metag/lib/clear_page.S
@@ -0,0 +1,17 @@
+ ! Copyright 2007,2008,2009 Imagination Technologies Ltd.
+
+#include <asm/page.h>
+
+ .text
+ .global _clear_page
+ .type _clear_page,function
+ !! Zero PAGE_SIZE bytes, 8 bytes per store.
+ !! D1Ar1 - page
+ !! D0Re0, D1Re0 hold the zero pair being stored; D1Ar1 is advanced.
+_clear_page:
+ !! repeat count is (number of 8 byte stores) - 1
+ MOV TXRPT,#((PAGE_SIZE / 8) - 1)
+ MOV D0Re0,#0
+ MOV D1Re0,#0
+$Lclear_page_loop:
+ SETL [D1Ar1++],D0Re0,D1Re0
+ !! presumably loops until the TXRPT count is exhausted
+ BR $Lclear_page_loop
+ MOV PC,D1RtP
+ .size _clear_page,.-_clear_page
diff --git a/arch/metag/lib/cmpdi2.S b/arch/metag/lib/cmpdi2.S
new file mode 100644
index 00000000000..9c5c663c5ae
--- /dev/null
+++ b/arch/metag/lib/cmpdi2.S
@@ -0,0 +1,32 @@
+! Copyright (C) 2012 by Imagination Technologies Ltd.
+!
+! 64-bit signed compare routine.
+!
+
+ .text
+ .global ___cmpdi2
+ .type ___cmpdi2,function
+
+! Returns in D0Re0: 0 if a < b, 1 if a == b, 2 if a > b.
+! The high words are compared as signed values; if they are equal the low
+! words decide, compared as unsigned values.
+!
+! low high
+! s64 a (D0Ar2, D1Ar1)
+! s64 b (D0Ar4, D1Ar3)
+___cmpdi2:
+ ! start at 1 (equal) and conditionally increment or decrement
+ MOV D0Re0,#1
+
+ ! high words differ?
+ CMP D1Ar1,D1Ar3
+ BNE $Lhigh_differ
+
+ ! unsigned compare low words
+ CMP D0Ar2,D0Ar4
+ SUBLO D0Re0,D0Re0,#1
+ ADDHI D0Re0,D0Re0,#1
+ MOV PC,D1RtP
+
+$Lhigh_differ:
+ ! signed compare high words
+ ! (uses the flags from the CMP of the high words above)
+ SUBLT D0Re0,D0Re0,#1
+ ADDGT D0Re0,D0Re0,#1
+ MOV PC,D1RtP
+ .size ___cmpdi2,.-___cmpdi2
diff --git a/arch/metag/lib/copy_page.S b/arch/metag/lib/copy_page.S
new file mode 100644
index 00000000000..91f7d461239
--- /dev/null
+++ b/arch/metag/lib/copy_page.S
@@ -0,0 +1,20 @@
+ ! Copyright 2007,2008 Imagination Technologies Ltd.
+
+#include <asm/page.h>
+
+ .text
+ .global _copy_page
+ .type _copy_page,function
+ !! Copy PAGE_SIZE bytes, 16 bytes per loop iteration.
+ !! D1Ar1 - to
+ !! D0Ar2 - from
+ !! D0FrT counts the bytes left; D0Re0/D1Re0 and D0Ar6/D1Ar5 carry the data.
+_copy_page:
+ MOV D0FrT,#PAGE_SIZE
+$Lcopy_page_loop:
+ GETL D0Re0,D1Re0,[D0Ar2++]
+ GETL D0Ar6,D1Ar5,[D0Ar2++]
+ SETL [D1Ar1++],D0Re0,D1Re0
+ SETL [D1Ar1++],D0Ar6,D1Ar5
+ SUBS D0FrT,D0FrT,#16
+ BNZ $Lcopy_page_loop
+ MOV PC,D1RtP
+ .size _copy_page,.-_copy_page
diff --git a/arch/metag/lib/delay.c b/arch/metag/lib/delay.c
new file mode 100644
index 00000000000..0b308f48b37
--- /dev/null
+++ b/arch/metag/lib/delay.c
@@ -0,0 +1,56 @@
+/*
+ * Precise Delay Loops for Meta
+ *
+ * Copyright (C) 1993 Linus Torvalds
+ * Copyright (C) 1997 Martin Mares <mj@atrey.karlin.mff.cuni.cz>
+ * Copyright (C) 2007,2009 Imagination Technologies Ltd.
+ *
+ */
+
+#include <linux/export.h>
+#include <linux/sched.h>
+#include <linux/delay.h>
+
+#include <asm/core_reg.h>
+#include <asm/processor.h>
+
+/*
+ * TXTACTCYC is only 24 bits, so on chips with fast clocks it will wrap
+ * many times per-second. If it does wrap __delay will return prematurely,
+ * but this is only likely with large delay values.
+ *
+ * We also can't implement read_current_timer() with TXTACTCYC due to
+ * this wrapping behaviour.
+ *
+ * rdtimer(t) stores the current TXTACTCYC value in the lvalue t. The
+ * argument and the whole expansion are parenthesised so that the macro
+ * behaves as a single expression wherever it is used.
+ */
+#define rdtimer(t) ((t) = __core_reg_get(TXTACTCYC))
+
+/*
+ * Busy-wait until the timer read by rdtimer() has advanced by at least
+ * @loops ticks from its value on entry. The timer is always sampled at
+ * least once after the initial read, even for @loops == 0.
+ */
+void __delay(unsigned long loops)
+{
+	unsigned long start, cur;
+
+	rdtimer(start);
+	for (;;) {
+		asm("NOP");
+		rdtimer(cur);
+		if ((cur - start) >= loops)
+			break;
+	}
+}
+EXPORT_SYMBOL(__delay);
+
+/*
+ * Delay for (xloops * loops_per_jiffy * HZ) >> 32 timer loops; i.e. @xloops
+ * is treated as a fraction of a second scaled by 2**32. The product is
+ * formed in 64 bits so that it cannot overflow before the shift.
+ */
+inline void __const_udelay(unsigned long xloops)
+{
+ u64 loops = (u64)xloops * (u64)loops_per_jiffy * HZ;
+ __delay(loops >> 32);
+}
+EXPORT_SYMBOL(__const_udelay);
+
+/* Delay for @usecs microseconds by scaling to 2**32ths of a second */
+void __udelay(unsigned long usecs)
+{
+ __const_udelay(usecs * 0x000010c7); /* 2**32 / 1000000 (rounded up) */
+}
+EXPORT_SYMBOL(__udelay);
+
+/* Delay for @nsecs nanoseconds by scaling to 2**32ths of a second */
+void __ndelay(unsigned long nsecs)
+{
+ __const_udelay(nsecs * 0x00005); /* 2**32 / 1000000000 (rounded up) */
+}
+EXPORT_SYMBOL(__ndelay);
diff --git a/arch/metag/lib/div64.S b/arch/metag/lib/div64.S
new file mode 100644
index 00000000000..1cfc93498f7
--- /dev/null
+++ b/arch/metag/lib/div64.S
@@ -0,0 +1,108 @@
+! Copyright (C) 2012 Imagination Technologies Ltd.
+!
+! Signed/unsigned 64-bit division routines.
+!
+
+ .text
+ .global _div_u64
+ .type _div_u64,function
+
+! Unsigned 64-bit shift-and-subtract division.
+!
+! Register use as read from the code below (low word, high word):
+! (D0Ar2, D1Ar1) value being divided, reduced as the loop runs
+! (D0Ar4, D1Ar3) value divided by, scaled up and then shifted back down
+! (D0Re0, D1Re0) current quotient bit, later the returned result
+! (D0Ar6, D1Ar5) scratch / quotient accumulator
+! A0.3 scratch
+! If (D0Ar4, D1Ar3) is zero, (D0Ar2, D1Ar1) is returned unchanged.
+_div_u64:
+$L1:
+ ORS A0.3,D1Ar3,D0Ar4
+ BNE $L3
+$L2:
+ MOV D0Re0,D0Ar2
+ MOV D1Re0,D1Ar1
+ MOV PC,D1RtP
+$L3:
+! start with quotient bit = 1; skip the scaling if divisor >= dividend
+ CMP D1Ar3,D1Ar1
+ CMPEQ D0Ar4,D0Ar2
+ MOV D0Re0,#1
+ MOV D1Re0,#0
+ BHS $L6
+$L4:
+! try doubling the divisor; stop if the doubled value wrapped around
+ ADDS D0Ar6,D0Ar4,D0Ar4
+ ADD D1Ar5,D1Ar3,D1Ar3
+ ADDCS D1Ar5,D1Ar5,#1
+ CMP D1Ar5,D1Ar3
+ CMPEQ D0Ar6,D0Ar4
+ BLO $L6
+$L5:
+! commit the doubled divisor and double the quotient bit to match
+ MOV D0Ar4,D0Ar6
+ MOV D1Ar3,D1Ar5
+ ADDS D0Re0,D0Re0,D0Re0
+ ADD D1Re0,D1Re0,D1Re0
+ ADDCS D1Re0,D1Re0,#1
+ CMP D1Ar3,D1Ar1
+ CMPEQ D0Ar4,D0Ar2
+ BLO $L4
+$L6:
+! clear the quotient accumulator; nothing to do if the quotient bit is zero
+ ORS A0.3,D1Re0,D0Re0
+ MOV D0Ar6,#0
+ MOV D1Ar5,D0Ar6
+ BEQ $L10
+$L7:
+ CMP D1Ar1,D1Ar3
+ CMPEQ D0Ar2,D0Ar4
+ BLO $L9
+$L8:
+! dividend >= divisor: quotient += bit, dividend -= divisor
+ ADDS D0Ar6,D0Ar6,D0Re0
+ ADD D1Ar5,D1Ar5,D1Re0
+ ADDCS D1Ar5,D1Ar5,#1
+
+ SUBS D0Ar2,D0Ar2,D0Ar4
+ SUB D1Ar1,D1Ar1,D1Ar3
+ SUBCS D1Ar1,D1Ar1,#1
+$L9:
+! shift the quotient bit and the divisor right by one; loop until the bit is gone
+ LSL A0.3,D1Re0,#31
+ LSR D0Re0,D0Re0,#1
+ LSR D1Re0,D1Re0,#1
+ OR D0Re0,D0Re0,A0.3
+ LSL A0.3,D1Ar3,#31
+ LSR D0Ar4,D0Ar4,#1
+ LSR D1Ar3,D1Ar3,#1
+ OR D0Ar4,D0Ar4,A0.3
+ ORS A0.3,D1Re0,D0Re0
+ BNE $L7
+$L10:
+! return the accumulated quotient
+ MOV D0Re0,D0Ar6
+ MOV D1Re0,D1Ar5
+ MOV PC,D1RtP
+ .size _div_u64,.-_div_u64
+
+ .text
+ .global _div_s64
+ .type _div_s64,function
+! Signed 64-bit division built on _div_u64: both operands are made
+! non-negative, divided as unsigned values, and the result is negated if the
+! operands had different signs. Operands and result use the same registers
+! as _div_u64.
+_div_s64:
+! save two register pairs (16 bytes) on the stack; restored before returning
+ MSETL [A0StP],D0FrT,D0.5
+! the top bit of D1.5 records whether the operand signs differ
+ XOR D0.5,D0Ar2,D0Ar4
+ XOR D1.5,D1Ar1,D1Ar3
+ TSTT D1Ar1,#HI(0x80000000)
+ BZ $L25
+
+! first operand is negative: negate the 64-bit value
+ NEGS D0Ar2,D0Ar2
+ NEG D1Ar1,D1Ar1
+ SUBCS D1Ar1,D1Ar1,#1
+$L25:
+ TSTT D1Ar3,#HI(0x80000000)
+ BZ $L27
+
+! second operand is negative: negate the 64-bit value
+ NEGS D0Ar4,D0Ar4
+ NEG D1Ar3,D1Ar3
+ SUBCS D1Ar3,D1Ar3,#1
+$L27:
+ CALLR D1RtP,_div_u64
+ TSTT D1.5,#HI(0x80000000)
+ BZ $L29
+
+! signs differed: negate the 64-bit result
+ NEGS D0Re0,D0Re0
+ NEG D1Re0,D1Re0
+ SUBCS D1Re0,D1Re0,#1
+$L29:
+
+! reload the saved pairs (this also brings back our own return pointer,
+! which the CALLR above overwrote) and pop the 16 bytes
+ GETL D0FrT,D1RtP,[A0StP+#(-16)]
+ GETL D0.5,D1.5,[A0StP+#(-8)]
+ SUB A0StP,A0StP,#16
+ MOV PC,D1RtP
+ .size _div_s64,.-_div_s64
diff --git a/arch/metag/lib/divsi3.S b/arch/metag/lib/divsi3.S
new file mode 100644
index 00000000000..7c8a8ae9a0a
--- /dev/null
+++ b/arch/metag/lib/divsi3.S
@@ -0,0 +1,100 @@
+! Copyright (C) 2001, 2002, 2003, 2004, 2005, 2006, 2007
+! Imagination Technologies Ltd
+!
+! Integer divide routines.
+!
+
+ .text
+ .global ___udivsi3
+ .type ___udivsi3,function
+ .align 2
+!!
+!! 32-bit unsigned division: A in D1Ar1, B in D0Ar2, quotient returned in
+!! D0Re0. Shares the divide core of ___divsi3 by branching to $LIDMCUStart.
+!!
+___udivsi3:
+!!
+!! Since core is signed divide case, just set control variable
+!!
+ MOV D1Re0,D0Ar2 ! Au already in A1Ar1, Bu -> D1Re0
+ MOV D0Re0,#0 ! Result is 0
+ MOV D0Ar4,#0 ! Return positive result
+ B $LIDMCUStart
+ .size ___udivsi3,.-___udivsi3
+
+!!
+!! 32-bit division signed i/p - passed signed 32-bit numbers
+!!
+ .global ___divsi3
+ .type ___divsi3,function
+ .align 2
+___divsi3:
+!!
+!! A already in D1Ar1, B already in D0Ar2 -> make B abs(B)
+!!
+ MOV D1Re0,D0Ar2 ! A already in A1Ar1, B -> D1Re0
+ MOV D0Re0,#0 ! Result is 0
+ XOR D0Ar4,D1Ar1,D1Re0 ! D0Ar4 -ive if result is -ive
+ ABS D1Ar1,D1Ar1 ! abs(A) -> Au
+ ABS D1Re0,D1Re0 ! abs(B) -> Bu
+$LIDMCUStart:
+ CMP D1Ar1,D1Re0 ! Is ( Au > Bu )?
+ LSR D1Ar3,D1Ar1,#2 ! Calculate (Au & (~3)) >> 2
+ CMPHI D1Re0,D1Ar3 ! OR ( (Au & (~3)) <= (Bu << 2) )?
+ LSLSHI D1Ar3,D1Re0,#1 ! Buq = Bu << 1
+ BLS $LIDMCUSetup ! Yes: Do normal divide
+!!
+!! Quick divide setup can assume that CurBit only needs to start at 2
+!!
+$LIDMCQuick:
+ CMP D1Ar1,D1Ar3 ! ( A >= Buq )?
+ ADDCC D0Re0,D0Re0,#2 ! If yes result += 2
+ SUBCC D1Ar1,D1Ar1,D1Ar3 ! and A -= Buq
+ CMP D1Ar1,D1Re0 ! ( A >= Bu )?
+ ADDCC D0Re0,D0Re0,#1 ! If yes result += 1
+ SUBCC D1Ar1,D1Ar1,D1Re0 ! and A -= Bu
+ ORS D0Ar4,D0Ar4,D0Ar4 ! Return neg result?
+ NEG D0Ar2,D0Re0 ! Calculate neg result
+ MOVMI D0Re0,D0Ar2 ! Yes: Take neg result
+$LIDMCRet:
+ MOV PC,D1RtP
+!!
+!! Setup for general unsigned divide code
+!!
+!! D0Re0 is used to form the result, already set to Zero
+!! D1Re0 is the input Bu value, this gets trashed
+!! D0Ar6 is curbit which is set to 1 at the start and shifted up
+!! D0Ar4 is negative if we should return a negative result
+!! D1Ar1 is the input Au value, eventually this holds the remainder
+!!
+$LIDMCUSetup:
+ CMP D1Ar1,D1Re0 ! Is ( Au < Bu )?
+ MOV D0Ar6,#1 ! Set curbit to 1
+ BCS $LIDMCRet ! Yes: Return 0 remainder Au
+!!
+!! Calculate alignment using FFB instruction
+!!
+ FFB D1Ar5,D1Ar1 ! Find first bit of Au
+ ANDN D1Ar5,D1Ar5,#31 ! Handle exceptional case.
+ ORN D1Ar5,D1Ar5,#31 ! if N bit set, set to 31
+ FFB D1Ar3,D1Re0 ! Find first bit of Bu
+ ANDN D1Ar3,D1Ar3,#31 ! Handle exceptional case.
+ ORN D1Ar3,D1Ar3,#31 ! if N bit set, set to 31
+ SUBS D1Ar3,D1Ar5,D1Ar3 ! calculate diff, ffbA - ffbB
+ MOV D0Ar2,D1Ar3 ! copy into bank 0
+ LSLGT D1Re0,D1Re0,D1Ar3 ! ( > 0) ? left shift B
+ LSLGT D0Ar6,D0Ar6,D0Ar2 ! ( > 0) ? left shift curbit
+!!
+!! Now we start the divide proper, logic is
+!!
+!! if ( A >= B ) add curbit to result and subtract B from A
+!! shift curbit and B down by 1 in either case
+!!
+$LIDMCLoop:
+ CMP D1Ar1, D1Re0 ! ( A >= B )?
+ ADDCC D0Re0, D0Re0, D0Ar6 ! If yes result += curbit
+ SUBCC D1Ar1, D1Ar1, D1Re0 ! and A -= B
+ LSRS D0Ar6, D0Ar6, #1 ! Shift down curbit, is it zero?
+ LSR D1Re0, D1Re0, #1 ! Shift down B
+ BNZ $LIDMCLoop ! Was single bit in curbit lost?
+ ORS D0Ar4,D0Ar4,D0Ar4 ! Return neg result?
+ NEG D0Ar2,D0Re0 ! Calculate neg result
+ MOVMI D0Re0,D0Ar2 ! Yes: Take neg result
+ MOV PC,D1RtP
+ .size ___divsi3,.-___divsi3
diff --git a/arch/metag/lib/ip_fast_csum.S b/arch/metag/lib/ip_fast_csum.S
new file mode 100644
index 00000000000..533b1e73dea
--- /dev/null
+++ b/arch/metag/lib/ip_fast_csum.S
@@ -0,0 +1,32 @@
+
+ .text
+/*
+ * This is a version of ip_compute_csum() optimized for IP headers,
+ * which always checksum on 4 octet boundaries.
+ *
+ * extern __sum16 ip_fast_csum(const void *iph, unsigned int ihl);
+ *
+ * As used below: D1Ar1 = iph, D0Ar2 = ihl (a count of 32-bit words),
+ * result in D0Re0. D1Ar3 and D0Ar4 are scratch. If ihl is 0 the loop is
+ * skipped and 0 is returned.
+ */
+ .global _ip_fast_csum
+ .type _ip_fast_csum,function
+_ip_fast_csum:
+ !! TXRPT needs loops - 1
+ SUBS TXRPT,D0Ar2,#1
+ MOV D0Re0,#0
+ BLO $Lfast_csum_exit
+$Lfast_csum_loop:
+ !! add the next 32-bit word, wrapping the carry back into bit 0
+ GETD D1Ar3,[D1Ar1++]
+ ADDS D0Re0,D0Re0,D1Ar3
+ ADDCS D0Re0,D0Re0,#1
+ BR $Lfast_csum_loop
+ !! fold the 32-bit sum to 16 bits, twice to absorb the carry
+ LSR D0Ar4,D0Re0,#16
+ AND D0Re0,D0Re0,#0xffff
+ AND D0Ar4,D0Ar4,#0xffff
+ ADD D0Re0,D0Re0,D0Ar4
+ LSR D0Ar4,D0Re0,#16
+ ADD D0Re0,D0Re0,D0Ar4
+ !! complement and keep the low 16 bits
+ XOR D0Re0,D0Re0,#-1
+ AND D0Re0,D0Re0,#0xffff
+$Lfast_csum_exit:
+ MOV PC,D1RtP
+ .size _ip_fast_csum,.-_ip_fast_csum
diff --git a/arch/metag/lib/lshrdi3.S b/arch/metag/lib/lshrdi3.S
new file mode 100644
index 00000000000..47f72028307
--- /dev/null
+++ b/arch/metag/lib/lshrdi3.S
@@ -0,0 +1,33 @@
+! Copyright (C) 2012 by Imagination Technologies Ltd.
+!
+! 64-bit logical shift right routine.
+!
+
+ .text
+ .global ___lshrdi3
+ .type ___lshrdi3,function
+
+! ___lshrdi3: logical (zero-filling) shift of a 64-bit value right by COUNT
+! bits.
+!
+! In: D0Ar2 = low word, D1Ar1 = high word, D1Ar3 = COUNT
+! Out: D0Re0 = low word, D1Re0 = high word of the result
+! Scratch: D0Ar4, D0Ar6, D1Ar3
+___lshrdi3:
+ MOV D0Re0,D0Ar2
+ MOV D1Re0,D1Ar1
+ CMP D1Ar3,#0 ! COUNT == 0
+ MOVEQ PC,D1RtP ! Yes, return
+
+ MOV D0Ar4,D1Ar3 ! keep a copy of COUNT
+ SUBS D1Ar3,D1Ar3,#32 ! N = COUNT - 32
+ BGE $L30
+
+!! Shift < 32
+ NEG D1Ar3,D1Ar3 ! N = - N
+ LSR D0Re0,D0Re0,D0Ar4 ! LO = LO >> COUNT
+ LSL D0Ar6,D1Re0,D1Ar3 ! TMP= HI << -(COUNT - 32)
+ OR D0Re0,D0Re0,D0Ar6 ! LO = LO | TMP
+ SWAP D1Ar3,D0Ar4 ! bring COUNT into D1Ar3 for the HI shift
+ LSR D1Re0,D1Re0,D1Ar3 ! HI = HI >> COUNT
+ MOV PC,D1RtP
+$L30:
+!! Shift >= 32
+ LSR D0Re0,D1Re0,D1Ar3 ! LO = HI >> N
+ MOV D1Re0,#0 ! HI = 0
+ MOV PC,D1RtP
+ .size ___lshrdi3,.-___lshrdi3
diff --git a/arch/metag/lib/memcpy.S b/arch/metag/lib/memcpy.S
new file mode 100644
index 00000000000..46b7a2b9479
--- /dev/null
+++ b/arch/metag/lib/memcpy.S
@@ -0,0 +1,185 @@
+! Copyright (C) 2008-2012 Imagination Technologies Ltd.
+
+ .text
+ .global _memcpy
+ .type _memcpy,function
+! Copy cnt bytes from src to dst and return dst.
+!
+! Copies of fewer than 16 bytes go through the byte loop. Longer copies
+! first byte-copy until dst is 8 byte aligned, then move 8 byte units
+! (shifting and merging when src is not 8 byte aligned) and finish any
+! tail through the byte loop.
+!
+! D1Ar1 dst
+! D0Ar2 src
+! D1Ar3 cnt
+! D0Re0 dst
+_memcpy:
+ CMP D1Ar3, #16
+ MOV A1.2, D0Ar2 ! source pointer
+ MOV A0.2, D1Ar1 ! destination pointer
+ MOV A0.3, D1Ar1 ! for return value
+! If there are less than 16 bytes to copy use the byte copy loop
+ BGE $Llong_copy
+
+$Lbyte_copy:
+! Simply copy a byte at a time
+! (TXRPT = count - 1; nothing to do if the count was zero)
+ SUBS TXRPT, D1Ar3, #1
+ BLT $Lend
+$Lloop_byte:
+ GETB D1Re0, [A1.2++]
+ SETB [A0.2++], D1Re0
+ BR $Lloop_byte
+
+$Lend:
+! Finally set return value and return
+ MOV D0Re0, A0.3
+ MOV PC, D1RtP
+
+$Llong_copy:
+ ANDS D1Ar5, D1Ar1, #7 ! test destination alignment
+ BZ $Laligned_dst
+
+! The destination address is not 8 byte aligned. We will copy bytes from
+! the source to the destination until the remaining data has an 8 byte
+! destination address alignment (i.e we should never copy more than 7
+! bytes here).
+$Lalign_dst:
+ GETB D0Re0, [A1.2++]
+ ADD D1Ar5, D1Ar5, #1 ! dest is aligned when D1Ar5 reaches #8
+ SUB D1Ar3, D1Ar3, #1 ! decrement count of remaining bytes
+ SETB [A0.2++], D0Re0
+ CMP D1Ar5, #8
+ BNE $Lalign_dst
+
+! We have at least (16 - 7) = 9 bytes to copy - calculate the number of 8 byte
+! blocks, then jump to the unaligned copy loop or fall through to the aligned
+! copy loop as appropriate.
+$Laligned_dst:
+ MOV D0Ar4, A1.2
+ LSR D1Ar5, D1Ar3, #3 ! D1Ar5 = number of 8 byte blocks
+ ANDS D0Ar4, D0Ar4, #7 ! test source alignment
+ BNZ $Lunaligned_copy ! if unaligned, use unaligned copy loop
+
+! Both source and destination are 8 byte aligned - the easy case.
+$Laligned_copy:
+ LSRS D1Ar5, D1Ar3, #5 ! D1Ar5 = number of 32 byte blocks
+ BZ $Lbyte_copy
+ SUB TXRPT, D1Ar5, #1
+
+! Each iteration moves 32 bytes as four 8 byte transfers
+$Laligned_32:
+ GETL D0Re0, D1Re0, [A1.2++]
+ GETL D0Ar6, D1Ar5, [A1.2++]
+ SETL [A0.2++], D0Re0, D1Re0
+ SETL [A0.2++], D0Ar6, D1Ar5
+ GETL D0Re0, D1Re0, [A1.2++]
+ GETL D0Ar6, D1Ar5, [A1.2++]
+ SETL [A0.2++], D0Re0, D1Re0
+ SETL [A0.2++], D0Ar6, D1Ar5
+ BR $Laligned_32
+
+! If there are any remaining bytes use the byte copy loop, otherwise we are done
+ ANDS D1Ar3, D1Ar3, #0x1f
+ BNZ $Lbyte_copy
+ B $Lend
+
+! The destination is 8 byte aligned but the source is not, and there are 8
+! or more bytes to be copied.
+$Lunaligned_copy:
+! Adjust the source pointer (A1.2) to the 8 byte boundary before its
+! current value
+ MOV D0Ar4, A1.2
+ MOV D0Ar6, A1.2
+ ANDMB D0Ar4, D0Ar4, #0xfff8
+ MOV A1.2, D0Ar4
+! Save the number of bytes of mis-alignment in D0Ar4 for use later
+ SUBS D0Ar6, D0Ar6, D0Ar4
+ MOV D0Ar4, D0Ar6
+! if there is no mis-alignment after all, use the aligned copy loop
+ BZ $Laligned_copy
+
+! prefetch 8 bytes
+ GETL D0Re0, D1Re0, [A1.2]
+
+! one loop iteration per 8 byte block (D1Ar5 was set at $Laligned_dst)
+ SUB TXRPT, D1Ar5, #1
+
+! There are 3 mis-alignment cases to be considered. Less than 4 bytes, exactly
+! 4 bytes, and more than 4 bytes.
+ CMP D0Ar6, #4
+ BLT $Lunaligned_1_2_3 ! use 1-3 byte mis-alignment loop
+ BZ $Lunaligned_4 ! use 4 byte mis-alignment loop
+
+! The mis-alignment is more than 4 bytes
+$Lunaligned_5_6_7:
+ SUB D0Ar6, D0Ar6, #4
+! Calculate the bit offsets required for the shift operations necessary
+! to align the data.
+! D0Ar6 = bit offset, D1Ar5 = (32 - bit offset)
+ MULW D0Ar6, D0Ar6, #8
+ MOV D1Ar5, #32
+ SUB D1Ar5, D1Ar5, D0Ar6
+! Move data 4 bytes before we enter the main loop
+ MOV D0Re0, D1Re0
+
+$Lloop_5_6_7:
+ GETL D0Ar2, D1Ar1, [++A1.2]
+! form 64-bit data in D0Re0, D1Re0
+ LSR D0Re0, D0Re0, D0Ar6
+ MOV D1Re0, D0Ar2
+ LSL D1Re0, D1Re0, D1Ar5
+ ADD D0Re0, D0Re0, D1Re0
+
+ LSR D0Ar2, D0Ar2, D0Ar6
+ LSL D1Re0, D1Ar1, D1Ar5
+ ADD D1Re0, D1Re0, D0Ar2
+
+ SETL [A0.2++], D0Re0, D1Re0
+ MOV D0Re0, D1Ar1
+ BR $Lloop_5_6_7
+
+ B $Lunaligned_end
+
+$Lunaligned_1_2_3:
+! Calculate the bit offsets required for the shift operations necessary
+! to align the data.
+! D0Ar6 = bit offset, D1Ar5 = (32 - bit offset)
+ MULW D0Ar6, D0Ar6, #8
+ MOV D1Ar5, #32
+ SUB D1Ar5, D1Ar5, D0Ar6
+
+$Lloop_1_2_3:
+! form 64-bit data in D0Re0,D1Re0
+ LSR D0Re0, D0Re0, D0Ar6
+ LSL D1Ar1, D1Re0, D1Ar5
+ ADD D0Re0, D0Re0, D1Ar1
+ MOV D0Ar2, D1Re0
+ LSR D0FrT, D0Ar2, D0Ar6
+ GETL D0Ar2, D1Ar1, [++A1.2]
+
+ MOV D1Re0, D0Ar2
+ LSL D1Re0, D1Re0, D1Ar5
+ ADD D1Re0, D1Re0, D0FrT
+
+ SETL [A0.2++], D0Re0, D1Re0
+ MOV D0Re0, D0Ar2
+ MOV D1Re0, D1Ar1
+ BR $Lloop_1_2_3
+
+ B $Lunaligned_end
+
+! The 4 byte mis-alignment case - this does not require any shifting, just a
+! shuffling of registers.
+$Lunaligned_4:
+ MOV D0Re0, D1Re0
+$Lloop_4:
+ GETL D0Ar2, D1Ar1, [++A1.2]
+ MOV D1Re0, D0Ar2
+ SETL [A0.2++], D0Re0, D1Re0
+ MOV D0Re0, D1Ar1
+ BR $Lloop_4
+
+$Lunaligned_end:
+! If there are no remaining bytes to copy, we are done.
+ ANDS D1Ar3, D1Ar3, #7
+ BZ $Lend
+! Re-adjust the source pointer (A1.2) back to the actual (unaligned) byte
+! address of the remaining bytes, and fall through to the byte copy loop.
+ MOV D0Ar6, A1.2
+ ADD D1Ar5, D0Ar4, D0Ar6
+ MOV A1.2, D1Ar5
+ B $Lbyte_copy
+
+ .size _memcpy,.-_memcpy
diff --git a/arch/metag/lib/memmove.S b/arch/metag/lib/memmove.S
new file mode 100644
index 00000000000..228ea04d7b3
--- /dev/null
+++ b/arch/metag/lib/memmove.S
@@ -0,0 +1,345 @@
+! Copyright (C) 2008-2012 Imagination Technologies Ltd.
+
+ .text
+ .global _memmove
+ .type _memmove,function
+! D1Ar1 dst (destination pointer)
+! D0Ar2 src (source pointer)
+! D1Ar3 cnt (byte count)
+! D0Re0 dst (result; loaded from D1Ar1 on entry)
+_memmove:
+ CMP D1Ar3, #0
+ MOV D0Re0, D1Ar1
+ BZ $LEND2 ! cnt == 0: nothing to copy, skip the register save/restore
+ MSETL [A0StP], D0.5, D0.6, D0.7 ! save 24 bytes of registers, reloaded at $LEND
+ MOV D1Ar5, D0Ar2
+ CMP D1Ar1, D1Ar5
+ BLT $Lforwards_copy ! dst below src: copy forwards
+ SUB D0Ar4, D1Ar1, D1Ar3
+ ADD D0Ar4, D0Ar4, #1
+ CMP D0Ar2, D0Ar4
+ BLT $Lforwards_copy ! src + cnt <= dst (no overlap): copy forwards
+ ! dst is above src and the regions overlap: copy backwards
+ MOV D1Re0, D0Ar2
+ ! adjust both pointers to the end of their regions
+ ADD D0Ar2, D1Re0, D1Ar3
+ ADD D1Ar1, D1Ar1, D1Ar3
+
+ MOV A1.2, D0Ar2
+ MOV A0.2, D1Ar1
+ CMP D1Ar3, #8
+ BLT $Lbbyte_loop
+
+ MOV D0Ar4, D0Ar2
+ MOV D1Ar5, D1Ar1
+
+ ! test 8 byte alignment
+ ANDS D1Ar5, D1Ar5, #7
+ BNE $Lbdest_unaligned
+
+ ANDS D0Ar4, D0Ar4, #7
+ BNE $Lbsrc_unaligned
+
+ LSR D1Ar5, D1Ar3, #3
+
+$Lbaligned_loop:
+ GETL D0Re0, D1Re0, [--A1.2]
+ SETL [--A0.2], D0Re0, D1Re0
+ SUBS D1Ar5, D1Ar5, #1
+ BNE $Lbaligned_loop
+
+ ANDS D1Ar3, D1Ar3, #7
+ BZ $Lbbyte_loop_exit
+$Lbbyte_loop:
+ GETB D1Re0, [--A1.2]
+ SETB [--A0.2], D1Re0
+ SUBS D1Ar3, D1Ar3, #1
+ BNE $Lbbyte_loop
+$Lbbyte_loop_exit:
+ MOV D0Re0, A0.2 ! A0.2 has walked back down to dst
+$LEND:
+ SUB A0.2, A0StP, #24 ! A0.2 = base of the registers saved on entry
+ MGETL D0.5, D0.6, D0.7, [A0.2]
+ SUB A0StP, A0StP, #24 ! drop the save area
+$LEND2:
+ MOV PC, D1RtP ! return
+
+$Lbdest_unaligned:
+ GETB D0Re0, [--A1.2]
+ SETB [--A0.2], D0Re0
+ SUBS D1Ar5, D1Ar5, #1
+ SUB D1Ar3, D1Ar3, #1
+ BNE $Lbdest_unaligned
+ CMP D1Ar3, #8
+ BLT $Lbbyte_loop
+$Lbsrc_unaligned:
+ LSR D1Ar5, D1Ar3, #3
+ ! adjust A1.2
+ MOV D0Ar4, A1.2
+ ! save original address
+ MOV D0Ar6, A1.2
+
+ ADD D0Ar4, D0Ar4, #7
+ ANDMB D0Ar4, D0Ar4, #0xfff8
+ ! new address is the 8-byte aligned one above the original
+ MOV A1.2, D0Ar4
+
+ ! A0.2 dst 64-bit is aligned
+ ! measure the gap size
+ SUB D0Ar6, D0Ar4, D0Ar6
+ MOVS D0Ar4, D0Ar6
+ ! keep this information for the later adjustment
+ ! both aligned
+ BZ $Lbaligned_loop
+
+ ! prefetch
+ GETL D0Re0, D1Re0, [--A1.2]
+
+ CMP D0Ar6, #4
+ BLT $Lbunaligned_1_2_3
+ ! 32-bit aligned
+ BZ $Lbaligned_4
+
+ SUB D0Ar6, D0Ar6, #4
+ ! D1.6 stores the gap size in bits
+ MULW D1.6, D0Ar6, #8
+ MOV D0.6, #32
+ ! D0.6 stores the complement of the gap size
+ SUB D0.6, D0.6, D1.6
+
+$Lbunaligned_5_6_7:
+ GETL D0.7, D1.7, [--A1.2]
+ ! form 64-bit data in D0Re0, D1Re0
+ MOV D1Re0, D0Re0
+ ! D1Re0 << gap-size
+ LSL D1Re0, D1Re0, D1.6
+ MOV D0Re0, D1.7
+ ! D0Re0 >> complement
+ LSR D0Re0, D0Re0, D0.6
+ MOV D1.5, D0Re0
+ ! combine both parts
+ ADD D1Re0, D1Re0, D1.5
+
+ MOV D1.5, D1.7
+ LSL D1.5, D1.5, D1.6
+ MOV D0Re0, D0.7
+ LSR D0Re0, D0Re0, D0.6
+ MOV D0.5, D1.5
+ ADD D0Re0, D0Re0, D0.5
+
+ SETL [--A0.2], D0Re0, D1Re0
+ MOV D0Re0, D0.7
+ MOV D1Re0, D1.7
+ SUBS D1Ar5, D1Ar5, #1
+ BNE $Lbunaligned_5_6_7
+
+ ANDS D1Ar3, D1Ar3, #7
+ BZ $Lbbyte_loop_exit
+ ! Adjust A1.2
+ ! A1.2 <- A1.2 +8 - gapsize
+ ADD A1.2, A1.2, #8
+ SUB A1.2, A1.2, D0Ar4
+ B $Lbbyte_loop
+
+$Lbunaligned_1_2_3:
+ MULW D1.6, D0Ar6, #8
+ MOV D0.6, #32
+ SUB D0.6, D0.6, D1.6
+
+$Lbunaligned_1_2_3_loop:
+ GETL D0.7, D1.7, [--A1.2]
+ ! form 64-bit data in D0Re0, D1Re0
+ LSL D1Re0, D1Re0, D1.6
+ ! save D0Re0 for later use
+ MOV D0.5, D0Re0
+ LSR D0Re0, D0Re0, D0.6
+ MOV D1.5, D0Re0
+ ADD D1Re0, D1Re0, D1.5
+
+ ! original data in D0Re0
+ MOV D1.5, D0.5
+ LSL D1.5, D1.5, D1.6
+ MOV D0Re0, D1.7
+ LSR D0Re0, D0Re0, D0.6
+ MOV D0.5, D1.5
+ ADD D0Re0, D0Re0, D0.5
+
+ SETL [--A0.2], D0Re0, D1Re0
+ MOV D0Re0, D0.7
+ MOV D1Re0, D1.7
+ SUBS D1Ar5, D1Ar5, #1
+ BNE $Lbunaligned_1_2_3_loop
+
+ ANDS D1Ar3, D1Ar3, #7
+ BZ $Lbbyte_loop_exit
+ ! Adjust A1.2
+ ADD A1.2, A1.2, #8
+ SUB A1.2, A1.2, D0Ar4
+ B $Lbbyte_loop
+
+$Lbaligned_4:
+ GETL D0.7, D1.7, [--A1.2]
+ MOV D1Re0, D0Re0
+ MOV D0Re0, D1.7
+ SETL [--A0.2], D0Re0, D1Re0
+ MOV D0Re0, D0.7
+ MOV D1Re0, D1.7
+ SUBS D1Ar5, D1Ar5, #1
+ BNE $Lbaligned_4
+ ANDS D1Ar3, D1Ar3, #7
+ BZ $Lbbyte_loop_exit
+ ! Adjust A1.2
+ ADD A1.2, A1.2, #8
+ SUB A1.2, A1.2, D0Ar4
+ B $Lbbyte_loop
+
+$Lforwards_copy:
+ MOV A1.2, D0Ar2
+ MOV A0.2, D1Ar1
+ CMP D1Ar3, #8
+ BLT $Lfbyte_loop
+
+ MOV D0Ar4, D0Ar2
+ MOV D1Ar5, D1Ar1
+
+ ANDS D1Ar5, D1Ar5, #7
+ BNE $Lfdest_unaligned
+
+ ANDS D0Ar4, D0Ar4, #7
+ BNE $Lfsrc_unaligned
+
+ LSR D1Ar5, D1Ar3, #3
+
+$Lfaligned_loop:
+ GETL D0Re0, D1Re0, [A1.2++]
+ SUBS D1Ar5, D1Ar5, #1
+ SETL [A0.2++], D0Re0, D1Re0
+ BNE $Lfaligned_loop
+
+ ANDS D1Ar3, D1Ar3, #7
+ BZ $Lfbyte_loop_exit
+$Lfbyte_loop:
+ GETB D1Re0, [A1.2++]
+ SETB [A0.2++], D1Re0
+ SUBS D1Ar3, D1Ar3, #1
+ BNE $Lfbyte_loop
+$Lfbyte_loop_exit:
+ MOV D0Re0, D1Ar1 ! D0Re0 was used as scratch: reload dst
+ B $LEND
+
+$Lfdest_unaligned:
+ GETB D0Re0, [A1.2++]
+ ADD D1Ar5, D1Ar5, #1
+ SUB D1Ar3, D1Ar3, #1
+ SETB [A0.2++], D0Re0
+ CMP D1Ar5, #8
+ BNE $Lfdest_unaligned
+ CMP D1Ar3, #8
+ BLT $Lfbyte_loop
+$Lfsrc_unaligned:
+ ! adjust A1.2
+ LSR D1Ar5, D1Ar3, #3
+
+ MOV D0Ar4, A1.2
+ MOV D0Ar6, A1.2
+ ANDMB D0Ar4, D0Ar4, #0xfff8
+ MOV A1.2, D0Ar4
+
+ ! A0.2 dst 64-bit is aligned
+ SUB D0Ar6, D0Ar6, D0Ar4
+ ! keep the information for the later adjustment
+ MOVS D0Ar4, D0Ar6
+
+ ! both aligned
+ BZ $Lfaligned_loop
+
+ ! prefetch
+ GETL D0Re0, D1Re0, [A1.2]
+
+ CMP D0Ar6, #4
+ BLT $Lfunaligned_1_2_3
+ BZ $Lfaligned_4
+
+ SUB D0Ar6, D0Ar6, #4
+ MULW D0.6, D0Ar6, #8
+ MOV D1.6, #32
+ SUB D1.6, D1.6, D0.6
+
+$Lfunaligned_5_6_7:
+ GETL D0.7, D1.7, [++A1.2]
+ ! form 64-bit data in D0Re0, D1Re0
+ MOV D0Re0, D1Re0
+ LSR D0Re0, D0Re0, D0.6
+ MOV D1Re0, D0.7
+ LSL D1Re0, D1Re0, D1.6
+ MOV D0.5, D1Re0
+ ADD D0Re0, D0Re0, D0.5
+
+ MOV D0.5, D0.7
+ LSR D0.5, D0.5, D0.6
+ MOV D1Re0, D1.7
+ LSL D1Re0, D1Re0, D1.6
+ MOV D1.5, D0.5
+ ADD D1Re0, D1Re0, D1.5
+
+ SETL [A0.2++], D0Re0, D1Re0
+ MOV D0Re0, D0.7
+ MOV D1Re0, D1.7
+ SUBS D1Ar5, D1Ar5, #1
+ BNE $Lfunaligned_5_6_7
+
+ ANDS D1Ar3, D1Ar3, #7
+ BZ $Lfbyte_loop_exit
+ ! Adjust A1.2
+ ADD A1.2, A1.2, D0Ar4
+ B $Lfbyte_loop
+
+$Lfunaligned_1_2_3:
+ MULW D0.6, D0Ar6, #8
+ MOV D1.6, #32
+ SUB D1.6, D1.6, D0.6
+
+$Lfunaligned_1_2_3_loop:
+ GETL D0.7, D1.7, [++A1.2]
+ ! form 64-bit data in D0Re0, D1Re0
+ LSR D0Re0, D0Re0, D0.6
+ MOV D1.5, D1Re0
+ LSL D1Re0, D1Re0, D1.6
+ MOV D0.5, D1Re0
+ ADD D0Re0, D0Re0, D0.5
+
+ MOV D0.5, D1.5
+ LSR D0.5, D0.5, D0.6
+ MOV D1Re0, D0.7
+ LSL D1Re0, D1Re0, D1.6
+ MOV D1.5, D0.5
+ ADD D1Re0, D1Re0, D1.5
+
+ SETL [A0.2++], D0Re0, D1Re0
+ MOV D0Re0, D0.7
+ MOV D1Re0, D1.7
+ SUBS D1Ar5, D1Ar5, #1
+ BNE $Lfunaligned_1_2_3_loop
+
+ ANDS D1Ar3, D1Ar3, #7
+ BZ $Lfbyte_loop_exit
+ ! Adjust A1.2
+ ADD A1.2, A1.2, D0Ar4
+ B $Lfbyte_loop
+
+$Lfaligned_4:
+ GETL D0.7, D1.7, [++A1.2]
+ MOV D0Re0, D1Re0
+ MOV D1Re0, D0.7
+ SETL [A0.2++], D0Re0, D1Re0
+ MOV D0Re0, D0.7
+ MOV D1Re0, D1.7
+ SUBS D1Ar5, D1Ar5, #1
+ BNE $Lfaligned_4
+ ANDS D1Ar3, D1Ar3, #7
+ BZ $Lfbyte_loop_exit
+ ! Adjust A1.2
+ ADD A1.2, A1.2, D0Ar4
+ B $Lfbyte_loop
+
+ .size _memmove,.-_memmove
diff --git a/arch/metag/lib/memset.S b/arch/metag/lib/memset.S
new file mode 100644
index 00000000000..721085bad1d
--- /dev/null
+++ b/arch/metag/lib/memset.S
@@ -0,0 +1,86 @@
+! Copyright (C) 2008-2012 Imagination Technologies Ltd.
+
+ .text
+ .global _memset
+ .type _memset,function
+! D1Ar1 dst (destination pointer, advanced as bytes are written)
+! D0Ar2 c (fill value; only the low byte is used)
+! D1Ar3 cnt (byte count)
+! D0Re0 dst (result: original destination pointer)
+_memset:
+ AND D0Ar2,D0Ar2,#0xFF ! Ensure a byte input value
+ MULW D0Ar2,D0Ar2,#0x0101 ! Duplicate byte value into 0-15
+ ANDS D0Ar4,D1Ar1,#7 ! Extract bottom LSBs of dst
+ LSL D0Re0,D0Ar2,#16 ! Duplicate byte value into 16-31
+ ADD A0.2,D0Ar2,D0Re0 ! Duplicate byte value into 4 (A0.2)
+ MOV D0Re0,D1Ar1 ! Return dst
+ BZ $LLongStub ! if start address is aligned
+ ! start address is not aligned on an 8 byte boundary, so we
+ ! need the number of bytes up to the next 8 byte address
+ ! boundary, or the length of the string if less than 8, in D1Ar5
+ MOV D0Ar2,#8 ! Need 8 - N in D1Ar5 ...
+ SUB D1Ar5,D0Ar2,D0Ar4 ! ... subtract N
+ CMP D1Ar3,D1Ar5
+ MOVMI D1Ar5,D1Ar3 ! cnt is smaller: burst only cnt bytes
+ B $LByteStub ! dst is mis-aligned, do $LByteStub
+
+!
+! Preamble to LongLoop which generates 4*8 bytes per iteration (5 cycles)
+!
+$LLongStub:
+ LSRS D0Ar2,D1Ar3,#5
+ AND D1Ar3,D1Ar3,#0x1F
+ MOV A1.2,A0.2
+ BEQ $LLongishStub
+ SUB TXRPT,D0Ar2,#1
+ CMP D1Ar3,#0 ! sets Z for the BZ after the loop (nothing left)
+$LLongLoop:
+ SETL [D1Ar1++],A0.2,A1.2
+ SETL [D1Ar1++],A0.2,A1.2
+ SETL [D1Ar1++],A0.2,A1.2
+ SETL [D1Ar1++],A0.2,A1.2
+ BR $LLongLoop
+ BZ $Lexit
+!
+! Preamble to LongishLoop which generates 1*8 bytes per iteration (2 cycles)
+!
+$LLongishStub:
+ LSRS D0Ar2,D1Ar3,#3
+ AND D1Ar3,D1Ar3,#0x7
+ MOV D1Ar5,D1Ar3
+ BEQ $LByteStub
+ SUB TXRPT,D0Ar2,#1
+ CMP D1Ar3,#0 ! sets Z for the BZ after the loop (nothing left)
+$LLongishLoop:
+ SETL [D1Ar1++],A0.2,A1.2
+ BR $LLongishLoop
+ BZ $Lexit
+!
+! This does a byte structured burst of up to 7 bytes
+!
+! D1Ar1 should point to the location required
+! D1Ar3 should be the remaining total byte count
+! D1Ar5 should be burst size (<= D1Ar3)
+!
+$LByteStub:
+ SUBS D1Ar3,D1Ar3,D1Ar5 ! Reduce count (flags feed the BNZ below)
+ ADD D1Ar1,D1Ar1,D1Ar5 ! Advance pointer to end of area
+ MULW D1Ar5,D1Ar5,#4 ! Scale to (1*4), (2*4), (3*4)
+ SUB D1Ar5,D1Ar5,#(8*4) ! Rebase to -(7*4), -(6*4), -(5*4), ...
+ MOV A1.2,D1Ar5
+ SUB PC,CPC1,A1.2 ! Jump into table below
+ SETB [D1Ar1+#(-7)],A0.2
+ SETB [D1Ar1+#(-6)],A0.2
+ SETB [D1Ar1+#(-5)],A0.2
+ SETB [D1Ar1+#(-4)],A0.2
+ SETB [D1Ar1+#(-3)],A0.2
+ SETB [D1Ar1+#(-2)],A0.2
+ SETB [D1Ar1+#(-1)],A0.2
+!
+! Return if all data has been output, otherwise do $LLongStub
+!
+ BNZ $LLongStub
+$Lexit:
+ MOV PC,D1RtP
+ .size _memset,.-_memset
+
diff --git a/arch/metag/lib/modsi3.S b/arch/metag/lib/modsi3.S
new file mode 100644
index 00000000000..210cfa85659
--- /dev/null
+++ b/arch/metag/lib/modsi3.S
@@ -0,0 +1,38 @@
+! Copyright (C) 2001, 2002, 2003, 2004, 2005, 2006, 2007
+! Imagination Technologies Ltd
+!
+! Integer modulus routines.
+!
+!!
+!! 32-bit modulus unsigned i/p - passed unsigned 32-bit numbers
+!!
+ .text
+ .global ___umodsi3
+ .type ___umodsi3,function
+ .align 2
+___umodsi3:
+ MOV D0FrT,D1RtP ! Save original return address (CALLR overwrites D1RtP)
+ CALLR D1RtP,___udivsi3 ! NOTE(review): relies on the divide leaving the remainder in D1Ar1 - confirm
+ MOV D1RtP,D0FrT ! Recover return address
+ MOV D0Re0,D1Ar1 ! Return remainder
+ MOV PC,D1RtP
+ .size ___umodsi3,.-___umodsi3
+
+!!
+!! 32-bit modulus signed i/p - passed signed 32-bit numbers
+!!
+ .global ___modsi3
+ .type ___modsi3,function
+ .align 2
+___modsi3:
+ MOV D0FrT,D1RtP ! Save original return address (CALLR overwrites D1RtP)
+ MOV A0.2,D1Ar1 ! Save A in A0.2
+ CALLR D1RtP,___divsi3 ! NOTE(review): relies on the divide leaving |remainder| in D1Ar1 - confirm
+ MOV D1RtP,D0FrT ! Recover return address
+ MOV D1Re0,A0.2 ! Recover A
+ MOV D0Re0,D1Ar1 ! Return remainder
+ ORS D1Re0,D1Re0,D1Re0 ! Was A negative?
+ NEG D1Ar1,D1Ar1 ! Negate remainder (no S suffix: ORS flags kept)
+ MOVMI D0Re0,D1Ar1 ! Return neg remainder
+ MOV PC, D1RtP
+ .size ___modsi3,.-___modsi3
diff --git a/arch/metag/lib/muldi3.S b/arch/metag/lib/muldi3.S
new file mode 100644
index 00000000000..ee66ca8644d
--- /dev/null
+++ b/arch/metag/lib/muldi3.S
@@ -0,0 +1,44 @@
+! Copyright (C) 2012 by Imagination Technologies Ltd.
+!
+! 64-bit multiply routine.
+!
+
+!
+! 64-bit signed/unsigned multiply
+!
+! A = D1Ar1:D0Ar2 = a 2^48 + b 2^32 + c 2^16 + d 2^0
+!
+! B = D1Ar3:D0Ar4 = w 2^48 + x 2^32 + y 2^16 + z 2^0
+!
+ .text
+ .global ___muldi3
+ .type ___muldi3,function
+
+___muldi3:
+ MULD D1Re0,D1Ar1,D0Ar4 ! (a 2^48 + b 2^32)(y 2^16 + z 2^0)
+ MULD D0Re0,D0Ar2,D1Ar3 ! (w 2^48 + x 2^32)(c 2^16 + d 2^0)
+ ADD D1Re0,D1Re0,D0Re0 ! high word = sum of both cross products
+
+ MULW D0Re0,D0Ar2,D0Ar4 ! (d 2^0) * (z 2^0)
+
+ RTDW D0Ar2,D0Ar2 ! presumably swaps the 16-bit halves (c now low) - confirm
+ MULW D0Ar6,D0Ar2,D0Ar4 ! (c 2^16)(z 2^0)
+ LSR D1Ar5,D0Ar6,#16 ! upper 16 bits belong to the high word
+ LSL D0Ar6,D0Ar6,#16 ! lower 16 bits belong to the low word
+ ADDS D0Re0,D0Re0,D0Ar6
+ ADDCS D1Re0,D1Re0,#1 ! carry out of the low word
+ RTDW D0Ar4,D0Ar4 ! presumably swaps the 16-bit halves (y now low) - confirm
+ ADD D1Re0,D1Re0,D1Ar5
+
+ MULW D0Ar6,D0Ar2,D0Ar4 ! (c 2^16)(y 2^16)
+ ADD D1Re0,D1Re0,D0Ar6 ! weight 2^32: goes entirely into the high word
+
+ RTDW D0Ar2,D0Ar2 ! swap back (d low again)
+ MULW D0Ar6,D0Ar2,D0Ar4 ! (d 2^0)(y 2^16)
+ LSR D1Ar5,D0Ar6,#16
+ LSL D0Ar6,D0Ar6,#16
+ ADDS D0Re0,D0Re0,D0Ar6
+ ADD D1Re0,D1Re0,D1Ar5 ! no S suffix: carry from ADDS kept for ADDCS
+ ADDCS D1Re0,D1Re0,#1 ! carry out of the low word
+ MOV PC, D1RtP ! return, result in D1Re0:D0Re0
+ .size ___muldi3,.-___muldi3
diff --git a/arch/metag/lib/ucmpdi2.S b/arch/metag/lib/ucmpdi2.S
new file mode 100644
index 00000000000..6f3347f7dae
--- /dev/null
+++ b/arch/metag/lib/ucmpdi2.S
@@ -0,0 +1,27 @@
+! Copyright (C) 2012 by Imagination Technologies Ltd.
+!
+! 64-bit unsigned compare routine.
+!
+
+ .text
+ .global ___ucmpdi2
+ .type ___ucmpdi2,function
+
+! low high
+! u64 a (D0Ar2, D1Ar1)
+! u64 b (D0Ar4, D1Ar3)
+___ucmpdi2:
+ ! start at 1 (equal) and conditionally increment or decrement
+ MOV D0Re0,#1
+
+ ! high words
+ CMP D1Ar1,D1Ar3
+ ! or if equal, low words
+ CMPEQ D0Ar2,D0Ar4
+
+ ! unsigned compare
+ SUBLO D0Re0,D0Re0,#1 ! a < b: result 0
+ ADDHI D0Re0,D0Re0,#1 ! a > b: result 2
+
+ MOV PC,D1RtP ! return D0Re0 (0, 1 or 2)
+ .size ___ucmpdi2,.-___ucmpdi2
diff --git a/arch/metag/lib/usercopy.c b/arch/metag/lib/usercopy.c
new file mode 100644
index 00000000000..b3ebfe9c8e8
--- /dev/null
+++ b/arch/metag/lib/usercopy.c
@@ -0,0 +1,1354 @@
+/*
+ * User address space access functions.
+ * The non-inlined parts of asm-metag/uaccess.h are here.
+ *
+ * Copyright (C) 2006, Imagination Technologies.
+ * Copyright (C) 2000, Axis Communications AB.
+ *
+ * Written by Hans-Peter Nilsson.
+ * Pieces used from memcpy, originally by Kenny Ranerup long time ago.
+ * Modified for Meta by Will Newton.
+ */
+
+#include <linux/export.h>
+#include <linux/uaccess.h>
+#include <asm/cache.h> /* def of L1_CACHE_BYTES */
+
+#define USE_RAPF
+#define RAPF_MIN_BUF_SIZE (3*L1_CACHE_BYTES)
+
+
+/* The "double write" in this code is because the Meta will not fault
+ * immediately unless the memory pipe is forced to by e.g. a data stall or
+ * another memory op. The second write should be discarded by the write
+ * combiner so should have virtually no cost.
+ */
+
+#define __asm_copy_user_cont(to, from, ret, COPY, FIXUP, TENTRY) \
+ asm volatile ( \
+ COPY \
+ "1:\n" \
+ " .section .fixup,\"ax\"\n" \
+ " MOV D1Ar1,#0\n" \
+ FIXUP \
+ " MOVT D1Ar1,#HI(1b)\n" \
+ " JUMP D1Ar1,#LO(1b)\n" \
+ " .previous\n" \
+ " .section __ex_table,\"a\"\n" \
+ TENTRY \
+ " .previous\n" \
+ : "=r" (to), "=r" (from), "=r" (ret) \
+ : "0" (to), "1" (from), "2" (ret) \
+ : "D1Ar1", "memory")
+
+
+#define __asm_copy_to_user_1(to, from, ret) \
+ __asm_copy_user_cont(to, from, ret, \
+ " GETB D1Ar1,[%1++]\n" \
+ " SETB [%0],D1Ar1\n" \
+ "2: SETB [%0++],D1Ar1\n", \
+ "3: ADD %2,%2,#1\n", \
+ " .long 2b,3b\n")
+
+#define __asm_copy_to_user_2x_cont(to, from, ret, COPY, FIXUP, TENTRY) \
+ __asm_copy_user_cont(to, from, ret, \
+ " GETW D1Ar1,[%1++]\n" \
+ " SETW [%0],D1Ar1\n" \
+ "2: SETW [%0++],D1Ar1\n" COPY, \
+ "3: ADD %2,%2,#2\n" FIXUP, \
+ " .long 2b,3b\n" TENTRY)
+
+#define __asm_copy_to_user_2(to, from, ret) \
+ __asm_copy_to_user_2x_cont(to, from, ret, "", "", "")
+
+#define __asm_copy_to_user_3(to, from, ret) \
+ __asm_copy_to_user_2x_cont(to, from, ret, \
+ " GETB D1Ar1,[%1++]\n" \
+ " SETB [%0],D1Ar1\n" \
+ "4: SETB [%0++],D1Ar1\n", \
+ "5: ADD %2,%2,#1\n", \
+ " .long 4b,5b\n")
+
+#define __asm_copy_to_user_4x_cont(to, from, ret, COPY, FIXUP, TENTRY) \
+ __asm_copy_user_cont(to, from, ret, \
+ " GETD D1Ar1,[%1++]\n" \
+ " SETD [%0],D1Ar1\n" \
+ "2: SETD [%0++],D1Ar1\n" COPY, \
+ "3: ADD %2,%2,#4\n" FIXUP, \
+ " .long 2b,3b\n" TENTRY)
+
+#define __asm_copy_to_user_4(to, from, ret) \
+ __asm_copy_to_user_4x_cont(to, from, ret, "", "", "")
+
+#define __asm_copy_to_user_5(to, from, ret) \
+ __asm_copy_to_user_4x_cont(to, from, ret, \
+ " GETB D1Ar1,[%1++]\n" \
+ " SETB [%0],D1Ar1\n" \
+ "4: SETB [%0++],D1Ar1\n", \
+ "5: ADD %2,%2,#1\n", \
+ " .long 4b,5b\n")
+
+#define __asm_copy_to_user_6x_cont(to, from, ret, COPY, FIXUP, TENTRY) \
+ __asm_copy_to_user_4x_cont(to, from, ret, \
+ " GETW D1Ar1,[%1++]\n" \
+ " SETW [%0],D1Ar1\n" \
+ "4: SETW [%0++],D1Ar1\n" COPY, \
+ "5: ADD %2,%2,#2\n" FIXUP, \
+ " .long 4b,5b\n" TENTRY)
+
+#define __asm_copy_to_user_6(to, from, ret) \
+ __asm_copy_to_user_6x_cont(to, from, ret, "", "", "")
+
+#define __asm_copy_to_user_7(to, from, ret) \
+ __asm_copy_to_user_6x_cont(to, from, ret, \
+ " GETB D1Ar1,[%1++]\n" \
+ " SETB [%0],D1Ar1\n" \
+ "6: SETB [%0++],D1Ar1\n", \
+ "7: ADD %2,%2,#1\n", \
+ " .long 6b,7b\n")
+
+#define __asm_copy_to_user_8x_cont(to, from, ret, COPY, FIXUP, TENTRY) \
+ __asm_copy_to_user_4x_cont(to, from, ret, \
+ " GETD D1Ar1,[%1++]\n" \
+ " SETD [%0],D1Ar1\n" \
+ "4: SETD [%0++],D1Ar1\n" COPY, \
+ "5: ADD %2,%2,#4\n" FIXUP, \
+ " .long 4b,5b\n" TENTRY)
+
+#define __asm_copy_to_user_8(to, from, ret) \
+ __asm_copy_to_user_8x_cont(to, from, ret, "", "", "")
+
+#define __asm_copy_to_user_9(to, from, ret) \
+ __asm_copy_to_user_8x_cont(to, from, ret, \
+ " GETB D1Ar1,[%1++]\n" \
+ " SETB [%0],D1Ar1\n" \
+ "6: SETB [%0++],D1Ar1\n", \
+ "7: ADD %2,%2,#1\n", \
+ " .long 6b,7b\n")
+
+#define __asm_copy_to_user_10x_cont(to, from, ret, COPY, FIXUP, TENTRY) \
+ __asm_copy_to_user_8x_cont(to, from, ret, \
+ " GETW D1Ar1,[%1++]\n" \
+ " SETW [%0],D1Ar1\n" \
+ "6: SETW [%0++],D1Ar1\n" COPY, \
+ "7: ADD %2,%2,#2\n" FIXUP, \
+ " .long 6b,7b\n" TENTRY)
+
+#define __asm_copy_to_user_10(to, from, ret) \
+ __asm_copy_to_user_10x_cont(to, from, ret, "", "", "")
+
+#define __asm_copy_to_user_11(to, from, ret) \
+ __asm_copy_to_user_10x_cont(to, from, ret, \
+ " GETB D1Ar1,[%1++]\n" \
+ " SETB [%0],D1Ar1\n" \
+ "8: SETB [%0++],D1Ar1\n", \
+ "9: ADD %2,%2,#1\n", \
+ " .long 8b,9b\n")
+
+#define __asm_copy_to_user_12x_cont(to, from, ret, COPY, FIXUP, TENTRY) \
+ __asm_copy_to_user_8x_cont(to, from, ret, \
+ " GETD D1Ar1,[%1++]\n" \
+ " SETD [%0],D1Ar1\n" \
+ "6: SETD [%0++],D1Ar1\n" COPY, \
+ "7: ADD %2,%2,#4\n" FIXUP, \
+ " .long 6b,7b\n" TENTRY)
+#define __asm_copy_to_user_12(to, from, ret) \
+ __asm_copy_to_user_12x_cont(to, from, ret, "", "", "")
+
+#define __asm_copy_to_user_13(to, from, ret) \
+ __asm_copy_to_user_12x_cont(to, from, ret, \
+ " GETB D1Ar1,[%1++]\n" \
+ " SETB [%0],D1Ar1\n" \
+ "8: SETB [%0++],D1Ar1\n", \
+ "9: ADD %2,%2,#1\n", \
+ " .long 8b,9b\n")
+
+#define __asm_copy_to_user_14x_cont(to, from, ret, COPY, FIXUP, TENTRY) \
+ __asm_copy_to_user_12x_cont(to, from, ret, \
+ " GETW D1Ar1,[%1++]\n" \
+ " SETW [%0],D1Ar1\n" \
+ "8: SETW [%0++],D1Ar1\n" COPY, \
+ "9: ADD %2,%2,#2\n" FIXUP, \
+ " .long 8b,9b\n" TENTRY)
+
+#define __asm_copy_to_user_14(to, from, ret) \
+ __asm_copy_to_user_14x_cont(to, from, ret, "", "", "")
+
+#define __asm_copy_to_user_15(to, from, ret) \
+ __asm_copy_to_user_14x_cont(to, from, ret, \
+ " GETB D1Ar1,[%1++]\n" \
+ " SETB [%0],D1Ar1\n" \
+ "10: SETB [%0++],D1Ar1\n", \
+ "11: ADD %2,%2,#1\n", \
+ " .long 10b,11b\n")
+
+#define __asm_copy_to_user_16x_cont(to, from, ret, COPY, FIXUP, TENTRY) \
+ __asm_copy_to_user_12x_cont(to, from, ret, \
+ " GETD D1Ar1,[%1++]\n" \
+ " SETD [%0],D1Ar1\n" \
+ "8: SETD [%0++],D1Ar1\n" COPY, \
+ "9: ADD %2,%2,#4\n" FIXUP, \
+ " .long 8b,9b\n" TENTRY)
+
+#define __asm_copy_to_user_16(to, from, ret) \
+ __asm_copy_to_user_16x_cont(to, from, ret, "", "", "")
+
+#define __asm_copy_to_user_8x64(to, from, ret) \
+ asm volatile ( \
+ " GETL D0Ar2,D1Ar1,[%1++]\n" \
+ " SETL [%0],D0Ar2,D1Ar1\n" \
+ "2: SETL [%0++],D0Ar2,D1Ar1\n" \
+ "1:\n" \
+ " .section .fixup,\"ax\"\n" \
+ "3: ADD %2,%2,#8\n" \
+ " MOVT D0Ar2,#HI(1b)\n" \
+ " JUMP D0Ar2,#LO(1b)\n" \
+ " .previous\n" \
+ " .section __ex_table,\"a\"\n" \
+ " .long 2b,3b\n" \
+ " .previous\n" \
+ : "=r" (to), "=r" (from), "=r" (ret) \
+ : "0" (to), "1" (from), "2" (ret) \
+ : "D1Ar1", "D0Ar2", "memory")
+
+/*
+ * optimized copying loop using RAPF when 64 bit aligned
+ *
+ * n will be automatically decremented inside the loop
+ * ret will be left intact. if error occurs we will rewind
+ * so that the original non optimized code will fill up
+ * this value correctly.
+ *
+ * on fault:
+ * > n will hold total number of uncopied bytes
+ *
+ * > {'to','from'} will be rewound so that
+ * the non-optimized code will do the proper fix up
+ *
+ * DCACHE drops the cacheline which helps in reducing cache
+ * pollution.
+ *
+ * We introduce an extra SETL at the end of the loop to
+ * ensure we don't fall off the loop before we catch all
+ * errors.
+ *
+ * NOTICE:
+ * LSM_STEP in TXSTATUS must be cleared in fix up code.
+ * since we're using M{S,G}ETL, a fault might happen at
+ * any address in the middle of M{S,G}ETL causing
+ * the value of LSM_STEP to be incorrect which can
+ * cause subsequent use of M{S,G}ET{L,D} to go wrong.
+ * ie: if LSM_STEP was 1 when a fault occurs, the
+ * next call to M{S,G}ET{L,D} will skip the first
+ * copy/getting as it think that the first 1 has already
+ * been done.
+ *
+ */
+#define __asm_copy_user_64bit_rapf_loop( \
+ to, from, ret, n, id, FIXUP) \
+ asm volatile ( \
+ ".balign 8\n" \
+ "MOV RAPF, %1\n" \
+ "MSETL [A0StP++], D0Ar6, D0FrT, D0.5, D0.6, D0.7\n" \
+ "MOV D0Ar6, #0\n" \
+ "LSR D1Ar5, %3, #6\n" \
+ "SUB TXRPT, D1Ar5, #2\n" \
+ "MOV RAPF, %1\n" \
+ "$Lloop"id":\n" \
+ "ADD RAPF, %1, #64\n" \
+ "21:\n" \
+ "MGETL D0FrT, D0.5, D0.6, D0.7, [%1++]\n" \
+ "22:\n" \
+ "MSETL [%0++], D0FrT, D0.5, D0.6, D0.7\n" \
+ "SUB %3, %3, #32\n" \
+ "23:\n" \
+ "MGETL D0FrT, D0.5, D0.6, D0.7, [%1++]\n" \
+ "24:\n" \
+ "MSETL [%0++], D0FrT, D0.5, D0.6, D0.7\n" \
+ "SUB %3, %3, #32\n" \
+ "DCACHE [%1+#-64], D0Ar6\n" \
+ "BR $Lloop"id"\n" \
+ \
+ "MOV RAPF, %1\n" \
+ "25:\n" \
+ "MGETL D0FrT, D0.5, D0.6, D0.7, [%1++]\n" \
+ "26:\n" \
+ "MSETL [%0++], D0FrT, D0.5, D0.6, D0.7\n" \
+ "SUB %3, %3, #32\n" \
+ "27:\n" \
+ "MGETL D0FrT, D0.5, D0.6, D0.7, [%1++]\n" \
+ "28:\n" \
+ "MSETL [%0++], D0FrT, D0.5, D0.6, D0.7\n" \
+ "SUB %0, %0, #8\n" \
+ "29:\n" \
+ "SETL [%0++], D0.7, D1.7\n" \
+ "SUB %3, %3, #32\n" \
+ "1:" \
+ "DCACHE [%1+#-64], D0Ar6\n" \
+ "GETL D0Ar6, D1Ar5, [A0StP+#-40]\n" \
+ "GETL D0FrT, D1RtP, [A0StP+#-32]\n" \
+ "GETL D0.5, D1.5, [A0StP+#-24]\n" \
+ "GETL D0.6, D1.6, [A0StP+#-16]\n" \
+ "GETL D0.7, D1.7, [A0StP+#-8]\n" \
+ "SUB A0StP, A0StP, #40\n" \
+ " .section .fixup,\"ax\"\n" \
+ "4:\n" \
+ " ADD %0, %0, #8\n" \
+ "3:\n" \
+ " MOV D0Ar2, TXSTATUS\n" \
+ " MOV D1Ar1, TXSTATUS\n" \
+ " AND D1Ar1, D1Ar1, #0xFFFFF8FF\n" \
+ " MOV TXSTATUS, D1Ar1\n" \
+ FIXUP \
+ " MOVT D0Ar2,#HI(1b)\n" \
+ " JUMP D0Ar2,#LO(1b)\n" \
+ " .previous\n" \
+ " .section __ex_table,\"a\"\n" \
+ " .long 21b,3b\n" \
+ " .long 22b,3b\n" \
+ " .long 23b,3b\n" \
+ " .long 24b,3b\n" \
+ " .long 25b,3b\n" \
+ " .long 26b,3b\n" \
+ " .long 27b,3b\n" \
+ " .long 28b,3b\n" \
+ " .long 29b,4b\n" \
+ " .previous\n" \
+ : "=r" (to), "=r" (from), "=r" (ret), "=d" (n) \
+ : "0" (to), "1" (from), "2" (ret), "3" (n) \
+ : "D1Ar1", "D0Ar2", "memory")
+
+/* Rewind the 'to' and 'from' pointers when a fault occurs.
+ *
+ * Rationale:
+ * A fault always occurs on writing to the user buffer. A fault
+ * is at a single address, so 'to' is rewound by only one 8-byte
+ * store.
+ * Since we do a complete read from the kernel buffer before
+ * writing, 'from' must be rewound too. The amount to rewind
+ * equals the size of the writes in the MSETL that did not
+ * complete, which is: [4 - (LSM_STEP-1)]*8
+ * LSM_STEP is bits 10:8 of TXSTATUS, which the common fixup code
+ * has already read into D0Ar2.
+ *
+ * NOTE: If a fault occurs at the last operation in M{G,S}ETL
+ * LSM_STEP will be 0, i.e. with our 4 writes a fault at the 4th
+ * write gives LSM_STEP == 0 instead of 4. The ADDZ below maps 0
+ * to 4, so the mask must set the flags: use ANDS, not AND.
+ *
+ * n is reduced by the size of the successful writes, which is:
+ * n = n - (LSM_STEP-1)*8
+ */
+#define __asm_copy_to_user_64bit_rapf_loop(to, from, ret, n, id)\
+ __asm_copy_user_64bit_rapf_loop(to, from, ret, n, id, \
+ "LSR D0Ar2, D0Ar2, #8\n" \
+ "ANDS D0Ar2, D0Ar2, #0x7\n" \
+ "ADDZ D0Ar2, D0Ar2, #4\n" \
+ "SUB D0Ar2, D0Ar2, #1\n" \
+ "MOV D1Ar1, #4\n" \
+ "SUB D0Ar2, D1Ar1, D0Ar2\n" \
+ "LSL D0Ar2, D0Ar2, #3\n" \
+ "LSL D1Ar1, D1Ar1, #3\n" \
+ "SUB D1Ar1, D1Ar1, D0Ar2\n" \
+ "SUB %0, %0, #8\n" \
+ "SUB %1, %1,D0Ar2\n" \
+ "SUB %3, %3, D1Ar1\n")
+
+/*
+ * optimized copying loop using RAPF when 32 bit aligned
+ *
+ * n will be automatically decremented inside the loop
+ * ret will be left intact. if error occurs we will rewind
+ * so that the original non optimized code will fill up
+ * this value correctly.
+ *
+ * on fault:
+ * > n will hold total number of uncopied bytes
+ *
+ * > {'to','from'} will be rewound so that
+ * the non-optimized code will do the proper fix up
+ *
+ * DCACHE drops the cacheline which helps in reducing cache
+ * pollution.
+ *
+ * We introduce an extra SETD at the end of the loop to
+ * ensure we don't fall off the loop before we catch all
+ * errors.
+ *
+ * NOTICE:
+ * LSM_STEP in TXSTATUS must be cleared in fix up code.
+ * since we're using M{S,G}ETL, a fault might happen at
+ * any address in the middle of M{S,G}ETL causing
+ * the value of LSM_STEP to be incorrect which can
+ * cause subsequent use of M{S,G}ET{L,D} to go wrong.
+ * ie: if LSM_STEP was 1 when a fault occurs, the
+ * next call to M{S,G}ET{L,D} will skip the first
+ * copy/getting as it think that the first 1 has already
+ * been done.
+ *
+ */
+#define __asm_copy_user_32bit_rapf_loop( \
+ to, from, ret, n, id, FIXUP) \
+ asm volatile ( \
+ ".balign 8\n" \
+ "MOV RAPF, %1\n" \
+ "MSETL [A0StP++], D0Ar6, D0FrT, D0.5, D0.6, D0.7\n" \
+ "MOV D0Ar6, #0\n" \
+ "LSR D1Ar5, %3, #6\n" \
+ "SUB TXRPT, D1Ar5, #2\n" \
+ "MOV RAPF, %1\n" \
+ "$Lloop"id":\n" \
+ "ADD RAPF, %1, #64\n" \
+ "21:\n" \
+ "MGETD D0FrT, D0.5, D0.6, D0.7, [%1++]\n" \
+ "22:\n" \
+ "MSETD [%0++], D0FrT, D0.5, D0.6, D0.7\n" \
+ "SUB %3, %3, #16\n" \
+ "23:\n" \
+ "MGETD D0FrT, D0.5, D0.6, D0.7, [%1++]\n" \
+ "24:\n" \
+ "MSETD [%0++], D0FrT, D0.5, D0.6, D0.7\n" \
+ "SUB %3, %3, #16\n" \
+ "25:\n" \
+ "MGETD D0FrT, D0.5, D0.6, D0.7, [%1++]\n" \
+ "26:\n" \
+ "MSETD [%0++], D0FrT, D0.5, D0.6, D0.7\n" \
+ "SUB %3, %3, #16\n" \
+ "27:\n" \
+ "MGETD D0FrT, D0.5, D0.6, D0.7, [%1++]\n" \
+ "28:\n" \
+ "MSETD [%0++], D0FrT, D0.5, D0.6, D0.7\n" \
+ "SUB %3, %3, #16\n" \
+ "DCACHE [%1+#-64], D0Ar6\n" \
+ "BR $Lloop"id"\n" \
+ \
+ "MOV RAPF, %1\n" \
+ "29:\n" \
+ "MGETD D0FrT, D0.5, D0.6, D0.7, [%1++]\n" \
+ "30:\n" \
+ "MSETD [%0++], D0FrT, D0.5, D0.6, D0.7\n" \
+ "SUB %3, %3, #16\n" \
+ "31:\n" \
+ "MGETD D0FrT, D0.5, D0.6, D0.7, [%1++]\n" \
+ "32:\n" \
+ "MSETD [%0++], D0FrT, D0.5, D0.6, D0.7\n" \
+ "SUB %3, %3, #16\n" \
+ "33:\n" \
+ "MGETD D0FrT, D0.5, D0.6, D0.7, [%1++]\n" \
+ "34:\n" \
+ "MSETD [%0++], D0FrT, D0.5, D0.6, D0.7\n" \
+ "SUB %3, %3, #16\n" \
+ "35:\n" \
+ "MGETD D0FrT, D0.5, D0.6, D0.7, [%1++]\n" \
+ "36:\n" \
+ "MSETD [%0++], D0FrT, D0.5, D0.6, D0.7\n" \
+ "SUB %0, %0, #4\n" \
+ "37:\n" \
+ "SETD [%0++], D0.7\n" \
+ "SUB %3, %3, #16\n" \
+ "1:" \
+ "DCACHE [%1+#-64], D0Ar6\n" \
+ "GETL D0Ar6, D1Ar5, [A0StP+#-40]\n" \
+ "GETL D0FrT, D1RtP, [A0StP+#-32]\n" \
+ "GETL D0.5, D1.5, [A0StP+#-24]\n" \
+ "GETL D0.6, D1.6, [A0StP+#-16]\n" \
+ "GETL D0.7, D1.7, [A0StP+#-8]\n" \
+ "SUB A0StP, A0StP, #40\n" \
+ " .section .fixup,\"ax\"\n" \
+ "4:\n" \
+ " ADD %0, %0, #4\n" \
+ "3:\n" \
+ " MOV D0Ar2, TXSTATUS\n" \
+ " MOV D1Ar1, TXSTATUS\n" \
+ " AND D1Ar1, D1Ar1, #0xFFFFF8FF\n" \
+ " MOV TXSTATUS, D1Ar1\n" \
+ FIXUP \
+ " MOVT D0Ar2,#HI(1b)\n" \
+ " JUMP D0Ar2,#LO(1b)\n" \
+ " .previous\n" \
+ " .section __ex_table,\"a\"\n" \
+ " .long 21b,3b\n" \
+ " .long 22b,3b\n" \
+ " .long 23b,3b\n" \
+ " .long 24b,3b\n" \
+ " .long 25b,3b\n" \
+ " .long 26b,3b\n" \
+ " .long 27b,3b\n" \
+ " .long 28b,3b\n" \
+ " .long 29b,3b\n" \
+ " .long 30b,3b\n" \
+ " .long 31b,3b\n" \
+ " .long 32b,3b\n" \
+ " .long 33b,3b\n" \
+ " .long 34b,3b\n" \
+ " .long 35b,3b\n" \
+ " .long 36b,3b\n" \
+ " .long 37b,4b\n" \
+ " .previous\n" \
+ : "=r" (to), "=r" (from), "=r" (ret), "=d" (n) \
+ : "0" (to), "1" (from), "2" (ret), "3" (n) \
+ : "D1Ar1", "D0Ar2", "memory")
+
+/* Rewind the 'to' and 'from' pointers when a fault occurs.
+ *
+ * Rationale:
+ * A fault always occurs on writing to the user buffer. A fault
+ * is at a single address, so 'to' is rewound by only one 4-byte
+ * store.
+ * Since we do a complete read from the kernel buffer before
+ * writing, 'from' must be rewound too. The amount to rewind
+ * equals the size of the writes in the MSETD that did not
+ * complete, which is: [4 - (LSM_STEP-1)]*4
+ * LSM_STEP is bits 10:8 of TXSTATUS, which the common fixup code
+ * has already read into D0Ar2.
+ *
+ * NOTE: If a fault occurs at the last operation in M{G,S}ETD
+ * LSM_STEP will be 0, i.e. with our 4 writes a fault at the 4th
+ * write gives LSM_STEP == 0 instead of 4. The ADDZ below maps 0
+ * to 4, so the mask must set the flags: use ANDS, not AND.
+ *
+ * n is reduced by the size of the successful writes, which is:
+ * n = n - (LSM_STEP-1)*4
+ */
+#define __asm_copy_to_user_32bit_rapf_loop(to, from, ret, n, id)\
+ __asm_copy_user_32bit_rapf_loop(to, from, ret, n, id, \
+ "LSR D0Ar2, D0Ar2, #8\n" \
+ "ANDS D0Ar2, D0Ar2, #0x7\n" \
+ "ADDZ D0Ar2, D0Ar2, #4\n" \
+ "SUB D0Ar2, D0Ar2, #1\n" \
+ "MOV D1Ar1, #4\n" \
+ "SUB D0Ar2, D1Ar1, D0Ar2\n" \
+ "LSL D0Ar2, D0Ar2, #2\n" \
+ "LSL D1Ar1, D1Ar1, #2\n" \
+ "SUB D1Ar1, D1Ar1, D0Ar2\n" \
+ "SUB %0, %0, #4\n" \
+ "SUB %1, %1, D0Ar2\n" \
+ "SUB %3, %3, D1Ar1\n")
+
+unsigned long __copy_user(void __user *pdst, const void *psrc,
+ unsigned long n)
+{
+ register char __user *dst asm ("A0.2") = pdst;
+ register const char *src asm ("A1.2") = psrc;
+ unsigned long retn = 0; /* bumped by the copy fixups on a fault; returned */
+
+ if (n == 0)
+ return 0;
+
+ if ((unsigned long) src & 1) { /* odd source: copy one byte first */
+ __asm_copy_to_user_1(dst, src, retn);
+ n--;
+ }
+ if ((unsigned long) dst & 1) {
+ /* Worst case - odd destination: byte copy everything left */
+ while (n > 0) {
+ __asm_copy_to_user_1(dst, src, retn);
+ n--;
+ }
+ }
+ if (((unsigned long) src & 2) && n >= 2) { /* bring src to 4 bytes */
+ __asm_copy_to_user_2(dst, src, retn);
+ n -= 2;
+ }
+ if ((unsigned long) dst & 2) {
+ /* Second worst case - dst only 2-byte aligned: 16-bit copies */
+ while (n >= 2) {
+ __asm_copy_to_user_2(dst, src, retn);
+ n -= 2;
+ }
+ }
+
+#ifdef USE_RAPF
+ /* 64 bit copy loop: src and dst are both 8-byte aligned */
+ if (!(((unsigned long) src | (__force unsigned long) dst) & 7)) {
+ if (n >= RAPF_MIN_BUF_SIZE) {
+ /* large buffer: 64 bit rapf copy, which also updates n */
+ __asm_copy_to_user_64bit_rapf_loop(dst, src, retn,
+ n, "64cu");
+ }
+ while (n >= 8) {
+ __asm_copy_to_user_8x64(dst, src, retn);
+ n -= 8;
+ }
+ }
+ if (n >= RAPF_MIN_BUF_SIZE) {
+ /* large buffer: 32 bit rapf copy, which also updates n */
+ __asm_copy_to_user_32bit_rapf_loop(dst, src, retn, n, "32cu");
+ }
+#else
+ /* 64 bit copy loop: src and dst are both 8-byte aligned */
+ if (!(((unsigned long) src | (__force unsigned long) dst) & 7)) {
+ while (n >= 8) {
+ __asm_copy_to_user_8x64(dst, src, retn);
+ n -= 8;
+ }
+ }
+#endif
+
+ while (n >= 16) { /* 16 bytes per asm block */
+ __asm_copy_to_user_16(dst, src, retn);
+ n -= 16;
+ }
+
+ while (n >= 4) { /* 4 bytes per asm block */
+ __asm_copy_to_user_4(dst, src, retn);
+ n -= 4;
+ }
+
+ switch (n) { /* 0-3 trailing bytes */
+ case 0:
+ break;
+ case 1:
+ __asm_copy_to_user_1(dst, src, retn);
+ break;
+ case 2:
+ __asm_copy_to_user_2(dst, src, retn);
+ break;
+ case 3:
+ __asm_copy_to_user_3(dst, src, retn);
+ break;
+ }
+
+ return retn;
+}
+EXPORT_SYMBOL(__copy_user);
+
+#define __asm_copy_from_user_1(to, from, ret) \
+ __asm_copy_user_cont(to, from, ret, \
+ " GETB D1Ar1,[%1++]\n" \
+ "2: SETB [%0++],D1Ar1\n", \
+ "3: ADD %2,%2,#1\n" \
+ " SETB [%0++],D1Ar1\n", \
+ " .long 2b,3b\n")
+
+#define __asm_copy_from_user_2x_cont(to, from, ret, COPY, FIXUP, TENTRY) \
+ __asm_copy_user_cont(to, from, ret, \
+ " GETW D1Ar1,[%1++]\n" \
+ "2: SETW [%0++],D1Ar1\n" COPY, \
+ "3: ADD %2,%2,#2\n" \
+ " SETW [%0++],D1Ar1\n" FIXUP, \
+ " .long 2b,3b\n" TENTRY)
+
+#define __asm_copy_from_user_2(to, from, ret) \
+ __asm_copy_from_user_2x_cont(to, from, ret, "", "", "")
+
+#define __asm_copy_from_user_3(to, from, ret) \
+ __asm_copy_from_user_2x_cont(to, from, ret, \
+ " GETB D1Ar1,[%1++]\n" \
+ "4: SETB [%0++],D1Ar1\n", \
+ "5: ADD %2,%2,#1\n" \
+ " SETB [%0++],D1Ar1\n", \
+ " .long 4b,5b\n")
+
+#define __asm_copy_from_user_4x_cont(to, from, ret, COPY, FIXUP, TENTRY) \
+ __asm_copy_user_cont(to, from, ret, \
+ " GETD D1Ar1,[%1++]\n" \
+ "2: SETD [%0++],D1Ar1\n" COPY, \
+ "3: ADD %2,%2,#4\n" \
+ " SETD [%0++],D1Ar1\n" FIXUP, \
+ " .long 2b,3b\n" TENTRY)
+
+#define __asm_copy_from_user_4(to, from, ret) \
+ __asm_copy_from_user_4x_cont(to, from, ret, "", "", "")
+
+#define __asm_copy_from_user_5(to, from, ret) \
+ __asm_copy_from_user_4x_cont(to, from, ret, \
+ " GETB D1Ar1,[%1++]\n" \
+ "4: SETB [%0++],D1Ar1\n", \
+ "5: ADD %2,%2,#1\n" \
+ " SETB [%0++],D1Ar1\n", \
+ " .long 4b,5b\n")
+
+#define __asm_copy_from_user_6x_cont(to, from, ret, COPY, FIXUP, TENTRY) \
+ __asm_copy_from_user_4x_cont(to, from, ret, \
+ " GETW D1Ar1,[%1++]\n" \
+ "4: SETW [%0++],D1Ar1\n" COPY, \
+ "5: ADD %2,%2,#2\n" \
+ " SETW [%0++],D1Ar1\n" FIXUP, \
+ " .long 4b,5b\n" TENTRY)
+
+#define __asm_copy_from_user_6(to, from, ret) \
+ __asm_copy_from_user_6x_cont(to, from, ret, "", "", "")
+
+#define __asm_copy_from_user_7(to, from, ret) \
+ __asm_copy_from_user_6x_cont(to, from, ret, \
+ " GETB D1Ar1,[%1++]\n" \
+ "6: SETB [%0++],D1Ar1\n", \
+ "7: ADD %2,%2,#1\n" \
+ " SETB [%0++],D1Ar1\n", \
+ " .long 6b,7b\n")
+
+#define __asm_copy_from_user_8x_cont(to, from, ret, COPY, FIXUP, TENTRY) \
+ __asm_copy_from_user_4x_cont(to, from, ret, \
+ " GETD D1Ar1,[%1++]\n" \
+ "4: SETD [%0++],D1Ar1\n" COPY, \
+ "5: ADD %2,%2,#4\n" \
+ " SETD [%0++],D1Ar1\n" FIXUP, \
+ " .long 4b,5b\n" TENTRY)
+
+#define __asm_copy_from_user_8(to, from, ret) \
+ __asm_copy_from_user_8x_cont(to, from, ret, "", "", "")
+
+#define __asm_copy_from_user_9(to, from, ret) \
+ __asm_copy_from_user_8x_cont(to, from, ret, \
+ " GETB D1Ar1,[%1++]\n" \
+ "6: SETB [%0++],D1Ar1\n", \
+ "7: ADD %2,%2,#1\n" \
+ " SETB [%0++],D1Ar1\n", \
+ " .long 6b,7b\n")
+
+#define __asm_copy_from_user_10x_cont(to, from, ret, COPY, FIXUP, TENTRY) \
+ __asm_copy_from_user_8x_cont(to, from, ret, \
+ " GETW D1Ar1,[%1++]\n" \
+ "6: SETW [%0++],D1Ar1\n" COPY, \
+ "7: ADD %2,%2,#2\n" \
+ " SETW [%0++],D1Ar1\n" FIXUP, \
+ " .long 6b,7b\n" TENTRY)
+
+#define __asm_copy_from_user_10(to, from, ret) \
+ __asm_copy_from_user_10x_cont(to, from, ret, "", "", "")
+
+#define __asm_copy_from_user_11(to, from, ret) \
+ __asm_copy_from_user_10x_cont(to, from, ret, \
+ " GETB D1Ar1,[%1++]\n" \
+ "8: SETB [%0++],D1Ar1\n", \
+ "9: ADD %2,%2,#1\n" \
+ " SETB [%0++],D1Ar1\n", \
+ " .long 8b,9b\n")
+
+#define __asm_copy_from_user_12x_cont(to, from, ret, COPY, FIXUP, TENTRY) \
+ __asm_copy_from_user_8x_cont(to, from, ret, \
+ " GETD D1Ar1,[%1++]\n" \
+ "6: SETD [%0++],D1Ar1\n" COPY, \
+ "7: ADD %2,%2,#4\n" \
+ " SETD [%0++],D1Ar1\n" FIXUP, \
+ " .long 6b,7b\n" TENTRY)
+
+#define __asm_copy_from_user_12(to, from, ret) \
+ __asm_copy_from_user_12x_cont(to, from, ret, "", "", "")
+
+#define __asm_copy_from_user_13(to, from, ret) \
+ __asm_copy_from_user_12x_cont(to, from, ret, \
+ " GETB D1Ar1,[%1++]\n" \
+ "8: SETB [%0++],D1Ar1\n", \
+ "9: ADD %2,%2,#1\n" \
+ " SETB [%0++],D1Ar1\n", \
+ " .long 8b,9b\n")
+
+#define __asm_copy_from_user_14x_cont(to, from, ret, COPY, FIXUP, TENTRY) \
+ __asm_copy_from_user_12x_cont(to, from, ret, \
+ " GETW D1Ar1,[%1++]\n" \
+ "8: SETW [%0++],D1Ar1\n" COPY, \
+ "9: ADD %2,%2,#2\n" \
+ " SETW [%0++],D1Ar1\n" FIXUP, \
+ " .long 8b,9b\n" TENTRY)
+
+#define __asm_copy_from_user_14(to, from, ret) \
+ __asm_copy_from_user_14x_cont(to, from, ret, "", "", "")
+
+#define __asm_copy_from_user_15(to, from, ret) \
+ __asm_copy_from_user_14x_cont(to, from, ret, \
+ " GETB D1Ar1,[%1++]\n" \
+ "10: SETB [%0++],D1Ar1\n", \
+ "11: ADD %2,%2,#1\n" \
+ " SETB [%0++],D1Ar1\n", \
+ " .long 10b,11b\n")
+
+#define __asm_copy_from_user_16x_cont(to, from, ret, COPY, FIXUP, TENTRY) \
+ __asm_copy_from_user_12x_cont(to, from, ret, \
+ " GETD D1Ar1,[%1++]\n" \
+ "8: SETD [%0++],D1Ar1\n" COPY, \
+ "9: ADD %2,%2,#4\n" \
+ " SETD [%0++],D1Ar1\n" FIXUP, \
+ " .long 8b,9b\n" TENTRY)
+
+#define __asm_copy_from_user_16(to, from, ret) \
+ __asm_copy_from_user_16x_cont(to, from, ret, "", "", "")
+
+/*
+ * Copy 8 bytes from the user buffer 'from' (%1) to the kernel buffer 'to'
+ * (%0) with one 64-bit GETL/SETL pair; both pointers are post-incremented.
+ *
+ * The exception table pairs label 2 with the fixup at label 3. The fixup
+ * adds 8 to 'ret' (%2), stores 8 zero bytes to the destination in place of
+ * the data that could not be read, and resumes at label 1.
+ *
+ * The fixup entry label has to be on the first MOV. With the label placed
+ * after the two MOVs they were never executed (the fixup is only ever
+ * entered through the exception table), so the fixup SETL stored whatever
+ * D0Ar2/D1Ar1 happened to contain instead of zeroes.
+ */
+#define __asm_copy_from_user_8x64(to, from, ret) \
+	asm volatile ( \
+		" GETL D0Ar2,D1Ar1,[%1++]\n" \
+		"2: SETL [%0++],D0Ar2,D1Ar1\n" \
+		"1:\n" \
+		" .section .fixup,\"ax\"\n" \
+		"3: MOV D1Ar1,#0\n" \
+		" MOV D0Ar2,#0\n" \
+		" ADD %2,%2,#8\n" \
+		" SETL [%0++],D0Ar2,D1Ar1\n" \
+		" MOVT D0Ar2,#HI(1b)\n" \
+		" JUMP D0Ar2,#LO(1b)\n" \
+		" .previous\n" \
+		" .section __ex_table,\"a\"\n" \
+		" .long 2b,3b\n" \
+		" .previous\n" \
+		: "=a" (to), "=r" (from), "=r" (ret) \
+		: "0" (to), "1" (from), "2" (ret) \
+		: "D1Ar1", "D0Ar2", "memory")
+
+/* rewind 'from' pointer when a fault occurs
+ *
+ * Rationale:
+ * A fault occurs while reading from user buffer, which is the
+ * source. Since the fault is at a single address, we only
+ * need to rewind by 8 bytes.
+ * Since we don't write to kernel buffer until we read first,
+ * the kernel buffer is at the right state and needn't be
+ * corrected.
+ */
+#define __asm_copy_from_user_64bit_rapf_loop(to, from, ret, n, id) \
+ __asm_copy_user_64bit_rapf_loop(to, from, ret, n, id, \
+ "SUB %1, %1, #8\n")
+
+/* rewind 'from' pointer when a fault occurs
+ *
+ * Rationale:
+ * A fault occurs while reading from user buffer, which is the
+ * source. Since the fault is at a single address, we only
+ * need to rewind by 4 bytes.
+ * Since we don't write to kernel buffer until we read first,
+ * the kernel buffer is at the right state and needn't be
+ * corrected.
+ */
+#define __asm_copy_from_user_32bit_rapf_loop(to, from, ret, n, id) \
+ __asm_copy_user_32bit_rapf_loop(to, from, ret, n, id, \
+ "SUB %1, %1, #4\n")
+
+
+/*
+ * Copy from user to kernel, zeroing the bytes that were inaccessible in
+ * userland.
+ *
+ * pdst: kernel destination buffer
+ * psrc: user space source buffer
+ * n:    number of bytes to copy
+ *
+ * The return-value is the number of bytes that were inaccessible (0 when
+ * everything was copied).
+ *
+ * A fault is checked for after every alignment step as well as inside the
+ * copy loops. Deferring the check until both alignment steps had run let a
+ * later step (or one of the byte/word fallback loops) carry on reading past
+ * the faulting address, so data could land in the buffer after the failure
+ * point and the count of uncopied bytes came out too small.
+ */
+unsigned long __copy_user_zeroing(void *pdst, const void __user *psrc,
+				  unsigned long n)
+{
+	register char *dst asm ("A0.2") = pdst;
+	register const char __user *src asm ("A1.2") = psrc;
+	unsigned long retn = 0;
+
+	if (n == 0)
+		return 0;
+
+	/* Bring the source up to a 2 byte boundary. */
+	if ((unsigned long) src & 1) {
+		__asm_copy_from_user_1(dst, src, retn);
+		n--;
+		if (retn)
+			goto copy_exception_bytes;
+	}
+	if ((unsigned long) dst & 1) {
+		/* Worst case - byte copy */
+		while (n > 0) {
+			__asm_copy_from_user_1(dst, src, retn);
+			n--;
+			if (retn)
+				goto copy_exception_bytes;
+		}
+	}
+	/* Bring the source up to a 4 byte boundary. */
+	if (((unsigned long) src & 2) && n >= 2) {
+		__asm_copy_from_user_2(dst, src, retn);
+		n -= 2;
+		if (retn)
+			goto copy_exception_bytes;
+	}
+	if ((unsigned long) dst & 2) {
+		/* Second worst case - word copy */
+		while (n >= 2) {
+			__asm_copy_from_user_2(dst, src, retn);
+			n -= 2;
+			if (retn)
+				goto copy_exception_bytes;
+		}
+	}
+
+#ifdef USE_RAPF
+	/* 64 bit copy loop */
+	if (!(((unsigned long) src | (unsigned long) dst) & 7)) {
+		if (n >= RAPF_MIN_BUF_SIZE) {
+			/* Copy using fast 64bit rapf */
+			__asm_copy_from_user_64bit_rapf_loop(dst, src, retn,
+							     n, "64cuz");
+		}
+		while (n >= 8) {
+			__asm_copy_from_user_8x64(dst, src, retn);
+			n -= 8;
+			if (retn)
+				goto copy_exception_bytes;
+		}
+	}
+
+	if (n >= RAPF_MIN_BUF_SIZE) {
+		/* Copy using fast 32bit rapf */
+		__asm_copy_from_user_32bit_rapf_loop(dst, src, retn,
+						     n, "32cuz");
+	}
+#else
+	/* 64 bit copy loop */
+	if (!(((unsigned long) src | (unsigned long) dst) & 7)) {
+		while (n >= 8) {
+			__asm_copy_from_user_8x64(dst, src, retn);
+			n -= 8;
+			if (retn)
+				goto copy_exception_bytes;
+		}
+	}
+#endif
+
+	while (n >= 4) {
+		__asm_copy_from_user_4(dst, src, retn);
+		n -= 4;
+
+		if (retn)
+			goto copy_exception_bytes;
+	}
+
+	/* If we get here, there were no memory read faults. */
+	switch (n) {
+		/* These copies are at least "naturally aligned" (so we don't
+		   have to check each byte), due to the src alignment code.
+		   The *_3 case *will* get the correct count for retn. */
+	case 0:
+		/* This case deliberately left in (if you have doubts check the
+		   generated assembly code). */
+		break;
+	case 1:
+		__asm_copy_from_user_1(dst, src, retn);
+		break;
+	case 2:
+		__asm_copy_from_user_2(dst, src, retn);
+		break;
+	case 3:
+		__asm_copy_from_user_3(dst, src, retn);
+		break;
+	}
+
+	/* If we get here, retn correctly reflects the number of failing
+	   bytes. */
+	return retn;
+
+ copy_exception_bytes:
+	/* We already have "retn" bytes cleared, and need to clear the
+	   remaining "n" bytes. A non-optimized simple byte-for-byte in-line
+	   memset is preferred here, since this isn't speed-critical code and
+	   we'd rather have this a leaf-function than calling memset. */
+	{
+		char *endp;
+		for (endp = dst + n; dst < endp; dst++)
+			*dst = 0;
+	}
+
+	return retn + n;
+}
+EXPORT_SYMBOL(__copy_user_zeroing);
+/*
+ * Store 8 zero bytes at the user address 'to' (%0) and advance it by 8.
+ *
+ * The exception table pairs label 2 with the fixup at label 3, which adds 8
+ * to 'ret' (%1) and resumes at label 1. D0Ar2 and D1Ar1 are clobbered.
+ *
+ * NOTE(review): the store is issued twice (plain, then post-incrementing)
+ * with the exception-table label in between - presumably required by the
+ * way Meta reports store faults; confirm before changing.
+ */
+#define __asm_clear_8x64(to, ret) \
+ asm volatile ( \
+ " MOV D0Ar2,#0\n" \
+ " MOV D1Ar1,#0\n" \
+ " SETL [%0],D0Ar2,D1Ar1\n" \
+ "2: SETL [%0++],D0Ar2,D1Ar1\n" \
+ "1:\n" \
+ " .section .fixup,\"ax\"\n" \
+ "3: ADD %1,%1,#8\n" \
+ " MOVT D0Ar2,#HI(1b)\n" \
+ " JUMP D0Ar2,#LO(1b)\n" \
+ " .previous\n" \
+ " .section __ex_table,\"a\"\n" \
+ " .long 2b,3b\n" \
+ " .previous\n" \
+ : "=r" (to), "=r" (ret) \
+ : "0" (to), "1" (ret) \
+ : "D1Ar1", "D0Ar2", "memory")
+
+/* Zero userspace. */
+/*
+ * Common skeleton for the __asm_clear_N() macros below.
+ *
+ * Zeroes D1Ar1, then emits CLEAR (the stores) inline, FIXUP in the .fixup
+ * section followed by a jump back to label 1, and TENTRY (the
+ * "faulting label, fixup label" pairs) in __ex_table.
+ * 'to' (%0) and 'ret' (%1) are both read and written by the asm.
+ */
+#define __asm_clear(to, ret, CLEAR, FIXUP, TENTRY) \
+ asm volatile ( \
+ " MOV D1Ar1,#0\n" \
+ CLEAR \
+ "1:\n" \
+ " .section .fixup,\"ax\"\n" \
+ FIXUP \
+ " MOVT D1Ar1,#HI(1b)\n" \
+ " JUMP D1Ar1,#LO(1b)\n" \
+ " .previous\n" \
+ " .section __ex_table,\"a\"\n" \
+ TENTRY \
+ " .previous" \
+ : "=r" (to), "=r" (ret) \
+ : "0" (to), "1" (ret) \
+ : "D1Ar1", "memory")
+
+#define __asm_clear_1(to, ret) \
+ __asm_clear(to, ret, \
+ " SETB [%0],D1Ar1\n" \
+ "2: SETB [%0++],D1Ar1\n", \
+ "3: ADD %1,%1,#1\n", \
+ " .long 2b,3b\n")
+
+#define __asm_clear_2(to, ret) \
+ __asm_clear(to, ret, \
+ " SETW [%0],D1Ar1\n" \
+ "2: SETW [%0++],D1Ar1\n", \
+ "3: ADD %1,%1,#2\n", \
+ " .long 2b,3b\n")
+
+#define __asm_clear_3(to, ret) \
+ __asm_clear(to, ret, \
+ "2: SETW [%0++],D1Ar1\n" \
+ " SETB [%0],D1Ar1\n" \
+ "3: SETB [%0++],D1Ar1\n", \
+ "4: ADD %1,%1,#2\n" \
+ "5: ADD %1,%1,#1\n", \
+ " .long 2b,4b\n" \
+ " .long 3b,5b\n")
+
+#define __asm_clear_4x_cont(to, ret, CLEAR, FIXUP, TENTRY) \
+ __asm_clear(to, ret, \
+ " SETD [%0],D1Ar1\n" \
+ "2: SETD [%0++],D1Ar1\n" CLEAR, \
+ "3: ADD %1,%1,#4\n" FIXUP, \
+ " .long 2b,3b\n" TENTRY)
+
+#define __asm_clear_4(to, ret) \
+ __asm_clear_4x_cont(to, ret, "", "", "")
+
+#define __asm_clear_8x_cont(to, ret, CLEAR, FIXUP, TENTRY) \
+ __asm_clear_4x_cont(to, ret, \
+ " SETD [%0],D1Ar1\n" \
+ "4: SETD [%0++],D1Ar1\n" CLEAR, \
+ "5: ADD %1,%1,#4\n" FIXUP, \
+ " .long 4b,5b\n" TENTRY)
+
+#define __asm_clear_8(to, ret) \
+ __asm_clear_8x_cont(to, ret, "", "", "")
+
+#define __asm_clear_12x_cont(to, ret, CLEAR, FIXUP, TENTRY) \
+ __asm_clear_8x_cont(to, ret, \
+ " SETD [%0],D1Ar1\n" \
+ "6: SETD [%0++],D1Ar1\n" CLEAR, \
+ "7: ADD %1,%1,#4\n" FIXUP, \
+ " .long 6b,7b\n" TENTRY)
+
+#define __asm_clear_12(to, ret) \
+ __asm_clear_12x_cont(to, ret, "", "", "")
+
+#define __asm_clear_16x_cont(to, ret, CLEAR, FIXUP, TENTRY) \
+ __asm_clear_12x_cont(to, ret, \
+ " SETD [%0],D1Ar1\n" \
+ "8: SETD [%0++],D1Ar1\n" CLEAR, \
+ "9: ADD %1,%1,#4\n" FIXUP, \
+ " .long 8b,9b\n" TENTRY)
+
+#define __asm_clear_16(to, ret) \
+ __asm_clear_16x_cont(to, ret, "", "", "")
+
+/*
+ * Zero pn bytes of user memory starting at pto.
+ *
+ * Returns the value accumulated in retn by the fixup handlers of the
+ * __asm_clear_*() macros, i.e. the number of bytes that could not be
+ * cleared (0 on success).
+ *
+ * The alignment steps only run when at least that many bytes remain.
+ * Without the size checks a request of 0 bytes at an odd address (or of 1
+ * byte at an address that is 2 modulo 4) cleared more than was asked for
+ * and wrapped the unsigned count, sending the loops below across the rest
+ * of the address space.
+ */
+unsigned long __do_clear_user(void __user *pto, unsigned long pn)
+{
+	register char __user *dst asm ("D0Re0") = pto;
+	register unsigned long n asm ("D1Re0") = pn;
+	register unsigned long retn asm ("D0Ar6") = 0;
+
+	/* Align to 2 bytes, if there is anything to clear at all. */
+	if (((unsigned long) dst & 1) && n >= 1) {
+		__asm_clear_1(dst, retn);
+		n--;
+	}
+
+	/* Align to 4 bytes; a shorter remainder is left to the switch. */
+	if (((unsigned long) dst & 2) && n >= 2) {
+		__asm_clear_2(dst, retn);
+		n -= 2;
+	}
+
+	/* 64 bit copy loop */
+	if (!((__force unsigned long) dst & 7)) {
+		while (n >= 8) {
+			__asm_clear_8x64(dst, retn);
+			n -= 8;
+		}
+	}
+
+	while (n >= 16) {
+		__asm_clear_16(dst, retn);
+		n -= 16;
+	}
+
+	while (n >= 4) {
+		__asm_clear_4(dst, retn);
+		n -= 4;
+	}
+
+	/* At most 3 bytes are left. */
+	switch (n) {
+	case 0:
+		break;
+	case 1:
+		__asm_clear_1(dst, retn);
+		break;
+	case 2:
+		__asm_clear_2(dst, retn);
+		break;
+	case 3:
+		__asm_clear_3(dst, retn);
+		break;
+	}
+
+	return retn;
+}
+EXPORT_SYMBOL(__do_clear_user);
+/*
+ * Read one byte from the user address addr and return it.
+ *
+ * A fault is routed through the exception table (label 1 -> fixup 3): the
+ * fixup stores -EFAULT through err and resumes at label 2. err is only
+ * written on a fault.
+ *
+ * NOTE(review): the load is issued twice with the exception-table label
+ * between the two - presumably required by the way Meta reports access
+ * faults; confirm before changing.
+ */
+unsigned char __get_user_asm_b(const void __user *addr, long *err)
+{
+ register unsigned char x asm ("D0Re0") = 0;
+ asm volatile (
+ " GETB %0,[%2]\n"
+ "1:\n"
+ " GETB %0,[%2]\n"
+ "2:\n"
+ " .section .fixup,\"ax\"\n"
+ "3: MOV D0FrT,%3\n"
+ " SETD [%1],D0FrT\n"
+ " MOVT D0FrT,#HI(2b)\n"
+ " JUMP D0FrT,#LO(2b)\n"
+ " .previous\n"
+ " .section __ex_table,\"a\"\n"
+ " .long 1b,3b\n"
+ " .previous\n"
+ : "=r" (x)
+ : "r" (err), "r" (addr), "P" (-EFAULT)
+ : "D0FrT");
+ return x;
+}
+EXPORT_SYMBOL(__get_user_asm_b);
+/*
+ * Read a 16-bit value (GETW) from the user address addr and return it.
+ *
+ * A fault is routed through the exception table (label 1 -> fixup 3): the
+ * fixup stores -EFAULT through err and resumes at label 2. err is only
+ * written on a fault.
+ *
+ * NOTE(review): the load is issued twice with the exception-table label
+ * between the two - presumably required by the way Meta reports access
+ * faults; confirm before changing.
+ */
+unsigned short __get_user_asm_w(const void __user *addr, long *err)
+{
+ register unsigned short x asm ("D0Re0") = 0;
+ asm volatile (
+ " GETW %0,[%2]\n"
+ "1:\n"
+ " GETW %0,[%2]\n"
+ "2:\n"
+ " .section .fixup,\"ax\"\n"
+ "3: MOV D0FrT,%3\n"
+ " SETD [%1],D0FrT\n"
+ " MOVT D0FrT,#HI(2b)\n"
+ " JUMP D0FrT,#LO(2b)\n"
+ " .previous\n"
+ " .section __ex_table,\"a\"\n"
+ " .long 1b,3b\n"
+ " .previous\n"
+ : "=r" (x)
+ : "r" (err), "r" (addr), "P" (-EFAULT)
+ : "D0FrT");
+ return x;
+}
+EXPORT_SYMBOL(__get_user_asm_w);
+/*
+ * Read a 32-bit value (GETD) from the user address addr and return it.
+ *
+ * A fault is routed through the exception table (label 1 -> fixup 3): the
+ * fixup stores -EFAULT through err and resumes at label 2. err is only
+ * written on a fault.
+ *
+ * NOTE(review): the load is issued twice with the exception-table label
+ * between the two - presumably required by the way Meta reports access
+ * faults; confirm before changing.
+ */
+unsigned int __get_user_asm_d(const void __user *addr, long *err)
+{
+ register unsigned int x asm ("D0Re0") = 0;
+ asm volatile (
+ " GETD %0,[%2]\n"
+ "1:\n"
+ " GETD %0,[%2]\n"
+ "2:\n"
+ " .section .fixup,\"ax\"\n"
+ "3: MOV D0FrT,%3\n"
+ " SETD [%1],D0FrT\n"
+ " MOVT D0FrT,#HI(2b)\n"
+ " JUMP D0FrT,#LO(2b)\n"
+ " .previous\n"
+ " .section __ex_table,\"a\"\n"
+ " .long 1b,3b\n"
+ " .previous\n"
+ : "=r" (x)
+ : "r" (err), "r" (addr), "P" (-EFAULT)
+ : "D0FrT");
+ return x;
+}
+EXPORT_SYMBOL(__get_user_asm_d);
+/*
+ * Store a byte of x (SETB) at the user address addr.
+ *
+ * Returns 0 on success. A fault is routed through the exception table
+ * (label 1 -> fixup 3): the fixup sets the result to -EFAULT and resumes at
+ * label 2.
+ *
+ * NOTE(review): the store is issued twice with the exception-table label
+ * between the two - presumably required by the way Meta reports access
+ * faults; confirm before changing.
+ */
+long __put_user_asm_b(unsigned int x, void __user *addr)
+{
+ register unsigned int err asm ("D0Re0") = 0;
+ asm volatile (
+ " MOV %0,#0\n"
+ " SETB [%2],%1\n"
+ "1:\n"
+ " SETB [%2],%1\n"
+ "2:\n"
+ ".section .fixup,\"ax\"\n"
+ "3: MOV %0,%3\n"
+ " MOVT D0FrT,#HI(2b)\n"
+ " JUMP D0FrT,#LO(2b)\n"
+ ".previous\n"
+ ".section __ex_table,\"a\"\n"
+ " .long 1b,3b\n"
+ ".previous"
+ : "=r"(err)
+ : "d" (x), "a" (addr), "P"(-EFAULT)
+ : "D0FrT");
+ return err;
+}
+EXPORT_SYMBOL(__put_user_asm_b);
+/*
+ * Store a 16-bit value from x (SETW) at the user address addr.
+ *
+ * Returns 0 on success. A fault is routed through the exception table
+ * (label 1 -> fixup 3): the fixup sets the result to -EFAULT and resumes at
+ * label 2.
+ *
+ * NOTE(review): the store is issued twice with the exception-table label
+ * between the two - presumably required by the way Meta reports access
+ * faults; confirm before changing.
+ */
+long __put_user_asm_w(unsigned int x, void __user *addr)
+{
+ register unsigned int err asm ("D0Re0") = 0;
+ asm volatile (
+ " MOV %0,#0\n"
+ " SETW [%2],%1\n"
+ "1:\n"
+ " SETW [%2],%1\n"
+ "2:\n"
+ ".section .fixup,\"ax\"\n"
+ "3: MOV %0,%3\n"
+ " MOVT D0FrT,#HI(2b)\n"
+ " JUMP D0FrT,#LO(2b)\n"
+ ".previous\n"
+ ".section __ex_table,\"a\"\n"
+ " .long 1b,3b\n"
+ ".previous"
+ : "=r"(err)
+ : "d" (x), "a" (addr), "P"(-EFAULT)
+ : "D0FrT");
+ return err;
+}
+EXPORT_SYMBOL(__put_user_asm_w);
+/*
+ * Store the 32-bit value x (SETD) at the user address addr.
+ *
+ * Returns 0 on success. A fault is routed through the exception table
+ * (label 1 -> fixup 3): the fixup sets the result to -EFAULT and resumes at
+ * label 2.
+ *
+ * NOTE(review): the store is issued twice with the exception-table label
+ * between the two - presumably required by the way Meta reports access
+ * faults; confirm before changing.
+ */
+long __put_user_asm_d(unsigned int x, void __user *addr)
+{
+ register unsigned int err asm ("D0Re0") = 0;
+ asm volatile (
+ " MOV %0,#0\n"
+ " SETD [%2],%1\n"
+ "1:\n"
+ " SETD [%2],%1\n"
+ "2:\n"
+ ".section .fixup,\"ax\"\n"
+ "3: MOV %0,%3\n"
+ " MOVT D0FrT,#HI(2b)\n"
+ " JUMP D0FrT,#LO(2b)\n"
+ ".previous\n"
+ ".section __ex_table,\"a\"\n"
+ " .long 1b,3b\n"
+ ".previous"
+ : "=r"(err)
+ : "d" (x), "a" (addr), "P"(-EFAULT)
+ : "D0FrT");
+ return err;
+}
+EXPORT_SYMBOL(__put_user_asm_d);
+/*
+ * Store the 64-bit value x (SETL) at the user address addr.
+ *
+ * Returns 0 on success. A fault is routed through the exception table
+ * (label 1 -> fixup 3): the fixup sets the result to -EFAULT and resumes at
+ * label 2.
+ *
+ * NOTE(review): %t1 is presumably the second register of the pair holding
+ * x - confirm against the metag GCC operand modifiers. As in the narrower
+ * variants the store is issued twice around the exception-table label.
+ */
+long __put_user_asm_l(unsigned long long x, void __user *addr)
+{
+ register unsigned int err asm ("D0Re0") = 0;
+ asm volatile (
+ " MOV %0,#0\n"
+ " SETL [%2],%1,%t1\n"
+ "1:\n"
+ " SETL [%2],%1,%t1\n"
+ "2:\n"
+ ".section .fixup,\"ax\"\n"
+ "3: MOV %0,%3\n"
+ " MOVT D0FrT,#HI(2b)\n"
+ " JUMP D0FrT,#LO(2b)\n"
+ ".previous\n"
+ ".section __ex_table,\"a\"\n"
+ " .long 1b,3b\n"
+ ".previous"
+ : "=r"(err)
+ : "d" (x), "a" (addr), "P"(-EFAULT)
+ : "D0FrT");
+ return err;
+}
+EXPORT_SYMBOL(__put_user_asm_l);
+/*
+ * Measure a NUL-terminated string in user space, examining at most count
+ * bytes.
+ *
+ * Returns the string length including the terminating NUL. If no NUL is
+ * found within count bytes the result is greater than count (count + 1 for
+ * a positive count). Returns 0 if access_ok() rejects src or if reading
+ * the string faults.
+ */
+long strnlen_user(const char __user *src, long count)
+{
+ long res;
+
+ if (!access_ok(VERIFY_READ, src, 0)) /* size 0: only src itself is checked */
+ return 0;
+
+ asm volatile (" MOV D0Ar4, %1\n" /* D0Ar4 = read pointer */
+ " MOV D0Ar6, %2\n" /* D0Ar6 = bytes left */
+ "0:\n"
+ " SUBS D0FrT, D0Ar6, #0\n" /* test bytes left ... */
+ " SUB D0Ar6, D0Ar6, #1\n"
+ " BLE 2f\n" /* ... and stop once none remain */
+ " GETB D0FrT, [D0Ar4+#1++]\n"
+ "1:\n"
+ " TST D0FrT, #255\n"
+ " BNE 0b\n" /* loop until a NUL byte is read */
+ "2:\n"
+ " SUB %0, %2, D0Ar6\n" /* res = count - bytes left */
+ "3:\n"
+ " .section .fixup,\"ax\"\n"
+ "4:\n"
+ " MOV %0, #0\n" /* fault: report 0 */
+ " MOVT D0FrT,#HI(3b)\n"
+ " JUMP D0FrT,#LO(3b)\n"
+ " .previous\n"
+ " .section __ex_table,\"a\"\n"
+ " .long 1b,4b\n"
+ " .previous\n"
+ : "=r" (res)
+ : "r" (src), "r" (count)
+ : "D0FrT", "D0Ar4", "D0Ar6", "cc");
+
+ return res;
+}
+EXPORT_SYMBOL(strnlen_user);
+/*
+ * Copy a NUL-terminated string from user space into dst, reading at most
+ * count bytes.
+ *
+ * Returns the number of bytes copied, not counting the terminating NUL
+ * (which is stored in dst when it is found), count if no NUL was seen
+ * within count bytes, or -EFAULT if reading from src faults. A count of 0
+ * returns 0 without touching either buffer.
+ */
+long __strncpy_from_user(char *dst, const char __user *src, long count)
+{
+ long res;
+
+ if (count == 0)
+ return 0;
+
+ /*
+ * Currently, in 2.4.0-test9, most ports use a simple byte-copy loop.
+ * So do we.
+ *
+ * This code is deduced from:
+ *
+ * char tmp2;
+ * long tmp1, tmp3;
+ * tmp1 = count;
+ * while ((*dst++ = (tmp2 = *src++)) != 0
+ * && --tmp1)
+ * ;
+ *
+ * res = count - tmp1;
+ *
+ * with tweaks.
+ */
+
+ asm volatile (" MOV %0,%3\n" /* %0 = bytes left */
+ "1:\n"
+ " GETB D0FrT,[%2++]\n"
+ "2:\n"
+ " CMP D0FrT,#0\n"
+ " SETB [%1++],D0FrT\n" /* the NUL itself is stored too */
+ " BEQ 3f\n"
+ " SUBS %0,%0,#1\n"
+ " BNZ 1b\n"
+ "3:\n"
+ " SUB %0,%3,%0\n" /* res = count - bytes left */
+ "4:\n"
+ " .section .fixup,\"ax\"\n"
+ "5:\n"
+ " MOV %0,%7\n" /* %7 is the -EFAULT immediate */
+ " MOVT D0FrT,#HI(4b)\n"
+ " JUMP D0FrT,#LO(4b)\n"
+ " .previous\n"
+ " .section __ex_table,\"a\"\n"
+ " .long 2b,5b\n"
+ " .previous"
+ : "=r" (res), "=r" (dst), "=r" (src), "=r" (count)
+ : "3" (count), "1" (dst), "2" (src), "P" (-EFAULT)
+ : "D0FrT", "memory", "cc");
+
+ return res;
+}
+EXPORT_SYMBOL(__strncpy_from_user);
diff --git a/arch/metag/mm/Kconfig b/arch/metag/mm/Kconfig
new file mode 100644
index 00000000000..cd7f2f2ad41
--- /dev/null
+++ b/arch/metag/mm/Kconfig
@@ -0,0 +1,153 @@
+menu "Memory management options"
+
+config PAGE_OFFSET
+ hex "Kernel page offset address"
+ default "0x40000000"
+ help
+ This option allows you to set the virtual address at which the
+ kernel will be mapped.
+endmenu
+
+config KERNEL_4M_PAGES
+ bool "Map kernel with 4MB pages"
+ depends on METAG_META21_MMU
+ default y
+ help
+ Map the kernel with large pages to reduce TLB pressure.
+
+choice
+ prompt "User page size"
+ default PAGE_SIZE_4K
+
+config PAGE_SIZE_4K
+ bool "4kB"
+ help
+ This is the default page size used by all Meta cores.
+
+config PAGE_SIZE_8K
+ bool "8kB"
+ depends on METAG_META21_MMU
+ help
+ This enables 8kB pages as supported by Meta 2.x and later MMUs.
+
+config PAGE_SIZE_16K
+ bool "16kB"
+ depends on METAG_META21_MMU
+ help
+ This enables 16kB pages as supported by Meta 2.x and later MMUs.
+
+endchoice
+
+config NUMA
+ bool "Non Uniform Memory Access (NUMA) Support"
+ help
+ Some Meta systems have MMU-mappable on-chip memories with
+ lower latencies than main memory. This enables support for
+ these blocks by binding them to nodes and allowing
+ memory policies to be used for prioritizing and controlling
+ allocation behaviour.
+
+config FORCE_MAX_ZONEORDER
+ int "Maximum zone order"
+ range 10 32
+ default "10"
+ help
+ The kernel memory allocator divides physically contiguous memory
+ blocks into "zones", where each zone is a power of two number of
+ pages. This option selects the largest power of two that the kernel
+ keeps in the memory allocator. If you need to allocate very large
+ blocks of physically contiguous memory, then you may need to
+ increase this value.
+
+ This config option is actually maximum order plus one. For example,
+ a value of 11 means that the largest free memory block is 2^10 pages.
+
+ The page size is not necessarily 4KB. Keep this in mind
+ when choosing a value for this option.
+
+config METAG_L2C
+ bool "Level 2 Cache Support"
+ depends on METAG_META21
+ help
+ Say Y here to enable support for the Meta Level 2 (L2) cache. This
+ will enable the cache at start up if it hasn't already been enabled
+ by the bootloader.
+
+ If the bootloader enables the L2 you must say Y here to ensure the
+ kernel takes the appropriate actions to keep the cache coherent.
+
+config NODES_SHIFT
+ int
+ default "1"
+ depends on NEED_MULTIPLE_NODES
+
+config ARCH_FLATMEM_ENABLE
+ def_bool y
+ depends on !NUMA
+
+config ARCH_SPARSEMEM_ENABLE
+ def_bool y
+ select SPARSEMEM_STATIC
+
+config ARCH_SPARSEMEM_DEFAULT
+ def_bool y
+
+config MAX_ACTIVE_REGIONS
+ int
+ default "2" if SPARSEMEM
+ default "1"
+
+config ARCH_POPULATES_NODE_MAP
+ def_bool y
+
+config ARCH_SELECT_MEMORY_MODEL
+ def_bool y
+
+config SYS_SUPPORTS_HUGETLBFS
+ def_bool y
+ depends on METAG_META21_MMU
+
+choice
+ prompt "HugeTLB page size"
+ depends on METAG_META21_MMU && HUGETLB_PAGE
+ default HUGETLB_PAGE_SIZE_1M
+
+config HUGETLB_PAGE_SIZE_8K
+ bool "8kB"
+ depends on PAGE_SIZE_4K
+
+config HUGETLB_PAGE_SIZE_16K
+ bool "16kB"
+ depends on PAGE_SIZE_4K || PAGE_SIZE_8K
+
+config HUGETLB_PAGE_SIZE_32K
+ bool "32kB"
+
+config HUGETLB_PAGE_SIZE_64K
+ bool "64kB"
+
+config HUGETLB_PAGE_SIZE_128K
+ bool "128kB"
+
+config HUGETLB_PAGE_SIZE_256K
+ bool "256kB"
+
+config HUGETLB_PAGE_SIZE_512K
+ bool "512kB"
+
+config HUGETLB_PAGE_SIZE_1M
+ bool "1MB"
+
+config HUGETLB_PAGE_SIZE_2M
+ bool "2MB"
+
+config HUGETLB_PAGE_SIZE_4M
+ bool "4MB"
+
+endchoice
+
+config METAG_COREMEM
+ bool
+ default y if SUSPEND
+
+source "mm/Kconfig"
diff --git a/arch/metag/mm/Makefile b/arch/metag/mm/Makefile
new file mode 100644
index 00000000000..99433116412
--- /dev/null
+++ b/arch/metag/mm/Makefile
@@ -0,0 +1,19 @@
+#
+# Makefile for the linux Meta-specific parts of the memory manager.
+#
+
+obj-y += cache.o
+obj-y += extable.o
+obj-y += fault.o
+obj-y += init.o
+obj-y += ioremap.o
+obj-y += maccess.o
+
+mmu-y := mmu-meta1.o
+mmu-$(CONFIG_METAG_META21_MMU) := mmu-meta2.o
+obj-y += $(mmu-y)
+
+obj-$(CONFIG_HIGHMEM) += highmem.o
+obj-$(CONFIG_HUGETLB_PAGE) += hugetlbpage.o
+obj-$(CONFIG_METAG_L2C) += l2cache.o
+obj-$(CONFIG_NUMA) += numa.o
diff --git a/arch/metag/mm/cache.c b/arch/metag/mm/cache.c
new file mode 100644
index 00000000000..b5d3b2e7c16
--- /dev/null
+++ b/arch/metag/mm/cache.c
@@ -0,0 +1,521 @@
+/*
+ * arch/metag/mm/cache.c
+ *
+ * Copyright (C) 2001, 2002, 2005, 2007, 2012 Imagination Technologies.
+ *
+ * This program is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU General Public License version 2 as published by the
+ * Free Software Foundation.
+ *
+ * Cache control code
+ */
+
+#include <linux/export.h>
+#include <linux/io.h>
+#include <asm/cacheflush.h>
+#include <asm/core_reg.h>
+#include <asm/global_lock.h>
+#include <asm/metag_isa.h>
+#include <asm/metag_mem.h>
+#include <asm/metag_regs.h>
+
+#define DEFAULT_CACHE_WAYS_LOG2 2
+
+/*
+ * Size of a set in the caches. Initialised for default 16K stride, adjusted
+ * according to values passed through TBI global heap segment via LDLK (on ATP)
+ * or config registers (on HTP/MTP)
+ */
+static int dcache_set_shift = METAG_TBI_CACHE_SIZE_BASE_LOG2
+ - DEFAULT_CACHE_WAYS_LOG2;
+static int icache_set_shift = METAG_TBI_CACHE_SIZE_BASE_LOG2
+ - DEFAULT_CACHE_WAYS_LOG2;
+/*
+ * The number of sets in the caches. Initialised for HTP/ATP, adjusted
+ * according to NOMMU setting in config registers
+ */
+static unsigned char dcache_sets_log2 = DEFAULT_CACHE_WAYS_LOG2;
+static unsigned char icache_sets_log2 = DEFAULT_CACHE_WAYS_LOG2;
+
+#ifndef CONFIG_METAG_META12
+/* Test data for metag_lnkget_probe(): 16 u32 words, 64-byte aligned. */
+static volatile u32 lnkget_testdata[16] __initdata __aligned(64);
+
+#define LNKGET_CONSTANT 0xdeadbeef
+/**
+ * metag_lnkget_probe() - Probe whether lnkget/lnkset go around the cache
+ *
+ * Writes 0 to the first test word with a normal store, replaces it with
+ * LNKGET_CONSTANT using LNKGETD/LNKSETD and reads it back with a normal
+ * load. The value read back is compared with LNKGET_CONSTANT and a message
+ * or warning is printed when the result disagrees with the kernel
+ * configuration.
+ */
+void __init metag_lnkget_probe(void)
+{
+ int temp;
+ long flags;
+
+ /*
+ * It's conceivable the user has configured a globally coherent cache
+ * shared with non-Linux hardware threads, so use LOCK2 to prevent them
+ * from executing and causing cache eviction during the test.
+ */
+ __global_lock2(flags);
+
+ /* read a value to bring it into the cache */
+ (void)lnkget_testdata[0];
+ lnkget_testdata[0] = 0;
+
+ /*
+ * lnkget/lnkset it to modify it
+ *
+ * The sequence is repeated until the TXSTAT field selected by
+ * 0x3f000000 reads 0x02000000 - presumably the "LNKSET succeeded"
+ * status; confirm against the Meta architecture manual.
+ */
+ asm volatile(
+ "1: LNKGETD %0, [%1]\n"
+ " LNKSETD [%1], %2\n"
+ " DEFR %0, TXSTAT\n"
+ " ANDT %0, %0, #HI(0x3f000000)\n"
+ " CMPT %0, #HI(0x02000000)\n"
+ " BNZ 1b\n"
+ : "=&d" (temp)
+ : "da" (&lnkget_testdata[0]), "bd" (LNKGET_CONSTANT)
+ : "cc");
+
+ /* re-read it to see if the cached value changed */
+ temp = lnkget_testdata[0];
+
+ __global_unlock2(flags);
+
+ /* flush the cache line to fix any incoherency */
+ __builtin_dcache_flush((void *)&lnkget_testdata[0]);
+
+#if defined(CONFIG_METAG_LNKGET_AROUND_CACHE)
+ /* if the cache is right, LNKGET_AROUND_CACHE is unnecessary */
+ if (temp == LNKGET_CONSTANT)
+ pr_info("LNKGET/SET go through cache but CONFIG_METAG_LNKGET_AROUND_CACHE=y\n");
+#elif defined(CONFIG_METAG_ATOMICITY_LNKGET)
+ /*
+ * if the cache is wrong, LNKGET_AROUND_CACHE is really necessary
+ * because the kernel is configured to use LNKGET/SET for atomicity
+ */
+ WARN(temp != LNKGET_CONSTANT,
+ "LNKGET/SET go around cache but CONFIG_METAG_LNKGET_AROUND_CACHE=n\n"
+ "Expect kernel failure as it's used for atomicity primitives\n");
+#elif defined(CONFIG_SMP)
+ /*
+ * if the cache is wrong, LNKGET_AROUND_CACHE should be used or the
+ * gateway page won't flush and userland could break.
+ */
+ WARN(temp != LNKGET_CONSTANT,
+ "LNKGET/SET go around cache but CONFIG_METAG_LNKGET_AROUND_CACHE=n\n"
+ "Expect userland failure as it's used for user gateway page\n");
+#else
+ /*
+ * if the cache is wrong, LNKGET_AROUND_CACHE is set wrong, but it
+ * doesn't actually matter as it doesn't have any effect on !SMP &&
+ * !ATOMICITY_LNKGET.
+ */
+ if (temp != LNKGET_CONSTANT)
+ pr_warn("LNKGET/SET go around cache but CONFIG_METAG_LNKGET_AROUND_CACHE=n\n");
+#endif
+}
+#endif /* !CONFIG_METAG_META12 */
+
+/**
+ * metag_cache_probe() - Probe L1 cache configuration.
+ *
+ * Probe the L1 cache configuration to aid the L1 physical cache flushing
+ * functions.
+ */
+void __init metag_cache_probe(void)
+{
+#ifndef CONFIG_METAG_META12
+ int coreid = metag_in32(METAC_CORE_ID);
+ int config = metag_in32(METAC_CORE_CONFIG2);
+ int cfgcache = coreid & METAC_COREID_CFGCACHE_BITS;
+
+ if (cfgcache == METAC_COREID_CFGCACHE_TYPE0 ||
+ cfgcache == METAC_COREID_CFGCACHE_PRIVNOMMU) {
+ icache_sets_log2 = 1;
+ dcache_sets_log2 = 1;
+ }
+
+ /* For normal size caches, the smallest size is 4Kb.
+ For small caches, the smallest size is 64b */
+ icache_set_shift = (config & METAC_CORECFG2_ICSMALL_BIT)
+ ? 6 : 12;
+ icache_set_shift += (config & METAC_CORE_C2ICSZ_BITS)
+ >> METAC_CORE_C2ICSZ_S;
+ icache_set_shift -= icache_sets_log2;
+
+ dcache_set_shift = (config & METAC_CORECFG2_DCSMALL_BIT)
+ ? 6 : 12;
+ dcache_set_shift += (config & METAC_CORECFG2_DCSZ_BITS)
+ >> METAC_CORECFG2_DCSZ_S;
+ dcache_set_shift -= dcache_sets_log2;
+
+ metag_lnkget_probe();
+#else
+ /* Extract cache sizes from global heap segment */
+ unsigned long val, u;
+ int width, shift, addend;
+ PTBISEG seg;
+
+ seg = __TBIFindSeg(NULL, TBID_SEG(TBID_THREAD_GLOBAL,
+ TBID_SEGSCOPE_GLOBAL,
+ TBID_SEGTYPE_HEAP));
+ if (seg != NULL) {
+ val = seg->Data[1];
+
+ /* Work out width of I-cache size bit-field */
+ u = ((unsigned long) METAG_TBI_ICACHE_SIZE_BITS)
+ >> METAG_TBI_ICACHE_SIZE_S;
+ width = 0;
+ while (u & 1) {
+ width++;
+ u >>= 1;
+ }
+ /* Extract sign-extended size addend value */
+ shift = 32 - (METAG_TBI_ICACHE_SIZE_S + width);
+ addend = (long) ((val & METAG_TBI_ICACHE_SIZE_BITS)
+ << shift)
+ >> (shift + METAG_TBI_ICACHE_SIZE_S);
+ /* Now calculate I-cache set size */
+ icache_set_shift = (METAG_TBI_CACHE_SIZE_BASE_LOG2
+ - DEFAULT_CACHE_WAYS_LOG2)
+ + addend;
+
+ /* Similarly for D-cache */
+ u = ((unsigned long) METAG_TBI_DCACHE_SIZE_BITS)
+ >> METAG_TBI_DCACHE_SIZE_S;
+ width = 0;
+ while (u & 1) {
+ width++;
+ u >>= 1;
+ }
+ shift = 32 - (METAG_TBI_DCACHE_SIZE_S + width);
+ addend = (long) ((val & METAG_TBI_DCACHE_SIZE_BITS)
+ << shift)
+ >> (shift + METAG_TBI_DCACHE_SIZE_S);
+ dcache_set_shift = (METAG_TBI_CACHE_SIZE_BASE_LOG2
+ - DEFAULT_CACHE_WAYS_LOG2)
+ + addend;
+ }
+#endif
+}
+
+static void metag_phys_data_cache_flush(const void *start)
+{
+ unsigned long flush0, flush1, flush2, flush3;
+ int loops, step;
+ int thread;
+ int part, offset;
+ int set_shift;
+
+ /* Use a sequence of writes to flush the cache region requested */
+ thread = (__core_reg_get(TXENABLE) & TXENABLE_THREAD_BITS)
+ >> TXENABLE_THREAD_S;
+
+ /* Cache is broken into sets which lie in contiguous RAMs */
+ set_shift = dcache_set_shift;
+
+ /* Move to the base of the physical cache flush region */
+ flush0 = LINSYSCFLUSH_DCACHE_LINE;
+ step = 64;
+
+ /* Get partition data for this thread */
+ part = metag_in32(SYSC_DCPART0 +
+ (SYSC_xCPARTn_STRIDE * thread));
+
+ if ((int)start < 0)
+ /* Access Global vs Local partition */
+ part >>= SYSC_xCPARTG_AND_S
+ - SYSC_xCPARTL_AND_S;
+
+ /* Extract offset and move SetOff */
+ offset = (part & SYSC_xCPARTL_OR_BITS)
+ >> SYSC_xCPARTL_OR_S;
+ flush0 += (offset << (set_shift - 4));
+
+ /* Shrink size */
+ part = (part & SYSC_xCPARTL_AND_BITS)
+ >> SYSC_xCPARTL_AND_S;
+ loops = ((part + 1) << (set_shift - 4));
+
+ /* Reduce loops by step of cache line size */
+ loops /= step;
+
+ flush1 = flush0 + (1 << set_shift);
+ flush2 = flush0 + (2 << set_shift);
+ flush3 = flush0 + (3 << set_shift);
+
+ if (dcache_sets_log2 == 1) {
+ flush2 = flush1;
+ flush3 = flush1 + step;
+ flush1 = flush0 + step;
+ step <<= 1;
+ loops >>= 1;
+ }
+
+ /* Clear loops ways in cache */
+ while (loops-- != 0) {
+ /* Clear the ways. */
+#if 0
+ /*
+ * GCC doesn't generate very good code for this so we
+ * provide inline assembly instead.
+ */
+ metag_out8(0, flush0);
+ metag_out8(0, flush1);
+ metag_out8(0, flush2);
+ metag_out8(0, flush3);
+
+ flush0 += step;
+ flush1 += step;
+ flush2 += step;
+ flush3 += step;
+#else
+ asm volatile (
+ "SETB\t[%0+%4++],%5\n"
+ "SETB\t[%1+%4++],%5\n"
+ "SETB\t[%2+%4++],%5\n"
+ "SETB\t[%3+%4++],%5\n"
+ : "+e" (flush0),
+ "+e" (flush1),
+ "+e" (flush2),
+ "+e" (flush3)
+ : "e" (step), "a" (0));
+#endif
+ }
+}
+
+/*
+ * Flush the whole data cache partition selected by start; does nothing
+ * when the data cache is not switched on.
+ */
+void metag_data_cache_flush_all(const void *start)
+{
+	/* Only flush when the data cache is actually enabled. */
+	if (metag_in32(SYSC_CACHE_MMU_CONFIG) & SYSC_CMMUCFG_DC_ON_BIT)
+		metag_phys_data_cache_flush(start);
+}
+/*
+ * Flush the data cache lines covering bytes bytes starting at start.
+ *
+ * Does nothing when the data cache is disabled. Requests of 4096 bytes or
+ * more fall back to the physical flush; smaller ones are flushed line by
+ * line with __builtin_dcache_flush(), four lines per loop iteration.
+ *
+ * NOTE(review): PRIM_FLUSH and LOOP_INC hard-code a 64 byte line while the
+ * line count uses DCACHE_LINE_BYTES/DCACHE_LINE_S - confirm these agree.
+ * NOTE(review): a negative bytes makes loops negative, which always selects
+ * the default case below and never terminates.
+ */
+void metag_data_cache_flush(const void *start, int bytes)
+{
+ unsigned long flush0;
+ int loops, step;
+
+ if ((metag_in32(SYSC_CACHE_MMU_CONFIG) & SYSC_CMMUCFG_DC_ON_BIT) == 0)
+ /* No need to flush the data cache it's not actually enabled */
+ return;
+
+ if (bytes >= 4096) {
+ metag_phys_data_cache_flush(start);
+ return;
+ }
+
+ /* Use linear cache flush mechanism on META IP */
+ flush0 = (int)start;
+ /* loops = number of cache lines touched by [start, start + bytes) */
+ loops = ((int)start & (DCACHE_LINE_BYTES - 1)) + bytes +
+ (DCACHE_LINE_BYTES - 1);
+ loops >>= DCACHE_LINE_S;
+
+#define PRIM_FLUSH(addr, offset) do { \
+ int __addr = ((int) (addr)) + ((offset) * 64); \
+ __builtin_dcache_flush((void *)(__addr)); \
+ } while (0)
+
+#define LOOP_INC (4*64)
+
+ do {
+ /* By default stop */
+ step = 0;
+
+ switch (loops) {
+ /* Drop Thru Cases! */
+ default: /* four or more lines left: flush four and go round again */
+ PRIM_FLUSH(flush0, 3);
+ loops -= 4;
+ step = 1; /* fall through */
+ case 3:
+ PRIM_FLUSH(flush0, 2); /* fall through */
+ case 2:
+ PRIM_FLUSH(flush0, 1); /* fall through */
+ case 1:
+ PRIM_FLUSH(flush0, 0);
+ flush0 += LOOP_INC; /* fall through */
+ case 0:
+ break;
+ }
+ } while (step);
+}
+EXPORT_SYMBOL(metag_data_cache_flush);
+
+static void metag_phys_code_cache_flush(const void *start, int bytes)
+{
+ unsigned long flush0, flush1, flush2, flush3, end_set;
+ int loops, step;
+ int thread;
+ int set_shift, set_size;
+ int part, offset;
+
+ /* Use a sequence of writes to flush the cache region requested */
+ thread = (__core_reg_get(TXENABLE) & TXENABLE_THREAD_BITS)
+ >> TXENABLE_THREAD_S;
+ set_shift = icache_set_shift;
+
+ /* Move to the base of the physical cache flush region */
+ flush0 = LINSYSCFLUSH_ICACHE_LINE;
+ step = 64;
+
+ /* Get partition code for this thread */
+ part = metag_in32(SYSC_ICPART0 +
+ (SYSC_xCPARTn_STRIDE * thread));
+
+ if ((int)start < 0)
+ /* Access Global vs Local partition */
+ part >>= SYSC_xCPARTG_AND_S-SYSC_xCPARTL_AND_S;
+
+ /* Extract offset and move SetOff */
+ offset = (part & SYSC_xCPARTL_OR_BITS)
+ >> SYSC_xCPARTL_OR_S;
+ flush0 += (offset << (set_shift - 4));
+
+ /* Shrink size */
+ part = (part & SYSC_xCPARTL_AND_BITS)
+ >> SYSC_xCPARTL_AND_S;
+ loops = ((part + 1) << (set_shift - 4));
+
+ /* Where does the Set end? */
+ end_set = flush0 + loops;
+ set_size = loops;
+
+#ifdef CONFIG_METAG_META12
+ if ((bytes < 4096) && (bytes < loops)) {
+ /* Unreachable on HTP/MTP */
+ /* Only target the sets that could be relevant */
+ flush0 += (loops - step) & ((int) start);
+ loops = (((int) start) & (step-1)) + bytes + step - 1;
+ }
+#endif
+
+ /* Reduce loops by step of cache line size */
+ loops /= step;
+
+ flush1 = flush0 + (1<<set_shift);
+ flush2 = flush0 + (2<<set_shift);
+ flush3 = flush0 + (3<<set_shift);
+
+ if (icache_sets_log2 == 1) {
+ flush2 = flush1;
+ flush3 = flush1 + step;
+ flush1 = flush0 + step;
+#if 0
+ /* flush0 will stop one line early in this case
+ * (flush1 will do the final line).
+ * However we don't correct end_set here at the moment
+ * because it will never wrap on HTP/MTP
+ */
+ end_set -= step;
+#endif
+ step <<= 1;
+ loops >>= 1;
+ }
+
+ /* Clear loops ways in cache */
+ while (loops-- != 0) {
+#if 0
+ /*
+ * GCC doesn't generate very good code for this so we
+ * provide inline assembly instead.
+ */
+ /* Clear the ways */
+ metag_out8(0, flush0);
+ metag_out8(0, flush1);
+ metag_out8(0, flush2);
+ metag_out8(0, flush3);
+
+ flush0 += step;
+ flush1 += step;
+ flush2 += step;
+ flush3 += step;
+#else
+ asm volatile (
+ "SETB\t[%0+%4++],%5\n"
+ "SETB\t[%1+%4++],%5\n"
+ "SETB\t[%2+%4++],%5\n"
+ "SETB\t[%3+%4++],%5\n"
+ : "+e" (flush0),
+ "+e" (flush1),
+ "+e" (flush2),
+ "+e" (flush3)
+ : "e" (step), "a" (0));
+#endif
+
+ if (flush0 == end_set) {
+ /* Wrap within Set 0 */
+ flush0 -= set_size;
+ flush1 -= set_size;
+ flush2 -= set_size;
+ flush3 -= set_size;
+ }
+ }
+}
+
+/*
+ * Flush the whole code cache partition selected by start; does nothing
+ * when the code cache is not switched on.
+ */
+void metag_code_cache_flush_all(const void *start)
+{
+	/* Only flush when the code cache is actually enabled. */
+	if (metag_in32(SYSC_CACHE_MMU_CONFIG) & SYSC_CMMUCFG_IC_ON_BIT)
+		metag_phys_code_cache_flush(start, 4096);
+}
+EXPORT_SYMBOL(metag_code_cache_flush_all);
+/*
+ * Flush the code cache lines covering bytes bytes starting at start.
+ *
+ * Does nothing when the code cache is disabled. With CONFIG_METAG_META12
+ * the physical flush is always used. Otherwise requests of 4096 bytes or
+ * more use the physical flush and smaller ones are flushed line by line
+ * with __builtin_meta2_cachewd(), four lines per loop iteration.
+ *
+ * NOTE(review): PRIM_IFLUSH and LOOP_INC hard-code a 64 byte line while the
+ * line count uses ICACHE_LINE_BYTES/ICACHE_LINE_S - confirm these agree.
+ * NOTE(review): a negative bytes makes loops negative, which always selects
+ * the default case below and never terminates.
+ */
+void metag_code_cache_flush(const void *start, int bytes)
+{
+#ifndef CONFIG_METAG_META12
+ void *flush;
+ int loops, step;
+#endif /* !CONFIG_METAG_META12 */
+
+ if ((metag_in32(SYSC_CACHE_MMU_CONFIG) & SYSC_CMMUCFG_IC_ON_BIT) == 0)
+ /* No need to flush the code cache it's not actually enabled */
+ return;
+
+#ifdef CONFIG_METAG_META12
+ /* CACHEWD isn't available on Meta1, so always do full cache flush */
+ metag_phys_code_cache_flush(start, bytes);
+
+#else /* !CONFIG_METAG_META12 */
+ /* If large size do full physical cache flush */
+ if (bytes >= 4096) {
+ metag_phys_code_cache_flush(start, bytes);
+ return;
+ }
+
+ /* Use linear cache flush mechanism on META IP */
+ flush = (void *)((int)start & ~(ICACHE_LINE_BYTES-1));
+ /* loops = number of cache lines touched by [start, start + bytes) */
+ loops = ((int)start & (ICACHE_LINE_BYTES-1)) + bytes +
+ (ICACHE_LINE_BYTES-1);
+ loops >>= ICACHE_LINE_S;
+
+#define PRIM_IFLUSH(addr, offset) \
+ __builtin_meta2_cachewd(((addr) + ((offset) * 64)), CACHEW_ICACHE_BIT)
+
+#define LOOP_INC (4*64)
+
+ do {
+ /* By default stop */
+ step = 0;
+
+ switch (loops) {
+ /* Drop Thru Cases! */
+ default: /* four or more lines left: flush four and go round again */
+ PRIM_IFLUSH(flush, 3);
+ loops -= 4;
+ step = 1; /* fall through */
+ case 3:
+ PRIM_IFLUSH(flush, 2); /* fall through */
+ case 2:
+ PRIM_IFLUSH(flush, 1); /* fall through */
+ case 1:
+ PRIM_IFLUSH(flush, 0);
+ flush += LOOP_INC; /* fall through */
+ case 0:
+ break;
+ }
+ } while (step);
+#endif /* !CONFIG_METAG_META12 */
+}
+EXPORT_SYMBOL(metag_code_cache_flush);
diff --git a/arch/metag/mm/extable.c b/arch/metag/mm/extable.c
new file mode 100644
index 00000000000..2a21eaebe84
--- /dev/null
+++ b/arch/metag/mm/extable.c
@@ -0,0 +1,15 @@
+
+#include <linux/module.h>
+#include <linux/uaccess.h>
+
+/*
+ * Look up the faulting PC in the exception tables and, if an entry exists,
+ * redirect execution to its fixup handler.
+ *
+ * Returns 1 if a fixup was applied, 0 if no entry matched.
+ */
+int fixup_exception(struct pt_regs *regs)
+{
+	const struct exception_table_entry *entry;
+
+	entry = search_exception_tables(instruction_pointer(regs));
+	if (!entry)
+		return 0;
+
+	/* Resume at the fixup address instead of the faulting instruction. */
+	regs->ctx.CurrPC = entry->fixup;
+	return 1;
+}
diff --git a/arch/metag/mm/fault.c b/arch/metag/mm/fault.c
new file mode 100644
index 00000000000..2c75bf7357c
--- /dev/null
+++ b/arch/metag/mm/fault.c
@@ -0,0 +1,239 @@
+/*
+ * Meta page fault handling.
+ *
+ * Copyright (C) 2005-2012 Imagination Technologies Ltd.
+ */
+
+#include <linux/mman.h>
+#include <linux/mm.h>
+#include <linux/kernel.h>
+#include <linux/ptrace.h>
+#include <linux/interrupt.h>
+#include <linux/uaccess.h>
+
+#include <asm/tlbflush.h>
+#include <asm/mmu.h>
+#include <asm/traps.h>
+
+/*
+ * Clear any pending catch buffer state for the access at 'addr'.
+ *
+ * Instruction fetch traps are ignored. For any other trap the first catch
+ * buffer entry is cleared if it records 'addr'; otherwise an error is
+ * logged and nothing is changed.
+ */
+static void clear_cbuf_entry(struct pt_regs *regs, unsigned long addr,
+			     unsigned int trapno)
+{
+	PTBICTXEXTCB0 cbuf = regs->extcb0;
+
+	/* Instruction fetch faults leave no catch buffer state. */
+	if (trapno == TBIXXF_SIGNUM_IGF || trapno == TBIXXF_SIGNUM_IPF)
+		return;
+
+	if (cbuf[0].CBAddr != addr) {
+		pr_err("Failed to clear cbuf entry!\n");
+		return;
+	}
+
+	cbuf[0].CBAddr = 0;
+	cbuf[0].CBFlags &= ~TXCATCH0_FAULT_BITS;
+
+	/*
+	 * And, as this is the ONLY catch entry, we need to clear the cbuf
+	 * bit from the context!
+	 */
+	regs->ctx.SaveMask &= ~(TBICTX_CBUF_BIT | TBICTX_XCBF_BIT);
+}
+
+int show_unhandled_signals = 1;
+
+/*
+ * Handle a fault on 'address'.
+ *
+ * @regs:         register context at the time of the fault
+ * @address:      faulting virtual address
+ * @write_access: non-zero if the faulting access was a write
+ * @trapno:       trap number; reported in siginfo and in log messages
+ *
+ * Returns 0 when the fault was resolved (or when a retry was abandoned
+ * because a fatal signal is pending) and 1 when a signal was raised or a
+ * kernel exception fixup was applied. A kernel fault with no fixup ends in
+ * die()/do_exit() and does not return.
+ */
+int do_page_fault(struct pt_regs *regs, unsigned long address,
+		  unsigned int write_access, unsigned int trapno)
+{
+	struct task_struct *tsk;
+	struct mm_struct *mm;
+	struct vm_area_struct *vma, *prev_vma;
+	siginfo_t info;
+	int fault;
+	unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE |
+				(write_access ? FAULT_FLAG_WRITE : 0);
+
+	tsk = current;
+
+	if ((address >= VMALLOC_START) && (address < VMALLOC_END)) {
+		/*
+		 * Synchronize this task's top level page-table
+		 * with the 'reference' page table.
+		 *
+		 * Do _not_ use "tsk" here. We might be inside
+		 * an interrupt in the middle of a task switch..
+		 */
+		int offset = pgd_index(address);
+		pgd_t *pgd, *pgd_k;
+		pud_t *pud, *pud_k;
+		pmd_t *pmd, *pmd_k;
+		pte_t *pte_k;
+
+		pgd = ((pgd_t *)mmu_get_base()) + offset;
+		pgd_k = swapper_pg_dir + offset;
+
+		/* This will never happen with the folded page table. */
+		if (!pgd_present(*pgd)) {
+			if (!pgd_present(*pgd_k))
+				goto bad_area_nosemaphore;
+			set_pgd(pgd, *pgd_k);
+			return 0;
+		}
+
+		pud = pud_offset(pgd, address);
+		pud_k = pud_offset(pgd_k, address);
+		if (!pud_present(*pud_k))
+			goto bad_area_nosemaphore;
+		set_pud(pud, *pud_k);
+
+		pmd = pmd_offset(pud, address);
+		pmd_k = pmd_offset(pud_k, address);
+		if (!pmd_present(*pmd_k))
+			goto bad_area_nosemaphore;
+		set_pmd(pmd, *pmd_k);
+
+		pte_k = pte_offset_kernel(pmd_k, address);
+		if (!pte_present(*pte_k))
+			goto bad_area_nosemaphore;
+
+		/* May only be needed on Chorus2 */
+		flush_tlb_all();
+		return 0;
+	}
+
+	mm = tsk->mm;
+
+	/* No user context to fault in: only an exception fixup can help. */
+	if (in_atomic() || !mm)
+		goto no_context;
+
+retry:
+	down_read(&mm->mmap_sem);
+
+	vma = find_vma_prev(mm, address, &prev_vma);
+
+	if (!vma || address < vma->vm_start)
+		goto check_expansion;
+
+good_area:
+	if (write_access) {
+		if (!(vma->vm_flags & VM_WRITE))
+			goto bad_area;
+	} else {
+		if (!(vma->vm_flags & (VM_READ | VM_EXEC | VM_WRITE)))
+			goto bad_area;
+	}
+
+	/*
+	 * If for any reason at all we couldn't handle the fault,
+	 * make sure we exit gracefully rather than endlessly redo
+	 * the fault.
+	 */
+	fault = handle_mm_fault(mm, vma, address, flags);
+
+	if ((fault & VM_FAULT_RETRY) && fatal_signal_pending(current))
+		return 0;
+
+	if (unlikely(fault & VM_FAULT_ERROR)) {
+		if (fault & VM_FAULT_OOM)
+			goto out_of_memory;
+		else if (fault & VM_FAULT_SIGBUS)
+			goto do_sigbus;
+		BUG();
+	}
+	if (flags & FAULT_FLAG_ALLOW_RETRY) {
+		if (fault & VM_FAULT_MAJOR)
+			tsk->maj_flt++;
+		else
+			tsk->min_flt++;
+		if (fault & VM_FAULT_RETRY) {
+			/* Only one retry is allowed. */
+			flags &= ~FAULT_FLAG_ALLOW_RETRY;
+			flags |= FAULT_FLAG_TRIED;
+
+			/*
+			 * No need to up_read(&mm->mmap_sem) as we would
+			 * have already released it in __lock_page_or_retry
+			 * in mm/filemap.c.
+			 */
+
+			goto retry;
+		}
+	}
+
+	up_read(&mm->mmap_sem);
+	return 0;
+
+check_expansion:
+	/* The address may lie just below a stack that can be grown. */
+	vma = prev_vma;
+	if (vma && (expand_stack(vma, address) == 0))
+		goto good_area;
+
+bad_area:
+	up_read(&mm->mmap_sem);
+
+bad_area_nosemaphore:
+	if (user_mode(regs)) {
+		info.si_signo = SIGSEGV;
+		info.si_errno = 0;
+		info.si_code = SEGV_MAPERR;
+		info.si_addr = (__force void __user *)address;
+		info.si_trapno = trapno;
+
+		if (show_unhandled_signals && unhandled_signal(tsk, SIGSEGV) &&
+		    printk_ratelimit()) {
+			/*
+			 * The log level is selected at run time and supplied
+			 * through the leading %s, so plain printk() must be
+			 * used here: pr_info() would prepend its own KERN_INFO
+			 * and the level passed as an argument would end up as
+			 * stray bytes in the message text.
+			 */
+			printk("%s%s[%d]: segfault at %lx pc %08x sp %08x write %d trap %#x (%s)",
+			       task_pid_nr(tsk) > 1 ? KERN_INFO : KERN_EMERG,
+			       tsk->comm, task_pid_nr(tsk), address,
+			       regs->ctx.CurrPC, regs->ctx.AX[0].U0,
+			       write_access, trapno, trap_name(trapno));
+			print_vma_addr(" in ", regs->ctx.CurrPC);
+			print_vma_addr(" rtp in ", regs->ctx.DX[4].U1);
+			printk("\n");
+			show_regs(regs);
+		}
+		force_sig_info(SIGSEGV, &info, tsk);
+		return 1;
+	}
+	goto no_context;
+
+do_sigbus:
+	up_read(&mm->mmap_sem);
+
+	/*
+	 * Send a sigbus, regardless of whether we were in kernel
+	 * or user mode.
+	 */
+	info.si_signo = SIGBUS;
+	info.si_errno = 0;
+	info.si_code = BUS_ADRERR;
+	info.si_addr = (__force void __user *)address;
+	info.si_trapno = trapno;
+	force_sig_info(SIGBUS, &info, tsk);
+
+	/* Kernel mode? Handle exceptions or die */
+	if (!user_mode(regs))
+		goto no_context;
+
+	return 1;
+
+	/*
+	 * We ran out of memory, or some other thing happened to us that made
+	 * us unable to handle the page fault gracefully.
+	 */
+out_of_memory:
+	up_read(&mm->mmap_sem);
+	if (user_mode(regs))
+		do_group_exit(SIGKILL);
+
+no_context:
+	/* Are we prepared to handle this kernel fault? */
+	if (fixup_exception(regs)) {
+		clear_cbuf_entry(regs, address, trapno);
+		return 1;
+	}
+
+	die("Oops", regs, (write_access << 15) | trapno, address);
+	do_exit(SIGKILL);
+}
diff --git a/arch/metag/mm/highmem.c b/arch/metag/mm/highmem.c
new file mode 100644
index 00000000000..d71f621a2c0
--- /dev/null
+++ b/arch/metag/mm/highmem.c
@@ -0,0 +1,133 @@
+#include <linux/export.h>
+#include <linux/highmem.h>
+#include <linux/sched.h>
+#include <linux/smp.h>
+#include <linux/interrupt.h>
+#include <asm/fixmap.h>
+#include <asm/tlbflush.h>
+
+static pte_t *kmap_pte;
+
+unsigned long highstart_pfn, highend_pfn;
+
+/*
+ * Map a page into the kernel address space; may sleep.
+ *
+ * Highmem pages go through kmap_high(); any other page is returned via
+ * page_address().
+ */
+void *kmap(struct page *page)
+{
+	might_sleep();
+	return PageHighMem(page) ? kmap_high(page) : page_address(page);
+}
+EXPORT_SYMBOL(kmap);
+
+/*
+ * Release a mapping obtained with kmap(). Must not be called from
+ * interrupt context. Only highmem pages need any work.
+ */
+void kunmap(struct page *page)
+{
+	BUG_ON(in_interrupt());
+	if (PageHighMem(page))
+		kunmap_high(page);
+}
+EXPORT_SYMBOL(kunmap);
+
+/*
+ * kmap_atomic/kunmap_atomic is significantly faster than kmap/kunmap because
+ * no global lock is needed and because the kmap code must perform a global TLB
+ * invalidation when the kmap pool wraps.
+ *
+ * However when holding an atomic kmap it is not legal to sleep, so atomic
+ * kmaps are appropriate for short, tight code paths only.
+ */
+
+/*
+ * Map a page for a short access during which the caller must not sleep.
+ *
+ * Page faults are disabled on every path, including the early return for
+ * pages that are not in highmem. Highmem pages are installed in a per-CPU
+ * fixmap slot.
+ */
+void *kmap_atomic(struct page *page)
+{
+	unsigned long vaddr;
+	pte_t *ptep;
+	int slot;
+
+	/* even !CONFIG_PREEMPT needs this, for in_atomic in do_page_fault */
+	pagefault_disable();
+	if (!PageHighMem(page))
+		return page_address(page);
+
+	/* Pick this CPU's next free atomic kmap slot. */
+	slot = kmap_atomic_idx_push() + KM_TYPE_NR * smp_processor_id();
+	vaddr = __fix_to_virt(FIX_KMAP_BEGIN + slot);
+	ptep = kmap_pte - slot;
+#ifdef CONFIG_DEBUG_HIGHMEM
+	BUG_ON(!pte_none(*ptep));
+#endif
+	set_pte(ptep, mk_pte(page, PAGE_KERNEL));
+
+	return (void *)vaddr;
+}
+EXPORT_SYMBOL(kmap_atomic);
+
+/*
+ * Undo kmap_atomic()/kmap_atomic_pfn() for the mapping at 'kvaddr'.
+ *
+ * Addresses at or above FIXADDR_START have their pte cleared and TLB entry
+ * flushed; lower addresses need no unmapping. Page faults are re-enabled
+ * in both cases.
+ */
+void __kunmap_atomic(void *kvaddr)
+{
+ unsigned long vaddr = (unsigned long) kvaddr & PAGE_MASK;
+ int idx, type;
+
+ if (kvaddr >= (void *)FIXADDR_START) {
+ type = kmap_atomic_idx();
+ idx = type + KM_TYPE_NR * smp_processor_id();
+
+ /*
+ * Force other mappings to Oops if they try to access this
+ * pte without first remapping it. Keeping stale mappings around
+ * is a bad idea also, in case the page changes cacheability
+ * attributes or becomes a protected page in a hypervisor.
+ */
+ pte_clear(&init_mm, vaddr, kmap_pte-idx);
+ flush_tlb_kernel_range(vaddr, vaddr + PAGE_SIZE);
+
+ kmap_atomic_idx_pop();
+ }
+
+ pagefault_enable();
+}
+EXPORT_SYMBOL(__kunmap_atomic);
+
+/*
+ * This is the same as kmap_atomic() but can map memory that doesn't
+ * have a struct page associated with it.
+ *
+ * The pfn is always mapped through a per-CPU fixmap slot and the TLB entry
+ * for that slot is flushed. Returns the virtual address of the mapping.
+ */
+void *kmap_atomic_pfn(unsigned long pfn)
+{
+ enum fixed_addresses idx;
+ unsigned long vaddr;
+ int type;
+
+ pagefault_disable();
+
+ /* Select this CPU's next free atomic kmap slot. */
+ type = kmap_atomic_idx_push();
+ idx = type + KM_TYPE_NR * smp_processor_id();
+ vaddr = __fix_to_virt(FIX_KMAP_BEGIN + idx);
+#ifdef CONFIG_DEBUG_HIGHMEM
+ BUG_ON(!pte_none(*(kmap_pte - idx)));
+#endif
+ set_pte(kmap_pte - idx, pfn_pte(pfn, PAGE_KERNEL));
+ flush_tlb_kernel_range(vaddr, vaddr + PAGE_SIZE);
+
+ return (void *)vaddr;
+}
+
+/*
+ * Return the struct page behind a kernel virtual address.
+ *
+ * Addresses below FIXADDR_START are translated with virt_to_page();
+ * anything else is looked up through the cached kmap page table entries.
+ */
+struct page *kmap_atomic_to_page(void *ptr)
+{
+	unsigned long vaddr = (unsigned long)ptr;
+	int idx;
+
+	if (vaddr >= FIXADDR_START) {
+		idx = virt_to_fix(vaddr);
+		/* kmap ptes are indexed downwards from kmap_pte. */
+		return pte_page(kmap_pte[FIX_KMAP_BEGIN - idx]);
+	}
+
+	return virt_to_page(ptr);
+}
+
+/* Record the pte of the first atomic kmap fixmap slot in kmap_pte. */
+void __init kmap_init(void)
+{
+	/* cache the first kmap pte */
+	kmap_pte = kmap_get_fixmap_pte(__fix_to_virt(FIX_KMAP_BEGIN));
+}
diff --git a/arch/metag/mm/hugetlbpage.c b/arch/metag/mm/hugetlbpage.c
new file mode 100644
index 00000000000..3c52fa6d0f8
--- /dev/null
+++ b/arch/metag/mm/hugetlbpage.c
@@ -0,0 +1,259 @@
+/*
+ * arch/metag/mm/hugetlbpage.c
+ *
+ * METAG HugeTLB page support.
+ *
+ * Cloned from SuperH
+ *
+ * Cloned from sparc64 by Paul Mundt.
+ *
+ * Copyright (C) 2002, 2003 David S. Miller (davem@redhat.com)
+ */
+
+#include <linux/init.h>
+#include <linux/fs.h>
+#include <linux/mm.h>
+#include <linux/hugetlb.h>
+#include <linux/pagemap.h>
+#include <linux/sysctl.h>
+
+#include <asm/mman.h>
+#include <asm/pgalloc.h>
+#include <asm/tlb.h>
+#include <asm/tlbflush.h>
+#include <asm/cacheflush.h>
+
+/*
+ * Check whether [addr, addr + len) may be used for a huge page mapping.
+ *
+ * Both 'addr' and 'len' must be huge page aligned and the range must fit
+ * below TASK_SIZE. The range must not overlap the following vma, and
+ * neighbouring vmas within the same ALIGN_HUGEPT() block must themselves
+ * be hugetlb mappings.
+ *
+ * Returns 0 if the range is acceptable, -EINVAL otherwise.
+ */
+int prepare_hugepage_range(struct file *file, unsigned long addr,
+			   unsigned long len)
+{
+	struct mm_struct *mm = current->mm;
+	struct hstate *h = hstate_file(file);
+	struct vm_area_struct *vma;
+
+	if (len & ~huge_page_mask(h))
+		return -EINVAL;
+	if (addr & ~huge_page_mask(h))
+		return -EINVAL;
+	if (TASK_SIZE - len < addr)
+		return -EINVAL;
+
+	/*
+	 * vm_flags holds VM_* bits, so a hugetlb vma has to be recognised
+	 * with is_vm_hugetlb_page() (VM_HUGETLB). MAP_HUGETLB is an mmap()
+	 * flag and does not identify hugetlb vmas when tested against
+	 * vm_flags.
+	 */
+	vma = find_vma(mm, ALIGN_HUGEPT(addr));
+	if (vma && !is_vm_hugetlb_page(vma))
+		return -EINVAL;
+
+	vma = find_vma(mm, addr);
+	if (vma) {
+		/* The requested range must end before the next vma starts. */
+		if (addr + len > vma->vm_start)
+			return -EINVAL;
+		if (!is_vm_hugetlb_page(vma) &&
+		    (ALIGN_HUGEPT(addr + len) > vma->vm_start))
+			return -EINVAL;
+	}
+	return 0;
+}
+
+/*
+ * Allocate (if necessary) and return the pte used for a huge page at
+ * 'addr', and set the page size field of the first level entry to
+ * _PAGE_SZHUGE. The 'sz' argument is not used.
+ *
+ * NOTE(review): the first level entry is updated even when
+ * pte_alloc_map() returns NULL - confirm this is intended.
+ */
+pte_t *huge_pte_alloc(struct mm_struct *mm,
+ unsigned long addr, unsigned long sz)
+{
+ pgd_t *pgd;
+ pud_t *pud;
+ pmd_t *pmd;
+ pte_t *pte;
+
+ pgd = pgd_offset(mm, addr);
+ pud = pud_offset(pgd, addr);
+ pmd = pmd_offset(pud, addr);
+ pte = pte_alloc_map(mm, NULL, pmd, addr);
+ /* Replace the page size bits of the first level entry. */
+ pgd->pgd &= ~_PAGE_SZ_MASK;
+ pgd->pgd |= _PAGE_SZHUGE;
+
+ return pte;
+}
+
+/*
+ * Walk the page tables of 'mm' and return the pte slot for 'addr'. No
+ * presence checks are made at any level.
+ */
+pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr)
+{
+	pgd_t *pgd = pgd_offset(mm, addr);
+	pud_t *pud = pud_offset(pgd, addr);
+	pmd_t *pmd = pmd_offset(pud, addr);
+
+	return pte_offset_kernel(pmd, addr);
+}
+
+/* Stub: always returns 0 and touches none of its arguments. */
+int huge_pmd_unshare(struct mm_struct *mm, unsigned long *addr, pte_t *ptep)
+{
+ return 0;
+}
+
+/* Stub: always returns ERR_PTR(-EINVAL). */
+struct page *follow_huge_addr(struct mm_struct *mm,
+ unsigned long address, int write)
+{
+ return ERR_PTR(-EINVAL);
+}
+
+/* Non-zero if the page shift recorded in 'pmd' is larger than PAGE_SHIFT. */
+int pmd_huge(pmd_t pmd)
+{
+ return pmd_page_shift(pmd) > PAGE_SHIFT;
+}
+
+/* Always returns 0: no pud is ever reported as huge. */
+int pud_huge(pud_t pud)
+{
+ return 0;
+}
+
+/* Stub: always returns NULL. */
+struct page *follow_huge_pmd(struct mm_struct *mm, unsigned long address,
+ pmd_t *pmd, int write)
+{
+ return NULL;
+}
+
+#ifdef HAVE_ARCH_HUGETLB_UNMAPPED_AREA
+
+/*
+ * Look for an unmapped area starting after another hugetlb vma.
+ * There are guaranteed to be no huge pte's spare if all the huge pages are
+ * full size (4MB), so in that case compile out this search.
+ *
+ * Returns the start address of a suitable area of 'len' bytes, or 0 if
+ * none was found.
+ */
+#if HPAGE_SHIFT == HUGEPT_SHIFT
+static inline unsigned long
+hugetlb_get_unmapped_area_existing(unsigned long len)
+{
+	return 0;
+}
+#else
+static unsigned long
+hugetlb_get_unmapped_area_existing(unsigned long len)
+{
+	struct mm_struct *mm = current->mm;
+	struct vm_area_struct *vma;
+	unsigned long start_addr, addr;
+	int after_huge;
+
+	/* Resume from the partially used block recorded earlier, if any. */
+	if (mm->context.part_huge) {
+		start_addr = mm->context.part_huge;
+		after_huge = 1;
+	} else {
+		start_addr = TASK_UNMAPPED_BASE;
+		after_huge = 0;
+	}
+new_search:
+	addr = start_addr;
+
+	for (vma = find_vma(mm, addr); ; vma = vma->vm_next) {
+		if ((!vma && !after_huge) || TASK_SIZE - len < addr) {
+			/*
+			 * Start a new search - just in case we missed
+			 * some holes.
+			 */
+			if (start_addr != TASK_UNMAPPED_BASE) {
+				start_addr = TASK_UNMAPPED_BASE;
+				goto new_search;
+			}
+			return 0;
+		}
+		/* skip ahead if we've aligned right over some vmas */
+		if (vma && vma->vm_end <= addr)
+			continue;
+		/* space before the next vma? */
+		if (after_huge && (!vma || ALIGN_HUGEPT(addr + len)
+			    <= vma->vm_start)) {
+			unsigned long end = addr + len;
+			/* Remember where a partially filled block ends. */
+			if (end & HUGEPT_MASK)
+				mm->context.part_huge = end;
+			else if (addr == mm->context.part_huge)
+				mm->context.part_huge = 0;
+			return addr;
+		}
+		/*
+		 * vm_flags holds VM_* bits, so hugetlb vmas are identified
+		 * with is_vm_hugetlb_page() (VM_HUGETLB), not with the
+		 * mmap() flag MAP_HUGETLB.
+		 */
+		if (vma && is_vm_hugetlb_page(vma)) {
+			/* space after a huge vma in 2nd level page table? */
+			if (vma->vm_end & HUGEPT_MASK) {
+				after_huge = 1;
+				/* no need to align to the next PT block */
+				addr = vma->vm_end;
+				continue;
+			}
+		}
+		after_huge = 0;
+		addr = ALIGN_HUGEPT(vma->vm_end);
+	}
+}
+#endif
+
+/*
+ * Do a full search to find an area without any nearby normal pages.
+ * The search covers TASK_UNMAPPED_BASE..TASK_SIZE and is delegated to
+ * vm_unmapped_area().
+ */
+static unsigned long
+hugetlb_get_unmapped_area_new_pmd(unsigned long len)
+{
+	struct vm_unmapped_area_info info = {
+		.flags		= 0,
+		.length		= len,
+		.low_limit	= TASK_UNMAPPED_BASE,
+		.high_limit	= TASK_SIZE,
+		.align_mask	= PAGE_MASK & HUGEPT_MASK,
+		.align_offset	= 0,
+	};
+
+	return vm_unmapped_area(&info);
+}
+
+/*
+ * Choose an address for a new huge page mapping of 'len' bytes.
+ *
+ * 'len' must be a multiple of the huge page size (-EINVAL) and no larger
+ * than TASK_SIZE (-ENOMEM). With MAP_FIXED, 'addr' is validated and used
+ * as given. Otherwise a non-zero 'addr' is tried as a hint, then space
+ * after an existing hugetlb vma, then a fresh aligned region.
+ */
+unsigned long
+hugetlb_get_unmapped_area(struct file *file, unsigned long addr,
+		unsigned long len, unsigned long pgoff, unsigned long flags)
+{
+	struct hstate *h = hstate_file(file);
+	unsigned long found;
+
+	if (len & ~huge_page_mask(h))
+		return -EINVAL;
+	if (len > TASK_SIZE)
+		return -ENOMEM;
+
+	/* A fixed address is either usable as given or rejected. */
+	if (flags & MAP_FIXED)
+		return prepare_hugepage_range(file, addr, len) ? -EINVAL : addr;
+
+	/* Try the caller's hint, rounded up to a huge page boundary. */
+	if (addr) {
+		addr = ALIGN(addr, huge_page_size(h));
+		if (!prepare_hugepage_range(file, addr, len))
+			return addr;
+	}
+
+	/*
+	 * Look for an existing hugetlb vma with space after it (this is to
+	 * minimise fragmentation caused by huge pages).
+	 */
+	found = hugetlb_get_unmapped_area_existing(len);
+	if (found)
+		return found;
+
+	/*
+	 * Find an unmapped naturally aligned set of 4MB blocks that we can use
+	 * for huge pages.
+	 */
+	return hugetlb_get_unmapped_area_new_pmd(len);
+}
+
+#endif /*HAVE_ARCH_HUGETLB_UNMAPPED_AREA*/
+
+/*
+ * "hugepagesz=" handler, necessary for boot time 4MB huge page allocation.
+ * Only a size of exactly 1 << HPAGE_SHIFT is accepted; returns 1 when the
+ * hstate was registered and 0 when the size was rejected.
+ */
+static __init int setup_hugepagesz(char *opt)
+{
+	unsigned long ps = memparse(opt, &opt);
+
+	if (ps != (1 << HPAGE_SHIFT)) {
+		pr_err("hugepagesz: Unsupported page size %lu M\n",
+		       ps >> 20);
+		return 0;
+	}
+
+	hugetlb_add_hstate(HPAGE_SHIFT - PAGE_SHIFT);
+	return 1;
+}
+__setup("hugepagesz=", setup_hugepagesz);
diff --git a/arch/metag/mm/init.c b/arch/metag/mm/init.c
new file mode 100644
index 00000000000..504a398d5f8
--- /dev/null
+++ b/arch/metag/mm/init.c
@@ -0,0 +1,451 @@
+/*
+ * Copyright (C) 2005,2006,2007,2008,2009,2010 Imagination Technologies
+ *
+ */
+
+#include <linux/export.h>
+#include <linux/mm.h>
+#include <linux/swap.h>
+#include <linux/init.h>
+#include <linux/bootmem.h>
+#include <linux/pagemap.h>
+#include <linux/percpu.h>
+#include <linux/memblock.h>
+#include <linux/initrd.h>
+#include <linux/of_fdt.h>
+
+#include <asm/setup.h>
+#include <asm/page.h>
+#include <asm/pgalloc.h>
+#include <asm/mmu.h>
+#include <asm/mmu_context.h>
+#include <asm/sections.h>
+#include <asm/tlb.h>
+#include <asm/user_gateway.h>
+#include <asm/mmzone.h>
+#include <asm/fixmap.h>
+
+unsigned long pfn_base;
+EXPORT_SYMBOL(pfn_base);
+
+pgd_t swapper_pg_dir[PTRS_PER_PGD] __page_aligned_data;
+
+unsigned long empty_zero_page;
+EXPORT_SYMBOL(empty_zero_page);
+
+extern char __user_gateway_start;
+extern char __user_gateway_end;
+
+void *gateway_page;
+
+/*
+ * Insert the gateway page into a set of page tables, creating the
+ * page tables if necessary.
+ *
+ * @pgd:     first level entry covering 'address'; must already be present
+ * @address: virtual address at which gateway_page is mapped read-only
+ *
+ * Marked __init because it allocates from bootmem and its only caller,
+ * user_gateway_init(), is itself __init.
+ */
+static void __init insert_gateway_page(pgd_t *pgd, unsigned long address)
+{
+	pud_t *pud;
+	pmd_t *pmd;
+	pte_t *pte;
+
+	BUG_ON(!pgd_present(*pgd));
+
+	pud = pud_offset(pgd, address);
+	BUG_ON(!pud_present(*pud));
+
+	pmd = pmd_offset(pud, address);
+	if (!pmd_present(*pmd)) {
+		/* No second level table yet: allocate one from bootmem. */
+		pte = alloc_bootmem_pages(PAGE_SIZE);
+		set_pmd(pmd, __pmd(_PAGE_TABLE | __pa(pte)));
+	}
+
+	pte = pte_offset_kernel(pmd, address);
+	set_pte(pte, pfn_pte(__pa(gateway_page) >> PAGE_SHIFT, PAGE_READONLY));
+}
+
+/*
+ * Alloc and map a page in a known location accessible to userspace.
+ *
+ * The page is mapped at USER_GATEWAY_PAGE and filled with the code between
+ * __user_gateway_start and __user_gateway_end.
+ */
+static void __init user_gateway_init(void)
+{
+ unsigned long address = USER_GATEWAY_PAGE;
+ int offset = pgd_index(address);
+ pgd_t *pgd;
+
+ gateway_page = alloc_bootmem_pages(PAGE_SIZE);
+
+ pgd = swapper_pg_dir + offset;
+ insert_gateway_page(pgd, address);
+
+#ifdef CONFIG_METAG_META12
+ /*
+ * Insert the gateway page into our current page tables even
+ * though we've already inserted it into our reference page
+ * table (swapper_pg_dir). This is because with a META1 mmu we
+ * copy just the user address range and not the gateway page
+ * entry on context switch, see switch_mmu().
+ */
+ pgd = (pgd_t *)mmu_get_base() + offset;
+ insert_gateway_page(pgd, address);
+#endif /* CONFIG_METAG_META12 */
+
+ /* The gateway code must fit inside the single page mapped above. */
+ BUG_ON((&__user_gateway_end - &__user_gateway_start) > PAGE_SIZE);
+
+ /* Carry over any sub-page offset of USER_GATEWAY_PAGE. */
+ gateway_page += (address & ~PAGE_MASK);
+
+ memcpy(gateway_page, &__user_gateway_start,
+ &__user_gateway_end - &__user_gateway_start);
+
+ /*
+ * We don't need to flush the TLB here, there should be no mapping
+ * present at boot for this address and only valid mappings are in
+ * the TLB (apart from on Meta 1.x, but those cached invalid
+ * mappings should be impossible to hit here).
+ *
+ * We don't flush the code cache here even though we have written
+ * code through the data cache and they may not be coherent. At
+ * this point we assume there is no stale data in the code cache
+ * for this address so there is no need to flush.
+ */
+}
+
+/*
+ * Set up the pglist_data for node 'nid' and record its pfn range.
+ *
+ * With CONFIG_NEED_MULTIPLE_NODES the structure is allocated from memblock,
+ * first below the end of the node and then anywhere in DRAM; failure to
+ * allocate panics.
+ */
+static void __init allocate_pgdat(unsigned int nid)
+{
+ unsigned long start_pfn, end_pfn;
+#ifdef CONFIG_NEED_MULTIPLE_NODES
+ unsigned long phys;
+#endif
+
+ get_pfn_range_for_nid(nid, &start_pfn, &end_pfn);
+
+#ifdef CONFIG_NEED_MULTIPLE_NODES
+ phys = __memblock_alloc_base(sizeof(struct pglist_data),
+ SMP_CACHE_BYTES, end_pfn << PAGE_SHIFT);
+ /* Retry with all of system memory */
+ if (!phys)
+ phys = __memblock_alloc_base(sizeof(struct pglist_data),
+ SMP_CACHE_BYTES,
+ memblock_end_of_DRAM());
+ if (!phys)
+ panic("Can't allocate pgdat for node %d\n", nid);
+
+ NODE_DATA(nid) = __va(phys);
+ memset(NODE_DATA(nid), 0, sizeof(struct pglist_data));
+
+ NODE_DATA(nid)->bdata = &bootmem_node_data[nid];
+#endif
+
+ NODE_DATA(nid)->node_start_pfn = start_pfn;
+ NODE_DATA(nid)->node_spanned_pages = end_pfn - start_pfn;
+}
+
+/*
+ * Initialise the bootmem allocator for node 'nid'.
+ *
+ * Nodes that span no pages are skipped. For node 0 the regions already
+ * reserved in memblock are also reserved in bootmem. Panics if the bootmem
+ * bitmap cannot be allocated.
+ */
+static void __init bootmem_init_one_node(unsigned int nid)
+{
+ unsigned long total_pages, paddr;
+ unsigned long end_pfn;
+ struct pglist_data *p;
+
+ p = NODE_DATA(nid);
+
+ /* Nothing to do.. */
+ if (!p->node_spanned_pages)
+ return;
+
+ end_pfn = p->node_start_pfn + p->node_spanned_pages;
+#ifdef CONFIG_HIGHMEM
+ /* Bootmem only manages pages up to max_low_pfn. */
+ if (end_pfn > max_low_pfn)
+ end_pfn = max_low_pfn;
+#endif
+
+ /* Size of the bootmem bitmap, in pages. */
+ total_pages = bootmem_bootmap_pages(end_pfn - p->node_start_pfn);
+
+ paddr = memblock_alloc(total_pages << PAGE_SHIFT, PAGE_SIZE);
+ if (!paddr)
+ panic("Can't allocate bootmap for nid[%d]\n", nid);
+
+ init_bootmem_node(p, paddr >> PAGE_SHIFT, p->node_start_pfn, end_pfn);
+
+ free_bootmem_with_active_regions(nid, end_pfn);
+
+ /*
+ * XXX Handle initial reservations for the system memory node
+ * only for the moment, we'll refactor this later for handling
+ * reservations in other nodes.
+ */
+ if (nid == 0) {
+ struct memblock_region *reg;
+
+ /* Reserve the sections we're already using. */
+ for_each_memblock(reserved, reg) {
+ unsigned long size = reg->size;
+
+#ifdef CONFIG_HIGHMEM
+ /* ...but not highmem */
+ if (PFN_DOWN(reg->base) >= highstart_pfn)
+ continue;
+
+ /* Clip a region that straddles the start of highmem. */
+ if (PFN_UP(reg->base + size) > highstart_pfn)
+ size = (highstart_pfn - PFN_DOWN(reg->base))
+ << PAGE_SHIFT;
+#endif
+
+ reserve_bootmem(reg->base, size, BOOTMEM_DEFAULT);
+ }
+ }
+
+ sparse_memory_present_with_active_regions(nid);
+}
+
+/*
+ * Assign all memblock memory to node 0, bring node 0 online, let the SoC
+ * code set up further memory via soc_mem_setup(), then initialise bootmem
+ * for every online node and finish with sparse_init().
+ */
+static void __init do_init_bootmem(void)
+{
+ struct memblock_region *reg;
+ int i;
+
+ /* Add active regions with valid PFNs. */
+ for_each_memblock(memory, reg) {
+ unsigned long start_pfn, end_pfn;
+ start_pfn = memblock_region_memory_base_pfn(reg);
+ end_pfn = memblock_region_memory_end_pfn(reg);
+ memblock_set_node(PFN_PHYS(start_pfn),
+ PFN_PHYS(end_pfn - start_pfn), 0);
+ }
+
+ /* All of system RAM sits in node 0 for the non-NUMA case */
+ allocate_pgdat(0);
+ node_set_online(0);
+
+ soc_mem_setup();
+
+ for_each_online_node(i)
+ bootmem_init_one_node(i);
+
+ sparse_init();
+}
+
+extern char _heap_start[];
+
+/*
+ * Register low memory (min_low_pfn..max_low_pfn) with memblock and reserve
+ * everything from its base up to the page following _heap_start. With
+ * CONFIG_HIGHMEM the highmem range is added and reserved as well.
+ */
+static void __init init_and_reserve_mem(void)
+{
+ unsigned long start_pfn, heap_start;
+ u64 base = min_low_pfn << PAGE_SHIFT;
+ u64 size = (max_low_pfn << PAGE_SHIFT) - base;
+
+ heap_start = (unsigned long) &_heap_start;
+
+ memblock_add(base, size);
+
+ /*
+ * Partially used pages are not usable - thus
+ * we are rounding upwards:
+ */
+ start_pfn = PFN_UP(__pa(heap_start));
+
+ /*
+ * Reserve the kernel text.
+ */
+ memblock_reserve(base, (PFN_PHYS(start_pfn) + PAGE_SIZE - 1) - base);
+
+#ifdef CONFIG_HIGHMEM
+ /*
+ * Add & reserve highmem, so page structures are initialised.
+ */
+ base = highstart_pfn << PAGE_SHIFT;
+ size = (highend_pfn << PAGE_SHIFT) - base;
+ if (size) {
+ memblock_add(base, size);
+ memblock_reserve(base, size);
+ }
+#endif
+}
+
+#ifdef CONFIG_HIGHMEM
+/*
+ * Ensure we have allocated page tables in swapper_pg_dir for the
+ * fixed mappings range from 'start' to 'end'.
+ *
+ * The walk advances in PMD_SIZE steps and stops when the address equals
+ * 'end' exactly. NOTE(review): callers presumably pass PMD_SIZE aligned
+ * bounds for that comparison to terminate - confirm.
+ */
+static void __init allocate_pgtables(unsigned long start, unsigned long end)
+{
+ pgd_t *pgd;
+ pmd_t *pmd;
+ pte_t *pte;
+ int i, j;
+ unsigned long vaddr;
+
+ vaddr = start;
+ i = pgd_index(vaddr);
+ j = pmd_index(vaddr);
+ pgd = swapper_pg_dir + i;
+
+ for ( ; (i < PTRS_PER_PGD) && (vaddr != end); pgd++, i++) {
+ /* The pgd entry is used directly as the pmd. */
+ pmd = (pmd_t *)pgd;
+ for (; (j < PTRS_PER_PMD) && (vaddr != end); pmd++, j++) {
+ vaddr += PMD_SIZE;
+
+ /* Leave entries that already have a page table alone. */
+ if (!pmd_none(*pmd))
+ continue;
+
+ pte = (pte_t *)alloc_bootmem_low_pages(PAGE_SIZE);
+ pmd_populate_kernel(&init_mm, pmd, pte);
+ }
+ j = 0;
+ }
+}
+
+/*
+ * Allocate the page tables for the fixmap area and for the permanent kmap
+ * area, and record the first pkmap pte in pkmap_page_table.
+ */
+static void __init fixedrange_init(void)
+{
+ unsigned long vaddr, end;
+ pgd_t *pgd;
+ pud_t *pud;
+ pmd_t *pmd;
+ pte_t *pte;
+
+ /*
+ * Fixed mappings:
+ */
+ vaddr = __fix_to_virt(__end_of_fixed_addresses - 1) & PMD_MASK;
+ end = (FIXADDR_TOP + PMD_SIZE - 1) & PMD_MASK;
+ allocate_pgtables(vaddr, end);
+
+ /*
+ * Permanent kmaps:
+ */
+ vaddr = PKMAP_BASE;
+ allocate_pgtables(vaddr, vaddr + PAGE_SIZE*LAST_PKMAP);
+
+ /* Look up the pte that maps PKMAP_BASE. */
+ pgd = swapper_pg_dir + pgd_index(vaddr);
+ pud = pud_offset(pgd, vaddr);
+ pmd = pmd_offset(pud, vaddr);
+ pte = pte_offset_kernel(pmd, vaddr);
+ pkmap_page_table = pte;
+}
+#endif /* CONFIG_HIGHMEM */
+
+/*
+ * paging_init() continues the virtual memory environment setup which
+ * was begun by the code in arch/metag/kernel/setup.c.
+ *
+ * It registers memory with memblock and bootmem, initialises the MMU
+ * (passing 'mem_end' to mmu_init()), sets up the zero page and the user
+ * gateway page, and finally hands the zone limits to
+ * free_area_init_nodes().
+ */
+void __init paging_init(unsigned long mem_end)
+{
+ unsigned long max_zone_pfns[MAX_NR_ZONES];
+ int nid;
+
+ init_and_reserve_mem();
+
+ memblock_allow_resize();
+
+ memblock_dump_all();
+
+ nodes_clear(node_online_map);
+
+ init_new_context(&init_task, &init_mm);
+
+ memset(swapper_pg_dir, 0, sizeof(swapper_pg_dir));
+
+ do_init_bootmem();
+ mmu_init(mem_end);
+
+#ifdef CONFIG_HIGHMEM
+ fixedrange_init();
+ kmap_init();
+#endif
+
+ /* Initialize the zero page to a bootmem page, already zeroed. */
+ empty_zero_page = (unsigned long)alloc_bootmem_pages(PAGE_SIZE);
+
+ user_gateway_init();
+
+ memset(max_zone_pfns, 0, sizeof(max_zone_pfns));
+
+ /* ZONE_NORMAL ends at the highest low pfn of any online node. */
+ for_each_online_node(nid) {
+ pg_data_t *pgdat = NODE_DATA(nid);
+ unsigned long low, start_pfn;
+
+ start_pfn = pgdat->bdata->node_min_pfn;
+ low = pgdat->bdata->node_low_pfn;
+
+ if (max_zone_pfns[ZONE_NORMAL] < low)
+ max_zone_pfns[ZONE_NORMAL] = low;
+
+#ifdef CONFIG_HIGHMEM
+ max_zone_pfns[ZONE_HIGHMEM] = highend_pfn;
+#endif
+ pr_info("Node %u: start_pfn = 0x%lx, low = 0x%lx\n",
+ nid, start_pfn, low);
+ }
+
+ free_area_init_nodes(max_zone_pfns);
+}
+
+/*
+ * Release boot memory to the page allocator and update the global page
+ * counters. With CONFIG_HIGHMEM every highmem page is freed first; then
+ * the bootmem of each online node is released. A summary is logged at the
+ * end.
+ */
+void __init mem_init(void)
+{
+ int nid;
+
+#ifdef CONFIG_HIGHMEM
+ unsigned long tmp;
+ /* Highmem pages were reserved at boot; free them one by one. */
+ for (tmp = highstart_pfn; tmp < highend_pfn; tmp++) {
+ struct page *page = pfn_to_page(tmp);
+ ClearPageReserved(page);
+ init_page_count(page);
+ __free_page(page);
+ totalhigh_pages++;
+ }
+ totalram_pages += totalhigh_pages;
+ num_physpages += totalhigh_pages;
+#endif /* CONFIG_HIGHMEM */
+
+ for_each_online_node(nid) {
+ pg_data_t *pgdat = NODE_DATA(nid);
+ unsigned long node_pages = 0;
+
+ num_physpages += pgdat->node_present_pages;
+
+ /* Nodes that span no pages have no bootmem to release. */
+ if (pgdat->node_spanned_pages)
+ node_pages = free_all_bootmem_node(pgdat);
+
+ totalram_pages += node_pages;
+ }
+
+ pr_info("Memory: %luk/%luk available\n",
+ (unsigned long)nr_free_pages() << (PAGE_SHIFT - 10),
+ num_physpages << (PAGE_SHIFT - 10));
+
+ show_mem(0);
+
+ return;
+}
+
+/*
+ * Hand the pages in [begin, end) back to the page allocator.
+ *
+ * Each page is un-reserved, poisoned with POISON_FREE_INITMEM and freed;
+ * totalram_pages is increased accordingly. 'what' names the region in the
+ * log message.
+ */
+static void free_init_pages(char *what, unsigned long begin, unsigned long end)
+{
+	unsigned long addr = begin;
+
+	while (addr < end) {
+		ClearPageReserved(virt_to_page(addr));
+		init_page_count(virt_to_page(addr));
+		memset((void *)addr, POISON_FREE_INITMEM, PAGE_SIZE);
+		free_page(addr);
+		totalram_pages++;
+		addr += PAGE_SIZE;
+	}
+
+	pr_info("Freeing %s: %luk freed\n", what, (end - begin) >> 10);
+}
+
+/* Free the memory between __init_begin and __init_end. */
+void free_initmem(void)
+{
+ free_init_pages("unused kernel memory",
+ (unsigned long)(&__init_begin),
+ (unsigned long)(&__init_end));
+}
+
+#ifdef CONFIG_BLK_DEV_INITRD
+/*
+ * Free the initrd pages in [start, end). 'end' is rounded down to a page
+ * boundary first, so a trailing partial page is not freed.
+ */
+void free_initrd_mem(unsigned long start, unsigned long end)
+{
+ end = end & PAGE_MASK;
+ free_init_pages("initrd memory", start, end);
+}
+#endif
+
+#ifdef CONFIG_OF_FLATTREE
+/*
+ * Device tree initrd hook. It only logs the range it was given; no initrd
+ * state is recorded here.
+ */
+void __init early_init_dt_setup_initrd_arch(unsigned long start,
+ unsigned long end)
+{
+ pr_err("%s(%lx, %lx)\n",
+ __func__, start, end);
+}
+#endif /* CONFIG_OF_FLATTREE */
diff --git a/arch/metag/mm/ioremap.c b/arch/metag/mm/ioremap.c
new file mode 100644
index 00000000000..a136a435fda
--- /dev/null
+++ b/arch/metag/mm/ioremap.c
@@ -0,0 +1,89 @@
+/*
+ * Re-map IO memory to kernel address space so that we can access it.
+ * Needed for memory-mapped I/O devices mapped outside our normal DRAM
+ * window (that is, all memory-mapped I/O devices).
+ *
+ * Copyright (C) 1995,1996 Linus Torvalds
+ *
+ * Meta port based on CRIS-port by Axis Communications AB
+ */
+
+#include <linux/vmalloc.h>
+#include <linux/io.h>
+#include <linux/export.h>
+#include <linux/slab.h>
+#include <linux/mm.h>
+
+#include <asm/pgtable.h>
+
+/*
+ * Remap an arbitrary physical address space into the kernel virtual
+ * address space. Needed when the kernel wants to access high addresses
+ * directly.
+ *
+ * NOTE! We need to allow non-page-aligned mappings too: we will obviously
+ * have to convert them into an offset in a page-aligned mapping, but the
+ * caller shouldn't need to know that small detail.
+ *
+ * @phys_addr: physical start address, need not be page aligned
+ * @size:      length in bytes; zero or a wrapping range is rejected
+ * @flags:     extra bits OR-ed into the page protection
+ *
+ * Returns the mapped address, including the sub-page offset of phys_addr,
+ * or NULL on failure. Addresses inside the LINSYSCUSTOM region are
+ * returned unchanged without creating a mapping.
+ */
+void __iomem *__ioremap(unsigned long phys_addr, size_t size,
+ unsigned long flags)
+{
+ unsigned long addr;
+ struct vm_struct *area;
+ unsigned long offset, last_addr;
+ pgprot_t prot;
+
+ /* Don't allow wraparound or zero size */
+ last_addr = phys_addr + size - 1;
+ if (!size || last_addr < phys_addr)
+ return NULL;
+
+ /* Custom region addresses are accessible and uncached by default. */
+ if (phys_addr >= LINSYSCUSTOM_BASE &&
+ phys_addr < (LINSYSCUSTOM_BASE + LINSYSCUSTOM_LIMIT))
+ return (__force void __iomem *) phys_addr;
+
+ /*
+ * Mappings have to be page-aligned
+ */
+ offset = phys_addr & ~PAGE_MASK;
+ phys_addr &= PAGE_MASK;
+ size = PAGE_ALIGN(last_addr+1) - phys_addr;
+ prot = __pgprot(_PAGE_PRESENT | _PAGE_WRITE | _PAGE_DIRTY |
+ _PAGE_ACCESSED | _PAGE_KERNEL | _PAGE_CACHE_WIN0 |
+ flags);
+
+ /*
+ * Ok, go for it..
+ */
+ area = get_vm_area(size, VM_IOREMAP);
+ if (!area)
+ return NULL;
+ area->phys_addr = phys_addr;
+ addr = (unsigned long) area->addr;
+ if (ioremap_page_range(addr, addr + size, phys_addr, prot)) {
+ /* Mapping failed: release the virtual area again. */
+ vunmap((void *) addr);
+ return NULL;
+ }
+ /* Add back the offset that was masked off for alignment. */
+ return (__force void __iomem *) (offset + (char *)addr);
+}
+EXPORT_SYMBOL(__ioremap);
+
+/*
+ * Undo a mapping created by __ioremap().
+ *
+ * Addresses inside the LINSYSCUSTOM region were never mapped and are
+ * ignored. An address with no matching vm area is logged as an error.
+ */
+void __iounmap(void __iomem *addr)
+{
+ struct vm_struct *p;
+
+ if ((__force unsigned long)addr >= LINSYSCUSTOM_BASE &&
+ (__force unsigned long)addr < (LINSYSCUSTOM_BASE +
+ LINSYSCUSTOM_LIMIT))
+ return;
+
+ /* Strip the sub-page offset before looking up the vm area. */
+ p = remove_vm_area((void *)(PAGE_MASK & (unsigned long __force)addr));
+ if (unlikely(!p)) {
+ pr_err("iounmap: bad address %p\n", addr);
+ return;
+ }
+
+ kfree(p);
+}
+EXPORT_SYMBOL(__iounmap);
diff --git a/arch/metag/mm/l2cache.c b/arch/metag/mm/l2cache.c
new file mode 100644
index 00000000000..c64ee615cf9
--- /dev/null
+++ b/arch/metag/mm/l2cache.c
@@ -0,0 +1,192 @@
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/delay.h>
+
+#include <asm/l2cache.h>
+#include <asm/metag_isa.h>
+
+/* If non-0, then initialise the L2 cache */
+static int l2cache_init = 1;
+/* If non-0, then initialise the L2 cache prefetch */
+static int l2cache_init_pf = 1;
+
+int l2c_pfenable;
+
+static volatile u32 l2c_testdata[16] __initdata __aligned(64);
+
+/*
+ * "l2cache" early parameter: an integer stored in l2cache_init.
+ * Returns 0 on success and 1 if the value could not be parsed.
+ */
+static int __init parse_l2cache(char *p)
+{
+	char *cursor = p;
+
+	if (get_option(&cursor, &l2cache_init) == 1)
+		return 0;
+
+	pr_err("Bad l2cache parameter (%s)\n", p);
+	return 1;
+}
+early_param("l2cache", parse_l2cache);
+
+/*
+ * "l2cache_pf" early parameter: an integer stored in l2cache_init_pf.
+ * Returns 0 on success and 1 if the value could not be parsed.
+ */
+static int __init parse_l2cache_pf(char *p)
+{
+	char *cursor = p;
+
+	if (get_option(&cursor, &l2cache_init_pf) == 1)
+		return 0;
+
+	pr_err("Bad l2cache_pf parameter (%s)\n", p);
+	return 1;
+}
+early_param("l2cache_pf", parse_l2cache_pf);
+
+/*
+ * Boot-time L2 cache setup, run as a core initcall.
+ *
+ * Enables the L2 cache and its prefetch unless disabled through the
+ * "l2cache" / "l2cache_pf" parameters, and logs what was done. Always
+ * returns 0.
+ */
+static int __init meta_l2c_setup(void)
+{
+ /*
+ * If the L2 cache isn't even present, don't do anything, but say so in
+ * the log.
+ */
+ if (!meta_l2c_is_present()) {
+ pr_info("L2 Cache: Not present\n");
+ return 0;
+ }
+
+ /*
+ * Check whether the line size is recognised.
+ */
+ if (!meta_l2c_linesize()) {
+ pr_warn_once("L2 Cache: unknown line size id (config=0x%08x)\n",
+ meta_l2c_config());
+ }
+
+ /*
+ * Initialise state.
+ */
+ l2c_pfenable = _meta_l2c_pf_is_enabled();
+
+ /*
+ * Enable the L2 cache and print to log whether it was already enabled
+ * by the bootloader.
+ */
+ if (l2cache_init) {
+ pr_info("L2 Cache: Enabling... ");
+ if (meta_l2c_enable())
+ pr_cont("already enabled\n");
+ else
+ pr_cont("done\n");
+ } else {
+ pr_info("L2 Cache: Not enabling\n");
+ }
+
+ /*
+ * Enable L2 cache prefetch.
+ */
+ if (l2cache_init_pf) {
+ pr_info("L2 Cache: Enabling prefetch... ");
+ /* Non-zero means prefetch was already requested before. */
+ if (meta_l2c_pf_enable(1))
+ pr_cont("already enabled\n");
+ else
+ pr_cont("done\n");
+ } else {
+ pr_info("L2 Cache: Not enabling prefetch\n");
+ }
+
+ return 0;
+}
+core_initcall(meta_l2c_setup);
+
+/*
+ * Write back and disable the L2 cache.
+ *
+ * Returns 1 if the L2 is not present or was already disabled, 0 if it was
+ * enabled and has now been disabled.
+ */
+int meta_l2c_disable(void)
+{
+ unsigned long flags;
+ int en;
+
+ if (!meta_l2c_is_present())
+ return 1;
+
+ /*
+ * Prevent other threads writing during the writeback, otherwise the
+ * writes will get "lost" when the L2 is disabled.
+ */
+ __global_lock2(flags);
+ en = meta_l2c_is_enabled();
+ if (likely(en)) {
+ /* Prefetch off, fence, purge, and only then disable. */
+ _meta_l2c_pf_enable(0);
+ wr_fence();
+ _meta_l2c_purge();
+ _meta_l2c_enable(0);
+ }
+ __global_unlock2(flags);
+
+ return !en;
+}
+
+/*
+ * Initialise and enable the L2 cache, restoring the prefetch setting held
+ * in l2c_pfenable.
+ *
+ * Returns non-zero if the L2 was already enabled, 0 if it has just been
+ * enabled or is not present.
+ */
+int meta_l2c_enable(void)
+{
+ unsigned long flags;
+ int en;
+
+ if (!meta_l2c_is_present())
+ return 0;
+
+ /*
+ * Init (clearing the L2) can happen while the L2 is disabled, so other
+ * threads are safe to continue executing, however we must not init the
+ * cache if it's already enabled (dirty lines would be discarded), so
+ * this operation should still be atomic with other threads.
+ */
+ __global_lock1(flags);
+ en = meta_l2c_is_enabled();
+ if (likely(!en)) {
+ _meta_l2c_init();
+ _meta_l2c_enable(1);
+ _meta_l2c_pf_enable(l2c_pfenable);
+ }
+ __global_unlock1(flags);
+
+ return en;
+}
+
+/*
+ * Set the requested L2 prefetch state to 'pfenable'.
+ *
+ * The value is always stored in l2c_pfenable; the hardware is only
+ * updated while the L2 is enabled. Returns the previous value of
+ * l2c_pfenable, or 0 if the L2 is not present.
+ */
+int meta_l2c_pf_enable(int pfenable)
+{
+ unsigned long flags;
+ int en = l2c_pfenable;
+
+ if (!meta_l2c_is_present())
+ return 0;
+
+ /*
+ * We read modify write the enable register, so this operation must be
+ * atomic with other threads.
+ */
+ __global_lock1(flags);
+ /* Re-read under the lock; the value read above may be stale. */
+ en = l2c_pfenable;
+ l2c_pfenable = pfenable;
+ if (meta_l2c_is_enabled())
+ _meta_l2c_pf_enable(pfenable);
+ __global_unlock1(flags);
+
+ return en;
+}
+
+/*
+ * Flush the L2 cache by purging it, disabling it, re-initialising it and
+ * enabling it again with the saved prefetch setting.
+ *
+ * Returns 0 if the L2 was enabled and has been flushed, 1 if it was
+ * disabled and nothing was done.
+ */
+int meta_l2c_flush(void)
+{
+ unsigned long flags;
+ int en;
+
+ /*
+ * Prevent other threads writing during the writeback. This also
+ * involves read modify writes.
+ */
+ __global_lock2(flags);
+ en = meta_l2c_is_enabled();
+ if (likely(en)) {
+ /* Same sequence as meta_l2c_disable()... */
+ _meta_l2c_pf_enable(0);
+ wr_fence();
+ _meta_l2c_purge();
+ _meta_l2c_enable(0);
+ /* ...followed by the sequence from meta_l2c_enable(). */
+ _meta_l2c_init();
+ _meta_l2c_enable(1);
+ _meta_l2c_pf_enable(l2c_pfenable);
+ }
+ __global_unlock2(flags);
+
+ return !en;
+}
diff --git a/arch/metag/mm/maccess.c b/arch/metag/mm/maccess.c
new file mode 100644
index 00000000000..eba2cfc935b
--- /dev/null
+++ b/arch/metag/mm/maccess.c
@@ -0,0 +1,68 @@
+/*
+ * safe read and write memory routines callable while atomic
+ *
+ * Copyright 2012 Imagination Technologies
+ */
+
+#include <linux/uaccess.h>
+#include <asm/io.h>
+
+/*
+ * The generic probe_kernel_write() uses the user copy code which can split the
+ * writes if the source is unaligned, and repeats writes to make exceptions
+ * precise. We override it here to avoid these things happening to memory mapped
+ * IO memory where they could have undesired effects.
+ * Due to the use of CACHERD instruction this only works on Meta2 onwards.
+ */
+#ifdef CONFIG_METAG_META21
+long probe_kernel_write(void *dst, const void *src, size_t size)
+{
+ unsigned long ldst = (unsigned long)dst;
+ void __iomem *iodst = (void __iomem *)dst;
+ unsigned long lsrc = (unsigned long)src;
+ const u8 *psrc = (u8 *)src;
+ unsigned int pte, i;
+ u8 bounce[8] __aligned(8);
+
+ if (!size)
+ return 0;
+
+ /* Use the write combine bit to decide if the destination is MMIO. */
+ pte = __builtin_meta2_cacherd(dst);
+
+ /* Check the mapping is valid and writeable. */
+ if ((pte & (MMCU_ENTRY_WR_BIT | MMCU_ENTRY_VAL_BIT))
+ != (MMCU_ENTRY_WR_BIT | MMCU_ENTRY_VAL_BIT))
+ return -EFAULT;
+
+ /* Fall back to generic version for cases we're not interested in. */
+ if (pte & MMCU_ENTRY_WRC_BIT || /* write combined memory */
+ (ldst & (size - 1)) || /* destination unaligned */
+ size > 8 || /* more than max write size */
+ (size & (size - 1))) /* non power of 2 size */
+ return __probe_kernel_write(dst, src, size);
+
+ /* If src is unaligned, copy to the aligned bounce buffer first. */
+ if (lsrc & (size - 1)) {
+ for (i = 0; i < size; ++i)
+ bounce[i] = psrc[i];
+ psrc = bounce;
+ }
+
+ switch (size) {
+ case 1:
+ writeb(*psrc, iodst);
+ break;
+ case 2:
+ writew(*(const u16 *)psrc, iodst);
+ break;
+ case 4:
+ writel(*(const u32 *)psrc, iodst);
+ break;
+ case 8:
+ writeq(*(const u64 *)psrc, iodst);
+ break;
+ }
+ return 0;
+}
+#endif
diff --git a/arch/metag/mm/mmu-meta1.c b/arch/metag/mm/mmu-meta1.c
new file mode 100644
index 00000000000..91f4255bcb5
--- /dev/null
+++ b/arch/metag/mm/mmu-meta1.c
@@ -0,0 +1,157 @@
+/*
+ * Copyright (C) 2005,2006,2007,2008,2009 Imagination Technologies
+ *
+ * Meta 1 MMU handling code.
+ *
+ */
+
+#include <linux/sched.h>
+#include <linux/mm.h>
+#include <linux/io.h>
+
+#include <asm/mmu.h>
+
+#define DM3_BASE (LINSYSDIRECT_BASE + (MMCU_DIRECTMAPn_ADDR_SCALE * 3))
+
+/*
+ * This contains the physical address of the top level 2k pgd table.
+ */
+static unsigned long mmu_base_phys;
+
+/*
+ * Given a physical address, return a mapped virtual address that can be used
+ * to access that location.
+ * In practice, we use the DirectMap region to make this happen.
+ */
+static unsigned long map_addr(unsigned long phys)
+{
+ static unsigned long dm_base = 0xFFFFFFFF;
+ int offset;
+
+ offset = phys - dm_base;
+
+ /* Are we in the current map range ? */
+ if ((offset < 0) || (offset >= MMCU_DIRECTMAPn_ADDR_SCALE)) {
+ /* Calculate new DM area */
+ dm_base = phys & ~(MMCU_DIRECTMAPn_ADDR_SCALE - 1);
+
+ /* Actually map it in! */
+ metag_out32(dm_base, MMCU_DIRECTMAP3_ADDR);
+
+ /* And calculate how far into that area our reference is */
+ offset = phys - dm_base;
+ }
+
+ return DM3_BASE + offset;
+}
+
+/*
+ * Return the physical address of the base of our pgd table.
+ */
+static inline unsigned long __get_mmu_base(void)
+{
+ unsigned long base_phys;
+ unsigned int stride;
+
+ if (is_global_space(PAGE_OFFSET))
+ stride = 4;
+ else
+ stride = hard_processor_id(); /* [0..3] */
+
+ base_phys = metag_in32(MMCU_TABLE_PHYS_ADDR);
+ base_phys += (0x800 * stride);
+
+ return base_phys;
+}
+
+/* Given a virtual address, return the virtual address of the relevant pgd */
+static unsigned long pgd_entry_addr(unsigned long virt)
+{
+ unsigned long pgd_phys;
+ unsigned long pgd_virt;
+
+ if (!mmu_base_phys)
+ mmu_base_phys = __get_mmu_base();
+
+ /*
+ * Are we trying to map a global address. If so, then index
+ * the global pgd table instead of our local one.
+ */
+ if (is_global_space(virt)) {
+ /* Scale into 2gig map */
+ virt &= ~0x80000000;
+ }
+
+ /* Base of the pgd table plus our 4Meg entry, 4bytes each */
+ pgd_phys = mmu_base_phys + ((virt >> PGDIR_SHIFT) * 4);
+
+ pgd_virt = map_addr(pgd_phys);
+
+ return pgd_virt;
+}
+
+/* Given a virtual address, return the virtual address of the relevant pte */
+static unsigned long pgtable_entry_addr(unsigned long virt)
+{
+ unsigned long pgtable_phys;
+ unsigned long pgtable_virt, pte_virt;
+
+ /* Find the physical address of the 4MB page table*/
+ pgtable_phys = metag_in32(pgd_entry_addr(virt)) & MMCU_ENTRY_ADDR_BITS;
+
+ /* Map it to a virtual address */
+ pgtable_virt = map_addr(pgtable_phys);
+
+ /* And index into it for our pte */
+ pte_virt = pgtable_virt + ((virt >> PAGE_SHIFT) & 0x3FF) * 4;
+
+ return pte_virt;
+}
+
+unsigned long mmu_read_first_level_page(unsigned long vaddr)
+{
+ return metag_in32(pgd_entry_addr(vaddr));
+}
+
+unsigned long mmu_read_second_level_page(unsigned long vaddr)
+{
+ return metag_in32(pgtable_entry_addr(vaddr));
+}
+
+unsigned long mmu_get_base(void)
+{
+ static unsigned long __base;
+
+ /* Find the base of our MMU pgd table */
+ if (!__base)
+ __base = pgd_entry_addr(0);
+
+ return __base;
+}
+
+void __init mmu_init(unsigned long mem_end)
+{
+ unsigned long entry, addr;
+ pgd_t *p_swapper_pg_dir;
+
+ /*
+ * Now copy over any MMU pgd entries already in the mmu page tables
+ * over to our root init process (swapper_pg_dir) map. This map is
+ * then inherited by all other processes, which means all processes
+ * inherit a map of the kernel space.
+ */
+ addr = PAGE_OFFSET;
+ entry = pgd_index(PAGE_OFFSET);
+ p_swapper_pg_dir = pgd_offset_k(0) + entry;
+
+ while (addr <= META_MEMORY_LIMIT) {
+ unsigned long pgd_entry;
+ /* copy over the current MMU value */
+ pgd_entry = mmu_read_first_level_page(addr);
+ pgd_val(*p_swapper_pg_dir) = pgd_entry;
+
+ p_swapper_pg_dir++;
+ addr += PGDIR_SIZE;
+ entry++;
+ }
+}
diff --git a/arch/metag/mm/mmu-meta2.c b/arch/metag/mm/mmu-meta2.c
new file mode 100644
index 00000000000..81dcbb0bba3
--- /dev/null
+++ b/arch/metag/mm/mmu-meta2.c
@@ -0,0 +1,207 @@
+/*
+ * Copyright (C) 2008,2009,2010,2011 Imagination Technologies Ltd.
+ *
+ * Meta 2 enhanced mode MMU handling code.
+ *
+ */
+
+#include <linux/mm.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/io.h>
+#include <linux/bootmem.h>
+#include <linux/syscore_ops.h>
+
+#include <asm/mmu.h>
+#include <asm/mmu_context.h>
+
+unsigned long mmu_read_first_level_page(unsigned long vaddr)
+{
+ unsigned int cpu = hard_processor_id();
+ unsigned long offset, linear_base, linear_limit;
+ unsigned int phys0;
+ pgd_t *pgd, entry;
+
+ if (is_global_space(vaddr))
+ vaddr &= ~0x80000000;
+
+ offset = vaddr >> PGDIR_SHIFT;
+
+ phys0 = metag_in32(mmu_phys0_addr(cpu));
+
+ /* Top bit of linear base is always zero. */
+ linear_base = (phys0 >> PGDIR_SHIFT) & 0x1ff;
+
+ /* Limit in the range 0 (4MB) to 9 (2GB). */
+ linear_limit = 1 << ((phys0 >> 8) & 0xf);
+ linear_limit += linear_base;
+
+ /*
+ * The table covers [linear_base, linear_limit), so an offset below the
+ * base or at/above the limit has no mapping (and no pgd entry to read).
+ */
+ if (offset < linear_base || offset >= linear_limit)
+ return 0;
+
+ offset -= linear_base;
+ pgd = (pgd_t *)mmu_get_base();
+ entry = pgd[offset];
+
+ return pgd_val(entry);
+}
+
+unsigned long mmu_read_second_level_page(unsigned long vaddr)
+{
+ return __builtin_meta2_cacherd((void *)(vaddr & PAGE_MASK));
+}
+
+unsigned long mmu_get_base(void)
+{
+ unsigned int cpu = hard_processor_id();
+ unsigned long stride;
+
+ stride = cpu * LINSYSMEMTnX_STRIDE;
+
+ /*
+ * Bits 18:2 of the MMCU_TnLocal_TABLE_PHYS1 register should be
+ * used as an offset to the start of the top-level pgd table.
+ */
+ stride += (metag_in32(mmu_phys1_addr(cpu)) & 0x7fffc);
+
+ if (is_global_space(PAGE_OFFSET))
+ stride += LINSYSMEMTXG_OFFSET;
+
+ return LINSYSMEMT0L_BASE + stride;
+}
+
+#define FIRST_LEVEL_MASK 0xffffffc0
+#define SECOND_LEVEL_MASK 0xfffff000
+#define SECOND_LEVEL_ALIGN 64
+
+static void repriv_mmu_tables(void)
+{
+ unsigned long phys0_addr;
+ unsigned int g;
+
+ /*
+ * Check that all the mmu table regions are priv protected, and if not
+ * fix them and emit a warning. If we left them without priv protection
+ * then userland processes would have access to a 2M window into
+ * physical memory near where the page tables are.
+ */
+ phys0_addr = MMCU_T0LOCAL_TABLE_PHYS0;
+ for (g = 0; g < 2; ++g) {
+ unsigned int t, phys0;
+ unsigned long flags;
+ for (t = 0; t < 4; ++t) {
+ __global_lock2(flags);
+ phys0 = metag_in32(phys0_addr);
+ if ((phys0 & _PAGE_PRESENT) && !(phys0 & _PAGE_PRIV)) {
+ pr_warn("Fixing priv protection on T%d %s MMU table region\n",
+ t,
+ g ? "global" : "local");
+ phys0 |= _PAGE_PRIV;
+ metag_out32(phys0, phys0_addr);
+ }
+ __global_unlock2(flags);
+
+ phys0_addr += MMCU_TnX_TABLE_PHYSX_STRIDE;
+ }
+
+ phys0_addr += MMCU_TXG_TABLE_PHYSX_OFFSET
+ - 4*MMCU_TnX_TABLE_PHYSX_STRIDE;
+ }
+}
+
+#ifdef CONFIG_METAG_SUSPEND_MEM
+static void mmu_resume(void)
+{
+ /*
+ * If a full suspend to RAM has happened then the original bad MMU table
+ * priv may have been restored, so repriv them again.
+ */
+ repriv_mmu_tables();
+}
+#else
+#define mmu_resume NULL
+#endif /* CONFIG_METAG_SUSPEND_MEM */
+
+static struct syscore_ops mmu_syscore_ops = {
+ .resume = mmu_resume,
+};
+
+void __init mmu_init(unsigned long mem_end)
+{
+ unsigned long entry, addr;
+ pgd_t *p_swapper_pg_dir;
+#ifdef CONFIG_KERNEL_4M_PAGES
+ unsigned long mem_size = mem_end - PAGE_OFFSET;
+ unsigned int pages = DIV_ROUND_UP(mem_size, 1 << 22);
+ unsigned int second_level_entry = 0;
+ unsigned long *second_level_table;
+#endif
+
+ /*
+ * Now copy over any MMU pgd entries already in the mmu page tables
+ * over to our root init process (swapper_pg_dir) map. This map is
+ * then inherited by all other processes, which means all processes
+ * inherit a map of the kernel space.
+ */
+ addr = META_MEMORY_BASE;
+ entry = pgd_index(META_MEMORY_BASE);
+ p_swapper_pg_dir = pgd_offset_k(0) + entry;
+
+ while (entry < (PTRS_PER_PGD - pgd_index(META_MEMORY_BASE))) {
+ unsigned long pgd_entry;
+ /* copy over the current MMU value */
+ pgd_entry = mmu_read_first_level_page(addr);
+ pgd_val(*p_swapper_pg_dir) = pgd_entry;
+
+ p_swapper_pg_dir++;
+ addr += PGDIR_SIZE;
+ entry++;
+ }
+
+#ifdef CONFIG_KERNEL_4M_PAGES
+ /*
+ * At this point we can also map the kernel with 4MB pages to
+ * reduce TLB pressure.
+ */
+ second_level_table = alloc_bootmem_pages(SECOND_LEVEL_ALIGN * pages);
+
+ addr = PAGE_OFFSET;
+ entry = pgd_index(PAGE_OFFSET);
+ p_swapper_pg_dir = pgd_offset_k(0) + entry;
+
+ while (pages > 0) {
+ unsigned long phys_addr, second_level_phys;
+ pte_t *pte = (pte_t *)&second_level_table[second_level_entry];
+
+ phys_addr = __pa(addr);
+
+ second_level_phys = __pa(pte);
+
+ pgd_val(*p_swapper_pg_dir) = ((second_level_phys &
+ FIRST_LEVEL_MASK) |
+ _PAGE_SZ_4M |
+ _PAGE_PRESENT);
+
+ pte_val(*pte) = ((phys_addr & SECOND_LEVEL_MASK) |
+ _PAGE_PRESENT | _PAGE_DIRTY |
+ _PAGE_ACCESSED | _PAGE_WRITE |
+ _PAGE_CACHEABLE | _PAGE_KERNEL);
+
+ p_swapper_pg_dir++;
+ addr += PGDIR_SIZE;
+ /* Second level pages must be 64byte aligned. */
+ second_level_entry += (SECOND_LEVEL_ALIGN /
+ sizeof(unsigned long));
+ pages--;
+ }
+ load_pgd(swapper_pg_dir, hard_processor_id());
+ flush_tlb_all();
+#endif
+
+ repriv_mmu_tables();
+ register_syscore_ops(&mmu_syscore_ops);
+}
diff --git a/arch/metag/mm/numa.c b/arch/metag/mm/numa.c
new file mode 100644
index 00000000000..9ae578c9b62
--- /dev/null
+++ b/arch/metag/mm/numa.c
@@ -0,0 +1,81 @@
+/*
+ * Multiple memory node support for Meta machines
+ *
+ * Copyright (C) 2007 Paul Mundt
+ * Copyright (C) 2010 Imagination Technologies Ltd.
+ *
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License. See the file "COPYING" in the main directory of this archive
+ * for more details.
+ */
+#include <linux/export.h>
+#include <linux/bootmem.h>
+#include <linux/memblock.h>
+#include <linux/mm.h>
+#include <linux/numa.h>
+#include <linux/pfn.h>
+#include <asm/sections.h>
+
+struct pglist_data *node_data[MAX_NUMNODES] __read_mostly;
+EXPORT_SYMBOL_GPL(node_data);
+
+extern char _heap_start[];
+
+/*
+ * On Meta machines the conventional approach is to stash system RAM
+ * in node 0, and other memory blocks in to node 1 and up, ordered by
+ * latency. Each node's pgdat is node-local at the beginning of the node,
+ * immediately followed by the node mem map.
+ */
+void __init setup_bootmem_node(int nid, unsigned long start, unsigned long end)
+{
+ unsigned long bootmap_pages, bootmem_paddr;
+ unsigned long start_pfn, end_pfn;
+ unsigned long pgdat_paddr;
+
+ /* Don't allow bogus node assignment: nid must index node_data[] */
+ BUG_ON(nid >= MAX_NUMNODES || nid <= 0);
+
+ start_pfn = start >> PAGE_SHIFT;
+ end_pfn = end >> PAGE_SHIFT;
+
+ memblock_add(start, end - start);
+
+ memblock_set_node(PFN_PHYS(start_pfn),
+ PFN_PHYS(end_pfn - start_pfn), nid);
+
+ /* Node-local pgdat */
+ pgdat_paddr = memblock_alloc_base(sizeof(struct pglist_data),
+ SMP_CACHE_BYTES, end);
+ NODE_DATA(nid) = __va(pgdat_paddr);
+ memset(NODE_DATA(nid), 0, sizeof(struct pglist_data));
+
+ NODE_DATA(nid)->bdata = &bootmem_node_data[nid];
+ NODE_DATA(nid)->node_start_pfn = start_pfn;
+ NODE_DATA(nid)->node_spanned_pages = end_pfn - start_pfn;
+
+ /* Node-local bootmap */
+ bootmap_pages = bootmem_bootmap_pages(end_pfn - start_pfn);
+ bootmem_paddr = memblock_alloc_base(bootmap_pages << PAGE_SHIFT,
+ PAGE_SIZE, end);
+ init_bootmem_node(NODE_DATA(nid), bootmem_paddr >> PAGE_SHIFT,
+ start_pfn, end_pfn);
+
+ free_bootmem_with_active_regions(nid, end_pfn);
+
+ /* Reserve the pgdat and bootmap space with the bootmem allocator */
+ reserve_bootmem_node(NODE_DATA(nid), pgdat_paddr & PAGE_MASK,
+ sizeof(struct pglist_data), BOOTMEM_DEFAULT);
+ reserve_bootmem_node(NODE_DATA(nid), bootmem_paddr,
+ bootmap_pages << PAGE_SHIFT, BOOTMEM_DEFAULT);
+
+ /* It's up */
+ node_set_online(nid);
+
+ /* Kick sparsemem */
+ sparse_memory_present_with_active_regions(nid);
+}
+
+void __init __weak soc_mem_setup(void)
+{
+}
diff --git a/arch/metag/tbx/Makefile b/arch/metag/tbx/Makefile
new file mode 100644
index 00000000000..e994239e518
--- /dev/null
+++ b/arch/metag/tbx/Makefile
@@ -0,0 +1,21 @@
+#
+# Makefile for TBX library files..
+#
+
+asflags-y += -mmetac=2.1 -Wa,-mfpu=metac21 -mdsp
+asflags-$(CONFIG_SMP) += -DTBX_PERCPU_SP_SAVE
+
+ccflags-y += -mmetac=2.1
+
+lib-y += tbicore.o
+lib-y += tbictx.o
+lib-y += tbidefr.o
+lib-y += tbilogf.o
+lib-y += tbipcx.o
+lib-y += tbiroot.o
+lib-y += tbisoft.o
+lib-y += tbistring.o
+lib-y += tbitimer.o
+
+lib-$(CONFIG_METAG_DSP) += tbidspram.o
+lib-$(CONFIG_METAG_FPU) += tbictxfpu.o
diff --git a/arch/metag/tbx/tbicore.S b/arch/metag/tbx/tbicore.S
new file mode 100644
index 00000000000..a0838ebcb43
--- /dev/null
+++ b/arch/metag/tbx/tbicore.S
@@ -0,0 +1,136 @@
+/*
+ * tbicore.S
+ *
+ * Copyright (C) 2001, 2002, 2007, 2012 Imagination Technologies.
+ *
+ * This program is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU General Public License version 2 as published by the
+ * Free Software Foundation.
+ *
+ * Core functions needed to support use of the thread binary interface for META
+ * processors
+ */
+
+ .file "tbicore.S"
+/* Get data structures and defines from the TBI C header */
+#include <asm/metag_mem.h>
+#include <asm/metag_regs.h>
+#include <asm/tbx.h>
+
+ .data
+ .balign 8
+ .global ___pTBISegs
+ .type ___pTBISegs,object
+___pTBISegs:
+ .quad 0 /* Segment list pointer with its */
+ .size ___pTBISegs,.-___pTBISegs
+ /* own id or spin-lock location */
+/*
+ * Return ___pTBISegs value specific to privilege level - not very complicated
+ * at the moment
+ *
+ * Register Usage: D0Re0 is the result, D1Re0 is used as a scratch
+ */
+ .text
+ .balign 4
+ .global ___TBISegList
+ .type ___TBISegList,function
+___TBISegList:
+ MOVT A1LbP,#HI(___pTBISegs)
+ ADD A1LbP,A1LbP,#LO(___pTBISegs)
+ GETL D0Re0,D1Re0,[A1LbP]
+ MOV PC,D1RtP
+ .size ___TBISegList,.-___TBISegList
+
+/*
+ * Search the segment list for a match given Id, pStart can be NULL
+ *
+ * Register Usage: D1Ar1 is pSeg, D0Ar2 is Id, D0Re0 is the result
+ * D0Ar4, D1Ar3 are used as a scratch
+ * NB: The PSTAT bit of Id in D0Ar2 may be toggled
+ */
+ .text
+ .balign 4
+ .global ___TBIFindSeg
+ .type ___TBIFindSeg,function
+___TBIFindSeg:
+ MOVT A1LbP,#HI(___pTBISegs)
+ ADD A1LbP,A1LbP,#LO(___pTBISegs)
+ GETL D1Ar3,D0Ar4,[A1LbP] /* Read segment list head */
+ MOV D0Re0,TXSTATUS /* What priv level are we at? */
+ CMP D1Ar1,#0 /* Is pStart provided? */
+/* Disable privilege adaption for now */
+ ANDT D0Re0,D0Re0,#0 /*HI(TXSTATUS_PSTAT_BIT) ; Is PSTAT set? Zero if not */
+ LSL D0Re0,D0Re0,#(TBID_PSTAT_S-TXSTATUS_PSTAT_S)
+ XOR D0Ar2,D0Ar2,D0Re0 /* Toggle Id PSTAT if privileged */
+ MOVNZ D1Ar3,D1Ar1 /* Use pStart if provided */
+$LFindSegLoop:
+ ADDS D0Re0,D1Ar3,#0 /* End of list? Load result into D0Re0 */
+ MOVZ PC,D1RtP /* If result is NULL we leave */
+ GETL D1Ar3,D0Ar4,[D1Ar3] /* Read pLink and Id */
+ CMP D0Ar4,D0Ar2 /* Does it match? */
+ BNZ $LFindSegLoop /* Loop if there is no match */
+ TST D0Re0,D0Re0 /* Clear zero flag - we found it! */
+ MOV PC,D1RtP /* Return */
+ .size ___TBIFindSeg,.-___TBIFindSeg
+
+/* Useful offsets to encode the lower bits of the lock/unlock addresses */
+#define UON (LINSYSEVENT_WR_ATOMIC_LOCK & 0xFFF8)
+#define UOFF (LINSYSEVENT_WR_ATOMIC_UNLOCK & 0xFFF8)
+
+/*
+ * Perform a whole spin-lock sequence as used by the TBISignal routine
+ *
+ * Register Usage: D1Ar1 is pLock, D0Ar2 is Mask, D0Re0 is the result
+ * (All other usage due to ___TBIPoll - D0Ar6, D1Re0)
+ */
+ .text
+ .balign 4
+ .global ___TBISpin
+ .type ___TBISpin,function
+___TBISpin:
+ SETL [A0StP++],D0FrT,D1RtP /* Save our return address */
+ ORS D0Re0,D0Re0,#1 /* Clear zero flag */
+ MOV D1RtP,PC /* Setup return address to form loop */
+$LSpinLoop:
+ BNZ ___TBIPoll /* Keep repeating if fail to set */
+ GETL D0FrT,D1RtP,[--A0StP] /* Restore return address */
+ MOV PC,D1RtP /* Return */
+ .size ___TBISpin,.-___TBISpin
+
+/*
+ * Perform an attempt to gain access to a spin-lock and set some bits
+ *
+ * Register Usage: D1Ar1 is pLock, D0Ar2 is Mask, D0Re0 is the result
+ * !!On return Zero flag is SET if we are successful!!
+ * A0.3 is used to hold base address of system event region
+ * D1Re0 use to hold TXMASKI while interrupts are off
+ */
+ .text
+ .balign 4
+ .global ___TBIPoll
+ .type ___TBIPoll,function
+___TBIPoll:
+ MOV D1Re0,#0 /* Prepare to disable ints */
+ MOVT A0.3,#HI(LINSYSEVENT_WR_ATOMIC_LOCK)
+ SWAP D1Re0,TXMASKI /* Really stop ints */
+ LOCK2 /* Gain all locks */
+ SET [A0.3+#UON],D1RtP /* Stop shared memory access too */
+ DCACHE [D1Ar1],A0.3 /* Flush Cache line */
+ GETD D0Re0,[D1Ar1] /* Get new state from memory or hit */
+ DCACHE [D1Ar1],A0.3 /* Flush Cache line */
+ GETD D0Re0,[D1Ar1] /* Get current state */
+ TST D0Re0,D0Ar2 /* Are we clear to send? */
+ ORZ D0Re0,D0Re0,D0Ar2 /* Yes: So set bits and */
+ SETDZ [D1Ar1],D0Re0 /* transmit new state */
+ SET [A0.3+#UOFF],D1RtP /* Allow shared memory access */
+ LOCK0 /* Release all locks */
+ MOV TXMASKI,D1Re0 /* Allow ints */
+$LPollEnd:
+ XORNZ D0Re0,D0Re0,D0Re0 /* No: Generate zero result */
+ MOV PC,D1RtP /* Return (NZ indicates failure) */
+ .size ___TBIPoll,.-___TBIPoll
+
+/*
+ * End of tbicore.S
+ */
diff --git a/arch/metag/tbx/tbictx.S b/arch/metag/tbx/tbictx.S
new file mode 100644
index 00000000000..19af983a13a
--- /dev/null
+++ b/arch/metag/tbx/tbictx.S
@@ -0,0 +1,366 @@
+/*
+ * tbictx.S
+ *
+ * Copyright (C) 2001, 2002, 2007, 2012 Imagination Technologies.
+ *
+ * This program is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU General Public License version 2 as published by the
+ * Free Software Foundation.
+ *
+ * Explicit state save and restore routines forming part of the thread binary
+ * interface for META processors
+ */
+
+ .file "tbictx.S"
+#include <asm/metag_regs.h>
+#include <asm/tbx.h>
+
+#ifdef METAC_1_0
+/* Ax.4 is NOT saved in XAX3 */
+#define A0_4
+#else
+/* Ax.4 is saved in XAX4 */
+#define A0_4 A0.4,
+#endif
+
+
+/* Size of the TBICTX structure */
+#define TBICTX_BYTES ((TBICTX_AX_REGS*8)+TBICTX_AX)
+
+/*
+ * TBIRES __TBINestInts( TBIRES State, void *pExt, int NoNestMask )
+ */
+ .text
+ .balign 4
+ .global ___TBINestInts
+ .type ___TBINestInts,function
+___TBINestInts:
+ XOR D0Ar4,D0Ar4,#-1 /* D0Ar4 = ~TrigBit */
+ AND D0Ar4,D0Ar4,#0xFFFF /* D0Ar4 &= 0xFFFF */
+ MOV D0Ar6,TXMASKI /* BGNDHALT currently enabled? */
+ TSTT D0Ar2,#TBICTX_XDX8_BIT+TBICTX_XAXX_BIT+TBICTX_XHL2_BIT+TBICTX_XTDP_BIT+TBICTX_XCBF_BIT
+ AND D0Ar4,D0Ar2,D0Ar4 /* D0Ar4 = Ints to allow */
+ XOR D0Ar2,D0Ar2,D0Ar4 /* Less Ints in TrigMask */
+ BNZ ___TBINestInts2 /* Jump if ctx save required! */
+ TSTT D0Ar2,#TBICTX_CBUF_BIT+TBICTX_CBRP_BIT /* Is catch state dirty? */
+ OR D0Ar4,D0Ar4,D0Ar6 /* Or in TXMASKI BGNDHALT if set */
+ TSTNZ D0Ar4,D0Ar4 /* Yes: AND triggers enabled */
+ MOV D0Re0,D0Ar2 /* Update State argument */
+ MOV D1Re0,D1Ar1 /* with less Ints in TrigMask */
+ MOVZ TXMASKI,D0Ar4 /* Early return: Enable Ints */
+ MOVZ PC,D1RtP /* Early return */
+ .size ___TBINestInts,.-___TBINestInts
+/*
+ * Drop thru into sub-function-
+ */
+ .global ___TBINestInts2
+ .type ___TBINestInts2,function
+___TBINestInts2:
+ MOV D0FrT,A0FrP /* Full entry sequence so we */
+ ADD A0FrP,A0StP,#0 /* can make sub-calls */
+ MSETL [A0StP],D0FrT,D0.5,D0.6 /* and preserve our result */
+ ORT D0Ar2,D0Ar2,#TBICTX_XCBF_BIT /* Add in XCBF save request */
+ MOV D0.5,D0Ar2 /* Save State in DX.5 */
+ MOV D1.5,D1Ar1
+ OR D0.6,D0Ar4,D0Ar6 /* Save TrigMask in D0.6 */
+ MOVT D1RtP,#HI(___TBICtxSave) /* Save catch buffer */
+ CALL D1RtP,#LO(___TBICtxSave)
+ MOV TXMASKI,D0.6 /* Allow Ints */
+ MOV D0Re0,D0.5 /* Return State */
+ MOV D1Re0,D1.5
+ MGETL D0FrT,D0.5,D0.6,[A0FrP] /* Full exit sequence */
+ SUB A0StP,A0FrP,#(8*3)
+ MOV A0FrP,D0FrT
+ MOV PC,D1RtP
+ .size ___TBINestInts2,.-___TBINestInts2
+
+/*
+ * void *__TBICtxSave( TBIRES State, void *pExt )
+ *
+ * D0Ar2 contains TBICTX_*_BIT values that control what
+ * extended data is to be saved beyond the end of D1Ar1.
+ * These bits must be ored into the SaveMask of this structure.
+ *
+ * Virtually all possible scratch registers are used.
+ *
+ * The D1Ar1 parameter is only used as the basis for saving
+ * CBUF state.
+ */
+/*
+ * If TBICTX_XEXT_BIT is specified in State, then State.pCtx->Ext is
+ * utilised to save the base address of the context save area and
+ * the extended states saved. The XEXT flag then indicates that the
+ * original state of the A0.2 and A1.2 registers from TBICTX.Ext.AX2
+ * are stored as the first part of the extended state structure.
+ */
+ .balign 4
+ .global ___TBICtxSave
+ .type ___TBICtxSave,function
+___TBICtxSave:
+ GETD D0Re0,[D1Ar1+#TBICTX_SaveMask-2] /* Get SaveMask */
+ TSTT D0Ar2,#TBICTX_XDX8_BIT+TBICTX_XAXX_BIT+TBICTX_XHL2_BIT+TBICTX_XTDP_BIT+TBICTX_XEXT_BIT
+ /* Just XCBF to save? */
+ MOV A0.2,D1Ar3 /* Save pointer into A0.2 */
+ MOV A1.2,D1RtP /* Free off D0FrT:D1RtP pair */
+ BZ $LCtxSaveCBUF /* Yes: Only XCBF may be saved */
+ TSTT D0Ar2,#TBICTX_XEXT_BIT /* Extended base-state model? */
+ BZ $LCtxSaveXDX8
+ GETL D0Ar6,D1Ar5,[D1Ar1+#TBICTX_Ext_AX2] /* Get A0.2, A1.2 state */
+ MOV D0Ar4,D0Ar2 /* Extract Ctx.SaveFlags value */
+ ANDMT D0Ar4,D0Ar4,#TBICTX_XDX8_BIT+TBICTX_XAXX_BIT+TBICTX_XHL2_BIT+TBICTX_XTDP_BIT+TBICTX_XEXT_BIT
+ SETD [D1Ar1+#TBICTX_Ext_Ctx_pExt],A0.2
+ SETD [D1Ar1+#TBICTX_Ext_Ctx_SaveMask-2],D0Ar4
+ SETL [A0.2++],D0Ar6,D1Ar5 /* Save A0.2, A1.2 state */
+$LCtxSaveXDX8:
+ TSTT D0Ar2,#TBICTX_XDX8_BIT /* Save extended DX regs? */
+ BZ $LCtxSaveXAXX
+/*
+ * Save 8 extra DX registers
+ */
+ MSETL [A0.2],D0.8,D0.9,D0.10,D0.11,D0.12,D0.13,D0.14,D0.15
+$LCtxSaveXAXX:
+ TSTT D0Ar2,#TBICTX_XAXX_BIT /* Save extended AX regs? */
+ SWAP D0Re0,A0.2 /* pDst into D0Re0 */
+ BZ $LCtxSaveXHL2
+/*
+ * Save 4 extra AX registers
+ */
+ MSETL [D0Re0], A0_4 A0.5,A0.6,A0.7 /* Save 8*3 bytes */
+$LCtxSaveXHL2:
+ TSTT D0Ar2,#TBICTX_XHL2_BIT /* Save hardware-loop regs? */
+ SWAP D0Re0,A0.2 /* pDst back into A0.2 */
+ MOV D0Ar6,TXL1START
+ MOV D1Ar5,TXL2START
+ BZ $LCtxSaveXTDP
+/*
+ * Save hardware loop registers
+ */
+ SETL [A0.2++],D0Ar6,D1Ar5 /* Save 8*1 bytes */
+ MOV D0Ar6,TXL1END
+ MOV D1Ar5,TXL2END
+ MOV D0FrT,TXL1COUNT
+ MOV D1RtP,TXL2COUNT
+ MSETL [A0.2],D0Ar6,D0FrT /* Save 8*2 bytes */
+/*
+ * Clear loop counters to disable any current loops
+ */
+ XOR TXL1COUNT,D0FrT,D0FrT
+ XOR TXL2COUNT,D1RtP,D1RtP
+$LCtxSaveXTDP:
+ TSTT D0Ar2,#TBICTX_XTDP_BIT /* Save per-thread DSP regs? */
+ BZ $LCtxSaveCBUF
+/*
+ * Save per-thread DSP registers; ACC.0, PR.0, PI.1-3 (PI.0 is zero)
+ */
+#ifndef CTX_NO_DSP
+D SETL [A0.2++],AC0.0,AC1.0 /* Save ACx.0 lower 32-bits */
+DH SETL [A0.2++],AC0.0,AC1.0 /* Save ACx.0 upper 32-bits */
+D SETL [A0.2++],D0AR.0,D1AR.0 /* Save DSP RAM registers */
+D SETL [A0.2++],D0AR.1,D1AR.1
+D SETL [A0.2++],D0AW.0,D1AW.0
+D SETL [A0.2++],D0AW.1,D1AW.1
+D SETL [A0.2++],D0BR.0,D1BR.0
+D SETL [A0.2++],D0BR.1,D1BR.1
+D SETL [A0.2++],D0BW.0,D1BW.0
+D SETL [A0.2++],D0BW.1,D1BW.1
+D SETL [A0.2++],D0ARI.0,D1ARI.0
+D SETL [A0.2++],D0ARI.1,D1ARI.1
+D SETL [A0.2++],D0AWI.0,D1AWI.0
+D SETL [A0.2++],D0AWI.1,D1AWI.1
+D SETL [A0.2++],D0BRI.0,D1BRI.0
+D SETL [A0.2++],D0BRI.1,D1BRI.1
+D SETL [A0.2++],D0BWI.0,D1BWI.0
+D SETL [A0.2++],D0BWI.1,D1BWI.1
+D SETD [A0.2++],T0
+D SETD [A0.2++],T1
+D SETD [A0.2++],T2
+D SETD [A0.2++],T3
+D SETD [A0.2++],T4
+D SETD [A0.2++],T5
+D SETD [A0.2++],T6
+D SETD [A0.2++],T7
+D SETD [A0.2++],T8
+D SETD [A0.2++],T9
+D SETD [A0.2++],TA
+D SETD [A0.2++],TB
+D SETD [A0.2++],TC
+D SETD [A0.2++],TD
+D SETD [A0.2++],TE
+D SETD [A0.2++],TF
+#else
+ ADD A0.2,A0.2,#(8*18+4*16)
+#endif
+ MOV D0Ar6,TXMRSIZE
+ MOV D1Ar5,TXDRSIZE
+ SETL [A0.2++],D0Ar6,D1Ar5 /* Save 8*1 bytes */
+
+$LCtxSaveCBUF:
+#ifdef TBI_1_3
+ MOV D0Ar4,D0Re0 /* Copy Ctx Flags */
+ ANDT D0Ar4,D0Ar4,#TBICTX_XCBF_BIT /* mask XCBF if already set */
+ XOR D0Ar4,D0Ar4,#-1
+ AND D0Ar2,D0Ar2,D0Ar4 /* remove XCBF if already set */
+#endif
+ TSTT D0Ar2,#TBICTX_XCBF_BIT /* Want to save CBUF? */
+ ANDT D0Ar2,D0Ar2,#TBICTX_XDX8_BIT+TBICTX_XAXX_BIT+TBICTX_XHL2_BIT+TBICTX_XTDP_BIT+TBICTX_XEXT_BIT
+ OR D0Ar2,D0Ar2,D0Re0 /* Generate new SaveMask */
+ SETD [D1Ar1+#TBICTX_SaveMask-2],D0Ar2/* Add in bits saved to TBICTX */
+ MOV D0Re0,A0.2 /* Return end of save area */
+ MOV D0Ar4,TXDIVTIME /* Get TXDIVTIME */
+ MOVZ PC,A1.2 /* No: Early return */
+ TSTT D0Ar2,#TBICTX_CBUF_BIT+TBICTX_CBRP_BIT /* Need to save CBUF? */
+ MOVZ PC,A1.2 /* No: Early return */
+ ORT D0Ar2,D0Ar2,#TBICTX_XCBF_BIT
+ SETD [D1Ar1+#TBICTX_SaveMask-2],D0Ar2/* Add in XCBF bit to TBICTX */
+ ADD A0.2,D1Ar1,#TBICTX_BYTES /* Dump CBUF state after TBICTX */
+/*
+ * Save CBUF
+ */
+ SETD [A0.2+# 0],TXCATCH0 /* Save TXCATCHn */
+ SETD [A0.2+# 4],TXCATCH1
+ TSTT D0Ar2,#TBICTX_CBRP_BIT /* ... RDDIRTY was/is set */
+ SETD [A0.2+# 8],TXCATCH2
+ SETD [A0.2+#12],TXCATCH3
+ BZ $LCtxSaveComplete
+ SETL [A0.2+#(2*8)],RD /* Save read pipeline */
+ SETL [A0.2+#(3*8)],RD /* Save read pipeline */
+ SETL [A0.2+#(4*8)],RD /* Save read pipeline */
+ SETL [A0.2+#(5*8)],RD /* Save read pipeline */
+ SETL [A0.2+#(6*8)],RD /* Save read pipeline */
+ SETL [A0.2+#(7*8)],RD /* Save read pipeline */
+ AND TXDIVTIME,D0Ar4,#TXDIVTIME_DIV_BITS /* Clear RPDIRTY */
+$LCtxSaveComplete:
+ MOV PC,A1.2 /* Return */
+ .size ___TBICtxSave,.-___TBICtxSave
+
+/*
+ * void *__TBICtxRestore( TBIRES State, void *pExt )
+ *
+ * D0Ar2 contains TBICTX_*_BIT values that control what
+ * extended data is to be recovered from D1Ar3 (pExt).
+ *
+ * Virtually all possible scratch registers are used.
+ */
+/*
+ * If TBICTX_XEXT_BIT is specified in State, then the saved state of
+ * the original A0.2 and A1.2 is restored from pExt and the XEXT
+ * related flags are removed from State.pCtx->SaveMask.
+ *
+ */
+ .balign 4
+ .global ___TBICtxRestore
+ .type ___TBICtxRestore,function
+___TBICtxRestore:
+ GETD D0Ar6,[D1Ar1+#TBICTX_CurrMODE] /* Get TXMODE Value */
+ ANDST D0Ar2,D0Ar2,#TBICTX_XDX8_BIT+TBICTX_XAXX_BIT+TBICTX_XHL2_BIT+TBICTX_XTDP_BIT+TBICTX_XEXT_BIT
+ MOV D1Re0,D0Ar2 /* Keep flags in D1Re0 */
+ MOV D0Re0,D1Ar3 /* D1Ar3 is default result */
+ MOVZ PC,D1RtP /* Early return, nothing to do */
+ ANDT D0Ar6,D0Ar6,#0xE000 /* Top bits of TXMODE required */
+ MOV A0.3,D0Ar6 /* Save TXMODE for later */
+ TSTT D1Re0,#TBICTX_XEXT_BIT /* Check for XEXT bit */
+ BZ $LCtxRestXDX8
+ GETD D0Ar4,[D1Ar1+#TBICTX_SaveMask-2]/* Get current SaveMask */
+ GETL D0Ar6,D1Ar5,[D0Re0++] /* Restore A0.2, A1.2 state */
+ ANDMT D0Ar4,D0Ar4,#(0xFFFF-(TBICTX_XDX8_BIT+TBICTX_XAXX_BIT+TBICTX_XHL2_BIT+TBICTX_XTDP_BIT+TBICTX_XEXT_BIT))
+ SETD [D1Ar1+#TBICTX_SaveMask-2],D0Ar4/* New SaveMask */
+#ifdef METAC_1_0
+ SETD [D1Ar1+#TBICTX_Ext_AX2_U0],D0Ar6
+ MOV D0Ar6,D1Ar1
+ SETD [D0Ar6+#TBICTX_Ext_AX2_U1],D1Ar5
+#else
+ SETL [D1Ar1+#TBICTX_Ext_AX2],D0Ar6,D1Ar5
+#endif
+$LCtxRestXDX8:
+ TSTT D1Re0,#TBICTX_XDX8_BIT /* Get extended DX regs? */
+ MOV A1.2,D1RtP /* Free off D1RtP register */
+ BZ $LCtxRestXAXX
+/*
+ * Restore 8 extra DX registers
+ */
+ MGETL D0.8,D0.9,D0.10,D0.11,D0.12,D0.13,D0.14,D0.15,[D0Re0]
+$LCtxRestXAXX:
+ TSTT D1Re0,#TBICTX_XAXX_BIT /* Get extended AX regs? */
+ BZ $LCtxRestXHL2
+/*
+ * Restore 3 extra AX registers
+ */
+ MGETL A0_4 A0.5,A0.6,A0.7,[D0Re0] /* Get 8*3 bytes */
+$LCtxRestXHL2:
+ TSTT D1Re0,#TBICTX_XHL2_BIT /* Get hardware-loop regs? */
+ BZ $LCtxRestXTDP
+/*
+ * Get hardware loop registers
+ */
+ MGETL D0Ar6,D0Ar4,D0Ar2,[D0Re0] /* Get 8*3 bytes */
+ MOV TXL1START,D0Ar6
+ MOV TXL2START,D1Ar5
+ MOV TXL1END,D0Ar4
+ MOV TXL2END,D1Ar3
+ MOV TXL1COUNT,D0Ar2
+ MOV TXL2COUNT,D1Ar1
+$LCtxRestXTDP:
+ TSTT D1Re0,#TBICTX_XTDP_BIT /* Get per-thread DSP regs? */
+ MOVZ PC,A1.2 /* No: Early return */
+/*
+ * Get per-thread DSP registers; ACC.0, PR.0, PI.1-3 (PI.0 is zero)
+ */
+ MOV A0.2,D0Re0
+ GETL D0Ar6,D1Ar5,[D0Re0++#((16*4)+(18*8))]
+#ifndef CTX_NO_DSP
+D GETL AC0.0,AC1.0,[A0.2++] /* Restore ACx.0 lower 32-bits */
+DH GETL AC0.0,AC1.0,[A0.2++] /* Restore ACx.0 upper 32-bits */
+#else
+ ADD A0.2,A0.2,#(2*8)
+#endif
+ ADD D0Re0,D0Re0,#(2*4)
+ MOV TXMODE,A0.3 /* Some TXMODE bits needed */
+ MOV TXMRSIZE,D0Ar6
+ MOV TXDRSIZE,D1Ar5
+#ifndef CTX_NO_DSP
+D GETL D0AR.0,D1AR.0,[A0.2++] /* Restore DSP RAM registers */
+D GETL D0AR.1,D1AR.1,[A0.2++]
+D GETL D0AW.0,D1AW.0,[A0.2++]
+D GETL D0AW.1,D1AW.1,[A0.2++]
+D GETL D0BR.0,D1BR.0,[A0.2++]
+D GETL D0BR.1,D1BR.1,[A0.2++]
+D GETL D0BW.0,D1BW.0,[A0.2++]
+D GETL D0BW.1,D1BW.1,[A0.2++]
+#else
+ ADD A0.2,A0.2,#(8*8)
+#endif
+ MOV TXMODE,#0 /* Restore TXMODE */
+#ifndef CTX_NO_DSP
+D GETL D0ARI.0,D1ARI.0,[A0.2++]
+D GETL D0ARI.1,D1ARI.1,[A0.2++]
+D GETL D0AWI.0,D1AWI.0,[A0.2++]
+D GETL D0AWI.1,D1AWI.1,[A0.2++]
+D GETL D0BRI.0,D1BRI.0,[A0.2++]
+D GETL D0BRI.1,D1BRI.1,[A0.2++]
+D GETL D0BWI.0,D1BWI.0,[A0.2++]
+D GETL D0BWI.1,D1BWI.1,[A0.2++]
+D GETD T0,[A0.2++]
+D GETD T1,[A0.2++]
+D GETD T2,[A0.2++]
+D GETD T3,[A0.2++]
+D GETD T4,[A0.2++]
+D GETD T5,[A0.2++]
+D GETD T6,[A0.2++]
+D GETD T7,[A0.2++]
+D GETD T8,[A0.2++]
+D GETD T9,[A0.2++]
+D GETD TA,[A0.2++]
+D GETD TB,[A0.2++]
+D GETD TC,[A0.2++]
+D GETD TD,[A0.2++]
+D GETD TE,[A0.2++]
+D GETD TF,[A0.2++]
+#else
+ ADD A0.2,A0.2,#(8*8+4*16)
+#endif
+ MOV PC,A1.2 /* Return */
+ .size ___TBICtxRestore,.-___TBICtxRestore
+
+/*
+ * End of tbictx.S
+ */
diff --git a/arch/metag/tbx/tbictxfpu.S b/arch/metag/tbx/tbictxfpu.S
new file mode 100644
index 00000000000..e773bea3e7b
--- /dev/null
+++ b/arch/metag/tbx/tbictxfpu.S
@@ -0,0 +1,190 @@
+/*
+ * tbictxfpu.S
+ *
+ * Copyright (C) 2009, 2012 Imagination Technologies.
+ *
+ * This program is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU General Public License version 2 as published by the
+ * Free Software Foundation.
+ *
+ * Explicit state save and restore routines forming part of the thread binary
+ * interface for META processors
+ */
+
+ .file "tbifpuctx.S"
+
+#include <asm/metag_regs.h>
+#include <asm/tbx.h>
+
+#ifdef TBI_1_4
+/*
+ * void *__TBICtxFPUSave( TBIRES State, void *pExt )
+ *
+ * D0Ar2 contains TBICTX_*_BIT values that control what
+ * extended data is to be saved.
+ * These bits must be ored into the SaveMask of this structure.
+ *
+ * Virtually all possible scratch registers are used.
+ */
+ .text
+ .balign 4
+ .global ___TBICtxFPUSave
+ .type ___TBICtxFPUSave,function
+___TBICtxFPUSave:
+
+ /* D1Ar1:D0Ar2 - State
+ * D1Ar3 - pExt
+ * D0Ar4 - Value of METAC_CORE_ID
+ * D1Ar5 - Scratch
+ * D0Ar6 - Scratch
+ */
+
+ /* If the FPAC bit isn't set then there is nothing to do */
+ TSTT D0Ar2,#TBICTX_FPAC_BIT
+ MOVZ PC, D1RtP
+
+ /* Obtain the Core config */
+ MOVT D0Ar4, #HI(METAC_CORE_ID)
+ ADD D0Ar4, D0Ar4, #LO(METAC_CORE_ID)
+ GETD D0Ar4, [D0Ar4]
+
+ /* Detect FX.8 - FX.15 and add to core config */
+ MOV D0Ar6, TXENABLE
+ AND D0Ar6, D0Ar6, #(TXENABLE_CLASSALT_FPUR8 << TXENABLE_CLASS_S)
+ AND D0Ar4, D0Ar4, #LO(0x0000FFFF)
+ ORT D0Ar4, D0Ar4, #HI(TBICTX_CFGFPU_FX16_BIT)
+ XOR D0Ar4, D0Ar4, D0Ar6
+
+ /* Save the relevant bits to the buffer */
+ SETD [D1Ar3++], D0Ar4
+
+ /* Save the relevant bits of TXDEFR (Assumes TXDEFR is coherent) ... */
+ MOV D0Ar6, TXDEFR
+ LSR D0Re0, D0Ar6, #8
+ AND D0Re0, D0Re0, #LO(TXDEFR_FPE_FE_BITS>>8)
+ AND D0Ar6, D0Ar6, #LO(TXDEFR_FPE_ICTRL_BITS)
+ OR D0Re0, D0Re0, D0Ar6
+
+ /* ... along with relevant bits of TXMODE to buffer */
+ MOV D0Ar6, TXMODE
+ ANDT D0Ar6, D0Ar6, #HI(TXMODE_FPURMODE_BITS)
+ ORT D0Ar6, D0Ar6, #HI(TXMODE_FPURMODEWRITE_BIT)
+ OR D0Ar6, D0Ar6, D0Re0
+ SETD [D1Ar3++], D0Ar6
+
+ GETD D0Ar6,[D1Ar1+#TBICTX_SaveMask-2] /* Get the current SaveMask */
+ /* D0Ar6 - pCtx->SaveMask */
+
+ TSTT D0Ar4, #HI(TBICTX_CFGFPU_FX16_BIT) /* Perform test here for extended FPU registers
+ * to avoid stalls
+ */
+ /* Save the standard FPU registers */
+F MSETL [D1Ar3++], FX.0, FX.2, FX.4, FX.6
+
+ /* Save the extended FPU registers if they are present */
+ BZ $Lskip_save_fx8_fx16
+F MSETL [D1Ar3++], FX.8, FX.10, FX.12, FX.14
+$Lskip_save_fx8_fx16:
+
+ /* Save the FPU Accumulator if it is present */
+ TST D0Ar4, #METAC_COREID_NOFPACC_BIT
+ BNZ $Lskip_save_fpacc
+F SETL [D1Ar3++], ACF.0
+F SETL [D1Ar3++], ACF.1
+F SETL [D1Ar3++], ACF.2
+$Lskip_save_fpacc:
+
+ /* Update pCtx->SaveMask */
+ ANDT D0Ar2, D0Ar2, #TBICTX_FPAC_BIT
+ OR D0Ar6, D0Ar6, D0Ar2
+ SETD [D1Ar1+#TBICTX_SaveMask-2],D0Ar6/* Add in FPAC bit to TBICTX */
+
+ MOV D0Re0, D1Ar3 /* Return end of save area */
+ MOV PC, D1RtP
+
+ .size ___TBICtxFPUSave,.-___TBICtxFPUSave
+
+/*
+ * void *__TBICtxFPURestore( TBIRES State, void *pExt )
+ *
+ * D0Ar2 contains TBICTX_*_BIT values that control what
+ * extended data is to be recovered from D1Ar3 (pExt).
+ *
+ * Virtually all possible scratch registers are used.
+ */
+/*
+ * If TBICTX_XEXT_BIT is specified in State, then the saved state of
+ * the original A0.2 and A1.2 is restored from pExt and the XEXT
+ * related flags are removed from State.pCtx->SaveMask.
+ *
+ */
+ .balign 4
+ .global ___TBICtxFPURestore
+ .type ___TBICtxFPURestore,function
+___TBICtxFPURestore:
+
+ /* D1Ar1:D0Ar2 - State
+ * D1Ar3 - pExt
+ * D0Ar4 - Value of METAC_CORE_ID
+ * D1Ar5 - Scratch
+ * D0Ar6 - Scratch
+ * D1Re0 - Scratch
+ */
+
+ /* If the FPAC bit isn't set then there is nothing to do */
+ TSTT D0Ar2,#TBICTX_FPAC_BIT
+ MOVZ PC, D1RtP
+
+ /* Obtain the relevant bits of the Core config */
+ GETD D0Ar4, [D1Ar3++]
+
+ /* Restore FPU related parts of TXDEFR. Assumes TXDEFR is coherent */
+ GETD D1Ar5, [D1Ar3++]
+ MOV D0Ar6, D1Ar5
+ LSL D1Re0, D1Ar5, #8
+ ANDT D1Re0, D1Re0, #HI(TXDEFR_FPE_FE_BITS|TXDEFR_FPE_ICTRL_BITS)
+ AND D1Ar5, D1Ar5, #LO(TXDEFR_FPE_FE_BITS|TXDEFR_FPE_ICTRL_BITS)
+ OR D1Re0, D1Re0, D1Ar5
+
+ MOV D1Ar5, TXDEFR
+ ANDMT D1Ar5, D1Ar5, #HI(~(TXDEFR_FPE_FE_BITS|TXDEFR_FPE_ICTRL_BITS))
+ ANDMB D1Ar5, D1Ar5, #LO(~(TXDEFR_FPE_FE_BITS|TXDEFR_FPE_ICTRL_BITS))
+ OR D1Re0, D1Re0, D1Ar5
+ MOV TXDEFR, D1Re0
+
+ /* Restore relevant bits of TXMODE */
+ MOV D1Ar5, TXMODE
+ ANDMT D1Ar5, D1Ar5, #HI(~TXMODE_FPURMODE_BITS)
+ ANDT D0Ar6, D0Ar6, #HI(TXMODE_FPURMODE_BITS|TXMODE_FPURMODEWRITE_BIT)
+ OR D0Ar6, D0Ar6, D1Ar5
+ MOV TXMODE, D0Ar6
+
+ TSTT D0Ar4, #HI(TBICTX_CFGFPU_FX16_BIT) /* Perform test here for extended FPU registers
+ * to avoid stalls
+ */
+ /* Restore the standard FPU registers */
+F MGETL FX.0, FX.2, FX.4, FX.6, [D1Ar3++]
+
+ /* Restore the extended FPU registers if they are present */
+ BZ $Lskip_restore_fx8_fx16
+F MGETL FX.8, FX.10, FX.12, FX.14, [D1Ar3++]
+$Lskip_restore_fx8_fx16:
+
+ /* Restore the FPU Accumulator if it is present */
+ TST D0Ar4, #METAC_COREID_NOFPACC_BIT
+ BNZ $Lskip_restore_fpacc
+F GETL ACF.0, [D1Ar3++]
+F GETL ACF.1, [D1Ar3++]
+F GETL ACF.2, [D1Ar3++]
+$Lskip_restore_fpacc:
+
+ MOV D0Re0, D1Ar3 /* Return end of save area */
+ MOV PC, D1RtP
+
+ .size ___TBICtxFPURestore,.-___TBICtxFPURestore
+
+#endif /* TBI_1_4 */
+
+/*
+ * End of tbictxfpu.S
+ */
diff --git a/arch/metag/tbx/tbidefr.S b/arch/metag/tbx/tbidefr.S
new file mode 100644
index 00000000000..3eb165ebf54
--- /dev/null
+++ b/arch/metag/tbx/tbidefr.S
@@ -0,0 +1,175 @@
+/*
+ * tbidefr.S
+ *
+ * Copyright (C) 2009, 2012 Imagination Technologies.
+ *
+ * This program is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU General Public License version 2 as published by the
+ * Free Software Foundation.
+ *
+ * Routing deferred exceptions
+ */
+
+#include <asm/metag_regs.h>
+#include <asm/tbx.h>
+
+ .text
+ .balign 4
+ .global ___TBIHandleDFR
+ .type ___TBIHandleDFR,function
+/* D1Ar1:D0Ar2 -- State
+ * D1Ar3 -- SigNum
+ * D0Ar4 -- Triggers
+ * D1Ar5 -- InstOrSWSId
+ * D0Ar6 -- pTBI (volatile)
+ */
+___TBIHandleDFR:
+#ifdef META_BUG_MBN100212
+ MSETL [A0StP++], D0FrT, D0.5
+
+ /* D1Ar1,D0Ar2,D1Ar5,D0Ar6 -- Arguments to handler, must be preserved
+ * D0Ar4 -- The deferred exceptions
+ * D1Ar3 -- As per D0Ar4 but just the trigger bits
+ * D0.5 -- The bgnd deferred exceptions
+ * D1.5 -- TXDEFR with bgnd re-added
+ */
+
+ /* - Collect the pending deferred exceptions using TXSTAT,
+ * (ack's the bgnd exceptions as a side-effect)
+ * - Manually collect remaining (interrupt) deferred exceptions
+ * using TXDEFR
+ * - Replace the triggers (from TXSTATI) with the int deferred
+ * exceptions DEFR ..., TXSTATI would have returned if it was valid
+ * from bgnd code
+ * - Reconstruct TXDEFR by or'ing bgnd deferred exceptions (except
+ * the DEFER bit) and the int deferred exceptions. This will be
+ * restored later
+ */
+ DEFR D0.5, TXSTAT
+ MOV D1.5, TXDEFR
+ ANDT D0.5, D0.5, #HI(0xFFFF0000)
+ MOV D1Ar3, D1.5
+ ANDT D1Ar3, D1Ar3, #HI(0xFFFF0000)
+ OR D0Ar4, D1Ar3, #TXSTAT_DEFER_BIT
+ OR D1.5, D1.5, D0.5
+
+ /* Mask off anything unrelated to the deferred exception triggers */
+ ANDT D1Ar3, D1Ar3, #HI(TXSTAT_BUSERR_BIT | TXSTAT_FPE_BITS)
+
+ /* Can assume that at least one exception happened since this
+ * handler wouldn't have been called otherwise.
+ *
+ * Replace the signal number and at the same time, prepare
+ * the mask to acknowledge the exception
+ *
+ * D1Re0 -- The bits to acknowledge
+ * D1Ar3 -- The signal number
+ * D1RtP -- Scratch to deal with non-conditional insns
+ */
+ MOVT D1Re0, #HI(TXSTAT_FPE_BITS & ~TXSTAT_FPE_DENORMAL_BIT)
+ MOV D1RtP, #TXSTAT_FPE_INVALID_S
+ FFB D1Ar3, D1Ar3
+ CMP D1Ar3, #TXSTAT_FPE_INVALID_S
+ MOVLE D1Ar3, D1RtP /* Collapse FPE triggers to a single signal */
+ MOV D1RtP, #1
+ LSLGT D1Re0, D1RtP, D1Ar3
+
+ /* Get the handler using the signal number
+ *
+ * D1Ar3 -- The signal number
+ * D0Re0 -- Offset into TBI struct containing handler address
+ * D1Re0 -- Mask of triggers to keep
+ * D1RtP -- Address of handler
+ */
+ SUB D1Ar3, D1Ar3, #(TXSTAT_FPE_INVALID_S - TBID_SIGNUM_FPE)
+ LSL D0Re0, D1Ar3, #2
+ XOR D1Re0, D1Re0, #-1 /* Prepare mask for acknowledge (avoids stall) */
+ ADD D0Re0,D0Re0,#TBI_fnSigs
+ GETD D1RtP, [D0Ar6+D0Re0]
+
+ /* Acknowledge triggers */
+ AND D1.5, D1.5, D1Re0
+
+ /* Restore remaining exceptions
+ * Do this here in case the handler enables nested interrupts
+ *
+ * D1.5 -- TXDEFR with this exception ack'd
+ */
+ MOV TXDEFR, D1.5
+
+ /* Call the handler */
+ SWAP D1RtP, PC
+
+ GETL D0.5, D1.5, [--A0StP]
+ GETL D0FrT, D1RtP, [--A0StP]
+ MOV PC,D1RtP
+#else /* META_BUG_MBN100212 */
+
+ /* D1Ar1,D0Ar2,D1Ar5,D0Ar6 -- Arguments to handler, must be preserved
+ * D0Ar4 -- The deferred exceptions
+ * D1Ar3 -- As per D0Ar4 but just the trigger bits
+ */
+
+ /* - Collect the pending deferred exceptions using TXSTATI,
+ * (ack's the interrupt exceptions as a side-effect)
+ */
+ DEFR D0Ar4, TXSTATI
+
+ /* Mask off anything unrelated to the deferred exception triggers */
+ MOV D1Ar3, D0Ar4
+ ANDT D1Ar3, D1Ar3, #HI(TXSTAT_BUSERR_BIT | TXSTAT_FPE_BITS)
+
+ /* Can assume that at least one exception happened since this
+ * handler wouldn't have been called otherwise.
+ *
+ * Replace the signal number and at the same time, prepare
+ * the mask to acknowledge the exception
+ *
+ * The unusual code for 1<<D1Ar3 may need explanation.
+ * Normally this would be done using 'MOV rs,#1' and 'LSL rd,rs,D1Ar3'
+ * but only D1Re0 is available in D1 and no crossunit insns are available
+ * Even worse, there is no conditional 'MOV r,#uimm8'.
+ * Since the CMP proves that D1Ar3 >= 20, we can reuse the bottom 12-bits
+ * of D1Re0 (using 'ORGT r,#1') in the knowledge that the top 20-bits will
+ * be discarded without affecting the result.
+ *
+ * D1Re0 -- The bits to acknowledge
+ * D1Ar3 -- The signal number
+ */
+ MOVT D1Re0, #HI(TXSTAT_FPE_BITS & ~TXSTAT_FPE_DENORMAL_BIT)
+ MOV D0Re0, #TXSTAT_FPE_INVALID_S
+ FFB D1Ar3, D1Ar3
+ CMP D1Ar3, #TXSTAT_FPE_INVALID_S
+ MOVLE D1Ar3, D0Re0 /* Collapse FPE triggers to a single signal */
+ ORGT D1Re0, D1Re0, #1
+ LSLGT D1Re0, D1Re0, D1Ar3
+
+ SUB D1Ar3, D1Ar3, #(TXSTAT_FPE_INVALID_S - TBID_SIGNUM_FPE)
+
+ /* Acknowledge triggers and restore remaining exceptions
+ * Do this here in case the handler enables nested interrupts
+ *
+ * (x | y) ^ y == x & ~y. It avoids the restrictive XOR ...,#-1 insn
+ * and is the same length
+ */
+ MOV D0Re0, TXDEFR
+ OR D0Re0, D0Re0, D1Re0
+ XOR TXDEFR, D0Re0, D1Re0
+
+ /* Get the handler using the signal number
+ *
+ * D1Ar3 -- The signal number
+ * D0Re0 -- Address of handler
+ */
+ LSL D0Re0, D1Ar3, #2
+ ADD D0Re0,D0Re0,#TBI_fnSigs
+ GETD D0Re0, [D0Ar6+D0Re0]
+
+ /* Tailcall the handler */
+ MOV PC,D0Re0
+
+#endif /* META_BUG_MBN100212 */
+ .size ___TBIHandleDFR,.-___TBIHandleDFR
+/*
+ * End of tbidefr.S
+ */
diff --git a/arch/metag/tbx/tbidspram.S b/arch/metag/tbx/tbidspram.S
new file mode 100644
index 00000000000..2f27c037221
--- /dev/null
+++ b/arch/metag/tbx/tbidspram.S
@@ -0,0 +1,161 @@
+/*
+ * tbidspram.S
+ *
+ * Copyright (C) 2009, 2012 Imagination Technologies.
+ *
+ * This program is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU General Public License version 2 as published by the
+ * Free Software Foundation.
+ *
+ * Explicit state save and restore routines forming part of the thread binary
+ * interface for META processors
+ */
+
+ .file "tbidspram.S"
+
+/* These aren't generally useful to a user so for now, they aren't publicly available */
+#define _TBIECH_DSPRAM_DUA_S 8
+#define _TBIECH_DSPRAM_DUA_BITS 0x7f00
+#define _TBIECH_DSPRAM_DUB_S 0
+#define _TBIECH_DSPRAM_DUB_BITS 0x007f
+
+/*
+ * void *__TBIDspramSaveA( short DspramSizes, void *pExt )
+ */
+ .text
+ .balign 4
+ .global ___TBIDspramSaveA
+ .type ___TBIDspramSaveA,function
+___TBIDspramSaveA:
+
+ SETL [A0StP++], D0.5, D1.5
+ MOV A0.3, D0Ar2
+
+ /* D1Ar1 - Dspram Sizes
+ * A0.3 - Pointer to buffer
+ */
+
+ /* Save the specified amount of dspram DUA */
+DL MOV D0AR.0, #0
+ LSR D1Ar1, D1Ar1, #_TBIECH_DSPRAM_DUA_S
+ AND D1Ar1, D1Ar1, #(_TBIECH_DSPRAM_DUA_BITS >> _TBIECH_DSPRAM_DUA_S)
+ SUB TXRPT, D1Ar1, #1
+$L1:
+DL MOV D0Re0, [D0AR.0++]
+DL MOV D0Ar6, [D0AR.0++]
+DL MOV D0Ar4, [D0AR.0++]
+DL MOV D0.5, [D0AR.0++]
+ MSETL [A0.3++], D0Re0, D0Ar6, D0Ar4, D0.5
+
+ BR $L1
+
+ GETL D0.5, D1.5, [--A0StP]
+ MOV PC, D1RtP
+
+ .size ___TBIDspramSaveA,.-___TBIDspramSaveA
+
+/*
+ * void *__TBIDspramSaveB( short DspramSizes, void *pExt )
+ */
+ .balign 4
+ .global ___TBIDspramSaveB
+ .type ___TBIDspramSaveB,function
+___TBIDspramSaveB:
+
+ SETL [A0StP++], D0.5, D1.5
+ MOV A0.3, D0Ar2
+
+ /* D1Ar1 - Dspram Sizes
+ * A0.3 - Pointer to buffer
+ */
+
+ /* Save the specified amount of dspram DUB */
+DL MOV D0BR.0, #0
+ LSR D1Ar1, D1Ar1, #_TBIECH_DSPRAM_DUB_S
+ AND D1Ar1, D1Ar1, #(_TBIECH_DSPRAM_DUB_BITS >> _TBIECH_DSPRAM_DUB_S)
+ SUB TXRPT, D1Ar1, #1
+$L2:
+DL MOV D0Re0, [D0BR.0++]
+DL MOV D0Ar6, [D0BR.0++]
+DL MOV D0Ar4, [D0BR.0++]
+DL MOV D0.5, [D0BR.0++]
+ MSETL [A0.3++], D0Re0, D0Ar6, D0Ar4, D0.5
+
+ BR $L2
+
+ GETL D0.5, D1.5, [--A0StP]
+ MOV PC, D1RtP
+
+ .size ___TBIDspramSaveB,.-___TBIDspramSaveB
+
+/*
+ * void *__TBIDspramRestoreA( short DspramSizes, void *pExt )
+ */
+ .balign 4
+ .global ___TBIDspramRestoreA
+ .type ___TBIDspramRestoreA,function
+___TBIDspramRestoreA:
+
+ SETL [A0StP++], D0.5, D1.5
+ MOV A0.3, D0Ar2
+
+ /* D1Ar1 - Dspram Sizes
+ * A0.3 - Pointer to buffer
+ */
+
+ /* Restore the specified amount of dspram DUA */
+DL MOV D0AW.0, #0
+ LSR D1Ar1, D1Ar1, #_TBIECH_DSPRAM_DUA_S
+ AND D1Ar1, D1Ar1, #(_TBIECH_DSPRAM_DUA_BITS >> _TBIECH_DSPRAM_DUA_S)
+ SUB TXRPT, D1Ar1, #1
+$L3:
+ MGETL D0Re0, D0Ar6, D0Ar4, D0.5, [A0.3++]
+DL MOV [D0AW.0++], D0Re0
+DL MOV [D0AW.0++], D0Ar6
+DL MOV [D0AW.0++], D0Ar4
+DL MOV [D0AW.0++], D0.5
+
+ BR $L3
+
+ GETL D0.5, D1.5, [--A0StP]
+ MOV PC, D1RtP
+
+ .size ___TBIDspramRestoreA,.-___TBIDspramRestoreA
+
+/*
+ * void *__TBIDspramRestoreB( short DspramSizes, void *pExt )
+ */
+ .balign 4
+ .global ___TBIDspramRestoreB
+ .type ___TBIDspramRestoreB,function
+___TBIDspramRestoreB:
+
+ SETL [A0StP++], D0.5, D1.5
+ MOV A0.3, D0Ar2
+
+ /* D1Ar1 - Dspram Sizes
+ * A0.3 - Pointer to buffer
+ */
+
+ /* Restore the specified amount of dspram DUB */
+DL MOV D0BW.0, #0
+ LSR D1Ar1, D1Ar1, #_TBIECH_DSPRAM_DUB_S
+ AND D1Ar1, D1Ar1, #(_TBIECH_DSPRAM_DUB_BITS >> _TBIECH_DSPRAM_DUB_S)
+ SUB TXRPT, D1Ar1, #1
+$L4:
+ MGETL D0Re0, D0Ar6, D0Ar4, D0.5, [A0.3++]
+DL MOV [D0BW.0++], D0Re0
+DL MOV [D0BW.0++], D0Ar6
+DL MOV [D0BW.0++], D0Ar4
+DL MOV [D0BW.0++], D0.5
+
+ BR $L4
+
+ GETL D0.5, D1.5, [--A0StP]
+ MOV PC, D1RtP
+
+ .size ___TBIDspramRestoreB,.-___TBIDspramRestoreB
+
+/*
+ * End of tbidspram.S
+ */
diff --git a/arch/metag/tbx/tbilogf.S b/arch/metag/tbx/tbilogf.S
new file mode 100644
index 00000000000..4a34d80657d
--- /dev/null
+++ b/arch/metag/tbx/tbilogf.S
@@ -0,0 +1,48 @@
+/*
+ * tbilogf.S
+ *
+ * Copyright (C) 2001, 2002, 2007, 2012 Imagination Technologies.
+ *
+ * This program is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU General Public License version 2 as published by the
+ * Free Software Foundation.
+ *
+ * Defines __TBILogF trap code for debugging messages and __TBICont for debug
+ * assert to be implemented on.
+ */
+
+ .file "tbilogf.S"
+
+/*
+ * Perform console printf using external debugger or host support
+ */
+ .text
+ .balign 4
+ .global ___TBILogF
+ .type ___TBILogF,function
+___TBILogF:
+ MSETL [A0StP],D0Ar6,D0Ar4,D0Ar2
+ SWITCH #0xC10020
+ MOV D0Re0,#0
+ SUB A0StP,A0StP,#24
+ MOV PC,D1RtP
+ .size ___TBILogF,.-___TBILogF
+
+/*
+ * Perform wait for continue under control of the debugger
+ */
+ .text
+ .balign 4
+ .global ___TBICont
+ .type ___TBICont,function
+___TBICont:
+ MOV D0Ar6,#1
+ MSETL [A0StP],D0Ar6,D0Ar4,D0Ar2
+ SWITCH #0xC30006 /* Returns if we are to continue */
+ SUB A0StP,A0StP,#(8*3)
+ MOV PC,D1RtP /* Return */
+ .size ___TBICont,.-___TBICont
+
+/*
+ * End of tbilogf.S
+ */
diff --git a/arch/metag/tbx/tbipcx.S b/arch/metag/tbx/tbipcx.S
new file mode 100644
index 00000000000..de0626fdad2
--- /dev/null
+++ b/arch/metag/tbx/tbipcx.S
@@ -0,0 +1,451 @@
+/*
+ * tbipcx.S
+ *
+ * Copyright (C) 2001, 2002, 2007, 2009, 2012 Imagination Technologies.
+ *
+ * This program is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU General Public License version 2 as published by the
+ * Free Software Foundation.
+ *
+ * Asynchronous trigger handling including exceptions
+ */
+
+ .file "tbipcx.S"
+#include <asm/metag_regs.h>
+#include <asm/tbx.h>
+
+/* BEGIN HACK */
+/* define these for now while doing initial conversion to GAS
+ will fix properly later */
+
+/* Signal identifiers always have the TBID_SIGNAL_BIT set and contain the
+ following related bit-fields */
+#define TBID_SIGNUM_S 2
+
+/* END HACK */
+
+#ifdef METAC_1_0
+/* Ax.4 is saved in TBICTX */
+#define A0_4 ,A0.4
+#else
+/* Ax.4 is NOT saved in TBICTX */
+#define A0_4
+#endif
+
+/* Size of the TBICTX structure */
+#define TBICTX_BYTES ((TBICTX_AX_REGS*8)+TBICTX_AX)
+
+#ifdef METAC_1_1
+#ifndef BOOTROM
+#ifndef SPECIAL_BUILD
+/* Jump straight into the boot ROM version of this code */
+#define CODE_USES_BOOTROM
+#endif
+#endif
+#endif
+
+/* Define space needed for CATCH buffer state in traditional units */
+#define CATCH_ENTRIES 5
+#define CATCH_ENTRY_BYTES 16
+
+#ifndef CODE_USES_BOOTROM
+#define A0GblIStP A0.15 /* PTBICTX for current thread in PRIV system */
+#define A1GblIGbP A1.15 /* Interrupt A1GbP value in PRIV system */
+#endif
+
+/*
+ * TBIRES __TBIASyncTrigger( TBIRES State )
+ */
+ .text
+ .balign 4
+ .global ___TBIASyncTrigger
+ .type ___TBIASyncTrigger,function
+___TBIASyncTrigger:
+#ifdef CODE_USES_BOOTROM
+ MOVT D0Re0,#HI(LINCORE_BASE)
+ JUMP D0Re0,#0xA0
+#else
+ MOV D0FrT,A0FrP /* Boing entry sequence */
+ ADD A0FrP,A0StP,#0
+ SETL [A0StP++],D0FrT,D1RtP
+ MOV D0Re0,PCX /* Check for repeat call */
+ MOVT D0FrT,#HI(___TBIBoingRTI+4)
+ ADD D0FrT,D0FrT,#LO(___TBIBoingRTI+4)
+ CMP D0Re0,D0FrT
+ BEQ ___TBIBoingExit /* Already set up - come out */
+ ADD D1Ar1,D1Ar1,#7 /* PRIV system stack here */
+ MOV A0.2,A0StP /* else push context here */
+ MOVS D0Re0,D0Ar2 /* Return in user mode? */
+ ANDMB D1Ar1,D1Ar1,#0xfff8 /* align priv stack to 64-bit */
+ MOV D1Re0,D1Ar1 /* and set result to arg */
+ MOVMI A0.2,D1Ar1 /* use priv stack if PRIV set */
+/*
+ * Generate an initial TBICTX to return to our own current call context
+ */
+ MOVT D1Ar5,#HI(___TBIBoingExit) /* Go here to return */
+ ADD D1Ar5,D1Ar5,#LO(___TBIBoingExit)
+ ADD A0.3,A0.2,#TBICTX_DX /* DX Save area */
+ ANDT D0Ar2,D0Ar2,#TBICTX_PRIV_BIT /* Extract PRIV bit */
+ MOVT D0Ar6,#TBICTX_SOFT_BIT /* Only soft thread state */
+ ADD D0Ar6,D0Ar6,D0Ar2 /* Add in PRIV bit if requested */
+ SETL [A0.2],D0Ar6,D1Ar5 /* Push header fields */
+ ADD D0FrT,A0.2,#TBICTX_AX /* Address AX save area */
+ MSETL [A0.3],D0Re0,D0Ar6,D0Ar4,D0Ar2,D0FrT,D0.5,D0.6,D0.7
+ MOV D0Ar6,#0
+ MOV D1Ar5,#0
+ SETL [A0.3++],D0Ar6,D1Ar5 /* Zero CT register states */
+ SETL [A0.3++],D0Ar6,D1Ar5
+ MSETL [D0FrT],A0StP,A0FrP,A0.2,A0.3 A0_4 /* Save AX regs */
+ MOV A0FrP,A0.2 /* Restore me! */
+ B ___TBIResume
+ .size ___TBIASyncTrigger,.-___TBIASyncTrigger
+
+/*
+ * Optimised return to handler for META Core
+ */
+___TBIBoingRTH:
+ RTH /* Go to background level */
+ MOVT A0.2, #HI($Lpcx_target)
+ ADD A0.2,A0.2,#LO($Lpcx_target)
+ MOV PCX,A0.2 /* Setup PCX for interrupts */
+ MOV PC,D1Re0 /* Jump to handler */
+/*
+ * This is where the code below needs to jump to wait for outermost interrupt
+ * event in a non-privilege mode system (single shared interrupt stack).
+ */
+___TBIBoingPCX:
+ MGETL A0StP,A0FrP,A0.2,A0.3 A0_4,[D1Re0] /* Restore AX regs */
+ MOV TXSTATUS,D0Re0 /* Restore flags */
+ GETL D0Re0,D1Re0,[D1Re0+#TBICTX_DX-TBICTX_BYTES]
+___TBIBoingRTI:
+ RTI /* Wait for interrupt */
+$Lpcx_target:
+/*
+ * Save initial interrupt state on current stack
+ */
+ SETL [A0StP+#TBICTX_DX],D0Re0,D1Re0 /* Save key registers */
+ ADD D1Re0,A0StP,#TBICTX_AX /* Address AX save area */
+ MOV D0Re0,TXSTATUS /* Read TXSTATUS into D0Re0 */
+ MOV TXSTATUS,#0 /* Clear TXSTATUS */
+ MSETL [D1Re0],A0StP,A0FrP,A0.2,A0.3 A0_4 /* Save AX critical regs */
+/*
+ * Register state at this point is-
+ *
+ * D0Re0 - Old TXSTATUS with PRIV and CBUF bits set if appropriate
+ * A0StP - Is call stack frame and base of TBICTX being generated
+ * A1GbP - Is valid static access link
+ */
+___TBIBoing:
+ LOCK0 /* Make sure we have no locks! */
+ ADD A1.2,A0StP,#TBICTX_DX+(8*1) /* Address DX.1 save area */
+ MOV A0FrP,A0StP /* Setup frame pointer */
+ MSETL [A1.2],D0Ar6,D0Ar4,D0Ar2,D0FrT,D0.5,D0.6,D0.7
+ MOV D0Ar4,TXRPT /* Save critical CT regs */
+ MOV D1Ar3,TXBPOBITS
+ MOV D1Ar1,TXDIVTIME /* Calc catch buffer pSrc */
+ MOV D0Ar2,TXMODE
+ MOV TXMODE,#0 /* Clear TXMODE */
+#ifdef TXDIVTIME_RPDIRTY_BIT
+ TSTT D1Ar1,#HI(TXDIVTIME_RPDIRTY_BIT)/* NZ = RPDIRTY */
+ MOVT D0Ar6,#TBICTX_CBRP_BIT
+ ORNZ D0Re0,D0Re0,D0Ar6 /* Set CBRP if RPDIRTY set */
+#endif
+ MSETL [A1.2],D0Ar4,D0Ar2 /* Save CT regs state */
+ MOV D0Ar2,D0Re0 /* Copy TXSTATUS */
+ ANDMT D0Ar2,D0Ar2,#TBICTX_CBUF_BIT+TBICTX_CBRP_BIT
+#ifdef TBI_1_4
+ MOVT D1Ar1,#TBICTX_FPAC_BIT /* Copy FPActive into FPAC */
+ TSTT D0Re0,#HI(TXSTATUS_FPACTIVE_BIT)
+ ORNZ D0Ar2,D0Ar2,D1Ar1
+#endif
+ MOV D1Ar1,PCX /* Read CurrPC */
+ ORT D0Ar2,D0Ar2,#TBICTX_CRIT_BIT /* SaveMask + CRIT bit */
+ SETL [A0FrP+#TBICTX_Flags],D0Ar2,D1Ar1 /* Set pCtx header fields */
+/*
+ * Completed context save, now we need to make a call to an interrupt handler
+ *
+ * D0Re0 - holds PRIV, WAIT, CBUF flags, HALT reason if appropriate
+ * A0FrP - interrupt stack frame and base of TBICTX being generated
+ * A0StP - same as A0FrP
+ */
+___TBIBoingWait:
+ /* Reserve space for TBICTX and CBUF */
+ ADD A0StP,A0StP,#TBICTX_BYTES+(CATCH_ENTRY_BYTES*CATCH_ENTRIES)
+ MOV D0Ar4,TXSTATI /* Read the Triggers data */
+ MOV D1Ar3,TXDIVTIME /* Read IRQEnc bits */
+ MOV D0Ar2,D0Re0 /* Copy PRIV and WAIT flags */
+ ANDT D0Ar2,D0Ar2,#TBICTX_PRIV_BIT+TBICTX_WAIT_BIT+TBICTX_CBUF_BIT
+#ifdef TBI_1_4
+ MOVT D1Ar5,#TBICTX_FPAC_BIT /* Copy FPActive into FPAC */
+ TSTT D0Re0,#HI(TXSTATUS_FPACTIVE_BIT)
+ ORNZ D0Ar2,D0Ar2,D1Ar5
+#endif
+ ANDT D1Ar3,D1Ar3,#HI(TXDIVTIME_IRQENC_BITS)
+ LSR D1Ar3,D1Ar3,#TXDIVTIME_IRQENC_S
+ AND TXSTATI,D0Ar4,#TXSTATI_BGNDHALT_BIT/* Ack any HALT seen */
+ ANDS D0Ar4,D0Ar4,#0xFFFF-TXSTATI_BGNDHALT_BIT /* Only seen HALT? */
+ ORT D0Ar2,D0Ar2,#TBICTX_CRIT_BIT /* Set CRIT */
+#ifndef BOOTROM
+ MOVT A1LbP,#HI(___pTBIs)
+ ADD A1LbP,A1LbP,#LO(___pTBIs)
+ GETL D1Ar5,D0Ar6,[A1LbP] /* D0Ar6 = ___pTBIs[1] */
+#else
+/*
+ * For BOOTROM support ___pTBIs must be allocated at offset 0 vs A1GbP
+ */
+ GETL D1Ar5,D0Ar6,[A1GbP] /* D0Ar6 = ___pTBIs[1] */
+#endif
+ BZ ___TBIBoingHalt /* Yes: Service HALT */
+/*
+ * Encode interrupt as signal vector, strip away same/lower TXMASKI bits
+ */
+ MOV D1Ar1,#1 /* Generate mask for this bit */
+ MOV D0Re0,TXMASKI /* Get interrupt mask */
+ LSL TXSTATI,D1Ar1,D1Ar3 /* Acknowledge trigger */
+ AND TXMASKI,D0Re0,#TXSTATI_BGNDHALT_BIT /* Only allow HALTs */
+ OR D0Ar2,D0Ar2,D0Re0 /* Set TBIRES.Sig.TrigMask */
+ ADD D1Ar3,D1Ar3,#TBID_SIGNUM_TRT /* Offset into interrupt sigs */
+ LSL D0Re0,D1Ar3,#TBID_SIGNUM_S /* Generate offset from SigNum */
+/*
+ * This is a key moment we are about to call the handler, register state is
+ * as follows-
+ *
+ * D0Re0 - Handler vector (SigNum<<TBID_SIGNUM_S)
+ * D0Ar2 - TXMASKI:TBICTX_CRIT_BIT with optional CBUF and PRIV bits
+ * D1Ar3 - SigNum
+ * D0Ar4 - State read from TXSTATI
+ * D1Ar5 - Inst for SWITCH trigger case only, otherwise undefined
+ * D0Ar6 - pTBI
+ */
+___TBIBoingVec:
+ ADD D0Re0,D0Re0,#TBI_fnSigs /* Offset into signal table */
+ GETD D1Re0,[D0Ar6+D0Re0] /* Get address for Handler */
+/*
+ * Call handler at interrupt level, when it returns simply resume execution
+ * of state indicated by D1Re0.
+ */
+ MOV D1Ar1,A0FrP /* Pass in pCtx */
+ CALLR D1RtP,___TBIBoingRTH /* Use RTH to invoke handler */
+
+/*
+ * Perform critical state restore and execute background thread.
+ *
+ * A0FrP - is pointer to TBICTX structure to resume
+ * D0Re0 - contains additional TXMASKI triggers
+ */
+ .text
+ .balign 4
+#ifdef BOOTROM
+ .global ___TBIResume
+#endif
+___TBIResume:
+/*
+ * New META IP method
+ */
+ RTH /* Go to interrupt level */
+ MOV D0Ar4,TXMASKI /* Read TXMASKI */
+ OR TXMASKI,D0Ar4,D0Re0 /* -Write-Modify TXMASKI */
+ GETL D0Re0,D1Re0,[A0FrP+#TBICTX_Flags]/* Get Flags:SaveMask, CurrPC */
+ MOV A0StP,A0FrP /* Position stack pointer */
+ MOV D0Ar2,TXPOLLI /* Read pending triggers */
+ MOV PCX,D1Re0 /* Set resumption PC */
+ TST D0Ar2,#0xFFFF /* Any pending triggers? */
+ BNZ ___TBIBoingWait /* Yes: Go for triggers */
+ TSTT D0Re0,#TBICTX_WAIT_BIT /* Do we WAIT anyway? */
+ BNZ ___TBIBoingWait /* Yes: Go for triggers */
+ LSLS D1Ar5,D0Re0,#1 /* Test XCBF (MI) & PRIV (CS)? */
+ ADD D1Re0,A0FrP,#TBICTX_CurrRPT /* Address CT save area */
+ ADD A0StP,A0FrP,#TBICTX_DX+(8*1) /* Address DX.1 save area */
+ MGETL A0.2,A0.3,[D1Re0] /* Get CT reg states */
+ MOV D1Ar3,A1.3 /* Copy old TXDIVTIME */
+ BPL ___TBIResCrit /* No: Skip logic */
+ ADD D0Ar4,A0FrP,#TBICTX_BYTES /* Source is after TBICTX */
+ ANDST D1Ar3,D1Ar3,#HI(TXDIVTIME_RPMASK_BITS)/* !Z if RPDIRTY */
+ MGETL D0.5,D0.6,[D0Ar4] /* Read Catch state */
+ MOV TXCATCH0,D0.5 /* Restore TXCATCHn */
+ MOV TXCATCH1,D1.5
+ MOV TXCATCH2,D0.6
+ MOV TXCATCH3,D1.6
+ BZ ___TBIResCrit
+ MOV D0Ar2,#(1*8)
+ LSRS D1Ar3,D1Ar3,#TXDIVTIME_RPMASK_S+1 /* 2nd RPMASK bit -> bit 0 */
+ ADD RA,D0Ar4,#(0*8) /* Re-read read pipeline */
+ ADDNZ RA,D0Ar4,D0Ar2 /* If Bit 0 set issue RA */
+ LSRS D1Ar3,D1Ar3,#2 /* Bit 1 -> C, Bit 2 -> Bit 0 */
+ ADD D0Ar2,D0Ar2,#8
+ ADDCS RA,D0Ar4,D0Ar2 /* If C issue RA */
+ ADD D0Ar2,D0Ar2,#8
+ ADDNZ RA,D0Ar4,D0Ar2 /* If Bit 0 set issue RA */
+ LSRS D1Ar3,D1Ar3,#2 /* Bit 1 -> C, Bit 2 -> Bit 0 */
+ ADD D0Ar2,D0Ar2,#8
+ ADDCS RA,D0Ar4,D0Ar2 /* If C issue RA */
+ ADD D0Ar2,D0Ar2,#8
+ ADDNZ RA,D0Ar4,D0Ar2 /* If Bit 0 set issue RA */
+ MOV TXDIVTIME,A1.3 /* Set RPDIRTY again */
+___TBIResCrit:
+ LSLS D1Ar5,D0Re0,#1 /* Test XCBF (MI) & PRIV (CS)? */
+#ifdef TBI_1_4
+ ANDT D1Ar5,D1Ar5,#(TBICTX_FPAC_BIT*2)
+ LSL D0Ar6,D1Ar5,#3 /* Convert FPAC into FPACTIVE */
+#endif
+ ANDMT D0Re0,D0Re0,#TBICTX_CBUF_BIT /* Keep CBUF bit from SaveMask */
+#ifdef TBI_1_4
+ OR D0Re0,D0Re0,D0Ar6 /* Combine FPACTIVE with others */
+#endif
+ MGETL D0Ar6,D0Ar4,D0Ar2,D0FrT,D0.5,D0.6,D0.7,[A0StP] /* Restore DX */
+ MOV TXRPT,A0.2 /* Restore CT regs */
+ MOV TXBPOBITS,A1.2
+ MOV TXMODE,A0.3
+ BCC ___TBIBoingPCX /* Do non-PRIV wait! */
+ MOV A1GblIGbP,A1GbP /* Save A1GbP too */
+ MGETL A0StP,A0FrP,A0.2,A0.3 A0_4,[D1Re0] /* Restore AX regs */
+/*
+ * Wait for the first interrupt/exception trigger in a privilege mode system
+ * (interrupt stack area for current TASK to be pointed to by A0GblIStP
+ * or per_cpu__stack_save[hwthread_id]).
+ */
+ MOV TXSTATUS,D0Re0 /* Restore flags */
+ MOV D0Re0,TXPRIVEXT /* Set TXPRIVEXT_TXTOGGLEI_BIT */
+ SUB D1Re0,D1Re0,#TBICTX_BYTES /* TBICTX is top of int stack */
+#ifdef TBX_PERCPU_SP_SAVE
+ SWAP D1Ar3,A1GbP
+ MOV D1Ar3,TXENABLE /* Which thread are we? */
+ AND D1Ar3,D1Ar3,#TXENABLE_THREAD_BITS
+ LSR D1Ar3,D1Ar3,#TXENABLE_THREAD_S-2
+ ADDT D1Ar3,D1Ar3,#HI(_per_cpu__stack_save)
+ ADD D1Ar3,D1Ar3,#LO(_per_cpu__stack_save)
+ SETD [D1Ar3],D1Re0
+ SWAP D1Ar3,A1GbP
+#else
+ MOV A0GblIStP, D1Re0
+#endif
+ OR D0Re0,D0Re0,#TXPRIVEXT_TXTOGGLEI_BIT
+ MOV TXPRIVEXT,D0Re0 /* Cannot set TXPRIVEXT if !priv */
+ GETL D0Re0,D1Re0,[D1Re0+#TBICTX_DX]
+ RTI /* Wait for interrupt */
+/*
+ * Save initial interrupt state on A0GblIStP, switch to A0GblIStP if
+ * BOOTROM code, save and switch to [A1GbP] otherwise.
+ */
+___TBIBoingPCXP:
+#ifdef TBX_PERCPU_SP_SAVE
+ SWAP D1Ar3,A1GbP /* Get PRIV stack base */
+ MOV D1Ar3,TXENABLE /* Which thread are we? */
+ AND D1Ar3,D1Ar3,#TXENABLE_THREAD_BITS
+ LSR D1Ar3,D1Ar3,#TXENABLE_THREAD_S-2
+ ADDT D1Ar3,D1Ar3,#HI(_per_cpu__stack_save)
+ ADD D1Ar3,D1Ar3,#LO(_per_cpu__stack_save)
+ GETD D1Ar3,[D1Ar3]
+#else
+ SWAP D1Ar3,A0GblIStP /* Get PRIV stack base */
+#endif
+ SETL [D1Ar3+#TBICTX_DX],D0Re0,D1Re0 /* Save key registers */
+ MOV D0Re0,TXPRIVEXT /* Clear TXPRIVEXT_TXTOGGLEI_BIT */
+ ADD D1Re0,D1Ar3,#TBICTX_AX /* Address AX save area */
+ ANDMB D0Re0,D0Re0,#0xFFFF-TXPRIVEXT_TXTOGGLEI_BIT
+ MOV TXPRIVEXT,D0Re0 /* Cannot set TXPRIVEXT if !priv */
+ MOV D0Re0,TXSTATUS /* Read TXSTATUS into D0Re0 */
+ MOV TXSTATUS,#0 /* Clear TXSTATUS */
+ MSETL [D1Re0],A0StP,A0FrP,A0.2,A0.3 A0_4 /* Save AX critical regs */
+ MOV A0StP,D1Ar3 /* Switch stacks */
+#ifdef TBX_PERCPU_SP_SAVE
+ MOV D1Ar3,A1GbP /* Get D1Ar3 back */
+#else
+ MOV D1Ar3,A0GblIStP /* Get D1Ar3 back */
+#endif
+ ORT D0Re0,D0Re0,#TBICTX_PRIV_BIT /* Add PRIV to TXSTATUS */
+ MOV A1GbP,A1GblIGbP /* Restore A1GbP */
+ B ___TBIBoing /* Enter common handler code */
+/*
+ * At this point we know it's a background HALT case we are handling.
+ * The restored TXSTATUS always needs to have zero in the reason bits.
+ */
+___TBIBoingHalt:
+ MOV D0Ar4,TXMASKI /* Get interrupt mask */
+ ANDST D0Re0,D0Re0,#HI(TXSTATUS_MAJOR_HALT_BITS+TXSTATUS_MEM_FAULT_BITS)
+ AND TXMASKI,D0Ar4,#TXSTATI_BGNDHALT_BIT /* Only allow HALTs */
+ AND D0Ar4,D0Ar4,#0xFFFF-TXSTATI_BGNDHALT_BIT /* What ints are off? */
+ OR D0Ar2,D0Ar2,D0Ar4 /* Set TBIRES.Sig.TrigMask */
+ MOV D0Ar4,#TXSTATI_BGNDHALT_BIT /* This was the trigger state */
+ LSR D1Ar3,D0Re0,#TXSTATUS_MAJOR_HALT_S
+ MOV D0Re0,#TBID_SIGNUM_XXF<<TBID_SIGNUM_S
+ BNZ ___TBIBoingVec /* Jump to XXF exception handler */
+/*
+ * Only the SWITCH cases are left, PCX must be valid
+ */
+#ifdef TBI_1_4
+ MOV D1Ar5,TXPRIVEXT
+ TST D1Ar5,#TXPRIVEXT_MINIMON_BIT
+ LSR D1Ar3,D1Ar1,#1 /* Shift needed for MINIM paths (fill stall) */
+ BZ $Lmeta /* If META only, skip */
+ TSTT D1Ar1,#HI(0x00800000)
+ ANDMT D1Ar3,D1Ar3,#HI(0x007FFFFF >> 1)/* Shifted mask for large MINIM */
+ ANDT D1Ar1,D1Ar1,#HI(0xFFE00000) /* Static mask for small MINIM */
+ BZ $Llarge_minim /* If large MINIM */
+$Lsmall_minim:
+ TSTT D1Ar3,#HI(0x00100000 >> 1)
+ ANDMT D1Ar3,D1Ar3,#HI(0x001FFFFF >> 1)/* Correct shifted mask for large MINIM */
+ ADDZ D1Ar1,D1Ar1,D1Ar3 /* If META rgn, add twice to undo LSR #1 */
+ B $Lrecombine
+$Llarge_minim:
+ ANDST D1Ar1,D1Ar1,#HI(0xFF800000) /* Correct static mask for small MINIM */
+ /* Z=0 (Cannot place code at NULL) */
+$Lrecombine:
+ ADD D1Ar1,D1Ar1,D1Ar3 /* Combine static and shifted parts */
+$Lmeta:
+ GETW D1Ar5,[D1Ar1++] /* META: lo-16, MINIM: lo-16 (all-16 if short) */
+ GETW D1Ar3,[D1Ar1] /* META: hi-16, MINIM: hi-16 (only if long) */
+ MOV D1Re0,D1Ar5
+ XOR D1Re0,D1Re0,#0x4000
+ LSLSNZ D1Re0,D1Re0,#(32-14) /* MINIM: If long C=0, if short C=1 */
+ LSLCC D1Ar3,D1Ar3,#16 /* META/MINIM long: Move hi-16 up */
+ LSLCS D1Ar3,D1Ar5,#16 /* MINIM short: Dup all-16 */
+ ADD D1Ar5,D1Ar5,D1Ar3 /* ALL: Combine both 16-bit parts */
+#else
+ GETD D1Ar5,[D1Ar1] /* Read instruction for switch */
+#endif
+ LSR D1Ar3,D1Ar5,#22 /* Convert into signal number */
+ AND D1Ar3,D1Ar3,#TBID_SIGNUM_SW3-TBID_SIGNUM_SW0
+ LSL D0Re0,D1Ar3,#TBID_SIGNUM_S /* Generate offset from SigNum */
+ B ___TBIBoingVec /* Jump to switch handler */
+/*
+ * Exit from TBIASyncTrigger call
+ */
+___TBIBoingExit:
+ GETL D0FrT,D1RtP,[A0FrP++] /* Restore state from frame */
+ SUB A0StP,A0FrP,#8 /* Unwind stack */
+ MOV A0FrP,D0FrT /* Last memory read completes */
+ MOV PC,D1RtP /* Return to caller */
+#endif /* ifdef CODE_USES_BOOTROM */
+ .size ___TBIResume,.-___TBIResume
+
+#ifndef BOOTROM
+/*
+ * void __TBIASyncResume( TBIRES State )
+ */
+ .text
+ .balign 4
+ .global ___TBIASyncResume
+ .type ___TBIASyncResume,function
+___TBIASyncResume:
+/*
+ * Perform CRIT|SOFT state restore and execute background thread.
+ */
+ MOV D1Ar3,D1Ar1 /* Restore this context */
+ MOV D0Re0,D0Ar2 /* Carry in additional triggers */
+ /* Reserve space for TBICTX */
+ ADD D1Ar3,D1Ar3,#TBICTX_BYTES+(CATCH_ENTRY_BYTES*CATCH_ENTRIES)
+ MOV A0StP,D1Ar3 /* Enter with protection of */
+ MOV A0FrP,D1Ar1 /* TBICTX on our stack */
+#ifdef CODE_USES_BOOTROM
+ MOVT D1Ar1,#HI(LINCORE_BASE)
+ JUMP D1Ar1,#0xA4
+#else
+ B ___TBIResume
+#endif
+ .size ___TBIASyncResume,.-___TBIASyncResume
+#endif /* ifndef BOOTROM */
+
+/*
+ * End of tbipcx.S
+ */
diff --git a/arch/metag/tbx/tbiroot.S b/arch/metag/tbx/tbiroot.S
new file mode 100644
index 00000000000..7d84daf1340
--- /dev/null
+++ b/arch/metag/tbx/tbiroot.S
@@ -0,0 +1,87 @@
+/*
+ * tbiroot.S
+ *
+ * Copyright (C) 2001, 2002, 2012 Imagination Technologies.
+ *
+ * This program is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU General Public License version 2 as published by the
+ * Free Software Foundation.
+ *
+ * Module that creates and via ___TBI function returns a TBI Root Block for
+ * interrupt and background processing on the current thread.
+ */
+
+ .file "tbiroot.S"
+#include <asm/metag_regs.h>
+
+/*
+ * Get data structures and defines from the TBI C header
+ */
+#include <asm/tbx.h>
+
+
+/* If signals need to be exchanged we must create a TBI Root Block */
+
+ .data
+ .balign 8
+ .global ___pTBIs
+ .type ___pTBIs,object
+___pTBIs:
+ .long 0 /* Bgnd+Int root block ptrs */
+ .long 0
+ .size ___pTBIs,.-___pTBIs
+
+
+/*
+ * Return ___pTBIs value specific to execution level with promotion/demotion
+ *
+ * Register Usage: D1Ar1 is Id, D0Re0 is the primary result
+ * D1Re0 is secondary result (___pTBIs for other exec level)
+ */
+ .text
+ .balign 4
+ .global ___TBI
+ .type ___TBI,function
+___TBI:
+ TSTT D1Ar1,#HI(TBID_ISTAT_BIT) /* Bgnd or Int level? */
+ MOVT A1LbP,#HI(___pTBIs) /* A1LbP = &___pTBIs (high half) */
+ ADD A1LbP,A1LbP,#LO(___pTBIs) /* ... plus low half */
+ GETL D0Re0,D1Re0,[A1LbP] /* Base of root block table */
+ SWAPNZ D0Re0,D1Re0 /* Swap if ISTAT bit was set in Id */
+ MOV PC,D1RtP /* Return to caller */
+ .size ___TBI,.-___TBI
+
+
+/*
+ * Return identifier of the current thread in TBI segment or signal format with
+ * secondary mask to indicate privilege and interrupt level of thread
+ */
+ .text
+ .balign 4
+ .global ___TBIThrdPrivId
+ .type ___TBIThrdPrivId,function
+___TBIThrdPrivId:
+ .global ___TBIThreadId
+ .type ___TBIThreadId,function
+___TBIThreadId: /* Second entry point sharing the same code */
+#ifndef METAC_0_1
+ MOV D1Re0,TXSTATUS /* Are we privileged or int? */
+ MOV D0Re0,TXENABLE /* Which thread are we? */
+/* Disable privilege adaption for now */
+ ANDT D1Re0,D1Re0,#HI(TXSTATUS_ISTAT_BIT) /* +TXSTATUS_PSTAT_BIT) */
+ LSL D1Re0,D1Re0,#TBID_ISTAT_S-TXSTATUS_ISTAT_S /* ISTAT to TBID position */
+ AND D0Re0,D0Re0,#TXENABLE_THREAD_BITS /* Isolate thread number field */
+ LSL D0Re0,D0Re0,#TBID_THREAD_S-TXENABLE_THREAD_S /* Thread to TBID position */
+#else
+/* METAC_0_1: thread 0 only, so both results are zero */
+ XOR D0Re0,D0Re0,D0Re0
+ XOR D1Re0,D1Re0,D1Re0
+#endif
+ MOV PC,D1RtP /* Return */
+ .size ___TBIThrdPrivId,.-___TBIThrdPrivId
+ .size ___TBIThreadId,.-___TBIThreadId
+
+
+/*
+ * End of tbiroot.S
+ */
diff --git a/arch/metag/tbx/tbisoft.S b/arch/metag/tbx/tbisoft.S
new file mode 100644
index 00000000000..0346fe8a53b
--- /dev/null
+++ b/arch/metag/tbx/tbisoft.S
@@ -0,0 +1,237 @@
+/*
+ * tbisoft.S
+ *
+ * Copyright (C) 2001, 2002, 2007, 2012 Imagination Technologies.
+ *
+ * This program is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU General Public License version 2 as published by the
+ * Free Software Foundation.
+ *
+ * Support for soft threads and soft context switches
+ */
+
+ .file "tbisoft.S"
+
+#include <asm/tbx.h>
+
+#ifdef METAC_1_0
+/* Ax.4 is saved in TBICTX */
+#define A0_4 ,A0.4
+#define D0_5 ,D0.5
+#else
+/* Ax.4 is NOT saved in TBICTX */
+#define A0_4
+#define D0_5
+#endif
+
+/* Size of the TBICTX structure */
+#define TBICTX_BYTES ((TBICTX_AX_REGS*8)+TBICTX_AX)
+
+ .text
+ .balign 4
+ .global ___TBISwitchTail
+ .type ___TBISwitchTail,function
+___TBISwitchTail:
+ B $LSwitchTail /* Enter ___TBISwitch at its restore half */
+ .size ___TBISwitchTail,.-___TBISwitchTail
+
+/*
+ * TBIRES __TBIJumpX( TBIX64 ArgsA, PTBICTX *rpSaveCtx, int TrigsMask,
+ * void (*fnMain)(), void *pStack );
+ *
+ * This is a combination of __TBISwitch and __TBIJump with the context of
+ * the calling thread being saved in the rpSaveCtx location with a drop-thru
+ * effect into the __TBIJump logic. The ArgsB value passed via __TBIJump to
+ * the routine eventually invoked will reflect the rpSaveCtx value specified.
+ */
+ .text
+ .balign 4
+ .global ___TBIJumpX
+ .type ___TBIJumpX,function
+___TBIJumpX:
+ CMP D1RtP,#-1 /* Clear ZERO flag - assumes D1RtP != -1 */
+ B $LSwitchStart /* Save context; BNZ there enters ___TBIJump */
+ .size ___TBIJumpX,.-___TBIJumpX
+
+/*
+ * TBIRES __TBISwitch( TBIRES Switch, PTBICTX *rpSaveCtx )
+ *
+ * Software synchronous context switch between soft threads, save only the
+ * registers which are actually valid on call entry.
+ *
+ * A0FrP, D1RtP, D0.5, D0.6, D0.7 - Saved on stack
+ * A1GbP is global to all soft threads so not virtualised
+ * A0StP is then saved as the base of the TBICTX of the thread
+ *
+ */
+ .text
+ .balign 4
+ .global ___TBISwitch
+ .type ___TBISwitch,function
+___TBISwitch:
+ XORS D0Re0,D0Re0,D0Re0 /* Set ZERO flag */
+$LSwitchStart:
+ MOV D0FrT,A0FrP /* Boing entry sequence */
+ ADD A0FrP,A0StP,#0
+ SETL [A0StP+#8++],D0FrT,D1RtP
+/*
+ * Save current frame state - we save all regs because we don't want
+ * uninitialised crap in the TBICTX structure that the asynchronous resumption
+ * of a thread will restore.
+ */
+ MOVT D1Re0,#HI($LSwitchExit) /* ASync resume point here */
+ ADD D1Re0,D1Re0,#LO($LSwitchExit)
+ SETD [D1Ar3],A0StP /* Record pCtx of this thread */
+ MOVT D0Re0,#TBICTX_SOFT_BIT /* Only soft thread state */
+ SETL [A0StP++],D0Re0,D1Re0 /* Push header fields */
+ ADD D0FrT,A0StP,#TBICTX_AX-TBICTX_DX /* Address AX save area */
+ MOV D0Re0,#0 /* Setup 0:0 result for ASync */
+ MOV D1Re0,#0 /* resume of the thread */
+ MSETL [A0StP],D0Re0,D0Ar6,D0Ar4,D0Ar2,D0FrT,D0.5,D0.6,D0.7
+ SETL [A0StP++],D0Re0,D1Re0 /* Zero CurrRPT, CurrBPOBITS, */
+ SETL [A0StP++],D0Re0,D1Re0 /* Zero CurrMODE, CurrDIVTIME */
+ ADD A0StP,A0StP,#(TBICTX_AX_REGS*8) /* Reserve AX save space */
+ MSETL [D0FrT],A0StP,A0FrP,A0.2,A0.3 A0_4 /* Save AX regs */
+ BNZ ___TBIJump /* ZERO clear: entered via ___TBIJumpX */
+/*
+ * NextThread MUST be in TBICTX_SOFT_BIT state!
+ */
+$LSwitchTail:
+ MOV D0Re0,D0Ar2 /* Result from args */
+ MOV D1Re0,D1Ar1
+ ADD D1RtP,D1Ar1,#TBICTX_AX
+ MGETL A0StP,A0FrP,[D1RtP] /* Get frame values */
+$LSwitchCmn:
+ ADD A0.2,D1Ar1,#TBICTX_DX+(8*5)
+ MGETL D0.5,D0.6,D0.7,[A0.2] /* Get caller-saved DX regs */
+$LSwitchExit:
+ GETL D0FrT,D1RtP,[A0FrP++] /* Restore state from frame */
+ SUB A0StP,A0FrP,#8 /* Unwind stack */
+ MOV A0FrP,D0FrT /* Last memory read completes */
+ MOV PC,D1RtP /* Return to caller */
+ .size ___TBISwitch,.-___TBISwitch
+
+/*
+ * void __TBISyncResume( TBIRES State, int TrigMask );
+ *
+ * This routine causes the TBICTX structure specified in State.Sig.pCtx to
+ * be restored. This implies that execution will not return to the caller.
+ * The State.Sig.TrigMask field will be ored into TXMASKI during the
+ * context switch such that any immediately occurring interrupts occur in
+ * the context of the newly specified task. The State.Sig.SaveMask parameter
+ * is ignored.
+ */
+ .text
+ .balign 4
+ .global ___TBISyncResume
+ .type ___TBISyncResume,function
+___TBISyncResume:
+ MOV D0Re0,D0Ar2 /* Result from args */
+ MOV D1Re0,D1Ar1
+ XOR D1Ar5,D1Ar5,D1Ar5 /* D1Ar5 = 0 */
+ ADD D1RtP,D1Ar1,#TBICTX_AX /* Address of AX area in the context */
+ SWAP D1Ar5,TXMASKI /* D1Ar5 <-> TXMASKI */
+ MGETL A0StP,A0FrP,[D1RtP] /* Get frame values */
+ OR TXMASKI,D1Ar5,D1Ar3 /* New TXMASKI */
+ B $LSwitchCmn /* Finish in the ___TBISwitch restore tail */
+ .size ___TBISyncResume,.-___TBISyncResume
+
+/*
+ * void __TBIJump( TBIX64 ArgsA, TBIX32 ArgsB, int TrigsMask,
+ * void (*fnMain)(), void *pStack );
+ *
+ * Jump directly to a new routine on an arbitrary stack with arbitrary args
+ * ORing bits back into TXMASKI en route.
+ */
+ .text
+ .balign 4
+ .global ___TBIJump
+ .type ___TBIJump,function
+___TBIJump:
+ XOR D0Re0,D0Re0,D0Re0 /* D0Re0 = 0 */
+ MOV A0StP,D0Ar6 /* Stack = Frame */
+ SWAP D0Re0,TXMASKI /* D0Re0 <-> TXMASKI */
+ MOV A0FrP,D0Ar6 /* Frame pointer starts at the same address */
+ MOVT A1LbP,#HI(__exit) /* Build address of __exit in A1LbP */
+ ADD A1LbP,A1LbP,#LO(__exit)
+ MOV D1RtP,A1LbP /* D1RtP = __exit */
+ OR TXMASKI,D0Re0,D0Ar4 /* New TXMASKI */
+ MOV PC,D1Ar5 /* Jump to fnMain */
+ .size ___TBIJump,.-___TBIJump
+
+/*
+ * PTBICTX __TBISwitchInit( void *pStack, int (*fnMain)(),
+ * .... 4 extra 32-bit args .... );
+ *
+ * Generate a new soft thread context ready for its first outing.
+ *
+ * D1Ar1 - Region of memory to be used as the new soft thread stack
+ * D0Ar2 - Main line routine for new soft thread
+ * D1Ar3, D0Ar4, D1Ar5, D0Ar6 - arguments to be passed on stack
+ * The routine returns the initial PTBICTX value for the new thread
+ */
+ .text
+ .balign 4
+ .global ___TBISwitchInit
+ .type ___TBISwitchInit,function
+___TBISwitchInit:
+ MOV D0FrT,A0FrP /* Need save return point */
+ ADD A0FrP,A0StP,#0
+ SETL [A0StP++],D0FrT,D1RtP /* Save return to caller */
+ MOVT A1LbP,#HI(__exit)
+ ADD A1LbP,A1LbP,#LO(__exit)
+ MOV D1RtP,A1LbP /* Get address of __exit */
+ ADD D1Ar1,D1Ar1,#7 /* Align stack to 64-bits */
+ ANDMB D1Ar1,D1Ar1,#0xfff8 /* by rounding base up */
+ MOV A0.2,D1Ar1 /* A0.2 is new stack */
+ MOV D0FrT,D1Ar1 /* Initial pseudo-frame pointer */
+ SETL [A0.2++],D0FrT,D1RtP /* Save return to __exit */
+ MOV D1RtP,D0Ar2
+ SETL [A0.2++],D0FrT,D1RtP /* Save return to fnMain */
+ ADD D0FrT,D0FrT,#8 /* Advance pseudo-frame pointer */
+ MSETL [A0.2],D0Ar6,D0Ar4 /* Save extra initial args */
+ MOVT D1RtP,#HI(___TBIStart) /* Start up code for new stack */
+ ADD D1RtP,D1RtP,#LO(___TBIStart)
+ SETL [A0.2++],D0FrT,D1RtP /* Save return to ___TBIStart */
+ ADD D0FrT,D0FrT,#(8*3) /* Advance pseudo-frame pointer */
+ MOV D0Re0,A0.2 /* Return pCtx for new thread */
+ MOV D1Re0,#0 /* pCtx:0 is default Arg1:Arg2 */
+/*
+ * Generate initial TBICTX state
+ */
+ MOVT D1Ar1,#HI($LSwitchExit) /* Async restore code */
+ ADD D1Ar1,D1Ar1,#LO($LSwitchExit)
+ MOVT D0Ar2,#TBICTX_SOFT_BIT /* Only soft thread state */
+ ADD D0Ar6,A0.2,#TBICTX_BYTES /* New A0StP */
+ MOV D1Ar5,A1GbP /* Same A1GbP */
+ MOV D0Ar4,D0FrT /* Initial A0FrP */
+ MOV D1Ar3,A1LbP /* Same A1LbP */
+ SETL [A0.2++],D0Ar2,D1Ar1 /* Set header fields */
+ MSETL [A0.2],D0Re0,D0Ar6,D0Ar4,D0Ar2,D0FrT,D0.5,D0.6,D0.7
+ MOV D0Ar2,#0 /* Zero values */
+ MOV D1Ar1,#0
+ SETL [A0.2++],D0Ar2,D1Ar1 /* Zero CurrRPT, CurrBPOBITS, */
+ SETL [A0.2++],D0Ar2,D1Ar1 /* CurrMODE, and pCurrCBuf */
+ MSETL [A0.2],D0Ar6,D0Ar4,D0Ar2,D0FrT D0_5 /* Set DX and then AX regs */
+ B $LSwitchExit /* All done! */
+ .size ___TBISwitchInit,.-___TBISwitchInit
+
+ .text
+ .balign 4
+ .global ___TBIStart
+ .type ___TBIStart,function
+___TBIStart:
+ MOV D1Ar1,D1Re0 /* Pass TBIRES args to call */
+ MOV D0Ar2,D0Re0 /* (second half of the TBIRES pair) */
+ MGETL D0Re0,D0Ar6,D0Ar4,[A0FrP] /* Get hidden args */
+ SUB A0StP,A0FrP,#(8*3) /* Entry stack pointer */
+ MOV A0FrP,D0Re0 /* Entry frame pointer */
+ MOVT A1LbP,#HI(__exit) /* Build address of __exit in A1LbP */
+ ADD A1LbP,A1LbP,#LO(__exit)
+ MOV D1RtP,A1LbP /* D1RtP = __exit */
+ MOV PC,D1Re0 /* Jump into fnMain */
+ .size ___TBIStart,.-___TBIStart
+
+/*
+ * End of tbisoft.S
+ */
diff --git a/arch/metag/tbx/tbistring.c b/arch/metag/tbx/tbistring.c
new file mode 100644
index 00000000000..f90cd082206
--- /dev/null
+++ b/arch/metag/tbx/tbistring.c
@@ -0,0 +1,114 @@
+/*
+ * tbistring.c
+ *
+ * Copyright (C) 2001, 2002, 2003, 2005, 2007, 2012 Imagination Technologies.
+ *
+ * This program is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU General Public License version 2 as published by the
+ * Free Software Foundation.
+ *
+ * String table functions provided as part of the thread binary interface for
+ * Meta processors
+ */
+
+#include <linux/export.h>
+#include <linux/string.h>
+#include <asm/tbx.h>
+
+/*
+ * There are not any functions to modify the string table currently; if these
+ * are required at some later point I suggest having a separate module and
+ * ensuring that creating new entries does not interfere with reading old
+ * entries in any way.
+ */
+
+const TBISTR *__TBIFindStr(const TBISTR *start,
+ const char *str, int match_len)
+{
+ const TBISTR *search = start; /* NULL means start at the global table */
+ bool exact = true;
+ const TBISEG *seg;
+
+ if (match_len < 0) {
+ /* Negative length: prefix match; make match_len positive for the loop */
+ match_len = -match_len;
+ exact = false;
+ } else {
+ /*
+ * Also support historic behaviour, which expected match_len to
+ * include null terminator
+ */
+ if (match_len && str[match_len-1] == '\0')
+ match_len--;
+ }
+
+ if (!search) {
+ /* Find global string table segment */
+ seg = __TBIFindSeg(NULL, TBID_SEG(TBID_THREAD_GLOBAL,
+ TBID_SEGSCOPE_GLOBAL,
+ TBID_SEGTYPE_STRING));
+
+ if (!seg || seg->Bytes < sizeof(TBISTR))
+ /* No string table! */
+ return NULL;
+
+ /* Start of string table */
+ search = seg->pGAddr;
+ }
+
+ for (;;) {
+ while (!search->Tag)
+ /* Allow simple gaps which are just zero initialised */
+ search = (const TBISTR *)((const char *)search + 8);
+
+ if (search->Tag == METAG_TBI_STRE) {
+ /* Reached the end of the table */
+ search = NULL;
+ break;
+ }
+
+ if ((search->Len >= match_len) &&
+ (!exact || (search->Len == match_len + 1)) &&
+ (search->Tag != METAG_TBI_STRG)) {
+ /* Worth searching: long enough, right length if exact, not STRG */
+ if (!strncmp(str, (const char *)search->String,
+ match_len))
+ break;
+ }
+
+ /* Next entry */
+ search = (const TBISTR *)((const char *)search + search->Bytes);
+ }
+
+ return search; /* Matching entry, or NULL once the end tag is reached */
+}
+
+const void *__TBITransStr(const char *str, int len)
+{
+ const TBISTR *search = NULL; /* NULL makes the first search start at the table head */
+ const void *res = NULL;
+
+ for (;;) {
+ /* Search onwards */
+ search = __TBIFindStr(search, str, len);
+
+ /* No translation returns NULL */
+ if (!search)
+ break;
+
+ /* Skip matching entries with no translation data */
+ if (search->TransLen != METAG_TBI_STRX) {
+ /* Calculate base of translation string */
+ res = (const char *)search->String +
+ ((search->Len + 7) & ~7); /* Len rounded up to a multiple of 8 */
+ break;
+ }
+
+ /* Next entry */
+ search = (const TBISTR *)((const char *)search + search->Bytes);
+ }
+
+ /* Return base address of translation data or NULL */
+ return res;
+}
+EXPORT_SYMBOL(__TBITransStr);
diff --git a/arch/metag/tbx/tbitimer.S b/arch/metag/tbx/tbitimer.S
new file mode 100644
index 00000000000..5dbeddeee7b
--- /dev/null
+++ b/arch/metag/tbx/tbitimer.S
@@ -0,0 +1,207 @@
+/*
+ * tbitimer.S
+ *
+ * Copyright (C) 2001, 2002, 2007, 2012 Imagination Technologies.
+ *
+ * This program is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU General Public License version 2 as published by the
+ * Free Software Foundation.
+ *
+ * TBI timer support routines and data values
+ */
+
+ .file "tbitimer.S"
+/*
+ * Get data structures and defines from the main C header
+ */
+#include <asm/tbx.h>
+
+ .data
+ .balign 8
+ .global ___TBITimeB
+ .type ___TBITimeB,object
+___TBITimeB:
+ .quad 0 /* Background 'lost' ticks */
+ .size ___TBITimeB,.-___TBITimeB
+
+ .data
+ .balign 8
+ .global ___TBITimeI
+ .type ___TBITimeI,object
+___TBITimeI:
+ .quad 0 /* Interrupt 'lost' ticks */
+ .size ___TBITimeI,.-___TBITimeI
+
+ .data
+ .balign 8
+ .global ___TBITimes
+ .type ___TBITimes,object
+___TBITimes:
+ .long ___TBITimeB /* Table of 'lost' tick values */
+ .long ___TBITimeI
+ .size ___TBITimes,.-___TBITimes
+
+/*
+ * Flag bits for control of ___TBITimeCore
+ */
+#define TIMER_SET_BIT 1
+#define TIMER_ADD_BIT 2
+
+/*
+ * Initialise or stop timer support
+ *
+ * Register Usage: D1Ar1 holds Id, D0Ar2 is initial delay or 0
+ * D0FrT is used to call ___TBITimeCore
+ * D0Re0 is used for the result which is TXSTAT_TIMER_BIT
+ * D0Ar4, D1Ar5, D0Ar6 are all used as scratch
+ * Other registers are those set by ___TBITimeCore
+ * A0.3 is assumed to point at ___TBITime(I/B)
+ */
+ .text
+ .balign 4
+ .global ___TBITimerCtrl
+ .type ___TBITimerCtrl,function
+___TBITimerCtrl:
+ MOV D1Ar5,#TIMER_SET_BIT /* Timer SET request */
+ MOVT D0FrT,#HI(___TBITimeCore) /* Get timer core reg values */
+ CALL D0FrT,#LO(___TBITimeCore) /* and perform register update */
+ NEGS D0Ar6,D0Ar2 /* Set flags from time-stamp */
+ ASR D1Ar5,D0Ar6,#31 /* Sign extend D0Ar6 into D1Ar5 */
+ SETLNZ [A0.3],D0Ar6,D1Ar5 /* ___TBITime(B/I)=-Start if enable */
+ MOV PC,D1RtP /* Return */
+ .size ___TBITimerCtrl,.-___TBITimerCtrl
+
+/*
+ * Return ___TBITimeStamp value
+ *
+ * Register Usage: D1Ar1 holds Id
+ * D0FrT is used to call ___TBITimeCore
+ * D0Re0, D1Re0 is used for the result
+ * D1Ar3, D0Ar4, D1Ar5 are operands of the 64-bit add
+ * Other registers are those set by ___TBITimeCore
+ * D0Ar6 is assumed to be the timer value read
+ * A0.3 is assumed to point at ___TBITime(I/B)
+ */
+ .text
+ .balign 4
+ .global ___TBITimeStamp
+ .type ___TBITimeStamp,function
+___TBITimeStamp:
+ MOV D1Ar5,#0 /* Timer GET request */
+ MOVT D0FrT,#HI(___TBITimeCore) /* Get timer core reg values */
+ CALL D0FrT,#LO(___TBITimeCore) /* with no register update */
+ ADDS D0Re0,D0Ar4,D0Ar6 /* Add current time value */
+ ADD D1Re0,D1Ar3,D1Ar5 /* to 64-bit signed extend time */
+ ADDCS D1Re0,D1Re0,#1 /* Propagate carry from the low word */
+ MOV PC,D1RtP /* Return */
+ .size ___TBITimeStamp,.-___TBITimeStamp
+
+/*
+ * Perform ___TBITimerAdd logic
+ *
+ * Register Usage: D1Ar1 holds Id, D0Ar2 holds value to be added to the timer
+ * D0Re0 is used for the result - new TIMER value
+ * D1Ar5, D0Ar6 are used as scratch
+ * Other registers are those set by ___TBITimeCore
+ * D0Ar6 is assumed to be the timer value read
+ * D0Ar4, D1Ar3 is the current value of ___TBITime(B/I)
+ */
+ .text
+ .balign 4
+ .global ___TBITimerAdd
+ .type ___TBITimerAdd,function
+___TBITimerAdd:
+ MOV D1Ar5,#TIMER_ADD_BIT /* Timer ADD request */
+ MOVT D0FrT,#HI(___TBITimeCore) /* Get timer core reg values */
+ CALL D0FrT,#LO(___TBITimeCore) /* and add D0Ar2 to the timer */
+ ADD D0Re0,D0Ar2,D0Ar6 /* Regenerate new value = result */
+ NEG D0Ar2,D0Ar2 /* Negate delta */
+ ASR D1Re0,D0Ar2,#31 /* Sign extend negated delta */
+ ADDS D0Ar4,D0Ar4,D0Ar2 /* Add time added to ... */
+ ADD D1Ar3,D1Ar3,D1Re0 /* ... real timer ... */
+ ADDCS D1Ar3,D1Ar3,#1 /* ... with carry */
+ SETL [A0.3],D0Ar4,D1Ar3 /* Update ___TBITime(B/I) */
+ MOV PC,D1RtP /* Return */
+ .size ___TBITimerAdd,.-___TBITimerAdd
+
+#ifdef TBI_1_4
+/*
+ * Perform ___TBITimerDeadline logic
+ * NB: Delays are positive compared to the Wait values which are -ive
+ *
+ * Register Usage: D1Ar1 holds Id
+ * D0Ar2 holds Delay requested
+ * D0Re0 is used for the result - old TIMER Delay value
+ * D1Ar5, D0Ar6 are used as scratch
+ * Other registers are those set by ___TBITimeCore
+ * D0Ar6 is assumed to be the timer value read
+ * D0Ar4, D1Ar3 is the current value of ___TBITime(B/I)
+ *
+ */
+ .text
+ .type ___TBITimerDeadline,function
+ .global ___TBITimerDeadline
+ .align 2
+___TBITimerDeadline:
+ MOV D1Ar5,#TIMER_SET_BIT /* Timer SET request */
+ MOVT D0FrT,#HI(___TBITimeCore) /* Get timer core reg values */
+ CALL D0FrT,#LO(___TBITimeCore) /* and set the timer to D0Ar2 */
+ MOV D0Re0,D0Ar6 /* Old value read = result */
+ SUB D0Ar2,D0Ar6,D0Ar2 /* Delta from (old - new) */
+ ASR D1Re0,D0Ar2,#31 /* Sign extend delta */
+ ADDS D0Ar4,D0Ar4,D0Ar2 /* Add time added to ... */
+ ADD D1Ar3,D1Ar3,D1Re0 /* ... real timer ... */
+ ADDCS D1Ar3,D1Ar3,#1 /* ... with carry */
+ SETL [A0.3],D0Ar4,D1Ar3 /* Update ___TBITime(B/I) */
+ MOV PC,D1RtP /* Return */
+ .size ___TBITimerDeadline,.-___TBITimerDeadline
+#endif /* TBI_1_4 */
+
+/*
+ * Perform core timer access logic
+ *
+ * Register Usage: D1Ar1 holds Id, D0Ar2 holds input value for SET and
+ * input value for ADD
+ * D1Ar5 controls op as SET or ADD as bit values, D0FrT is the return address
+ * On return D0Ar6, D1Ar5 holds the old 64-bit timer value
+ * A0.3 is setup to point at ___TBITime(I/B)
+ * A1.3 is setup to point at ___TBITimes
+ * D0Ar4, D1Ar3 is setup to value of ___TBITime(I/B)
+ */
+ .text
+ .balign 4
+ .global ___TBITimeCore
+ .type ___TBITimeCore,function
+___TBITimeCore:
+#ifndef METAC_0_1
+ TSTT D1Ar1,#HI(TBID_ISTAT_BIT) /* Interrupt level timer? */
+#endif
+ MOVT A1LbP,#HI(___TBITimes) /* Build address of ___TBITimes */
+ ADD A1LbP,A1LbP,#LO(___TBITimes)
+ MOV A1.3,A1LbP /* Get ___TBITimes address */
+#ifndef METAC_0_1
+ BNZ $LTimeCoreI /* Yes: Service TXTIMERI! */
+#endif
+ LSRS D1Ar5,D1Ar5,#1 /* Carry = SET, Zero = !ADD */
+ GETD A0.3,[A1.3+#0] /* A0.3 == &___TBITimeB */
+ MOV D0Ar6,TXTIMER /* Always GET old value */
+ MOVCS TXTIMER,D0Ar2 /* Conditional SET operation */
+ ADDNZ TXTIMER,D0Ar2,D0Ar6 /* Conditional ADD operation */
+#ifndef METAC_0_1
+ B $LTimeCoreEnd /* Skip the TXTIMERI variant */
+$LTimeCoreI:
+ LSRS D1Ar5,D1Ar5,#1 /* Carry = SET, Zero = !ADD */
+ GETD A0.3,[A1.3+#4] /* A0.3 == &___TBITimeI */
+ MOV D0Ar6,TXTIMERI /* Always GET old value */
+ MOVCS TXTIMERI,D0Ar2 /* Conditional SET operation */
+ ADDNZ TXTIMERI,D0Ar2,D0Ar6 /* Conditional ADD operation */
+$LTimeCoreEnd:
+#endif
+ ASR D1Ar5,D0Ar6,#31 /* Sign extend D0Ar6 into D1Ar5 */
+ GETL D0Ar4,D1Ar3,[A0.3] /* Read ___TBITime(B/I) */
+ MOV PC,D0FrT /* Return quickly */
+ .size ___TBITimeCore,.-___TBITimeCore
+
+/*
+ * End of tbitimer.S
+ */
diff --git a/drivers/clocksource/Kconfig b/drivers/clocksource/Kconfig
index e920cbe519f..e507ab7df60 100644
--- a/drivers/clocksource/Kconfig
+++ b/drivers/clocksource/Kconfig
@@ -62,3 +62,8 @@ config CLKSRC_DBX500_PRCMU_SCHED_CLOCK
config ARM_ARCH_TIMER
bool
+
+config CLKSRC_METAG_GENERIC
+ def_bool y if METAG
+ help
+ This option enables support for the Meta per-thread timers.
diff --git a/drivers/clocksource/Makefile b/drivers/clocksource/Makefile
index 7d671b85a98..4d8283aec5b 100644
--- a/drivers/clocksource/Makefile
+++ b/drivers/clocksource/Makefile
@@ -21,3 +21,4 @@ obj-$(CONFIG_ARCH_TEGRA) += tegra20_timer.o
obj-$(CONFIG_VT8500_TIMER) += vt8500_timer.o
obj-$(CONFIG_ARM_ARCH_TIMER) += arm_arch_timer.o
+obj-$(CONFIG_CLKSRC_METAG_GENERIC) += metag_generic.o
diff --git a/drivers/clocksource/metag_generic.c b/drivers/clocksource/metag_generic.c
new file mode 100644
index 00000000000..ade7513a11d
--- /dev/null
+++ b/drivers/clocksource/metag_generic.c
@@ -0,0 +1,198 @@
+/*
+ * Copyright (C) 2005-2013 Imagination Technologies Ltd.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ *
+ *
+ * Support for Meta per-thread timers.
+ *
+ * Meta hardware threads have 2 timers. The background timer (TXTIMER) is used
+ * as a free-running time base (hz clocksource), and the interrupt timer
+ * (TXTIMERI) is used for the timer interrupt (clock event). Both counters
+ * traditionally count at approximately 1MHz.
+ */
+
+#include <clocksource/metag_generic.h>
+#include <linux/cpu.h>
+#include <linux/errno.h>
+#include <linux/sched.h>
+#include <linux/kernel.h>
+#include <linux/param.h>
+#include <linux/time.h>
+#include <linux/init.h>
+#include <linux/proc_fs.h>
+#include <linux/clocksource.h>
+#include <linux/clockchips.h>
+#include <linux/interrupt.h>
+
+#include <asm/clock.h>
+#include <asm/hwthread.h>
+#include <asm/core_reg.h>
+#include <asm/metag_mem.h>
+#include <asm/tbx.h>
+
+#define HARDWARE_FREQ 1000000 /* 1MHz */
+#define HARDWARE_DIV 1 /* divide by 1 = 1MHz clock */
+#define HARDWARE_TO_NS_SHIFT 10 /* convert ticks to ns */
+
+static unsigned int hwtimer_freq = HARDWARE_FREQ;
+static DEFINE_PER_CPU(struct clock_event_device, local_clockevent);
+static DEFINE_PER_CPU(char [11], local_clockevent_name);
+
+static int metag_timer_set_next_event(unsigned long delta,
+ struct clock_event_device *dev)
+{
+ __core_reg_set(TXTIMERI, -delta); /* Load the interrupt timer with the negated delta */
+ return 0; /* Always reports success */
+}
+
+static void metag_timer_set_mode(enum clock_event_mode mode,
+ struct clock_event_device *evt)
+{
+ switch (mode) {
+ case CLOCK_EVT_MODE_ONESHOT:
+ case CLOCK_EVT_MODE_RESUME:
+ break; /* Nothing to program for these modes */
+
+ case CLOCK_EVT_MODE_SHUTDOWN:
+ /* We should disable the IRQ here */
+ break;
+
+ case CLOCK_EVT_MODE_PERIODIC:
+ case CLOCK_EVT_MODE_UNUSED:
+ WARN_ON(1); /* Device only advertises one-shot operation */
+ break;
+ }
+}
+
+static cycle_t metag_clocksource_read(struct clocksource *cs)
+{
+ return __core_reg_get(TXTIMER); /* Background timer used as the free-running time base */
+}
+
+static struct clocksource clocksource_metag = {
+ .name = "META",
+ .rating = 200,
+ .mask = CLOCKSOURCE_MASK(32),
+ .read = metag_clocksource_read,
+ .flags = CLOCK_SOURCE_IS_CONTINUOUS,
+};
+
+static irqreturn_t metag_timer_interrupt(int irq, void *dummy)
+{
+ struct clock_event_device *evt = &__get_cpu_var(local_clockevent); /* This CPU's device */
+
+ evt->event_handler(evt); /* Pass the tick to the handler installed on the device */
+
+ return IRQ_HANDLED;
+}
+
+static struct irqaction metag_timer_irq = {
+ .name = "META core timer",
+ .handler = metag_timer_interrupt,
+ .flags = IRQF_TIMER | IRQF_IRQPOLL | IRQF_PERCPU,
+};
+
+unsigned long long sched_clock(void)
+{
+ unsigned long long ticks = __core_reg_get(TXTIMER); /* Raw background timer count */
+ return ticks << HARDWARE_TO_NS_SHIFT; /* ticks * 1024: approximates 1000ns per 1MHz tick */
+}
+
+static void __cpuinit arch_timer_setup(unsigned int cpu)
+{ /* Set the timer divider, register cpu's clock event device, sync TXTIMER */
+ unsigned int txdivtime;
+ struct clock_event_device *clk = &per_cpu(local_clockevent, cpu);
+ char *name = per_cpu(local_clockevent_name, cpu);
+
+ txdivtime = __core_reg_get(TXDIVTIME);
+
+ txdivtime &= ~TXDIVTIME_DIV_BITS; /* Replace only the divider field */
+ txdivtime |= (HARDWARE_DIV & TXDIVTIME_DIV_BITS);
+
+ __core_reg_set(TXDIVTIME, txdivtime);
+
+ sprintf(name, "META %d", cpu); /* 11-byte buffer: fits cpu ids up to 99999 */
+ clk->name = name;
+ clk->features = CLOCK_EVT_FEAT_ONESHOT;
+
+ clk->rating = 200;
+ clk->shift = 12;
+ clk->irq = tbisig_map(TBID_SIGNUM_TRT);
+ clk->set_mode = metag_timer_set_mode;
+ clk->set_next_event = metag_timer_set_next_event;
+
+ clk->mult = div_sc(hwtimer_freq, NSEC_PER_SEC, clk->shift);
+ clk->max_delta_ns = clockevent_delta2ns(0x7fffffff, clk);
+ clk->min_delta_ns = clockevent_delta2ns(0xf, clk);
+ clk->cpumask = cpumask_of(cpu);
+
+ clockevents_register_device(clk);
+
+ /*
+ * For all non-boot CPUs we need to synchronize our free
+ * running clock (TXTIMER) with the boot CPU's clock.
+ *
+ * While this won't be accurate, it should be close enough.
+ */
+ if (cpu) {
+ unsigned int thread0 = cpu_2_hwthread_id[0]; /* Hardware thread of CPU 0 */
+ unsigned long val;
+
+ val = core_reg_read(TXUCT_ID, TXTIMER_REGNUM, thread0);
+ __core_reg_set(TXTIMER, val);
+ }
+}
+
+static int __cpuinit arch_timer_cpu_notify(struct notifier_block *self,
+ unsigned long action, void *hcpu)
+{
+ int cpu = (long)hcpu; /* hcpu carries the CPU number, not a real pointer */
+
+ switch (action) {
+ case CPU_STARTING:
+ case CPU_STARTING_FROZEN:
+ arch_timer_setup(cpu); /* Configure the timer for the starting CPU */
+ break;
+ }
+
+ return NOTIFY_OK; /* All other actions are ignored */
+}
+
+static struct notifier_block __cpuinitdata arch_timer_cpu_nb = {
+ .notifier_call = arch_timer_cpu_notify,
+};
+
+int __init metag_generic_timer_init(void)
+{
+ /*
+ * On Meta 2 SoCs, the actual frequency of the timer is based on the
+ * Meta core clock speed divided by an integer, so it is only
+ * approximately 1MHz. Calculating the real frequency here drastically
+ * reduces clock skew on these SoCs.
+ */
+#ifdef CONFIG_METAG_META21
+ hwtimer_freq = get_coreclock() / (metag_in32(EXPAND_TIMER_DIV) + 1);
+#endif
+ clocksource_register_hz(&clocksource_metag, hwtimer_freq); /* TXTIMER as clocksource */
+
+ setup_irq(tbisig_map(TBID_SIGNUM_TRT), &metag_timer_irq); /* NOTE(review): result unchecked */
+
+ /* Configure timer on boot CPU */
+ arch_timer_setup(smp_processor_id());
+
+ /* Hook cpu boot to configure other CPU's timers */
+ register_cpu_notifier(&arch_timer_cpu_nb);
+
+ return 0; /* Always reports success */
+}
diff --git a/drivers/irqchip/Makefile b/drivers/irqchip/Makefile
index e65fbf2cdf7..98e3b87bdf1 100644
--- a/drivers/irqchip/Makefile
+++ b/drivers/irqchip/Makefile
@@ -2,6 +2,8 @@ obj-$(CONFIG_IRQCHIP) += irqchip.o
obj-$(CONFIG_ARCH_BCM2835) += irq-bcm2835.o
obj-$(CONFIG_ARCH_EXYNOS) += exynos-combiner.o
+obj-$(CONFIG_METAG) += irq-metag-ext.o
+obj-$(CONFIG_METAG_PERFCOUNTER_IRQS) += irq-metag.o
obj-$(CONFIG_ARCH_SUNXI) += irq-sunxi.o
obj-$(CONFIG_ARCH_SPEAR3XX) += spear-shirq.o
obj-$(CONFIG_ARM_GIC) += irq-gic.o
diff --git a/drivers/irqchip/irq-metag-ext.c b/drivers/irqchip/irq-metag-ext.c
new file mode 100644
index 00000000000..92c41ab4dbf
--- /dev/null
+++ b/drivers/irqchip/irq-metag-ext.c
@@ -0,0 +1,868 @@
+/*
+ * Meta External interrupt code.
+ *
+ * Copyright (C) 2005-2012 Imagination Technologies Ltd.
+ *
+ * External interrupts on Meta are configured at two-levels, in the CPU core and
+ * in the external trigger block. Interrupts from SoC peripherals are
+ * multiplexed onto a single Meta CPU "trigger" - traditionally it has always
+ * been trigger 2 (TR2). For info on how de-multiplexing happens check out
+ * meta_intc_irq_demux().
+ */
+
+#include <linux/interrupt.h>
+#include <linux/irqchip/metag-ext.h>
+#include <linux/irqdomain.h>
+#include <linux/io.h>
+#include <linux/of.h>
+#include <linux/slab.h>
+#include <linux/syscore_ops.h>
+
+#include <asm/irq.h>
+#include <asm/hwthread.h>
+
+/* Address stride between banks of the HWSTATEXT/HWLEVELEXT/HWMASKEXT registers */
+#define HWSTAT_STRIDE 8
+/* Address stride between the HWVECnEXT register blocks of successive banks */
+#define HWVEC_BLK_STRIDE 0x1000
+
+/**
+ * struct meta_intc_priv - private meta external interrupt data
+ * @nr_banks: Number of interrupt banks (each bank covers 32 IRQs)
+ * @domain: IRQ domain for all banks of external IRQs
+ * @unmasked: Record of unmasked IRQs, one word per bank
+ * @levels_altered: Record of altered level bits, one word per bank (only
+ * present when CONFIG_METAG_SUSPEND_MEM is enabled)
+ *
+ * The per-bank arrays have 4 entries, matching the largest num-banks value
+ * accepted by init_external_IRQ().
+ */
+struct meta_intc_priv {
+ unsigned int nr_banks;
+ struct irq_domain *domain;
+
+ unsigned long unmasked[4];
+
+#ifdef CONFIG_METAG_SUSPEND_MEM
+ unsigned long levels_altered[4];
+#endif
+};
+
+/* Private data for the one and only external interrupt controller */
+static struct meta_intc_priv meta_intc_priv;
+
+/**
+ * meta_intc_offset() - Bit position of a hardware IRQ inside its bank
+ * @hw: Hardware IRQ number (within external trigger block)
+ *
+ * The position is the low five bits of @hw.
+ *
+ * Returns: Bit offset into the IRQ's bank registers
+ */
+static unsigned int meta_intc_offset(irq_hw_number_t hw)
+{
+	unsigned int offset = hw & 0x1f;
+
+	return offset;
+}
+
+/**
+ * meta_intc_bank() - Bank that a hardware IRQ number belongs to
+ * @hw: Hardware IRQ number (within external trigger block)
+ *
+ * The bank is @hw with its low five (offset) bits shifted away.
+ *
+ * Returns: Bank number indicating which register the IRQ's bits are in
+ */
+static unsigned int meta_intc_bank(irq_hw_number_t hw)
+{
+	unsigned int bank = hw >> 5;
+
+	return bank;
+}
+
+/**
+ * meta_intc_stat_addr() - Locate the HWSTATEXT register of a hardware IRQ
+ * @hw: Hardware IRQ number (within external trigger block)
+ *
+ * Returns: Address of the HWSTATEXT register holding the status bit of the
+ * specified hardware IRQ number
+ */
+static void __iomem *meta_intc_stat_addr(irq_hw_number_t hw)
+{
+	unsigned int bank = meta_intc_bank(hw);
+
+	/* one status register per bank, HWSTAT_STRIDE apart */
+	return (void __iomem *)(HWSTATEXT + HWSTAT_STRIDE * bank);
+}
+
+/**
+ * meta_intc_level_addr() - Locate the HWLEVELEXT register of a hardware IRQ
+ * @hw: Hardware IRQ number (within external trigger block)
+ *
+ * Returns: Address of the HWLEVELEXT register holding the sense bit of the
+ * specified hardware IRQ number
+ */
+static void __iomem *meta_intc_level_addr(irq_hw_number_t hw)
+{
+	unsigned int bank = meta_intc_bank(hw);
+
+	/* one level register per bank, HWSTAT_STRIDE apart */
+	return (void __iomem *)(HWLEVELEXT + HWSTAT_STRIDE * bank);
+}
+
+/**
+ * meta_intc_mask_addr() - Locate the HWMASKEXT register of a hardware IRQ
+ * @hw: Hardware IRQ number (within external trigger block)
+ *
+ * Returns: Address of the HWMASKEXT register holding the mask bit of the
+ * specified hardware IRQ number
+ */
+static void __iomem *meta_intc_mask_addr(irq_hw_number_t hw)
+{
+	unsigned int bank = meta_intc_bank(hw);
+
+	/* one mask register per bank, HWSTAT_STRIDE apart */
+	return (void __iomem *)(HWMASKEXT + HWSTAT_STRIDE * bank);
+}
+
+/**
+ * meta_intc_vec_addr() - Locate the vector register of a hardware interrupt
+ * @hw: Hardware IRQ number (within external trigger block)
+ *
+ * Returns: Address of the HWVECEXT register controlling the core trigger to
+ * vector the IRQ onto
+ */
+static inline void __iomem *meta_intc_vec_addr(irq_hw_number_t hw)
+{
+	unsigned int bank = meta_intc_bank(hw);
+	unsigned int offset = meta_intc_offset(hw);
+
+	/* one block of vector registers per bank, one register per IRQ */
+	return (void __iomem *)(HWVEC0EXT +
+				HWVEC_BLK_STRIDE * bank +
+				HWVECnEXT_STRIDE * offset);
+}
+
+/**
+ * meta_intc_startup_irq() - set up an external irq
+ * @data: data for the external irq to start up
+ *
+ * Multiplex interrupts for irq onto TR2 of the thread reported by
+ * hard_processor_id(). Clear any pending interrupts and unmask irq, both using
+ * the callbacks of the irq's chip.
+ *
+ * Returns: always 0
+ */
+static unsigned int meta_intc_startup_irq(struct irq_data *data)
+{
+ irq_hw_number_t hw = data->hwirq;
+ void __iomem *vec_addr = meta_intc_vec_addr(hw);
+ int thread = hard_processor_id();
+
+ /* Perform any necessary acking (only if the chip has an ack callback). */
+ if (data->chip->irq_ack)
+ data->chip->irq_ack(data);
+
+ /* Wire up this interrupt to the core with HWVECxEXT. */
+ metag_out32(TBI_TRIG_VEC(TBID_SIGNUM_TR2(thread)), vec_addr);
+
+ /* Perform any necessary unmasking. */
+ data->chip->irq_unmask(data);
+
+ return 0;
+}
+
+/**
+ * meta_intc_shutdown_irq() - turn off an external irq
+ * @data: data for the external irq to turn off
+ *
+ * Mask irq using the chip's mask callback and stop muxing it onto TR2 by
+ * writing 0 to its vector register.
+ */
+static void meta_intc_shutdown_irq(struct irq_data *data)
+{
+ irq_hw_number_t hw = data->hwirq;
+ void __iomem *vec_addr = meta_intc_vec_addr(hw);
+
+ /* Mask the IRQ */
+ data->chip->irq_mask(data);
+
+ /*
+ * Disable the IRQ at the core by removing the interrupt from
+ * the HW vector mapping.
+ */
+ metag_out32(0, vec_addr);
+}
+
+/**
+ * meta_intc_ack_irq() - acknowledge an external irq
+ * @data: data for the external irq to ack
+ *
+ * Clear down an edge interrupt in the status register.
+ */
+static void meta_intc_ack_irq(struct irq_data *data)
+{
+	irq_hw_number_t hw = data->hwirq;
+	/* 1U: the offset can be 31, and shifting a signed 1 that far is undefined */
+	unsigned int bit = 1U << meta_intc_offset(hw);
+	void __iomem *stat_addr = meta_intc_stat_addr(hw);
+
+	/*
+	 * Ack the int, if it is still 'on'. Nothing is written when the
+	 * status bit is already clear.
+	 * NOTE - this only works for edge triggered interrupts.
+	 */
+	if (metag_in32(stat_addr) & bit)
+		metag_out32(bit, stat_addr);
+}
+
+/**
+ * record_irq_is_masked() - note that an IRQ is masked so it isn't handled
+ * @data: data for the external irq to record
+ *
+ * To be called whenever an external IRQ is masked, whichever callback does the
+ * masking. The IRQ's bit in the unmasked record is cleared so that the IRQ is
+ * not handled even if it still shows up in the status register.
+ */
+static void record_irq_is_masked(struct irq_data *data)
+{
+	irq_hw_number_t hw = data->hwirq;
+	unsigned int bank = meta_intc_bank(hw);
+	unsigned int offset = meta_intc_offset(hw);
+
+	clear_bit(offset, &meta_intc_priv.unmasked[bank]);
+}
+
+/**
+ * record_irq_is_unmasked() - note that an IRQ is unmasked so it can be handled
+ * @data: data for the external irq to record
+ *
+ * To be called whenever an external IRQ is unmasked, whichever callback does
+ * the unmasking. The IRQ's bit in the unmasked record is set so that the IRQ
+ * gets handled when it shows up in the status register.
+ */
+static void record_irq_is_unmasked(struct irq_data *data)
+{
+	irq_hw_number_t hw = data->hwirq;
+	unsigned int bank = meta_intc_bank(hw);
+	unsigned int offset = meta_intc_offset(hw);
+
+	set_bit(offset, &meta_intc_priv.unmasked[bank]);
+}
+
+/*
+ * For use by wrapper IRQ drivers
+ */
+
+/**
+ * meta_intc_mask_irq_simple() - minimal mask used by wrapper IRQ drivers
+ * @data: data for the external irq being masked
+ *
+ * This should be called by any wrapper IRQ driver mask functions. It doesn't do
+ * any masking but records the IRQ as masked so that the core code knows the
+ * mask has taken place. It is the caller's responsibility to ensure that the
+ * IRQ won't trigger an interrupt to the core.
+ */
+void meta_intc_mask_irq_simple(struct irq_data *data)
+{
+ record_irq_is_masked(data);
+}
+
+/**
+ * meta_intc_unmask_irq_simple() - minimal unmask used by wrapper IRQ drivers
+ * @data: data for the external irq being unmasked
+ *
+ * This should be called by any wrapper IRQ driver unmask functions. It doesn't
+ * do any unmasking but records the IRQ as unmasked so that the core code knows
+ * the unmask has taken place. It is the caller's responsibility to ensure that
+ * the IRQ can now trigger an interrupt to the core.
+ */
+void meta_intc_unmask_irq_simple(struct irq_data *data)
+{
+ record_irq_is_unmasked(data);
+}
+
+
+/**
+ * meta_intc_mask_irq() - mask an external irq using HWMASKEXT
+ * @data: data for the external irq to mask
+ *
+ * This is a default implementation of a mask function which makes use of the
+ * HWMASKEXT registers available in newer versions.
+ *
+ * Earlier versions without these registers should use SoC level IRQ masking
+ * which call the meta_intc_*_simple() functions above, or if that isn't
+ * available should use the fallback meta_intc_*_nomask() functions below.
+ */
+static void meta_intc_mask_irq(struct irq_data *data)
+{
+	irq_hw_number_t hw = data->hwirq;
+	/* 1U: the offset can be 31, and shifting a signed 1 that far is undefined */
+	unsigned int bit = 1U << meta_intc_offset(hw);
+	void __iomem *mask_addr = meta_intc_mask_addr(hw);
+	unsigned long flags;
+
+	record_irq_is_masked(data);
+
+	/* update the interrupt mask: read-modify-write under the global lock */
+	__global_lock2(flags);
+	metag_out32(metag_in32(mask_addr) & ~bit, mask_addr);
+	__global_unlock2(flags);
+}
+
+/**
+ * meta_intc_unmask_irq() - unmask an external irq using HWMASKEXT
+ * @data: data for the external irq to unmask
+ *
+ * This is a default implementation of an unmask function which makes use of the
+ * HWMASKEXT registers available on new versions. It should be paired with
+ * meta_intc_mask_irq() above.
+ */
+static void meta_intc_unmask_irq(struct irq_data *data)
+{
+	irq_hw_number_t hw = data->hwirq;
+	/* 1U: the offset can be 31, and shifting a signed 1 that far is undefined */
+	unsigned int bit = 1U << meta_intc_offset(hw);
+	void __iomem *mask_addr = meta_intc_mask_addr(hw);
+	unsigned long flags;
+
+	record_irq_is_unmasked(data);
+
+	/* update the interrupt mask: read-modify-write under the global lock */
+	__global_lock2(flags);
+	metag_out32(metag_in32(mask_addr) | bit, mask_addr);
+	__global_unlock2(flags);
+}
+
+/**
+ * meta_intc_mask_irq_nomask() - mask an external irq by unvectoring
+ * @data: data for the external irq to mask
+ *
+ * This is the version of the mask function for older versions which don't have
+ * HWMASKEXT registers, or a SoC level means of masking IRQs. Instead the IRQ is
+ * unvectored from the core (0 is written to its vector register) and
+ * retriggered if necessary later by the matching unmask function.
+ */
+static void meta_intc_mask_irq_nomask(struct irq_data *data)
+{
+ irq_hw_number_t hw = data->hwirq;
+ void __iomem *vec_addr = meta_intc_vec_addr(hw);
+
+ record_irq_is_masked(data);
+
+ /* there is no interrupt mask, so unvector the interrupt */
+ metag_out32(0, vec_addr);
+}
+
+/**
+ * meta_intc_unmask_edge_irq_nomask() - unmask an edge irq by revectoring
+ * @data: data for the external irq to unmask
+ *
+ * This is the version of the unmask function for older versions which don't
+ * have HWMASKEXT registers, or a SoC level means of masking IRQs. Instead the
+ * IRQ is revectored back to the core and retriggered if necessary.
+ *
+ * The retriggering done by this function is specific to edge interrupts.
+ */
+static void meta_intc_unmask_edge_irq_nomask(struct irq_data *data)
+{
+	irq_hw_number_t hw = data->hwirq;
+	/* 1U: the offset can be 31, and shifting a signed 1 that far is undefined */
+	unsigned int bit = 1U << meta_intc_offset(hw);
+	void __iomem *stat_addr = meta_intc_stat_addr(hw);
+	void __iomem *vec_addr = meta_intc_vec_addr(hw);
+	unsigned int thread = hard_processor_id();
+
+	record_irq_is_unmasked(data);
+
+	/* there is no interrupt mask, so revector the interrupt */
+	metag_out32(TBI_TRIG_VEC(TBID_SIGNUM_TR2(thread)), vec_addr);
+
+	/*
+	 * Re-trigger interrupt
+	 *
+	 * Writing a 1 toggles, and a 0->1 transition triggers. We only
+	 * retrigger if the status bit is already set, which means we
+	 * need to clear it first. Retriggering is fundamentally racy
+	 * because if the interrupt fires again after we clear it we
+	 * could end up clearing it again and the interrupt handler
+	 * thinking it hasn't fired. Therefore we need to keep trying to
+	 * retrigger until the bit is set.
+	 */
+	if (metag_in32(stat_addr) & bit) {
+		metag_out32(bit, stat_addr);
+		while (!(metag_in32(stat_addr) & bit))
+			metag_out32(bit, stat_addr);
+	}
+}
+
+/**
+ * meta_intc_unmask_level_irq_nomask() - unmask a level irq by revectoring
+ * @data: data for the external irq to unmask
+ *
+ * This is the version of the unmask function for older versions which don't
+ * have HWMASKEXT registers, or a SoC level means of masking IRQs. Instead the
+ * IRQ is revectored back to the core and retriggered if necessary.
+ *
+ * The retriggering done by this function is specific to level interrupts.
+ */
+static void meta_intc_unmask_level_irq_nomask(struct irq_data *data)
+{
+	irq_hw_number_t hw = data->hwirq;
+	/* 1U: the offset can be 31, and shifting a signed 1 that far is undefined */
+	unsigned int bit = 1U << meta_intc_offset(hw);
+	void __iomem *stat_addr = meta_intc_stat_addr(hw);
+	void __iomem *vec_addr = meta_intc_vec_addr(hw);
+	unsigned int thread = hard_processor_id();
+
+	record_irq_is_unmasked(data);
+
+	/* there is no interrupt mask, so revector the interrupt */
+	metag_out32(TBI_TRIG_VEC(TBID_SIGNUM_TR2(thread)), vec_addr);
+
+	/* Re-trigger interrupt */
+	/* Writing a 1 triggers interrupt */
+	if (metag_in32(stat_addr) & bit)
+		metag_out32(bit, stat_addr);
+}
+
+/**
+ * meta_intc_irq_set_type() - set the type of an external irq
+ * @data: data for the external irq to set the type of
+ * @flow_type: new irq flow type
+ *
+ * Set the flow type of an external interrupt. This updates the irq chip and irq
+ * handler depending on whether the irq is edge or level sensitive (the polarity
+ * is ignored), and also sets up the bit in HWLEVELEXT so the hardware knows
+ * when to trigger.
+ *
+ * Returns: always 0
+ */
+static int meta_intc_irq_set_type(struct irq_data *data, unsigned int flow_type)
+{
+#ifdef CONFIG_METAG_SUSPEND_MEM
+	struct meta_intc_priv *priv = &meta_intc_priv;
+#endif
+	unsigned int irq = data->irq;
+	irq_hw_number_t hw = data->hwirq;
+	/* 1U: the offset can be 31, and shifting a signed 1 that far is undefined */
+	unsigned int bit = 1U << meta_intc_offset(hw);
+	void __iomem *level_addr = meta_intc_level_addr(hw);
+	unsigned long flags;
+	unsigned int level;
+
+	/* update the chip/handler */
+	if (flow_type & IRQ_TYPE_LEVEL_MASK)
+		__irq_set_chip_handler_name_locked(irq, &meta_intc_level_chip,
+						   handle_level_irq, NULL);
+	else
+		__irq_set_chip_handler_name_locked(irq, &meta_intc_edge_chip,
+						   handle_edge_irq, NULL);
+
+	/* and clear/set the bit in HWLEVELEXT */
+	__global_lock2(flags);
+	level = metag_in32(level_addr);
+	if (flow_type & IRQ_TYPE_LEVEL_MASK)
+		level |= bit;
+	else
+		level &= ~bit;
+	metag_out32(level, level_addr);
+#ifdef CONFIG_METAG_SUSPEND_MEM
+	/* remember that this level bit must be restored across suspend */
+	priv->levels_altered[meta_intc_bank(hw)] |= bit;
+#endif
+	__global_unlock2(flags);
+
+	return 0;
+}
+
+/**
+ * meta_intc_irq_demux() - external irq de-multiplexer
+ * @irq: the virtual interrupt number
+ * @desc: the interrupt description structure for this irq
+ *
+ * The cpu receives an interrupt on TR2 when a SoC interrupt has occurred. It is
+ * this function's job to demux this irq and figure out exactly which external
+ * irq needs servicing.
+ *
+ * A bank's status register is re-read after every handled interrupt, and the
+ * scan only moves on to the next bank once no unmasked interrupt is pending in
+ * the current one.
+ *
+ * Whilst using TR2 to detect external interrupts is a software convention it is
+ * (hopefully) unlikely to change.
+ */
+static void meta_intc_irq_demux(unsigned int irq, struct irq_desc *desc)
+{
+ struct meta_intc_priv *priv = &meta_intc_priv;
+ irq_hw_number_t hw;
+ unsigned int bank, irq_no, status;
+ void __iomem *stat_addr = meta_intc_stat_addr(0);
+
+ /*
+ * Locate which interrupt has caused our handler to run.
+ */
+ for (bank = 0; bank < priv->nr_banks; ++bank) {
+ /*
+ * Which interrupts are currently pending in this bank? Only
+ * IRQs recorded as unmasked are considered; other status bits
+ * are probably being handled by other Meta hardware threads.
+ */
+recalculate:
+ status = metag_in32(stat_addr) & priv->unmasked[bank];
+
+ for (hw = bank*32; status; status >>= 1, ++hw) {
+ if (status & 0x1) {
+ /*
+ * Map the hardware IRQ number to a virtual
+ * Linux IRQ number.
+ */
+ irq_no = irq_linear_revmap(priv->domain, hw);
+
+ /*
+ * Hand the interrupt over for handling. No
+ * check is made here that a mapping exists;
+ * filtering relies solely on the unmasked
+ * record applied to the status above.
+ */
+ generic_handle_irq(irq_no);
+
+ /*
+ * The handler may have re-enabled interrupts
+ * which could have caused a nested invocation
+ * of this code and make the copy of the
+ * status register we are using invalid.
+ */
+ goto recalculate;
+ }
+ }
+ stat_addr += HWSTAT_STRIDE;
+ }
+}
+
+#ifdef CONFIG_SMP
+/**
+ * meta_intc_set_affinity() - set the affinity for an interrupt
+ * @data: data for the external irq to set the affinity of
+ * @cpumask: cpu mask representing cpus which can handle the interrupt
+ * @force: whether to force (ignored)
+ *
+ * Revector the specified external irq onto a specific cpu's TR2 trigger, so
+ * that that cpu tends to be the one who handles it.
+ *
+ * Returns: always 0
+ *
+ * NOTE(review): the cpu returned by cpumask_any() indexes cpu_2_hwthread_id[]
+ * without a range or online check - confirm that callers never pass an empty
+ * mask or a mask of offline cpus.
+ */
+static int meta_intc_set_affinity(struct irq_data *data,
+ const struct cpumask *cpumask, bool force)
+{
+ irq_hw_number_t hw = data->hwirq;
+ void __iomem *vec_addr = meta_intc_vec_addr(hw);
+ unsigned int cpu, thread;
+
+ /*
+ * Wire up this interrupt from HWVECxEXT to the Meta core.
+ *
+ * Note that we can't wire up HWVECxEXT to interrupt more than
+ * one cpu (the interrupt code doesn't support it), so we just
+ * pick the first cpu we find in 'cpumask'.
+ */
+ cpu = cpumask_any(cpumask);
+ thread = cpu_2_hwthread_id[cpu];
+
+ metag_out32(TBI_TRIG_VEC(TBID_SIGNUM_TR2(thread)), vec_addr);
+
+ return 0;
+}
+#else
+/* without SMP the chips get no irq_set_affinity callback */
+#define meta_intc_set_affinity NULL
+#endif
+
+/* irq_chip flags shared by both chips; only set when CONFIG_PM_SLEEP is on */
+#ifdef CONFIG_PM_SLEEP
+#define META_INTC_CHIP_FLAGS (IRQCHIP_MASK_ON_SUSPEND \
+ | IRQCHIP_SKIP_SET_WAKE)
+#else
+#define META_INTC_CHIP_FLAGS 0
+#endif
+
+/* public edge/level irq chips which SoCs can override */
+
+/*
+ * Chip used for edge sensitive external IRQs. The mask/unmask callbacks
+ * default to the HWMASKEXT implementations and are replaced at init time by
+ * meta_intc_no_mask() when those registers are absent.
+ */
+struct irq_chip meta_intc_edge_chip = {
+ .irq_startup = meta_intc_startup_irq,
+ .irq_shutdown = meta_intc_shutdown_irq,
+ .irq_ack = meta_intc_ack_irq,
+ .irq_mask = meta_intc_mask_irq,
+ .irq_unmask = meta_intc_unmask_irq,
+ .irq_set_type = meta_intc_irq_set_type,
+ .irq_set_affinity = meta_intc_set_affinity,
+ .flags = META_INTC_CHIP_FLAGS,
+};
+
+/*
+ * Chip used for level sensitive external IRQs. Unlike the edge chip it has no
+ * irq_ack callback. The mask/unmask callbacks default to the HWMASKEXT
+ * implementations and are replaced at init time by meta_intc_no_mask() when
+ * those registers are absent.
+ */
+struct irq_chip meta_intc_level_chip = {
+ .irq_startup = meta_intc_startup_irq,
+ .irq_shutdown = meta_intc_shutdown_irq,
+ .irq_set_type = meta_intc_irq_set_type,
+ .irq_mask = meta_intc_mask_irq,
+ .irq_unmask = meta_intc_unmask_irq,
+ .irq_set_affinity = meta_intc_set_affinity,
+ .flags = META_INTC_CHIP_FLAGS,
+};
+
+/**
+ * meta_intc_map() - map an external irq
+ * @d: irq domain of external trigger block
+ * @irq: virtual irq number
+ * @hw: hardware irq number within external trigger block
+ *
+ * This sets up a virtual irq for a specified hardware interrupt. The irq chip
+ * and handler is configured, using the HWLEVELEXT registers to determine
+ * edge/level flow type. These registers will have been set when the irq type is
+ * set (or set to a default at init time).
+ *
+ * Returns: always 0
+ */
+static int meta_intc_map(struct irq_domain *d, unsigned int irq,
+			 irq_hw_number_t hw)
+{
+	/* 1U: the offset can be 31, and shifting a signed 1 that far is undefined */
+	unsigned int bit = 1U << meta_intc_offset(hw);
+	void __iomem *level_addr = meta_intc_level_addr(hw);
+
+	/* Go by the current sense in the HWLEVELEXT register */
+	if (metag_in32(level_addr) & bit)
+		irq_set_chip_and_handler(irq, &meta_intc_level_chip,
+					 handle_level_irq);
+	else
+		irq_set_chip_and_handler(irq, &meta_intc_edge_chip,
+					 handle_edge_irq);
+	return 0;
+}
+
+/* IRQ domain callbacks for the external trigger block */
+static const struct irq_domain_ops meta_intc_domain_ops = {
+ .map = meta_intc_map,
+ .xlate = irq_domain_xlate_twocell,
+};
+
+#ifdef CONFIG_METAG_SUSPEND_MEM
+
+/**
+ * struct meta_intc_context - suspend context
+ * @levels: State of HWLEVELEXT registers, one entry per bank
+ * @masks: State of HWMASKEXT registers, one entry per bank
+ * @vectors: State of HWVECEXT registers, one entry per hardware IRQ (each
+ * 32-bit register read is stored in a u8)
+ * @txvecint: State of TxVECINT registers
+ *
+ * This structure stores the IRQ state across suspend.
+ */
+struct meta_intc_context {
+ u32 levels[4];
+ u32 masks[4];
+ u8 vectors[4*32];
+
+ u8 txvecint[4][4];
+};
+
+/* suspend context, allocated by meta_intc_suspend(), freed by meta_intc_resume() */
+static struct meta_intc_context *meta_intc_context;
+
+/**
+ * meta_intc_suspend() - store irq state
+ *
+ * To avoid interfering with other threads we only save the IRQ state of IRQs in
+ * use by Linux. The state is kept in a freshly allocated context which
+ * meta_intc_resume() later restores and frees.
+ *
+ * Returns: 0 on success, -ENOMEM if the context could not be allocated
+ */
+static int meta_intc_suspend(void)
+{
+ struct meta_intc_priv *priv = &meta_intc_priv;
+ int i, j;
+ irq_hw_number_t hw;
+ unsigned int bank;
+ unsigned long flags;
+ struct meta_intc_context *context;
+ void __iomem *level_addr, *mask_addr, *vec_addr;
+ u32 mask, bit;
+
+ context = kzalloc(sizeof(*context), GFP_ATOMIC);
+ if (!context)
+ return -ENOMEM;
+
+ hw = 0;
+ level_addr = meta_intc_level_addr(0);
+ mask_addr = meta_intc_mask_addr(0);
+ for (bank = 0; bank < priv->nr_banks; ++bank) {
+ vec_addr = meta_intc_vec_addr(hw);
+
+ /* create mask of interrupts in use */
+ mask = 0;
+ /* one pass per bit of the u32; ends when the bit shifts out */
+ for (bit = 1; bit; bit <<= 1) {
+ i = irq_linear_revmap(priv->domain, hw);
+ /* save mapped irqs which are enabled or have actions */
+ if (i && (!irqd_irq_disabled(irq_get_irq_data(i)) ||
+ irq_has_action(i))) {
+ mask |= bit;
+
+ /* save trigger vector */
+ context->vectors[hw] = metag_in32(vec_addr);
+ }
+
+ ++hw;
+ vec_addr += HWVECnEXT_STRIDE;
+ }
+
+ /* save level state if any IRQ levels altered */
+ if (priv->levels_altered[bank])
+ context->levels[bank] = metag_in32(level_addr);
+ /* save mask state if any IRQs in use */
+ if (mask)
+ context->masks[bank] = metag_in32(mask_addr);
+
+ level_addr += HWSTAT_STRIDE;
+ mask_addr += HWSTAT_STRIDE;
+ }
+
+ /* save trigger matrixing */
+ __global_lock2(flags);
+ for (i = 0; i < 4; ++i)
+ for (j = 0; j < 4; ++j)
+ context->txvecint[i][j] = metag_in32(T0VECINT_BHALT +
+ TnVECINT_STRIDE*i +
+ 8*j);
+ __global_unlock2(flags);
+
+ /* publish the saved state for meta_intc_resume() */
+ meta_intc_context = context;
+ return 0;
+}
+
+/**
+ * meta_intc_resume() - restore saved irq state
+ *
+ * Restore the saved IRQ state and drop it. Mask and level registers are
+ * updated read-modify-write under the global lock, so only the bits that were
+ * selected for saving are changed.
+ *
+ * NOTE(review): the saved context is dereferenced without a NULL check - this
+ * presumably relies on resume only running after a successful suspend.
+ */
+static void meta_intc_resume(void)
+{
+ struct meta_intc_priv *priv = &meta_intc_priv;
+ int i, j;
+ irq_hw_number_t hw;
+ unsigned int bank;
+ unsigned long flags;
+ struct meta_intc_context *context = meta_intc_context;
+ void __iomem *level_addr, *mask_addr, *vec_addr;
+ u32 mask, bit, tmp;
+
+ meta_intc_context = NULL;
+
+ hw = 0;
+ level_addr = meta_intc_level_addr(0);
+ mask_addr = meta_intc_mask_addr(0);
+ for (bank = 0; bank < priv->nr_banks; ++bank) {
+ vec_addr = meta_intc_vec_addr(hw);
+
+ /* create mask of interrupts in use */
+ mask = 0;
+ for (bit = 1; bit; bit <<= 1) {
+ i = irq_linear_revmap(priv->domain, hw);
+ /* restore mapped irqs, enabled or with actions */
+ if (i && (!irqd_irq_disabled(irq_get_irq_data(i)) ||
+ irq_has_action(i))) {
+ mask |= bit;
+
+ /* restore trigger vector */
+ metag_out32(context->vectors[hw], vec_addr);
+ }
+
+ ++hw;
+ vec_addr += HWVECnEXT_STRIDE;
+ }
+
+ if (mask) {
+ /* restore mask state */
+ __global_lock2(flags);
+ tmp = metag_in32(mask_addr);
+ tmp = (tmp & ~mask) | (context->masks[bank] & mask);
+ metag_out32(tmp, mask_addr);
+ __global_unlock2(flags);
+ }
+
+ /* from here on mask selects the level bits altered by set_type */
+ mask = priv->levels_altered[bank];
+ if (mask) {
+ /* restore level state */
+ __global_lock2(flags);
+ tmp = metag_in32(level_addr);
+ tmp = (tmp & ~mask) | (context->levels[bank] & mask);
+ metag_out32(tmp, level_addr);
+ __global_unlock2(flags);
+ }
+
+ level_addr += HWSTAT_STRIDE;
+ mask_addr += HWSTAT_STRIDE;
+ }
+
+ /* restore trigger matrixing */
+ __global_lock2(flags);
+ for (i = 0; i < 4; ++i) {
+ for (j = 0; j < 4; ++j) {
+ metag_out32(context->txvecint[i][j],
+ T0VECINT_BHALT +
+ TnVECINT_STRIDE*i +
+ 8*j);
+ }
+ }
+ __global_unlock2(flags);
+
+ kfree(context);
+}
+
+/* System core callbacks saving/restoring the external IRQ state */
+static struct syscore_ops meta_intc_syscore_ops = {
+ .suspend = meta_intc_suspend,
+ .resume = meta_intc_resume,
+};
+
+/* Register the suspend/resume callbacks above; @priv is currently unused. */
+static void __init meta_intc_init_syscore_ops(struct meta_intc_priv *priv)
+{
+ register_syscore_ops(&meta_intc_syscore_ops);
+}
+#else
+#define meta_intc_init_syscore_ops(priv) do {} while (0)
+#endif
+
+/**
+ * meta_intc_init_cpu() - register with a Meta cpu
+ * @priv: private interrupt controller data (currently unused)
+ * @cpu: the CPU to register on
+ *
+ * Configure @cpu's TR2 irq so that we can demux external irqs.
+ */
+static void __init meta_intc_init_cpu(struct meta_intc_priv *priv, int cpu)
+{
+ unsigned int thread = cpu_2_hwthread_id[cpu];
+ unsigned int signum = TBID_SIGNUM_TR2(thread);
+ int irq = tbisig_map(signum);
+
+ /* Register the multiplexed IRQ handler */
+ irq_set_chained_handler(irq, meta_intc_irq_demux);
+ irq_set_irq_type(irq, IRQ_TYPE_LEVEL_LOW);
+}
+
+/**
+ * meta_intc_no_mask() - indicate lack of HWMASKEXT registers
+ *
+ * Called from SoC code (or from the init code in this file) to indicate at run
+ * time that there are no HWMASKEXT registers (for example depending on some SoC
+ * revision register). The mask and unmask callbacks of both irq chips are
+ * switched to the fallback unvectoring/retriggering implementations.
+ */
+void __init meta_intc_no_mask(void)
+{
+	struct irq_chip *edge = &meta_intc_edge_chip;
+	struct irq_chip *level = &meta_intc_level_chip;
+
+	/* masking is the same for both flow types: unvector the IRQ */
+	edge->irq_mask = meta_intc_mask_irq_nomask;
+	level->irq_mask = meta_intc_mask_irq_nomask;
+
+	/* unmasking retriggers differently for edge and level IRQs */
+	edge->irq_unmask = meta_intc_unmask_edge_irq_nomask;
+	level->irq_unmask = meta_intc_unmask_level_irq_nomask;
+}
+
+/**
+ * init_external_IRQ() - initialise the external irq controller
+ *
+ * Set up the external irq controller using device tree properties. This is
+ * called from init_IRQ().
+ *
+ * Returns: 0 on success, -ENOENT if no "img,meta-intc" node exists, the error
+ * from reading "num-banks", -EINVAL if "num-banks" is outside 1..4, or -ENOMEM
+ * if the IRQ domain could not be created.
+ */
+int __init init_external_IRQ(void)
+{
+	struct meta_intc_priv *priv = &meta_intc_priv;
+	struct device_node *node;
+	int ret, cpu;
+	u32 val;
+
+	node = of_find_compatible_node(NULL, NULL, "img,meta-intc");
+	if (!node)
+		return -ENOENT;
+
+	/* Get number of banks */
+	ret = of_property_read_u32(node, "num-banks", &val);
+	if (ret) {
+		pr_err("meta-intc: No num-banks property found\n");
+		goto err_put_node;
+	}
+	/* the per-bank arrays in struct meta_intc_priv hold at most 4 banks */
+	if (val < 1 || val > 4) {
+		pr_err("meta-intc: num-banks (%u) out of range\n", val);
+		ret = -EINVAL;
+		goto err_put_node;
+	}
+	priv->nr_banks = val;
+
+	/* No HWMASKEXT registers present? */
+	if (of_get_property(node, "no-mask", NULL))
+		meta_intc_no_mask();
+
+	/* Set up a linear IRQ domain covering all banks (32 IRQs per bank) */
+	priv->domain = irq_domain_add_linear(node, priv->nr_banks*32,
+					     &meta_intc_domain_ops, priv);
+	if (unlikely(!priv->domain)) {
+		pr_err("meta-intc: cannot add IRQ domain\n");
+		ret = -ENOMEM;
+		goto err_put_node;
+	}
+
+	/* Setup TR2 for all cpus. */
+	for_each_possible_cpu(cpu)
+		meta_intc_init_cpu(priv, cpu);
+
+	/* Set up system suspend/resume callbacks */
+	meta_intc_init_syscore_ops(priv);
+
+	pr_info("meta-intc: External IRQ controller initialised (%u IRQs)\n",
+		priv->nr_banks*32);
+
+	/* on success the node reference is kept, as before */
+	return 0;
+
+err_put_node:
+	/* drop the reference taken by of_find_compatible_node() */
+	of_node_put(node);
+	return ret;
+}
diff --git a/drivers/irqchip/irq-metag.c b/drivers/irqchip/irq-metag.c
new file mode 100644
index 00000000000..8e94d7a3b20
--- /dev/null
+++ b/drivers/irqchip/irq-metag.c
@@ -0,0 +1,343 @@
+/*
+ * Meta internal (HWSTATMETA) interrupt code.
+ *
+ * Copyright (C) 2011-2012 Imagination Technologies Ltd.
+ *
+ * This code is based on the code in SoC/common/irq.c and SoC/comet/irq.c
+ * The code base could be generalised/merged as a lot of the functionality is
+ * similar. Until this is done, we try to keep the code simple here.
+ */
+
+#include <linux/interrupt.h>
+#include <linux/io.h>
+#include <linux/irqdomain.h>
+
+#include <asm/irq.h>
+#include <asm/hwthread.h>
+
+/* Addresses of the vector registers for the PERF0 and PERF1 triggers */
+#define PERF0VECINT 0x04820580
+#define PERF1VECINT 0x04820588
+/* Hardware IRQ numbers (bit positions in HWSTATMETA) of those triggers */
+#define PERF0TRIG_OFFSET 16
+#define PERF1TRIG_OFFSET 17
+
+/**
+ * struct metag_internal_irq_priv - private meta internal interrupt data
+ * @domain: IRQ domain for all internal Meta IRQs (HWSTATMETA)
+ * @unmasked: Record of unmasked IRQs (bit N set means hardware IRQ N is
+ * unmasked)
+ */
+struct metag_internal_irq_priv {
+ struct irq_domain *domain;
+
+ unsigned long unmasked;
+};
+
+/* Private data for the one and only internal interrupt controller */
+static struct metag_internal_irq_priv metag_internal_irq_priv;
+
+/* Forward declarations of the callbacks referenced by the irq_chip below */
+static unsigned int metag_internal_irq_startup(struct irq_data *data);
+static void metag_internal_irq_shutdown(struct irq_data *data);
+static void metag_internal_irq_ack(struct irq_data *data);
+static void metag_internal_irq_mask(struct irq_data *data);
+static void metag_internal_irq_unmask(struct irq_data *data);
+#ifdef CONFIG_SMP
+static int metag_internal_irq_set_affinity(struct irq_data *data,
+ const struct cpumask *cpumask, bool force);
+#endif
+
+/*
+ * The only chip used for internal (HWSTATMETA) IRQs; installed together with
+ * handle_edge_irq by metag_internal_intc_map(). The affinity callback is only
+ * provided on SMP builds.
+ */
+static struct irq_chip internal_irq_edge_chip = {
+ .name = "HWSTATMETA-IRQ",
+ .irq_startup = metag_internal_irq_startup,
+ .irq_shutdown = metag_internal_irq_shutdown,
+ .irq_ack = metag_internal_irq_ack,
+ .irq_mask = metag_internal_irq_mask,
+ .irq_unmask = metag_internal_irq_unmask,
+#ifdef CONFIG_SMP
+ .irq_set_affinity = metag_internal_irq_set_affinity,
+#endif
+};
+
+/*
+ * metag_hwvec_addr - look up the *VECINT register of a HWSTATMETA trigger
+ * @hw: hardware irq number of the trigger
+ *
+ * This function doubles as the table of supported triggers on HWSTATMETA.
+ * A structure could be used instead, but changes here should be rare so it
+ * is kept simple.
+ *
+ * Returns the register address, or NULL if the trigger is not supported.
+ */
+static inline void __iomem *metag_hwvec_addr(irq_hw_number_t hw)
+{
+	switch (hw) {
+	case PERF0TRIG_OFFSET:
+		return (void __iomem *)PERF0VECINT;
+	case PERF1TRIG_OFFSET:
+		return (void __iomem *)PERF1VECINT;
+	default:
+		/* no vector register known for this trigger */
+		return NULL;
+	}
+}
+
+/*
+ * metag_internal_irq_startup - setup an internal irq
+ * @data: data for the internal irq to start up
+ *
+ * Clear any pending interrupt, then unmask the irq, which multiplexes it
+ * onto TR1.
+ *
+ * Returns: always 0
+ */
+static unsigned int metag_internal_irq_startup(struct irq_data *data)
+{
+ /* Clear (toggle) the bit in HWSTATMETA for our interrupt. */
+ metag_internal_irq_ack(data);
+
+ /* Enable the interrupt by unmasking it */
+ metag_internal_irq_unmask(data);
+
+ return 0;
+}
+
+/*
+ * metag_internal_irq_shutdown - turn off the irq
+ * @data: data for the internal irq to turn off
+ *
+ * Mask the irq (which stops muxing it onto TR1) and clear any pending
+ * interrupt.
+ */
+static void metag_internal_irq_shutdown(struct irq_data *data)
+{
+ /* Disable the IRQ at the core by masking it. */
+ metag_internal_irq_mask(data);
+
+ /* Clear (toggle) the bit in HWSTATMETA for our interrupt. */
+ metag_internal_irq_ack(data);
+}
+
+/*
+ * metag_internal_irq_ack - acknowledge irq
+ * @data: data for the internal irq to ack
+ *
+ * Writes the irq's bit to HWSTATMETA, but only if that bit currently reads as
+ * set.
+ */
+static void metag_internal_irq_ack(struct irq_data *data)
+{
+ irq_hw_number_t hw = data->hwirq;
+ unsigned int bit = 1 << hw;
+
+ if (metag_in32(HWSTATMETA) & bit)
+ metag_out32(bit, HWSTATMETA);
+}
+
+/**
+ * metag_internal_irq_mask() - mask an internal irq by unvectoring
+ * @data: data for the internal irq to mask
+ *
+ * HWSTATMETA has no mask register. Instead the IRQ is unvectored from the core
+ * and retriggered if necessary later. The IRQ is also removed from the
+ * unmasked record so the demux handler ignores it.
+ */
+static void metag_internal_irq_mask(struct irq_data *data)
+{
+ struct metag_internal_irq_priv *priv = &metag_internal_irq_priv;
+ irq_hw_number_t hw = data->hwirq;
+ void __iomem *vec_addr = metag_hwvec_addr(hw);
+
+ clear_bit(hw, &priv->unmasked);
+
+ /* there is no interrupt mask, so unvector the interrupt */
+ metag_out32(0, vec_addr);
+}
+
+/**
+ * metag_internal_irq_unmask() - unmask an internal irq by revectoring
+ * @data: data for the internal irq to unmask
+ *
+ * HWSTATMETA has no mask register. Instead the IRQ is revectored back to the
+ * core (onto TR1 of the thread reported by hard_processor_id()) and
+ * retriggered if necessary.
+ */
+static void metag_internal_irq_unmask(struct irq_data *data)
+{
+ struct metag_internal_irq_priv *priv = &metag_internal_irq_priv;
+ irq_hw_number_t hw = data->hwirq;
+ unsigned int bit = 1 << hw;
+ void __iomem *vec_addr = metag_hwvec_addr(hw);
+ unsigned int thread = hard_processor_id();
+
+ set_bit(hw, &priv->unmasked);
+
+ /* there is no interrupt mask, so revector the interrupt */
+ metag_out32(TBI_TRIG_VEC(TBID_SIGNUM_TR1(thread)), vec_addr);
+
+ /*
+ * Re-trigger interrupt
+ *
+ * Writing a 1 toggles, and a 0->1 transition triggers. We only
+ * retrigger if the status bit is already set, which means we
+ * need to clear it first. Retriggering is fundamentally racy
+ * because if the interrupt fires again after we clear it we
+ * could end up clearing it again and the interrupt handler
+ * thinking it hasn't fired. Therefore we need to keep trying to
+ * retrigger until the bit is set.
+ */
+ if (metag_in32(HWSTATMETA) & bit) {
+ metag_out32(bit, HWSTATMETA);
+ while (!(metag_in32(HWSTATMETA) & bit))
+ metag_out32(bit, HWSTATMETA);
+ }
+}
+
+#ifdef CONFIG_SMP
+/*
+ * metag_internal_irq_set_affinity - set the affinity for an interrupt
+ * @data: data for the internal irq to set the affinity of
+ * @cpumask: cpu mask representing cpus which can handle the interrupt
+ * @force: not used by this function
+ *
+ * Revectors the irq onto the TR1 trigger of one cpu taken from @cpumask.
+ * Always returns 0.
+ *
+ * NOTE(review): the cpu returned by cpumask_any() indexes cpu_2_hwthread_id[]
+ * without a range or online check - confirm that callers never pass an empty
+ * mask or a mask of offline cpus.
+ */
+static int metag_internal_irq_set_affinity(struct irq_data *data,
+ const struct cpumask *cpumask, bool force)
+{
+ unsigned int cpu, thread;
+ irq_hw_number_t hw = data->hwirq;
+ /*
+ * Wire up this interrupt from *VECINT to the Meta core.
+ *
+ * Note that we can't wire up *VECINT to interrupt more than
+ * one cpu (the interrupt code doesn't support it), so we just
+ * pick the first cpu we find in 'cpumask'.
+ */
+ cpu = cpumask_any(cpumask);
+ thread = cpu_2_hwthread_id[cpu];
+
+ metag_out32(TBI_TRIG_VEC(TBID_SIGNUM_TR1(thread)),
+ metag_hwvec_addr(hw));
+
+ return 0;
+}
+#endif
+
+/*
+ * metag_internal_irq_demux - irq de-multiplexer
+ * @irq: the interrupt number
+ * @desc: the interrupt description structure for this irq
+ *
+ * The cpu receives an interrupt on TR1 when an interrupt has
+ * occurred. It is this function's job to demux this irq and
+ * figure out exactly which trigger needs servicing.
+ *
+ * HWSTATMETA is re-read after every handled interrupt; the function
+ * returns once no unmasked interrupt is pending.
+ */
+static void metag_internal_irq_demux(unsigned int irq, struct irq_desc *desc)
+{
+ struct metag_internal_irq_priv *priv = irq_desc_get_handler_data(desc);
+ irq_hw_number_t hw;
+ unsigned int irq_no;
+ u32 status;
+
+recalculate:
+ /*
+ * Only IRQs recorded as unmasked are considered; other status bits
+ * are probably being handled by other Meta hardware threads.
+ */
+ status = metag_in32(HWSTATMETA) & priv->unmasked;
+
+ for (hw = 0; status != 0; status >>= 1, ++hw) {
+ if (status & 0x1) {
+ /*
+ * Map the hardware IRQ number to a virtual Linux IRQ
+ * number.
+ */
+ irq_no = irq_linear_revmap(priv->domain, hw);
+
+ /*
+ * Hand the interrupt over for handling. No check
+ * is made here that a mapping exists; filtering
+ * relies solely on the unmasked record applied
+ * to the status above.
+ */
+ generic_handle_irq(irq_no);
+
+ /*
+ * The handler may have re-enabled interrupts
+ * which could have caused a nested invocation
+ * of this code and make the copy of the
+ * status register we are using invalid.
+ */
+ goto recalculate;
+ }
+ }
+}
+
+/**
+ * internal_irq_map() - Map an internal meta IRQ to a virtual IRQ number.
+ * @hw: Number of the internal IRQ. Must be in range.
+ *
+ * Returns: The result of irq_create_mapping() for the Meta internal IRQ
+ * specified by @hw, or -ENODEV if the internal IRQ domain has not
+ * been set up yet.
+ */
+int internal_irq_map(unsigned int hw)
+{
+ struct metag_internal_irq_priv *priv = &metag_internal_irq_priv;
+ if (!priv->domain)
+ return -ENODEV;
+ return irq_create_mapping(priv->domain, hw);
+}
+
+/**
+ * metag_internal_irq_init_cpu - register with the Meta cpu
+ * @priv: private interrupt controller data, stored as the handler data
+ * @cpu: the CPU to register on
+ *
+ * Configure @cpu's TR1 irq so that we can demux irqs.
+ */
+static void metag_internal_irq_init_cpu(struct metag_internal_irq_priv *priv,
+ int cpu)
+{
+ unsigned int thread = cpu_2_hwthread_id[cpu];
+ unsigned int signum = TBID_SIGNUM_TR1(thread);
+ int irq = tbisig_map(signum);
+
+ /* Register the multiplexed IRQ handler */
+ irq_set_handler_data(irq, priv);
+ irq_set_chained_handler(irq, metag_internal_irq_demux);
+ irq_set_irq_type(irq, IRQ_TYPE_LEVEL_LOW);
+}
+
+/**
+ * metag_internal_intc_map() - map an internal irq
+ * @d: irq domain of internal trigger block
+ * @irq: virtual irq number
+ * @hw: hardware irq number within internal trigger block
+ *
+ * Configures the irq chip and flow handler of the virtual irq that stands for
+ * the given hardware interrupt.
+ *
+ * Returns: 0 on success, -EINVAL if @hw has no vector register
+ */
+static int metag_internal_intc_map(struct irq_domain *d, unsigned int irq,
+				   irq_hw_number_t hw)
+{
+	void __iomem *vec_addr = metag_hwvec_addr(hw);
+
+	/* only triggers that have a vector register can be registered */
+	if (vec_addr == NULL)
+		return -EINVAL;
+
+	irq_set_chip_and_handler(irq, &internal_irq_edge_chip, handle_edge_irq);
+	return 0;
+}
+
+/* IRQ domain callbacks for the internal trigger block (map only) */
+static const struct irq_domain_ops metag_internal_intc_domain_ops = {
+ .map = metag_internal_intc_map,
+};
+
+/**
+ * init_internal_IRQ() - register internal IRQs
+ *
+ * Create the IRQ domain for the internal (HWSTATMETA) IRQs and install the
+ * TR1 demux handler on every possible cpu.
+ *
+ * Returns: 0 on success, -ENOMEM if the IRQ domain could not be created
+ */
+int __init init_internal_IRQ(void)
+{
+	struct metag_internal_irq_priv *priv = &metag_internal_irq_priv;
+	unsigned int cpu;
+
+	/* Set up an IRQ domain */
+	priv->domain = irq_domain_add_linear(NULL, 32,
+					     &metag_internal_intc_domain_ops,
+					     priv);
+	if (unlikely(!priv->domain)) {
+		pr_err("meta-internal-intc: cannot add IRQ domain\n");
+		return -ENOMEM;
+	}
+
+	/* Setup TR1 for all cpus. */
+	for_each_possible_cpu(cpu)
+		metag_internal_irq_init_cpu(priv, cpu);
+
+	return 0;
+}
diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c
index a5702d74d2b..3939829f6c5 100644
--- a/fs/binfmt_elf.c
+++ b/fs/binfmt_elf.c
@@ -322,6 +322,8 @@ create_elf_tables(struct linux_binprm *bprm, struct elfhdr *exec,
return 0;
}
+#ifndef elf_map
+
static unsigned long elf_map(struct file *filep, unsigned long addr,
struct elf_phdr *eppnt, int prot, int type,
unsigned long total_size)
@@ -356,6 +358,8 @@ static unsigned long elf_map(struct file *filep, unsigned long addr,
return(map_addr);
}
+#endif /* !elf_map */
+
static unsigned long total_mapping_size(struct elf_phdr *cmds, int nr)
{
int i, first_idx = -1, last_idx = -1;
diff --git a/include/asm-generic/io.h b/include/asm-generic/io.h
index aba53083297..ac9da00e9f2 100644
--- a/include/asm-generic/io.h
+++ b/include/asm-generic/io.h
@@ -346,6 +346,7 @@ extern void ioport_unmap(void __iomem *p);
#define xlate_dev_kmem_ptr(p) p
#define xlate_dev_mem_ptr(p) __va(p)
+#ifdef CONFIG_VIRT_TO_BUS
#ifndef virt_to_bus
static inline unsigned long virt_to_bus(volatile void *address)
{
@@ -357,6 +358,7 @@ static inline void *bus_to_virt(unsigned long address)
return (void *) address;
}
#endif
+#endif
#ifndef memset_io
#define memset_io(a, b, c) memset(__io_virt(a), (b), (c))
diff --git a/include/asm-generic/unistd.h b/include/asm-generic/unistd.h
index 257c55ec4f7..4077b5d9ff8 100644
--- a/include/asm-generic/unistd.h
+++ b/include/asm-generic/unistd.h
@@ -17,5 +17,12 @@
* but it doesn't work on all toolchains, so we just do it by hand
*/
#ifndef cond_syscall
-#define cond_syscall(x) asm(".weak\t" #x "\n\t.set\t" #x ",sys_ni_syscall")
+#ifdef CONFIG_SYMBOL_PREFIX
+#define __SYMBOL_PREFIX CONFIG_SYMBOL_PREFIX
+#else
+#define __SYMBOL_PREFIX
+#endif
+#define cond_syscall(x) asm(".weak\t" __SYMBOL_PREFIX #x "\n\t" \
+ ".set\t" __SYMBOL_PREFIX #x "," \
+ __SYMBOL_PREFIX "sys_ni_syscall")
#endif
diff --git a/include/clocksource/metag_generic.h b/include/clocksource/metag_generic.h
new file mode 100644
index 00000000000..ac17e7d06cf
--- /dev/null
+++ b/include/clocksource/metag_generic.h
@@ -0,0 +1,21 @@
+/*
+ * Copyright (C) 2013 Imagination Technologies Ltd.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+#ifndef __CLKSOURCE_METAG_GENERIC_H
+#define __CLKSOURCE_METAG_GENERIC_H
+
+extern int metag_generic_timer_init(void);
+
+#endif /* __CLKSOURCE_METAG_GENERIC_H */
diff --git a/include/linux/irqchip/metag-ext.h b/include/linux/irqchip/metag-ext.h
new file mode 100644
index 00000000000..697af0fe7c5
--- /dev/null
+++ b/include/linux/irqchip/metag-ext.h
@@ -0,0 +1,33 @@
+/*
+ * Copyright (C) 2012 Imagination Technologies
+ */
+
+#ifndef _LINUX_IRQCHIP_METAG_EXT_H_
+#define _LINUX_IRQCHIP_METAG_EXT_H_
+
+struct irq_data;
+struct platform_device;
+
+/* called from core irq code at init */
+int init_external_IRQ(void);
+
+/*
+ * called from SoC init_irq() callback to dynamically indicate the lack of
+ * HWMASKEXT registers.
+ */
+void meta_intc_no_mask(void);
+
+/*
+ * These allow SoCs to specialise the interrupt controller from their init_irq
+ * callbacks.
+ */
+
+extern struct irq_chip meta_intc_edge_chip;
+extern struct irq_chip meta_intc_level_chip;
+
+/* this should be called in the mask callback */
+void meta_intc_mask_irq_simple(struct irq_data *data);
+/* this should be called in the unmask callback */
+void meta_intc_unmask_irq_simple(struct irq_data *data);
+
+#endif /* _LINUX_IRQCHIP_METAG_EXT_H_ */
diff --git a/include/linux/irqchip/metag.h b/include/linux/irqchip/metag.h
new file mode 100644
index 00000000000..4ebdfb3101a
--- /dev/null
+++ b/include/linux/irqchip/metag.h
@@ -0,0 +1,24 @@
+/*
+ * Copyright (C) 2011 Imagination Technologies
+ */
+
+#ifndef _LINUX_IRQCHIP_METAG_H_
+#define _LINUX_IRQCHIP_METAG_H_
+
+#include <linux/errno.h>
+
+#ifdef CONFIG_METAG_PERFCOUNTER_IRQS
+extern int init_internal_IRQ(void);
+extern int internal_irq_map(unsigned int hw);
+#else
+static inline int init_internal_IRQ(void)
+{
+ return 0;
+}
+static inline int internal_irq_map(unsigned int hw)
+{
+ return -EINVAL;
+}
+#endif
+
+#endif /* _LINUX_IRQCHIP_METAG_H_ */
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 1ede55f292c..7acc9dc73c9 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -115,6 +115,8 @@ extern unsigned int kobjsize(const void *objp);
# define VM_SAO VM_ARCH_1 /* Strong Access Ordering (powerpc) */
#elif defined(CONFIG_PARISC)
# define VM_GROWSUP VM_ARCH_1
+#elif defined(CONFIG_METAG)
+# define VM_GROWSUP VM_ARCH_1
#elif defined(CONFIG_IA64)
# define VM_GROWSUP VM_ARCH_1
#elif !defined(CONFIG_MMU)
diff --git a/include/uapi/linux/elf.h b/include/uapi/linux/elf.h
index 900b9484445..8072d352b98 100644
--- a/include/uapi/linux/elf.h
+++ b/include/uapi/linux/elf.h
@@ -395,6 +395,8 @@ typedef struct elf64_shdr {
#define NT_ARM_TLS 0x401 /* ARM TLS register */
#define NT_ARM_HW_BREAK 0x402 /* ARM hardware breakpoint registers */
#define NT_ARM_HW_WATCH 0x403 /* ARM hardware watchpoint registers */
+#define NT_METAG_CBUF 0x500 /* Metag catch buffer registers */
+#define NT_METAG_RPIPE 0x501 /* Metag read pipeline state */
/* Note header in a PT_NOTE section */
diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index 7244acde77b..6989df2ba19 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -178,7 +178,7 @@ void tracing_off_permanent(void)
#define RB_MAX_SMALL_DATA (RB_ALIGNMENT * RINGBUF_TYPE_DATA_TYPE_LEN_MAX)
#define RB_EVNT_MIN_SIZE 8U /* two 32bit words */
-#if !defined(CONFIG_64BIT) || defined(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS)
+#ifndef CONFIG_HAVE_64BIT_ALIGNED_ACCESS
# define RB_FORCE_8BYTE_ALIGNMENT 0
# define RB_ARCH_ALIGNMENT RB_ALIGNMENT
#else
@@ -186,6 +186,8 @@ void tracing_off_permanent(void)
# define RB_ARCH_ALIGNMENT 8U
#endif
+#define RB_ALIGN_DATA __aligned(RB_ARCH_ALIGNMENT)
+
/* define RINGBUF_TYPE_DATA for 'case RINGBUF_TYPE_DATA:' */
#define RINGBUF_TYPE_DATA 0 ... RINGBUF_TYPE_DATA_TYPE_LEN_MAX
@@ -334,7 +336,7 @@ EXPORT_SYMBOL_GPL(ring_buffer_event_data);
struct buffer_data_page {
u64 time_stamp; /* page time stamp */
local_t commit; /* write committed index */
- unsigned char data[]; /* data of buffer page */
+ unsigned char data[] RB_ALIGN_DATA; /* data of buffer page */
};
/*
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index e4a7f808fa0..28be08c09ba 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -674,7 +674,7 @@ config STACKTRACE
config DEBUG_STACK_USAGE
bool "Stack utilization instrumentation"
- depends on DEBUG_KERNEL && !IA64 && !PARISC
+ depends on DEBUG_KERNEL && !IA64 && !PARISC && !METAG
help
Enables the display of the minimum amount of free stack which each
task has ever had available in the sysrq-T and sysrq-P debug output.
@@ -855,7 +855,7 @@ config FRAME_POINTER
bool "Compile the kernel with frame pointers"
depends on DEBUG_KERNEL && \
(CRIS || M68K || FRV || UML || \
- AVR32 || SUPERH || BLACKFIN || MN10300) || \
+ AVR32 || SUPERH || BLACKFIN || MN10300 || METAG) || \
ARCH_WANT_FRAME_POINTERS
default y if (DEBUG_INFO && UML) || ARCH_WANT_FRAME_POINTERS
help
diff --git a/scripts/checkstack.pl b/scripts/checkstack.pl
index 17e38439670..544aa56b620 100755
--- a/scripts/checkstack.pl
+++ b/scripts/checkstack.pl
@@ -34,7 +34,7 @@ use strict;
# $1 (first bracket) matches the dynamic amount of the stack growth
#
# use anything else and feel the pain ;)
-my (@stack, $re, $dre, $x, $xs);
+my (@stack, $re, $dre, $x, $xs, $funcre);
{
my $arch = shift;
if ($arch eq "") {
@@ -44,6 +44,7 @@ my (@stack, $re, $dre, $x, $xs);
$x = "[0-9a-f]"; # hex character
$xs = "[0-9a-f ]"; # hex character or space
+ $funcre = qr/^$x* <(.*)>:$/;
if ($arch eq 'arm') {
#c0008ffc: e24dd064 sub sp, sp, #100 ; 0x64
$re = qr/.*sub.*sp, sp, #(([0-9]{2}|[3-9])[0-9]{2})/o;
@@ -66,6 +67,10 @@ my (@stack, $re, $dre, $x, $xs);
# 2b6c: 4e56 fb70 linkw %fp,#-1168
# 1df770: defc ffe4 addaw #-28,%sp
$re = qr/.*(?:linkw %fp,|addaw )#-([0-9]{1,4})(?:,%sp)?$/o;
+ } elsif ($arch eq 'metag') {
+ #400026fc: 40 00 00 82 ADD A0StP,A0StP,#0x8
+ $re = qr/.*ADD.*A0StP,A0StP,\#(0x$x{1,8})/o;
+ $funcre = qr/^$x* <[^\$](.*)>:$/;
} elsif ($arch eq 'mips64') {
#8800402c: 67bdfff0 daddiu sp,sp,-16
$re = qr/.*daddiu.*sp,sp,-(([0-9]{2}|[3-9])[0-9]{2})/o;
@@ -109,7 +114,6 @@ my (@stack, $re, $dre, $x, $xs);
#
# main()
#
-my $funcre = qr/^$x* <(.*)>:$/;
my ($func, $file, $lastslash);
while (my $line = <STDIN>) {
diff --git a/scripts/genksyms/genksyms.c b/scripts/genksyms/genksyms.c
index 8a106499ec4..d25e4a118d3 100644
--- a/scripts/genksyms/genksyms.c
+++ b/scripts/genksyms/genksyms.c
@@ -826,7 +826,8 @@ int main(int argc, char **argv)
genksyms_usage();
return 1;
}
- if ((strcmp(arch, "h8300") == 0) || (strcmp(arch, "blackfin") == 0))
+ if ((strcmp(arch, "h8300") == 0) || (strcmp(arch, "blackfin") == 0) ||
+ (strcmp(arch, "metag") == 0))
mod_prefix = "_";
{
extern int yydebug;
diff --git a/scripts/recordmcount.c b/scripts/recordmcount.c
index ee52cb8e17a..9c22317778e 100644
--- a/scripts/recordmcount.c
+++ b/scripts/recordmcount.c
@@ -33,6 +33,13 @@
#include <string.h>
#include <unistd.h>
+#ifndef EM_METAG
+/* Remove this when these make it to the standard system elf.h. */
+#define EM_METAG 174
+#define R_METAG_ADDR32 2
+#define R_METAG_NONE 3
+#endif
+
static int fd_map; /* File descriptor for file being modified. */
static int mmap_failed; /* Boolean flag. */
static void *ehdr_curr; /* current ElfXX_Ehdr * for resource cleanup */
@@ -341,6 +348,12 @@ do_file(char const *const fname)
altmcount = "__gnu_mcount_nc";
break;
case EM_IA_64: reltype = R_IA64_IMM64; gpfx = '_'; break;
+ case EM_METAG: reltype = R_METAG_ADDR32;
+ altmcount = "_mcount_wrapper";
+ rel_type_nop = R_METAG_NONE;
+ /* We happen to have the same requirement as MIPS */
+ is_fake_mcount32 = MIPS32_is_fake_mcount;
+ break;
case EM_MIPS: /* reltype: e_class */ gpfx = '_'; break;
case EM_PPC: reltype = R_PPC_ADDR32; gpfx = '_'; break;
case EM_PPC64: reltype = R_PPC64_ADDR64; gpfx = '_'; break;
diff --git a/tools/perf/perf.h b/tools/perf/perf.h
index d5818c98d05..74659ecf93e 100644
--- a/tools/perf/perf.h
+++ b/tools/perf/perf.h
@@ -100,6 +100,12 @@
#define CPUINFO_PROC "Processor"
#endif
+#ifdef __metag__
+#define rmb() asm volatile("" ::: "memory")
+#define cpu_relax() asm volatile("" ::: "memory")
+#define CPUINFO_PROC "CPU"
+#endif
+
#include <time.h>
#include <unistd.h>
#include <sys/types.h>