path: root/arch/arc/mm
Diffstat (limited to 'arch/arc/mm')
-rw-r--r--  arch/arc/mm/Makefile        |  10
-rw-r--r--  arch/arc/mm/cache_arc700.c  | 768
-rw-r--r--  arch/arc/mm/dma.c           |  94
-rw-r--r--  arch/arc/mm/extable.c       |  63
-rw-r--r--  arch/arc/mm/fault.c         | 228
-rw-r--r--  arch/arc/mm/init.c          | 187
-rw-r--r--  arch/arc/mm/ioremap.c       |  91
-rw-r--r--  arch/arc/mm/tlb.c           | 645
-rw-r--r--  arch/arc/mm/tlbex.S         | 408
9 files changed, 2494 insertions, 0 deletions
diff --git a/arch/arc/mm/Makefile b/arch/arc/mm/Makefile
new file mode 100644
index 00000000000..168dc146a8f
--- /dev/null
+++ b/arch/arc/mm/Makefile
@@ -0,0 +1,10 @@
+#
+# Copyright (C) 2004, 2007-2010, 2011-2012 Synopsys, Inc. (www.synopsys.com)
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License version 2 as
+# published by the Free Software Foundation.
+#
+
+obj-y := extable.o ioremap.o dma.o fault.o init.o
+obj-y += tlb.o tlbex.o cache_arc700.o
diff --git a/arch/arc/mm/cache_arc700.c b/arch/arc/mm/cache_arc700.c
new file mode 100644
index 00000000000..88d617d8423
--- /dev/null
+++ b/arch/arc/mm/cache_arc700.c
@@ -0,0 +1,768 @@
+/*
+ * ARC700 VIPT Cache Management
+ *
+ * Copyright (C) 2004, 2007-2010, 2011-2012 Synopsys, Inc. (www.synopsys.com)
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * vineetg: May 2011: for Non-aliasing VIPT D-cache following can be NOPs
+ * -flush_cache_dup_mm (fork)
+ * -likewise for flush_cache_mm (exit/execve)
+ * -likewise for flush_cache_range,flush_cache_page (munmap, exit, COW-break)
+ *
+ * vineetg: Apr 2011
+ * -Now that MMU can support larger pg sz (16K), the determination of
+ *  aliasing should not be based on the assumption of an 8k pg
+ *
+ * vineetg: Mar 2011
+ * -optimised version of flush_icache_range( ) for making I/D coherent
+ * when vaddr is available (agnostic of num of aliases)
+ *
+ * vineetg: Mar 2011
+ * -Added documentation about I-cache aliasing on ARC700 and the way it
+ * was handled up until MMU V2.
+ * -Spotted a three year old bug when killing the 4 aliases, which needs
+ * bottom 2 bits, so we need to do paddr | {0x00, 0x01, 0x02, 0x03}
+ * instead of paddr | {0x00, 0x01, 0x10, 0x11}
+ * (Rajesh you owe me one now)
+ *
+ * vineetg: Dec 2010
+ * -Off-by-one error when computing num_of_lines to flush
+ * This broke signal handling with bionic which uses synthetic sigret stub
+ *
+ * vineetg: Mar 2010
+ * -GCC can't generate ZOL for core cache flush loops.
+ * Converted them into iteration-count based loops as opposed to while (start < end) style
+ *
+ * Vineetg: July 2009
+ * -In I-cache flush routine we used to check for aliasing for every line INV.
+ * Instead now we setup routines per cache geometry and invoke them
+ * via function pointers.
+ *
+ * Vineetg: Jan 2009
+ * -Cache Line flush routines used to flush an extra line beyond end addr
+ * because check was while (end >= start) instead of (end > start)
+ * =Some call sites had to work around by doing -1, -4 etc to end param
+ * =Some callers didn't care. This was especially bad in case of INV routines
+ * which would discard valid data (cause of the horrible ext2 bug
+ * in ARC IDE driver)
+ *
+ * vineetg: June 11th 2008: Fixed flush_icache_range( )
+ * -Since ARC700 caches are not coherent (I$ doesn't snoop D$) both need
+ * to be flushed, which it was not doing.
+ * -load_module( ) passes vmalloc addr (Kernel Virtual Addr) to the API,
+ * however ARC cache maintenance OPs require PHY addr. Thus need to do
+ * vmalloc_to_phy.
+ * -Also added optimisation there, that for range > PAGE SIZE we flush the
+ * entire cache in one shot rather than line by line. For e.g. a module
+ * with Code sz 600k, old code flushed 600k worth of cache (line-by-line),
+ * while cache is only 16 or 32k.
+ */
+
+#include <linux/module.h>
+#include <linux/mm.h>
+#include <linux/sched.h>
+#include <linux/cache.h>
+#include <linux/mmu_context.h>
+#include <linux/syscalls.h>
+#include <linux/uaccess.h>
+#include <asm/cacheflush.h>
+#include <asm/cachectl.h>
+#include <asm/setup.h>
+
+
+#ifdef CONFIG_ARC_HAS_ICACHE
+static void __ic_line_inv_no_alias(unsigned long, int);
+static void __ic_line_inv_2_alias(unsigned long, int);
+static void __ic_line_inv_4_alias(unsigned long, int);
+
+/* Holds the ptr to flush routine, depending on size due to aliasing issues */
+static void (*___flush_icache_rtn) (unsigned long, int);
+#endif
+
+char *arc_cache_mumbojumbo(int cpu_id, char *buf, int len)
+{
+ int n = 0;
+ unsigned int c = smp_processor_id();
+
+#define PR_CACHE(p, enb, str) \
+{ \
+ if (!(p)->ver) \
+ n += scnprintf(buf + n, len - n, str"\t\t: N/A\n"); \
+ else \
+ n += scnprintf(buf + n, len - n, \
+ str"\t\t: (%uK) VIPT, %dway set-asc, %ub Line %s\n", \
+ TO_KB((p)->sz), (p)->assoc, (p)->line_len, \
+ enb ? "" : "DISABLED (kernel-build)"); \
+}
+
+ PR_CACHE(&cpuinfo_arc700[c].icache, __CONFIG_ARC_HAS_ICACHE, "I-Cache");
+ PR_CACHE(&cpuinfo_arc700[c].dcache, __CONFIG_ARC_HAS_DCACHE, "D-Cache");
+
+ return buf;
+}
+
+/*
+ * Read the Cache Build Configuration Registers, Decode them and save into
+ * the cpuinfo structure for later use.
+ * No Validation done here, simply read/convert the BCRs
+ */
+void __init read_decode_cache_bcr(void)
+{
+ struct bcr_cache ibcr, dbcr;
+ struct cpuinfo_arc_cache *p_ic, *p_dc;
+ unsigned int cpu = smp_processor_id();
+
+ p_ic = &cpuinfo_arc700[cpu].icache;
+ READ_BCR(ARC_REG_IC_BCR, ibcr);
+
+ if (ibcr.config == 0x3)
+ p_ic->assoc = 2;
+ p_ic->line_len = 8 << ibcr.line_len;
+ p_ic->sz = 0x200 << ibcr.sz;
+ p_ic->ver = ibcr.ver;
+
+ p_dc = &cpuinfo_arc700[cpu].dcache;
+ READ_BCR(ARC_REG_DC_BCR, dbcr);
+
+ if (dbcr.config == 0x2)
+ p_dc->assoc = 4;
+ p_dc->line_len = 16 << dbcr.line_len;
+ p_dc->sz = 0x200 << dbcr.sz;
+ p_dc->ver = dbcr.ver;
+}
+
+/*
+ * 1. Validate the Cache Geometry (compile time config matches hardware)
+ * 2. If I-cache suffers from aliasing, setup workarounds (different flush rtn)
+ *    (aliasing D-cache configurations are not supported YET)
+ * 3. Enable the Caches, setup default flush mode for D-Cache
+ * 4. Calculate the SHMLBA used by user space
+ */
+void __init arc_cache_init(void)
+{
+ unsigned int temp;
+ unsigned int cpu = smp_processor_id();
+ struct cpuinfo_arc_cache *ic = &cpuinfo_arc700[cpu].icache;
+ struct cpuinfo_arc_cache *dc = &cpuinfo_arc700[cpu].dcache;
+ int way_pg_ratio = way_pg_ratio;
+ char str[256];
+
+ printk(arc_cache_mumbojumbo(0, str, sizeof(str)));
+
+ if (!ic->ver)
+ goto chk_dc;
+
+#ifdef CONFIG_ARC_HAS_ICACHE
+ /* 1. Confirm some of I-cache params which Linux assumes */
+ if ((ic->assoc != ARC_ICACHE_WAYS) ||
+ (ic->line_len != ARC_ICACHE_LINE_LEN)) {
+ panic("Cache H/W doesn't match kernel Config");
+ }
+#if (CONFIG_ARC_MMU_VER > 2)
+ if (ic->ver != 3) {
+ if (running_on_hw)
+ panic("Cache ver doesn't match MMU ver\n");
+
+ /* For ISS - suggest the toggles to use */
+ pr_err("Use -prop=icache_version=3,-prop=dcache_version=3\n");
+
+ }
+#endif
+
+ /*
+ * if Cache way size is <= page size then no aliasing is exhibited,
+ * otherwise the ratio determines num of aliases.
+ * e.g. 32K I$, 2 way set assoc, 8k pg size
+ * way-sz = 32k/2 = 16k
+ * way-pg-ratio = 16k/8k = 2, so 2 aliases possible
+ * (meaning 1 line could be in 2 possible locations).
+ */
+ way_pg_ratio = ic->sz / ARC_ICACHE_WAYS / PAGE_SIZE;
+ switch (way_pg_ratio) {
+ case 0:
+ case 1:
+ ___flush_icache_rtn = __ic_line_inv_no_alias;
+ break;
+ case 2:
+ ___flush_icache_rtn = __ic_line_inv_2_alias;
+ break;
+ case 4:
+ ___flush_icache_rtn = __ic_line_inv_4_alias;
+ break;
+ default:
+ panic("Unsupported I-Cache Sz\n");
+ }
+#endif
+
+ /* Enable/disable I-Cache */
+ temp = read_aux_reg(ARC_REG_IC_CTRL);
+
+#ifdef CONFIG_ARC_HAS_ICACHE
+ temp &= ~IC_CTRL_CACHE_DISABLE;
+#else
+ temp |= IC_CTRL_CACHE_DISABLE;
+#endif
+
+ write_aux_reg(ARC_REG_IC_CTRL, temp);
+
+chk_dc:
+ if (!dc->ver)
+ return;
+
+#ifdef CONFIG_ARC_HAS_DCACHE
+ if ((dc->assoc != ARC_DCACHE_WAYS) ||
+ (dc->line_len != ARC_DCACHE_LINE_LEN)) {
+ panic("Cache H/W doesn't match kernel Config");
+ }
+
+ /* check for D-Cache aliasing */
+ if ((dc->sz / ARC_DCACHE_WAYS) > PAGE_SIZE)
+ panic("D$ aliasing not handled right now\n");
+#endif
+
+ /* Set the default Invalidate Mode to "simply discard dirty lines"
+ * as this is more frequent than flush before invalidate
+ * Of course we toggle this default behaviour when desired
+ */
+ temp = read_aux_reg(ARC_REG_DC_CTRL);
+ temp &= ~DC_CTRL_INV_MODE_FLUSH;
+
+#ifdef CONFIG_ARC_HAS_DCACHE
+ /* Enable D-Cache: Clear Bit 0 */
+ write_aux_reg(ARC_REG_DC_CTRL, temp & ~IC_CTRL_CACHE_DISABLE);
+#else
+ /* Flush D cache */
+ write_aux_reg(ARC_REG_DC_FLSH, 0x1);
+ /* Disable D cache */
+ write_aux_reg(ARC_REG_DC_CTRL, temp | IC_CTRL_CACHE_DISABLE);
+#endif
+
+ return;
+}
+
+#define OP_INV 0x1
+#define OP_FLUSH 0x2
+#define OP_FLUSH_N_INV 0x3
+
+#ifdef CONFIG_ARC_HAS_DCACHE
+
+/***************************************************************
+ * Machine specific helpers for Entire D-Cache or Per Line ops
+ */
+
+static inline void wait_for_flush(void)
+{
+ while (read_aux_reg(ARC_REG_DC_CTRL) & DC_CTRL_FLUSH_STATUS)
+ ;
+}
+
+/*
+ * Operation on Entire D-Cache
+ * @cacheop = {OP_INV, OP_FLUSH, OP_FLUSH_N_INV}
+ * Note that constant propagation ensures all the checks are gone
+ * in generated code
+ */
+static inline void __dc_entire_op(const int cacheop)
+{
+ unsigned long flags, tmp = tmp;
+ int aux;
+
+ local_irq_save(flags);
+
+ if (cacheop == OP_FLUSH_N_INV) {
+ /* Dcache provides 2 cmd: FLUSH or INV
+ * INV in turn has sub-modes: DISCARD or FLUSH-BEFORE
+ * flush-n-inv is achieved by INV cmd but with IM=1
+ * Default INV sub-mode is DISCARD, which needs to be toggled
+ */
+ tmp = read_aux_reg(ARC_REG_DC_CTRL);
+ write_aux_reg(ARC_REG_DC_CTRL, tmp | DC_CTRL_INV_MODE_FLUSH);
+ }
+
+ if (cacheop & OP_INV) /* Inv or flush-n-inv use same cmd reg */
+ aux = ARC_REG_DC_IVDC;
+ else
+ aux = ARC_REG_DC_FLSH;
+
+ write_aux_reg(aux, 0x1);
+
+ if (cacheop & OP_FLUSH) /* flush / flush-n-inv both wait */
+ wait_for_flush();
+
+ /* Switch back the DISCARD ONLY Invalidate mode */
+ if (cacheop == OP_FLUSH_N_INV)
+ write_aux_reg(ARC_REG_DC_CTRL, tmp & ~DC_CTRL_INV_MODE_FLUSH);
+
+ local_irq_restore(flags);
+}
+
+/*
+ * Per Line Operation on D-Cache
+ * Doesn't deal with type-of-op/IRQ-disabling/waiting-for-flush-to-complete
+ * Its sole purpose is to help gcc generate ZOL
+ */
+static inline void __dc_line_loop(unsigned long start, unsigned long sz,
+ int aux_reg)
+{
+ int num_lines, slack;
+
+ /* Ensure we properly floor/ceil the non-line aligned/sized requests
+ * and have @start - aligned to cache line and integral @num_lines.
+ * This however can be avoided for page-sized requests since:
+ * -@start will be cache-line aligned already (being page aligned)
+ * -@sz will be integral multiple of line size (being page sized).
+ */
+ if (!(__builtin_constant_p(sz) && sz == PAGE_SIZE)) {
+ slack = start & ~DCACHE_LINE_MASK;
+ sz += slack;
+ start -= slack;
+ }
+
+ num_lines = DIV_ROUND_UP(sz, ARC_DCACHE_LINE_LEN);
+
+ while (num_lines-- > 0) {
+#if (CONFIG_ARC_MMU_VER > 2)
+ /*
+ * Just as for I$, in MMU v3, D$ ops also require
+ * "tag" bits in DC_PTAG, "index" bits in FLDL,IVDL ops
+ * But we pass phy addr for both. This works since Linux
+ * doesn't support aliasing configs for D$, yet.
+ * Thus paddr is enough to provide both tag and index.
+ */
+ write_aux_reg(ARC_REG_DC_PTAG, start);
+#endif
+ write_aux_reg(aux_reg, start);
+ start += ARC_DCACHE_LINE_LEN;
+ }
+}
+
+/*
+ * D-Cache : Per Line INV (discard or wback+discard) or FLUSH (wback)
+ */
+static inline void __dc_line_op(unsigned long start, unsigned long sz,
+ const int cacheop)
+{
+ unsigned long flags, tmp = tmp;
+ int aux;
+
+ local_irq_save(flags);
+
+ if (cacheop == OP_FLUSH_N_INV) {
+ /*
+ * Dcache provides 2 cmd: FLUSH or INV
+ * INV in turn has sub-modes: DISCARD or FLUSH-BEFORE
+ * flush-n-inv is achieved by INV cmd but with IM=1
+ * Default INV sub-mode is DISCARD, which needs to be toggled
+ */
+ tmp = read_aux_reg(ARC_REG_DC_CTRL);
+ write_aux_reg(ARC_REG_DC_CTRL, tmp | DC_CTRL_INV_MODE_FLUSH);
+ }
+
+ if (cacheop & OP_INV) /* Inv / flush-n-inv use same cmd reg */
+ aux = ARC_REG_DC_IVDL;
+ else
+ aux = ARC_REG_DC_FLDL;
+
+ __dc_line_loop(start, sz, aux);
+
+ if (cacheop & OP_FLUSH) /* flush / flush-n-inv both wait */
+ wait_for_flush();
+
+ /* Switch back the DISCARD ONLY Invalidate mode */
+ if (cacheop == OP_FLUSH_N_INV)
+ write_aux_reg(ARC_REG_DC_CTRL, tmp & ~DC_CTRL_INV_MODE_FLUSH);
+
+ local_irq_restore(flags);
+}
+
+#else
+
+#define __dc_entire_op(cacheop)
+#define __dc_line_op(start, sz, cacheop)
+
+#endif /* CONFIG_ARC_HAS_DCACHE */
+
+
+#ifdef CONFIG_ARC_HAS_ICACHE
+
+/*
+ * I-Cache Aliasing in ARC700 VIPT caches
+ *
+ * For fetching code from I$, ARC700 uses vaddr (embedded in program code)
+ * to "index" into SET of cache-line and paddr from MMU to match the TAG
+ * in the WAYS of SET.
+ *
+ * However the CDU interface (to flush/inv lines from software) only takes
+ * paddr (to have simpler hardware interface). For simpler cases, using paddr
+ * alone suffices.
+ * e.g. 2-way-set-assoc, 16K I$ (8k MMU pg sz, 32b cache line size):
+ * way_sz = cache_sz / num_ways = 16k/2 = 8k
+ * num_sets = way_sz / line_sz = 8k/32 = 256 => 8 bits
+ * Ignoring the bottom 5 bits corresponding to the offset within a 32b cacheline,
+ * bits required to calc set-index = bits 12:5 (0 based). Since this range fits
+ * inside the bottom 13 bits of paddr, which are same for vaddr and paddr
+ * (with 8k pg sz), paddr alone can be safely used by CDU to unambiguously
+ * locate a cache-line.
+ *
+ * However for a different sized cache, say 32k I$, above math yields need
+ * for 14 bits of vaddr to locate a cache line, which can't be provided by
+ * paddr, since the bit 13 (0 based) might differ between the two.
+ *
+ * This lack of extra bits needed for correct line addressing defines the
+ * classical problem of Cache aliasing with VIPT architectures
+ * num_aliases = 1 << extra_bits
+ * e.g. 2-way-set-assoc, 32K I$ with 8k MMU pg sz => 2 aliases
+ * 2-way-set-assoc, 64K I$ with 8k MMU pg sz => 4 aliases
+ * 2-way-set-assoc, 16K I$ with 8k MMU pg sz => NO aliases
+ *
+ * ------------------
+ * MMU v1/v2 (Fixed Page Size 8k)
+ * ------------------
+ * The solution was to provide CDU with these additional vaddr bits. These
+ * would be bits [x:13], x would depend on cache-geom.
+ * H/w folks chose [17:13] to be a future-safe range, and more so these 5 bits
+ * of vaddr could easily be "stuffed" in the paddr as bits [4:0] since the
+ * orig 5 bits of paddr were anyways ignored by CDU line ops, as they
+ * represent the offset within cache-line. The adv of using this "clumsy"
+ * interface for additional info was no new reg was needed in CDU.
+ *
+ * 17:13 represented the max num of bits passable, actual bits needed were
+ * fewer, based on the num-of-aliases possible.
+ * -for 2 alias possibility, only bit 13 needed (32K cache)
+ * -for 4 alias possibility, bits 14:13 needed (64K cache)
+ *
+ * Since vaddr was not available for all instances of I$ flush req by core
+ * kernel, the only safe way (non-optimal though) was to kill all possible
+ * lines which could represent an alias (even if they didn't represent one
+ * in execution).
+ * e.g. for 64K I$, 4 aliases possible, so we did
+ * flush start
+ * flush start | 0x01
+ * flush start | 0x2
+ * flush start | 0x3
+ *
+ * The penalty was just invoking the operation itself; since tag match is
+ * anyways paddr based, a line which didn't represent an alias would not match
+ * the paddr, hence won't be killed
+ *
+ * Note that aliasing concerns are independent of line-sz for a given cache
+ * geometry (size + set_assoc) because the extra bits required by line-sz are
+ * reduced from the set calc.
+ * e.g. 2-way-set-assoc, 32K I$ with 8k MMU pg sz and using math above
+ * 32b line-sz: 9 bits set-index-calc, 5 bits offset-in-line => 1 extra bit
+ * 64b line-sz: 8 bits set-index-calc, 6 bits offset-in-line => 1 extra bit
+ *
+ * ------------------
+ * MMU v3
+ * ------------------
+ * This ver of MMU supports var page sizes (1k-16k) - Linux will support
+ * 8k (default), 16k and 4k.
+ * However from hardware perspective, smaller page sizes aggravate aliasing,
+ * meaning more vaddr bits are needed to disambiguate the cache-line op;
+ * the existing scheme of piggybacking won't work for certain configurations.
+ * Two new registers IC_PTAG and DC_PTAG are introduced.
+ * "tag" bits are provided in PTAG, index bits in existing IVIL/IVDL/FLDL regs
+ */
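/*
 * A minimal arithmetic sketch of the alias count described above; the helper
 * name below is hypothetical and for illustration only, the real decision is
 * made via way_pg_ratio in arc_cache_init().
 */
static inline unsigned int vipt_num_aliases(unsigned int cache_sz,
					    unsigned int ways,
					    unsigned int pg_sz)
{
	unsigned int way_sz = cache_sz / ways;	/* e.g. 32K / 2 = 16K */

	/* way fits within a page => index bits lie in page offset => no alias */
	return (way_sz > pg_sz) ? (way_sz / pg_sz) : 1;
}
/* e.g. vipt_num_aliases(32 << 10, 2, 8 << 10) == 2, matching the list above */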
+
+/***********************************************************
+ * Machine specific helpers for per line I-Cache invalidate.
+ * 3 routines to account for 1, 2, 4 aliases possible
+ */
+
+static void __ic_line_inv_no_alias(unsigned long start, int num_lines)
+{
+ while (num_lines-- > 0) {
+#if (CONFIG_ARC_MMU_VER > 2)
+ write_aux_reg(ARC_REG_IC_PTAG, start);
+#endif
+ write_aux_reg(ARC_REG_IC_IVIL, start);
+ start += ARC_ICACHE_LINE_LEN;
+ }
+}
+
+static void __ic_line_inv_2_alias(unsigned long start, int num_lines)
+{
+ while (num_lines-- > 0) {
+
+#if (CONFIG_ARC_MMU_VER > 2)
+ /*
+ * MMU v3, CDU prog model (for line ops) now uses a new IC_PTAG
+ * reg to pass the "tag" bits and existing IVIL reg only looks
+ * at bits relevant for "index" (details above)
+ * Programming Notes:
+ * -when writing tag to PTAG reg, bit chopping can be avoided,
+ * CDU ignores non-tag bits.
+ * -Ideally "index" must be computed from vaddr, but it is not
+ * avail in these rtns. So to be safe, we kill the lines in all
+ * possible indexes corresp to num of aliases possible for
+ * given cache config.
+ */
+ write_aux_reg(ARC_REG_IC_PTAG, start);
+ write_aux_reg(ARC_REG_IC_IVIL,
+ start & ~(0x1 << PAGE_SHIFT));
+ write_aux_reg(ARC_REG_IC_IVIL, start | (0x1 << PAGE_SHIFT));
+#else
+ write_aux_reg(ARC_REG_IC_IVIL, start);
+ write_aux_reg(ARC_REG_IC_IVIL, start | 0x01);
+#endif
+ start += ARC_ICACHE_LINE_LEN;
+ }
+}
+
+static void __ic_line_inv_4_alias(unsigned long start, int num_lines)
+{
+ while (num_lines-- > 0) {
+
+#if (CONFIG_ARC_MMU_VER > 2)
+ write_aux_reg(ARC_REG_IC_PTAG, start);
+
+ write_aux_reg(ARC_REG_IC_IVIL,
+ start & ~(0x3 << PAGE_SHIFT));
+ write_aux_reg(ARC_REG_IC_IVIL,
+ start & ~(0x2 << PAGE_SHIFT));
+ write_aux_reg(ARC_REG_IC_IVIL,
+ start & ~(0x1 << PAGE_SHIFT));
+ write_aux_reg(ARC_REG_IC_IVIL, start | (0x3 << PAGE_SHIFT));
+#else
+ write_aux_reg(ARC_REG_IC_IVIL, start);
+ write_aux_reg(ARC_REG_IC_IVIL, start | 0x01);
+ write_aux_reg(ARC_REG_IC_IVIL, start | 0x02);
+ write_aux_reg(ARC_REG_IC_IVIL, start | 0x03);
+#endif
+ start += ARC_ICACHE_LINE_LEN;
+ }
+}
+
+static void __ic_line_inv(unsigned long start, unsigned long sz)
+{
+ unsigned long flags;
+ int num_lines, slack;
+
+ /*
+ * Ensure we properly floor/ceil the non-line aligned/sized requests
+ * and have @start - aligned to cache line, and integral @num_lines
+ * However page sized flushes can be compile time optimised.
+ * -@start will be cache-line aligned already (being page aligned)
+ * -@sz will be integral multiple of line size (being page sized).
+ */
+ if (!(__builtin_constant_p(sz) && sz == PAGE_SIZE)) {
+ slack = start & ~ICACHE_LINE_MASK;
+ sz += slack;
+ start -= slack;
+ }
+
+ num_lines = DIV_ROUND_UP(sz, ARC_ICACHE_LINE_LEN);
+
+ local_irq_save(flags);
+ (*___flush_icache_rtn) (start, num_lines);
+ local_irq_restore(flags);
+}
+
+/* Unlike routines above, having vaddr for flush op (along with paddr),
+ * prevents the need to speculatively kill the lines in multiple sets
+ * based on ratio of way_sz : pg_sz
+ */
+static void __ic_line_inv_vaddr(unsigned long phy_start,
+ unsigned long vaddr, unsigned long sz)
+{
+ unsigned long flags;
+ int num_lines, slack;
+ unsigned int addr;
+
+ slack = phy_start & ~ICACHE_LINE_MASK;
+ sz += slack;
+ phy_start -= slack;
+ num_lines = DIV_ROUND_UP(sz, ARC_ICACHE_LINE_LEN);
+
+#if (CONFIG_ARC_MMU_VER > 2)
+ vaddr &= ~ICACHE_LINE_MASK;
+ addr = phy_start;
+#else
+ /* bits 17:13 of vaddr go as bits 4:0 of paddr */
+ addr = phy_start | ((vaddr >> 13) & 0x1F);
+#endif
+
+ local_irq_save(flags);
+ while (num_lines-- > 0) {
+#if (CONFIG_ARC_MMU_VER > 2)
+ /* tag comes from phy addr */
+ write_aux_reg(ARC_REG_IC_PTAG, addr);
+
+ /* index bits come from vaddr */
+ write_aux_reg(ARC_REG_IC_IVIL, vaddr);
+ vaddr += ARC_ICACHE_LINE_LEN;
+#else
+ /* this paddr contains vaddr bits as needed */
+ write_aux_reg(ARC_REG_IC_IVIL, addr);
+#endif
+ addr += ARC_ICACHE_LINE_LEN;
+ }
+ local_irq_restore(flags);
+}
+
+#else
+
+#define __ic_line_inv(start, sz)
+#define __ic_line_inv_vaddr(pstart, vstart, sz)
+
+#endif /* CONFIG_ARC_HAS_ICACHE */
+
+
+/***********************************************************
+ * Exported APIs
+ */
+
+/* TBD: use pg_arch_1 to optimize this */
+void flush_dcache_page(struct page *page)
+{
+ __dc_line_op((unsigned long)page_address(page), PAGE_SIZE, OP_FLUSH);
+}
+EXPORT_SYMBOL(flush_dcache_page);
+
+
+void dma_cache_wback_inv(unsigned long start, unsigned long sz)
+{
+ __dc_line_op(start, sz, OP_FLUSH_N_INV);
+}
+EXPORT_SYMBOL(dma_cache_wback_inv);
+
+void dma_cache_inv(unsigned long start, unsigned long sz)
+{
+ __dc_line_op(start, sz, OP_INV);
+}
+EXPORT_SYMBOL(dma_cache_inv);
+
+void dma_cache_wback(unsigned long start, unsigned long sz)
+{
+ __dc_line_op(start, sz, OP_FLUSH);
+}
+EXPORT_SYMBOL(dma_cache_wback);
+
+/*
+ * This is API for making I/D Caches consistent when modifying code
+ * (loadable modules, kprobes, etc)
+ * This is called on insmod, with kernel virtual address for CODE of
+ * the module. ARC cache maintenance ops require PHY address thus we
+ * need to convert vmalloc addr to PHY addr
+ */
+void flush_icache_range(unsigned long kstart, unsigned long kend)
+{
+ unsigned int tot_sz, off, sz;
+ unsigned long phy, pfn;
+ unsigned long flags;
+
+ /* printk("Kernel Cache Cohenercy: %lx to %lx\n",kstart, kend); */
+
+ /* This is not the right API for user virtual address */
+ if (kstart < TASK_SIZE) {
+ BUG_ON("Flush icache range for user virtual addr space");
+ return;
+ }
+
+ /* Shortcut for bigger flush ranges.
+ * Here we don't care if this was kernel virtual or phy addr
+ */
+ tot_sz = kend - kstart;
+ if (tot_sz > PAGE_SIZE) {
+ flush_cache_all();
+ return;
+ }
+
+ /* Case: Kernel Phy addr (0x8000_0000 onwards) */
+ if (likely(kstart > PAGE_OFFSET)) {
+ __ic_line_inv(kstart, kend - kstart);
+ __dc_line_op(kstart, kend - kstart, OP_FLUSH);
+ return;
+ }
+
+ /*
+ * Case: Kernel Vaddr (0x7000_0000 to 0x7fff_ffff)
+ * (1) ARC Cache Maintenance ops only take Phy addr, hence special
+ * handling of kernel vaddr.
+ *
+ * (2) Despite @tot_sz being < PAGE_SIZE (bigger cases handled already),
+ * it still needs to handle a 2 page scenario, where the range
+ * straddles across 2 virtual pages and hence need for loop
+ */
+ while (tot_sz > 0) {
+ off = kstart % PAGE_SIZE;
+ pfn = vmalloc_to_pfn((void *)kstart);
+ phy = (pfn << PAGE_SHIFT) + off;
+ sz = min_t(unsigned int, tot_sz, PAGE_SIZE - off);
+ local_irq_save(flags);
+ __dc_line_op(phy, sz, OP_FLUSH);
+ __ic_line_inv(phy, sz);
+ local_irq_restore(flags);
+ kstart += sz;
+ tot_sz -= sz;
+ }
+}
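/*
 * Usage sketch, illustration only: the caller, buffer and length names below
 * are hypothetical, but the call sequence follows the API described above
 * (write the code, then make I$ and D$ coherent for just that range).
 */
static void example_patch_kernel_text(void *dst, const void *insn, int len)
{
	memcpy(dst, insn, len);			/* e.g. module/vmalloc address */
	flush_icache_range((unsigned long)dst,
			   (unsigned long)dst + len);
}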
+
+/*
+ * Optimised ver of flush_icache_range() with specific callers: ptrace/signals
+ * where vaddr is also available. This allows passing both vaddr and paddr
+ * bits to CDU for cache flush, short-circuiting the current pessimistic algo
+ * which kills all possible aliases.
+ * An added adv of knowing that vaddr is user-vaddr avoids various checks
+ * and handling for k-vaddr, k-paddr as done in orig ver above
+ */
+void flush_icache_range_vaddr(unsigned long paddr, unsigned long u_vaddr,
+ int len)
+{
+ __ic_line_inv_vaddr(paddr, u_vaddr, len);
+ __dc_line_op(paddr, len, OP_FLUSH);
+}
+
+/*
+ * XXX: This also needs to be optim using pg_arch_1
+ * This is called when a page-cache page is about to be mapped into a
+ * user process' address space. It offers an opportunity for a
+ * port to ensure d-cache/i-cache coherency if necessary.
+ */
+void flush_icache_page(struct vm_area_struct *vma, struct page *page)
+{
+ if (!(vma->vm_flags & VM_EXEC))
+ return;
+
+ __ic_line_inv((unsigned long)page_address(page), PAGE_SIZE);
+}
+
+void flush_icache_all(void)
+{
+ unsigned long flags;
+
+ local_irq_save(flags);
+
+ write_aux_reg(ARC_REG_IC_IVIC, 1);
+
+ /* lr will not complete till the icache inv operation is over */
+ read_aux_reg(ARC_REG_IC_CTRL);
+ local_irq_restore(flags);
+}
+
+noinline void flush_cache_all(void)
+{
+ unsigned long flags;
+
+ local_irq_save(flags);
+
+ flush_icache_all();
+ __dc_entire_op(OP_FLUSH_N_INV);
+
+ local_irq_restore(flags);
+
+}
+
+/**********************************************************************
+ * Explicit Cache flush request from user space via syscall
+ * Needed for JITs which generate code on the fly
+ */
+SYSCALL_DEFINE3(cacheflush, uint32_t, start, uint32_t, sz, uint32_t, flags)
+{
+ /* TBD: optimize this */
+ flush_cache_all();
+ return 0;
+}
diff --git a/arch/arc/mm/dma.c b/arch/arc/mm/dma.c
new file mode 100644
index 00000000000..12cc6485b21
--- /dev/null
+++ b/arch/arc/mm/dma.c
@@ -0,0 +1,94 @@
+/*
+ * Copyright (C) 2004, 2007-2010, 2011-2012 Synopsys, Inc. (www.synopsys.com)
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+/*
+ * DMA Coherent API Notes
+ *
+ * I/O is inherently non-coherent on ARC. So a coherent DMA buffer is
+ * implemented by accessing it using a kernel virtual address, with
+ * Cache bit off in the TLB entry.
+ *
+ * The default DMA address == Phy address which is 0x8000_0000 based.
+ * A platform/device can make it zero based, by over-riding
+ * plat_{dma,kernel}_addr_to_{kernel,dma}
+ */
+
+#include <linux/dma-mapping.h>
+#include <linux/dma-debug.h>
+#include <linux/export.h>
+#include <asm/cacheflush.h>
+
+/*
+ * Helpers for Coherent DMA API.
+ */
+void *dma_alloc_noncoherent(struct device *dev, size_t size,
+ dma_addr_t *dma_handle, gfp_t gfp)
+{
+ void *paddr;
+
+ /* This is linear addr (0x8000_0000 based) */
+ paddr = alloc_pages_exact(size, gfp);
+ if (!paddr)
+ return NULL;
+
+ /* This is bus address, platform dependent */
+ *dma_handle = plat_kernel_addr_to_dma(dev, paddr);
+
+ return paddr;
+}
+EXPORT_SYMBOL(dma_alloc_noncoherent);
+
+void dma_free_noncoherent(struct device *dev, size_t size, void *vaddr,
+ dma_addr_t dma_handle)
+{
+ free_pages_exact((void *)plat_dma_addr_to_kernel(dev, dma_handle),
+ size);
+}
+EXPORT_SYMBOL(dma_free_noncoherent);
+
+void *dma_alloc_coherent(struct device *dev, size_t size,
+ dma_addr_t *dma_handle, gfp_t gfp)
+{
+ void *paddr, *kvaddr;
+
+ /* This is linear addr (0x8000_0000 based) */
+ paddr = alloc_pages_exact(size, gfp);
+ if (!paddr)
+ return NULL;
+
+ /* This is kernel Virtual address (0x7000_0000 based) */
+ kvaddr = ioremap_nocache((unsigned long)paddr, size);
+ if (kvaddr != NULL)
+ memset(kvaddr, 0, size);
+
+ /* This is bus address, platform dependent */
+ *dma_handle = plat_kernel_addr_to_dma(dev, paddr);
+
+ return kvaddr;
+}
+EXPORT_SYMBOL(dma_alloc_coherent);
+
+void dma_free_coherent(struct device *dev, size_t size, void *kvaddr,
+ dma_addr_t dma_handle)
+{
+ iounmap((void __force __iomem *)kvaddr);
+
+ free_pages_exact((void *)plat_dma_addr_to_kernel(dev, dma_handle),
+ size);
+}
+EXPORT_SYMBOL(dma_free_coherent);
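/*
 * Illustrative driver-side sequence for the coherent API above; the device,
 * ring size and usage are hypothetical, only the call pattern is the point.
 */
static void *example_alloc_desc_ring(struct device *dev, dma_addr_t *bus)
{
	/* returns an uncached kernel vaddr; *bus is what the device is given */
	void *ring = dma_alloc_coherent(dev, PAGE_SIZE, bus, GFP_KERNEL);

	/* ... program *bus into the (hypothetical) device, use ring ... */
	/* ... later: dma_free_coherent(dev, PAGE_SIZE, ring, *bus); ... */
	return ring;
}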
+
+/*
+ * Helper for streaming DMA...
+ */
+void __arc_dma_cache_sync(unsigned long paddr, size_t size,
+ enum dma_data_direction dir)
+{
+ __inline_dma_cache_sync(paddr, size, dir);
+}
+EXPORT_SYMBOL(__arc_dma_cache_sync);
diff --git a/arch/arc/mm/extable.c b/arch/arc/mm/extable.c
new file mode 100644
index 00000000000..014172ba843
--- /dev/null
+++ b/arch/arc/mm/extable.c
@@ -0,0 +1,63 @@
+/*
+ * Copyright (C) 2004, 2007-2010, 2011-2012 Synopsys, Inc. (www.synopsys.com)
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * Borrowed heavily from MIPS
+ */
+
+#include <linux/module.h>
+#include <linux/uaccess.h>
+
+int fixup_exception(struct pt_regs *regs)
+{
+ const struct exception_table_entry *fixup;
+
+ fixup = search_exception_tables(instruction_pointer(regs));
+ if (fixup) {
+ regs->ret = fixup->fixup;
+
+ return 1;
+ }
+
+ return 0;
+}
+
+#ifdef CONFIG_CC_OPTIMIZE_FOR_SIZE
+
+long arc_copy_from_user_noinline(void *to, const void __user * from,
+ unsigned long n)
+{
+ return __arc_copy_from_user(to, from, n);
+}
+EXPORT_SYMBOL(arc_copy_from_user_noinline);
+
+long arc_copy_to_user_noinline(void __user *to, const void *from,
+ unsigned long n)
+{
+ return __arc_copy_to_user(to, from, n);
+}
+EXPORT_SYMBOL(arc_copy_to_user_noinline);
+
+unsigned long arc_clear_user_noinline(void __user *to,
+ unsigned long n)
+{
+ return __arc_clear_user(to, n);
+}
+EXPORT_SYMBOL(arc_clear_user_noinline);
+
+long arc_strncpy_from_user_noinline (char *dst, const char __user *src,
+ long count)
+{
+ return __arc_strncpy_from_user(dst, src, count);
+}
+EXPORT_SYMBOL(arc_strncpy_from_user_noinline);
+
+long arc_strnlen_user_noinline(const char __user *src, long n)
+{
+ return __arc_strnlen_user(src, n);
+}
+EXPORT_SYMBOL(arc_strnlen_user_noinline);
+#endif
diff --git a/arch/arc/mm/fault.c b/arch/arc/mm/fault.c
new file mode 100644
index 00000000000..af55aab803d
--- /dev/null
+++ b/arch/arc/mm/fault.c
@@ -0,0 +1,228 @@
+/* Page Fault Handling for ARC (TLB Miss / ProtV)
+ *
+ * Copyright (C) 2004, 2007-2010, 2011-2012 Synopsys, Inc. (www.synopsys.com)
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/signal.h>
+#include <linux/interrupt.h>
+#include <linux/sched.h>
+#include <linux/errno.h>
+#include <linux/ptrace.h>
+#include <linux/version.h>
+#include <linux/uaccess.h>
+#include <linux/kdebug.h>
+#include <asm/pgalloc.h>
+
+static int handle_vmalloc_fault(struct mm_struct *mm, unsigned long address)
+{
+ /*
+ * Synchronize this task's top level page-table
+ * with the 'reference' page table.
+ */
+ pgd_t *pgd, *pgd_k;
+ pud_t *pud, *pud_k;
+ pmd_t *pmd, *pmd_k;
+
+ pgd = pgd_offset_fast(mm, address);
+ pgd_k = pgd_offset_k(address);
+
+ if (!pgd_present(*pgd_k))
+ goto bad_area;
+
+ pud = pud_offset(pgd, address);
+ pud_k = pud_offset(pgd_k, address);
+ if (!pud_present(*pud_k))
+ goto bad_area;
+
+ pmd = pmd_offset(pud, address);
+ pmd_k = pmd_offset(pud_k, address);
+ if (!pmd_present(*pmd_k))
+ goto bad_area;
+
+ set_pmd(pmd, *pmd_k);
+
+ /* XXX: create the TLB entry here */
+ return 0;
+
+bad_area:
+ return 1;
+}
+
+void do_page_fault(struct pt_regs *regs, int write, unsigned long address,
+ unsigned long cause_code)
+{
+ struct vm_area_struct *vma = NULL;
+ struct task_struct *tsk = current;
+ struct mm_struct *mm = tsk->mm;
+ siginfo_t info;
+ int fault, ret;
+ unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE |
+ (write ? FAULT_FLAG_WRITE : 0);
+
+ /*
+ * We fault-in kernel-space virtual memory on-demand. The
+ * 'reference' page table is init_mm.pgd.
+ *
+ * NOTE! We MUST NOT take any locks for this case. We may
+ * be in an interrupt or a critical region, and should
+ * only copy the information from the master page table,
+ * nothing more.
+ */
+ if (address >= VMALLOC_START && address <= VMALLOC_END) {
+ ret = handle_vmalloc_fault(mm, address);
+ if (unlikely(ret))
+ goto bad_area_nosemaphore;
+ else
+ return;
+ }
+
+ info.si_code = SEGV_MAPERR;
+
+ /*
+ * If we're in an interrupt or have no user
+ * context, we must not take the fault..
+ */
+ if (in_atomic() || !mm)
+ goto no_context;
+
+retry:
+ down_read(&mm->mmap_sem);
+ vma = find_vma(mm, address);
+ if (!vma)
+ goto bad_area;
+ if (vma->vm_start <= address)
+ goto good_area;
+ if (!(vma->vm_flags & VM_GROWSDOWN))
+ goto bad_area;
+ if (expand_stack(vma, address))
+ goto bad_area;
+
+ /*
+ * Ok, we have a good vm_area for this memory access, so
+ * we can handle it..
+ */
+good_area:
+ info.si_code = SEGV_ACCERR;
+
+ /* Handle protection violation, execute on heap or stack */
+
+ if (cause_code == ((ECR_V_PROTV << 16) | ECR_C_PROTV_INST_FETCH))
+ goto bad_area;
+
+ if (write) {
+ if (!(vma->vm_flags & VM_WRITE))
+ goto bad_area;
+ } else {
+ if (!(vma->vm_flags & (VM_READ | VM_EXEC)))
+ goto bad_area;
+ }
+
+survive:
+ /*
+ * If for any reason at all we couldn't handle the fault,
+ * make sure we exit gracefully rather than endlessly redo
+ * the fault.
+ */
+ fault = handle_mm_fault(mm, vma, address, flags);
+
+ /* If Pagefault was interrupted by SIGKILL, exit page fault "early" */
+ if (unlikely(fatal_signal_pending(current))) {
+ if ((fault & VM_FAULT_ERROR) && !(fault & VM_FAULT_RETRY))
+ up_read(&mm->mmap_sem);
+ if (user_mode(regs))
+ return;
+ }
+
+ if (likely(!(fault & VM_FAULT_ERROR))) {
+ if (flags & FAULT_FLAG_ALLOW_RETRY) {
+ /* To avoid updating stats twice for retry case */
+ if (fault & VM_FAULT_MAJOR)
+ tsk->maj_flt++;
+ else
+ tsk->min_flt++;
+
+ if (fault & VM_FAULT_RETRY) {
+ flags &= ~FAULT_FLAG_ALLOW_RETRY;
+ flags |= FAULT_FLAG_TRIED;
+ goto retry;
+ }
+ }
+
+ /* Fault Handled Gracefully */
+ up_read(&mm->mmap_sem);
+ return;
+ }
+
+ /* TBD: switch to pagefault_out_of_memory() */
+ if (fault & VM_FAULT_OOM)
+ goto out_of_memory;
+ else if (fault & VM_FAULT_SIGBUS)
+ goto do_sigbus;
+
+ /* no man's land */
+ BUG();
+
+ /*
+ * Something tried to access memory that isn't in our memory map..
+ * Fix it, but check if it's kernel or user first..
+ */
+bad_area:
+ up_read(&mm->mmap_sem);
+
+bad_area_nosemaphore:
+ /* User mode accesses just cause a SIGSEGV */
+ if (user_mode(regs)) {
+ tsk->thread.fault_address = address;
+ tsk->thread.cause_code = cause_code;
+ info.si_signo = SIGSEGV;
+ info.si_errno = 0;
+ /* info.si_code has been set above */
+ info.si_addr = (void __user *)address;
+ force_sig_info(SIGSEGV, &info, tsk);
+ return;
+ }
+
+no_context:
+ /* Are we prepared to handle this kernel fault?
+ *
+ * (The kernel has valid exception-points in the source
+ * when it accesses user-memory. When it fails in one
+ * of those points, we find it in a table and do a jump
+ * to some fixup code that loads an appropriate error
+ * code)
+ */
+ if (fixup_exception(regs))
+ return;
+
+ die("Oops", regs, address, cause_code);
+
+out_of_memory:
+ if (is_global_init(tsk)) {
+ yield();
+ goto survive;
+ }
+ up_read(&mm->mmap_sem);
+
+ if (user_mode(regs))
+ do_group_exit(SIGKILL); /* This will never return */
+
+ goto no_context;
+
+do_sigbus:
+ up_read(&mm->mmap_sem);
+
+ if (!user_mode(regs))
+ goto no_context;
+
+ tsk->thread.fault_address = address;
+ tsk->thread.cause_code = cause_code;
+ info.si_signo = SIGBUS;
+ info.si_errno = 0;
+ info.si_code = BUS_ADRERR;
+ info.si_addr = (void __user *)address;
+ force_sig_info(SIGBUS, &info, tsk);
+}
diff --git a/arch/arc/mm/init.c b/arch/arc/mm/init.c
new file mode 100644
index 00000000000..caf797de23f
--- /dev/null
+++ b/arch/arc/mm/init.c
@@ -0,0 +1,187 @@
+/*
+ * Copyright (C) 2004, 2007-2010, 2011-2012 Synopsys, Inc. (www.synopsys.com)
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/kernel.h>
+#include <linux/mm.h>
+#include <linux/bootmem.h>
+#include <linux/memblock.h>
+#ifdef CONFIG_BLOCK_DEV_RAM
+#include <linux/blk.h>
+#endif
+#include <linux/swap.h>
+#include <linux/module.h>
+#include <asm/page.h>
+#include <asm/pgalloc.h>
+#include <asm/sections.h>
+#include <asm/arcregs.h>
+
+pgd_t swapper_pg_dir[PTRS_PER_PGD] __aligned(PAGE_SIZE);
+char empty_zero_page[PAGE_SIZE] __aligned(PAGE_SIZE);
+EXPORT_SYMBOL(empty_zero_page);
+
+/* Default tot mem from .config */
+static unsigned long arc_mem_sz = 0x20000000; /* some default */
+
+/* User can over-ride above with "mem=nnn[KkMm]" in cmdline */
+static int __init setup_mem_sz(char *str)
+{
+ arc_mem_sz = memparse(str, NULL) & PAGE_MASK;
+
+ /* early console might not be setup yet - it will show up later */
+ pr_info("\"mem=%s\": mem sz set to %ldM\n", str, TO_MB(arc_mem_sz));
+
+ return 0;
+}
+early_param("mem", setup_mem_sz);
+
+void __init early_init_dt_add_memory_arch(u64 base, u64 size)
+{
+ arc_mem_sz = size & PAGE_MASK;
+ pr_info("Memory size set via devicetree %ldM\n", TO_MB(arc_mem_sz));
+}
+
+/*
+ * First memory setup routine called from setup_arch()
+ * 1. setup swapper's mm @init_mm
+ * 2. Count the pages we have and setup bootmem allocator
+ * 3. zone setup
+ */
+void __init setup_arch_memory(void)
+{
+ unsigned long zones_size[MAX_NR_ZONES] = { 0, 0 };
+ unsigned long end_mem = CONFIG_LINUX_LINK_BASE + arc_mem_sz;
+
+ init_mm.start_code = (unsigned long)_text;
+ init_mm.end_code = (unsigned long)_etext;
+ init_mm.end_data = (unsigned long)_edata;
+ init_mm.brk = (unsigned long)_end;
+
+ /*
+ * We do it here, so that memory is correctly instantiated
+ * even if "mem=xxx" cmline over-ride is given and/or
+ * DT has memory node. Each causes an update to @arc_mem_sz
+ * and we finally add memory one here
+ */
+ memblock_add(CONFIG_LINUX_LINK_BASE, arc_mem_sz);
+
+ /*------------- externs in mm need setting up ---------------*/
+
+ /* first page of system - kernel .vector starts here */
+ min_low_pfn = PFN_DOWN(CONFIG_LINUX_LINK_BASE);
+
+ /* Last usable page of low mem (no HIGHMEM yet for ARC port) */
+ max_low_pfn = max_pfn = PFN_DOWN(end_mem);
+
+ max_mapnr = num_physpages = max_low_pfn - min_low_pfn;
+
+ /*------------- reserve kernel image -----------------------*/
+ memblock_reserve(CONFIG_LINUX_LINK_BASE,
+ __pa(_end) - CONFIG_LINUX_LINK_BASE);
+
+ memblock_dump_all();
+
+ /*-------------- node setup --------------------------------*/
+ memset(zones_size, 0, sizeof(zones_size));
+ zones_size[ZONE_NORMAL] = num_physpages;
+
+ /*
+ * We can't use the helper free_area_init(zones[]) because it uses
+ * PAGE_OFFSET to compute the @min_low_pfn which would be wrong
+ * when our kernel doesn't start at PAGE_OFFSET, i.e.
+ * PAGE_OFFSET != CONFIG_LINUX_LINK_BASE
+ */
+ free_area_init_node(0, /* node-id */
+ zones_size, /* num pages per zone */
+ min_low_pfn, /* first pfn of node */
+ NULL); /* NO holes */
+}
+
+/*
+ * mem_init - initializes memory
+ *
+ * Frees up bootmem
+ * Calculates and displays memory available/used
+ */
+void __init mem_init(void)
+{
+ int codesize, datasize, initsize, reserved_pages, free_pages;
+ int tmp;
+
+ high_memory = (void *)(CONFIG_LINUX_LINK_BASE + arc_mem_sz);
+
+ totalram_pages = free_all_bootmem();
+
+ /* count all reserved pages [kernel code/data/mem_map..] */
+ reserved_pages = 0;
+ for (tmp = 0; tmp < max_mapnr; tmp++)
+ if (PageReserved(mem_map + tmp))
+ reserved_pages++;
+
+ /* XXX: nr_free_pages() is equivalent */
+ free_pages = max_mapnr - reserved_pages;
+
+ /*
+ * For the purpose of display below, split up the "reserved mem":
+ * kernel code/data is already shown explicitly,
+ * show any other reservations (mem_map[ ] et al) separately
+ */
+ reserved_pages -= (((unsigned int)_end - CONFIG_LINUX_LINK_BASE) >>
+ PAGE_SHIFT);
+
+ codesize = _etext - _text;
+ datasize = _end - _etext;
+ initsize = __init_end - __init_begin;
+
+ pr_info("Memory Available: %dM / %ldM (%dK code, %dK data, %dK init, %dK reserv)\n",
+ PAGES_TO_MB(free_pages),
+ TO_MB(arc_mem_sz),
+ TO_KB(codesize), TO_KB(datasize), TO_KB(initsize),
+ PAGES_TO_KB(reserved_pages));
+}
+
+static void __init free_init_pages(const char *what, unsigned long begin,
+ unsigned long end)
+{
+ unsigned long addr;
+
+ pr_info("Freeing %s: %ldk [%lx] to [%lx]\n",
+ what, TO_KB(end - begin), begin, end);
+
+ /* need to check that the page we free is not a partial page */
+ for (addr = begin; addr + PAGE_SIZE <= end; addr += PAGE_SIZE) {
+ ClearPageReserved(virt_to_page(addr));
+ init_page_count(virt_to_page(addr));
+ free_page(addr);
+ totalram_pages++;
+ }
+}
+
+/*
+ * free_initmem: Free all the __init memory.
+ */
+void __init_refok free_initmem(void)
+{
+ free_init_pages("unused kernel memory",
+ (unsigned long)__init_begin,
+ (unsigned long)__init_end);
+}
+
+#ifdef CONFIG_BLK_DEV_INITRD
+void __init free_initrd_mem(unsigned long start, unsigned long end)
+{
+ free_init_pages("initrd memory", start, end);
+}
+#endif
+
+#ifdef CONFIG_OF_FLATTREE
+void __init early_init_dt_setup_initrd_arch(unsigned long start,
+ unsigned long end)
+{
+ pr_err("%s(%lx, %lx)\n", __func__, start, end);
+}
+#endif /* CONFIG_OF_FLATTREE */
diff --git a/arch/arc/mm/ioremap.c b/arch/arc/mm/ioremap.c
new file mode 100644
index 00000000000..3e5c92c7993
--- /dev/null
+++ b/arch/arc/mm/ioremap.c
@@ -0,0 +1,91 @@
+/*
+ * Copyright (C) 2004, 2007-2010, 2011-2012 Synopsys, Inc. (www.synopsys.com)
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/vmalloc.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/io.h>
+#include <linux/mm.h>
+#include <linux/slab.h>
+#include <asm/cache.h>
+
+void __iomem *ioremap(unsigned long paddr, unsigned long size)
+{
+ unsigned long end;
+
+ /* Don't allow wraparound or zero size */
+ end = paddr + size - 1;
+ if (!size || (end < paddr))
+ return NULL;
+
+ /* If the region is h/w uncached, avoid MMU mappings */
+ if (paddr >= ARC_UNCACHED_ADDR_SPACE)
+ return (void __iomem *)paddr;
+
+ return ioremap_prot(paddr, size, PAGE_KERNEL_NO_CACHE);
+}
+EXPORT_SYMBOL(ioremap);
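/*
 * Usage sketch, illustration only: maps a made-up peripheral register window
 * via the routine above and reads a (hypothetical) status register.
 */
static u32 example_read_periph_status(void)
{
	void __iomem *regs = ioremap(0xf0001000, 0x1000);	/* hypothetical base */
	u32 stat = 0;

	if (regs) {
		stat = ioread32(regs + 0x10);	/* access goes out uncached */
		iounmap(regs);
	}
	return stat;
}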
+
+/*
+ * ioremap with access flags
+ * Cache semantics wise it is same as ioremap - "forced" uncached.
+ * However unlike vanilla ioremap which bypasses ARC MMU for addresses in
+ * ARC hardware uncached region, this one still goes thru the MMU as caller
+ * might need finer access control (R/W/X)
+ */
+void __iomem *ioremap_prot(phys_addr_t paddr, unsigned long size,
+ unsigned long flags)
+{
+ void __iomem *vaddr;
+ struct vm_struct *area;
+ unsigned long off, end;
+ pgprot_t prot = __pgprot(flags);
+
+ /* Don't allow wraparound, zero size */
+ end = paddr + size - 1;
+ if ((!size) || (end < paddr))
+ return NULL;
+
+ /* An early platform driver might end up here */
+ if (!slab_is_available())
+ return NULL;
+
+ /* force uncached */
+ prot = pgprot_noncached(prot);
+
+ /* Mappings have to be page-aligned */
+ off = paddr & ~PAGE_MASK;
+ paddr &= PAGE_MASK;
+ size = PAGE_ALIGN(end + 1) - paddr;
+
+ /*
+ * Ok, go for it..
+ */
+ area = get_vm_area(size, VM_IOREMAP);
+ if (!area)
+ return NULL;
+ area->phys_addr = paddr;
+ vaddr = (void __iomem *)area->addr;
+ if (ioremap_page_range((unsigned long)vaddr,
+ (unsigned long)vaddr + size, paddr, prot)) {
+ vunmap((void __force *)vaddr);
+ return NULL;
+ }
+ return (void __iomem *)(off + (char __iomem *)vaddr);
+}
+EXPORT_SYMBOL(ioremap_prot);
+
+
+void iounmap(const void __iomem *addr)
+{
+ if (addr >= (void __force __iomem *)ARC_UNCACHED_ADDR_SPACE)
+ return;
+
+ vfree((void *)(PAGE_MASK & (unsigned long __force)addr));
+}
+EXPORT_SYMBOL(iounmap);
diff --git a/arch/arc/mm/tlb.c b/arch/arc/mm/tlb.c
new file mode 100644
index 00000000000..9b9ce23f4ec
--- /dev/null
+++ b/arch/arc/mm/tlb.c
@@ -0,0 +1,645 @@
+/*
+ * TLB Management (flush/create/diagnostics) for ARC700
+ *
+ * Copyright (C) 2004, 2007-2010, 2011-2012 Synopsys, Inc. (www.synopsys.com)
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * vineetg: Aug 2011
+ * -Reintroduce duplicate PD fixup - some customer chips still have the issue
+ *
+ * vineetg: May 2011
+ * -No need to flush_cache_page( ) for each call to update_mmu_cache()
+ * some of the LMBench tests improved amazingly
+ * = page-fault thrice as fast (75 usec to 28 usec)
+ * = mmap twice as fast (9.6 msec to 4.6 msec),
+ * = fork (5.3 msec to 3.7 msec)
+ *
+ * vineetg: April 2011 :
+ * -MMU v3: PD{0,1} bits layout changed: They don't overlap anymore,
+ * helps avoid a shift when preparing PD0 from PTE
+ *
+ * vineetg: April 2011 : Preparing for MMU V3
+ * -MMU v2/v3 BCRs decoded differently
+ * -Remove TLB_SIZE hardcoding as it's variable now: 256 or 512
+ * -tlb_entry_erase( ) can be void
+ * -local_flush_tlb_range( ):
+ * = need not "ceil" @end
+ * = walks MMU only if range spans < 32 entries, as opposed to 256
+ *
+ * Vineetg: Sept 10th 2008
+ * -Changes related to MMU v2 (Rel 4.8)
+ *
+ * Vineetg: Aug 29th 2008
+ * -In TLB Flush operations (Metal Fix MMU) there is an explicit command to
+ * flush Micro-TLBS. If TLB Index Reg is invalid prior to TLBIVUTLB cmd,
+ * it fails. Thus need to load it with ANY valid value before invoking
+ * TLBIVUTLB cmd
+ *
+ * Vineetg: Aug 21th 2008:
+ * -Reduced the duration of IRQ lockouts in TLB Flush routines
+ * -Multiple copies of TLB erase code separated into a "single" function
+ * -In TLB Flush routines, interrupt disabling moved UP to retrieve ASID
+ * in interrupt-safe region.
+ *
+ * Vineetg: April 23rd Bug #93131
+ * Problem: tlb_flush_kernel_range() doesn't do anything if the range to
+ * flush is more than the size of TLB itself.
+ *
+ * Rahul Trivedi : Codito Technologies 2004
+ */
+
+#include <linux/module.h>
+#include <asm/arcregs.h>
+#include <asm/setup.h>
+#include <asm/mmu_context.h>
+#include <asm/tlb.h>
+
+/* Need for ARC MMU v2
+ *
+ * ARC700 MMU-v1 had a Joint-TLB for Code and Data and is 2 way set-assoc.
+ * For a memcpy operation with 3 players (src/dst/code) such that all 3 pages
+ * map into same set, there would be contention for the 2 ways causing severe
+ * Thrashing.
+ *
+ * Although J-TLB is 2 way set assoc, ARC700 caches J-TLB into uTLBS which has
+ * much higher associativity. u-D-TLB is 8 ways, u-I-TLB is 4 ways.
+ * Given this, the thrashing problem should never happen because once the 3
+ * J-TLB entries are created (even though 3rd will knock out one of the prev
+ * two), the u-D-TLB and u-I-TLB will have what is required to accomplish memcpy
+ *
+ * Yet we still see the Thrashing because a J-TLB Write causes a flush of u-TLBs.
+ * This is a simple design for keeping them in sync. So what do we do?
+ * The solution which James came up with was pretty neat. It utilised the assoc
+ * of uTLBs by not invalidating always but only when absolutely necessary.
+ *
+ * - Existing TLB commands work as before
+ * - New command (TLBWriteNI) for TLB write without clearing uTLBs
+ * - New command (TLBIVUTLB) to invalidate uTLBs.
+ *
+ * The uTLBs need only be invalidated when pages are being removed from the
+ * OS page table. If a 'victim' TLB entry is being overwritten in the main TLB
+ * as a result of a miss, the removed entry is still allowed to exist in the
+ * uTLBs as it is still valid and present in the OS page table. This allows the
+ * full associativity of the uTLBs to hide the limited associativity of the main
+ * TLB.
+ *
+ * During a miss handler, the new "TLBWriteNI" command is used to load
+ * entries without clearing the uTLBs.
+ *
+ * When the OS page table is updated, TLB entries that may be associated with a
+ * removed page are removed (flushed) from the TLB using TLBWrite. In this
+ * circumstance, the uTLBs must also be cleared. This is done by using the
+ * existing TLBWrite command. An explicit IVUTLB is also required for those
+ * corner cases when TLBWrite was not executed at all because the corresp
+ * J-TLB entry got evicted/replaced.
+ */
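/*
 * Condensed sketch of the policy above, illustration only (the real users are
 * the TLB refill handler in tlbex.S and the flush routines later in this file):
 *
 *	// refill of a victim entry: uTLB copies stay valid, keep them
 *	write_aux_reg(ARC_REG_TLBCOMMAND, TLBWriteNI);
 *
 *	// page being unmapped: stale uTLB copies must go as well
 *	write_aux_reg(ARC_REG_TLBCOMMAND, TLBWrite);	// also clears uTLBs
 *
 *	// corner case where no TLBWrite was issued for the removed entry
 *	write_aux_reg(ARC_REG_TLBCOMMAND, TLBIVUTLB);
 */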
+
+/* A copy of the ASID from the PID reg is kept in asid_cache */
+int asid_cache = FIRST_ASID;
+
+/* ASID to mm struct mapping. We have one extra entry corresponding to
+ * NO_ASID to save us a compare when clearing the mm entry for old asid
+ * see get_new_mmu_context (asm-arc/mmu_context.h)
+ */
+struct mm_struct *asid_mm_map[NUM_ASID + 1];
+
+/*
+ * Utility Routine to erase a J-TLB entry
+ * The procedure is to look it up in the MMU. If found, ERASE it by
+ * issuing a TlbWrite CMD with PD0 = PD1 = 0
+ */
+
+static void __tlb_entry_erase(void)
+{
+ write_aux_reg(ARC_REG_TLBPD1, 0);
+ write_aux_reg(ARC_REG_TLBPD0, 0);
+ write_aux_reg(ARC_REG_TLBCOMMAND, TLBWrite);
+}
+
+static void tlb_entry_erase(unsigned int vaddr_n_asid)
+{
+ unsigned int idx;
+
+ /* Locate the TLB entry for this vaddr + ASID */
+ write_aux_reg(ARC_REG_TLBPD0, vaddr_n_asid);
+ write_aux_reg(ARC_REG_TLBCOMMAND, TLBProbe);
+ idx = read_aux_reg(ARC_REG_TLBINDEX);
+
+ /* No error means entry found, zero it out */
+ if (likely(!(idx & TLB_LKUP_ERR))) {
+ __tlb_entry_erase();
+ } else { /* Some sort of Error */
+
+ /* Duplicate entry error */
+ if (idx & 0x1) {
+ /* TODO we need to handle this case too */
+ pr_emerg("unhandled Duplicate flush for %x\n",
+ vaddr_n_asid);
+ }
+ /* else entry not found so nothing to do */
+ }
+}
+
+/****************************************************************************
+ * ARC700 MMU caches recently used J-TLB entries (RAM) as uTLBs (FLOPs)
+ *
+ * New IVUTLB cmd in MMU v2 explicitly invalidates the uTLB
+ *
+ * utlb_invalidate ( )
+ * -For v2 MMU calls Flush uTLB Cmd
+ * -For v1 MMU does nothing (except for Metal Fix v1 MMU)
+ * This is because in v1 TLBWrite itself invalidate uTLBs
+ ***************************************************************************/
+
+static void utlb_invalidate(void)
+{
+#if (CONFIG_ARC_MMU_VER >= 2)
+
+#if (CONFIG_ARC_MMU_VER < 3)
+ /* MMU v2 introduced the uTLB Flush command.
+ * There was however an obscure hardware bug, where uTLB flush would
+ * fail when a prior probe for J-TLB (both totally unrelated) would
+ * return lkup err - because the entry didn't exist in MMU.
+ * The workaround was to set Index reg with some valid value, prior to
+ * flush. This was fixed in MMU v3 hence not needed any more
+ */
+ unsigned int idx;
+
+ /* make sure INDEX Reg is valid */
+ idx = read_aux_reg(ARC_REG_TLBINDEX);
+
+ /* If not write some dummy val */
+ if (unlikely(idx & TLB_LKUP_ERR))
+ write_aux_reg(ARC_REG_TLBINDEX, 0xa);
+#endif
+
+ write_aux_reg(ARC_REG_TLBCOMMAND, TLBIVUTLB);
+#endif
+
+}
+
+/*
+ * Unconditionally (without lookup) erase the entire MMU contents
+ */
+
+noinline void local_flush_tlb_all(void)
+{
+ unsigned long flags;
+ unsigned int entry;
+ struct cpuinfo_arc_mmu *mmu = &cpuinfo_arc700[smp_processor_id()].mmu;
+
+ local_irq_save(flags);
+
+ /* Load PD0 and PD1 with template for a Blank Entry */
+ write_aux_reg(ARC_REG_TLBPD1, 0);
+ write_aux_reg(ARC_REG_TLBPD0, 0);
+
+ for (entry = 0; entry < mmu->num_tlb; entry++) {
+ /* write this entry to the TLB */
+ write_aux_reg(ARC_REG_TLBINDEX, entry);
+ write_aux_reg(ARC_REG_TLBCOMMAND, TLBWrite);
+ }
+
+ utlb_invalidate();
+
+ local_irq_restore(flags);
+}
+
+/*
+ * Flush the entire MM for userland. The fastest way is to move to Next ASID
+ */
+noinline void local_flush_tlb_mm(struct mm_struct *mm)
+{
+ /*
+ * Small optimisation courtesy IA64
+ * flush_mm called during fork,exit,munmap etc, multiple times as well.
+ * Only for fork( ) do we need to move parent to a new MMU ctxt,
+ * all other cases are NOPs, hence this check.
+ */
+ if (atomic_read(&mm->mm_users) == 0)
+ return;
+
+ /*
+ * Workaround for Android weirdism:
+ * A binder VMA could end up in a task such that vma->mm != tsk->mm
+ * old code would cause h/w - s/w ASID to get out of sync
+ */
+ if (current->mm != mm)
+ destroy_context(mm);
+ else
+ get_new_mmu_context(mm);
+}
+
+/*
+ * Flush a Range of TLB entries for userland.
+ * @start is inclusive, while @end is exclusive
+ * Difference between this and Kernel Range Flush is
+ * -Here the fastest way (if range is too large) is to move to next ASID
+ * without doing any explicit Shootdown
+ * -In case of kernel Flush, entry has to be shot down explicitly
+ */
+void local_flush_tlb_range(struct vm_area_struct *vma, unsigned long start,
+ unsigned long end)
+{
+ unsigned long flags;
+ unsigned int asid;
+
+ /* If range @start to @end is more than 32 TLB entries deep,
+ * it's better to move to a new ASID rather than searching for
+ * individual entries and then shooting them down
+ *
+ * The calc above is rough, doesn't account for unaligned parts,
+ * since this is heuristics based anyways
+ */
+ if (unlikely((end - start) >= PAGE_SIZE * 32)) {
+ local_flush_tlb_mm(vma->vm_mm);
+ return;
+ }
+
+ /*
+ * @start moved to page start: this alone suffices for checking
+ * loop end condition below, w/o need for aligning @end to end
+ * e.g. 2000 to 4001 will anyhow loop twice
+ */
+ start &= PAGE_MASK;
+
+ local_irq_save(flags);
+ asid = vma->vm_mm->context.asid;
+
+ if (asid != NO_ASID) {
+ while (start < end) {
+ tlb_entry_erase(start | (asid & 0xff));
+ start += PAGE_SIZE;
+ }
+ }
+
+ utlb_invalidate();
+
+ local_irq_restore(flags);
+}
+
+/* Flush the kernel TLB entries - vmalloc/modules (Global from MMU perspective)
+ * @start, @end interpreted as kvaddr
+ * Interestingly, shared TLB entries can also be flushed using just
+ * @start,@end alone (interpreted as user vaddr), although technically SASID
+ * is also needed. However our smart TLBProbe lookup takes care of that.
+ */
+void local_flush_tlb_kernel_range(unsigned long start, unsigned long end)
+{
+ unsigned long flags;
+
+ /* exactly same as above, except for TLB entry not taking ASID */
+
+ if (unlikely((end - start) >= PAGE_SIZE * 32)) {
+ local_flush_tlb_all();
+ return;
+ }
+
+ start &= PAGE_MASK;
+
+ local_irq_save(flags);
+ while (start < end) {
+ tlb_entry_erase(start);
+ start += PAGE_SIZE;
+ }
+
+ utlb_invalidate();
+
+ local_irq_restore(flags);
+}
+
+/*
+ * Delete TLB entry in MMU for a given page (??? address)
+ * NOTE One TLB entry contains translation for single PAGE
+ */
+
+void local_flush_tlb_page(struct vm_area_struct *vma, unsigned long page)
+{
+ unsigned long flags;
+
+ /* Note that it is critical that interrupts are DISABLED between
+ * checking the ASID and using it flush the TLB entry
+ */
+ local_irq_save(flags);
+
+ if (vma->vm_mm->context.asid != NO_ASID) {
+ tlb_entry_erase((page & PAGE_MASK) |
+ (vma->vm_mm->context.asid & 0xff));
+ utlb_invalidate();
+ }
+
+ local_irq_restore(flags);
+}
+
+/*
+ * Routine to create a TLB entry
+ */
+void create_tlb(struct vm_area_struct *vma, unsigned long address, pte_t *ptep)
+{
+ unsigned long flags;
+ unsigned int idx, asid_or_sasid;
+ unsigned long pd0_flags;
+
+ /*
+ * create_tlb() assumes that current->mm == vma->mm, since
+ * -the ASID for the TLB entry is fetched from MMU ASID reg (valid for curr)
+ * -completes the lazy write to SASID reg (again valid for curr tsk)
+ *
+ * Removing the assumption involves
+ * -Using vma->mm->context{ASID,SASID}, as opposed to MMU reg.
+ * -Fix the TLB paranoid debug code to not trigger false negatives.
+ * -More importantly it makes this handler inconsistent with fast-path
+ * TLB Refill handler which always deals with "current"
+ *
+ * Let's see the use cases when current->mm != vma->mm and we land here
+ * 1. execve->copy_strings()->__get_user_pages->handle_mm_fault
+ * Here VM wants to pre-install a TLB entry for user stack while
+ * current->mm still points to pre-execve mm (hence the condition).
+ * However the stack vaddr is soon relocated (randomization) and
+ * move_page_tables() tries to undo that TLB entry.
+ * Thus not creating TLB entry is not any worse.
+ *
+ * 2. ptrace(POKETEXT) causes a CoW - debugger(current) inserting a
+ * breakpoint in debugged task. Not creating a TLB now is not
+ * performance critical.
+ *
+ * Neither of the cases above is a good enough reason for code churn.
+ */
+ if (current->active_mm != vma->vm_mm)
+ return;
+
+ local_irq_save(flags);
+
+ tlb_paranoid_check(vma->vm_mm->context.asid, address);
+
+ address &= PAGE_MASK;
+
+ /* update this PTE credentials */
+ pte_val(*ptep) |= (_PAGE_PRESENT | _PAGE_ACCESSED);
+
+ /* Create HW TLB entry Flags (in PD0) from PTE Flags */
+#if (CONFIG_ARC_MMU_VER <= 2)
+ pd0_flags = ((pte_val(*ptep) & PTE_BITS_IN_PD0) >> 1);
+#else
+ pd0_flags = ((pte_val(*ptep) & PTE_BITS_IN_PD0));
+#endif
+
+ /* ASID for this task */
+ asid_or_sasid = read_aux_reg(ARC_REG_PID) & 0xff;
+
+ write_aux_reg(ARC_REG_TLBPD0, address | pd0_flags | asid_or_sasid);
+
+ /* Load remaining info in PD1 (Page Frame Addr and Kx/Kw/Kr Flags) */
+ write_aux_reg(ARC_REG_TLBPD1, (pte_val(*ptep) & PTE_BITS_IN_PD1));
+
+ /* First verify if entry for this vaddr+ASID already exists */
+ write_aux_reg(ARC_REG_TLBCOMMAND, TLBProbe);
+ idx = read_aux_reg(ARC_REG_TLBINDEX);
+
+	/*
+	 * If not already present, get a free slot from the MMU.
+	 * Otherwise, the Probe will have located the entry and set the INDEX
+	 * reg to the existing location, causing the Write cmd to overwrite
+	 * the existing entry with the new PD0 and PD1
+	 */
+ if (likely(idx & TLB_LKUP_ERR))
+ write_aux_reg(ARC_REG_TLBCOMMAND, TLBGetIndex);
+
+	/*
+	 * Commit the Entry to MMU
+	 * It doesn't seem safe to use the TLBWriteNI cmd here,
+	 * which doesn't flush uTLBs. I'd rather be safe than sorry.
+	 */
+ write_aux_reg(ARC_REG_TLBCOMMAND, TLBWrite);
+
+ local_irq_restore(flags);
+}
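+/*
+ * For illustration only (not compiled into the kernel): the PD0/PD1 split
+ * performed by create_tlb() above, written as a small C sketch. The helper
+ * name pte_to_pd() is hypothetical; the macros are the ones used above.
+ *
+ *	static void pte_to_pd(pte_t pte, unsigned long vaddr, unsigned int asid,
+ *			      unsigned long *pd0, unsigned long *pd1)
+ *	{
+ *	#if (CONFIG_ARC_MMU_VER <= 2)
+ *		unsigned long flags = (pte_val(pte) & PTE_BITS_IN_PD0) >> 1;
+ *	#else
+ *		unsigned long flags = pte_val(pte) & PTE_BITS_IN_PD0;
+ *	#endif
+ *		*pd0 = (vaddr & PAGE_MASK) | flags | (asid & 0xff);
+ *		*pd1 = pte_val(pte) & PTE_BITS_IN_PD1;	/* PFN + Kr/Kw/Kx */
+ *	}
+ */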
+
+/* Arch hook called by core VM at the end of handle_mm_fault( ),
+ * when a new PTE is entered in the page tables or an existing one
+ * is modified. We aggressively pre-install a TLB entry.
+ */
+
+void update_mmu_cache(struct vm_area_struct *vma, unsigned long vaddress,
+ pte_t *ptep)
+{
+
+ create_tlb(vma, vaddress, ptep);
+}
+
+/* Read the MMU Build Configuration Register, decode it and save into
+ * the cpuinfo structure for later use.
+ * No validation is done here, simply read/convert the BCR.
+ */
+void __init read_decode_mmu_bcr(void)
+{
+ unsigned int tmp;
+ struct bcr_mmu_1_2 *mmu2; /* encoded MMU2 attr */
+ struct bcr_mmu_3 *mmu3; /* encoded MMU3 attr */
+ struct cpuinfo_arc_mmu *mmu = &cpuinfo_arc700[smp_processor_id()].mmu;
+
+ tmp = read_aux_reg(ARC_REG_MMU_BCR);
+ mmu->ver = (tmp >> 24);
+
+ if (mmu->ver <= 2) {
+ mmu2 = (struct bcr_mmu_1_2 *)&tmp;
+ mmu->pg_sz = PAGE_SIZE;
+ mmu->sets = 1 << mmu2->sets;
+ mmu->ways = 1 << mmu2->ways;
+ mmu->u_dtlb = mmu2->u_dtlb;
+ mmu->u_itlb = mmu2->u_itlb;
+ } else {
+ mmu3 = (struct bcr_mmu_3 *)&tmp;
+ mmu->pg_sz = 512 << mmu3->pg_sz;
+ mmu->sets = 1 << mmu3->sets;
+ mmu->ways = 1 << mmu3->ways;
+ mmu->u_dtlb = mmu3->u_dtlb;
+ mmu->u_itlb = mmu3->u_itlb;
+ }
+
+ mmu->num_tlb = mmu->sets * mmu->ways;
+}
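+/*
+ * Worked example (hypothetical BCR field values, for illustration only):
+ * an MMU v3 BCR with pg_sz = 4, sets = 7, ways = 2 decodes above to
+ * pg_sz = 512 << 4 = 8K, sets = 1 << 7 = 128, ways = 1 << 2 = 4,
+ * i.e. a 128-set, 4-way J-TLB with 512 entries.
+ */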
+
+char *arc_mmu_mumbojumbo(int cpu_id, char *buf, int len)
+{
+ int n = 0;
+ struct cpuinfo_arc_mmu *p_mmu = &cpuinfo_arc700[smp_processor_id()].mmu;
+
+ n += scnprintf(buf + n, len - n, "ARC700 MMU [v%x]\t: %dk PAGE, ",
+ p_mmu->ver, TO_KB(p_mmu->pg_sz));
+
+ n += scnprintf(buf + n, len - n,
+ "J-TLB %d (%dx%d), uDTLB %d, uITLB %d, %s\n",
+ p_mmu->num_tlb, p_mmu->sets, p_mmu->ways,
+ p_mmu->u_dtlb, p_mmu->u_itlb,
+ __CONFIG_ARC_MMU_SASID_VAL ? "SASID" : "");
+
+ return buf;
+}
+
+void __init arc_mmu_init(void)
+{
+ char str[256];
+ struct cpuinfo_arc_mmu *mmu = &cpuinfo_arc700[smp_processor_id()].mmu;
+
+ printk(arc_mmu_mumbojumbo(0, str, sizeof(str)));
+
+	/*
+	 * For efficiency's sake, the kernel is built at compile time for one
+	 * MMU version, which must match the hardware it is running on.
+	 * Linux built for MMU v2, if run on MMU v1, will break because v1
+	 * hardware doesn't understand cmds such as WriteNI or IVUTLB.
+	 * On the other hand, Linux built for v1, if run on MMU v2, will do
+	 * un-needed workarounds to prevent memcpy thrashing.
+	 * Similarly, MMU v3 has new features which won't work on older MMUs.
+	 */
+ if (mmu->ver != CONFIG_ARC_MMU_VER) {
+ panic("MMU ver %d doesn't match kernel built for %d...\n",
+ mmu->ver, CONFIG_ARC_MMU_VER);
+ }
+
+ if (mmu->pg_sz != PAGE_SIZE)
+ panic("MMU pg size != PAGE_SIZE (%luk)\n", TO_KB(PAGE_SIZE));
+
+ /*
+ * ASID mgmt data structures are compile time init
+ * asid_cache = FIRST_ASID and asid_mm_map[] all zeroes
+ */
+
+ local_flush_tlb_all();
+
+ /* Enable the MMU */
+ write_aux_reg(ARC_REG_PID, MMU_ENABLE);
+
+ /* In smp we use this reg for interrupt 1 scratch */
+#ifndef CONFIG_SMP
+ /* swapper_pg_dir is the pgd for the kernel, used by vmalloc */
+ write_aux_reg(ARC_REG_SCRATCH_DATA0, swapper_pg_dir);
+#endif
+}
+
+/*
+ * TLB Programmer's Model uses Linear Indexes: 0 to {255, 511} for 128 x {2,4}
+ * The mapping walks the ways (columns) of a set first, then moves to the
+ * next set: idx = set * ways + way.
+ * --------------------- -----------
+ * |way0|way1|way2|way3| |way0|way1|
+ * --------------------- -----------
+ * [set0] | 0 | 1 | 2 | 3 | | 0 | 1 |
+ * [set1] | 4 | 5 | 6 | 7 | | 2 | 3 |
+ * ~ ~ ~ ~
+ * [set127] | 508| 509| 510| 511| | 254| 255|
+ * --------------------- -----------
+ * For normal operations we don't (must not) care how the above works, since
+ * the MMU cmd getIndex(vaddr) abstracts that out.
+ * However, for walking the WAYS of a SET, we need to know this.
+ */
+#define SET_WAY_TO_IDX(mmu, set, way) ((set) * mmu->ways + (way))
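+/*
+ * Example (illustration only): for the 4-way geometry drawn above,
+ * SET_WAY_TO_IDX(mmu, 1, 2) = 1 * 4 + 2 = 6, matching entry 6 in the [set1]
+ * row; for the 2-way geometry, set 127 / way 1 gives 127 * 2 + 1 = 255,
+ * the last linear index.
+ */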
+
+/* Handling of Duplicate PD (TLB entry) in the MMU.
+ * -Could be due to buggy customer tapeouts or obscure kernel bugs
+ * -The MMU complains not at the time of duplicate PD installation, but at the
+ *  time of a lookup matching multiple ways.
+ * -Ideally these should never happen - but if they do - workaround by deleting
+ *  the duplicate one.
+ * -Knob to be verbose about it (TODO: hook it up to debugfs)
+ */
+volatile int dup_pd_verbose = 1;	/* Be silent about it or complain (default) */
+
+void do_tlb_overlap_fault(unsigned long cause, unsigned long address,
+ struct pt_regs *regs)
+{
+ int set, way, n;
+ unsigned int pd0[4], pd1[4]; /* assume max 4 ways */
+ unsigned long flags, is_valid;
+ struct cpuinfo_arc_mmu *mmu = &cpuinfo_arc700[smp_processor_id()].mmu;
+
+ local_irq_save(flags);
+
+ /* re-enable the MMU */
+ write_aux_reg(ARC_REG_PID, MMU_ENABLE | read_aux_reg(ARC_REG_PID));
+
+ /* loop thru all sets of TLB */
+ for (set = 0; set < mmu->sets; set++) {
+
+ /* read out all the ways of current set */
+ for (way = 0, is_valid = 0; way < mmu->ways; way++) {
+ write_aux_reg(ARC_REG_TLBINDEX,
+ SET_WAY_TO_IDX(mmu, set, way));
+ write_aux_reg(ARC_REG_TLBCOMMAND, TLBRead);
+ pd0[way] = read_aux_reg(ARC_REG_TLBPD0);
+ pd1[way] = read_aux_reg(ARC_REG_TLBPD1);
+ is_valid |= pd0[way] & _PAGE_PRESENT;
+ }
+
+ /* If all the WAYS in SET are empty, skip to next SET */
+ if (!is_valid)
+ continue;
+
+ /* Scan the set for duplicate ways: needs a nested loop */
+ for (way = 0; way < mmu->ways; way++) {
+ if (!pd0[way])
+ continue;
+
+ for (n = way + 1; n < mmu->ways; n++) {
+ if ((pd0[way] & PAGE_MASK) ==
+ (pd0[n] & PAGE_MASK)) {
+
+ if (dup_pd_verbose) {
+ pr_info("Duplicate PD's @"
+ "[%d:%d]/[%d:%d]\n",
+ set, way, set, n);
+ pr_info("TLBPD0[%u]: %08x\n",
+ way, pd0[way]);
+ }
+
+ /*
+ * clear entry @way and not @n. This is
+ * critical to our optimised loop
+ */
+ pd0[way] = pd1[way] = 0;
+ write_aux_reg(ARC_REG_TLBINDEX,
+ SET_WAY_TO_IDX(mmu, set, way));
+ __tlb_entry_erase();
+ }
+ }
+ }
+ }
+
+ local_irq_restore(flags);
+}
+
+/***********************************************************************
+ * Diagnostic Routines
+ *  -Called from Low Level TLB Handlers if things don't look good
+ **********************************************************************/
+
+#ifdef CONFIG_ARC_DBG_TLB_PARANOIA
+
+/*
+ * Low Level ASM TLB handler calls this if it finds that HW and SW ASIDS
+ * don't match
+ */
+void print_asid_mismatch(int is_fast_path)
+{
+ int pid_sw, pid_hw;
+ pid_sw = current->active_mm->context.asid;
+ pid_hw = read_aux_reg(ARC_REG_PID) & 0xff;
+
+ pr_emerg("ASID Mismatch in %s Path Handler: sw-pid=0x%x hw-pid=0x%x\n",
+ is_fast_path ? "Fast" : "Slow", pid_sw, pid_hw);
+
+ __asm__ __volatile__("flag 1");
+}
+
+void tlb_paranoid_check(unsigned int pid_sw, unsigned long addr)
+{
+ unsigned int pid_hw;
+
+ pid_hw = read_aux_reg(ARC_REG_PID) & 0xff;
+
+ if (addr < 0x70000000 && ((pid_hw != pid_sw) || (pid_sw == NO_ASID)))
+ print_asid_mismatch(0);
+}
+#endif
diff --git a/arch/arc/mm/tlbex.S b/arch/arc/mm/tlbex.S
new file mode 100644
index 00000000000..9df765dc7c3
--- /dev/null
+++ b/arch/arc/mm/tlbex.S
@@ -0,0 +1,408 @@
+/*
+ * TLB Exception Handling for ARC
+ *
+ * Copyright (C) 2004, 2007-2010, 2011-2012 Synopsys, Inc. (www.synopsys.com)
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * Vineetg: April 2011 :
+ * -MMU v1: moved out legacy code into a separate file
+ * -MMU v3: PD{0,1} bits layout changed: They don't overlap anymore,
+ * helps avoid a shift when preparing PD0 from PTE
+ *
+ * Vineetg: July 2009
+ * -For MMU V2, we need not do heuristics at the time of committing a D-TLB
+ *  entry, so that it doesn't knock out its I-TLB entry
+ * -Some more fine tuning:
+ * bmsk instead of add, asl.cc instead of branch, delay slot utilise etc
+ *
+ * Vineetg: July 2009
+ * -Practically rewrote the I/D TLB Miss handlers
+ *  Now 40 and 135 instructions apiece as compared to 131 and 449 resp.
+ * Hence Leaner by 1.5 K
+ * Used Conditional arithmetic to replace excessive branching
+ * Also used short instructions wherever possible
+ *
+ * Vineetg: Aug 13th 2008
+ * -Passing ECR (Exception Cause REG) to do_page_fault( ) for printing
+ * more information in case of a Fatality
+ *
+ * Vineetg: March 25th Bug #92690
+ * -Added Debug Code to check if sw-ASID == hw-ASID
+ *
+ * Rahul Trivedi, Amit Bhor: Codito Technologies 2004
+ */
+
+ .cpu A7
+
+#include <linux/linkage.h>
+#include <asm/entry.h>
+#include <asm/tlb.h>
+#include <asm/pgtable.h>
+#include <asm/arcregs.h>
+#include <asm/cache.h>
+#include <asm/processor.h>
+#if (CONFIG_ARC_MMU_VER == 1)
+#include <asm/tlb-mmu1.h>
+#endif
+
+;--------------------------------------------------------------------------
+; scratch memory to save the registers (r0-r3) used to code TLB refill Handler
+; For details refer to comments before TLBMISS_FREEUP_REGS below
+;--------------------------------------------------------------------------
+
+ARCFP_DATA ex_saved_reg1
+ .align 1 << L1_CACHE_SHIFT ; IMP: Must be Cache Line aligned
+ .type ex_saved_reg1, @object
+#ifdef CONFIG_SMP
+ .size ex_saved_reg1, (CONFIG_NR_CPUS << L1_CACHE_SHIFT)
+ex_saved_reg1:
+ .zero (CONFIG_NR_CPUS << L1_CACHE_SHIFT)
+#else
+ .size ex_saved_reg1, 16
+ex_saved_reg1:
+ .zero 16
+#endif
+
+;============================================================================
+; Troubleshooting Stuff
+;============================================================================
+
+; Linux keeps ASID (Address Space ID) in task->active_mm->context.asid
+; When Creating TLB Entries, instead of doing 3 dependent loads from memory,
+; we use the MMU PID Reg to get current ASID.
+; In bizarre scenarios SW and HW ASID can get out of sync, which is trouble.
+; So we try to detect this in the TLB Miss handler.
+
+
+.macro DBG_ASID_MISMATCH
+
+#ifdef CONFIG_ARC_DBG_TLB_PARANOIA
+
+ ; make sure h/w ASID is same as s/w ASID
+
+ GET_CURR_TASK_ON_CPU r3
+ ld r0, [r3, TASK_ACT_MM]
+ ld r0, [r0, MM_CTXT+MM_CTXT_ASID]
+
+ lr r1, [ARC_REG_PID]
+ and r1, r1, 0xFF
+ breq r1, r0, 5f
+
+ ; Error if H/w and S/w ASID don't match, but NOT if in kernel mode
+ lr r0, [erstatus]
+ bbit0 r0, STATUS_U_BIT, 5f
+
+	; We are surely in troubled waters: flag the error, but to do so we
+	; need to switch to the kernel mode stack to call the error routine
+ GET_TSK_STACK_BASE r3, sp
+
+	; Call print_asid_mismatch() to shout it out loud
+ mov r0, 1
+ j print_asid_mismatch
+
+5: ; ASIDs match so proceed normally
+ nop
+
+#endif
+
+.endm
+
+;============================================================================
+;TLB Miss handling Code
+;============================================================================
+
+;-----------------------------------------------------------------------------
+; This macro does the page-table lookup for the faulting address.
+; OUT: r0 = PTE faulted on, r1 = ptr to PTE, r2 = Faulting V-address
+.macro LOAD_FAULT_PTE
+
+ lr r2, [efa]
+
+#ifndef CONFIG_SMP
+ lr r1, [ARC_REG_SCRATCH_DATA0] ; current pgd
+#else
+ GET_CURR_TASK_ON_CPU r1
+ ld r1, [r1, TASK_ACT_MM]
+ ld r1, [r1, MM_PGD]
+#endif
+
+ lsr r0, r2, PGDIR_SHIFT ; Bits for indexing into PGD
+ ld.as r1, [r1, r0] ; PGD entry corresp to faulting addr
+ and.f r1, r1, PAGE_MASK ; Ignoring protection and other flags
+ ; contains Ptr to Page Table
+ bz.d do_slow_path_pf ; if no Page Table, do page fault
+
+	; Get the PTE entry: The idea is
+	; (1) x = addr >> PAGE_SHIFT	-> masks page-off bits from @fault-addr
+	; (2) y = x & (PTRS_PER_PTE - 1) -> to get index
+	; (3) z = pgtbl[y]
+	; To avoid the multiply by 4 (sizeof(pte_t)) at the end, we do the
+	; -2, <<2 below
+
+ lsr r0, r2, (PAGE_SHIFT - 2)
+ and r0, r0, ( (PTRS_PER_PTE - 1) << 2)
+ ld.aw r0, [r1, r0] ; get PTE and PTE ptr for fault addr
+#ifdef CONFIG_ARC_DBG_TLB_MISS_COUNT
+ and.f 0, r0, _PAGE_PRESENT
+ bz 1f
+ ld r2, [num_pte_not_present]
+ add r2, r2, 1
+ st r2, [num_pte_not_present]
+1:
+#endif
+
+.endm
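+;-----------------------------------------------------------------------------
+; For reference only: a rough C sketch of what LOAD_FAULT_PTE does (symbols
+; are the ones used in the macro; this is an illustration, not the handler):
+;
+;	pgd_t *pgd = <ARC_REG_SCRATCH_DATA0 or current->active_mm->pgd>;
+;	unsigned long ptbl = pgd[vaddr >> PGDIR_SHIFT] & PAGE_MASK;
+;	if (!ptbl)
+;		goto do_slow_path_pf;
+;	pte_t *ptep = (pte_t *)ptbl + ((vaddr >> PAGE_SHIFT) & (PTRS_PER_PTE - 1));
+;	pte_t pte = *ptep;			/* r0; r1 = ptep */
+;
+; The lsr by (PAGE_SHIFT - 2) plus the mask pre-shifted by 2 folds the
+; "* sizeof(pte_t)" (4 bytes) into the index computation.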
+
+;-----------------------------------------------------------------
+; Convert Linux PTE entry into TLB entry
+; A one-word PTE entry is programmed as two-word TLB Entry [PD0:PD1] in mmu
+; IN: r0 = PTE, r1 = ptr to PTE
+
+.macro CONV_PTE_TO_TLB
+ and r3, r0, PTE_BITS_IN_PD1 ; Extract permission flags+PFN from PTE
+ sr r3, [ARC_REG_TLBPD1] ; these go in PD1
+
+ and r2, r0, PTE_BITS_IN_PD0 ; Extract other PTE flags: (V)alid, (G)lb
+#if (CONFIG_ARC_MMU_VER <= 2)	/* Need not be done for v3 onwards */
+ lsr r2, r2 ; shift PTE flags to match layout in PD0
+#endif
+
+ lr r3,[ARC_REG_TLBPD0] ; MMU prepares PD0 with vaddr and asid
+
+ or r3, r3, r2 ; S | vaddr | {sasid|asid}
+ sr r3,[ARC_REG_TLBPD0] ; rewrite PD0
+.endm
+
+;-----------------------------------------------------------------
+; Commit the TLB entry into MMU
+
+.macro COMMIT_ENTRY_TO_MMU
+
+ /* Get free TLB slot: Set = computed from vaddr, way = random */
+ sr TLBGetIndex, [ARC_REG_TLBCOMMAND]
+
+ /* Commit the Write */
+#if (CONFIG_ARC_MMU_VER >= 2) /* introduced in v2 */
+ sr TLBWriteNI, [ARC_REG_TLBCOMMAND]
+#else
+ sr TLBWrite, [ARC_REG_TLBCOMMAND]
+#endif
+.endm
+
+;-----------------------------------------------------------------
+; ARC700 Exception Handling doesn't auto-switch stack and it only provides
+; ONE scratch AUX reg "ARC_REG_SCRATCH_DATA0"
+;
+; For Non-SMP, the scratch AUX reg is repurposed to cache the task PGD, so a
+; "global" is used to free up the FIRST core reg, to be able to code the rest
+; of the exception prologue (IRQs are auto-disabled on Exceptions, so it's IRQ-safe).
+; Since the Fast Path TLB Miss handler is coded with 4 regs, the remaining 3
+; need to be saved as well by extending the "global" to be 4 words. Hence
+; ".size ex_saved_reg1, 16"
+; [All of this dance is to avoid stack switching for each TLB Miss, since we
+; need to save only a handful of regs, as opposed to the complete reg file]
+;
+; For ARC700 SMP, the "global" obviously can't be used to free up the FIRST
+; core reg, as that would not be SMP safe.
+; Thus the scratch AUX reg is used (and is no longer used to cache the task PGD).
+; To save the remaining 3 regs per cpu, the global is made "per-cpu".
+; Epilogue thus has to locate the "per-cpu" storage for regs.
+; To avoid cache line bouncing the per-cpu global is aligned/sized per
+; L1_CACHE_SHIFT, despite fundamentally needing to be 12 bytes only. Hence
+; ".size ex_saved_reg1, (CONFIG_NR_CPUS << L1_CACHE_SHIFT)"
+
+; As simple as that....
+
+.macro TLBMISS_FREEUP_REGS
+#ifdef CONFIG_SMP
+ sr r0, [ARC_REG_SCRATCH_DATA0] ; freeup r0 to code with
+ GET_CPU_ID r0 ; get to per cpu scratch mem,
+ lsl r0, r0, L1_CACHE_SHIFT ; cache line wide per cpu
+ add r0, @ex_saved_reg1, r0
+#else
+ st r0, [@ex_saved_reg1]
+ mov_s r0, @ex_saved_reg1
+#endif
+ st_s r1, [r0, 4]
+ st_s r2, [r0, 8]
+ st_s r3, [r0, 12]
+
+	; VERIFY that the ASID in the MMU-PID Reg is the same as
+	; the one in the Linux data structures
+
+ DBG_ASID_MISMATCH
+.endm
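+;-----------------------------------------------------------------
+; For reference only (illustration, in C terms): the per-cpu save slot the
+; SMP variant of TLBMISS_FREEUP_REGS computes is
+;
+;	slot = (char *)&ex_saved_reg1 + (cpu_id << L1_CACHE_SHIFT);
+;	/* r1/r2/r3 land at slot+4/+8/+12; r0 is parked in the
+;	 * ARC_REG_SCRATCH_DATA0 aux reg for the SMP case */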
+
+;-----------------------------------------------------------------
+.macro TLBMISS_RESTORE_REGS
+#ifdef CONFIG_SMP
+ GET_CPU_ID r0 ; get to per cpu scratch mem
+ lsl r0, r0, L1_CACHE_SHIFT ; each is cache line wide
+ add r0, @ex_saved_reg1, r0
+ ld_s r3, [r0,12]
+ ld_s r2, [r0, 8]
+ ld_s r1, [r0, 4]
+ lr r0, [ARC_REG_SCRATCH_DATA0]
+#else
+ mov_s r0, @ex_saved_reg1
+ ld_s r3, [r0,12]
+ ld_s r2, [r0, 8]
+ ld_s r1, [r0, 4]
+ ld_s r0, [r0]
+#endif
+.endm
+
+ARCFP_CODE ;Fast Path Code, candidate for ICCM
+
+;-----------------------------------------------------------------------------
+; I-TLB Miss Exception Handler
+;-----------------------------------------------------------------------------
+
+ARC_ENTRY EV_TLBMissI
+
+ TLBMISS_FREEUP_REGS
+
+#ifdef CONFIG_ARC_DBG_TLB_MISS_COUNT
+ ld r0, [@numitlb]
+ add r0, r0, 1
+ st r0, [@numitlb]
+#endif
+
+ ;----------------------------------------------------------------
+ ; Get the PTE corresponding to V-addr accessed
+ LOAD_FAULT_PTE
+
+ ;----------------------------------------------------------------
+ ; VERIFY_PTE: Check if PTE permissions approp for executing code
+ cmp_s r2, VMALLOC_START
+ mov.lo r2, (_PAGE_PRESENT | _PAGE_READ | _PAGE_EXECUTE)
+ mov.hs r2, (_PAGE_PRESENT | _PAGE_K_READ | _PAGE_K_EXECUTE)
+
+ and r3, r0, r2 ; Mask out NON Flag bits from PTE
+ xor.f r3, r3, r2 ; check ( ( pte & flags_test ) == flags_test )
+ bnz do_slow_path_pf
+
+ ; Let Linux VM know that the page was accessed
+ or r0, r0, (_PAGE_PRESENT | _PAGE_ACCESSED) ; set Accessed Bit
+ st_s r0, [r1] ; Write back PTE
+
+ CONV_PTE_TO_TLB
+ COMMIT_ENTRY_TO_MMU
+ TLBMISS_RESTORE_REGS
+ rtie
+
+ARC_EXIT EV_TLBMissI
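+;-----------------------------------------------------------------------------
+; The I-TLB VERIFY_PTE test above, spelt out in C (a sketch for illustration
+; only): every required permission bit must be present in the PTE.
+;
+;	unsigned long need = (vaddr < VMALLOC_START)
+;		? (_PAGE_PRESENT | _PAGE_READ | _PAGE_EXECUTE)
+;		: (_PAGE_PRESENT | _PAGE_K_READ | _PAGE_K_EXECUTE);
+;	if ((pte & need) != need)
+;		goto do_slow_path_pf;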
+
+;-----------------------------------------------------------------------------
+; D-TLB Miss Exception Handler
+;-----------------------------------------------------------------------------
+
+ARC_ENTRY EV_TLBMissD
+
+ TLBMISS_FREEUP_REGS
+
+#ifdef CONFIG_ARC_DBG_TLB_MISS_COUNT
+ ld r0, [@numdtlb]
+ add r0, r0, 1
+ st r0, [@numdtlb]
+#endif
+
+ ;----------------------------------------------------------------
+ ; Get the PTE corresponding to V-addr accessed
+	; If the PTE exists, it sets up r0 = PTE, r1 = ptr to PTE
+ LOAD_FAULT_PTE
+
+ ;----------------------------------------------------------------
+ ; VERIFY_PTE: Chk if PTE permissions approp for data access (R/W/R+W)
+
+ mov_s r2, 0
+ lr r3, [ecr]
+ btst_s r3, ECR_C_BIT_DTLB_LD_MISS ; Read Access
+ or.nz r2, r2, _PAGE_READ ; chk for Read flag in PTE
+ btst_s r3, ECR_C_BIT_DTLB_ST_MISS ; Write Access
+ or.nz r2, r2, _PAGE_WRITE ; chk for Write flag in PTE
+	; The ladder above also takes care of XCHG accesses,
+	; which are both Read and Write
+
+	; If kernel mode access, convert _PAGE_xx flags to _PAGE_K_xx
+ ; For copy_(to|from)_user, despite exception taken in kernel mode,
+ ; this code is not hit, because EFA would still be the user mode
+ ; address (EFA < 0x6000_0000).
+ ; This code is for legit kernel mode faults, vmalloc specifically
+ ; (EFA: 0x7000_0000 to 0x7FFF_FFFF)
+
+ lr r3, [efa]
+ cmp r3, VMALLOC_START - 1 ; If kernel mode access
+ asl.hi r2, r2, 3 ; make _PAGE_xx flags as _PAGE_K_xx
+ or r2, r2, _PAGE_PRESENT ; Common flag for K/U mode
+
+ ; By now, r2 setup with all the Flags we need to check in PTE
+ and r3, r0, r2 ; Mask out NON Flag bits from PTE
+	brne.d  r3, r2, do_slow_path_pf	; if ((pte & flags_test) != flags_test) -> slow path
+
+ ;----------------------------------------------------------------
+ ; UPDATE_PTE: Let Linux VM know that page was accessed/dirty
+ lr r3, [ecr]
+ or r0, r0, (_PAGE_PRESENT | _PAGE_ACCESSED) ; Accessed bit always
+ btst_s r3, ECR_C_BIT_DTLB_ST_MISS ; See if it was a Write Access ?
+ or.nz r0, r0, _PAGE_MODIFIED ; if Write, set Dirty bit as well
+ st_s r0, [r1] ; Write back PTE
+
+ CONV_PTE_TO_TLB
+
+#if (CONFIG_ARC_MMU_VER == 1)
+	; An MMU with a 2-way set assoc J-TLB needs some help in the pathetic case
+	; of memcpy, where 3 parties contend for 2 ways, causing a livelock.
+ ; But only for old MMU or one with Metal Fix
+ TLB_WRITE_HEURISTICS
+#endif
+
+ COMMIT_ENTRY_TO_MMU
+ TLBMISS_RESTORE_REGS
+ rtie
+
+;-------- Common routine to call Linux Page Fault Handler -----------
+do_slow_path_pf:
+
+	; Restore the 4 scratch regs saved by the fast path miss handler
+ TLBMISS_RESTORE_REGS
+
+ ; Slow path TLB Miss handled as a regular ARC Exception
+ ; (stack switching / save the complete reg-file).
+ ; That requires freeing up r9
+ EXCPN_PROLOG_FREEUP_REG r9
+
+ lr r9, [erstatus]
+
+ SWITCH_TO_KERNEL_STK
+ SAVE_ALL_SYS
+
+	; ------- setup args for Linux Page fault Handler ---------
+ mov_s r0, sp
+ lr r2, [efa]
+ lr r3, [ecr]
+
+	; Both st and ex imply WRITE access of some sort, hence do_page_fault( )
+	; is invoked with write=1 for DTLB-st/ex Miss and write=0 for ITLB Miss
+	; or DTLB-ld Miss
+	; DTLB Miss Cause code is ld = 0x01, st = 0x02, ex = 0x03
+	; The following code uses the fact that st/ex have one bit in common
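+	; For illustration: cause 0x01 (ld) has the st/ex common bit clear -> write = 0
+	;                   cause 0x02 (st) and 0x03 (ex) have it set      -> write = 1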
+
+ btst_s r3, ECR_C_BIT_DTLB_ST_MISS
+ mov.z r1, 0
+ mov.nz r1, 1
+
+	; We don't want exceptions to be disabled while the fault is handled.
+	; Now that we have saved the context, we return from the exception,
+	; hence exceptions get re-enabled
+
+ FAKE_RET_FROM_EXCPN r9
+
+ bl do_page_fault
+ b ret_from_exception
+
+ARC_EXIT EV_TLBMissD
+
+ARC_ENTRY EV_TLBMissB ; Bogus entry to measure sz of DTLBMiss hdlr