Diffstat (limited to 'arch/arc/mm')
 arch/arc/mm/Makefile       |   2
 arch/arc/mm/cache_arc700.c | 471
 arch/arc/mm/extable.c      |   4
 arch/arc/mm/fault.c        |   1
 arch/arc/mm/init.c         |  26
 arch/arc/mm/ioremap.c      |   2
 arch/arc/mm/mmap.c         |  78
 arch/arc/mm/tlb.c          |  48
 arch/arc/mm/tlbex.S        |   6
 9 files changed, 352 insertions(+), 286 deletions(-)
diff --git a/arch/arc/mm/Makefile b/arch/arc/mm/Makefile
index 168dc146a8f..ac95cc239c1 100644
--- a/arch/arc/mm/Makefile
+++ b/arch/arc/mm/Makefile
@@ -7,4 +7,4 @@
#
obj-y := extable.o ioremap.o dma.o fault.o init.o
-obj-y += tlb.o tlbex.o cache_arc700.o
+obj-y += tlb.o tlbex.o cache_arc700.o mmap.o
diff --git a/arch/arc/mm/cache_arc700.c b/arch/arc/mm/cache_arc700.c
index 88d617d8423..aedce190544 100644
--- a/arch/arc/mm/cache_arc700.c
+++ b/arch/arc/mm/cache_arc700.c
@@ -68,20 +68,11 @@
#include <linux/mmu_context.h>
#include <linux/syscalls.h>
#include <linux/uaccess.h>
+#include <linux/pagemap.h>
#include <asm/cacheflush.h>
#include <asm/cachectl.h>
#include <asm/setup.h>
-
-#ifdef CONFIG_ARC_HAS_ICACHE
-static void __ic_line_inv_no_alias(unsigned long, int);
-static void __ic_line_inv_2_alias(unsigned long, int);
-static void __ic_line_inv_4_alias(unsigned long, int);
-
-/* Holds the ptr to flush routine, dependign on size due to aliasing issues */
-static void (*___flush_icache_rtn) (unsigned long, int);
-#endif
-
char *arc_cache_mumbojumbo(int cpu_id, char *buf, int len)
{
int n = 0;
@@ -109,7 +100,7 @@ char *arc_cache_mumbojumbo(int cpu_id, char *buf, int len)
* the cpuinfo structure for later use.
* No Validation done here, simply read/convert the BCRs
*/
-void __init read_decode_cache_bcr(void)
+void __cpuinit read_decode_cache_bcr(void)
{
struct bcr_cache ibcr, dbcr;
struct cpuinfo_arc_cache *p_ic, *p_dc;
@@ -141,13 +132,14 @@ void __init read_decode_cache_bcr(void)
* 3. Enable the Caches, setup default flush mode for D-Cache
* 3. Calculate the SHMLBA used by user space
*/
-void __init arc_cache_init(void)
+void __cpuinit arc_cache_init(void)
{
unsigned int temp;
unsigned int cpu = smp_processor_id();
struct cpuinfo_arc_cache *ic = &cpuinfo_arc700[cpu].icache;
struct cpuinfo_arc_cache *dc = &cpuinfo_arc700[cpu].dcache;
int way_pg_ratio = way_pg_ratio;
+ int dcache_does_alias;
char str[256];
printk(arc_cache_mumbojumbo(0, str, sizeof(str)));
@@ -171,30 +163,6 @@ void __init arc_cache_init(void)
}
#endif
-
- /*
- * if Cache way size is <= page size then no aliasing exhibited
- * otherwise ratio determines num of aliases.
- * e.g. 32K I$, 2 way set assoc, 8k pg size
- * way-sz = 32k/2 = 16k
- * way-pg-ratio = 16k/8k = 2, so 2 aliases possible
- * (meaning 1 line could be in 2 possible locations).
- */
- way_pg_ratio = ic->sz / ARC_ICACHE_WAYS / PAGE_SIZE;
- switch (way_pg_ratio) {
- case 0:
- case 1:
- ___flush_icache_rtn = __ic_line_inv_no_alias;
- break;
- case 2:
- ___flush_icache_rtn = __ic_line_inv_2_alias;
- break;
- case 4:
- ___flush_icache_rtn = __ic_line_inv_4_alias;
- break;
- default:
- panic("Unsupported I-Cache Sz\n");
- }
#endif
/* Enable/disable I-Cache */
@@ -218,9 +186,13 @@ chk_dc:
panic("Cache H/W doesn't match kernel Config");
}
+ dcache_does_alias = (dc->sz / ARC_DCACHE_WAYS) > PAGE_SIZE;
+
/* check for D-Cache aliasing */
- if ((dc->sz / ARC_DCACHE_WAYS) > PAGE_SIZE)
- panic("D$ aliasing not handled right now\n");
+ if (dcache_does_alias && !cache_is_vipt_aliasing())
+ panic("Enable CONFIG_ARC_CACHE_VIPT_ALIASING\n");
+ else if (!dcache_does_alias && cache_is_vipt_aliasing())
+ panic("Don't need CONFIG_ARC_CACHE_VIPT_ALIASING\n");
#endif
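For reference, the aliasing check added above boils down to a way-size vs page-size comparison: e.g. a 32K, 2-way D$ with 8K pages has 16K ways, so it aliases and needs CONFIG_ARC_CACHE_VIPT_ALIASING. A minimal userspace sketch of that arithmetic (example geometry assumed, not taken from this patch):

#include <stdio.h>

/* illustrative only: mirrors the dcache_does_alias computation above */
int main(void)
{
        const unsigned int cache_sz = 32 * 1024;   /* total D$ size */
        const unsigned int num_ways = 2;           /* set associativity */
        const unsigned int page_sz  = 8 * 1024;    /* MMU page size */
        unsigned int way_sz = cache_sz / num_ways;

        printf("way size = %uK, dcache_does_alias = %d\n",
               way_sz / 1024, way_sz > page_sz);
        return 0;
}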
/* Set the default Invalidate Mode to "simpy discard dirty lines"
@@ -303,47 +275,57 @@ static inline void __dc_entire_op(const int cacheop)
* Per Line Operation on D-Cache
* Doesn't deal with type-of-op/IRQ-disabling/waiting-for-flush-to-complete
 * Its sole purpose is to help gcc generate ZOL
+ * (aliasing VIPT dcache flushing needs both vaddr and paddr)
*/
-static inline void __dc_line_loop(unsigned long start, unsigned long sz,
- int aux_reg)
+static inline void __dc_line_loop(unsigned long paddr, unsigned long vaddr,
+ unsigned long sz, const int aux_reg)
{
- int num_lines, slack;
+ int num_lines;
/* Ensure we properly floor/ceil the non-line aligned/sized requests
- * and have @start - aligned to cache line and integral @num_lines.
+ * and have @paddr - aligned to cache line and integral @num_lines.
* This however can be avoided for page sized since:
- * -@start will be cache-line aligned already (being page aligned)
+ * -@paddr will be cache-line aligned already (being page aligned)
* -@sz will be integral multiple of line size (being page sized).
*/
if (!(__builtin_constant_p(sz) && sz == PAGE_SIZE)) {
- slack = start & ~DCACHE_LINE_MASK;
- sz += slack;
- start -= slack;
+ sz += paddr & ~DCACHE_LINE_MASK;
+ paddr &= DCACHE_LINE_MASK;
+ vaddr &= DCACHE_LINE_MASK;
}
num_lines = DIV_ROUND_UP(sz, ARC_DCACHE_LINE_LEN);
+#if (CONFIG_ARC_MMU_VER <= 2)
+ paddr |= (vaddr >> PAGE_SHIFT) & 0x1F;
+#endif
+
while (num_lines-- > 0) {
#if (CONFIG_ARC_MMU_VER > 2)
/*
* Just as for I$, in MMU v3, D$ ops also require
* "tag" bits in DC_PTAG, "index" bits in FLDL,IVDL ops
- * But we pass phy addr for both. This works since Linux
- * doesn't support aliasing configs for D$, yet.
- * Thus paddr is enough to provide both tag and index.
*/
- write_aux_reg(ARC_REG_DC_PTAG, start);
+ write_aux_reg(ARC_REG_DC_PTAG, paddr);
+
+ write_aux_reg(aux_reg, vaddr);
+ vaddr += ARC_DCACHE_LINE_LEN;
+#else
+ /* paddr contains stuffed vaddr bits */
+ write_aux_reg(aux_reg, paddr);
#endif
- write_aux_reg(aux_reg, start);
- start += ARC_DCACHE_LINE_LEN;
+ paddr += ARC_DCACHE_LINE_LEN;
}
}
+/* For kernel mappings cache operation: index is same as paddr */
+#define __dc_line_op_k(p, sz, op) __dc_line_op(p, p, sz, op)
+
/*
* D-Cache : Per Line INV (discard or wback+discard) or FLUSH (wback)
*/
-static inline void __dc_line_op(unsigned long start, unsigned long sz,
- const int cacheop)
+static inline void __dc_line_op(unsigned long paddr, unsigned long vaddr,
+ unsigned long sz, const int cacheop)
{
unsigned long flags, tmp = tmp;
int aux;
@@ -366,7 +348,7 @@ static inline void __dc_line_op(unsigned long start, unsigned long sz,
else
aux = ARC_REG_DC_FLDL;
- __dc_line_loop(start, sz, aux);
+ __dc_line_loop(paddr, vaddr, sz, aux);
if (cacheop & OP_FLUSH) /* flush / flush-n-inv both wait */
wait_for_flush();
@@ -381,7 +363,8 @@ static inline void __dc_line_op(unsigned long start, unsigned long sz,
#else
#define __dc_entire_op(cacheop)
-#define __dc_line_op(start, sz, cacheop)
+#define __dc_line_op(paddr, vaddr, sz, cacheop)
+#define __dc_line_op_k(paddr, sz, cacheop)
#endif /* CONFIG_ARC_HAS_DCACHE */
@@ -391,75 +374,38 @@ static inline void __dc_line_op(unsigned long start, unsigned long sz,
/*
* I-Cache Aliasing in ARC700 VIPT caches
*
- * For fetching code from I$, ARC700 uses vaddr (embedded in program code)
- * to "index" into SET of cache-line and paddr from MMU to match the TAG
- * in the WAYS of SET.
- *
- * However the CDU iterface (to flush/inv) lines from software, only takes
- * paddr (to have simpler hardware interface). For simpler cases, using paddr
- * alone suffices.
- * e.g. 2-way-set-assoc, 16K I$ (8k MMU pg sz, 32b cache line size):
- * way_sz = cache_sz / num_ways = 16k/2 = 8k
- * num_sets = way_sz / line_sz = 8k/32 = 256 => 8 bits
- * Ignoring the bottom 5 bits corresp to the off within a 32b cacheline,
- * bits req for calc set-index = bits 12:5 (0 based). Since this range fits
- * inside the bottom 13 bits of paddr, which are same for vaddr and paddr
- * (with 8k pg sz), paddr alone can be safely used by CDU to unambigously
- * locate a cache-line.
+ * ARC VIPT I-cache uses vaddr to index into cache and paddr to match the tag.
+ * The orig Cache Management Module "CDU" only required paddr to invalidate a
+ * certain line since it sufficed as index in Non-Aliasing VIPT cache-geometry.
+ * In fact, for distinct V1,V2,P: all of {V1-P},{V2-P},{P-P} would end up fetching
+ * the exact same line.
*
- * However for a difft sized cache, say 32k I$, above math yields need
- * for 14 bits of vaddr to locate a cache line, which can't be provided by
- * paddr, since the bit 13 (0 based) might differ between the two.
- *
- * This lack of extra bits needed for correct line addressing, defines the
- * classical problem of Cache aliasing with VIPT architectures
- * num_aliases = 1 << extra_bits
- * e.g. 2-way-set-assoc, 32K I$ with 8k MMU pg sz => 2 aliases
- * 2-way-set-assoc, 64K I$ with 8k MMU pg sz => 4 aliases
- * 2-way-set-assoc, 16K I$ with 8k MMU pg sz => NO aliases
+ * However for larger Caches (way-size > page-size) - i.e. in Aliasing config,
+ * paddr alone could not be used to correctly index the cache.
*
* ------------------
* MMU v1/v2 (Fixed Page Size 8k)
* ------------------
 * The solution was to provide CDU with these additional vaddr bits. These
- * would be bits [x:13], x would depend on cache-geom.
+ * would be bits [x:13], x would depend on cache-geometry, 13 comes from
+ * standard page size of 8k.
* H/w folks chose [17:13] to be a future safe range, and moreso these 5 bits
* of vaddr could easily be "stuffed" in the paddr as bits [4:0] since the
* orig 5 bits of paddr were anyways ignored by CDU line ops, as they
* represent the offset within cache-line. The adv of using this "clumsy"
- * interface for additional info was no new reg was needed in CDU.
+ * interface for additional info was that no new reg was needed in CDU programming
+ * model.
*
* 17:13 represented the max num of bits passable, actual bits needed were
* fewer, based on the num-of-aliases possible.
* -for 2 alias possibility, only bit 13 needed (32K cache)
* -for 4 alias possibility, bits 14:13 needed (64K cache)
*
- * Since vaddr was not available for all instances of I$ flush req by core
- * kernel, the only safe way (non-optimal though) was to kill all possible
- * lines which could represent an alias (even if they didnt represent one
- * in execution).
- * e.g. for 64K I$, 4 aliases possible, so we did
- * flush start
- * flush start | 0x01
- * flush start | 0x2
- * flush start | 0x3
- *
- * The penalty was invoking the operation itself, since tag match is anyways
- * paddr based, a line which didn't represent an alias would not match the
- * paddr, hence wont be killed
- *
- * Note that aliasing concerns are independent of line-sz for a given cache
- * geometry (size + set_assoc) because the extra bits required by line-sz are
- * reduced from the set calc.
- * e.g. 2-way-set-assoc, 32K I$ with 8k MMU pg sz and using math above
- * 32b line-sz: 9 bits set-index-calc, 5 bits offset-in-line => 1 extra bit
- * 64b line-sz: 8 bits set-index-calc, 6 bits offset-in-line => 1 extra bit
- *
* ------------------
* MMU v3
* ------------------
- * This ver of MMU supports var page sizes (1k-16k) - Linux will support
- * 8k (default), 16k and 4k.
+ * This ver of MMU supports variable page sizes (1k-16k): although Linux will
+ * only support 8k (default), 16k and 4k.
 * However from hardware perspective, smaller page sizes aggravate aliasing
* meaning more vaddr bits needed to disambiguate the cache-line-op ;
* the existing scheme of piggybacking won't work for certain configurations.
@@ -468,144 +414,53 @@ static inline void __dc_line_op(unsigned long start, unsigned long sz,
*/
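To make the alias math above concrete: the number of aliases is the way-size to page-size ratio whenever a way is larger than a page. A small illustrative userspace sketch (geometry values assumed for the example only):

#include <stdio.h>

/* aliases = way_size / page_size when way_size > page_size, else 1 */
static unsigned int num_aliases(unsigned int cache_sz, unsigned int ways,
                                unsigned int page_sz)
{
        unsigned int way_sz = cache_sz / ways;

        return (way_sz > page_sz) ? way_sz / page_sz : 1;
}

int main(void)
{
        printf("16K/2-way: %u\n", num_aliases(16 * 1024, 2, 8 * 1024)); /* 1 */
        printf("32K/2-way: %u\n", num_aliases(32 * 1024, 2, 8 * 1024)); /* 2 */
        printf("64K/2-way: %u\n", num_aliases(64 * 1024, 2, 8 * 1024)); /* 4 */
        return 0;
}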
/***********************************************************
- * Machine specific helpers for per line I-Cache invalidate.
- * 3 routines to accpunt for 1, 2, 4 aliases possible
+ * Machine specific helper for per line I-Cache invalidate.
*/
-
-static void __ic_line_inv_no_alias(unsigned long start, int num_lines)
-{
- while (num_lines-- > 0) {
-#if (CONFIG_ARC_MMU_VER > 2)
- write_aux_reg(ARC_REG_IC_PTAG, start);
-#endif
- write_aux_reg(ARC_REG_IC_IVIL, start);
- start += ARC_ICACHE_LINE_LEN;
- }
-}
-
-static void __ic_line_inv_2_alias(unsigned long start, int num_lines)
-{
- while (num_lines-- > 0) {
-
-#if (CONFIG_ARC_MMU_VER > 2)
- /*
- * MMU v3, CDU prog model (for line ops) now uses a new IC_PTAG
- * reg to pass the "tag" bits and existing IVIL reg only looks
- * at bits relevant for "index" (details above)
- * Programming Notes:
- * -when writing tag to PTAG reg, bit chopping can be avoided,
- * CDU ignores non-tag bits.
- * -Ideally "index" must be computed from vaddr, but it is not
- * avail in these rtns. So to be safe, we kill the lines in all
- * possible indexes corresp to num of aliases possible for
- * given cache config.
- */
- write_aux_reg(ARC_REG_IC_PTAG, start);
- write_aux_reg(ARC_REG_IC_IVIL,
- start & ~(0x1 << PAGE_SHIFT));
- write_aux_reg(ARC_REG_IC_IVIL, start | (0x1 << PAGE_SHIFT));
-#else
- write_aux_reg(ARC_REG_IC_IVIL, start);
- write_aux_reg(ARC_REG_IC_IVIL, start | 0x01);
-#endif
- start += ARC_ICACHE_LINE_LEN;
- }
-}
-
-static void __ic_line_inv_4_alias(unsigned long start, int num_lines)
-{
- while (num_lines-- > 0) {
-
-#if (CONFIG_ARC_MMU_VER > 2)
- write_aux_reg(ARC_REG_IC_PTAG, start);
-
- write_aux_reg(ARC_REG_IC_IVIL,
- start & ~(0x3 << PAGE_SHIFT));
- write_aux_reg(ARC_REG_IC_IVIL,
- start & ~(0x2 << PAGE_SHIFT));
- write_aux_reg(ARC_REG_IC_IVIL,
- start & ~(0x1 << PAGE_SHIFT));
- write_aux_reg(ARC_REG_IC_IVIL, start | (0x3 << PAGE_SHIFT));
-#else
- write_aux_reg(ARC_REG_IC_IVIL, start);
- write_aux_reg(ARC_REG_IC_IVIL, start | 0x01);
- write_aux_reg(ARC_REG_IC_IVIL, start | 0x02);
- write_aux_reg(ARC_REG_IC_IVIL, start | 0x03);
-#endif
- start += ARC_ICACHE_LINE_LEN;
- }
-}
-
-static void __ic_line_inv(unsigned long start, unsigned long sz)
+static void __ic_line_inv_vaddr(unsigned long paddr, unsigned long vaddr,
+ unsigned long sz)
{
unsigned long flags;
- int num_lines, slack;
+ int num_lines;
/*
- * Ensure we properly floor/ceil the non-line aligned/sized requests
- * and have @start - aligned to cache line, and integral @num_lines
+ * Ensure we properly floor/ceil the non-line aligned/sized requests:
* However page sized flushes can be compile time optimised.
- * -@start will be cache-line aligned already (being page aligned)
+ * -@paddr will be cache-line aligned already (being page aligned)
* -@sz will be integral multiple of line size (being page sized).
*/
if (!(__builtin_constant_p(sz) && sz == PAGE_SIZE)) {
- slack = start & ~ICACHE_LINE_MASK;
- sz += slack;
- start -= slack;
+ sz += paddr & ~ICACHE_LINE_MASK;
+ paddr &= ICACHE_LINE_MASK;
+ vaddr &= ICACHE_LINE_MASK;
}
num_lines = DIV_ROUND_UP(sz, ARC_ICACHE_LINE_LEN);
- local_irq_save(flags);
- (*___flush_icache_rtn) (start, num_lines);
- local_irq_restore(flags);
-}
-
-/* Unlike routines above, having vaddr for flush op (along with paddr),
- * prevents the need to speculatively kill the lines in multiple sets
- * based on ratio of way_sz : pg_sz
- */
-static void __ic_line_inv_vaddr(unsigned long phy_start,
- unsigned long vaddr, unsigned long sz)
-{
- unsigned long flags;
- int num_lines, slack;
- unsigned int addr;
-
- slack = phy_start & ~ICACHE_LINE_MASK;
- sz += slack;
- phy_start -= slack;
- num_lines = DIV_ROUND_UP(sz, ARC_ICACHE_LINE_LEN);
-
-#if (CONFIG_ARC_MMU_VER > 2)
- vaddr &= ~ICACHE_LINE_MASK;
- addr = phy_start;
-#else
+#if (CONFIG_ARC_MMU_VER <= 2)
/* bits 17:13 of vaddr go as bits 4:0 of paddr */
- addr = phy_start | ((vaddr >> 13) & 0x1F);
+ paddr |= (vaddr >> PAGE_SHIFT) & 0x1F;
#endif
local_irq_save(flags);
while (num_lines-- > 0) {
#if (CONFIG_ARC_MMU_VER > 2)
/* tag comes from phy addr */
- write_aux_reg(ARC_REG_IC_PTAG, addr);
+ write_aux_reg(ARC_REG_IC_PTAG, paddr);
/* index bits come from vaddr */
write_aux_reg(ARC_REG_IC_IVIL, vaddr);
vaddr += ARC_ICACHE_LINE_LEN;
#else
- /* this paddr contains vaddrs bits as needed */
- write_aux_reg(ARC_REG_IC_IVIL, addr);
+ /* paddr contains stuffed vaddr bits */
+ write_aux_reg(ARC_REG_IC_IVIL, paddr);
#endif
- addr += ARC_ICACHE_LINE_LEN;
+ paddr += ARC_ICACHE_LINE_LEN;
}
local_irq_restore(flags);
}
#else
-#define __ic_line_inv(start, sz)
#define __ic_line_inv_vaddr(pstart, vstart, sz)
#endif /* CONFIG_ARC_HAS_ICACHE */
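As an illustration of the MMU v1/v2 "stuffing" performed above (addresses below are made up; 8K pages, so PAGE_SHIFT is 13): vaddr bits [17:13] are folded into paddr bits [4:0], which the CDU line ops otherwise ignore as offset-within-line bits:

#include <stdio.h>

#define PAGE_SHIFT 13   /* 8K pages */

int main(void)
{
        unsigned long paddr = 0x80002000UL;  /* line-aligned physical address */
        unsigned long vaddr = 0x2004a000UL;  /* user virtual address of the line */

        /* same expression as in __ic_line_inv_vaddr() / __dc_line_loop() */
        unsigned long stuffed = paddr | ((vaddr >> PAGE_SHIFT) & 0x1F);

        printf("CDU operand: 0x%lx\n", stuffed);  /* prints 0x80002005 */
        return 0;
}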
@@ -615,35 +470,72 @@ static void __ic_line_inv_vaddr(unsigned long phy_start,
* Exported APIs
*/
-/* TBD: use pg_arch_1 to optimize this */
+/*
+ * Handle cache congruency of kernel and userspace mappings of page when kernel
+ * writes-to/reads-from
+ *
+ * The idea is to defer flushing of kernel mapping after a WRITE, possible if:
+ * -dcache is NOT aliasing, hence any U/K-mappings of page are congruent
+ * -U-mapping doesn't exist yet for page (finalised in update_mmu_cache)
+ * -In SMP, if hardware caches are coherent
+ *
+ * There's a corollary case, where kernel READs from a userspace mapped page.
+ * If the U-mapping is not congruent to the K-mapping, the former needs flushing.
+ */
void flush_dcache_page(struct page *page)
{
- __dc_line_op((unsigned long)page_address(page), PAGE_SIZE, OP_FLUSH);
+ struct address_space *mapping;
+
+ if (!cache_is_vipt_aliasing()) {
+ set_bit(PG_arch_1, &page->flags);
+ return;
+ }
+
+ /* don't handle anon pages here */
+ mapping = page_mapping(page);
+ if (!mapping)
+ return;
+
+ /*
+ * pagecache page, file not yet mapped to userspace
+ * Make a note that K-mapping is dirty
+ */
+ if (!mapping_mapped(mapping)) {
+ set_bit(PG_arch_1, &page->flags);
+ } else if (page_mapped(page)) {
+
+ /* kernel reading from page with U-mapping */
+ void *paddr = page_address(page);
+ unsigned long vaddr = page->index << PAGE_CACHE_SHIFT;
+
+ if (addr_not_cache_congruent(paddr, vaddr))
+ __flush_dcache_page(paddr, vaddr);
+ }
}
EXPORT_SYMBOL(flush_dcache_page);
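The addr_not_cache_congruent() helper used above is defined in asm/cacheflush.h (not part of this diff); conceptually it compares the cache colour of the two addresses, i.e. the index bits above PAGE_SHIFT. A plausible standalone rendition of that idea, assuming a single extra colour bit (2-alias geometry), purely for illustration:

#include <stdbool.h>
#include <stdio.h>

#define PAGE_SHIFT   13
#define COLOUR_BITS  1   /* 1 extra index bit => 2 possible aliases */

/* assumption: congruent means the colour bits above PAGE_SHIFT match */
static bool not_cache_congruent(unsigned long kaddr, unsigned long uaddr)
{
        unsigned long mask = ((1UL << COLOUR_BITS) - 1) << PAGE_SHIFT;

        return (kaddr & mask) != (uaddr & mask);
}

int main(void)
{
        printf("%d\n", not_cache_congruent(0x80002000UL, 0x20004000UL)); /* 1 */
        printf("%d\n", not_cache_congruent(0x80002000UL, 0x20002000UL)); /* 0 */
        return 0;
}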
void dma_cache_wback_inv(unsigned long start, unsigned long sz)
{
- __dc_line_op(start, sz, OP_FLUSH_N_INV);
+ __dc_line_op_k(start, sz, OP_FLUSH_N_INV);
}
EXPORT_SYMBOL(dma_cache_wback_inv);
void dma_cache_inv(unsigned long start, unsigned long sz)
{
- __dc_line_op(start, sz, OP_INV);
+ __dc_line_op_k(start, sz, OP_INV);
}
EXPORT_SYMBOL(dma_cache_inv);
void dma_cache_wback(unsigned long start, unsigned long sz)
{
- __dc_line_op(start, sz, OP_FLUSH);
+ __dc_line_op_k(start, sz, OP_FLUSH);
}
EXPORT_SYMBOL(dma_cache_wback);
/*
- * This is API for making I/D Caches consistent when modifying code
- * (loadable modules, kprobes, etc)
+ * This is API for making I/D Caches consistent when modifying
+ * kernel code (loadable modules, kprobes, kgdb...)
* This is called on insmod, with kernel virtual address for CODE of
* the module. ARC cache maintenance ops require PHY address thus we
* need to convert vmalloc addr to PHY addr
@@ -652,7 +544,6 @@ void flush_icache_range(unsigned long kstart, unsigned long kend)
{
unsigned int tot_sz, off, sz;
unsigned long phy, pfn;
- unsigned long flags;
/* printk("Kernel Cache Cohenercy: %lx to %lx\n",kstart, kend); */
@@ -673,8 +564,13 @@ void flush_icache_range(unsigned long kstart, unsigned long kend)
/* Case: Kernel Phy addr (0x8000_0000 onwards) */
if (likely(kstart > PAGE_OFFSET)) {
- __ic_line_inv(kstart, kend - kstart);
- __dc_line_op(kstart, kend - kstart, OP_FLUSH);
+ /*
+ * The 2nd arg despite being paddr will be used to index icache
+ * This is OK since no alternate virtual mappings will exist
+ * given the callers for this case: kprobe/kgdb in built-in
+ * kernel code only.
+ */
+ __sync_icache_dcache(kstart, kstart, kend - kstart);
return;
}
@@ -692,42 +588,45 @@ void flush_icache_range(unsigned long kstart, unsigned long kend)
pfn = vmalloc_to_pfn((void *)kstart);
phy = (pfn << PAGE_SHIFT) + off;
sz = min_t(unsigned int, tot_sz, PAGE_SIZE - off);
- local_irq_save(flags);
- __dc_line_op(phy, sz, OP_FLUSH);
- __ic_line_inv(phy, sz);
- local_irq_restore(flags);
+ __sync_icache_dcache(phy, kstart, sz);
kstart += sz;
tot_sz -= sz;
}
}
/*
- * Optimised ver of flush_icache_range() with spec callers: ptrace/signals
- * where vaddr is also available. This allows passing both vaddr and paddr
- * bits to CDU for cache flush, short-circuting the current pessimistic algo
- * which kills all possible aliases.
- * An added adv of knowing that vaddr is user-vaddr avoids various checks
- * and handling for k-vaddr, k-paddr as done in orig ver above
+ * General purpose helper to make I and D cache lines consistent.
+ * @paddr is phy addr of region
+ * @vaddr is typically user or kernel vaddr (vmalloc)
+ * However in one instance, flush_icache_range() by kprobe (for a breakpt in
+ * built-in kernel code) @vaddr will be paddr only, meaning CDU operation will
+ * use a paddr to index the cache (despite VIPT). This is fine since a
+ * built-in kernel page will not have any virtual mappings (not even kernel).
+ * A kprobe on a loadable module is different as it will have a kvaddr.
*/
-void flush_icache_range_vaddr(unsigned long paddr, unsigned long u_vaddr,
- int len)
+void __sync_icache_dcache(unsigned long paddr, unsigned long vaddr, int len)
+{
+ unsigned long flags;
+
+ local_irq_save(flags);
+ __ic_line_inv_vaddr(paddr, vaddr, len);
+ __dc_line_op(paddr, vaddr, len, OP_FLUSH_N_INV);
+ local_irq_restore(flags);
+}
+
+/* wrapper to compile time eliminate alignment checks in flush loop */
+void __inv_icache_page(unsigned long paddr, unsigned long vaddr)
{
- __ic_line_inv_vaddr(paddr, u_vaddr, len);
- __dc_line_op(paddr, len, OP_FLUSH);
+ __ic_line_inv_vaddr(paddr, vaddr, PAGE_SIZE);
}
/*
- * XXX: This also needs to be optim using pg_arch_1
- * This is called when a page-cache page is about to be mapped into a
- * user process' address space. It offers an opportunity for a
- * port to ensure d-cache/i-cache coherency if necessary.
+ * wrapper to clearout kernel or userspace mappings of a page
+ * For kernel mappings @vaddr == @paddr
*/
-void flush_icache_page(struct vm_area_struct *vma, struct page *page)
+void ___flush_dcache_page(unsigned long paddr, unsigned long vaddr)
{
- if (!(vma->vm_flags & VM_EXEC))
- return;
-
- __ic_line_inv((unsigned long)page_address(page), PAGE_SIZE);
+ __dc_line_op(paddr, vaddr & PAGE_MASK, PAGE_SIZE, OP_FLUSH_N_INV);
}
void flush_icache_all(void)
@@ -756,6 +655,88 @@ noinline void flush_cache_all(void)
}
+#ifdef CONFIG_ARC_CACHE_VIPT_ALIASING
+
+void flush_cache_mm(struct mm_struct *mm)
+{
+ flush_cache_all();
+}
+
+void flush_cache_page(struct vm_area_struct *vma, unsigned long u_vaddr,
+ unsigned long pfn)
+{
+ unsigned int paddr = pfn << PAGE_SHIFT;
+
+ __sync_icache_dcache(paddr, u_vaddr, PAGE_SIZE);
+}
+
+void flush_cache_range(struct vm_area_struct *vma, unsigned long start,
+ unsigned long end)
+{
+ flush_cache_all();
+}
+
+void flush_anon_page(struct vm_area_struct *vma, struct page *page,
+ unsigned long u_vaddr)
+{
+ /* TBD: do we really need to clear the kernel mapping */
+ __flush_dcache_page(page_address(page), u_vaddr);
+ __flush_dcache_page(page_address(page), page_address(page));
+
+}
+
+#endif
+
+void copy_user_highpage(struct page *to, struct page *from,
+ unsigned long u_vaddr, struct vm_area_struct *vma)
+{
+ void *kfrom = page_address(from);
+ void *kto = page_address(to);
+ int clean_src_k_mappings = 0;
+
+ /*
+ * If SRC page was already mapped in userspace AND its U-mapping is
+ * not congruent with K-mapping, sync former to physical page so that
+ * K-mapping in memcpy below, sees the right data
+ *
+ * Note that while @u_vaddr refers to DST page's userspace vaddr, it is
+ * equally valid for SRC page as well
+ */
+ if (page_mapped(from) && addr_not_cache_congruent(kfrom, u_vaddr)) {
+ __flush_dcache_page(kfrom, u_vaddr);
+ clean_src_k_mappings = 1;
+ }
+
+ copy_page(kto, kfrom);
+
+ /*
+ * Mark DST page K-mapping as dirty for a later finalization by
+ * update_mmu_cache(). Although the finalization could have been done
+ * here as well (given that both vaddr/paddr are available).
+ * But update_mmu_cache() already has code to do that for other
+ * non copied user pages (e.g. read faults which wire in pagecache page
+ * directly).
+ */
+ set_bit(PG_arch_1, &to->flags);
+
+ /*
+ * if SRC was already usermapped and non-congruent to kernel mapping
+ * sync the kernel mapping back to physical page
+ */
+ if (clean_src_k_mappings) {
+ __flush_dcache_page(kfrom, kfrom);
+ } else {
+ set_bit(PG_arch_1, &from->flags);
+ }
+}
+
+void clear_user_page(void *to, unsigned long u_vaddr, struct page *page)
+{
+ clear_page(to);
+ set_bit(PG_arch_1, &page->flags);
+}
+
+
/**********************************************************************
* Explicit Cache flush request from user space via syscall
* Needed for JITs which generate code on the fly
diff --git a/arch/arc/mm/extable.c b/arch/arc/mm/extable.c
index 014172ba843..aa652e28132 100644
--- a/arch/arc/mm/extable.c
+++ b/arch/arc/mm/extable.c
@@ -27,7 +27,7 @@ int fixup_exception(struct pt_regs *regs)
#ifdef CONFIG_CC_OPTIMIZE_FOR_SIZE
-long arc_copy_from_user_noinline(void *to, const void __user * from,
+long arc_copy_from_user_noinline(void *to, const void __user *from,
unsigned long n)
{
return __arc_copy_from_user(to, from, n);
@@ -48,7 +48,7 @@ unsigned long arc_clear_user_noinline(void __user *to,
}
EXPORT_SYMBOL(arc_clear_user_noinline);
-long arc_strncpy_from_user_noinline (char *dst, const char __user *src,
+long arc_strncpy_from_user_noinline(char *dst, const char __user *src,
long count)
{
return __arc_strncpy_from_user(dst, src, count);
diff --git a/arch/arc/mm/fault.c b/arch/arc/mm/fault.c
index af55aab803d..689ffd86d5e 100644
--- a/arch/arc/mm/fault.c
+++ b/arch/arc/mm/fault.c
@@ -12,7 +12,6 @@
#include <linux/sched.h>
#include <linux/errno.h>
#include <linux/ptrace.h>
-#include <linux/version.h>
#include <linux/uaccess.h>
#include <linux/kdebug.h>
#include <asm/pgalloc.h>
diff --git a/arch/arc/mm/init.c b/arch/arc/mm/init.c
index caf797de23f..4a177365b2c 100644
--- a/arch/arc/mm/init.c
+++ b/arch/arc/mm/init.c
@@ -10,9 +10,6 @@
#include <linux/mm.h>
#include <linux/bootmem.h>
#include <linux/memblock.h>
-#ifdef CONFIG_BLOCK_DEV_RAM
-#include <linux/blk.h>
-#endif
#include <linux/swap.h>
#include <linux/module.h>
#include <asm/page.h>
@@ -144,37 +141,18 @@ void __init mem_init(void)
PAGES_TO_KB(reserved_pages));
}
-static void __init free_init_pages(const char *what, unsigned long begin,
- unsigned long end)
-{
- unsigned long addr;
-
- pr_info("Freeing %s: %ldk [%lx] to [%lx]\n",
- what, TO_KB(end - begin), begin, end);
-
- /* need to check that the page we free is not a partial page */
- for (addr = begin; addr + PAGE_SIZE <= end; addr += PAGE_SIZE) {
- ClearPageReserved(virt_to_page(addr));
- init_page_count(virt_to_page(addr));
- free_page(addr);
- totalram_pages++;
- }
-}
-
/*
* free_initmem: Free all the __init memory.
*/
void __init_refok free_initmem(void)
{
- free_init_pages("unused kernel memory",
- (unsigned long)__init_begin,
- (unsigned long)__init_end);
+ free_initmem_default(0);
}
#ifdef CONFIG_BLK_DEV_INITRD
void __init free_initrd_mem(unsigned long start, unsigned long end)
{
- free_init_pages("initrd memory", start, end);
+ free_reserved_area(start, end, 0, "initrd");
}
#endif
diff --git a/arch/arc/mm/ioremap.c b/arch/arc/mm/ioremap.c
index 3e5c92c7993..739e65f355d 100644
--- a/arch/arc/mm/ioremap.c
+++ b/arch/arc/mm/ioremap.c
@@ -12,7 +12,7 @@
#include <linux/io.h>
#include <linux/mm.h>
#include <linux/slab.h>
-#include <asm/cache.h>
+#include <linux/cache.h>
void __iomem *ioremap(unsigned long paddr, unsigned long size)
{
diff --git a/arch/arc/mm/mmap.c b/arch/arc/mm/mmap.c
new file mode 100644
index 00000000000..2e06d56e987
--- /dev/null
+++ b/arch/arc/mm/mmap.c
@@ -0,0 +1,78 @@
+/*
+ * ARC700 mmap
+ *
+ * (started from arm version - for VIPT alias handling)
+ *
+ * Copyright (C) 2013 Synopsys, Inc. (www.synopsys.com)
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/fs.h>
+#include <linux/mm.h>
+#include <linux/mman.h>
+#include <linux/sched.h>
+#include <asm/cacheflush.h>
+
+#define COLOUR_ALIGN(addr, pgoff) \
+ ((((addr) + SHMLBA - 1) & ~(SHMLBA - 1)) + \
+ (((pgoff) << PAGE_SHIFT) & (SHMLBA - 1)))
+
+/*
+ * Ensure that shared mappings are correctly aligned to
+ * avoid aliasing issues with VIPT caches.
+ * We need to ensure that
+ * a specific page of an object is always mapped at a multiple of
+ * SHMLBA bytes.
+ */
+unsigned long
+arch_get_unmapped_area(struct file *filp, unsigned long addr,
+ unsigned long len, unsigned long pgoff, unsigned long flags)
+{
+ struct mm_struct *mm = current->mm;
+ struct vm_area_struct *vma;
+ int do_align = 0;
+ int aliasing = cache_is_vipt_aliasing();
+ struct vm_unmapped_area_info info;
+
+ /*
+ * We only need to do colour alignment if D cache aliases.
+ */
+ if (aliasing)
+ do_align = filp || (flags & MAP_SHARED);
+
+ /*
+ * We enforce the MAP_FIXED case.
+ */
+ if (flags & MAP_FIXED) {
+ if (aliasing && flags & MAP_SHARED &&
+ (addr - (pgoff << PAGE_SHIFT)) & (SHMLBA - 1))
+ return -EINVAL;
+ return addr;
+ }
+
+ if (len > TASK_SIZE)
+ return -ENOMEM;
+
+ if (addr) {
+ if (do_align)
+ addr = COLOUR_ALIGN(addr, pgoff);
+ else
+ addr = PAGE_ALIGN(addr);
+
+ vma = find_vma(mm, addr);
+ if (TASK_SIZE - len >= addr &&
+ (!vma || addr + len <= vma->vm_start))
+ return addr;
+ }
+
+ info.flags = 0;
+ info.length = len;
+ info.low_limit = mm->mmap_base;
+ info.high_limit = TASK_SIZE;
+ info.align_mask = do_align ? (PAGE_MASK & (SHMLBA - 1)) : 0;
+ info.align_offset = pgoff << PAGE_SHIFT;
+ return vm_unmapped_area(&info);
+}
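A quick standalone check of the COLOUR_ALIGN() arithmetic above, with an illustrative SHMLBA of 16K (two 8K pages, i.e. two cache colours): the hint address is rounded up to an SHMLBA boundary and then offset so the mapping's colour matches the colour implied by the file offset:

#include <stdio.h>

#define PAGE_SHIFT 13
#define SHMLBA     (2UL << PAGE_SHIFT)   /* illustrative: 16K, 2 colours */

#define COLOUR_ALIGN(addr, pgoff) \
        ((((addr) + SHMLBA - 1) & ~(SHMLBA - 1)) + \
         (((pgoff) << PAGE_SHIFT) & (SHMLBA - 1)))

int main(void)
{
        /* odd file page (pgoff = 1) with a hint address near 0x20001000 */
        unsigned long addr = COLOUR_ALIGN(0x20001000UL, 1UL);

        /* rounds up to 0x20004000, then adds 0x2000 for the odd colour */
        printf("0x%lx\n", addr);   /* prints 0x20006000 */
        return 0;
}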
diff --git a/arch/arc/mm/tlb.c b/arch/arc/mm/tlb.c
index 9b9ce23f4ec..fe1c5a073af 100644
--- a/arch/arc/mm/tlb.c
+++ b/arch/arc/mm/tlb.c
@@ -418,23 +418,53 @@ void create_tlb(struct vm_area_struct *vma, unsigned long address, pte_t *ptep)
local_irq_restore(flags);
}
-/* arch hook called by core VM at the end of handle_mm_fault( ),
- * when a new PTE is entered in Page Tables or an existing one
- * is modified. We aggresively pre-install a TLB entry
+/*
+ * Called at the end of pagefault, for a userspace mapped page
+ * -pre-install the corresponding TLB entry into MMU
+ * -Finalize the delayed D-cache flush of kernel mapping of page due to
+ * flush_dcache_page(), copy_user_page()
+ *
+ * Note that flush (when done) involves both WBACK - so physical page is
+ * in sync as well as INV - so any non-congruent aliases don't remain
*/
-
-void update_mmu_cache(struct vm_area_struct *vma, unsigned long vaddress,
+void update_mmu_cache(struct vm_area_struct *vma, unsigned long vaddr_unaligned,
pte_t *ptep)
{
+ unsigned long vaddr = vaddr_unaligned & PAGE_MASK;
+ unsigned long paddr = pte_val(*ptep) & PAGE_MASK;
+
+ create_tlb(vma, vaddr, ptep);
- create_tlb(vma, vaddress, ptep);
+ /*
+ * Exec page : Independent of aliasing/page-color considerations,
+ * since icache doesn't snoop dcache on ARC, any dirty
+ * K-mapping of a code page needs to be wback+inv so that
+ * icache fetch by userspace sees code correctly.
+ * !EXEC page: If K-mapping is NOT congruent to U-mapping, flush it
+ * so userspace sees the right data.
+ * (Avoids the flush for Non-exec + congruent mapping case)
+ */
+ if ((vma->vm_flags & VM_EXEC) ||
+ addr_not_cache_congruent(paddr, vaddr)) {
+ struct page *page = pfn_to_page(pte_pfn(*ptep));
+
+ int dirty = test_and_clear_bit(PG_arch_1, &page->flags);
+ if (dirty) {
+ /* wback + inv dcache lines */
+ __flush_dcache_page(paddr, paddr);
+
+ /* invalidate any existing icache lines */
+ if (vma->vm_flags & VM_EXEC)
+ __inv_icache_page(paddr, vaddr);
+ }
+ }
}
/* Read the Cache Build Configuration Registers, Decode them and save into
* the cpuinfo structure for later use.
* No Validation is done here, simply read/convert the BCRs
*/
-void __init read_decode_mmu_bcr(void)
+void __cpuinit read_decode_mmu_bcr(void)
{
unsigned int tmp;
struct bcr_mmu_1_2 *mmu2; /* encoded MMU2 attr */
@@ -466,7 +496,7 @@ void __init read_decode_mmu_bcr(void)
char *arc_mmu_mumbojumbo(int cpu_id, char *buf, int len)
{
int n = 0;
- struct cpuinfo_arc_mmu *p_mmu = &cpuinfo_arc700[smp_processor_id()].mmu;
+ struct cpuinfo_arc_mmu *p_mmu = &cpuinfo_arc700[cpu_id].mmu;
n += scnprintf(buf + n, len - n, "ARC700 MMU [v%x]\t: %dk PAGE, ",
p_mmu->ver, TO_KB(p_mmu->pg_sz));
@@ -480,7 +510,7 @@ char *arc_mmu_mumbojumbo(int cpu_id, char *buf, int len)
return buf;
}
-void __init arc_mmu_init(void)
+void __cpuinit arc_mmu_init(void)
{
char str[256];
struct cpuinfo_arc_mmu *mmu = &cpuinfo_arc700[smp_processor_id()].mmu;
diff --git a/arch/arc/mm/tlbex.S b/arch/arc/mm/tlbex.S
index 9df765dc7c3..3357d26ffe5 100644
--- a/arch/arc/mm/tlbex.S
+++ b/arch/arc/mm/tlbex.S
@@ -277,7 +277,7 @@ ARC_ENTRY EV_TLBMissI
;----------------------------------------------------------------
; VERIFY_PTE: Check if PTE permissions approp for executing code
cmp_s r2, VMALLOC_START
- mov.lo r2, (_PAGE_PRESENT | _PAGE_READ | _PAGE_EXECUTE)
+ mov.lo r2, (_PAGE_PRESENT | _PAGE_U_READ | _PAGE_U_EXECUTE)
mov.hs r2, (_PAGE_PRESENT | _PAGE_K_READ | _PAGE_K_EXECUTE)
and r3, r0, r2 ; Mask out NON Flag bits from PTE
@@ -320,9 +320,9 @@ ARC_ENTRY EV_TLBMissD
mov_s r2, 0
lr r3, [ecr]
btst_s r3, ECR_C_BIT_DTLB_LD_MISS ; Read Access
- or.nz r2, r2, _PAGE_READ ; chk for Read flag in PTE
+ or.nz r2, r2, _PAGE_U_READ ; chk for Read flag in PTE
btst_s r3, ECR_C_BIT_DTLB_ST_MISS ; Write Access
- or.nz r2, r2, _PAGE_WRITE ; chk for Write flag in PTE
+ or.nz r2, r2, _PAGE_U_WRITE ; chk for Write flag in PTE
; Above laddering takes care of XCHG access
; which is both Read and Write