Diffstat (limited to 'arch/metag/mm/cache.c')
-rw-r--r--  arch/metag/mm/cache.c  521
1 file changed, 521 insertions(+), 0 deletions(-)
diff --git a/arch/metag/mm/cache.c b/arch/metag/mm/cache.c
new file mode 100644
index 00000000000..b5d3b2e7c16
--- /dev/null
+++ b/arch/metag/mm/cache.c
@@ -0,0 +1,521 @@
+/*
+ * arch/metag/mm/cache.c
+ *
+ * Copyright (C) 2001, 2002, 2005, 2007, 2012 Imagination Technologies.
+ *
+ * This program is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU General Public License version 2 as published by the
+ * Free Software Foundation.
+ *
+ * Cache control code
+ */
+
+#include <linux/export.h>
+#include <linux/io.h>
+#include <asm/cacheflush.h>
+#include <asm/core_reg.h>
+#include <asm/global_lock.h>
+#include <asm/metag_isa.h>
+#include <asm/metag_mem.h>
+#include <asm/metag_regs.h>
+
+#define DEFAULT_CACHE_WAYS_LOG2 2
+
+/*
+ * Size of a set in the caches. Initialised for default 16K stride, adjusted
+ * according to values passed through TBI global heap segment via LDLK (on ATP)
+ * or config registers (on HTP/MTP)
+ */
+static int dcache_set_shift = METAG_TBI_CACHE_SIZE_BASE_LOG2
+ - DEFAULT_CACHE_WAYS_LOG2;
+static int icache_set_shift = METAG_TBI_CACHE_SIZE_BASE_LOG2
+ - DEFAULT_CACHE_WAYS_LOG2;
+/*
+ * The number of sets in the caches. Initialised for HTP/ATP, adjusted
+ * according to NOMMU setting in config registers
+ */
+static unsigned char dcache_sets_log2 = DEFAULT_CACHE_WAYS_LOG2;
+static unsigned char icache_sets_log2 = DEFAULT_CACHE_WAYS_LOG2;
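+
+/*
+ * Example (assuming METAG_TBI_CACHE_SIZE_BASE_LOG2 is 14, i.e. a 16KiB
+ * base size): the defaults above describe a 16KiB cache split into
+ * 1 << 2 = 4 sets of 1 << dcache_set_shift = 4KiB each, for a total of
+ * (1 << dcache_set_shift) << dcache_sets_log2 bytes.
+ */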
+
+#ifndef CONFIG_METAG_META12
+static volatile u32 lnkget_testdata[16] __initdata __aligned(64);
+
+#define LNKGET_CONSTANT 0xdeadbeef
+
+/**
+ * metag_lnkget_probe() - Probe whether LNKGET/LNKSET go around the cache.
+ */
+void __init metag_lnkget_probe(void)
+{
+ int temp;
+ long flags;
+
+ /*
+ * It's conceivable the user has configured a globally coherent cache
+ * shared with non-Linux hardware threads, so use LOCK2 to prevent them
+ * from executing and causing cache eviction during the test.
+ */
+ __global_lock2(flags);
+
+ /* read a value to bring it into the cache */
+ (void)lnkget_testdata[0];
+ lnkget_testdata[0] = 0;
+
+	/*
+	 * LNKGET/LNKSET it to modify it, retrying until the deferred
+	 * TXSTAT result indicates the LNKSET took effect
+	 */
+ asm volatile(
+ "1: LNKGETD %0, [%1]\n"
+ " LNKSETD [%1], %2\n"
+ " DEFR %0, TXSTAT\n"
+ " ANDT %0, %0, #HI(0x3f000000)\n"
+ " CMPT %0, #HI(0x02000000)\n"
+ " BNZ 1b\n"
+ : "=&d" (temp)
+ : "da" (&lnkget_testdata[0]), "bd" (LNKGET_CONSTANT)
+ : "cc");
+
+ /* re-read it to see if the cached value changed */
+ temp = lnkget_testdata[0];
+
+ __global_unlock2(flags);
+
+ /* flush the cache line to fix any incoherency */
+ __builtin_dcache_flush((void *)&lnkget_testdata[0]);
+
+#if defined(CONFIG_METAG_LNKGET_AROUND_CACHE)
+ /* if the cache is right, LNKGET_AROUND_CACHE is unnecessary */
+ if (temp == LNKGET_CONSTANT)
+ pr_info("LNKGET/SET go through cache but CONFIG_METAG_LNKGET_AROUND_CACHE=y\n");
+#elif defined(CONFIG_METAG_ATOMICITY_LNKGET)
+ /*
+ * if the cache is wrong, LNKGET_AROUND_CACHE is really necessary
+ * because the kernel is configured to use LNKGET/SET for atomicity
+ */
+ WARN(temp != LNKGET_CONSTANT,
+ "LNKGET/SET go around cache but CONFIG_METAG_LNKGET_AROUND_CACHE=n\n"
+ "Expect kernel failure as it's used for atomicity primitives\n");
+#elif defined(CONFIG_SMP)
+ /*
+ * if the cache is wrong, LNKGET_AROUND_CACHE should be used or the
+ * gateway page won't flush and userland could break.
+ */
+ WARN(temp != LNKGET_CONSTANT,
+ "LNKGET/SET go around cache but CONFIG_METAG_LNKGET_AROUND_CACHE=n\n"
+ "Expect userland failure as it's used for user gateway page\n");
+#else
+ /*
+ * if the cache is wrong, LNKGET_AROUND_CACHE is set wrong, but it
+ * doesn't actually matter as it doesn't have any effect on !SMP &&
+ * !ATOMICITY_LNKGET.
+ */
+ if (temp != LNKGET_CONSTANT)
+ pr_warn("LNKGET/SET go around cache but CONFIG_METAG_LNKGET_AROUND_CACHE=n\n");
+#endif
+}
+#endif /* !CONFIG_METAG_META12 */
+
+/**
+ * metag_cache_probe() - Probe L1 cache configuration.
+ *
+ * Probe the L1 cache configuration to aid the L1 physical cache flushing
+ * functions.
+ */
+void __init metag_cache_probe(void)
+{
+#ifndef CONFIG_METAG_META12
+ int coreid = metag_in32(METAC_CORE_ID);
+ int config = metag_in32(METAC_CORE_CONFIG2);
+ int cfgcache = coreid & METAC_COREID_CFGCACHE_BITS;
+
+ if (cfgcache == METAC_COREID_CFGCACHE_TYPE0 ||
+ cfgcache == METAC_COREID_CFGCACHE_PRIVNOMMU) {
+ icache_sets_log2 = 1;
+ dcache_sets_log2 = 1;
+ }
+
+	/*
+	 * For normal size caches, the smallest size is 4KiB.
+	 * For small caches, the smallest size is 64 bytes.
+	 */
+ icache_set_shift = (config & METAC_CORECFG2_ICSMALL_BIT)
+ ? 6 : 12;
+ icache_set_shift += (config & METAC_CORE_C2ICSZ_BITS)
+ >> METAC_CORE_C2ICSZ_S;
+ icache_set_shift -= icache_sets_log2;
+
+ dcache_set_shift = (config & METAC_CORECFG2_DCSMALL_BIT)
+ ? 6 : 12;
+ dcache_set_shift += (config & METAC_CORECFG2_DCSZ_BITS)
+ >> METAC_CORECFG2_DCSZ_S;
+ dcache_set_shift -= dcache_sets_log2;
+
+ metag_lnkget_probe();
+#else
+ /* Extract cache sizes from global heap segment */
+ unsigned long val, u;
+ int width, shift, addend;
+ PTBISEG seg;
+
+ seg = __TBIFindSeg(NULL, TBID_SEG(TBID_THREAD_GLOBAL,
+ TBID_SEGSCOPE_GLOBAL,
+ TBID_SEGTYPE_HEAP));
+ if (seg != NULL) {
+ val = seg->Data[1];
+
+ /* Work out width of I-cache size bit-field */
+ u = ((unsigned long) METAG_TBI_ICACHE_SIZE_BITS)
+ >> METAG_TBI_ICACHE_SIZE_S;
+ width = 0;
+ while (u & 1) {
+ width++;
+ u >>= 1;
+ }
+ /* Extract sign-extended size addend value */
+ shift = 32 - (METAG_TBI_ICACHE_SIZE_S + width);
+ addend = (long) ((val & METAG_TBI_ICACHE_SIZE_BITS)
+ << shift)
+ >> (shift + METAG_TBI_ICACHE_SIZE_S);
+ /* Now calculate I-cache set size */
+ icache_set_shift = (METAG_TBI_CACHE_SIZE_BASE_LOG2
+ - DEFAULT_CACHE_WAYS_LOG2)
+ + addend;
+
+ /* Similarly for D-cache */
+ u = ((unsigned long) METAG_TBI_DCACHE_SIZE_BITS)
+ >> METAG_TBI_DCACHE_SIZE_S;
+ width = 0;
+ while (u & 1) {
+ width++;
+ u >>= 1;
+ }
+ shift = 32 - (METAG_TBI_DCACHE_SIZE_S + width);
+ addend = (long) ((val & METAG_TBI_DCACHE_SIZE_BITS)
+ << shift)
+ >> (shift + METAG_TBI_DCACHE_SIZE_S);
+ dcache_set_shift = (METAG_TBI_CACHE_SIZE_BASE_LOG2
+ - DEFAULT_CACHE_WAYS_LOG2)
+ + addend;
+ }
+#endif
+}
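+
+#if 0
+/*
+ * Illustrative only (not part of this patch): a minimal sketch of how
+ * the probed values combine into byte sizes, assuming each set spans
+ * 1 << *_set_shift bytes and there are 1 << *_sets_log2 sets.
+ */
+static unsigned int metag_dcache_bytes(void)
+{
+	return (1u << dcache_set_shift) << dcache_sets_log2;
+}
+
+static unsigned int metag_icache_bytes(void)
+{
+	return (1u << icache_set_shift) << icache_sets_log2;
+}
+#endif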
+
+static void metag_phys_data_cache_flush(const void *start)
+{
+ unsigned long flush0, flush1, flush2, flush3;
+ int loops, step;
+ int thread;
+ int part, offset;
+ int set_shift;
+
+ /* Use a sequence of writes to flush the cache region requested */
+ thread = (__core_reg_get(TXENABLE) & TXENABLE_THREAD_BITS)
+ >> TXENABLE_THREAD_S;
+
+ /* Cache is broken into sets which lie in contiguous RAMs */
+ set_shift = dcache_set_shift;
+
+ /* Move to the base of the physical cache flush region */
+ flush0 = LINSYSCFLUSH_DCACHE_LINE;
+ step = 64;
+
+ /* Get partition data for this thread */
+ part = metag_in32(SYSC_DCPART0 +
+ (SYSC_xCPARTn_STRIDE * thread));
+
+ if ((int)start < 0)
+ /* Access Global vs Local partition */
+ part >>= SYSC_xCPARTG_AND_S
+ - SYSC_xCPARTL_AND_S;
+
+ /* Extract offset and move SetOff */
+ offset = (part & SYSC_xCPARTL_OR_BITS)
+ >> SYSC_xCPARTL_OR_S;
+ flush0 += (offset << (set_shift - 4));
+
+ /* Shrink size */
+ part = (part & SYSC_xCPARTL_AND_BITS)
+ >> SYSC_xCPARTL_AND_S;
+ loops = ((part + 1) << (set_shift - 4));
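+	/*
+	 * The partition fields are in sixteenths of a set (hence the
+	 * set_shift - 4): e.g. an OR field of 8 starts flush0 halfway
+	 * into the set, and an AND field of 7 flushes half of it,
+	 * (7 + 1) << (set_shift - 4) bytes. (Field semantics inferred
+	 * from the shifts above.)
+	 */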
+
+ /* Reduce loops by step of cache line size */
+ loops /= step;
+
+ flush1 = flush0 + (1 << set_shift);
+ flush2 = flush0 + (2 << set_shift);
+ flush3 = flush0 + (3 << set_shift);
+
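+	/*
+	 * With only two sets, fold the four pointers onto the pair:
+	 * flush0/flush1 walk set 0 and flush2/flush3 walk set 1, offset
+	 * by one line, so each iteration still issues four writes while
+	 * the step doubles and the loop count halves.
+	 */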
+ if (dcache_sets_log2 == 1) {
+ flush2 = flush1;
+ flush3 = flush1 + step;
+ flush1 = flush0 + step;
+ step <<= 1;
+ loops >>= 1;
+ }
+
+	/* Clear 'loops' lines in each way of the cache */
+ while (loops-- != 0) {
+ /* Clear the ways. */
+#if 0
+ /*
+ * GCC doesn't generate very good code for this so we
+ * provide inline assembly instead.
+ */
+ metag_out8(0, flush0);
+ metag_out8(0, flush1);
+ metag_out8(0, flush2);
+ metag_out8(0, flush3);
+
+ flush0 += step;
+ flush1 += step;
+ flush2 += step;
+ flush3 += step;
+#else
+ asm volatile (
+ "SETB\t[%0+%4++],%5\n"
+ "SETB\t[%1+%4++],%5\n"
+ "SETB\t[%2+%4++],%5\n"
+ "SETB\t[%3+%4++],%5\n"
+ : "+e" (flush0),
+ "+e" (flush1),
+ "+e" (flush2),
+ "+e" (flush3)
+ : "e" (step), "a" (0));
+#endif
+ }
+}
+
+void metag_data_cache_flush_all(const void *start)
+{
+ if ((metag_in32(SYSC_CACHE_MMU_CONFIG) & SYSC_CMMUCFG_DC_ON_BIT) == 0)
+		/* No need to flush the data cache, it's not actually enabled */
+ return;
+
+ metag_phys_data_cache_flush(start);
+}
+
+void metag_data_cache_flush(const void *start, int bytes)
+{
+ unsigned long flush0;
+ int loops, step;
+
+ if ((metag_in32(SYSC_CACHE_MMU_CONFIG) & SYSC_CMMUCFG_DC_ON_BIT) == 0)
+		/* No need to flush the data cache, it's not actually enabled */
+ return;
+
+ if (bytes >= 4096) {
+ metag_phys_data_cache_flush(start);
+ return;
+ }
+
+ /* Use linear cache flush mechanism on META IP */
+ flush0 = (int)start;
+ loops = ((int)start & (DCACHE_LINE_BYTES - 1)) + bytes +
+ (DCACHE_LINE_BYTES - 1);
+ loops >>= DCACHE_LINE_S;
+
+#define PRIM_FLUSH(addr, offset) do { \
+ int __addr = ((int) (addr)) + ((offset) * 64); \
+ __builtin_dcache_flush((void *)(__addr)); \
+ } while (0)
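+/*
+ * PRIM_FLUSH flushes the single D-cache line containing addr + offset*64
+ * via the compiler builtin; the 64-byte stride is assumed to match
+ * DCACHE_LINE_BYTES above.
+ */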
+
+#define LOOP_INC (4*64)
+
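+	/*
+	 * Partially unrolled flush loop in the style of Duff's device:
+	 * the default case flushes four cache lines and loops again;
+	 * cases 3..1 flush the remaining tail lines, then case 0 stops.
+	 */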
+ do {
+ /* By default stop */
+ step = 0;
+
+ switch (loops) {
+		/* Deliberate fall-through cases! */
+ default:
+ PRIM_FLUSH(flush0, 3);
+ loops -= 4;
+ step = 1;
+ case 3:
+ PRIM_FLUSH(flush0, 2);
+ case 2:
+ PRIM_FLUSH(flush0, 1);
+ case 1:
+ PRIM_FLUSH(flush0, 0);
+ flush0 += LOOP_INC;
+ case 0:
+ break;
+ }
+ } while (step);
+}
+EXPORT_SYMBOL(metag_data_cache_flush);
+
+static void metag_phys_code_cache_flush(const void *start, int bytes)
+{
+ unsigned long flush0, flush1, flush2, flush3, end_set;
+ int loops, step;
+ int thread;
+ int set_shift, set_size;
+ int part, offset;
+
+ /* Use a sequence of writes to flush the cache region requested */
+ thread = (__core_reg_get(TXENABLE) & TXENABLE_THREAD_BITS)
+ >> TXENABLE_THREAD_S;
+ set_shift = icache_set_shift;
+
+ /* Move to the base of the physical cache flush region */
+ flush0 = LINSYSCFLUSH_ICACHE_LINE;
+ step = 64;
+
+ /* Get partition code for this thread */
+ part = metag_in32(SYSC_ICPART0 +
+ (SYSC_xCPARTn_STRIDE * thread));
+
+ if ((int)start < 0)
+ /* Access Global vs Local partition */
+ part >>= SYSC_xCPARTG_AND_S-SYSC_xCPARTL_AND_S;
+
+ /* Extract offset and move SetOff */
+ offset = (part & SYSC_xCPARTL_OR_BITS)
+ >> SYSC_xCPARTL_OR_S;
+ flush0 += (offset << (set_shift - 4));
+
+ /* Shrink size */
+ part = (part & SYSC_xCPARTL_AND_BITS)
+ >> SYSC_xCPARTL_AND_S;
+ loops = ((part + 1) << (set_shift - 4));
+
+ /* Where does the Set end? */
+ end_set = flush0 + loops;
+ set_size = loops;
+
+#ifdef CONFIG_METAG_META12
+ if ((bytes < 4096) && (bytes < loops)) {
+ /* Unreachable on HTP/MTP */
+		/* Only target the sets that could be relevant */
+ flush0 += (loops - step) & ((int) start);
+ loops = (((int) start) & (step-1)) + bytes + step - 1;
+ }
+#endif
+
+ /* Reduce loops by step of cache line size */
+ loops /= step;
+
+ flush1 = flush0 + (1<<set_shift);
+ flush2 = flush0 + (2<<set_shift);
+ flush3 = flush0 + (3<<set_shift);
+
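+	/*
+	 * As in the D-cache case: with only two sets, fold the four
+	 * pointers onto the pair so each iteration still issues four
+	 * writes while the step doubles and the loop count halves.
+	 */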
+ if (icache_sets_log2 == 1) {
+ flush2 = flush1;
+ flush3 = flush1 + step;
+ flush1 = flush0 + step;
+#if 0
+ /* flush0 will stop one line early in this case
+ * (flush1 will do the final line).
+ * However we don't correct end_set here at the moment
+ * because it will never wrap on HTP/MTP
+ */
+ end_set -= step;
+#endif
+ step <<= 1;
+ loops >>= 1;
+ }
+
+	/* Clear 'loops' lines in each way of the cache */
+ while (loops-- != 0) {
+#if 0
+ /*
+ * GCC doesn't generate very good code for this so we
+ * provide inline assembly instead.
+ */
+ /* Clear the ways */
+ metag_out8(0, flush0);
+ metag_out8(0, flush1);
+ metag_out8(0, flush2);
+ metag_out8(0, flush3);
+
+ flush0 += step;
+ flush1 += step;
+ flush2 += step;
+ flush3 += step;
+#else
+ asm volatile (
+ "SETB\t[%0+%4++],%5\n"
+ "SETB\t[%1+%4++],%5\n"
+ "SETB\t[%2+%4++],%5\n"
+ "SETB\t[%3+%4++],%5\n"
+ : "+e" (flush0),
+ "+e" (flush1),
+ "+e" (flush2),
+ "+e" (flush3)
+ : "e" (step), "a" (0));
+#endif
+
+ if (flush0 == end_set) {
+ /* Wrap within Set 0 */
+ flush0 -= set_size;
+ flush1 -= set_size;
+ flush2 -= set_size;
+ flush3 -= set_size;
+ }
+ }
+}
+
+void metag_code_cache_flush_all(const void *start)
+{
+ if ((metag_in32(SYSC_CACHE_MMU_CONFIG) & SYSC_CMMUCFG_IC_ON_BIT) == 0)
+		/* No need to flush the code cache, it's not actually enabled */
+ return;
+
+ metag_phys_code_cache_flush(start, 4096);
+}
+EXPORT_SYMBOL(metag_code_cache_flush_all);
+
+void metag_code_cache_flush(const void *start, int bytes)
+{
+#ifndef CONFIG_METAG_META12
+ void *flush;
+ int loops, step;
+#endif /* !CONFIG_METAG_META12 */
+
+ if ((metag_in32(SYSC_CACHE_MMU_CONFIG) & SYSC_CMMUCFG_IC_ON_BIT) == 0)
+		/* No need to flush the code cache, it's not actually enabled */
+ return;
+
+#ifdef CONFIG_METAG_META12
+ /* CACHEWD isn't available on Meta1, so always do full cache flush */
+ metag_phys_code_cache_flush(start, bytes);
+
+#else /* CONFIG_METAG_META12 */
+ /* If large size do full physical cache flush */
+ if (bytes >= 4096) {
+ metag_phys_code_cache_flush(start, bytes);
+ return;
+ }
+
+ /* Use linear cache flush mechanism on META IP */
+ flush = (void *)((int)start & ~(ICACHE_LINE_BYTES-1));
+ loops = ((int)start & (ICACHE_LINE_BYTES-1)) + bytes +
+ (ICACHE_LINE_BYTES-1);
+ loops >>= ICACHE_LINE_S;
+
+#define PRIM_IFLUSH(addr, offset) \
+ __builtin_meta2_cachewd(((addr) + ((offset) * 64)), CACHEW_ICACHE_BIT)
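+/*
+ * PRIM_IFLUSH issues a CACHEWD write for the I-cache line containing
+ * addr + offset*64; the 64-byte stride is assumed to match
+ * ICACHE_LINE_BYTES above.
+ */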
+
+#define LOOP_INC (4*64)
+
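+	/*
+	 * Same partially unrolled, Duff's-device-style loop as in
+	 * metag_data_cache_flush(): default flushes four lines per
+	 * iteration; cases 3..1 handle the tail, case 0 stops.
+	 */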
+ do {
+ /* By default stop */
+ step = 0;
+
+ switch (loops) {
+		/* Deliberate fall-through cases! */
+ default:
+ PRIM_IFLUSH(flush, 3);
+ loops -= 4;
+ step = 1;
+ case 3:
+ PRIM_IFLUSH(flush, 2);
+ case 2:
+ PRIM_IFLUSH(flush, 1);
+ case 1:
+ PRIM_IFLUSH(flush, 0);
+ flush += LOOP_INC;
+ case 0:
+ break;
+ }
+ } while (step);
+#endif /* !CONFIG_METAG_META12 */
+}
+EXPORT_SYMBOL(metag_code_cache_flush);