+/*
+ * Break the 1, 2 and 4 way variants of this out into separate functions to
+ * avoid nearly all the overhead of having the conditional stuff in the function
+ * bodies (+ the 1 and 2 way cases avoid saving any registers too).
+ */
+static void __flush_dcache_segment_1way(unsigned long start,
+ unsigned long extent_per_way)
+{
+ unsigned long orig_sr, sr_with_bl;
+ unsigned long base_addr;
+ unsigned long way_incr, linesz, way_size;
+ struct cache_info *dcache;
+ register unsigned long a0, a0e;
+
+ asm volatile("stc sr, %0" : "=r" (orig_sr));
+ sr_with_bl = orig_sr | (1<<28);
+ base_addr = ((unsigned long)&empty_zero_page[0]);
+
+ /*
+ * The previous code aligned base_addr to 16k, i.e. the way_size of all
+ * existing SH-4 D-caches. Whilst I don't see a need to have this
+ * aligned to any better than the cache line size (which it will be
+ * anyway by construction), let's align it to at least the way_size of
+ * any existing or conceivable SH-4 D-cache. -- RPC
+ */
+ base_addr = ((base_addr >> 16) << 16);
+ base_addr |= start;
+
+ dcache = &cpu_data->dcache;
+ linesz = dcache->linesz;
+ way_incr = dcache->way_incr;
+ way_size = dcache->way_size;
+
+ a0 = base_addr;
+ a0e = base_addr + extent_per_way;
+ do {
+ asm volatile("ldc %0, sr" : : "r" (sr_with_bl));
+ asm volatile("movca.l r0, @%0\n\t"
+ "ocbi @%0" : : "r" (a0));
+ a0 += linesz;
+ asm volatile("movca.l r0, @%0\n\t"
+ "ocbi @%0" : : "r" (a0));
+ a0 += linesz;
+ asm volatile("movca.l r0, @%0\n\t"
+ "ocbi @%0" : : "r" (a0));
+ a0 += linesz;
+ asm volatile("movca.l r0, @%0\n\t"
+ "ocbi @%0" : : "r" (a0));
+ asm volatile("ldc %0, sr" : : "r" (orig_sr));
+ a0 += linesz;
+ } while (a0 < a0e);
+}
+
+static void __flush_dcache_segment_2way(unsigned long start,
+ unsigned long extent_per_way)
+{
+ unsigned long orig_sr, sr_with_bl;
+ unsigned long base_addr;
+ unsigned long way_incr, linesz, way_size;
+ struct cache_info *dcache;
+ register unsigned long a0, a1, a0e;
+
+ asm volatile("stc sr, %0" : "=r" (orig_sr));
+ sr_with_bl = orig_sr | (1<<28);
+ base_addr = ((unsigned long)&empty_zero_page[0]);
+
+ /* See comment under 1-way above */
+ base_addr = ((base_addr >> 16) << 16);
+ base_addr |= start;
+
+ dcache = &cpu_data->dcache;
+ linesz = dcache->linesz;
+ way_incr = dcache->way_incr;
+ way_size = dcache->way_size;
+
+ a0 = base_addr;
+ a1 = a0 + way_incr;
+ a0e = base_addr + extent_per_way;
+ do {
+ asm volatile("ldc %0, sr" : : "r" (sr_with_bl));
+ asm volatile("movca.l r0, @%0\n\t"
+ "movca.l r0, @%1\n\t"
+ "ocbi @%0\n\t"
+ "ocbi @%1" : :
+ "r" (a0), "r" (a1));
+ a0 += linesz;
+ a1 += linesz;
+ asm volatile("movca.l r0, @%0\n\t"
+ "movca.l r0, @%1\n\t"
+ "ocbi @%0\n\t"
+ "ocbi @%1" : :
+ "r" (a0), "r" (a1));
+ a0 += linesz;
+ a1 += linesz;
+ asm volatile("movca.l r0, @%0\n\t"
+ "movca.l r0, @%1\n\t"
+ "ocbi @%0\n\t"
+ "ocbi @%1" : :
+ "r" (a0), "r" (a1));
+ a0 += linesz;
+ a1 += linesz;
+ asm volatile("movca.l r0, @%0\n\t"
+ "movca.l r0, @%1\n\t"
+ "ocbi @%0\n\t"
+ "ocbi @%1" : :
+ "r" (a0), "r" (a1));
+ asm volatile("ldc %0, sr" : : "r" (orig_sr));
+ a0 += linesz;
+ a1 += linesz;
+ } while (a0 < a0e);
+}
+
+static void __flush_dcache_segment_4way(unsigned long start,
+ unsigned long extent_per_way)
+{
+ unsigned long orig_sr, sr_with_bl;
+ unsigned long base_addr;
+ unsigned long way_incr, linesz, way_size;
+ struct cache_info *dcache;
+ register unsigned long a0, a1, a2, a3, a0e;
+
+ asm volatile("stc sr, %0" : "=r" (orig_sr));
+ sr_with_bl = orig_sr | (1<<28);
+ base_addr = ((unsigned long)&empty_zero_page[0]);
+
+ /* See comment under 1-way above */
+ base_addr = ((base_addr >> 16) << 16);
+ base_addr |= start;
+
+ dcache = &cpu_data->dcache;
+ linesz = dcache->linesz;
+ way_incr = dcache->way_incr;
+ way_size = dcache->way_size;
+
+ a0 = base_addr;
+ a1 = a0 + way_incr;
+ a2 = a1 + way_incr;
+ a3 = a2 + way_incr;
+ a0e = base_addr + extent_per_way;
+ do {
+ asm volatile("ldc %0, sr" : : "r" (sr_with_bl));
+ asm volatile("movca.l r0, @%0\n\t"
+ "movca.l r0, @%1\n\t"
+ "movca.l r0, @%2\n\t"
+ "movca.l r0, @%3\n\t"
+ "ocbi @%0\n\t"
+ "ocbi @%1\n\t"
+ "ocbi @%2\n\t"
+ "ocbi @%3\n\t" : :
+ "r" (a0), "r" (a1), "r" (a2), "r" (a3));
+ a0 += linesz;
+ a1 += linesz;
+ a2 += linesz;
+ a3 += linesz;
+ asm volatile("movca.l r0, @%0\n\t"
+ "movca.l r0, @%1\n\t"
+ "movca.l r0, @%2\n\t"
+ "movca.l r0, @%3\n\t"
+ "ocbi @%0\n\t"
+ "ocbi @%1\n\t"
+ "ocbi @%2\n\t"
+ "ocbi @%3\n\t" : :
+ "r" (a0), "r" (a1), "r" (a2), "r" (a3));
+ a0 += linesz;
+ a1 += linesz;
+ a2 += linesz;
+ a3 += linesz;
+ asm volatile("movca.l r0, @%0\n\t"
+ "movca.l r0, @%1\n\t"
+ "movca.l r0, @%2\n\t"
+ "movca.l r0, @%3\n\t"
+ "ocbi @%0\n\t"
+ "ocbi @%1\n\t"
+ "ocbi @%2\n\t"
+ "ocbi @%3\n\t" : :
+ "r" (a0), "r" (a1), "r" (a2), "r" (a3));
+ a0 += linesz;
+ a1 += linesz;
+ a2 += linesz;
+ a3 += linesz;
+ asm volatile("movca.l r0, @%0\n\t"
+ "movca.l r0, @%1\n\t"
+ "movca.l r0, @%2\n\t"
+ "movca.l r0, @%3\n\t"
+ "ocbi @%0\n\t"
+ "ocbi @%1\n\t"
+ "ocbi @%2\n\t"
+ "ocbi @%3\n\t" : :
+ "r" (a0), "r" (a1), "r" (a2), "r" (a3));
+ asm volatile("ldc %0, sr" : : "r" (orig_sr));
+ a0 += linesz;
+ a1 += linesz;
+ a2 += linesz;
+ a3 += linesz;
+ } while (a0 < a0e);
+}