#ifndef _X8664_TLBFLUSH_H
#define _X8664_TLBFLUSH_H
-#include <linux/config.h>
#include <linux/mm.h>
#include <asm/processor.h>
-#define __flush_tlb() \
- do { \
- unsigned long tmpreg; \
- \
- __asm__ __volatile__( \
- "movq %%cr3, %0; # flush TLB \n" \
- "movq %0, %%cr3; \n" \
- : "=r" (tmpreg) \
- :: "memory"); \
- } while (0)
+static inline unsigned long get_cr3(void)
+{
+ unsigned long cr3;
+ asm volatile("mov %%cr3,%0" : "=r" (cr3));
+ return cr3;
+}
-/*
- * Global pages have to be flushed a bit differently. Not a real
- * performance problem because this does not happen often.
- */
-#define __flush_tlb_global() \
- do { \
- unsigned long tmpreg; \
- \
- __asm__ __volatile__( \
- "movq %1, %%cr4; # turn off PGE \n" \
- "movq %%cr3, %0; # flush TLB \n" \
- "movq %0, %%cr3; \n" \
- "movq %2, %%cr4; # turn PGE back on \n" \
- : "=&r" (tmpreg) \
- : "r" (mmu_cr4_features & ~X86_CR4_PGE), \
- "r" (mmu_cr4_features) \
- : "memory"); \
- } while (0)
-
-extern unsigned long pgkern_mask;
-
-#define __flush_tlb_all() __flush_tlb_global()
+static inline void set_cr3(unsigned long cr3)
+{
+ asm volatile("mov %0,%%cr3" :: "r" (cr3) : "memory");
+}
+
+static inline void __flush_tlb(void)
+{
+ set_cr3(get_cr3());
+}
+
+static inline unsigned long get_cr4(void)
+{
+ unsigned long cr4;
+ asm volatile("mov %%cr4,%0" : "=r" (cr4));
+ return cr4;
+}
+
+static inline void set_cr4(unsigned long cr4)
+{
+ asm volatile("mov %0,%%cr4" :: "r" (cr4) : "memory");
+}
+
+static inline void __flush_tlb_all(void)
+{
+ unsigned long cr4 = get_cr4();
+ set_cr4(cr4 & ~X86_CR4_PGE); /* clear PGE */
+ set_cr4(cr4); /* write old PGE again and flush TLBs */
+}
#define __flush_tlb_one(addr) \
- __asm__ __volatile__("invlpg %0": :"m" (*(char *) addr))
+ __asm__ __volatile__("invlpg (%0)" :: "r" (addr) : "memory")
/*
* - flush_tlb_kernel_range(start, end) flushes a range of kernel pages
* - flush_tlb_pgtables(mm, start, end) flushes a range of page tables
*
- * ..but the x86_64 has somewhat limited tlb flushing capabilities,
- * and page-granular flushes are available only on i486 and up.
+ * x86-64 can only flush individual pages or full VMs. For a range flush
+ * we always do the full VM. Might be worth trying if for a small
+ * range a few INVLPGs in a row are a win.
*/
#ifndef CONFIG_SMP
#define TLBSTATE_OK 1
#define TLBSTATE_LAZY 2
+/* Roughly an IPI every 20MB with 4k pages for freeing page table
+ ranges. Cost is about 42k of memory for each CPU. */
+#define ARCH_FREE_PTE_NR 5350
+
#endif
#define flush_tlb_kernel_range(start, end) flush_tlb_all()
static inline void flush_tlb_pgtables(struct mm_struct *mm,
unsigned long start, unsigned long end)
{
- /* x86_64 does not keep any page table caches in TLB */
+ /* x86_64 does not keep any page table caches in a software TLB.
+ The CPUs do in their hardware TLBs, but they are handled
+ by the normal TLB flushing algorithms. */
}
#endif /* _X8664_TLBFLUSH_H */