arch/x86_64/mm/fault-xen.c

   1 /*
   2  *  linux/arch/x86-64/mm/fault.c
   3  *
   4  *  Copyright (C) 1995  Linus Torvalds
   5  *  Copyright (C) 2001,2002 Andi Kleen, SuSE Labs.
   6  */
   7
   8 #include <linux/signal.h>
   9 #include <linux/sched.h>
  10 #include <linux/kernel.h>
  11 #include <linux/errno.h>
  12 #include <linux/string.h>
  13 #include <linux/types.h>
  14 #include <linux/tracehook.h>
  15 #include <linux/mman.h>
  16 #include <linux/mm.h>
  17 #include <linux/smp.h>
  18 #include <linux/smp_lock.h>
  19 #include <linux/interrupt.h>
  20 #include <linux/init.h>
  21 #include <linux/tty.h>
  22 #include <linux/vt_kern.h>              /* For unblank_screen() */
  23 #include <linux/compiler.h>
  24 #include <linux/module.h>
  25 #include <linux/kprobes.h>
  26
  27 #include <asm/system.h>
  28 #include <asm/uaccess.h>
  29 #include <asm/pgalloc.h>
  30 #include <asm/smp.h>
  31 #include <asm/tlbflush.h>
  32 #include <asm/proto.h>
  33 #include <asm/kdebug.h>
  34 #include <asm-generic/sections.h>
  35
  36 /* Page fault error code bits */
  37 #define PF_PROT (1<<0)          /* or no page found */
  38 #define PF_WRITE        (1<<1)
  39 #define PF_USER (1<<2)
  40 #define PF_RSVD (1<<3)
  41 #define PF_INSTR        (1<<4)
  42
  43 #ifdef CONFIG_KPROBES
  44 ATOMIC_NOTIFIER_HEAD(notify_page_fault_chain);
  45
  46 /* Hook to register for page fault notifications */
  47 int register_page_fault_notifier(struct notifier_block *nb)
  48 {
  49         vmalloc_sync_all();
  50         return atomic_notifier_chain_register(&notify_page_fault_chain, nb);
  51 }
  52
  53 int unregister_page_fault_notifier(struct notifier_block *nb)
  54 {
  55         return atomic_notifier_chain_unregister(&notify_page_fault_chain, nb);
  56 }
  57
  58 static inline int notify_page_fault(enum die_val val, const char *str,
  59                         struct pt_regs *regs, long err, int trap, int sig)
  60 {
  61         struct die_args args = {
  62                 .regs = regs,
  63                 .str = str,
  64                 .err = err,
  65                 .trapnr = trap,
  66                 .signr = sig
  67         };
  68         return atomic_notifier_call_chain(&notify_page_fault_chain, val, &args);
  69 }
  70 #else
  71 static inline int notify_page_fault(enum die_val val, const char *str,
  72                         struct pt_regs *regs, long err, int trap, int sig)
  73 {
  74         return NOTIFY_DONE;
  75 }
  76 #endif
  77
  78 void bust_spinlocks(int yes)
  79 {
  80         int loglevel_save = console_loglevel;
  81         if (yes) {
  82                 oops_in_progress = 1;
  83         } else {
  84 #ifdef CONFIG_VT
  85                 unblank_screen();
  86 #endif
  87                 oops_in_progress = 0;
  88                 /*
  89                  * OK, the message is on the console.  Now we call printk()
  90                  * without oops_in_progress set so that printk will give klogd
  91                  * a poke.  Hold onto your hats...
  92                  */
  93                 console_loglevel = 15;          /* NMI oopser may have shut the console up */
  94                 printk(" ");
  95                 console_loglevel = loglevel_save;
  96         }
  97 }
  98
  99 /* Sometimes the CPU reports invalid exceptions on prefetch.
 100    Check that here and ignore.
 101    Opcode checker based on code by Richard Brunner */
 102 static noinline int is_prefetch(struct pt_regs *regs, unsigned long addr,
 103                                 unsigned long error_code)
 104 {
 105         unsigned char *instr;
 106         int scan_more = 1;
 107         int prefetch = 0;
 108         unsigned char *max_instr;
 109
 110         /* If it was a exec fault ignore */
 111         if (error_code & PF_INSTR)
 112                 return 0;
 113
 114         instr = (unsigned char *)convert_rip_to_linear(current, regs);
 115         max_instr = instr + 15;
 116
 117         if (user_mode(regs) && instr >= (unsigned char *)TASK_SIZE64)
 118                 return 0;
 119
 120         while (scan_more && instr < max_instr) {
 121                 unsigned char opcode;
 122                 unsigned char instr_hi;
 123                 unsigned char instr_lo;
 124
 125                 if (__get_user(opcode, instr))
 126                         break;
 127
 128                 instr_hi = opcode & 0xf0;
 129                 instr_lo = opcode & 0x0f;
 130                 instr++;
 131
 132                 switch (instr_hi) {
 133                 case 0x20:
 134                 case 0x30:
 135                         /* Values 0x26,0x2E,0x36,0x3E are valid x86
 136                            prefixes.  In long mode, the CPU will signal
 137                            invalid opcode if some of these prefixes are
 138                            present so we will never get here anyway */
 139                         scan_more = ((instr_lo & 7) == 0x6);
 140                         break;
 141
 142                 case 0x40:
 143                         /* In AMD64 long mode, 0x40 to 0x4F are valid REX prefixes
 144                            Need to figure out under what instruction mode the
 145                            instruction was issued ... */
 146                         /* Could check the LDT for lm, but for now it's good
 147                            enough to assume that long mode only uses well known
 148                            segments or kernel. */
 149                         scan_more = (!user_mode(regs)) || (regs->cs == __USER_CS);
 150                         break;
 151
 152                 case 0x60:
 153                         /* 0x64 thru 0x67 are valid prefixes in all modes. */
 154                         scan_more = (instr_lo & 0xC) == 0x4;
 155                         break;
 156                 case 0xF0:
 157                         /* 0xF0, 0xF2, and 0xF3 are valid prefixes in all modes. */
 158                         scan_more = !instr_lo || (instr_lo>>1) == 1;
 159                         break;
 160                 case 0x00:
 161                         /* Prefetch instruction is 0x0F0D or 0x0F18 */
 162                         scan_more = 0;
 163                         if (__get_user(opcode, instr))
 164                                 break;
 165                         prefetch = (instr_lo == 0xF) &&
 166                                 (opcode == 0x0D || opcode == 0x18);
 167                         break;
 168                 default:
 169                         scan_more = 0;
 170                         break;
 171                 }
 172         }
 173         return prefetch;
 174 }
 175
 176 static int bad_address(void *p)
 177 {
 178         unsigned long dummy;
 179         return __get_user(dummy, (unsigned long *)p);
 180 }
 181
 182 void dump_pagetable(unsigned long address)
 183 {
 184         pgd_t *pgd;
 185         pud_t *pud;
 186         pmd_t *pmd;
 187         pte_t *pte;
 188
 189         pgd = __va(read_cr3() & PHYSICAL_PAGE_MASK);
 190         pgd += pgd_index(address);
 191         if (bad_address(pgd)) goto bad;
 192         printk("PGD %lx ", pgd_val(*pgd));
 193         if (!pgd_present(*pgd)) goto ret;
 194
 195         pud = pud_offset(pgd, address);
 196         if (bad_address(pud)) goto bad;
 197         printk("PUD %lx ", pud_val(*pud));
 198         if (!pud_present(*pud)) goto ret;
 199
 200         pmd = pmd_offset(pud, address);
 201         if (bad_address(pmd)) goto bad;
 202         printk("PMD %lx ", pmd_val(*pmd));
 203         if (!pmd_present(*pmd)) goto ret;
 204
 205         pte = pte_offset_kernel(pmd, address);
 206         if (bad_address(pte)) goto bad;
 207         printk("PTE %lx", pte_val(*pte));
 208 ret:
 209         printk("\n");
 210         return;
 211 bad:
 212         printk("BAD\n");
 213 }
 214
 215 static const char errata93_warning[] =
 216 KERN_ERR "******* Your BIOS seems to not contain a fix for K8 errata #93\n"
 217 KERN_ERR "******* Working around it, but it may cause SEGVs or burn power.\n"
 218 KERN_ERR "******* Please consider a BIOS update.\n"
 219 KERN_ERR "******* Disabling USB legacy in the BIOS may also help.\n";
 220
 221 /* Workaround for K8 erratum #93 & buggy BIOS.
 222    BIOS SMM functions are required to use a specific workaround
 223    to avoid corruption of the 64bit RIP register on C stepping K8.
 224    A lot of BIOS that didn't get tested properly miss this.
 225    The OS sees this as a page fault with the upper 32bits of RIP cleared.
 226    Try to work around it here.
 227    Note we only handle faults in kernel here. */
 228
 229 static int is_errata93(struct pt_regs *regs, unsigned long address)
 230 {
 231         static int warned;
 232         if (address != regs->rip)
 233                 return 0;
 234         if ((address >> 32) != 0)
 235                 return 0;
 236         address |= 0xffffffffUL << 32;
 237         if ((address >= (u64)_stext && address <= (u64)_etext) ||
 238             (address >= MODULES_VADDR && address <= MODULES_END)) {
 239                 if (!warned) {
 240                         printk(errata93_warning);
 241                         warned = 1;
 242                 }
 243                 regs->rip = address;
 244                 return 1;
 245         }
 246         return 0;
 247 }
 248
 249 int unhandled_signal(struct task_struct *tsk, int sig)
 250 {
 251         if (tsk->pid == 1)
 252                 return 1;
 253         if (tracehook_consider_fatal_signal(tsk, sig))
 254                 return 0;
 255         return (tsk->sighand->action[sig-1].sa.sa_handler == SIG_IGN) ||
 256                 (tsk->sighand->action[sig-1].sa.sa_handler == SIG_DFL);
 257 }
 258
 259 static noinline void pgtable_bad(unsigned long address, struct pt_regs *regs,
 260                                  unsigned long error_code)
 261 {
 262         unsigned long flags = oops_begin();
 263         struct task_struct *tsk;
 264
 265         printk(KERN_ALERT "%s: Corrupted page table at address %lx\n",
 266                current->comm, address);
 267         dump_pagetable(address);
 268         tsk = current;
 269         tsk->thread.cr2 = address;
 270         tsk->thread.trap_no = 14;
 271         tsk->thread.error_code = error_code;
 272         __die("Bad pagetable", regs, error_code);
 273         oops_end(flags);
 274         do_exit(SIGKILL);
 275 }
 276
 277 /*
 278  * Handle a fault on the vmalloc area
 279  *
 280  * This assumes no large pages in there.
 281  */
 282 static int vmalloc_fault(unsigned long address)
 283 {
 284         pgd_t *pgd, *pgd_ref;
 285         pud_t *pud, *pud_ref;
 286         pmd_t *pmd, *pmd_ref;
 287         pte_t *pte, *pte_ref;
 288
 289         /* Copy kernel mappings over when needed. This can also
 290            happen within a race in page table update. In the later
 291            case just flush. */
 292
 293         /* On Xen the line below does not always work. Needs investigating! */
 294         /*pgd = pgd_offset(current->mm ?: &init_mm, address);*/
 295         pgd = __va(read_cr3() & PHYSICAL_PAGE_MASK);
 296         pgd += pgd_index(address);
 297         pgd_ref = pgd_offset_k(address);
 298         if (pgd_none(*pgd_ref))
 299                 return -1;
 300         if (pgd_none(*pgd))
 301                 set_pgd(pgd, *pgd_ref);
 302         else
 303                 BUG_ON(pgd_page(*pgd) != pgd_page(*pgd_ref));
 304
 305         /* Below here mismatches are bugs because these lower tables
 306            are shared */
 307
 308         pud = pud_offset(pgd, address);
 309         pud_ref = pud_offset(pgd_ref, address);
 310         if (pud_none(*pud_ref))
 311                 return -1;
 312         if (pud_none(*pud) || pud_page(*pud) != pud_page(*pud_ref))
 313                 BUG();
 314         pmd = pmd_offset(pud, address);
 315         pmd_ref = pmd_offset(pud_ref, address);
 316         if (pmd_none(*pmd_ref))
 317                 return -1;
 318         if (pmd_none(*pmd) || pmd_page(*pmd) != pmd_page(*pmd_ref))
 319                 BUG();
 320         pte_ref = pte_offset_kernel(pmd_ref, address);
 321         if (!pte_present(*pte_ref))
 322                 return -1;
 323         pte = pte_offset_kernel(pmd, address);
 324         /* Don't use pte_page here, because the mappings can point
 325            outside mem_map, and the NUMA hash lookup cannot handle
 326            that. */
 327         if (!pte_present(*pte) || pte_pfn(*pte) != pte_pfn(*pte_ref))
 328                 BUG();
 329         return 0;
 330 }
 331
 332 int page_fault_trace = 0;
 333 int exception_trace = 1;
 334
 335
 336 #define MEM_VERBOSE 1
 337
 338 #ifdef MEM_VERBOSE
 339 #define MEM_LOG(_f, _a...)                      \
 340         printk("fault.c:[%d]-> " _f "\n",       \
 341         __LINE__ , ## _a )
 342 #else
 343 #define MEM_LOG(_f, _a...) ((void)0)
 344 #endif
 345
 346 static int spurious_fault(struct pt_regs *regs,
 347                           unsigned long address,
 348                           unsigned long error_code)
 349 {
 350         pgd_t *pgd;
 351         pud_t *pud;
 352         pmd_t *pmd;
 353         pte_t *pte;
 354
 355 #ifdef CONFIG_XEN
 356         /* Faults in hypervisor area are never spurious. */
 357         if ((address >= HYPERVISOR_VIRT_START) &&
 358             (address < HYPERVISOR_VIRT_END))
 359                 return 0;
 360 #endif
 361
 362         /* Reserved-bit violation or user access to kernel space? */
 363         if (error_code & (PF_RSVD|PF_USER))
 364                 return 0;
 365
 366         pgd = init_mm.pgd + pgd_index(address);
 367         if (!pgd_present(*pgd))
 368                 return 0;
 369
 370         pud = pud_offset(pgd, address);
 371         if (!pud_present(*pud))
 372                 return 0;
 373
 374         pmd = pmd_offset(pud, address);
 375         if (!pmd_present(*pmd))
 376                 return 0;
 377
 378         pte = pte_offset_kernel(pmd, address);
 379         if (!pte_present(*pte))
 380                 return 0;
 381         if ((error_code & PF_WRITE) && !pte_write(*pte))
 382                 return 0;
 383         if ((error_code & PF_INSTR) && (pte_val(*pte) & _PAGE_NX))
 384                 return 0;
 385
 386         return 1;
 387 }
 388
 389 /*
 390  * This routine handles page faults.  It determines the address,
 391  * and the problem, and then passes it off to one of the appropriate
 392  * routines.
 393  */
 394 asmlinkage void __kprobes do_page_fault(struct pt_regs *regs,
 395                                         unsigned long error_code)
 396 {
 397         struct task_struct *tsk;
 398         struct mm_struct *mm;
 399         struct vm_area_struct * vma;
 400         unsigned long address;
 401         const struct exception_table_entry *fixup;
 402         int write;
 403         unsigned long flags;
 404         siginfo_t info;
 405
 406         if (!user_mode(regs))
 407                 error_code &= ~PF_USER; /* means kernel */
 408
 409         tsk = current;
 410         mm = tsk->mm;
 411         prefetchw(&mm->mmap_sem);
 412
 413         /* get the address */
 414         address = HYPERVISOR_shared_info->vcpu_info[
 415                 smp_processor_id()].arch.cr2;
 416
 417         info.si_code = SEGV_MAPERR;
 418
 419
 420         /*
 421          * We fault-in kernel-space virtual memory on-demand. The
 422          * 'reference' page table is init_mm.pgd.
 423          *
 424          * NOTE! We MUST NOT take any locks for this case. We may
 425          * be in an interrupt or a critical region, and should
 426          * only copy the information from the master page table,
 427          * nothing more.
 428          *
 429          * This verifies that the fault happens in kernel space
 430          * (error_code & 4) == 0, and that the fault was not a
 431          * protection error (error_code & 9) == 0.
 432          */
 433         if (unlikely(address >= TASK_SIZE64)) {
 434                 /*
 435                  * Don't check for the module range here: its PML4
 436                  * is always initialized because it's shared with the main
 437                  * kernel text. Only vmalloc may need PML4 syncups.
 438                  */
 439                 if (!(error_code & (PF_RSVD|PF_USER|PF_PROT)) &&
 440                       ((address >= VMALLOC_START && address < VMALLOC_END))) {
 441                         if (vmalloc_fault(address) >= 0)
 442                                 return;
 443                 }
 444                 if (notify_page_fault(DIE_PAGE_FAULT, "page fault", regs, error_code, 14,
 445                                                 SIGSEGV) == NOTIFY_STOP)
 446                         return;
 447                 /* Can take a spurious fault if mapping changes R/O -> R/W. */
 448                 if (spurious_fault(regs, address, error_code))
 449                         return;
 450                 /*
 451                  * Don't take the mm semaphore here. If we fixup a prefetch
 452                  * fault we could otherwise deadlock.
 453                  */
 454                 goto bad_area_nosemaphore;
 455         }
 456
 457         if (notify_page_fault(DIE_PAGE_FAULT, "page fault", regs, error_code, 14,
 458                                         SIGSEGV) == NOTIFY_STOP)
 459                 return;
 460
 461         if (likely(regs->eflags & X86_EFLAGS_IF))
 462                 local_irq_enable();
 463
 464         if (unlikely(page_fault_trace))
 465                 printk("pagefault rip:%lx rsp:%lx cs:%lu ss:%lu address %lx error %lx\n",
 466                        regs->rip,regs->rsp,regs->cs,regs->ss,address,error_code);
 467
 468         if (unlikely(error_code & PF_RSVD))
 469                 pgtable_bad(address, regs, error_code);
 470
 471         /*
 472          * If we're in an interrupt or have no user
 473          * context, we must not take the fault..
 474          */
 475         if (unlikely(in_atomic() || !mm))
 476                 goto bad_area_nosemaphore;
 477
 478  again:
 479         /* When running in the kernel we expect faults to occur only to
 480          * addresses in user space.  All other faults represent errors in the
 481          * kernel and should generate an OOPS.  Unfortunatly, in the case of an
 482          * erroneous fault occurring in a code path which already holds mmap_sem
 483          * we will deadlock attempting to validate the fault against the
 484          * address space.  Luckily the kernel only validly references user
 485          * space from well defined areas of code, which are listed in the
 486          * exceptions table.
 487          *
 488          * As the vast majority of faults will be valid we will only perform
 489          * the source reference check when there is a possibilty of a deadlock.
 490          * Attempt to lock the address space, if we cannot we then validate the
 491          * source.  If this is invalid we can skip the address space check,
 492          * thus avoiding the deadlock.
 493          */
 494         if (!down_read_trylock(&mm->mmap_sem)) {
 495                 if ((error_code & PF_USER) == 0 &&
 496                     !search_exception_tables(regs->rip))
 497                         goto bad_area_nosemaphore;
 498                 down_read(&mm->mmap_sem);
 499         }
 500
 501         vma = find_vma(mm, address);
 502         if (!vma)
 503                 goto bad_area;
 504         if (likely(vma->vm_start <= address))
 505                 goto good_area;
 506         if (!(vma->vm_flags & VM_GROWSDOWN))
 507                 goto bad_area;
 508         if (error_code & 4) {
 509                 // XXX: align red zone size with ABI
 510                 if (address + 128 < regs->rsp)
 511                         goto bad_area;
 512         }
 513         if (expand_stack(vma, address))
 514                 goto bad_area;
 515 /*
 516  * Ok, we have a good vm_area for this memory access, so
 517  * we can handle it..
 518  */
 519 good_area:
 520         info.si_code = SEGV_ACCERR;
 521         write = 0;
 522         switch (error_code & (PF_PROT|PF_WRITE)) {
 523                 default:        /* 3: write, present */
 524                         /* fall through */
 525                 case PF_WRITE:          /* write, not present */
 526                         if (!(vma->vm_flags & VM_WRITE))
 527                                 goto bad_area;
 528                         write++;
 529                         break;
 530                 case PF_PROT:           /* read, present */
 531                         goto bad_area;
 532                 case 0:                 /* read, not present */
 533                         if (!(vma->vm_flags & (VM_READ | VM_EXEC)))
 534                                 goto bad_area;
 535         }
 536
 537         /*
 538          * If for any reason at all we couldn't handle the fault,
 539          * make sure we exit gracefully rather than endlessly redo
 540          * the fault.
 541          */
 542         switch (handle_mm_fault(mm, vma, address, write)) {
 543         case VM_FAULT_MINOR:
 544                 tsk->min_flt++;
 545                 break;
 546         case VM_FAULT_MAJOR:
 547                 tsk->maj_flt++;
 548                 break;
 549         case VM_FAULT_SIGBUS:
 550                 goto do_sigbus;
 551         default:
 552                 goto out_of_memory;
 553         }
 554
 555         up_read(&mm->mmap_sem);
 556         return;
 557
 558 /*
 559  * Something tried to access memory that isn't in our memory map..
 560  * Fix it, but check if it's kernel or user first..
 561  */
 562 bad_area:
 563         up_read(&mm->mmap_sem);
 564
 565 bad_area_nosemaphore:
 566         /* User mode accesses just cause a SIGSEGV */
 567         if (error_code & PF_USER) {
 568                 if (is_prefetch(regs, address, error_code))
 569                         return;
 570
 571                 /* Work around K8 erratum #100 K8 in compat mode
 572                    occasionally jumps to illegal addresses >4GB.  We
 573                    catch this here in the page fault handler because
 574                    these addresses are not reachable. Just detect this
 575                    case and return.  Any code segment in LDT is
 576                    compatibility mode. */
 577                 if ((regs->cs == __USER32_CS || (regs->cs & (1<<2))) &&
 578                     (address >> 32))
 579                         return;
 580
 581                 if (exception_trace && unhandled_signal(tsk, SIGSEGV)) {
 582                         printk(
 583                        "%s%s[%d]: segfault at %016lx rip %016lx rsp %016lx error %lx\n",
 584                                         tsk->pid > 1 ? KERN_INFO : KERN_EMERG,
 585                                         tsk->comm, tsk->pid, address, regs->rip,
 586                                         regs->rsp, error_code);
 587                 }
 588
 589                 tsk->thread.cr2 = address;
 590                 /* Kernel addresses are always protection faults */
 591                 tsk->thread.error_code = error_code | (address >= TASK_SIZE);
 592                 tsk->thread.trap_no = 14;
 593                 info.si_signo = SIGSEGV;
 594                 info.si_errno = 0;
 595                 /* info.si_code has been set above */
 596                 info.si_addr = (void __user *)address;
 597                 force_sig_info(SIGSEGV, &info, tsk);
 598                 return;
 599         }
 600
 601 no_context:
 602
 603         /* Are we prepared to handle this kernel fault?  */
 604         fixup = search_exception_tables(regs->rip);
 605         if (fixup) {
 606                 regs->rip = fixup->fixup;
 607                 return;
 608         }
 609
 610         /*
 611          * Hall of shame of CPU/BIOS bugs.
 612          */
 613
 614         if (is_prefetch(regs, address, error_code))
 615                 return;
 616
 617         if (is_errata93(regs, address))
 618                 return;
 619
 620 /*
 621  * Oops. The kernel tried to access some bad page. We'll have to
 622  * terminate things with extreme prejudice.
 623  */
 624
 625         flags = oops_begin();
 626
 627         if (address < PAGE_SIZE)
 628                 printk(KERN_ALERT "Unable to handle kernel NULL pointer dereference");
 629         else
 630                 printk(KERN_ALERT "Unable to handle kernel paging request");
 631         printk(" at %016lx RIP: \n" KERN_ALERT,address);
 632         printk_address(regs->rip);
 633         dump_pagetable(address);
 634         tsk->thread.cr2 = address;
 635         tsk->thread.trap_no = 14;
 636         tsk->thread.error_code = error_code;
 637         __die("Oops", regs, error_code);
 638         /* Executive summary in case the body of the oops scrolled away */
 639         printk(KERN_EMERG "CR2: %016lx\n", address);
 640         oops_end(flags);
 641         do_exit(SIGKILL);
 642
 643 /*
 644  * We ran out of memory, or some other thing happened to us that made
 645  * us unable to handle the page fault gracefully.
 646  */
 647 out_of_memory:
 648         up_read(&mm->mmap_sem);
 649         if (current->pid == 1) {
 650                 yield();
 651                 goto again;
 652         }
 653         printk("VM: killing process %s\n", tsk->comm);
 654         if (error_code & 4)
 655                 do_exit(SIGKILL);
 656         goto no_context;
 657
 658 do_sigbus:
 659         up_read(&mm->mmap_sem);
 660
 661         /* Kernel mode? Handle exceptions or die */
 662         if (!(error_code & PF_USER))
 663                 goto no_context;
 664
 665         tsk->thread.cr2 = address;
 666         tsk->thread.error_code = error_code;
 667         tsk->thread.trap_no = 14;
 668         info.si_signo = SIGBUS;
 669         info.si_errno = 0;
 670         info.si_code = BUS_ADRERR;
 671         info.si_addr = (void __user *)address;
 672         force_sig_info(SIGBUS, &info, tsk);
 673         return;
 674 }
 675
 676 DEFINE_SPINLOCK(pgd_lock);
 677 struct page *pgd_list;
 678
 679 void vmalloc_sync_all(void)
 680 {
 681         /* Note that races in the updates of insync and start aren't
 682            problematic:
 683            insync can only get set bits added, and updates to start are only
 684            improving performance (without affecting correctness if undone). */
 685         static DECLARE_BITMAP(insync, PTRS_PER_PGD);
 686         static unsigned long start = VMALLOC_START & PGDIR_MASK;
 687         unsigned long address;
 688
 689         for (address = start; address <= VMALLOC_END; address += PGDIR_SIZE) {
 690                 if (!test_bit(pgd_index(address), insync)) {
 691                         const pgd_t *pgd_ref = pgd_offset_k(address);
 692                         struct page *page;
 693
 694                         if (pgd_none(*pgd_ref))
 695                                 continue;
 696                         spin_lock(&pgd_lock);
 697                         for (page = pgd_list; page;
 698                              page = (struct page *)page->index) {
 699                                 pgd_t *pgd;
 700                                 pgd = (pgd_t *)page_address(page) + pgd_index(address);
 701                                 if (pgd_none(*pgd))
 702                                         set_pgd(pgd, *pgd_ref);
 703                                 else
 704                                         BUG_ON(pgd_page(*pgd) != pgd_page(*pgd_ref));
 705                         }
 706                         spin_unlock(&pgd_lock);
 707                         set_bit(pgd_index(address), insync);
 708                 }
 709                 if (address == start)
 710                         start = address + PGDIR_SIZE;
 711         }
 712         /* Check that there is no need to do the same for the modules area. */
 713         BUILD_BUG_ON(!(MODULES_VADDR > __START_KERNEL));
 714         BUILD_BUG_ON(!(((MODULES_END - 1) & PGDIR_MASK) ==
 715                                 (__START_KERNEL & PGDIR_MASK)));
 716 }
 717
 718 static int __init enable_pagefaulttrace(char *str)
 719 {
 720         page_fault_trace = 1;
 721         return 1;
 722 }
 723 __setup("pagefaulttrace", enable_pagefaulttrace);