/*
 *  linux/arch/x86-64/mm/fault.c
 *
 *  Copyright (C) 1995  Linus Torvalds
 *  Copyright (C) 2001,2002 Andi Kleen, SuSE Labs.
 */

#include <linux/signal.h>
#include <linux/sched.h>
#include <linux/kernel.h>
#include <linux/errno.h>
#include <linux/string.h>
#include <linux/types.h>
#include <linux/tracehook.h>
#include <linux/mman.h>
#include <linux/mm.h>
#include <linux/smp.h>
#include <linux/smp_lock.h>
#include <linux/interrupt.h>
#include <linux/init.h>
#include <linux/tty.h>
#include <linux/vt_kern.h>		/* For unblank_screen() */
#include <linux/compiler.h>
#include <linux/module.h>
#include <linux/kprobes.h>
#include <linux/uaccess.h>

#include <asm/system.h>
#include <asm/pgalloc.h>
#include <asm/smp.h>
#include <asm/tlbflush.h>
#include <asm/proto.h>
#include <asm/kdebug.h>
#include <asm-generic/sections.h>

/* Page fault error code bits */
#define PF_PROT		(1<<0)	/* or no page found */
#define PF_WRITE	(1<<1)
#define PF_USER		(1<<2)
#define PF_RSVD		(1<<3)
#define PF_INSTR	(1<<4)
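
/*
 * Illustrative decoding (not from the original source): a user-mode write
 * to a not-present page arrives as error_code == (PF_USER|PF_WRITE) == 6,
 * e.g. the first store to a freshly mmap()ed anonymous page.  A user-mode
 * write to a present but read-only page (a copy-on-write fault) arrives
 * as (PF_USER|PF_WRITE|PF_PROT) == 7.
 */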

static ATOMIC_NOTIFIER_HEAD(notify_page_fault_chain);

/* Hook to register for page fault notifications */
int register_page_fault_notifier(struct notifier_block *nb)
{
	vmalloc_sync_all();
	return atomic_notifier_chain_register(&notify_page_fault_chain, nb);
}
EXPORT_SYMBOL_GPL(register_page_fault_notifier);

int unregister_page_fault_notifier(struct notifier_block *nb)
{
	return atomic_notifier_chain_unregister(&notify_page_fault_chain, nb);
}
EXPORT_SYMBOL_GPL(unregister_page_fault_notifier);

static inline int notify_page_fault(enum die_val val, const char *str,
			struct pt_regs *regs, long err, int trap, int sig)
{
	struct die_args args = {
		.regs = regs,
		.str = str,
		.err = err,
		.trapnr = trap,
		.signr = sig
	};
	return atomic_notifier_call_chain(&notify_page_fault_chain, val, &args);
}
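
/*
 * Minimal usage sketch (hypothetical client, not part of this file): a
 * kprobes-style subsystem fills in a notifier_block and returns NOTIFY_STOP
 * from its callback to claim the fault.  my_fault_cb/my_handler/my_nb are
 * illustrative names only:
 *
 *	static int my_fault_cb(struct notifier_block *self, unsigned long val,
 *			       void *data)
 *	{
 *		struct die_args *args = data;
 *		if (val == DIE_PAGE_FAULT && my_handler(args->regs))
 *			return NOTIFY_STOP;
 *		return NOTIFY_DONE;
 *	}
 *	static struct notifier_block my_nb = { .notifier_call = my_fault_cb };
 *	...
 *	register_page_fault_notifier(&my_nb);
 */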

void bust_spinlocks(int yes)
{
	int loglevel_save = console_loglevel;

	if (yes) {
		oops_in_progress = 1;
	} else {
#ifdef CONFIG_VT
		unblank_screen();
#endif
		oops_in_progress = 0;
		/*
		 * OK, the message is on the console.  Now we call printk()
		 * without oops_in_progress set so that printk will give klogd
		 * a poke.  Hold onto your hats...
		 */
		console_loglevel = 15;	/* NMI oopser may have shut the console up */
		printk(" ");
		console_loglevel = loglevel_save;
	}
}

/* Sometimes the CPU reports invalid exceptions on prefetch.
   Check that here and ignore it.
   Opcode checker based on code by Richard Brunner */
static noinline int is_prefetch(struct pt_regs *regs, unsigned long addr,
				unsigned long error_code)
{
	unsigned char __user *instr;
	int scan_more = 1;
	int prefetch = 0;
	unsigned char *max_instr;

	/* If it was an exec fault, ignore it. */
	if (error_code & PF_INSTR)
		return 0;

	instr = (unsigned char __user *)convert_rip_to_linear(current, regs);
	max_instr = instr + 15;

	if (user_mode(regs) && instr >= (unsigned char *)TASK_SIZE64)
		return 0;

	while (scan_more && instr < max_instr) {
		unsigned char opcode;
		unsigned char instr_hi;
		unsigned char instr_lo;

		if (probe_kernel_address(instr, opcode))
			break;

		instr_hi = opcode & 0xf0;
		instr_lo = opcode & 0x0f;
		instr++;

		switch (instr_hi) {
		case 0x20:
		case 0x30:
			/* Values 0x26,0x2E,0x36,0x3E are valid x86
			   prefixes.  In long mode, the CPU will signal
			   invalid opcode if some of these prefixes are
			   present so we will never get here anyway */
			scan_more = ((instr_lo & 7) == 0x6);
			break;

		case 0x40:
			/* In AMD64 long mode, 0x40 to 0x4F are valid REX prefixes.
			   Need to figure out under what instruction mode the
			   instruction was issued ... */
			/* Could check the LDT for lm, but for now it's good
			   enough to assume that long mode only uses well known
			   segments or kernel. */
			scan_more = (!user_mode(regs)) || (regs->cs == __USER_CS);
			break;

		case 0x60:
			/* 0x64 thru 0x67 are valid prefixes in all modes. */
			scan_more = (instr_lo & 0xC) == 0x4;
			break;

		case 0xF0:
			/* 0xF0, 0xF2, and 0xF3 are valid prefixes in all modes. */
			scan_more = !instr_lo || (instr_lo>>1) == 1;
			break;

		case 0x00:
			/* Prefetch instruction is 0x0F0D or 0x0F18 */
			scan_more = 0;
			if (probe_kernel_address(instr, opcode))
				break;
			prefetch = (instr_lo == 0xF) &&
				(opcode == 0x0D || opcode == 0x18);
			break;

		default:
			scan_more = 0;
			break;
		}
	}
	return prefetch;
}
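
/*
 * Worked example (illustrative): "prefetchnta (%rax)" encodes as 0f 18 00.
 * The scan sees instr_hi == 0x00 for the 0x0f byte, reads the following
 * byte, and matches 0x18, so the fault is treated as a prefetch and
 * suppressed.  Leading prefixes such as 65 (a gs override) or a REX byte
 * are stepped over by the 0x60 and 0x40 cases before the 0x0f is reached.
 */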

static int bad_address(void *p)
{
	unsigned long dummy;
	return probe_kernel_address((unsigned long *)p, dummy);
}

void dump_pagetable(unsigned long address)
{
	pgd_t *pgd;
	pud_t *pud;
	pmd_t *pmd;
	pte_t *pte;

	pgd = __va(read_cr3() & PHYSICAL_PAGE_MASK);
	pgd += pgd_index(address);
	if (bad_address(pgd)) goto bad;
	printk("PGD %lx ", pgd_val(*pgd));
	if (!pgd_present(*pgd)) goto ret;

	pud = pud_offset(pgd, address);
	if (bad_address(pud)) goto bad;
	printk("PUD %lx ", pud_val(*pud));
	if (!pud_present(*pud)) goto ret;

	pmd = pmd_offset(pud, address);
	if (bad_address(pmd)) goto bad;
	printk("PMD %lx ", pmd_val(*pmd));
	if (!pmd_present(*pmd)) goto ret;

	pte = pte_offset_kernel(pmd, address);
	if (bad_address(pte)) goto bad;
	printk("PTE %lx", pte_val(*pte));
ret:
	printk("\n");
	return;
bad:
	printk("BAD\n");
}

static const char errata93_warning[] =
KERN_ERR "******* Your BIOS seems to not contain a fix for K8 errata #93\n"
KERN_ERR "******* Working around it, but it may cause SEGVs or burn power.\n"
KERN_ERR "******* Please consider a BIOS update.\n"
KERN_ERR "******* Disabling USB legacy in the BIOS may also help.\n";

/* Workaround for K8 erratum #93 & buggy BIOS.
   BIOS SMM functions are required to use a specific workaround
   to avoid corruption of the 64bit RIP register on C stepping K8.
   Many BIOSes that weren't tested properly miss this.
   The OS sees this as a page fault with the upper 32bits of RIP cleared.
   Try to work around it here.
   Note we only handle faults in kernel space here. */

static int is_errata93(struct pt_regs *regs, unsigned long address)
{
	static int warned;
	if (address != regs->rip)
		return 0;
	if ((address >> 32) != 0)
		return 0;
	address |= 0xffffffffUL << 32;
	if ((address >= (u64)_stext && address <= (u64)_etext) ||
	    (address >= MODULES_VADDR && address <= MODULES_END)) {
		if (!warned) {
			printk(errata93_warning);
			warned = 1;
		}
		regs->rip = address;
		return 1;
	}
	return 0;
}
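
/*
 * Illustrative case: a kernel RIP of 0xffffffff80211234 truncated by the
 * erratum shows up as a fault on address 0x80211234.  OR-ing the upper 32
 * bits back in yields an address inside the kernel text range, so the
 * handler rewrites regs->rip and resumes instead of oopsing.
 */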

int unhandled_signal(struct task_struct *tsk, int sig)
{
	if (is_init(tsk))
		return 1;
	if (tracehook_consider_fatal_signal(tsk, sig))
		return 0;
	return (tsk->sighand->action[sig-1].sa.sa_handler == SIG_IGN) ||
		(tsk->sighand->action[sig-1].sa.sa_handler == SIG_DFL);
}

static noinline void pgtable_bad(unsigned long address, struct pt_regs *regs,
				 unsigned long error_code)
{
	unsigned long flags = oops_begin();
	struct task_struct *tsk;

	printk(KERN_ALERT "%s: Corrupted page table at address %lx\n",
	       current->comm, address);
	dump_pagetable(address);
	tsk = current;
	tsk->thread.cr2 = address;
	tsk->thread.trap_no = 14;
	tsk->thread.error_code = error_code;
	__die("Bad pagetable", regs, error_code);
	oops_end(flags);
	do_exit(SIGKILL);
}

/*
 * Handle a fault on the vmalloc area
 *
 * This assumes no large pages in there.
 */
static int vmalloc_fault(unsigned long address)
{
	pgd_t *pgd, *pgd_ref;
	pud_t *pud, *pud_ref;
	pmd_t *pmd, *pmd_ref;
	pte_t *pte, *pte_ref;

	/* Copy kernel mappings over when needed. This can also
	   happen within a race in page table update. In the latter
	   case just flush. */

	/* On Xen the line below does not always work. Needs investigating! */
	/*pgd = pgd_offset(current->mm ?: &init_mm, address);*/
	pgd = __va(read_cr3() & PHYSICAL_PAGE_MASK);
	pgd += pgd_index(address);
	pgd_ref = pgd_offset_k(address);
	if (pgd_none(*pgd_ref))
		return -1;
	if (pgd_none(*pgd))
		set_pgd(pgd, *pgd_ref);
	else
		BUG_ON(pgd_page_vaddr(*pgd) != pgd_page_vaddr(*pgd_ref));

	/* Below here mismatches are bugs because these lower tables
	   are shared */

	pud = pud_offset(pgd, address);
	pud_ref = pud_offset(pgd_ref, address);
	if (pud_none(*pud_ref))
		return -1;
	if (pud_none(*pud) || pud_page_vaddr(*pud) != pud_page_vaddr(*pud_ref))
		BUG();
	pmd = pmd_offset(pud, address);
	pmd_ref = pmd_offset(pud_ref, address);
	if (pmd_none(*pmd_ref))
		return -1;
	if (pmd_none(*pmd) || pmd_page(*pmd) != pmd_page(*pmd_ref))
		BUG();
	pte_ref = pte_offset_kernel(pmd_ref, address);
	if (!pte_present(*pte_ref))
		return -1;
	pte = pte_offset_kernel(pmd, address);

	/* Don't use pte_page here, because the mappings can point
	   outside mem_map, and the NUMA hash lookup cannot handle
	   that. */
	if (!pte_present(*pte) || pte_pfn(*pte) != pte_pfn(*pte_ref))
		BUG();
	return 0;
}
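
/*
 * Design note (summarizing the logic above): only the top-level PGD entry
 * is per-process; the pud/pmd/pte levels for the vmalloc area are shared
 * with init_mm.  So a missing mapping is repaired by copying one pgd_t
 * from the reference table, and any mismatch at a lower level is a bug,
 * not a fault to be handled.
 */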

int page_fault_trace = 0;
int exception_trace = 1;

#define MEM_VERBOSE 1

#ifdef MEM_VERBOSE
#define MEM_LOG(_f, _a...)			\
	printk("fault.c:[%d]-> " _f "\n",	\
	       __LINE__, ## _a)
#else
#define MEM_LOG(_f, _a...) ((void)0)
#endif
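
/*
 * Usage sketch: with MEM_VERBOSE defined, MEM_LOG("pte %lx", pte_val(*pte))
 * expands to a printk tagged with this file and line, e.g.
 * "fault.c:[123]-> pte 8000000001e0c067"; otherwise it compiles to nothing.
 */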

static int spurious_fault(struct pt_regs *regs,
			  unsigned long address,
			  unsigned long error_code)
{
	pgd_t *pgd;
	pud_t *pud;
	pmd_t *pmd;
	pte_t *pte;

	/* Faults in hypervisor area are never spurious. */
	if ((address >= HYPERVISOR_VIRT_START) &&
	    (address < HYPERVISOR_VIRT_END))
		return 0;

	/* Reserved-bit violation or user access to kernel space? */
	if (error_code & (PF_RSVD|PF_USER))
		return 0;

	pgd = init_mm.pgd + pgd_index(address);
	if (!pgd_present(*pgd))
		return 0;

	pud = pud_offset(pgd, address);
	if (!pud_present(*pud))
		return 0;

	pmd = pmd_offset(pud, address);
	if (!pmd_present(*pmd))
		return 0;

	pte = pte_offset_kernel(pmd, address);
	if (!pte_present(*pte))
		return 0;
	if ((error_code & PF_WRITE) && !pte_write(*pte))
		return 0;
	if ((error_code & PF_INSTR) && (pte_val(*pte) & _PAGE_NX))
		return 0;

	return 1;
}
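
/*
 * Why this can happen (illustrative): the TLB may still hold a stale
 * read-only entry after a PTE was upgraded R/O -> R/W without a global
 * flush.  If the walk above shows that the current page tables already
 * permit the access, the fault is spurious and simply retrying the
 * faulting instruction suffices.
 */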

/*
 * This routine handles page faults.  It determines the address,
 * and the problem, and then passes it off to one of the appropriate
 * routines.
 */
asmlinkage void __kprobes do_page_fault(struct pt_regs *regs,
					unsigned long error_code)
{
	struct task_struct *tsk;
	struct mm_struct *mm;
	struct vm_area_struct *vma;
	unsigned long address;
	const struct exception_table_entry *fixup;
	int write;
	unsigned long flags;
	siginfo_t info;

	if (!user_mode(regs))
		error_code &= ~PF_USER;	/* means kernel */

	tsk = current;
	mm = tsk->mm;
	prefetchw(&mm->mmap_sem);

	/* get the address */
	address = HYPERVISOR_shared_info->vcpu_info[
		smp_processor_id()].arch.cr2;

	info.si_code = SEGV_MAPERR;

	/*
	 * We fault-in kernel-space virtual memory on-demand. The
	 * 'reference' page table is init_mm.pgd.
	 *
	 * NOTE! We MUST NOT take any locks for this case. We may
	 * be in an interrupt or a critical region, and should
	 * only copy the information from the master page table,
	 * nothing more.
	 *
	 * This verifies that the fault happens in kernel space
	 * (error_code & 4) == 0, and that the fault was not a
	 * protection error (error_code & 9) == 0.
	 */
	if (unlikely(address >= TASK_SIZE64)) {
		/*
		 * Don't check for the module range here: its PML4
		 * is always initialized because it's shared with the main
		 * kernel text. Only vmalloc may need PML4 syncups.
		 */
		if (!(error_code & (PF_RSVD|PF_USER|PF_PROT)) &&
		    ((address >= VMALLOC_START && address < VMALLOC_END))) {
			if (vmalloc_fault(address) >= 0)
				return;
		}
		if (notify_page_fault(DIE_PAGE_FAULT, "page fault", regs,
				      error_code, 14, SIGSEGV) == NOTIFY_STOP)
			return;
		/* Can take a spurious fault if mapping changes R/O -> R/W. */
		if (spurious_fault(regs, address, error_code))
			return;
		/*
		 * Don't take the mm semaphore here. If we fixup a prefetch
		 * fault we could otherwise deadlock.
		 */
		goto bad_area_nosemaphore;
	}
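
	/*
	 * Illustrative scenario for the block above: a driver vmalloc()s a
	 * buffer while process A runs, installing a new PML4 entry only in
	 * the current and reference page tables.  When process B later
	 * touches that buffer, it faults here and vmalloc_fault() copies the
	 * missing entry from init_mm, so nothing has to be synchronized
	 * globally at map time.
	 */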

	if (notify_page_fault(DIE_PAGE_FAULT, "page fault", regs, error_code,
			      14, SIGSEGV) == NOTIFY_STOP)
		return;

	if (likely(regs->eflags & X86_EFLAGS_IF))
		local_irq_enable();

	if (unlikely(page_fault_trace))
		printk("pagefault rip:%lx rsp:%lx cs:%lu ss:%lu address %lx error %lx\n",
		       regs->rip, regs->rsp, regs->cs, regs->ss, address,
		       error_code);

	if (unlikely(error_code & PF_RSVD))
		pgtable_bad(address, regs, error_code);

	/*
	 * If we're in an interrupt or have no user
	 * context, we must not take the fault.
	 */
	if (unlikely(in_atomic() || !mm))
		goto bad_area_nosemaphore;

 again:
	/* When running in the kernel we expect faults to occur only to
	 * addresses in user space.  All other faults represent errors in the
	 * kernel and should generate an OOPS.  Unfortunately, in the case of an
	 * erroneous fault occurring in a code path which already holds mmap_sem
	 * we will deadlock attempting to validate the fault against the
	 * address space.  Luckily the kernel only validly references user
	 * space from well defined areas of code, which are listed in the
	 * exceptions table.
	 *
	 * As the vast majority of faults will be valid we will only perform
	 * the source reference check when there is a possibility of a deadlock.
	 * Attempt to lock the address space; if we cannot, we then validate the
	 * source.  If this is invalid we can skip the address space check,
	 * thus avoiding the deadlock.
	 */
	if (!down_read_trylock(&mm->mmap_sem)) {
		if ((error_code & PF_USER) == 0 &&
		    !search_exception_tables(regs->rip))
			goto bad_area_nosemaphore;
		down_read(&mm->mmap_sem);
	}
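
	/*
	 * Example of the deadlock being avoided (illustrative): a driver
	 * already holding mmap_sem passes a bad user pointer to
	 * copy_from_user().  The fault taken inside the copy must not block
	 * on mmap_sem again; the exception-table check above lets it go
	 * straight to the fixup path instead.
	 */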

	vma = find_vma(mm, address);
	if (!vma)
		goto bad_area;
	if (likely(vma->vm_start <= address))
		goto good_area;
	if (!(vma->vm_flags & VM_GROWSDOWN))
		goto bad_area;
	if (error_code & PF_USER) {
		/* XXX: align red zone size with ABI */
		if (address + 128 < regs->rsp)
			goto bad_area;
	}
	if (expand_stack(vma, address))
		goto bad_area;

	/*
	 * Ok, we have a good vm_area for this memory access, so
	 * we can handle it.
	 */
good_area:
	info.si_code = SEGV_ACCERR;
	write = 0;
	switch (error_code & (PF_PROT|PF_WRITE)) {
	default:	/* 3: write, present */
		/* fall through */
	case PF_WRITE:	/* write, not present */
		if (!(vma->vm_flags & VM_WRITE))
			goto bad_area;
		write++;
		break;
	case PF_PROT:	/* read, present */
		goto bad_area;
	case 0:		/* read, not present */
		if (!(vma->vm_flags & (VM_READ | VM_EXEC | VM_WRITE)))
			goto bad_area;
	}

	/*
	 * If for any reason at all we couldn't handle the fault,
	 * make sure we exit gracefully rather than endlessly redo
	 * the fault.
	 */
	switch (handle_mm_fault(mm, vma, address, write)) {
	case VM_FAULT_MINOR:
		tsk->min_flt++;
		break;
	case VM_FAULT_MAJOR:
		tsk->maj_flt++;
		break;
	case VM_FAULT_SIGBUS:
		goto do_sigbus;
	default:
		goto out_of_memory;
	}

	up_read(&mm->mmap_sem);
	return;

	/*
	 * Something tried to access memory that isn't in our memory map.
	 * Fix it, but check if it's kernel or user first.
	 */
bad_area:
	up_read(&mm->mmap_sem);

bad_area_nosemaphore:
	/* User mode accesses just cause a SIGSEGV */
	if (error_code & PF_USER) {
		if (is_prefetch(regs, address, error_code))
			return;

		/* Work around K8 erratum #100: K8 in compat mode
		   occasionally jumps to illegal addresses >4GB.  We
		   catch this here in the page fault handler because
		   these addresses are not reachable.  Just detect this
		   case and return.  Any code segment in LDT is
		   compatibility mode. */
		if ((regs->cs == __USER32_CS || (regs->cs & (1<<2))) &&
		    (address >> 32))
			return;

		if (exception_trace && unhandled_signal(tsk, SIGSEGV)) {
			printk(
		       "%s%s[%d:#%u]: segfault at %016lx rip %016lx rsp %016lx error %lx\n",
				tsk->pid > 1 ? KERN_INFO : KERN_EMERG,
				tsk->comm, tsk->pid, tsk->xid, address,
				regs->rip, regs->rsp, error_code);
		}

		tsk->thread.cr2 = address;
		/* Kernel addresses are always protection faults */
		tsk->thread.error_code = error_code | (address >= TASK_SIZE);
		tsk->thread.trap_no = 14;
		info.si_signo = SIGSEGV;
		info.si_errno = 0;
		/* info.si_code has been set above */
		info.si_addr = (void __user *)address;
		force_sig_info(SIGSEGV, &info, tsk);
		return;
	}

no_context:
	/* Are we prepared to handle this kernel fault? */
	fixup = search_exception_tables(regs->rip);
	if (fixup) {
		regs->rip = fixup->fixup;
		return;
	}

	/*
	 * Hall of shame of CPU/BIOS bugs.
	 */
	if (is_prefetch(regs, address, error_code))
		return;

	if (is_errata93(regs, address))
		return;

	/*
	 * Oops. The kernel tried to access some bad page. We'll have to
	 * terminate things with extreme prejudice.
	 */
	flags = oops_begin();

	if (address < PAGE_SIZE)
		printk(KERN_ALERT "Unable to handle kernel NULL pointer dereference");
	else
		printk(KERN_ALERT "Unable to handle kernel paging request");
	printk(" at %016lx RIP: \n" KERN_ALERT, address);
	printk_address(regs->rip);
	dump_pagetable(address);
	tsk->thread.cr2 = address;
	tsk->thread.trap_no = 14;
	tsk->thread.error_code = error_code;
	__die("Oops", regs, error_code);
	/* Executive summary in case the body of the oops scrolled away */
	printk(KERN_EMERG "CR2: %016lx\n", address);
	oops_end(flags);
	do_exit(SIGKILL);

	/*
	 * We ran out of memory, or some other thing happened to us that made
	 * us unable to handle the page fault gracefully.
	 */
out_of_memory:
	up_read(&mm->mmap_sem);
	if (is_init(current)) {
		yield();
		goto again;
	}
	printk("VM: killing process %s(%d:#%u)\n",
	       tsk->comm, tsk->pid, tsk->xid);
	if (error_code & PF_USER)
		do_exit(SIGKILL);
	goto no_context;

do_sigbus:
	up_read(&mm->mmap_sem);

	/* Kernel mode? Handle exceptions or die */
	if (!(error_code & PF_USER))
		goto no_context;

	tsk->thread.cr2 = address;
	tsk->thread.error_code = error_code;
	tsk->thread.trap_no = 14;
	info.si_signo = SIGBUS;
	info.si_errno = 0;
	info.si_code = BUS_ADRERR;
	info.si_addr = (void __user *)address;
	force_sig_info(SIGBUS, &info, tsk);
	return;
}

DEFINE_SPINLOCK(pgd_lock);
struct page *pgd_list;

void vmalloc_sync_all(void)
{
	/* Note that races in the updates of insync and start aren't
	   problematic:
	   insync can only get set bits added, and updates to start are only
	   improving performance (without affecting correctness if undone). */
	static DECLARE_BITMAP(insync, PTRS_PER_PGD);
	static unsigned long start = VMALLOC_START & PGDIR_MASK;
	unsigned long address;

	for (address = start; address <= VMALLOC_END; address += PGDIR_SIZE) {
		if (!test_bit(pgd_index(address), insync)) {
			const pgd_t *pgd_ref = pgd_offset_k(address);
			struct page *page;

			if (pgd_none(*pgd_ref))
				continue;
			spin_lock(&pgd_lock);
			for (page = pgd_list; page;
			     page = (struct page *)page->index) {
				pgd_t *pgd;
				pgd = (pgd_t *)page_address(page) + pgd_index(address);
				if (pgd_none(*pgd))
					set_pgd(pgd, *pgd_ref);
				else
					BUG_ON(pgd_page_vaddr(*pgd) != pgd_page_vaddr(*pgd_ref));
			}
			spin_unlock(&pgd_lock);
			set_bit(pgd_index(address), insync);
		}
		if (address == start)
			start = address + PGDIR_SIZE;
	}
	/* Check that there is no need to do the same for the modules area. */
	BUILD_BUG_ON(!(MODULES_VADDR > __START_KERNEL));
	BUILD_BUG_ON(!(((MODULES_END - 1) & PGDIR_MASK) ==
		       (__START_KERNEL & PGDIR_MASK)));
}
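
/*
 * Design note (summarizing the loop above): syncing is one-way and sticky.
 * A pgd slot, once copied into every page table on pgd_list, never changes
 * again for the vmalloc area, which is why a set bit in 'insync' never
 * needs clearing and why racing updates of 'start' are harmless.
 */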

static int __init enable_pagefaulttrace(char *str)
{
	page_fault_trace = 1;
	return 1;
}
__setup("pagefaulttrace", enable_pagefaulttrace);