arch/x86_64/mm/fault-xen.c

   1 /*
   2  *  linux/arch/x86-64/mm/fault.c
   3  *
   4  *  Copyright (C) 1995  Linus Torvalds
   5  *  Copyright (C) 2001,2002 Andi Kleen, SuSE Labs.
   6  */
   7
   8 #include <linux/config.h>
   9 #include <linux/signal.h>
  10 #include <linux/sched.h>
  11 #include <linux/kernel.h>
  12 #include <linux/errno.h>
  13 #include <linux/string.h>
  14 #include <linux/types.h>
  15 #include <linux/ptrace.h>
  16 #include <linux/mman.h>
  17 #include <linux/mm.h>
  18 #include <linux/smp.h>
  19 #include <linux/smp_lock.h>
  20 #include <linux/interrupt.h>
  21 #include <linux/init.h>
  22 #include <linux/tty.h>
  23 #include <linux/vt_kern.h>              /* For unblank_screen() */
  24 #include <linux/compiler.h>
  25 #include <linux/module.h>
  26 #include <linux/kprobes.h>
  27
  28 #include <asm/system.h>
  29 #include <asm/uaccess.h>
  30 #include <asm/pgalloc.h>
  31 #include <asm/smp.h>
  32 #include <asm/tlbflush.h>
  33 #include <asm/proto.h>
  34 #include <asm/kdebug.h>
  35 #include <asm-generic/sections.h>
  36
  37 /* Page fault error code bits */
  38 #define PF_PROT (1<<0)          /* or no page found */
  39 #define PF_WRITE        (1<<1)
  40 #define PF_USER (1<<2)
  41 #define PF_RSVD (1<<3)
  42 #define PF_INSTR        (1<<4)
  43
  44 void bust_spinlocks(int yes)
  45 {
  46         int loglevel_save = console_loglevel;
  47         if (yes) {
  48                 oops_in_progress = 1;
  49         } else {
  50 #ifdef CONFIG_VT
  51                 unblank_screen();
  52 #endif
  53                 oops_in_progress = 0;
  54                 /*
  55                  * OK, the message is on the console.  Now we call printk()
  56                  * without oops_in_progress set so that printk will give klogd
  57                  * a poke.  Hold onto your hats...
  58                  */
  59                 console_loglevel = 15;          /* NMI oopser may have shut the console up */
  60                 printk(" ");
  61                 console_loglevel = loglevel_save;
  62         }
  63 }
  64
  65 /* Sometimes the CPU reports invalid exceptions on prefetch.
  66    Check that here and ignore.
  67    Opcode checker based on code by Richard Brunner */
  68 static noinline int is_prefetch(struct pt_regs *regs, unsigned long addr,
  69                                 unsigned long error_code)
  70 {
  71         unsigned char *instr;
  72         int scan_more = 1;
  73         int prefetch = 0;
  74         unsigned char *max_instr;
  75
  76         /* If it was a exec fault ignore */
  77         if (error_code & PF_INSTR)
  78                 return 0;
  79
  80         instr = (unsigned char *)convert_rip_to_linear(current, regs);
  81         max_instr = instr + 15;
  82
  83         if (user_mode(regs) && instr >= (unsigned char *)TASK_SIZE64)
  84                 return 0;
  85
  86         while (scan_more && instr < max_instr) {
  87                 unsigned char opcode;
  88                 unsigned char instr_hi;
  89                 unsigned char instr_lo;
  90
  91                 if (__get_user(opcode, instr))
  92                         break;
  93
  94                 instr_hi = opcode & 0xf0;
  95                 instr_lo = opcode & 0x0f;
  96                 instr++;
  97
  98                 switch (instr_hi) {
  99                 case 0x20:
 100                 case 0x30:
 101                         /* Values 0x26,0x2E,0x36,0x3E are valid x86
 102                            prefixes.  In long mode, the CPU will signal
 103                            invalid opcode if some of these prefixes are
 104                            present so we will never get here anyway */
 105                         scan_more = ((instr_lo & 7) == 0x6);
 106                         break;
 107
 108                 case 0x40:
 109                         /* In AMD64 long mode, 0x40 to 0x4F are valid REX prefixes
 110                            Need to figure out under what instruction mode the
 111                            instruction was issued ... */
 112                         /* Could check the LDT for lm, but for now it's good
 113                            enough to assume that long mode only uses well known
 114                            segments or kernel. */
 115                         scan_more = (!user_mode(regs)) || (regs->cs == __USER_CS);
 116                         break;
 117
 118                 case 0x60:
 119                         /* 0x64 thru 0x67 are valid prefixes in all modes. */
 120                         scan_more = (instr_lo & 0xC) == 0x4;
 121                         break;
 122                 case 0xF0:
 123                         /* 0xF0, 0xF2, and 0xF3 are valid prefixes in all modes. */
 124                         scan_more = !instr_lo || (instr_lo>>1) == 1;
 125                         break;
 126                 case 0x00:
 127                         /* Prefetch instruction is 0x0F0D or 0x0F18 */
 128                         scan_more = 0;
 129                         if (__get_user(opcode, instr))
 130                                 break;
 131                         prefetch = (instr_lo == 0xF) &&
 132                                 (opcode == 0x0D || opcode == 0x18);
 133                         break;
 134                 default:
 135                         scan_more = 0;
 136                         break;
 137                 }
 138         }
 139         return prefetch;
 140 }
 141
 142 static int bad_address(void *p)
 143 {
 144         unsigned long dummy;
 145         return __get_user(dummy, (unsigned long *)p);
 146 }
 147
 148 void dump_pagetable(unsigned long address)
 149 {
 150         pgd_t *pgd;
 151         pud_t *pud;
 152         pmd_t *pmd;
 153         pte_t *pte;
 154
 155         pgd = __va(read_cr3() & PHYSICAL_PAGE_MASK);
 156         pgd += pgd_index(address);
 157         if (bad_address(pgd)) goto bad;
 158         printk("PGD %lx ", pgd_val(*pgd));
 159         if (!pgd_present(*pgd)) goto ret;
 160
 161         pud = __pud_offset_k((pud_t *)pgd_page(*pgd), address);
 162         if (bad_address(pud)) goto bad;
 163         printk("PUD %lx ", pud_val(*pud));
 164         if (!pud_present(*pud)) goto ret;
 165
 166         pmd = pmd_offset(pud, address);
 167         if (bad_address(pmd)) goto bad;
 168         printk("PMD %lx ", pmd_val(*pmd));
 169         if (!pmd_present(*pmd)) goto ret;
 170
 171         pte = pte_offset_kernel(pmd, address);
 172         if (bad_address(pte)) goto bad;
 173         printk("PTE %lx", pte_val(*pte));
 174 ret:
 175         printk("\n");
 176         return;
 177 bad:
 178         printk("BAD\n");
 179 }
 180
 181 static const char errata93_warning[] =
 182 KERN_ERR "******* Your BIOS seems to not contain a fix for K8 errata #93\n"
 183 KERN_ERR "******* Working around it, but it may cause SEGVs or burn power.\n"
 184 KERN_ERR "******* Please consider a BIOS update.\n"
 185 KERN_ERR "******* Disabling USB legacy in the BIOS may also help.\n";
 186
 187 /* Workaround for K8 erratum #93 & buggy BIOS.
 188    BIOS SMM functions are required to use a specific workaround
 189    to avoid corruption of the 64bit RIP register on C stepping K8.
 190    A lot of BIOS that didn't get tested properly miss this.
 191    The OS sees this as a page fault with the upper 32bits of RIP cleared.
 192    Try to work around it here.
 193    Note we only handle faults in kernel here. */
 194
 195 static int is_errata93(struct pt_regs *regs, unsigned long address)
 196 {
 197         static int warned;
 198         if (address != regs->rip)
 199                 return 0;
 200         if ((address >> 32) != 0)
 201                 return 0;
 202         address |= 0xffffffffUL << 32;
 203         if ((address >= (u64)_stext && address <= (u64)_etext) ||
 204             (address >= MODULES_VADDR && address <= MODULES_END)) {
 205                 if (!warned) {
 206                         printk(errata93_warning);
 207                         warned = 1;
 208                 }
 209                 regs->rip = address;
 210                 return 1;
 211         }
 212         return 0;
 213 }
 214
 215 int unhandled_signal(struct task_struct *tsk, int sig)
 216 {
 217         if (tsk->pid == 1)
 218                 return 1;
 219         if (tsk->ptrace & PT_PTRACED)
 220                 return 0;
 221         return (tsk->sighand->action[sig-1].sa.sa_handler == SIG_IGN) ||
 222                 (tsk->sighand->action[sig-1].sa.sa_handler == SIG_DFL);
 223 }
 224
 225 static noinline void pgtable_bad(unsigned long address, struct pt_regs *regs,
 226                                  unsigned long error_code)
 227 {
 228         unsigned long flags = oops_begin();
 229         struct task_struct *tsk;
 230
 231         printk(KERN_ALERT "%s: Corrupted page table at address %lx\n",
 232                current->comm, address);
 233         dump_pagetable(address);
 234         tsk = current;
 235         tsk->thread.cr2 = address;
 236         tsk->thread.trap_no = 14;
 237         tsk->thread.error_code = error_code;
 238         __die("Bad pagetable", regs, error_code);
 239         oops_end(flags);
 240         do_exit(SIGKILL);
 241 }
 242
 243 /*
 244  * Handle a fault on the vmalloc area
 245  *
 246  * This assumes no large pages in there.
 247  */
 248 static int vmalloc_fault(unsigned long address)
 249 {
 250         pgd_t *pgd, *pgd_ref;
 251         pud_t *pud, *pud_ref;
 252         pmd_t *pmd, *pmd_ref;
 253         pte_t *pte, *pte_ref;
 254
 255         /* Copy kernel mappings over when needed. This can also
 256            happen within a race in page table update. In the later
 257            case just flush. */
 258
 259         /* On Xen the line below does not always work. Needs investigating! */
 260         /*pgd = pgd_offset(current->mm ?: &init_mm, address);*/
 261         pgd = __va(read_cr3() & PHYSICAL_PAGE_MASK);
 262         pgd += pgd_index(address);
 263         pgd_ref = pgd_offset_k(address);
 264         if (pgd_none(*pgd_ref))
 265                 return -1;
 266         if (pgd_none(*pgd))
 267                 set_pgd(pgd, *pgd_ref);
 268         else
 269                 BUG_ON(pgd_page(*pgd) != pgd_page(*pgd_ref));
 270
 271         /* Below here mismatches are bugs because these lower tables
 272            are shared */
 273
 274         pud = pud_offset(pgd, address);
 275         pud_ref = pud_offset(pgd_ref, address);
 276         if (pud_none(*pud_ref))
 277                 return -1;
 278         if (pud_none(*pud) || pud_page(*pud) != pud_page(*pud_ref))
 279                 BUG();
 280         pmd = pmd_offset(pud, address);
 281         pmd_ref = pmd_offset(pud_ref, address);
 282         if (pmd_none(*pmd_ref))
 283                 return -1;
 284         if (pmd_none(*pmd) || pmd_page(*pmd) != pmd_page(*pmd_ref))
 285                 BUG();
 286         pte_ref = pte_offset_kernel(pmd_ref, address);
 287         if (!pte_present(*pte_ref))
 288                 return -1;
 289         pte = pte_offset_kernel(pmd, address);
 290         /* Don't use pte_page here, because the mappings can point
 291            outside mem_map, and the NUMA hash lookup cannot handle
 292            that. */
 293         if (!pte_present(*pte) || pte_pfn(*pte) != pte_pfn(*pte_ref))
 294                 BUG();
 295         return 0;
 296 }
 297
 298 int page_fault_trace = 0;
 299 int exception_trace = 1;
 300
 301
 302 #define MEM_VERBOSE 1
 303
 304 #ifdef MEM_VERBOSE
 305 #define MEM_LOG(_f, _a...)                      \
 306         printk("fault.c:[%d]-> " _f "\n",       \
 307         __LINE__ , ## _a )
 308 #else
 309 #define MEM_LOG(_f, _a...) ((void)0)
 310 #endif
 311
 312 static int spurious_fault(struct pt_regs *regs,
 313                           unsigned long address,
 314                           unsigned long error_code)
 315 {
 316         pgd_t *pgd;
 317         pud_t *pud;
 318         pmd_t *pmd;
 319         pte_t *pte;
 320
 321 #ifdef CONFIG_XEN
 322         /* Faults in hypervisor area are never spurious. */
 323         if ((address >= HYPERVISOR_VIRT_START) &&
 324             (address < HYPERVISOR_VIRT_END))
 325                 return 0;
 326 #endif
 327
 328         /* Reserved-bit violation or user access to kernel space? */
 329         if (error_code & (PF_RSVD|PF_USER))
 330                 return 0;
 331
 332         pgd = init_mm.pgd + pgd_index(address);
 333         if (!pgd_present(*pgd))
 334                 return 0;
 335
 336         pud = pud_offset(pgd, address);
 337         if (!pud_present(*pud))
 338                 return 0;
 339
 340         pmd = pmd_offset(pud, address);
 341         if (!pmd_present(*pmd))
 342                 return 0;
 343
 344         pte = pte_offset_kernel(pmd, address);
 345         if (!pte_present(*pte))
 346                 return 0;
 347         if ((error_code & PF_WRITE) && !pte_write(*pte))
 348                 return 0;
 349         if ((error_code & PF_INSTR) && (pte_val(*pte) & _PAGE_NX))
 350                 return 0;
 351
 352         return 1;
 353 }
 354
 355 /*
 356  * This routine handles page faults.  It determines the address,
 357  * and the problem, and then passes it off to one of the appropriate
 358  * routines.
 359  */
 360 asmlinkage void __kprobes do_page_fault(struct pt_regs *regs,
 361                                         unsigned long error_code)
 362 {
 363         struct task_struct *tsk;
 364         struct mm_struct *mm;
 365         struct vm_area_struct * vma;
 366         unsigned long address;
 367         const struct exception_table_entry *fixup;
 368         int write;
 369         unsigned long flags;
 370         siginfo_t info;
 371
 372         if (!user_mode(regs))
 373                 error_code &= ~PF_USER; /* means kernel */
 374
 375         tsk = current;
 376         mm = tsk->mm;
 377         prefetchw(&mm->mmap_sem);
 378
 379         /* get the address */
 380         address = HYPERVISOR_shared_info->vcpu_info[
 381                 smp_processor_id()].arch.cr2;
 382
 383         info.si_code = SEGV_MAPERR;
 384
 385
 386         /*
 387          * We fault-in kernel-space virtual memory on-demand. The
 388          * 'reference' page table is init_mm.pgd.
 389          *
 390          * NOTE! We MUST NOT take any locks for this case. We may
 391          * be in an interrupt or a critical region, and should
 392          * only copy the information from the master page table,
 393          * nothing more.
 394          *
 395          * This verifies that the fault happens in kernel space
 396          * (error_code & 4) == 0, and that the fault was not a
 397          * protection error (error_code & 9) == 0.
 398          */
 399         if (unlikely(address >= TASK_SIZE64)) {
 400                 /*
 401                  * Don't check for the module range here: its PML4
 402                  * is always initialized because it's shared with the main
 403                  * kernel text. Only vmalloc may need PML4 syncups.
 404                  */
 405                 if (!(error_code & (PF_RSVD|PF_USER|PF_PROT)) &&
 406                     address >= PAGE_OFFSET) {
 407                         if (vmalloc_fault(address) >= 0)
 408                                 return;
 409                 }
 410                 if (notify_die(DIE_PAGE_FAULT, "page fault", regs, error_code, 14,
 411                                                 SIGSEGV) == NOTIFY_STOP)
 412                         return;
 413                 /* Can take a spurious fault if mapping changes R/O -> R/W. */
 414                 if (spurious_fault(regs, address, error_code))
 415                         return;
 416                 /*
 417                  * Don't take the mm semaphore here. If we fixup a prefetch
 418                  * fault we could otherwise deadlock.
 419                  */
 420                 goto bad_area_nosemaphore;
 421         }
 422
 423         if (notify_die(DIE_PAGE_FAULT, "page fault", regs, error_code, 14,
 424                                         SIGSEGV) == NOTIFY_STOP)
 425                 return;
 426
 427         if (likely(regs->eflags & X86_EFLAGS_IF))
 428                 local_irq_enable();
 429
 430         if (unlikely(page_fault_trace))
 431                 printk("pagefault rip:%lx rsp:%lx cs:%lu ss:%lu address %lx error %lx\n",
 432                        regs->rip,regs->rsp,regs->cs,regs->ss,address,error_code);
 433
 434         if (unlikely(error_code & PF_RSVD))
 435                 pgtable_bad(address, regs, error_code);
 436
 437         /*
 438          * If we're in an interrupt or have no user
 439          * context, we must not take the fault..
 440          */
 441         if (unlikely(in_atomic() || !mm))
 442                 goto bad_area_nosemaphore;
 443
 444  again:
 445         /* When running in the kernel we expect faults to occur only to
 446          * addresses in user space.  All other faults represent errors in the
 447          * kernel and should generate an OOPS.  Unfortunatly, in the case of an
 448          * erroneous fault occuring in a code path which already holds mmap_sem
 449          * we will deadlock attempting to validate the fault against the
 450          * address space.  Luckily the kernel only validly references user
 451          * space from well defined areas of code, which are listed in the
 452          * exceptions table.
 453          *
 454          * As the vast majority of faults will be valid we will only perform
 455          * the source reference check when there is a possibilty of a deadlock.
 456          * Attempt to lock the address space, if we cannot we then validate the
 457          * source.  If this is invalid we can skip the address space check,
 458          * thus avoiding the deadlock.
 459          */
 460         if (!down_read_trylock(&mm->mmap_sem)) {
 461                 if ((error_code & PF_USER) == 0 &&
 462                     !search_exception_tables(regs->rip))
 463                         goto bad_area_nosemaphore;
 464                 down_read(&mm->mmap_sem);
 465         }
 466
 467         vma = find_vma(mm, address);
 468         if (!vma)
 469                 goto bad_area;
 470         if (likely(vma->vm_start <= address))
 471                 goto good_area;
 472         if (!(vma->vm_flags & VM_GROWSDOWN))
 473                 goto bad_area;
 474         if (error_code & 4) {
 475                 // XXX: align red zone size with ABI
 476                 if (address + 128 < regs->rsp)
 477                         goto bad_area;
 478         }
 479         if (expand_stack(vma, address))
 480                 goto bad_area;
 481 /*
 482  * Ok, we have a good vm_area for this memory access, so
 483  * we can handle it..
 484  */
 485 good_area:
 486         info.si_code = SEGV_ACCERR;
 487         write = 0;
 488         switch (error_code & (PF_PROT|PF_WRITE)) {
 489                 default:        /* 3: write, present */
 490                         /* fall through */
 491                 case PF_WRITE:          /* write, not present */
 492                         if (!(vma->vm_flags & VM_WRITE))
 493                                 goto bad_area;
 494                         write++;
 495                         break;
 496                 case PF_PROT:           /* read, present */
 497                         goto bad_area;
 498                 case 0:                 /* read, not present */
 499                         if (!(vma->vm_flags & (VM_READ | VM_EXEC)))
 500                                 goto bad_area;
 501         }
 502
 503         /*
 504          * If for any reason at all we couldn't handle the fault,
 505          * make sure we exit gracefully rather than endlessly redo
 506          * the fault.
 507          */
 508         switch (handle_mm_fault(mm, vma, address, write)) {
 509         case VM_FAULT_MINOR:
 510                 tsk->min_flt++;
 511                 break;
 512         case VM_FAULT_MAJOR:
 513                 tsk->maj_flt++;
 514                 break;
 515         case VM_FAULT_SIGBUS:
 516                 goto do_sigbus;
 517         default:
 518                 goto out_of_memory;
 519         }
 520
 521         up_read(&mm->mmap_sem);
 522         return;
 523
 524 /*
 525  * Something tried to access memory that isn't in our memory map..
 526  * Fix it, but check if it's kernel or user first..
 527  */
 528 bad_area:
 529         up_read(&mm->mmap_sem);
 530
 531 bad_area_nosemaphore:
 532         /* User mode accesses just cause a SIGSEGV */
 533         if (error_code & PF_USER) {
 534                 if (is_prefetch(regs, address, error_code))
 535                         return;
 536
 537                 /* Work around K8 erratum #100 K8 in compat mode
 538                    occasionally jumps to illegal addresses >4GB.  We
 539                    catch this here in the page fault handler because
 540                    these addresses are not reachable. Just detect this
 541                    case and return.  Any code segment in LDT is
 542                    compatibility mode. */
 543                 if ((regs->cs == __USER32_CS || (regs->cs & (1<<2))) &&
 544                     (address >> 32))
 545                         return;
 546
 547                 if (exception_trace && unhandled_signal(tsk, SIGSEGV)) {
 548                         printk(
 549                        "%s%s[%d]: segfault at %016lx rip %016lx rsp %016lx error %lx\n",
 550                                         tsk->pid > 1 ? KERN_INFO : KERN_EMERG,
 551                                         tsk->comm, tsk->pid, address, regs->rip,
 552                                         regs->rsp, error_code);
 553                 }
 554
 555                 tsk->thread.cr2 = address;
 556                 /* Kernel addresses are always protection faults */
 557                 tsk->thread.error_code = error_code | (address >= TASK_SIZE);
 558                 tsk->thread.trap_no = 14;
 559                 info.si_signo = SIGSEGV;
 560                 info.si_errno = 0;
 561                 /* info.si_code has been set above */
 562                 info.si_addr = (void __user *)address;
 563                 force_sig_info(SIGSEGV, &info, tsk);
 564                 return;
 565         }
 566
 567 no_context:
 568
 569         /* Are we prepared to handle this kernel fault?  */
 570         fixup = search_exception_tables(regs->rip);
 571         if (fixup) {
 572                 regs->rip = fixup->fixup;
 573                 return;
 574         }
 575
 576         /*
 577          * Hall of shame of CPU/BIOS bugs.
 578          */
 579
 580         if (is_prefetch(regs, address, error_code))
 581                 return;
 582
 583         if (is_errata93(regs, address))
 584                 return;
 585
 586 /*
 587  * Oops. The kernel tried to access some bad page. We'll have to
 588  * terminate things with extreme prejudice.
 589  */
 590
 591         flags = oops_begin();
 592
 593         if (address < PAGE_SIZE)
 594                 printk(KERN_ALERT "Unable to handle kernel NULL pointer dereference");
 595         else
 596                 printk(KERN_ALERT "Unable to handle kernel paging request");
 597         printk(" at %016lx RIP: \n" KERN_ALERT,address);
 598         printk_address(regs->rip);
 599         printk("\n");
 600         dump_pagetable(address);
 601         tsk->thread.cr2 = address;
 602         tsk->thread.trap_no = 14;
 603         tsk->thread.error_code = error_code;
 604         __die("Oops", regs, error_code);
 605         /* Executive summary in case the body of the oops scrolled away */
 606         printk(KERN_EMERG "CR2: %016lx\n", address);
 607         oops_end(flags);
 608         do_exit(SIGKILL);
 609
 610 /*
 611  * We ran out of memory, or some other thing happened to us that made
 612  * us unable to handle the page fault gracefully.
 613  */
 614 out_of_memory:
 615         up_read(&mm->mmap_sem);
 616         if (current->pid == 1) {
 617                 yield();
 618                 goto again;
 619         }
 620         printk("VM: killing process %s\n", tsk->comm);
 621         if (error_code & 4)
 622                 do_exit(SIGKILL);
 623         goto no_context;
 624
 625 do_sigbus:
 626         up_read(&mm->mmap_sem);
 627
 628         /* Kernel mode? Handle exceptions or die */
 629         if (!(error_code & PF_USER))
 630                 goto no_context;
 631
 632         tsk->thread.cr2 = address;
 633         tsk->thread.error_code = error_code;
 634         tsk->thread.trap_no = 14;
 635         info.si_signo = SIGBUS;
 636         info.si_errno = 0;
 637         info.si_code = BUS_ADRERR;
 638         info.si_addr = (void __user *)address;
 639         force_sig_info(SIGBUS, &info, tsk);
 640         return;
 641 }
 642
 643 DEFINE_SPINLOCK(pgd_lock);
 644 struct page *pgd_list;
 645
 646 void vmalloc_sync_all(void)
 647 {
 648         /* Note that races in the updates of insync and start aren't
 649            problematic:
 650            insync can only get set bits added, and updates to start are only
 651            improving performance (without affecting correctness if undone). */
 652         static DECLARE_BITMAP(insync, PTRS_PER_PGD);
 653         static unsigned long start = VMALLOC_START & PGDIR_MASK;
 654         unsigned long address;
 655
 656         for (address = start; address <= VMALLOC_END; address += PGDIR_SIZE) {
 657                 if (!test_bit(pgd_index(address), insync)) {
 658                         const pgd_t *pgd_ref = pgd_offset_k(address);
 659                         struct page *page;
 660
 661                         if (pgd_none(*pgd_ref))
 662                                 continue;
 663                         spin_lock(&pgd_lock);
 664                         for (page = pgd_list; page;
 665                              page = (struct page *)page->index) {
 666                                 pgd_t *pgd;
 667                                 pgd = (pgd_t *)page_address(page) + pgd_index(address);
 668                                 if (pgd_none(*pgd))
 669                                         set_pgd(pgd, *pgd_ref);
 670                                 else
 671                                         BUG_ON(pgd_page(*pgd) != pgd_page(*pgd_ref));
 672                         }
 673                         spin_unlock(&pgd_lock);
 674                         set_bit(pgd_index(address), insync);
 675                 }
 676                 if (address == start)
 677                         start = address + PGDIR_SIZE;
 678         }
 679         /* Check that there is no need to do the same for the modules area. */
 680         BUILD_BUG_ON(!(MODULES_VADDR > __START_KERNEL));
 681         BUILD_BUG_ON(!(((MODULES_END - 1) & PGDIR_MASK) ==
 682                                 (__START_KERNEL & PGDIR_MASK)));
 683 }
 684
 685 static int __init enable_pagefaulttrace(char *str)
 686 {
 687         page_fault_trace = 1;
 688         return 1;
 689 }
 690 __setup("pagefaulttrace", enable_pagefaulttrace);