fedora core 6 1.2949 + vserver 2.2.0
diff --git a/arch/i386/kernel/vm86.c b/arch/i386/kernel/vm86.c
index 2dd216a..d5b775c 100644
--- a/arch/i386/kernel/vm86.c
+++ b/arch/i386/kernel/vm86.c
@@ -4,7 +4,7 @@
  *  Copyright (C) 1994  Linus Torvalds
  *
  *  29 dec 2001 - Fixed oopses caused by unchecked access to the vm86
- *                stack - Manfred Spraul <manfreds@colorfullife.com>
+ *                stack - Manfred Spraul <manfred@colorfullife.com>
  *
  *  22 mar 2002 - Manfred detected the stackfaults, but didn't handle
  *                them correctly. Now the emulation will be in a
@@ -30,7 +30,7 @@
  *
  */
 
-#include <linux/config.h>
+#include <linux/capability.h>
 #include <linux/errno.h>
 #include <linux/interrupt.h>
 #include <linux/sched.h>
@@ -42,6 +42,8 @@
 #include <linux/smp_lock.h>
 #include <linux/highmem.h>
 #include <linux/ptrace.h>
+#include <linux/audit.h>
+#include <linux/stddef.h>
 
 #include <asm/uaccess.h>
 #include <asm/io.h>
 /*
  * 8- and 16-bit register defines..
  */
-#define AL(regs)       (((unsigned char *)&((regs)->eax))[0])
-#define AH(regs)       (((unsigned char *)&((regs)->eax))[1])
-#define IP(regs)       (*(unsigned short *)&((regs)->eip))
-#define SP(regs)       (*(unsigned short *)&((regs)->esp))
+#define AL(regs)       (((unsigned char *)&((regs)->pt.eax))[0])
+#define AH(regs)       (((unsigned char *)&((regs)->pt.eax))[1])
+#define IP(regs)       (*(unsigned short *)&((regs)->pt.eip))
+#define SP(regs)       (*(unsigned short *)&((regs)->pt.esp))
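
Annotation: the AL()/AH() macros alias the low and high bytes of the saved eax through an unsigned char pointer, and IP()/SP() take the low 16 bits of eip/esp; the only change here is that the frame now sits behind the embedded "pt" (struct pt_regs) member. A minimal userspace sketch of the same aliasing trick; fake_regs is a hypothetical stand-in, and the byte positions assume little-endian x86:

    #include <stdio.h>

    /* hypothetical stand-in for the saved register frame */
    struct fake_regs { unsigned long eax, eip, esp; };

    #define AL(r) (((unsigned char *)&(r)->eax)[0])  /* low byte of eax  */
    #define AH(r) (((unsigned char *)&(r)->eax)[1])  /* high byte of eax */
    #define IP(r) (*(unsigned short *)&(r)->eip)     /* low 16 bits      */

    int main(void)
    {
            struct fake_regs r = { .eax = 0x12345678, .eip = 0xdeadbeef };

            /* little-endian: AL = 0x78, AH = 0x56, IP = 0xbeef */
            printf("AL=%02x AH=%02x IP=%04x\n", AL(&r), AH(&r), IP(&r));
            AH(&r) = 0xAB;                  /* writes only bits 8..15 */
            printf("eax=%08lx\n", r.eax);   /* 0x1234ab78 */
            return 0;
    }
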
 
 /*
  * virtual flags (16 and 32-bit versions)
 #define SAFE_MASK      (0xDD5)
 #define RETURN_MASK    (0xDFF)
 
-#define VM86_REGS_PART2 orig_eax
-#define VM86_REGS_SIZE1 \
-        ( (unsigned)( & (((struct kernel_vm86_regs *)0)->VM86_REGS_PART2) ) )
-#define VM86_REGS_SIZE2 (sizeof(struct kernel_vm86_regs) - VM86_REGS_SIZE1)
+/* convert kernel_vm86_regs to vm86_regs */
+static int copy_vm86_regs_to_user(struct vm86_regs __user *user,
+                                 const struct kernel_vm86_regs *regs)
+{
+       int ret = 0;
+
+       /* kernel_vm86_regs is missing xfs, so copy everything up to
+          (but not including) xgs, and then rest after xgs. */
+       ret += copy_to_user(user, regs, offsetof(struct kernel_vm86_regs, pt.xgs));
+       ret += copy_to_user(&user->__null_gs, &regs->pt.xgs,
+                           sizeof(struct kernel_vm86_regs) -
+                           offsetof(struct kernel_vm86_regs, pt.xgs));
+
+       return ret;
+}
+
+/* convert vm86_regs to kernel_vm86_regs */
+static int copy_vm86_regs_from_user(struct kernel_vm86_regs *regs,
+                                   const struct vm86_regs __user *user,
+                                   unsigned extra)
+{
+       int ret = 0;
+
+       ret += copy_from_user(regs, user, offsetof(struct kernel_vm86_regs, pt.xgs));
+       ret += copy_from_user(&regs->pt.xgs, &user->__null_gs,
+                             sizeof(struct kernel_vm86_regs) -
+                             offsetof(struct kernel_vm86_regs, pt.xgs) +
+                             extra);
+
+       return ret;
+}
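
Annotation: these two helpers replace the old VM86_REGS_PART2 size arithmetic. The userspace vm86_regs layout has a __null_fs slot that the reworked pt_regs no longer carries, so the copy is split: one chunk up to (but not including) pt.xgs, then a second chunk starting at the user struct's __null_gs. A userspace sketch of the same offsetof-based split copy, with hypothetical struct names (kregs lacks the field f that uregs has):

    #include <stddef.h>
    #include <stdio.h>
    #include <string.h>

    struct kregs { int a, b, g, rest; };       /* no f */
    struct uregs { int a, b, f, g, rest; };

    /* copy kregs -> uregs, skipping over the missing f slot */
    static void split_copy(struct uregs *u, const struct kregs *k)
    {
            /* chunk 1: everything before the divergence point */
            memcpy(u, k, offsetof(struct kregs, g));
            /* chunk 2: resume at g on both sides; f stays untouched */
            memcpy(&u->g, &k->g,
                   sizeof(struct kregs) - offsetof(struct kregs, g));
    }

    int main(void)
    {
            struct kregs k = { 1, 2, 3, 4 };
            struct uregs u = { 0 };

            split_copy(&u, &k);
            printf("%d %d %d %d (f: %d)\n", u.a, u.b, u.g, u.rest, u.f);
            return 0;
    }
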
 
 struct pt_regs * FASTCALL(save_v86_state(struct kernel_vm86_regs * regs));
 struct pt_regs * fastcall save_v86_state(struct kernel_vm86_regs * regs)
 {
+#ifndef CONFIG_X86_NO_TSS
        struct tss_struct *tss;
+#endif
        struct pt_regs *ret;
        unsigned long tmp;
 
@@ -111,73 +142,59 @@ struct pt_regs * fastcall save_v86_state(struct kernel_vm86_regs * regs)
                printk("no vm86_info: BAD\n");
                do_exit(SIGSEGV);
        }
-       set_flags(regs->eflags, VEFLAGS, VIF_MASK | current->thread.v86mask);
-       tmp = copy_to_user(&current->thread.vm86_info->regs,regs, VM86_REGS_SIZE1);
-       tmp += copy_to_user(&current->thread.vm86_info->regs.VM86_REGS_PART2,
-               &regs->VM86_REGS_PART2, VM86_REGS_SIZE2);
+       set_flags(regs->pt.eflags, VEFLAGS, VIF_MASK | current->thread.v86mask);
+       tmp = copy_vm86_regs_to_user(&current->thread.vm86_info->regs,regs);
        tmp += put_user(current->thread.screen_bitmap,&current->thread.vm86_info->screen_bitmap);
        if (tmp) {
                printk("vm86: could not access userspace vm86_info\n");
                do_exit(SIGSEGV);
        }
 
+#ifndef CONFIG_X86_NO_TSS
        tss = &per_cpu(init_tss, get_cpu());
+#endif
        current->thread.esp0 = current->thread.saved_esp0;
        current->thread.sysenter_cs = __KERNEL_CS;
        load_esp0(tss, &current->thread);
        current->thread.saved_esp0 = 0;
+#ifndef CONFIG_X86_NO_TSS
        put_cpu();
+#endif
 
-       loadsegment(fs, current->thread.saved_fs);
-       loadsegment(gs, current->thread.saved_gs);
        ret = KVM86->regs32;
+
+       loadsegment(fs, current->thread.saved_fs);
+       ret->xgs = current->thread.saved_gs;
+
        return ret;
 }
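
Annotation: note the ordering change above: %gs is no longer reloaded with loadsegment(); since it is now part of pt_regs, the saved selector is written into ret->xgs and restored on the regular exit path, while %fs is still reloaded directly. For reference, savesegment()/loadsegment() are thin wrappers around a mov to or from a segment register; a read-only userspace sketch (safe to run, it only reads the selectors):

    #include <stdio.h>

    int main(void)
    {
            unsigned short fs_sel, gs_sel;

            /* roughly what savesegment(seg, value) expands to */
            __asm__("mov %%fs, %0" : "=rm" (fs_sel));
            __asm__("mov %%gs, %0" : "=rm" (gs_sel));
            printf("fs=%#hx gs=%#hx\n", fs_sel, gs_sel);
            return 0;
    }
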
 
-static void mark_screen_rdonly(struct task_struct * tsk)
+static void mark_screen_rdonly(struct mm_struct *mm)
 {
        pgd_t *pgd;
        pud_t *pud;
        pmd_t *pmd;
-       pte_t *pte, *mapped;
+       pte_t *pte;
+       spinlock_t *ptl;
        int i;
 
-       preempt_disable();
-       spin_lock(&tsk->mm->page_table_lock);
-       pgd = pgd_offset(tsk->mm, 0xA0000);
-       if (pgd_none(*pgd))
-               goto out;
-       if (pgd_bad(*pgd)) {
-               pgd_ERROR(*pgd);
-               pgd_clear(pgd);
+       pgd = pgd_offset(mm, 0xA0000);
+       if (pgd_none_or_clear_bad(pgd))
                goto out;
-       }
        pud = pud_offset(pgd, 0xA0000);
-       if (pud_none(*pud))
-               goto out;
-       if (pud_bad(*pud)) {
-               pud_ERROR(*pud);
-               pud_clear(pud);
+       if (pud_none_or_clear_bad(pud))
                goto out;
-       }
        pmd = pmd_offset(pud, 0xA0000);
-       if (pmd_none(*pmd))
-               goto out;
-       if (pmd_bad(*pmd)) {
-               pmd_ERROR(*pmd);
-               pmd_clear(pmd);
+       if (pmd_none_or_clear_bad(pmd))
                goto out;
-       }
-       pte = mapped = pte_offset_map(pmd, 0xA0000);
+       pte = pte_offset_map_lock(mm, pmd, 0xA0000, &ptl);
        for (i = 0; i < 32; i++) {
                if (pte_present(*pte))
                        set_pte(pte, pte_wrprotect(*pte));
                pte++;
        }
-       pte_unmap(mapped);
+       pte_unmap_unlock(pte, ptl);
 out:
-       spin_unlock(&tsk->mm->page_table_lock);
-       preempt_enable();
        flush_tlb();
 }
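
Annotation: the rewritten mark_screen_rdonly() takes the mm directly, uses the pgd/pud/pmd_none_or_clear_bad() helpers instead of the open-coded bad-entry checks, and holds the lock from pte_offset_map_lock() while it write-protects the 32 PTEs covering the VGA window at 0xA0000 (32 x 4 KiB = 128 KiB), flushing the TLB afterwards. The closest userspace analogy is write-protecting a mapped region with mprotect(); a sketch:

    #include <stdio.h>
    #include <sys/mman.h>

    int main(void)
    {
            size_t len = 32 * 4096;     /* same span the kernel covers */
            char *p = mmap(NULL, len, PROT_READ | PROT_WRITE,
                           MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
            if (p == MAP_FAILED)
                    return 1;
            p[0] = 1;                   /* still writable */

            /* analogous to pte_wrprotect() on each present PTE */
            if (mprotect(p, len, PROT_READ))
                    return 1;
            /* p[0] = 2; would now fault, like a write to the VGA window */
            printf("read-only now, p[0]=%d\n", p[0]);
            munmap(p, len);
            return 0;
    }
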
 
@@ -200,9 +217,9 @@ asmlinkage int sys_vm86old(struct pt_regs regs)
        tsk = current;
        if (tsk->thread.saved_esp0)
                goto out;
-       tmp  = copy_from_user(&info, v86, VM86_REGS_SIZE1);
-       tmp += copy_from_user(&info.regs.VM86_REGS_PART2, &v86->regs.VM86_REGS_PART2,
-               (long)&info.vm86plus - (long)&info.regs.VM86_REGS_PART2);
+       tmp = copy_vm86_regs_from_user(&info.regs, &v86->regs,
+                                      offsetof(struct kernel_vm86_struct, vm86plus) -
+                                      sizeof(info.regs));
        ret = -EFAULT;
        if (tmp)
                goto out;
@@ -237,7 +254,7 @@ asmlinkage int sys_vm86(struct pt_regs regs)
                        goto out;
                case VM86_PLUS_INSTALL_CHECK:
                        /* NOTE: on old vm86 stuff this will return the error
-                          from verify_area(), because the subfunction is
+                          from access_ok(), because the subfunction is
                           interpreted as (invalid) address to vm86_struct.
                           So the installation check works.
                         */
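
Annotation: as the (now corrected) comment says, the installation check leans on the old interface misreading the subfunction as a pointer: an old kernel fails the access_ok() check and returns -EFAULT, a vm86plus kernel returns 0. A hedged, i386-only probe from userspace (SYS_vm86 and VM86_PLUS_INSTALL_CHECK come from the i386 unistd.h and asm/vm86.h):

    #include <stdio.h>
    #include <errno.h>
    #include <unistd.h>
    #include <sys/syscall.h>
    #include <asm/vm86.h>           /* VM86_PLUS_INSTALL_CHECK */

    int main(void)
    {
            long ret = syscall(SYS_vm86, VM86_PLUS_INSTALL_CHECK, 0);

            if (ret == 0)
                    printf("vm86plus available\n");
            else                    /* EFAULT on the old interface */
                    printf("no vm86plus, errno=%d\n", errno);
            return 0;
    }
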
@@ -250,9 +267,9 @@ asmlinkage int sys_vm86(struct pt_regs regs)
        if (tsk->thread.saved_esp0)
                goto out;
        v86 = (struct vm86plus_struct __user *)regs.ecx;
-       tmp  = copy_from_user(&info, v86, VM86_REGS_SIZE1);
-       tmp += copy_from_user(&info.regs.VM86_REGS_PART2, &v86->regs.VM86_REGS_PART2,
-               (long)&info.regs32 - (long)&info.regs.VM86_REGS_PART2);
+       tmp = copy_vm86_regs_from_user(&info.regs, &v86->regs,
+                                      offsetof(struct kernel_vm86_struct, regs32) -
+                                      sizeof(info.regs));
        ret = -EFAULT;
        if (tmp)
                goto out;
@@ -268,15 +285,18 @@ out:
 
 static void do_sys_vm86(struct kernel_vm86_struct *info, struct task_struct *tsk)
 {
+#ifndef CONFIG_X86_NO_TSS
        struct tss_struct *tss;
+#endif
 /*
  * make sure the vm86() system call doesn't try to do anything silly
  */
-       info->regs.__null_ds = 0;
-       info->regs.__null_es = 0;
+       info->regs.pt.xds = 0;
+       info->regs.pt.xes = 0;
+       info->regs.pt.xgs = 0;
 
-/* we are clearing fs,gs later just before "jmp resume_userspace",
- * because starting with Linux 2.1.x they aren't no longer saved/restored
+/* we are clearing fs later just before "jmp resume_userspace",
+ * because it is not saved/restored.
  */
 
 /*
@@ -284,10 +304,10 @@ static void do_sys_vm86(struct kernel_vm86_struct *info, struct task_struct *tsk
  * has set it up safely, so this makes sure interrupt etc flags are
  * inherited from protected mode.
  */
-       VEFLAGS = info->regs.eflags;
-       info->regs.eflags &= SAFE_MASK;
-       info->regs.eflags |= info->regs32->eflags & ~SAFE_MASK;
-       info->regs.eflags |= VM_MASK;
+       VEFLAGS = info->regs.pt.eflags;
+       info->regs.pt.eflags &= SAFE_MASK;
+       info->regs.pt.eflags |= info->regs32->eflags & ~SAFE_MASK;
+       info->regs.pt.eflags |= VM_MASK;
 
        switch (info->cpu_type) {
                case CPU_286:
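
Annotation: the eflags setup just above keeps only the bits in SAFE_MASK (0xDD5: the arithmetic flags plus TF, DF and OF, but notably not IF) from the value the vm86 caller supplied, inherits everything else from the 32-bit kernel frame, and forces the VM bit on. A worked sketch of that merge (VM_MASK is EFLAGS.VM, bit 17):

    #include <stdio.h>

    #define SAFE_MASK   0xDD5           /* bits the vm86 caller may set */
    #define VM_MASK     0x00020000      /* EFLAGS.VM */

    static unsigned long vm86_eflags(unsigned long user_fl,
                                     unsigned long kern_fl)
    {
            unsigned long fl = user_fl & SAFE_MASK; /* safe bits: user  */

            fl |= kern_fl & ~SAFE_MASK;             /* the rest: kernel */
            fl |= VM_MASK;                          /* always vm86 mode */
            return fl;
    }

    int main(void)
    {
            /* user asks for everything; only SAFE_MASK bits survive */
            printf("%#lx\n", vm86_eflags(~0UL, 0x246));
            return 0;
    }
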
@@ -309,26 +329,35 @@ static void do_sys_vm86(struct kernel_vm86_struct *info, struct task_struct *tsk
  */
        info->regs32->eax = 0;
        tsk->thread.saved_esp0 = tsk->thread.esp0;
-       asm volatile("movl %%fs,%0":"=m" (tsk->thread.saved_fs));
-       asm volatile("movl %%gs,%0":"=m" (tsk->thread.saved_gs));
+       savesegment(fs, tsk->thread.saved_fs);
+       tsk->thread.saved_gs = info->regs32->xgs;
 
+#ifndef CONFIG_X86_NO_TSS
        tss = &per_cpu(init_tss, get_cpu());
+#endif
        tsk->thread.esp0 = (unsigned long) &info->VM86_TSS_ESP0;
        if (cpu_has_sep)
                tsk->thread.sysenter_cs = 0;
        load_esp0(tss, &tsk->thread);
+#ifndef CONFIG_X86_NO_TSS
        put_cpu();
+#endif
 
        tsk->thread.screen_bitmap = info->screen_bitmap;
        if (info->flags & VM86_SCREEN_BITMAP)
-               mark_screen_rdonly(tsk);
+               mark_screen_rdonly(tsk->mm);
+
+       /*call audit_syscall_exit since we do not exit via the normal paths */
+       if (unlikely(current->audit_context))
+               audit_syscall_exit(AUDITSC_RESULT(0), 0);
+
        __asm__ __volatile__(
-               "xorl %%eax,%%eax; movl %%eax,%%fs; movl %%eax,%%gs\n\t"
                "movl %0,%%esp\n\t"
                "movl %1,%%ebp\n\t"
+               "mov  %2, %%fs\n\t"
                "jmp resume_userspace"
                : /* no outputs */
-               :"r" (&info->regs), "r" (tsk->thread_info) : "ax");
+               :"r" (&info->regs), "r" (task_thread_info(tsk)), "r" (0));
        /* we never return here */
 }
 
@@ -358,12 +387,12 @@ static inline void clear_IF(struct kernel_vm86_regs * regs)
 
 static inline void clear_TF(struct kernel_vm86_regs * regs)
 {
-       regs->eflags &= ~TF_MASK;
+       regs->pt.eflags &= ~TF_MASK;
 }
 
 static inline void clear_AC(struct kernel_vm86_regs * regs)
 {
-       regs->eflags &= ~AC_MASK;
+       regs->pt.eflags &= ~AC_MASK;
 }
 
 /* It is correct to call set_IF(regs) from the set_vflags_*
@@ -380,7 +409,7 @@ static inline void clear_AC(struct kernel_vm86_regs * regs)
 static inline void set_vflags_long(unsigned long eflags, struct kernel_vm86_regs * regs)
 {
        set_flags(VEFLAGS, eflags, current->thread.v86mask);
-       set_flags(regs->eflags, eflags, SAFE_MASK);
+       set_flags(regs->pt.eflags, eflags, SAFE_MASK);
        if (eflags & IF_MASK)
                set_IF(regs);
        else
@@ -390,7 +419,7 @@ static inline void set_vflags_long(unsigned long eflags, struct kernel_vm86_regs
 static inline void set_vflags_short(unsigned short flags, struct kernel_vm86_regs * regs)
 {
        set_flags(VFLAGS, flags, current->thread.v86mask);
-       set_flags(regs->eflags, flags, SAFE_MASK);
+       set_flags(regs->pt.eflags, flags, SAFE_MASK);
        if (flags & IF_MASK)
                set_IF(regs);
        else
@@ -399,7 +428,7 @@ static inline void set_vflags_short(unsigned short flags, struct kernel_vm86_reg
 
 static inline unsigned long get_vflags(struct kernel_vm86_regs * regs)
 {
-       unsigned long flags = regs->eflags & RETURN_MASK;
+       unsigned long flags = regs->pt.eflags & RETURN_MASK;
 
        if (VEFLAGS & VIF_MASK)
                flags |= IF_MASK;
@@ -503,7 +532,7 @@ static void do_int(struct kernel_vm86_regs *regs, int i,
        unsigned long __user *intr_ptr;
        unsigned long segoffs;
 
-       if (regs->cs == BIOSSEG)
+       if (regs->pt.xcs == BIOSSEG)
                goto cannot_handle;
        if (is_revectored(i, &KVM86->int_revectored))
                goto cannot_handle;
@@ -515,9 +544,9 @@ static void do_int(struct kernel_vm86_regs *regs, int i,
        if ((segoffs >> 16) == BIOSSEG)
                goto cannot_handle;
        pushw(ssp, sp, get_vflags(regs), cannot_handle);
-       pushw(ssp, sp, regs->cs, cannot_handle);
+       pushw(ssp, sp, regs->pt.xcs, cannot_handle);
        pushw(ssp, sp, IP(regs), cannot_handle);
-       regs->cs = segoffs >> 16;
+       regs->pt.xcs = segoffs >> 16;
        SP(regs) -= 6;
        IP(regs) = segoffs & 0xffff;
        clear_TF(regs);
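
Annotation: do_int() vectors a software interrupt the way a real-mode CPU would: it reads the 4-byte IVT entry for vector i, refuses anything in or pointing at BIOSSEG (0xf000), pushes FLAGS/CS/IP on the vm86 stack, and loads CS:IP from the entry. The segment:offset decoding and the xcs << 4 linear-address math above follow the usual real-mode rules; a small sketch:

    #include <stdio.h>

    /* 4-byte real-mode IVT entry: low word offset, high word segment */
    static unsigned long ivt_to_linear(unsigned int entry)
    {
            unsigned short seg = entry >> 16;
            unsigned short off = entry & 0xffff;

            return ((unsigned long)seg << 4) + off; /* seg*16 + off */
    }

    int main(void)
    {
            /* e.g. an int 0x10 vector pointing at f000:f065 */
            printf("handler at linear %#lx\n", ivt_to_linear(0xf000f065));
            return 0;
    }
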
@@ -534,18 +563,11 @@ int handle_vm86_trap(struct kernel_vm86_regs * regs, long error_code, int trapno
        if (VMPI.is_vm86pus) {
                if ( (trapno==3) || (trapno==1) )
                        return_to_32bit(regs, VM86_TRAP + (trapno << 8));
-               do_int(regs, trapno, (unsigned char __user *) (regs->ss << 4), SP(regs));
+               do_int(regs, trapno, (unsigned char __user *) (regs->pt.xss << 4), SP(regs));
                return 0;
        }
        if (trapno !=1)
                return 1; /* we let this handle by the calling routine */
-       if (current->ptrace & PT_PTRACED) {
-               unsigned long flags;
-               spin_lock_irqsave(&current->sighand->siglock, flags);
-               sigdelset(&current->blocked, SIGTRAP);
-               recalc_sigpending();
-               spin_unlock_irqrestore(&current->sighand->siglock, flags);
-       }
        send_sig(SIGTRAP, current, 1);
        current->thread.trap_no = trapno;
        current->thread.error_code = error_code;
@@ -557,7 +579,7 @@ void handle_vm86_fault(struct kernel_vm86_regs * regs, long error_code)
        unsigned char opcode;
        unsigned char __user *csp;
        unsigned char __user *ssp;
-       unsigned short ip, sp;
+       unsigned short ip, sp, orig_flags;
        int data32, pref_done;
 
 #define CHECK_IF_IN_TRAP \
@@ -566,10 +588,14 @@ void handle_vm86_fault(struct kernel_vm86_regs * regs, long error_code)
 #define VM86_FAULT_RETURN do { \
        if (VMPI.force_return_for_pic  && (VEFLAGS & (IF_MASK | VIF_MASK))) \
                return_to_32bit(regs, VM86_PICRETURN); \
+       if (orig_flags & TF_MASK) \
+               handle_vm86_trap(regs, 0, 1); \
        return; } while (0)
 
-       csp = (unsigned char __user *) (regs->cs << 4);
-       ssp = (unsigned char __user *) (regs->ss << 4);
+       orig_flags = *(unsigned short *)&regs->pt.eflags;
+
+       csp = (unsigned char __user *) (regs->pt.xcs << 4);
+       ssp = (unsigned char __user *) (regs->pt.xss << 4);
        sp = SP(regs);
        ip = IP(regs);
 
@@ -656,7 +682,7 @@ void handle_vm86_fault(struct kernel_vm86_regs * regs, long error_code)
                        SP(regs) += 6;
                }
                IP(regs) = newip;
-               regs->cs = newcs;
+               regs->pt.xcs = newcs;
                CHECK_IF_IN_TRAP;
                if (data32) {
                        set_vflags_long(newflags, regs);
@@ -720,7 +746,7 @@ static int irqbits;
        | (1 << SIGUSR1) | (1 << SIGUSR2) | (1 << SIGIO)  | (1 << SIGURG) \
        | (1 << SIGUNUSED) )
        
-static irqreturn_t irq_handler(int intno, void *dev_id, struct pt_regs * regs)
+static irqreturn_t irq_handler(int intno, void *dev_id)
 {
        int irq_bit;
        unsigned long flags;
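
Annotation: the prototype change is the 2.6.19 genirq cleanup that dropped the struct pt_regs * argument from all interrupt handlers. A minimal sketch of a handler registered against the new signature (IRQ 7 and the "demo" name are placeholders):

    #include <linux/init.h>
    #include <linux/interrupt.h>
    #include <linux/module.h>

    /* post-2.6.19 prototype: (irq, dev_id), no pt_regs */
    static irqreturn_t demo_handler(int irq, void *dev_id)
    {
            /* acknowledge/handle the device here */
            return IRQ_HANDLED;
    }

    static int __init demo_init(void)
    {
            return request_irq(7, demo_handler, 0, "demo", NULL);
    }

    static void __exit demo_exit(void)
    {
            free_irq(7, NULL);
    }

    module_init(demo_init);
    module_exit(demo_exit);
    MODULE_LICENSE("GPL");
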
@@ -732,12 +758,12 @@ static irqreturn_t irq_handler(int intno, void *dev_id, struct pt_regs * regs)
        irqbits |= irq_bit;
        if (vm86_irqs[intno].sig)
                send_sig(vm86_irqs[intno].sig, vm86_irqs[intno].tsk, 1);
-       spin_unlock_irqrestore(&irqbits_lock, flags);
        /*
         * IRQ will be re-enabled when user asks for the irq (whether
         * polling or as a result of the signal)
         */
-       disable_irq(intno);
+       disable_irq_nosync(intno);
+       spin_unlock_irqrestore(&irqbits_lock, flags);
        return IRQ_HANDLED;
 
 out:
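
Annotation: two fixes in one hunk. disable_irq() waits for any running handler on that line to finish, so calling it from inside the handler itself can deadlock; disable_irq_nosync() only masks the line. Moving the call under irqbits_lock also closes the window in which get_and_reset_irq() could re-enable the line between the unlock and the disable. The rule of thumb, as a sketch:

    #include <linux/interrupt.h>

    static irqreturn_t oneshot_handler(int irq, void *dev_id)
    {
            /*
             * Only the _nosync variant is safe here: disable_irq()
             * would wait for *this* handler to return, which it
             * never would.
             */
            disable_irq_nosync(irq);
            /* ... whoever consumes the event re-enables the line ... */
            return IRQ_HANDLED;
    }
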
@@ -769,17 +795,20 @@ static inline int get_and_reset_irq(int irqnumber)
 {
        int bit;
        unsigned long flags;
+       int ret = 0;
        
        if (invalid_vm86_irq(irqnumber)) return 0;
        if (vm86_irqs[irqnumber].tsk != current) return 0;
        spin_lock_irqsave(&irqbits_lock, flags);        
        bit = irqbits & (1 << irqnumber);
        irqbits &= ~bit;
+       if (bit) {
+               enable_irq(irqnumber);
+               ret = 1;
+       }
+
        spin_unlock_irqrestore(&irqbits_lock, flags);   
-       if (!bit)
-               return 0;
-       enable_irq(irqnumber);
-       return 1;
+       return ret;
 }