/*
 *  linux/arch/x86-64/kernel/process.c
 *
 *  Copyright (C) 1995  Linus Torvalds
 *
 *  Pentium III FXSR, SSE support
 *      Gareth Hughes <gareth@valinux.com>, May 2000
 *
 *  X86-64 port
 *      Andi Kleen.
 *
 *      CPU hotplug support - ashok.raj@intel.com
 *  $Id: process.c,v 1.38 2002/01/15 10:08:03 ak Exp $
 *
 *  Jun Nakajima <jun.nakajima@intel.com>
 *     Modified for Xen
 */

/*
 * This file handles the architecture-dependent parts of process handling..
 */

#include <stdarg.h>

#include <linux/cpu.h>
#include <linux/errno.h>
#include <linux/sched.h>
#include <linux/kernel.h>
#include <linux/mm.h>
#include <linux/elfcore.h>
#include <linux/smp.h>
#include <linux/slab.h>
#include <linux/user.h>
#include <linux/module.h>
#include <linux/a.out.h>
#include <linux/interrupt.h>
#include <linux/delay.h>
#include <linux/ptrace.h>
#include <linux/utsname.h>
#include <linux/random.h>
#include <linux/notifier.h>
#include <linux/kprobes.h>

#include <asm/uaccess.h>
#include <asm/pgtable.h>
#include <asm/system.h>
#include <asm/io.h>
#include <asm/processor.h>
#include <asm/i387.h>
#include <asm/mmu_context.h>
#include <asm/pda.h>
#include <asm/prctl.h>
#include <asm/kdebug.h>
#include <xen/interface/dom0_ops.h>
#include <xen/interface/physdev.h>
#include <xen/interface/vcpu.h>
#include <asm/desc.h>
#include <asm/proto.h>
#include <asm/hardirq.h>
#include <asm/ia32.h>
#include <asm/idle.h>

#include <xen/cpu_hotplug.h>

asmlinkage extern void ret_from_fork(void);

unsigned long kernel_thread_flags = CLONE_VM | CLONE_UNTRACED;

unsigned long boot_option_idle_override = 0;
EXPORT_SYMBOL(boot_option_idle_override);

/*
 * Power management idle function, if any.
 */
void (*pm_idle)(void);
static DEFINE_PER_CPU(unsigned int, cpu_idle_state);

static ATOMIC_NOTIFIER_HEAD(idle_notifier);

void idle_notifier_register(struct notifier_block *n)
{
        atomic_notifier_chain_register(&idle_notifier, n);
}
EXPORT_SYMBOL_GPL(idle_notifier_register);

void idle_notifier_unregister(struct notifier_block *n)
{
        atomic_notifier_chain_unregister(&idle_notifier, n);
}
EXPORT_SYMBOL(idle_notifier_unregister);
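
/*
 * Illustrative sketch (not part of this file): a subsystem that wants to
 * react to idle transitions could register a callback on this chain, e.g.:
 *
 *      static int my_idle_notify(struct notifier_block *nb,
 *                                unsigned long action, void *unused)
 *      {
 *              if (action == IDLE_START)
 *                      ;       /* CPU is about to go idle */
 *              return NOTIFY_OK;
 *      }
 *      static struct notifier_block my_idle_nb = {
 *              .notifier_call = my_idle_notify,
 *      };
 *      ...
 *      idle_notifier_register(&my_idle_nb);
 *
 * (my_idle_notify/my_idle_nb are hypothetical names used for illustration.)
 */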

enum idle_state { CPU_IDLE, CPU_NOT_IDLE };
static DEFINE_PER_CPU(enum idle_state, idle_state) = CPU_NOT_IDLE;

void enter_idle(void)
{
        __get_cpu_var(idle_state) = CPU_IDLE;
        atomic_notifier_call_chain(&idle_notifier, IDLE_START, NULL);
}

static void __exit_idle(void)
{
        __get_cpu_var(idle_state) = CPU_NOT_IDLE;
        atomic_notifier_call_chain(&idle_notifier, IDLE_END, NULL);
}

/* Called from interrupts to signify idle end */
void exit_idle(void)
{
        if (current->pid | read_pda(irqcount))
                return;
        __exit_idle();
}

/* XXX XEN doesn't use default_idle(), poll_idle(). Use xen_idle() instead. */
void xen_idle(void)
{
        local_irq_disable();

        if (need_resched())
                local_irq_enable();
        else {
                clear_thread_flag(TIF_POLLING_NRFLAG);
                smp_mb__after_clear_bit();
                safe_halt();
                set_thread_flag(TIF_POLLING_NRFLAG);
        }
}

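/*
 * CPU offline path under Xen: release the mm borrowed by the idle task,
 * mark the CPU uninitialized and ask the hypervisor to take the VCPU down.
 * When the CPU is brought back online the hypercall returns and
 * cpu_bringup() reinitializes it.
 */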
#ifdef CONFIG_HOTPLUG_CPU
static inline void play_dead(void)
{
        idle_task_exit();
        local_irq_disable();
        cpu_clear(smp_processor_id(), cpu_initialized);
        preempt_enable_no_resched();
        HYPERVISOR_vcpu_op(VCPUOP_down, smp_processor_id(), NULL);
        cpu_bringup();
}
#else
static inline void play_dead(void)
{
        BUG();
}
#endif /* CONFIG_HOTPLUG_CPU */

/*
 * The idle thread. There's no useful work to be
 * done, so just try to conserve power and have a
 * low exit latency (ie sit in a loop waiting for
 * somebody to say that they'd like to reschedule)
 */
void cpu_idle (void)
{
        set_thread_flag(TIF_POLLING_NRFLAG);

        /* endless idle loop with no priority at all */
        while (1) {
                while (!need_resched()) {
                        if (__get_cpu_var(cpu_idle_state))
                                __get_cpu_var(cpu_idle_state) = 0;
                        rmb();

                        if (cpu_is_offline(smp_processor_id()))
                                play_dead();
                        enter_idle();
                        xen_idle();
                        __exit_idle();
                }

                preempt_enable_no_resched();
                schedule();
                preempt_disable();
        }
}

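/*
 * Wait until every online CPU has passed through the idle loop at least
 * once: flag each CPU via cpu_idle_state and poll until the idle loop on
 * that CPU has cleared its flag.
 */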
void cpu_idle_wait(void)
{
        unsigned int cpu, this_cpu = get_cpu();
        cpumask_t map;

        set_cpus_allowed(current, cpumask_of_cpu(this_cpu));
        put_cpu();

        cpus_clear(map);
        for_each_online_cpu(cpu) {
                per_cpu(cpu_idle_state, cpu) = 1;
                cpu_set(cpu, map);
        }

        __get_cpu_var(cpu_idle_state) = 0;

        wmb();
        do {
                ssleep(1);
                for_each_online_cpu(cpu) {
                        if (cpu_isset(cpu, map) &&
                                        !per_cpu(cpu_idle_state, cpu))
                                cpu_clear(cpu, map);
                }
                cpus_and(map, map, cpu_online_map);
        } while (!cpus_empty(map));
}
EXPORT_SYMBOL_GPL(cpu_idle_wait);

/* XXX XEN doesn't use mwait_idle(), select_idle_routine(), idle_setup(). */
/* Always use xen_idle() instead. */
void __cpuinit select_idle_routine(const struct cpuinfo_x86 *c) {}

/* Also prints some state that isn't saved in the pt_regs */
void __show_regs(struct pt_regs * regs)
{
        unsigned long fs, gs, shadowgs;
        unsigned int fsindex, gsindex;
        unsigned int ds, cs, es;

        printk("\n");
        print_modules();
        printk("Pid: %d, comm: %.20s %s %s %.*s\n",
                current->pid, current->comm, print_tainted(),
                system_utsname.release,
                (int)strcspn(system_utsname.version, " "),
                system_utsname.version);
        printk("RIP: %04lx:[<%016lx>] ", regs->cs & 0xffff, regs->rip);
        printk_address(regs->rip);
        printk("\nRSP: %04lx:%016lx  EFLAGS: %08lx\n", regs->ss, regs->rsp,
                regs->eflags);
        printk("RAX: %016lx RBX: %016lx RCX: %016lx\n",
               regs->rax, regs->rbx, regs->rcx);
        printk("RDX: %016lx RSI: %016lx RDI: %016lx\n",
               regs->rdx, regs->rsi, regs->rdi);
        printk("RBP: %016lx R08: %016lx R09: %016lx\n",
               regs->rbp, regs->r8, regs->r9);
        printk("R10: %016lx R11: %016lx R12: %016lx\n",
               regs->r10, regs->r11, regs->r12);
        printk("R13: %016lx R14: %016lx R15: %016lx\n",
               regs->r13, regs->r14, regs->r15);

        asm("mov %%ds,%0" : "=r" (ds));
        asm("mov %%cs,%0" : "=r" (cs));
        asm("mov %%es,%0" : "=r" (es));
        asm("mov %%fs,%0" : "=r" (fsindex));
        asm("mov %%gs,%0" : "=r" (gsindex));

        rdmsrl(MSR_FS_BASE, fs);
        rdmsrl(MSR_GS_BASE, gs);
        rdmsrl(MSR_KERNEL_GS_BASE, shadowgs);

        printk("FS:  %016lx(%04x) GS:%016lx(%04x) knlGS:%016lx\n",
               fs, fsindex, gs, gsindex, shadowgs);
        printk("CS:  %04x DS: %04x ES: %04x\n", cs, ds, es);

}

void show_regs(struct pt_regs *regs)
{
        printk("CPU %d:", smp_processor_id());
        __show_regs(regs);
        show_trace(&regs->rsp);
}

/*
 * Free current thread data structures etc..
 */
void exit_thread(void)
{
        struct task_struct *me = current;
        struct thread_struct *t = &me->thread;

        if (me->thread.io_bitmap_ptr) {
#ifndef CONFIG_X86_NO_TSS
                struct tss_struct *tss = &per_cpu(init_tss, get_cpu());
#endif
#ifdef CONFIG_XEN
                struct physdev_set_iobitmap iobmp_op = { 0 };
#endif

                kfree(t->io_bitmap_ptr);
                t->io_bitmap_ptr = NULL;
                /*
                 * Careful, clear this in the TSS too:
                 */
#ifndef CONFIG_X86_NO_TSS
                memset(tss->io_bitmap, 0xff, t->io_bitmap_max);
                put_cpu();
#endif
#ifdef CONFIG_XEN
                HYPERVISOR_physdev_op(PHYSDEVOP_set_iobitmap, &iobmp_op);
#endif
                t->io_bitmap_max = 0;
        }
}

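/*
 * Load the user %gs selector.  Under Xen this goes through the
 * SEGBASE_GS_USER_SEL hypercall rather than the native swapgs sequence.
 */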
void load_gs_index(unsigned gs)
{
        HYPERVISOR_set_segment_base(SEGBASE_GS_USER_SEL, gs);
}

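/*
 * Called on exec: reset per-thread state that must not leak into the new
 * program - pending 32/64-bit ABI switch, debug registers, TLS slots and
 * FPU state.
 */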
void flush_thread(void)
{
        struct task_struct *tsk = current;
        struct thread_info *t = current_thread_info();

        if (t->flags & _TIF_ABI_PENDING)
                t->flags ^= (_TIF_ABI_PENDING | _TIF_IA32);

        tsk->thread.debugreg0 = 0;
        tsk->thread.debugreg1 = 0;
        tsk->thread.debugreg2 = 0;
        tsk->thread.debugreg3 = 0;
        tsk->thread.debugreg6 = 0;
        tsk->thread.debugreg7 = 0;
        memset(tsk->thread.tls_array, 0, sizeof(tsk->thread.tls_array));
        /*
         * Forget coprocessor state..
         */
        clear_fpu(tsk);
        clear_used_math();
}

void release_thread(struct task_struct *dead_task)
{
        if (dead_task->mm) {
                if (dead_task->mm->context.size) {
                        printk("WARNING: dead process %8s still has LDT? <%p/%d>\n",
                                        dead_task->comm,
                                        dead_task->mm->context.ldt,
                                        dead_task->mm->context.size);
                        BUG();
                }
        }
}

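/*
 * Helpers for 32-bit TLS bases: encode a base address into one of the
 * thread's GDT TLS slots, and read it back out of the descriptor.
 */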
static inline void set_32bit_tls(struct task_struct *t, int tls, u32 addr)
{
        struct user_desc ud = {
                .base_addr = addr,
                .limit = 0xfffff,
                .contents = (3 << 3), /* user */
                .seg_32bit = 1,
                .limit_in_pages = 1,
                .useable = 1,
        };
        struct n_desc_struct *desc = (void *)t->thread.tls_array;
        desc += tls;
        desc->a = LDT_entry_a(&ud);
        desc->b = LDT_entry_b(&ud);
}

static inline u32 read_32bit_tls(struct task_struct *t, int tls)
{
        struct desc_struct *desc = (void *)t->thread.tls_array;
        desc += tls;
        return desc->base0 |
                (((u32)desc->base1) << 16) |
                (((u32)desc->base2) << 24);
}

/*
 * This gets called before we allocate a new thread and copy
 * the current task into it.
 */
void prepare_to_copy(struct task_struct *tsk)
{
        unlazy_fpu(tsk);
}

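/*
 * Set up the child's thread state at fork/clone time: build its kernel-mode
 * pt_regs (rax = 0 so the child sees a zero return value), copy the parent's
 * segment and iopl state, duplicate the I/O permission bitmap if one exists,
 * and install a new TLS entry when CLONE_SETTLS is requested.
 */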
int copy_thread(int nr, unsigned long clone_flags, unsigned long rsp,
                unsigned long unused,
        struct task_struct * p, struct pt_regs * regs)
{
        int err;
        struct pt_regs * childregs;
        struct task_struct *me = current;

        childregs = ((struct pt_regs *)
                        (THREAD_SIZE + task_stack_page(p))) - 1;
        *childregs = *regs;

        childregs->rax = 0;
        childregs->rsp = rsp;
        if (rsp == ~0UL)
                childregs->rsp = (unsigned long)childregs;

        p->thread.rsp = (unsigned long) childregs;
        p->thread.rsp0 = (unsigned long) (childregs+1);
        p->thread.userrsp = me->thread.userrsp;

        set_tsk_thread_flag(p, TIF_FORK);

        p->thread.fs = me->thread.fs;
        p->thread.gs = me->thread.gs;

        asm("mov %%gs,%0" : "=m" (p->thread.gsindex));
        asm("mov %%fs,%0" : "=m" (p->thread.fsindex));
        asm("mov %%es,%0" : "=m" (p->thread.es));
        asm("mov %%ds,%0" : "=m" (p->thread.ds));

        if (unlikely(me->thread.io_bitmap_ptr != NULL)) {
                p->thread.io_bitmap_ptr = kmalloc(IO_BITMAP_BYTES, GFP_KERNEL);
                if (!p->thread.io_bitmap_ptr) {
                        p->thread.io_bitmap_max = 0;
                        return -ENOMEM;
                }
                memcpy(p->thread.io_bitmap_ptr, me->thread.io_bitmap_ptr,
                                IO_BITMAP_BYTES);
        }

        /*
         * Set a new TLS for the child thread?
         */
        if (clone_flags & CLONE_SETTLS) {
#ifdef CONFIG_IA32_EMULATION
                if (test_thread_flag(TIF_IA32))
                        err = ia32_child_tls(p, childregs);
                else
#endif
                        err = do_arch_prctl(p, ARCH_SET_FS, childregs->r8);
                if (err)
                        goto out;
        }
        p->thread.iopl = current->thread.iopl;

        err = 0;
out:
        if (err && p->thread.io_bitmap_ptr) {
                kfree(p->thread.io_bitmap_ptr);
                p->thread.io_bitmap_max = 0;
        }
        return err;
}

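/*
 * Local variant of save_init_fpu(): save the FPU context with fxsave and
 * mark it unused, but leave the "FPU task switched" notification to the
 * caller, which queues it as a multicall in __switch_to() below.
 */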
static inline void __save_init_fpu( struct task_struct *tsk )
{
        asm volatile( "rex64 ; fxsave %0 ; fnclex"
                      : "=m" (tsk->thread.i387.fxsave));
        tsk->thread_info->status &= ~TS_USEDFPU;
}

/*
 *      switch_to(x,y) should switch tasks from x to y.
 *
 * This could still be optimized:
 * - fold all the options into a flag word and test it with a single test.
 * - could test fs/gs bitsliced
 *
 * Kprobes not supported here. Set the probe on schedule instead.
 */
__kprobes struct task_struct *
__switch_to(struct task_struct *prev_p, struct task_struct *next_p)
{
        struct thread_struct *prev = &prev_p->thread,
                                 *next = &next_p->thread;
        int cpu = smp_processor_id();
#ifndef CONFIG_X86_NO_TSS
        struct tss_struct *tss = &per_cpu(init_tss, cpu);
#endif
        struct physdev_set_iopl iopl_op;
        struct physdev_set_iobitmap iobmp_op;
        multicall_entry_t _mcl[8], *mcl = _mcl;

        /*
         * This is basically '__unlazy_fpu', except that we queue a
         * multicall to indicate FPU task switch, rather than
         * synchronously trapping to Xen.
         */
        if (prev_p->thread_info->status & TS_USEDFPU) {
                __save_init_fpu(prev_p); /* _not_ save_init_fpu() */
                mcl->op      = __HYPERVISOR_fpu_taskswitch;
                mcl->args[0] = 1;
                mcl++;
        }

        /*
         * Reload esp0, LDT and the page table pointer:
         */
        mcl->op      = __HYPERVISOR_stack_switch;
        mcl->args[0] = __KERNEL_DS;
        mcl->args[1] = next->rsp0;
        mcl++;

        /*
         * Load the per-thread Thread-Local Storage descriptor.
         * This is load_TLS(next, cpu) with multicalls.
         */
#define C(i) do {                                                       \
        if (unlikely(next->tls_array[i] != prev->tls_array[i])) {       \
                mcl->op      = __HYPERVISOR_update_descriptor;          \
                mcl->args[0] = virt_to_machine(                         \
                        &cpu_gdt(cpu)[GDT_ENTRY_TLS_MIN + i]);          \
                mcl->args[1] = next->tls_array[i];                      \
                mcl++;                                                  \
        }                                                               \
} while (0)
        C(0); C(1); C(2);
#undef C

        if (unlikely(prev->iopl != next->iopl)) {
                iopl_op.iopl = (next->iopl == 0) ? 1 : next->iopl;
                mcl->op      = __HYPERVISOR_physdev_op;
                mcl->args[0] = PHYSDEVOP_set_iopl;
                mcl->args[1] = (unsigned long)&iopl_op;
                mcl++;
        }

        if (unlikely(prev->io_bitmap_ptr || next->io_bitmap_ptr)) {
                iobmp_op.bitmap   = (char *)next->io_bitmap_ptr;
                iobmp_op.nr_ports = next->io_bitmap_ptr ? IO_BITMAP_BITS : 0;
                mcl->op      = __HYPERVISOR_physdev_op;
                mcl->args[0] = PHYSDEVOP_set_iobitmap;
                mcl->args[1] = (unsigned long)&iobmp_op;
                mcl++;
        }

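        /*
         * Everything queued above (FPU switch, kernel stack switch, TLS
         * descriptor updates, iopl and I/O bitmap changes) is handed to Xen
         * in a single multicall, so the common path costs one hypercall
         * instead of one per operation.
         */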
        (void)HYPERVISOR_multicall(_mcl, mcl - _mcl);
        /*
         * Switch DS and ES.
         * This won't pick up thread selector changes, but I guess that is ok.
         */
        if (unlikely(next->es))
                loadsegment(es, next->es);

        if (unlikely(next->ds))
                loadsegment(ds, next->ds);

        /*
         * Switch FS and GS.
         */
        if (unlikely(next->fsindex))
                loadsegment(fs, next->fsindex);

        if (next->fs)
                HYPERVISOR_set_segment_base(SEGBASE_FS, next->fs);

        if (unlikely(next->gsindex))
                load_gs_index(next->gsindex);

        if (next->gs)
                HYPERVISOR_set_segment_base(SEGBASE_GS_USER, next->gs);

        /*
         * Switch the PDA.
         */
        prev->userrsp = read_pda(oldrsp);
        write_pda(oldrsp, next->userrsp);
        write_pda(pcurrent, next_p);
        write_pda(kernelstack,
                  task_stack_page(next_p) + THREAD_SIZE - PDA_STACKOFFSET);

        /*
         * Now maybe reload the debug registers
         */
        if (unlikely(next->debugreg7)) {
                set_debugreg(next->debugreg0, 0);
                set_debugreg(next->debugreg1, 1);
                set_debugreg(next->debugreg2, 2);
                set_debugreg(next->debugreg3, 3);
                /* no 4 and 5 */
                set_debugreg(next->debugreg6, 6);
                set_debugreg(next->debugreg7, 7);
        }

        return prev_p;
}

/*
 * sys_execve() executes a new program.
 */
asmlinkage
long sys_execve(char __user *name, char __user * __user *argv,
                char __user * __user *envp, struct pt_regs regs)
{
        long error;
        char * filename;

        filename = getname(name);
        error = PTR_ERR(filename);
        if (IS_ERR(filename))
                return error;
        error = do_execve(filename, argv, envp, &regs);
        if (error == 0) {
                task_lock(current);
                current->ptrace &= ~PT_DTRACE;
                task_unlock(current);
        }
        putname(filename);
        return error;
}

void set_personality_64bit(void)
{
        /* inherit personality from parent */

        /* Make sure to be in 64bit mode */
        clear_thread_flag(TIF_IA32);
}

asmlinkage long sys_fork(struct pt_regs *regs)
{
        return do_fork(SIGCHLD, regs->rsp, regs, 0, NULL, NULL);
}

asmlinkage long
sys_clone(unsigned long clone_flags, unsigned long newsp,
          void __user *parent_tid, void __user *child_tid, struct pt_regs *regs)
{
        if (!newsp)
                newsp = regs->rsp;
        return do_fork(clone_flags, newsp, regs, 0, parent_tid, child_tid);
}

/*
 * This is trivial, and on the face of it looks like it
 * could equally well be done in user mode.
 *
 * Not so, for quite unobvious reasons - register pressure.
 * In user mode vfork() cannot have a stack frame, and if
 * done by calling the "clone()" system call directly, you
 * do not have enough call-clobbered registers to hold all
 * the information you need.
 */
asmlinkage long sys_vfork(struct pt_regs *regs)
{
        return do_fork(CLONE_VFORK | CLONE_VM | SIGCHLD, regs->rsp, regs, 0,
                    NULL, NULL);
}

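/*
 * Find the "wait channel" of a sleeping task: walk its saved frame pointers
 * and return the first return address that is not inside the scheduler,
 * giving up after 16 frames or when the walk leaves the task's stack.
 */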
unsigned long get_wchan(struct task_struct *p)
{
        unsigned long stack;
        u64 fp, rip;
        int count = 0;

        if (!p || p == current || p->state == TASK_RUNNING)
                return 0;
        stack = (unsigned long)task_stack_page(p);
        if (p->thread.rsp < stack || p->thread.rsp > stack+THREAD_SIZE)
                return 0;
        fp = *(u64 *)(p->thread.rsp);
        do {
                if (fp < (unsigned long)stack ||
                    fp > (unsigned long)stack+THREAD_SIZE)
                        return 0;
                rip = *(u64 *)(fp+8);
                if (!in_sched_functions(rip))
                        return rip;
                fp = *(u64 *)fp;
        } while (count++ < 16);
        return 0;
}

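/*
 * Implements arch_prctl(): get/set the FS and GS base of a task.  Bases
 * below 4GB are placed in a GDT TLS slot because reloading a selector is
 * cheaper at context switch; larger bases go through the SEGBASE_FS /
 * SEGBASE_GS_USER hypercalls.
 *
 * Illustrative user-space sketch (not part of this file; tls_block and
 * base are hypothetical variables):
 *
 *      arch_prctl(ARCH_SET_FS, (unsigned long)tls_block);
 *      arch_prctl(ARCH_GET_FS, (unsigned long)&base);
 */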
long do_arch_prctl(struct task_struct *task, int code, unsigned long addr)
{
        int ret = 0;
        int doit = task == current;
        int cpu;

        switch (code) {
        case ARCH_SET_GS:
                if (addr >= TASK_SIZE_OF(task))
                        return -EPERM;
                cpu = get_cpu();
                /* handle small bases via the GDT because that's faster to
                   switch. */
                if (addr <= 0xffffffff) {
                        set_32bit_tls(task, GS_TLS, addr);
                        if (doit) {
                                load_TLS(&task->thread, cpu);
                                load_gs_index(GS_TLS_SEL);
                        }
                        task->thread.gsindex = GS_TLS_SEL;
                        task->thread.gs = 0;
                } else {
                        task->thread.gsindex = 0;
                        task->thread.gs = addr;
                        if (doit) {
                                load_gs_index(0);
                                ret = HYPERVISOR_set_segment_base(
                                        SEGBASE_GS_USER, addr);
                        }
                }
                put_cpu();
                break;
        case ARCH_SET_FS:
                /* Not strictly needed for fs, but do it for symmetry
                   with gs */
                if (addr >= TASK_SIZE_OF(task))
                        return -EPERM;
                cpu = get_cpu();
                /* handle small bases via the GDT because that's faster to
                   switch. */
                if (addr <= 0xffffffff) {
                        set_32bit_tls(task, FS_TLS, addr);
                        if (doit) {
                                load_TLS(&task->thread, cpu);
                                asm volatile("movl %0,%%fs" :: "r"(FS_TLS_SEL));
                        }
                        task->thread.fsindex = FS_TLS_SEL;
                        task->thread.fs = 0;
                } else {
                        task->thread.fsindex = 0;
                        task->thread.fs = addr;
                        if (doit) {
                                /* set the selector to 0 to not confuse
                                   __switch_to */
                                asm volatile("movl %0,%%fs" :: "r" (0));
                                ret = HYPERVISOR_set_segment_base(SEGBASE_FS,
                                                                  addr);
                        }
                }
                put_cpu();
                break;
        case ARCH_GET_FS: {
                unsigned long base;
                if (task->thread.fsindex == FS_TLS_SEL)
                        base = read_32bit_tls(task, FS_TLS);
                else if (doit)
                        rdmsrl(MSR_FS_BASE, base);
                else
                        base = task->thread.fs;
                ret = put_user(base, (unsigned long __user *)addr);
                break;
        }
        case ARCH_GET_GS: {
                unsigned long base;
                unsigned gsindex;
                if (task->thread.gsindex == GS_TLS_SEL)
                        base = read_32bit_tls(task, GS_TLS);
                else if (doit) {
                        asm("movl %%gs,%0" : "=r" (gsindex));
                        if (gsindex)
                                rdmsrl(MSR_KERNEL_GS_BASE, base);
                        else
                                base = task->thread.gs;
                }
                else
                        base = task->thread.gs;
                ret = put_user(base, (unsigned long __user *)addr);
                break;
        }

        default:
                ret = -EINVAL;
                break;
        }

        return ret;
}

long sys_arch_prctl(int code, unsigned long addr)
{
        return do_arch_prctl(current, code, addr);
}

/*
 * Capture the user space registers if the task is not running (in user space)
 */
int dump_task_regs(struct task_struct *tsk, elf_gregset_t *regs)
{
        struct pt_regs *pp, ptregs;

        pp = task_pt_regs(tsk);

        ptregs = *pp;
        ptregs.cs &= 0xffff;
        ptregs.ss &= 0xffff;

        elf_core_copy_regs(regs, &ptregs);

        return 1;
}

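/*
 * Randomize the initial user stack pointer by up to 8KB when address space
 * randomization is enabled, then align it to 16 bytes.
 */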
unsigned long arch_align_stack(unsigned long sp)
{
        if (randomize_va_space)
                sp -= get_random_int() % 8192;
        return sp & ~0xf;
}

#ifndef CONFIG_SMP
void _restore_vcpu(void)
{
}
#endif