X-Git-Url: http://git.onelab.eu/?a=blobdiff_plain;f=kernel%2Facct.c;h=70d0d88e55541821ff0fd9561e30fcae12e49e17;hb=refs%2Fheads%2Fvserver;hp=555e1e3c349ffb1639dcc8d9139e2778c27a9425;hpb=5273a3df6485dc2ad6aa7ddd441b9a21970f003b;p=linux-2.6.git diff --git a/kernel/acct.c b/kernel/acct.c index 555e1e3c3..70d0d88e5 100644 --- a/kernel/acct.c +++ b/kernel/acct.c @@ -43,16 +43,18 @@ * a struct file opened for write. Fixed. 2/6/2000, AV. */ -#include #include #include #include +#include #include #include #include #include #include #include +#include +#include #include #include #include /* sector_div */ @@ -72,7 +74,7 @@ int acct_parm[3] = {4, 2, 30}; /* * External references and all of the globals. */ -static void do_acct_process(long, struct file *); +static void do_acct_process(struct file *); /* * This structure is used so that all the data protected by lock @@ -87,7 +89,8 @@ struct acct_glbs { struct timer_list timer; }; -static struct acct_glbs acct_globals __cacheline_aligned = {SPIN_LOCK_UNLOCKED}; +static struct acct_glbs acct_globals __cacheline_aligned = + {__SPIN_LOCK_UNLOCKED(acct_globals.lock)}; /* * Called whenever the timer says to check the free space. @@ -115,7 +118,7 @@ static int check_free_space(struct file *file) spin_unlock(&acct_globals.lock); /* May block */ - if (vfs_statfs(file->f_dentry->d_inode->i_sb, &sbuf)) + if (vfs_statfs(file->f_path.dentry, &sbuf)) return res; suspend = sbuf.f_blocks * SUSPEND; resume = sbuf.f_blocks * RESUME; @@ -164,12 +167,12 @@ out: } /* - * Close the old accouting file (if currently open) and then replace + * Close the old accounting file (if currently open) and then replace * it with file (if non-NULL). * * NOTE: acct_globals.lock MUST be held on entry and exit. */ -void acct_file_reopen(struct file *file) +static void acct_file_reopen(struct file *file) { struct file *old_acct = NULL; @@ -191,74 +194,113 @@ void acct_file_reopen(struct file *file) add_timer(&acct_globals.timer); } if (old_acct) { + mnt_unpin(old_acct->f_path.mnt); spin_unlock(&acct_globals.lock); - do_acct_process(0, old_acct); + do_acct_process(old_acct); filp_close(old_acct, NULL); spin_lock(&acct_globals.lock); } } -/* - * sys_acct() is the only system call needed to implement process - * accounting. It takes the name of the file where accounting records - * should be written. If the filename is NULL, accounting will be - * shutdown. - */ -asmlinkage long sys_acct(const char *name) +static int acct_on(char *name) { - struct file *file = NULL; - char *tmp; + struct file *file; int error; - if (!capable(CAP_SYS_PACCT)) - return -EPERM; + /* Difference from BSD - they don't do O_APPEND */ + file = filp_open(name, O_WRONLY|O_APPEND|O_LARGEFILE, 0); + if (IS_ERR(file)) + return PTR_ERR(file); - if (name) { - tmp = getname(name); - if (IS_ERR(tmp)) { - return (PTR_ERR(tmp)); - } - /* Difference from BSD - they don't do O_APPEND */ - file = filp_open(tmp, O_WRONLY|O_APPEND, 0); - putname(tmp); - if (IS_ERR(file)) { - return (PTR_ERR(file)); - } - if (!S_ISREG(file->f_dentry->d_inode->i_mode)) { - filp_close(file, NULL); - return (-EACCES); - } + if (!S_ISREG(file->f_path.dentry->d_inode->i_mode)) { + filp_close(file, NULL); + return -EACCES; + } - if (!file->f_op->write) { - filp_close(file, NULL); - return (-EIO); - } + if (!file->f_op->write) { + filp_close(file, NULL); + return -EIO; } error = security_acct(file); if (error) { - if (file) - filp_close(file, NULL); + filp_close(file, NULL); return error; } spin_lock(&acct_globals.lock); + mnt_pin(file->f_path.mnt); acct_file_reopen(file); spin_unlock(&acct_globals.lock); - return (0); + mntput(file->f_path.mnt); /* it's pinned, now give up active reference */ + + return 0; } -/* - * If the accouting is turned on for a file in the filesystem pointed - * to by sb, turn accouting off. +/** + * sys_acct - enable/disable process accounting + * @name: file name for accounting records or NULL to shutdown accounting + * + * Returns 0 for success or negative errno values for failure. + * + * sys_acct() is the only system call needed to implement process + * accounting. It takes the name of the file where accounting records + * should be written. If the filename is NULL, accounting will be + * shutdown. + */ +asmlinkage long sys_acct(const char __user *name) +{ + int error; + + if (!capable(CAP_SYS_PACCT)) + return -EPERM; + + if (name) { + char *tmp = getname(name); + if (IS_ERR(tmp)) + return (PTR_ERR(tmp)); + error = acct_on(tmp); + putname(tmp); + } else { + error = security_acct(NULL); + if (!error) { + spin_lock(&acct_globals.lock); + acct_file_reopen(NULL); + spin_unlock(&acct_globals.lock); + } + } + return error; +} + +/** + * acct_auto_close - turn off a filesystem's accounting if it is on + * @m: vfsmount being shut down + * + * If the accounting is turned on for a file in the subtree pointed to + * to by m, turn accounting off. Done when m is about to die. + */ +void acct_auto_close_mnt(struct vfsmount *m) +{ + spin_lock(&acct_globals.lock); + if (acct_globals.file && acct_globals.file->f_path.mnt == m) + acct_file_reopen(NULL); + spin_unlock(&acct_globals.lock); +} + +/** + * acct_auto_close - turn off a filesystem's accounting if it is on + * @sb: super block for the filesystem + * + * If the accounting is turned on for a file in the filesystem pointed + * to by sb, turn accounting off. */ void acct_auto_close(struct super_block *sb) { spin_lock(&acct_globals.lock); if (acct_globals.file && - acct_globals.file->f_dentry->d_inode->i_sb == sb) { - acct_file_reopen((struct file *)NULL); + acct_globals.file->f_path.mnt->mnt_sb == sb) { + acct_file_reopen(NULL); } spin_unlock(&acct_globals.lock); } @@ -302,6 +344,69 @@ static comp_t encode_comp_t(unsigned long value) return exp; } +#if ACCT_VERSION==1 || ACCT_VERSION==2 +/* + * encode an u64 into a comp2_t (24 bits) + * + * Format: 5 bit base 2 exponent, 20 bits mantissa. + * The leading bit of the mantissa is not stored, but implied for + * non-zero exponents. + * Largest encodable value is 50 bits. + */ + +#define MANTSIZE2 20 /* 20 bit mantissa. */ +#define EXPSIZE2 5 /* 5 bit base 2 exponent. */ +#define MAXFRACT2 ((1ul << MANTSIZE2) - 1) /* Maximum fractional value. */ +#define MAXEXP2 ((1 < (MAXFRACT2>>1)); + rnd = 0; + while (value > MAXFRACT2) { + rnd = value & 1; + value >>= 1; + exp++; + } + + /* + * If we need to round up, do it (and handle overflow correctly). + */ + if (rnd && (++value > MAXFRACT2)) { + value >>= 1; + exp++; + } + + if (exp > MAXEXP2) { + /* Overflow. Return largest representable number instead. */ + return (1ul << (MANTSIZE2+EXPSIZE2-1)) - 1; + } else { + return (value & (MAXFRACT2>>1)) | (exp << (MANTSIZE2-1)); + } +} +#endif + +#if ACCT_VERSION==3 +/* + * encode an u64 into a 32 bit IEEE float + */ +static u32 encode_float(u64 value) +{ + unsigned exp = 190; + unsigned u; + + if (value==0) return 0; + while ((s64)value > 0){ + value <<= 1; + exp--; + } + u = (u32)(value >> 40) & 0x7fffffu; + return u | (exp << 23); +} +#endif + /* * Write an accounting entry for an exiting process * @@ -314,13 +419,16 @@ static comp_t encode_comp_t(unsigned long value) /* * do_acct_process does all actual work. Caller holds the reference to file. */ -static void do_acct_process(long exitcode, struct file *file) +static void do_acct_process(struct file *file) { - struct acct ac; + struct pacct_struct *pacct = ¤t->signal->pacct; + acct_t ac; mm_segment_t fs; - unsigned long vsize; unsigned long flim; u64 elapsed; + u64 run_time; + struct timespec uptime; + struct tty_struct *tty; /* * First check to see if there is enough free_space to continue @@ -333,55 +441,64 @@ static void do_acct_process(long exitcode, struct file *file) * Fill the accounting struct with the needed info as recorded * by the different kernel functions. */ - memset((caddr_t)&ac, 0, sizeof(struct acct)); + memset((caddr_t)&ac, 0, sizeof(acct_t)); + ac.ac_version = ACCT_VERSION | ACCT_BYTEORDER; strlcpy(ac.ac_comm, current->comm, sizeof(ac.ac_comm)); - elapsed = jiffies_64_to_clock_t(get_jiffies_64() - current->start_time); + /* calculate run_time in nsec*/ + do_posix_clock_monotonic_gettime(&uptime); + run_time = (u64)uptime.tv_sec*NSEC_PER_SEC + uptime.tv_nsec; + run_time -= (u64)current->group_leader->start_time.tv_sec * NSEC_PER_SEC + + current->group_leader->start_time.tv_nsec; + /* convert nsec -> AHZ */ + elapsed = nsec_to_AHZ(run_time); +#if ACCT_VERSION==3 + ac.ac_etime = encode_float(elapsed); +#else ac.ac_etime = encode_comp_t(elapsed < (unsigned long) -1l ? (unsigned long) elapsed : (unsigned long) -1l); - do_div(elapsed, USER_HZ); +#endif +#if ACCT_VERSION==1 || ACCT_VERSION==2 + { + /* new enlarged etime field */ + comp2_t etime = encode_comp2_t(elapsed); + ac.ac_etime_hi = etime >> 16; + ac.ac_etime_lo = (u16) etime; + } +#endif + do_div(elapsed, AHZ); ac.ac_btime = xtime.tv_sec - elapsed; - ac.ac_utime = encode_comp_t(jiffies_to_clock_t(current->utime)); - ac.ac_stime = encode_comp_t(jiffies_to_clock_t(current->stime)); /* we really need to bite the bullet and change layout */ ac.ac_uid = current->uid; ac.ac_gid = current->gid; - - read_lock(&tasklist_lock); /* pin current->signal */ - ac.ac_tty = current->signal->tty ? - old_encode_dev(tty_devnum(current->signal->tty)) : 0; - read_unlock(&tasklist_lock); - - ac.ac_flag = 0; - if (current->flags & PF_FORKNOEXEC) - ac.ac_flag |= AFORK; - if (current->flags & PF_SUPERPRIV) - ac.ac_flag |= ASU; - if (current->flags & PF_DUMPCORE) - ac.ac_flag |= ACORE; - if (current->flags & PF_SIGNALED) - ac.ac_flag |= AXSIG; - - vsize = 0; - if (current->mm) { - struct vm_area_struct *vma; - down_read(¤t->mm->mmap_sem); - vma = current->mm->mmap; - while (vma) { - vsize += vma->vm_end - vma->vm_start; - vma = vma->vm_next; - } - up_read(¤t->mm->mmap_sem); - } - vsize = vsize / 1024; - ac.ac_mem = encode_comp_t(vsize); +#if ACCT_VERSION==2 + ac.ac_ahz = AHZ; +#endif +#if ACCT_VERSION==1 || ACCT_VERSION==2 + /* backward-compatible 16 bit fields */ + ac.ac_uid16 = current->uid; + ac.ac_gid16 = current->gid; +#endif +#if ACCT_VERSION==3 + ac.ac_pid = current->tgid; + ac.ac_ppid = current->parent->tgid; +#endif + + spin_lock_irq(¤t->sighand->siglock); + tty = current->signal->tty; + ac.ac_tty = tty ? old_encode_dev(tty_devnum(tty)) : 0; + ac.ac_utime = encode_comp_t(jiffies_to_AHZ(cputime_to_jiffies(pacct->ac_utime))); + ac.ac_stime = encode_comp_t(jiffies_to_AHZ(cputime_to_jiffies(pacct->ac_stime))); + ac.ac_flag = pacct->ac_flag; + ac.ac_mem = encode_comp_t(pacct->ac_mem); + ac.ac_minflt = encode_comp_t(pacct->ac_minflt); + ac.ac_majflt = encode_comp_t(pacct->ac_majflt); + ac.ac_exitcode = pacct->ac_exitcode; + spin_unlock_irq(¤t->sighand->siglock); ac.ac_io = encode_comp_t(0 /* current->io_usage */); /* %% */ ac.ac_rw = encode_comp_t(ac.ac_io / 1024); - ac.ac_minflt = encode_comp_t(current->min_flt); - ac.ac_majflt = encode_comp_t(current->maj_flt); ac.ac_swaps = encode_comp_t(0); - ac.ac_exitcode = exitcode; /* * Kernel segment override to datasegment and write it @@ -392,18 +509,73 @@ static void do_acct_process(long exitcode, struct file *file) /* * Accounting records are not subject to resource limits. */ - flim = current->rlim[RLIMIT_FSIZE].rlim_cur; - current->rlim[RLIMIT_FSIZE].rlim_cur = RLIM_INFINITY; + flim = current->signal->rlim[RLIMIT_FSIZE].rlim_cur; + current->signal->rlim[RLIMIT_FSIZE].rlim_cur = RLIM_INFINITY; file->f_op->write(file, (char *)&ac, - sizeof(struct acct), &file->f_pos); - current->rlim[RLIMIT_FSIZE].rlim_cur = flim; + sizeof(acct_t), &file->f_pos); + current->signal->rlim[RLIMIT_FSIZE].rlim_cur = flim; set_fs(fs); } -/* +/** + * acct_init_pacct - initialize a new pacct_struct + * @pacct: per-process accounting info struct to initialize + */ +void acct_init_pacct(struct pacct_struct *pacct) +{ + memset(pacct, 0, sizeof(struct pacct_struct)); + pacct->ac_utime = pacct->ac_stime = cputime_zero; +} + +/** + * acct_collect - collect accounting information into pacct_struct + * @exitcode: task exit code + * @group_dead: not 0, if this thread is the last one in the process. + */ +void acct_collect(long exitcode, int group_dead) +{ + struct pacct_struct *pacct = ¤t->signal->pacct; + unsigned long vsize = 0; + + if (group_dead && current->mm) { + struct vm_area_struct *vma; + down_read(¤t->mm->mmap_sem); + vma = current->mm->mmap; + while (vma) { + vsize += vma->vm_end - vma->vm_start; + vma = vma->vm_next; + } + up_read(¤t->mm->mmap_sem); + } + + spin_lock_irq(¤t->sighand->siglock); + if (group_dead) + pacct->ac_mem = vsize / 1024; + if (thread_group_leader(current)) { + pacct->ac_exitcode = exitcode; + if (current->flags & PF_FORKNOEXEC) + pacct->ac_flag |= AFORK; + } + if (current->flags & PF_SUPERPRIV) + pacct->ac_flag |= ASU; + if (current->flags & PF_DUMPCORE) + pacct->ac_flag |= ACORE; + if (current->flags & PF_SIGNALED) + pacct->ac_flag |= AXSIG; + pacct->ac_utime = cputime_add(pacct->ac_utime, current->utime); + pacct->ac_stime = cputime_add(pacct->ac_stime, current->stime); + pacct->ac_minflt += current->min_flt; + pacct->ac_majflt += current->maj_flt; + spin_unlock_irq(¤t->sighand->siglock); +} + +/** * acct_process - now just a wrapper around do_acct_process + * @exitcode: task exit code + * + * handles process accounting for an exiting task */ -void acct_process(long exitcode) +void acct_process(void) { struct file *file = NULL; @@ -422,6 +594,6 @@ void acct_process(long exitcode) get_file(file); spin_unlock(&acct_globals.lock); - do_acct_process(exitcode, file); + do_acct_process(file); fput(file); }