2 * Marc E. Fiuczynski <mef@cs.princeton.edu>
4 * Copyright (c) 2004 The Trustees of Princeton University (Trustees).
6 * vsh is free software; you can redistribute it and/or modify it
7 * under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2, or (at your option)
11 * vsh is distributed in the hope that it will be useful, but WITHOUT
12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
13 * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
14 * License for more details.
16 * You should have received a copy of the GNU General Public License
17 * along with vsh; see the file COPYING. If not, write to the Free
18 * Software Foundation, 59 Temple Place - Suite 330, Boston, MA
35 #include <sys/syscall.h>
36 #include <asm/unistd.h>
37 #include <sys/mount.h>
38 #include <sys/types.h>
40 #include <sys/resource.h>
45 //--------------------------------------------------------------------
48 #undef CONFIG_VSERVER_LEGACY
50 /* Null byte made explicit */
51 #define NULLBYTE_SIZE 1
53 /* Base for all vserver roots for chroot */
54 #define VSERVER_ROOT_BASE "/vservers"
57 _PERROR(const char *format, char *file, int line, int _errno, ...)
62 fprintf(stderr, "%s:%d: ", file, line);
63 vfprintf(stderr, format, ap);
65 fprintf(stderr, ": %s (%d)", strerror(_errno), _errno);
/*
 * Report a failed call on stderr.  Forwards the message format and its
 * optional arguments to _PERROR together with the call site (__FILE__,
 * __LINE__) and the value of errno at the point of expansion.
 */
#define PERROR(format, ...) \
	_PERROR(format, __FILE__, __LINE__, errno, ##__VA_ARGS__)
74 /* Change to root:root (before entering new context) */
75 static int setuidgid_root()
88 static void compute_new_root(char *base, char **root, uid_t uid)
93 if ((pwd = getpwuid(uid)) == NULL) {
94 PERROR("getpwuid(%d)", uid);
99 strlen(base) + strlen("/") +
100 strlen(pwd->pw_name) + NULLBYTE_SIZE;
101 (*root) = (char *)malloc(root_len);
102 if ((*root) == NULL) {
103 PERROR("malloc(%d)", root_len);
107 sprintf((*root), "%s/%s", base, pwd->pw_name);
108 (*root)[root_len - 1] = '\0';
111 /* Example: sandbox_root = /vservers/bnc, relpath = /proc/1 */
112 static int sandbox_file_exists(char *sandbox_root, char *relpath)
114 struct stat stat_buf;
118 len = strlen(sandbox_root) + strlen(relpath) + NULLBYTE_SIZE;
119 if ((file = (char *)malloc(len)) == NULL) {
120 PERROR("malloc(%d)", len);
123 sprintf(file, "%s%s", sandbox_root, relpath);
124 file[len - 1] = '\0';
125 if (stat(file, &stat_buf) == 0) {
/*
 * Non-zero when <sandbox_root>/proc/1 exists; mount_proc() uses this as the
 * indicator that /proc is already mounted inside the sandbox.
 */
static int proc_mounted(char *sandbox_root)
{
	int mounted = sandbox_file_exists(sandbox_root, "/proc/1");

	return mounted;
}
/*
 * Non-zero when <sandbox_root>/dev/pts/0 exists; mount_devpts() uses this as
 * the indicator that devpts is already mounted inside the sandbox.
 * NOTE(review): this probes for pty 0 specifically -- confirm that entry is
 * always present once devpts is mounted.
 */
static int devpts_mounted(char *sandbox_root)
{
	int mounted = sandbox_file_exists(sandbox_root, "/dev/pts/0");

	return mounted;
}
144 static void mount_proc(char *sandbox_root)
146 char *source = "/proc";
150 len = strlen(sandbox_root) + strlen("/") + strlen("proc") + NULLBYTE_SIZE;
151 if ((target = (char *)malloc(len)) == NULL) {
152 PERROR("malloc(%d)", len);
156 sprintf(target, "%s/proc", sandbox_root);
157 target[len - 1] = '\0';
158 if (!proc_mounted(sandbox_root))
159 mount(source, target, "proc", MS_BIND | MS_RDONLY, NULL);
164 static void mount_devpts(char *sandbox_root)
166 char *source = "/dev/pts";
170 len = strlen(sandbox_root) + strlen("/") + strlen("dev/pts") + NULLBYTE_SIZE;
171 if ((target = (char *)malloc(len)) == NULL) {
172 PERROR("malloc(%d)", len);
176 sprintf(target, "%s/dev/pts", sandbox_root);
177 target[len - 1] = '\0';
178 if (!devpts_mounted(sandbox_root))
179 mount(source, target, "devpts", 0, NULL);
184 static int sandbox_chroot(uid_t uid)
186 char *sandbox_root = NULL;
188 compute_new_root(VSERVER_ROOT_BASE,&sandbox_root, uid);
189 mount_proc(sandbox_root);
190 mount_devpts(sandbox_root);
191 if (chroot(sandbox_root) < 0) {
192 PERROR("chroot(%s)", sandbox_root);
195 if (chdir("/") < 0) {
/*
 * Advance index past whitespace in buffer.
 *
 * While buffer[index] is a whitespace character, index is incremented as
 * long as it is still below len; if whitespace continues at or beyond len
 * the macro jumps to the label `out`, which the calling function must
 * provide.  Scanning stops on the first non-whitespace byte, including the
 * terminating NUL.
 *
 * The byte is converted to unsigned char before isspace(), because passing
 * a negative char value to the <ctype.h> functions is undefined.  The
 * do/while(0) wrapper makes the expansion a single statement, so it is safe
 * inside an unbraced if/else.
 */
#define WHITESPACE(buffer,index,len) \
	do { \
		while (isspace((unsigned char)(buffer)[(index)])) { \
			if ((index) < (len)) \
				(index)++; \
			else \
				goto out; \
		} \
	} while (0)
208 unsigned long long *limit;
211 #define VSERVERCONF "/etc/vservers/"
212 static void get_limits(char *context, struct resources *list){
214 size_t len = strlen(VSERVERCONF) + strlen(context) + strlen(".conf") + NULLBYTE_SIZE;
215 char *conf = (char *)malloc(len);
218 sprintf(conf, "%s%s.conf", VSERVERCONF, context);
220 /* open the conf file for reading */
221 fb = fopen(conf,"r");
224 char *buffer = malloc(1000);
227 /* the conf file exists */
228 while((p=fgets(buffer,1000-1,fb))!=NULL) {
230 len = strnlen(buffer,1000);
231 WHITESPACE(buffer,index,len);
232 if (buffer[index] == '#')
235 for (r=list; r->name; r++)
236 if ((p=strstr(&buffer[index],r->name))!=NULL) {
237 /* adjust index into buffer */
238 index+= (p-&buffer[index])+strlen(r->name);
240 /* skip over whitespace */
241 WHITESPACE(buffer,index,len);
243 /* expecting to see = sign */
244 if (buffer[index++]!='=') goto out;
246 /* skip over whitespace */
247 WHITESPACE(buffer,index,len);
249 /* expecting to see a digit for number */
250 if (!isdigit((int)buffer[index])) goto out;
252 *r->limit = atoi(&buffer[index]);
259 fprintf(stderr,"cannot open %s\n",conf);
265 static int sandbox_processes(xid_t ctx, char *context)
267 #ifdef CONFIG_VSERVER_LEGACY
271 flags |= 1; /* VX_INFO_LOCK -- cannot request a new vx_id */
272 /* flags |= 4; VX_INFO_NPROC -- limit number of procs in a context */
274 (void) vc_new_s_context(ctx, 0, flags);
276 /* use legacy dirty hack for capremove */
277 if (vc_new_s_context(VC_SAMECTX, vc_get_insecurebcaps(), flags) == VC_NOCTX) {
278 PERROR("vc_new_s_context(%u, 0x%16ullx, 0x%08x)",
279 VC_SAMECTX, vc_get_insecurebcaps(), flags);
283 struct vc_rlimit limits;
284 struct vc_ctx_caps caps;
285 struct vc_ctx_flags flags;
286 struct vc_vx_info vc;
289 unsigned long long cpu = VC_LIM_KEEP;
290 unsigned long long mem = VC_LIM_KEEP;
291 unsigned long long task = VC_LIM_KEEP;
292 unsigned long long cpuguaranteed = 0;
293 struct resources list[] =
296 {"CPUGUARANTEED", &cpuguaranteed},
297 {"TASKLIMIT", &task},
300 get_limits(context,list);
301 (void) (sandbox_chroot(ctx));
303 caps.ccaps = ~vc_get_insecureccaps();
305 caps.bcaps = ~vc_get_insecurebcaps();
307 flags.mask = flags.flagword = VC_VXF_STATE_SETUP;
310 if (vc_get_vx_info(ctx,&vc) != 0) {
311 xid = vc_ctx_create(ctx);
312 if (xid == VC_NOCTX && errno != EEXIST){
313 PERROR("vc_ctx_create(%d)", xid);
319 if (cpu != VC_LIM_KEEP) {
320 struct vc_set_sched sched = {
324 /* Need to distinguish between guarantee (hard) and
325 * best effort (share) from the vserver
328 #define VC_VXF_SCHED_SHARE 0x00000800ull
329 flags.flagword |= VC_VXF_SCHED_HARD;
330 flags.mask |= VC_VXF_SCHED_HARD;
331 if (cpuguaranteed==0) {
332 flags.flagword |= VC_VXF_SCHED_SHARE;
333 flags.mask |= VC_VXF_SCHED_SHARE;
336 /* CPULIMIT value from /etc/vservers/xyz.conf */
337 sched.fill_rate = cpu;
338 sched.set_mask |= VC_VXSM_FILL_RATE;
340 sched.interval = 1000; /* Andy's default value */
341 sched.set_mask |= VC_VXSM_INTERVAL;
343 /* set base token value for new contexts */
344 if (xid != VC_NOCTX) {
345 sched.tokens = 100; /* Andy's default value */
346 sched.set_mask |= VC_VXSM_TOKENS;
349 sched.tokens_min = 50; /* Andy's default value */
350 sched.tokens_max = 100; /* Andy's default value */
351 sched.set_mask |= VC_VXSM_TOKENS_MIN;
352 sched.set_mask |= VC_VXSM_TOKENS_MAX;
354 if (vc_set_sched(ctx, &sched)==-1) {
355 PERROR("vc_set_sched()");
361 limits.min = VC_LIM_KEEP;
362 limits.soft = VC_LIM_KEEP;
364 if (vc_set_rlimit(ctx, RLIMIT_RSS, &limits)) {
365 PERROR("vc_set_rlimit(%d, %d, %d/%d/%d)",
366 ctx, RLIMIT_RSS, limits.min, limits.soft, limits.hard);
371 limits.min = VC_LIM_KEEP;
372 limits.soft = VC_LIM_KEEP;
374 if (vc_set_rlimit(ctx, RLIMIT_NPROC, &limits)) {
375 PERROR("vc_set_rlimit(%d, %d, %d/%d/%d)",
376 ctx, RLIMIT_NPROC, limits.min, limits.soft, limits.hard);
380 if (vc_set_ccaps(ctx, &caps) == -1) {
381 PERROR("vc_set_ccaps(%d, 0x%16ullx/0x%16ullx, 0x%16ullx/0x%16ullx)",
382 ctx, caps.ccaps, caps.cmask, caps.bcaps, caps.bmask);
386 if (vc_set_cflags(ctx, &flags) == -1) {
387 PERROR("vc_set_cflags(%d, 0x%16llx/0x%16llx)",
388 ctx, flags.flagword, flags.mask);
392 /* context already exists, migrate to it */
393 if (xid == VC_NOCTX && vc_ctx_migrate(ctx) == -1) {
394 PERROR("vc_ctx_migrate(%d)", xid);
402 void runas_slice_user(char *username)
404 struct passwd pwdd, *pwd = &pwdd, *result;
406 char *home_env, *logname_env, *mail_env, *shell_env, *user_env;
407 int home_len, logname_len, mail_len, shell_len, user_len;
409 static char *envp[10];
412 pwdBuffer_len = sysconf(_SC_GETPW_R_SIZE_MAX);
413 if (pwdBuffer_len == -1) {
414 PERROR("sysconf(_SC_GETPW_R_SIZE_MAX)");
418 pwdBuffer = (char*)malloc(pwdBuffer_len);
419 if (pwdBuffer == NULL) {
420 PERROR("malloc(%d)", pwdBuffer_len);
425 if ((getpwnam_r(username,pwd,pwdBuffer,pwdBuffer_len, &result) != 0) || (errno != 0)) {
426 PERROR("getpwnam_r(%s)", username);
430 if (setgid(pwd->pw_gid) < 0) {
431 PERROR("setgid(%d)", pwd->pw_gid);
435 if (setuid(pwd->pw_uid) < 0) {
436 PERROR("setuid(%d)", pwd->pw_uid);
440 if (chdir(pwd->pw_dir) < 0) {
441 PERROR("chdir(%s)", pwd->pw_dir);
445 home_len = strlen("HOME=") + strlen(pwd->pw_dir) + NULLBYTE_SIZE;
446 logname_len = strlen("LOGNAME=") + strlen(username) + NULLBYTE_SIZE;
447 mail_len = strlen("MAIL=/var/spool/mail/") + strlen(username)
449 shell_len = strlen("SHELL=") + strlen(pwd->pw_shell) + NULLBYTE_SIZE;
450 user_len = strlen("USER=") + strlen(username) + NULLBYTE_SIZE;
452 home_env = (char *)malloc(home_len);
453 logname_env = (char *)malloc(logname_len);
454 mail_env = (char *)malloc(mail_len);
455 shell_env = (char *)malloc(shell_len);
456 user_env = (char *)malloc(user_len);
458 if ((home_env == NULL) ||
459 (logname_env == NULL) ||
460 (mail_env == NULL) ||
461 (shell_env == NULL) ||
462 (user_env == NULL)) {
467 sprintf(home_env, "HOME=%s", pwd->pw_dir);
468 sprintf(logname_env, "LOGNAME=%s", username);
469 sprintf(mail_env, "MAIL=/var/spool/mail/%s", username);
470 sprintf(shell_env, "SHELL=%s", pwd->pw_shell);
471 sprintf(user_env, "USER=%s", username);
473 home_env[home_len - 1] = '\0';
474 logname_env[logname_len - 1] = '\0';
475 mail_env[mail_len - 1] = '\0';
476 shell_env[shell_len - 1] = '\0';
477 user_env[user_len - 1] = '\0';
480 envp[1] = logname_env;
486 if ((putenv(home_env) < 0) ||
487 (putenv(logname_env) < 0) ||
488 (putenv(mail_env) < 0) ||
489 (putenv(shell_env) < 0) ||
490 (putenv(user_env) < 0)) {
491 PERROR("vserver: putenv error ");
496 void slice_enter(char *context)
498 struct passwd pwdd, *pwd = &pwdd, *result;
503 pwdBuffer_len = sysconf(_SC_GETPW_R_SIZE_MAX);
504 if (pwdBuffer_len == -1) {
505 PERROR("sysconf(_SC_GETPW_R_SIZE_MAX)");
509 pwdBuffer = (char*)malloc(pwdBuffer_len);
510 if (pwdBuffer == NULL) {
511 PERROR("malloc(%d)", pwdBuffer_len);
516 if ((getpwnam_r(context,pwd,pwdBuffer,pwdBuffer_len, &result) != 0) || (errno != 0)) {
517 PERROR("getpwnam_r(%s)", context);
522 if (setuidgid_root() < 0) { /* For chroot, new_s_context */
523 fprintf(stderr, "vsh: Could not become root, check that SUID flag is set on binary\n");
527 #ifdef CONFIG_VSERVER_LEGACY
528 (void) (sandbox_chroot(uid));
531 if (sandbox_processes((xid_t) uid, context) < 0) {
532 fprintf(stderr, "vsh: Could not change context to %d\n", uid);
537 //--------------------------------------------------------------------
539 #define DEFAULT_SHELL "/bin/sh"
541 /* Exit statuses for programs like 'env' that exec other programs.
542 EXIT_FAILURE might not be 1, so use EXIT_FAIL in such programs. */
545 EXIT_CANNOT_INVOKE = 126,
549 int main(int argc, char **argv)
551 struct passwd pwdd, *pwd = &pwdd, *result;
552 char *context, *username, *shell, *pwdBuffer;
563 if ((pwd = getpwuid(uid)) == NULL) {
564 PERROR("getpwuid(%d)", uid);
568 context = (char*)strdup(pwd->pw_name);
574 /* enter vserver "context" */
575 slice_enter(context);
577 /* Now run as username in this context. Note that for PlanetLab's
578 vserver configuration the context name also happens to be the
579 "default" username within the vserver context.
582 runas_slice_user(username);
584 /* With the uid/gid appropriately set, figure out what the
585 * shell in the vserver's /etc/passwd is for the given username.
588 pwdBuffer_len = sysconf(_SC_GETPW_R_SIZE_MAX);
589 if (pwdBuffer_len == -1) {
590 PERROR("sysconf(_SC_GETPW_R_SIZE_MAX");
593 pwdBuffer = (char*)malloc(pwdBuffer_len);
594 if (pwdBuffer == NULL) {
595 PERROR("malloc(%d)", pwdBuffer_len);
600 if ((getpwnam_r(username,pwd,pwdBuffer,pwdBuffer_len, &result) != 0) || (errno != 0)) {
601 PERROR("getpwnam_r(%s)", username);
605 /* Make sure pw->pw_shell is non-NULL.*/
606 if (pwd->pw_shell == NULL || pwd->pw_shell[0] == '\0') {
607 pwd->pw_shell = (char *) DEFAULT_SHELL;
610 shell = (char *)strdup(pwd->pw_shell);
616 /* Check whether 'su' or 'sshd' invoked us as a login shell or
617 not; did this above when testing argv[0]=='-'.
622 args = (char**)malloc(sizeof(char*)*(argc+2));
624 PERROR("malloc(%d)", sizeof(char*)*(argc+2));
629 for(i=1;i<argc+1;i++) {
634 (void) execvp(shell,argv);
636 int exit_status = (errno == ENOENT ? EXIT_ENOENT : EXIT_CANNOT_INVOKE);
640 return 0; /* shutup compiler */