2 * Marc E. Fiuczynski <mef@cs.princeton.edu>
4 * Copyright (c) 2004 The Trustees of Princeton University (Trustees).
6 * vsh is free software; you can redistribute it and/or modify it
7 * under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2, or (at your option)
11 * vsh is distributed in the hope that it will be useful, but WITHOUT
12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
13 * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
14 * License for more details.
16 * You should have received a copy of the GNU General Public License
17 * along with vsh; see the file COPYING. If not, write to the Free
18 * Software Foundation, 59 Temple Place - Suite 330, Boston, MA
35 #include <sys/syscall.h>
36 #include <asm/unistd.h>
37 #include <sys/mount.h>
38 #include <sys/types.h>
40 #include <sys/resource.h>
45 //--------------------------------------------------------------------
/* Force the non-legacy code paths: with this macro undefined, the
 * `#ifdef CONFIG_VSERVER_LEGACY` sections in sandbox_processes() and
 * slice_enter() are not compiled. */
48 #undef CONFIG_VSERVER_LEGACY
50 /* Null byte made explicit */
/* Added to each computed string length to reserve room for the
 * terminating NUL. */
51 #define NULLBYTE_SIZE 1
53 /* Base for all vserver roots for chroot */
/* compute_new_root() appends "/<login name>" to this to form the chroot
 * directory. */
54 #define VSERVER_ROOT_BASE "/vservers"
/*
 * Write a diagnostic to stderr: "<file>:<line>: ", then the printf-style
 * message built from `format` and the arguments that follow `_errno`,
 * then ": <strerror(_errno)> (<_errno>)".
 *
 * format: printf-style format for the variadic arguments.
 * file, line: call-site location (supplied by the PERROR() macro).
 * _errno: error number to describe.
 *
 * NOTE(review): whether the errno suffix is printed unconditionally and
 * whether a trailing newline is written is not established by the
 * statements shown here -- confirm.
 */
57 _PERROR(const char *format, char *file, int line, int _errno, ...)
62 fprintf(stderr, "%s:%d: ", file, line);
63 vfprintf(stderr, format, ap);
65 fprintf(stderr, ": %s (%d)", strerror(_errno), _errno);
/* Call-site wrapper: passes __FILE__, __LINE__ and the current errno.
 * Named variadic parameters (`args...`) and `## args` are GNU C
 * extensions. */
72 #define PERROR(format, args...) _PERROR(format, __FILE__, __LINE__, errno, ## args)
74 /* Change to root:root (before entering new context) */
/* slice_enter() treats a negative return value from this function as
 * failure and reports that the binary is probably missing its SUID bit. */
75 static int setuidgid_root()
/*
 * Build the chroot directory for `uid` as "<base>/<login name>".
 *
 * base: directory prefix (sandbox_chroot() passes VSERVER_ROOT_BASE).
 * root: out parameter; receives a malloc()ed, NUL-terminated path.
 * uid:  user id looked up with getpwuid() to obtain the login name.
 *
 * NOTE(review): no free() of *root appears in the code shown, so the
 * buffer presumably belongs to the caller -- confirm.
 */
88 static void compute_new_root(char *base, char **root, uid_t uid)
93 if ((pwd = getpwuid(uid)) == NULL) {
94 PERROR("getpwuid(%d)", uid);
/* Space for base + "/" + login name + terminating NUL. */
99 strlen(base) + strlen("/") +
100 strlen(pwd->pw_name) + NULLBYTE_SIZE;
101 (*root) = (char *)malloc(root_len);
102 if ((*root) == NULL) {
103 PERROR("malloc(%d)", root_len);
107 sprintf((*root), "%s/%s", base, pwd->pw_name);
/* Given root_len is the sum above, sprintf() has already written the NUL
 * at this index, so this store is redundant. */
108 (*root)[root_len - 1] = '\0';
/*
 * Check for a file inside the sandbox by stat()ing the concatenation
 * <sandbox_root><relpath>.  relpath is appended as-is, so it needs its own
 * leading "/".  proc_mounted() and devpts_mounted() return the result
 * unchanged and their callers test it with `!`.
 */
111 /* Example: sandbox_root = /vservers/bnc, relpath = /proc/1 */
112 static int sandbox_file_exists(char *sandbox_root, char *relpath)
114 struct stat stat_buf;
/* Both strings plus the terminating NUL. */
118 len = strlen(sandbox_root) + strlen(relpath) + NULLBYTE_SIZE;
119 if ((file = (char *)malloc(len)) == NULL) {
120 PERROR("malloc(%d)", len);
123 sprintf(file, "%s%s", sandbox_root, relpath);
124 file[len - 1] = '\0';
/* A successful stat() is what counts as "exists"; stat_buf is not
 * examined in the statements shown. */
125 if (stat(file, &stat_buf) == 0) {
/* Heuristic: /proc counts as mounted inside the sandbox when
 * <sandbox_root>/proc/1 exists. */
134 static int proc_mounted(char *sandbox_root)
136 return sandbox_file_exists(sandbox_root, "/proc/1");
/* Heuristic: devpts counts as mounted inside the sandbox when
 * <sandbox_root>/dev/pts/0 exists.
 * NOTE(review): a mounted devpts with no pty 0 allocated would presumably
 * look unmounted to this test -- confirm. */
139 static int devpts_mounted(char *sandbox_root)
141 return sandbox_file_exists(sandbox_root, "/dev/pts/0");
144 static void mount_proc(char *sandbox_root)
146 char *source = "/proc";
150 len = strlen(sandbox_root) + strlen("/") + strlen("proc") + NULLBYTE_SIZE;
151 if ((target = (char *)malloc(len)) == NULL) {
152 PERROR("malloc(%d)", len);
156 sprintf(target, "%s/proc", sandbox_root);
157 target[len - 1] = '\0';
158 if (!proc_mounted(sandbox_root))
159 mount(source, target, "proc", MS_BIND | MS_RDONLY, NULL);
164 static void mount_devpts(char *sandbox_root)
166 char *source = "/dev/pts";
170 len = strlen(sandbox_root) + strlen("/") + strlen("dev/pts") + NULLBYTE_SIZE;
171 if ((target = (char *)malloc(len)) == NULL) {
172 PERROR("malloc(%d)", len);
176 sprintf(target, "%s/dev/pts", sandbox_root);
177 target[len - 1] = '\0';
178 if (!devpts_mounted(sandbox_root))
179 mount(source, target, "devpts", 0, NULL);
/*
 * Confine the process to the vserver root belonging to `uid`: compute
 * "<VSERVER_ROOT_BASE>/<login name>", make sure /proc and /dev/pts are
 * mounted inside it, chroot() there and chdir() to the new "/".
 */
184 static int sandbox_chroot(uid_t uid)
186 char *sandbox_root = NULL;
188 compute_new_root(VSERVER_ROOT_BASE,&sandbox_root, uid);
/* Mounting happens before chroot(), while the host's /proc and /dev/pts
 * source paths are still reachable. */
189 mount_proc(sandbox_root);
190 mount_devpts(sandbox_root);
191 if (chroot(sandbox_root) < 0) {
192 PERROR("chroot(%s)", sandbox_root);
/* Move the working directory inside the new root. */
195 if (chdir("/") < 0) {
/*
 * Advance `index` past whitespace in `buffer` without ever reading at or
 * beyond `len`.  The bound is tested before the character is read, the
 * character is converted to unsigned char as <ctype.h> requires, and the
 * whole thing is a single statement so it is safe in unbraced if/else.
 */
#define WHITESPACE(buffer,index,len) \
	do { \
		while ((index) < (len) && \
		       isspace((unsigned char)(buffer)[(index)])) \
			(index)++; \
	} while (0)

/*
 * One configurable limit: the key searched for in the conf file and the
 * variable that receives its value.  Tables are terminated by an entry
 * whose name is NULL.
 */
struct resources {
	char *name;
	unsigned long long *limit;
};

#define VSERVERCONF "/etc/vservers/"

/* Longest conf-file line, including the terminating NUL, read at once. */
#define VSERVERCONF_LINE_MAX 1000

/*
 * Parse one conf-file line against `list`.
 *
 * Lines whose first non-blank character is '#' are comments.  Otherwise the
 * first table entry whose name occurs in the line must be followed by
 * optional whitespace, '=', optional whitespace and a decimal number, which
 * is stored through that entry's `limit` pointer.
 *
 * Returns 0 when the line was consumed (comment, no known key, or a value
 * stored) and -1 when a known key is present but malformed or its value
 * does not fit in unsigned long long.
 */
static int parse_limit_line(const char *line, struct resources *list)
{
	size_t len = strlen(line);
	size_t index = 0;
	struct resources *r;

	WHITESPACE(line, index, len);
	if (line[index] == '#')
		return 0;

	for (r = list; r->name; r++) {
		const char *hit = strstr(&line[index], r->name);
		unsigned long long value;

		if (hit == NULL)
			continue;

		/* Continue right after the key that was found. */
		index = (size_t)(hit - line) + strlen(r->name);
		WHITESPACE(line, index, len);

		/* line[len] is the NUL, so reading at index == len is safe. */
		if (line[index++] != '=')
			return -1;
		WHITESPACE(line, index, len);

		if (!isdigit((unsigned char)line[index]))
			return -1;

		/* strtoull keeps the full 64-bit range and reports overflow,
		 * unlike atoi which only yields an int. */
		errno = 0;
		value = strtoull(&line[index], NULL, 10);
		if (errno == ERANGE)
			return -1;

		*r->limit = value;
		return 0;
	}
	return 0;
}

/*
 * Load resource limits for `context` from /etc/vservers/<context>.conf.
 *
 * context: vserver name; selects the conf file.
 * list:    NULL-name-terminated table of keys and destinations.  Entries
 *          whose key is not found keep their current value.
 *
 * A missing or unreadable file is reported on stderr and leaves every limit
 * untouched.  Parsing stops at the first malformed line; values stored
 * before it are kept.  Running out of memory for the file name is fatal
 * (exit(1)), matching the allocation handling elsewhere in this file.
 */
static void get_limits(char *context, struct resources *list)
{
	char line[VSERVERCONF_LINE_MAX];
	/* sizeof on the joined literals counts both pieces plus the NUL. */
	size_t len = sizeof(VSERVERCONF ".conf") + strlen(context);
	char *conf = malloc(len);
	FILE *fb;

	if (conf == NULL) {
		fprintf(stderr, "malloc(%lu): %s\n",
			(unsigned long)len, strerror(errno));
		exit(1);
	}
	snprintf(conf, len, "%s%s.conf", VSERVERCONF, context);

	/* open the conf file for reading */
	fb = fopen(conf, "r");
	if (fb == NULL) {
		fprintf(stderr,"cannot open %s\n",conf);
	} else {
		/* the conf file exists */
		while (fgets(line, sizeof line, fb) != NULL) {
			if (parse_limit_line(line, list) < 0)
				break;
		}
		fclose(fb);
	}

	free(conf);
}
/*
 * Place the calling process in vserver context `xid`, applying the limits
 * configured for `context`.
 *
 * With CONFIG_VSERVER_LEGACY defined the context is entered with
 * vc_new_s_context().  Otherwise the function reads limits with
 * get_limits(), chroots via sandbox_chroot(xid), creates the context with
 * vc_ctx_create() (an already existing context, EEXIST, is tolerated), sets
 * scheduler parameters when `cpu` was changed from VC_LIM_KEEP, sets the
 * RLIMIT_RSS and RLIMIT_NPROC limits, the capability masks and the context
 * flags, and finally calls vc_ctx_migrate() when the context already
 * existed.
 *
 * xid:     context id; also passed to sandbox_chroot() as the uid.
 * context: name handed to get_limits() to locate the conf file.
 *
 * slice_enter() treats a negative return value as failure.
 */
265 static int sandbox_processes(xid_t xid, char *context)
267 #ifdef CONFIG_VSERVER_LEGACY
271 flags |= 1; /* VX_INFO_LOCK -- cannot request a new vx_id */
272 /* flags |= 4; VX_INFO_NPROC -- limit number of procs in a context */
274 (void) vc_new_s_context(xid, 0, flags);
276 /* use legacy dirty hack for capremove */
/* NOTE(review): printf parses "%16ullx" as "%16u" followed by the literal
 * text "llx"; if vc_get_insecurebcaps() returns a 64-bit mask, "%16llx"
 * was presumably intended -- confirm. */
277 if (vc_new_s_context(VC_SAMECTX, vc_get_insecurebcaps(), flags) == VC_NOCTX) {
278 PERROR("vc_new_s_context(%u, 0x%16ullx, 0x%08x)",
279 VC_SAMECTX, vc_get_insecurebcaps(), flags);
283 struct vc_rlimit limits;
284 struct vc_ctx_caps caps;
285 struct vc_ctx_flags flags;
/* Defaults that stay in effect for any key get_limits() does not find in
 * the conf file. */
288 unsigned long long cpu = VC_LIM_KEEP;
289 unsigned long long mem = VC_LIM_KEEP;
290 unsigned long long task = VC_LIM_KEEP;
291 unsigned long long cpuguaranteed = 0;
/* Key/destination table consumed by get_limits(). */
292 struct resources list[] =
295 {"CPUGUARANTEED", &cpuguaranteed},
296 {"TASKLIMIT", &task},
299 get_limits(context,list);
/* xid doubles as the uid whose login name selects the chroot directory. */
300 (void) (sandbox_chroot(xid));
/* Capability values are the complement of the "insecure" sets reported by
 * the vserver library. */
302 caps.ccaps = ~vc_get_insecureccaps();
304 caps.bcaps = ~vc_get_insecurebcaps();
307 flags.flagword = VC_VXF_INFO_LOCK;
308 flags.mask = VC_VXF_STATE_SETUP | VC_VXF_INFO_LOCK;
/* VC_NOCTX with errno == EEXIST means the context already exists; that
 * case is handled by vc_ctx_migrate() at the end of the function. */
310 ctx = vc_ctx_create(xid);
311 if (ctx == VC_NOCTX && errno != EEXIST) {
312 PERROR("vc_ctx_create(%d)", xid);
/* Scheduler settings are only touched when cpu no longer holds its
 * VC_LIM_KEEP default. */
317 if (cpu != VC_LIM_KEEP) {
318 struct vc_set_sched sched = {
322 /* Need to distinguish between guarantee (hard) and
323 * best effort (share) from the vserver
326 #define VC_VXF_SCHED_SHARE 0x00000800ull
327 flags.flagword |= VC_VXF_SCHED_HARD;
328 flags.mask |= VC_VXF_SCHED_HARD;
329 if (cpuguaranteed==0) {
330 flags.flagword |= VC_VXF_SCHED_SHARE;
331 flags.mask |= VC_VXF_SCHED_SHARE;
334 /* CPULIMIT value from /etc/vservers/xyz.conf */
335 sched.fill_rate = cpu;
336 sched.set_mask |= VC_VXSM_FILL_RATE;
338 sched.interval = 1000; /* Andy's default value */
339 sched.set_mask |= VC_VXSM_INTERVAL;
341 /* set base token value for new contexts */
342 if (ctx != VC_NOCTX) {
343 sched.tokens = 100; /* Andy's default value */
344 sched.set_mask |= VC_VXSM_TOKENS;
347 sched.tokens_min = 50; /* Andy's default value */
348 sched.tokens_max = 100; /* Andy's default value */
349 sched.set_mask |= VC_VXSM_TOKENS_MIN;
350 sched.set_mask |= VC_VXSM_TOKENS_MAX;
352 if (vc_set_sched(xid, &sched)==-1) {
353 PERROR("vc_set_sched()");
/* Limit applied through RLIMIT_RSS.
 * NOTE(review): the limits.* members are printed with "%d" below; if they
 * are 64-bit (they are assigned VC_LIM_KEEP, like the unsigned long long
 * locals above) the format does not match -- confirm. */
359 limits.min = VC_LIM_KEEP;
360 limits.soft = VC_LIM_KEEP;
362 if (vc_set_rlimit(xid, RLIMIT_RSS, &limits)) {
363 PERROR("vc_set_rlimit(%d, %d, %d/%d/%d)",
364 xid, RLIMIT_RSS, limits.min, limits.soft, limits.hard);
/* Limit applied through RLIMIT_NPROC. */
369 limits.min = VC_LIM_KEEP;
370 limits.soft = VC_LIM_KEEP;
372 if (vc_set_rlimit(xid, RLIMIT_NPROC, &limits)) {
373 PERROR("vc_set_rlimit(%d, %d, %d/%d/%d)",
374 xid, RLIMIT_NPROC, limits.min, limits.soft, limits.hard);
/* NOTE(review): same "%16ullx" issue as in the legacy branch; the four
 * caps members would be consumed as unsigned int -- confirm and fix. */
378 if (vc_set_ccaps(xid, &caps) == -1) {
379 PERROR("vc_set_ccaps(%d, 0x%16ullx/0x%16ullx, 0x%16ullx/0x%16ullx)",
380 xid, caps.ccaps, caps.cmask, caps.bcaps, caps.bmask);
384 if (vc_set_cflags(xid, &flags) == -1) {
385 PERROR("vc_set_cflags(%d, 0x%16llx/0x%16llx)",
386 xid, flags.flagword, flags.mask);
390 /* context already exists, migrate to it */
391 if (ctx == VC_NOCTX && vc_ctx_migrate(xid) == -1) {
392 PERROR("vc_ctx_migrate(%d)", xid);
/*
 * Switch to `username` and set up its login environment: look the user up
 * with getpwnam_r(), change gid and then uid, chdir() to the home
 * directory, and export HOME, LOGNAME, MAIL, SHELL and USER.
 *
 * main() calls this after slice_enter().
 */
400 void runas_slice_user(char *username)
402 struct passwd pwdd, *pwd = &pwdd, *result;
404 char *home_env, *logname_env, *mail_env, *shell_env, *user_env;
405 int home_len, logname_len, mail_len, shell_len, user_len;
407 static char *envp[10];
/* Size for getpwnam_r()'s scratch buffer; a -1 result from sysconf() is
 * treated as an error here. */
410 pwdBuffer_len = sysconf(_SC_GETPW_R_SIZE_MAX);
411 if (pwdBuffer_len == -1) {
412 PERROR("sysconf(_SC_GETPW_R_SIZE_MAX)");
416 pwdBuffer = (char*)malloc(pwdBuffer_len);
417 if (pwdBuffer == NULL) {
418 PERROR("malloc(%d)", pwdBuffer_len);
/* NOTE(review): the `errno != 0` test is only meaningful if errno was
 * cleared just before the call, and `result` (NULL when no entry matches)
 * is not examined in the statements shown -- confirm both. */
423 if ((getpwnam_r(username,pwd,pwdBuffer,pwdBuffer_len, &result) != 0) || (errno != 0)) {
424 PERROR("getpwnam_r(%s)", username);
/* Group id is changed first, while the process still has the privilege to
 * do so; the user id follows. */
428 if (setgid(pwd->pw_gid) < 0) {
429 PERROR("setgid(%d)", pwd->pw_gid);
433 if (setuid(pwd->pw_uid) < 0) {
434 PERROR("setuid(%d)", pwd->pw_uid);
438 if (chdir(pwd->pw_dir) < 0) {
439 PERROR("chdir(%s)", pwd->pw_dir);
/* Each length is "NAME=" + value + terminating NUL. */
443 home_len = strlen("HOME=") + strlen(pwd->pw_dir) + NULLBYTE_SIZE;
444 logname_len = strlen("LOGNAME=") + strlen(username) + NULLBYTE_SIZE;
445 mail_len = strlen("MAIL=/var/spool/mail/") + strlen(username)
447 shell_len = strlen("SHELL=") + strlen(pwd->pw_shell) + NULLBYTE_SIZE;
448 user_len = strlen("USER=") + strlen(username) + NULLBYTE_SIZE;
450 home_env = (char *)malloc(home_len);
451 logname_env = (char *)malloc(logname_len);
452 mail_env = (char *)malloc(mail_len);
453 shell_env = (char *)malloc(shell_len);
454 user_env = (char *)malloc(user_len);
/* All five allocations are checked together. */
456 if ((home_env == NULL) ||
457 (logname_env == NULL) ||
458 (mail_env == NULL) ||
459 (shell_env == NULL) ||
460 (user_env == NULL)) {
465 sprintf(home_env, "HOME=%s", pwd->pw_dir);
466 sprintf(logname_env, "LOGNAME=%s", username);
467 sprintf(mail_env, "MAIL=/var/spool/mail/%s", username);
468 sprintf(shell_env, "SHELL=%s", pwd->pw_shell);
469 sprintf(user_env, "USER=%s", username);
/* sprintf() already terminated each string; provided the lengths above
 * are exact, these stores are redundant. */
471 home_env[home_len - 1] = '\0';
472 logname_env[logname_len - 1] = '\0';
473 mail_env[mail_len - 1] = '\0';
474 shell_env[shell_len - 1] = '\0';
475 user_env[user_len - 1] = '\0';
478 envp[1] = logname_env;
/* putenv() stores the pointers it is given rather than copying the
 * strings, so these buffers have to stay allocated. */
484 if ((putenv(home_env) < 0) ||
485 (putenv(logname_env) < 0) ||
486 (putenv(mail_env) < 0) ||
487 (putenv(shell_env) < 0) ||
488 (putenv(user_env) < 0)) {
489 PERROR("vserver: putenv error ");
/*
 * Enter the vserver named `context`: look up its passwd entry, become
 * root:root with setuidgid_root(), then enter the security context through
 * sandbox_processes().  In a CONFIG_VSERVER_LEGACY build sandbox_chroot()
 * is called explicitly first.
 *
 * NOTE(review): `uid` is presumably the pw_uid of the entry fetched below;
 * its assignment is not among the statements shown -- confirm.
 */
494 void slice_enter(char *context)
496 struct passwd pwdd, *pwd = &pwdd, *result;
/* Size for getpwnam_r()'s scratch buffer; -1 is treated as an error. */
501 pwdBuffer_len = sysconf(_SC_GETPW_R_SIZE_MAX);
502 if (pwdBuffer_len == -1) {
503 PERROR("sysconf(_SC_GETPW_R_SIZE_MAX)");
507 pwdBuffer = (char*)malloc(pwdBuffer_len);
508 if (pwdBuffer == NULL) {
509 PERROR("malloc(%d)", pwdBuffer_len);
/* NOTE(review): as in runas_slice_user(), the `errno != 0` test relies on
 * errno having been cleared beforehand, and `result` is not examined in
 * the statements shown -- confirm. */
514 if ((getpwnam_r(context,pwd,pwdBuffer,pwdBuffer_len, &result) != 0) || (errno != 0)) {
515 PERROR("getpwnam_r(%s)", context);
520 if (setuidgid_root() < 0) { /* For chroot, new_s_context */
521 fprintf(stderr, "vsh: Could not become root, check that SUID flag is set on binary\n");
525 #ifdef CONFIG_VSERVER_LEGACY
526 (void) (sandbox_chroot(uid));
/* The uid is reused as the vserver context id. */
529 if (sandbox_processes((xid_t) uid, context) < 0) {
530 fprintf(stderr, "vsh: Could not change context to %d\n", uid);
535 //--------------------------------------------------------------------
/* Shell main() falls back to when the passwd entry's pw_shell is NULL or
 * empty. */
537 #define DEFAULT_SHELL "/bin/sh"
539 /* Exit statuses for programs like 'env' that exec other programs.
540 EXIT_FAILURE might not be 1, so use EXIT_FAIL in such programs. */
/* Used by main() when execvp() fails with an errno other than ENOENT. */
543 EXIT_CANNOT_INVOKE = 126,
/*
 * Entry point.  Looks up the passwd entry for `uid` and uses its login name
 * as the vserver context, enters that context with slice_enter(), switches
 * to `username` with runas_slice_user(), looks up that user's shell with
 * getpwnam_r() (falling back to DEFAULT_SHELL) and exec()s it.
 *
 * If execvp() returns, the exit status is chosen from errno: EXIT_ENOENT
 * for ENOENT, EXIT_CANNOT_INVOKE otherwise.
 */
547 int main(int argc, char **argv)
549 struct passwd pwdd, *pwd = &pwdd, *result;
550 char *context, *username, *shell, *pwdBuffer;
/* getpwuid() replaces pwd with a pointer to its own result; pw_name is
 * copied with strdup() right below. */
561 if ((pwd = getpwuid(uid)) == NULL) {
562 PERROR("getpwuid(%d)", uid);
566 context = (char*)strdup(pwd->pw_name);
572 /* enter vserver "context" */
573 slice_enter(context);
575 /* Now run as username in this context. Note that for PlanetLab's
576 vserver configuration the context name also happens to be the
577 "default" username within the vserver context.
580 runas_slice_user(username);
582 /* With the uid/gid appropriately set. Let's figure out what the
583 * shell in the vserver's /etc/passwd is for the given username.
586 pwdBuffer_len = sysconf(_SC_GETPW_R_SIZE_MAX);
587 if (pwdBuffer_len == -1) {
588 PERROR("sysconf(_SC_GETPW_R_SIZE_MAX");
591 pwdBuffer = (char*)malloc(pwdBuffer_len);
592 if (pwdBuffer == NULL) {
593 PERROR("malloc(%d)", pwdBuffer_len);
598 if ((getpwnam_r(username,pwd,pwdBuffer,pwdBuffer_len, &result) != 0) || (errno != 0)) {
599 PERROR("getpwnam_r(%s)", username);
603 /* Make sure pw->pw_shell is non-NULL.*/
604 if (pwd->pw_shell == NULL || pwd->pw_shell[0] == '\0') {
605 pwd->pw_shell = (char *) DEFAULT_SHELL;
/* Private copy of the shell path, later passed to execvp(). */
608 shell = (char *)strdup(pwd->pw_shell);
614 /* Check whether 'su' or 'sshd' invoked us as a login shell or
615 not; did this above when testing argv[0]=='-'.
620 args = (char**)malloc(sizeof(char*)*(argc+2));
622 PERROR("malloc(%d)", sizeof(char*)*(argc+2));
627 for(i=1;i<argc+1;i++) {
632 (void) execvp(shell,argv);
634 int exit_status = (errno == ENOENT ? EXIT_ENOENT : EXIT_CANNOT_INVOKE);
638 return 0; /* shutup compiler */