From 03ce6ebf51c018370a8789c1229956622b9c54d1 Mon Sep 17 00:00:00 2001 From: Planet-Lab Support Date: Fri, 16 Jul 2004 17:46:35 +0000 Subject: [PATCH] This commit was manufactured by cvs2svn to create branch 'ckrm'. --- kernel/ckrm/ckrm_tasks_stub.c | 59 + kernel/ckrm/rbce/Makefile | 13 + kernel/ckrm/rbce/bitvector.h | 152 ++ kernel/ckrm/rbce/crbce.h | 152 ++ kernel/ckrm/rbce/crbcemod.c | 2 + kernel/ckrm/rbce/info.h | 58 + kernel/ckrm/rbce/rbce.h | 122 ++ kernel/ckrm/rbce/rbce_fs.c | 485 ++++++ kernel/ckrm/rbce/rbcemod.c | 2566 ++++++++++++++++++++++++++++++++ kernel/ckrm/rbce/rbcemod_ext.c | 615 ++++++++ kernel/ckrm/rbce/token.c | 260 ++++ 11 files changed, 4484 insertions(+) create mode 100644 kernel/ckrm/ckrm_tasks_stub.c create mode 100644 kernel/ckrm/rbce/Makefile create mode 100644 kernel/ckrm/rbce/bitvector.h create mode 100644 kernel/ckrm/rbce/crbce.h create mode 100644 kernel/ckrm/rbce/crbcemod.c create mode 100644 kernel/ckrm/rbce/info.h create mode 100644 kernel/ckrm/rbce/rbce.h create mode 100644 kernel/ckrm/rbce/rbce_fs.c create mode 100644 kernel/ckrm/rbce/rbcemod.c create mode 100644 kernel/ckrm/rbce/rbcemod_ext.c create mode 100644 kernel/ckrm/rbce/token.c diff --git a/kernel/ckrm/ckrm_tasks_stub.c b/kernel/ckrm/ckrm_tasks_stub.c new file mode 100644 index 000000000..179e6b5d6 --- /dev/null +++ b/kernel/ckrm/ckrm_tasks_stub.c @@ -0,0 +1,59 @@ +/* ckrm_tasks_stub.c - Stub file for ckrm_tasks modules + * + * Copyright (C) Chandra Seetharaman, IBM Corp. 2004 + * + * Latest version, more details at http://ckrm.sf.net + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. 
+ * + */ + +/* Changes + * + * 16 May 2004: Created + * + */ + +#include +#include +#include + +static spinlock_t stub_lock = SPIN_LOCK_UNLOCKED; + +static get_ref_t real_get_ref = NULL; +static put_ref_t real_put_ref = NULL; + +void ckrm_numtasks_register(get_ref_t gr, put_ref_t pr) +{ + spin_lock(&stub_lock); + real_get_ref = gr; + real_put_ref = pr; + spin_unlock(&stub_lock); +} + +int numtasks_get_ref(void *arg, int force) +{ + int ret = 1; + spin_lock(&stub_lock); + if (real_get_ref) { + ret = (*real_get_ref) (arg, force); + } + spin_unlock(&stub_lock); + return ret; +} + +void numtasks_put_ref(void *arg) +{ + spin_lock(&stub_lock); + if (real_put_ref) { + (*real_put_ref) (arg); + } + spin_unlock(&stub_lock); +} + +EXPORT_SYMBOL(ckrm_numtasks_register); +EXPORT_SYMBOL(numtasks_get_ref); +EXPORT_SYMBOL(numtasks_put_ref); diff --git a/kernel/ckrm/rbce/Makefile b/kernel/ckrm/rbce/Makefile new file mode 100644 index 000000000..6355d0b5a --- /dev/null +++ b/kernel/ckrm/rbce/Makefile @@ -0,0 +1,13 @@ +# +# Makefile for CKRM +# + +obj-$(CONFIG_CKRM_RBCE) += rbce.o +rbce-objs := rbcemod.o rbce_fs.o + +obj-$(CONFIG_CKRM_CRBCE) += crbce.o +crbce-objs := crbcemod.o rbce_fs.o + +CFLAGS_crbcemod.o += -DRBCE_EXTENSION # compile rbcemod.c into crbce +CFLAGS_crbcemod.o += -DRBCE_DO_SAMPLE # disable if sampling not desired +CFLAGS_crbcemod.o += -DRBCE_DO_DELAY # disable if delay info not desired diff --git a/kernel/ckrm/rbce/bitvector.h b/kernel/ckrm/rbce/bitvector.h new file mode 100644 index 000000000..4f53f9847 --- /dev/null +++ b/kernel/ckrm/rbce/bitvector.h @@ -0,0 +1,152 @@ +/* + * Copyright (C) Hubertus Franke, IBM Corp. 2003 + * + * Bitvector package + * + * Latest version, more details at http://ckrm.sf.net + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. 
+ * + */ + +/* Changes + * + * 15 Nov 2003 + * Created + */ + +#ifndef BITVECTOR_H +#define BITVECTOR_H + +typedef struct { + int size; // maxsize in longs + unsigned long bits[0]; // bit vector +} bitvector_t; + +#define BITS_2_LONGS(sz) (((sz)+BITS_PER_LONG-1)/BITS_PER_LONG) +#define BITS_2_BYTES(sz) (((sz)+7)/8) + +#if 0 +#define CHECK_VEC(vec) (vec) /* check against NULL */ +#else +#define CHECK_VEC(vec) (1) /* assume no problem */ +#endif + +#define CHECK_VEC_VOID(vec) do { if (!CHECK_VEC(vec)) return; } while(0) +#define CHECK_VEC_RC(vec, val) \ +do { if (!CHECK_VEC(vec)) return (val); } while(0) + +inline static void bitvector_zero(bitvector_t * bitvec) +{ + int sz; + + CHECK_VEC_VOID(bitvec); + sz = BITS_2_BYTES(bitvec->size); + memset(bitvec->bits, 0, sz); + return; +} + +inline static unsigned long bitvector_bytes(unsigned long size) +{ + return sizeof(bitvector_t) + BITS_2_BYTES(size); +} + +inline static void bitvector_init(bitvector_t * bitvec, unsigned long size) +{ + bitvec->size = size; + bitvector_zero(bitvec); + return; +} + +inline static bitvector_t *bitvector_alloc(unsigned long size) +{ + bitvector_t *vec = + (bitvector_t *) kmalloc(bitvector_bytes(size), GFP_KERNEL); + if (vec) { + vec->size = size; + bitvector_zero(vec); + } + return vec; +} + +inline static void bitvector_free(bitvector_t * bitvec) +{ + CHECK_VEC_VOID(bitvec); + kfree(bitvec); + return; +} + +#define def_bitvec_op(name,mod1,op,mod2) \ +inline static int name(bitvector_t *res, bitvector_t *op1, \ + bitvector_t *op2) \ +{ \ + unsigned int i, size; \ + \ + CHECK_VEC_RC(res, 0); \ + CHECK_VEC_RC(op1, 0); \ + CHECK_VEC_RC(op2, 0); \ + size = res->size; \ + if (((size != (op1)->size) || (size != (op2)->size))) { \ + return 0; \ + } \ + size = BITS_2_LONGS(size); \ + for (i = 0; i < size; i++) { \ + (res)->bits[i] = (mod1 (op1)->bits[i]) op \ + (mod2 (op2)->bits[i]); \ + } \ + return 1; \ +} + +def_bitvec_op(bitvector_or,, |,); +def_bitvec_op(bitvector_and,, &,); 
+def_bitvec_op(bitvector_xor,, ^,); +def_bitvec_op(bitvector_or_not,, |, ~); +def_bitvec_op(bitvector_not_or, ~, |,); +def_bitvec_op(bitvector_and_not,, &, ~); +def_bitvec_op(bitvector_not_and, ~, &,); + +inline static void bitvector_set(int idx, bitvector_t * vec) +{ + set_bit(idx, vec->bits); + return; +} + +inline static void bitvector_clear(int idx, bitvector_t * vec) +{ + clear_bit(idx, vec->bits); + return; +} + +inline static int bitvector_test(int idx, bitvector_t * vec) +{ + return test_bit(idx, vec->bits); +} + +#ifdef DEBUG +inline static void bitvector_print(int flag, bitvector_t * vec) +{ + unsigned int i; + int sz; + extern int rbcedebug; + + if ((rbcedebug & flag) == 0) { + return; + } + if (vec == NULL) { + printk("v<0>-NULL\n"); + return; + } + printk("v<%d>-", sz = vec->size); + for (i = 0; i < sz; i++) { + printk("%c", test_bit(i, vec->bits) ? '1' : '0'); + } + return; +} +#else +#define bitvector_print(x, y) +#endif + +#endif // BITVECTOR_H diff --git a/kernel/ckrm/rbce/crbce.h b/kernel/ckrm/rbce/crbce.h new file mode 100644 index 000000000..c2967d18d --- /dev/null +++ b/kernel/ckrm/rbce/crbce.h @@ -0,0 +1,152 @@ +/* + * crbce.h + * + * Copyright (C) Hubertus Franke, IBM Corp. 
2003 + * + * This files contains the type definition of the record + * created by the CRBCE CKRM classification engine + * + * Changes + * + * 2003-11-11 Created by H.Franke + * 2003-12-01 Sanitized for Delivery by H.Franke + * + */ + +#ifndef CRBCE_RECORDS_H +#define CRBCE_RECORDS_H + +#include +#include +#include +#include + +#define CRBCE_UKCC_NAME "crbce_ukcc" +#define CRBCE_UKCC_PATH "/mnt/relayfs" + +#define CRBCE_UKCC_PATH_NAME CRBCE_UKCC_PATH"/"CRBCE_UKCC_NAME + +#define CRBCE_MAX_CLASS_NAME_LEN 256 + +/**************************************************************** + * + * CRBCE EVENT SET is and extension to the standard CKRM_EVENTS + * + ****************************************************************/ +enum { + + /* we use the standard CKRM_EVENT_<..> + * to identify reclassification cause actions + * and extend by additional ones we need + */ + + /* up event flow */ + + CRBCE_REC_EXIT = CKRM_NUM_EVENTS, + CRBCE_REC_DATA_DELIMITER, + CRBCE_REC_SAMPLE, + CRBCE_REC_TASKINFO, + CRBCE_REC_SYS_INFO, + CRBCE_REC_CLASS_INFO, + CRBCE_REC_KERNEL_CMD_DONE, + CRBCE_REC_UKCC_FULL, + + /* down command issueance */ + CRBCE_REC_KERNEL_CMD, + + CRBCE_NUM_EVENTS +}; + +struct task_sample_info { + uint32_t cpu_running; + uint32_t cpu_waiting; + uint32_t io_delayed; + uint32_t memio_delayed; +}; + +/********************************************* + * KERNEL -> USER records * + *********************************************/ + +/* we have records with either a time stamp or not */ +struct crbce_hdr { + int type; + pid_t pid; +}; + +struct crbce_hdr_ts { + int type; + pid_t pid; + uint32_t jiffies; + uint64_t cls; +}; + +/* individual records */ + +struct crbce_rec_fork { + struct crbce_hdr_ts hdr; + pid_t ppid; +}; + +struct crbce_rec_data_delim { + struct crbce_hdr_ts hdr; + int is_stop; /* 0 start, 1 stop */ +}; + +struct crbce_rec_task_data { + struct crbce_hdr_ts hdr; + struct task_sample_info sample; + struct task_delay_info delay; +}; + +struct crbce_ukcc_full { + 
struct crbce_hdr_ts hdr; +}; + +struct crbce_class_info { + struct crbce_hdr_ts hdr; + int action; + int namelen; + char name[CRBCE_MAX_CLASS_NAME_LEN]; +}; + +/********************************************* + * USER -> KERNEL records * + *********************************************/ + +enum crbce_kernel_cmd { + CRBCE_CMD_START, + CRBCE_CMD_STOP, + CRBCE_CMD_SET_TIMER, + CRBCE_CMD_SEND_DATA, +}; + +struct crbce_command { + int type; /* we need this for the K->U reflection */ + int cmd; + uint32_t len; /* added in the kernel for reflection */ +}; + +#define set_cmd_hdr(rec,tok) \ +((rec).hdr.type=CRBCE_REC_KERNEL_CMD,(rec).hdr.cmd=(tok)) + +struct crbce_cmd_done { + struct crbce_command hdr; + int rc; +}; + +struct crbce_cmd { + struct crbce_command hdr; +}; + +struct crbce_cmd_send_data { + struct crbce_command hdr; + int delta_mode; +}; + +struct crbce_cmd_settimer { + struct crbce_command hdr; + uint32_t interval; /* in msec .. 0 means stop */ +}; + +#endif diff --git a/kernel/ckrm/rbce/crbcemod.c b/kernel/ckrm/rbce/crbcemod.c new file mode 100644 index 000000000..3492049a3 --- /dev/null +++ b/kernel/ckrm/rbce/crbcemod.c @@ -0,0 +1,2 @@ +/* Easiest way to transmit a symbolic link as a patch */ +#include "rbcemod.c" diff --git a/kernel/ckrm/rbce/info.h b/kernel/ckrm/rbce/info.h new file mode 100644 index 000000000..3bc13b519 --- /dev/null +++ b/kernel/ckrm/rbce/info.h @@ -0,0 +1,58 @@ +static char *info = + "1. Magic files\n" + "\t|--rbce_info - read only file detailing how to setup and use RBCE.\n\n" + "\t|--rbce_reclassify - contains nothing. Writing a pid to it" + "reclassifies\n" + "\tthe given task according to the current set of rules.\n" + "\tWriting 0 to it reclassifies all tasks in the system according to the \n" + "\tsurrent set of rules. This is typically done by the user/sysadmin \n" + "\tafter changing/creating rules. 
\n\n" + "\t|--rbce_state - determines whether RBCE is currently active" + " or inactive.\n" + "\tWriting 1 (0) activates (deactivates) the CE. Reading the file\n" + "\treturns the current state.\n\n" + "\t|--rbce_tag - set tag of the given pid, syntax - \"pid tag\"\n\n" + "2. Rules subdirectory: Each rule of the RBCE is represented by a file in\n" + "/rcfs/ce/rules.\n\n" + "Following are the different attr/value pairs that can be specified.\n\n" + "Note: attr/value pairs must be separated by commas(,) with no space" + "between them\n\n" + "\t<*id> number where ={>,<,=,!}\n" + "\t<*id>={uid,euid,gid,egid}\n\n" + "\tcmd=\"string\" // basename of the command\n\n" + "\tpath=\"/path/to/string\" // full pathname of the command\n\n" + "\targs=\"string\" // argv[1] - argv[argc] of command\n\n" + "\ttag=\"string\" // application tag of the task\n\n" + "\t[+,-]depend=rule_filename\n" + "\t\t\t// used to chain a rule's terms with existing rules\n" + "\t\t\t// to avoid respecifying the latter's rule terms.\n" + "\t\t\t// A rule's dependent rules are evaluated before \n" + "\t\t\t// its rule terms get evaluated.\n" + "\t\t\t//\n" + "\t\t\t// An optional + or - can precede the depend keyword.\n" + "\t\t\t// +depend adds a dependent rule to the tail of the\n" + "\t\t\t// current chain, -depend removes an existing \n" + "\t\t\t// dependent rule\n\n" + "\torder=number // order in which this rule is executed relative to\n" + "\t\t\t// other independent rules.\n" + "\t\t\t// rule with order 1 is checked first and so on.\n" + "\t\t\t// As soon as a rule matches, the class of that rule\n" + "\t\t\t// is returned to Core. 
So, order really matters.\n" + "\t\t\t// If no order is specified by the user, the next\n" + "\t\t\t// highest available order number is assigned to\n" + "\t\t\t// the rule.\n\n" + "\tclass=\"/rcfs/.../classname\" // target class of this rule.\n" + "\t\t\t// /rcfs all by itself indicates the\n" + "\t\t\t// systemwide default class\n\n" + "\tstate=number // 1 or 0, provides the ability to deactivate a\n" + "\t\t\t// specific rule, if needed.\n\n" + "\tipv4=\"string\" // ipv4 address in dotted decimal and port\n" + "\t\t\t// e.g. \"127.0.0.1\\80\"\n" + "\t\t\t// e.g. \"*\\80\" for CE to match any address\n" + "\t\t\t// used in socket accept queue classes\n\n" + "\tipv6=\"string\" // ipv6 address in hex and port\n" + "\t\t\t// e.g. \"fe80::4567\\80\"\n" + "\t\t\t// e.g. \"*\\80\" for CE to match any address \n" + "\t\t\t// used in socket accept queue classes\n\n" + "\texample:\n" + "\techo \"uid=100,euid<200,class=/rcfs\" > /rcfs/ce/rules/rule1\n"; diff --git a/kernel/ckrm/rbce/rbce.h b/kernel/ckrm/rbce/rbce.h new file mode 100644 index 000000000..a3af72fcd --- /dev/null +++ b/kernel/ckrm/rbce/rbce.h @@ -0,0 +1,122 @@ +/* Rule-based Classification Engine (RBCE) module + * + * Copyright (C) Hubertus Franke, IBM Corp. 2003 + * (C) Chandra Seetharaman, IBM Corp. 2003 + * + * Module for loading of classification policies and providing + * a user API for Class-based Kernel Resource Management (CKRM) + * + * Latest version, more details at http://ckrm.sf.net + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. 
+ * + */ + +/* Changes + * + * 25 Mar 2004 + * Integrate RBCE and CRBE into a single module + */ + +#ifndef RBCE_H +#define RBCE_H + +// data types defined in main rbcemod.c +struct rbce_private_data; +struct rbce_class; +struct ckrm_core_class; + +#ifndef RBCE_EXTENSION + +/**************************************************************************** + * + * RBCE STANDALONE VERSION, NO CHOICE FOR DATA COLLECTION + * + ****************************************************************************/ + +#ifdef RBCE_SHOW_INCL +#warning " ... RBCE .." +#endif + +#define RBCE_MOD_DESCR "Rule Based Classification Engine Module for CKRM" +#define RBCE_MOD_NAME "rbce" + +/* extension to private data: NONE */ +struct rbce_ext_private_data { + /* empty data */ +}; +static inline void init_ext_private_data(struct rbce_private_data *dst) +{ +} + +/* sending notification to user: NONE */ + +static void notify_class_action(struct rbce_class *cls, int action) +{ +} +static inline void send_fork_notification(struct task_struct *tsk, + struct ckrm_core_class *cls) +{ +} +static inline void send_exit_notification(struct task_struct *tsk) +{ +} +static inline void send_manual_notification(struct task_struct *tsk) +{ +} + +/* extension initialization and destruction at module init and exit */ +static inline int init_rbce_ext_pre(void) +{ + return 0; +} +static inline int init_rbce_ext_post(void) +{ + return 0; +} +static inline void exit_rbce_ext(void) +{ +} + +#else + +/*************************************************************************** + * + * RBCE with User Level Notification + * + ***************************************************************************/ + +#ifdef RBCE_SHOW_INCL +#warning " ... CRBCE .." +#ifdef RBCE_DO_SAMPLE +#warning " ... CRBCE doing sampling ..." +#endif +#ifdef RBCE_DO_DELAY +#warning " ... CRBCE doing delay ..." 
+#endif +#endif + +#define RBCE_MOD_DESCR "Rule Based Classification Engine Module" \ + "with Data Sampling/Delivery for CKRM" +#define RBCE_MOD_NAME "crbce" + +#include "crbce.h" + +struct rbce_ext_private_data { + struct task_sample_info sample; +}; + +static void notify_class_action(struct rbce_class *cls, int action); +#if 0 +static void send_fork_notification(struct task_struct *tsk, + struct ckrm_core_class *cls); +static void send_exit_notification(struct task_struct *tsk); +static void send_manual_notification(struct task_struct *tsk); +#endif + +#endif + +#endif // RBCE_H diff --git a/kernel/ckrm/rbce/rbce_fs.c b/kernel/ckrm/rbce/rbce_fs.c new file mode 100644 index 000000000..bb92fb94c --- /dev/null +++ b/kernel/ckrm/rbce/rbce_fs.c @@ -0,0 +1,485 @@ +/* + * This file is released under the GPL. + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +extern int rbce_enabled; +extern void get_rule(const char *, char *); +extern int rule_exists(const char *); +extern int change_rule(const char *, char *); +extern int delete_rule(const char *); +//extern int reclassify_pid(int); +extern int set_tasktag(int, char *); +extern int rename_rule(const char *, const char *); + +extern int rcfs_register_engine(rbce_eng_callback_t * rcbs); +extern int rcfs_unregister_engine(rbce_eng_callback_t * rcbs); +extern int rcfs_mkroot(struct rcfs_magf *, int, struct dentry **); +extern int rcfs_rmroot(struct dentry *); + +static int rbce_unlink(struct inode *, struct dentry *); + +#include "info.h" +static ssize_t +rbce_write(struct file *file, const char __user * buf, + size_t len, loff_t * ppos) +{ + char *line, *ptr; + int rc = 0, pid; + + line = (char *)kmalloc(len + 1, GFP_KERNEL); + if (!line) { + return -ENOMEM; + } + if (copy_from_user(line, buf, len)) { + kfree(line); + return -EFAULT; + } + line[len] = '\0'; + ptr = line + strlen(line) - 1; + if (*ptr == '\n') { + 
*ptr = '\0'; + } +#if 0 + if (!strcmp(file->f_dentry->d_name.name, "rbce_reclassify")) { + pid = simple_strtol(line, NULL, 0); + rc = reclassify_pid(pid); + } else +#endif + if (!strcmp(file->f_dentry->d_name.name, "rbce_tag")) { + pid = simple_strtol(line, &ptr, 0); + rc = set_tasktag(pid, ptr + 1); // expected syntax "pid tag" + } else if (!strcmp(file->f_dentry->d_name.name, "rbce_state")) { + rbce_enabled = line[0] - '0'; + } else if (!strcmp(file->f_dentry->d_name.name, "rbce_info")) { + len = -EPERM; + } else { + rc = change_rule(file->f_dentry->d_name.name, line); + } + if (rc) { + len = rc; + } + // printk("kernel read |%s|\n", line); + // printk("kernel read-2 |%s|\n", line+1000); + // printk prints only 1024 bytes once :) + // + kfree(line); + return len; +} + +static int rbce_show(struct seq_file *seq, void *offset) +{ + struct file *file = (struct file *)seq->private; + char result[256]; + + memset(result, 0, 256); + if (!strcmp(file->f_dentry->d_name.name, "rbce_reclassify") || + !strcmp(file->f_dentry->d_name.name, "rbce_tag")) { + return -EPERM; + } + if (!strcmp(file->f_dentry->d_name.name, "rbce_state")) { + seq_printf(seq, "%d\n", rbce_enabled); + return 0; + } + if (!strcmp(file->f_dentry->d_name.name, "rbce_info")) { + seq_printf(seq, info); + return 0; + } + + get_rule(file->f_dentry->d_name.name, result); + seq_printf(seq, "%s\n", result); + return 0; +} + +static int rbce_open(struct inode *inode, struct file *file) +{ + //printk("mnt_mountpoint %s\n", + // file->f_vfsmnt->mnt_mountpoint->d_name.name); + //printk("mnt_root %s\n", file->f_vfsmnt->mnt_root->d_name.name); + return single_open(file, rbce_show, file); +} + +static int rbce_close(struct inode *ino, struct file *file) +{ + const char *name = file->f_dentry->d_name.name; + + if (strcmp(name, "rbce_reclassify") && + strcmp(name, "rbce_state") && + strcmp(name, "rbce_tag") && strcmp(name, "rbce_info")) { + + if (!rule_exists(name)) { + // need more stuff to happen in the vfs layer + 
rbce_unlink(file->f_dentry->d_parent->d_inode, + file->f_dentry); + } + } + return single_release(ino, file); +} + +#define RCFS_MAGIC 0x4feedbac + +static struct file_operations rbce_file_operations; +static struct inode_operations rbce_file_inode_operations; +static struct inode_operations rbce_dir_inode_operations; + +static struct inode *rbce_get_inode(struct inode *dir, int mode, dev_t dev) +{ + struct inode *inode = new_inode(dir->i_sb); + + if (inode) { + inode->i_mode = mode; + inode->i_uid = current->fsuid; + inode->i_gid = current->fsgid; + inode->i_blksize = PAGE_CACHE_SIZE; + inode->i_blocks = 0; + inode->i_mapping->a_ops = dir->i_mapping->a_ops; + inode->i_mapping->backing_dev_info = + dir->i_mapping->backing_dev_info; + inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME; + switch (mode & S_IFMT) { + default: + init_special_inode(inode, mode, dev); + break; + case S_IFREG: + /* Treat as default assignment */ + inode->i_op = &rbce_file_inode_operations; + inode->i_fop = &rbce_file_operations; + break; + case S_IFDIR: + inode->i_op = &rbce_dir_inode_operations; + inode->i_fop = &simple_dir_operations; + + /* directory inodes start off with i_nlink == 2 + (for "." entry) */ + inode->i_nlink++; + break; + } + } + return inode; +} + +/* + * File creation. Allocate an inode, and we're done.. 
+ */ +/* SMP-safe */ +static int +rbce_mknod(struct inode *dir, struct dentry *dentry, int mode, dev_t dev) +{ + struct inode *inode = rbce_get_inode(dir, mode, dev); + int error = -ENOSPC; + + if (inode) { + if (dir->i_mode & S_ISGID) { + inode->i_gid = dir->i_gid; + if (S_ISDIR(mode)) + inode->i_mode |= S_ISGID; + } + d_instantiate(dentry, inode); + dget(dentry); /* Extra count - pin the dentry in core */ + error = 0; + + } + return error; +} + +static int rbce_unlink(struct inode *dir, struct dentry *dentry) +{ + struct inode *inode = dentry->d_inode; + int rc; + + rc = delete_rule(dentry->d_name.name); + if (rc == 0) { + if (dir) { + dir->i_ctime = dir->i_mtime = CURRENT_TIME; + } + inode->i_ctime = CURRENT_TIME; + inode->i_nlink--; + dput(dentry); + } + return rc; +} + +static int +rbce_rename(struct inode *old_dir, struct dentry *old_dentry, + struct inode *new_dir, struct dentry *new_dentry) +{ + int rc; + struct inode *inode = old_dentry->d_inode; + struct dentry *old_d = list_entry(old_dir->i_dentry.next, + struct dentry, d_alias); + struct dentry *new_d = list_entry(new_dir->i_dentry.next, + struct dentry, d_alias); + + // cannot rename any directory + if (S_ISDIR(old_dentry->d_inode->i_mode)) { + return -EINVAL; + } + // cannot rename anything under /ce + if (!strcmp(old_d->d_name.name, "ce")) { + return -EINVAL; + } + // cannot move anything to /ce + if (!strcmp(new_d->d_name.name, "ce")) { + return -EINVAL; + } + + rc = rename_rule(old_dentry->d_name.name, new_dentry->d_name.name); + + if (!rc) { + old_dir->i_ctime = old_dir->i_mtime = new_dir->i_ctime = + new_dir->i_mtime = inode->i_ctime = CURRENT_TIME; + } + return rc; +} + +// CE allows only the rules directory to be created +int rbce_mkdir(struct inode *dir, struct dentry *dentry, int mode) +{ + int retval = -EINVAL; + + struct dentry *pd = + list_entry(dir->i_dentry.next, struct dentry, d_alias); + + // Allow only /rcfs/ce and ce/rules + if ((!strcmp(pd->d_name.name, "ce") && + 
!strcmp(dentry->d_name.name, "rules")) || + (!strcmp(pd->d_name.name, "/") && + !strcmp(dentry->d_name.name, "ce"))) { + + if (!strcmp(dentry->d_name.name, "ce")) { + try_module_get(THIS_MODULE); + } + retval = rbce_mknod(dir, dentry, mode | S_IFDIR, 0); + if (!retval) { + dir->i_nlink++; + } + } + + return retval; +} + +// CE doesn't allow deletion of directory +int rbce_rmdir(struct inode *dir, struct dentry *dentry) +{ + int rc; + // printk("removal of directory %s prohibited\n", dentry->d_name.name); + rc = simple_rmdir(dir, dentry); + + if (!rc && !strcmp(dentry->d_name.name, "ce")) { + module_put(THIS_MODULE); + } + return rc; +} + +static int +rbce_create(struct inode *dir, struct dentry *dentry, + int mode, struct nameidata *nd) +{ + struct dentry *pd = + list_entry(dir->i_dentry.next, struct dentry, d_alias); + + // Under /ce only "rbce_reclassify", "rbce_state", "rbce_tag" and + // "rbce_info" are allowed + if (!strcmp(pd->d_name.name, "ce")) { + if (strcmp(dentry->d_name.name, "rbce_reclassify") && + strcmp(dentry->d_name.name, "rbce_state") && + strcmp(dentry->d_name.name, "rbce_tag") && + strcmp(dentry->d_name.name, "rbce_info")) { + return -EINVAL; + } + } + + return rbce_mknod(dir, dentry, mode | S_IFREG, 0); +} + +static int rbce_link(struct dentry *old_d, struct inode *dir, struct dentry *d) +{ + return -EINVAL; +} + +static int +rbce_symlink(struct inode *dir, struct dentry *dentry, const char *symname) +{ + return -EINVAL; +} + +/******************************* Magic files ********************/ + +#define RBCE_NR_MAGF 6 +struct rcfs_magf rbce_magf_files[RBCE_NR_MAGF] = { + { + .name = "ce", + .mode = RCFS_DEFAULT_DIR_MODE, + .i_op = &rbce_dir_inode_operations, + }, + { + .name = "rbce_tag", + .mode = RCFS_DEFAULT_FILE_MODE, + .i_fop = &rbce_file_operations, + }, + { + .name = "rbce_info", + .mode = RCFS_DEFAULT_FILE_MODE, + .i_fop = &rbce_file_operations, + }, + { + .name = "rbce_state", + .mode = RCFS_DEFAULT_FILE_MODE, + .i_fop = 
&rbce_file_operations, + }, + { + .name = "rbce_reclassify", + .mode = RCFS_DEFAULT_FILE_MODE, + .i_fop = &rbce_file_operations, + }, + { + .name = "rules", + .mode = (RCFS_DEFAULT_DIR_MODE | S_IWUSR), + .i_fop = &simple_dir_operations, + .i_op = &rbce_dir_inode_operations, + } +}; + +static struct dentry *ce_root_dentry; + +int rbce_create_magic(void) +{ + int rc; + + // Make root dentry + rc = rcfs_mkroot(rbce_magf_files, RBCE_NR_MAGF, &ce_root_dentry); + if ((!ce_root_dentry) || rc) + return rc; + + // Create magic files + if ((rc = rcfs_create_magic(ce_root_dentry, &rbce_magf_files[1], + RBCE_NR_MAGF - 1))) { + printk(KERN_ERR "Failed to create c/rbce magic files." + " Deleting c/rbce root\n"); + rcfs_rmroot(ce_root_dentry); + return rc; + } + + return rc; +} + +int rbce_clear_magic(void) +{ + int rc = 0; + if (ce_root_dentry) + rc = rcfs_rmroot(ce_root_dentry); + return rc; +} + +/******************************* File ops ********************/ + +static struct file_operations rbce_file_operations = { + .owner = THIS_MODULE, + .open = rbce_open, + .llseek = seq_lseek, + .read = seq_read, + .write = rbce_write, + .release = rbce_close, +}; + +static struct inode_operations rbce_file_inode_operations = { + .getattr = simple_getattr, +}; + +static struct inode_operations rbce_dir_inode_operations = { + .create = rbce_create, + .lookup = simple_lookup, + .link = rbce_link, + .unlink = rbce_unlink, + .symlink = rbce_symlink, + .mkdir = rbce_mkdir, + .rmdir = rbce_rmdir, + .mknod = rbce_mknod, + .rename = rbce_rename, + .getattr = simple_getattr, +}; + +#if 0 +static void rbce_put_super(struct super_block *sb) +{ + module_put(THIS_MODULE); + printk("rbce_put_super called\n"); +} + +static struct super_operations rbce_ops = { + .statfs = simple_statfs, + .drop_inode = generic_delete_inode, + .put_super = rbce_put_super, +}; + +static int rbce_fill_super(struct super_block *sb, void *data, int silent) +{ + struct inode *inode; + struct dentry *root; + + sb->s_blocksize 
= PAGE_CACHE_SIZE; + sb->s_blocksize_bits = PAGE_CACHE_SHIFT; + sb->s_magic = RCFS_MAGIC; + sb->s_op = &rbce_ops; + inode = rbce_get_inode(sb, S_IFDIR | 0755, 0); + if (!inode) + return -ENOMEM; + + root = d_alloc_root(inode); + if (!root) { + iput(inode); + return -ENOMEM; + } + sb->s_root = root; + + return 0; +} + +static struct super_block *rbce_get_sb(struct file_system_type *fs_type, + int flags, const char *dev_name, + void *data) +{ + struct super_block *sb = + get_sb_nodev(fs_type, flags, data, rbce_fill_super); + if (sb) { + try_module_get(THIS_MODULE); + } + return sb; +} + +static struct file_system_type rbce_fs_type = { + .name = "rbce", + .get_sb = rbce_get_sb, + .kill_sb = kill_litter_super, +}; + +static int +__init init_rbce_fs(void) +{ + return register_filesystem(&rbce_fs_type); +} + +static void +__exit exit_rbce_fs(void) +{ + unregister_filesystem(&rbce_fs_type); +} + +module_init(init_rbce_fs) + module_exit(exit_rbce_fs) + MODULE_LICENSE("GPL"); +#endif diff --git a/kernel/ckrm/rbce/rbcemod.c b/kernel/ckrm/rbce/rbcemod.c new file mode 100644 index 000000000..f61d0879c --- /dev/null +++ b/kernel/ckrm/rbce/rbcemod.c @@ -0,0 +1,2566 @@ +/* Rule-based Classification Engine (RBCE) module + * + * Copyright (C) Hubertus Franke, IBM Corp. 2003 + * (C) Chandra Seetharaman, IBM Corp. 2003 + * (C) Vivek Kashyap, IBM Corp. 2004 + * + * Module for loading of classification policies and providing + * a user API for Class-based Kernel Resource Management (CKRM) + * + * Latest version, more details at http://ckrm.sf.net + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + */ + +/* Changes + * + * 28 Aug 2003 + * Created. First cut with much scope for cleanup ! + * 07 Nov 2003 + * Made modifications to suit the new RBCE module. 
+ * Made modifications to address sampling and delivery + * 16 Mar 2004 + * Integrated changes from original RBCE module + * 25 Mar 2004 + * Merged RBCE and CRBCE into common code base + * 29 Mar 2004 + * Incorporated listen call back and IPv4 match support + * 23 Apr 2004 + * Added Multi-Classtype Support + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include "bitvector.h" +#include "rbce.h" + +#define DEBUG + +MODULE_DESCRIPTION(RBCE_MOD_DESCR); +MODULE_AUTHOR("Hubertus Franke, Chandra Seetharaman (IBM)"); +MODULE_LICENSE("GPL"); + +static char modname[] = RBCE_MOD_NAME; + +/* ==================== typedef, global variables etc., ==================== */ +struct named_obj_hdr { + struct list_head link; + int referenced; + char *name; +}; + +#define GET_REF(x) ((x)->obj.referenced) +#define INC_REF(x) (GET_REF(x)++) +#define DEC_REF(x) (--GET_REF(x)) +struct rbce_class { + struct named_obj_hdr obj; + int classtype; + void *classobj; +}; + +typedef enum { + RBCE_RULE_CMD_PATH = 1, // full qualified path + RBCE_RULE_CMD, // basename of the command + RBCE_RULE_ARGS, // arguments of the command + RBCE_RULE_REAL_UID, // task's real uid + RBCE_RULE_REAL_GID, // task's real gid + RBCE_RULE_EFFECTIVE_UID, // task's effective uid + RBCE_RULE_EFFECTIVE_GID, // task's effective gid + RBCE_RULE_APP_TAG, // task's application tag + RBCE_RULE_IPV4, // IP address of listen(), ipv4 format + RBCE_RULE_IPV6, // IP address of listen(), ipv6 format + RBCE_RULE_DEP_RULE, // dependent rule; must be the first term + RBCE_RULE_INVALID, // invalid, for filler + RBCE_RULE_INVALID2, // invalid, for filler +} rbce_rule_op_t; + +typedef enum { + RBCE_EQUAL = 1, + RBCE_NOT, + RBCE_LESS_THAN, + RBCE_GREATER_THAN, +} rbce_operator_t; + +struct rbce_rule_term { + rbce_rule_op_t op; + rbce_operator_t operator; + union { + char *string; // path, cmd, arg, tag, ipv4 and ipv6 + long id; // uid, gid, 
euid, egid + struct rbce_rule *deprule; + } u; +}; + +struct rbce_rule { + struct named_obj_hdr obj; + struct rbce_class *target_class; + int classtype; + int num_terms; + int *terms; // vector of indices into the global term vector + int index; // index of this rule into the global term vector + int termflag; // which term ids would require a recalculation + int do_opt; // do we have to consider this rule during optimize + char *strtab; // string table to store the strings of all terms + int order; // order of execution of this rule + int state; // RBCE_RULE_ENABLED/RBCE_RULE_DISABLED +}; + +// rules states +#define RBCE_RULE_DISABLED 0 +#define RBCE_RULE_ENABLED 1 + +/// +// Data structures and macros used for optimization +#define RBCE_TERM_CMD (0) +#define RBCE_TERM_UID (1) +#define RBCE_TERM_GID (2) +#define RBCE_TERM_TAG (3) +#define RBCE_TERM_IPV4 (4) +#define RBCE_TERM_IPV6 (5) + +#define NUM_TERM_MASK_VECTOR (6) + +// Rule flags. 1 bit for each type of rule term +#define RBCE_TERMFLAG_CMD (1 << RBCE_TERM_CMD) +#define RBCE_TERMFLAG_UID (1 << RBCE_TERM_UID) +#define RBCE_TERMFLAG_GID (1 << RBCE_TERM_GID) +#define RBCE_TERMFLAG_TAG (1 << RBCE_TERM_TAG) +#define RBCE_TERMFLAG_IPV4 (1 << RBCE_TERM_IPV4) +#define RBCE_TERMFLAG_IPV6 (1 << RBCE_TERM_IPV6) +#define RBCE_TERMFLAG_ALL (RBCE_TERMFLAG_CMD | RBCE_TERMFLAG_UID | \ + RBCE_TERMFLAG_GID | RBCE_TERMFLAG_TAG | \ + RBCE_TERMFLAG_IPV4 | RBCE_TERMFLAG_IPV6) + +int termop_2_vecidx[RBCE_RULE_INVALID] = { + [RBCE_RULE_CMD_PATH] = RBCE_TERM_CMD, + [RBCE_RULE_CMD] = RBCE_TERM_CMD, + [RBCE_RULE_ARGS] = RBCE_TERM_CMD, + [RBCE_RULE_REAL_UID] = RBCE_TERM_UID, + [RBCE_RULE_REAL_GID] = RBCE_TERM_GID, + [RBCE_RULE_EFFECTIVE_UID] = RBCE_TERM_UID, + [RBCE_RULE_EFFECTIVE_GID] = RBCE_TERM_GID, + [RBCE_RULE_APP_TAG] = RBCE_TERM_TAG, + [RBCE_RULE_IPV4] = RBCE_TERM_IPV4, + [RBCE_RULE_IPV6] = RBCE_TERM_IPV6, + [RBCE_RULE_DEP_RULE] = -1 +}; + +#define TERMOP_2_TERMFLAG(x) (1 << termop_2_vecidx[x]) +#define TERM_2_TERMFLAG(x) (1 << 
x) + +#define POLICY_INC_NUMTERMS (BITS_PER_LONG) // No. of terms added at a time +#define POLICY_ACTION_NEW_VERSION 0x01 // Force reallocation +#define POLICY_ACTION_REDO_ALL 0x02 // Recompute all rule flags +#define POLICY_ACTION_PACK_TERMS 0x04 // Time to pack the terms + +struct ckrm_eng_callback ckrm_ecbs; + +// Term vector state +// +static int gl_bitmap_version, gl_action, gl_num_terms; +static int gl_allocated, gl_released; +struct rbce_rule_term *gl_terms; +bitvector_t *gl_mask_vecs[NUM_TERM_MASK_VECTOR]; + +extern int errno; +static void optimize_policy(void); + +#ifndef CKRM_MAX_CLASSTYPES +#define CKRM_MAX_CLASSTYPES 32 +#endif + +struct list_head rules_list[CKRM_MAX_CLASSTYPES]; +LIST_HEAD(class_list); // List of classes used + +static int gl_num_rules; +static int gl_rules_version; +int rbce_enabled = 1; +static rwlock_t global_rwlock = RW_LOCK_UNLOCKED; + /* + * One lock to protect them all !!! + * Additions, deletions to rules must + * happen with this lock being held in write mode. + * Access(read/write) to any of the data structures must happen + * with this lock held in read mode. + * Since, rule related changes do not happen very often it is ok to + * have single rwlock. + */ + +/* + * data structure rbce_private_data holds the bit vector 'eval' which + * specifies if rules and terms of rules are evaluated against the task + * and if they were evaluated, bit vector 'true' holds the result of that + * evaluation. + * + * This data structure is maintained in a task, and the bitvectors are + * updated only when needed. + * + * Each rule and each term of a rule has a corresponding bit in the vector. + * + */ +struct rbce_private_data { + struct rbce_ext_private_data ext_data; + int evaluate; // whether to evaluate rules or not ? + int rules_version; // whether to evaluate rules or not ? 
+ char *app_tag; + unsigned long bitmap_version; + bitvector_t *eval; + bitvector_t *true; + char data[0]; // eval points to this variable size data array +}; + +#define RBCE_DATA(tsk) ((struct rbce_private_data*)((tsk)->ce_data)) +#define RBCE_DATAP(tsk) ((tsk)->ce_data) + +/* ======================= DEBUG Functions ========================= */ + +#ifdef DEBUG + +int rbcedebug = 0x00; + +#define DBG_CLASSIFY_RES ( 0x01 ) +#define DBG_CLASSIFY_DETAILS ( 0x02 ) +#define DBG_OPTIMIZATION ( 0x04 ) +#define DBG_SHOW_RESCTL ( 0x08 ) +#define DBG_CLASS ( 0x10 ) +#define DBG_RULE ( 0x20 ) +#define DBG_POLICY ( 0x40 ) + +#define DPRINTK(x, y...) if (rbcedebug & (x)) printk(y) + // debugging selectively enabled through /proc/sys/debug/rbce + +static void print_context_vectors(void) +{ + int i; + + if ((rbcedebug & DBG_OPTIMIZATION) == 0) { + return; + } + for (i = 0; i < NUM_TERM_MASK_VECTOR; i++) { + printk("%d: ", i); + bitvector_print(DBG_OPTIMIZATION, gl_mask_vecs[i]); + printk("\n"); + } +} +#else + +#define DPRINTK(x, y...) +#define print_context_vectors(x) +#endif + +/* ======================= Helper Functions ========================= */ + +#include "token.c" + +static struct ckrm_core_class *rbce_classify(struct task_struct *, + struct ckrm_net_struct *, + unsigned long, int classtype); + +static inline struct rbce_rule *find_rule_name(const char *name) +{ + struct named_obj_hdr *pos; + int i; + + for (i = 0; i < CKRM_MAX_CLASSTYPES; i++) { + list_for_each_entry(pos, &rules_list[i], link) { + if (!strcmp(pos->name, name)) { + return ((struct rbce_rule *)pos); + } + } + } + return NULL; +} + +static inline struct rbce_class *find_class_name(const char *name) +{ + struct named_obj_hdr *pos; + + list_for_each_entry(pos, &class_list, link) { + if (!strcmp(pos->name, name)) + return (struct rbce_class *)pos; + } + return NULL; +} + +/* + * Insert the given rule at the specified order + * order = -1 ==> insert at the tail. 
+ * + * Caller must hold global_rwlock in write mode. + */ +static int insert_rule(struct rbce_rule *rule, int order) +{ +#define ORDER_COUNTER_INCR 10 + static int order_counter; + int old_counter; + struct list_head *head = &rules_list[rule->classtype]; + struct list_head *insert = head; + struct rbce_rule *tmp; + + if (gl_num_rules == 0) { + order_counter = 0; + } + + switch (order) { + case -1: + rule->order = order_counter; + // FIXME: order_counter overflow/wraparound!! + order_counter += ORDER_COUNTER_INCR; + break; + default: + old_counter = order_counter; + if (order_counter < order) { + order_counter = order; + } + rule->order = order; + order_counter += ORDER_COUNTER_INCR; + list_for_each_entry(tmp, head, obj.link) { + if (rule->order == tmp->order) { + order_counter = old_counter; + return -EEXIST; + } + if (rule->order < tmp->order) { + insert = &tmp->obj.link; + break; + } + } + } + list_add_tail(&rule->obj.link, insert); + // protect the module from removed when any rule is + // defined + try_module_get(THIS_MODULE); + gl_num_rules++; + gl_rules_version++; + return 0; +} + +/* + * Remove the rule and reinsert at the specified order. + * + * Caller must hold global_rwlock in write mode. + */ +static int reinsert_rule(struct rbce_rule *rule, int order) +{ + list_del(&rule->obj.link); + gl_num_rules--; + gl_rules_version++; + module_put(THIS_MODULE); + return insert_rule(rule, order); +} + +/* + * Get a refernece to the class, create one if it doesn't exist + * + * Caller need to hold global_rwlock in write mode. 
+ * __GFP_WAIT + */ + +static struct rbce_class *create_rbce_class(const char *classname, + int classtype, void *classobj) +{ + struct rbce_class *cls; + + if (classtype >= CKRM_MAX_CLASSTYPES) { + printk(KERN_ERR + "ckrm_classobj returned %d as classtype which cannot " + " be handled by RBCE\n", classtype); + return NULL; + } + + cls = kmalloc(sizeof(struct rbce_class), GFP_ATOMIC); + if (!cls) { + return NULL; + } + cls->obj.name = kmalloc(strlen(classname) + 1, GFP_ATOMIC); + if (cls->obj.name) { + GET_REF(cls) = 1; + cls->classobj = classobj; + strcpy(cls->obj.name, classname); + list_add_tail(&cls->obj.link, &class_list); + cls->classtype = classtype; + } else { + kfree(cls); + cls = NULL; + } + return cls; +} + +static struct rbce_class *get_class(char *classname, int *classtype) +{ + struct rbce_class *cls; + void *classobj; + + if (!classname) { + return NULL; + } + cls = find_class_name(classname); + if (cls) { + if (cls->classobj) { + INC_REF(cls); + *classtype = cls->classtype; + return cls; + } + return NULL; + } + classobj = ckrm_classobj(classname, classtype); + if (!classobj) { + return NULL; + } + + return create_rbce_class(classname, *classtype, classobj); +} + +/* + * Drop a refernece to the class, create one if it doesn't exist + * + * Caller need to hold global_rwlock in write mode. 
+ */ +static void put_class(struct rbce_class *cls) +{ + if (cls) { + if (DEC_REF(cls) <= 0) { + list_del(&cls->obj.link); + kfree(cls->obj.name); + kfree(cls); + } + } + return; +} + +/* + * Callback from core when a class is added + */ + +#ifdef RBCE_EXTENSION +static void rbce_class_addcb(const char *classname, void *clsobj, int classtype) +{ + struct rbce_class *cls; + + write_lock(&global_rwlock); + cls = find_class_name((char *)classname); + if (cls) { + cls->classobj = clsobj; + } else { + cls = create_rbce_class(classname, classtype, clsobj); + } + if (cls) + notify_class_action(cls, 1); + write_unlock(&global_rwlock); + return; +} +#endif + +/* + * Callback from core when a class is deleted. + */ +static void +rbce_class_deletecb(const char *classname, void *classobj, int classtype) +{ + static struct rbce_class *cls; + struct named_obj_hdr *pos; + struct rbce_rule *rule; + + write_lock(&global_rwlock); + cls = find_class_name(classname); + if (cls) { + if (cls->classobj != classobj) { + printk(KERN_ERR "rbce: class %s changed identity\n", + classname); + } + notify_class_action(cls, 0); + cls->classobj = NULL; + list_for_each_entry(pos, &rules_list[cls->classtype], link) { + rule = (struct rbce_rule *)pos; + if (rule->target_class) { + if (!strcmp + (rule->target_class->obj.name, classname)) { + put_class(cls); + rule->target_class = NULL; + rule->classtype = -1; + } + } + } + put_class(cls); + if ((cls = find_class_name(classname)) != NULL) { + printk(KERN_ERR + "rbce ERROR: class %s exists in rbce after " + "removal in core\n", classname); + } + } + write_unlock(&global_rwlock); + return; +} + +/* + * Allocate an index in the global term vector + * On success, returns the index. On failure returns -errno. + * Caller must hold the global_rwlock in write mode as global data is + * written onto. 
+ */ +static int alloc_term_index(void) +{ + int size = gl_allocated; + + if (gl_num_terms >= size) { + int i; + struct rbce_rule_term *oldv, *newv; + int newsize = size + POLICY_INC_NUMTERMS; + + oldv = gl_terms; + newv = + kmalloc(newsize * sizeof(struct rbce_rule_term), + GFP_ATOMIC); + if (!newv) { + return -ENOMEM; + } + memcpy(newv, oldv, size * sizeof(struct rbce_rule_term)); + for (i = size; i < newsize; i++) { + newv[i].op = -1; + } + gl_terms = newv; + gl_allocated = newsize; + kfree(oldv); + + gl_action |= POLICY_ACTION_NEW_VERSION; + DPRINTK(DBG_OPTIMIZATION, + "alloc_term_index: Expanding size from %d to %d\n", + size, newsize); + } + return gl_num_terms++; +} + +/* + * Release an index in the global term vector + * + * Caller must hold the global_rwlock in write mode as the global data + * is written onto. + */ +static void release_term_index(int idx) +{ + if ((idx < 0) || (idx > gl_num_terms)) + return; + + gl_terms[idx].op = -1; + gl_released++; + if ((gl_released > POLICY_INC_NUMTERMS) && + (gl_allocated > + (gl_num_terms - gl_released + POLICY_INC_NUMTERMS))) { + gl_action |= POLICY_ACTION_PACK_TERMS; + } + return; +} + +/* + * Release the indices, string memory, and terms associated with the given + * rule. + * + * Caller should be holding global_rwlock + */ +static void __release_rule(struct rbce_rule *rule) +{ + int i, *terms = rule->terms; + + // remove memory and references from other rules + for (i = rule->num_terms; --i >= 0;) { + struct rbce_rule_term *term = &gl_terms[terms[i]]; + + if (term->op == RBCE_RULE_DEP_RULE) { + DEC_REF(term->u.deprule); + } + release_term_index(terms[i]); + } + rule->num_terms = 0; + if (rule->strtab) { + kfree(rule->strtab); + rule->strtab = NULL; + } + if (rule->terms) { + kfree(rule->terms); + rule->terms = NULL; + } + return; +} + +/* + * delete the given rule and all memory associated with it. 
+ * + * Caller is responsible for protecting the global data + */ +static inline int __delete_rule(struct rbce_rule *rule) +{ + // make sure we are not referenced by other rules + if (GET_REF(rule)) { + return -EBUSY; + } + __release_rule(rule); + put_class(rule->target_class); + release_term_index(rule->index); + list_del(&rule->obj.link); + gl_num_rules--; + gl_rules_version++; + module_put(THIS_MODULE); + kfree(rule->obj.name); + kfree(rule); + return 0; +} + +/* + * Optimize the rule evaluation logic + * + * Caller must hold global_rwlock in write mode. + */ +static void optimize_policy(void) +{ + int i, ii; + struct rbce_rule *rule; + struct rbce_rule_term *terms; + int num_terms; + int bsize; + bitvector_t **mask_vecs; + int pack_terms = 0; + int redoall; + + /* + * Due to dynamic rule addition/deletion of rules the term + * vector can get sparse. As a result the bitvectors grow as we don't + * reuse returned indices. If it becomes sparse enough we pack them + * closer. + */ + + pack_terms = (gl_action & POLICY_ACTION_PACK_TERMS); + DPRINTK(DBG_OPTIMIZATION, + "----- Optimize Policy ----- act=%x pt=%d (a=%d n=%d r=%d)\n", + gl_action, pack_terms, gl_allocated, gl_num_terms, gl_released); + + if (pack_terms) { + int nsz = ALIGN((gl_num_terms - gl_released), + POLICY_INC_NUMTERMS); + int newidx = 0; + struct rbce_rule_term *newterms; + + terms = gl_terms; + newterms = + kmalloc(nsz * sizeof(struct rbce_rule_term), GFP_ATOMIC); + if (newterms) { + for (ii = 0; ii < CKRM_MAX_CLASSTYPES; ii++) { + // FIXME: check only for task class types + list_for_each_entry_reverse(rule, + &rules_list[ii], + obj.link) { + rule->index = newidx++; + for (i = rule->num_terms; --i >= 0;) { + int idx = rule->terms[i]; + newterms[newidx] = terms[idx]; + rule->terms[i] = newidx++; + } + } + } + kfree(terms); + gl_allocated = nsz; + gl_released = 0; + gl_num_terms = newidx; + gl_terms = newterms; + + gl_action &= ~POLICY_ACTION_PACK_TERMS; + gl_action |= POLICY_ACTION_NEW_VERSION; + } 
+ } + + num_terms = gl_num_terms; + bsize = gl_allocated / 8 + sizeof(bitvector_t); + mask_vecs = gl_mask_vecs; + terms = gl_terms; + + if (gl_action & POLICY_ACTION_NEW_VERSION) { + /* allocate new mask vectors */ + char *temp = kmalloc(NUM_TERM_MASK_VECTOR * bsize, GFP_ATOMIC); + + DPRINTK(DBG_OPTIMIZATION, + "------ allocmasks act=%x ------- ver=%d\n", gl_action, + gl_bitmap_version); + if (!temp) { + return; + } + if (mask_vecs[0]) {// index 0 has the alloc returned address + kfree(mask_vecs[0]); + } + for (i = 0; i < NUM_TERM_MASK_VECTOR; i++) { + mask_vecs[i] = (bitvector_t *) (temp + i * bsize); + bitvector_init(mask_vecs[i], gl_allocated); + } + gl_action &= ~POLICY_ACTION_NEW_VERSION; + gl_action |= POLICY_ACTION_REDO_ALL; + gl_bitmap_version++; + } + + /* We do two things here at once + * 1) recompute the rulemask for each required rule + * we guarantee proper dependency order during creation time and + * by reversely running through this list. + * 2) recompute the mask for each term and rule, if required + */ + + redoall = gl_action & POLICY_ACTION_REDO_ALL; + gl_action &= ~POLICY_ACTION_REDO_ALL; + + DPRINTK(DBG_OPTIMIZATION, "------- run act=%x -------- redoall=%d\n", + gl_action, redoall); + for (ii = 0; ii < CKRM_MAX_CLASSTYPES; ii++) { + // FIXME: check only for task class types + list_for_each_entry_reverse(rule, &rules_list[ii], obj.link) { + unsigned long termflag; + + if (!redoall && !rule->do_opt) + continue; + termflag = 0; + for (i = rule->num_terms; --i >= 0;) { + int j, idx = rule->terms[i]; + struct rbce_rule_term *term = &terms[idx]; + int vecidx = termop_2_vecidx[term->op]; + + if (vecidx == -1) { + termflag |= term->u.deprule->termflag; + /* mark this term belonging to all + contexts of deprule */ + for (j = 0; j < NUM_TERM_MASK_VECTOR; + j++) { + if (term->u.deprule->termflag + & (1 << j)) { + bitvector_set(idx, + mask_vecs + [j]); + } + } + } else { + termflag |= TERM_2_TERMFLAG(vecidx); + /* mark this term belonging to + a particular 
context */ + bitvector_set(idx, mask_vecs[vecidx]); + } + } + for (i = 0; i < NUM_TERM_MASK_VECTOR; i++) { + if (termflag & (1 << i)) { + bitvector_set(rule->index, + mask_vecs[i]); + } + } + rule->termflag = termflag; + rule->do_opt = 0; + DPRINTK(DBG_OPTIMIZATION, "r-%s: %x %d\n", + rule->obj.name, rule->termflag, rule->index); + } + } + print_context_vectors(); + return; +} + +/* ======================= Rule related Functions ========================= */ + +/* + * Caller need to hold global_rwlock in write mode. + */ +static int +fill_rule(struct rbce_rule *newrule, struct rbce_rule_term *terms, int nterms) +{ + char *class, *strtab; + int i, j, order, state, real_nterms, index; + int strtablen, rc = 0, counter; + struct rbce_rule_term *term = NULL; + struct rbce_class *targetcls = NULL; + struct rbce_rule *deprule; + + if (!newrule) { + return -EINVAL; + } + // Digest filled terms. + real_nterms = 0; + strtab = class = NULL; + strtablen = 0; + state = -1; + order = -1; + index = -1; + for (i = 0; i < nterms; i++) { + if (terms[i].op != RBCE_RULE_INVALID) { + real_nterms++; + + switch (terms[i].op) { + case RBCE_RULE_DEP_RULE: + // check if the depend rule is valid + // + deprule = find_rule_name(terms[i].u.string); + if (!deprule || deprule == newrule) { + rc = -EINVAL; + goto out; + } else { + // make sure _a_ depend rule + // appears in only one term. 
+ for (j = 0; j < i; j++) { + if (terms[j].op == + RBCE_RULE_DEP_RULE + && terms[j].u.deprule == + deprule) { + rc = -EINVAL; + goto out; + } + } + terms[i].u.deprule = deprule; + } + + // +depend is acceptable and -depend is not + if (terms[i].operator != TOKEN_OP_DEP_DEL) { + terms[i].operator = RBCE_EQUAL; + } else { + rc = -EINVAL; + goto out; + } + break; + + case RBCE_RULE_CMD_PATH: + case RBCE_RULE_CMD: + case RBCE_RULE_ARGS: + case RBCE_RULE_APP_TAG: + case RBCE_RULE_IPV4: + case RBCE_RULE_IPV6: + // sum up the string length + strtablen += strlen(terms[i].u.string) + 1; + break; + default: + break; + + } + } else { + switch (terms[i].operator) { + case TOKEN_OP_ORDER: + order = terms[i].u.id; + if (order < 0) { + rc = -EINVAL; + goto out; + } + break; + case TOKEN_OP_STATE: + state = terms[i].u.id != 0; + break; + case TOKEN_OP_CLASS: + class = terms[i].u.string; + break; + default: + break; + } + } + } + + // Check if class was specified + if (class != NULL) { + int classtype; + if ((targetcls = get_class(class, &classtype)) == NULL) { + rc = -EINVAL; + goto out; + } + put_class(newrule->target_class); + + newrule->target_class = targetcls; + newrule->classtype = classtype; + } + if (!newrule->target_class) { + rc = -EINVAL; + goto out; + } + + if (state != -1) { + newrule->state = state; + } + if (order != -1) { + newrule->order = order; + } + newrule->terms = kmalloc(real_nterms * sizeof(int), GFP_ATOMIC); + if (!newrule->terms) { + rc = -ENOMEM; + goto out; + } + newrule->num_terms = real_nterms; + if (strtablen && ((strtab = kmalloc(strtablen, GFP_ATOMIC)) == NULL)) { + rc = -ENOMEM; + goto out; + } + + if (newrule->index == -1) { + index = alloc_term_index(); + if (index < 0) { + rc = -ENOMEM; + goto out; + } + newrule->index = index; + term = &gl_terms[newrule->index]; + term->op = RBCE_RULE_DEP_RULE; + term->u.deprule = newrule; + } + newrule->strtab = strtab; + newrule->termflag = 0; + + // Fill the term vector + strtablen = 0; + counter = 0; + for 
(i = 0; i < nterms; i++) {
+		if (terms[i].op == RBCE_RULE_INVALID) {
+			continue;
+		}
+
+		newrule->terms[counter] = alloc_term_index();
+		if (newrule->terms[counter] < 0) {
+			for (j = 0; j < counter; j++) {
+				release_term_index(newrule->terms[j]);
+			}
+			rc = -ENOMEM;
+			goto out;
+		}
+		term = &gl_terms[newrule->terms[counter]];
+		term->op = terms[i].op;
+		term->operator = terms[i].operator;
+		switch (terms[i].op) {
+		case RBCE_RULE_CMD_PATH:
+		case RBCE_RULE_CMD:
+		case RBCE_RULE_ARGS:
+		case RBCE_RULE_APP_TAG:
+		case RBCE_RULE_IPV4:
+		case RBCE_RULE_IPV6:
+			term->u.string = &strtab[strtablen];
+			strcpy(term->u.string, terms[i].u.string);
+			/* BUG FIX: was "strtablen = strlen(...) + 1", which
+			 * reset the string-table offset each iteration and
+			 * made a third (or later) string term overwrite the
+			 * earlier strings.  Must advance cumulatively. */
+			strtablen += strlen(term->u.string) + 1;
+			break;
+
+		case RBCE_RULE_REAL_UID:
+		case RBCE_RULE_REAL_GID:
+		case RBCE_RULE_EFFECTIVE_UID:
+		case RBCE_RULE_EFFECTIVE_GID:
+			term->u.id = terms[i].u.id;
+			break;
+
+		case RBCE_RULE_DEP_RULE:
+			term->u.deprule = terms[i].u.deprule;
+			INC_REF(term->u.deprule);
+			break;
+		default:
+			break;
+		}
+		counter++;
+	}
+
+      out:
+	if (rc) {
+		if (targetcls) {
+			put_class(targetcls);
+		}
+		if (index >= 0) {
+			release_term_index(index);
+			newrule->index = -1;	/* BUG FIX: avoid a second
+						 * release in __delete_rule() */
+		}
+		kfree(newrule->terms);
+		/* BUG FIX: clear the freed pointers/counts, otherwise a later
+		 * __release_rule()/__delete_rule() on this rule walks and
+		 * kfrees already-freed memory (use-after-free/double-free). */
+		newrule->terms = NULL;
+		newrule->num_terms = 0;
+		kfree(strtab);
+		newrule->strtab = NULL;
+
+	}
+	return rc;
+}
+
+/*
+ * Create the named rule from its textual definition, or merge the new
+ * definition into an existing rule of the same name.
+ */
+int change_rule(const char *rname, char *rdefn)
+{
+	struct rbce_rule *rule = NULL, *deprule;
+	struct rbce_rule_term *new_terms = NULL, *term, *terms;
+	int nterms, new_term_mask = 0, oterms, tot_terms;
+	int i, j, k, rc, new_order = 0;
+
+	if ((nterms = rules_parse(rdefn, &new_terms, &new_term_mask)) <= 0) {
+		return !nterms ? 
-EINVAL : nterms; + } + + write_lock(&global_rwlock); + rule = find_rule_name(rname); + if (rule == NULL) { + rule = kmalloc(sizeof(struct rbce_rule), GFP_ATOMIC); + if (rule) { + rule->obj.name = kmalloc(strlen(rname) + 1, GFP_ATOMIC); + if (rule->obj.name) { + strcpy(rule->obj.name, rname); + GET_REF(rule) = 0; + rule->order = -1; + rule->index = -1; + rule->state = RBCE_RULE_ENABLED; + rule->target_class = NULL; + rule->classtype = -1; + rule->terms = NULL; + rule->do_opt = 1; + INIT_LIST_HEAD(&rule->obj.link); + rc = fill_rule(rule, new_terms, nterms); + if (rc) { + kfree(rule); + } else { + if ((rc = + insert_rule(rule, + rule->order)) == 0) { + if (rbce_enabled) { + optimize_policy(); + } + } else { + __delete_rule(rule); + } + } + } else { + kfree(rule); + rc = -ENOMEM; + } + kfree(new_terms); + } else { + rc = -ENOMEM; + } + write_unlock(&global_rwlock); + return rc; + } + + oterms = rule->num_terms; + tot_terms = nterms + oterms; + + terms = kmalloc(tot_terms * sizeof(struct rbce_rule_term), GFP_ATOMIC); + + if (!terms) { + kfree(new_terms); + write_unlock(&global_rwlock); + return -ENOMEM; + } + + new_term_mask &= ~(1 << RBCE_RULE_DEP_RULE); + //ignore the new deprule terms for the first iteration. + // taken care of later. + for (i = 0; i < oterms; i++) { + term = &gl_terms[rule->terms[i]]; // old term + + if ((1 << term->op) & new_term_mask) { + // newrule has this attr/value + for (j = 0; j < nterms; j++) { + if (term->op == new_terms[j].op) { + terms[i].op = new_terms[j].op; + terms[i].operator = new_terms[j]. 
+ operator; + terms[i].u.string = + new_terms[j].u.string; + new_terms[j].op = RBCE_RULE_INVALID2; + break; + } + } + } else { + terms[i].op = term->op; + terms[i].operator = term->operator; + terms[i].u.string = term->u.string; + } + } + + i = oterms; // for readability + + for (j = 0; j < nterms; j++) { + // handled in the previous iteration + if (new_terms[j].op == RBCE_RULE_INVALID2) { + continue; + } + + if (new_terms[j].op == RBCE_RULE_DEP_RULE) { + if (new_terms[j].operator == TOKEN_OP_DEP) { + // "depend=rule" deletes all depends in the + // original rule so, delete all depend rule + // terms in the original rule + for (k = 0; k < oterms; k++) { + if (terms[k].op == RBCE_RULE_DEP_RULE) { + terms[k].op = RBCE_RULE_INVALID; + } + } + // must copy the new deprule term + } else { + // delete the depend rule term if was defined + // in the original rule for both +depend + // and -depend + deprule = find_rule_name(new_terms[j].u.string); + if (deprule) { + for (k = 0; k < oterms; k++) { + if (terms[k].op == + RBCE_RULE_DEP_RULE + && terms[k].u.deprule == + deprule) { + terms[k].op = + RBCE_RULE_INVALID; + break; + } + } + } + if (new_terms[j].operator == TOKEN_OP_DEP_DEL) { + // No need to copy the new deprule term + continue; + } + } + } else { + if ((new_terms[j].op == RBCE_RULE_INVALID) && + (new_terms[j].operator == TOKEN_OP_ORDER)) { + new_order++; + } + } + terms[i].op = new_terms[j].op; + terms[i].operator = new_terms[j].operator; + terms[i].u.string = new_terms[j].u.string; + i++; + new_terms[j].op = RBCE_RULE_INVALID2; + } + + tot_terms = i; + + // convert old deprule pointers to name pointers. 
+ for (i = 0; i < oterms; i++) { + if (terms[i].op != RBCE_RULE_DEP_RULE) + continue; + terms[i].u.string = terms[i].u.deprule->obj.name; + } + + // release the rule + __release_rule(rule); + + rule->do_opt = 1; + rc = fill_rule(rule, terms, tot_terms); + if (rc == 0 && new_order) { + rc = reinsert_rule(rule, rule->order); + } + if (rc != 0) { // rule creation/insertion failed + __delete_rule(rule); + } + if (rbce_enabled) { + optimize_policy(); + } + write_unlock(&global_rwlock); + kfree(new_terms); + kfree(terms); + return rc; +} + +/* + * Delete the specified rule. + * + */ +int delete_rule(const char *rname) +{ + int rc = 0; + struct rbce_rule *rule; + + write_lock(&global_rwlock); + + if ((rule = find_rule_name(rname)) == NULL) { + write_unlock(&global_rwlock); + goto out; + } + rc = __delete_rule(rule); + if (rbce_enabled && (gl_action & POLICY_ACTION_PACK_TERMS)) { + optimize_policy(); + } + write_unlock(&global_rwlock); + out: + DPRINTK(DBG_RULE, "delete rule %s\n", rname); + return rc; +} + +/* + * copy the rule specified by rname and to the given result string. 
+ * + */ +void get_rule(const char *rname, char *result) +{ + int i; + struct rbce_rule *rule; + struct rbce_rule_term *term; + char *cp = result, oper, idtype[3], str[5]; + + read_lock(&global_rwlock); + + rule = find_rule_name(rname); + if (rule != NULL) { + for (i = 0; i < rule->num_terms; i++) { + term = gl_terms + rule->terms[i]; + switch (term->op) { + case RBCE_RULE_REAL_UID: + strcpy(idtype, "u"); + goto handleid; + case RBCE_RULE_REAL_GID: + strcpy(idtype, "g"); + goto handleid; + case RBCE_RULE_EFFECTIVE_UID: + strcpy(idtype, "eu"); + goto handleid; + case RBCE_RULE_EFFECTIVE_GID: + strcpy(idtype, "eg"); + handleid: + if (term->operator == RBCE_LESS_THAN) { + oper = '<'; + } else if (term->operator == RBCE_GREATER_THAN) { + oper = '>'; + } else if (term->operator == RBCE_NOT) { + oper = '!'; + } else { + oper = '='; + } + cp += + sprintf(cp, "%sid%c%ld,", idtype, oper, + term->u.id); + break; + case RBCE_RULE_CMD_PATH: + strcpy(str, "path"); + goto handle_str; + case RBCE_RULE_CMD: + strcpy(str, "cmd"); + goto handle_str; + case RBCE_RULE_ARGS: + strcpy(str, "args"); + goto handle_str; + case RBCE_RULE_APP_TAG: + strcpy(str, "tag"); + goto handle_str; + case RBCE_RULE_IPV4: + strcpy(str, "ipv4"); + goto handle_str; + case RBCE_RULE_IPV6: + strcpy(str, "ipv6"); + handle_str: + cp += + sprintf(cp, "%s=%s,", str, term->u.string); + break; + case RBCE_RULE_DEP_RULE: + cp += + sprintf(cp, "depend=%s,", + term->u.deprule->obj.name); + break; + default: + break; + } + } + if (!rule->num_terms) { + cp += sprintf(cp, "***** no terms defined ***** "); + } + + cp += + sprintf(cp, "order=%d,state=%d,", rule->order, rule->state); + cp += + sprintf(cp, "class=%s", + rule->target_class ? rule->target_class->obj. 
+		    name : "***** REMOVED *****");
+		*cp = '\0';
+	} else {
+		sprintf(result, "***** Rule %s doesn't exist *****", rname);
+	}
+
+	read_unlock(&global_rwlock);
+	return;
+}
+
+/*
+ * Change the name of the given rule "from_rname" to "to_rname"
+ *
+ */
+int rename_rule(const char *from_rname, const char *to_rname)
+{
+	struct rbce_rule *rule;
+	int nlen, rc = -EINVAL;
+
+	if (!to_rname || !*to_rname) {
+		return rc;
+	}
+	write_lock(&global_rwlock);
+
+	rule = find_rule_name(from_rname);
+	if (rule != NULL) {
+		// only reallocate when the new name doesn't fit in place
+		if ((nlen = strlen(to_rname)) > strlen(rule->obj.name)) {
+			char *name = kmalloc(nlen + 1, GFP_ATOMIC);
+			if (!name) {
+				/* BUG FIX: was returning with global_rwlock
+				 * still held in write mode, deadlocking every
+				 * later rule operation. */
+				write_unlock(&global_rwlock);
+				return -ENOMEM;
+			}
+			kfree(rule->obj.name);
+			rule->obj.name = name;
+		}
+		strcpy(rule->obj.name, to_rname);
+		rc = 0;
+	}
+	write_unlock(&global_rwlock);
+	return rc;
+}
+
+/*
+ * Return TRUE if the given rule exists, FALSE otherwise
+ *
+ */
+int rule_exists(const char *rname)
+{
+	struct rbce_rule *rule;
+
+	read_lock(&global_rwlock);
+	rule = find_rule_name(rname);
+	read_unlock(&global_rwlock);
+	return rule != NULL;
+}
+
+/*====================== Magic file handling =======================*/
+/*
+ * Reclassify
+ */
+static struct rbce_private_data *create_private_data(struct rbce_private_data *,
+						     int);
+
+/* Stub: full reclassification is not implemented in this build. */
+int rbce_ckrm_reclassify(int pid)
+{
+	printk("ckrm_reclassify_pid ignored\n");
+	return -EINVAL;
+}
+
+int reclassify_pid(int pid)
+{
+	struct task_struct *tsk;
+
+	// FIXME: Need to treat -pid as process group
+	if (pid < 0) {
+		return -EINVAL;
+	}
+
+	if (pid == 0) {
+		/* NOTE(review): falls through to find_task_by_pid(0) below,
+		 * which fails — confirm whether this should return here
+		 * after reclassifying all tasks. */
+		rbce_ckrm_reclassify(0);	// just reclassify all tasks.
+ } + // if pid is +ve take control of the task, start evaluating it + if ((tsk = find_task_by_pid(pid)) == NULL) { + return -EINVAL; + } + + if (unlikely(!RBCE_DATA(tsk))) { + RBCE_DATAP(tsk) = create_private_data(NULL, 0); + if (!RBCE_DATA(tsk)) { + return -ENOMEM; + } + } + RBCE_DATA(tsk)->evaluate = 1; + rbce_ckrm_reclassify(pid); + return 0; +} + +int set_tasktag(int pid, char *tag) +{ + char *tp; + struct task_struct *tsk; + struct rbce_private_data *pdata; + + if (!tag) { + return -EINVAL; + } + + if ((tsk = find_task_by_pid(pid)) == NULL) { + return -EINVAL; + } + + tp = kmalloc(strlen(tag) + 1, GFP_ATOMIC); + + if (!tp) { + return -ENOMEM; + } + + if (unlikely(!RBCE_DATA(tsk))) { + RBCE_DATAP(tsk) = create_private_data(NULL, 0); + if (!RBCE_DATA(tsk)) { + kfree(tp); + return -ENOMEM; + } + } + pdata = RBCE_DATA(tsk); + if (pdata->app_tag) { + kfree(pdata->app_tag); + } + pdata->app_tag = tp; + strcpy(pdata->app_tag, tag); + rbce_ckrm_reclassify(pid); + + return 0; +} + +/*====================== Classification Functions =======================*/ + +/* + * Match the given full path name with the command expression. + * This function treats the folowing 2 charaters as special if seen in + * cmd_exp, all other chanracters are compared as is: + * ? - compares to any one single character + * * - compares to one or more single characters + * + * If fullpath is 1, tsk_comm is compared in full. otherwise only the command + * name (basename(tsk_comm)) is compared. + */ +static int match_cmd(const char *tsk_comm, const char *cmd_exp, int fullpath) +{ + const char *c, *t, *last_ast, *cmd = tsk_comm; + char next_c; + + // get the command name if we don't have to match the fullpath + if (!fullpath && ((c = strrchr(tsk_comm, '/')) != NULL)) { + cmd = c + 1; + } + + /* now faithfully assume the entire pathname is in cmd */ + + /* we now have to effectively implement a regular expression + * for now assume + * '?' any single character + * '*' one or more '?' 
+ * rest must match + */ + + c = cmd_exp; + t = cmd; + if (t == NULL || c == NULL) { + return 0; + } + + last_ast = NULL; + next_c = '\0'; + + while (*c && *t) { + switch (*c) { + case '?': + if (*t == '/') { + return 0; + } + c++; + t++; + continue; + case '*': + if (*t == '/') { + return 0; + } + // eat up all '*' in c + while (*(c + 1) == '*') + c++; + next_c = '\0'; + last_ast = c; + //t++; // Add this for matching '*' with "one" + // or more chars. + while (*t && (*t != *(c + 1)) && *t != '/') + t++; + if (*t == *(c + 1)) { + c++; + if (*c != '/') { + if (*c == '?') { + if (*t == '/') { + return 0; + } + t++; + c++; + } + next_c = *c; + if (*c) { + if (*t == '/') { + return 0; + } + t++; + c++; + if (!*c && *t) + c = last_ast; + } + } else { + last_ast = NULL; + } + continue; + } + return 0; + case '/': + next_c = '\0'; + /*FALLTHRU*/ default: + if (*t == *c && next_c != *t) { + c++, t++; + continue; + } else { + /* reset to last asterix and + continue from there */ + if (last_ast) { + c = last_ast; + } else { + return 0; + } + } + } + } + + /* check for trailing "*" */ + while (*c == '*') + c++; + + return (!*c && !*t); +} + +static void reverse(char *str, int n) +{ + char s; + int i, j = n - 1; + + for (i = 0; i < j; i++, j--) { + s = str[i]; + str[i] = str[j]; + str[j] = s; + } +} + +static int itoa(int n, char *str) +{ + int i = 0, sz = 0; + + do { + str[i++] = n % 10 + '0'; + sz++; + n = n / 10; + } while (n > 0); + + (void)reverse(str, sz); + return sz; +} + +static int v4toa(__u32 y, char *a) +{ + int i; + int size = 0; + + for (i = 0; i < 4; i++) { + size += itoa(y & 0xff, &a[size]); + a[size++] = '.'; + y >>= 8; + } + return --size; +} + +int match_ipv4(struct ckrm_net_struct *ns, char **string) +{ + char *ptr = *string; + int size; + char a4[16]; + + size = v4toa(ns->ns_daddrv4, a4); + + *string += size; + return !strncmp(a4, ptr, size); +} + +int match_port(struct ckrm_net_struct *ns, char *ptr) +{ + char a[5]; + int size = itoa(ns->ns_dport, a); + 
+ return !strncmp(a, ptr, size); +} + +static int __evaluate_rule(struct task_struct *tsk, struct ckrm_net_struct *ns, + struct rbce_rule *rule, bitvector_t * vec_eval, + bitvector_t * vec_true, char **filename); +/* + * evaluate the given task against the given rule with the vec_eval and + * vec_true in context. Return 1 if the task satisfies the given rule, 0 + * otherwise. + * + * If the bit corresponding to the rule is set in the vec_eval, then the + * corresponding bit in vec_true is the result. If it is not set, evaluate + * the rule and set the bits in both the vectors accordingly. + * + * On return, filename will have the pointer to the pathname of the task's + * executable, if the rule had any command related terms. + * + * Caller must hold the global_rwlock atleast in read mode. + */ +static inline int +evaluate_rule(struct task_struct *tsk, struct ckrm_net_struct *ns, + struct rbce_rule *rule, bitvector_t * vec_eval, + bitvector_t * vec_true, char **filename) +{ + int tidx = rule->index; + + if (!bitvector_test(tidx, vec_eval)) { + if (__evaluate_rule + (tsk, ns, rule, vec_eval, vec_true, filename)) { + bitvector_set(tidx, vec_true); + } + bitvector_set(tidx, vec_eval); + } + return bitvector_test(tidx, vec_true); +} + +/* + * evaluate the given task against every term in the given rule with + * vec_eval and vec_true in context. + * + * If the bit corresponding to a rule term is set in the vec_eval, then the + * corresponding bit in vec_true is the result for taht particular. If it is + * not set, evaluate the rule term and set the bits in both the vectors + * accordingly. + * + * This fucntions returns true only if all terms in the rule evaluate true. + * + * On return, filename will have the pointer to the pathname of the task's + * executable, if the rule had any command related terms. + * + * Caller must hold the global_rwlock atleast in read mode. 
+ */ +static int +__evaluate_rule(struct task_struct *tsk, struct ckrm_net_struct *ns, + struct rbce_rule *rule, bitvector_t * vec_eval, + bitvector_t * vec_true, char **filename) +{ + int i; + int no_ip = 1; + + for (i = rule->num_terms; --i >= 0;) { + int rc = 1, tidx = rule->terms[i]; + + if (!bitvector_test(tidx, vec_eval)) { + struct rbce_rule_term *term = &gl_terms[tidx]; + + switch (term->op) { + + case RBCE_RULE_CMD_PATH: + case RBCE_RULE_CMD: +#if 0 + if (!*filename) { /* get this once */ + if (((*filename = + kmalloc(NAME_MAX, + GFP_ATOMIC)) == NULL) + || + (get_exe_path_name + (tsk, *filename, NAME_MAX) < 0)) { + rc = 0; + break; + } + } + rc = match_cmd(*filename, term->u.string, + (term->op == + RBCE_RULE_CMD_PATH)); +#else + rc = match_cmd(tsk->comm, term->u.string, + (term->op == + RBCE_RULE_CMD_PATH)); +#endif + break; + case RBCE_RULE_REAL_UID: + if (term->operator == RBCE_LESS_THAN) { + rc = (tsk->uid < term->u.id); + } else if (term->operator == RBCE_GREATER_THAN){ + rc = (tsk->uid > term->u.id); + } else if (term->operator == RBCE_NOT) { + rc = (tsk->uid != term->u.id); + } else { + rc = (tsk->uid == term->u.id); + } + break; + case RBCE_RULE_REAL_GID: + if (term->operator == RBCE_LESS_THAN) { + rc = (tsk->gid < term->u.id); + } else if (term->operator == RBCE_GREATER_THAN){ + rc = (tsk->gid > term->u.id); + } else if (term->operator == RBCE_NOT) { + rc = (tsk->gid != term->u.id); + } else { + rc = (tsk->gid == term->u.id); + } + break; + case RBCE_RULE_EFFECTIVE_UID: + if (term->operator == RBCE_LESS_THAN) { + rc = (tsk->euid < term->u.id); + } else if (term->operator == RBCE_GREATER_THAN){ + rc = (tsk->euid > term->u.id); + } else if (term->operator == RBCE_NOT) { + rc = (tsk->euid != term->u.id); + } else { + rc = (tsk->euid == term->u.id); + } + break; + case RBCE_RULE_EFFECTIVE_GID: + if (term->operator == RBCE_LESS_THAN) { + rc = (tsk->egid < term->u.id); + } else if (term->operator == RBCE_GREATER_THAN){ + rc = (tsk->egid > term->u.id); + 
} else if (term->operator == RBCE_NOT) { + rc = (tsk->egid != term->u.id); + } else { + rc = (tsk->egid == term->u.id); + } + break; + case RBCE_RULE_APP_TAG: + rc = (RBCE_DATA(tsk) + && RBCE_DATA(tsk)-> + app_tag) ? !strcmp(RBCE_DATA(tsk)-> + app_tag, + term->u.string) : 0; + break; + case RBCE_RULE_DEP_RULE: + rc = evaluate_rule(tsk, NULL, term->u.deprule, + vec_eval, vec_true, + filename); + break; + + case RBCE_RULE_IPV4: + // TBD: add NOT_EQUAL match. At present rbce + // recognises EQUAL matches only. + if (ns && term->operator == RBCE_EQUAL) { + int ma = 0; + int mp = 0; + char *ptr = term->u.string; + + if (term->u.string[0] == '*') + ma = 1; + else + ma = match_ipv4(ns, &ptr); + + if (*ptr != '\\') { // error + rc = 0; + break; + } else { + ++ptr; + if (*ptr == '*') + mp = 1; + else + mp = match_port(ns, + ptr); + } + rc = mp && ma; + } else + rc = 0; + no_ip = 0; + break; + + case RBCE_RULE_IPV6: // no support yet + rc = 0; + no_ip = 0; + break; + + default: + rc = 0; + printk(KERN_ERR "Error evaluate term op=%d\n", + term->op); + break; + } + if (!rc && no_ip) { + bitvector_clear(tidx, vec_true); + } else { + bitvector_set(tidx, vec_true); + } + bitvector_set(tidx, vec_eval); + } else { + rc = bitvector_test(tidx, vec_true); + } + if (!rc) { + return 0; + } + } + return 1; +} + +//#define PDATA_DEBUG +#ifdef PDATA_DEBUG + +#define MAX_PDATA 10000 +void *pdata_arr[MAX_PDATA]; +int pdata_count, pdata_next; +static spinlock_t pdata_lock = SPIN_LOCK_UNLOCKED; + +static inline int valid_pdata(struct rbce_private_data *pdata) +{ + int i; + + if (!pdata) { + return 1; + } + spin_lock(&pdata_lock); + for (i = 0; i < MAX_PDATA; i++) { + if (pdata_arr[i] == pdata) { + spin_unlock(&pdata_lock); + return 1; + } + } + spin_unlock(&pdata_lock); + printk("INVALID/CORRUPT PDATA %p\n", pdata); + return 0; +} + +static inline void store_pdata(struct rbce_private_data *pdata) +{ + int i = 0; + + if (pdata) { + spin_lock(&pdata_lock); + + while (i < MAX_PDATA) { + if 
(pdata_arr[pdata_next] == NULL) { + printk("storing %p at %d, count %d\n", pdata, + pdata_next, pdata_count); + pdata_arr[pdata_next++] = pdata; + if (pdata_next == MAX_PDATA) { + pdata_next = 0; + } + pdata_count++; + break; + } + pdata_next++; + i++; + } + spin_unlock(&pdata_lock); + } + if (i == MAX_PDATA) { + printk("PDATA BUFFER FULL pdata_count %d pdata %p\n", + pdata_count, pdata); + } +} + +static inline void unstore_pdata(struct rbce_private_data *pdata) +{ + int i; + if (pdata) { + spin_lock(&pdata_lock); + for (i = 0; i < MAX_PDATA; i++) { + if (pdata_arr[i] == pdata) { + printk("unstoring %p at %d, count %d\n", pdata, + i, pdata_count); + pdata_arr[i] = NULL; + pdata_count--; + pdata_next = i; + break; + } + } + spin_unlock(&pdata_lock); + if (i == MAX_PDATA) { + printk("pdata %p not found in the stored array\n", + pdata); + } + } + return; +} + +#else // PDATA_DEBUG + +#define valid_pdata(pdata) (1) +#define store_pdata(pdata) +#define unstore_pdata(pdata) + +#endif // PDATA_DEBUG + +const int use_persistent_state = 1; + +/* + * Allocate and initialize a rbce_private_data data structure. + * + * Caller must hold global_rwlock atleast in read mode. 
+ */ + +static inline void +copy_ext_private_data(struct rbce_private_data *src, + struct rbce_private_data *dst) +{ + if (src) + dst->ext_data = src->ext_data; + else + memset(&dst->ext_data, 0, sizeof(dst->ext_data)); +} + +static struct rbce_private_data *create_private_data(struct rbce_private_data + *src, int copy_sample) +{ + int vsize, psize, bsize; + struct rbce_private_data *pdata; + + if (use_persistent_state) { + vsize = gl_allocated; + bsize = vsize / 8 + sizeof(bitvector_t); + psize = sizeof(struct rbce_private_data) + 2 * bsize; + } else { + psize = sizeof(struct rbce_private_data); + } + + pdata = kmalloc(psize, GFP_ATOMIC); + if (pdata != NULL) { + if (use_persistent_state) { + pdata->bitmap_version = gl_bitmap_version; + pdata->eval = (bitvector_t *) & pdata->data[0]; + pdata->true = (bitvector_t *) & pdata->data[bsize]; + if (src && (src->bitmap_version == gl_bitmap_version)) { + memcpy(pdata->data, src->data, 2 * bsize); + } else { + bitvector_init(pdata->eval, vsize); + bitvector_init(pdata->true, vsize); + } + } + copy_ext_private_data(src, pdata); + //if (src) { // inherit evaluate and app_tag + // pdata->evaluate = src->evaluate; + // if(src->app_tag) { + // int len = strlen(src->app_tag)+1; + // printk("CREATE_PRIVATE: apptag %s len %d\n", + // src->app_tag,len); + // pdata->app_tag = kmalloc(len, GFP_ATOMIC); + // if (pdata->app_tag) { + // strcpy(pdata->app_tag, src->app_tag); + // } + // } + //} else { + pdata->evaluate = 1; + pdata->rules_version = src ? 
src->rules_version : 0; + pdata->app_tag = NULL; + //} + } + store_pdata(pdata); + return pdata; +} + +static inline void free_private_data(struct rbce_private_data *pdata) +{ + if (valid_pdata(pdata)) { + unstore_pdata(pdata); + kfree(pdata); + } +} + +static void free_all_private_data(void) +{ + struct task_struct *proc, *thread; + + read_lock(&tasklist_lock); + do_each_thread(proc, thread) { + struct rbce_private_data *pdata; + + pdata = RBCE_DATA(thread); + RBCE_DATAP(thread) = NULL; + free_private_data(pdata); + } while_each_thread(proc, thread); + read_unlock(&tasklist_lock); + return; +} + +/* + * reclassify function, which is called by all the callback functions. + * + * Takes that task to be reclassified and ruleflags that indicates the + * attributes that caused this reclassification request. + * + * On success, returns the core class pointer to which the given task should + * belong to. + */ +static struct ckrm_core_class *rbce_classify(struct task_struct *tsk, + struct ckrm_net_struct *ns, + unsigned long termflag, + int classtype) +{ + int i; + struct rbce_rule *rule; + bitvector_t *vec_true = NULL, *vec_eval = NULL; + struct rbce_class *tgt = NULL; + struct ckrm_core_class *cls = NULL; + char *filename = NULL; + + if (!valid_pdata(RBCE_DATA(tsk))) { + return NULL; + } + if (classtype >= CKRM_MAX_CLASSTYPES) { + // can't handle more than CKRM_MAX_CLASSTYPES + return NULL; + } + // fast path to avoid locking in case CE is not enabled or if no rules + // are defined or if the tasks states that no evaluation is needed. + if (!rbce_enabled || !gl_num_rules || + (RBCE_DATA(tsk) && !RBCE_DATA(tsk)->evaluate)) { + return NULL; + } + // FIXME: optimize_policy should be called from here if + // gl_action is non-zero. Also, it has to be called with the + // global_rwlock held in write mode. 
+ + read_lock(&global_rwlock); + + vec_eval = vec_true = NULL; + if (use_persistent_state) { + struct rbce_private_data *pdata = RBCE_DATA(tsk); + + if (!pdata + || (pdata + && (gl_bitmap_version != pdata->bitmap_version))) { + struct rbce_private_data *new_pdata = + create_private_data(pdata, 1); + + if (new_pdata) { + if (pdata) { + new_pdata->rules_version = + pdata->rules_version; + new_pdata->evaluate = pdata->evaluate; + new_pdata->app_tag = pdata->app_tag; + free_private_data(pdata); + } + pdata = RBCE_DATAP(tsk) = new_pdata; + termflag = RBCE_TERMFLAG_ALL; + // need to evaluate them all + } else { + // we shouldn't free the pdata as it has more + // details than the vectors. But, this + // reclassification should go thru + pdata = NULL; + } + } + if (!pdata) { + goto cls_determined; + } + vec_eval = pdata->eval; + vec_true = pdata->true; + } else { + int bsize = gl_allocated; + + vec_eval = bitvector_alloc(bsize); + vec_true = bitvector_alloc(bsize); + + if (vec_eval == NULL || vec_true == NULL) { + goto cls_determined; + } + termflag = RBCE_TERMFLAG_ALL; + // need to evaluate all of them now + } + + /* + * using bit ops invalidate all terms related to this termflag + * context (only in per task vec) + */ + DPRINTK(DBG_CLASSIFY_DETAILS, "\nClassify: termflag=%lx\n", termflag); + DPRINTK(DBG_CLASSIFY_DETAILS, " eval before: "); + bitvector_print(DBG_CLASSIFY_DETAILS, vec_eval); + DPRINTK(DBG_CLASSIFY_DETAILS, "\n true before: "); + bitvector_print(DBG_CLASSIFY_DETAILS, vec_true); + DPRINTK(DBG_CLASSIFY_DETAILS, "\n redo => "); + + if (termflag == RBCE_TERMFLAG_ALL) { + DPRINTK(DBG_CLASSIFY_DETAILS, " redoall "); + bitvector_zero(vec_eval); + } else { + for (i = 0; i < NUM_TERM_MASK_VECTOR; i++) { + if (test_bit(i, &termflag)) { + bitvector_t *maskvec = gl_mask_vecs[i]; + + DPRINTK(DBG_CLASSIFY_DETAILS, " mask(%d) ", i); + bitvector_print(DBG_CLASSIFY_DETAILS, maskvec); + bitvector_and_not(vec_eval, vec_eval, maskvec); + } + } + } + bitvector_and(vec_true, 
vec_true, vec_eval); + + DPRINTK(DBG_CLASSIFY_DETAILS, "\n eval now: "); + bitvector_print(DBG_CLASSIFY_DETAILS, vec_eval); + DPRINTK(DBG_CLASSIFY_DETAILS, "\n"); + + /* run through the rules in order and see what needs evaluation */ + list_for_each_entry(rule, &rules_list[classtype], obj.link) { + if (rule->state == RBCE_RULE_ENABLED && + rule->target_class && + rule->target_class->classobj && + evaluate_rule(tsk, ns, rule, vec_eval, vec_true, + &filename)) { + tgt = rule->target_class; + cls = rule->target_class->classobj; + break; + } + } + + cls_determined: + DPRINTK(DBG_CLASSIFY_RES, + "==> |%s|; pid %d; euid %d; egid %d; ruid %d; rgid %d;" + "tag |%s| ===> class |%s|\n", + filename ? filename : tsk->comm, + tsk->pid, + tsk->euid, + tsk->egid, + tsk->uid, + tsk->gid, + RBCE_DATA(tsk) ? RBCE_DATA(tsk)->app_tag : "", + tgt ? tgt->obj.name : ""); + DPRINTK(DBG_CLASSIFY_DETAILS, " eval after: "); + bitvector_print(DBG_CLASSIFY_DETAILS, vec_eval); + DPRINTK(DBG_CLASSIFY_DETAILS, "\n true after: "); + bitvector_print(DBG_CLASSIFY_DETAILS, vec_true); + DPRINTK(DBG_CLASSIFY_DETAILS, "\n"); + + if (!use_persistent_state) { + if (vec_eval) { + bitvector_free(vec_eval); + } + if (vec_true) { + bitvector_free(vec_true); + } + } + ckrm_core_grab(cls); + read_unlock(&global_rwlock); + if (filename) { + kfree(filename); + } + if (RBCE_DATA(tsk)) { + RBCE_DATA(tsk)->rules_version = gl_rules_version; + } + return cls; +} + +/***************************************************************************** + * + * Module specific utilization of core RBCE functionality + * + * Includes support for the various classtypes + * New classtypes will require extensions here + * + *****************************************************************************/ + +/* helper functions that are required in the extended version */ + +static inline void rbce_tc_manual(struct task_struct *tsk) +{ + read_lock(&global_rwlock); + + if (!RBCE_DATA(tsk)) { + RBCE_DATAP(tsk) = + (void 
*)create_private_data(RBCE_DATA(tsk->parent), 0); + } + if (RBCE_DATA(tsk)) { + RBCE_DATA(tsk)->evaluate = 0; + } + read_unlock(&global_rwlock); + return; +} + +/***************************************************************************** + * load any extensions + *****************************************************************************/ + +#ifdef RBCE_EXTENSION +#include "rbcemod_ext.c" +#endif + +/***************************************************************************** + * VARIOUS CLASSTYPES + *****************************************************************************/ + +// to enable type coercion of the function pointers + +/*============================================================================ + * TASKCLASS CLASSTYPE + *============================================================================*/ + +int tc_classtype = -1; + +/* + * fork callback to be registered with core module. + */ +inline static void *rbce_tc_forkcb(struct task_struct *tsk) +{ + int rule_version_changed = 1; + struct ckrm_core_class *cls; + read_lock(&global_rwlock); + // dup ce_data + RBCE_DATAP(tsk) = + (void *)create_private_data(RBCE_DATA(tsk->parent), 0); + read_unlock(&global_rwlock); + + if (RBCE_DATA(tsk->parent)) { + rule_version_changed = + (RBCE_DATA(tsk->parent)->rules_version != gl_rules_version); + } + cls = rule_version_changed ? + rbce_classify(tsk, NULL, RBCE_TERMFLAG_ALL, tc_classtype) : NULL; + + // note the fork notification to any user client will be sent through + // the guaranteed fork-reclassification + return cls; +} + +/* + * exit callback to be registered with core module. 
+ */ +static void rbce_tc_exitcb(struct task_struct *tsk) +{ + struct rbce_private_data *pdata; + + send_exit_notification(tsk); + + pdata = RBCE_DATA(tsk); + RBCE_DATAP(tsk) = NULL; + if (pdata) { + if (pdata->app_tag) { + kfree(pdata->app_tag); + } + free_private_data(pdata); + } + return; +} + +#define AENT(x) [ CKRM_EVENT_##x] = #x +static const char *event_names[CKRM_NUM_EVENTS] = { + AENT(NEWTASK), + AENT(FORK), + AENT(EXIT), + AENT(EXEC), + AENT(UID), + AENT(GID), + AENT(LOGIN), + AENT(USERADD), + AENT(USERDEL), + AENT(LISTEN_START), + AENT(LISTEN_STOP), + AENT(APPTAG), + AENT(RECLASSIFY), + AENT(MANUAL), +}; + +void *rbce_tc_classify(enum ckrm_event event, ...) +{ + va_list args; + void *cls = NULL; + struct task_struct *tsk; + + va_start(args, event); + tsk = va_arg(args, struct task_struct *); + va_end(args); + + /* we only have to deal with events between + * [ CKRM_LATCHABLE_EVENTS .. CKRM_NONLATCHABLE_EVENTS ) + */ + + // printk("tc_classify %p:%d:%s '%s'\n",tsk,tsk->pid, + // tsk->comm,event_names[event]); + + switch (event) { + + case CKRM_EVENT_FORK: + cls = rbce_tc_forkcb(tsk); + break; + + case CKRM_EVENT_EXIT: + rbce_tc_exitcb(tsk); + break; + + case CKRM_EVENT_EXEC: + cls = rbce_classify(tsk, NULL, RBCE_TERMFLAG_CMD | + RBCE_TERMFLAG_UID | RBCE_TERMFLAG_GID, + tc_classtype); + break; + + case CKRM_EVENT_UID: + cls = rbce_classify(tsk, NULL, RBCE_TERMFLAG_UID, tc_classtype); + break; + + case CKRM_EVENT_GID: + cls = rbce_classify(tsk, NULL, RBCE_TERMFLAG_GID, tc_classtype); + break; + + case CKRM_EVENT_LOGIN: + case CKRM_EVENT_USERADD: + case CKRM_EVENT_USERDEL: + case CKRM_EVENT_LISTEN_START: + case CKRM_EVENT_LISTEN_STOP: + case CKRM_EVENT_APPTAG: + /* no interest in this events .. 
*/ + break; + + default: + /* catch all */ + break; + + case CKRM_EVENT_RECLASSIFY: + cls = rbce_classify(tsk, NULL, RBCE_TERMFLAG_ALL, tc_classtype); + break; + + } + // printk("tc_classify %p:%d:%s '%s' ==> %p\n",tsk,tsk->pid, + // tsk->comm,event_names[event],cls); + + return cls; +} + +#ifndef RBCE_EXTENSION +static void rbce_tc_notify(int event, void *core, struct task_struct *tsk) +{ + printk("tc_manual %p:%d:%s '%s'\n", tsk, tsk->pid, tsk->comm, + event_names[event]); + if (event != CKRM_EVENT_MANUAL) + return; + rbce_tc_manual(tsk); +} +#endif + +static struct ckrm_eng_callback rbce_taskclass_ecbs = { + .c_interest = (unsigned long)(-1), // set whole bitmap + .classify = (ce_classify_fct_t) rbce_tc_classify, + .class_delete = rbce_class_deletecb, +#ifndef RBCE_EXTENSION + .n_interest = (1 << CKRM_EVENT_MANUAL), + .notify = (ce_notify_fct_t) rbce_tc_notify, + .always_callback = 0, +#else + .n_interest = (unsigned long)(-1), // set whole bitmap + .notify = (ce_notify_fct_t) rbce_tc_ext_notify, + .class_add = rbce_class_addcb, + .always_callback = 1, +#endif +}; + +/*============================================================================ + * ACCEPTQ CLASSTYPE + *============================================================================*/ + +int sc_classtype = -1; + +void *rbce_sc_classify(enum ckrm_event event, ...) +{ + // no special consideratation + void *result; + va_list args; + struct task_struct *tsk; + struct ckrm_net_struct *ns; + + va_start(args, event); + ns = va_arg(args, struct ckrm_net_struct *); + tsk = va_arg(args, struct task_struct *); + va_end(args); + + result = rbce_classify(tsk, ns, RBCE_TERMFLAG_ALL, sc_classtype); + + DPRINTK(DBG_CLASSIFY_RES, + "==> %d.%d.%d.%d\\%d , %p:%d:%s '%s' => %p\n", + NIPQUAD(ns->ns_daddrv4), ns->ns_dport, + tsk, tsk ? tsk->pid : 0, tsk ? 
tsk->comm : "-", + event_names[event], result); + return result; +} + +static struct ckrm_eng_callback rbce_acceptQclass_ecbs = { + .c_interest = (unsigned long)(-1), + .always_callback = 0, // enable during debugging only + .classify = (ce_classify_fct_t) & rbce_sc_classify, + .class_delete = rbce_class_deletecb, +}; + +/*============================================================================ + * Module Initialization ... + *============================================================================*/ + +#define TASKCLASS_NAME "taskclass" +#define SOCKCLASS_NAME "socket_class" + +struct ce_regtable_struct { + const char *name; + struct ckrm_eng_callback *cbs; + int *clsvar; +}; + +struct ce_regtable_struct ce_regtable[] = { + {TASKCLASS_NAME, &rbce_taskclass_ecbs, &tc_classtype}, + {SOCKCLASS_NAME, &rbce_acceptQclass_ecbs, &sc_classtype}, + {NULL} +}; + +static int register_classtype_engines(void) +{ + int rc; + struct ce_regtable_struct *ceptr = ce_regtable; + + while (ceptr->name) { + rc = ckrm_register_engine(ceptr->name, ceptr->cbs); + printk("ce register with <%s> typeId=%d\n", ceptr->name, rc); + if ((rc < 0) && (rc != -ENOENT)) + return (rc); + if (rc != -ENOENT) + *ceptr->clsvar = rc; + ceptr++; + } + return 0; +} + +static void unregister_classtype_engines(void) +{ + int rc; + struct ce_regtable_struct *ceptr = ce_regtable; + + while (ceptr->name) { + if (*ceptr->clsvar >= 0) { + printk("ce unregister with <%s>\n", ceptr->name); + rc = ckrm_unregister_engine(ceptr->name); + printk("ce unregister with <%s> rc=%d\n", ceptr->name, + rc); + *ceptr->clsvar = -1; + } + ceptr++; + } +} + +// =========== /proc/sysctl/debug/rbce debug stuff ============= + +#ifdef DEBUG +static struct ctl_table_header *rbce_sysctl_table_header; + +#define CTL_RBCE_DEBUG (201) // picked some number.. 
dont know algo to pick +static struct ctl_table rbce_entry_table[] = { + { + .ctl_name = CTL_RBCE_DEBUG, + .procname = "rbce", + .data = &rbcedebug, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = &proc_dointvec, + }, + {0} +}; + +static struct ctl_table rbce_root_table[] = { + { + .ctl_name = CTL_DEBUG, + .procname = "debug", + .data = NULL, + .maxlen = 0, + .mode = 0555, + .child = rbce_entry_table}, + {0} +}; + +static inline void start_debug(void) +{ + rbce_sysctl_table_header = register_sysctl_table(rbce_root_table, 1); +} +static inline void stop_debug(void) +{ + if (rbce_sysctl_table_header) + unregister_sysctl_table(rbce_sysctl_table_header); +} + +#else + +static inline void start_debug(void) +{ +} +static inline void stop_debug(void) +{ +} + +#endif // DEBUG + +extern int rbce_mkdir(struct inode *, struct dentry *, int); +extern int rbce_rmdir(struct inode *, struct dentry *); +extern int rbce_create_magic(void); +extern int rbce_clear_magic(void); + +rbce_eng_callback_t rcfs_ecbs = { + rbce_mkdir, + rbce_rmdir, + rbce_create_magic, + rbce_clear_magic +}; + +/* ======================= Module definition Functions ====================== */ + +int init_rbce(void) +{ + int rc, i, line; + + printk("<1>\nInstalling \'%s\' module\n", modname); + + for (i = 0; i < CKRM_MAX_CLASSTYPES; i++) { + INIT_LIST_HEAD(&rules_list[i]); + } + + rc = init_rbce_ext_pre(); + line = __LINE__; + if (rc) + goto out; + + rc = register_classtype_engines(); + line = __LINE__; + if (rc) + goto out_unreg_ckrm; // need to remove anyone opened + + /* register any other class type engine here */ + + rc = rcfs_register_engine(&rcfs_ecbs); + line = __LINE__; + if (rc) + goto out_unreg_ckrm; + + if (rcfs_mounted) { + rc = rbce_create_magic(); + line = __LINE__; + if (rc) + goto out_unreg_rcfs; + } + + start_debug(); + + rc = init_rbce_ext_post(); + line = __LINE__; + if (rc) + goto out_debug; + + return 0; // SUCCESS + + out_debug: + stop_debug(); + + out_unreg_rcfs: + 
rcfs_unregister_engine(&rcfs_ecbs); + out_unreg_ckrm: + unregister_classtype_engines(); + exit_rbce_ext(); + out: + + printk("<1>%s: error installing rc=%d line=%d\n", __FUNCTION__, rc, + line); + return rc; +} + +void exit_rbce(void) +{ + int i; + + printk("<1>Removing \'%s\' module\n", modname); + + stop_debug(); + exit_rbce_ext(); + + // Print warnings if lists are not empty, which is a bug + if (!list_empty(&class_list)) { + printk("exit_rbce: Class list is not empty\n"); + } + + for (i = 0; i < CKRM_MAX_CLASSTYPES; i++) { + if (!list_empty(&rules_list[i])) { + printk("exit_rbce: Rules list for classtype %d" + " is not empty\n", i); + } + } + + if (rcfs_mounted) + rbce_clear_magic(); + + rcfs_unregister_engine(&rcfs_ecbs); + unregister_classtype_engines(); + free_all_private_data(); +} + +EXPORT_SYMBOL(get_rule); +EXPORT_SYMBOL(rule_exists); +EXPORT_SYMBOL(change_rule); +EXPORT_SYMBOL(delete_rule); +EXPORT_SYMBOL(rename_rule); +EXPORT_SYMBOL(reclassify_pid); +EXPORT_SYMBOL(set_tasktag); + +module_init(init_rbce); +module_exit(exit_rbce); diff --git a/kernel/ckrm/rbce/rbcemod_ext.c b/kernel/ckrm/rbce/rbcemod_ext.c new file mode 100644 index 000000000..90c8f154b --- /dev/null +++ b/kernel/ckrm/rbce/rbcemod_ext.c @@ -0,0 +1,615 @@ +/* Data Collection Extension to Rule-based Classification Engine (RBCE) module + * + * Copyright (C) Hubertus Franke, IBM Corp. 2003 + * + * Extension to be included into RBCE to collect delay and sample information + * requires user daemon to activate. + * + * Latest version, more details at http://ckrm.sf.net + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. 
+ * + */ + +/******************************************************************************* + * + * User-Kernel Communication Channel (UKCC) + * Protocol and communication handling + * + ******************************************************************************/ + +#include + +#define PSAMPLE(pdata) (&((pdata)->ext_data.sample)) +#define UKCC_N_SUB_BUFFERS (4) +#define UKCC_SUB_BUFFER_SIZE (1<<15) +#define UKCC_TOTAL_BUFFER_SIZE (UKCC_N_SUB_BUFFERS * UKCC_SUB_BUFFER_SIZE) + +#define CHANNEL_AUTO_CONT 0 /* this is during debugging only. It allows + the module to continue sending data through + the UKCC if space frees up vs. going into + the recovery driven mode + */ + +enum ukcc_state { + UKCC_OK = 0, + UKCC_STANDBY = 1, + UKCC_FULL = 2 +}; + +int ukcc_channel = -1; +static enum ukcc_state chan_state = UKCC_STANDBY; + +inline static int ukcc_ok(void) +{ + return (chan_state == UKCC_OK); +} + +static void ukcc_cmd_deliver(int rchan_id, char *from, u32 len); +static void client_attached(void); +static void client_detached(void); + +static int ukcc_fileop_notify(int rchan_id, + struct file *filp, enum relay_fileop fileop) +{ + static int readers = 0; + if (fileop == RELAY_FILE_OPEN) { + // printk("got fileop_notify RELAY_FILE_OPEN for file %p\n", + // filp); + if (readers) { + printk("only one client allowed, backoff .... 
\n"); + return -EPERM; + } + if (!try_module_get(THIS_MODULE)) + return -EPERM; + readers++; + client_attached(); + + } else if (fileop == RELAY_FILE_CLOSE) { + // printk("got fileop_notify RELAY_FILE_CLOSE for file %p\n", + // filp); + client_detached(); + readers--; + module_put(THIS_MODULE); + } + return 0; +} + +static int create_ukcc_channel(void) +{ + static struct rchan_callbacks ukcc_callbacks = { + .buffer_start = NULL, + .buffer_end = NULL, + .deliver = NULL, + .user_deliver = ukcc_cmd_deliver, + .needs_resize = NULL, + .fileop_notify = ukcc_fileop_notify, + }; + + u32 channel_flags = + RELAY_USAGE_GLOBAL | RELAY_SCHEME_ANY | RELAY_TIMESTAMP_ANY; + + // notify on subbuffer full (through poll) + channel_flags |= RELAY_DELIVERY_BULK; + // channel_flags |= RELAY_DELIVERY_PACKET; + // avoid overwrite, otherwise recovery will be nasty... + channel_flags |= RELAY_MODE_NO_OVERWRITE; + + ukcc_channel = relay_open(CRBCE_UKCC_NAME, + UKCC_SUB_BUFFER_SIZE, + UKCC_N_SUB_BUFFERS, + channel_flags, + &ukcc_callbacks, 0, 0, 0, 0, 0, 0, NULL, 0); + if (ukcc_channel < 0) + printk("crbce: ukcc creation failed, errcode: %d\n", + ukcc_channel); + else + printk("crbce: ukcc created (%u KB)\n", + UKCC_TOTAL_BUFFER_SIZE >> 10); + return ukcc_channel; +} + +static inline void close_ukcc_channel(void) +{ + if (ukcc_channel >= 0) { + relay_close(ukcc_channel); + ukcc_channel = -1; + chan_state = UKCC_STANDBY; + } +} + +#define rec_set_hdr(r,t,p) ((r)->hdr.type = (t), (r)->hdr.pid = (p)) +#define rec_set_timehdr(r,t,p,c) (rec_set_hdr(r,t,p), \ +(r)->hdr.jiffies = jiffies, (r)->hdr.cls=(unsigned long)(c) ) + +#if CHANNEL_AUTO_CONT + +/* we only provide this for debugging.. 
it allows us to send records + * based on availability in the channel when the UKCC stalles rather + * going through the UKCC recovery protocol + */ + +#define rec_send_len(r,l) \ + do { \ + int chan_wasok = (chan_state == UKCC_OK); \ + int chan_isok = (relay_write(ukcc_channel, \ + (r),(l),-1,NULL) > 0); \ + chan_state = chan_isok ? UKCC_OK : UKCC_STANDBY; \ + if (chan_wasok && !chan_isok) { \ + printk("Channel stalled\n"); \ + } else if (!chan_wasok && chan_isok) { \ + printk("Channel continues\n"); \ + } \ + } while (0) + +#define rec_send(r) rec_send_len(r,sizeof(*(r))) + +#else + +/* Default UKCC channel protocol. + * Though a UKCC buffer overflow should not happen ever, it is possible iff + * the user daemon stops reading for some reason. Hence we provide a simple + * protocol based on 3 states + * UKCC_OK := channel is active and properly working. When a channel + * write fails we move to state CHAN_FULL. + * UKCC_FULL := channel is active, but the last send_rec has failed. As + * a result we will try to send an indication to the daemon + * that this has happened. When that succeeds, we move to + * state UKCC_STANDBY. + * UKCC_STANDBY := we are waiting to be restarted by the user daemon + * + */ + +static void ukcc_full(void) +{ + static spinlock_t ukcc_state_lock = SPIN_LOCK_UNLOCKED; + /* protect transition from OK -> FULL to ensure only one record is sent, + rest we do not need to protect, protocol implies that. we keep the + channel OK until + */ + int send = 0; + spin_lock(&ukcc_state_lock); + if ((send = (chan_state != UKCC_STANDBY))) + chan_state = UKCC_STANDBY; /* assume we can send */ + spin_unlock(&ukcc_state_lock); + + if (send) { + struct crbce_ukcc_full rec; + rec_set_timehdr(&rec, CRBCE_REC_UKCC_FULL, 0, 0); + if (relay_write(ukcc_channel, &rec, + sizeof(rec), -1, NULL) <= 0) { + /* channel is remains full .. 
try with next one */ + chan_state = UKCC_FULL; + } + } +} + +#define rec_send_len(r,l) \ + do { \ + switch (chan_state) { \ + case UKCC_OK: \ + if (relay_write(ukcc_channel,(r), \ + (l),-1,NULL) > 0) \ + break; \ + case UKCC_FULL: \ + ukcc_full(); \ + break; \ + default: \ + break; \ + } \ + } while (0) + +#define rec_send(r) rec_send_len(r,sizeof(*(r))) + +#endif + +/****************************************************************************** + * + * Callbacks for the CKRM engine. + * In each we do the necessary classification and event record generation + * We generate 3 kind of records in the callback + * (a) FORK send the pid, the class and the ppid + * (b) RECLASSIFICATION send the pid, the class and < sample data + + * delay data > + * (b) EXIT send the pid + * + ******************************************************************************/ + +int delta_mode = 0; + +static inline void copy_delay(struct task_delay_info *delay, + struct task_struct *tsk) +{ + *delay = tsk->delays; +} + +static inline void zero_delay(struct task_delay_info *delay) +{ + memset(delay, 0, sizeof(struct task_delay_info)); + /* we need to think about doing this 64-bit atomic */ +} + +static inline void zero_sample(struct task_sample_info *sample) +{ + memset(sample, 0, sizeof(struct task_sample_info)); + /* we need to think about doing this 64-bit atomic */ +} + +static inline int check_zero(void *ptr, int len) +{ + int iszero = 1; + int i; + unsigned long *uptr = (unsigned long *)ptr; + + for (i = len / sizeof(unsigned long); i-- && iszero; uptr++) + // assume its rounded + iszero &= (*uptr == 0); + return iszero; +} + +static inline int check_not_zero(void *ptr, int len) +{ + int i; + unsigned long *uptr = (unsigned long *)ptr; + + for (i = len / sizeof(unsigned long); i--; uptr++) + // assume its rounded + if (*uptr) + return 1; + return 0; +} + +static inline int sample_changed(struct task_sample_info *s) +{ + return check_not_zero(s, sizeof(*s)); +} +static inline int 
delay_changed(struct task_delay_info *d) +{ + return check_not_zero(d, sizeof(*d)); +} + +static inline int +send_task_record(struct task_struct *tsk, int event, + struct ckrm_core_class *core, int send_forced) +{ + struct crbce_rec_task_data rec; + struct rbce_private_data *pdata; + int send = 0; + + if (!ukcc_ok()) + return 0; + pdata = RBCE_DATA(tsk); + if (pdata == NULL) { + // printk("send [%d]<%s>: no pdata\n",tsk->pid,tsk->comm); + return 0; + } + if (send_forced || (delta_mode == 0) + || sample_changed(PSAMPLE(RBCE_DATA(tsk))) + || delay_changed(&tsk->delays)) { + rec_set_timehdr(&rec, event, tsk->pid, + core ? core : (struct ckrm_core_class *)tsk-> + taskclass); + rec.sample = *PSAMPLE(RBCE_DATA(tsk)); + copy_delay(&rec.delay, tsk); + rec_send(&rec); + if (delta_mode || send_forced) { + // on reclassify or delta mode reset the counters + zero_sample(PSAMPLE(RBCE_DATA(tsk))); + zero_delay(&tsk->delays); + } + send = 1; + } + return send; +} + +static inline void send_exit_notification(struct task_struct *tsk) +{ + send_task_record(tsk, CRBCE_REC_EXIT, NULL, 1); +} + +static inline void +rbce_tc_ext_notify(int event, void *core, struct task_struct *tsk) +{ + struct crbce_rec_fork rec; + + switch (event) { + case CKRM_EVENT_FORK: + if (ukcc_ok()) { + rec.ppid = tsk->parent->pid; + rec_set_timehdr(&rec, CKRM_EVENT_FORK, tsk->pid, core); + rec_send(&rec); + } + break; + case CKRM_EVENT_MANUAL: + rbce_tc_manual(tsk); + + default: + send_task_record(tsk, event, (struct ckrm_core_class *)core, 1); + break; + } +} + +/*====================== end classification engine =======================*/ + +static void sample_task_data(unsigned long unused); + +struct timer_list sample_timer = {.expires = 0,.function = sample_task_data }; +unsigned long timer_interval_length = (250 * HZ) / 1000; + +inline void stop_sample_timer(void) +{ + if (sample_timer.expires > 0) { + del_timer_sync(&sample_timer); + sample_timer.expires = 0; + } +} + +inline void start_sample_timer(void) 
+{
+	if (timer_interval_length > 0) {
+		/* interval is kept in msecs; convert to jiffies here */
+		sample_timer.expires =
+		    jiffies + (timer_interval_length * HZ) / 1000;
+		add_timer(&sample_timer);
+	}
+}
+
+/*
+ * Walk all threads and send one CRBCE sample record per thread, bracketed
+ * by start/stop delimiter records (is_stop 0/1) so the client can tell
+ * where the snapshot begins and ends.
+ */
+static void send_task_data(void)
+{
+	struct crbce_rec_data_delim limrec;
+	struct task_struct *proc, *thread;
+	int sendcnt = 0;
+	int taskcnt = 0;
+	limrec.is_stop = 0;
+	rec_set_timehdr(&limrec, CRBCE_REC_DATA_DELIMITER, 0, 0);
+	rec_send(&limrec);
+
+	read_lock(&tasklist_lock);
+	do_each_thread(proc, thread) {
+		taskcnt++;
+		task_lock(thread);
+		sendcnt += send_task_record(thread, CRBCE_REC_SAMPLE, NULL, 0);
+		task_unlock(thread);
+	} while_each_thread(proc, thread);
+	read_unlock(&tasklist_lock);
+
+	limrec.is_stop = 1;
+	rec_set_timehdr(&limrec, CRBCE_REC_DATA_DELIMITER, 0, 0);
+	rec_send(&limrec);
+
+	// printk("send_task_data mode=%d t#=%d s#=%d\n",
+	//	 delta_mode,taskcnt,sendcnt);
+}
+
+/*
+ * Send a class-info record (name + action) for <cls> to the client.
+ * Only the used part of the fixed-size name buffer is transmitted.
+ */
+static void notify_class_action(struct rbce_class *cls, int action)
+{
+	struct crbce_class_info cinfo;
+	int len;
+
+	rec_set_timehdr(&cinfo, CRBCE_REC_CLASS_INFO, 0, cls->classobj);
+	cinfo.action = action;
+	/* copy at most CRBCE_MAX_CLASS_NAME_LEN-1 chars, then NUL-terminate */
+	len = strnlen(cls->obj.name, CRBCE_MAX_CLASS_NAME_LEN - 1);
+	memcpy(&cinfo.name, cls->obj.name, len);
+	cinfo.name[len] = '\0';
+	len++;
+	cinfo.namelen = len;
+
+	/* shrink the record to the actual name length before sending */
+	len += sizeof(cinfo) - CRBCE_MAX_CLASS_NAME_LEN;
+	rec_send_len(&cinfo, len);
+}
+
+/* announce every currently existing class to the client */
+static void send_classlist(void)
+{
+	struct rbce_class *cls;
+
+	read_lock(&global_rwlock);
+	list_for_each_entry(cls, &class_list, obj.link) {
+		notify_class_action(cls, 1);
+	}
+	read_unlock(&global_rwlock);
+}
+
+/*
+ * resend_task_info
+ *
+ * This function resends all essential task information to the client.
+ *
+ */
+static void resend_task_info(void)
+{
+	struct crbce_rec_data_delim limrec;
+	struct crbce_rec_fork rec;
+	struct task_struct *proc, *thread;
+
+	send_classlist();	// first send available class information
+
+	/* delimiter pair (is_stop 2/3) brackets the task-info dump */
+	limrec.is_stop = 2;
+	rec_set_timehdr(&limrec, CRBCE_REC_DATA_DELIMITER, 0, 0);
+	rec_send(&limrec);
+
+	write_lock(&tasklist_lock);	// avoid any mods during this phase
+	do_each_thread(proc, thread) {
+		if (ukcc_ok()) {
+			rec.ppid = thread->parent->pid;
+			rec_set_timehdr(&rec, CRBCE_REC_TASKINFO, thread->pid,
+					thread->taskclass);
+			rec_send(&rec);
+		}
+	}
+	while_each_thread(proc, thread);
+	write_unlock(&tasklist_lock);
+
+	limrec.is_stop = 3;
+	rec_set_timehdr(&limrec, CRBCE_REC_DATA_DELIMITER, 0, 0);
+	rec_send(&limrec);
+}
+
+extern int task_running_sys(struct task_struct *);
+
+/* make sure every task has RBCE private data before sampling starts */
+static void add_all_private_data(void)
+{
+	struct task_struct *proc, *thread;
+
+	write_lock(&tasklist_lock);
+	do_each_thread(proc, thread) {
+		if (RBCE_DATA(thread) == NULL)
+			RBCE_DATAP(thread) = create_private_data(NULL, 0);
+	}
+	while_each_thread(proc, thread);
+	write_unlock(&tasklist_lock);
+}
+
+/*
+ * Timer callback: fold per-task cpu run/wait and io/memio delay state
+ * into each task's private-data sample counters, then rearm the timer.
+ */
+static void sample_task_data(unsigned long unused)
+{
+	struct task_struct *proc, *thread;
+
+	int run = 0;
+	int wait = 0;
+	read_lock(&tasklist_lock);
+	do_each_thread(proc, thread) {
+		struct rbce_private_data *pdata = RBCE_DATA(thread);
+
+		if (pdata == NULL) {
+			// some weird race condition ..
simply ignore
+			continue;
+		}
+		if (thread->state == TASK_RUNNING) {
+			if (task_running_sys(thread)) {
+				atomic_inc((atomic_t *) &
+					   (PSAMPLE(pdata)->cpu_running));
+				run++;
+			} else {
+				atomic_inc((atomic_t *) &
+					   (PSAMPLE(pdata)->cpu_waiting));
+				wait++;
+			}
+		}
+		/* update IO state */
+		if (thread->flags & PF_IOWAIT) {
+			if (thread->flags & PF_MEMIO)
+				atomic_inc((atomic_t *) &
+					   (PSAMPLE(pdata)->memio_delayed));
+			else
+				atomic_inc((atomic_t *) &
+					   (PSAMPLE(pdata)->io_delayed));
+		}
+	}
+	while_each_thread(proc, thread);
+	read_unlock(&tasklist_lock);
+// printk("sample_timer: run=%d wait=%d\n",run,wait);
+	start_sample_timer();
+}
+
+/*
+ * relayfs delivery callback: decode and execute one command record sent
+ * by the userspace client (set timer, request data, start/stop), then
+ * acknowledge it with a CRBCE_REC_KERNEL_CMD_DONE record (see "out:"
+ * in the continuation of this function).
+ */
+static void ukcc_cmd_deliver(int rchan_id, char *from, u32 len)
+{
+	struct crbce_command *cmdrec = (struct crbce_command *)from;
+	struct crbce_cmd_done cmdret;
+	int rc = 0;
+
+// printk("ukcc_cmd_deliver: %d %d len=%d:%d\n",cmdrec->type,
+//	  cmdrec->cmd,cmdrec->len,len);
+
+	cmdrec->len = len;	// add this to reflection so the user doesn't
+	// accidentally write the wrong length and the
+	// protocol is getting screwed up
+
+	if (cmdrec->type != CRBCE_REC_KERNEL_CMD) {
+		rc = EINVAL;
+		goto out;
+	}
+
+	switch (cmdrec->cmd) {
+	case CRBCE_CMD_SET_TIMER:
+		{
+			struct crbce_cmd_settimer *cptr =
+			    (struct crbce_cmd_settimer *)cmdrec;
+			if (len != sizeof(*cptr)) {
+				rc = EINVAL;
+				break;
+			}
+			stop_sample_timer();
+			/* interval is in msecs; clamp to >= 10ms */
+			timer_interval_length = cptr->interval;
+			if ((timer_interval_length > 0)
+			    && (timer_interval_length < 10))
+				timer_interval_length = 10;
+			// anything finer can create problems
+			printk(KERN_INFO "CRBCE set sample collect timer %lu\n",
+			       timer_interval_length);
+			start_sample_timer();
+			break;
+		}
+	case CRBCE_CMD_SEND_DATA:
+		{
+			struct crbce_cmd_send_data *cptr =
+			    (struct crbce_cmd_send_data *)cmdrec;
+			if (len != sizeof(*cptr)) {
+				rc = EINVAL;
+				break;
+			}
+			delta_mode = cptr->delta_mode;
+			send_task_data();
+			break;
+		}
+	case CRBCE_CMD_START:
+		add_all_private_data();
+		chan_state = UKCC_OK;
+		resend_task_info();
+ break; + + case CRBCE_CMD_STOP: + chan_state = UKCC_STANDBY; + free_all_private_data(); + break; + + default: + rc = EINVAL; + break; + } + + out: + cmdret.hdr.type = CRBCE_REC_KERNEL_CMD_DONE; + cmdret.hdr.cmd = cmdrec->cmd; + cmdret.rc = rc; + rec_send(&cmdret); +// printk("ukcc_cmd_deliver ACK: %d %d rc=%d %d\n",cmdret.hdr.type, +// cmdret.hdr.cmd,rc,sizeof(cmdret)); +} + +static void client_attached(void) +{ + printk("client [%d]<%s> attached to UKCC\n", current->pid, + current->comm); + relay_reset(ukcc_channel); +} + +static void client_detached(void) +{ + printk("client [%d]<%s> detached to UKCC\n", current->pid, + current->comm); + chan_state = UKCC_STANDBY; + stop_sample_timer(); + relay_reset(ukcc_channel); + free_all_private_data(); +} + +static int init_rbce_ext_pre(void) +{ + int rc; + + rc = create_ukcc_channel(); + return ((rc < 0) ? rc : 0); +} + +static int init_rbce_ext_post(void) +{ + init_timer(&sample_timer); + return 0; +} + +static void exit_rbce_ext(void) +{ + stop_sample_timer(); + close_ukcc_channel(); +} diff --git a/kernel/ckrm/rbce/token.c b/kernel/ckrm/rbce/token.c new file mode 100644 index 000000000..dd85aaf6e --- /dev/null +++ b/kernel/ckrm/rbce/token.c @@ -0,0 +1,260 @@ +#include +#include + +enum rule_token_t { + TOKEN_PATH, + TOKEN_CMD, + TOKEN_ARGS, + TOKEN_RUID_EQ, + TOKEN_RUID_LT, + TOKEN_RUID_GT, + TOKEN_RUID_NOT, + TOKEN_RGID_EQ, + TOKEN_RGID_LT, + TOKEN_RGID_GT, + TOKEN_RGID_NOT, + TOKEN_EUID_EQ, + TOKEN_EUID_LT, + TOKEN_EUID_GT, + TOKEN_EUID_NOT, + TOKEN_EGID_EQ, + TOKEN_EGID_LT, + TOKEN_EGID_GT, + TOKEN_EGID_NOT, + TOKEN_TAG, + TOKEN_IPV4, + TOKEN_IPV6, + TOKEN_DEP, + TOKEN_DEP_ADD, + TOKEN_DEP_DEL, + TOKEN_ORDER, + TOKEN_CLASS, + TOKEN_STATE, + TOKEN_INVALID +}; + +int token_to_ruleop[TOKEN_INVALID + 1] = { + [TOKEN_PATH] = RBCE_RULE_CMD_PATH, + [TOKEN_CMD] = RBCE_RULE_CMD, + [TOKEN_ARGS] = RBCE_RULE_ARGS, + [TOKEN_RUID_EQ] = RBCE_RULE_REAL_UID, + [TOKEN_RUID_LT] = RBCE_RULE_REAL_UID, + [TOKEN_RUID_GT] = 
RBCE_RULE_REAL_UID,
+	[TOKEN_RUID_NOT] = RBCE_RULE_REAL_UID,
+	[TOKEN_RGID_EQ] = RBCE_RULE_REAL_GID,
+	[TOKEN_RGID_LT] = RBCE_RULE_REAL_GID,
+	[TOKEN_RGID_GT] = RBCE_RULE_REAL_GID,
+	[TOKEN_RGID_NOT] = RBCE_RULE_REAL_GID,
+	[TOKEN_EUID_EQ] = RBCE_RULE_EFFECTIVE_UID,
+	[TOKEN_EUID_LT] = RBCE_RULE_EFFECTIVE_UID,
+	[TOKEN_EUID_GT] = RBCE_RULE_EFFECTIVE_UID,
+	[TOKEN_EUID_NOT] = RBCE_RULE_EFFECTIVE_UID,
+	[TOKEN_EGID_EQ] = RBCE_RULE_EFFECTIVE_GID,
+	[TOKEN_EGID_LT] = RBCE_RULE_EFFECTIVE_GID,
+	[TOKEN_EGID_GT] = RBCE_RULE_EFFECTIVE_GID,
+	[TOKEN_EGID_NOT] = RBCE_RULE_EFFECTIVE_GID,
+	[TOKEN_TAG] = RBCE_RULE_APP_TAG,
+	[TOKEN_IPV4] = RBCE_RULE_IPV4,
+	[TOKEN_IPV6] = RBCE_RULE_IPV6,
+	[TOKEN_DEP] = RBCE_RULE_DEP_RULE,
+	[TOKEN_DEP_ADD] = RBCE_RULE_DEP_RULE,
+	[TOKEN_DEP_DEL] = RBCE_RULE_DEP_RULE,
+	[TOKEN_ORDER] = RBCE_RULE_INVALID,
+	[TOKEN_CLASS] = RBCE_RULE_INVALID,
+	[TOKEN_STATE] = RBCE_RULE_INVALID,
+};
+
+/* comparison/action operators; the first four alias the RBCE_* values */
+enum op_token {
+	TOKEN_OP_EQUAL = RBCE_EQUAL,
+	TOKEN_OP_NOT = RBCE_NOT,
+	TOKEN_OP_LESS_THAN = RBCE_LESS_THAN,
+	TOKEN_OP_GREATER_THAN = RBCE_GREATER_THAN,
+	TOKEN_OP_DEP,
+	TOKEN_OP_DEP_ADD,
+	TOKEN_OP_DEP_DEL,
+	TOKEN_OP_ORDER,
+	TOKEN_OP_CLASS,
+	TOKEN_OP_STATE,
+};
+
+/* map each parser token to its comparison/action operator */
+enum op_token token_to_operator[TOKEN_INVALID + 1] = {
+	[TOKEN_PATH] = TOKEN_OP_EQUAL,
+	[TOKEN_CMD] = TOKEN_OP_EQUAL,
+	[TOKEN_ARGS] = TOKEN_OP_EQUAL,
+	[TOKEN_RUID_EQ] = TOKEN_OP_EQUAL,
+	[TOKEN_RUID_LT] = TOKEN_OP_LESS_THAN,
+	[TOKEN_RUID_GT] = TOKEN_OP_GREATER_THAN,
+	[TOKEN_RUID_NOT] = TOKEN_OP_NOT,
+	[TOKEN_RGID_EQ] = TOKEN_OP_EQUAL,
+	[TOKEN_RGID_LT] = TOKEN_OP_LESS_THAN,
+	[TOKEN_RGID_GT] = TOKEN_OP_GREATER_THAN,
+	[TOKEN_RGID_NOT] = TOKEN_OP_NOT,
+	[TOKEN_EUID_EQ] = TOKEN_OP_EQUAL,
+	[TOKEN_EUID_LT] = TOKEN_OP_LESS_THAN,
+	[TOKEN_EUID_GT] = TOKEN_OP_GREATER_THAN,
+	[TOKEN_EUID_NOT] = TOKEN_OP_NOT,
+	[TOKEN_EGID_EQ] = TOKEN_OP_EQUAL,
+	[TOKEN_EGID_LT] = TOKEN_OP_LESS_THAN,
+	[TOKEN_EGID_GT] = TOKEN_OP_GREATER_THAN,
+	[TOKEN_EGID_NOT] = TOKEN_OP_NOT,
+	[TOKEN_TAG] = TOKEN_OP_EQUAL,
+
[TOKEN_IPV4] = TOKEN_OP_EQUAL,
+	[TOKEN_IPV6] = TOKEN_OP_EQUAL,
+	[TOKEN_DEP] = TOKEN_OP_DEP,
+	[TOKEN_DEP_ADD] = TOKEN_OP_DEP_ADD,
+	[TOKEN_DEP_DEL] = TOKEN_OP_DEP_DEL,
+	[TOKEN_ORDER] = TOKEN_OP_ORDER,
+	[TOKEN_CLASS] = TOKEN_OP_CLASS,
+	[TOKEN_STATE] = TOKEN_OP_STATE
+};
+
+/*
+ * Pattern table for match_token().  Every numeric pattern must carry a
+ * %d conversion so match_int() can extract the value.
+ */
+static match_table_t tokens = {
+	{TOKEN_PATH, "path=%s"},
+	{TOKEN_CMD, "cmd=%s"},
+	{TOKEN_ARGS, "args=%s"},
+	{TOKEN_RUID_EQ, "uid=%d"},
+	{TOKEN_RUID_LT, "uid<%d"},
+	{TOKEN_RUID_GT, "uid>%d"},
+	{TOKEN_RUID_NOT, "uid!%d"},
+	{TOKEN_RGID_EQ, "gid=%d"},
+	{TOKEN_RGID_LT, "gid<%d"},
+	{TOKEN_RGID_GT, "gid>%d"},
+	{TOKEN_RGID_NOT, "gid!%d"},	/* was "gid!d": the missing % meant
+					 * the gid value could never match */
+	{TOKEN_EUID_EQ, "euid=%d"},
+	{TOKEN_EUID_LT, "euid<%d"},
+	{TOKEN_EUID_GT, "euid>%d"},
+	{TOKEN_EUID_NOT, "euid!%d"},
+	{TOKEN_EGID_EQ, "egid=%d"},
+	{TOKEN_EGID_LT, "egid<%d"},
+	{TOKEN_EGID_GT, "egid>%d"},
+	{TOKEN_EGID_NOT, "egid!%d"},
+	{TOKEN_TAG, "tag=%s"},
+	{TOKEN_IPV4, "ipv4=%s"},
+	{TOKEN_IPV6, "ipv6=%s"},
+	{TOKEN_DEP, "depend=%s"},
+	{TOKEN_DEP_ADD, "+depend=%s"},
+	{TOKEN_DEP_DEL, "-depend=%s"},
+	{TOKEN_ORDER, "order=%d"},
+	{TOKEN_CLASS, "class=%s"},
+	{TOKEN_STATE, "state=%d"},
+	{TOKEN_INVALID, NULL}
+};
+
+/*
+ * return -EINVAL in case of failures
+ * returns number of terms in terms on success.
+ * never returns 0.
+ */ + +static int +rules_parse(char *rule_defn, struct rbce_rule_term **rterms, int *term_mask) +{ + char *p, *rp = rule_defn; + int option, i = 0, nterms; + struct rbce_rule_term *terms; + + *rterms = NULL; + *term_mask = 0; + if (!rule_defn) + return -EINVAL; + + nterms = 0; + while (*rp++) { + if (*rp == '>' || *rp == '<' || *rp == '=') { + nterms++; + } + } + + if (!nterms) { + return -EINVAL; + } + + terms = kmalloc(nterms * sizeof(struct rbce_rule_term), GFP_KERNEL); + if (!terms) { + return -ENOMEM; + } + + while ((p = strsep(&rule_defn, ",")) != NULL) { + + substring_t args[MAX_OPT_ARGS]; + int token; + + while (*p && isspace(*p)) + p++; + if (!*p) + continue; + + token = match_token(p, tokens, args); + + terms[i].op = token_to_ruleop[token]; + terms[i].operator = token_to_operator[token]; + switch (token) { + + case TOKEN_PATH: + case TOKEN_CMD: + case TOKEN_ARGS: + case TOKEN_TAG: + case TOKEN_IPV4: + case TOKEN_IPV6: + // all these tokens can be specified only once + if (*term_mask & (1 << terms[i].op)) { + nterms = -EINVAL; + goto out; + } + /*FALLTHRU*/ case TOKEN_CLASS: + case TOKEN_DEP: + case TOKEN_DEP_ADD: + case TOKEN_DEP_DEL: + terms[i].u.string = args->from; + break; + + case TOKEN_RUID_EQ: + case TOKEN_RUID_LT: + case TOKEN_RUID_GT: + case TOKEN_RUID_NOT: + case TOKEN_RGID_EQ: + case TOKEN_RGID_LT: + case TOKEN_RGID_GT: + case TOKEN_RGID_NOT: + case TOKEN_EUID_EQ: + case TOKEN_EUID_LT: + case TOKEN_EUID_GT: + case TOKEN_EUID_NOT: + case TOKEN_EGID_EQ: + case TOKEN_EGID_LT: + case TOKEN_EGID_GT: + case TOKEN_EGID_NOT: + // all these tokens can be specified only once + if (*term_mask & (1 << terms[i].op)) { + nterms = -EINVAL; + goto out; + } + /*FALLTHRU*/ case TOKEN_ORDER: + case TOKEN_STATE: + if (match_int(args, &option)) { + nterms = -EINVAL; + goto out; + } + terms[i].u.id = option; + break; + default: + nterms = -EINVAL; + goto out; + } + *term_mask |= (1 << terms[i].op); + i++; + } + *rterms = terms; + + out: + if (nterms < 0) { + 
kfree(terms); + *term_mask = 0; + } /* else { + for (i = 0; i < nterms; i++) { + printk("token: i %d; op %d, operator %d, str %ld\n", + i, terms[i].op, terms[i].operator, terms[i].u.id); + } + } */ + return nterms; +} -- 2.47.0