#include <linux/module.h>
#include <linux/init.h>
#include <linux/types.h>
+#include <linux/capability.h>
#include <linux/fs.h>
#include <linux/sysctl.h>
#include <linux/proc_fs.h>
-#include <linux/timer.h>
+#include <linux/workqueue.h>
#include <linux/swap.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <linux/netfilter.h>
#include <linux/netfilter_ipv4.h>
+#include <linux/mutex.h>
#include <net/ip.h>
+#include <net/route.h>
#include <net/sock.h>
#include <asm/uaccess.h>
#include <net/ip_vs.h>
-/* semaphore for IPVS sockopts. And, [gs]etsockopt may sleep. */
+/* mutex for IPVS sockopts. Note that [gs]etsockopt may sleep. */
-static DECLARE_MUTEX(__ip_vs_mutex);
+static DEFINE_MUTEX(__ip_vs_mutex);
/* lock for service table */
-static rwlock_t __ip_vs_svc_lock = RW_LOCK_UNLOCKED;
+static DEFINE_RWLOCK(__ip_vs_svc_lock);
/* lock for table with the real services */
-static rwlock_t __ip_vs_rs_lock = RW_LOCK_UNLOCKED;
+static DEFINE_RWLOCK(__ip_vs_rs_lock);
/* lock for state and timeout tables */
-static rwlock_t __ip_vs_securetcp_lock = RW_LOCK_UNLOCKED;
+static DEFINE_RWLOCK(__ip_vs_securetcp_lock);
/* lock for drop entry handling */
-static spinlock_t __ip_vs_dropentry_lock = SPIN_LOCK_UNLOCKED;
+static DEFINE_SPINLOCK(__ip_vs_dropentry_lock);
/* lock for drop packet handling */
-static spinlock_t __ip_vs_droppacket_lock = SPIN_LOCK_UNLOCKED;
+static DEFINE_SPINLOCK(__ip_vs_droppacket_lock);
/* 1/rate drop and drop-entry variables */
int ip_vs_drop_rate = 0;
int ip_vs_drop_counter = 0;
-atomic_t ip_vs_dropentry = ATOMIC_INIT(0);
+static atomic_t ip_vs_dropentry = ATOMIC_INIT(0);
/* number of virtual services */
static int ip_vs_num_services = 0;
static int sysctl_ip_vs_am_droprate = 10;
int sysctl_ip_vs_cache_bypass = 0;
int sysctl_ip_vs_expire_nodest_conn = 0;
+int sysctl_ip_vs_expire_quiescent_template = 0;
int sysctl_ip_vs_sync_threshold[2] = { 3, 50 };
int sysctl_ip_vs_nat_icmp_send = 0;
#endif
/*
- * update_defense_level is called from timer bh and from sysctl.
+ * update_defense_level is called from keventd and from sysctl,
+ * so it needs to protect itself from softirqs
*/
static void update_defense_level(void)
{
nomem = (availmem < sysctl_ip_vs_amemthresh);
+ local_bh_disable();
+
/* drop_entry */
spin_lock(&__ip_vs_dropentry_lock);
switch (sysctl_ip_vs_drop_entry) {
if (to_change >= 0)
ip_vs_protocol_timeout_change(sysctl_ip_vs_secure_tcp>1);
write_unlock(&__ip_vs_securetcp_lock);
+
+ local_bh_enable();
}
/*
- * Timer for checking the defense
+ * Delayed work for periodically checking the defense
*/
-static struct timer_list defense_timer;
#define DEFENSE_TIMER_PERIOD 1*HZ
+static void defense_work_handler(void *data);
+static DECLARE_WORK(defense_work, defense_work_handler, NULL);
-static void defense_timer_handler(unsigned long data)
+static void defense_work_handler(void *data)
{
update_defense_level();
if (atomic_read(&ip_vs_dropentry))
ip_vs_random_dropentry();
- mod_timer(&defense_timer, jiffies + DEFENSE_TIMER_PERIOD);
+ schedule_delayed_work(&defense_work, DEFENSE_TIMER_PERIOD);
}
-
int
ip_vs_use_count_inc(void)
{
out:
read_unlock(&__ip_vs_svc_lock);
- IP_VS_DBG(6, "lookup service: fwm %u %s %u.%u.%u.%u:%u %s\n",
+ IP_VS_DBG(9, "lookup service: fwm %u %s %u.%u.%u.%u:%u %s\n",
fwmark, ip_vs_proto_name(protocol),
NIPQUAD(vaddr), ntohs(vport),
svc?"hit":"not hit");
*/
list_for_each_entry_safe(dest, nxt, &ip_vs_dest_trash, n_list) {
IP_VS_DBG(3, "Destination %u/%u.%u.%u.%u:%u still in trash, "
- "refcnt=%d\n",
+ "dest->refcnt=%d\n",
dest->vfwmark,
NIPQUAD(dest->addr), ntohs(dest->port),
atomic_read(&dest->refcnt));
atomic_set(&dest->refcnt, 0);
INIT_LIST_HEAD(&dest->d_list);
- dest->dst_lock = SPIN_LOCK_UNLOCKED;
- dest->stats.lock = SPIN_LOCK_UNLOCKED;
+ spin_lock_init(&dest->dst_lock);
+ spin_lock_init(&dest->stats.lock);
__ip_vs_update_dest(svc, dest, udest);
ip_vs_new_estimator(&dest->stats);
dest = ip_vs_trash_get_dest(svc, daddr, dport);
if (dest != NULL) {
IP_VS_DBG(3, "Get destination %u.%u.%u.%u:%u from trash, "
- "refcnt=%d, service %u/%u.%u.%u.%u:%u\n",
+ "dest->refcnt=%d, service %u/%u.%u.%u.%u:%u\n",
NIPQUAD(daddr), ntohs(dport),
atomic_read(&dest->refcnt),
dest->vfwmark,
atomic_dec(&dest->svc->refcnt);
kfree(dest);
} else {
- IP_VS_DBG(3, "Moving dest %u.%u.%u.%u:%u into trash, refcnt=%d\n",
+ IP_VS_DBG(3, "Moving dest %u.%u.%u.%u:%u into trash, "
+ "dest->refcnt=%d\n",
NIPQUAD(dest->addr), ntohs(dest->port),
atomic_read(&dest->refcnt));
list_add(&dest->n_list, &ip_vs_dest_trash);
svc->netmask = u->netmask;
INIT_LIST_HEAD(&svc->destinations);
- svc->sched_lock = RW_LOCK_UNLOCKED;
- svc->stats.lock = SPIN_LOCK_UNLOCKED;
+ rwlock_init(&svc->sched_lock);
+ spin_lock_init(&svc->stats.lock);
/* Bind the scheduler */
ret = ip_vs_bind_scheduler(svc, sched);
static int
proc_do_defense_mode(ctl_table *table, int write, struct file * filp,
- void __user *buffer, size_t *lenp)
+ void __user *buffer, size_t *lenp, loff_t *ppos)
{
int *valp = table->data;
int val = *valp;
int rc;
- rc = proc_dointvec(table, write, filp, buffer, lenp);
+ rc = proc_dointvec(table, write, filp, buffer, lenp, ppos);
if (write && (*valp != val)) {
if ((*valp < 0) || (*valp > 3)) {
/* Restore the correct value */
*valp = val;
} else {
- local_bh_disable();
update_defense_level();
- local_bh_enable();
}
}
return rc;
static int
proc_do_sync_threshold(ctl_table *table, int write, struct file *filp,
- void __user *buffer, size_t *lenp)
+ void __user *buffer, size_t *lenp, loff_t *ppos)
{
int *valp = table->data;
int val[2];
/* backup the value first */
memcpy(val, valp, sizeof(val));
- rc = proc_dointvec(table, write, filp, buffer, lenp);
+ rc = proc_dointvec(table, write, filp, buffer, lenp, ppos);
if (write && (valp[0] < 0 || valp[1] < 0 || valp[0] >= valp[1])) {
/* Restore the correct value */
memcpy(valp, val, sizeof(val));
{
.ctl_name = NET_IPV4_VS_TO_ES,
.procname = "timeout_established",
- .data = &vs_timeout_table_dos.timeout[IP_VS_S_ESTABLISHED],
+ .data = &vs_timeout_table_dos.timeout[IP_VS_S_ESTABLISHED],
.maxlen = sizeof(int),
- .mode = 0644,
+ .mode = 0644,
.proc_handler = &proc_dointvec_jiffies,
},
{
.procname = "timeout_synsent",
.data = &vs_timeout_table_dos.timeout[IP_VS_S_SYN_SENT],
.maxlen = sizeof(int),
- .mode = 0644,
+ .mode = 0644,
.proc_handler = &proc_dointvec_jiffies,
},
{
.procname = "timeout_synrecv",
.data = &vs_timeout_table_dos.timeout[IP_VS_S_SYN_RECV],
.maxlen = sizeof(int),
- .mode = 0644,
+ .mode = 0644,
.proc_handler = &proc_dointvec_jiffies,
},
{
.procname = "timeout_finwait",
.data = &vs_timeout_table_dos.timeout[IP_VS_S_FIN_WAIT],
.maxlen = sizeof(int),
- .mode = 0644,
+ .mode = 0644,
.proc_handler = &proc_dointvec_jiffies,
},
{
.procname = "timeout_close",
.data = &vs_timeout_table_dos.timeout[IP_VS_S_CLOSE],
.maxlen = sizeof(int),
- .mode = 0644,
+ .mode = 0644,
.proc_handler = &proc_dointvec_jiffies,
},
{
.procname = "timeout_closewait",
.data = &vs_timeout_table_dos.timeout[IP_VS_S_CLOSE_WAIT],
.maxlen = sizeof(int),
- .mode = 0644,
+ .mode = 0644,
.proc_handler = &proc_dointvec_jiffies,
},
{
.procname = "timeout_lastack",
.data = &vs_timeout_table_dos.timeout[IP_VS_S_LAST_ACK],
.maxlen = sizeof(int),
- .mode = 0644,
+ .mode = 0644,
.proc_handler = &proc_dointvec_jiffies,
},
{
.procname = "timeout_listen",
.data = &vs_timeout_table_dos.timeout[IP_VS_S_LISTEN],
.maxlen = sizeof(int),
- .mode = 0644,
+ .mode = 0644,
.proc_handler = &proc_dointvec_jiffies,
},
{
.procname = "timeout_synack",
.data = &vs_timeout_table_dos.timeout[IP_VS_S_SYNACK],
.maxlen = sizeof(int),
- .mode = 0644,
+ .mode = 0644,
.proc_handler = &proc_dointvec_jiffies,
},
{
.procname = "timeout_udp",
.data = &vs_timeout_table_dos.timeout[IP_VS_S_UDP],
.maxlen = sizeof(int),
- .mode = 0644,
+ .mode = 0644,
.proc_handler = &proc_dointvec_jiffies,
},
{
.mode = 0644,
.proc_handler = &proc_dointvec,
},
+ {
+ .ctl_name = NET_IPV4_VS_EXPIRE_QUIESCENT_TEMPLATE,
+ .procname = "expire_quiescent_template",
+ .data = &sysctl_ip_vs_expire_quiescent_template,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = &proc_dointvec,
+ },
{
.ctl_name = NET_IPV4_VS_SYNC_THRESHOLD,
.procname = "sync_threshold",
{ .ctl_name = 0 }
};
-static ctl_table ipv4_table[] = {
+static ctl_table ipvs_ipv4_table[] = {
{
.ctl_name = NET_IPV4,
.procname = "ipv4",
.ctl_name = CTL_NET,
.procname = "net",
.mode = 0555,
- .child = ipv4_table,
+ .child = ipvs_ipv4_table,
},
{ .ctl_name = 0 }
};
#define DAEMON_ARG_LEN (sizeof(struct ip_vs_daemon_user))
#define MAX_ARG_LEN SVCDEST_ARG_LEN
-static unsigned char set_arglen[SET_CMDID(IP_VS_SO_SET_MAX)+1] = {
+static const unsigned char set_arglen[SET_CMDID(IP_VS_SO_SET_MAX)+1] = {
[SET_CMDID(IP_VS_SO_SET_ADD)] = SERVICE_ARG_LEN,
[SET_CMDID(IP_VS_SO_SET_EDIT)] = SERVICE_ARG_LEN,
[SET_CMDID(IP_VS_SO_SET_DEL)] = SERVICE_ARG_LEN,
/* increase the module use count */
ip_vs_use_count_inc();
- if (down_interruptible(&__ip_vs_mutex)) {
+ if (mutex_lock_interruptible(&__ip_vs_mutex)) {
ret = -ERESTARTSYS;
goto out_dec;
}
ip_vs_service_put(svc);
out_unlock:
- up(&__ip_vs_mutex);
+ mutex_unlock(&__ip_vs_mutex);
out_dec:
/* decrease the module use count */
ip_vs_use_count_dec();
dst->addr = src->addr;
dst->port = src->port;
dst->fwmark = src->fwmark;
- strcpy(dst->sched_name, src->scheduler->name);
+ strlcpy(dst->sched_name, src->scheduler->name, sizeof(dst->sched_name));
dst->flags = src->flags;
dst->timeout = src->timeout / HZ;
dst->netmask = src->netmask;
list_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) {
if (count >= get->num_services)
goto out;
+ memset(&entry, 0, sizeof(entry));
ip_vs_copy_service(&entry, svc);
if (copy_to_user(&uptr->entrytable[count],
&entry, sizeof(entry))) {
list_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) {
if (count >= get->num_services)
goto out;
+ memset(&entry, 0, sizeof(entry));
ip_vs_copy_service(&entry, svc);
if (copy_to_user(&uptr->entrytable[count],
&entry, sizeof(entry))) {
#define GET_TIMEOUT_ARG_LEN (sizeof(struct ip_vs_timeout_user))
#define GET_DAEMON_ARG_LEN (sizeof(struct ip_vs_daemon_user) * 2)
-static unsigned char get_arglen[GET_CMDID(IP_VS_SO_GET_MAX)+1] = {
+static const unsigned char get_arglen[GET_CMDID(IP_VS_SO_GET_MAX)+1] = {
[GET_CMDID(IP_VS_SO_GET_VERSION)] = 64,
[GET_CMDID(IP_VS_SO_GET_INFO)] = GET_INFO_ARG_LEN,
[GET_CMDID(IP_VS_SO_GET_SERVICES)] = GET_SERVICES_ARG_LEN,
if (copy_from_user(arg, user, get_arglen[GET_CMDID(cmd)]) != 0)
return -EFAULT;
- if (down_interruptible(&__ip_vs_mutex))
+ if (mutex_lock_interruptible(&__ip_vs_mutex))
return -ERESTARTSYS;
switch (cmd) {
memset(&d, 0, sizeof(d));
if (ip_vs_sync_state & IP_VS_STATE_MASTER) {
d[0].state = IP_VS_STATE_MASTER;
- strcpy(d[0].mcast_ifn, ip_vs_master_mcast_ifn);
+ strlcpy(d[0].mcast_ifn, ip_vs_master_mcast_ifn, sizeof(d[0].mcast_ifn));
d[0].syncid = ip_vs_master_syncid;
}
if (ip_vs_sync_state & IP_VS_STATE_BACKUP) {
d[1].state = IP_VS_STATE_BACKUP;
- strcpy(d[1].mcast_ifn, ip_vs_backup_mcast_ifn);
+ strlcpy(d[1].mcast_ifn, ip_vs_backup_mcast_ifn, sizeof(d[1].mcast_ifn));
d[1].syncid = ip_vs_backup_syncid;
}
if (copy_to_user(user, &d, sizeof(d)) != 0)
}
out:
- up(&__ip_vs_mutex);
+ mutex_unlock(&__ip_vs_mutex);
return ret;
}
}
memset(&ip_vs_stats, 0, sizeof(ip_vs_stats));
- ip_vs_stats.lock = SPIN_LOCK_UNLOCKED;
+ spin_lock_init(&ip_vs_stats.lock);
ip_vs_new_estimator(&ip_vs_stats);
- /* Hook the defense timer */
+ /* Schedule the periodic defense work */
- init_timer(&defense_timer);
- defense_timer.function = defense_timer_handler;
- defense_timer.expires = jiffies + DEFENSE_TIMER_PERIOD;
- add_timer(&defense_timer);
+ schedule_delayed_work(&defense_work, DEFENSE_TIMER_PERIOD);
LeaveFunction(2);
return 0;
{
EnterFunction(2);
ip_vs_trash_cleanup();
- del_timer_sync(&defense_timer);
+ cancel_rearming_delayed_work(&defense_work);
ip_vs_kill_estimator(&ip_vs_stats);
unregister_sysctl_table(sysctl_header);
proc_net_remove("ip_vs_stats");