Commit 7fd10678 authored by Simon Horman

Merge branch 'master' of git://git.kernel.org/pub/scm/linux/kernel/git/horms/lvs-2.6 into lvs-next-2.6
parents a4356b29 e3c2ced8
@@ -242,4 +242,164 @@ struct ip_vs_daemon_user {
int syncid;
};
/*
*
* IPVS Generic Netlink interface definitions
*
*/
/* Generic Netlink family info */
#define IPVS_GENL_NAME "IPVS"
#define IPVS_GENL_VERSION 0x1
struct ip_vs_flags {
__be32 flags;
__be32 mask;
};
/* Generic Netlink command attributes */
enum {
IPVS_CMD_UNSPEC = 0,
IPVS_CMD_NEW_SERVICE, /* add service */
IPVS_CMD_SET_SERVICE, /* modify service */
IPVS_CMD_DEL_SERVICE, /* delete service */
IPVS_CMD_GET_SERVICE, /* get service info */
IPVS_CMD_NEW_DEST, /* add destination */
IPVS_CMD_SET_DEST, /* modify destination */
IPVS_CMD_DEL_DEST, /* delete destination */
IPVS_CMD_GET_DEST, /* get destination info */
IPVS_CMD_NEW_DAEMON, /* start sync daemon */
IPVS_CMD_DEL_DAEMON, /* stop sync daemon */
IPVS_CMD_GET_DAEMON, /* get sync daemon status */
IPVS_CMD_SET_CONFIG, /* set config settings */
IPVS_CMD_GET_CONFIG, /* get config settings */
IPVS_CMD_SET_INFO, /* only used in GET_INFO reply */
IPVS_CMD_GET_INFO, /* get general IPVS info */
IPVS_CMD_ZERO, /* zero all counters and stats */
IPVS_CMD_FLUSH, /* flush services and dests */
__IPVS_CMD_MAX,
};
#define IPVS_CMD_MAX (__IPVS_CMD_MAX - 1)
/* Attributes used in the first level of commands */
enum {
IPVS_CMD_ATTR_UNSPEC = 0,
IPVS_CMD_ATTR_SERVICE, /* nested service attribute */
IPVS_CMD_ATTR_DEST, /* nested destination attribute */
IPVS_CMD_ATTR_DAEMON, /* nested sync daemon attribute */
IPVS_CMD_ATTR_TIMEOUT_TCP, /* TCP connection timeout */
IPVS_CMD_ATTR_TIMEOUT_TCP_FIN, /* TCP FIN wait timeout */
IPVS_CMD_ATTR_TIMEOUT_UDP, /* UDP timeout */
__IPVS_CMD_ATTR_MAX,
};
#define IPVS_CMD_ATTR_MAX (__IPVS_CMD_ATTR_MAX - 1)
/*
* Attributes used to describe a service
*
* Used inside nested attribute IPVS_CMD_ATTR_SERVICE
*/
enum {
IPVS_SVC_ATTR_UNSPEC = 0,
IPVS_SVC_ATTR_AF, /* address family */
IPVS_SVC_ATTR_PROTOCOL, /* virtual service protocol */
IPVS_SVC_ATTR_ADDR, /* virtual service address */
IPVS_SVC_ATTR_PORT, /* virtual service port */
IPVS_SVC_ATTR_FWMARK, /* firewall mark of service */
IPVS_SVC_ATTR_SCHED_NAME, /* name of scheduler */
IPVS_SVC_ATTR_FLAGS, /* virtual service flags */
IPVS_SVC_ATTR_TIMEOUT, /* persistent timeout */
IPVS_SVC_ATTR_NETMASK, /* persistent netmask */
IPVS_SVC_ATTR_STATS, /* nested attribute for service stats */
__IPVS_SVC_ATTR_MAX,
};
#define IPVS_SVC_ATTR_MAX (__IPVS_SVC_ATTR_MAX - 1)
/*
* Attributes used to describe a destination (real server)
*
* Used inside nested attribute IPVS_CMD_ATTR_DEST
*/
enum {
IPVS_DEST_ATTR_UNSPEC = 0,
IPVS_DEST_ATTR_ADDR, /* real server address */
IPVS_DEST_ATTR_PORT, /* real server port */
IPVS_DEST_ATTR_FWD_METHOD, /* forwarding method */
IPVS_DEST_ATTR_WEIGHT, /* destination weight */
IPVS_DEST_ATTR_U_THRESH, /* upper threshold */
IPVS_DEST_ATTR_L_THRESH, /* lower threshold */
IPVS_DEST_ATTR_ACTIVE_CONNS, /* active connections */
IPVS_DEST_ATTR_INACT_CONNS, /* inactive connections */
IPVS_DEST_ATTR_PERSIST_CONNS, /* persistent connections */
IPVS_DEST_ATTR_STATS, /* nested attribute for dest stats */
__IPVS_DEST_ATTR_MAX,
};
#define IPVS_DEST_ATTR_MAX (__IPVS_DEST_ATTR_MAX - 1)
/*
* Attributes describing a sync daemon
*
* Used inside nested attribute IPVS_CMD_ATTR_DAEMON
*/
enum {
IPVS_DAEMON_ATTR_UNSPEC = 0,
IPVS_DAEMON_ATTR_STATE, /* sync daemon state (master/backup) */
IPVS_DAEMON_ATTR_MCAST_IFN, /* multicast interface name */
IPVS_DAEMON_ATTR_SYNC_ID, /* SyncID we belong to */
__IPVS_DAEMON_ATTR_MAX,
};
#define IPVS_DAEMON_ATTR_MAX (__IPVS_DAEMON_ATTR_MAX - 1)
/*
* Attributes used to describe service or destination entry statistics
*
* Used inside nested attributes IPVS_SVC_ATTR_STATS and IPVS_DEST_ATTR_STATS
*/
enum {
IPVS_STATS_ATTR_UNSPEC = 0,
IPVS_STATS_ATTR_CONNS, /* connections scheduled */
IPVS_STATS_ATTR_INPKTS, /* incoming packets */
IPVS_STATS_ATTR_OUTPKTS, /* outgoing packets */
IPVS_STATS_ATTR_INBYTES, /* incoming bytes */
IPVS_STATS_ATTR_OUTBYTES, /* outgoing bytes */
IPVS_STATS_ATTR_CPS, /* current connection rate */
IPVS_STATS_ATTR_INPPS, /* current in packet rate */
IPVS_STATS_ATTR_OUTPPS, /* current out packet rate */
IPVS_STATS_ATTR_INBPS, /* current in byte rate */
IPVS_STATS_ATTR_OUTBPS, /* current out byte rate */
__IPVS_STATS_ATTR_MAX,
};
#define IPVS_STATS_ATTR_MAX (__IPVS_STATS_ATTR_MAX - 1)
/* Attributes used in response to IPVS_CMD_GET_INFO command */
enum {
IPVS_INFO_ATTR_UNSPEC = 0,
IPVS_INFO_ATTR_VERSION, /* IPVS version number */
IPVS_INFO_ATTR_CONN_TAB_SIZE, /* size of connection hash table */
__IPVS_INFO_ATTR_MAX,
};
#define IPVS_INFO_ATTR_MAX (__IPVS_INFO_ATTR_MAX - 1)
#endif /* _IP_VS_H */
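The definitions above are only a vocabulary; the kernel registers them as Generic Netlink family "IPVS", version 0x1, and userspace talks to it over a genetlink socket. As a rough illustration, here is a hedged sketch (not part of this commit, and written against the later libnl-3 API; tools contemporary with this change used libnl-1, whose calls differ) that resolves the family and issues IPVS_CMD_GET_INFO, printing the IPVS_INFO_ATTR_* attributes from the reply:

/*
 * Illustrative userspace sketch, assuming libnl-3.  Build with:
 *   cc ipvs_info.c $(pkg-config --cflags --libs libnl-genl-3.0)
 * Requires CAP_NET_ADMIN and a kernel providing this interface.
 */
#include <stdio.h>
#include <netlink/netlink.h>
#include <netlink/genl/genl.h>
#include <netlink/genl/ctrl.h>
#include <linux/ip_vs.h>		/* the definitions above; path assumed */

static int info_cb(struct nl_msg *msg, void *arg)
{
	struct nlattr *attrs[IPVS_INFO_ATTR_MAX + 1];

	/* GET_INFO replies carry no family-specific header, so hdrlen is 0 */
	if (genlmsg_parse(nlmsg_hdr(msg), 0, attrs, IPVS_INFO_ATTR_MAX, NULL) < 0)
		return NL_SKIP;
	if (attrs[IPVS_INFO_ATTR_VERSION])
		printf("version code:  %#x\n",
		       nla_get_u32(attrs[IPVS_INFO_ATTR_VERSION]));
	if (attrs[IPVS_INFO_ATTR_CONN_TAB_SIZE])
		printf("conn tab size: %u\n",
		       nla_get_u32(attrs[IPVS_INFO_ATTR_CONN_TAB_SIZE]));
	return NL_OK;
}

int main(void)
{
	struct nl_sock *sk = nl_socket_alloc();
	struct nl_msg *msg;
	int family;

	genl_connect(sk);
	family = genl_ctrl_resolve(sk, IPVS_GENL_NAME);
	if (family < 0)
		return 1;	/* IPVS genl family not available */

	nl_socket_modify_cb(sk, NL_CB_VALID, NL_CB_CUSTOM, info_cb, NULL);

	msg = nlmsg_alloc();
	genlmsg_put(msg, NL_AUTO_PORT, NL_AUTO_SEQ, family, 0, 0,
		    IPVS_CMD_GET_INFO, IPVS_GENL_VERSION);
	nl_send_auto(sk, msg);
	nlmsg_free(msg);

	nl_recvmsgs_default(sk);	/* dispatches info_cb on the reply */
	nl_socket_free(sk);
	return 0;
}

Service and destination parameters travel the same way, except that they sit inside the nested containers IPVS_CMD_ATTR_SERVICE and IPVS_CMD_ATTR_DEST rather than as top-level attributes.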
@@ -683,6 +683,8 @@ extern void ip_vs_sync_conn(struct ip_vs_conn *cp);
/*
* IPVS rate estimator prototypes (from ip_vs_est.c)
*/
extern int ip_vs_estimator_init(void);
extern void ip_vs_estimator_cleanup(void);
extern void ip_vs_new_estimator(struct ip_vs_stats *stats);
extern void ip_vs_kill_estimator(struct ip_vs_stats *stats);
extern void ip_vs_zero_estimator(struct ip_vs_stats *stats);
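The new/kill pair brackets the lifetime of a stats block. A minimal hypothetical sketch (caller names invented for illustration; not from this commit):

/* Hypothetical caller: attach a rate estimator to a stats block for
 * as long as the owning object lives.  With this commit the shared
 * est_timer is armed once in ip_vs_estimator_init(), so attach and
 * detach no longer need to start or stop the timer themselves. */
static struct ip_vs_stats sample_stats;	/* assumed zero-initialised */

static void sample_stats_attach(void)
{
	ip_vs_new_estimator(&sample_stats);	/* link onto est_list */
}

static void sample_stats_detach(void)
{
	ip_vs_kill_estimator(&sample_stats);	/* unlink from est_list */
}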
......
@@ -71,14 +71,20 @@ config IP_VS_PROTO_UDP
This option enables support for load balancing UDP transport
protocol. Say Y if unsure.
config IP_VS_PROTO_AH_ESP
bool
depends on UNDEFINED
config IP_VS_PROTO_ESP
bool "ESP load balancing support"
select IP_VS_PROTO_AH_ESP
---help---
This option enables support for load balancing ESP (Encapsulation
Security Payload) transport protocol. Say Y if unsure.
config IP_VS_PROTO_AH
bool "AH load balancing support"
select IP_VS_PROTO_AH_ESP
---help---
This option enables support for load balancing AH (Authentication
Header) transport protocol. Say Y if unsure.
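Read together with the Makefile hunk below: ESP and AH remain separate user-visible options, but both now select the hidden helper symbol IP_VS_PROTO_AH_ESP, which builds the single merged object ip_vs_proto_ah_esp.o. The "depends on UNDEFINED" line is the usual trick for a select-only symbol: the dependency can never be satisfied directly, but select ignores dependencies, so only the two options above can switch it on. Enabling both would leave a .config fragment roughly like this (illustrative, assuming defaults):

CONFIG_IP_VS_PROTO_ESP=y
CONFIG_IP_VS_PROTO_AH=y
CONFIG_IP_VS_PROTO_AH_ESP=y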
......
@@ -6,8 +6,7 @@
ip_vs_proto-objs-y :=
ip_vs_proto-objs-$(CONFIG_IP_VS_PROTO_TCP) += ip_vs_proto_tcp.o
ip_vs_proto-objs-$(CONFIG_IP_VS_PROTO_UDP) += ip_vs_proto_udp.o
ip_vs_proto-objs-$(CONFIG_IP_VS_PROTO_ESP) += ip_vs_proto_esp.o
ip_vs_proto-objs-$(CONFIG_IP_VS_PROTO_AH) += ip_vs_proto_ah.o
ip_vs_proto-objs-$(CONFIG_IP_VS_PROTO_AH_ESP) += ip_vs_proto_ah_esp.o
ip_vs-objs := ip_vs_conn.o ip_vs_core.o ip_vs_ctl.o ip_vs_sched.o \
ip_vs_xmit.o ip_vs_app.o ip_vs_sync.o \
......
@@ -1070,10 +1070,12 @@ static int __init ip_vs_init(void)
{
int ret;
ip_vs_estimator_init();
ret = ip_vs_control_init();
if (ret < 0) {
IP_VS_ERR("can't setup control.\n");
goto cleanup_nothing;
goto cleanup_estimator;
}
ip_vs_protocol_init();
@@ -1106,7 +1108,8 @@ static int __init ip_vs_init(void)
cleanup_protocol:
ip_vs_protocol_cleanup();
ip_vs_control_cleanup();
cleanup_nothing:
cleanup_estimator:
ip_vs_estimator_cleanup();
return ret;
}
@@ -1117,6 +1120,7 @@ static void __exit ip_vs_cleanup(void)
ip_vs_app_cleanup();
ip_vs_protocol_cleanup();
ip_vs_control_cleanup();
ip_vs_estimator_cleanup();
IP_VS_INFO("ipvs unloaded.\n");
}
......
@@ -124,8 +124,6 @@ void ip_vs_new_estimator(struct ip_vs_stats *stats)
est->outbps = stats->outbps<<5;
spin_lock_bh(&est_lock);
if (list_empty(&est_list))
mod_timer(&est_timer, jiffies + 2 * HZ);
list_add(&est->list, &est_list);
spin_unlock_bh(&est_lock);
}
@@ -136,11 +134,6 @@ void ip_vs_kill_estimator(struct ip_vs_stats *stats)
spin_lock_bh(&est_lock);
list_del(&est->list);
while (list_empty(&est_list) && try_to_del_timer_sync(&est_timer) < 0) {
spin_unlock_bh(&est_lock);
cpu_relax();
spin_lock_bh(&est_lock);
}
spin_unlock_bh(&est_lock);
}
@@ -160,3 +153,14 @@ void ip_vs_zero_estimator(struct ip_vs_stats *stats)
est->inbps = 0;
est->outbps = 0;
}
int __init ip_vs_estimator_init(void)
{
mod_timer(&est_timer, jiffies + 2 * HZ);
return 0;
}
void ip_vs_estimator_cleanup(void)
{
del_timer_sync(&est_timer);
}
@@ -96,7 +96,6 @@ struct ip_vs_lblc_entry {
* IPVS lblc hash table
*/
struct ip_vs_lblc_table {
rwlock_t lock; /* lock for this table */
struct list_head bucket[IP_VS_LBLC_TAB_SIZE]; /* hash bucket */
atomic_t entries; /* number of entries */
int max_size; /* maximum size of entries */
@@ -123,31 +122,6 @@ static ctl_table vs_vars_table[] = {
static struct ctl_table_header * sysctl_header;
/*
* new/free an ip_vs_lblc_entry, which is a mapping of a destination
* IP address to a server.
*/
static inline struct ip_vs_lblc_entry *
ip_vs_lblc_new(__be32 daddr, struct ip_vs_dest *dest)
{
struct ip_vs_lblc_entry *en;
en = kmalloc(sizeof(struct ip_vs_lblc_entry), GFP_ATOMIC);
if (en == NULL) {
IP_VS_ERR("ip_vs_lblc_new(): no memory\n");
return NULL;
}
INIT_LIST_HEAD(&en->list);
en->addr = daddr;
atomic_inc(&dest->refcnt);
en->dest = dest;
return en;
}
static inline void ip_vs_lblc_free(struct ip_vs_lblc_entry *en)
{
list_del(&en->list);
@@ -173,55 +147,66 @@ static inline unsigned ip_vs_lblc_hashkey(__be32 addr)
* Hash an entry in the ip_vs_lblc_table.
*/
static int
static void
ip_vs_lblc_hash(struct ip_vs_lblc_table *tbl, struct ip_vs_lblc_entry *en)
{
unsigned hash;
if (!list_empty(&en->list)) {
IP_VS_ERR("ip_vs_lblc_hash(): request for already hashed, "
"called from %p\n", __builtin_return_address(0));
return 0;
}
unsigned hash = ip_vs_lblc_hashkey(en->addr);
/*
* Hash by destination IP address
*/
hash = ip_vs_lblc_hashkey(en->addr);
write_lock(&tbl->lock);
list_add(&en->list, &tbl->bucket[hash]);
atomic_inc(&tbl->entries);
write_unlock(&tbl->lock);
return 1;
}
/*
* Get ip_vs_lblc_entry associated with supplied parameters.
* Get ip_vs_lblc_entry associated with supplied parameters. Called under read
* lock
*/
static inline struct ip_vs_lblc_entry *
ip_vs_lblc_get(struct ip_vs_lblc_table *tbl, __be32 addr)
{
unsigned hash;
unsigned hash = ip_vs_lblc_hashkey(addr);
struct ip_vs_lblc_entry *en;
hash = ip_vs_lblc_hashkey(addr);
list_for_each_entry(en, &tbl->bucket[hash], list)
if (en->addr == addr)
return en;
read_lock(&tbl->lock);
return NULL;
}
list_for_each_entry(en, &tbl->bucket[hash], list) {
if (en->addr == addr) {
/* HIT */
read_unlock(&tbl->lock);
return en;
/*
* Create or update an ip_vs_lblc_entry, which is a mapping of a destination IP
* address to a server. Called under write lock.
*/
static inline struct ip_vs_lblc_entry *
ip_vs_lblc_new(struct ip_vs_lblc_table *tbl, __be32 daddr,
struct ip_vs_dest *dest)
{
struct ip_vs_lblc_entry *en;
en = ip_vs_lblc_get(tbl, daddr);
if (!en) {
en = kmalloc(sizeof(*en), GFP_ATOMIC);
if (!en) {
IP_VS_ERR("ip_vs_lblc_new(): no memory\n");
return NULL;
}
}
read_unlock(&tbl->lock);
en->addr = daddr;
en->lastuse = jiffies;
return NULL;
atomic_inc(&dest->refcnt);
en->dest = dest;
ip_vs_lblc_hash(tbl, en);
} else if (en->dest != dest) {
atomic_dec(&en->dest->refcnt);
atomic_inc(&dest->refcnt);
en->dest = dest;
}
return en;
}
@@ -230,30 +215,29 @@ ip_vs_lblc_get(struct ip_vs_lblc_table *tbl, __be32 addr)
*/
static void ip_vs_lblc_flush(struct ip_vs_lblc_table *tbl)
{
int i;
struct ip_vs_lblc_entry *en, *nxt;
int i;
for (i=0; i<IP_VS_LBLC_TAB_SIZE; i++) {
write_lock(&tbl->lock);
list_for_each_entry_safe(en, nxt, &tbl->bucket[i], list) {
ip_vs_lblc_free(en);
atomic_dec(&tbl->entries);
}
write_unlock(&tbl->lock);
}
}
static inline void ip_vs_lblc_full_check(struct ip_vs_lblc_table *tbl)
static inline void ip_vs_lblc_full_check(struct ip_vs_service *svc)
{
struct ip_vs_lblc_table *tbl = svc->sched_data;
struct ip_vs_lblc_entry *en, *nxt;
unsigned long now = jiffies;
int i, j;
struct ip_vs_lblc_entry *en, *nxt;
for (i=0, j=tbl->rover; i<IP_VS_LBLC_TAB_SIZE; i++) {
j = (j + 1) & IP_VS_LBLC_TAB_MASK;
write_lock(&tbl->lock);
write_lock(&svc->sched_lock);
list_for_each_entry_safe(en, nxt, &tbl->bucket[j], list) {
if (time_before(now,
en->lastuse + sysctl_ip_vs_lblc_expiration))
@@ -262,7 +246,7 @@ static inline void ip_vs_lblc_full_check(struct ip_vs_lblc_table *tbl)
ip_vs_lblc_free(en);
atomic_dec(&tbl->entries);
}
write_unlock(&tbl->lock);
write_unlock(&svc->sched_lock);
}
tbl->rover = j;
}
@@ -281,17 +265,16 @@ static inline void ip_vs_lblc_full_check(struct ip_vs_lblc_table *tbl)
*/
static void ip_vs_lblc_check_expire(unsigned long data)
{
struct ip_vs_lblc_table *tbl;
struct ip_vs_service *svc = (struct ip_vs_service *) data;
struct ip_vs_lblc_table *tbl = svc->sched_data;
unsigned long now = jiffies;
int goal;
int i, j;
struct ip_vs_lblc_entry *en, *nxt;
tbl = (struct ip_vs_lblc_table *)data;
if ((tbl->counter % COUNT_FOR_FULL_EXPIRATION) == 0) {
/* do full expiration check */
ip_vs_lblc_full_check(tbl);
ip_vs_lblc_full_check(svc);
tbl->counter = 1;
goto out;
}
@@ -308,7 +291,7 @@ static void ip_vs_lblc_check_expire(unsigned long data)
for (i=0, j=tbl->rover; i<IP_VS_LBLC_TAB_SIZE; i++) {
j = (j + 1) & IP_VS_LBLC_TAB_MASK;
write_lock(&tbl->lock);
write_lock(&svc->sched_lock);
list_for_each_entry_safe(en, nxt, &tbl->bucket[j], list) {
if (time_before(now, en->lastuse + ENTRY_TIMEOUT))
continue;
@@ -317,7 +300,7 @@ static void ip_vs_lblc_check_expire(unsigned long data)
atomic_dec(&tbl->entries);
goal--;
}
write_unlock(&tbl->lock);
write_unlock(&svc->sched_lock);
if (goal <= 0)
break;
}
@@ -336,15 +319,14 @@ static int ip_vs_lblc_init_svc(struct ip_vs_service *svc)
/*
* Allocate the ip_vs_lblc_table for this service
*/
tbl = kmalloc(sizeof(struct ip_vs_lblc_table), GFP_ATOMIC);
tbl = kmalloc(sizeof(*tbl), GFP_ATOMIC);
if (tbl == NULL) {
IP_VS_ERR("ip_vs_lblc_init_svc(): no memory\n");
return -ENOMEM;
}
svc->sched_data = tbl;
IP_VS_DBG(6, "LBLC hash table (memory=%Zdbytes) allocated for "
"current service\n",
sizeof(struct ip_vs_lblc_table));
"current service\n", sizeof(*tbl));
/*
* Initialize the hash buckets
@@ -352,7 +334,6 @@ static int ip_vs_lblc_init_svc(struct ip_vs_service *svc)
for (i=0; i<IP_VS_LBLC_TAB_SIZE; i++) {
INIT_LIST_HEAD(&tbl->bucket[i]);
}
rwlock_init(&tbl->lock);
tbl->max_size = IP_VS_LBLC_TAB_SIZE*16;
tbl->rover = 0;
tbl->counter = 1;
@@ -361,9 +342,8 @@ static int ip_vs_lblc_init_svc(struct ip_vs_service *svc)
* Hook periodic timer for garbage collection
*/
setup_timer(&tbl->periodic_timer, ip_vs_lblc_check_expire,
(unsigned long)tbl);
tbl->periodic_timer.expires = jiffies+CHECK_EXPIRE_INTERVAL;
add_timer(&tbl->periodic_timer);
(unsigned long)svc);
mod_timer(&tbl->periodic_timer, jiffies + CHECK_EXPIRE_INTERVAL);
return 0;
}
@@ -380,22 +360,16 @@ static int ip_vs_lblc_done_svc(struct ip_vs_service *svc)
ip_vs_lblc_flush(tbl);
/* release the table itself */
kfree(svc->sched_data);
kfree(tbl);
IP_VS_DBG(6, "LBLC hash table (memory=%Zdbytes) released\n",
sizeof(struct ip_vs_lblc_table));
sizeof(*tbl));
return 0;
}
static int ip_vs_lblc_update_svc(struct ip_vs_service *svc)
{
return 0;
}
static inline struct ip_vs_dest *
__ip_vs_wlc_schedule(struct ip_vs_service *svc, struct iphdr *iph)
__ip_vs_lblc_schedule(struct ip_vs_service *svc, struct iphdr *iph)
{
struct ip_vs_dest *dest, *least;
int loh, doh;
@@ -484,46 +458,54 @@ is_overloaded(struct ip_vs_dest *dest, struct ip_vs_service *svc)
static struct ip_vs_dest *
ip_vs_lblc_schedule(struct ip_vs_service *svc, const struct sk_buff *skb)
{
struct ip_vs_dest *dest;
struct ip_vs_lblc_table *tbl;
struct ip_vs_lblc_entry *en;
struct ip_vs_lblc_table *tbl = svc->sched_data;
struct iphdr *iph = ip_hdr(skb);
struct ip_vs_dest *dest = NULL;
struct ip_vs_lblc_entry *en;
IP_VS_DBG(6, "ip_vs_lblc_schedule(): Scheduling...\n");
tbl = (struct ip_vs_lblc_table *)svc->sched_data;
/* First look in our cache */
read_lock(&svc->sched_lock);
en = ip_vs_lblc_get(tbl, iph->daddr);
if (en == NULL) {
dest = __ip_vs_wlc_schedule(svc, iph);
if (dest == NULL) {
IP_VS_DBG(1, "no destination available\n");
return NULL;
}
en = ip_vs_lblc_new(iph->daddr, dest);
if (en == NULL) {
return NULL;
}
ip_vs_lblc_hash(tbl, en);
} else {
dest = en->dest;
if (!(dest->flags & IP_VS_DEST_F_AVAILABLE)
|| atomic_read(&dest->weight) <= 0
|| is_overloaded(dest, svc)) {
dest = __ip_vs_wlc_schedule(svc, iph);
if (dest == NULL) {
IP_VS_DBG(1, "no destination available\n");
return NULL;
}
atomic_dec(&en->dest->refcnt);
atomic_inc(&dest->refcnt);
en->dest = dest;
}
if (en) {
/* We only hold a read lock, but this is atomic */
en->lastuse = jiffies;
/*
* If the destination is not available, i.e. it's in the trash,
* we must ignore it, as it may be removed from under our feet,
* if someone drops our reference count. Our caller only makes
* sure that destinations, that are not in the trash, are not
* moved to the trash, while we are scheduling. But anyone can
* free up entries from the trash at any time.
*/
if (en->dest->flags & IP_VS_DEST_F_AVAILABLE)
dest = en->dest;
}
read_unlock(&svc->sched_lock);
/* If the destination has a weight and is not overloaded, use it */
if (dest && atomic_read(&dest->weight) > 0 && !is_overloaded(dest, svc))
goto out;
/* No cache entry or it is invalid, time to schedule */
dest = __ip_vs_lblc_schedule(svc, iph);
if (!dest) {
IP_VS_DBG(1, "no destination available\n");
return NULL;
}
en->lastuse = jiffies;