mirror of https://github.com/apache/cloudstack.git
1560 lines
41 KiB
C
1560 lines
41 KiB
C
/*
|
|
* Copyright (C) 2004, 2005 Mike Wray <mike.wray@hp.com>
|
|
*
|
|
* This program is free software; you can redistribute it and/or modify
|
|
* it under the terms of the GNU General Public License as published by the
|
|
* Free Software Foundation; either version 2 of the License, or (at your
|
|
* option) any later version.
|
|
*
|
|
* This program is distributed in the hope that it will be useful, but
|
|
* WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
|
|
* or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
|
* for more details.
|
|
*
|
|
* You should have received a copy of the GNU General Public License along
|
|
* with this program; if not, write to the Free software Foundation, Inc.,
|
|
* 59 Temple Place, suite 330, Boston, MA 02111-1307 USA
|
|
*
|
|
*/
|
|
|
|
#ifdef __KERNEL__
|
|
#include <linux/config.h>
|
|
#include <linux/kernel.h>
|
|
#include <linux/module.h>
|
|
#include <linux/init.h>
|
|
#include <linux/string.h>
|
|
#include <linux/version.h>
|
|
|
|
#include <linux/net.h>
|
|
#include <linux/in.h>
|
|
#include <linux/inet.h>
|
|
#include <linux/netdevice.h>
|
|
#include <linux/inetdevice.h>
|
|
#include <linux/udp.h>
|
|
|
|
#include <net/ip.h>
|
|
#include <net/protocol.h>
|
|
#include <net/route.h>
|
|
#include <linux/skbuff.h>
|
|
#include <linux/spinlock.h>
|
|
#include <asm/semaphore.h>
|
|
|
|
#else
|
|
|
|
#include "sys_kernel.h"
|
|
#include <netinet/in.h>
|
|
#include <arpa/inet.h>
|
|
#include <linux/ip.h>
|
|
#include <linux/udp.h>
|
|
#include "spinlock.h"
|
|
#include "skbuff.h"
|
|
|
|
#endif
|
|
|
|
#include <tunnel.h>
|
|
#include <vnet.h>
|
|
#include <vif.h>
|
|
#include <if_varp.h>
|
|
#include <varp.h>
|
|
#include <varp_util.h>
|
|
#include <vnet.h>
|
|
#include <etherip.h>
|
|
#include <vnet_forward.h>
|
|
|
|
#include "allocate.h"
|
|
#include "iostream.h"
|
|
#include "hash_table.h"
|
|
#include "sys_net.h"
|
|
#include "sys_string.h"
|
|
#include "skb_util.h"
|
|
#include "timer_util.h"
|
|
|
|
#define MODULE_NAME "VARP"
|
|
#define DEBUG 1
|
|
#undef DEBUG
|
|
#include "debug.h"
|
|
|
|
/** @file VARP: Virtual ARP.
|
|
*
|
|
* Handles virtual ARP requests for vnet/vmac.
|
|
*/
|
|
|
|
/*
|
|
|
|
Varp uses UDP on port 1798.
|
|
|
|
on domain up: ?
|
|
send varp.announce { id, vmac, vnet, coa } for each vif
|
|
that hasn't announced before, or has changed.
|
|
install vif entries in local table.
|
|
|
|
on varp.announce{ id, vmac, vnet, coa }:
|
|
update VARP entry for vmac x vnet if have one, reset ttl.
|
|
|
|
on varp.request { id, vmac, vnet }:
|
|
if have a vif for the requested vmac/vnet,
|
|
reply with varp.announce{ id, vmac, vnet, coa }
|
|
|
|
on timer:
|
|
traverse VARP table, flush old entries.
|
|
|
|
on probe timer:
|
|
probe again if not out of tries.
|
|
if out of tries invalidate entry.
|
|
|
|
*/
|
|
|
|
/** Time-to-live of varp entries (in jiffies).*/
|
|
#define VARP_ENTRY_TTL (60*HZ)
|
|
|
|
/** Maximum number of varp probes to make. */
|
|
#define VARP_PROBE_MAX 5
|
|
|
|
/** Interval between varp probes (in jiffies). */
|
|
#define VARP_PROBE_INTERVAL (3*HZ)
|
|
|
|
/** Maximum number of queued skbs for a varp entry. */
|
|
#define VARP_QUEUE_MAX 16
|
|
|
|
/** Number of buckets in the varp table (must be prime). */
|
|
#define VARP_TABLE_BUCKETS 3001
|
|
|
|
/** States a varp entry can be in. */
enum {
    /** Care-of address not resolved yet (probing may be in progress). */
    VARP_STATE_INCOMPLETE = 1,
    /** Care-of address is known. */
    VARP_STATE_REACHABLE,
    /** Probing ran out of tries. */
    VARP_STATE_FAILED,
};
|
|
|
|
/** Flag bits stored in VarpEntry.flags. */
enum {
    /** A probe timer is pending for the entry. */
    VARP_FLAG_PROBING   = 1 << 0,
    /** Entry is never updated or swept. */
    VARP_FLAG_PERMANENT = 1 << 1,
};
|
|
|
|
/** Key for varp entries. */
|
|
typedef struct VarpKey {
|
|
/** Vnet id (network order). */
|
|
VnetId vnet;
|
|
/** Virtual MAC address. */
|
|
Vmac vmac;
|
|
} VarpKey;
|
|
|
|
/** An entry in the varp cache. */
|
|
typedef struct VarpEntry {
|
|
/** Key for the entry. */
|
|
VarpKey key;
|
|
/** Care-of address for the key. */
|
|
VarpAddr addr;
|
|
/** Last-updated timestamp. */
|
|
unsigned long timestamp;
|
|
/** State. */
|
|
short state;
|
|
/** Flags. */
|
|
short flags;
|
|
/** Reference count. */
|
|
atomic_t refcount;
|
|
/** Lock. */
|
|
rwlock_t lock;
|
|
unsigned long lflags;
|
|
|
|
/** How many probes have been made. */
|
|
atomic_t probes;
|
|
/** Probe timer. */
|
|
struct timer_list timer;
|
|
void (*error)(struct VarpEntry *ventry, struct sk_buff *skb);
|
|
/** Outbound skb queue. */
|
|
struct sk_buff_head queue;
|
|
/** Maximum size of the queue. */
|
|
int queue_max;
|
|
atomic_t deleted;
|
|
} VarpEntry;
|
|
|
|
/** The varp cache. Varp entries indexed by VarpKey. */
|
|
typedef struct VarpTable {
|
|
|
|
HashTable *table;
|
|
|
|
/** Sweep timer. */
|
|
struct timer_list timer;
|
|
|
|
rwlock_t lock;
|
|
struct semaphore mutex;
|
|
|
|
int entry_ttl;
|
|
int probe_max;
|
|
int probe_interval;
|
|
int queue_max;
|
|
|
|
} VarpTable;
|
|
|
|
/** The varp cache. */
|
|
static VarpTable *varp_table = NULL;
|
|
|
|
/** Module parameter for the multicast address. */
|
|
static char *varp_mcaddr = NULL;
|
|
|
|
/** Multicast address (network order). */
|
|
u32 varp_mcast_addr = 0;
|
|
|
|
/** UDP port (network order). */
|
|
u16 varp_port = 0;
|
|
|
|
char *varp_device = "xen-br0";
|
|
|
|
/** Take the table lock for reading; irq state is saved in flags. */
#define VarpTable_read_lock(vtable, flags)                      \
    do{ read_lock_irqsave(&(vtable)->lock, (flags)); } while(0)

/** Release a table read lock, restoring the irq state from flags. */
#define VarpTable_read_unlock(vtable, flags)                         \
    do{ read_unlock_irqrestore(&(vtable)->lock, (flags)); } while(0)

/** Take the table lock for writing; irq state is saved in flags. */
#define VarpTable_write_lock(vtable, flags)                      \
    do{ write_lock_irqsave(&(vtable)->lock, (flags)); } while(0)

/** Release a table write lock, restoring the irq state from flags. */
#define VarpTable_write_unlock(vtable, flags)                         \
    do{ write_unlock_irqrestore(&(vtable)->lock, (flags)); } while(0)

/** Lock a varp entry.
 * The saved irq state is also stored in the entry (lflags) so that the
 * lock can be dropped by code that does not have the original flags.
 */
#define VarpEntry_lock(ventry, flags)                                                        \
    do{ write_lock_irqsave(&(ventry)->lock, (flags)); (ventry)->lflags = (flags); } while(0)

/** Unlock a varp entry. The irq state is reloaded from the entry (lflags),
 * overwriting whatever flags held before.
 */
#define VarpEntry_unlock(ventry, flags)                                                           \
    do{ (flags) = (ventry)->lflags; write_unlock_irqrestore(&(ventry)->lock, (flags)); } while(0)
|
|
|
|
void VarpTable_sweep(VarpTable *vtable);
|
|
void VarpTable_flush(VarpTable *vtable);
|
|
void VarpTable_print(VarpTable *vtable, IOStream *io);
|
|
int VarpEntry_output(VarpEntry *ventry, struct sk_buff *skb);
|
|
|
|
#include "./varp_util.c"
|
|
|
|
/** Dump the varp cache to iostdout.
 * Does nothing unless DEBUG is defined at compile time.
 */
void varp_dprint(void)
{
#ifdef DEBUG
    VarpTable_print(varp_table, iostdout);
#endif
}
|
|
|
|
/** Flush the varp cache.
|
|
*/
|
|
void varp_flush(void){
|
|
VarpTable_flush(varp_table);
|
|
}
|
|
|
|
#ifdef __KERNEL__
|
|
/** Get the unicast address of a named network device.
 *
 * @param device device name
 * @param addr return parameter for the address (set to 0 on failure)
 * @return 0 on success, error code otherwise
 */
static int device_ucast_addr(const char *device, uint32_t *addr)
{
    int err;
    struct net_device *dev = NULL;

    err = vnet_get_device(device, &dev);
    if(err) goto exit;
    err = vnet_get_device_address(dev, addr);
  exit:
    if(err){
        *addr = 0;
    }
    // vnet_get_device() looks the device up with dev_get_by_name(), which
    // takes a reference on it. Drop the reference now we are done,
    // otherwise the device can never be unregistered.
    if(dev) dev_put(dev);
    return err;
}
|
|
|
|
/** Get the unicast address of the varp device.
|
|
*/
|
|
int varp_ucast_addr(uint32_t *addr)
|
|
{
|
|
int err = -ENODEV;
|
|
const char *devices[] = { varp_device, "eth0", "eth1", "eth2", NULL };
|
|
const char **p;
|
|
for(p = devices; err && *p; p++){
|
|
err = device_ucast_addr(*p, addr);
|
|
}
|
|
return err;
|
|
}
|
|
|
|
/** Lookup a network device by name.
|
|
*
|
|
* @param name device name
|
|
* @param dev return parameter for the device
|
|
* @return 0 on success, error code otherwise
|
|
*/
|
|
int vnet_get_device(const char *name, struct net_device **dev){
|
|
int err = 0;
|
|
*dev = dev_get_by_name(name);
|
|
if(!*dev){
|
|
err = -ENETDOWN;
|
|
}
|
|
return err;
|
|
}
|
|
|
|
/** Get the source address from a device.
|
|
*
|
|
* @param dev device
|
|
* @param addr return parameter for address
|
|
* @return 0 on success, error code otherwise
|
|
*/
|
|
int vnet_get_device_address(struct net_device *dev, u32 *addr){
|
|
int err = 0;
|
|
struct in_device *in_dev;
|
|
|
|
in_dev = in_dev_get(dev);
|
|
if(!in_dev){
|
|
err = -ENODEV;
|
|
goto exit;
|
|
}
|
|
*addr = in_dev->ifa_list->ifa_address;
|
|
in_dev_put(in_dev);
|
|
exit:
|
|
return err;
|
|
}
|
|
|
|
#else
|
|
|
|
/** User-space stand-in for the kernel varp_ucast_addr().
 * Leaves addr untouched and always reports success.
 *
 * @param addr return parameter for the address (not modified)
 * @return 0
 */
int varp_ucast_addr(uint32_t *addr)
{
    (void)addr;
    return 0;
}
|
|
|
|
#endif
|
|
|
|
/** Print varp info and the varp cache.
|
|
*/
|
|
void varp_print(IOStream *io){
|
|
uint32_t addr = 0;
|
|
varp_ucast_addr(&addr);
|
|
|
|
IOStream_print(io, "(varp \n");
|
|
IOStream_print(io, " (device %s)\n", varp_device);
|
|
IOStream_print(io, " (mcast_addr " IPFMT ")\n", NIPQUAD(varp_mcast_addr));
|
|
IOStream_print(io, " (ucast_addr " IPFMT ")\n", NIPQUAD(addr));
|
|
IOStream_print(io, " (port %d)\n", ntohs(varp_port));
|
|
IOStream_print(io, " (encapsulation %s)\n",
|
|
(etherip_in_udp ? "etherip_in_udp" : "etherip"));
|
|
IOStream_print(io, " (entry_ttl %lu)\n", varp_table->entry_ttl);
|
|
IOStream_print(io, ")\n");
|
|
VarpTable_print(varp_table, io);
|
|
}
|
|
|
|
#ifdef __KERNEL__
|
|
|
|
#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0)
|
|
|
|
/** Find the output route to an IPv4 address (2.6 kernels, flowi interface).
 *
 * @param daddr destination address (network order)
 * @param prt return parameter for the route
 * @return 0 on success, error code otherwise
 */
static inline int addr_route(u32 daddr, struct rtable **prt){
    struct flowi fl = { .nl_u = { .ip4_u = { .daddr = daddr } } };

    return ip_route_output_key(prt, &fl);
}
|
|
|
|
#else
|
|
|
|
/** Find the output route to an IPv4 address (pre-2.6 kernels, rt_key interface).
 *
 * @param daddr destination address (network order)
 * @param prt return parameter for the route
 * @return 0 on success, error code otherwise
 */
static inline int addr_route(u32 daddr, struct rtable **prt){
    struct rt_key key = { .dst = daddr };

    return ip_route_output_key(prt, &key);
}
|
|
|
|
#endif // LINUX_VERSION_CODE
|
|
|
|
#ifndef LL_RESERVED_SPACE
/* Fallback for kernels that do not provide LL_RESERVED_SPACE:
 * the device's hard header length rounded down to a multiple of
 * HH_DATA_MOD, plus one extra HH_DATA_MOD.
 * The argument is parenthesized so any pointer expression can be passed.
 */
#define HH_DATA_MOD 16
#define LL_RESERVED_SPACE(dev) \
    ((((dev)->hard_header_len) & ~(HH_DATA_MOD - 1)) + HH_DATA_MOD)

#endif // LL_RESERVED_SPACE
|
|
|
|
#else // __KERNEL__
|
|
|
|
#define ip_eth_mc_map(daddr, dmac) do{ }while(0)
|
|
|
|
#endif // __KERNEL__
|
|
|
|
/** Send a varp protocol message.
|
|
*
|
|
* @param opcode varp opcode (host order)
|
|
* @param dev device (may be null)
|
|
* @param skb skb being replied to (may be null)
|
|
* @param vnet vnet id (in network order)
|
|
* @param vmac vmac (in network order)
|
|
* @return 0 on success, error code otherwise
|
|
*/
|
|
int varp_send(u16 opcode, struct net_device *dev, struct sk_buff *skbin,
|
|
VnetId *vnet, Vmac *vmac){
|
|
int err = 0;
|
|
int link_n = 0;
|
|
int ip_n = sizeof(struct iphdr);
|
|
int udp_n = sizeof(struct udphdr);
|
|
int varp_n = sizeof(VarpHdr);
|
|
struct sk_buff *skbout = NULL;
|
|
VarpHdr *varph = NULL;
|
|
u8 smacbuf[6] = {}, dmacbuf[6] = {};
|
|
u8 *smac = smacbuf, *dmac = dmacbuf;
|
|
u32 saddr = 0, daddr = 0;
|
|
u16 sport = 0, dport = 0;
|
|
#if defined(DEBUG)
|
|
char vnetbuf[VNET_ID_BUF];
|
|
#endif
|
|
|
|
dprintf("> opcode=%d vnet= %s vmac=" MACFMT "\n",
|
|
opcode, VnetId_ntoa(vnet, vnetbuf), MAC6TUPLE(vmac->mac));
|
|
|
|
dport = varp_port;
|
|
if(skbin){
|
|
daddr = skbin->nh.iph->saddr;
|
|
dmac = eth_hdr(skbin)->h_source;
|
|
sport = skbin->h.uh->dest;
|
|
} else {
|
|
if(MULTICAST(varp_mcast_addr)){
|
|
daddr = varp_mcast_addr;
|
|
ip_eth_mc_map(daddr, dmac);
|
|
} else {
|
|
daddr = INADDR_BROADCAST;
|
|
}
|
|
sport = varp_port;
|
|
}
|
|
|
|
#ifdef __KERNEL__
|
|
{
|
|
struct in_device *in_dev = NULL;
|
|
if(!dev){
|
|
struct rtable *rt = NULL;
|
|
err = addr_route(daddr, &rt);
|
|
if(err) goto exit;
|
|
dev = rt->u.dst.dev;
|
|
}
|
|
|
|
in_dev = in_dev_get(dev);
|
|
if(!in_dev){
|
|
err = -ENODEV;
|
|
goto exit;
|
|
}
|
|
link_n = LL_RESERVED_SPACE(dev);
|
|
saddr = in_dev->ifa_list->ifa_address;
|
|
smac = dev->dev_addr;
|
|
if(daddr == INADDR_BROADCAST){
|
|
daddr = in_dev->ifa_list->ifa_broadcast;
|
|
dmac = dev->broadcast;
|
|
}
|
|
in_dev_put(in_dev);
|
|
}
|
|
#else
|
|
{
|
|
extern uint32_t vnetd_addr(void);
|
|
saddr = vnetd_addr();
|
|
}
|
|
#endif // __KERNEL__
|
|
|
|
dprintf("> dev=%s\n", (dev ? dev->name : "<none>"));
|
|
dprintf("> smac=" MACFMT " dmac=" MACFMT "\n", MAC6TUPLE(smac), MAC6TUPLE(dmac));
|
|
dprintf("> saddr=" IPFMT " daddr=" IPFMT "\n", NIPQUAD(saddr), NIPQUAD(daddr));
|
|
dprintf("> sport=%u dport=%u\n", ntohs(sport), ntohs(dport));
|
|
|
|
skbout = alloc_skb(link_n + ip_n + udp_n + varp_n, GFP_ATOMIC);
|
|
if (!skbout){
|
|
err = -ENOMEM;
|
|
goto exit;
|
|
}
|
|
skbout->dev = dev;
|
|
skb_reserve(skbout, link_n);
|
|
skbout->protocol = htons(ETH_P_IP);
|
|
|
|
#ifdef __KERNEL__
|
|
// Device header. Pushes device header on front of skb.
|
|
if (dev->hard_header){
|
|
err = dev->hard_header(skbout, dev, ETH_P_IP, dmac, smac, skbout->len);
|
|
if(err < 0) goto exit;
|
|
//skbout->mac.raw = skbout->data;
|
|
skb_reset_mac_header(skbout);
|
|
}
|
|
#else
|
|
smac = smac; // Defeat unused variable warning.
|
|
#endif // __KERNEL__
|
|
|
|
// IP header.
|
|
skbout->nh.raw = skb_put(skbout, ip_n);
|
|
skbout->nh.iph->version = 4;
|
|
skbout->nh.iph->ihl = ip_n / 4;
|
|
skbout->nh.iph->tos = 0;
|
|
skbout->nh.iph->tot_len = htons(ip_n + udp_n + varp_n);
|
|
skbout->nh.iph->id = 0;
|
|
skbout->nh.iph->frag_off = 0;
|
|
skbout->nh.iph->ttl = 64;
|
|
skbout->nh.iph->protocol = IPPROTO_UDP;
|
|
skbout->nh.iph->saddr = saddr;
|
|
skbout->nh.iph->daddr = daddr;
|
|
skbout->nh.iph->check = 0;
|
|
|
|
// UDP header.
|
|
skbout->h.raw = skb_put(skbout, udp_n);
|
|
skbout->h.uh->source = sport;
|
|
skbout->h.uh->dest = dport;
|
|
skbout->h.uh->len = htons(udp_n + varp_n);
|
|
skbout->h.uh->check = 0;
|
|
|
|
// Varp header.
|
|
varph = (void*)skb_put(skbout, varp_n);
|
|
*varph = (VarpHdr){};
|
|
varph->hdr.id = htons(VARP_ID);
|
|
varph->hdr.opcode = htons(opcode);
|
|
varph->vnet = *vnet;
|
|
varph->vmac = *vmac;
|
|
varph->addr.family = AF_INET;
|
|
varph->addr.u.ip4.s_addr = saddr;
|
|
|
|
err = skb_xmit(skbout);
|
|
|
|
exit:
|
|
if(err && skbout) kfree_skb(skbout);
|
|
dprintf("< err=%d\n", err);
|
|
return err;
|
|
}
|
|
|
|
|
|
/** Send a varp request for the vnet and destination mac of a packet.
|
|
* Assumes the ventry is locked.
|
|
*
|
|
* @param skb packet
|
|
* @param vnet vnet (in network order)
|
|
* @return 0 on success, error code otherwise
|
|
*/
|
|
int varp_solicit(VnetId *vnet, Vmac *vmac){
|
|
return varp_send(VARP_OP_REQUEST, NULL, NULL, vnet, vmac);
|
|
}
|
|
|
|
/* Test some flags.
|
|
*
|
|
* @param ventry varp entry
|
|
* @param flags to test
|
|
* @return nonzero if flags set
|
|
*/
|
|
int VarpEntry_get_flags(VarpEntry *ventry, int flags){
|
|
return ventry->flags & flags;
|
|
}
|
|
|
|
/** Set some flags.
|
|
*
|
|
* @param ventry varp entry
|
|
* @param flags to set
|
|
* @param set set flags on if nonzero, off if zero
|
|
* @return new flags value
|
|
*/
|
|
int VarpEntry_set_flags(VarpEntry *ventry, int flags, int set){
|
|
if(set){
|
|
ventry->flags |= flags;
|
|
} else {
|
|
ventry->flags &= ~flags;
|
|
}
|
|
return ventry->flags;
|
|
}
|
|
|
|
/** Print a varp entry.
|
|
*
|
|
* @param ventry varp entry
|
|
*/
|
|
void VarpEntry_print(VarpEntry *ventry, IOStream *io){
|
|
IOStream_print(io, "(ventry \n");
|
|
if(ventry){
|
|
unsigned long now = jiffies;
|
|
char *state, *flags;
|
|
char vnetbuf[VNET_ID_BUF];
|
|
char addrbuf[VARP_ADDR_BUF];
|
|
|
|
switch(ventry->state){
|
|
case VARP_STATE_INCOMPLETE: state = "incomplete"; break;
|
|
case VARP_STATE_REACHABLE: state = "reachable"; break;
|
|
case VARP_STATE_FAILED: state = "failed"; break;
|
|
default: state = "unknown"; break;
|
|
}
|
|
flags = (VarpEntry_get_flags(ventry, VARP_FLAG_PROBING) ? "P" : "-");
|
|
|
|
IOStream_print(io, " (ref %d)\n", atomic_read(&ventry->refcount));
|
|
IOStream_print(io, " (state %s)\n", state);
|
|
IOStream_print(io, " (flags %s)\n", flags);
|
|
IOStream_print(io, " (addr %s)\n", VarpAddr_ntoa(&ventry->addr, addrbuf));
|
|
IOStream_print(io, " (queue %d)\n", skb_queue_len(&ventry->queue));
|
|
IOStream_print(io, " (age %lu)\n", now - ventry->timestamp);
|
|
IOStream_print(io, " (vmac " MACFMT ")\n", MAC6TUPLE(ventry->key.vmac.mac));
|
|
IOStream_print(io, " (vnet %s)\n", VnetId_ntoa(&ventry->key.vnet, vnetbuf));
|
|
}
|
|
IOStream_print(io, ")\n");
|
|
}
|
|
|
|
/** Free a varp entry.
|
|
*
|
|
* @param ventry varp entry
|
|
*/
|
|
static void VarpEntry_free(VarpEntry *ventry){
|
|
if(!ventry) return;
|
|
deallocate(ventry);
|
|
}
|
|
|
|
/** Increment reference count.
|
|
*
|
|
* @param ventry varp entry (may be null)
|
|
*/
|
|
void VarpEntry_incref(VarpEntry *ventry){
|
|
if(!ventry) return;
|
|
atomic_inc(&ventry->refcount);
|
|
}
|
|
|
|
/** Decrement reference count, freeing if zero.
|
|
*
|
|
* @param ventry varp entry (may be null)
|
|
*/
|
|
void VarpEntry_decref(VarpEntry *ventry){
|
|
if(!ventry) return;
|
|
if(atomic_dec_and_test(&ventry->refcount)){
|
|
VarpEntry_free(ventry);
|
|
}
|
|
}
|
|
|
|
/** Call the error handler.
|
|
*
|
|
* @param ventry varp entry
|
|
*/
|
|
void VarpEntry_error(VarpEntry *ventry){
|
|
struct sk_buff *skb;
|
|
skb = skb_peek(&ventry->queue);
|
|
if(!skb) return;
|
|
if(ventry->error) ventry->error(ventry, skb);
|
|
skb_queue_purge(&ventry->queue);
|
|
}
|
|
|
|
/** Schedule the varp entry timer.
|
|
* Must increment the reference count before doing
|
|
* this the first time, so the ventry won't be freed
|
|
* before the timer goes off.
|
|
*
|
|
* @param ventry varp entry
|
|
*/
|
|
void VarpEntry_schedule(VarpEntry *ventry){
|
|
timer_set(&ventry->timer, VARP_PROBE_INTERVAL);
|
|
}
|
|
|
|
/** Function called when a varp entry timer goes off.
|
|
* If the entry is still incomplete, carries on probing.
|
|
* Otherwise stops probing.
|
|
*
|
|
* @param arg ventry
|
|
*/
|
|
static void varp_timer_fn(unsigned long arg){
|
|
unsigned long flags;
|
|
VarpEntry *ventry = (VarpEntry *)arg;
|
|
struct sk_buff *skb = NULL;
|
|
int probing = 0;
|
|
|
|
dprintf(">\n");
|
|
VarpEntry_lock(ventry, flags);
|
|
if(!atomic_read(&ventry->deleted)){
|
|
switch(ventry->state){
|
|
case VARP_STATE_REACHABLE:
|
|
case VARP_STATE_FAILED:
|
|
break;
|
|
case VARP_STATE_INCOMPLETE:
|
|
// Probe if haven't run out of tries, otherwise fail.
|
|
if(atomic_read(&ventry->probes) < VARP_PROBE_MAX){
|
|
unsigned long qflags;
|
|
VnetId vnet;
|
|
Vmac vmac;
|
|
|
|
probing = 1;
|
|
spin_lock_irqsave(&ventry->queue.lock, qflags);
|
|
skb = skb_peek(&ventry->queue);
|
|
if(skb){
|
|
vmac = *(Vmac*)eth_hdr(skb)->h_dest;
|
|
}
|
|
spin_unlock_irqrestore(&ventry->queue.lock, qflags);
|
|
if(skb){
|
|
dprintf("> skbs in queue - solicit\n");
|
|
vnet = ventry->key.vnet;
|
|
atomic_inc(&ventry->probes);
|
|
VarpEntry_unlock(ventry, flags);
|
|
varp_solicit(&vnet, &vmac);
|
|
VarpEntry_lock(ventry, flags);
|
|
} else {
|
|
dprintf("> empty queue.\n");
|
|
}
|
|
VarpEntry_schedule(ventry);
|
|
} else {
|
|
VarpEntry_error(ventry);
|
|
ventry->state = VARP_STATE_FAILED;
|
|
}
|
|
break;
|
|
}
|
|
}
|
|
VarpEntry_set_flags(ventry, VARP_FLAG_PROBING, probing);
|
|
VarpEntry_unlock(ventry, flags);
|
|
if(!probing) VarpEntry_decref(ventry);
|
|
dprintf("<\n");
|
|
}
|
|
|
|
/** Default error function for varp entries.
|
|
*
|
|
* @param ventry varp entry
|
|
* @param skb packet dropped because of error
|
|
*/
|
|
static void varp_error_fn(VarpEntry *ventry, struct sk_buff *skb){
|
|
}
|
|
|
|
/** Create a varp entry. Initializes the internal state.
|
|
*
|
|
* @param vnet vnet id
|
|
* @param vmac virtual MAC address (copied)
|
|
* @return ventry or null
|
|
*/
|
|
VarpEntry * VarpEntry_new(VnetId *vnet, Vmac *vmac){
|
|
VarpEntry *ventry = ALLOCATE(VarpEntry);
|
|
if(ventry){
|
|
unsigned long now = jiffies;
|
|
|
|
atomic_set(&ventry->refcount, 1);
|
|
atomic_set(&ventry->probes, 0);
|
|
atomic_set(&ventry->deleted, 0);
|
|
ventry->lock = RW_LOCK_UNLOCKED;
|
|
ventry->state = VARP_STATE_INCOMPLETE;
|
|
ventry->queue_max = VARP_QUEUE_MAX;
|
|
skb_queue_head_init(&ventry->queue);
|
|
timer_init(&ventry->timer, varp_timer_fn, ventry);
|
|
ventry->timestamp = now;
|
|
ventry->error = varp_error_fn;
|
|
|
|
ventry->key.vnet = *vnet;
|
|
ventry->key.vmac = *vmac;
|
|
}
|
|
return ventry;
|
|
}
|
|
|
|
/** Hash function for keys in the varp cache.
|
|
* Hashes the vnet id and mac.
|
|
*
|
|
* @param k key (VarpKey)
|
|
* @return hashcode
|
|
*/
|
|
static Hashcode varp_key_hash_fn(void *k){
|
|
return hash_hvoid(0, k, sizeof(VarpKey));
|
|
}
|
|
|
|
/** Test equality for keys in the varp cache.
|
|
* Compares vnet and mac.
|
|
*
|
|
* @param k1 key to compare (VarpKey)
|
|
* @param k2 key to compare (VarpKey)
|
|
* @return 1 if equal, 0 otherwise
|
|
*/
|
|
static int varp_key_equal_fn(void *k1, void *k2){
|
|
return memcmp(k1, k2, sizeof(VarpKey)) == 0;
|
|
}
|
|
|
|
/** Free an entry in the varp cache.
|
|
*
|
|
* @param table containing table
|
|
* @param entry entry to free
|
|
*/
|
|
static void varp_entry_free_fn(HashTable *table, HTEntry *entry){
|
|
VarpEntry *ventry;
|
|
if(!entry) return;
|
|
ventry = entry->value;
|
|
if(ventry) VarpEntry_decref(ventry);
|
|
HTEntry_free(entry);
|
|
}
|
|
|
|
/** Free the whole varp cache.
|
|
* Dangerous.
|
|
*
|
|
* @param vtable varp cache
|
|
*/
|
|
void VarpTable_free(VarpTable *vtable){
|
|
unsigned long vtflags;
|
|
dprintf(">\n");
|
|
if(!vtable) return;
|
|
VarpTable_write_lock(vtable, vtflags);
|
|
timer_cancel(&vtable->timer);
|
|
vtable->timer.data = 0;
|
|
if(vtable->table){
|
|
HashTable *table = vtable->table;
|
|
HashTable_for_decl(entry);
|
|
|
|
vtable->table = NULL;
|
|
HashTable_for_each(entry, table){
|
|
VarpEntry *ventry = entry->value;
|
|
unsigned long flags;
|
|
VarpEntry_lock(ventry, flags);
|
|
atomic_set(&ventry->deleted, 1);
|
|
if(VarpEntry_get_flags(ventry, VARP_FLAG_PROBING)){
|
|
timer_cancel(&ventry->timer);
|
|
ventry->timer.data = 0;
|
|
VarpEntry_decref(ventry);
|
|
}
|
|
VarpEntry_unlock(ventry, flags);
|
|
}
|
|
HashTable_free(table);
|
|
}
|
|
VarpTable_write_unlock(vtable, vtflags);
|
|
deallocate(vtable);
|
|
}
|
|
|
|
/** Schedule the varp table timer.
|
|
*
|
|
* @param vtable varp table
|
|
*/
|
|
void VarpTable_schedule(VarpTable *vtable){
|
|
timer_set(&vtable->timer, vtable->entry_ttl);
|
|
}
|
|
|
|
/** Function called when the varp table timer goes off.
|
|
* Sweeps old varp cache entries and reschedules itself.
|
|
*
|
|
* @param arg varp table
|
|
*/
|
|
static void varp_table_timer_fn(unsigned long arg){
|
|
VarpTable *vtable = (VarpTable *)arg;
|
|
if(vtable){
|
|
VarpTable_sweep(vtable);
|
|
VarpTable_schedule(vtable);
|
|
}
|
|
}
|
|
|
|
/** Print a varp table.
|
|
*
|
|
* @param vtable table
|
|
*/
|
|
void VarpTable_print(VarpTable *vtable, IOStream *io){
|
|
HashTable_for_decl(entry);
|
|
VarpEntry *ventry;
|
|
unsigned long vtflags, flags;
|
|
|
|
VarpTable_read_lock(vtable, vtflags);
|
|
HashTable_for_each(entry, vtable->table){
|
|
ventry = entry->value;
|
|
VarpEntry_lock(ventry, flags);
|
|
VarpEntry_print(ventry, io);
|
|
VarpEntry_unlock(ventry, flags);
|
|
}
|
|
VarpTable_read_unlock(vtable, vtflags);
|
|
}
|
|
|
|
/** Create a varp table.
|
|
*
|
|
* @return new table or null
|
|
*/
|
|
VarpTable * VarpTable_new(void){
|
|
int err = -ENOMEM;
|
|
VarpTable *vtable = NULL;
|
|
|
|
vtable = ALLOCATE(VarpTable);
|
|
if(!vtable) goto exit;
|
|
vtable->table = HashTable_new(VARP_TABLE_BUCKETS);
|
|
if(!vtable->table) goto exit;
|
|
vtable->table->key_size = sizeof(VarpKey);
|
|
vtable->table->key_equal_fn = varp_key_equal_fn;
|
|
vtable->table->key_hash_fn = varp_key_hash_fn;
|
|
vtable->table->entry_free_fn = varp_entry_free_fn;
|
|
|
|
vtable->entry_ttl = VARP_ENTRY_TTL;
|
|
vtable->probe_max = VARP_PROBE_MAX;
|
|
vtable->probe_interval = VARP_PROBE_INTERVAL;
|
|
vtable->queue_max = VARP_QUEUE_MAX;
|
|
|
|
init_MUTEX(&vtable->mutex);
|
|
vtable->lock = RW_LOCK_UNLOCKED;
|
|
timer_init(&vtable->timer, varp_table_timer_fn, vtable);
|
|
err = 0;
|
|
exit:
|
|
if(err){
|
|
VarpTable_free(vtable);
|
|
vtable = NULL;
|
|
}
|
|
return vtable;
|
|
}
|
|
|
|
/** Add a new entry to the varp table.
|
|
*
|
|
* @param vtable table
|
|
* @param vnet vnet id
|
|
* @param vmac virtual MAC address (copied)
|
|
* @return new entry or null
|
|
*/
|
|
VarpEntry * VarpTable_add(VarpTable *vtable, VnetId *vnet, Vmac *vmac){
|
|
int err = 0;
|
|
VarpKey key = { .vnet = *vnet, .vmac = *vmac};
|
|
VarpEntry *ventry = NULL;
|
|
HTEntry *entry = NULL;
|
|
unsigned long vtflags;
|
|
|
|
VarpTable_write_lock(vtable, vtflags);
|
|
ventry = HashTable_get(vtable->table, &key);
|
|
if(ventry){
|
|
VarpEntry_incref(ventry);
|
|
goto exit;
|
|
}
|
|
err = -ENOMEM;
|
|
ventry = VarpEntry_new(vnet, vmac);
|
|
if(!ventry) goto exit;
|
|
entry = HashTable_add(vtable->table, ventry, ventry);
|
|
if(!entry){
|
|
VarpEntry_decref(ventry);
|
|
ventry = NULL;
|
|
goto exit;
|
|
}
|
|
err = 0;
|
|
VarpEntry_incref(ventry);
|
|
exit:
|
|
VarpTable_write_unlock(vtable, vtflags);
|
|
return ventry;
|
|
}
|
|
|
|
/** Remove an entry from the varp table.
|
|
*
|
|
* @param vtable table
|
|
* @param ventry entry to remove
|
|
* @return removed count
|
|
*/
|
|
int VarpTable_remove(VarpTable *vtable, VarpEntry *ventry){
|
|
//TODO: Could send a varp announce with null addr for the entry
|
|
// vnet and vmac to notify others, so they will resolve the addr
|
|
// instead of sending traffic to us.
|
|
atomic_set(&ventry->deleted, 1);
|
|
skb_queue_purge(&ventry->queue);
|
|
return HashTable_remove(vtable->table, ventry);
|
|
}
|
|
|
|
/** Remove all entries using a vnet.
|
|
* Caller must hold the table lock.
|
|
*
|
|
* @param vtable table
|
|
* @param vnet vnet
|
|
* @return removed count
|
|
*/
|
|
int VarpTable_remove_vnet(VarpTable *vtable, VnetId *vnet){
|
|
int count = 0;
|
|
HashTable_for_decl(entry);
|
|
|
|
HashTable_for_each(entry, vtable->table){
|
|
VarpEntry *ventry = entry->value;
|
|
if(VnetId_eq(&ventry->key.vnet, vnet)){
|
|
count += VarpTable_remove(vtable, ventry);
|
|
}
|
|
}
|
|
return count;
|
|
}
|
|
|
|
/** Remove all entries using a vnet from the varp table.
|
|
*
|
|
* @param vnet vnet
|
|
* @return removed count
|
|
*/
|
|
int varp_remove_vnet(VnetId *vnet){
|
|
int count = 0;
|
|
unsigned long vtflags;
|
|
|
|
VarpTable_write_lock(varp_table, vtflags);
|
|
count = VarpTable_remove_vnet(varp_table, vnet);
|
|
VarpTable_write_unlock(varp_table, vtflags);
|
|
return count;
|
|
}
|
|
|
|
/** Lookup an entry in the varp table.
|
|
*
|
|
* @param vtable table
|
|
* @param vnet vnet id
|
|
* @param vmac virtual MAC address
|
|
* @param create create a new entry if needed if true
|
|
* @return entry found or null
|
|
*/
|
|
VarpEntry * VarpTable_lookup(VarpTable *vtable, VnetId *vnet, Vmac *vmac, int create){
|
|
VarpKey key = { .vnet = *vnet, .vmac = *vmac };
|
|
VarpEntry *ventry = NULL;
|
|
unsigned long vtflags;
|
|
|
|
VarpTable_read_lock(vtable, vtflags);
|
|
ventry = HashTable_get(vtable->table, &key);
|
|
if(ventry) VarpEntry_incref(ventry);
|
|
VarpTable_read_unlock(vtable, vtflags);
|
|
|
|
if(!ventry && create){
|
|
ventry = VarpTable_add(vtable, vnet, vmac);
|
|
}
|
|
return ventry;
|
|
}
|
|
|
|
/** Handle output for a reachable ventry.
|
|
* Send the skb using the tunnel to the care-of address.
|
|
* Assumes the ventry lock is held.
|
|
*
|
|
* @param ventry varp entry
|
|
* @param skb skb to send
|
|
* @return 0 on success, error code otherwise
|
|
*/
|
|
int VarpEntry_send(VarpEntry *ventry, struct sk_buff *skb){
|
|
int err = 0;
|
|
unsigned long flags = 0;
|
|
VarpAddr addr;
|
|
VnetId vnet;
|
|
|
|
dprintf("> skb=%p\n", skb);
|
|
vnet = ventry->key.vnet;
|
|
addr = ventry->addr;
|
|
VarpEntry_unlock(ventry, flags);
|
|
err = vnet_tunnel_send(&vnet, &addr, skb);
|
|
VarpEntry_lock(ventry, flags);
|
|
dprintf("< err=%d\n", err);
|
|
return err;
|
|
}
|
|
|
|
/** Handle output for a non-reachable ventry. Send messages to complete it.
|
|
* If the entry is still incomplete, queue the skb, otherwise
|
|
* send it. If the queue is full, dequeue and free an old skb to
|
|
* make room for the new one.
|
|
* Assumes the ventry lock is held.
|
|
*
|
|
* @param ventry varp entry
|
|
* @param skb skb to send
|
|
* @return 0 on success, error code otherwise
|
|
*/
|
|
int VarpEntry_resolve(VarpEntry *ventry, struct sk_buff *skb){
|
|
int err = 0;
|
|
unsigned long flags = 0;
|
|
VnetId vnet;
|
|
Vmac vmac;
|
|
|
|
dprintf("> skb=%p\n", skb);
|
|
ventry->state = VARP_STATE_INCOMPLETE;
|
|
atomic_set(&ventry->probes, 1);
|
|
if(!VarpEntry_get_flags(ventry, VARP_FLAG_PROBING)){
|
|
VarpEntry_set_flags(ventry, VARP_FLAG_PROBING, 1);
|
|
VarpEntry_incref(ventry);
|
|
VarpEntry_schedule(ventry);
|
|
}
|
|
vnet = ventry->key.vnet;
|
|
vmac = *(Vmac*)eth_hdr(skb)->h_dest;
|
|
VarpEntry_unlock(ventry, flags);
|
|
varp_solicit(&vnet, &vmac);
|
|
VarpEntry_lock(ventry, flags);
|
|
|
|
if(ventry->state == VARP_STATE_INCOMPLETE){
|
|
while(skb_queue_len(&ventry->queue) >= ventry->queue_max){
|
|
struct sk_buff *oldskb;
|
|
oldskb = skb_dequeue(&ventry->queue);
|
|
//oldskb = ventry->queue.next;
|
|
//__skb_unlink(oldskb, &ventry->queue);
|
|
if(!oldskb) break;
|
|
dprintf("> dropping skb=%p\n", oldskb);
|
|
kfree_skb(oldskb);
|
|
}
|
|
skb_queue_tail(&ventry->queue, skb);
|
|
} else {
|
|
err = VarpEntry_send(ventry, skb);
|
|
}
|
|
dprintf("< err=%d\n", err);
|
|
return err;
|
|
}
|
|
|
|
/** Process the output queue for a ventry. Sends the queued skbs if
|
|
* the ventry is reachable, otherwise drops them.
|
|
*
|
|
* @param ventry varp entry
|
|
*/
|
|
void VarpEntry_process_queue(VarpEntry *ventry){
|
|
struct sk_buff *skb;
|
|
for( ; ; ){
|
|
if(ventry->state != VARP_STATE_REACHABLE) break;
|
|
skb = skb_dequeue(&ventry->queue);
|
|
if(!skb) break;
|
|
VarpEntry_send(ventry, skb);
|
|
}
|
|
skb_queue_purge(&ventry->queue);
|
|
}
|
|
|
|
/** Multicast an skb on a vnet.
|
|
*
|
|
* @param vnet vnet id
|
|
* @param skb skb to send
|
|
* @return 0 on success, error code otherwise
|
|
*/
|
|
static int varp_multicast(VnetId *vnet, struct sk_buff *skb){
|
|
VarpAddr addr = { .family = AF_INET };
|
|
addr.u.ip4.s_addr = varp_mcast_addr;
|
|
return vnet_tunnel_send(vnet, &addr, skb);
|
|
}
|
|
|
|
/** Handle output for a ventry. Resolves the ventry
|
|
* if necessary.
|
|
*
|
|
* @param ventry varp entry
|
|
* @param skb skb to send
|
|
* @return 0 on success, error code otherwise
|
|
*/
|
|
int VarpEntry_output(VarpEntry *ventry, struct sk_buff *skb){
|
|
int err = 0;
|
|
unsigned long flags;
|
|
|
|
VarpEntry_lock(ventry, flags);
|
|
switch(ventry->state){
|
|
case VARP_STATE_REACHABLE:
|
|
if(skb_queue_len(&ventry->queue) > 0){
|
|
VarpEntry_process_queue(ventry);
|
|
}
|
|
err = VarpEntry_send(ventry, skb);
|
|
break;
|
|
default:
|
|
if(0){
|
|
err = VarpEntry_resolve(ventry, skb);
|
|
} else {
|
|
// Multicast the skb if the entry is not reachable.
|
|
VnetId vnet = ventry->key.vnet;
|
|
VarpEntry_unlock(ventry, flags);
|
|
err = varp_multicast(&vnet, skb);
|
|
VarpEntry_lock(ventry, flags);
|
|
}
|
|
break;
|
|
}
|
|
VarpEntry_unlock(ventry, flags);
|
|
return err;
|
|
}
|
|
|
|
/** Update a ventry. Sets the address and state to those given
|
|
* and sets the timestamp to 'now'.
|
|
*
|
|
* @param ventry varp entry
|
|
* @param addr care-of address
|
|
* @param state state
|
|
* @return 0 on success, error code otherwise
|
|
*/
|
|
int VarpEntry_update(VarpEntry *ventry, VarpAddr *addr, int state, int vflags){
|
|
int err = 0;
|
|
unsigned long now = jiffies;
|
|
unsigned long flags;
|
|
|
|
VarpEntry_lock(ventry, flags);
|
|
//if(atomic_read(&ventry->deleted)) goto exit;
|
|
if(VarpEntry_get_flags(ventry, VARP_FLAG_PERMANENT)) goto exit;
|
|
ventry->addr = *addr;
|
|
ventry->timestamp = now;
|
|
ventry->state = state;
|
|
// Can't process the queue while atomic as it calls schedule(),
|
|
// and that's bad.
|
|
//if(0 && (vflags & VARP_UPDATE_QUEUE) && !in_atomic()){
|
|
// VarpEntry_process_queue(ventry);
|
|
//}
|
|
exit:
|
|
VarpEntry_unlock(ventry, flags);
|
|
dprintf("< err=%d\n", err);
|
|
return err;
|
|
}
|
|
|
|
/** Update the entry for a vnet.
|
|
*
|
|
* @param vtable varp table
|
|
* @param vnet vnet id
|
|
* @param vmac mac address
|
|
* @param addr care-of-address
|
|
* @param state state
|
|
* @param flags update flags
|
|
* @return 0 on success, error code otherwise
|
|
*/
|
|
int VarpTable_update(VarpTable *vtable, VnetId *vnet, Vmac *vmac, VarpAddr *addr,
|
|
int state, int flags){
|
|
int err = 0;
|
|
VarpEntry *ventry;
|
|
#ifdef DEBUG
|
|
char vnetbuf[VNET_ID_BUF];
|
|
char addrbuf[VARP_ADDR_BUF];
|
|
|
|
dprintf("> vnet=%s mac=" MACFMT " addr=%s state=%d flags=%x\n",
|
|
VnetId_ntoa(vnet, vnetbuf),
|
|
MAC6TUPLE(vmac->mac),
|
|
VarpAddr_ntoa(addr, addrbuf),
|
|
state,
|
|
flags);
|
|
#endif
|
|
ventry = VarpTable_lookup(vtable, vnet, vmac, (flags & VARP_UPDATE_CREATE));
|
|
if(!ventry){
|
|
err = -ENOENT;
|
|
goto exit;
|
|
}
|
|
err = VarpEntry_update(ventry, addr, state, flags);
|
|
VarpEntry_decref(ventry);
|
|
exit:
|
|
dprintf("< err=%d\n", err);
|
|
return err;
|
|
}
|
|
|
|
/** Update the entry for a vnet: make it reachable and create an entry
|
|
* if needed.
|
|
*
|
|
* @param vnet vnet id
|
|
* @param vmac mac address
|
|
* @param addr care-of-address
|
|
* @return 0 on success, error code otherwise
|
|
*/
|
|
int varp_update(VnetId *vnet, unsigned char *vmac, VarpAddr *addr){
|
|
int err = 0;
|
|
if(!varp_table){
|
|
err = -ENOSYS;
|
|
} else {
|
|
err = VarpTable_update(varp_table, vnet, (Vmac*)vmac, addr,
|
|
VARP_STATE_REACHABLE, VARP_UPDATE_CREATE);
|
|
}
|
|
return err;
|
|
}
|
|
|
|
/** Test whether an entry may be swept.
 *
 * @param ventry varp entry
 * @return 0 if VarpEntry_get_flags() reports VARP_FLAG_PERMANENT or
 *         VARP_FLAG_PROBING for the entry, 1 otherwise
 */
static inline int VarpEntry_sweepable(VarpEntry *ventry){
    if (VarpEntry_get_flags(ventry, (VARP_FLAG_PERMANENT | VARP_FLAG_PROBING))) {
        return 0;
    }
    return 1;
}
|
|
|
|
/** Test whether an entry has outlived the table's time-to-live.
 *
 * @param vtable table supplying entry_ttl
 * @param ventry entry whose timestamp is examined
 * @param now current time, in the same units as the entry timestamp
 * @return 1 if (now - timestamp) exceeds entry_ttl, 0 otherwise
 */
static inline int VarpTable_old(VarpTable *vtable, VarpEntry *ventry, unsigned long now){
    if (now - ventry->timestamp > vtable->entry_ttl) {
        return 1;
    }
    return 0;
}
|
|
|
|
/** Sweep old varp entries.
|
|
* Doesn't affect entries that are probing or permanent.
|
|
*
|
|
* @param vtable table
|
|
*/
|
|
void VarpTable_sweep(VarpTable *vtable){
|
|
HashTable_for_decl(entry);
|
|
VarpEntry *ventry;
|
|
unsigned long now = jiffies;
|
|
unsigned long vtflags, flags;
|
|
int sweep, swept = 0;
|
|
|
|
if(!vtable) return;
|
|
VarpTable_write_lock(vtable, vtflags);
|
|
HashTable_for_each(entry, vtable->table){
|
|
ventry = entry->value;
|
|
VarpEntry_lock(ventry, flags);
|
|
sweep = VarpEntry_sweepable(ventry) && VarpTable_old(vtable, ventry, now);
|
|
if(sweep){
|
|
swept++;
|
|
//iprintf("> Sweeping:\n");
|
|
//VarpEntry_print(ventry, iostdout);
|
|
//VarpEntry_process_queue(ventry);
|
|
ventry->state = VARP_STATE_INCOMPLETE;
|
|
}
|
|
VarpEntry_unlock(ventry, flags);
|
|
if(sweep){
|
|
VarpTable_remove(vtable, ventry);
|
|
}
|
|
}
|
|
VarpTable_write_unlock(vtable, vtflags);
|
|
if(swept){
|
|
iprintf(">\n");
|
|
varp_print(iostdout);
|
|
}
|
|
}
|
|
|
|
/** Flush the varp table.
|
|
*
|
|
* @param vtable table
|
|
*/
|
|
void VarpTable_flush(VarpTable *vtable){
|
|
HashTable_for_decl(entry);
|
|
VarpEntry *ventry;
|
|
unsigned long vtflags, flags;
|
|
int flush;
|
|
|
|
VarpTable_write_lock(vtable, vtflags);
|
|
HashTable_for_each(entry, vtable->table){
|
|
ventry = entry->value;
|
|
VarpEntry_lock(ventry, flags);
|
|
flush = (!VarpEntry_get_flags(ventry, VARP_FLAG_PERMANENT) &&
|
|
!VarpEntry_get_flags(ventry, VARP_FLAG_PROBING));
|
|
if(flush){
|
|
iprintf("> Flushing:\n");
|
|
VarpEntry_print(ventry, iostdout);
|
|
}
|
|
VarpEntry_unlock(ventry, flags);
|
|
if(flush){
|
|
VarpTable_remove(vtable, ventry);
|
|
}
|
|
}
|
|
VarpTable_write_unlock(vtable, vtflags);
|
|
}
|
|
|
|
/** Handle a varp request. Look for a vif with the requested
|
|
* vnet and vmac. If find one, reply with the vnet, vmac and our
|
|
* address. Otherwise do nothing.
|
|
*
|
|
* @param skb incoming message
|
|
* @param varph varp message
|
|
* @return 0 if ok, -ENOENT if no matching vif, or error code
|
|
*/
|
|
int varp_handle_request(struct sk_buff *skb, VarpHdr *varph){
|
|
int err = -ENOENT;
|
|
VnetId *vnet;
|
|
Vmac *vmac;
|
|
Vif *vif = NULL;
|
|
|
|
dprintf(">\n");
|
|
vnet = &varph->vnet;
|
|
vmac = &varph->vmac;
|
|
if(vif_lookup(vnet, vmac, &vif)) goto exit;
|
|
varp_send(VARP_OP_ANNOUNCE, skb->dev, skb, vnet, vmac);
|
|
vif_decref(vif);
|
|
exit:
|
|
dprintf("< err=%d\n", err);
|
|
return err;
|
|
}
|
|
|
|
/** Announce the vnet and vmac of a vif (gratuitous varp).
|
|
*
|
|
* @param dev device to send on (may be null)
|
|
* @param vif vif
|
|
* @return 0 on success, error code otherwise
|
|
*/
|
|
int varp_announce_vif(struct net_device *dev, Vif *vif){
|
|
int err = 0;
|
|
dprintf(">\n");
|
|
if(!varp_table){
|
|
err = -ENOSYS;
|
|
goto exit;
|
|
}
|
|
err = varp_send(VARP_OP_ANNOUNCE, dev, NULL, &vif->vnet, &vif->vmac);
|
|
exit:
|
|
dprintf("< err=%d\n", err);
|
|
return err;
|
|
}
|
|
|
|
/** Handle a varp announce message.
|
|
* Update the matching ventry if we have one.
|
|
*
|
|
* @param skb incoming message
|
|
* @param varp message
|
|
* @return 0 if OK, -ENOENT if no matching entry
|
|
*/
|
|
int varp_handle_announce(struct sk_buff *skb, VarpHdr *varph){
|
|
int err = 0;
|
|
|
|
dprintf(">\n");
|
|
err = VarpTable_update(varp_table,
|
|
&varph->vnet, &varph->vmac, &varph->addr,
|
|
VARP_STATE_REACHABLE,
|
|
(VARP_UPDATE_CREATE | VARP_UPDATE_QUEUE));
|
|
dprintf("< err=%d\n", err);
|
|
return err;
|
|
}
|
|
|
|
/** Handle an incoming varp message.
|
|
*
|
|
* @param skb incoming message
|
|
* @return 0 if OK, error code otherwise
|
|
*/
|
|
int varp_handle_message(struct sk_buff *skb){
|
|
// Assume nh, h set, skb->data points at udp hdr (h).
|
|
int err = -EINVAL;
|
|
VarpHdr *varph; // = (void*)(skb->h.uh + 1);
|
|
|
|
dprintf("> skb=%p saddr=" IPFMT " daddr=" IPFMT " head=%p tail=%p data=%p len=%d data_len=%d users=%d\n",
|
|
skb,
|
|
NIPQUAD(skb->nh.iph->saddr),
|
|
NIPQUAD(skb->nh.iph->daddr),
|
|
skb->head, skb->tail, skb->data,skb->len, skb->data_len,
|
|
atomic_read(&skb->users));
|
|
|
|
if(!varp_table){
|
|
err = -ENOSYS;
|
|
return err;
|
|
}
|
|
#ifdef __KERNEL__
|
|
if (skb_is_nonlinear(skb) && skb_linearize(skb) != 0) {
|
|
err = -ENOMEM;
|
|
goto exit;
|
|
}
|
|
#endif
|
|
if(MULTICAST(skb->nh.iph->daddr)){
|
|
if(skb->nh.iph->daddr != varp_mcast_addr){
|
|
// Ignore multicast packets not addressed to us.
|
|
err = 0;
|
|
dprintf("> Ignoring daddr=" IPFMT " mcaddr=" IPFMT "\n",
|
|
NIPQUAD(skb->nh.iph->daddr), NIPQUAD(varp_mcast_addr));
|
|
goto exit;
|
|
}
|
|
}
|
|
#ifdef __KERNEL__
|
|
varph = (void*) __pskb_pull(skb, sizeof(struct udphdr));
|
|
#else
|
|
varph = (void*)skb_pull_vn(skb, sizeof(struct udphdr));
|
|
#endif
|
|
|
|
|
|
if(skb->len < sizeof(struct VnetMsgHdr)){
|
|
wprintf("> Varp msg too short: %d < %ld\n", skb->len, sizeof(struct VnetMsgHdr));
|
|
goto exit;
|
|
}
|
|
|
|
if (! varph || ! skb->data) {
|
|
err = -EINVAL;
|
|
goto exit;
|
|
}
|
|
|
|
switch(ntohs(varph->hdr.id)){
|
|
case VARP_ID: // Varp message. Handled below.
|
|
if(skb->len < sizeof(*varph)){
|
|
wprintf("> Varp msg too short: %d < %ld\n", skb->len, sizeof(*varph));
|
|
goto exit;
|
|
}
|
|
break;
|
|
case VUDP_ID: // Etherip-in-udp packet.
|
|
skb_pull_vn(skb, sizeof(struct VnetMsgHdr));
|
|
err = etherip_protocol_recv(skb);
|
|
goto exit;
|
|
case VFWD_ID: // Forwarded.
|
|
skb_pull_vn(skb, sizeof(struct VnetMsgHdr));
|
|
err = vnet_forward_recv(skb);
|
|
goto exit;
|
|
default:
|
|
// It's not varp at all - ignore it.
|
|
wprintf("> Invalid varp id: %d\n", ntohs(varph->hdr.id));
|
|
print_skb("INVALID", 0, skb);
|
|
goto exit;
|
|
}
|
|
#ifdef DEBUG
|
|
{
|
|
char vnetbuf[VNET_ID_BUF];
|
|
char addrbuf[VARP_ADDR_BUF];
|
|
dprintf("> saddr=" IPFMT " daddr=" IPFMT "\n",
|
|
NIPQUAD(skb->nh.iph->saddr), NIPQUAD(skb->nh.iph->daddr));
|
|
dprintf("> sport=%u dport=%u\n", ntohs(skb->h.uh->source), ntohs(skb->h.uh->dest));
|
|
dprintf("> opcode=%d vnet=%s vmac=" MACFMT " addr=%s\n",
|
|
ntohs(varph->hdr.opcode),
|
|
VnetId_ntoa(&varph->vnet, vnetbuf),
|
|
MAC6TUPLE(varph->vmac.mac),
|
|
VarpAddr_ntoa(&varph->addr, addrbuf));
|
|
varp_dprint();
|
|
}
|
|
#endif
|
|
switch(ntohs(varph->hdr.opcode)){
|
|
case VARP_OP_REQUEST:
|
|
err = varp_handle_request(skb, varph);
|
|
break;
|
|
case VARP_OP_ANNOUNCE:
|
|
err = varp_handle_announce(skb, varph);
|
|
break;
|
|
default:
|
|
wprintf("> Unknown opcode: %d \n", ntohs(varph->hdr.opcode));
|
|
break;
|
|
}
|
|
exit:
|
|
dprintf("< err=%d\n", err);
|
|
return err;
|
|
}
|
|
|
|
/** Send an outgoing packet on the appropriate vnet tunnel.
|
|
*
|
|
* @param skb outgoing message
|
|
* @param vnet vnet (network order)
|
|
* @return 0 on success, error code otherwise
|
|
*/
|
|
int varp_output(struct sk_buff *skb, VnetId *vnet){
|
|
int err = 0;
|
|
unsigned char *mac = NULL;
|
|
Vmac *vmac = NULL;
|
|
VarpEntry *ventry = NULL;
|
|
#if defined(DEBUG)
|
|
char vnetbuf[VNET_ID_BUF];
|
|
#endif
|
|
|
|
dprintf("> vnet=%s\n", VnetId_ntoa(vnet, vnetbuf));
|
|
if(!varp_table){
|
|
err = -ENOSYS;
|
|
goto exit;
|
|
}
|
|
if(!skb->mac.raw){
|
|
wprintf("> No ethhdr in skb!\n");
|
|
err = -EINVAL;
|
|
goto exit;
|
|
}
|
|
mac = eth_hdr(skb)->h_dest;
|
|
vmac = (Vmac*)mac;
|
|
if(mac_is_multicast(mac)){
|
|
err = varp_multicast(vnet, skb);
|
|
} else {
|
|
ventry = VarpTable_lookup(varp_table, vnet, vmac, 1);
|
|
if(ventry){
|
|
err = VarpEntry_output(ventry, skb);
|
|
VarpEntry_decref(ventry);
|
|
} else {
|
|
err = -ENOMEM;
|
|
}
|
|
}
|
|
exit:
|
|
dprintf("< err=%d\n", err);
|
|
return err;
|
|
}
|
|
|
|
/** Set the varp multicast address (after initialization).
|
|
*
|
|
* @param addr address (network order)
|
|
* @return 0 on success, error code otherwise
|
|
*/
|
|
int varp_set_mcast_addr(uint32_t addr){
|
|
int err = 0;
|
|
varp_close();
|
|
varp_mcast_addr = addr;
|
|
err = varp_open(varp_mcast_addr, varp_port);
|
|
return err;
|
|
}
|
|
|
|
/** Initialize the varp multicast address from a module parameter.
|
|
*
|
|
* @param s address in IPv4 notation
|
|
* @return 0 on success, error code otherwise
|
|
*/
|
|
static void varp_init_mcast_addr(char *s){
|
|
unsigned long v = 0;
|
|
|
|
dprintf("> %s\n", s);
|
|
if(s && (get_inet_addr(s, &v) >= 0)){
|
|
varp_mcast_addr = (u32)v;
|
|
} else {
|
|
varp_mcast_addr = htonl(VARP_MCAST_ADDR);
|
|
}
|
|
}
|
|
|
|
/** Initialize the varp cache.
|
|
*
|
|
* @return 0 on success, error code otherwise
|
|
*/
|
|
int varp_init(void){
|
|
int err = 0;
|
|
|
|
dprintf(">\n");
|
|
varp_table = VarpTable_new();
|
|
if(!varp_table){
|
|
err = -ENOMEM;
|
|
goto exit;
|
|
}
|
|
VarpTable_schedule(varp_table);
|
|
varp_init_mcast_addr(varp_mcaddr);
|
|
varp_port = htons(VARP_PORT);
|
|
|
|
err = varp_open(varp_mcast_addr, varp_port);
|
|
exit:
|
|
dprintf("< err=%d\n", err);
|
|
return err;
|
|
}
|
|
|
|
/** Close the varp cache.
|
|
*/
|
|
void varp_exit(void){
|
|
dprintf(">\n");
|
|
varp_close();
|
|
if(varp_table){
|
|
VarpTable *vtable = varp_table;
|
|
varp_table = NULL;
|
|
VarpTable_free(vtable);
|
|
}
|
|
dprintf("<\n");
|
|
}
|
|
|
|
module_param(varp_mcaddr, charp, 0644);
|
|
module_param(varp_device, charp, 0644);
|
|
MODULE_PARM_DESC(varp_mcaddr, "VARP multicast address");
|
|
MODULE_PARM_DESC(varp_device, "VARP network device");
|