From 7f48bab7c7b468961cf70efa1d86a75173e3987a Mon Sep 17 00:00:00 2001
From: Mans Rullgard <mans@mansr.com>
Date: Thu, 26 May 2016 16:04:02 +0100
Subject: [PATCH] ivshmem-net: virtual network device for Jailhouse

Work in progress.

(cherry picked from commit ed818547b45e652db57d5966efe336ed646feb45)
---
 drivers/net/Kconfig       |   4 +
 drivers/net/Makefile      |   2 +
 drivers/net/ivshmem-net.c | 967 ++++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 973 insertions(+)
 create mode 100644 drivers/net/ivshmem-net.c

--- a/drivers/net/Kconfig
+++ b/drivers/net/Kconfig
@@ -528,4 +528,8 @@ config NET_FAILOVER
 	  a VM with direct attached VF by failing over to the paravirtual
 	  datapath when the VF is unplugged.
 
+config IVSHMEM_NET
+	tristate "IVSHMEM virtual network device"
+	depends on PCI
+
 endif # NETDEVICES
--- a/drivers/net/Makefile
+++ b/drivers/net/Makefile
@@ -79,3 +79,5 @@ thunderbolt-net-y += thunderbolt.o
 obj-$(CONFIG_THUNDERBOLT_NET) += thunderbolt-net.o
 obj-$(CONFIG_NETDEVSIM) += netdevsim/
 obj-$(CONFIG_NET_FAILOVER) += net_failover.o
+
+obj-$(CONFIG_IVSHMEM_NET) += ivshmem-net.o
--- /dev/null
+++ b/drivers/net/ivshmem-net.c
@@ -0,0 +1,967 @@
+/*
+ * Copyright 2016 Mans Rullgard <mans@mansr.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/pci.h>
+#include <linux/io.h>
+#include <linux/bitops.h>
+#include <linux/interrupt.h>
+#include <linux/netdevice.h>
+#include <linux/etherdevice.h>
+#include <linux/rtnetlink.h>
+#include <linux/virtio_ring.h>
+
+#define DRV_NAME "ivshmem-net"
+
+#define JAILHOUSE_CFG_SHMEM_PTR	0x40
+#define JAILHOUSE_CFG_SHMEM_SZ	0x48
+
+#define IVSHM_NET_STATE_RESET	0
+#define IVSHM_NET_STATE_INIT	1
+#define IVSHM_NET_STATE_READY	2
+#define IVSHM_NET_STATE_RUN	3
+
+#define IVSHM_NET_MTU_MIN 256
+#define IVSHM_NET_MTU_MAX 65535
+#define IVSHM_NET_MTU_DEF 16384
+
+#define IVSHM_NET_FRAME_SIZE(s) ALIGN(18 + (s), SMP_CACHE_BYTES)
+
+#define IVSHM_NET_VQ_ALIGN 64
+
+/* Register layout of the ivshmem device, mapped from PCI BAR 0 in probe. */
+struct ivshmem_regs {
+	u32 imask;
+	u32 istat;
+	u32 ivpos;
+	u32 doorbell;
+	u32 lstate;
+	u32 rstate;
+};
+
+/* One direction of the link: a vring plus the frame data area behind it. */
+struct ivshm_net_queue {
+	struct vring vr;
+	u32 free_head;
+	u32 num_free;
+	u32 num_added;
+	u16 last_avail_idx;
+	u16 last_used_idx;
+
+	void *data;
+	void *end;
+	u32 size;
+	u32 head;
+	u32 tail;
+};
+
+/* Driver counters reported through ethtool; cleared each time they are read. */
+struct ivshm_net_stats {
+	u32 interrupts;
+	u32 tx_packets;
+	u32 tx_notify;
+	u32 tx_pause;
+	u32 rx_packets;
+	u32 rx_notify;
+	u32 napi_poll;
+	u32 napi_complete;
+	u32 napi_poll_n[10];
+};
+
+/* Per-device state, kept in the net_device private area (netdev_priv()). */
+struct ivshm_net {
+	struct ivshm_net_queue rx;
+	struct ivshm_net_queue tx;
+
+	u32 vrsize;
+	u32 qlen;
+	u32 qsize;
+
+	spinlock_t tx_free_lock;
+	spinlock_t tx_clean_lock;
+
+	struct napi_struct napi;
+
+	u32 lstate;
+	u32 rstate;
+
+	struct workqueue_struct *state_wq;
+	struct work_struct state_work;
+
+	struct ivshm_net_stats stats;
+
+	struct ivshmem_regs __iomem *ivshm_regs;
+	void *shm;
+	phys_addr_t shmaddr;
+	resource_size_t shmlen;
+	u32 peer_id;
+
+	struct pci_dev *pdev;
+	struct msix_entry msix;
+	bool using_msix;
+};
+
+/*
+ * Translate @desc into a pointer inside the data area of @q and store the
+ * frame length in @len.  addr and len are each read once and only those
+ * snapshots are checked and used.  Returns NULL if the range is not in @q.
+ */
+static void *ivshm_net_desc_data(struct ivshm_net *in,
+				 struct ivshm_net_queue *q,
+				 struct vring_desc *desc,
+				 u32 *len)
+{
+	u64 addr = READ_ONCE(desc->addr);
+	u32 dlen = READ_ONCE(desc->len);
+	void *data;
+
+	if (addr < in->shmaddr || addr > in->shmaddr + in->shmlen)
+		return NULL;
+
+	data = in->shm + (addr - in->shmaddr);
+
+	if (data < q->data || data >= q->end)
+		return NULL;
+
+	if (dlen > q->end - data)
+		return NULL;
+
+	*len = dlen;
+
+	return data;
+}
+
+/* Reset @q and lay out its vring at @mem, with the data area right after it. */
+static void ivshm_net_init_queue(struct ivshm_net *in,
+				 struct ivshm_net_queue *q,
+				 void *mem, unsigned int len)
+{
+	memset(q, 0, sizeof(*q));
+
+	vring_init(&q->vr, len, mem, IVSHM_NET_VQ_ALIGN);
+	q->data = mem + in->vrsize;
+	q->end = q->data + in->qsize;
+	q->size = in->qsize;
+}
+
+/* Split the region in halves selected by ivpos, clear ours, set up both rings. */
+static void ivshm_net_init_queues(struct net_device *ndev)
+{
+	struct ivshm_net *in = netdev_priv(ndev);
+	int ivpos = readl(&in->ivshm_regs->ivpos);
+	void *tx;
+	void *rx;
+	int i;
+
+	tx = in->shm +  ivpos * in->shmlen / 2;
+	rx = in->shm + !ivpos * in->shmlen / 2;
+
+	memset(tx, 0, in->shmlen / 2);
+
+	ivshm_net_init_queue(in, &in->rx, rx, in->qlen);
+	ivshm_net_init_queue(in, &in->tx, tx, in->qlen);
+
+	swap(in->rx.vr.used, in->tx.vr.used);
+
+	in->tx.num_free = in->tx.vr.num;
+
+	for (i = 0; i < in->tx.vr.num - 1; i++)
+		in->tx.vr.desc[i].next = i + 1;
+}
+
+/* Choose ring length and data area size for one half; -EINVAL if too small. */
+static int ivshm_net_calc_qsize(struct net_device *ndev)
+{
+	struct ivshm_net *in = netdev_priv(ndev);
+	unsigned int vrsize;
+	unsigned int qsize;
+	unsigned int qlen;
+
+	for (qlen = 4096; qlen > 32; qlen >>= 1) {
+		vrsize = vring_size(qlen, IVSHM_NET_VQ_ALIGN);
+		vrsize = ALIGN(vrsize, IVSHM_NET_VQ_ALIGN);
+		if (vrsize < in->shmlen / 16)
+			break;
+	}
+
+	if (vrsize > in->shmlen / 2)
+		return -EINVAL;
+
+	qsize = in->shmlen / 2 - vrsize;
+
+	if (qsize < 4 * IVSHM_NET_MTU_MIN)
+		return -EINVAL;
+
+	in->vrsize = vrsize;
+	in->qlen = qlen;
+	in->qsize = qsize;
+
+	return 0;
+}
+
+/* Ring the doorbell if the last @num tx avail entries cross the event index. */
+static void ivshm_net_notify_tx(struct ivshm_net *in, unsigned int num)
+{
+	u16 evt, old, new;
+
+	virt_mb();
+
+	evt = READ_ONCE(vring_avail_event(&in->tx.vr));
+	old = in->tx.last_avail_idx - num;
+	new = in->tx.last_avail_idx;
+
+	if (vring_need_event(evt, new, old)) {
+		writel(in->peer_id << 16, &in->ivshm_regs->doorbell);
+		in->stats.tx_notify++;
+	}
+}
+
+/* Publish the current rx avail index as the avail event index of the rx ring. */
+static void ivshm_net_enable_rx_irq(struct ivshm_net *in)
+{
+	vring_avail_event(&in->rx.vr) = in->rx.last_avail_idx;
+	virt_wmb();
+}
+
+/* Ring the doorbell if the last @num rx used entries cross the event index. */
+static void ivshm_net_notify_rx(struct ivshm_net *in, unsigned int num)
+{
+	u16 evt, old, new;
+
+	virt_mb();
+
+	evt = vring_used_event(&in->rx.vr);
+	old = in->rx.last_used_idx - num;
+	new = in->rx.last_used_idx;
+
+	if (vring_need_event(evt, new, old)) {
+		writel(in->peer_id << 16, &in->ivshm_regs->doorbell);
+		in->stats.rx_notify++;
+	}
+}
+
+/* Publish the current tx used index as the used event index of the tx ring. */
+static void ivshm_net_enable_tx_irq(struct ivshm_net *in)
+{
+	vring_used_event(&in->tx.vr) = in->tx.last_used_idx;
+	virt_wmb();
+}
+
+/* True if the rx avail index has moved past what we have consumed so far. */
+static bool ivshm_net_rx_avail(struct ivshm_net *in)
+{
+	virt_mb();
+	return READ_ONCE(in->rx.vr.avail->idx) != in->rx.last_avail_idx;
+}
+
+/* Largest contiguous free span of the tx data area, in bytes. */
+static size_t ivshm_net_tx_space(struct ivshm_net *in)
+{
+	struct ivshm_net_queue *tx = &in->tx;
+	u32 tail = tx->tail;
+	u32 head = tx->head;
+	u32 space;
+
+	if (head < tail)
+		space = tail - head;
+	else
+		space = max(tx->size - head, tail);
+
+	return space;
+}
+
+/* True if two descriptors and room for two frames of @mtu are still free. */
+static bool ivshm_net_tx_ok(struct ivshm_net *in, unsigned int mtu)
+{
+	return in->tx.num_free >= 2 &&
+		ivshm_net_tx_space(in) >= 2 * IVSHM_NET_FRAME_SIZE(mtu);
+}
+
+/* Claim a frame for @len bytes at *@pos (wrapping to 0); returns its offset. */
+static u32 ivshm_net_tx_advance(struct ivshm_net_queue *q, u32 *pos, u32 len)
+{
+	u32 p = *pos;
+
+	len = IVSHM_NET_FRAME_SIZE(len);
+
+	if (q->size - p < len)
+		p = 0;
+	*pos = p + len;
+
+	return p;
+}
+
+/* Copy @skb into the tx data area and queue a descriptor for it. */
+static int ivshm_net_tx_frame(struct net_device *ndev, struct sk_buff *skb)
+{
+	struct ivshm_net *in = netdev_priv(ndev);
+	struct ivshm_net_queue *tx = &in->tx;
+	struct vring *vr = &tx->vr;
+	struct vring_desc *desc;
+	unsigned int desc_idx;
+	unsigned int avail;
+	u32 head;
+	void *buf;
+
+	BUG_ON(tx->num_free < 1);
+
+	spin_lock(&in->tx_free_lock);
+	desc_idx = tx->free_head;
+	desc = &vr->desc[desc_idx];
+	tx->free_head = desc->next;
+	tx->num_free--;
+	spin_unlock(&in->tx_free_lock);
+
+	head = ivshm_net_tx_advance(tx, &tx->head, skb->len);
+
+	buf = tx->data + head;
+	skb_copy_and_csum_dev(skb, buf);
+
+	desc->addr = in->shmaddr + (buf - in->shm);
+	desc->len = skb->len;
+
+	avail = tx->last_avail_idx++ & (vr->num - 1);
+	vr->avail->ring[avail] = desc_idx;
+	tx->num_added++;
+
+	if (!skb->xmit_more) {
+		virt_store_release(&vr->avail->idx, tx->last_avail_idx);
+		ivshm_net_notify_tx(in, tx->num_added);
+		tx->num_added = 0;
+	}
+
+	return 0;
+}
+
+/* Walk new tx used entries and return their descriptors to the free list. */
+static void ivshm_net_tx_clean(struct net_device *ndev)
+{
+	struct ivshm_net *in = netdev_priv(ndev);
+	struct ivshm_net_queue *tx = &in->tx;
+	struct vring *vr = &tx->vr;
+	struct vring_desc *desc;
+	struct vring_desc *fdesc;
+	unsigned int used;
+	unsigned int num;
+	u16 used_idx;
+	u16 last;
+	u32 fhead = 0;
+
+	if (!spin_trylock(&in->tx_clean_lock))
+		return;
+
+	used_idx = virt_load_acquire(&vr->used->idx);
+	last = tx->last_used_idx;
+
+	fdesc = NULL;
+	num = 0;
+
+	while (last != used_idx) {
+		void *data;
+		u32 len;
+		u32 tail;
+
+		used = vr->used->ring[last & (vr->num - 1)].id;
+		if (used >= vr->num) {
+			netdev_err(ndev, "invalid tx used %d\n", used);
+			break;
+		}
+
+		desc = &vr->desc[used];
+
+		data = ivshm_net_desc_data(in, &in->tx, desc, &len);
+		if (!data) {
+			netdev_err(ndev, "bad tx descriptor\n");
+			break;
+		}
+
+		tail = ivshm_net_tx_advance(tx, &tx->tail, len);
+		if (data != tx->data + tail) {
+			netdev_err(ndev, "bad tx descriptor\n");
+			break;
+		}
+
+		if (!num)
+			fdesc = desc;
+		else
+			desc->next = fhead;
+
+		fhead = used;
+		last++;
+		num++;
+	}
+
+	tx->last_used_idx = last;
+
+	spin_unlock(&in->tx_clean_lock);
+
+	if (num) {
+		spin_lock(&in->tx_free_lock);
+		fdesc->next = tx->free_head;
+		tx->free_head = fhead;
+		tx->num_free += num;
+		BUG_ON(tx->num_free > vr->num);
+		spin_unlock(&in->tx_free_lock);
+	}
+}
+
+/* Fetch the next rx avail descriptor; NULL if none or its index is invalid. */
+static struct vring_desc *ivshm_net_rx_desc(struct net_device *ndev)
+{
+	struct ivshm_net *in = netdev_priv(ndev);
+	struct ivshm_net_queue *rx = &in->rx;
+	struct vring *vr = &rx->vr;
+	unsigned int avail;
+	u16 avail_idx;
+
+	avail_idx = virt_load_acquire(&vr->avail->idx);
+
+	if (avail_idx == rx->last_avail_idx)
+		return NULL;
+
+	avail = vr->avail->ring[rx->last_avail_idx++ & (vr->num - 1)];
+	if (avail >= vr->num) {
+		netdev_err(ndev, "invalid rx avail %d\n", avail);
+		return NULL;
+	}
+
+	return &vr->desc[avail];
+}
+
+/* Put @desc on the rx used ring and publish the new used index. */
+static void ivshm_net_rx_finish(struct ivshm_net *in, struct vring_desc *desc)
+{
+	struct ivshm_net_queue *rx = &in->rx;
+	struct vring *vr = &rx->vr;
+	unsigned int desc_id = desc - vr->desc;
+	unsigned int used;
+
+	used = rx->last_used_idx++ & (vr->num - 1);
+	vr->used->ring[used].id = desc_id;
+
+	virt_store_release(&vr->used->idx, rx->last_used_idx);
+}
+
+/* NAPI poll: reclaim tx descriptors, then receive up to @budget frames. */
+static int ivshm_net_poll(struct napi_struct *napi, int budget)
+{
+	struct net_device *ndev = napi->dev;
+	struct ivshm_net *in = container_of(napi, struct ivshm_net, napi);
+	int received = 0;
+
+	in->stats.napi_poll++;
+
+	ivshm_net_tx_clean(ndev);
+
+	while (received < budget) {
+		struct vring_desc *desc;
+		struct sk_buff *skb;
+		void *data;
+		u32 len;
+
+		desc = ivshm_net_rx_desc(ndev);
+		if (!desc)
+			break;
+
+		data = ivshm_net_desc_data(in, &in->rx, desc, &len);
+		if (!data) {
+			netdev_err(ndev, "bad rx descriptor\n");
+			break;
+		}
+
+		skb = napi_alloc_skb(napi, len);
+
+		if (skb) {
+			memcpy(skb_put(skb, len), data, len);
+			skb->protocol = eth_type_trans(skb, ndev);
+			napi_gro_receive(napi, skb);
+		}
+
+		ndev->stats.rx_packets++;
+		ndev->stats.rx_bytes += len;
+
+		ivshm_net_rx_finish(in, desc);
+		received++;
+	}
+
+	if (received < budget) {
+		in->stats.napi_complete++;
+		napi_complete_done(napi, received);
+		ivshm_net_enable_rx_irq(in);
+		if (ivshm_net_rx_avail(in))
+			napi_schedule(napi);
+	}
+
+	if (received)
+		ivshm_net_notify_rx(in, received);
+
+	in->stats.rx_packets += received;
+	in->stats.napi_poll_n[received ? 1 + min(ilog2(received), 8) : 0]++;
+
+	if (ivshm_net_tx_ok(in, ndev->mtu))
+		netif_wake_queue(ndev);
+
+	return received;
+}
+
+/* ndo_start_xmit: queue @skb; stop the tx queue first when space runs low. */
+static netdev_tx_t ivshm_net_xmit(struct sk_buff *skb, struct net_device *ndev)
+{
+	struct ivshm_net *in = netdev_priv(ndev);
+
+	ivshm_net_tx_clean(ndev);
+
+	if (!ivshm_net_tx_ok(in, ndev->mtu)) {
+		ivshm_net_enable_tx_irq(in);
+		netif_stop_queue(ndev);
+		skb->xmit_more = 0;
+		in->stats.tx_pause++;
+	}
+
+	ivshm_net_tx_frame(ndev, skb);
+
+	in->stats.tx_packets++;
+	ndev->stats.tx_packets++;
+	ndev->stats.tx_bytes += skb->len;
+
+	dev_consume_skb_any(skb);
+
+	return NETDEV_TX_OK;
+}
+
+/* Record @state in in->lstate and write it to the lstate register. */
+static void ivshm_net_set_state(struct ivshm_net *in, u32 state)
+{
+	virt_wmb();
+	WRITE_ONCE(in->lstate, state);
+	writel(state, &in->ivshm_regs->lstate);
+}
+
+/* Start the tx queue and NAPI, then enter the RUN state. */
+static void ivshm_net_run(struct net_device *ndev)
+{
+	struct ivshm_net *in = netdev_priv(ndev);
+
+	netif_start_queue(ndev);
+	napi_enable(&in->napi);
+	napi_schedule(&in->napi);
+	ivshm_net_set_state(in, IVSHM_NET_STATE_RUN);
+}
+
+/* Work item: step the local state machine according to the rstate register. */
+static void ivshm_net_state_change(struct work_struct *work)
+{
+	struct ivshm_net *in = container_of(work, struct ivshm_net, state_work);
+	struct net_device *ndev = in->napi.dev;
+	u32 rstate = readl(&in->ivshm_regs->rstate);
+
+
+	switch (in->lstate) {
+	case IVSHM_NET_STATE_RESET:
+		if (rstate < IVSHM_NET_STATE_READY)
+			ivshm_net_set_state(in, IVSHM_NET_STATE_INIT);
+		break;
+
+	case IVSHM_NET_STATE_INIT:
+		if (rstate > IVSHM_NET_STATE_RESET) {
+			ivshm_net_init_queues(ndev);
+			ivshm_net_set_state(in, IVSHM_NET_STATE_READY);
+
+			rtnl_lock();
+			call_netdevice_notifiers(NETDEV_CHANGEADDR, ndev);
+			rtnl_unlock();
+		}
+		break;
+
+	case IVSHM_NET_STATE_READY:
+		if (rstate >= IVSHM_NET_STATE_READY) {
+			netif_carrier_on(ndev);
+			if (ndev->flags & IFF_UP)
+				ivshm_net_run(ndev);
+		} else {
+			netif_carrier_off(ndev);
+			ivshm_net_set_state(in, IVSHM_NET_STATE_RESET);
+		}
+		break;
+
+	case IVSHM_NET_STATE_RUN:
+		if (rstate < IVSHM_NET_STATE_READY) {
+			netif_stop_queue(ndev);
+			napi_disable(&in->napi);
+			netif_carrier_off(ndev);
+			ivshm_net_set_state(in, IVSHM_NET_STATE_RESET);
+		}
+		break;
+	}
+
+	virt_wmb();
+	WRITE_ONCE(in->rstate, rstate);
+}
+
+/* Queue the state work and return false unless rstate is unchanged in RUN. */
+static bool ivshm_net_check_state(struct net_device *ndev)
+{
+	struct ivshm_net *in = netdev_priv(ndev);
+	u32 rstate = readl(&in->ivshm_regs->rstate);
+
+	if (rstate != READ_ONCE(in->rstate) ||
+	    in->lstate != IVSHM_NET_STATE_RUN) {
+		queue_work(in->state_wq, &in->state_work);
+		return false;
+	}
+
+	return true;
+}
+
+/* Interrupt handler: recheck the link state and schedule NAPI. */
+static irqreturn_t ivshm_net_int(int irq, void *data)
+{
+	struct net_device *ndev = data;
+	struct ivshm_net *in = netdev_priv(ndev);
+
+	in->stats.interrupts++;
+
+	ivshm_net_check_state(ndev);
+	napi_schedule_irqoff(&in->napi);
+
+	return IRQ_HANDLED;
+}
+
+/* ndo_open: start running right away if the local state is already READY. */
+static int ivshm_net_open(struct net_device *ndev)
+{
+	struct ivshm_net *in = netdev_priv(ndev);
+
+	netdev_reset_queue(ndev);
+	ndev->operstate = IF_OPER_UP;
+
+	if (in->lstate == IVSHM_NET_STATE_READY)
+		ivshm_net_run(ndev);
+
+	return 0;
+}
+
+/* ndo_stop: if running, disable NAPI and the tx queue and go back to READY. */
+static int ivshm_net_stop(struct net_device *ndev)
+{
+	struct ivshm_net *in = netdev_priv(ndev);
+
+	ndev->operstate = IF_OPER_DOWN;
+
+	if (in->lstate == IVSHM_NET_STATE_RUN) {
+		napi_disable(&in->napi);
+		netif_stop_queue(ndev);
+		ivshm_net_set_state(in, IVSHM_NET_STATE_READY);
+	}
+
+	return 0;
+}
+
+/* ndo_change_mtu: accept @mtu only if the tx area can take frames that big. */
+static int ivshm_net_change_mtu(struct net_device *ndev, int mtu)
+{
+	struct ivshm_net *in = netdev_priv(ndev);
+	struct ivshm_net_queue *tx = &in->tx;
+
+	if (mtu < IVSHM_NET_MTU_MIN || mtu > IVSHM_NET_MTU_MAX)
+		return -EINVAL;
+
+	if (in->tx.size / mtu < 4)
+		return -EINVAL;
+
+	if (ivshm_net_tx_space(in) < 2 * IVSHM_NET_FRAME_SIZE(mtu))
+		return -EBUSY;
+
+	if (in->tx.size - tx->head < IVSHM_NET_FRAME_SIZE(mtu) &&
+	    tx->head < tx->tail)
+		return -EBUSY;
+
+	netif_tx_lock_bh(ndev);
+	if (in->tx.size - tx->head < IVSHM_NET_FRAME_SIZE(mtu))
+		tx->head = 0;
+	netif_tx_unlock_bh(ndev);
+
+	ndev->mtu = mtu;
+
+	return 0;
+}
+
+#ifdef CONFIG_NET_POLL_CONTROLLER
+/* ndo_poll_controller: schedule NAPI. */
+static void ivshm_net_poll_controller(struct net_device *ndev)
+{
+	struct ivshm_net *in = netdev_priv(ndev);
+
+	napi_schedule(&in->napi);
+}
+#endif
+
+static const struct net_device_ops ivshm_net_ops = {
+	.ndo_open	= ivshm_net_open,
+	.ndo_stop	= ivshm_net_stop,
+	.ndo_start_xmit	= ivshm_net_xmit,
+	.ndo_change_mtu	= ivshm_net_change_mtu,
+#ifdef CONFIG_NET_POLL_CONTROLLER
+	.ndo_poll_controller = ivshm_net_poll_controller,
+#endif
+};
+
+/* ethtool stat names, in the order ivshm_net_get_ethtool_stats() fills them. */
+static const char ivshm_net_stats[][ETH_GSTRING_LEN] = {
+	"interrupts",
+	"tx_packets",
+	"tx_notify",
+	"tx_pause",
+	"rx_packets",
+	"rx_notify",
+	"napi_poll",
+	"napi_complete",
+	"napi_poll_0",
+	"napi_poll_1",
+	"napi_poll_2",
+	"napi_poll_4",
+	"napi_poll_8",
+	"napi_poll_16",
+	"napi_poll_32",
+	"napi_poll_64",
+	"napi_poll_128",
+	"napi_poll_256",
+};
+
+#define NUM_STATS ARRAY_SIZE(ivshm_net_stats)
+
+/* ethtool: number of strings in @sset; only ETH_SS_STATS is supported. */
+static int ivshm_net_get_sset_count(struct net_device *ndev, int sset)
+{
+	if (sset == ETH_SS_STATS)
+		return NUM_STATS;
+
+	return -EOPNOTSUPP;
+}
+
+/* ethtool: copy the stat names into @buf for ETH_SS_STATS. */
+static void ivshm_net_get_strings(struct net_device *ndev, u32 sset, u8 *buf)
+{
+	if (sset == ETH_SS_STATS)
+		memcpy(buf, &ivshm_net_stats, sizeof(ivshm_net_stats));
+}
+
+/* ethtool: copy the driver counters into @st, then reset them to zero. */
+static void ivshm_net_get_ethtool_stats(struct net_device *ndev,
+					struct ethtool_stats *estats, u64 *st)
+{
+	struct ivshm_net *in = netdev_priv(ndev);
+	unsigned int n = 0;
+	unsigned int i;
+
+	st[n++] = in->stats.interrupts;
+	st[n++] = in->stats.tx_packets;
+	st[n++] = in->stats.tx_notify;
+	st[n++] = in->stats.tx_pause;
+	st[n++] = in->stats.rx_packets;
+	st[n++] = in->stats.rx_notify;
+	st[n++] = in->stats.napi_poll;
+	st[n++] = in->stats.napi_complete;
+
+	for (i = 0; i < ARRAY_SIZE(in->stats.napi_poll_n); i++)
+		st[n++] = in->stats.napi_poll_n[i];
+
+	memset(&in->stats, 0, sizeof(in->stats));
+}
+
+static const struct ethtool_ops ivshm_net_ethtool_ops = {
+	.get_sset_count		= ivshm_net_get_sset_count,
+	.get_strings		= ivshm_net_get_strings,
+	.get_ethtool_stats	= ivshm_net_get_ethtool_stats,
+};
+
+/* Probe: map registers and shared memory, register the netdev, request IRQ. */
+static int ivshm_net_probe(struct pci_dev *pdev,
+			   const struct pci_device_id *id)
+{
+	struct net_device *ndev;
+	struct ivshm_net *in;
+	struct ivshmem_regs __iomem *regs;
+	resource_size_t shmaddr;
+	resource_size_t shmlen;
+	int interrupt;
+	void *shm;
+	u32 ivpos;
+	int err;
+
+	err = pcim_enable_device(pdev);
+	if (err) {
+		dev_err(&pdev->dev, "pci_enable_device: %d\n", err);
+		return err;
+	}
+
+	err = pcim_iomap_regions(pdev, BIT(0), DRV_NAME);
+	if (err) {
+		dev_err(&pdev->dev, "pcim_iomap_regions: %d\n", err);
+		return err;
+	}
+
+	regs = pcim_iomap_table(pdev)[0];
+
+	shmlen = pci_resource_len(pdev, 2);
+
+	if (shmlen) {
+		shmaddr = pci_resource_start(pdev, 2);
+	} else {
+		union { u64 v; u32 hl[2]; } val;
+
+		pci_read_config_dword(pdev, JAILHOUSE_CFG_SHMEM_PTR,
+				      &val.hl[0]);
+		pci_read_config_dword(pdev, JAILHOUSE_CFG_SHMEM_PTR + 4,
+				      &val.hl[1]);
+		shmaddr = val.v;
+
+		pci_read_config_dword(pdev, JAILHOUSE_CFG_SHMEM_SZ,
+				      &val.hl[0]);
+		pci_read_config_dword(pdev, JAILHOUSE_CFG_SHMEM_SZ + 4,
+				      &val.hl[1]);
+		shmlen = val.v;
+	}
+
+
+	if (!devm_request_mem_region(&pdev->dev, shmaddr, shmlen, DRV_NAME))
+		return -EBUSY;
+
+	shm = devm_memremap(&pdev->dev, shmaddr, shmlen, MEMREMAP_WC);
+	/* devm_memremap() reports failure with ERR_PTR(), not NULL */
+	if (IS_ERR(shm))
+		return PTR_ERR(shm);
+
+	ivpos = readl(&regs->ivpos);
+	if (ivpos > 1) {
+		dev_err(&pdev->dev, "invalid IVPosition %d\n", ivpos);
+		return -EINVAL;
+	}
+
+	dev_info(&pdev->dev, "shared memory size %pa\n", &shmlen);
+
+	ndev = alloc_etherdev(sizeof(*in));
+	if (!ndev)
+		return -ENOMEM;
+
+	pci_set_drvdata(pdev, ndev);
+	SET_NETDEV_DEV(ndev, &pdev->dev);
+
+	in = netdev_priv(ndev);
+	in->ivshm_regs = regs;
+	in->shm = shm;
+	in->shmaddr = shmaddr;
+	in->shmlen = shmlen;
+	in->peer_id = !ivpos;
+	in->pdev = pdev;
+	spin_lock_init(&in->tx_free_lock);
+	spin_lock_init(&in->tx_clean_lock);
+
+	err = ivshm_net_calc_qsize(ndev);
+	if (err)
+		goto err_free;
+
+	in->state_wq = alloc_ordered_workqueue(DRV_NAME, 0);
+	if (!in->state_wq) {
+		err = -ENOMEM;
+		goto err_free;
+	}
+
+	INIT_WORK(&in->state_work, ivshm_net_state_change);
+
+	eth_random_addr(ndev->dev_addr);
+	ndev->netdev_ops = &ivshm_net_ops;
+	ndev->ethtool_ops = &ivshm_net_ethtool_ops;
+	ndev->mtu = min_t(u32, IVSHM_NET_MTU_DEF, in->qsize / 16);
+	ndev->hw_features = NETIF_F_HW_CSUM | NETIF_F_SG;
+	ndev->features = ndev->hw_features;
+
+	netif_carrier_off(ndev);
+	netif_napi_add(ndev, &in->napi, ivshm_net_poll, NAPI_POLL_WEIGHT);
+
+	err = register_netdev(ndev);
+	if (err)
+		goto err_wq;
+
+	err = pci_enable_msix(pdev, &in->msix, 1);
+	if (!err) {
+		interrupt = in->msix.vector;
+		in->using_msix = true;
+	} else {
+		interrupt = pdev->irq;
+		in->using_msix = false;
+	}
+
+	err = request_irq(interrupt, ivshm_net_int, 0, DRV_NAME, ndev);
+	if (err)
+		goto err_int;
+
+	pci_set_master(pdev);
+
+	writel(IVSHM_NET_STATE_RESET, &in->ivshm_regs->lstate);
+
+	return 0;
+
+err_int:
+	if (in->using_msix)
+		pci_disable_msix(pdev);
+	unregister_netdev(ndev);
+err_wq:
+	destroy_workqueue(in->state_wq);
+err_free:
+	free_netdev(ndev);
+
+	return err;
+}
+
+/* Remove: free the IRQ, unregister the netdev, stop the work, free the rest. */
+static void ivshm_net_remove(struct pci_dev *pdev)
+{
+	struct net_device *ndev = pci_get_drvdata(pdev);
+	struct ivshm_net *in = netdev_priv(ndev);
+
+	if (in->using_msix)  {
+		free_irq(in->msix.vector, ndev);
+		pci_disable_msix(pdev);
+	} else {
+		free_irq(pdev->irq, ndev);
+	}
+
+	unregister_netdev(ndev);
+	cancel_work_sync(&in->state_work);
+	destroy_workqueue(in->state_wq);
+	free_netdev(ndev);
+}
+
+static const struct pci_device_id ivshm_net_id_table[] = {
+	{ PCI_DEVICE(PCI_VENDOR_ID_REDHAT_QUMRANET, 0x1110),
+		(PCI_CLASS_OTHERS << 16) | (0x01 << 8), 0xffff00 },
+	{ 0 }
+};
+MODULE_DEVICE_TABLE(pci, ivshm_net_id_table);
+
+static struct pci_driver ivshm_net_driver = {
+	.name		= DRV_NAME,
+	.id_table	= ivshm_net_id_table,
+	.probe		= ivshm_net_probe,
+	.remove		= ivshm_net_remove,
+};
+module_pci_driver(ivshm_net_driver);
+
+MODULE_AUTHOR("Mans Rullgard <mans@mansr.com>");
+MODULE_LICENSE("GPL");