From fa0e2362149bb814d6b7431a7c42989d33002f60 Mon Sep 17 00:00:00 2001
From: Mans Rullgard <mans@mansr.com>
Date: Thu, 26 May 2016 16:04:02 +0100
Subject: [PATCH 13/32] ivshmem-net: virtual network device for Jailhouse

Work in progress.
---
 drivers/net/Kconfig       |   4 +
 drivers/net/Makefile      |   2 +
 drivers/net/ivshmem-net.c | 941 ++++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 947 insertions(+)
 create mode 100644 drivers/net/ivshmem-net.c

diff --git a/drivers/net/Kconfig b/drivers/net/Kconfig
index df1c7989e13d..8c65f55163e3 100644
--- a/drivers/net/Kconfig
+++ b/drivers/net/Kconfig
@@ -527,4 +527,8 @@ config NET_FAILOVER
          a VM with direct attached VF by failing over to the paravirtual
          datapath when the VF is unplugged.
 
+config IVSHMEM_NET
+       tristate "IVSHMEM virtual network device"
+       depends on PCI
+
 endif # NETDEVICES
diff --git a/drivers/net/Makefile b/drivers/net/Makefile
index 0d3ba056cda3..5041c293d4d0 100644
--- a/drivers/net/Makefile
+++ b/drivers/net/Makefile
@@ -79,3 +79,5 @@ thunderbolt-net-y += thunderbolt.o
 obj-$(CONFIG_THUNDERBOLT_NET) += thunderbolt-net.o
 obj-$(CONFIG_NETDEVSIM) += netdevsim/
 obj-$(CONFIG_NET_FAILOVER) += net_failover.o
+
+obj-$(CONFIG_IVSHMEM_NET) += ivshmem-net.o
diff --git a/drivers/net/ivshmem-net.c b/drivers/net/ivshmem-net.c
new file mode 100644
index 000000000000..b676bed2cc2e
--- /dev/null
+++ b/drivers/net/ivshmem-net.c
@@ -0,0 +1,941 @@
+/*
+ * Copyright 2016 Mans Rullgard <mans@mansr.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/pci.h>
+#include <linux/io.h>
+#include <linux/bitops.h>
+#include <linux/interrupt.h>
+#include <linux/netdevice.h>
+#include <linux/etherdevice.h>
+#include <linux/rtnetlink.h>
+#include <linux/virtio_ring.h>
+
+#define DRV_NAME "ivshmem-net"
+
+#define JAILHOUSE_CFG_SHMEM_PTR        0x40
+#define JAILHOUSE_CFG_SHMEM_SZ         0x48
+
+#define IVSHM_NET_STATE_RESET  0
+#define IVSHM_NET_STATE_INIT   1
+#define IVSHM_NET_STATE_READY  2
+#define IVSHM_NET_STATE_RUN    3
+
+#define IVSHM_NET_MTU_MIN 256
+#define IVSHM_NET_MTU_MAX 65535
+#define IVSHM_NET_MTU_DEF 16384
+
+#define IVSHM_NET_FRAME_SIZE(s) ALIGN(18 + (s), SMP_CACHE_BYTES)
+
+#define IVSHM_NET_VQ_ALIGN 64
+
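+/* ivshmem device registers, mapped from PCI BAR 0 */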
+struct ivshmem_regs {
+       u32 imask;
+       u32 istat;
+       u32 ivpos;
+       u32 doorbell;
+       u32 lstate;
+       u32 rstate;
+};
+
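+/* One direction of the link: a vring plus the data area its buffers live in */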
+struct ivshm_net_queue {
+       struct vring vr;
+       u32 free_head;
+       u32 num_free;
+       u32 num_added;
+       u16 last_avail_idx;
+       u16 last_used_idx;
+
+       void *data;
+       void *end;
+       u32 size;
+       u32 head;
+       u32 tail;
+};
+
+struct ivshm_net_stats {
+       u32 interrupts;
+       u32 tx_packets;
+       u32 tx_notify;
+       u32 tx_pause;
+       u32 rx_packets;
+       u32 rx_notify;
+       u32 napi_poll;
+       u32 napi_complete;
+       u32 napi_poll_n[10];
+};
+
+struct ivshm_net {
+       struct ivshm_net_queue rx;
+       struct ivshm_net_queue tx;
+
+       u32 vrsize;
+       u32 qlen;
+       u32 qsize;
+
+       spinlock_t tx_free_lock;
+       spinlock_t tx_clean_lock;
+
+       struct napi_struct napi;
+
+       u32 lstate;
+       u32 rstate;
+
+       struct workqueue_struct *state_wq;
+       struct work_struct state_work;
+
+       struct ivshm_net_stats stats;
+
+       struct ivshmem_regs __iomem *ivshm_regs;
+       void *shm;
+       phys_addr_t shmaddr;
+       resource_size_t shmlen;
+       u32 peer_id;
+
+       struct pci_dev *pdev;
+       struct msix_entry msix;
+       bool using_msix;
+};
+
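+/* Validate a shared-memory descriptor and translate it to a local pointer */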
+static void *ivshm_net_desc_data(struct ivshm_net *in,
+                                struct ivshm_net_queue *q,
+                                struct vring_desc *desc,
+                                u32 *len)
+{
+       u64 addr = READ_ONCE(desc->addr);
+       u32 dlen = READ_ONCE(desc->len);
+       void *data;
+
+       if (addr < in->shmaddr || addr > in->shmaddr + in->shmlen)
+               return NULL;
+
+       data = in->shm + (addr - in->shmaddr);
+
+       if (data < q->data || data >= q->end)
+               return NULL;
+
+       if (dlen > q->end - data)
+               return NULL;
+
+       *len = dlen;
+
+       return data;
+}
+
+static void ivshm_net_init_queue(struct ivshm_net *in,
+                                struct ivshm_net_queue *q,
+                                void *mem, unsigned int len)
+{
+       memset(q, 0, sizeof(*q));
+
+       vring_init(&q->vr, len, mem, IVSHM_NET_VQ_ALIGN);
+       q->data = mem + in->vrsize;
+       q->end = q->data + in->qsize;
+       q->size = in->qsize;
+}
+
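+/*
+ * Each side transmits from its own half of the shared memory, selected
+ * by IVPosition; swapping the used rings ties the two vrings together.
+ */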
+static void ivshm_net_init_queues(struct net_device *ndev)
+{
+       struct ivshm_net *in = netdev_priv(ndev);
+       int ivpos = readl(&in->ivshm_regs->ivpos);
+       void *tx;
+       void *rx;
+       int i;
+
+       tx = in->shm + ivpos * in->shmlen / 2;
+       rx = in->shm + !ivpos * in->shmlen / 2;
+
+       memset(tx, 0, in->shmlen / 2);
+
+       ivshm_net_init_queue(in, &in->rx, rx, in->qlen);
+       ivshm_net_init_queue(in, &in->tx, tx, in->qlen);
+
+       swap(in->rx.vr.used, in->tx.vr.used);
+
+       in->tx.num_free = in->tx.vr.num;
+
+       for (i = 0; i < in->tx.vr.num - 1; i++)
+               in->tx.vr.desc[i].next = i + 1;
+}
+
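+/* Pick the largest ring whose vring fits in 1/16 of the shared memory */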
+static int ivshm_net_calc_qsize(struct net_device *ndev)
+{
+       struct ivshm_net *in = netdev_priv(ndev);
+       unsigned int vrsize;
+       unsigned int qsize;
+       unsigned int qlen;
+
+       for (qlen = 4096; qlen > 32; qlen >>= 1) {
+               vrsize = vring_size(qlen, IVSHM_NET_VQ_ALIGN);
+               vrsize = ALIGN(vrsize, IVSHM_NET_VQ_ALIGN);
+               if (vrsize < in->shmlen / 16)
+                       break;
+       }
+
+       if (vrsize > in->shmlen / 2)
+               return -EINVAL;
+
+       qsize = in->shmlen / 2 - vrsize;
+
+       if (qsize < 4 * IVSHM_NET_MTU_MIN)
+               return -EINVAL;
+
+       in->vrsize = vrsize;
+       in->qlen = qlen;
+       in->qsize = qsize;
+
+       return 0;
+}
+
+static void ivshm_net_notify_tx(struct ivshm_net *in, unsigned int num)
+{
+       u16 evt, old, new;
+
+       virt_mb();
+
+       evt = READ_ONCE(vring_avail_event(&in->tx.vr));
+       old = in->tx.last_avail_idx - num;
+       new = in->tx.last_avail_idx;
+
+       if (vring_need_event(evt, new, old)) {
+               writel(in->peer_id << 16, &in->ivshm_regs->doorbell);
+               in->stats.tx_notify++;
+       }
+}
+
+static void ivshm_net_enable_rx_irq(struct ivshm_net *in)
+{
+       vring_avail_event(&in->rx.vr) = in->rx.last_avail_idx;
+       virt_wmb();
+}
+
+static void ivshm_net_notify_rx(struct ivshm_net *in, unsigned int num)
+{
+       u16 evt, old, new;
+
+       virt_mb();
+
+       evt = vring_used_event(&in->rx.vr);
+       old = in->rx.last_used_idx - num;
+       new = in->rx.last_used_idx;
+
+       if (vring_need_event(evt, new, old)) {
+               writel(in->peer_id << 16, &in->ivshm_regs->doorbell);
+               in->stats.rx_notify++;
+       }
+}
+
+static void ivshm_net_enable_tx_irq(struct ivshm_net *in)
+{
+       vring_used_event(&in->tx.vr) = in->tx.last_used_idx;
+       virt_wmb();
+}
+
+static bool ivshm_net_rx_avail(struct ivshm_net *in)
+{
+       virt_mb();
+       return READ_ONCE(in->rx.vr.avail->idx) != in->rx.last_avail_idx;
+}
+
+static size_t ivshm_net_tx_space(struct ivshm_net *in)
+{
+       struct ivshm_net_queue *tx = &in->tx;
+       u32 tail = tx->tail;
+       u32 head = tx->head;
+       u32 space;
+
+       if (head < tail)
+               space = tail - head;
+       else
+               space = max(tx->size - head, tail);
+
+       return space;
+}
+
+static bool ivshm_net_tx_ok(struct ivshm_net *in, unsigned int mtu)
+{
+       return in->tx.num_free >= 2 &&
+               ivshm_net_tx_space(in) >= 2 * IVSHM_NET_FRAME_SIZE(mtu);
+}
+
+static u32 ivshm_net_tx_advance(struct ivshm_net_queue *q, u32 *pos, u32 len)
+{
+       u32 p = *pos;
+
+       len = IVSHM_NET_FRAME_SIZE(len);
+
+       if (q->size - p < len)
+               p = 0;
+       *pos = p + len;
+
+       return p;
+}
+
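+/* Copy one skb into the tx data area and post a descriptor for it */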
+static int ivshm_net_tx_frame(struct net_device *ndev, struct sk_buff *skb)
+{
+       struct ivshm_net *in = netdev_priv(ndev);
+       struct ivshm_net_queue *tx = &in->tx;
+       struct vring *vr = &tx->vr;
+       struct vring_desc *desc;
+       unsigned int desc_idx;
+       unsigned int avail;
+       u32 head;
+       void *buf;
+
+       BUG_ON(tx->num_free < 1);
+
+       spin_lock(&in->tx_free_lock);
+       desc_idx = tx->free_head;
+       desc = &vr->desc[desc_idx];
+       tx->free_head = desc->next;
+       tx->num_free--;
+       spin_unlock(&in->tx_free_lock);
+
+       head = ivshm_net_tx_advance(tx, &tx->head, skb->len);
+
+       buf = tx->data + head;
+       skb_copy_and_csum_dev(skb, buf);
+
+       desc->addr = in->shmaddr + (buf - in->shm);
+       desc->len = skb->len;
+
+       avail = tx->last_avail_idx++ & (vr->num - 1);
+       vr->avail->ring[avail] = desc_idx;
+       tx->num_added++;
+
+       if (!skb->xmit_more) {
+               virt_store_release(&vr->avail->idx, tx->last_avail_idx);
+               ivshm_net_notify_tx(in, tx->num_added);
+               tx->num_added = 0;
+       }
+
+       return 0;
+}
+
+static void ivshm_net_tx_clean(struct net_device *ndev)
+{
+       struct ivshm_net *in = netdev_priv(ndev);
+       struct ivshm_net_queue *tx = &in->tx;
+       struct vring *vr = &tx->vr;
+       struct vring_desc *desc;
+       struct vring_desc *fdesc;
+       unsigned int used;
+       unsigned int num;
+       u16 used_idx;
+       u16 last;
+       u32 fhead;
+
+       if (!spin_trylock(&in->tx_clean_lock))
+               return;
+
+       used_idx = virt_load_acquire(&vr->used->idx);
+       last = tx->last_used_idx;
+
+       fdesc = NULL;
+       num = 0;
+
+       while (last != used_idx) {
+               void *data;
+               u32 len;
+               u32 tail;
+
+               used = vr->used->ring[last & (vr->num - 1)].id;
+               if (used >= vr->num) {
+                       netdev_err(ndev, "invalid tx used %d\n", used);
+                       break;
+               }
+
+               desc = &vr->desc[used];
+
+               data = ivshm_net_desc_data(in, &in->tx, desc, &len);
+               if (!data) {
+                       netdev_err(ndev, "bad tx descriptor\n");
+                       break;
+               }
+
+               tail = ivshm_net_tx_advance(tx, &tx->tail, len);
+               if (data != tx->data + tail) {
+                       netdev_err(ndev, "bad tx descriptor\n");
+                       break;
+               }
+
+               if (!num)
+                       fdesc = desc;
+               else
+                       desc->next = fhead;
+
+               fhead = used;
+               last++;
+               num++;
+       }
+
+       tx->last_used_idx = last;
+
+       spin_unlock(&in->tx_clean_lock);
+
+       if (num) {
+               spin_lock(&in->tx_free_lock);
+               fdesc->next = tx->free_head;
+               tx->free_head = fhead;
+               tx->num_free += num;
+               BUG_ON(tx->num_free > vr->num);
+               spin_unlock(&in->tx_free_lock);
+       }
+}
+
+static struct vring_desc *ivshm_net_rx_desc(struct net_device *ndev)
+{
+       struct ivshm_net *in = netdev_priv(ndev);
+       struct ivshm_net_queue *rx = &in->rx;
+       struct vring *vr = &rx->vr;
+       unsigned int avail;
+       u16 avail_idx;
+
+       avail_idx = virt_load_acquire(&vr->avail->idx);
+
+       if (avail_idx == rx->last_avail_idx)
+               return NULL;
+
+       avail = vr->avail->ring[rx->last_avail_idx++ & (vr->num - 1)];
+       if (avail >= vr->num) {
+               netdev_err(ndev, "invalid rx avail %d\n", avail);
+               return NULL;
+       }
+
+       return &vr->desc[avail];
+}
+
+static void ivshm_net_rx_finish(struct ivshm_net *in, struct vring_desc *desc)
+{
+       struct ivshm_net_queue *rx = &in->rx;
+       struct vring *vr = &rx->vr;
+       unsigned int desc_id = desc - vr->desc;
+       unsigned int used;
+
+       used = rx->last_used_idx++ & (vr->num - 1);
+       vr->used->ring[used].id = desc_id;
+
+       virt_store_release(&vr->used->idx, rx->last_used_idx);
+}
+
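+/* NAPI poll: reclaim completed tx buffers, then receive up to the budget */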
+static int ivshm_net_poll(struct napi_struct *napi, int budget)
+{
+       struct net_device *ndev = napi->dev;
+       struct ivshm_net *in = container_of(napi, struct ivshm_net, napi);
+       int received = 0;
+
+       in->stats.napi_poll++;
+
+       ivshm_net_tx_clean(ndev);
+
+       while (received < budget) {
+               struct vring_desc *desc;
+               struct sk_buff *skb;
+               void *data;
+               u32 len;
+
+               desc = ivshm_net_rx_desc(ndev);
+               if (!desc)
+                       break;
+
+               data = ivshm_net_desc_data(in, &in->rx, desc, &len);
+               if (!data) {
+                       netdev_err(ndev, "bad rx descriptor\n");
+                       break;
+               }
+
+               skb = napi_alloc_skb(napi, len);
+
+               if (skb) {
+                       memcpy(skb_put(skb, len), data, len);
+                       skb->protocol = eth_type_trans(skb, ndev);
+                       napi_gro_receive(napi, skb);
+               }
+
+               ndev->stats.rx_packets++;
+               ndev->stats.rx_bytes += len;
+
+               ivshm_net_rx_finish(in, desc);
+               received++;
+       }
+
+       if (received < budget) {
+               in->stats.napi_complete++;
+               napi_complete_done(napi, received);
+               ivshm_net_enable_rx_irq(in);
+               if (ivshm_net_rx_avail(in))
+                       napi_schedule(napi);
+       }
+
+       if (received)
+               ivshm_net_notify_rx(in, received);
+
+       in->stats.rx_packets += received;
+       in->stats.napi_poll_n[received ? 1 + min(ilog2(received), 8) : 0]++;
+
+       if (ivshm_net_tx_ok(in, ndev->mtu))
+               netif_wake_queue(ndev);
+
+       return received;
+}
+
+static netdev_tx_t ivshm_net_xmit(struct sk_buff *skb, struct net_device *ndev)
+{
+       struct ivshm_net *in = netdev_priv(ndev);
+
+       ivshm_net_tx_clean(ndev);
+
+       if (!ivshm_net_tx_ok(in, ndev->mtu)) {
+               ivshm_net_enable_tx_irq(in);
+               netif_stop_queue(ndev);
+               skb->xmit_more = 0;
+               in->stats.tx_pause++;
+       }
+
+       ivshm_net_tx_frame(ndev, skb);
+
+       in->stats.tx_packets++;
+       ndev->stats.tx_packets++;
+       ndev->stats.tx_bytes += skb->len;
+
+       dev_consume_skb_any(skb);
+
+       return NETDEV_TX_OK;
+}
+
+static void ivshm_net_set_state(struct ivshm_net *in, u32 state)
+{
+       virt_wmb();
+       WRITE_ONCE(in->lstate, state);
+       writel(state, &in->ivshm_regs->lstate);
+}
+
+static void ivshm_net_run(struct net_device *ndev)
+{
+       struct ivshm_net *in = netdev_priv(ndev);
+
+       netif_start_queue(ndev);
+       napi_enable(&in->napi);
+       napi_schedule(&in->napi);
+       ivshm_net_set_state(in, IVSHM_NET_STATE_RUN);
+}
+
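+/*
+ * Handshake state machine: RESET -> INIT -> READY -> RUN, driven by the
+ * local state and the peer state read back from the rstate register.
+ */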
+static void ivshm_net_state_change(struct work_struct *work)
+{
+       struct ivshm_net *in = container_of(work, struct ivshm_net, state_work);
+       struct net_device *ndev = in->napi.dev;
+       u32 rstate = readl(&in->ivshm_regs->rstate);
+
+       switch (in->lstate) {
+       case IVSHM_NET_STATE_RESET:
+               if (rstate < IVSHM_NET_STATE_READY)
+                       ivshm_net_set_state(in, IVSHM_NET_STATE_INIT);
+               break;
+
+       case IVSHM_NET_STATE_INIT:
+               if (rstate > IVSHM_NET_STATE_RESET) {
+                       ivshm_net_init_queues(ndev);
+                       ivshm_net_set_state(in, IVSHM_NET_STATE_READY);
+
+                       rtnl_lock();
+                       call_netdevice_notifiers(NETDEV_CHANGEADDR, ndev);
+                       rtnl_unlock();
+               }
+               break;
+
+       case IVSHM_NET_STATE_READY:
+               if (rstate >= IVSHM_NET_STATE_READY) {
+                       netif_carrier_on(ndev);
+                       if (ndev->flags & IFF_UP)
+                               ivshm_net_run(ndev);
+               } else {
+                       netif_carrier_off(ndev);
+                       ivshm_net_set_state(in, IVSHM_NET_STATE_RESET);
+               }
+               break;
+
+       case IVSHM_NET_STATE_RUN:
+               if (rstate < IVSHM_NET_STATE_READY) {
+                       netif_stop_queue(ndev);
+                       napi_disable(&in->napi);
+                       netif_carrier_off(ndev);
+                       ivshm_net_set_state(in, IVSHM_NET_STATE_RESET);
+               }
+               break;
+       }
+
+       virt_wmb();
+       WRITE_ONCE(in->rstate, rstate);
+}
+
628 +
629 +static bool ivshm_net_check_state(struct net_device *ndev)
630 +{
631 +       struct ivshm_net *in = netdev_priv(ndev);
632 +       u32 rstate = readl(&in->ivshm_regs->rstate);
633 +
634 +       if (rstate != READ_ONCE(in->rstate) ||
635 +           in->lstate != IVSHM_NET_STATE_RUN) {
636 +               queue_work(in->state_wq, &in->state_work);
637 +               return false;
638 +       }
639 +
640 +       return true;
641 +}
642 +
643 +static irqreturn_t ivshm_net_int(int irq, void *data)
644 +{
645 +       struct net_device *ndev = data;
646 +       struct ivshm_net *in = netdev_priv(ndev);
647 +
648 +       in->stats.interrupts++;
649 +
650 +       ivshm_net_check_state(ndev);
651 +       napi_schedule_irqoff(&in->napi);
652 +
653 +       return IRQ_HANDLED;
654 +}
655 +
656 +static int ivshm_net_open(struct net_device *ndev)
657 +{
658 +       struct ivshm_net *in = netdev_priv(ndev);
659 +
660 +       netdev_reset_queue(ndev);
661 +       ndev->operstate = IF_OPER_UP;
662 +
663 +       if (in->lstate == IVSHM_NET_STATE_READY)
664 +               ivshm_net_run(ndev);
665 +
666 +       return 0;
667 +}
668 +
669 +static int ivshm_net_stop(struct net_device *ndev)
670 +{
671 +       struct ivshm_net *in = netdev_priv(ndev);
672 +
673 +       ndev->operstate = IF_OPER_DOWN;
674 +
675 +       if (in->lstate == IVSHM_NET_STATE_RUN) {
676 +               napi_disable(&in->napi);
677 +               netif_stop_queue(ndev);
678 +               ivshm_net_set_state(in, IVSHM_NET_STATE_READY);
679 +       }
680 +
681 +       return 0;
682 +}
683 +
684 +static int ivshm_net_change_mtu(struct net_device *ndev, int mtu)
685 +{
686 +       struct ivshm_net *in = netdev_priv(ndev);
687 +       struct ivshm_net_queue *tx = &in->tx;
688 +
689 +       if (mtu < IVSHM_NET_MTU_MIN || mtu > IVSHM_NET_MTU_MAX)
690 +               return -EINVAL;
691 +
692 +       if (in->tx.size / mtu < 4)
693 +               return -EINVAL;
694 +
695 +       if (ivshm_net_tx_space(in) < 2 * IVSHM_NET_FRAME_SIZE(mtu))
696 +               return -EBUSY;
697 +
698 +       if (in->tx.size - tx->head < IVSHM_NET_FRAME_SIZE(mtu) &&
699 +           tx->head < tx->tail)
700 +               return -EBUSY;
701 +
702 +       netif_tx_lock_bh(ndev);
703 +       if (in->tx.size - tx->head < IVSHM_NET_FRAME_SIZE(mtu))
704 +               tx->head = 0;
705 +       netif_tx_unlock_bh(ndev);
706 +
707 +       ndev->mtu = mtu;
708 +
709 +       return 0;
710 +}
711 +
712 +#ifdef CONFIG_NET_POLL_CONTROLLER
713 +static void ivshm_net_poll_controller(struct net_device *ndev)
714 +{
715 +       struct ivshm_net *in = netdev_priv(ndev);
716 +
717 +       napi_schedule(&in->napi);
718 +}
719 +#endif
720 +
721 +static const struct net_device_ops ivshm_net_ops = {
722 +       .ndo_open       = ivshm_net_open,
723 +       .ndo_stop       = ivshm_net_stop,
724 +       .ndo_start_xmit = ivshm_net_xmit,
725 +       .ndo_change_mtu = ivshm_net_change_mtu,
726 +#ifdef CONFIG_NET_POLL_CONTROLLER
727 +       .ndo_poll_controller = ivshm_net_poll_controller,
728 +#endif
729 +};
730 +
731 +static const char ivshm_net_stats[][ETH_GSTRING_LEN] = {
732 +       "interrupts",
733 +       "tx_packets",
734 +       "tx_notify",
735 +       "tx_pause",
736 +       "rx_packets",
737 +       "rx_notify",
738 +       "napi_poll",
739 +       "napi_complete",
740 +       "napi_poll_0",
741 +       "napi_poll_1",
742 +       "napi_poll_2",
743 +       "napi_poll_4",
744 +       "napi_poll_8",
745 +       "napi_poll_16",
746 +       "napi_poll_32",
747 +       "napi_poll_64",
748 +       "napi_poll_128",
749 +       "napi_poll_256",
750 +};
751 +
752 +#define NUM_STATS ARRAY_SIZE(ivshm_net_stats)
753 +
754 +static int ivshm_net_get_sset_count(struct net_device *ndev, int sset)
755 +{
756 +       if (sset == ETH_SS_STATS)
757 +               return NUM_STATS;
758 +
759 +       return -EOPNOTSUPP;
760 +}
761 +
762 +static void ivshm_net_get_strings(struct net_device *ndev, u32 sset, u8 *buf)
763 +{
764 +       if (sset == ETH_SS_STATS)
765 +               memcpy(buf, &ivshm_net_stats, sizeof(ivshm_net_stats));
766 +}
767 +
768 +static void ivshm_net_get_ethtool_stats(struct net_device *ndev,
769 +                                       struct ethtool_stats *estats, u64 *st)
770 +{
771 +       struct ivshm_net *in = netdev_priv(ndev);
772 +       unsigned int n = 0;
773 +       unsigned int i;
774 +
775 +       st[n++] = in->stats.interrupts;
776 +       st[n++] = in->stats.tx_packets;
777 +       st[n++] = in->stats.tx_notify;
778 +       st[n++] = in->stats.tx_pause;
779 +       st[n++] = in->stats.rx_packets;
780 +       st[n++] = in->stats.rx_notify;
781 +       st[n++] = in->stats.napi_poll;
782 +       st[n++] = in->stats.napi_complete;
783 +
784 +       for (i = 0; i < ARRAY_SIZE(in->stats.napi_poll_n); i++)
785 +               st[n++] = in->stats.napi_poll_n[i];
786 +
787 +       memset(&in->stats, 0, sizeof(in->stats));
788 +}
789 +
790 +static const struct ethtool_ops ivshm_net_ethtool_ops = {
791 +       .get_sset_count         = ivshm_net_get_sset_count,
792 +       .get_strings            = ivshm_net_get_strings,
793 +       .get_ethtool_stats      = ivshm_net_get_ethtool_stats,
794 +};
795 +
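+/*
+ * Map the register BAR, locate the shared memory (BAR 2, or the Jailhouse
+ * config-space words when BAR 2 is absent), then register the netdev.
+ */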
+static int ivshm_net_probe(struct pci_dev *pdev,
+                          const struct pci_device_id *id)
+{
+       struct net_device *ndev;
+       struct ivshm_net *in;
+       struct ivshmem_regs __iomem *regs;
+       resource_size_t shmaddr;
+       resource_size_t shmlen;
+       int interrupt;
+       void *shm;
+       u32 ivpos;
+       int err;
+
+       err = pcim_enable_device(pdev);
+       if (err) {
+               dev_err(&pdev->dev, "pci_enable_device: %d\n", err);
+               return err;
+       }
+
+       err = pcim_iomap_regions(pdev, BIT(0), DRV_NAME);
+       if (err) {
+               dev_err(&pdev->dev, "pcim_iomap_regions: %d\n", err);
+               return err;
+       }
+
+       regs = pcim_iomap_table(pdev)[0];
+
+       shmlen = pci_resource_len(pdev, 2);
+
+       if (shmlen) {
+               shmaddr = pci_resource_start(pdev, 2);
+       } else {
+               union { u64 v; u32 hl[2]; } val;
+
+               pci_read_config_dword(pdev, JAILHOUSE_CFG_SHMEM_PTR,
+                                     &val.hl[0]);
+               pci_read_config_dword(pdev, JAILHOUSE_CFG_SHMEM_PTR + 4,
+                                     &val.hl[1]);
+               shmaddr = val.v;
+
+               pci_read_config_dword(pdev, JAILHOUSE_CFG_SHMEM_SZ,
+                                     &val.hl[0]);
+               pci_read_config_dword(pdev, JAILHOUSE_CFG_SHMEM_SZ + 4,
+                                     &val.hl[1]);
+               shmlen = val.v;
+       }
+
+       if (!devm_request_mem_region(&pdev->dev, shmaddr, shmlen, DRV_NAME))
+               return -EBUSY;
+
+       shm = devm_memremap(&pdev->dev, shmaddr, shmlen, MEMREMAP_WC);
+       if (!shm)
+               return -ENOMEM;
+
+       ivpos = readl(&regs->ivpos);
+       if (ivpos > 1) {
+               dev_err(&pdev->dev, "invalid IVPosition %d\n", ivpos);
+               return -EINVAL;
+       }
+
+       dev_info(&pdev->dev, "shared memory size %pa\n", &shmlen);
+
+       ndev = alloc_etherdev(sizeof(*in));
+       if (!ndev)
+               return -ENOMEM;
+
+       pci_set_drvdata(pdev, ndev);
+       SET_NETDEV_DEV(ndev, &pdev->dev);
+
+       in = netdev_priv(ndev);
+       in->ivshm_regs = regs;
+       in->shm = shm;
+       in->shmaddr = shmaddr;
+       in->shmlen = shmlen;
+       in->peer_id = !ivpos;
+       in->pdev = pdev;
+       spin_lock_init(&in->tx_free_lock);
+       spin_lock_init(&in->tx_clean_lock);
+
+       err = ivshm_net_calc_qsize(ndev);
+       if (err)
+               goto err_free;
+
+       in->state_wq = alloc_ordered_workqueue(DRV_NAME, 0);
+       if (!in->state_wq) {
+               err = -ENOMEM;
+               goto err_free;
+       }
+
+       INIT_WORK(&in->state_work, ivshm_net_state_change);
+
+       eth_random_addr(ndev->dev_addr);
+       ndev->netdev_ops = &ivshm_net_ops;
+       ndev->ethtool_ops = &ivshm_net_ethtool_ops;
+       ndev->mtu = min_t(u32, IVSHM_NET_MTU_DEF, in->qsize / 16);
+       ndev->hw_features = NETIF_F_HW_CSUM | NETIF_F_SG;
+       ndev->features = ndev->hw_features;
+
+       netif_carrier_off(ndev);
+       netif_napi_add(ndev, &in->napi, ivshm_net_poll, NAPI_POLL_WEIGHT);
+
+       err = register_netdev(ndev);
+       if (err)
+               goto err_wq;
+
+       err = pci_enable_msix(pdev, &in->msix, 1);
+       if (!err) {
+               interrupt = in->msix.vector;
+               in->using_msix = true;
+       } else {
+               interrupt = pdev->irq;
+               in->using_msix = false;
+       }
+
+       err = request_irq(interrupt, ivshm_net_int, 0, DRV_NAME, ndev);
+       if (err)
+               goto err_int;
+
+       pci_set_master(pdev);
+
+       writel(IVSHM_NET_STATE_RESET, &in->ivshm_regs->lstate);
+
+       return 0;
+
+err_int:
+       if (in->using_msix)
+               pci_disable_msix(pdev);
+       unregister_netdev(ndev);
+err_wq:
+       destroy_workqueue(in->state_wq);
+err_free:
+       free_netdev(ndev);
+
+       return err;
+}
+
+static void ivshm_net_remove(struct pci_dev *pdev)
+{
+       struct net_device *ndev = pci_get_drvdata(pdev);
+       struct ivshm_net *in = netdev_priv(ndev);
+
+       if (in->using_msix) {
+               free_irq(in->msix.vector, ndev);
+               pci_disable_msix(pdev);
+       } else {
+               free_irq(pdev->irq, ndev);
+       }
+
+       unregister_netdev(ndev);
+       cancel_work_sync(&in->state_work);
+       destroy_workqueue(in->state_wq);
+       free_netdev(ndev);
+}
+
+static const struct pci_device_id ivshm_net_id_table[] = {
+       { PCI_DEVICE(PCI_VENDOR_ID_REDHAT_QUMRANET, 0x1110),
+               (PCI_CLASS_OTHERS << 16) | (0x01 << 8), 0xffff00 },
+       { 0 }
+};
+MODULE_DEVICE_TABLE(pci, ivshm_net_id_table);
+
+static struct pci_driver ivshm_net_driver = {
+       .name           = DRV_NAME,
+       .id_table       = ivshm_net_id_table,
+       .probe          = ivshm_net_probe,
+       .remove         = ivshm_net_remove,
+};
+module_pci_driver(ivshm_net_driver);
+
+MODULE_AUTHOR("Mans Rullgard <mans@mansr.com>");
+MODULE_LICENSE("GPL");
-- 
2.11.0