From 7b6d11e7136cee21cc9a76614174abac999f6173 Mon Sep 17 00:00:00 2001
From: Michael Brown
Date: Wed, 12 Sep 2007 22:17:43 +0100
Subject: [PATCH] Started IB driver rewrite

---
 src/drivers/net/mlx_ipoib/ib_mt25218.c |    7 +
 src/drivers/net/mlx_ipoib/mt25218.c    |  206 ++++++++++++++++++++++++-
 src/include/gpxe/errfile.h             |    1 +
 src/include/gpxe/infiniband.h          |   52 +++++++
 src/include/gpxe/netdevice.h           |    4 +-
 src/net/infiniband.c                   |  118 ++++++++++++++
 6 files changed, 381 insertions(+), 7 deletions(-)
 create mode 100644 src/include/gpxe/infiniband.h
 create mode 100644 src/net/infiniband.c

diff --git a/src/drivers/net/mlx_ipoib/ib_mt25218.c b/src/drivers/net/mlx_ipoib/ib_mt25218.c
index 8f3873e6..631a95cb 100644
--- a/src/drivers/net/mlx_ipoib/ib_mt25218.c
+++ b/src/drivers/net/mlx_ipoib/ib_mt25218.c
@@ -1586,6 +1586,13 @@ static void prep_send_wqe_buf(void *qph,
 		len += offset;
 	}
 	snd_wqe->mpointer[0].byte_count = cpu_to_be32(len);
+
+	DBG ( "prep_send_wqe_buf()\n" );
+	DBG ( "snd_wqe:\n" );
+	DBG_HD ( snd_wqe, sizeof ( *snd_wqe ) );
+	DBG ( "packet:\n" );
+	DBG_HD ( bus_to_virt(be32_to_cpu(snd_wqe->mpointer[0].local_addr_l)),
+		 len );
 }
 
 static void *alloc_ud_av(void)
diff --git a/src/drivers/net/mlx_ipoib/mt25218.c b/src/drivers/net/mlx_ipoib/mt25218.c
index 11a35c2e..b9b12c36 100644
--- a/src/drivers/net/mlx_ipoib/mt25218.c
+++ b/src/drivers/net/mlx_ipoib/mt25218.c
@@ -10,6 +10,15 @@ Skeleton NIC driver for Etherboot
  * your option) any later version.
  */
 
+#include <errno.h>
+#include <gpxe/pci.h>
+#include <gpxe/iobuf.h>
+#include <gpxe/netdevice.h>
+#include <gpxe/infiniband.h>
+
+struct mlx_nic {
+};
+
 /* to get some global routines like printf */
 #include "etherboot.h"
 /* to get the interface to the body of the program */
@@ -145,6 +154,131 @@ static void mt25218_transmit(struct nic *nic, const char *dest, /* Destination *
 	}
 }
 
+/**
+ * Open network device
+ *
+ * @v netdev		Network device
+ * @ret rc		Return status code
+ */
+static int mlx_open ( struct net_device *netdev __unused ) {
+	return 0;
+}
+
+/**
+ * Close network device
+ *
+ * @v netdev		Network device
+ */
+static void mlx_close ( struct net_device *netdev __unused ) {
+}
+
+#warning "Broadcast address?"
+static uint8_t ib_broadcast[IB_ALEN] = { 0xff, };
+
+
+/**
+ * Transmit packet
+ *
+ * @v netdev		Network device
+ * @v iobuf		I/O buffer
+ * @ret rc		Return status code
+ */
+static int mlx_transmit ( struct net_device *netdev __unused,
+			  struct io_buffer *iobuf ) {
+	struct ibhdr *ibhdr = iobuf->data;
+
+	DBG ( "Sending packet:\n" );
+	// DBG_HD ( iobuf->data, iob_len ( iobuf ) );
+
+	DBG ( "Peer:\n" );
+	DBG_HD ( &ibhdr->peer[0], IB_ALEN );
+	DBG ( "Bcast:\n" );
+	DBG_HD ( &ib_broadcast[0], IB_ALEN );
+
+	iob_pull ( iobuf, sizeof ( *ibhdr ) );
+
+	if ( memcmp ( ibhdr->peer, ib_broadcast, IB_ALEN ) == 0 ) {
+		printf ( "Sending broadcast packet\n" );
+		return send_bcast_packet ( ibhdr->proto, iobuf->data,
+					   iob_len ( iobuf ) );
+	} else {
+		printf ( "Sending unicast packet\n" );
+		return send_ucast_packet ( ibhdr->peer, ibhdr->proto,
+					   iobuf->data, iob_len ( iobuf ) );
+	}
+}
+
+/**
+ * Poll for completed and received packets
+ *
+ * @v netdev		Network device
+ */
+static void mlx_poll ( struct net_device *netdev ) {
+	struct ib_cqe_st ib_cqe;
+	uint8_t num_cqes;
+	unsigned int len;
+	struct io_buffer *iobuf;
+	int rc;
+
+	if ( ( rc = poll_error_buf() ) != 0 ) {
+		DBG ( "poll_error_buf() failed: %s\n", strerror ( rc ) );
+		return;
+	}
+
+	if ( ( rc = drain_eq() ) != 0 ) {
+		DBG ( "drain_eq() failed: %s\n", strerror ( rc ) );
+		return;
+	}
+
+	if ( ( rc = ib_poll_cq ( ipoib_data.rcv_cqh, &ib_cqe,
+				 &num_cqes ) ) != 0 ) {
+		DBG ( "ib_poll_cq() failed: %s\n", strerror ( rc ) );
+		return;
+	}
+
+	if ( ! num_cqes )
+		return;
+
+	if ( ib_cqe.is_error ) {
+		DBG ( "cqe error\n" );
+		free_wqe ( ib_cqe.wqe );
+		return;
+	}
+
+	len = ib_cqe.count;
+	iobuf = alloc_iob ( len );
+	if ( ! iobuf ) {
+		DBG ( "out of memory\n" );
+		free_wqe ( ib_cqe.wqe );
+		return;
+	}
+	/* NOTE(review): legacy WQE accessor; confirm index and GRH offset */
+	memcpy ( iob_put ( iobuf, len ),
+		 get_rcv_wqe_buf ( ib_cqe.wqe, 1 ), len );
+	DBG ( "Received packet:\n" );
+	DBG_HD ( iobuf->data, iob_len ( iobuf ) );
+
+	netdev_rx ( netdev, iobuf );
+	free_wqe ( ib_cqe.wqe );
+}
+
+/**
+ * Enable or disable interrupts
+ *
+ * @v netdev		Network device
+ * @v enable		Interrupts should be enabled
+ */
+static void mlx_irq ( struct net_device *netdev __unused, int enable __unused ) {
+}
+
+static struct net_device_operations mlx_operations = {
+	.open		= mlx_open,
+	.close		= mlx_close,
+	.transmit	= mlx_transmit,
+	.poll		= mlx_poll,
+	.irq		= mlx_irq,
+};
+
 /**************************************************************************
 DISABLE - Turn off ethernet interface
 ***************************************************************************/
@@ -165,6 +299,21 @@ static void mt25218_disable(struct nic *nic)
 	}
 }
 
+/**
+ * Remove PCI device
+ *
+ * @v pci		PCI device
+ */
+static void mlx_remove ( struct pci_device *pci ) {
+	struct net_device *netdev = pci_get_drvdata ( pci );
+	struct mlx_nic *mlx = netdev->priv;
+
+	unregister_netdev ( netdev );
+	ipoib_close(0);
+	netdev_nullify ( netdev );
+	netdev_put ( netdev );
+}
+
 static struct nic_operations mt25218_operations = {
 	.connect	= dummy_connect,
 	.poll		= mt25218_poll,
@@ -233,12 +382,59 @@ static int mt25218_probe(struct nic *nic, struct pci_device *pci)
 	return 0;
 }
 
-static struct pci_device_id mt25218_nics[] = {
+/**
+ * Probe PCI device
+ *
+ * @v pci		PCI device
+ * @v id		PCI ID
+ * @ret rc		Return status code
+ */
+static int mlx_probe ( struct pci_device *pci,
+		       const struct pci_device_id *id __unused ) {
+	struct net_device *netdev;
+	struct mlx_nic *mlx;
+	int rc;
+
+	/* Allocate net device */
+	netdev = alloc_ibdev ( sizeof ( *mlx ) );
+	if ( ! netdev )
+		return -ENOMEM;
+	netdev_init ( netdev, &mlx_operations );
+	mlx = netdev->priv;
+	pci_set_drvdata ( pci, netdev );
+	netdev->dev = &pci->dev;
+	memset ( mlx, 0, sizeof ( *mlx ) );
+
+	/* Fix up PCI device */
+	adjust_pci_device ( pci );
+
+	/* Initialise hardware */
+	if ( ( rc = ipoib_init ( pci ) ) != 0 )
+		goto err_ipoib_init;
+	memcpy ( netdev->ll_addr, ipoib_data.port_gid_raw, IB_ALEN );
+
+	/* Register network device */
+	if ( ( rc = register_netdev ( netdev ) ) != 0 )
+		goto err_register_netdev;
+
+	return 0;
+
+ err_register_netdev:
+ err_ipoib_init:
+	ipoib_close(0);
+	netdev_nullify ( netdev );
+	netdev_put ( netdev );
+	return rc;
+}
+
+static struct pci_device_id mlx_nics[] = {
 	PCI_ROM(0x15b3, 0x6282, "MT25218", "MT25218 HCA driver"),
 	PCI_ROM(0x15b3, 0x6274, "MT25204", "MT25204 HCA driver"),
 };
 
-PCI_DRIVER ( mt25218_driver, mt25218_nics, PCI_NO_CLASS );
-
-DRIVER ( "MT25218", nic_driver, pci_driver, mt25218_driver,
-	 mt25218_probe, mt25218_disable );
+struct pci_driver mlx_driver __pci_driver = {
+	.ids = mlx_nics,
+	.id_count = ( sizeof ( mlx_nics ) / sizeof ( mlx_nics[0] ) ),
+	.probe = mlx_probe,
+	.remove = mlx_remove,
+};
diff --git a/src/include/gpxe/errfile.h b/src/include/gpxe/errfile.h
index 4f9e7bc6..3413f9cf 100644
--- a/src/include/gpxe/errfile.h
+++ b/src/include/gpxe/errfile.h
@@ -123,6 +123,7 @@
 #define ERRFILE_dhcp			( ERRFILE_NET | 0x00100000 )
 #define ERRFILE_dns			( ERRFILE_NET | 0x00110000 )
 #define ERRFILE_tftp			( ERRFILE_NET | 0x00120000 )
+#define ERRFILE_infiniband		( ERRFILE_NET | 0x00130000 )
 
 #define ERRFILE_image		      ( ERRFILE_IMAGE | 0x00000000 )
 #define ERRFILE_elf		      ( ERRFILE_IMAGE | 0x00010000 )
diff --git a/src/include/gpxe/infiniband.h b/src/include/gpxe/infiniband.h
new file mode 100644
index 00000000..126113a7
--- /dev/null
+++ b/src/include/gpxe/infiniband.h
@@ -0,0 +1,52 @@
+#ifndef _GPXE_INFINIBAND_H
+#define _GPXE_INFINIBAND_H
+
+/** @file
+ *
+ * Infiniband protocol
+ *
+ */
+
+#include <stdint.h>
+#include <gpxe/netdevice.h>
+
+/** Infiniband hardware address length */
+#define IB_ALEN 20
+#define IB_HLEN 24 /**< Infiniband header length; sizeof ( struct ibhdr ) */
+
+/** An Infiniband header
+ *
+ * This data structure doesn't represent the on-wire format, but does
+ * contain all the information required by the driver to construct the
+ * packet.
+ */
+struct ibhdr {
+	/** Peer address */
+	uint8_t peer[IB_ALEN];
+	/** Network-layer protocol */
+	uint16_t proto;
+	/** Reserved, must be zero */
+	uint16_t reserved;
+} __attribute__ (( packed ));
+
+extern struct ll_protocol infiniband_protocol;
+
+extern const char * ib_ntoa ( const void *ll_addr );
+
+/**
+ * Allocate Infiniband device
+ *
+ * @v priv_size		Size of driver private data
+ * @ret netdev		Network device, or NULL
+ */
+static inline struct net_device * alloc_ibdev ( size_t priv_size ) {
+	struct net_device *netdev;
+
+	netdev = alloc_netdev ( priv_size );
+	if ( netdev ) {
+		netdev->ll_protocol = &infiniband_protocol;
+	}
+	return netdev;
+}
+
+#endif /* _GPXE_INFINIBAND_H */
diff --git a/src/include/gpxe/netdevice.h b/src/include/gpxe/netdevice.h
index d82c6d8f..2cbd0efb 100644
--- a/src/include/gpxe/netdevice.h
+++ b/src/include/gpxe/netdevice.h
@@ -19,10 +19,10 @@ struct ll_protocol;
 struct device;
 
 /** Maximum length of a link-layer address */
-#define MAX_LL_ADDR_LEN 6
+#define MAX_LL_ADDR_LEN 20
 
 /** Maximum length of a link-layer header */
-#define MAX_LL_HEADER_LEN 16
+#define MAX_LL_HEADER_LEN 32
 
 /** Maximum length of a network-layer address */
 #define MAX_NET_ADDR_LEN 4
diff --git a/src/net/infiniband.c b/src/net/infiniband.c
new file mode 100644
index 00000000..bcfac292
--- /dev/null
+++ b/src/net/infiniband.c
@@ -0,0 +1,118 @@
+/*
+ * Copyright (C) 2007 Michael Brown .
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of the
+ * License, or any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#include <stdint.h>
+#include <stdio.h>
+#include <string.h>
+#include <byteswap.h>
+#include <errno.h>
+#include <assert.h>
+#include <gpxe/if_arp.h>
+#include <gpxe/netdevice.h>
+#include <gpxe/iobuf.h>
+#include <gpxe/infiniband.h>
+
+/** @file
+ *
+ * Infiniband protocol
+ *
+ */
+
+/** Infiniband broadcast address (NOTE: only byte 0 is 0xff, the rest are zero) */
+static uint8_t ib_broadcast[IB_ALEN] = { 0xff, };
+
+/**
+ * Transmit Infiniband packet
+ *
+ * @v iobuf		I/O buffer
+ * @v netdev		Network device
+ * @v net_protocol	Network-layer protocol
+ * @v ll_dest		Link-layer destination address
+ *
+ * Prepends the Infiniband link-layer header and transmits the packet.
+ */
+static int ib_tx ( struct io_buffer *iobuf, struct net_device *netdev,
+		   struct net_protocol *net_protocol, const void *ll_dest ) {
+	struct ibhdr *ibhdr = iob_push ( iobuf, sizeof ( *ibhdr ) );
+
+
+	/* Build Infiniband header */
+	memcpy ( ibhdr->peer, ll_dest, IB_ALEN );
+	ibhdr->proto = net_protocol->net_proto;
+	ibhdr->reserved = 0;
+
+	/* Hand off to network device */
+	return netdev_tx ( netdev, iobuf );
+}
+
+/**
+ * Process received Infiniband packet
+ *
+ * @v iobuf	I/O buffer
+ * @v netdev	Network device
+ *
+ * Strips off the Infiniband link-layer header and passes up to the
+ * network-layer protocol.
+ */
+static int ib_rx ( struct io_buffer *iobuf, struct net_device *netdev ) {
+	struct ibhdr *ibhdr = iobuf->data;
+
+	/* Sanity check */
+	if ( iob_len ( iobuf ) < sizeof ( *ibhdr ) ) {
+		DBG ( "Infiniband packet too short (%zd bytes)\n",
+		      iob_len ( iobuf ) );
+		free_iob ( iobuf );
+		return -EINVAL;
+	}
+
+	/* Strip off Infiniband header */
+	iob_pull ( iobuf, sizeof ( *ibhdr ) );
+
+	/* Hand off to network-layer protocol */
+	return net_rx ( iobuf, netdev, ibhdr->proto, ibhdr->peer );
+}
+
+/**
+ * Transcribe Infiniband address
+ *
+ * @v ll_addr	Link-layer address
+ * @ret string	Link-layer address in human-readable format
+ */
+const char * ib_ntoa ( const void *ll_addr ) {
+	static char buf[ ( IB_ALEN * 3 ) + 1 ]; /* ":xx" per byte plus NUL */
+	const uint8_t *ib_addr = ll_addr;
+	unsigned int i;
+	char *p = buf;
+
+	for ( i = 0 ; i < IB_ALEN ; i++ ) {
+		p += sprintf ( p, ":%02x", ib_addr[i] );
+	}
+	return ( buf + 1 ); /* skip the leading ':' */
+}
+
+/** Infiniband protocol */
+struct ll_protocol infiniband_protocol __ll_protocol = {
+	.name		= "Infiniband",
+	.ll_proto	= htons ( ARPHRD_INFINIBAND ),
+	.ll_addr_len	= IB_ALEN,
+	.ll_header_len	= IB_HLEN,
+	.ll_broadcast	= ib_broadcast,
+	.tx		= ib_tx,
+	.rx		= ib_rx,
+	.ntoa		= ib_ntoa,
+};